Coverage for src/crawler/factory.py: 97%

57 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-11 14:57 +0000

1from crawler.by_source.cambridge_crawler import CambridgeCrawler 

2from crawler.by_source.emis_aas_crawler import Emis_aasCrawler 

3from crawler.by_source.geodesic_crawler import GeodesicCrawler 

4from crawler.by_source.isrp_crawler import IsrpCrawler 

5from crawler.by_source.jsig_crawler import JsigCrawler 

6from crawler.by_source.numdam_crawler import NumdamCrawler 

7from crawler.by_source.ptm_crawler import PtmCrawler 

8from crawler.by_source.scholastica_crawler import ScholasticaCrawler 

9from crawler.by_source.slc_crawler import Slc_Crawler 

10 

11from .base_crawler import BaseCollectionCrawler 

12from .by_source.amc_crawler import AmcCrawler 

13from .by_source.ami_crawler import AmiCrawler 

14from .by_source.amp_crawler import AmpCrawler 

15from .by_source.ams.ams_eraams_crawler import Ams_eraamsCrawler 

16from .by_source.ams.ams_jams_crawler import Ams_jamsCrawler 

17from .by_source.arsia_crawler import ArsiaCrawler 

18from .by_source.asuo_crawler import AsuoCrawler 

19from .by_source.aulfm_crawler import AulfmCrawler 

20from .by_source.bdim_crawler import BdimCrawler 

21from .by_source.bmms_crawler import BmmsCrawler 

22from .by_source.csis_crawler import CsisCrawler 

23from .by_source.dml_e_crawler import Dml_eCrawler 

24from .by_source.dmlbul_crawler import DmlbulCrawler 

25from .by_source.dmlcz_crawler import DmlczCrawler 

26from .by_source.dmlpl_crawler import DmlplCrawler 

27from .by_source.edpsci_crawler import EdpsciCrawler 

28from .by_source.elibm_crawler import ElibmCrawler 

29from .by_source.emis_am_crawler import Emis_amCrawler 

30from .by_source.emis_hoa_crawler import Emis_hoaCrawler 

31from .by_source.ems_crawler import EmsCrawler 

32from .by_source.episciences_crawler import EpisciencesCrawler 

33from .by_source.eudml_crawler import EudmlCrawler 

34from .by_source.hdml_crawler import HdmlCrawler 

35from .by_source.heldermann_crawler import HeldermannCrawler 

36from .by_source.impan_crawler import ImpanCrawler 

37from .by_source.ipb_crawler import IpbCrawler 

38from .by_source.jgaa_crawler import JgaaCrawler 

39from .by_source.journalfi_crawler import JournalfiCrawler 

40from .by_source.lofpl_crawler import LofplCrawler 

41from .by_source.mathbas_crawler import MathbasCrawler 

42from .by_source.mathnetru_crawler import MathnetruCrawler 

43from .by_source.msp_crawler import MspCrawler 

44from .by_source.nsjom.nsjom_crawler import NsjomCrawler 

45from .by_source.rcm_crawler import RcmCrawler 

46from .by_source.sasa_crawler import SasaCrawler 

47from .by_source.seio_crawler import SeioCrawler 

48from .by_source.tac_crawler import TacCrawler 

49 

# Every concrete crawler class the factory knows about.
# The order is significant only if two classes declare the same
# ``source_domain``: the one listed later wins in the lookup table below.
crawler_classes = (
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    Ams_eraamsCrawler,
    Ams_jamsCrawler,
    AsuoCrawler,
    ArsiaCrawler,
    AulfmCrawler,
    BdimCrawler,
    BmmsCrawler,
    CambridgeCrawler,
    CsisCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    ElibmCrawler,
    Emis_amCrawler,
    Emis_aasCrawler,
    Emis_hoaCrawler,
    EudmlCrawler,
    GeodesicCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JsigCrawler,
    JournalfiCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    NsjomCrawler,
    NumdamCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    ScholasticaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
)

# Lookup table: ``source_domain`` class attribute -> crawler class.
crawler_classes_map = dict(
    (crawler_cls.source_domain, crawler_cls) for crawler_cls in crawler_classes
)

100 

101 

def get_crawler_class(source: str):
    """
    Look up the crawler class registered for a source domain.

    :param source: the source domain, used as key in ``crawler_classes_map``
    :return: the matching crawler class, or ``None`` when nothing is registered
    """
    # dict.get already falls back to None for a missing key.
    return crawler_classes_map.get(source)

105 

106 

def crawler_factory(
    source: str,
    colid: str,
    username: str,
    dry: bool = False,
    force_refresh: bool = False,
    collection_url: str | None = None,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers.

    Resolves the crawler class with ``get_crawler_class(source)`` and
    instantiates it with the remaining arguments.

    :param source: source domain of the crawler to build (e.g. "Eudml")
    :param colid: collection pid, passed to the crawler as ``collection_id``
    :param username: passed through to the crawler constructor
    :param dry: passed through to the crawler constructor (default: False)
    :param force_refresh: passed through to the crawler constructor
        (default: False)
    :param collection_url: url of the collection web page (default: None)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class is registered for ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Same exception type as before; the message now names the offending
        # source so the failure can be diagnosed from the traceback alone.
        raise NotImplementedError(f"No crawler is registered for source {source!r}")

    return klass(
        collection_id=colid,
        username=username,
        dry=dry,
        force_refresh=force_refresh,
        collection_url=collection_url,
    )