Coverage for src / crawler / factory.py: 97%

61 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-04-08 09:35 +0000

1from crawler.abstract_crawlers.base_crawler import BaseCollectionCrawler 

2from crawler.by_source.amc_crawler import AmcCrawler 

3from crawler.by_source.ami_crawler import AmiCrawler 

4from crawler.by_source.amp_crawler import AmpCrawler 

5from crawler.by_source.ams.ams_eraams_crawler import Ams_eraamsCrawler 

6from crawler.by_source.ams.ams_jams_crawler import Ams_jamsCrawler 

7from crawler.by_source.amuc_crawler import AmucCrawler 

8from crawler.by_source.arsia_crawler import ArsiaCrawler 

9from crawler.by_source.asuo_crawler import AsuoCrawler 

10from crawler.by_source.aulfm_crawler import AulfmCrawler 

11from crawler.by_source.bdim_crawler import BdimCrawler 

12from crawler.by_source.bmms_crawler import BmmsCrawler 

13from crawler.by_source.cambridge_crawler import CambridgeCrawler 

14from crawler.by_source.csis_crawler import CsisCrawler 

15from crawler.by_source.cup_crawler import CupCrawler 

16from crawler.by_source.dml_e_crawler import Dml_eCrawler 

17from crawler.by_source.dmlbul_crawler import DmlbulCrawler 

18from crawler.by_source.dmlcz_crawler import DmlczCrawler 

19from crawler.by_source.dmlpl_crawler import DmlplCrawler 

20from crawler.by_source.edpsci_crawler import EdpsciCrawler 

21from crawler.by_source.ejc_crawler import EjcCrawler 

22from crawler.by_source.elibm_crawler import ElibmCrawler 

23from crawler.by_source.emis_aas_crawler import Emis_aasCrawler 

24from crawler.by_source.emis_am_crawler import Emis_amCrawler 

25from crawler.by_source.emis_hoa_crawler import Emis_hoaCrawler 

26from crawler.by_source.ems_crawler import EmsCrawler 

27from crawler.by_source.episciences_crawler import EpisciencesCrawler 

28from crawler.by_source.eudml_crawler import EudmlCrawler 

29from crawler.by_source.geodesic_crawler import GeodesicCrawler 

30from crawler.by_source.hdml_crawler import HdmlCrawler 

31from crawler.by_source.heldermann_crawler import HeldermannCrawler 

32from crawler.by_source.impan_crawler import ImpanCrawler 

33from crawler.by_source.ipb_crawler import IpbCrawler 

34from crawler.by_source.isrp_crawler import IsrpCrawler 

35from crawler.by_source.jgaa_crawler import JgaaCrawler 

36from crawler.by_source.journalfi_crawler import JournalfiCrawler 

37from crawler.by_source.jsig_crawler import JsigCrawler 

38from crawler.by_source.lofpl_crawler import LofplCrawler 

39from crawler.by_source.mathbas_crawler import MathbasCrawler 

40from crawler.by_source.mathnetru_crawler import MathnetruCrawler 

41from crawler.by_source.msp_crawler import MspCrawler 

42from crawler.by_source.mta_crawler import MtaCrawler 

43from crawler.by_source.nsjom.nsjom_crawler import NsjomCrawler 

44from crawler.by_source.numdam_crawler import NumdamCrawler 

45from crawler.by_source.ptm_crawler import PtmCrawler 

46from crawler.by_source.rcm_crawler import RcmCrawler 

47from crawler.by_source.sasa_crawler import SasaCrawler 

48from crawler.by_source.scholastica_crawler import ScholasticaCrawler 

49from crawler.by_source.seio_crawler import SeioCrawler 

50from crawler.by_source.slc_crawler import Slc_Crawler 

51from crawler.by_source.tac_crawler import TacCrawler 

52 

# Registry of every crawler class the factory can instantiate.
# Order is significant only if two classes declare the same ``source_domain``:
# the class listed later replaces the earlier one in ``crawler_classes_map``.
crawler_classes = (
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    Ams_eraamsCrawler,
    Ams_jamsCrawler,
    ArsiaCrawler,
    AsuoCrawler,
    AulfmCrawler,
    BdimCrawler,
    BmmsCrawler,
    CambridgeCrawler,
    CsisCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EjcCrawler,
    ElibmCrawler,
    Emis_aasCrawler,
    Emis_amCrawler,
    Emis_hoaCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    EudmlCrawler,
    GeodesicCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JournalfiCrawler,
    JsigCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    MtaCrawler,
    NsjomCrawler,
    NumdamCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    ScholasticaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
    CupCrawler,
    AmucCrawler,
)

# Lookup table: ``source_domain`` attribute of each registered class -> the class.
crawler_classes_map = dict(
    (crawler_cls.source_domain, crawler_cls) for crawler_cls in crawler_classes
)

107 

108 

def get_crawler_class(source: str):
    """
    Return the crawler class registered for a source domain.

    :param source: the source domain, used as the key into ``crawler_classes_map``
    :return: the matching crawler class, or ``None`` if ``source`` is not registered
    """
    try:
        return crawler_classes_map[source]
    except KeyError:
        # Unknown source: callers get None rather than an exception.
        return None

112 

113 

def crawler_factory(
    source: str,
    colid: str,
    username: str,
    dry: bool = False,
    force_refresh: bool = False,
    collection_url: str | None = None,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    Looks up the crawler class registered for ``source`` and instantiates it
    with the remaining arguments.

    :param source: the source domain
    :param colid: collection pid (passed to the crawler as ``collection_id``)
    :param username: forwarded unchanged to the crawler constructor
    :param dry: forwarded unchanged to the crawler constructor (default: False)
    :param force_refresh: forwarded unchanged to the crawler constructor (default: False)
    :param collection_url: url of the collection web page (default: None)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class is registered for ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Name the offending source so the failure is diagnosable from the
        # traceback alone; the exception type is unchanged for existing callers.
        raise NotImplementedError(f"No crawler is registered for source {source!r}")

    return klass(
        collection_id=colid,
        username=username,
        dry=dry,
        force_refresh=force_refresh,
        collection_url=collection_url,
    )