Coverage for src / crawler / factory.py: 97%

60 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-04-30 12:41 +0000

1from crawler.abstract_crawlers.base_crawler import BaseCollectionCrawler 

2from crawler.by_source.amc_crawler import AmcCrawler 

3from crawler.by_source.ami_crawler import AmiCrawler 

4from crawler.by_source.amp_crawler import AmpCrawler 

5from crawler.by_source.ams.ams_eraams_crawler import Ams_eraamsCrawler 

6from crawler.by_source.ams.ams_jams_crawler import Ams_jamsCrawler 

7from crawler.by_source.amuc_crawler import AmucCrawler 

8from crawler.by_source.arsia_crawler import ArsiaCrawler 

9from crawler.by_source.asuo_crawler import AsuoCrawler 

10from crawler.by_source.aulfm_crawler import AulfmCrawler 

11from crawler.by_source.bdim_crawler import BdimCrawler 

12from crawler.by_source.bmms_crawler import BmmsCrawler 

13from crawler.by_source.csis_crawler import CsisCrawler 

14from crawler.by_source.cup_crawler import CupCrawler 

15from crawler.by_source.dml_e_crawler import Dml_eCrawler 

16from crawler.by_source.dmlbul_crawler import DmlbulCrawler 

17from crawler.by_source.dmlcz_crawler import DmlczCrawler 

18from crawler.by_source.dmlpl_crawler import DmlplCrawler 

19from crawler.by_source.edpsci_crawler import EdpsciCrawler 

20from crawler.by_source.ejc_crawler import EjcCrawler 

21from crawler.by_source.elibm_crawler import ElibmCrawler 

22from crawler.by_source.emis_aas_crawler import Emis_aasCrawler 

23from crawler.by_source.emis_am_crawler import Emis_amCrawler 

24from crawler.by_source.emis_hoa_crawler import Emis_hoaCrawler 

25from crawler.by_source.ems_crawler import EmsCrawler 

26from crawler.by_source.episciences_crawler import EpisciencesCrawler 

27from crawler.by_source.eudml_crawler import EudmlCrawler 

28from crawler.by_source.geodesic_crawler import GeodesicCrawler 

29from crawler.by_source.hdml_crawler import HdmlCrawler 

30from crawler.by_source.heldermann_crawler import HeldermannCrawler 

31from crawler.by_source.impan_crawler import ImpanCrawler 

32from crawler.by_source.ipb_crawler import IpbCrawler 

33from crawler.by_source.isrp_crawler import IsrpCrawler 

34from crawler.by_source.jgaa_crawler import JgaaCrawler 

35from crawler.by_source.journalfi_crawler import JournalfiCrawler 

36from crawler.by_source.jsig_crawler import JsigCrawler 

37from crawler.by_source.lofpl_crawler import LofplCrawler 

38from crawler.by_source.mathbas_crawler import MathbasCrawler 

39from crawler.by_source.mathnetru_crawler import MathnetruCrawler 

40from crawler.by_source.msp_crawler import MspCrawler 

41from crawler.by_source.mta_crawler import MtaCrawler 

42from crawler.by_source.nsjom.nsjom_crawler import NsjomCrawler 

43from crawler.by_source.numdam_crawler import NumdamCrawler 

44from crawler.by_source.ptm_crawler import PtmCrawler 

45from crawler.by_source.rcm_crawler import RcmCrawler 

46from crawler.by_source.sasa_crawler import SasaCrawler 

47from crawler.by_source.scholastica_crawler import ScholasticaCrawler 

48from crawler.by_source.seio_crawler import SeioCrawler 

49from crawler.by_source.slc_crawler import Slc_Crawler 

50from crawler.by_source.tac_crawler import TacCrawler 

51 

# Registry of every concrete crawler class known to the factory.
# Order is significant only in that, should two classes ever declare the
# same ``source_domain``, the one listed later wins in the lookup table.
crawler_classes = (
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    Ams_eraamsCrawler,
    Ams_jamsCrawler,
    ArsiaCrawler,
    AsuoCrawler,
    AulfmCrawler,
    BdimCrawler,
    BmmsCrawler,
    CsisCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EjcCrawler,
    ElibmCrawler,
    Emis_aasCrawler,
    Emis_amCrawler,
    Emis_hoaCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    EudmlCrawler,
    GeodesicCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JournalfiCrawler,
    JsigCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    MtaCrawler,
    NsjomCrawler,
    NumdamCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    ScholasticaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
    CupCrawler,
    AmucCrawler,
)

# Lookup table keyed by each class's ``source_domain`` attribute.
crawler_classes_map = {klass.source_domain: klass for klass in crawler_classes}

105 

106 

def get_crawler_class(source: str):
    """
    Look up the crawler class registered for a source domain.

    :param source: the source domain
    :return: the crawler class found in ``crawler_classes_map``, or ``None``
        when no class is registered under that domain
    """
    # dict.get already yields None for a missing key.
    found = crawler_classes_map.get(source)
    return found

110 

111 

def crawler_factory(
    source: str,
    colid: str,
    username: str,
    dry: bool = False,
    force_refresh: bool = False,
    collection_url: str | None = None,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    Resolves the crawler class registered for ``source`` and instantiates it.

    :param source: the source domain
    :param colid: collection pid, passed to the crawler as ``collection_id``
    :param username: passed through unchanged to the crawler constructor
    :param dry: passed through unchanged to the crawler constructor
        (default: False)
    :param force_refresh: passed through unchanged to the crawler constructor
        (default: False)
    :param collection_url: url of the collection web page (default: None)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class is registered for ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Name the offending domain so the failure is diagnosable from the
        # traceback alone; the exception type is unchanged for callers.
        raise NotImplementedError(
            f"No crawler is registered for source domain {source!r}"
        )

    return klass(
        collection_id=colid,
        username=username,
        dry=dry,
        force_refresh=force_refresh,
        collection_url=collection_url,
    )