Coverage for src / crawler / factory.py: 97%

64 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-06-19 13:33 +0000

1import time 

2from typing import TYPE_CHECKING 

3 

4from crawler.abstract_crawlers.base_crawler import BaseCollectionCrawler 

5from crawler.by_source.amc_crawler import AmcCrawler 

6from crawler.by_source.ami_crawler import AmiCrawler 

7from crawler.by_source.amp_crawler import AmpCrawler 

8from crawler.by_source.ams_crawler import AmsCrawler 

9from crawler.by_source.amuc_crawler import AmucCrawler 

10from crawler.by_source.arsia_crawler import ArsiaCrawler 

11from crawler.by_source.asuo_crawler import AsuoCrawler 

12from crawler.by_source.aulfm_crawler import AulfmCrawler 

13from crawler.by_source.bdim_crawler import BdimCrawler 

14from crawler.by_source.bmms_crawler import BmmsCrawler 

15from crawler.by_source.compositio_crawler import CompositioCrawler 

16from crawler.by_source.csis_crawler import CsisCrawler 

17from crawler.by_source.cup_crawler import CupCrawler 

18from crawler.by_source.dml_e_crawler import Dml_eCrawler 

19from crawler.by_source.dmlbul_crawler import DmlbulCrawler 

20from crawler.by_source.dmlcz_crawler import DmlczCrawler 

21from crawler.by_source.dmlpl_crawler import DmlplCrawler 

22from crawler.by_source.edpsci_crawler import EdpsciCrawler 

23from crawler.by_source.ejc_crawler import EjcCrawler 

24from crawler.by_source.elibm_crawler import ElibmCrawler 

25from crawler.by_source.emis_aas_crawler import Emis_aasCrawler 

26from crawler.by_source.emis_am_crawler import Emis_amCrawler 

27from crawler.by_source.emis_hoa_crawler import Emis_hoaCrawler 

28from crawler.by_source.ems_crawler import EmsCrawler 

29from crawler.by_source.episciences_crawler import EpisciencesCrawler 

30from crawler.by_source.eudml_crawler import EudmlCrawler 

31from crawler.by_source.geodesic_crawler import GeodesicCrawler 

32from crawler.by_source.hdml_crawler import HdmlCrawler 

33from crawler.by_source.heldermann_crawler import HeldermannCrawler 

34from crawler.by_source.impan_crawler import ImpanCrawler 

35from crawler.by_source.ipb_crawler import IpbCrawler 

36from crawler.by_source.isrp_crawler import IsrpCrawler 

37from crawler.by_source.j_stage_crawler import JStageCrawler 

38from crawler.by_source.jgaa_crawler import JgaaCrawler 

39from crawler.by_source.journalfi_crawler import JournalfiCrawler 

40from crawler.by_source.jsig_crawler import JsigCrawler 

41from crawler.by_source.kobe_archive_crawler import KobeArchiveCrawler 

42from crawler.by_source.lofpl_crawler import LofplCrawler 

43from crawler.by_source.mathbas_crawler import MathbasCrawler 

44from crawler.by_source.mathnetru_crawler import MathnetruCrawler 

45from crawler.by_source.msp_crawler import MspCrawler 

46from crawler.by_source.mta_crawler import MtaCrawler 

47from crawler.by_source.nsjom.nsjom_crawler import NsjomCrawler 

48from crawler.by_source.numdam_crawler import NumdamCrawler 

49from crawler.by_source.ptm_crawler import PtmCrawler 

50from crawler.by_source.rcm_crawler import RcmCrawler 

51from crawler.by_source.sasa_crawler import SasaCrawler 

52from crawler.by_source.scholastica_crawler import ScholasticaCrawler 

53from crawler.by_source.seio_crawler import SeioCrawler 

54from crawler.by_source.slc_crawler import Slc_Crawler 

55from crawler.by_source.tac_crawler import TacCrawler 

56 

57if TYPE_CHECKING: 

58 from collections.abc import Callable 

59 

# Every crawler class the factory can build. Order matters: the registry
# below is filled in this order.
crawler_classes = (
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    AmsCrawler,
    ArsiaCrawler,
    AsuoCrawler,
    AulfmCrawler,
    BdimCrawler,
    BmmsCrawler,
    CsisCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EjcCrawler,
    ElibmCrawler,
    Emis_aasCrawler,
    Emis_amCrawler,
    Emis_hoaCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    EudmlCrawler,
    GeodesicCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JournalfiCrawler,
    JsigCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    MtaCrawler,
    NsjomCrawler,
    NumdamCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    ScholasticaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
    CupCrawler,
    AmucCrawler,
    CompositioCrawler,
    KobeArchiveCrawler,
    JStageCrawler,
)

# Registry keyed by each class's ``source_domain`` attribute. If two classes
# declare the same ``source_domain``, the one listed later in the tuple wins.
crawler_classes_map = {klass.source_domain: klass for klass in crawler_classes}

115 

116 

def get_crawler_class(source: str):
    """
    Look up the crawler class registered for a source domain.

    :param source: the source domain, used as key in ``crawler_classes_map``
    :return: the matching crawler class, or ``None`` if none is registered
    """
    try:
        return crawler_classes_map[source]
    except KeyError:
        # Unknown source: report it with None rather than an exception.
        return None

120 

121 

def crawler_factory(
    source: str,
    colid: str,
    username: str,
    dry: bool = False,
    force_refresh: bool = False,
    collection_url: str | None = None,
    backend: str | None = None,
    pause_function: "Callable[[float], None]" = staticmethod(time.sleep),
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    Selects the crawler class registered for ``source`` and instantiates it
    with the remaining arguments.

    :param source: the source domain, used to select the crawler class
    :param colid: collection pid, passed to the crawler as ``collection_id``
    :param username: forwarded unchanged to the crawler constructor
    :param dry: forwarded unchanged to the crawler constructor (default: False)
    :param force_refresh: forwarded unchanged to the crawler constructor
        (default: False)
    :param collection_url: url of the collection web page (default: None)
    :param backend: forwarded unchanged to the crawler constructor
        (default: None)
    :param pause_function: callable accepting a float, forwarded to the
        crawler constructor (default: ``time.sleep``)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class is registered for
        ``source``
    """
    # NOTE(review): the default is wrapped in ``staticmethod``; such objects
    # are only directly callable on Python 3.10+, which the ``str | None``
    # annotations above already require. Kept as is to preserve the default.
    klass = get_crawler_class(source)

    if klass is None:
        # Same exception type as before, now naming the unknown source.
        raise NotImplementedError(f"No crawler is registered for source {source!r}")

    return klass(
        collection_id=colid,
        username=username,
        dry=dry,
        force_refresh=force_refresh,
        collection_url=collection_url,
        backend=backend,
        pause_function=pause_function,
    )