Coverage for src/crawler/factory.py: 96%

55 statements  

« prev     ^ index     » next       coverage.py v7.9.0, created at 2025-07-30 09:47 +0000

1from crawler.by_source.advc_crawler import AdvcCrawler 

2from crawler.by_source.cambridge_crawler import CambridgeCrawler 

3from crawler.by_source.emis_aas_crawler import Emis_aasCrawler 

4from crawler.by_source.isrp_crawler import IsrpCrawler 

5from crawler.by_source.jsig_crawler import JsigCrawler 

6from crawler.by_source.ptm_crawler import PtmCrawler 

7from crawler.by_source.slc_crawler import Slc_Crawler 

8 

9from .base_crawler import BaseCollectionCrawler 

10from .by_source.amc_crawler import AmcCrawler 

11from .by_source.ami_crawler import AmiCrawler 

12from .by_source.amp_crawler import AmpCrawler 

13from .by_source.ams.ams_eraams_crawler import Ams_eraamsCrawler 

14from .by_source.ams.ams_jams_crawler import Ams_jamsCrawler 

15from .by_source.arsia_crawler import ArsiaCrawler 

16from .by_source.asuo_crawler import AsuoCrawler 

17from .by_source.aulfm_crawler import AulfmCrawler 

18from .by_source.bdim_crawler import BdimCrawler 

19from .by_source.csis_crawler import CsisCrawler 

20from .by_source.da_crawler import DaCrawler 

21from .by_source.dml_e_crawler import Dml_eCrawler 

22from .by_source.dmlbul_crawler import DmlbulCrawler 

23from .by_source.dmlcz_crawler import DmlczCrawler 

24from .by_source.dmlpl_crawler import DmlplCrawler 

25from .by_source.edpsci_crawler import EdpsciCrawler 

26from .by_source.elibm_crawler import ElibmCrawler 

27from .by_source.emis_am_crawler import Emis_amCrawler 

28from .by_source.emis_hoa_crawler import Emis_hoaCrawler 

29from .by_source.ems_crawler import EmsCrawler 

30from .by_source.episciences_crawler import EpisciencesCrawler 

31from .by_source.eudml_crawler import EudmlCrawler 

32from .by_source.hdml_crawler import HdmlCrawler 

33from .by_source.heldermann_crawler import HeldermannCrawler 

34from .by_source.impan_crawler import ImpanCrawler 

35from .by_source.ipb_crawler import IpbCrawler 

36from .by_source.jgaa_crawler import JgaaCrawler 

37from .by_source.journalfi_crawler import JournalfiCrawler 

38from .by_source.lofpl_crawler import LofplCrawler 

39from .by_source.mathbas_crawler import MathbasCrawler 

40from .by_source.mathnetru_crawler import MathnetruCrawler 

41from .by_source.msp_crawler import MspCrawler 

42from .by_source.nsjom.nsjom_crawler import NsjomCrawler 

43from .by_source.rcm_crawler import RcmCrawler 

44from .by_source.sasa_crawler import SasaCrawler 

45from .by_source.seio_crawler import SeioCrawler 

46from .by_source.tac_crawler import TacCrawler 

47 

# Registry of every concrete crawler class the factory can build.
# NOTE: the order is kept as-is on purpose. The lookup table below is built by
# iterating this tuple, so if two classes ever shared a ``source_domain`` the
# later entry would silently replace the earlier one.
crawler_classes = (
    AdvcCrawler,
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    Ams_eraamsCrawler,
    Ams_jamsCrawler,
    AsuoCrawler,
    ArsiaCrawler,
    AulfmCrawler,
    BdimCrawler,
    CambridgeCrawler,
    CsisCrawler,
    DaCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    ElibmCrawler,
    Emis_amCrawler,
    Emis_aasCrawler,
    Emis_hoaCrawler,
    EudmlCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JsigCrawler,
    JournalfiCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    NsjomCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
)

# Lookup table: each class's ``source_domain`` attribute -> the class itself.
crawler_classes_map = {
    crawler_cls.source_domain: crawler_cls for crawler_cls in crawler_classes
}

96 

97 

def get_crawler_class(source):
    """
    Look up the crawler class registered for a source.

    :param source: key to look up in ``crawler_classes_map``
    :return: the matching crawler class, or None when the key is not registered
    """
    # dict.get already yields None for a missing key.
    return crawler_classes_map.get(source)

100 

101 

def crawler_factory(
    source: str,
    colid: str,
    col_url: str,
    username: str,
    test_mode: bool = False,
    force_refresh: bool = False,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    Resolves the crawler class for ``source`` with ``get_crawler_class`` and
    instantiates it with the given collection settings.

    :param source: source key used to select the crawler class, e.g. "Eudml"
    :param colid: collection pid
    :param col_url: url of the collection web page
    :param username: forwarded unchanged to the crawler constructor
    :param test_mode: forwarded unchanged to the crawler constructor (default: False)
    :param force_refresh: forwarded unchanged to the crawler constructor (default: False)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class is registered for ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Same exception type as before; the message now names the offending
        # source so the failure can be diagnosed from the traceback alone.
        raise NotImplementedError(f"No crawler is registered for source {source!r}")

    return klass(
        collection_id=colid,
        collection_url=col_url,
        username=username,
        test_mode=test_mode,
        force_refresh=force_refresh,
    )