Coverage for src/crawler/factory.py: 96%

50 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-16 07:44 +0000

1from crawler.by_source.isrp_crawler import IsrpCrawler 

2from crawler.by_source.ptm_crawler import PtmCrawler 

3from crawler.by_source.slc_crawler import Slc_Crawler 

4 

5from .base_crawler import BaseCollectionCrawler 

6from .by_source.amc_crawler import AmcCrawler 

7from .by_source.ami_crawler import AmiCrawler 

8from .by_source.amp_crawler import AmpCrawler 

9from .by_source.ams_crawler import AmsCrawler 

10from .by_source.arsia_crawler import ArsiaCrawler 

11from .by_source.asuo_crawler import AsuoCrawler 

12from .by_source.aulfm_crawler import AulfmCrawler 

13from .by_source.bdim_crawler import BdimCrawler 

14from .by_source.csis_crawler import CsisCrawler 

15from .by_source.da_crawler import DaCrawler 

16from .by_source.dml_e_crawler import Dml_eCrawler 

17from .by_source.dmlbul_crawler import DmlbulCrawler 

18from .by_source.dmlcz_crawler import DmlczCrawler 

19from .by_source.dmlpl_crawler import DmlplCrawler 

20from .by_source.edpsci_crawler import EdpsciCrawler 

21from .by_source.elibm_crawler import ElibmCrawler 

22from .by_source.emis_aaa_crawler import Emis_aaaCrawler 

23from .by_source.emis_am_crawler import Emis_amCrawler 

24from .by_source.ems_crawler import EmsCrawler 

25from .by_source.episciences_crawler import EpisciencesCrawler 

26from .by_source.eudml_crawler import EudmlCrawler 

27from .by_source.hdml_crawler import HdmlCrawler 

28from .by_source.heldermann_crawler import HeldermannCrawler 

29from .by_source.impan_crawler import ImpanCrawler 

30from .by_source.ipb_crawler import IpbCrawler 

31from .by_source.jgaa_crawler import JgaaCrawler 

32from .by_source.journalfi_crawler import JournalfiCrawler 

33from .by_source.lofpl_crawler import LofplCrawler 

34from .by_source.mathbas_crawler import MathbasCrawler 

35from .by_source.mathnetru_crawler import MathnetruCrawler 

36from .by_source.msp_crawler import MspCrawler 

37from .by_source.nsjom_crawler import NsjomCrawler 

38from .by_source.rcm_crawler import RcmCrawler 

39from .by_source.sasa_crawler import SasaCrawler 

40from .by_source.seio_crawler import SeioCrawler 

41from .by_source.tac_crawler import TacCrawler 

42 

# Every crawler class the factory can hand out. The order is significant:
# it is the insertion order of ``crawler_classes_map`` below, and if two
# classes ever shared a ``source_domain`` the later entry would win.
crawler_classes = (
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    AmsCrawler,
    AsuoCrawler,
    ArsiaCrawler,
    AulfmCrawler,
    BdimCrawler,
    CsisCrawler,
    DaCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    ElibmCrawler,
    Emis_aaaCrawler,
    Emis_amCrawler,
    EudmlCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JournalfiCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    NsjomCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
)

# Lookup table: ``source_domain`` attribute of each class -> the class itself.
crawler_classes_map = {
    crawler_cls.source_domain: crawler_cls for crawler_cls in crawler_classes
}

86 

87 

def get_crawler_class(source):
    """Return the crawler class registered under ``source``.

    The lookup is done in ``crawler_classes_map``; ``None`` is returned
    when that mapping has no entry for ``source``.
    """
    if source in crawler_classes_map:
        return crawler_classes_map[source]
    return None

90 

91 

def crawler_factory(
    source: str,
    colid: str,
    col_url: str,
    username: str,
    test_mode: bool = False,
    force_refresh: bool = False,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    Resolves the crawler class for ``source`` through ``get_crawler_class``
    and instantiates it with the remaining arguments.

    :param source: key identifying the crawler class, e.g. "Eudml"
    :param colid: collection pid, passed to the crawler as ``collection_id``
    :param col_url: url of the collection web page, passed as ``collection_url``
    :param username: forwarded unchanged to the crawler constructor
    :param test_mode: forwarded unchanged to the crawler constructor (default: False)
    :param force_refresh: forwarded unchanged to the crawler constructor (default: False)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class is registered for ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Same exception type as before; the message names the offending
        # source so the failure can be diagnosed from the traceback alone.
        raise NotImplementedError(f"No crawler is registered for source {source!r}")

    return klass(
        collection_id=colid,
        collection_url=col_url,
        username=username,
        test_mode=test_mode,
        force_refresh=force_refresh,
    )