Coverage for src/crawler/factory.py: 96%

46 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2025-02-14 14:36 +0000

1from crawler.by_source.isrp_crawler import IsrpCrawler 

2from crawler.by_source.ptm_crawler import PtmCrawler 

3 

4from .base_crawler import BaseCollectionCrawler 

5from .by_source.amc_crawler import AmcCrawler 

6from .by_source.ami_crawler import AmiCrawler 

7from .by_source.amp_crawler import AmpCrawler 

8from .by_source.ams_crawler import AmsCrawler 

9from .by_source.arsia_crawler import ArsiaCrawler 

10from .by_source.asuo_crawler import AsuoCrawler 

11from .by_source.aulfm_crawler import AulfmCrawler 

12from .by_source.bdim_crawler import BdimCrawler 

13from .by_source.csis_crawler import CsisCrawler 

14from .by_source.da_crawler import DaCrawler 

15from .by_source.dml_e_crawler import Dml_eCrawler 

16from .by_source.dmlbul_crawler import DmlbulCrawler 

17from .by_source.dmlcz_crawler import DmlczCrawler 

18from .by_source.dmlpl_crawler import DmlplCrawler 

19from .by_source.edpsci_crawler import EdpsciCrawler 

20from .by_source.elibm_crawler import ElibmCrawler 

21from .by_source.ems_crawler import EmsCrawler 

22from .by_source.episciences_crawler import EpisciencesCrawler 

23from .by_source.eudml_crawler import EudmlCrawler 

24from .by_source.hdml_crawler import HdmlCrawler 

25from .by_source.heldermann_crawler import HeldermannCrawler 

26from .by_source.impan_crawler import ImpanCrawler 

27from .by_source.ipb_crawler import IpbCrawler 

28from .by_source.jgaa_crawler import JgaaCrawler 

29from .by_source.journalfi_crawler import JournalfiCrawler 

30from .by_source.lofpl_crawler import LofplCrawler 

31from .by_source.mathbas_crawler import MathbasCrawler 

32from .by_source.mathnetru_crawler import MathnetruCrawler 

33from .by_source.msp_crawler import MspCrawler 

34from .by_source.nsjom_crawler import NsjomCrawler 

35from .by_source.rcm_crawler import RcmCrawler 

36from .by_source.sasa_crawler import SasaCrawler 

37from .by_source.seio_crawler import SeioCrawler 

38from .by_source.tac_crawler import TacCrawler 

39 

# Registry of the available crawler classes.
# get_crawler_class() scans this tuple in order and returns the first entry
# whose ``source_domain`` equals the requested source, so the order only
# matters if two classes were ever to declare the same ``source_domain``.
crawler_classes = (
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    AmsCrawler,
    AsuoCrawler,
    ArsiaCrawler,
    AulfmCrawler,
    BdimCrawler,
    CsisCrawler,
    DaCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    ElibmCrawler,
    EudmlCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JournalfiCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    NsjomCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    SeioCrawler,
    TacCrawler,
)

78 

79 

# NOTE: a mapping keyed by ``source_domain`` could be better than an iterable.
def get_crawler_class(source):
    """
    Look up the crawler class registered for a source.

    :param source: value compared against each class's ``source_domain``
    :return: the first class of ``crawler_classes`` whose ``source_domain``
        equals ``source``, or None when no class matches
    """
    for candidate in crawler_classes:
        if source == candidate.source_domain:
            return candidate
    return None

83 

84 

def crawler_factory(
    source: str,
    colid: str,
    col_url: str,
    username: str,
    test_mode: bool = False,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    :param source: source identifier, matched against the ``source_domain``
        of each registered crawler class (e.g. "Eudml")
    :param colid: collection pid, passed to the crawler as ``collection_id``
    :param col_url: url of the collection web page, passed to the crawler
        as ``collection_url``
    :param username: passed to the crawler as ``username``
    :param test_mode: forwarded unchanged to the crawler constructor
        (default: False)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class matches ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Same exception type as before; the message names the offending
        # source so the failure can be diagnosed from the traceback alone.
        raise NotImplementedError(f"No crawler implemented for source {source!r}")

    return klass(
        collection_id=colid,
        collection_url=col_url,
        username=username,
        test_mode=test_mode,
    )