Coverage for src/crawler/factory.py: 96%
48 statements
« prev ^ index » next coverage.py v7.7.0, created at 2025-04-03 12:36 +0000
1from crawler.by_source.isrp_crawler import IsrpCrawler
2from crawler.by_source.ptm_crawler import PtmCrawler
4from .base_crawler import BaseCollectionCrawler
5from .by_source.amc_crawler import AmcCrawler
6from .by_source.ami_crawler import AmiCrawler
7from .by_source.amp_crawler import AmpCrawler
8from .by_source.ams_crawler import AmsCrawler
9from .by_source.arsia_crawler import ArsiaCrawler
10from .by_source.asuo_crawler import AsuoCrawler
11from .by_source.aulfm_crawler import AulfmCrawler
12from .by_source.bdim_crawler import BdimCrawler
13from .by_source.csis_crawler import CsisCrawler
14from .by_source.da_crawler import DaCrawler
15from .by_source.dml_e_crawler import Dml_eCrawler
16from .by_source.dmlbul_crawler import DmlbulCrawler
17from .by_source.dmlcz_crawler import DmlczCrawler
18from .by_source.dmlpl_crawler import DmlplCrawler
19from .by_source.edpsci_crawler import EdpsciCrawler
20from .by_source.elibm_crawler import ElibmCrawler
21from .by_source.emis_crawler import EmisCrawler
22from .by_source.ems_crawler import EmsCrawler
23from .by_source.episciences_crawler import EpisciencesCrawler
24from .by_source.eudml_crawler import EudmlCrawler
25from .by_source.hdml_crawler import HdmlCrawler
26from .by_source.heldermann_crawler import HeldermannCrawler
27from .by_source.impan_crawler import ImpanCrawler
28from .by_source.ipb_crawler import IpbCrawler
29from .by_source.jgaa_crawler import JgaaCrawler
30from .by_source.journalfi_crawler import JournalfiCrawler
31from .by_source.lofpl_crawler import LofplCrawler
32from .by_source.mathbas_crawler import MathbasCrawler
33from .by_source.mathnetru_crawler import MathnetruCrawler
34from .by_source.msp_crawler import MspCrawler
35from .by_source.nsjom_crawler import NsjomCrawler
36from .by_source.rcm_crawler import RcmCrawler
37from .by_source.sasa_crawler import SasaCrawler
38from .by_source.seio_crawler import SeioCrawler
39from .by_source.tac_crawler import TacCrawler
# Crawler classes exposed by this module. The order is kept as declared
# because it determines the iteration order of both the tuple and the map.
crawler_classes = (
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    AmsCrawler,
    AsuoCrawler,
    ArsiaCrawler,
    AulfmCrawler,
    BdimCrawler,
    CsisCrawler,
    DaCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    ElibmCrawler,
    EmisCrawler,
    EudmlCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JournalfiCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    NsjomCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    SeioCrawler,
    TacCrawler,
)

# Lookup table keyed by each class's ``source_domain`` attribute. If two
# classes declared the same ``source_domain``, the later one in the tuple
# would replace the earlier one.
crawler_classes_map = {klass.source_domain: klass for klass in crawler_classes}
def get_crawler_class(source):
    """
    Look up the crawler class registered for a source.

    :param source: key looked up in ``crawler_classes_map``
    :return: the matching crawler class, or None when the key is absent
    """
    # dict.get already falls back to None for a missing key.
    return crawler_classes_map.get(source)
def crawler_factory(
    source: str,
    colid: str,
    col_url: str,
    username: str,
    test_mode: bool = False,
    force_refresh: bool = False,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    :param source: key used to select the crawler class, e.g. "Eudml"
    :param colid: collection pid
    :param col_url: url of the collection web page
    :param username: passed unchanged to the crawler constructor
    :param test_mode: passed unchanged to the crawler constructor (default: False)
    :param force_refresh: passed unchanged to the crawler constructor (default: False)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class is found for ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Name the offending source so the failure can be diagnosed from the
        # traceback alone; the exception type is unchanged for callers.
        raise NotImplementedError(f"No crawler is registered for source {source!r}")

    return klass(
        collection_id=colid,
        collection_url=col_url,
        username=username,
        test_mode=test_mode,
        force_refresh=force_refresh,
    )