Coverage for src/crawler/factory.py: 96%
49 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-03 13:39 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-03 13:39 +0000
1from crawler.by_source.isrp_crawler import IsrpCrawler
2from crawler.by_source.ptm_crawler import PtmCrawler
4from .base_crawler import BaseCollectionCrawler
5from .by_source.amc_crawler import AmcCrawler
6from .by_source.ami_crawler import AmiCrawler
7from .by_source.amp_crawler import AmpCrawler
8from .by_source.ams_crawler import AmsCrawler
9from .by_source.arsia_crawler import ArsiaCrawler
10from .by_source.asuo_crawler import AsuoCrawler
11from .by_source.aulfm_crawler import AulfmCrawler
12from .by_source.bdim_crawler import BdimCrawler
13from .by_source.csis_crawler import CsisCrawler
14from .by_source.da_crawler import DaCrawler
15from .by_source.dml_e_crawler import Dml_eCrawler
16from .by_source.dmlbul_crawler import DmlbulCrawler
17from .by_source.dmlcz_crawler import DmlczCrawler
18from .by_source.dmlpl_crawler import DmlplCrawler
19from .by_source.edpsci_crawler import EdpsciCrawler
20from .by_source.elibm_crawler import ElibmCrawler
21from .by_source.emis_aaa_crawler import Emis_aaaCrawler
22from .by_source.emis_am_crawler import Emis_amCrawler
23from .by_source.ems_crawler import EmsCrawler
24from .by_source.episciences_crawler import EpisciencesCrawler
25from .by_source.eudml_crawler import EudmlCrawler
26from .by_source.hdml_crawler import HdmlCrawler
27from .by_source.heldermann_crawler import HeldermannCrawler
28from .by_source.impan_crawler import ImpanCrawler
29from .by_source.ipb_crawler import IpbCrawler
30from .by_source.jgaa_crawler import JgaaCrawler
31from .by_source.journalfi_crawler import JournalfiCrawler
32from .by_source.lofpl_crawler import LofplCrawler
33from .by_source.mathbas_crawler import MathbasCrawler
34from .by_source.mathnetru_crawler import MathnetruCrawler
35from .by_source.msp_crawler import MspCrawler
36from .by_source.nsjom_crawler import NsjomCrawler
37from .by_source.rcm_crawler import RcmCrawler
38from .by_source.sasa_crawler import SasaCrawler
39from .by_source.seio_crawler import SeioCrawler
40from .by_source.tac_crawler import TacCrawler
# Every crawler implementation the factory can hand out. The declaration
# order is preserved on purpose: it determines the iteration order of the
# map built below, and if two classes ever shared a ``source_domain`` the
# later entry would replace the earlier one.
crawler_classes = (
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    AmsCrawler,
    AsuoCrawler,
    ArsiaCrawler,
    AulfmCrawler,
    BdimCrawler,
    CsisCrawler,
    DaCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    ElibmCrawler,
    Emis_aaaCrawler,
    Emis_amCrawler,
    EudmlCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JournalfiCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    NsjomCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    SeioCrawler,
    TacCrawler,
)

# Lookup table keyed by each class's ``source_domain`` attribute.
crawler_classes_map = {
    crawler_cls.source_domain: crawler_cls for crawler_cls in crawler_classes
}
def get_crawler_class(source):
    """
    Look up the crawler class registered for a source.

    :param source: key to look up in ``crawler_classes_map``
    :return: the matching crawler class, or None when the key is absent
    """
    try:
        return crawler_classes_map[source]
    except KeyError:
        # Unknown source: report it as None rather than raising.
        return None
def crawler_factory(
    source: str,
    colid: str,
    col_url: str,
    username: str,
    test_mode: bool = False,
    force_refresh: bool = False,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    :param source: key identifying the crawler class, e.g. "Eudml"
    :param colid: collection pid, passed to the crawler as ``collection_id``
    :param col_url: url of the collection web page, passed as ``collection_url``
    :param username: passed to the crawler as ``username``
    :param test_mode: passed to the crawler as ``test_mode`` (default: False)
    :param force_refresh: passed to the crawler as ``force_refresh`` (default: False)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class is registered for ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Name the offending source so the failure can be diagnosed from the
        # traceback alone.
        raise NotImplementedError(f"No crawler registered for source {source!r}")

    return klass(
        collection_id=colid,
        collection_url=col_url,
        username=username,
        test_mode=test_mode,
        force_refresh=force_refresh,
    )