Coverage for src/crawler/factory.py: 97%
57 statements
« prev ^ index » next · coverage.py v7.9.0, created at 2025-10-29 14:25 +0000
1from crawler.by_source.advc_crawler import AdvcCrawler
2from crawler.by_source.cambridge_crawler import CambridgeCrawler
3from crawler.by_source.emis_aas_crawler import Emis_aasCrawler
4from crawler.by_source.isrp_crawler import IsrpCrawler
5from crawler.by_source.jsig_crawler import JsigCrawler
6from crawler.by_source.numdam_crawler import NumdamCrawler
7from crawler.by_source.ptm_crawler import PtmCrawler
8from crawler.by_source.slc_crawler import Slc_Crawler
10from .base_crawler import BaseCollectionCrawler
11from .by_source.amc_crawler import AmcCrawler
12from .by_source.ami_crawler import AmiCrawler
13from .by_source.amp_crawler import AmpCrawler
14from .by_source.ams.ams_eraams_crawler import Ams_eraamsCrawler
15from .by_source.ams.ams_jams_crawler import Ams_jamsCrawler
16from .by_source.arsia_crawler import ArsiaCrawler
17from .by_source.asuo_crawler import AsuoCrawler
18from .by_source.aulfm_crawler import AulfmCrawler
19from .by_source.bdim_crawler import BdimCrawler
20from .by_source.bmms_crawler import BmmsCrawler
21from .by_source.csis_crawler import CsisCrawler
22from .by_source.da_crawler import DaCrawler
23from .by_source.dml_e_crawler import Dml_eCrawler
24from .by_source.dmlbul_crawler import DmlbulCrawler
25from .by_source.dmlcz_crawler import DmlczCrawler
26from .by_source.dmlpl_crawler import DmlplCrawler
27from .by_source.edpsci_crawler import EdpsciCrawler
28from .by_source.elibm_crawler import ElibmCrawler
29from .by_source.emis_am_crawler import Emis_amCrawler
30from .by_source.emis_hoa_crawler import Emis_hoaCrawler
31from .by_source.ems_crawler import EmsCrawler
32from .by_source.episciences_crawler import EpisciencesCrawler
33from .by_source.eudml_crawler import EudmlCrawler
34from .by_source.hdml_crawler import HdmlCrawler
35from .by_source.heldermann_crawler import HeldermannCrawler
36from .by_source.impan_crawler import ImpanCrawler
37from .by_source.ipb_crawler import IpbCrawler
38from .by_source.jgaa_crawler import JgaaCrawler
39from .by_source.journalfi_crawler import JournalfiCrawler
40from .by_source.lofpl_crawler import LofplCrawler
41from .by_source.mathbas_crawler import MathbasCrawler
42from .by_source.mathnetru_crawler import MathnetruCrawler
43from .by_source.msp_crawler import MspCrawler
44from .by_source.nsjom.nsjom_crawler import NsjomCrawler
45from .by_source.rcm_crawler import RcmCrawler
46from .by_source.sasa_crawler import SasaCrawler
47from .by_source.seio_crawler import SeioCrawler
48from .by_source.tac_crawler import TacCrawler
# Every crawler class known to the factory. The order is kept as-is: when the
# lookup table below is built, a later entry would replace an earlier one that
# exposes the same ``source_domain``.
crawler_classes = (
    AdvcCrawler,
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    Ams_eraamsCrawler,
    Ams_jamsCrawler,
    AsuoCrawler,
    ArsiaCrawler,
    AulfmCrawler,
    BdimCrawler,
    BmmsCrawler,
    CambridgeCrawler,
    CsisCrawler,
    DaCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    ElibmCrawler,
    Emis_amCrawler,
    Emis_aasCrawler,
    Emis_hoaCrawler,
    EudmlCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JsigCrawler,
    JournalfiCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    NsjomCrawler,
    NumdamCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
)

# Lookup table keyed by each class's ``source_domain`` attribute.
crawler_classes_map = {
    crawler_cls.source_domain: crawler_cls for crawler_cls in crawler_classes
}
def get_crawler_class(source: str):
    """
    Look up the crawler class registered for a source

    :param source: key to look up in ``crawler_classes_map``
    :return: the matching crawler class, or None when nothing is registered
    """
    # dict.get already falls back to None for a missing key.
    return crawler_classes_map.get(source)
def crawler_factory(
    source: str,
    colid: str,
    col_url: str,
    username: str,
    test_mode: bool = False,
    force_refresh: bool = False,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    :param source: key selecting the crawler class, e.g. "Eudml"
    :param colid: collection pid
    :param col_url: url of the collection web page
    :param username: passed unchanged to the crawler constructor
    :param test_mode: passed unchanged to the crawler constructor (default: False)
    :param force_refresh: passed unchanged to the crawler constructor (default: False)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class is registered for ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Same exception type as before (callers may catch it); the message now
        # names the offending source so the failure can be diagnosed.
        raise NotImplementedError(f"No crawler is registered for source {source!r}")

    return klass(
        collection_id=colid,
        collection_url=col_url,
        username=username,
        test_mode=test_mode,
        force_refresh=force_refresh,
    )