Coverage for src/crawler/factory.py: 97%
58 statements
« prev ^ index » next — coverage.py v7.9.0, created at 2025-11-21 14:41 +0000
1from crawler.by_source.advc_crawler import AdvcCrawler
2from crawler.by_source.cambridge_crawler import CambridgeCrawler
3from crawler.by_source.emis_aas_crawler import Emis_aasCrawler
4from crawler.by_source.geodesic_crawler import GeodesicCrawler
5from crawler.by_source.isrp_crawler import IsrpCrawler
6from crawler.by_source.jsig_crawler import JsigCrawler
7from crawler.by_source.numdam_crawler import NumdamCrawler
8from crawler.by_source.ptm_crawler import PtmCrawler
9from crawler.by_source.slc_crawler import Slc_Crawler
11from .base_crawler import BaseCollectionCrawler
12from .by_source.amc_crawler import AmcCrawler
13from .by_source.ami_crawler import AmiCrawler
14from .by_source.amp_crawler import AmpCrawler
15from .by_source.ams.ams_eraams_crawler import Ams_eraamsCrawler
16from .by_source.ams.ams_jams_crawler import Ams_jamsCrawler
17from .by_source.arsia_crawler import ArsiaCrawler
18from .by_source.asuo_crawler import AsuoCrawler
19from .by_source.aulfm_crawler import AulfmCrawler
20from .by_source.bdim_crawler import BdimCrawler
21from .by_source.bmms_crawler import BmmsCrawler
22from .by_source.csis_crawler import CsisCrawler
23from .by_source.da_crawler import DaCrawler
24from .by_source.dml_e_crawler import Dml_eCrawler
25from .by_source.dmlbul_crawler import DmlbulCrawler
26from .by_source.dmlcz_crawler import DmlczCrawler
27from .by_source.dmlpl_crawler import DmlplCrawler
28from .by_source.edpsci_crawler import EdpsciCrawler
29from .by_source.elibm_crawler import ElibmCrawler
30from .by_source.emis_am_crawler import Emis_amCrawler
31from .by_source.emis_hoa_crawler import Emis_hoaCrawler
32from .by_source.ems_crawler import EmsCrawler
33from .by_source.episciences_crawler import EpisciencesCrawler
34from .by_source.eudml_crawler import EudmlCrawler
35from .by_source.hdml_crawler import HdmlCrawler
36from .by_source.heldermann_crawler import HeldermannCrawler
37from .by_source.impan_crawler import ImpanCrawler
38from .by_source.ipb_crawler import IpbCrawler
39from .by_source.jgaa_crawler import JgaaCrawler
40from .by_source.journalfi_crawler import JournalfiCrawler
41from .by_source.lofpl_crawler import LofplCrawler
42from .by_source.mathbas_crawler import MathbasCrawler
43from .by_source.mathnetru_crawler import MathnetruCrawler
44from .by_source.msp_crawler import MspCrawler
45from .by_source.nsjom.nsjom_crawler import NsjomCrawler
46from .by_source.rcm_crawler import RcmCrawler
47from .by_source.sasa_crawler import SasaCrawler
48from .by_source.seio_crawler import SeioCrawler
49from .by_source.tac_crawler import TacCrawler
# Registry of every concrete crawler class known to the factory.
# Each class is expected to expose a unique `source_domain` class attribute,
# which is used as the lookup key when building `crawler_classes_map` below.
# Kept in (roughly) alphabetical order for readability only — the order has
# no functional effect. NOTE(review): AsuoCrawler is listed before
# ArsiaCrawler, breaking the alphabetical ordering; cosmetic only.
crawler_classes = (
    AdvcCrawler,
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    Ams_eraamsCrawler,
    Ams_jamsCrawler,
    AsuoCrawler,
    ArsiaCrawler,
    AulfmCrawler,
    BdimCrawler,
    BmmsCrawler,
    CambridgeCrawler,
    CsisCrawler,
    DaCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    ElibmCrawler,
    Emis_amCrawler,
    Emis_aasCrawler,
    Emis_hoaCrawler,
    EudmlCrawler,
    GeodesicCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JsigCrawler,
    JournalfiCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    NsjomCrawler,
    NumdamCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
)
101crawler_classes_map = {c.source_domain: c for c in crawler_classes}
def get_crawler_class(source: str) -> type[BaseCollectionCrawler] | None:
    """Look up the crawler class registered for *source*.

    :param source: source identifier, matched against each crawler class's
        ``source_domain`` attribute (e.g. "Eudml")
    :return: the registered crawler class, or ``None`` if *source* is unknown
    """
    # dict.get already defaults to None for missing keys;
    # the explicit second argument was redundant.
    return crawler_classes_map.get(source)
def crawler_factory(
    source: str,
    colid: str,
    username: str,
    dry: bool = False,
    force_refresh: bool = False,
    collection_url: str | None = None,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers.

    :param source: source identifier (e.g. "Eudml"), resolved through
        `get_crawler_class`
    :param colid: collection pid
    :param username: username passed through to the crawler
    :param dry: forwarded to the crawler — presumably a dry-run flag that
        skips persistence; confirm against BaseCollectionCrawler
    :param force_refresh: forwarded to the crawler — presumably bypasses
        cached data; confirm against BaseCollectionCrawler
    :param collection_url: url of the collection web page, if known
    :return: a crawler instance derived from BaseCollectionCrawler
    :raises NotImplementedError: if no crawler is registered for `source`
    """
    klass = get_crawler_class(source)
    if klass is None:
        # Include the offending source in the error instead of a bare raise,
        # so misconfigured callers can see what was requested.
        raise NotImplementedError(f"No crawler registered for source {source!r}")
    crawler = klass(
        collection_id=colid,
        username=username,
        dry=dry,
        force_refresh=force_refresh,
        collection_url=collection_url,
    )
    return crawler