Coverage for src / crawler / factory.py: 97%
61 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-04-08 09:35 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-04-08 09:35 +0000
1from crawler.abstract_crawlers.base_crawler import BaseCollectionCrawler
2from crawler.by_source.amc_crawler import AmcCrawler
3from crawler.by_source.ami_crawler import AmiCrawler
4from crawler.by_source.amp_crawler import AmpCrawler
5from crawler.by_source.ams.ams_eraams_crawler import Ams_eraamsCrawler
6from crawler.by_source.ams.ams_jams_crawler import Ams_jamsCrawler
7from crawler.by_source.amuc_crawler import AmucCrawler
8from crawler.by_source.arsia_crawler import ArsiaCrawler
9from crawler.by_source.asuo_crawler import AsuoCrawler
10from crawler.by_source.aulfm_crawler import AulfmCrawler
11from crawler.by_source.bdim_crawler import BdimCrawler
12from crawler.by_source.bmms_crawler import BmmsCrawler
13from crawler.by_source.cambridge_crawler import CambridgeCrawler
14from crawler.by_source.csis_crawler import CsisCrawler
15from crawler.by_source.cup_crawler import CupCrawler
16from crawler.by_source.dml_e_crawler import Dml_eCrawler
17from crawler.by_source.dmlbul_crawler import DmlbulCrawler
18from crawler.by_source.dmlcz_crawler import DmlczCrawler
19from crawler.by_source.dmlpl_crawler import DmlplCrawler
20from crawler.by_source.edpsci_crawler import EdpsciCrawler
21from crawler.by_source.ejc_crawler import EjcCrawler
22from crawler.by_source.elibm_crawler import ElibmCrawler
23from crawler.by_source.emis_aas_crawler import Emis_aasCrawler
24from crawler.by_source.emis_am_crawler import Emis_amCrawler
25from crawler.by_source.emis_hoa_crawler import Emis_hoaCrawler
26from crawler.by_source.ems_crawler import EmsCrawler
27from crawler.by_source.episciences_crawler import EpisciencesCrawler
28from crawler.by_source.eudml_crawler import EudmlCrawler
29from crawler.by_source.geodesic_crawler import GeodesicCrawler
30from crawler.by_source.hdml_crawler import HdmlCrawler
31from crawler.by_source.heldermann_crawler import HeldermannCrawler
32from crawler.by_source.impan_crawler import ImpanCrawler
33from crawler.by_source.ipb_crawler import IpbCrawler
34from crawler.by_source.isrp_crawler import IsrpCrawler
35from crawler.by_source.jgaa_crawler import JgaaCrawler
36from crawler.by_source.journalfi_crawler import JournalfiCrawler
37from crawler.by_source.jsig_crawler import JsigCrawler
38from crawler.by_source.lofpl_crawler import LofplCrawler
39from crawler.by_source.mathbas_crawler import MathbasCrawler
40from crawler.by_source.mathnetru_crawler import MathnetruCrawler
41from crawler.by_source.msp_crawler import MspCrawler
42from crawler.by_source.mta_crawler import MtaCrawler
43from crawler.by_source.nsjom.nsjom_crawler import NsjomCrawler
44from crawler.by_source.numdam_crawler import NumdamCrawler
45from crawler.by_source.ptm_crawler import PtmCrawler
46from crawler.by_source.rcm_crawler import RcmCrawler
47from crawler.by_source.sasa_crawler import SasaCrawler
48from crawler.by_source.scholastica_crawler import ScholasticaCrawler
49from crawler.by_source.seio_crawler import SeioCrawler
50from crawler.by_source.slc_crawler import Slc_Crawler
51from crawler.by_source.tac_crawler import TacCrawler
# Every crawler class this module knows about. The source-domain lookup table
# (crawler_classes_map) is derived from this tuple, so a crawler only becomes
# reachable through the factory once it is listed here.
crawler_classes = (
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    Ams_eraamsCrawler,
    Ams_jamsCrawler,
    ArsiaCrawler,
    AsuoCrawler,
    AulfmCrawler,
    BdimCrawler,
    BmmsCrawler,
    CambridgeCrawler,
    CsisCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EjcCrawler,
    ElibmCrawler,
    Emis_aasCrawler,
    Emis_amCrawler,
    Emis_hoaCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    EudmlCrawler,
    GeodesicCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JournalfiCrawler,
    JsigCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    MtaCrawler,
    NsjomCrawler,
    NumdamCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    ScholasticaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
    CupCrawler,
    AmucCrawler,
)
# Lookup table keyed by each class's `source_domain` attribute. If two classes
# declare the same source_domain, the one listed later in crawler_classes wins.
crawler_classes_map = {klass.source_domain: klass for klass in crawler_classes}
def get_crawler_class(source: str):
    """Return the crawler class registered for a source domain.

    :param source: the source domain used as key in ``crawler_classes_map``
    :return: the matching crawler class, or ``None`` if the domain is unknown
    """
    return crawler_classes_map.get(source)
def crawler_factory(
    source: str,
    colid: str,
    username: str,
    dry: bool = False,
    force_refresh: bool = False,
    collection_url: str | None = None,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    Looks up the crawler class for ``source`` and instantiates it with the
    remaining arguments passed through as keyword arguments.

    :param source: the source domain
    :param colid: collection pid (passed to the crawler as ``collection_id``)
    :param username: passed to the crawler unchanged
    :param dry: passed to the crawler unchanged (default: False)
    :param force_refresh: passed to the crawler unchanged (default: False)
    :param collection_url: url of the collection web page (default: None)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class is registered for ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Name the offending source so the caller can tell which domain is unsupported.
        raise NotImplementedError(f"No crawler is implemented for source {source!r}")

    return klass(
        collection_id=colid,
        username=username,
        dry=dry,
        force_refresh=force_refresh,
        collection_url=collection_url,
    )