Coverage for src / crawler / factory.py: 97%
60 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-04-30 12:41 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-04-30 12:41 +0000
1from crawler.abstract_crawlers.base_crawler import BaseCollectionCrawler
2from crawler.by_source.amc_crawler import AmcCrawler
3from crawler.by_source.ami_crawler import AmiCrawler
4from crawler.by_source.amp_crawler import AmpCrawler
5from crawler.by_source.ams.ams_eraams_crawler import Ams_eraamsCrawler
6from crawler.by_source.ams.ams_jams_crawler import Ams_jamsCrawler
7from crawler.by_source.amuc_crawler import AmucCrawler
8from crawler.by_source.arsia_crawler import ArsiaCrawler
9from crawler.by_source.asuo_crawler import AsuoCrawler
10from crawler.by_source.aulfm_crawler import AulfmCrawler
11from crawler.by_source.bdim_crawler import BdimCrawler
12from crawler.by_source.bmms_crawler import BmmsCrawler
13from crawler.by_source.csis_crawler import CsisCrawler
14from crawler.by_source.cup_crawler import CupCrawler
15from crawler.by_source.dml_e_crawler import Dml_eCrawler
16from crawler.by_source.dmlbul_crawler import DmlbulCrawler
17from crawler.by_source.dmlcz_crawler import DmlczCrawler
18from crawler.by_source.dmlpl_crawler import DmlplCrawler
19from crawler.by_source.edpsci_crawler import EdpsciCrawler
20from crawler.by_source.ejc_crawler import EjcCrawler
21from crawler.by_source.elibm_crawler import ElibmCrawler
22from crawler.by_source.emis_aas_crawler import Emis_aasCrawler
23from crawler.by_source.emis_am_crawler import Emis_amCrawler
24from crawler.by_source.emis_hoa_crawler import Emis_hoaCrawler
25from crawler.by_source.ems_crawler import EmsCrawler
26from crawler.by_source.episciences_crawler import EpisciencesCrawler
27from crawler.by_source.eudml_crawler import EudmlCrawler
28from crawler.by_source.geodesic_crawler import GeodesicCrawler
29from crawler.by_source.hdml_crawler import HdmlCrawler
30from crawler.by_source.heldermann_crawler import HeldermannCrawler
31from crawler.by_source.impan_crawler import ImpanCrawler
32from crawler.by_source.ipb_crawler import IpbCrawler
33from crawler.by_source.isrp_crawler import IsrpCrawler
34from crawler.by_source.jgaa_crawler import JgaaCrawler
35from crawler.by_source.journalfi_crawler import JournalfiCrawler
36from crawler.by_source.jsig_crawler import JsigCrawler
37from crawler.by_source.lofpl_crawler import LofplCrawler
38from crawler.by_source.mathbas_crawler import MathbasCrawler
39from crawler.by_source.mathnetru_crawler import MathnetruCrawler
40from crawler.by_source.msp_crawler import MspCrawler
41from crawler.by_source.mta_crawler import MtaCrawler
42from crawler.by_source.nsjom.nsjom_crawler import NsjomCrawler
43from crawler.by_source.numdam_crawler import NumdamCrawler
44from crawler.by_source.ptm_crawler import PtmCrawler
45from crawler.by_source.rcm_crawler import RcmCrawler
46from crawler.by_source.sasa_crawler import SasaCrawler
47from crawler.by_source.scholastica_crawler import ScholasticaCrawler
48from crawler.by_source.seio_crawler import SeioCrawler
49from crawler.by_source.slc_crawler import Slc_Crawler
50from crawler.by_source.tac_crawler import TacCrawler
# Registry of every concrete crawler class the factory can build.
# Order is significant: should two classes ever declare the same
# ``source_domain``, the one listed later replaces the earlier one when
# ``crawler_classes_map`` is built below.
crawler_classes = (
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    Ams_eraamsCrawler,
    Ams_jamsCrawler,
    ArsiaCrawler,
    AsuoCrawler,
    AulfmCrawler,
    BdimCrawler,
    BmmsCrawler,
    CsisCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EjcCrawler,
    ElibmCrawler,
    Emis_aasCrawler,
    Emis_amCrawler,
    Emis_hoaCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    EudmlCrawler,
    GeodesicCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JournalfiCrawler,
    JsigCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    MtaCrawler,
    NsjomCrawler,
    NumdamCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    ScholasticaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
    CupCrawler,
    AmucCrawler,
)

# Lookup table keyed by each class's ``source_domain`` attribute.
crawler_classes_map = {
    crawler_cls.source_domain: crawler_cls for crawler_cls in crawler_classes
}
def get_crawler_class(source: str):
    """
    Look up the crawler class registered for a source domain.

    :param source: the source domain
    :return: the matching class from ``crawler_classes_map``, or ``None``
        when no crawler is registered for ``source``
    """
    return crawler_classes_map.get(source)
def crawler_factory(
    source: str,
    colid: str,
    username: str,
    dry: bool = False,
    force_refresh: bool = False,
    collection_url: str | None = None,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers.

    :param source: the source domain, used to select the crawler class
    :param colid: collection pid, passed to the crawler as ``collection_id``
    :param username: forwarded unchanged to the crawler
    :param dry: forwarded unchanged to the crawler (default: False)
    :param force_refresh: forwarded unchanged to the crawler (default: False)
    :param collection_url: url of the collection web page (default: None)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class is registered for ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Name the offending source so the failure is diagnosable from the
        # traceback alone; the exception type is kept for existing callers.
        raise NotImplementedError(f"No crawler is implemented for source {source!r}")

    return klass(
        collection_id=colid,
        username=username,
        dry=dry,
        force_refresh=force_refresh,
        collection_url=collection_url,
    )