Coverage for src / crawler / factory.py: 97%
58 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-02-02 15:55 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-02-02 15:55 +0000
1from crawler.base_crawler import BaseCollectionCrawler
2from crawler.by_source.amc_crawler import AmcCrawler
3from crawler.by_source.ami_crawler import AmiCrawler
4from crawler.by_source.amp_crawler import AmpCrawler
5from crawler.by_source.ams.ams_eraams_crawler import Ams_eraamsCrawler
6from crawler.by_source.ams.ams_jams_crawler import Ams_jamsCrawler
7from crawler.by_source.arsia_crawler import ArsiaCrawler
8from crawler.by_source.asuo_crawler import AsuoCrawler
9from crawler.by_source.aulfm_crawler import AulfmCrawler
10from crawler.by_source.bdim_crawler import BdimCrawler
11from crawler.by_source.bmms_crawler import BmmsCrawler
12from crawler.by_source.cambridge_crawler import CambridgeCrawler
13from crawler.by_source.csis_crawler import CsisCrawler
14from crawler.by_source.dml_e_crawler import Dml_eCrawler
15from crawler.by_source.dmlbul_crawler import DmlbulCrawler
16from crawler.by_source.dmlcz_crawler import DmlczCrawler
17from crawler.by_source.dmlpl_crawler import DmlplCrawler
18from crawler.by_source.edpsci_crawler import EdpsciCrawler
19from crawler.by_source.ejc_crawler import EjcCrawler
20from crawler.by_source.elibm_crawler import ElibmCrawler
21from crawler.by_source.emis_aas_crawler import Emis_aasCrawler
22from crawler.by_source.emis_am_crawler import Emis_amCrawler
23from crawler.by_source.emis_hoa_crawler import Emis_hoaCrawler
24from crawler.by_source.ems_crawler import EmsCrawler
25from crawler.by_source.episciences_crawler import EpisciencesCrawler
26from crawler.by_source.eudml_crawler import EudmlCrawler
27from crawler.by_source.geodesic_crawler import GeodesicCrawler
28from crawler.by_source.hdml_crawler import HdmlCrawler
29from crawler.by_source.heldermann_crawler import HeldermannCrawler
30from crawler.by_source.impan_crawler import ImpanCrawler
31from crawler.by_source.ipb_crawler import IpbCrawler
32from crawler.by_source.isrp_crawler import IsrpCrawler
33from crawler.by_source.jgaa_crawler import JgaaCrawler
34from crawler.by_source.journalfi_crawler import JournalfiCrawler
35from crawler.by_source.jsig_crawler import JsigCrawler
36from crawler.by_source.lofpl_crawler import LofplCrawler
37from crawler.by_source.mathbas_crawler import MathbasCrawler
38from crawler.by_source.mathnetru_crawler import MathnetruCrawler
39from crawler.by_source.msp_crawler import MspCrawler
40from crawler.by_source.mta_crawler import MtaCrawler
41from crawler.by_source.nsjom.nsjom_crawler import NsjomCrawler
42from crawler.by_source.numdam_crawler import NumdamCrawler
43from crawler.by_source.ptm_crawler import PtmCrawler
44from crawler.by_source.rcm_crawler import RcmCrawler
45from crawler.by_source.scholastica_crawler import ScholasticaCrawler
46from crawler.by_source.seio_crawler import SeioCrawler
47from crawler.by_source.slc_crawler import Slc_Crawler
48from crawler.by_source.tac_crawler import TacCrawler
# Registry of every concrete crawler class this factory can hand out.
# To expose a new crawler, import it above and append it to this tuple.
crawler_classes = (
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    Ams_eraamsCrawler,
    Ams_jamsCrawler,
    ArsiaCrawler,
    AsuoCrawler,
    AulfmCrawler,
    BdimCrawler,
    BmmsCrawler,
    CambridgeCrawler,
    CsisCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EjcCrawler,
    ElibmCrawler,
    Emis_aasCrawler,
    Emis_amCrawler,
    Emis_hoaCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    EudmlCrawler,
    GeodesicCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JournalfiCrawler,
    JsigCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    MtaCrawler,
    NsjomCrawler,
    NumdamCrawler,
    PtmCrawler,
    RcmCrawler,
    ScholasticaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
)

# Lookup table keyed by each class's ``source_domain`` attribute. If two
# classes declare the same ``source_domain``, the one listed later in
# ``crawler_classes`` silently replaces the earlier one.
crawler_classes_map = {
    crawler_class.source_domain: crawler_class for crawler_class in crawler_classes
}
def get_crawler_class(source: str):
    """
    Look up the crawler class registered for a source domain.

    :param source: the source domain, matched against the keys of ``crawler_classes_map``
    :return: the matching crawler class, or ``None`` when no class is registered for ``source``
    """
    # dict.get already yields None for a missing key.
    return crawler_classes_map.get(source)
def crawler_factory(
    source: str,
    colid: str,
    username: str,
    dry: bool = False,
    force_refresh: bool = False,
    collection_url: str | None = None,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    Resolves the crawler class registered for ``source`` and instantiates it
    with the remaining arguments.

    :param source: the source domain used to select the crawler class, e.g. "Eudml"
    :param colid: collection pid, passed to the crawler as ``collection_id``
    :param username: passed to the crawler as ``username``
    :param dry: passed to the crawler as ``dry`` (default: False)
    :param force_refresh: passed to the crawler as ``force_refresh`` (default: False)
    :param collection_url: url of the collection web page (default: None)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class is registered for ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Same exception type as before so existing handlers keep working;
        # the message now names the offending source to ease debugging.
        raise NotImplementedError(f"No crawler is registered for source {source!r}")

    return klass(
        collection_id=colid,
        username=username,
        dry=dry,
        force_refresh=force_refresh,
        collection_url=collection_url,
    )