Coverage for src/crawler/factory.py: 97% (57 statements)
Generated by coverage.py v7.12.0, created at 2025-12-11 14:57 +0000
1from crawler.by_source.cambridge_crawler import CambridgeCrawler
2from crawler.by_source.emis_aas_crawler import Emis_aasCrawler
3from crawler.by_source.geodesic_crawler import GeodesicCrawler
4from crawler.by_source.isrp_crawler import IsrpCrawler
5from crawler.by_source.jsig_crawler import JsigCrawler
6from crawler.by_source.numdam_crawler import NumdamCrawler
7from crawler.by_source.ptm_crawler import PtmCrawler
8from crawler.by_source.scholastica_crawler import ScholasticaCrawler
9from crawler.by_source.slc_crawler import Slc_Crawler
11from .base_crawler import BaseCollectionCrawler
12from .by_source.amc_crawler import AmcCrawler
13from .by_source.ami_crawler import AmiCrawler
14from .by_source.amp_crawler import AmpCrawler
15from .by_source.ams.ams_eraams_crawler import Ams_eraamsCrawler
16from .by_source.ams.ams_jams_crawler import Ams_jamsCrawler
17from .by_source.arsia_crawler import ArsiaCrawler
18from .by_source.asuo_crawler import AsuoCrawler
19from .by_source.aulfm_crawler import AulfmCrawler
20from .by_source.bdim_crawler import BdimCrawler
21from .by_source.bmms_crawler import BmmsCrawler
22from .by_source.csis_crawler import CsisCrawler
23from .by_source.dml_e_crawler import Dml_eCrawler
24from .by_source.dmlbul_crawler import DmlbulCrawler
25from .by_source.dmlcz_crawler import DmlczCrawler
26from .by_source.dmlpl_crawler import DmlplCrawler
27from .by_source.edpsci_crawler import EdpsciCrawler
28from .by_source.elibm_crawler import ElibmCrawler
29from .by_source.emis_am_crawler import Emis_amCrawler
30from .by_source.emis_hoa_crawler import Emis_hoaCrawler
31from .by_source.ems_crawler import EmsCrawler
32from .by_source.episciences_crawler import EpisciencesCrawler
33from .by_source.eudml_crawler import EudmlCrawler
34from .by_source.hdml_crawler import HdmlCrawler
35from .by_source.heldermann_crawler import HeldermannCrawler
36from .by_source.impan_crawler import ImpanCrawler
37from .by_source.ipb_crawler import IpbCrawler
38from .by_source.jgaa_crawler import JgaaCrawler
39from .by_source.journalfi_crawler import JournalfiCrawler
40from .by_source.lofpl_crawler import LofplCrawler
41from .by_source.mathbas_crawler import MathbasCrawler
42from .by_source.mathnetru_crawler import MathnetruCrawler
43from .by_source.msp_crawler import MspCrawler
44from .by_source.nsjom.nsjom_crawler import NsjomCrawler
45from .by_source.rcm_crawler import RcmCrawler
46from .by_source.sasa_crawler import SasaCrawler
47from .by_source.seio_crawler import SeioCrawler
48from .by_source.tac_crawler import TacCrawler
# Registry of every concrete crawler class known to the factory.
# NOTE(review): the listing is alphabetical by class name except for two
# swapped pairs (AsuoCrawler/ArsiaCrawler and EpisciencesCrawler/ElibmCrawler);
# order is preserved here as-is since crawler_classes_map is built from this
# tuple and a later duplicate source_domain (if any existed) would win.
crawler_classes = (
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    Ams_eraamsCrawler,
    Ams_jamsCrawler,
    AsuoCrawler,
    ArsiaCrawler,
    AulfmCrawler,
    BdimCrawler,
    BmmsCrawler,
    CambridgeCrawler,
    CsisCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    ElibmCrawler,
    Emis_amCrawler,
    Emis_aasCrawler,
    Emis_hoaCrawler,
    EudmlCrawler,
    GeodesicCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JsigCrawler,
    JournalfiCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    NsjomCrawler,
    NumdamCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    ScholasticaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
)
# Index the registry by each crawler's source domain for O(1) lookup.
crawler_classes_map = {klass.source_domain: klass for klass in crawler_classes}
def get_crawler_class(source: str):
    """Return the crawler class registered for *source* (a source domain),
    or ``None`` when no crawler handles that source."""
    return crawler_classes_map.get(source)
def crawler_factory(
    source: str,
    colid: str,
    username: str,
    dry: bool = False,
    force_refresh=False,
    collection_url: str | None = None,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers.

    :param source: source domain identifier, e.g. "Eudml"
    :param colid: collection pid
    :param username: name of the user on whose behalf the crawl runs
    :param dry: forwarded to the crawler; presumably disables persistence — confirm in BaseCollectionCrawler
    :param force_refresh: forwarded to the crawler; presumably bypasses cached data — confirm in BaseCollectionCrawler
    :param collection_url: url of the collection web page (optional)
    :return: a crawler instance derived from BaseCollectionCrawler
    :raises NotImplementedError: if no crawler is registered for ``source``
    """
    klass = get_crawler_class(source)
    if klass is None:
        # Name the unknown source so the failure is actionable for the caller.
        raise NotImplementedError(f"No crawler registered for source {source!r}")
    crawler = klass(
        collection_id=colid,
        username=username,
        dry=dry,
        force_refresh=force_refresh,
        collection_url=collection_url,
    )
    return crawler