Coverage for src/crawler/factory.py: 97%
56 statements
« prev ^ index » next coverage.py v7.9.0, created at 2025-09-16 12:41 +0000
1from crawler.by_source.advc_crawler import AdvcCrawler
2from crawler.by_source.cambridge_crawler import CambridgeCrawler
3from crawler.by_source.emis_aas_crawler import Emis_aasCrawler
4from crawler.by_source.isrp_crawler import IsrpCrawler
5from crawler.by_source.jsig_crawler import JsigCrawler
6from crawler.by_source.numdam_crawler import NumdamCrawler
7from crawler.by_source.ptm_crawler import PtmCrawler
8from crawler.by_source.slc_crawler import Slc_Crawler
10from .base_crawler import BaseCollectionCrawler
11from .by_source.amc_crawler import AmcCrawler
12from .by_source.ami_crawler import AmiCrawler
13from .by_source.amp_crawler import AmpCrawler
14from .by_source.ams.ams_eraams_crawler import Ams_eraamsCrawler
15from .by_source.ams.ams_jams_crawler import Ams_jamsCrawler
16from .by_source.arsia_crawler import ArsiaCrawler
17from .by_source.asuo_crawler import AsuoCrawler
18from .by_source.aulfm_crawler import AulfmCrawler
19from .by_source.bdim_crawler import BdimCrawler
20from .by_source.csis_crawler import CsisCrawler
21from .by_source.da_crawler import DaCrawler
22from .by_source.dml_e_crawler import Dml_eCrawler
23from .by_source.dmlbul_crawler import DmlbulCrawler
24from .by_source.dmlcz_crawler import DmlczCrawler
25from .by_source.dmlpl_crawler import DmlplCrawler
26from .by_source.edpsci_crawler import EdpsciCrawler
27from .by_source.elibm_crawler import ElibmCrawler
28from .by_source.emis_am_crawler import Emis_amCrawler
29from .by_source.emis_hoa_crawler import Emis_hoaCrawler
30from .by_source.ems_crawler import EmsCrawler
31from .by_source.episciences_crawler import EpisciencesCrawler
32from .by_source.eudml_crawler import EudmlCrawler
33from .by_source.hdml_crawler import HdmlCrawler
34from .by_source.heldermann_crawler import HeldermannCrawler
35from .by_source.impan_crawler import ImpanCrawler
36from .by_source.ipb_crawler import IpbCrawler
37from .by_source.jgaa_crawler import JgaaCrawler
38from .by_source.journalfi_crawler import JournalfiCrawler
39from .by_source.lofpl_crawler import LofplCrawler
40from .by_source.mathbas_crawler import MathbasCrawler
41from .by_source.mathnetru_crawler import MathnetruCrawler
42from .by_source.msp_crawler import MspCrawler
43from .by_source.nsjom.nsjom_crawler import NsjomCrawler
44from .by_source.rcm_crawler import RcmCrawler
45from .by_source.sasa_crawler import SasaCrawler
46from .by_source.seio_crawler import SeioCrawler
47from .by_source.tac_crawler import TacCrawler
# Registry of every concrete crawler class the factory can build.
# The order is deliberately left untouched: when the lookup table below is
# built, a later entry would replace an earlier one that exposes the same
# ``source_domain``.
crawler_classes = (
    AdvcCrawler,
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    Ams_eraamsCrawler,
    Ams_jamsCrawler,
    AsuoCrawler,
    ArsiaCrawler,
    AulfmCrawler,
    BdimCrawler,
    CambridgeCrawler,
    CsisCrawler,
    DaCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    ElibmCrawler,
    Emis_amCrawler,
    Emis_aasCrawler,
    Emis_hoaCrawler,
    EudmlCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JsigCrawler,
    JournalfiCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    NsjomCrawler,
    NumdamCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
)

# Lookup table: each class's ``source_domain`` attribute -> the class itself.
crawler_classes_map = {
    crawler_cls.source_domain: crawler_cls for crawler_cls in crawler_classes
}
def get_crawler_class(source: str):
    """
    Look up the crawler class registered for a source.

    :param source: key looked up in ``crawler_classes_map``
    :return: the matching crawler class, or None when the key is absent
    """
    try:
        return crawler_classes_map[source]
    except KeyError:
        # Unknown source: callers decide how to react to the missing class.
        return None
def crawler_factory(
    source: str,
    colid: str,
    col_url: str,
    username: str,
    test_mode: bool = False,
    force_refresh: bool = False,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    :param source: name used to select the crawler class, e.g. "Eudml"
    :param colid: collection pid
    :param col_url: url of the collection web page
    :param username: passed unchanged to the crawler constructor
    :param test_mode: passed unchanged to the crawler constructor (default: False)
    :param force_refresh: passed unchanged to the crawler constructor (default: False)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class is registered for ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Same exception type as before; the message now names the offending
        # source so the failure can be diagnosed from the traceback alone.
        raise NotImplementedError(f"No crawler is registered for source {source!r}")

    return klass(
        collection_id=colid,
        collection_url=col_url,
        username=username,
        test_mode=test_mode,
        force_refresh=force_refresh,
    )