Coverage for src/crawler/factory.py: 96%
55 statements
« prev ^ index » next coverage.py v7.9.0, created at 2025-07-30 09:47 +0000
« prev ^ index » next coverage.py v7.9.0, created at 2025-07-30 09:47 +0000
1from crawler.by_source.advc_crawler import AdvcCrawler
2from crawler.by_source.cambridge_crawler import CambridgeCrawler
3from crawler.by_source.emis_aas_crawler import Emis_aasCrawler
4from crawler.by_source.isrp_crawler import IsrpCrawler
5from crawler.by_source.jsig_crawler import JsigCrawler
6from crawler.by_source.ptm_crawler import PtmCrawler
7from crawler.by_source.slc_crawler import Slc_Crawler
9from .base_crawler import BaseCollectionCrawler
10from .by_source.amc_crawler import AmcCrawler
11from .by_source.ami_crawler import AmiCrawler
12from .by_source.amp_crawler import AmpCrawler
13from .by_source.ams.ams_eraams_crawler import Ams_eraamsCrawler
14from .by_source.ams.ams_jams_crawler import Ams_jamsCrawler
15from .by_source.arsia_crawler import ArsiaCrawler
16from .by_source.asuo_crawler import AsuoCrawler
17from .by_source.aulfm_crawler import AulfmCrawler
18from .by_source.bdim_crawler import BdimCrawler
19from .by_source.csis_crawler import CsisCrawler
20from .by_source.da_crawler import DaCrawler
21from .by_source.dml_e_crawler import Dml_eCrawler
22from .by_source.dmlbul_crawler import DmlbulCrawler
23from .by_source.dmlcz_crawler import DmlczCrawler
24from .by_source.dmlpl_crawler import DmlplCrawler
25from .by_source.edpsci_crawler import EdpsciCrawler
26from .by_source.elibm_crawler import ElibmCrawler
27from .by_source.emis_am_crawler import Emis_amCrawler
28from .by_source.emis_hoa_crawler import Emis_hoaCrawler
29from .by_source.ems_crawler import EmsCrawler
30from .by_source.episciences_crawler import EpisciencesCrawler
31from .by_source.eudml_crawler import EudmlCrawler
32from .by_source.hdml_crawler import HdmlCrawler
33from .by_source.heldermann_crawler import HeldermannCrawler
34from .by_source.impan_crawler import ImpanCrawler
35from .by_source.ipb_crawler import IpbCrawler
36from .by_source.jgaa_crawler import JgaaCrawler
37from .by_source.journalfi_crawler import JournalfiCrawler
38from .by_source.lofpl_crawler import LofplCrawler
39from .by_source.mathbas_crawler import MathbasCrawler
40from .by_source.mathnetru_crawler import MathnetruCrawler
41from .by_source.msp_crawler import MspCrawler
42from .by_source.nsjom.nsjom_crawler import NsjomCrawler
43from .by_source.rcm_crawler import RcmCrawler
44from .by_source.sasa_crawler import SasaCrawler
45from .by_source.seio_crawler import SeioCrawler
46from .by_source.tac_crawler import TacCrawler
# Every crawler class that the factory can instantiate.
# The tuple order is preserved on purpose: if two classes ever declared the
# same ``source_domain``, the one listed later would win in the map below.
crawler_classes = (
    AdvcCrawler,
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    Ams_eraamsCrawler,
    Ams_jamsCrawler,
    AsuoCrawler,
    ArsiaCrawler,
    AulfmCrawler,
    BdimCrawler,
    CambridgeCrawler,
    CsisCrawler,
    DaCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    ElibmCrawler,
    Emis_amCrawler,
    Emis_aasCrawler,
    Emis_hoaCrawler,
    EudmlCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JsigCrawler,
    JournalfiCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    NsjomCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
)

# Lookup table: ``source_domain`` class attribute -> crawler class.
crawler_classes_map = {
    crawler_cls.source_domain: crawler_cls for crawler_cls in crawler_classes
}
def get_crawler_class(source):
    """
    Look up the crawler class registered for a source.

    :param source: key looked up in ``crawler_classes_map``
    :return: the matching crawler class, or None when no entry exists
    """
    # dict.get already yields None for a missing key.
    return crawler_classes_map.get(source)
def crawler_factory(
    source: str,
    colid: str,
    col_url: str,
    username: str,
    test_mode: bool = False,
    force_refresh: bool = False,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    :param source: key used to look up the crawler class, e.g. "Eudml"
    :param colid: collection pid
    :param col_url: url of the collection web page
    :param username: forwarded unchanged to the crawler constructor
    :param test_mode: forwarded unchanged to the crawler constructor (default: False)
    :param force_refresh: forwarded unchanged to the crawler constructor (default: False)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class is registered for ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Name the offending source so the failure is diagnosable from the
        # traceback alone; the exception type is unchanged for callers.
        raise NotImplementedError(f"No crawler is registered for source {source!r}")

    return klass(
        collection_id=colid,
        collection_url=col_url,
        username=username,
        test_mode=test_mode,
        force_refresh=force_refresh,
    )