Coverage for src/crawler/factory.py: 96%
50 statements
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-16 07:44 +0000
« prev ^ index » next coverage.py v7.8.2, created at 2025-06-16 07:44 +0000
1from crawler.by_source.isrp_crawler import IsrpCrawler
2from crawler.by_source.ptm_crawler import PtmCrawler
3from crawler.by_source.slc_crawler import Slc_Crawler
5from .base_crawler import BaseCollectionCrawler
6from .by_source.amc_crawler import AmcCrawler
7from .by_source.ami_crawler import AmiCrawler
8from .by_source.amp_crawler import AmpCrawler
9from .by_source.ams_crawler import AmsCrawler
10from .by_source.arsia_crawler import ArsiaCrawler
11from .by_source.asuo_crawler import AsuoCrawler
12from .by_source.aulfm_crawler import AulfmCrawler
13from .by_source.bdim_crawler import BdimCrawler
14from .by_source.csis_crawler import CsisCrawler
15from .by_source.da_crawler import DaCrawler
16from .by_source.dml_e_crawler import Dml_eCrawler
17from .by_source.dmlbul_crawler import DmlbulCrawler
18from .by_source.dmlcz_crawler import DmlczCrawler
19from .by_source.dmlpl_crawler import DmlplCrawler
20from .by_source.edpsci_crawler import EdpsciCrawler
21from .by_source.elibm_crawler import ElibmCrawler
22from .by_source.emis_aaa_crawler import Emis_aaaCrawler
23from .by_source.emis_am_crawler import Emis_amCrawler
24from .by_source.ems_crawler import EmsCrawler
25from .by_source.episciences_crawler import EpisciencesCrawler
26from .by_source.eudml_crawler import EudmlCrawler
27from .by_source.hdml_crawler import HdmlCrawler
28from .by_source.heldermann_crawler import HeldermannCrawler
29from .by_source.impan_crawler import ImpanCrawler
30from .by_source.ipb_crawler import IpbCrawler
31from .by_source.jgaa_crawler import JgaaCrawler
32from .by_source.journalfi_crawler import JournalfiCrawler
33from .by_source.lofpl_crawler import LofplCrawler
34from .by_source.mathbas_crawler import MathbasCrawler
35from .by_source.mathnetru_crawler import MathnetruCrawler
36from .by_source.msp_crawler import MspCrawler
37from .by_source.nsjom_crawler import NsjomCrawler
38from .by_source.rcm_crawler import RcmCrawler
39from .by_source.sasa_crawler import SasaCrawler
40from .by_source.seio_crawler import SeioCrawler
41from .by_source.tac_crawler import TacCrawler
# Registry of every concrete crawler class the factory can instantiate.
# The order only matters if two classes declare the same ``source_domain``:
# the lookup table built below keeps whichever class is listed last.
crawler_classes = (
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    AmsCrawler,
    AsuoCrawler,
    ArsiaCrawler,
    AulfmCrawler,
    BdimCrawler,
    CsisCrawler,
    DaCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    ElibmCrawler,
    Emis_aaaCrawler,
    Emis_amCrawler,
    EudmlCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JournalfiCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    NsjomCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
)

# Lookup table: each class's ``source_domain`` attribute -> the class itself.
crawler_classes_map = {
    crawler_cls.source_domain: crawler_cls for crawler_cls in crawler_classes
}
def get_crawler_class(source):
    """
    Look up the crawler class registered for a source.

    :param source: key to look up in ``crawler_classes_map``
    :return: the matching crawler class, or None when the key is not registered
    """
    # dict.get already falls back to None for a missing key.
    return crawler_classes_map.get(source)
def crawler_factory(
    source: str,
    colid: str,
    col_url: str,
    username: str,
    test_mode: bool = False,
    force_refresh: bool = False,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    Resolves the crawler class for ``source`` with ``get_crawler_class`` and
    instantiates it with the remaining arguments.

    :param source: key identifying the crawler class, e.g. "Eudml"
    :param colid: collection pid, passed to the crawler as ``collection_id``
    :param col_url: url of the collection web page, passed as ``collection_url``
    :param username: passed unchanged to the crawler constructor
    :param test_mode: passed unchanged to the crawler constructor (default: False)
    :param force_refresh: passed unchanged to the crawler constructor (default: False)
    :raises NotImplementedError: if no crawler class is registered for ``source``
    :return: a crawler derived from base_crawler
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Same exception type as before; the message names the unknown key so
        # a misconfigured source can be identified from the traceback.
        raise NotImplementedError(f"No crawler registered for source {source!r}")

    return klass(
        collection_id=colid,
        collection_url=col_url,
        username=username,
        test_mode=test_mode,
        force_refresh=force_refresh,
    )