Coverage for src/crawler/factory.py: 96%
46 statements
« prev ^ index » next coverage.py v7.6.4, created at 2025-02-14 14:36 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2025-02-14 14:36 +0000
1from crawler.by_source.isrp_crawler import IsrpCrawler
2from crawler.by_source.ptm_crawler import PtmCrawler
4from .base_crawler import BaseCollectionCrawler
5from .by_source.amc_crawler import AmcCrawler
6from .by_source.ami_crawler import AmiCrawler
7from .by_source.amp_crawler import AmpCrawler
8from .by_source.ams_crawler import AmsCrawler
9from .by_source.arsia_crawler import ArsiaCrawler
10from .by_source.asuo_crawler import AsuoCrawler
11from .by_source.aulfm_crawler import AulfmCrawler
12from .by_source.bdim_crawler import BdimCrawler
13from .by_source.csis_crawler import CsisCrawler
14from .by_source.da_crawler import DaCrawler
15from .by_source.dml_e_crawler import Dml_eCrawler
16from .by_source.dmlbul_crawler import DmlbulCrawler
17from .by_source.dmlcz_crawler import DmlczCrawler
18from .by_source.dmlpl_crawler import DmlplCrawler
19from .by_source.edpsci_crawler import EdpsciCrawler
20from .by_source.elibm_crawler import ElibmCrawler
21from .by_source.ems_crawler import EmsCrawler
22from .by_source.episciences_crawler import EpisciencesCrawler
23from .by_source.eudml_crawler import EudmlCrawler
24from .by_source.hdml_crawler import HdmlCrawler
25from .by_source.heldermann_crawler import HeldermannCrawler
26from .by_source.impan_crawler import ImpanCrawler
27from .by_source.ipb_crawler import IpbCrawler
28from .by_source.jgaa_crawler import JgaaCrawler
29from .by_source.journalfi_crawler import JournalfiCrawler
30from .by_source.lofpl_crawler import LofplCrawler
31from .by_source.mathbas_crawler import MathbasCrawler
32from .by_source.mathnetru_crawler import MathnetruCrawler
33from .by_source.msp_crawler import MspCrawler
34from .by_source.nsjom_crawler import NsjomCrawler
35from .by_source.rcm_crawler import RcmCrawler
36from .by_source.sasa_crawler import SasaCrawler
37from .by_source.seio_crawler import SeioCrawler
38from .by_source.tac_crawler import TacCrawler
# Registry of the crawler classes that get_crawler_class() searches.
# The lookup returns the first class whose ``source_domain`` matches, so the
# order of this tuple is kept as is.
crawler_classes = (
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    AmsCrawler,
    AsuoCrawler,
    ArsiaCrawler,
    AulfmCrawler,
    BdimCrawler,
    CsisCrawler,
    DaCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    ElibmCrawler,
    EudmlCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JournalfiCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    NsjomCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    SeioCrawler,
    TacCrawler,
)
# Note : a mapping could be better than an iterable.
def get_crawler_class(source):
    """
    Look up the crawler class registered for a source.

    :param source: value compared against each class's ``source_domain``
    :return: the first class in ``crawler_classes`` whose ``source_domain``
        equals ``source``, or None when no class matches
    """
    for candidate in crawler_classes:
        if source == candidate.source_domain:
            return candidate
    return None
def crawler_factory(
    source: str,
    colid: str,
    col_url: str,
    username: str,
    test_mode: bool = False,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    :param source: source domain used to select the crawler class, e.g. "Eudml"
    :param colid: collection pid
    :param col_url: url of the collection web page
    :param username: forwarded unchanged to the crawler constructor
    :param test_mode: forwarded unchanged to the crawler constructor (default: False)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class matches ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Same exception type as before; the message names the offending source
        # so the caller can tell which crawler is missing.
        raise NotImplementedError(f"No crawler is registered for source {source!r}")

    return klass(
        collection_id=colid,
        collection_url=col_url,
        username=username,
        test_mode=test_mode,
    )