Coverage for src / crawler / factory.py: 97%
64 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-06-19 13:33 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-06-19 13:33 +0000
1import time
2from typing import TYPE_CHECKING
4from crawler.abstract_crawlers.base_crawler import BaseCollectionCrawler
5from crawler.by_source.amc_crawler import AmcCrawler
6from crawler.by_source.ami_crawler import AmiCrawler
7from crawler.by_source.amp_crawler import AmpCrawler
8from crawler.by_source.ams_crawler import AmsCrawler
9from crawler.by_source.amuc_crawler import AmucCrawler
10from crawler.by_source.arsia_crawler import ArsiaCrawler
11from crawler.by_source.asuo_crawler import AsuoCrawler
12from crawler.by_source.aulfm_crawler import AulfmCrawler
13from crawler.by_source.bdim_crawler import BdimCrawler
14from crawler.by_source.bmms_crawler import BmmsCrawler
15from crawler.by_source.compositio_crawler import CompositioCrawler
16from crawler.by_source.csis_crawler import CsisCrawler
17from crawler.by_source.cup_crawler import CupCrawler
18from crawler.by_source.dml_e_crawler import Dml_eCrawler
19from crawler.by_source.dmlbul_crawler import DmlbulCrawler
20from crawler.by_source.dmlcz_crawler import DmlczCrawler
21from crawler.by_source.dmlpl_crawler import DmlplCrawler
22from crawler.by_source.edpsci_crawler import EdpsciCrawler
23from crawler.by_source.ejc_crawler import EjcCrawler
24from crawler.by_source.elibm_crawler import ElibmCrawler
25from crawler.by_source.emis_aas_crawler import Emis_aasCrawler
26from crawler.by_source.emis_am_crawler import Emis_amCrawler
27from crawler.by_source.emis_hoa_crawler import Emis_hoaCrawler
28from crawler.by_source.ems_crawler import EmsCrawler
29from crawler.by_source.episciences_crawler import EpisciencesCrawler
30from crawler.by_source.eudml_crawler import EudmlCrawler
31from crawler.by_source.geodesic_crawler import GeodesicCrawler
32from crawler.by_source.hdml_crawler import HdmlCrawler
33from crawler.by_source.heldermann_crawler import HeldermannCrawler
34from crawler.by_source.impan_crawler import ImpanCrawler
35from crawler.by_source.ipb_crawler import IpbCrawler
36from crawler.by_source.isrp_crawler import IsrpCrawler
37from crawler.by_source.j_stage_crawler import JStageCrawler
38from crawler.by_source.jgaa_crawler import JgaaCrawler
39from crawler.by_source.journalfi_crawler import JournalfiCrawler
40from crawler.by_source.jsig_crawler import JsigCrawler
41from crawler.by_source.kobe_archive_crawler import KobeArchiveCrawler
42from crawler.by_source.lofpl_crawler import LofplCrawler
43from crawler.by_source.mathbas_crawler import MathbasCrawler
44from crawler.by_source.mathnetru_crawler import MathnetruCrawler
45from crawler.by_source.msp_crawler import MspCrawler
46from crawler.by_source.mta_crawler import MtaCrawler
47from crawler.by_source.nsjom.nsjom_crawler import NsjomCrawler
48from crawler.by_source.numdam_crawler import NumdamCrawler
49from crawler.by_source.ptm_crawler import PtmCrawler
50from crawler.by_source.rcm_crawler import RcmCrawler
51from crawler.by_source.sasa_crawler import SasaCrawler
52from crawler.by_source.scholastica_crawler import ScholasticaCrawler
53from crawler.by_source.seio_crawler import SeioCrawler
54from crawler.by_source.slc_crawler import Slc_Crawler
55from crawler.by_source.tac_crawler import TacCrawler
57if TYPE_CHECKING:
58 from collections.abc import Callable
# Every crawler class the factory can instantiate.
# Order is significant: when the lookup table below is built, a later entry
# replaces an earlier one if both declare the same ``source_domain``.
crawler_classes = (
    AmcCrawler,
    AmiCrawler,
    AmpCrawler,
    AmsCrawler,
    ArsiaCrawler,
    AsuoCrawler,
    AulfmCrawler,
    BdimCrawler,
    BmmsCrawler,
    CsisCrawler,
    Dml_eCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    EdpsciCrawler,
    EjcCrawler,
    ElibmCrawler,
    Emis_aasCrawler,
    Emis_amCrawler,
    Emis_hoaCrawler,
    EmsCrawler,
    EpisciencesCrawler,
    EudmlCrawler,
    GeodesicCrawler,
    HdmlCrawler,
    HeldermannCrawler,
    ImpanCrawler,
    IpbCrawler,
    IsrpCrawler,
    JgaaCrawler,
    JournalfiCrawler,
    JsigCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    MspCrawler,
    MtaCrawler,
    NsjomCrawler,
    NumdamCrawler,
    PtmCrawler,
    RcmCrawler,
    SasaCrawler,
    ScholasticaCrawler,
    SeioCrawler,
    Slc_Crawler,
    TacCrawler,
    CupCrawler,
    AmucCrawler,
    CompositioCrawler,
    KobeArchiveCrawler,
    JStageCrawler,
)

# Lookup table: source domain -> crawler class, keyed on each class's
# ``source_domain`` attribute.
crawler_classes_map = {
    crawler_cls.source_domain: crawler_cls for crawler_cls in crawler_classes
}
def get_crawler_class(source: str):
    """
    Look up the crawler class registered for a source domain.

    :param source: the source domain
    :return: the crawler class found in ``crawler_classes_map`` for this
        domain, or None when the domain is not registered
    """
    try:
        return crawler_classes_map[source]
    except KeyError:
        return None
def crawler_factory(
    source: str,
    colid: str,
    username: str,
    dry: bool = False,
    force_refresh: bool = False,
    collection_url: str | None = None,
    backend: str | None = None,
    pause_function: "Callable[[float], None]" = staticmethod(time.sleep),
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers.

    Resolves the crawler class registered for ``source`` and instantiates it
    with the remaining arguments.

    :param source: the source domain, used to select the crawler class
    :param colid: collection pid, passed to the crawler as ``collection_id``
    :param username: passed unchanged to the crawler constructor
    :param dry: passed unchanged to the crawler constructor (default: False)
    :param force_refresh: passed unchanged to the crawler constructor
        (default: False)
    :param collection_url: url of the collection web page (default: None)
    :param backend: passed unchanged to the crawler constructor
        (default: None)
    :param pause_function: callable taking a number of seconds, passed
        unchanged to the crawler constructor (default: ``time.sleep``)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class is registered for
        ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Name the offending domain so the failure is diagnosable from the
        # traceback alone.
        raise NotImplementedError(
            f"No crawler is registered for source domain {source!r}"
        )

    return klass(
        collection_id=colid,
        username=username,
        dry=dry,
        force_refresh=force_refresh,
        collection_url=collection_url,
        backend=backend,
        pause_function=pause_function,
    )