Coverage for src/crawler/factory.py: 94%
29 statements
« prev ^ index » next coverage.py v7.6.4, created at 2025-01-15 14:09 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2025-01-15 14:09 +0000
1from .base_crawler import BaseCollectionCrawler
2from .by_source.amc_crawler import AmcCrawler
3from .by_source.amp_crawler import AmpCrawler
4from .by_source.arsia_crawler import ArsiaCrawler
5from .by_source.bdim_crawler import BdimCrawler
6from .by_source.da_crawler import DaCrawler
7from .by_source.dmlbul_crawler import DmlbulCrawler
8from .by_source.dmlcz_crawler import DmlczCrawler
9from .by_source.dmlpl_crawler import DmlplCrawler
10from .by_source.elibm_crawler import ElibmCrawler
11from .by_source.eudml_crawler import EudmlCrawler
12from .by_source.hdml_crawler import HdmlCrawler
13from .by_source.impan_crawler import ImpanCrawler
14from .by_source.lofpl_crawler import LofplCrawler
15from .by_source.mathbas_crawler import MathbasCrawler
16from .by_source.mathnetru_crawler import MathnetruCrawler
17from .by_source.nsjom_crawler import NsjomCrawler
18from .by_source.rcm_crawler import RcmCrawler
19from .by_source.sasa_crawler import SasaCrawler
20from .by_source.tac_crawler import TacCrawler
# Every concrete crawler class the factory knows about.
# get_crawler_class() walks this tuple in order and returns the first class
# whose ``source_domain`` equals the requested source.
crawler_classes = (
    AmcCrawler,
    AmpCrawler,
    ArsiaCrawler,
    BdimCrawler,
    DaCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    ElibmCrawler,
    EudmlCrawler,
    HdmlCrawler,
    ImpanCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    NsjomCrawler,
    RcmCrawler,
    SasaCrawler,
    TacCrawler,
)
# NOTE: a mapping keyed by source_domain could be better than scanning an iterable.
def get_crawler_class(source):
    """
    Look up the crawler class registered for a source

    :param source: value compared (with ``==``) against each class's ``source_domain``
    :return: the first matching class from ``crawler_classes``, or None if none matches
    """
    for candidate in crawler_classes:
        if source == candidate.source_domain:
            return candidate
    return None
def crawler_factory(
    source: str,
    colid: str,
    col_url: str,
    username: str,
    start_pid: str | None = None,
    test_mode: bool = False,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    :param source: source identifier, matched against each crawler class's
        ``source_domain`` (e.g. "Eudml")
    :param colid: collection pid, passed to the crawler as ``collection_id``
    :param col_url: url of the collection web page, passed as ``collection_url``
    :param username: passed to the crawler as ``username``
    :param start_pid: passed to the crawler as ``start_pid`` (default: None)
    :param test_mode: passed to the crawler as ``test_mode`` (default: False)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no registered crawler class matches ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Name the offending source so the failure is diagnosable from the traceback.
        raise NotImplementedError(f"No crawler is registered for source {source!r}")

    return klass(
        collection_id=colid,
        collection_url=col_url,
        username=username,
        start_pid=start_pid,
        test_mode=test_mode,
    )