Coverage for src/crawler/factory.py: 0%
19 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-20 09:03 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-20 09:03 +0000
1from .base_crawler import BaseCollectionCrawler
2from .by_source.amc_crawler import AmcCrawler # noqa: F401 (class name dynamically set)
3from .by_source.amp_crawler import AmpCrawler # noqa: F401 (class name dynamically set)
4from .by_source.arsia_crawler import ArsiaCrawler # noqa: F401 (class name dynamically set)
5from .by_source.bdim_crawler import BdimCrawler # noqa: F401 (class name dynamically set)
6from .by_source.da_crawler import DaCrawler # noqa: F401 (class name dynamically set)
7from .by_source.dmlcz_crawler import DmlczCrawler # noqa: F401 (class name dynamically set)
8from .by_source.elibm_crawler import ElibmCrawler # noqa: F401 (class name dynamically set)
9from .by_source.eudml_crawler import EudmlCrawler # noqa: F401 (class name dynamically set)
10from .by_source.hdml_crawler import HdmlCrawler # noqa: F401 (class name dynamically set)
11from .by_source.tac_crawler import TacCrawler # noqa: F401 (class name dynamically set)
12from .by_source.sasa_crawler import SasaCrawler # noqa : F401 (class name dynamically set)
13from .by_source.nsjom_crawler import NsjomCrawler # noqa: F401 (class name dynamically set)
14from .by_source.impan_crawler import ImpanCrawler # noqa: F401 (class name dynamically set)
def crawler_factory(
    source, colid, col_url, username, progress_bar=None, start_pid=None, test_mode=None
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers

    The crawler class is resolved by name among this module's globals, using
    ``source.capitalize() + "Crawler"`` (so "Eudml" and "EUDML" both resolve to
    ``EudmlCrawler``). This is why the crawler classes are imported at module
    level even though they are never referenced explicitly.

    :param source: name of the source, e.g. "Eudml"
    :param colid: collection pid
    :param col_url: url of the collection web page
    :param username: passed to the crawler constructor as ``username``
    :param progress_bar: alive_bar progress_bar if you already have one (default: None)
    :param start_pid: passed unchanged to the crawler constructor (default: None)
    :param test_mode: passed unchanged to the crawler constructor (default: None)
    :return: a crawler derived from base_crawler
    :raises KeyError: if no crawler class matches ``source``
    """
    class_name = source.capitalize() + "Crawler"
    try:
        klass = globals()[class_name]
    except KeyError:
        # Keep the KeyError type callers may already handle, but say which
        # source was requested instead of exposing only the derived class name.
        raise KeyError(
            f"No crawler found for source {source!r} (expected a class named {class_name!r})"
        ) from None

    crawler: BaseCollectionCrawler = klass(
        collection_id=colid,
        collection_url=col_url,
        username=username,
        progress_bar=progress_bar,
        start_pid=start_pid,
        test_mode=test_mode,
    )
    return crawler