Coverage for src/crawler/by_source/emis_crawler.py: 39%
22 statements
« prev ^ index » next coverage.py v7.7.0, created at 2025-04-03 12:36 +0000
« prev ^ index » next coverage.py v7.7.0, created at 2025-04-03 12:36 +0000
1from crawler.base_crawler import BaseCollectionCrawler
class EmisCrawler(BaseCollectionCrawler):
    """
    Dispatching entry point for the EMIS crawlers.

    EmisCrawler is an abstract class that transforms itself into its implementation
    depending on the collection_id given in its constructor: instantiating
    ``EmisCrawler(collection_id=...)`` returns an instance of the crawler class
    registered for that collection id, not a plain ``EmisCrawler``.

    Check the by_source/emis directory for the real EMIS crawlers.

    The can_crawl class function can be used to know if a PID is implemented.
    """

    source_name = "European Mathematical Information Service"
    source_domain = "EMIS"
    source_website = "https://www.emis.de"

    # Registry mapping a collection PID to its concrete crawler class.
    # Left empty at class-creation time and filled on the first lookup
    # (see get_emis_crawler).
    EMIS_IMPLEMENTED = {}

    @classmethod
    def get_emis_crawler(cls, pid: str):
        """Return the crawler class registered for ``pid``, or ``None`` if there is none."""
        if not cls.EMIS_IMPLEMENTED:
            # Imported on first use rather than at module level
            # (presumably to avoid a circular import -- TODO confirm).
            from crawler.by_source.emis.am import EmisAmCrawler

            cls.EMIS_IMPLEMENTED = {"AM": EmisAmCrawler}

        return cls.EMIS_IMPLEMENTED.get(pid)

    def __new__(
        cls,
        *args,
        collection_id,
        **kwargs,
    ):
        """
        Allocate the concrete crawler matching ``collection_id``.

        When called on a subclass, this is a plain allocation. When called on
        ``EmisCrawler`` itself, the concrete class is looked up in the registry
        and an instance of that class is returned instead.

        Raises:
            NotImplementedError: no crawler is registered for ``collection_id``.
        """
        if cls is not EmisCrawler:
            return super().__new__(cls)

        subcrawler = EmisCrawler.get_emis_crawler(collection_id)
        if not subcrawler:
            raise NotImplementedError("EMIS crawler for this PID not implemented")

        if issubclass(subcrawler, cls):
            # The returned object is an instance of ``cls``, so Python will call
            # its __init__ with the original arguments right after __new__
            # returns. Only allocate here; building a full instance with
            # ``subcrawler(...)`` would run __init__ twice.
            return subcrawler.__new__(
                subcrawler, *args, collection_id=collection_id, **kwargs
            )

        # Not a subclass: Python will not call __init__ for us, so construct
        # the instance completely.
        return subcrawler(*args, collection_id=collection_id, **kwargs)

    @classmethod
    def can_crawl(cls, pid: str) -> bool:
        """Tell whether a concrete EMIS crawler is registered for ``pid``."""
        return bool(EmisCrawler.get_emis_crawler(pid))