Coverage for src/crawler/by_source/emis_crawler.py: 39%

22 statements  

« prev     ^ index     » next       coverage.py v7.7.0, created at 2025-04-03 12:36 +0000

1from crawler.base_crawler import BaseCollectionCrawler 

2 

3 

class EmisCrawler(BaseCollectionCrawler):
    """
    Dispatching entry point for the EMIS crawlers.

    EmisCrawler is not meant to be used as a crawler itself: constructing it
    yields an instance of the concrete implementation registered for the
    collection_id given to its constructor.

    Check the by_source/emis directory for the real EMIS crawlers.

    The can_crawl class function can be used to know if a PID is implemented.
    """

    source_name = "European Mathematical Information Service"
    source_domain = "EMIS"
    source_website = "https://www.emis.de"

    # Registry mapping a collection PID to its crawler class. It starts empty
    # and is filled on first lookup by get_emis_crawler().
    EMIS_IMPLEMENTED = {}

    @classmethod
    def get_emis_crawler(cls, pid: str):
        """
        Return the crawler class registered for ``pid``, or None if there is none.

        The registry is populated lazily, with the implementation imported
        inside this function rather than at module import time.
        NOTE(review): presumably this avoids a circular import with the
        implementation modules -- confirm.
        """
        # Always read/write the registry on EmisCrawler itself: going through
        # ``cls`` would create a shadow copy on a subclass when this method is
        # called via that subclass.
        if not EmisCrawler.EMIS_IMPLEMENTED:
            from crawler.by_source.emis.am import EmisAmCrawler

            EmisCrawler.EMIS_IMPLEMENTED = {"AM": EmisAmCrawler}

        return EmisCrawler.EMIS_IMPLEMENTED.get(pid)

    def __new__(
        cls,
        *args,
        collection_id,
        **kwargs,
    ):
        """
        Allocate the crawler instance matching ``collection_id``.

        When called on a concrete subclass, this is a plain allocation.
        When called on EmisCrawler itself, the implementation registered for
        ``collection_id`` is instantiated instead.

        Raises:
            NotImplementedError: no implementation is registered for
                ``collection_id``.
        """
        if cls is not EmisCrawler:
            return super().__new__(cls)

        subcrawler = EmisCrawler.get_emis_crawler(collection_id)
        if not subcrawler:
            raise NotImplementedError("EMIS crawler for this PID not implemented")

        if issubclass(subcrawler, EmisCrawler):
            # Only allocate here. Because the returned object is an instance
            # of EmisCrawler, Python calls its __init__ right after __new__
            # returns, with these same arguments. Calling ``subcrawler(...)``
            # instead would run __init__ twice on the same object.
            return subcrawler.__new__(
                subcrawler, *args, collection_id=collection_id, **kwargs
            )

        # A registered class unrelated to EmisCrawler is not initialised
        # automatically by Python, so it must be fully constructed here.
        return subcrawler(*args, collection_id=collection_id, **kwargs)

    @classmethod
    def can_crawl(cls, pid: str) -> bool:
        """Return True if an EMIS crawler is registered for ``pid``."""
        return bool(EmisCrawler.get_emis_crawler(pid))