Coverage for src/crawler/factory.py: 94%

29 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2025-01-15 14:09 +0000

1from .base_crawler import BaseCollectionCrawler 

2from .by_source.amc_crawler import AmcCrawler 

3from .by_source.amp_crawler import AmpCrawler 

4from .by_source.arsia_crawler import ArsiaCrawler 

5from .by_source.bdim_crawler import BdimCrawler 

6from .by_source.da_crawler import DaCrawler 

7from .by_source.dmlbul_crawler import DmlbulCrawler 

8from .by_source.dmlcz_crawler import DmlczCrawler 

9from .by_source.dmlpl_crawler import DmlplCrawler 

10from .by_source.elibm_crawler import ElibmCrawler 

11from .by_source.eudml_crawler import EudmlCrawler 

12from .by_source.hdml_crawler import HdmlCrawler 

13from .by_source.impan_crawler import ImpanCrawler 

14from .by_source.lofpl_crawler import LofplCrawler 

15from .by_source.mathbas_crawler import MathbasCrawler 

16from .by_source.mathnetru_crawler import MathnetruCrawler 

17from .by_source.nsjom_crawler import NsjomCrawler 

18from .by_source.rcm_crawler import RcmCrawler 

19from .by_source.sasa_crawler import SasaCrawler 

20from .by_source.tac_crawler import TacCrawler 

21 

# Every crawler class known to the factory, kept in alphabetical order.
# get_crawler_class() walks this tuple and compares each class's
# ``source_domain`` with the requested source.
crawler_classes = (
    AmcCrawler,
    AmpCrawler,
    ArsiaCrawler,
    BdimCrawler,
    DaCrawler,
    DmlbulCrawler,
    DmlczCrawler,
    DmlplCrawler,
    ElibmCrawler,
    EudmlCrawler,
    HdmlCrawler,
    ImpanCrawler,
    LofplCrawler,
    MathbasCrawler,
    MathnetruCrawler,
    NsjomCrawler,
    RcmCrawler,
    SasaCrawler,
    TacCrawler,
)

43 

44 

45# Note : a mapping could be better than an iterable. 

def get_crawler_class(source):
    """
    Find the crawler class that handles a given source.

    :param source: value compared for equality with each class's ``source_domain``
    :return: the first matching entry of ``crawler_classes``, or None if none matches
    """
    for candidate in crawler_classes:
        if source == candidate.source_domain:
            return candidate
    return None

48 

49 

def crawler_factory(
    source: str,
    colid: str,
    col_url: str,
    username: str,
    start_pid: str | None = None,
    test_mode: bool = False,
) -> BaseCollectionCrawler:
    """
    Factory for the crawlers.

    Resolves the crawler class for ``source`` with get_crawler_class() and
    instantiates it with the remaining arguments.

    :param source: source identifier used to select the crawler class, e.g. "Eudml"
    :param colid: collection pid, passed to the crawler as ``collection_id``
    :param col_url: url of the collection web page, passed as ``collection_url``
    :param username: passed unchanged to the crawler as ``username``
    :param start_pid: passed unchanged to the crawler as ``start_pid`` (default: None)
    :param test_mode: passed unchanged to the crawler as ``test_mode`` (default: False)
    :return: a crawler derived from base_crawler
    :raises NotImplementedError: if no crawler class matches ``source``
    """
    klass = get_crawler_class(source)

    if klass is None:
        # Name the offending source so the failure is diagnosable from the
        # traceback alone; the exception type is unchanged for callers.
        raise NotImplementedError(f"No crawler implemented for source {source!r}")

    return klass(
        collection_id=colid,
        collection_url=col_url,
        username=username,
        start_pid=start_pid,
        test_mode=test_mode,
    )