Coverage for src / crawler / abstract_crawlers / matching_crawler.py: 48%
25 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-05-21 12:58 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-05-21 12:58 +0000
1from concurrent.futures import (
2 Executor,
3 ThreadPoolExecutor,
4)
5from typing import TYPE_CHECKING
7from crawler.abstract_crawlers.threaded_crawler import ThreadedCrawler
8from crawler.by_source.slc_crawler import IssueData
9from crawler.cmds.augment.augment_cmd import AugmentArticlesCmd
11if TYPE_CHECKING:
12 from concurrent.futures import Future
14 from crawler.cmds.augment.zbmath import MatchingOperationMessage
class MatchingCrawler(ThreadedCrawler):
    """Threaded crawler that schedules an augment command for each added issue.

    Every issue handed to ``_issue_added_callback`` is queued on a dedicated
    single-worker thread pool, where ``match_zbmath_issue`` runs an
    ``AugmentArticlesCmd`` for that issue and collects the event messages
    the command exposes.
    """

    # Pool on which the matching jobs are executed.
    zbmath_executor: Executor
    # One future per job submitted to ``zbmath_executor``.
    zbmath_futures: "list[Future]"
    # NOTE(review): never assigned in this class -- presumably set by
    # ThreadedCrawler or by callers; confirm.
    exception: BaseException | None = None

    # Event messages gathered from every augment command run so far.
    matching_event_messages: "list[MatchingOperationMessage]"

    def __init__(self, *args, **kwargs):
        """Initialise the parent crawler, then the matching state.

        All positional and keyword arguments are forwarded unchanged to
        ``ThreadedCrawler.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.zbmath_futures = []
        self.matching_event_messages = []
        # A single worker: matching jobs run one at a time, in submit order.
        self.zbmath_executor = ThreadPoolExecutor(
            max_workers=1,
            thread_name_prefix="crawler_zbmath_thread",
        )

    def _issue_added_callback(self, future: "Future[IssueData]"):
        """Queue a matching job for the issue carried by ``future``.

        Raises:
            ValueError: if the resulting issue has an empty ``pid``.
        """
        # Runs in database thread
        issue = future.result()
        issue_pid = issue.pid
        if not issue_pid:
            raise ValueError("Issue doesn't have any pid")
        self.zbmath_futures.append(
            self.zbmath_executor.submit(self.match_zbmath_issue, issue_pid)
        )

    def match_zbmath_issue(self, issue_pid: str):
        """Run ``AugmentArticlesCmd`` for one issue and keep its event messages."""
        params = {
            "backend": self.backend,
            "collection_pid": self.collection_id,
            "source_id": self.source_domain,
            "issue_pids": [issue_pid],
        }
        augment = AugmentArticlesCmd(params)
        augment.do()
        self.matching_event_messages.extend(augment.event_messages)