Coverage for src / crawler / cmds / xml_cmds.py: 18%
83 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-06-19 13:33 +0000
1import logging
2import math
4from django.core.exceptions import ObjectDoesNotExist
5from django.db.models import Q
6from ptf import model_helpers
7from ptf.cmds.xml_cmds import (
8 addOrUpdateBookXmlCmd,
9 addOrUpdateContainerXmlCmd,
10 addOrUpdateIssueXmlCmd,
11 importEntireCollectionXmlCmd,
12)
13from ptf.models import Collection, CollectionMembership, Container
15from crawler.models import ContainerSource
17logger = logging.getLogger(__name__)
def add_source_to_container(source_domain: str, container: Container):
    """Link the crawler source registered for ``source_domain`` to ``container``.

    The origin currently attached to the container (if any) is deleted, then a
    ``ContainerSource`` row tying the source to the container is fetched or
    created.

    Args:
        source_domain: Key handed to ``factory.get_crawler_class``.
        container: Container that receives the source.

    Returns:
        ``True`` once the link is in place, ``False`` when
        ``factory.get_crawler_class`` yields nothing for ``source_domain``.
    """
    # Function-scope import, as in the original
    # (presumably to avoid a circular import — TODO confirm).
    from crawler import factory

    crawler_cls = factory.get_crawler_class(source_domain)
    if not crawler_cls:
        logger.warning(
            f"Couldn't insert source {source_domain} to container : source not implemented"
        )
        return False

    crawler_source = crawler_cls.get_or_create_source()

    # Drop the previous origin first; a container without one raises
    # ObjectDoesNotExist on access, which simply means there is nothing to drop.
    try:
        container.origin.delete()
    except ObjectDoesNotExist:
        pass

    ContainerSource.objects.get_or_create(source=crawler_source, container=container)
    logger.debug(f"Source {source_domain} successfully added to container {container.pid}")
    return True
def update_collection_years(pid):
    """Recompute and save the ``fyear``/``lyear`` span of a collection.

    The span is derived from the ``year`` of every container selected below:
    containers with a non-disabled origin that belong to the collection, plus
    every container referenced by a ``CollectionMembership`` of the collection
    being updated. When the collection identified by ``pid`` has a parent, the
    parent is the collection that gets updated.

    Both fields are set to 0 when no selected container carries a year.

    Args:
        pid: ``pid`` of the collection to start from.

    Raises:
        Collection.DoesNotExist: if no collection matches ``pid``.
    """
    collection = Collection.objects.get(pid=pid)
    parent = collection.parent

    if parent is None:
        target = collection
        own_containers = Container.objects.filter(
            my_collection__pid=pid, origin__disabled=False
        )
    else:
        # NOTE(review): the parent is updated, yet the lookup is keyed on the
        # child's pid (the child's containers and those of its sub-collections).
        # Confirm this is the intended set before changing it.
        target = parent
        own_containers = Container.objects.filter(
            Q(my_collection__pid=pid) | Q(my_collection__parent__pid=pid),
            origin__disabled=False,
        )

    # select_related fetches the containers in the same query instead of
    # issuing one extra query per membership row.
    memberships = CollectionMembership.objects.filter(
        collection__pid=target.pid
    ).select_related("container")

    containers = [*(membership.container for membership in memberships), *own_containers]
    year_spans = [
        model_helpers.get_first_last_years(container.year)
        for container in containers
        if container.year
    ]

    # default=0 covers the "no dated container" case.
    target.fyear = min((int(first) for first, _ in year_spans), default=0)
    target.lyear = max((int(last) for _, last in year_spans), default=0)
    target.save(update_fields=["fyear", "lyear"])
class addOrUpdateGDMLIssueXmlCmd(addOrUpdateIssueXmlCmd):
    """Issue command that additionally records the source of the issue."""

    def internal_do(self):
        """Run the parent command, then attach a source to its result.

        The source domain is ``self.xissue.source``; when that is empty,
        ``"NUMDAM"`` is used, but only if ``self.add_link_to_source`` is truthy.
        The collection years of ``self.xissue.journal.pid`` are refreshed
        afterwards.

        Returns:
            Whatever the parent ``internal_do`` returned.
        """
        container = super().internal_do()
        if not container or not self.xissue:
            logger.debug("Skipping source insertion : database object or xissue is not set")
            return container

        source_domain = self.xissue.source
        if not source_domain and not self.add_link_to_source:
            logger.debug(
                "Skipping source insertion : add_link_to_source is false and xissue.source is None"
            )
            return container

        add_source_to_container(source_domain or "NUMDAM", container)
        # NOTE(review): this resolves ``update_collection_years`` on the instance
        # (i.e. through the parent class), not the module-level function of the
        # same name defined in this file — confirm which one is intended.
        self.update_collection_years(self.xissue.journal.pid)
        return container
class addOrUpdateGDMLBookXmlCmd(addOrUpdateBookXmlCmd):
    """Book command that additionally records the source of the book."""

    def internal_do(self):
        """Run the parent command, then attach a source to its result.

        The source domain is ``self.xbook.source``; when that is empty,
        ``"NUMDAM"`` is used, but only if ``self.add_link_to_source`` is truthy.

        Returns:
            Whatever the parent ``internal_do`` returned.
        """
        container = super().internal_do()
        if not container or not self.xbook:
            logger.debug(
                "Skipping source insertion : no database object was returned or xbook is None"
            )
            return container

        source_domain = self.xbook.source
        if not source_domain and not self.add_link_to_source:
            logger.debug(
                "Skipping source insertion : add_link_to_source is false and xbook.source is None"
            )
            return container

        add_source_to_container(source_domain or "NUMDAM", container)
        return container
class addOrUpdateGDMLContainerXmlCmd(addOrUpdateContainerXmlCmd):
    """Container command whose issue/book override attributes name the GDML commands."""

    # How the parent class consumes these attributes is defined in ptf and is
    # not visible from this file.
    addOrUpdateIssueXmlCmdOverride = addOrUpdateGDMLIssueXmlCmd
    addOrUpdateBookXmlCmdOverride = addOrUpdateGDMLBookXmlCmd
# Import-time side effect: set the container-command override attribute on
# ptf's collection import command to the GDML container command.
importEntireCollectionXmlCmd.addOrUpdateContainerXmlCmdOverride = addOrUpdateGDMLContainerXmlCmd