Coverage for src/crawler/cmds/mixed_citation.py: 33%
53 statements
« prev ^ index » next coverage.py v7.9.0, created at 2025-08-29 13:43 +0000
« prev ^ index » next coverage.py v7.9.0, created at 2025-08-29 13:43 +0000
1"""
Helpers to create an XML string for mixed citations.

```py
citation_builder = MixedCitation()
citation_builder.label = "[1]"

persongroup = GenericRefElement()
persongroup.name = "person-group"
persongroup.elements.append("Michel B.")
citation_builder.elements.append(persongroup)

citation_builder.elements.append("untagged string to put between")

year = GenericRefElement()
year.name = "year"
year.elements.append("2020")
citation_builder.elements.append(year)

citation_builder.get_jats_ref()
```
22"""
24from lxml import etree
25from lxml.builder import E
26from lxml.etree import Element
27from ptf.cmds.xml.jats.jats_parser import JatsBase
28from ptf.cmds.xml.xml_utils import escape
29from ptf.display.resolver import extids_formats, reverse_extids_hrefs
30from ptf.model_data import RefData
class GenericRefElement:
    """
    Generic node of an XML tree under construction.

    A node is described by a tag name, an ordered list of children (plain
    strings or nested `GenericRefElement` objects) and a dict of XML
    attributes. Subclasses usually fix `name` as a class attribute.
    """

    # Tag name of the element; no default here, so it must be set on the
    # subclass or on the instance before building the XML.
    name: str
    # Children, in document order.
    elements: list["str | GenericRefElement"]
    # XML attributes of the element (attribute name -> value).
    attributes: dict[str, str]

    def __init__(self) -> None:
        super().__init__()
        # Created per instance so that nodes never share their containers.
        self.elements = []
        self.attributes = {}

    def get_xml_element(self):
        """
        Build the lxml element for this node.

        String children are handed to the lxml builder unchanged; nested
        nodes are converted recursively through their own
        `get_xml_element()`. The entries of `self.attributes` are passed to
        the builder as keyword arguments.
        """
        children: list[str | Element] = [
            child if isinstance(child, str) else child.get_xml_element()
            for child in self.elements
        ]
        return E(self.name, *children, **self.attributes)

    def get_xml_string(self) -> str:
        """
        Serialize this node (and its children) to a pretty-printed XML string.

        Make sure the hierarchy of the objects (self.elements) is valid JATS
        before calling this function.
        """
        root = self.get_xml_element()
        # etree.tostring() yields bytes here; decode to return text.
        return etree.tostring(root, pretty_print=True).decode()
def find_extlink(url: str):
    """
    Find the external-link type whose known URL prefix starts `url`.

    Each key of `reverse_extids_hrefs` is used as a URL prefix. The first
    key (in iteration order) that `url` starts with is selected.

    Returns:
        A `(type, content)` tuple: the value stored for the matching prefix
        and `url` with that prefix removed. `(None, None)` when no prefix
        matches.
    """
    for prefix, link_type in reverse_extids_hrefs.items():
        if url.startswith(prefix):
            return link_type, url.removeprefix(prefix)
    return None, None
class ExtLinkXml(GenericRefElement):
    """
    `ext-link` element pointing at `url`.

    Args:
        url: Target of the link; stored in the `href` attribute.
        content: Text of the link. Falls back to `url` when missing or empty.
        type: Value for the `ext-link-type` attribute. When it is not one of
            `extids_formats`, the type and the text are derived from `url`
            with `find_extlink()` if a known prefix matches.
    """

    name = "ext-link"

    def __init__(self, url: str, content: str | None = None, type: str = "uri") -> None:
        super().__init__()

        # `type` shadows the builtin but is part of the public signature, so
        # it is kept; work on local copies instead of reassigning parameters.
        link_type = type
        link_text = content or url

        if link_type not in extids_formats:
            detected_type, detected_text = find_extlink(url)
            # Keep the caller's values whenever detection finds nothing.
            link_type = detected_type or link_type
            link_text = detected_text or link_text

        self.attributes["ext-link-type"] = link_type
        self.attributes["href"] = url

        # NOTE(review): the text is escaped here and lxml escapes text nodes
        # again when serializing; confirm `escape` does not cause "&" to be
        # double-escaped in the final XML.
        self.elements.append(escape(link_text))
class MixedCitation(GenericRefElement):
    """
    `mixed-citation` element, optionally preceded by a `<label>`.

    Fill `elements` (and `label` if needed), then call `get_jats_ref()`.
    """

    name = "mixed-citation"
    # Optional reference label (e.g. "[1]"); omitted from the output when falsy.
    label: str | None = None

    def get_jats_ref(self):
        """
        Build a `RefData` (lang "en") from this citation and bake it.

        The citation XML is the serialized `mixed-citation` element, prefixed
        with `<label>…</label>` when a label is set. Returns the result of
        `JatsBase.bake_ref()`.
        """
        citation_xml = self.get_xml_string()
        if self.label:
            # NOTE(review): the label is interpolated verbatim, unlike the
            # children serialized through lxml; confirm callers never pass
            # labels containing "&" or "<".
            citation_xml = f"<label>{self.label}</label>" + citation_xml

        ref = RefData(lang="en")
        ref.citation_xml = citation_xml
        return JatsBase.bake_ref(ref)