Coverage for src / crawler / cmds / mixed_citation.py: 93%
53 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-06-19 13:33 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-06-19 13:33 +0000
1"""
2Helpers to create an xml string for mixed citations
5"""
7from lxml import etree
8from lxml.builder import E
9from lxml.etree import Element
10from ptf.cmds.xml.jats.jats_parser import JatsBase
11from ptf.cmds.xml.xml_utils import escape
12from ptf.display.resolver import extids_formats, reverse_extids_hrefs
13from ptf.model_data import RefData
class GenericRefElement:
    """Generic node of a reference tree that can be serialised with lxml.

    ``name`` is the tag name; it is only declared here, so a subclass or the
    caller has to set it before serialising. ``elements`` holds the ordered
    children (plain strings or nested ``GenericRefElement`` objects) and
    ``attributes`` holds the attributes handed to the lxml ``E`` factory.
    """

    name: str
    elements: list["str | GenericRefElement"]
    attributes: dict[str, str]

    def __init__(self) -> None:
        super().__init__()
        # Fresh containers per instance so builders never share state.
        self.attributes = {}
        self.elements = []

    def get_xml_element(self):
        """Build the lxml element for this node, converting nested nodes recursively."""
        children: list[str | Element] = [
            child if isinstance(child, str) else child.get_xml_element()
            for child in self.elements
        ]
        return E(self.name, *children, **self.attributes)

    def get_xml_string(self):
        """
        Return this node serialised as a pretty-printed XML string.

        Make sure the hierarchy of the objects (self.elements) is valid JATS
        before calling this function
        """
        root = self.get_xml_element()
        serialized = etree.tostring(root, pretty_print=True)
        return serialized.decode()
def find_extlink(url: str):
    """Match *url* against the known prefixes in ``reverse_extids_hrefs``.

    The first prefix (in the mapping's iteration order) that *url* starts
    with wins.

    Returns:
        ``(value, remainder)`` where ``value`` is the entry stored for the
        matching prefix and ``remainder`` is *url* with that prefix removed,
        or ``(None, None)`` when no prefix matches.
    """
    for prefix, link_type in reverse_extids_hrefs.items():
        if not url.startswith(prefix):
            continue
        return link_type, url.removeprefix(prefix)
    return None, None
class ExtLinkXml(GenericRefElement):
    """``ext-link`` element pointing at a URL.

    How to use :
    ```py
    url = "https://doi.org/10.1017/S0143385700002364"
    extlink = ExtLinkXml(url)
    citation_builder.elements.append(extlink)
    ```
    """

    name = "ext-link"

    def __init__(self, url: str, content: str | None = None, type: str = "uri") -> None:
        super().__init__()

        # Without explicit text, the URL itself is what gets displayed.
        link_text = content or url
        link_type = type

        # When the requested type is not a known ext-id format, try to
        # recognise the URL; anything recognised overrides the fallbacks.
        if link_type not in extids_formats:
            detected_type, detected_text = find_extlink(url)
            if detected_type:
                link_type = detected_type
            if detected_text:
                link_text = detected_text

        self.attributes["ext-link-type"] = link_type
        self.attributes["href"] = url

        # NOTE(review): lxml escapes text nodes again when serialising; if
        # `escape` already XML-escapes, "&" may end up double-escaped - confirm.
        self.elements.append(escape(link_text))
class MixedCitation(GenericRefElement):
    """
    Builder for a ``mixed-citation`` element with an optional label.

    ```py
    citation_builder = MixedCitation()

    citation_builder.label = "[1]"

    persongroup = GenericRefElement()
    persongroup.name = "person-group"
    persongroup.elements.append("Michel B.")
    citation_builder.elements.append(persongroup)

    citation_builder.elements.append("untagged string to put between")

    year = GenericRefElement()
    year.name = "year"
    year.elements.append("2020")
    citation_builder.elements.append(year)

    citation_builder.get_jats_ref()
    ```
    """

    name = "mixed-citation"
    label: str | None = None

    def get_jats_ref(self):
        """Serialise the citation and pass it to ``JatsBase.bake_ref``.

        A ``RefData`` (lang ``"en"``) receives the XML string of this
        element, prefixed with ``<label>...</label>`` when ``label`` is set.

        Returns:
            Whatever ``JatsBase.bake_ref`` returns for that ``RefData``.
        """
        xref = RefData(lang="en")
        xref.citation_xml = self.get_xml_string()
        if self.label:
            # The label is spliced into the XML by string formatting, so
            # "&" and "<" have to be escaped here to keep the result
            # well-formed (lxml only takes care of the mixed-citation part).
            xref.citation_xml = f"<label>{escape(self.label)}</label>" + xref.citation_xml
        return JatsBase.bake_ref(xref)