Skip to content

Commit

Permalink
Merge pull request #69 from robertatakenaka/scielo_migration_xml
Browse files Browse the repository at this point in the history
Scielo migration xml
  • Loading branch information
robertatakenaka authored Oct 28, 2024
2 parents 85989e2 + 4d91724 commit a0c3c99
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 11 deletions.
23 changes: 12 additions & 11 deletions scielo_classic_website/spsxml/sps_xml_refs.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,7 @@ def transform(self, data):
ref.insert(0, mixed_citation)
refs.append(ref)
except Exception as e:
logging.info(f"citation number: {i}")
logging.exception(e)
raise e
refs.append(ET.Comment(str({"data": citation, "error": str(e), "error_type": str(type(e))})))
back.replace(reflist, refs)
return data

Expand Down Expand Up @@ -170,7 +168,7 @@ def transform(self, data):
raw, xml = data
articletitle = ET.Element("article-title")

articletitle.text = raw.article_title
utils.handle_bad_text(articletitle, raw.article_title)

xml.find("./element-citation").append(articletitle)

Expand All @@ -189,7 +187,7 @@ def transform(self, data):

articletitle = ET.Element("chapter-title")

articletitle.text = raw.chapter_title
utils.handle_bad_text(articletitle, raw.chapter_title)

xml.find("./element-citation").append(articletitle)

Expand Down Expand Up @@ -231,7 +229,7 @@ def transform(self, data):

source = ET.Element("source")

source.text = raw.source
utils.handle_bad_text(source, raw.source)

xml.find("./element-citation").append(source)

Expand Down Expand Up @@ -589,15 +587,15 @@ def build_name(self, author):
name = ET.Element("name")
if author.get("surname"):
elem = ET.Element("surname")
elem.text = author.get("surname")
utils.handle_bad_text(elem, author.get("surname"))
name.append(elem)
if author.get("given_names"):
elem = ET.Element("given-names")
elem.text = author.get("given_names")
utils.handle_bad_text(elem, author.get("given_names"))
name.append(elem)
if author.get("anonymous"):
elem = ET.Element("anonymous")
elem.text = author.get("given_names")
utils.handle_bad_text(elem, author.get("given_names"))
name.append(elem)
if name.getchildren():
return name
Expand All @@ -617,11 +615,14 @@ def build_person_authors(self, authors):
return group

def build_collab(self, author):
    """Build a ``<collab>`` element from an author's name and division.

    The ``name`` and ``division`` entries of *author* are sanitized with
    ``utils.handle_bad_characters`` and joined with ``", "``. Entries that
    are missing or empty are left out.

    Returns the new element, or ``None`` when no text remains.
    """
    parts = []
    for key in ("name", "division"):
        value = author.get(key)
        if not value:
            # Skip absent entries *before* sanitizing: the sanitizer
            # iterates over its argument and would fail on None.
            continue
        cleaned = utils.handle_bad_characters(value)
        if cleaned:
            parts.append(cleaned)
    text = ", ".join(parts)
    if not text:
        return None
    elem = ET.Element("collab")
    # handle_bad_text assigns elem.text itself, so no direct assignment here.
    utils.handle_bad_text(elem, text)
    return elem

def build_institutional_authors(self, authors):
Expand Down
23 changes: 23 additions & 0 deletions scielo_classic_website/spsxml/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from lxml import etree


def convert_ahref_to_extlink(xml_etree):
"""
This method receives an etree node and replaces all "a href" elements to
Expand Down Expand Up @@ -40,3 +43,23 @@ def convert_all_html_tags_to_jats(xml_etree):
xml_etree = convert_html_tags_to_jats(xml_etree)

return xml_etree


def handle_bad_text(node, text):
    """Assign *text* to ``node.text``, falling back to a sanitized copy.

    If the direct assignment raises, the error message is attached to
    *node* as a comment child and ``node.text`` is set to the result of
    ``handle_bad_characters(text)`` instead.
    """
    try:
        node.text = text
    except Exception as error:
        # Keep a trace of why the raw text was rejected.
        note = etree.Comment(str(error))
        node.append(note)
        cleaned = handle_bad_characters(text)
        node.text = cleaned


def handle_bad_characters(text):
    """Return *text* with every character the XML element rejects masked.

    Each character is probed by assigning it to the ``text`` of a scratch
    element; a character whose assignment raises is replaced by ``'?'``.

    ``None`` is returned unchanged, so callers may pass optional values
    (for example the result of ``dict.get``) without checking first.
    """
    if text is None:
        # Iterating over None would raise TypeError; there is nothing to clean.
        return None
    probe = etree.Element("temporary")
    chars = []
    for c in text:
        try:
            probe.text = c
        except Exception:
            # Deliberately broad, best-effort: any rejected character is masked.
            chars.append('?')
        else:
            chars.append(c)
    return ''.join(chars)

0 comments on commit a0c3c99

Please sign in to comment.