Skip to content

Commit

Permalink
Add JSON converter (#19)
Browse files Browse the repository at this point in the history
  • Loading branch information
kg583 authored Jan 12, 2024
1 parent a8f5d28 commit 9e81db1
Show file tree
Hide file tree
Showing 6 changed files with 271 additions and 13 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/.clear-in-built
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.github/*
scripts/*
53 changes: 42 additions & 11 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,51 @@ on:
branches: [ 'main' ]
pull_request:
branches: [ 'main' ]
types: [opened, synchronize, reopened, ready_for_review]

jobs:
validate:
name: Validate XML files with XMLStarlet
build:
name: Build and validate token sheets
runs-on: ubuntu-latest

if: github.event.pull_request.draft == false
steps:
- uses: actions/checkout@v3

- name: Validate 73 tokens
uses: Mudlet/[email protected]
- name: Checkout sheets
uses: actions/checkout@v4

- name: Run build script
run: |
mkdir built
python -m scripts.build
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
args: 'val -b 73.xml'

- name: Validate 8X tokens
uses: Mudlet/[email protected]
name: built
path: built/

commit:
name: Push sheets to built branch
runs-on: ubuntu-latest

permissions: write-all
needs: build

if: github.ref == 'refs/heads/main' && github.event_name == 'push'
steps:
- name: Download artifact
uses: actions/download-artifact@v4
with:
args: 'val -b 8X.xml'
name: built
path: built/

- name: Save to built branch
uses: s0/git-publish-subdir-action@develop
env:
REPO: self
BRANCH: built
FOLDER: built
SKIP_EMPTY_COMMITS: true
# NOTE(review): this commit's file list adds the globs file as
# .github/workflows/.clear-in-built, which does not match the path below.
# Confirm which location is intended and that the file is actually present
# in this job's workspace when the action runs.
CLEAR_GLOBS_FILE: ".github/.clear-in-built"
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
MESSAGE: "Build {sha}: {msg}"
4 changes: 3 additions & 1 deletion scripts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from .formats import to_json, validate
from .parse import Token, Tokens, OsVersion, OsVersions, Translation
from .tokenide import TokenIDESheet
from .trie import TokenTrie

__all__ = ["Token", "Tokens", "OsVersion", "OsVersions", "Translation", "TokenIDESheet", "TokenTrie"]
__all__ = ["Token", "Tokens", "OsVersion", "OsVersions", "Translation",
"TokenTrie", "TokenIDESheet", "to_json", "validate"]
23 changes: 23 additions & 0 deletions scripts/build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import json
import xml.etree.ElementTree as ET

from .formats import *


# Build the 8X sheet: the validated XML is copied verbatim and a JSON rendering is written beside it
with open("8X.xml", encoding="UTF-8") as infile:
    root = ET.fromstring(src := infile.read())

# Validate before opening any output so a failing sheet never leaves an empty file in built/
validate(root)

with open("built/8X.xml", "w", encoding="UTF-8") as outfile:
    outfile.write(src)

with open("built/8X.json", "w", encoding="UTF-8") as outfile:
    json.dump(to_json(root), outfile, indent=2, ensure_ascii=False)


# Build the 73 sheet: only the validated XML is emitted (no JSON rendering is produced for it here)
with open("73.xml", encoding="UTF-8") as infile:
    root = ET.fromstring(src := infile.read())

validate(root, for_73=True)

with open("built/73.xml", "w", encoding="UTF-8") as outfile:
    outfile.write(src)
200 changes: 200 additions & 0 deletions scripts/formats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
import json
import re
import xml.etree.ElementTree as ET

from collections import defaultdict

from .parse import OsVersion, OsVersions


def validate(root: ET.Element, *, for_73: bool = False) -> int:
"""
Validates a token sheet, raising an error if an invalid component is found
:param root: An XML element, which must be the root element of the sheet
:param for_73: Whether to use the 73 sheet validator (defaults to False)
:return: The number of tokens in the sheet
"""

if root.tag != "tokens":
raise ValueError("not a token sheet")

all_tokens = set()
all_names = {}

version = None

def visit(element: ET.Element, byte: str = "", lang: str = ""):
nonlocal version

byte += element.attrib.get("value", "").lstrip("$")
lang += element.attrib.get("code", "")

class ValidationError(ValueError):
__qualname__ = "ValidationError"

def __init__(self, message: str):
super().__init__((f"token 0x{byte}: " if byte else "root: ") + message)

# Require attributes matching regexes
def attributes(attrs: dict[str, str]):
attrib = element.attrib.copy()

for attr, regex in attrs.items():
if attr not in attrib:
raise ValidationError(f"<{element.tag}> does not have attribute {attr}")

if not re.fullmatch(regex, value := attrib.pop(attr)):
raise ValidationError(f"<{element.tag}> {attr} '{value}' does not match r'{regex}'")

if attrib:
raise ValidationError(f"<{element.tag}> has unexpected attribute {[*attrib.values()][0]}")

# Require child tags to match regex when appended in order
def children(regex: str):
if not re.fullmatch(regex, "".join(f"<{child.tag}>" for child in element)):
raise ValidationError(f"children of <{element.tag}> do not match r'{regex}'")

# Require text to match regex
def text(regex: str):
if not re.fullmatch(regex, element.text):
raise ValidationError(f"<{element.tag}> text '{element.text}' does not match r'{regex}'")

# Check requirements for each tag
match element.tag:
case "tokens":
children(r"(<token>|<two-byte>)+")

case "two-byte":
attributes({"value": r"\$[0-9A-F]{2}"})
children(r"(<token>)+")

case "token":
attributes({"value": r"\$[0-9A-F]{2}"})
children(r"(<version>)+")

if byte in all_tokens:
raise ValidationError("token byte must be unique")

all_tokens.add(byte)

case "version":
version = OsVersions.INITIAL
children(r"<since>(<until>)?(<lang>)+")

case "since":
if not for_73:
if (this_version := OsVersion.from_element(element)) < version:
raise ValidationError(f"version {this_version} overlaps with {version}")

version = this_version

# Workaround for nested defaultdict
all_names[version] = all_names.get(version, defaultdict(set))

children(r"<model><os-version>")

case "until":
children(r"<model><os-version>")

case "lang":
attributes({"code": r"[a-z]{2}"} if for_73 else {"code": r"[a-z]{2}", "ti-ascii": r"([0-9A-F]{2})+"})
children(r"<name>" if for_73 else r"<display><accessible>(<variant>)*")

case "name" if for_73:
text(r"[\S\s]+")

case "display":
text(r"[\S\s]+")

case "accessible":
text(r"[\u0000-\u00FF]*")

if element.text in all_names[version][lang]:
raise ValidationError(f"{lang} accessible name '{element.text}' is not unique within {version}")

all_names[version][lang].add(element.text)

case "variant":
text(r".+")

if element.text in all_names[version][lang]:
raise ValidationError(f"{lang} variant name '{element.text}' is not unique within {version}")

all_names[version][lang].add(element.text)

case "model":
text(r"TI-\d\d.*")

case "os-version":
text(r"(\d+\.)+\d+")

case _:
raise ValidationError(f"unrecognized tag <{element.tag}>")

# Visit children
for child in element:
visit(child, byte, lang)

visit(root)
return len(all_tokens)


def to_json(element: ET.Element):
"""
Converts a token sheet to an equivalent JSON representation
:param element: An XML element; call on the root element to convert the entire sheet
:return: The element and all its descendants as JSON
"""

match element.tag:
case "tokens" | "two-byte":
return {child.attrib["value"]: to_json(child) for child in element}

case "token":
return [to_json(child) for child in element]

case "version":
dct = {}
langs = {}

for child in element:
if child.tag == "lang":
langs[child.attrib["code"]] = to_json(child)

else:
dct[child.tag] = to_json(child)

return dct | {"langs": langs}

case "lang":
dct = {"ti-ascii": element.attrib["ti-ascii"]}
variants = []

for child in element:
if child.tag == "variant":
variants.append(child.text)

else:
dct[child.tag] = child.text

if variants:
return dct | {"variants": variants}

else:
return dct

case _:
if list(element):
return {child.tag: to_json(child) for child in element}

else:
return element.text


# with open("../8X.xml", encoding="UTF-8") as file:
# json.dumps(to_json(ET.fromstring(file.read())), indent=2)


__all__ = ["to_json", "validate"]
2 changes: 1 addition & 1 deletion scripts/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@


@functools.total_ordering
@dataclass
@dataclass(frozen=True)
class OsVersion:
"""
Data class for defining and comparing OS versions
Expand Down

0 comments on commit 9e81db1

Please sign in to comment.