-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
271 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
.github/* | ||
scripts/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,20 +3,51 @@ on: | |
branches: [ 'main' ] | ||
pull_request: | ||
branches: [ 'main' ] | ||
types: [opened, synchronize, reopened, ready_for_review] | ||
|
||
jobs: | ||
validate: | ||
name: Validate XML files with XMLStarlet | ||
build: | ||
name: Build and validate token sheets | ||
runs-on: ubuntu-latest | ||
|
||
if: github.event.pull_request.draft == false | ||
steps: | ||
- uses: actions/checkout@v3 | ||
|
||
- name: Validate 73 tokens | ||
uses: Mudlet/[email protected] | ||
- name: Checkout sheets | ||
uses: actions/checkout@v4 | ||
|
||
- name: Run build script | ||
run: | | ||
mkdir built | ||
python -m scripts.build | ||
- name: Upload artifact | ||
uses: actions/upload-artifact@v4 | ||
with: | ||
args: 'val -b 73.xml' | ||
|
||
- name: Validate 8X tokens | ||
uses: Mudlet/[email protected] | ||
name: built | ||
path: built/ | ||
|
||
commit: | ||
name: Push sheets to built branch | ||
runs-on: ubuntu-latest | ||
|
||
permissions: write-all | ||
needs: build | ||
|
||
if: github.ref == 'refs/heads/main' && github.event_name == 'push' | ||
steps: | ||
- name: Download artifact | ||
uses: actions/download-artifact@v4 | ||
with: | ||
args: 'val -b 8X.xml' | ||
name: built | ||
path: built/ | ||
|
||
- name: Save to built branch | ||
uses: s0/git-publish-subdir-action@develop | ||
env: | ||
REPO: self | ||
BRANCH: built | ||
FOLDER: built | ||
SKIP_EMPTY_COMMITS: true | ||
CLEAR_GLOBS_FILE: ".github/.clear-in-built" | ||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||
MESSAGE: "Build {sha}: {msg}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
from .formats import to_json, validate | ||
from .parse import Token, Tokens, OsVersion, OsVersions, Translation | ||
from .tokenide import TokenIDESheet | ||
from .trie import TokenTrie | ||
|
||
__all__ = ["Token", "Tokens", "OsVersion", "OsVersions", "Translation", "TokenIDESheet", "TokenTrie"] | ||
__all__ = ["Token", "Tokens", "OsVersion", "OsVersions", "Translation", | ||
"TokenTrie", "TokenIDESheet", "to_json", "validate"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import json | ||
import xml.etree.ElementTree as ET | ||
|
||
from .formats import * | ||
|
||
|
||
with open("8X.xml", encoding="UTF-8") as infile: | ||
root = ET.fromstring(src := infile.read()) | ||
|
||
with open("built/8X.xml", "w+", encoding="UTF-8") as outfile: | ||
validate(root) | ||
outfile.write(src) | ||
|
||
with open("built/8X.json", "w+", encoding="UTF-8") as outfile: | ||
json.dump(to_json(root), outfile, indent=2, ensure_ascii=False) | ||
|
||
|
||
with open("73.xml", encoding="UTF-8") as infile: | ||
root = ET.fromstring(src := infile.read()) | ||
|
||
with open("built/73.xml", "w+", encoding="UTF-8") as outfile: | ||
validate(root, for_73=True) | ||
outfile.write(src) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,200 @@ | ||
import json | ||
import re | ||
import xml.etree.ElementTree as ET | ||
|
||
from collections import defaultdict | ||
|
||
from .parse import OsVersion, OsVersions | ||
|
||
|
||
def validate(root: ET.Element, *, for_73: bool = False) -> int: | ||
""" | ||
Validates a token sheet, raising an error if an invalid component is found | ||
:param root: An XML element, which must be the root element of the sheet | ||
:param for_73: Whether to use the 73 sheet validator (defaults to False) | ||
:return: The number of tokens in the sheet | ||
""" | ||
|
||
if root.tag != "tokens": | ||
raise ValueError("not a token sheet") | ||
|
||
all_tokens = set() | ||
all_names = {} | ||
|
||
version = None | ||
|
||
def visit(element: ET.Element, byte: str = "", lang: str = ""): | ||
nonlocal version | ||
|
||
byte += element.attrib.get("value", "").lstrip("$") | ||
lang += element.attrib.get("code", "") | ||
|
||
class ValidationError(ValueError): | ||
__qualname__ = "ValidationError" | ||
|
||
def __init__(self, message: str): | ||
super().__init__((f"token 0x{byte}: " if byte else "root: ") + message) | ||
|
||
# Require attributes matching regexes | ||
def attributes(attrs: dict[str, str]): | ||
attrib = element.attrib.copy() | ||
|
||
for attr, regex in attrs.items(): | ||
if attr not in attrib: | ||
raise ValidationError(f"<{element.tag}> does not have attribute {attr}") | ||
|
||
if not re.fullmatch(regex, value := attrib.pop(attr)): | ||
raise ValidationError(f"<{element.tag}> {attr} '{value}' does not match r'{regex}'") | ||
|
||
if attrib: | ||
raise ValidationError(f"<{element.tag}> has unexpected attribute {[*attrib.values()][0]}") | ||
|
||
# Require child tags to match regex when appended in order | ||
def children(regex: str): | ||
if not re.fullmatch(regex, "".join(f"<{child.tag}>" for child in element)): | ||
raise ValidationError(f"children of <{element.tag}> do not match r'{regex}'") | ||
|
||
# Require text to match regex | ||
def text(regex: str): | ||
if not re.fullmatch(regex, element.text): | ||
raise ValidationError(f"<{element.tag}> text '{element.text}' does not match r'{regex}'") | ||
|
||
# Check requirements for each tag | ||
match element.tag: | ||
case "tokens": | ||
children(r"(<token>|<two-byte>)+") | ||
|
||
case "two-byte": | ||
attributes({"value": r"\$[0-9A-F]{2}"}) | ||
children(r"(<token>)+") | ||
|
||
case "token": | ||
attributes({"value": r"\$[0-9A-F]{2}"}) | ||
children(r"(<version>)+") | ||
|
||
if byte in all_tokens: | ||
raise ValidationError("token byte must be unique") | ||
|
||
all_tokens.add(byte) | ||
|
||
case "version": | ||
version = OsVersions.INITIAL | ||
children(r"<since>(<until>)?(<lang>)+") | ||
|
||
case "since": | ||
if not for_73: | ||
if (this_version := OsVersion.from_element(element)) < version: | ||
raise ValidationError(f"version {this_version} overlaps with {version}") | ||
|
||
version = this_version | ||
|
||
# Workaround for nested defaultdict | ||
all_names[version] = all_names.get(version, defaultdict(set)) | ||
|
||
children(r"<model><os-version>") | ||
|
||
case "until": | ||
children(r"<model><os-version>") | ||
|
||
case "lang": | ||
attributes({"code": r"[a-z]{2}"} if for_73 else {"code": r"[a-z]{2}", "ti-ascii": r"([0-9A-F]{2})+"}) | ||
children(r"<name>" if for_73 else r"<display><accessible>(<variant>)*") | ||
|
||
case "name" if for_73: | ||
text(r"[\S\s]+") | ||
|
||
case "display": | ||
text(r"[\S\s]+") | ||
|
||
case "accessible": | ||
text(r"[\u0000-\u00FF]*") | ||
|
||
if element.text in all_names[version][lang]: | ||
raise ValidationError(f"{lang} accessible name '{element.text}' is not unique within {version}") | ||
|
||
all_names[version][lang].add(element.text) | ||
|
||
case "variant": | ||
text(r".+") | ||
|
||
if element.text in all_names[version][lang]: | ||
raise ValidationError(f"{lang} variant name '{element.text}' is not unique within {version}") | ||
|
||
all_names[version][lang].add(element.text) | ||
|
||
case "model": | ||
text(r"TI-\d\d.*") | ||
|
||
case "os-version": | ||
text(r"(\d+\.)+\d+") | ||
|
||
case _: | ||
raise ValidationError(f"unrecognized tag <{element.tag}>") | ||
|
||
# Visit children | ||
for child in element: | ||
visit(child, byte, lang) | ||
|
||
visit(root) | ||
return len(all_tokens) | ||
|
||
|
||
def to_json(element: ET.Element): | ||
""" | ||
Converts a token sheet to an equivalent JSON representation | ||
:param element: An XML element; call on the root element to convert the entire sheet | ||
:return: The element and all its descendants as JSON | ||
""" | ||
|
||
match element.tag: | ||
case "tokens" | "two-byte": | ||
return {child.attrib["value"]: to_json(child) for child in element} | ||
|
||
case "token": | ||
return [to_json(child) for child in element] | ||
|
||
case "version": | ||
dct = {} | ||
langs = {} | ||
|
||
for child in element: | ||
if child.tag == "lang": | ||
langs[child.attrib["code"]] = to_json(child) | ||
|
||
else: | ||
dct[child.tag] = to_json(child) | ||
|
||
return dct | {"langs": langs} | ||
|
||
case "lang": | ||
dct = {"ti-ascii": element.attrib["ti-ascii"]} | ||
variants = [] | ||
|
||
for child in element: | ||
if child.tag == "variant": | ||
variants.append(child.text) | ||
|
||
else: | ||
dct[child.tag] = child.text | ||
|
||
if variants: | ||
return dct | {"variants": variants} | ||
|
||
else: | ||
return dct | ||
|
||
case _: | ||
if list(element): | ||
return {child.tag: to_json(child) for child in element} | ||
|
||
else: | ||
return element.text | ||
|
||
|
||
# with open("../8X.xml", encoding="UTF-8") as file: | ||
# json.dumps(to_json(ET.fromstring(file.read())), indent=2) | ||
|
||
|
||
__all__ = ["to_json", "validate"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters