Skip to content

Commit

Permalink
translations of units in traditional Chinese (#321)
Browse files Browse the repository at this point in the history
* prepare test for units

* pass 237/238 tests

* SI Prefixes

* 鮑
  • Loading branch information
hjy1210 authored Dec 13, 2024
1 parent 482b2d8 commit d3e3aaf
Show file tree
Hide file tree
Showing 4 changed files with 655 additions and 126 deletions.
42 changes: 30 additions & 12 deletions Rules/Languages/zh/tw/SharedRules/general.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -342,9 +342,15 @@
# 3. does it match an SI prefix followed by an SI that accepts SI prefixes
# Due to this order, some things like "ft" and "pt" mean "feet" (not "femto-tonnes") and "pints" (not "pico-tonnes")
- name: unit
tag: mi
tag: unit
match: "$Verbosity != 'Terse' and contains(@data-intent-property, ':unit')"
variables:
# we need to look at preceding-sibling::*[2] because invisible times should have been added
# if in a fraction, only count if we are in the numerator
- IsSingular: "(parent::m:mrow and preceding-sibling::*[2][self::m:mn and . = 1]) or
(ancestor::*[2][self::m:mrow] and parent::m:fraction and
(preceding-sibling::* or parent::*[preceding-sibling::*[2][self::m:mn and . = 1]])
)"
- Prefix: "''"
- Word: "''"
replace:
Expand All @@ -361,38 +367,50 @@
then:
- set_variables: [Word: "DefinitionValue(., 'Speech', 'EnglishUnits')"]

# does the first character "da" (the only none-single letter prefix)
- else_if: "string-length(.) >= 3 and substring(., 1, 2) = 'da' and DefinitionValue(substring(., 3), 'Speech', 'SIUnits') != ''"
# do the first two chars match "da" and the remainder match an SIUnit
- else_if: "string-length(.) >= 3 and
substring(., 1, 2) = 'da' and
DefinitionValue(substring(., 3), 'Speech', 'SIUnits') != ''"
then:
- set_variables:
- Prefix: "DefinitionValue('da', 'Speech', 'SIPrefixes')"
- Word: "DefinitionValue(substring(., 3), 'Speech', 'SIUnits')"

# does the first character match a prefix
- else_if: "string-length(.) >= 2 and DefinitionValue(substring(., 1, 1), 'Speech', 'SIPrefixes') != ''"
# does the first char match a prefix and the remainder match an SIUnit
- else_if: "string-length(.) >= 2 and
DefinitionValue(substring(., 1, 1), 'Speech', 'SIPrefixes') != '' and
DefinitionValue(substring(., 2), 'Speech', 'SIUnits') != ''"
then:
- set_variables:
- Prefix: "DefinitionValue(substring(., 1, 1), 'Speech', 'SIPrefixes')"
- Word: "DefinitionValue(substring(., 2), 'Speech', 'SIUnits')"

# not a known unit -- just speak the text
# not a known unit -- just speak the text, possibly as a plural
- else:
x: "text()"
- set_variables:
- Word: "text()"

# somewhat complicated logic to avoid spaces around "-" as in "centi-grams" vs "centi - grams" -- probably doesn't matter
- test:
if: "$Prefix = ''"
then: [x: "$Word"]
then:
- test:
- if: "$IsSingular"
# HACK: '\uF8FE' is used internally for the concatenation char by 'ct' -- this gets the prefix concatenated to the base
then: [x: "$Word"]
- else_if: "DefinitionValue($Word, 'Speech', 'PluralForms') != ''"
then: [x: "DefinitionValue($Word, 'Speech', 'PluralForms')"]
else: [x: "$Word"]
else:
- x: "$Prefix"
- ct: "-"
- test:
- if: "$IsSingular"
# HACK: '\uF8FE' is used internally for the concatination char by 'ct' -- this gets the prefix concatinated to the base
# HACK: '\uF8FE' is used internally for the concatenation char by 'ct' -- this gets the prefix concatenated to the base
then: [x: "concat('\uF8FE', $Word)"]
- else_if: "DefinitionValue($Word, 'Speech', 'PluralForms') != ''"
then: [x: "DefinitionValue(concat('\uF8FE', $Word), 'Speech', 'PluralForms')"]
else: [x: "concat('\uF8FE', $Word)", ct: "s"]

then: [x: "concat('\uF8FE', DefinitionValue($Word, 'Speech', 'PluralForms'))"]
else: [x: "concat('\uF8FE', $Word)"]

- name: sin
tag: mi
Expand Down
244 changes: 130 additions & 114 deletions Rules/Languages/zh/tw/definitions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
# - name: [] "...", "..." "..." ]

- SIPrefixes: {
"Q": "quetta", "R": "ronna", "Y": "yotta", "Z": "zetta", "E": "exa", "P": "peta", "T": "tera", "G": "giga", "M": "mega", "k": "kilo", "h": "hecto", "da": "deka",
"d": "deci", "c": "centi", "m": "milli", "µ": "micro", "n": "nano", "p": "pico", "f": "femto", "a": "atto", "z": "zepto", "y": "yocto", "r": "ronto", "q": "quecto"
"Q": "", "R": "", "Y": "", "Z": "", "E": "", "P": "", "T": "", "G": "", "M": "百萬", "k": "", "h": "", "da": "",
"d": "", "c": "", "m": "", "µ": "", "n": "", "p": "", "f": "", "a": "", "z": "", "y": "", "r": "", "q": ""
}

# this is a list of all units that accept SIPrefixes
Expand All @@ -17,160 +17,176 @@
# The SI prefixes can be used with several of accepted units, but not, for example, with the non-SI units of time.
- SIUnits: {
# base units
"A": "amp",
"cd": "candela",
"K": "kelvin", "K": "kelvin", # U+212A
"g": "gram",
"m": "metre", # British spelling works for US also
"mol": "mole",
"s": "second", "″": "second", "\"": "second", "sec": "second", # "sec" not actually legal
"A": "安培",
"cd": "燭光",
"K": "凱氏", "K": "凱氏", # U+212A
"g": "",
"m": "", # British spelling works for US also
"mol": "莫耳",
"s": "", "″": "", "\"": "", "sec": "", # "sec" not actually legal

# derived units
"Bq": "becquerel",
"C": "coulomb",
"°C": "degree celsius", "℃": "degree celsius",
"F": "farad",
"Gy": "gray",
"H": "henry",
"Hz": "hertz",
"J": "joule",
"Bq": "貝克",
"C": "庫侖",
"°C": "攝氏度", "℃": "攝氏度",
"F": "法拉",
"Gy": "格雷",
"H": "亨利",
"Hz": "赫茲",
"J": "焦耳",
"kat": "kattel",
"lm": "lumen",
"lx": "lux",
"N": "newton",
"Ω": "ohm", "Ω": "ohm", # Greek Cap letter, U+2126 OHM SIGN
"Pa": "pascal",
"rad": "radian",
"S": "siemens",
"Sv": "sievert",
"lm": "流明",
"lx": "勒克斯",
"N": "牛頓",
"Ω": "歐姆", "Ω": "歐姆", # Greek Cap letter, U+2126 OHM SIGN
"Pa": "",
"rad": "",
"S": "西門子",
"Sv": "西弗",
"sr": "sterradion",
"T": "tesla",
"V": "volt",
"W": "watt",
"Wb": "weber",
"T": "特士拉",
"V": "伏特",
"W": "瓦特",
"Wb": "韋伯",

# accepted (plus a few variants) that take SI prefixes
"l": "litre", "L": "litre", "ℓ": "litre", # British spelling works for US also
"t": "metric ton",
"Da": "dalton",
"amu": "atomic mass unit", "u": "atomic mass unit", # 'u' is correct: https://en.wikipedia.org/wiki/Dalton_(unit)
"au": "astronomical unit", "AU": "astronomical unit",
"eV": "electronvolt",
"l": "公升", "L": "公升", "ℓ": "公升", # British spelling works for US also
"t": "公噸",
"Da": "道爾頓",
"Np": "奈培", # nepar # should only take negative powers
"amu": "原子質量單位", "u": "原子質量單位", # 'u' is correct: https://en.wikipedia.org/wiki/Dalton_(unit)
"au": "天文單位", "AU": "天文單位",
"eV": "電子伏特",

# copy from english version by hjy
# others that take a prefix
"a": "annum", # should only take positive powers
"as": "弧秒", # see en.wikipedia.org/wiki/Minute_and_second_of_arc

# Copy from english version by hjy
# technically wrong, but used in practice with SI Units
"b": "位元", # should only take positive powers
"B": "位元組", # should only take positive powers
"Bd": "", # should only take positive powers
}


- UnitsWithoutPrefixes: {
# time
"": "minute", "'": "minute","min": "minute",
"h": "hour", "hr": "hour", "Hr": "hour",
"d": "day", "dy": "day",
"w": "week", "wk": "week",
"y": "year", "yr": "year",
"": "", "'": "","min": "",
"h": "", "hr": "", "Hr": "",
"d": "", "dy": "",
"w": "", "wk": "",
"y": "", "yr": "",

# angles (could be temperature)
"°": "degree", "deg": "degree",
"arcmin": "arcminute",
"amin": "arcminute",
"am": "arcminute",
"MOA": "arcminute",
"arcsec": "arcsecond",
"asec": "arcsecond",
"as": "arcsecond",
"°": "", "deg": "",
"arcmin": "弧分",
"amin": "弧分",
"am": "弧分",
"MOA": "弧分",
"arcsec": "弧秒",
"asec": "弧秒",
"as": "弧秒",

# other accepted units that don't take SI prefixes
"ha": "hectare",
"Np": "neper",
"B": "bel",
"dB": "decibel",
"ha": "公頃",
"Np": "奈培",
"B": "貝爾",
"dB": "分貝",

# distance
"ltyr": "light year", "ly": "light year",
"pc": "parsec",
"Å": "angstrom", "Å": "angstrom", # U+00C5 and U+212B
"fm": "fermi",
"ltyr": "光年", "ly": "光年",
"pc": "秒差距",
"Å": "", "Å": "", # U+00C5 and U+212B
"fm": "費米",

# others
"atm": "atmosphere",
"bar": "bar",
"cal": "calorie",
"Ci": "curie",
"grad": "gradian",
"M": "molar",
"R": "roentgen",
"rpm": "revolution per minute",
"": "mho",
"dyn": "dyne",
"erg": "erg",
"atm": "大氣壓",
"bar": "",
"cal": "",
"Ci": "居里",
"grad": "百分直角",
"M": "體積莫耳濃度",
"R": "倫琴",
"rpm": "轉速每分鐘",
"": "姆歐",
"dyn": "達因",
"erg": "爾格",
# copy from english version by hjy
# powers of 2 used with bits and bytes
"Kib": "kibi-位元", "Mib": "mebi-位元", "Gib": "gibi-位元", "Tib": "tebi-位元", "Pib": "pebi-位元", "Eib": "exbi-位元", "Zib": "zebi-位元", "Yib": "yobi-位元",
"KiB": "kibi-位元組", "MiB": "mebi-位元組", "GiB": "gibi-位元組", "TiB": "tebi-位元組", "PiB": "pebi-位元組", "EiB": "exbi-位元組", "ZiB": "zebi-位元組", "YiB": "yobi-位元組",

}

# this will only be used if the language is English, so it can be empty for other countries
- EnglishUnits: {
# length
"in": "inch",
"ft": "foot",
"mi": "mile",
"rd": "rod",
"in": "英寸",
"ft": "英尺",
"mi": "英里",
"rd": "",
"li": "link",
"ch": "chain",
"ch": "",

# area
"sq in": "square inch", "sq. in": "square inch", "sq. in.": "square inch",
"sq ft": "square foot", "sq. ft": "square foot", "sq. ft.": "square foot",
"sq yd": "square yard", "sq. yd": "square yard", "sq. yd.": "square yard",
"sq mi": "square mile", "sq. mi": "square mile", "sq. mi.": "square mile",
"ac": "acre",
"FBM": "board foot",
"sq in": "平方英寸", "sq. in": "平方英寸", "sq. in.": "平方英寸",
"sq ft": "平方英尺", "sq. ft": "平方英尺", "sq. ft.": "平方英尺",
"sq yd": "平方碼", "sq. yd": "平方碼", "sq. yd.": "平方碼",
"sq mi": "平方英里", "sq. mi": "平方英里", "sq. mi.": "平方英里",
"ac": "英畝",
"FBM": "板英尺",

# volume
"cu in": "cubic inch", "cu. in": "cubic inch", "cu. in.": "cubic inch",
"cu ft": "cubic foot", "cu. ft": "cubic foot", "cu. ft.": "cubic foot",
"cu yd": "cubic yard", "cu. yd": "cubic yard", "cu. yd.": "cubic yard",
"bbl": "barrel", "BBL": "barrel",
"pk": "peck",
"bu": "bushel",
"tsp": "teaspoon",
"tbl": "tablespoon",
"cu in": "立方英寸", "cu. in": "立方英寸", "cu. in.": "立方英寸",
"cu ft": "立方英尺", "cu. ft": "立方英尺", "cu. ft.": "立方英尺",
"cu yd": "立方碼", "cu. yd": "立方碼", "cu. yd.": "立方碼",
"bbl": "", "BBL": "",
"pk": "配克",
"bu": "蒲式耳",
"tsp": "茶匙",
"tbl": "大匙",

# liquid
"fl dr": "fluid drams",
"fl oz": "fluid ounce",
"gi": "gill",
"cp": "cup", "cup": "cup",
"pt": "pint",
"qt": "quart",
"gal": "gallon",
"fl dr": "液量打蘭",
"fl oz": "液量盎司",
"gi": "及耳",
"cp": "", "cup": "",
"pt": "品脫",
"qt": "夸脫",
"gal": "加侖",

# weight
"gr": "grain",
"dr": "dram",
"oz": "ounce", "℥": "ounce",
"lb": "pound",
"cwt": "hundredweight",
"dwt": "pennyweight",
"oz t": "troy ounce",
"lb t": "troy pound",
"gr": "格林",
"dr": "打蘭",
"oz": "盎司", "℥": "盎司",
"lb": "",
"cwt": "英擔",
"dwt": "英錢",
"oz t": "金衡盎司",
"lb t": "金衡磅",

# energy
"hp": "horsepower",
"BTU": "BTU",
"°F": "degree fahrenheit", "℉": "degree fahrenheit",
"hp": "馬力",
"BTU": "英熱單位",
"°F": "華氏度", "℉": "華氏度",

# other
"mph": "mile per hour",
"mpg": "mile per gallon",
"mph": "英里每小時",
"mpg": "英里每加侖",
}

- PluralForms: {
# FIX: this needs to be fleshed out
"inch": "inches", "square inch": "square inches", "cubic inch": "cubic inches",
"foot": "feet", "square foot": "square feet", "cubic foot": "cubic feet",
"board foot": "board feet",
"degree celsius": "degrees celsius",
"degree fahrenheit": "degrees fahrenheit",
"hertz": "hertz",
"siemens": "siemens",
"revolution per minute": "revolutions per minute",
"inch": "英寸", "square inch": "平方英寸", "cubic inch": "立方英寸",
"foot": "英尺", "square foot": "平方英尺", "cubic foot": "立方英尺",
"board foot": "板英尺",
"degree celsius": "攝氏度",
"degree fahrenheit": "華氏度",
"hertz": "赫茲",
"siemens": "西門子",
"revolution per minute": "轉速每分鐘",
}


Expand Down
1 change: 1 addition & 0 deletions tests/Languages/zh/tw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ mod chemistry;
mod alphabets;
mod intent;
mod mtable;
mod units;

Loading

0 comments on commit d3e3aaf

Please sign in to comment.