transvi fixes: 1. qu-/gi- onset tone placement. The u after q, and the i after g when a vowel follows, are onset glides rather than the rime nucleus, so the tone must skip them: qua -> quá (was qúa), gia -> giá. The onset was previously passed straight through to the app, so transvi never saw it and toned the glide. Keep the onset in the preedit by adding qu-/gi- clusters to telex.map (mktelex.py onsets(), appended additively to the curated map), and add onsetglide() so transvi skips the glide. gi- with no following vowel keeps i as the nucleus (gì, gìn). 2. A tone key on a vowel-less preedit (e.g. "đ" from dd) now commits the preedit and lets the tone key pass through (eat=0), matching the engine commit-on-passthrough invariant, instead of eating it into the commit. Verified against the running engine: qua/quan/quay/quê/quên/quyển, gia/già/giàu/giữ/giúp/giống, gì/gìn, dd+s; unchanged mua->mùa, của, lúa; all non-qu/gi words byte-identical to before. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
182 lines
4.8 KiB
Python
Executable File
182 lines
4.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
# Telex tone keys: s=rising f=falling r=hook x=tilde j=dot.
# Each key maps to a translation table that turns a bare vowel into the same
# vowel carrying that tone mark.  The key order (s, f, r, x, j) is the order
# in which the generator functions emit toned entries.
tone = {
    key: str.maketrans("aăâeêioôơuưy", marked)
    for key, marked in (
        ("s", "áắấéếíóốớúứý"),
        ("f", "àằầèềìòồờùừỳ"),
        ("r", "ảẳẩẻểỉỏổởủửỷ"),
        ("x", "ãẵẫẽễĩõỗỡũữỹ"),
        ("j", "ạặậẹệịọộợụựỵ"),
    )
}
|
|
|
|
# Modified vowels as (typed Telex keys, composed letter) pairs.
modvowel = [
    ("aw", "ă"), ("aa", "â"), ("ee", "ê"),
    ("oo", "ô"), ("ow", "ơ"), ("uw", "ư"),
]
|
|
|
|
# Modified consonants as (typed Telex keys, composed letter) pairs.
modcons = [("dd", "đ")]
|
|
|
|
# Lower-case -> upper-case table for every letter the map can produce:
# the bare vowels plus đ, then one group of twelve vowels per tone mark.
upper = str.maketrans(
    "aăâeêioôơuưyđ"
    "áắấéếíóốớúứý"
    "àằầèềìòồờùừỳ"
    "ảẳẩẻểỉỏổởủửỷ"
    "ãẵẫẽễĩõỗỡũữỹ"
    "ạặậẹệịọộợụựỵ",
    "AĂÂEÊIOÔƠUƯYĐ"
    "ÁẮẤÉẾÍÓỐỚÚỨÝ"
    "ÀẰẦÈỀÌÒỒỜÙỪỲ"
    "ẢẲẨẺỂỈỎỔỞỦỬỶ"
    "ÃẴẪẼỄĨÕỖỠŨỮỸ"
    "ẠẶẬẸỆỊỌỘỢỤỰỴ",
)
|
|
|
|
def addtone(v, t):
    """Return v with its bare vowels replaced by the forms carrying tone t.

    t must be a key of the module-level ``tone`` table (a KeyError is raised
    otherwise); characters the table does not cover pass through unchanged.
    """
    table = tone[t]
    return v.translate(table)
|
|
|
|
# Every (input, output) pair passed to emit(), in emission order.
entries = []


def emit(input, output):
    """Record one map entry and print it in lower- and upper-case form.

    The pair is appended to ``entries`` and printed as ``input<TAB>output``.
    A second line follows with the first character of each side upper-cased.
    """
    entries.append((input, output))
    print(f"{input}\t{output}")

    def up(s):
        # Try the Vietnamese table first; letters it leaves untouched
        # fall back to str.upper().
        head = s[0]
        capital = head.translate(upper)
        if capital == head:
            capital = head.upper()
        return capital + s[1:]

    print(f"{up(input)}\t{up(output)}")
|
|
|
|
def vowel1():
    """Emit each plain single vowel, bare and then with every tone key."""
    for base in "aeiouy":
        emit(base, base)
        for key in tone:
            emit(base + key, addtone(base, key))
|
|
|
|
def vowel2():
    """Emit the two-vowel clusters, bare and with every tone key.

    Each table row is (typed keys, composed output, vowel that carries the
    tone mark).  For a row the bare cluster is emitted first, then one entry
    per tone key with the mark applied to the first occurrence of the
    tone-bearing vowel in the output.
    """
    # input, output, vowel
    tab = [
        ("oa", "oa", "a"), ("oe", "oe", "e"), ("ai", "ai", "a"),
        ("ao", "ao", "a"), ("au", "au", "a"), ("ay", "ay", "a"),
        ("eu", "eu", "e"), ("iu", "iu", "i"), ("oi", "oi", "o"),
        ("ui", "ui", "u"), ("uy", "uy", "y"),
        ("iee", "iê", "ê"), ("yee", "yê", "ê"), ("uoo", "uô", "ô"),
        ("uow", "ươ", "ơ"), ("uaa", "uâ", "â"), ("oaw", "oă", "ă"),
        # ưa is an open rime, so the mark belongs on ư (ứa, ừa, ửa as in
        # cửa, thừa), not on the trailing a.
        ("uwa", "ưa", "ư"), ("uwow", "ươ", "ơ"),
    ]
    for i, o, v in tab:
        emit(i, o)
        for t in tone:
            emit(i+t, o.replace(v, addtone(v, t), 1))
    # Typed prefixes of the three-key clusters above (ie -> iee, uo -> uoo/uow,
    # ...); these are emitted bare only, with no toned forms.
    for bare in ("ie", "ye", "uo", "ua"):
        emit(bare, bare)
|
|
|
|
def vowel3():
    """Emit the three-vowel clusters, bare and with every tone key.

    Rows are (typed keys, composed output, vowel that carries the tone mark).
    ``uya`` is emitted bare only.
    """
    clusters = (
        ("ieeu", "iêu", "ê"), ("yeeu", "yêu", "ê"),
        ("uooi", "uôi", "ô"), ("uowi", "ươi", "ơ"),
        ("oai", "oai", "a"), ("oay", "oay", "a"),
        ("uyee", "uyê", "ê"),
    )
    for typed, composed, nucleus in clusters:
        emit(typed, composed)
        for key in tone:
            toned = composed.replace(nucleus, addtone(nucleus, key), 1)
            emit(typed + key, toned)
    emit("uya", "uya")
|
|
|
|
def modvowels():
    """Emit every modified vowel from ``modvowel`` without a tone."""
    for typed, letter in modvowel:
        emit(typed, letter)
|
|
|
|
def modconss():
    """Emit every modified consonant from ``modcons``."""
    for typed, letter in modcons:
        emit(typed, letter)
|
|
|
|
def mod1tone():
    """Emit modified vowels with the tone key typed last (aw + s -> ắ)."""
    for typed, letter in modvowel:
        for key in tone:
            emit(typed + key, addtone(letter, key))
|
|
|
|
# Re-binds modvowel to the same pairs as the definition near the top of the
# file; the functions that run after this point see this list.
modvowel = [
    ("aw", "ă"), ("aa", "â"), ("ee", "ê"),
    ("oo", "ô"), ("ow", "ơ"), ("uw", "ư"),
]
|
|
def tone1mod():
    """Emit modified vowels with the tone key typed in the middle.

    The tone key sits between the two keys of the digraph: a + s + w -> ắ.
    """
    for typed, letter in modvowel:
        for key in tone:
            emit(f"{typed[0]}{key}{typed[1]}", addtone(letter, key))
|
|
|
|
def tone2mod():
    """Emit two-vowel clusters where the tone key precedes the modifier key.

    Example: ie + s + e -> iế.  Rows are (keys typed before the tone,
    composed output, vowel that carries the mark, modifier key typed after
    the tone).
    """
    rows = (
        ("ie", "iê", "ê", "e"), ("ye", "yê", "ê", "e"),
        ("uo", "uô", "ô", "o"), ("uo", "ươ", "ơ", "w"),
        ("ua", "uâ", "â", "a"), ("oa", "oă", "ă", "w"),
        ("uwo", "ươ", "ơ", "w"),
    )
    for prefix, composed, nucleus, modifier in rows:
        for key in tone:
            toned = composed.replace(nucleus, addtone(nucleus, key), 1)
            emit(prefix + key + modifier, toned)
|
|
|
|
def escape():
    """Emit the escape sequences that undo a digraph.

    Repeating the last key of a digraph yields the literal keys: aww -> aw.
    """
    for digraph in ("aw", "aa", "ee", "oo", "ow", "uw", "dd"):
        emit(digraph + digraph[-1], digraph)
|
|
|
|
def final():
    """Emit every rime in the table closed by each coda, bare and toned.

    Rows are (typed keys, composed output, vowel that carries the tone mark).
    For the toned entries the tone key is typed after the coda, and the coda
    is appended to the output after the mark has been applied.
    """
    codas = ("c", "m", "n", "p", "t", "ch", "ng", "nh")
    rimes = (
        ("a", "a", "a"), ("e", "e", "e"), ("i", "i", "i"),
        ("o", "o", "o"), ("u", "u", "u"), ("y", "y", "y"),
        ("aw", "ă", "ă"), ("aa", "â", "â"), ("ee", "ê", "ê"),
        ("oo", "ô", "ô"), ("ow", "ơ", "ơ"), ("uw", "ư", "ư"),
        ("iee", "iê", "ê"), ("yee", "yê", "ê"), ("uoo", "uô", "ô"),
        ("uow", "ươ", "ơ"), ("uaa", "uâ", "â"), ("oaw", "oă", "ă"),
        ("ai", "ai", "a"), ("ao", "ao", "a"), ("au", "au", "a"),
        ("ay", "ay", "a"), ("oa", "oa", "a"), ("oi", "oi", "o"),
        ("ui", "ui", "u"), ("uy", "uy", "y"),
    )
    for typed, composed, nucleus in rimes:
        for coda in codas:
            emit(typed + coda, composed + coda)
            for key in tone:
                toned = composed.replace(nucleus, addtone(nucleus, key), 1)
                emit(typed + coda + key, toned + coda)
|
|
|
|
def onsets():
    """Emit qu-/gi- prefixed copies of every untoned vowel-initial entry.

    Keeping the qu-/gi- onset in the preedit lets the tone land on the rime
    nucleus rather than on the onset glide (qua -> quá not qúa, gia -> giá
    not gía).  transvi (onsetglide) knows to skip the glide; this function
    only has to make the composed clusters exist so the preedit accumulates
    them.  No toned forms are emitted here.
    """
    vowel_keys = frozenset("aeiouy")
    tone_keys = frozenset("sfrxj")
    # Snapshot the qualifying entries first: emit() appends to entries, and
    # the clusters added below must not be fed back into this pass.
    rimes = [
        (typed, composed)
        for typed, composed in entries
        if typed and typed[0] in vowel_keys and tone_keys.isdisjoint(typed)
    ]
    for typed, composed in rimes:
        if typed[0] != "u":  # no qu+u syllable; u is the glide
            emit("qu" + typed, "qu" + composed)
        emit("gi" + typed, "gi" + composed)
    # gi- with the i as nucleus (no following vowel): gì, gìn, gìm, ...
    # Only the coda-closed forms are emitted; bare "gi" is not.
    for coda in ("c", "m", "n", "p", "t", "ch", "ng", "nh"):
        emit("gi" + coda, "gi" + coda)
|
|
|
|
# Generate the map.  Every stage prints its entries and records them in
# `entries`; onsets() runs last because it builds the qu-/gi- clusters from
# the entries recorded by the stages before it.
for stage in (
    vowel1,
    vowel2,
    vowel3,
    modvowels,
    modconss,
    mod1tone,
    tone1mod,
    tone2mod,
    escape,
    final,
    onsets,
):
    stage()
|