From ce0b63c146e910491a2d4d2403b668cc9a993d13 Mon Sep 17 00:00:00 2001 From: "Silas S. Brown" <ssb22@cam.ac.uk> Date: Mon, 6 Jul 2015 07:59:34 +0000 Subject: [PATCH] Gradint update git-svn-id: http://svn.code.sf.net/p/e-guidedog/code/ssb22/gradint@2251 29193198-4895-4776-b068-10539e920549 --- gradint-build/server/cantonese.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gradint-build/server/cantonese.py b/gradint-build/server/cantonese.py index 2c33859..63a7ac8 100644 --- a/gradint-build/server/cantonese.py +++ b/gradint-build/server/cantonese.py @@ -3,7 +3,7 @@ # cantonese.py - Python functions for processing Cantonese transliterations # (uses eSpeak and Gradint for help with some of them) -# v1.11 (c) 2013-14 Silas S. Brown. License: GPL +# v1.12 (c) 2013-15 Silas S. Brown. License: GPL dryrun_mode = False # True makes get_jyutping just batch it up for later jyutping_cache = {} ; jyutping_dryrun = set() @@ -36,7 +36,7 @@ espeak = 0 def jyutping_to_lau(j): j = j.lower().replace("j","y").replace("z","j") for k,v in jlRep: j=j.replace(k,v) - return j.lower() + return j.lower().replace("aa","a").replace("ohek","euk") # private communication 2015-07, partially confirmed w. publications jlRep = [(unchanged,unchanged.upper()) for unchanged in "aai aau aam aang aan aap aat aak ai au am ang an ap at ak a ei eng ek e iu im ing in ip it ik i oi ong on ot ok ung uk".split()] + [("eoi","UI"),("eon","UN"),("eot","UT"),("eok","EUK"),("oeng","EUNG"),("oe","EUH"),("c","ch"),("ou","O"),("o","OH"),("yu","UE"),("u","OO")] jlRep.sort(lambda a,b:len(b[0])-len(a[0])) # u to oo includes ui to ooi, un to oon, ut to oot @@ -54,7 +54,7 @@ import re def hyphenate_ping_or_lau_syl_list(sList,groupLens=None): if type(sList) in [str,unicode]: sList = ping_or_lau_to_syllable_list(sList) - if not groupLens: groupLens = [len(sList)] + if not groupLens: groupLens = [1]*len(sList) # don't hyphenate at all if we don't know else: assert sum(groupLens) == len(sList) r = [] ; start = 0 for g in groupLens: -- GitLab