From ce0b63c146e910491a2d4d2403b668cc9a993d13 Mon Sep 17 00:00:00 2001
From: "Silas S. Brown" <ssb22@cam.ac.uk>
Date: Mon, 6 Jul 2015 07:59:34 +0000
Subject: [PATCH] Gradint update: cantonese.py v1.12 (Lau spelling, hyphenation default)

git-svn-id: http://svn.code.sf.net/p/e-guidedog/code/ssb22/gradint@2251 29193198-4895-4776-b068-10539e920549
---
 gradint-build/server/cantonese.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gradint-build/server/cantonese.py b/gradint-build/server/cantonese.py
index 2c33859..63a7ac8 100644
--- a/gradint-build/server/cantonese.py
+++ b/gradint-build/server/cantonese.py
@@ -3,7 +3,7 @@
 # cantonese.py - Python functions for processing Cantonese transliterations
 # (uses eSpeak and Gradint for help with some of them)
 
-# v1.11 (c) 2013-14 Silas S. Brown.  License: GPL
+# v1.12 (c) 2013-15 Silas S. Brown.  License: GPL
 
 dryrun_mode = False # True makes get_jyutping just batch it up for later
 jyutping_cache = {} ; jyutping_dryrun = set()
@@ -36,7 +36,7 @@ espeak = 0
 def jyutping_to_lau(j):
   j = j.lower().replace("j","y").replace("z","j")
   for k,v in jlRep: j=j.replace(k,v)
-  return j.lower()
+  return j.lower().replace("aa","a").replace("ohek","euk") # private communication 2015-07, partially confirmed w. publications
 jlRep = [(unchanged,unchanged.upper()) for unchanged in "aai aau aam aang aan aap aat aak ai au am ang an ap at ak a ei eng ek e iu im ing in ip it ik i oi ong on ot ok ung uk".split()] + [("eoi","UI"),("eon","UN"),("eot","UT"),("eok","EUK"),("oeng","EUNG"),("oe","EUH"),("c","ch"),("ou","O"),("o","OH"),("yu","UE"),("u","OO")]
 jlRep.sort(lambda a,b:len(b[0])-len(a[0]))
 # u to oo includes ui to ooi, un to oon, ut to oot
@@ -54,7 +54,7 @@ import re
 def hyphenate_ping_or_lau_syl_list(sList,groupLens=None):
     if type(sList) in [str,unicode]:
         sList = ping_or_lau_to_syllable_list(sList)
-    if not groupLens: groupLens = [len(sList)]
+    if not groupLens: groupLens = [1]*len(sList) # don't hyphenate at all if we don't know
     else: assert sum(groupLens) == len(sList)
     r = [] ; start = 0
     for g in groupLens:
-- 
GitLab