with 1021 additions and 53 deletions
......@@ -10,6 +10,8 @@ C:\Windows\system32 or somewhere.
All require Python (from www.python.org). All systems
except Windows have that anyway if you can run gradint.
However, some of these utilities are still Python2-only,
while the main Gradint is now compatible with both 2 and 3.
......@@ -18,13 +20,10 @@ autosplit.py - splits a long recording into individual files
completely automatically, but only if the recording has been
made in near-broadcasting-studio conditions.
strip0.py - strips absolute silence off the beginning and
end of audio files (only useful if you're dealing with files
from a textbook CD or something)
manual-splitter.py (Unix only but not too difficult to
modify for other systems) - a helper script so that you can
use Audacity (or another sound editor) to split the file.
use Audacity (or another sound editor) to split the file
in non-realtime. (Realtime splitting can be done in Gradint.)
Use the "export selection as wav" command (you can assign a
hot-key to it), and you don't have to type in a different
filename each time because this script can run in the
......@@ -51,4 +50,8 @@ online synthesizers (or real people) to the synth cache
transliterate.py - make a transliterated vocab report
(for use with grep or on PDAs or whatever)
player.py - simple sound-playing server (plays WAV or MP3 data sent to it over a network connection)
diagram.py - make a diagram of a gradint lesson
trace.py - make a raytraced animation of a lesson
......@@ -4,13 +4,12 @@ import os, struct, sndhdr, sys
try: import winsound
except: winsound=None
macsound = (sys.platform.find("mac")>=0 or sys.platform.find("darwin")>=0)
if macsound: sys.stderr.write("Warning: You need to have qtplay (from gradint or wherever) in your PATH for this to work\n")
# python 3+:
try: input
except: input=lambda x:eval(raw_input(x))
startCount = 1 # or 0, or 485 or whatever
startCount = 0 # or however many WAVs already exist (should be even)
threshold = 10 # 3 is too low for recorded sound, but if using speech synth you might want to set it to 1
shortestSilence = 0.3
......@@ -18,17 +17,29 @@ shortestSound = 0.4
if len(sys.argv)>1: exec(" ".join(sys.argv[1:])) # so you can override the above on the command line
sox_8bit, sox_16bit, sox_32bit, sox_signed, sox_unsigned = "-b", "-w", "-l", "-s", "-u"
if not winsound: # adapted from gradint (see comments there)
sox_formats=os.popen("sox --help 2>&1").read()
sf2 = ' '.join(sox_formats.lower().split())
if sf2.startswith("sox: sox v"):
if sf2[10]==' ': soxMaj=15
else: soxMaj = int(sf2[10:sf2.index('.')])
else: soxMaj=0
if soxMaj>=14:
if soxMaj==14 and sf2[13]<'4': sox_8bit, sox_16bit, sox_32bit = "-1", "-2", "-4"
else: sox_8bit, sox_16bit, sox_32bit, sox_signed, sox_unsigned = "-b 8", "-b 16", "-b 32", "-e signed-integer", "-e unsigned-integer" # TODO: check the last one
def autosplit(filename,lang1,lang2,threshold):
(wtype,rate,channels,wframes,bits) = sndhdr.what(filename)
if bits==8:
soxBits="-b -u"
soxBits=sox_8bit+" "+sox_unsigned
elif bits==16:
soxBits="-w -s"
soxBits=sox_16bit+" "+sox_signed
threshold *= 256
elif bits==32:
soxBits="-l -s"
soxBits=sox_32bit+" "+sox_signed
threshold *= (256 * 256 * 256)
else: raise Exception("Unsupported bits per sample")
......@@ -47,7 +58,8 @@ def autosplit(filename,lang1,lang2,threshold):
(sounding, bytes) = nextSample()
if inSilence and not sounding and bytes: continue
elif bytes:
if dataToWriteout or sounding:
if sounding: numSilences = inSilence = 0
else: numSilences += 1
if numSilences >= int(shortestSilence*rate) or not bytes:
......@@ -64,8 +76,9 @@ def autosplit(filename,lang1,lang2,threshold):
open(fname, "wb").write(''.join(dataToWriteout))
os.system("sox %s \"%s\" \"%s.wav\"" % (soxParams,fname,fname))
print fname+".wav"
if winsound: winsound.PlaySound(fname+".wav",winsound.SND_FILENAME)
elif macsound: os.system("qtplay "+fname+".wav")
elif macsound: os.system("afplay "+fname+".wav")
else: os.system("play "+fname+".wav")
# Anyway, clear the output buffer
dataToWriteout = []
#!/usr/bin/env python
#!/usr/bin/env python2
# cache-synth.py [--test] language [language ...]
......@@ -9,6 +9,9 @@
# the same directory as gradint.py with all the
# settings.
# For a more advanced version of this, see the
# actually_generate option in synth-batchconvert-helper.py
import sys,os,time
langs = sys.argv[1:] ; testMode = False
if langs and langs[0]=='--test':
#!/usr/bin/env python
#!/usr/bin/env python2
# delete cached synthesized words that are not used
# (i.e. not mentioned in vocab.txt or samples).
#!/usr/bin/env python2
# diagram.py: script to generate diagrams of gradint lessons
# (C) 2008 Silas S. Brown. License: GPL
# gradint is run normally (passing any command-line arguments on)
# Gradint is run normally (passing any command-line arguments on)
# and then a diagram of the lesson it made is written to diagram.svg
# you can get .ps by doing: inkscape -p '> diagram.ps' diagram.svg
......@@ -59,7 +60,7 @@ def CompositeEvent_draw(self,startTime,pixelsPerSec,topY,height):
def Event_colour(self,language):
if hasattr(self,"wordToCancel"):
if self.makesSenseToLog():
if language==gradint.firstLanguage: return "yellow" # TODO: 2nd to 3rd lang etc?
else: return "green"
else: return "grey" # prompts
SamplesDir="samples/" # Must include trailing /
if ! [ -e $SamplesDir ]; then echo "Error: $SamplesDir does not exist (are you in the right directory?)"; exit 1; fi
if ! [ -e $ProgressFile ]; then echo "Error: $ProgressFile does not exist (are you in the right directory?)";exit 1;fi
if test "a$1" == a; then
echo "Usage: $0 oldname newname"
echo "oldname and newname are relative to $SamplesDir, and can be prefixes of several files/directories"
echo "Moves files from one samples directory to another, keeping $ProgressFile adjusted. Make sure gradint is not running (including waiting for start) when in use."
exit 1
find "$SamplesDir" -follow -type f | grep "^$SamplesDir$Src" | \
while true; do read || break;
DestFile=$(echo "$SrcFile"|sed -e "s|^$SamplesDir$Src|$SamplesDir$Dest|")
mkdir -p "$DestFile" ; rmdir "$DestFile" # ensure parent dirs exist before moving file across
mv -b "$SrcFile" "$DestFile"
SrcFile=$(echo "$SrcFile"|sed -e "s|$SamplesDir||")
DestFile=$(echo "$DestFile"|sed -e "s|$SamplesDir||")
gzip -fdc "$ProgressFile" | sed -e "s|$SrcFile|$DestFile|g" > /tmp/newprog ; mv /tmp/newprog "$ProgressFile" # (ideally should re-write to batch these changes, but leave like this for now in case need to recover from unfinished operation)
rmdir "$SamplesDir$Src" 2>/dev/null >/dev/null # IF it's a directory
#!/usr/bin/env python
#!/usr/bin/env python2
# list-synth.py language [language ...]
# list all words that can be synthesized
#!/usr/bin/env python
#!/usr/bin/env python2
# list2cache.py language
#!/usr/bin/env python
# Like splitter.py, but lets you use Audacity etc to split in non-realtime.
# Like Gradint's "record from file", but lets you use Audacity etc to split in non-realtime.
# You must export the segments in order.
# ('mv && increment count' in a loop: OK as long as everything is in the same directory, so there are no complications with cross-device moves or still-open files)
#!/usr/bin/env python
# (should work in both Python 2 and Python 3)
# Simple sound-playing server v1.59
# Silas S. Brown - public domain - no warranty
# connect to port 8124 (assumes behind firewall)
# and each connection can send WAV or MP3 data
# so gradint advanced.txt can do
# wavPlayer = mp3Player = "nc HostName 8124 -q 0 <"
# (most of this script assumes GNU/Linux)
import socket, select, os, sys, os.path, time, re
for a in sys.argv[1:]:
if a.startswith("--rpi-bluetooth-setup"): # tested on Raspberry Pi 400 with OS versions 11 and 12; also tested on Raspberry Pi Zero W with Raspbian 10 Lite (with the device already paired: needed to say "scan on", "discovery on", remove + pair in bluetoothctl). Send Eth=(bluetooth Ethernet addr) to start. Note that the setup command reboots the system.
os.system(r'if ! grep "$(cat ~/.ssh/*.pub)" ~/.ssh/authorized_keys; then cat ~/.ssh/*.pub >> ~/.ssh/authorized_keys;fi && (echo "[Unit]";echo "Description=Gradint player utility";echo "[Service]";echo "Type=oneshot";echo "ExecStart=bash -c \"while ! ssh localhost true; do sleep 1; done; ssh localhost '+os.path.join(os.getcwd(),sys.argv[0])+r'\"";echo "WorkingDirectory='+os.getcwd()+'";echo User="$(whoami)";echo "[Install]";echo "WantedBy=multi-user.target") > player.service && sudo mv player.service /etc/systemd/system/ && sudo systemctl daemon-reload && sudo systemctl enable player && chmod +x '+sys.argv[0]+' && sudo bash -c "apt-get -y install sox mpg123 pulseaudio pulseaudio-module-bluetooth && usermod -G bluetooth -a $USER && (echo load-module module-switch-on-connect;echo load-module module-bluetooth-policy;echo load-module module-bluetooth-discover) >> /etc/pulse/default.pa && (echo [General];echo FastConnectable = true) >> /etc/bluetooth/main.conf && reboot"') # (eee off: improves reliability of gigabit ethernet on RPi400)
elif a=="--aplay": use_aplay = True # aplay and madplay, for older embedded devices, NOT tested together with --rpi-bluetooth-* above
elif a.startswith("--delegate="): delegate_to_check=a.split('=')[1] # will ping that IP and delegate all sound to it when it's up. E.g. if it has better amplification but it's not always switched on.
elif a.startswith("--chime="): chime_mp3=a.split('=')[1] # if clock bell desired, e.g. echo '$i-14vfff$c48o0l1b- @'|mwr2ly > chime.ly && lilypond chime.ly && timidity -Ow chime.midi && audacity chime.wav (amplify + trim) + mp3-encode (keep default 44100 sample rate so ~38 frames per sec). Not designed to work with --delegate. Pi1's 3.5mm o/p doesn't sound very good with this bell.
else: assert 0, "unknown option "+a
os.environ["PATH"] += ":/usr/local/bin"
try: use_aplay
except: use_aplay = False
try: delegate_to_check
except: delegate_to_check = None
try: chime_mp3
except: chime_mp3 = None
last_chime = last_play = 0
delegate_known_down = 0
if type(b"")==type(""): S=lambda x:x # Python 2
else: S=lambda x:x.decode("latin1") # Python 3
eth = ""
while True:
if chime_mp3:
t = time.time()
if t > last_chime+60 and t%1800 < 60 and not t<last_play+20:
last_chime = t ; h,m=time.localtime(t)[3:5]
if m>1: numChimes = 1
elif not h%12: numChimes = 12
else: numChimes = h%12
if not 7<=h%24<=22: pass # silence the chime at night
elif use_aplay:
if numChimes > 1: os.system("(madplay -Q -t 1 -o wav:- '"+chime_mp3+"'"+(";madplay -Q -t 1 -o raw:- '"+chime_mp3+"'")*(numChimes-2)+";madplay -Q -o raw:- '"+chime_mp3+"') | aplay -q")
else: os.system("madplay -Q -o wav:- '%s' | aplay -q" % chime_mp3)
elif numChimes > 1: os.system("(mpg123 -w - -n 38 --loop %d '%s' ; mpg123 -s '%s') 2>/dev/null | play -t wav --ignore-length - 2>/dev/null" % (numChimes-1,chime_mp3,chime_mp3))
else: os.system("mpg123 -q '%s'" % chime_mp3)
if not select.select([s],[],[],1800-time.time()%1800)[0]: continue
c,(a,port) = s.accept()
try: d = S(c.recv(4))
except: # e.g. timeout, or there was an error reading the file on the remote side and we got 0 bytes
c.close() ; continue
if delegate_to_check and not a==delegate_to_check and delegate_known_down < time.time()-60 and not os.system("ping -c 1 -w 0.5 '"+delegate_to_check+"' >/dev/null 2>/dev/null"): player = "nc -N '"+delegate_to_check+"' 8124"
elif d=='RIFF': # WAV
if use_aplay: player = "aplay -q"
else: player = "play - 2>/dev/null"
elif d=='STOP':
while not d=='START':
c,a = s.accept()
try: d = S(c.recv(5))
except: d = ""
elif d=='QUIT':
s.close() ; break
elif d=="Eth=": # Eth=ethernet address to connect via Bluetooth (see --rpi-bluetooth-setup above)
eth = S(c.recv(17))
assert re.match("^[A-Fa-f0-9:]+$",eth)
os.system("E="+eth+";if ! pacmd list-sinks | grep "+eth.replace(":","_")+" >/dev/null; then while true; do bluetoothctl --timeout 1 disconnect | grep Missing >/dev/null||sleep 5;T=5;while ! bluetoothctl --timeout $T connect $E | egrep \"Connection successful|Device $E Connected: yes\"; do sleep 5; T=10;bluetoothctl --timeout 1 devices;echo Retrying $E; done ; Got=0; for Try in 1 2 3 4 5 6 7 8 9 a b c d e f g h i j k l m n o p q r s t u v w x y z; do if pacmd list-sinks | grep "+eth.replace(":","_")+" >/dev/null; then Got=1; break; fi; sleep 1; done; if [ $Got = 1 ] ; then break; fi; done; fi; pacmd set-default-sink bluez_sink."+eth.replace(":","_")+".a2dp_sink") # ; play /usr/share/scratch/Media/Sounds/Animal/Dog1.wav # (not really necessary if using 'close the socket' to signal we're ready)
c.close() ; continue
elif d=="Eth0":
if eth: os.system("bluetoothctl --timeout 1 disconnect "+eth)
c.close() ; continue
elif use_aplay: player = "madplay -Q -o wav:- - | aplay -q" # MP3
else: player = "mpg123 - 2>/dev/null" # MP3 non-aplay
if delegate_known_down < time.time()-60 and not player.startswith("nc -N "): delegate_known_down = time.time()
player = os.popen(player,"w")
if type(d)==type(u""): d = d.encode("latin1")
while d:
try: player.write(d)
except TypeError: # Python 3
except IOError: break # it was probably killed
try: d = c.recv(4096)
except: d = ""
c.close() ; player.close()
except: pass
last_play = time.time()
File moved
#!/usr/bin/env python2
# Script to recover vocabulary from the "unavailable"
# entries in Gradint's progress file. Use if for some
# reason the vocab file has been truncated (e.g. filesystem
# problems) and this propagated to your backup system before
# you noticed.
# v1.0 (c) 2012 Silas S. Brown. License: GPL
ignore_words_that_are_also_in_backup_unavail = True # if the fault just happened
import gradint, time
gradint.availablePrompts = gradint.AvailablePrompts()
d = gradint.ProgressDatabase()
if ignore_words_that_are_also_in_backup_unavail:
gradint.progressFile = gradint.progressFileBackup
gradint.pickledProgressFile = None
d2 = gradint.ProgressDatabase(alsoScan=0)
for x in d2.unavail: d.unavail.remove(x)
print "# Words recovered %d-%02d-%02d" % time.localtime()[:3]
print "# - capitalisation and comments are missing; order may be approximate"
gradint.reallyKnownThreshold = 0
poems,line2index = gradint.find_known_poems(d.unavail)
output = [] ; doneAlready = {}
for pLines in poems:
if filter(lambda x:not x.startswith("!synth:") or not gradint.languageof(x)==gradint.secondLanguage, pLines): continue
plines2 = []
for p in pLines:
idx = line2index[p] ; doneAlready[idx] = 1
prompt = d.unavail[idx][1]
equals = ""
if type(prompt)==type([]):
if len(prompt)==3: equals = prompt[1]
elif not plines2 and not prompt==p: equals=prompt # if 1st line
if equals:
assert equals.startswith("!synth:") and gradint.languageof(equals)==gradint.firstLanguage, "recovery of poems with non-L1 secondary prompts not yet supported"
equals = "="+gradint.textof(equals)
output.append((d.unavail[line2index[pLines[0]]][0], gradint.secondLanguage, gradint.firstLanguage, "\n".join(["begin poetry"]+plines2+["end poetry"])))
for count,(num,L1,L2) in zip(xrange(len(d.unavail)),d.unavail):
if count in doneAlready: continue
if type(L1)==type(L2)==type("") and L1.startswith("!synth:") and L2.startswith("!synth:"):
lang1,lang2 = gradint.languageof(L1),gradint.languageof(L2)
output.append((num,lang2,lang1,"%s=%s" % (gradint.textof(L2),gradint.textof(L1))))
output.sort() ; output.reverse()
curL2,curL1 = None,None
for num,lang2,lang1,text in output:
if not (lang2,lang1) == (curL2,curL1):
curL2,curL1 = lang2,lang1
print "SET LANGUAGES %s %s" % (curL2,curL1)
print text
#!/usr/bin/env python
#!/usr/bin/env python2
# Script to assist with using TextAloud or similar program
# that can batch-synthesize a collection of text files
......@@ -8,7 +8,7 @@
# words in vocab.txt etc, and rename the resulting *.mp3 or *.wav
# files into the synth cache.
# Should be useful if you are on Linux and want to run a
# Should be useful if you are not on Windows and want to run a
# non-English speech synth in the Windows Emulator (since
# ptts can have trouble, but tools like TextAloud still work).
# Note: This script currently assumes that the filesystem
......@@ -43,6 +43,14 @@ delete_old = 1 # if 1 (and if sporadic) then older cached
# this script moves them there, as that's how it identifies its
# "own" mp3/wav files (as opposed to anything else you may have cached).
actually_generate = 0 # if 1, will call gradint to generate
# the cached sound using its choice of voice for that language,
# instead of relying on your use of TextAloud etc.
# Might be useful if you need to move it to another machine that
# doesn't have that voice, and you still want to use sporadic
# etc (like a more advanced version of cache-synth.py)
testMode = 0 # if 1 and actually_generate is 1, will play too
# -----------------------------------------
import sys,os,time
......@@ -55,6 +63,7 @@ except: pass
sys.argv = []
import gradint
from gradint import dottxt,dotwav,dotmp3
assert gradint.synthCache, "need a synthCache for this to work"
gradint.cache_maintenance_mode = 1
try: trans = open(gradint.synthCache+os.sep+gradint.transTbl).read().replace("\n"," ")+" "
......@@ -68,20 +77,27 @@ def synth_fileExists(f):
generating = {}
fname2txt = {}
for l in os.listdir(newStuff):
if l.endswith(gradint.dottxt) and "_" in l:
if l.endswith(dottxt) and "_" in l:
txt = open(newStuff+os.sep+l).read().decode('utf-16')
txt = (sporadic+txt,l[l.rindex("_")+1:l.rindex(gradint.extsep)])
generating[txt] = 1 ; fname2txt[l[:l.rindex(gradint.extsep)]]=txt
generating[txt] = (None,l)
for l in os.listdir(newStuff):
if l.endswith(gradint.dotwav) or l.endswith(gradint.dotmp3):
if l.endswith(dotwav) or l.endswith(dotmp3):
if k in fname2txt: generating[fname2txt[k]]=newStuff+os.sep+l
del fname2txt # now 'generating' maps (txt,lang) to 1 or filename
del fname2txt # now 'generating' maps (txt,lang) to (None,txtFile) or filename
for k,v in generating.items():
if type(v)==tuple and v[0]==None: # a previous run was interrupted
del generating[k]
def getTxtLang(s):
if '!synth:' in s and "_" in s: return gradint.textof(s).decode('utf-8'),gradint.languageof(s)
elif s.endswith(gradint.extsep+"txt"): return gradint.readText(s).decode('utf-8'), gradint.languageof(s)
else: return None,None
elif s.endswith(gradint.extsep+"txt"):
langToSynth = gradint.languageof(s)
if langToSynth==languageToCache: return gradint.readText(s).decode('utf-8'), langToSynth # else don't bother reading the file (it might be over ftpfs)
return None,None
def decache(s):
textToSynth,langToSynth = getTxtLang(s)
......@@ -90,7 +106,7 @@ def decache(s):
generating[(textToSynth.lower(),langToSynth)]=1 # don't re-generate it
if delete_old and langToSynth==languageToCache:
for ext in [gradint.dottxt,gradint.dotwav,gradint.dotmp3]:
for ext in [dottxt,dotwav,dotmp3]:
if s+ext in scld:
del scld[s+ext]
......@@ -108,7 +124,12 @@ if sporadic:
else: decache(prompt)
count = 0
count = 0 ; toMove = []
def rename(old,new):
# don't use os.rename - can get problems cross-device
def maybe_cache(s):
textToSynth,langToSynth = getTxtLang(s)
......@@ -116,25 +137,35 @@ def maybe_cache(s):
if not langToSynth==languageToCache: return
if hanziOnly and not gradint.fix_compatibility(textToSynth).replace(" ","")==gradint.hanzi_and_punc(textToSynth).replace(" ",""): return
for txt in [textToSynth, sporadic+textToSynth]:
if synth_fileExists((txt.encode('utf-8')+"_"+langToSynth+gradint.dotwav).lower()) or synth_fileExists((txt.encode('utf-8')+"_"+langToSynth+gradint.dotmp3).lower()): return # it's already been done
if synth_fileExists(("__rejected_"+txt.encode('utf-8')+"_"+langToSynth+gradint.dotwav).lower()) or synth_fileExists(("__rejected_"+txt.encode('utf-8')+"_"+langToSynth+gradint.dotmp3).lower()): return # it's been rejected
if synth_fileExists((txt.encode('utf-8')+"_"+langToSynth+dotwav).lower()) or synth_fileExists((txt.encode('utf-8')+"_"+langToSynth+dotmp3).lower()): return # it's already been done
if synth_fileExists(("__rejected_"+txt.encode('utf-8')+"_"+langToSynth+dotwav).lower()) or synth_fileExists(("__rejected_"+txt.encode('utf-8')+"_"+langToSynth+dotmp3).lower()): return # it's been rejected
k = (textToSynth.lower(),langToSynth)
if generating.has_key(k):
if not generating[k]==1: # a file already exists
# don't use os.rename - can get problems cross-device
fname = textToSynth.lower().encode('utf-8')+'_'+langToSynth+generating[k][generating[k].rindex(gradint.extsep):]
scld[fname] = 1
if actually_generate:
tm = [gradint.synth_event(langToSynth,textToSynth[len(sporadic):].encode('utf-8')).getSound(),(textToSynth.encode('utf-8')+"_"+langToSynth+dotwav).lower()]
if gradint.got_program("lame"):
# we can MP3-encode it (TODO make this optional)
n = tm[0][:-len(dotwav)]+dotmp3
if not os.system("lame --cbr -h -b 48 -m m \"%s\" \"%s\"" % (tm[0],n)):
tm[0] = n
tm[1] = tm[1][:-len(dotwav)]+dotmp3
scld[textToSynth.lower().encode('utf-8')+'_'+langToSynth+dotwav] = 1
global count
while gradint.fileExists(newStuff+os.sep+str(count)+"_"+langToSynth+gradint.dottxt): count += 1
while gradint.fileExists(newStuff+os.sep+str(count)+"_"+langToSynth+dottxt): count += 1
count += 1
print "Checking for new ones"
......@@ -143,5 +174,22 @@ for _,s1,s2 in samples+gradint.parseSynthVocab(gradint.vocabFile):
else: maybe_cache(s1)
if toMove: sys.stderr.write("Renaming\n")
for tmpfile,dest in toMove:
oldDest = dest
except OSError: # not a valid filename
while gradint.fileExists(gradint.synthCache+os.sep+("__file%d" % count)+dotwav) or gradint.fileExists(gradint.synthCache+os.sep+("__file%d" % count)+dotmp3): count += 1
rename(tmpfile,gradint.synthCache+os.sep+("__file%d" % count)+dotwav)
open(gradint.synthCache+os.sep+gradint.transTbl,"ab").write("__file%d%s %s\n" % (count,dotwav,dest))
dest = "__file%d%s" % (count,dotwav)
if testMode:
print oldDest
t=time.time() ; e.play()
while time.time() < t+e.length: time.sleep(1) # in case play() is asynchronous
if count: print "Now convert the files in "+newStuff+" and re-run this script.\nYou might also want to adjust the volume if appropriate, e.g. mp3gain -r -d 6 -c *.mp3"
else: print "No extra files needed to be made."
elif not toMove: print "No extra files needed to be made."
else: print "All done"
#!/usr/bin/env python2
# trace.py: script to generate raytraced animations of Gradint lessons
# Version 1.32 (c) 2018-19,2021 Silas S. Brown. License: GPL
# The Disney Pixar film "Inside Out" (2015) represented
# memories as spheres. I don't have their CGI models, but
# we can do spheres in POV-Ray and I believe that idea is
# simple enough to be in the public domain (especially if
# NOT done like Pixar did it) - hopefully this might show
# some people how Gradint's method is supposed to work
# (especially if they've seen the Inside Out film).
# This script generates the POV-Ray scenes from a lesson.
# Gradint is run normally (passing any extra command-line arguments on,
# must include outputFile so audio can be included in the animation)
# and then the animation is written to /tmp/gradint.mp4.
# Optionally add a static image representing each word (image will be
# placed onto the spheres, and projected onto the back wall
# when that word is being recalled)
# e.g. word1_en.wav, word1_zh.wav, word1.jpg
# (or png or gif).
# Optionally add an mp4 video of a word in a particular language
# e.g. word1_en.mp4 (probably best synchronised to word1_en.wav),
# can also do this for commentsToAdd and orderlessCommentsToAdd files
# Requires POV-Ray, ffmpeg, and the Python packages vapory
# and futures (use sudo pip install futures vapory) -
# futures is used to run multiple instances of POV-Ray on
# multi-core machines.
from optparse import OptionParser
parser = OptionParser()
help="Frames per second (10 is insufficient for fast movement, so recommend at least 15)")
help="Y-resolution: 240=NTSC VCD, 288=PAL VCD, 480=DVD, 607=WeChat channel, 720=Standard HD (Blu-Ray), 1080=Full HD (Blu-Ray)")
help="Translucent spheres when picture visible (slows down rendering but is better quality)")
help="Maximum number of minutes to render (0 = unlimited, the default; can limit for test runs)")
help="POVRay quality setting, default 9: 1=ambient light only, 2=lighting, 4,5=shadows, 8=reflections 9-11=radiosity etc")
options, args = parser.parse_args()
theFPS,res,minutes,povray_quality = int(theFPS),int(res),int(minutes),int(povray_quality)
# Map the requested Y-resolution (res) to (width, height, antialias).
# Full HD is selected with res==1080, matching the option's help text
# ("1080=Full HD"); the value 1920 is also still accepted because earlier
# versions of this check tested for 1920 instead of 1080.
if res in [240,288]:
    width_height_antialias = (352,res,0.3) # VCD. antialias=None doesn't look very good at 300x200, cld try it at higher resolutions (goes to the +A param, PovRay default is 0.3 if -A specified without param; supersample (default 9 rays) if colour differs from neighbours by this amount)
elif res==480: width_height_antialias = (640,480,0.001) # 480p (DVD)
elif res==607: width_height_antialias = (1080,607,None) # WeChat Channels
elif res==720: width_height_antialias = (1280,720,None) # Standard HD (Blu-Ray)
elif res in [1080,1920]: width_height_antialias = (1920,1080,None) # Full HD (Blu-Ray)
else: raise Exception("Unknown vertical resolution specified: "+repr(res))
debug_frame_limit = minutes * theFPS * 60
import sys,os,traceback
oldName = __name__ ; from vapory import * ; __name__ = oldName
from concurrent.futures import ProcessPoolExecutor
assert os.path.exists("gradint.py"), "You must move trace.py to the top-level Gradint directory and run it from there"
sys.argv = [sys.argv[0]]+args
import gradint
assert gradint.outputFile, "You must run trace.py with gradint parameters that include outputFile"
try: xrange
except: xrange = range
S,B = gradint.S,gradint.B
class MovableParam:
def __init__(self): self.fixed = []
def fixAt(self,t,value):
while any(x[0]==t and not x[1]==value for x in self.fixed): t += 0.2
def getPos(self,t):
assert self.fixed, "Should fixAt before getPos"
for i in xrange(len(self.fixed)):
if self.fixed[i][0] >= t:
if i: # interpolate
if self.fixed[i-1][1]==None: return None
duration = self.fixed[i][0]-self.fixed[i-1][0]
progress = t-self.fixed[i-1][0]
return (self.fixed[i][1]*progress + self.fixed[i-1][1]*(duration-progress))*1.0/duration
else: return self.fixed[i][1] # start position
return self.fixed[-1][1]
class MovablePos:
def __init__(self): self.x,self.y,self.z = MovableParam(),MovableParam(),MovableParam()
def fixAt(self,t,*args):
if args[0]==None: x=y=z=None
else: x,y,z = args
def getPos(self,t):
if r==(None,None,None): return None
else: return r
SceneObjects = set()
class MovableSphere(MovablePos):
def __init__(self,radius=0.5,colour="prompt",imageFilename=None):
self.colour = colour
self.imageFilename = imageFilename
self.radius = MovableParam()
# fixAt(t,x,y,z) inherited
def obj(self,t):
pos = self.getPos(t)
if not pos: return # not in scene at this time
r = self.radius.getPos(t)
if self.imageFilename:
if translucent_spheres_when_picture_visible and bkgScrFade.getPos(t) < 1: transmittence = 0.5
else: transmittence = 0.3
img = wallPic(t,self.imageFilename) # if a video is playing whose key image matches ours, 'back-copy' the video frame (TODO: do this only on the correct L1 or L2 sphere?)
if not img: img = self.imageFilename
return Sphere(list(pos),r,colour(self.colour,t),Texture(Pigment(ImageMap('"'+S(img)+'"',"once","interpolate 2","transmit all "+str(transmittence)),'scale',[1.5*r,1.5*r,1],'translate',list(pos),'translate',[-.75*r,-.75*r,0])))
else: return Sphere(list(pos),r,colour(self.colour,t))
class ObjCollection:
    """A group of objects that are positioned together.

    Each member is stored with a fixed (dx,dy,dz) offset.  fixAt() on the
    collection forwards to every member's own fixAt(), adding that
    member's offset to the collection's position.
    """
    def __init__(self): self.objs = set() # of (obj,dx,dy,dz) tuples
    def add(self,obj,dx,dy,dz):
        """Add obj to the collection at offset (dx,dy,dz)."""
        self.objs.add((obj,dx,dy,dz))
    def get(self,dx,dy,dz):
        """Return the member stored at exactly this offset, or None if there isn't one."""
        # linear search: the collection should be small
        for o,ddx,ddy,ddz in self.objs:
            if (ddx,ddy,ddz) == (dx,dy,dz): return o
    def fixAt(self,t,*args):
        """Fix the collection's position at time t.

        args is either x,y,z (each member is fixed at that position plus
        its own offset) or a single None (each member gets
        fixAt(t,None,None,None); elsewhere in this file a None position is
        used for "not in the scene").
        """
        if args[0] is None:
            # args is a tuple, so it must not be compared with the list
            # [None]; and the offsets cannot be added to a None position
            for obj,dx,dy,dz in self.objs: obj.fixAt(t,None,None,None)
            return
        x,y,z = args
        for obj,dx,dy,dz in self.objs: obj.fixAt(t,x+dx,y+dy,z+dz)
eventTrackers = {}
def EventTracker(rowNo,imageFilename=None):
if not rowNo in eventTrackers:
eventTrackers[rowNo] = ObjCollection()
eventTrackers[rowNo].numRepeats = 0
return eventTrackers[rowNo]
rCache = {}
def repeatSphere(rowNo,numRepeats=0):
    """Return the marker sphere for (rowNo,numRepeats), creating it on first use.

    Spheres are cached in rCache, so repeated calls with the same
    arguments return the same MovableSphere (radius 0.1, "prompt" colour).
    """
    key = (rowNo,numRepeats)
    if key in rCache: return rCache[key]
    rCache[key] = MovableSphere(0.1,"prompt")
    return rCache[key]
def addRepeat(rowNo,t=0,length=0):
et = EventTracker(rowNo)
rpt = repeatSphere(rowNo,et.numRepeats)
if length:
rpt.fixAt(-1,None) # not exist yet (to save a tiny bit of POVRay computation)
rpt.fixAt(t-1,4*rowNo+1,0,61) # behind far wall
rpt.fixAt(t,4*rowNo-1,0,0) # ready to be 'batted'
et.fixAt(t,4*rowNo,0,10) # we're at bottom
# careful with Y : try to avoid sudden vertical motion between 2 sequences
et.add(rpt,0,1+0.2*et.numRepeats,0) # from now on we keep this marker
et.fixAt(t+length,4*rowNo,10,10) # at end of repeat (or at t=0) we're at top, and the repeat marker is in place
et.numRepeats += 1
camera_position = MovablePos()
camera_lookAt = MovablePos()
def cam(t):
    """Return the Camera for time t, placed at camera_position and looking at camera_lookAt."""
    location = list(camera_position.getPos(t))
    target = list(camera_lookAt.getPos(t))
    return Camera('location',location,'look_at',target)
def lights(t):
    """Return the list of light sources for time t.

    There is a single light, kept 10 units to the +X side of the camera's
    X position at height 15 and Z=-20.
    """
    lightX = camera_position.x.getPos(t)+10
    return [LightSource([lightX, 15, -20], [1.3, 1.3, 1.3])]
background_screen = [] # (startTime,endTime,pictureName,pictureActual)
background_screen_size = 50
bkgScrFade = MovableParam() ; bkgScrFade.fixAt(-1,1)
bkgScrX = MovableParam()
def wallPic(t,ifImg=None):
if bkgScrFade.getPos(t) == 1: return # no picture if we're faded out
found = None
for st,et,img,pic in background_screen:
if st <= t: found = (st,et,img,pic)
elif st > t: break
if found:
st,et,img,pic = found
if ifImg and not img==ifImg: return
if B(pic).endswith(B(os.extsep+"mp4")):
# need to take single frame
T = min(t,et-1.0/theFPS)-st # don't go past last frame
out = B(pic)[:-4]+B("-"+str(T)+os.extsep+"jpg")
while T > 0 and not os.path.exists(out): # (TODO: if its frame rate is low enough, we might already have the same frame even at a slightly different T)
cmd = "ffmpeg -n -threads 1 -accurate_seek -ss "+str(T)+" -i "+S(pic)+" -vframes 1 -q:v 1 "+S(out)+" </dev/null >/dev/null"
print (cmd)
T -= 1.0/theFPS
if os.path.exists(out): return out
else: return None
else: return pic
def wall(t):
    """Return a one-element list holding the back-wall Plane for time t.

    The wall is always white; if wallPic(t) gives a picture, that picture
    is layered on top as an image map whose transmit value is
    bkgScrFade's value at t and whose X offset follows bkgScrX.
    """
    picToUse = wallPic(t)
    layers = [Texture(Pigment('color', [1, 1, 1]))]
    if picToUse:
        image = ImageMap('"'+S(picToUse)+'"',"once","transmit all "+str(bkgScrFade.getPos(t)))
        offset = [bkgScrX.getPos(t)-background_screen_size/2,0,0]
        layers.append(Texture(Pigment(image,'scale',[background_screen_size,background_screen_size,1],'translate',offset)))
    # TODO: why does the plain wall look brighter than with ImageMap at transmit all 1.0 ?
    layers.append(Finish('ambient',0.9))
    return [Plane([0, 0, 1], 60, *layers)]
ground = Plane( [0, 1, 0], -1, Texture( Pigment( 'color', [1, 1, 1]), Finish( 'phong', 0.1, 'reflection',0.4, 'metallic', 0.3))) # from vapory example
def colour(c,t=None):
    """Return the Texture for colour name c ("l1", "l2" or "prompt").

    If translucent_spheres_when_picture_visible is set, a time t is
    given, and bkgScrFade's value at t is below 1, the pigment also
    gets 'filter' 0.7.  An unknown colour name raises KeyError.
    """
    rgb = {"l1":[.8,1,.2],"l2":[.5,.5,.9],"prompt":[1,.6,.5]}[c] # TODO: better colours
    pigmentArgs = ['color',rgb]
    if translucent_spheres_when_picture_visible and not t==None and bkgScrFade.getPos(t) < 1:
        pigmentArgs += ['filter',0.7]
    return Texture(Pigment(*pigmentArgs))
def scene(t):
    """ Returns the scene at time 't' (in seconds) """
    camera = cam(t)
    fixtures = lights(t) + wall(t) + [ground]
    # objects whose obj(t) gives None are left out of the scene
    visible = []
    for movable in SceneObjects:
        rendered = movable.obj(t)
        if not rendered==None: visible.append(rendered)
    return Scene(camera, fixtures + visible)
def Event_draw(self,startTime,rowNo,inRepeat): pass
gradint.Event.draw = Event_draw
def CompositeEvent_draw(self,startTime,rowNo,inRepeat):
if self.eventList:
t = startTime
for i in self.eventList:
t += i.length
if inRepeat: return
# Call addRepeat, but postpone the start until the
# first loggable event, to reduce rapid camera mvt
st0 = startTime
for i in self.eventList:
if i.makesSenseToLog(): break
else: startTime += i.length
if startTime==t: startTime = st0 # shouldn't happen
def Event_colour(self,language):
    """Return the colour name for this event: "prompt" if it does not make
    sense to log, otherwise "l1" when language is gradint.firstLanguage
    and "l2" for any other language."""
    if not self.makesSenseToLog(): return "prompt"
    if language==gradint.firstLanguage: return "l1"
    return "l2"
gradint.Event.colour = Event_colour
def eDraw(startTime,length,rowNo,colour):
minR = 0.5
if colour in ["l1","l2"]:
if colour=="l1": delta = -1
else: delta = +1
et = EventTracker(rowNo).get(delta,0,0)
r = et.radius
if hasattr(et,"imageFilename"):
r = repeatSphere(rowNo,EventTracker(rowNo).numRepeats).radius
minR = 0.1
maxR = min(max(length,minR*1.5),minR*3) # TODO: vary with event's volume, so cn see the syllables? (partials can do that anyway)
if length/2.0 > 0.5:
# TODO: wobble in the middle?
else: r.fixAt(startTime+length/2.0,maxR)
def SampleEvent_draw(self,startTime,rowNo,inRepeat):
if B(self.file).startswith(B(gradint.partialsDirectory)): l=B(self.file).split(B(os.sep))[1]
else: l = gradint.languageof(self.file)
gradint.SampleEvent.draw = SampleEvent_draw
def SynthEvent_draw(self,startTime,rowNo,inRepeat):
    """Draw this event via eDraw, using its own length and the colour
    key for its language. `inRepeat` is not used here."""
    eDraw(startTime, self.length, rowNo, self.colour(self.language))
gradint.SynthEvent.draw = SynthEvent_draw
def chkImg(i):
    """Return the absolute path of an image that goes with event `i`, or None.

    The candidate name is i.file up to (not including) its last "_",
    followed by os.extsep and each of gif, png, jpeg, jpg in turn; the
    first candidate that exists wins.  Returns None when i.file has no
    "_" or no candidate exists.
    """
    if "_" not in S(i.file): return None
    fname = B(i.file)
    # the stem does not depend on the extension, so compute it once
    stem = fname[:fname.rindex(B("_"))] # TODO: we're assuming no _en etc in the image filename (projected onto both L1 and L2)
    for imgExt in ["gif","png","jpeg","jpg"]:
        imageFilename = stem+B(os.extsep+imgExt)
        if os.path.exists(imageFilename):
            return os.path.abspath(imageFilename)
    return None
def runGradint():
gradint.gluedListTracker.sort(key=lambda e:e[0].glue.length+e[0].glue.adjustment)
duration = 0
for l,row in zip(gradint.gluedListTracker,xrange(len(gradint.gluedListTracker))):
def check_for_pictures():
for gluedEvent in l:
event = gluedEvent.event
try: el=event.eventList
except: el=[event]
for j in el:
try: el2=j.eventList
except: el2=[j]
for i in el2:
if hasattr(i,"file") and B("_") in B(i.file):
imageFilename = chkImg(i)
if imageFilename:
return EventTracker(row,imageFilename)
if hasattr(l[0],"timesDone"): timesDone = l[0].timesDone
else: timesDone = 0
for i in xrange(timesDone): addRepeat(row)
glueStart = 0
for i in l:
glueStart = i.getAdjustedEnd(glueStart)
duration = max(duration,glueStart)
for t,e in gradint.lastLessonMade.events: # check for videos
if hasattr(e,"file") and hasattr(e,"exactLen"):
video = B(e.file)[:B(e.file).rindex(B(os.extsep))]+B(os.extsep+"mp4")
if os.path.exists(video): # overwrite static image while playing
i,v = chkImg(e),os.path.abspath(video)
if not i: i=v
i = 0 # more items might be inserted, so don't use range here
while i < len(background_screen)-1:
if background_screen[i][1] > background_screen[i+1][1]: # overlap: we end after next one ends: insert a jump-back-to-us after
background_screen.insert(i+2,(background_screen[i+1][1],background_screen[i][1],background_screen[i][2],background_screen[i][3])) # restore old after new one ends
if background_screen[i][1] > background_screen[i+1][0] and background_screen[i][0] < background_screen[i+1][0]: # overlap: we end after next one starts, but we start before it starts
background_screen[i] = (background_screen[i][0],background_screen[i+1][0],background_screen[i][2],background_screen[i][3]) # new one takes precedence
if background_screen[i][0]==background_screen[i+1][0]: # equal start, but next one might be longer
if background_screen[i][2]==background_screen[i+1][2] and background_screen[i][1]+5>=background_screen[i+1][0] and background_screen[i][1] < background_screen[i+1][0]:
# avoid turning off for 5 seconds or less if showing the same image (or a video of it)
background_screen.insert(i+1,(background_screen[i][1],background_screen[i+1][0],background_screen[i][2],background_screen[i][2])) # just the image
i += 1
for i in xrange(len(background_screen)):
startTime,endTime,picName,img = background_screen[i]
if i and startTime > background_screen[i-1][1] + 0.5:
bkgScrFade.fixAt(startTime,1) # start faded out
# else (less than 0.5sec between images) don't try to start faded out
fadeOutTime = endTime
if i<len(background_screen)-1:
if endTime + 0.5 > background_screen[i+1][0]:
fadeOutTime = None # as above (< 0.5sec between images)
else: fadeOutTime = max(fadeOutTime,min(background_screen[i+1][0]-1,fadeOutTime+5))
if not fadeOutTime == None:
# don't move the screen during any extended fade-out:
for ii in xrange(len(bkgScrX.fixed)):
if bkgScrX.fixed[ii][0]==endTime:
if not fadeOutTime==None: bkgScrFade.fixAt(fadeOutTime,1)
if endTime >= startTime+0.5:
return duration
def tryFrame(f):
frame,numFrames = f
print ("Making frame "+str(frame)+" of "+str(numFrames))
try: os.mkdir("/tmp/"+repr(frame)) # vapory writes a temp .pov file and does not change its name per process, so better be in a process-unique directory
except: pass
scene(frame*1.0/theFPS).render(width=width_height_antialias[0], height=width_height_antialias[1], antialiasing=width_height_antialias[2], quality=povray_quality, outfile="/tmp/frame%05d.png" % frame)
# TODO: TURN OFF JITTER with -J if using anti-aliasing in animations
os.chdir("/tmp") ; os.system('rm -r '+repr(frame))
return None
if frame==0: raise
sys.stderr.write("Frame %d render error, will skip\n" % frame)
return "cp /tmp/frame%05d.png /tmp/frame%05d.png" % (frame-1,frame)
def main():
executor = ProcessPoolExecutor()
duration = runGradint()
numFrames = int(duration*theFPS)
if debug_frame_limit: numFrames=min(numFrames,debug_frame_limit)
# TODO: pickle all MovableParams so can do the rendering on a different machine than the one that makes the Gradint lesson?
for c in list(executor.map(tryFrame,[(frame,numFrames) for frame in xrange(numFrames)]))+[
"ffmpeg -nostdin -y -framerate "+repr(theFPS)+" -i /tmp/frame%05d.png -i "+gradint.outputFile+" -movflags faststart -pix_fmt yuv420p -filter_complex tpad=stop=-1:stop_mode=clone -shortest /tmp/gradint.mp4 && if [ -d /Volumes ]; then open /tmp/gradint.mp4; fi" # (could alternatively run with -vcodec huffyuv /tmp/gradint.avi for lossless, insead of --movflags etc, but will get over 6 gig and may get A/V desync problems in mplayer/VLC that -delay doesn't fix, however -b:v 1000k seems to look OK; for WeChat etc you need to recode to h.264, and for HTML 5 video need recode to WebM (but ffmpeg -c:v libvpx no good if not compiled with support for those libraries; may hv to convert on another machine i.e. ffmpeg -i gradint.mp4 -vf scale=320:240 -c:v libvpx -b:v 500k gradint.webm))
if c: # patch up skipped frames, then run ffmpeg
print (c) ; os.system(c)
for f in xrange(numFrames): os.remove("/tmp/frame%05d.png" % f) # wildcard from command line could get 'argument list too long' on BSD etc
if __name__=="__main__": main()
else: print (__name__)
#!/usr/bin/env python2
# transliterate.py - print a 2nd-language-transliterated version of vocab.txt and any .txt pairs in samples
# (may be useful for grepping, loading to Latin-only PDA, etc)
# (note: leaves comments untransliterated, + may not translit all text if gradint is set up so a transliterating synth will not be used)
......@@ -18,3 +18,5 @@ samples.cgi - CGI script to browse a samples directory
or that the site is not publicly viewable)
espeak.cgi - script that lets a Web user play with espeak options
Other files - see description at the top of the file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# (should work with either Python 2 or Python 3)
# cantonese.py - Python functions for processing Cantonese transliterations
# (uses eSpeak and Gradint for help with some of them)
# v1.48 (c) 2013-15,2017-24 Silas S. Brown. License: GPL
cache = {} # to avoid repeated eSpeak runs,
# zi -> jyutping or (pinyin,) -> translit
dryrun_mode = False # True = prepare to populate cache in batch
jyutping_dryrun,pinyin_dryrun = set(),set()
import re, pickle, os, sys
# Cache file location: "--cache FILE" on the command line takes priority,
# then the JYUTPING_CACHE environment variable, then a default in /tmp
if '--cache' in sys.argv:
    cache_fname = sys.argv[sys.argv.index('--cache')+1]
else: cache_fname = os.environ.get("JYUTPING_CACHE","/tmp/.jyutping-cache")
# Best-effort load: a missing or unreadable cache file just means we
# carry on with whatever 'cache' already holds.
# NOTE(review): unpickling runs arbitrary code from the file, so the
# cache file must be trusted (the default path is under /tmp)
try:
    with open(cache_fname,"rb") as _cache_file: # closed even if load() fails
        cache = pickle.Unpickler(_cache_file).load()
except Exception: pass
extra_zhy_dict = { # TODO: add these to the real zhy_list in eSpeak
def S(v): # make sure it's a string in both Python 2 and 3
    """Return `v` as the native str type where possible.

    On Python 2 `v` is returned untouched.  On Python 3, bytes are
    decoded as UTF-8; anything that has no decode() method, or whose
    bytes are not valid UTF-8, is returned unchanged.
    """
    if type("")!=type(u""): return v # Python 2
    try: return v.decode('utf-8') # Python 3: in case it's bytes
    except (AttributeError, UnicodeDecodeError): return v # not bytes, or not UTF-8
def B(v): # make sure it's bytes in Python 3, str in Python 2
    """Return `v` UTF-8 encoded if it is exactly the Unicode string
    type; any other value is passed through unchanged."""
    is_text = type(v)==type(u"")
    return v.encode('utf-8') if is_text else v
def get_jyutping(hanzi,mustWork=1):
    """Return the cached or eSpeak-generated Jyutping for `hanzi`.

    hanzi    -- Unicode text, or bytes that are decoded as UTF-8; the
                substitutions in extra_zhy_dict are applied first
    mustWork -- if true, assert that the result is non-blank; if
                false, a blank result is returned as ""

    The global `espeak` is created on first use.  While dryrun_mode is
    set, the text is only queued in jyutping_dryrun (unless already
    cached) and the placeholder "aai1" is returned; the first call after
    dryrun_mode is cleared transliterates the whole queue in one
    transliterate_multiple call and stores the results in `cache`.
    """
    global espeak, jyutping_dryrun
    if type(hanzi)!=type(u""): hanzi=hanzi.decode('utf-8')
    for old,new in extra_zhy_dict.items(): hanzi=hanzi.replace(old,new)
    if not espeak:
        espeak = import_gradint().ESpeakSynth()
        if not espeak.works_on_this_platform(): # must call
            raise Exception("espeak.works_on_this_platform")
        assert espeak.supports_language("zhy")
    if dryrun_mode:
        if hanzi not in cache: jyutping_dryrun.add(hanzi)
        return "aai1" # placeholder value
    if jyutping_dryrun:
        # flush the batch queued up during the dry run
        jyutping_dryrun = list(jyutping_dryrun)
        vals = espeak.transliterate_multiple("zhy",jyutping_dryrun,0)
        assert len(jyutping_dryrun)==len(vals)
        for queued,translit in zip(jyutping_dryrun,vals):
            cache[queued]=S(translit).replace("7","1").lower() # see below
        jyutping_dryrun = set()
    if hanzi in cache:
        jyutping = cache[hanzi]
    else:
        # .lower() needed because espeak sometimes randomly capitalises e.g. 2nd hanzi of 'hypocrite' (Mandarin xuwei de ren)
        jyutping = S(espeak.transliterate("zhy",hanzi,forPartials=0)).replace("7","1").lower()
        cache[hanzi] = jyutping
    if mustWork: assert jyutping.strip(), "No translit. result for "+repr(hanzi)
    elif not jyutping.strip(): jyutping=""
    return jyutping
espeak = 0
def hanzi_only(unitext):
    """Return only those characters of `unitext` whose code point is in
    0x4e00..0xa6ff or is 0x10000 and above."""
    def in_range(ch):
        cp = ord(ch)
        return 0x4e00 <= cp < 0xa700 or cp >= 0x10000
    return u"".join(ch for ch in unitext if in_range(ch))
def py2nums(pinyin):
if not type(pinyin)==type(u""):
pinyin = pinyin.decode('utf-8')
if not pinyin.strip(): return ""
global pinyin_dryrun
if pinyin_dryrun:
pinyin_dryrun = list(pinyin_dryrun)
vals = espeak.transliterate_multiple("zh",pinyin_dryrun,0)
assert len(pinyin_dryrun)==len(vals)
for i in range(len(pinyin_dryrun)):
pinyin_dryrun = set()
if (pinyin,) in cache: pyNums = cache[(pinyin,)]
else: pyNums = espeak.transliterate("zh",pinyin,forPartials=0) # (this transliterate just does tone marks to numbers, adds 5, etc; forPartials=0 because we DON'T want to change letters like X into syllables, as that won't happen in jyutping and we're going through it tone-by-tone)
assert pyNums and pyNums.strip(), "espeak.transliterate returned %s for %s" % (repr(pyNums),repr(pinyin))
return re.sub("a$","a5",re.sub("(?<=[a-zA-Z])er([1-5])",r"e\1r5",S(pyNums)))
def getNext(gen):
    """Return the next item of iterator `gen`; StopIteration propagates
    when it is exhausted."""
    # the next() builtin (Python 2.6+ and 3.x) covers both the
    # gen.next() and gen.__next__() spellings
    return next(gen)
def adjust_jyutping_for_pinyin(hanzi,jyutping,pinyin):
    """Return `jyutping` with individual syllables corrected from `pinyin`.

    If we have good quality (proof-read etc) Mandarin pinyin, this can
    sometimes improve the automatic Cantonese transcription.

    `jyutping` is returned unchanged when none of the hanzi is covered by
    py2j_chars, when the number of pinyin syllables differs from the
    number of hanzi, or when jyutping runs out of tone digits first.
    Otherwise each hanzi is paired with the jyutping text up to its next
    tone digit, and where py2j has an entry for that hanzi and pinyin
    reading the trailing syllable of that piece is replaced.
    """
    if type(hanzi)!=type(u""): hanzi = hanzi.decode('utf-8')
    hanzi = hanzi_only(hanzi)
    if not re.search(py2j_chars,hanzi): return jyutping
    pinyin = re.findall('[A-Za-z]*[1-5]',py2nums(pinyin))
    if len(pinyin)!=len(hanzi): return jyutping # can't fix
    jyutping = S(jyutping)
    pos = 0 ; tones = re.finditer('[1-7]',jyutping) ; pieces = []
    for h,p in zip(hanzi,pinyin):
        try: end = getNext(tones).end()
        except StopIteration: return jyutping # one of the hanzi has no Cantonese reading in our data: the command-line code warns "failed to fix" in that case
        piece = jyutping[pos:end] ; pos = end
        reading = p.lower()
        if h in py2j and reading in py2j[h]:
            # keep any leading text of the piece, swap only the final syllable
            piece = piece[:re.search("[A-Za-z]*[1-7]$",piece).start()]+py2j[h][reading]
        pieces.append(piece)
    return "".join(pieces)+jyutping[pos:]
u"\u4EC0":{"shen2":"sam6","shi2":"sap6"}, # unless zaap6
# u"\u5207":{"qie4":"cai3","qie1":"cit3"}, # WRONG (rm'd v1.17). It's cit3 in re4qie4. It just wasn't in yiqie4 (which zhy_list has as an exception anyway)
u"\u6F02":{"piao1":"piu1","piao3 piao4":"piu3"},
u"\u843D":{"luo1 luo4 lao4":"lok6","la4":"laai6"},
u"\u8457":{"zhu4":"zyu3","zhuo2":"zoek3","zhuo2 zhao2 zhao1 zhe5":"zoek6"},
u"\u8B58\u8BC6":{"shi2 shi4":"sik1","zhi4":"zi3"},
# A py2j key of several hanzi is shorthand: give each hanzi its own
# entry (all sharing the same readings dict) and drop the combined key
for chars in list(py2j):
    if len(chars)>1:
        shared = py2j.pop(chars)
        for c in chars: py2j[c]=shared
# Likewise a readings key of several space-separated pinyin syllables
# is split into one key per syllable
for readings in py2j.values():
    for pys in list(readings):
        words = pys.split()
        if len(words)>1:
            target = readings.pop(pys)
            for w in words: readings[w]=target
# character class matching any hanzi that py2j knows about
py2j_chars = re.compile(u'['+''.join(py2j)+']')
def jyutping_to_lau(j):
    """Return `j` (Jyutping) rewritten with the jlRep replacement table.

    The input is lower-cased, j becomes y and z becomes j, then every
    jlRep pair is applied in table order; the result is lower-cased
    again and "ohek" is finally turned into "euk".
    """
    lau = S(j).lower()
    lau = lau.replace("j","y")
    lau = lau.replace("z","j")
    for src,dst in jlRep:
        lau = lau.replace(src,dst)
    lau = lau.lower()
    return lau.replace("ohek","euk")
def jyutping_to_lau_java(jyutpingNo=2,lauNo=1):
    """Return a Java source fragment (as a string) that rewrites the
    <rt>...</rt> contents when annotNo is `jyutpingNo` or `lauNo`.

    The Lau branch of the generated code chains one .replace() call per
    jlRep pair, mirroring jyutping_to_lau.
    """
    # for annogen.py 3.29+ --annotation-postprocess to ship Jyutping and generate Lau at runtime
    jNo, lNo = str(jyutpingNo), str(lauNo)
    table_calls = ''.join('.replace("'+k+'","'+v+'")' for k,v in jlRep)
    return ''.join([
        'if(annotNo==', jNo, '||annotNo==', lNo,
        '){m=Pattern.compile("<rt>(.*?)</rt>").matcher(r);sb=new StringBuffer();while(m.find()){String r2=(annotNo==', jNo,
        '?m.group(1).replaceAll("([1-7])(.)","$1&shy;$2"):(m.group(1)+" ").toLowerCase().replace("j","y").replace("z","j")',
        table_calls,
        '.toLowerCase().replace("ohek","euk").replaceAll("([1-7])","<sup>$1</sup>-").replace("- "," ").replaceAll(" $","")),tmp=m.group(1).substring(0,1);if(annotNo==', lNo,
        '&&tmp.equals(tmp.toUpperCase()))r2=r2.substring(0,1).toUpperCase()+r2.substring(1);m.appendReplacement(sb,"<rt>"+r2+"</rt>");}m.appendTail(sb); r=sb.toString();}',
    ]) # TODO: can probably go faster with mapping for some of this
def incomplete_lau_to_jyutping(l):
    """Return `l` rewritten with the ljRep replacement table.

    incomplete: assumes Lau didn't do the "aa" -> "a" rule

    The input is lower-cased and "euk" becomes "ohek", then every ljRep
    pair is applied in table order; the result is lower-cased again and
    finally j becomes z and y becomes j.
    """
    jp = S(l).lower().replace("euk","ohek")
    for src,dst in ljRep:
        jp = jp.replace(src,dst)
    jp = jp.lower()
    return jp.replace("j","z").replace("y","j")
def incomplete_lau_to_yale_u8(l):
    """Pass `l` through incomplete_lau_to_jyutping and then
    jyutping_to_yale_u8, returning the latter's result."""
    jyutping = incomplete_lau_to_jyutping(l)
    return jyutping_to_yale_u8(jyutping)
# Jyutping -> Lau replacement pairs.  Results are upper case so that a
# later pair cannot match text an earlier pair has already produced.
jlRep = [(unchanged,unchanged.upper()) for unchanged in [
    "aai","aau","aam","aang","aan","aap","aat","aak",
    "ai","au","am","ang","an","ap","at","ak","a",
    "ei","eng","ek","e",
    "iu","im","ing","in","ip","it","ik","i",
    "oi","ong","on","ot","ok",
    "ung","uk"]]
jlRep += [("eoi","UI"),("eon","UN"),("eot","UT"),("eok","EUK"),("oeng","EUNG"),("oe","EUH"),("c","ch"),("ou","O"),("o","OH"),("yu","UE"),("u","OO")]
jlRep.sort(key=lambda pair:-len(pair[0])) # longest 1st
# u to oo includes ui to ooi, un to oon, ut to oot
# yu to ue includes yun to uen and yut to uet
# drawing from the table on http://www.omniglot.com/writing/cantonese.htm plus this private communication:
# Jyutping "-oeng" maps to Sidney Lau "-eung".
# Jyutping "jyu" maps to Sidney Lau "yue". (consequence of yu->ue, j->y)
# The reverse table swaps each pair, again matching longest first
ljRep = [(lau.lower(),jp.upper()) for jp,lau in jlRep]
ljRep.sort(key=lambda pair:-len(pair[0])) # longest 1st
def ping_or_lau_to_syllable_list(j):
    """Split `j` into a list of syllables.

    ASCII punctuation matched by [!-/:-@^-`] is turned into spaces, a
    space is added after every digit 1-9 that is not followed by
    another digit, and the result is split on whitespace.
    """
    text = re.sub(r"[!-/:-@^-`]"," ",S(j))
    text = re.sub(r"([1-9])(?![0-9])",r"\1 ",text)
    return text.split()
def hyphenate_ping_or_lau_syl_list(sList,groupLens=None):
    """Hyphenate Jyutping or Lau syllables via hyphenate_syl_list.

    sList may be a syllable list, or a string, which is first split
    with ping_or_lau_to_syllable_list.
    """
    is_string = type(sList) in [str,type(u"")]
    syllables = ping_or_lau_to_syllable_list(sList) if is_string else sList
    return hyphenate_syl_list(syllables,groupLens)
def hyphenate_yale_syl_list(sList,groupLens=None):
    """Hyphenate Yale syllables via hyphenate_syl_list.

    (if sList is a string, the syllables must be space-separated,
    which will be the case if the to_yale functions are used)
    """
    syllables = sList if type(sList)==list else sList.split()
    return hyphenate_syl_list(syllables,groupLens)
def hyphenate_syl_list(sList,groupLens=None):
    """Join the syllables in `sList` into space-separated words, with
    hyphens between the syllables of each word.

    sList     -- list of syllables (asserted to be a list)
    groupLens -- number of syllables in each word; must sum to
                 len(sList).  If empty or None every syllable is its
                 own word.  Ignored when --hyphenate-all is in
                 sys.argv, which makes the whole list one word.
    """
    assert type(sList) == list
    if '--hyphenate-all' in sys.argv: groupLens = [len(sList)]
    elif not groupLens: groupLens = [1]*len(sList) # don't hyphenate at all if we don't know
    else: assert sum(groupLens) == len(sList), "sum("+repr(groupLens)+")!=len("+repr(sList)+")"
    words = [] ; pos = 0
    for size in groupLens:
        group = sList[pos:pos+size]
        words.append("-".join(S(syl) for syl in group))
        pos += size
    return " ".join(words)
def jyutping_to_yale_TeX(j): # returns space-separated syllables
for syl in ping_or_lau_to_syllable_list(S(j).lower().replace("eo","eu").replace("oe","eu").replace("j","y").replace("yyu","yu").replace("z","j").replace("c","ch")):
for i in range(len(syl)):
if syl[i] in "aeiou":
vowel=i ; break
if vowel==None and re.match(r"h?(m|ng)[456]",syl): # standalone nasal syllables
vowel = syl.find('m')
if vowel<0: vowel = syl.index('n')
lastVowel = syl.find('g')
if lastVowel<0: lastVowel = vowel
if vowel==None:
ret.append(syl.upper()) ; continue # English word or letter in the Chinese?
if syl[vowel:vowel+2] == "aa" and (len(syl)<vowel+2 or syl[vowel+2] in "123456"):
syl=syl[:vowel]+syl[vowel+1:] # final aa -> a
# the tonal 'h' goes after all the vowels but before any consonants:
for i in range(len(syl)-1,-1,-1):
if syl[i] in "aeiou":
lastVowel=i ; break
if syl[-1] in "1234567":
# get_jyutping replaces 7 with 1 because zhy_list is
# more Canton-type than Hong Kong-type Cantonese and
# there is considerable disagreement on which "1"s
# should be "7"s, but if you pass any "7" into the
# jyutping_to_yale functions we can at least process
# it here:
tone = ["\=",r"\'","",r"\`",r"\'","",r"\`"][int(syl[-1])-1]
if syl[-1] in "456":
else: ret.append(syl.upper()) # English word or letter in the Chinese?
return ' '.join(ret)
def jyutping_to_yale_u8(j): # returns space-separated syllables
    """Return the Yale form of `j` as UTF-8 encoded, NFC-normalised text.

    Takes the output of jyutping_to_yale_TeX, replaces the dotless-i
    macros with plain i/I, and turns each TeX accent prefix into the
    matching combining character placed after the letter that follows it.
    """
    import unicodedata
    def mysub(z,l):
        # for each (x,y): "x" followed by any one character c -> c followed by y
        for x,y in l:
            z = re.sub(re.escape(x)+r"(.)",r"\1"+y,z)
        return z
    if type(u"")==type(""): U=str # Python 3
    else: # Python 2
        def U(x):
            try: return x.decode('utf-8') # might be an emoji pass-through
            # only conversion failures mean "leave it alone"; don't swallow
            # unrelated errors such as KeyboardInterrupt
            except (UnicodeError, AttributeError): return x # already Unicode
    tex = jyutping_to_yale_TeX(j).replace(r"\i{}","i").replace(r"\I{}","I")
    accents = [(r"\`",u"\u0300"),(r"\'",u"\u0301"),(r"\=",u"\u0304")]
    return unicodedata.normalize('NFC',mysub(U(tex),accents)).encode('utf-8')
def superscript_digits_TeX(j):
    """Return `j` with every digit 1-9 wrapped in a TeX raised-superscript.

    for jyutping and Sidney Lau
    """
    # Must be a single pass: the replacement text itself contains a "3"
    # (in "-0.3ex"), so replacing one digit at a time would re-wrap that
    # "3" inside the markup already inserted for any 1 or 2.
    return re.sub("[1-9]",
                  lambda m: r"\raisebox{-0.3ex}{$^"+m.group()+r"$}\hspace{0pt}",
                  S(j))
def superscript_digits_HTML(j):
    """Return `j` with every digit 1-9 wrapped in <sup>...</sup>."""
    return re.sub("[1-9]", lambda m: "<sup>"+m.group()+"</sup>", S(j))
def superscript_digits_UTF8(j):
    """Return `j` with digits 1-9 replaced by Unicode superscript digits;
    a Unicode-string result is UTF-8 encoded before being returned.

    WARNING: not all fonts have all digits; many have only the first 3. superscript_digits_HTML might be better for browsers, even though it does produce more bytes.
    """
    j = S(j)
    superscripts = u"¹²³⁴⁵⁶⁷⁸⁹"
    for idx in range(len(superscripts)):
        raised = S(superscripts[idx].encode('utf-8'))
        j = j.replace(str(idx+1),raised)
    if type(j)==type(u""): j=j.encode('utf-8') # Python 3
    return j
def import_gradint():
    """Import gradint once and return the module.

    The module is kept in the global `gradint`, so later calls return
    it straight away.  On first import its espeak_preprocessors is
    reset to an empty dict.  Raises whatever the import itself raises.
    """
    global gradint
    try: return gradint
    except NameError: pass # not imported yet
    # when importing gradint, make sure no command line
    tmp,sys.argv = sys.argv,sys.argv[:1]
    try: import gradint
    finally: sys.argv = tmp # put our own arguments back even if the import fails
    gradint.espeak_preprocessors = {}
    return gradint
def do_song_subst(hanzi_u8):
    """Return B(hanzi_u8) with every U+4F7F replaced by U+38C8 (both as
    UTF-8 bytes).

    Mandarin shi3 (normally jyutping sai2) is usually si3 in songs, so
    substitute a rarer character that unambiguously has that reading
    before sending to get_jyutping.
    """
    # \u literals rather than unichr(), which is not a builtin on Python 3
    return B(hanzi_u8).replace(u"\u4f7f".encode('utf-8'),u"\u38c8".encode('utf-8'))
if __name__ == "__main__":
# command-line use: output Lau for each line of stdin
# (or Yale if there's a --yale in sys.argv, or both
# with '#' separators if --yale#lau in sys.argv,
# also --yale#ping and --yale#lau#ping accepted);
# if there's a # in the line, assume it's hanzi#pinyin
# (for annogen.py --reannotator="##python cantonese.py")
lines = sys.stdin.read().replace("\r\n","\n").split("\n")
if lines and not lines[-1]: del lines[-1]
dryrun_mode = True
def songSubst(l):
if '--song-lyrics' in sys.argv: l=do_song_subst(l)
return l
for l in lines:
if '#' in l: l,pinyin = l.split('#')
else: pinyin = None
if pinyin and not type(pinyin)==type(u""):
pinyin = pinyin.decode('utf-8')
if pinyin and not (pinyin,) in cache:
for w in pinyin.split():
for h in w.split('-'):
dryrun_mode = False
for l in lines:
if '#' in l: l,pinyin = l.split('#')
else: pinyin = None
jyutping = get_jyutping(songSubst(l),0)
if not jyutping: groupLens = None # likely a Unihan-only 'fallback readings' zi that has no Cantonese
elif pinyin:
jyutping = adjust_jyutping_for_pinyin(l,jyutping,pinyin)
groupLens = [0]
for syl,space in re.findall('([A-Za-z]*[1-5])( *)',' '.join('-'.join(py2nums(h) for h in w.split('-')) for w in pinyin.split())): # doing it this way so we're not relying on espeak transliterate_multiple to preserve spacing and hyphenation
groupLens[-1] += 1
if space: groupLens.append(0)
if not groupLens[-1]: groupLens=groupLens[:-1]
lenWanted = len(ping_or_lau_to_syllable_list(jyutping))
if sum(groupLens) > lenWanted: # probably silent -r to drop
for i,word in enumerate(py2nums(pinyin).split()):
if re.search("[1-5]r5",word):
groupLens[i] -= 1
if sum(groupLens)==lenWanted: break
if not sum(groupLens)==lenWanted:
sys.stderr.write("WARNING: failed to fix "+pinyin+" ("+py2nums(pinyin)+") to "+jyutping+" ("+repr(ping_or_lau_to_syllable_list(jyutping))+") from "+l+", omitting\n")
groupLens = None ; jyutping = ""
else: groupLens = None
if "--yale#lau" in sys.argv: print (hyphenate_yale_syl_list(jyutping_to_yale_u8(jyutping),groupLens)+"#"+superscript_digits_HTML(hyphenate_ping_or_lau_syl_list(jyutping_to_lau(jyutping),groupLens)))
elif '--yale#ping' in sys.argv: print (hyphenate_yale_syl_list(jyutping_to_yale_u8(jyutping),groupLens)+"#"+jyutping.replace(' ',''))
elif "--yale#lau#ping" in sys.argv: print (hyphenate_yale_syl_list(jyutping_to_yale_u8(jyutping),groupLens)+"#"+superscript_digits_HTML(hyphenate_ping_or_lau_syl_list(jyutping_to_lau(jyutping),groupLens))+"#"+jyutping.replace(' ',''))
elif "--yale" in sys.argv: print (hyphenate_yale_syl_list(jyutping_to_yale_u8(jyutping),groupLens))
else: print (superscript_digits_HTML(hyphenate_ping_or_lau_syl_list(jyutping_to_lau(jyutping),groupLens)))
try: pickle.Pickler(open(cache_fname,"wb"),-1).dump(cache)
except: pass
# email-lesson-archive.sh - archive an old email-lesson user
# (C) 2008 Silas S. Brown, License: GPL
# (C) 2008,2021-22 Silas S. Brown, License: GPL
if ! pwd|grep email_lesson_users >/dev/null; then
echo "This script should be run from an email_lesson_users directory (see email-lesson.sh)"
......@@ -13,29 +13,29 @@ if test "a$1" == a; then
. config
while ! test "a$1" == a; do
if test -e "$1"; then
if [ -e "$1" ]; then
unset U; unset Links
if echo "$1"|grep "^user.0*" >/dev/null; then
# specifying by user.0* id
export U=$1
export Links=$(find . -maxdepth 1 -lname $U)
elif ls -l --color=none "$1"|grep ' -> ' >/dev/null; then
Links=$(find . -maxdepth 1 -lname "$U")
elif find "$1" -maxdepth 0 -type l|grep . >/dev/null; then
# specifying by symlink
export Links=$1
export U=$(ls -l --color=none "$1"|sed -e 's/.* -> //')
U=$(ls -l --color=none "$1"|sed -e 's/.* -> //')
else echo "Warning: can't make sense of username $1"; fi
if ! test "a$U" == a; then
if test -e $U/lastdate; then
if ! [ "a$U" == a ]; then
if [ -e "$U/lastdate" ]; then
if test "a$Links" == a; then export Shortname=$U; else export Shortname=$Links; fi
if echo $PUBLIC_HTML | grep : >/dev/null; then
ssh $PUBLIC_HTML_EXTRA_SSH_OPTIONS $(echo $PUBLIC_HTML|sed -e 's/:.*//') rm -v $(echo $PUBLIC_HTML|sed -e 's/[^:]*://')/$U-$(cat $U/lastdate).*
else rm -v $PUBLIC_HTML/$U-$(cat $U/lastdate).*
if echo "$PUBLIC_HTML" | grep : >/dev/null; then
ssh $PUBLIC_HTML_EXTRA_SSH_OPTIONS "$(echo "$PUBLIC_HTML"|sed -e 's/:.*//')" rm -v "$(echo "$PUBLIC_HTML"|sed -e 's/[^:]*://')/$U-$(cat $U/lastdate).*"
else rm -v "$PUBLIC_HTML/$U-$(cat "$U/lastdate")".*
tar -jcvf $Shortname.tbz $U $Links
tar -jcvf "$Shortname.tbz" "$U" $Links
mkdir -p old
mv -v --backup=numbered $Shortname.tbz old/
rm -rf $U $Links
mv -v --backup=numbered "$Shortname.tbz" old/
rm -rf "$U" $Links
else echo "Warning: User $1 does not exist"; fi
shift; done