Compare revisions

Showing with 1021 additions and 53 deletions
@@ -10,6 +10,8 @@ C:\Windows\system32 or somewhere.
All require Python (from www.python.org). All systems
except Windows have that anyway if you can run gradint.
However, some of these utilities are still Python2-only,
while the main Gradint is now compatible with both 2 and 3.
SPLITTING SOUND FILES
---------------------
@@ -18,13 +20,10 @@ autosplit.py - splits a long recording into individual files
completely automatically, but only if the recording has been
made in near-broadcasting-studio conditions.
strip0.py - strips absolute silence off the beginning and
end of audio files (only useful if you're dealing with files
from a textbook CD or something)
manual-splitter.py (Unix only but not too difficult to
modify for other systems) - a helper script so that you can
use Audacity (or another sound editor) to split the file
in non-realtime. (Realtime splitting can be done in Gradint.)
Use the "export selection as wav" command (you can assign a
hot-key to it), and you don't have to type in a different
filename each time because this script can run in the
@@ -51,4 +50,8 @@ online synthesizers (or real people) to the synth cache
transliterate.py - make a transliterated vocab report
(for use with grep or on PDAs or whatever)
player.py - play
diagram.py - make a diagram of a gradint lesson
trace.py - make a raytraced animation of a lesson
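For example (hypothetical invocations; both scripts run Gradint normally and
pass any extra command-line arguments on, and trace.py additionally needs an
outputFile setting, as its header notes):

python2 diagram.py
python2 trace.py --res 480 --minutes 1 outputFile=/tmp/lesson.mp3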
@@ -4,13 +4,12 @@ import os, struct, sndhdr, sys
try: import winsound
except: winsound=None
macsound = (sys.platform.find("mac")>=0 or sys.platform.find("darwin")>=0)
# python 3+:
try: input
except: input=lambda x:eval(raw_input(x))
startCount = 0 # or however many WAVs already exist (should be even)
threshold = 10 # 3 is too low for recorded sound, but if using speech synth you might want to set it to 1
shortestSilence = 0.3
@@ -18,17 +17,29 @@ shortestSound = 0.4
if len(sys.argv)>1: exec(" ".join(sys.argv[1:])) # so you can override the above on the command line
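# e.g. (illustrative): python2 autosplit.py 'threshold=20; shortestSilence=0.5'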
sox_8bit, sox_16bit, sox_32bit, sox_signed, sox_unsigned = "-b", "-w", "-l", "-s", "-u"
if not winsound: # adapted from gradint (see comments there)
sox_formats=os.popen("sox --help 2>&1").read()
sf2 = ' '.join(sox_formats.lower().split())
if sf2.startswith("sox: sox v"):
if sf2[10]==' ': soxMaj=15
else: soxMaj = int(sf2[10:sf2.index('.')])
else: soxMaj=0
if soxMaj>=14:
if soxMaj==14 and sf2[13]<'4': sox_8bit, sox_16bit, sox_32bit = "-1", "-2", "-4"
else: sox_8bit, sox_16bit, sox_32bit, sox_signed, sox_unsigned = "-b 8", "-b 16", "-b 32", "-e signed-integer", "-e unsigned-integer" # TODO: check the last one
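# e.g. (illustrative): a "sox --help" banner starting "SoX v14.4.2" gives
# soxMaj=14 with sub-version 4+, so the long-form flags "-b 16" and
# "-e signed-integer" are used; 14.0-14.3 get "-1"/"-2"/"-4", and pre-14
# versions keep the old "-w"/"-s" style set above.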
def autosplit(filename,lang1,lang2,threshold):
(wtype,rate,channels,wframes,bits) = sndhdr.what(filename)
if bits==8:
soxBits="-b -u"
soxBits=sox_8bit+" "+sox_unsigned
structBits="B"
elif bits==16:
soxBits="-w -s"
soxBits=sox_16bit+" "+sox_signed
structBits="h"
threshold *= 256
elif bits==32:
soxBits="-l -s"
soxBits=sox_32bit+" "+sox_signed
structBits="i"
threshold *= (256 * 256 * 256)
else: raise Exception("Unsupported bits per sample")
@@ -47,7 +58,8 @@ def autosplit(filename,lang1,lang2,threshold):
(sounding, bytes) = nextSample()
if inSilence and not sounding and bytes: continue
elif bytes:
if dataToWriteout or sounding:
dataToWriteout.append(bytes)
if sounding: numSilences = inSilence = 0
else: numSilences += 1
if numSilences >= int(shortestSilence*rate) or not bytes:
@@ -64,8 +76,9 @@ def autosplit(filename,lang1,lang2,threshold):
open(fname, "wb").write(''.join(dataToWriteout))
os.system("sox %s \"%s\" \"%s.wav\"" % (soxParams,fname,fname))
os.unlink(fname)
print fname+".wav"
if winsound: winsound.PlaySound(fname+".wav",winsound.SND_FILENAME)
elif macsound: os.system("qtplay "+fname+".wav")
elif macsound: os.system("afplay "+fname+".wav")
else: os.system("play "+fname+".wav")
# Anyway, clear the output buffer
dataToWriteout = []
......
#!/usr/bin/env python2
# cache-synth.py [--test] language [language ...]
@@ -9,6 +9,9 @@
# the same directory as gradint.py with all the
# settings.
# For a more advanced version of this, see the
# actually_generate option in synth-batchconvert-helper.py
import sys,os,time
langs = sys.argv[1:] ; testMode = False
if langs and langs[0]=='--test':
......
#!/usr/bin/env python2
# delete cached synthesized words that are not used
# (i.e. not mentioned in vocab.txt or samples).
......
#!/usr/bin/env python2
# diagram.py: script to generate diagrams of gradint lessons
# (C) 2008 Silas S. Brown. License: GPL
# Gradint is run normally (passing any command-line arguments on)
# and then a diagram of the lesson it made is written to diagram.svg
# you can get .ps by doing: inkscape -p '> diagram.ps' diagram.svg
@@ -59,7 +60,7 @@ def CompositeEvent_draw(self,startTime,pixelsPerSec,topY,height):
gradint.CompositeEvent.draw=CompositeEvent_draw
def Event_colour(self,language):
if hasattr(self,"wordToCancel"):
if self.makesSenseToLog():
if language==gradint.firstLanguage: return "yellow" # TODO: 2nd to 3rd lang etc?
else: return "green"
else: return "grey" # prompts
......
#!/bin/bash
SamplesDir="samples/" # Must include trailing /
ProgressFile="progress.txt"
if ! [ -e $SamplesDir ]; then echo "Error: $SamplesDir does not exist (are you in the right directory?)"; exit 1; fi
if ! [ -e $ProgressFile ]; then echo "Error: $ProgressFile does not exist (are you in the right directory?)";exit 1;fi
if test "a$1" == a; then
echo "Usage: $0 oldname newname"
echo "oldname and newname are relative to $SamplesDir, and can be prefixes of several files/directories"
echo "Moves files from one samples directory to another, keeping $ProgressFile adjusted. Make sure gradint is not running (including waiting for start) when in use."
exit 1
fi
Src=$1
Dest=$2
find "$SamplesDir" -follow -type f | grep "^$SamplesDir$Src" | \
while true; do read || break;
SrcFile=$REPLY
DestFile=$(echo "$SrcFile"|sed -e "s|^$SamplesDir$Src|$SamplesDir$Dest|")
mkdir -p "$DestFile" ; rmdir "$DestFile" # ensure parent dirs exist before moving file across
mv -b "$SrcFile" "$DestFile"
SrcFile=$(echo "$SrcFile"|sed -e "s|$SamplesDir||")
DestFile=$(echo "$DestFile"|sed -e "s|$SamplesDir||")
gzip -fdc "$ProgressFile" | sed -e "s|$SrcFile|$DestFile|g" > /tmp/newprog ; mv /tmp/newprog "$ProgressFile" # (ideally should re-write to batch these changes, but leave like this for now in case need to recover from unfinished operation)
done
rmdir "$SamplesDir$Src" 2>/dev/null >/dev/null # IF it's a directory
#!/usr/bin/env python2
# list-synth.py language [language ...]
# list all words that can be synthesized
......
#!/usr/bin/env python2
# list2cache.py language
......
#!/usr/bin/env python
# Like Gradint's "record from file", but lets you use Audacity etc to split in non-realtime.
# You must export the segments in order.
# ('mv && increment count' in a loop: ok as long as in same dir so no complicatns w cross-device & still-open)
......
#!/usr/bin/env python
# (should work in both Python 2 and Python 3)
# Simple sound-playing server v1.59
# Silas S. Brown - public domain - no warranty
# connect to port 8124 (assumes behind firewall)
# and each connection can send WAV or MP3 data
# so gradint advanced.txt can do
# wavPlayer = mp3Player = "nc HostName 8124 -q 0 <"
# (most of this script assumes GNU/Linux)
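# A minimal Python client sketch (illustrative; "player-host" is a placeholder
# for wherever this server runs), equivalent to the nc command above:
#   import socket
#   sk = socket.create_connection(("player-host", 8124))
#   sk.sendall(open("lesson.wav", "rb").read()) # raw WAV (or MP3) bytes
#   sk.close()
# The short commands below (STOP, START, QUIT, Eth=<addr>, Eth0) are sent the same way.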
import socket, select, os, sys, os.path, time, re
for a in sys.argv[1:]:
if a.startswith("--rpi-bluetooth-setup"): # tested on Raspberry Pi 400 with OS versions 11 and 12; also tested on Raspberry Pi Zero W with Raspbian 10 Lite (with the device already paired: needed to say "scan on", "discovery on", remove + pair in bluetoothctl). Send Eth=(bluetooth Ethernet addr) to start. Note that the setup command reboots the system.
os.system(r'if ! grep "$(cat ~/.ssh/*.pub)" ~/.ssh/authorized_keys; then cat ~/.ssh/*.pub >> ~/.ssh/authorized_keys;fi && (echo "[Unit]";echo "Description=Gradint player utility";echo "[Service]";echo "Type=oneshot";echo "ExecStart=bash -c \"while ! ssh localhost true; do sleep 1; done; ssh localhost '+os.path.join(os.getcwd(),sys.argv[0])+r'\"";echo "WorkingDirectory='+os.getcwd()+'";echo User="$(whoami)";echo "[Install]";echo "WantedBy=multi-user.target") > player.service && sudo mv player.service /etc/systemd/system/ && sudo systemctl daemon-reload && sudo systemctl enable player && chmod +x '+sys.argv[0]+' && sudo bash -c "apt-get -y install sox mpg123 pulseaudio pulseaudio-module-bluetooth && usermod -G bluetooth -a $USER && (echo load-module module-switch-on-connect;echo load-module module-bluetooth-policy;echo load-module module-bluetooth-discover) >> /etc/pulse/default.pa && (echo [General];echo FastConnectable = true) >> /etc/bluetooth/main.conf && reboot"') # (eee off: improves reliability of gigabit ethernet on RPi400)
elif a=="--aplay": use_aplay = True # aplay and madplay, for older embedded devices, NOT tested together with --rpi-bluetooth-* above
elif a.startswith("--delegate="): delegate_to_check=a.split('=')[1] # will ping that IP and delegate all sound to it when it's up. E.g. if it has better amplification but it's not always switched on.
elif a.startswith("--chime="): chime_mp3=a.split('=')[1] # if clock bell desired, e.g. echo '$i-14vfff$c48o0l1b- @'|mwr2ly > chime.ly && lilypond chime.ly && timidity -Ow chime.midi && audacity chime.wav (amplify + trim) + mp3-encode (keep default 44100 sample rate so ~38 frames per sec). Not designed to work with --delegate. Pi1's 3.5mm o/p doesn't sound very good with this bell.
else: assert 0, "unknown option "+a
os.environ["PATH"] += ":/usr/local/bin"
try: use_aplay
except: use_aplay = False
try: delegate_to_check
except: delegate_to_check = None
try: chime_mp3
except: chime_mp3 = None
last_chime = last_play = 0
delegate_known_down = 0
s=socket.socket()
s.bind(('',8124))
s.listen(5)
if type(b"")==type(""): S=lambda x:x # Python 2
else: S=lambda x:x.decode("latin1") # Python 3
eth = ""
while True:
if chime_mp3:
t = time.time()
if t > last_chime+60 and t%1800 < 60 and not t<last_play+20:
last_chime = t ; h,m=time.localtime(t)[3:5]
if m>1: numChimes = 1
elif not h%12: numChimes = 12
else: numChimes = h%12
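# i.e. one chime at half past; on the hour, strike the hour (1 to 12)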
if not 7<=h%24<=22: pass # silence the chime at night
elif use_aplay:
if numChimes > 1: os.system("(madplay -Q -t 1 -o wav:- '"+chime_mp3+"'"+(";madplay -Q -t 1 -o raw:- '"+chime_mp3+"'")*(numChimes-2)+";madplay -Q -o raw:- '"+chime_mp3+"') | aplay -q")
else: os.system("madplay -Q -o wav:- '%s' | aplay -q" % chime_mp3)
elif numChimes > 1: os.system("(mpg123 -w - -n 38 --loop %d '%s' ; mpg123 -s '%s') 2>/dev/null | play -t wav --ignore-length - 2>/dev/null" % (numChimes-1,chime_mp3,chime_mp3))
else: os.system("mpg123 -q '%s'" % chime_mp3)
if not select.select([s],[],[],1800-time.time()%1800)[0]: continue
c,(a,port) = s.accept()
c.settimeout(10)
try: d = S(c.recv(4))
except: # e.g. timeout, or there was an error reading the file on the remote side and we got 0 bytes
c.close() ; continue
if delegate_to_check and not a==delegate_to_check and delegate_known_down < time.time()-60 and not os.system("ping -c 1 -w 0.5 '"+delegate_to_check+"' >/dev/null 2>/dev/null"): player = "nc -N '"+delegate_to_check+"' 8124"
elif d=='RIFF': # WAV
if use_aplay: player = "aplay -q"
else: player = "play - 2>/dev/null"
elif d=='STOP':
c.close()
while not d=='START':
c,a = s.accept()
try: d = S(c.recv(5))
except: d = ""
c.close()
continue
elif d=='QUIT':
s.close() ; break
elif d=="Eth=": # Eth=ethernet address to connect via Bluetooth (see --rpi-bluetooth-setup above)
eth = S(c.recv(17))
assert re.match("^[A-Fa-f0-9:]+$",eth)
os.system("E="+eth+";if ! pacmd list-sinks | grep "+eth.replace(":","_")+" >/dev/null; then while true; do bluetoothctl --timeout 1 disconnect | grep Missing >/dev/null||sleep 5;T=5;while ! bluetoothctl --timeout $T connect $E | egrep \"Connection successful|Device $E Connected: yes\"; do sleep 5; T=10;bluetoothctl --timeout 1 devices;echo Retrying $E; done ; Got=0; for Try in 1 2 3 4 5 6 7 8 9 a b c d e f g h i j k l m n o p q r s t u v w x y z; do if pacmd list-sinks | grep "+eth.replace(":","_")+" >/dev/null; then Got=1; break; fi; sleep 1; done; if [ $Got = 1 ] ; then break; fi; done; fi; pacmd set-default-sink bluez_sink."+eth.replace(":","_")+".a2dp_sink") # ; play /usr/share/scratch/Media/Sounds/Animal/Dog1.wav # (not really necessary if using 'close the socket' to signal we're ready)
c.close() ; continue
elif d=="Eth0":
if eth: os.system("bluetoothctl --timeout 1 disconnect "+eth)
c.close() ; continue
elif use_aplay: player = "madplay -Q -o wav:- - | aplay -q" # MP3
else: player = "mpg123 - 2>/dev/null" # MP3 non-aplay
if delegate_known_down < time.time()-60 and not player.startswith("nc -N "): delegate_known_down = time.time()
player = os.popen(player,"w")
if type(d)==type(u""): d = d.encode("latin1")
while d:
try:
try: player.write(d)
except TypeError: # Python 3
player.buffer.write(d)
except IOError: break # it was probably killed
try: d = c.recv(4096)
except: d = ""
try:
c.close() ; player.close()
except: pass
last_play = time.time()
File moved
#!/usr/bin/env python2
# Script to recover vocabulary from the "unavailable"
# entries in Gradint's progress file. Use if for some
# reason the vocab file has been truncated (e.g. filesystem
# problems) and this propagated to your backup system before
# you noticed.
# v1.0 (c) 2012 Silas S. Brown. License: GPL
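# Run from the Gradint directory (this script imports gradint) and redirect
# stdout, e.g. (output name illustrative): python2 <this script> > recovered-vocab.txt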
ignore_words_that_are_also_in_backup_unavail = True # if the fault just happened
import gradint, time
gradint.availablePrompts = gradint.AvailablePrompts()
d = gradint.ProgressDatabase()
if ignore_words_that_are_also_in_backup_unavail:
gradint.progressFile = gradint.progressFileBackup
gradint.pickledProgressFile = None
d2 = gradint.ProgressDatabase(alsoScan=0)
for x in d2.unavail: d.unavail.remove(x)
print "# Words recovered %d-%02d-%02d" % time.localtime()[:3]
print "# - capitalisation and comments are missing; order may be approximate"
gradint.reallyKnownThreshold = 0
poems,line2index = gradint.find_known_poems(d.unavail)
output = [] ; doneAlready = {}
for pLines in poems:
if filter(lambda x:not x.startswith("!synth:") or not gradint.languageof(x)==gradint.secondLanguage, pLines): continue
plines2 = []
for p in pLines:
idx = line2index[p] ; doneAlready[idx] = 1
prompt = d.unavail[idx][1]
equals = ""
if type(prompt)==type([]):
if len(prompt)==3: equals = prompt[1]
elif not plines2 and not prompt==p: equals=prompt # if 1st line
if equals:
assert equals.startswith("!synth:") and gradint.languageof(equals)==gradint.firstLanguage, "recovery of poems with non-L1 secondary prompts not yet supported"
equals = "="+gradint.textof(equals)
plines2.append(gradint.textof(p)+equals)
output.append((d.unavail[line2index[pLines[0]]][0], gradint.secondLanguage, gradint.firstLanguage, "\n".join(["begin poetry"]+plines2+["end poetry"])))
for count,(num,L1,L2) in zip(xrange(len(d.unavail)),d.unavail):
if count in doneAlready: continue
if type(L1)==type(L2)==type("") and L1.startswith("!synth:") and L2.startswith("!synth:"):
lang1,lang2 = gradint.languageof(L1),gradint.languageof(L2)
output.append((num,lang2,lang1,"%s=%s" % (gradint.textof(L2),gradint.textof(L1))))
output.sort() ; output.reverse()
curL2,curL1 = None,None
for num,lang2,lang1,text in output:
if not (lang2,lang1) == (curL2,curL1):
curL2,curL1 = lang2,lang1
print "SET LANGUAGES %s %s" % (curL2,curL1)
print text
#!/usr/bin/env python2
# Script to assist with using TextAloud or similar program
# that can batch-synthesize a collection of text files
@@ -8,7 +8,7 @@
# words in vocab.txt etc, and rename the resulting *.mp3 or *.wav
# files into the synth cache.
# Should be useful if you are not on Windows and want to run a
# non-English speech synth in the Windows Emulator (since
# ptts can have trouble, but tools like TextAloud still work).
# Note: This script currently assumes that the filesystem
@@ -43,6 +43,14 @@ delete_old = 1 # if 1 (and if sporadic) then older cached
# this script moves them there, as that's how it identifies its
# "own" mp3/wav files (as opposed to anything else you may have cached).
actually_generate = 0 # if 1, will call gradint to generate
# the cached sound using its choice of voice for that language,
# instead of relying on your use of TextAloud etc.
# Might be useful if you need to move it to another machine that
# doesn't have that voice, and you still want to use sporadic
# etc (like a more advanced version of cache-synth.py)
testMode = 0 # if 1 and actually_generate is 1, will play too
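# Typical workflow (see the messages at the end of this script): run it once
# to write numbered .txt files into the "new stuff" directory, batch-convert
# those to mp3/wav with TextAloud etc, then re-run it so the results are
# renamed into the synth cache.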
# -----------------------------------------
import sys,os,time
@@ -55,6 +63,7 @@ except: pass
sys.argv = []
import gradint
from gradint import dottxt,dotwav,dotmp3
assert gradint.synthCache, "need a synthCache for this to work"
gradint.cache_maintenance_mode = 1
try: trans = open(gradint.synthCache+os.sep+gradint.transTbl).read().replace("\n"," ")+" "
@@ -68,20 +77,27 @@ def synth_fileExists(f):
generating = {}
fname2txt = {}
for l in os.listdir(newStuff):
if l.endswith(dottxt) and "_" in l:
txt = open(newStuff+os.sep+l).read().decode('utf-16')
txt = (sporadic+txt,l[l.rindex("_")+1:l.rindex(gradint.extsep)])
generating[txt] = (None,l)
fname2txt[l[:l.rindex(gradint.extsep)]]=txt
for l in os.listdir(newStuff):
if l.endswith(dotwav) or l.endswith(dotmp3):
k=l[:l.rindex(gradint.extsep)]
if k in fname2txt: generating[fname2txt[k]]=newStuff+os.sep+l
del fname2txt # now 'generating' maps (txt,lang) to (None,txtFile) or filename
for k,v in generating.items():
if type(v)==tuple and v[0]==None: # a previous run was interrupted
os.remove(newStuff+os.sep+v[1])
del generating[k]
def getTxtLang(s):
if '!synth:' in s and "_" in s: return gradint.textof(s).decode('utf-8'),gradint.languageof(s)
elif s.endswith(gradint.extsep+"txt"):
langToSynth = gradint.languageof(s)
if langToSynth==languageToCache: return gradint.readText(s).decode('utf-8'), langToSynth # else don't bother reading the file (it might be over ftpfs)
return None,None
def decache(s):
textToSynth,langToSynth = getTxtLang(s)
@@ -90,7 +106,7 @@ def decache(s):
generating[(textToSynth.lower(),langToSynth)]=1 # don't re-generate it
s=textToSynth.lower().encode('utf-8')+"_"+langToSynth
if delete_old and langToSynth==languageToCache:
for ext in [dottxt,dotwav,dotmp3]:
if s+ext in scld:
os.remove(gradint.synthCache+os.sep+s+ext)
del scld[s+ext]
@@ -108,7 +124,12 @@ if sporadic:
else: decache(prompt)
decache(target)
count = 0 ; toMove = []
def rename(old,new):
# don't use os.rename - can get problems cross-device
open(new,"wb").write(open(old,"rb").read())
os.remove(old)
def maybe_cache(s):
textToSynth,langToSynth = getTxtLang(s)
@@ -116,25 +137,35 @@ def maybe_cache(s):
if not langToSynth==languageToCache: return
if hanziOnly and not gradint.fix_compatibility(textToSynth).replace(" ","")==gradint.hanzi_and_punc(textToSynth).replace(" ",""): return
for txt in [textToSynth, sporadic+textToSynth]:
if synth_fileExists((txt.encode('utf-8')+"_"+langToSynth+dotwav).lower()) or synth_fileExists((txt.encode('utf-8')+"_"+langToSynth+dotmp3).lower()): return # it's already been done
if synth_fileExists(("__rejected_"+txt.encode('utf-8')+"_"+langToSynth+dotwav).lower()) or synth_fileExists(("__rejected_"+txt.encode('utf-8')+"_"+langToSynth+dotmp3).lower()): return # it's been rejected
textToSynth=sporadic+textToSynth
k = (textToSynth.lower(),langToSynth)
if generating.has_key(k):
if not generating[k]==1: # a file already exists
fname = textToSynth.lower().encode('utf-8')+'_'+langToSynth+generating[k][generating[k].rindex(gradint.extsep):]
open(gradint.synthCache+os.sep+fname,"wb").write(open(generating[k],"rb").read())
rename(generating[k],gradint.synthCache+os.sep+fname)
scld[fname] = 1
#rename(generating[k][:generating[k].rindex(gradint.extsep)]+dottxt,gradint.synthCache+os.sep+textToSynth.lower().encode('utf-8')+'_'+langToSynth+dottxt)
os.remove(generating[k][:generating[k].rindex(gradint.extsep)]+dottxt)
generating[k]=1
return
if actually_generate:
tm = [gradint.synth_event(langToSynth,textToSynth[len(sporadic):].encode('utf-8')).getSound(),(textToSynth.encode('utf-8')+"_"+langToSynth+dotwav).lower()]
if gradint.got_program("lame"):
# we can MP3-encode it (TODO make this optional)
n = tm[0][:-len(dotwav)]+dotmp3
if not os.system("lame --cbr -h -b 48 -m m \"%s\" \"%s\"" % (tm[0],n)):
os.remove(tm[0])
tm[0] = n
tm[1] = tm[1][:-len(dotwav)]+dotmp3
toMove.append(tm)
scld[textToSynth.lower().encode('utf-8')+'_'+langToSynth+dotwav] = 1
return
generating[k]=1
global count
while gradint.fileExists(newStuff+os.sep+str(count)+"_"+langToSynth+dottxt): count += 1
open(newStuff+os.sep+str(count)+"_"+langToSynth+dottxt,"w").write(textToSynth[len(sporadic):].encode('utf-16'))
count += 1
print "Checking for new ones"
@@ -143,5 +174,22 @@ for _,s1,s2 in samples+gradint.parseSynthVocab(gradint.vocabFile):
else: maybe_cache(s1)
maybe_cache(s2)
if toMove: sys.stderr.write("Renaming\n")
for tmpfile,dest in toMove:
oldDest = dest
try:
rename(tmpfile,gradint.synthCache+os.sep+dest)
except OSError: # not a valid filename
while gradint.fileExists(gradint.synthCache+os.sep+("__file%d" % count)+dotwav) or gradint.fileExists(gradint.synthCache+os.sep+("__file%d" % count)+dotmp3): count += 1
rename(tmpfile,gradint.synthCache+os.sep+("__file%d" % count)+dotwav)
open(gradint.synthCache+os.sep+gradint.transTbl,"ab").write("__file%d%s %s\n" % (count,dotwav,dest))
dest = "__file%d%s" % (count,dotwav)
if testMode:
print oldDest
e=gradint.SampleEvent(gradint.synthCache+os.sep+dest)
t=time.time() ; e.play()
while time.time() < t+e.length: time.sleep(1) # in case play() is asynchronous
if count: print "Now convert the files in "+newStuff+" and re-run this script.\nYou might also want to adjust the volume if appropriate, e.g. mp3gain -r -d 6 -c *.mp3"
else: print "No extra files needed to be made."
elif not toMove: print "No extra files needed to be made."
else: print "All done"
#!/usr/bin/env python2
# trace.py: script to generate raytraced animations of Gradint lessons
# Version 1.32 (c) 2018-19,2021 Silas S. Brown. License: GPL
# The Disney Pixar film "Inside Out" (2015) represented
# memories as spheres. I don't have their CGI models, but
# we can do spheres in POV-Ray and I believe that idea is
# simple enough to be in the public domain (especially if
# NOT done like Pixar did it) - hopefully this might show
# some people how Gradint's method is supposed to work
# (especially if they've seen the Inside Out film).
# This script generates the POV-Ray scenes from a lesson.
# Gradint is run normally (passing any extra command-line arguments on,
# must include outputFile so audio can be included in the animation)
# and then the animation is written to /tmp/gradint.mp4.
# Optionally add a static image representing each word (image will be
# placed onto the spheres, and projected onto the back wall
# when that word is being recalled)
# e.g. word1_en.wav, word1_zh.wav, word1.jpg
# (or png or gif).
# Optionally add an mp4 video of a word in a particular language
# e.g. word1_en.mp4 (probably best synchronised to word1_en.wav),
# can also do this for commentsToAdd and orderlessCommentsToAdd files
# Requires POV-Ray, ffmpeg, and the Python packages vapory
# and futures (use sudo pip install futures vapory) -
# futures is used to run multiple instances of POV-Ray on
# multi-core machines.
from optparse import OptionParser
parser = OptionParser()
parser.add_option("--fps",default=15,dest="theFPS",
help="Frames per second (10 is insufficient for fast movement, so recommend at least 15)")
parser.add_option("--res",default=480,
help="Y-resolution: 240=NTSC VCD, 288=PAL VCD, 480=DVD, 607=WeChat channel, 720=Standard HD (Blu-Ray), 1080=Full HD (Blu-Ray)")
parser.add_option("--translucent",action="store_true",default=False,dest="translucent_spheres_when_picture_visible",
help="Translucent spheres when picture visible (slows down rendering but is better quality)")
parser.add_option("--minutes",default=0,
help="Maximum number of minutes to render (0 = unlimited, the default; can limit for test runs)")
parser.add_option("--quality",default=9,dest="povray_quality",
help="POVRay quality setting, default 9: 1=ambient light only, 2=lighting, 4,5=shadows, 8=reflections 9-11=radiosity etc")
options, args = parser.parse_args()
globals().update(options.__dict__)
theFPS,res,minutes,povray_quality = int(theFPS),int(res),int(minutes),int(povray_quality)
if res in [240,288]:
width_height_antialias = (352,res,0.3) # VCD. antialias=None doesn't look very good at 300x200, cld try it at higher resolutions (goes to the +A param, PovRay default is 0.3 if -A specified without param; supersample (default 9 rays) if colour differs from neighbours by this amount)
elif res==480: width_height_antialias = (640,480,0.001) # 480p (DVD)
elif res==607: width_height_antialias = (1080,607,None) # WeChat Channels
elif res==720: width_height_antialias = (1280,720,None) # Standard HD (Blu-Ray)
elif res==1080: width_height_antialias = (1920,1080,None) # Full HD (Blu-Ray)
else: raise Exception("Unknown vertical resolution specified: "+repr(res))
debug_frame_limit = minutes * theFPS * 60
import sys,os,traceback
oldName = __name__ ; from vapory import * ; __name__ = oldName
from concurrent.futures import ProcessPoolExecutor
assert os.path.exists("gradint.py"), "You must move trace.py to the top-level Gradint directory and run it from there"
sys.argv = [sys.argv[0]]+args
import gradint
assert gradint.outputFile, "You must run trace.py with gradint parameters that include outputFile"
try: xrange
except: xrange = range
S,B = gradint.S,gradint.B
class MovableParam:
def __init__(self): self.fixed = []
def fixAt(self,t,value):
while any(x[0]==t and not x[1]==value for x in self.fixed): t += 0.2
self.fixed.append((t,value))
def getPos(self,t):
assert self.fixed, "Should fixAt before getPos"
self.fixed.sort()
for i in xrange(len(self.fixed)):
if self.fixed[i][0] >= t:
if i: # interpolate
if self.fixed[i-1][1]==None: return None
duration = self.fixed[i][0]-self.fixed[i-1][0]
progress = t-self.fixed[i-1][0]
return (self.fixed[i][1]*progress + self.fixed[i-1][1]*(duration-progress))*1.0/duration
else: return self.fixed[i][1] # start position
return self.fixed[-1][1]
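# Illustrative use of the keyframe interpolation above (made-up values):
#   p = MovableParam(); p.fixAt(0,0); p.fixAt(10,5)
#   p.getPos(5) -> 2.5 (linear blend); outside the keyframe range,
#   getPos returns the nearest keyframe's value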
class MovablePos:
def __init__(self): self.x,self.y,self.z = MovableParam(),MovableParam(),MovableParam()
def fixAt(self,t,*args):
if args[0]==None: x=y=z=None
else: x,y,z = args
self.x.fixAt(t,x),self.y.fixAt(t,y),self.z.fixAt(t,z)
def getPos(self,t):
r=(self.x.getPos(t),self.y.getPos(t),self.z.getPos(t))
if r==(None,None,None): return None
else: return r
SceneObjects = set()
class MovableSphere(MovablePos):
def __init__(self,radius=0.5,colour="prompt",imageFilename=None):
MovablePos.__init__(self)
self.colour = colour
self.imageFilename = imageFilename
self.radius = MovableParam()
self.radius.fixAt(-1,radius)
SceneObjects.add(self)
# fixAt(t,x,y,z) inherited
def obj(self,t):
pos = self.getPos(t)
if not pos: return # not in scene at this time
r = self.radius.getPos(t)
if self.imageFilename:
if translucent_spheres_when_picture_visible and bkgScrFade.getPos(t) < 1: transmittence = 0.5
else: transmittence = 0.3
img = wallPic(t,self.imageFilename) # if a video is playing whose key image matches ours, 'back-copy' the video frame (TODO: do this only on the correct L1 or L2 sphere?)
if not img: img = self.imageFilename
return Sphere(list(pos),r,colour(self.colour,t),Texture(Pigment(ImageMap('"'+S(img)+'"',"once","interpolate 2","transmit all "+str(transmittence)),'scale',[1.5*r,1.5*r,1],'translate',list(pos),'translate',[-.75*r,-.75*r,0])))
else: return Sphere(list(pos),r,colour(self.colour,t))
class ObjCollection:
def __init__(self): self.objs = set()
def add(self,obj,dx,dy,dz): self.objs.add((obj,dx,dy,dz))
def get(self,dx,dy,dz): # should be small so:
for o,ddx,ddy,ddz in self.objs:
if (ddx,ddy,ddz) == (dx,dy,dz): return o
def fixAt(self,t,*args):
if args[0]==None: x=y=z=None
else: x,y,z = args
for obj,dx,dy,dz in self.objs:
if args[0]==None: obj.fixAt(t,None,None,None)
else: obj.fixAt(t,x+dx,y+dy,z+dz)
eventTrackers = {}
def EventTracker(rowNo,imageFilename=None):
if not rowNo in eventTrackers:
eventTrackers[rowNo] = ObjCollection()
eventTrackers[rowNo].add(MovableSphere(1,"l1",imageFilename),-1,0,0)
eventTrackers[rowNo].add(MovableSphere(1,"l2",imageFilename),+1,0,0)
eventTrackers[rowNo].numRepeats = 0
return eventTrackers[rowNo]
rCache = {}
def repeatSphere(rowNo,numRepeats=0):
if not (rowNo,numRepeats) in rCache:
rCache[(rowNo,numRepeats)] = MovableSphere(0.1,"prompt")
return rCache[(rowNo,numRepeats)]
def addRepeat(rowNo,t=0,length=0):
et = EventTracker(rowNo)
rpt = repeatSphere(rowNo,et.numRepeats)
if length:
rpt.fixAt(-1,None) # not exist yet (to save a tiny bit of POVRay computation)
rpt.fixAt(t-1,4*rowNo+1,0,61) # behind far wall
rpt.fixAt(t,4*rowNo-1,0,0) # ready to be 'batted'
et.fixAt(t,4*rowNo,0,10) # we're at bottom
camera_lookAt.fixAt(t,4*rowNo,0,10)
camera_lookAt.fixAt(t+length,4*rowNo,10,10)
camera_position.x.fixAt(t+length/2.0,4*rowNo)
# careful with Y : try to avoid sudden vertical motion between 2 sequences
camera_position.y.fixAt(t+length*.2,1)
camera_position.y.fixAt(t+length*.8,4)
camera_position.z.fixAt(t+length*.2,-10)
camera_position.z.fixAt(t+length*.8,-5)
et.add(rpt,0,1+0.2*et.numRepeats,0) # from now on we keep this marker
et.fixAt(t+length,4*rowNo,10,10) # at end of repeat (or at t=0) we're at top, and the repeat marker is in place
et.numRepeats += 1
camera_position = MovablePos()
camera_lookAt = MovablePos()
def cam(t): return Camera('location',list(camera_position.getPos(t)),'look_at',list(camera_lookAt.getPos(t)))
def lights(t): return [LightSource([camera_position.x.getPos(t)+10, 15, -20], [1.3, 1.3, 1.3])]
background_screen = [] # (startTime,endTime,pictureName,pictureActual)
background_screen_size = 50
bkgScrFade = MovableParam() ; bkgScrFade.fixAt(-1,1)
bkgScrX = MovableParam()
def wallPic(t,ifImg=None):
if bkgScrFade.getPos(t) == 1: return # no picture if we're faded out
found = None
for st,et,img,pic in background_screen:
if st <= t: found = (st,et,img,pic)
elif st > t: break
if found:
st,et,img,pic = found
if ifImg and not img==ifImg: return
if B(pic).endswith(B(os.extsep+"mp4")):
# need to take single frame
T = min(t,et-1.0/theFPS)-st # don't go past last frame
out = B(pic)[:-4]+B("-"+str(T)+os.extsep+"jpg")
while T > 0 and not os.path.exists(out): # (TODO: if its frame rate is low enough, we might already have the same frame even at a slightly different T)
cmd = "ffmpeg -n -threads 1 -accurate_seek -ss "+str(T)+" -i "+S(pic)+" -vframes 1 -q:v 1 "+S(out)+" </dev/null >/dev/null"
print (cmd)
os.system(cmd)
T -= 1.0/theFPS
if os.path.exists(out): return out
else: return None
else: return pic
def wall(t):
picToUse = wallPic(t)
if picToUse: return [Plane([0, 0, 1], 60, Texture(Pigment('color', [1, 1, 1])), Texture(Pigment(ImageMap('"'+S(picToUse)+'"',"once","transmit all "+str(bkgScrFade.getPos(t))),'scale',[background_screen_size,background_screen_size,1],'translate',[bkgScrX.getPos(t)-background_screen_size/2,0,0])), Finish('ambient',0.9))]
else: return [Plane([0, 0, 1], 60, Texture(Pigment('color', [1, 1, 1])), Finish('ambient',0.9))] # TODO: why does this look brighter than with ImageMap at transmit all 1.0 ?
ground = Plane( [0, 1, 0], -1, Texture( Pigment( 'color', [1, 1, 1]), Finish( 'phong', 0.1, 'reflection',0.4, 'metallic', 0.3))) # from vapory example
def colour(c,t=None):
c = {"l1":[.8,1,.2],"l2":[.5,.5,.9],"prompt":[1,.6,.5]}[c] # TODO: better colours
if translucent_spheres_when_picture_visible and not t==None and bkgScrFade.getPos(t) < 1: return Texture(Pigment('color',c,'filter',0.7))
else: return Texture(Pigment('color',c))
def scene(t):
""" Returns the scene at time 't' (in seconds) """
return Scene(cam(t), lights(t) + wall(t) + [ground] + [o for o in [x.obj(t) for x in SceneObjects] if not o==None])
def Event_draw(self,startTime,rowNo,inRepeat): pass
gradint.Event.draw = Event_draw
def CompositeEvent_draw(self,startTime,rowNo,inRepeat):
if self.eventList:
t = startTime
for i in self.eventList:
i.draw(t,rowNo,True)
t += i.length
if inRepeat: return
# Call addRepeat, but postpone the start until the
# first loggable event, to reduce rapid camera mvt
st0 = startTime
for i in self.eventList:
if i.makesSenseToLog(): break
else: startTime += i.length
if startTime==t: startTime = st0 # shouldn't happen
addRepeat(rowNo,startTime,t-startTime)
gradint.CompositeEvent.draw=CompositeEvent_draw
def Event_colour(self,language):
if self.makesSenseToLog():
if language==gradint.firstLanguage: return "l1"
else: return "l2"
else: return "prompt"
gradint.Event.colour = Event_colour
def eDraw(startTime,length,rowNo,colour):
minR = 0.5
if colour in ["l1","l2"]:
if colour=="l1": delta = -1
else: delta = +1
et = EventTracker(rowNo).get(delta,0,0)
r = et.radius
if hasattr(et,"imageFilename"):
background_screen.append((startTime,startTime+length,et.imageFilename,et.imageFilename))
bkgScrX.fixAt(startTime,4*rowNo)
bkgScrX.fixAt(startTime+length,4*rowNo)
else:
r = repeatSphere(rowNo,EventTracker(rowNo).numRepeats).radius
minR = 0.1
maxR = min(max(length,minR*1.5),minR*3) # TODO: vary with event's volume, so we can see the syllables? (partials can do that anyway)
r.fixAt(startTime,minR)
r.fixAt(startTime+length,minR)
if length/2.0 > 0.5:
r.fixAt(startTime+0.5,maxR)
# TODO: wobble in the middle?
r.fixAt(startTime+length-0.5,maxR)
else: r.fixAt(startTime+length/2.0,maxR)
def SampleEvent_draw(self,startTime,rowNo,inRepeat):
if B(self.file).startswith(B(gradint.partialsDirectory)): l=B(self.file).split(B(os.sep))[1]
else: l = gradint.languageof(self.file)
eDraw(startTime,self.length,rowNo,self.colour(S(l)))
gradint.SampleEvent.draw = SampleEvent_draw
def SynthEvent_draw(self,startTime,rowNo,inRepeat): eDraw(startTime,self.length,rowNo,self.colour(self.language))
gradint.SynthEvent.draw = SynthEvent_draw
def chkImg(i):
if not "_" in S(i.file): return
for imgExt in ["gif","png","jpeg","jpg"]:
imageFilename = B(i.file)[:B(i.file).rindex(B("_"))]+B(os.extsep+imgExt) # TODO: we're assuming no _en etc in the image filename (projected onto both L1 and L2)
if os.path.exists(imageFilename):
return os.path.abspath(imageFilename)
def runGradint():
gradint.gluedListTracker=[]
gradint.waitBeforeStart=0
gradint.main()
gradint.gluedListTracker.sort(key=lambda e:e[0].glue.length+e[0].glue.adjustment)
duration = 0
for l,row in zip(gradint.gluedListTracker,xrange(len(gradint.gluedListTracker))):
def check_for_pictures():
for gluedEvent in l:
event = gluedEvent.event
try: el=event.eventList
except: el=[event]
for j in el:
try: el2=j.eventList
except: el2=[j]
for i in el2:
if hasattr(i,"file") and B("_") in B(i.file):
imageFilename = chkImg(i)
if imageFilename:
return EventTracker(row,imageFilename)
check_for_pictures()
if hasattr(l[0],"timesDone"): timesDone = l[0].timesDone
else: timesDone = 0
for i in xrange(timesDone): addRepeat(row)
glueStart = 0
for i in l:
i.event.draw(i.getEventStart(glueStart),row,False)
glueStart = i.getAdjustedEnd(glueStart)
duration = max(duration,glueStart)
for t,e in gradint.lastLessonMade.events: # check for videos
if hasattr(e,"file") and hasattr(e,"exactLen"):
video = B(e.file)[:B(e.file).rindex(B(os.extsep))]+B(os.extsep+"mp4")
if os.path.exists(video): # overwrite static image while playing
i,v = chkImg(e),os.path.abspath(video)
if not i: i=v
background_screen.append((t,t+e.exactLen,i,v))
background_screen.sort()
i = 0 # more items might be inserted, so don't use range here
while i < len(background_screen)-1:
if background_screen[i][1] > background_screen[i+1][1]: # overlap: we end after next one ends: insert a jump-back-to-us after
background_screen.insert(i+2,(background_screen[i+1][1],background_screen[i][1],background_screen[i][2],background_screen[i][3])) # restore old after new one ends
if background_screen[i][1] > background_screen[i+1][0] and background_screen[i][0] < background_screen[i+1][0]: # overlap: we end after next one starts, but we start before it starts
background_screen[i] = (background_screen[i][0],background_screen[i+1][0],background_screen[i][2],background_screen[i][3]) # new one takes precedence
if background_screen[i][0]==background_screen[i+1][0]: # equal start, but next one might be longer
background_screen[i+1]=(background_screen[i][1],background_screen[i+1][1],background_screen[i+1][2],background_screen[i+1][3])
if background_screen[i][2]==background_screen[i+1][2] and background_screen[i][1]+5>=background_screen[i+1][0] and background_screen[i][1] < background_screen[i+1][0]:
# avoid turning off for 5 seconds or less if showing the same image (or a video of it)
background_screen.insert(i+1,(background_screen[i][1],background_screen[i+1][0],background_screen[i][2],background_screen[i][2])) # just the image
i += 1
for i in xrange(len(background_screen)):
startTime,endTime,picName,img = background_screen[i]
if i and startTime > background_screen[i-1][1] + 0.5:
bkgScrFade.fixAt(startTime,1) # start faded out
# else (less than 0.5sec between images) don't try to start faded out
fadeOutTime = endTime
if i<len(background_screen)-1:
if endTime + 0.5 > background_screen[i+1][0]:
fadeOutTime = None # as above (< 0.5sec between images)
else: fadeOutTime = max(fadeOutTime,min(background_screen[i+1][0]-1,fadeOutTime+5))
if not fadeOutTime == None:
# don't move the screen during any extended fade-out:
for ii in xrange(len(bkgScrX.fixed)):
if bkgScrX.fixed[ii][0]==endTime:
bkgScrX.fixed[ii]=((fadeOutTime,bkgScrX.fixed[ii][1]))
break
if not fadeOutTime==None: bkgScrFade.fixAt(fadeOutTime,1)
if endTime >= startTime+0.5:
bkgScrFade.fixAt(startTime+0.5,0.3)
bkgScrFade.fixAt(endTime-0.5,0.3)
else:
bkgScrFade.fixAt((startTime+endTime)/2.0,0.3)
return duration
def tryFrame(f):
frame,numFrames = f
print ("Making frame "+str(frame)+" of "+str(numFrames))
try:
try: os.mkdir("/tmp/"+repr(frame)) # vapory writes a temp .pov file and does not change its name per process, so better be in a process-unique directory
except: pass
os.chdir("/tmp/"+repr(frame))
scene(frame*1.0/theFPS).render(width=width_height_antialias[0], height=width_height_antialias[1], antialiasing=width_height_antialias[2], quality=povray_quality, outfile="/tmp/frame%05d.png" % frame)
# TODO: TURN OFF JITTER with -J if using anti-aliasing in animations
os.chdir("/tmp") ; os.system('rm -r '+repr(frame))
return None
except:
if frame==0: raise
traceback.print_exc()
sys.stderr.write("Frame %d render error, will skip\n" % frame)
return "cp /tmp/frame%05d.png /tmp/frame%05d.png" % (frame-1,frame)
def main():
executor = ProcessPoolExecutor()
duration = runGradint()
numFrames = int(duration*theFPS)
if debug_frame_limit: numFrames=min(numFrames,debug_frame_limit)
# TODO: pickle all MovableParams so can do the rendering on a different machine than the one that makes the Gradint lesson?
for c in list(executor.map(tryFrame,[(frame,numFrames) for frame in xrange(numFrames)]))+[
"ffmpeg -nostdin -y -framerate "+repr(theFPS)+" -i /tmp/frame%05d.png -i "+gradint.outputFile+" -movflags faststart -pix_fmt yuv420p -filter_complex tpad=stop=-1:stop_mode=clone -shortest /tmp/gradint.mp4 && if [ -d /Volumes ]; then open /tmp/gradint.mp4; fi" # (could alternatively run with -vcodec huffyuv /tmp/gradint.avi for lossless, insead of --movflags etc, but will get over 6 gig and may get A/V desync problems in mplayer/VLC that -delay doesn't fix, however -b:v 1000k seems to look OK; for WeChat etc you need to recode to h.264, and for HTML 5 video need recode to WebM (but ffmpeg -c:v libvpx no good if not compiled with support for those libraries; may hv to convert on another machine i.e. ffmpeg -i gradint.mp4 -vf scale=320:240 -c:v libvpx -b:v 500k gradint.webm))
]:
if c: # patch up skipped frames, then run ffmpeg
print (c) ; os.system(c)
for f in xrange(numFrames): os.remove("/tmp/frame%05d.png" % f) # wildcard from command line could get 'argument list too long' on BSD etc
if __name__=="__main__": main()
else: print (__name__)
#!/usr/bin/env python2
# transliterate.py - print a 2nd-language-transliterated version of vocab.txt and any .txt pairs in samples
# (may be useful for grepping, loading to Latin-only PDA, etc)
# (note: leaves comments untransliterated, + may not translit all text if gradint is set up so a transliterating synth will not be used)
......
@@ -18,3 +18,5 @@ samples.cgi - CGI script to browse a samples directory
or that the site is not publicly viewable)
espeak.cgi - script that lets a Web user play with espeak options
Other files - see description at the top of the file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# (should work with either Python 2 or Python 3)
# cantonese.py - Python functions for processing Cantonese transliterations
# (uses eSpeak and Gradint for help with some of them)
# v1.48 (c) 2013-15,2017-24 Silas S. Brown. License: GPL
cache = {} # to avoid repeated eSpeak runs,
# zi -> jyutping or (pinyin,) -> translit
dryrun_mode = False # True = prepare to populate cache in batch
jyutping_dryrun,pinyin_dryrun = set(),set()
import re, pickle, os, sys
if '--cache' in sys.argv:
cache_fname = sys.argv[sys.argv.index('--cache')+1]
else: cache_fname = os.environ.get("JYUTPING_CACHE","/tmp/.jyutping-cache")
try: cache = pickle.Unpickler(open(cache_fname,"rb")).load()
except: pass
extra_zhy_dict = { # TODO: add these to the real zhy_list in eSpeak
u"\u9c85":"bat3",u"\u9b81":"bat3",
}
def S(v): # make sure it's a string in both Python 2 and 3
if type("")==type(u""): # Python 3
try: return v.decode('utf-8') # in case it's bytes
except: return v
else: return v
def B(v): # make sure it's bytes in Python 3, str in Python 2
if type(v)==type(u""): return v.encode('utf-8')
return v
def get_jyutping(hanzi,mustWork=1):
if not type(hanzi)==type(u""): hanzi=hanzi.decode('utf-8')
for k,v in extra_zhy_dict.items(): hanzi=hanzi.replace(k,v)
global espeak
if not espeak:
espeak = import_gradint().ESpeakSynth()
if not espeak.works_on_this_platform(): # must call
raise Exception("espeak.works_on_this_platform")
assert espeak.supports_language("zhy")
global jyutping_dryrun
if dryrun_mode:
if not hanzi in cache: jyutping_dryrun.add(hanzi)
return "aai1" # placeholder value
elif jyutping_dryrun:
jyutping_dryrun = list(jyutping_dryrun)
vals = espeak.transliterate_multiple("zhy",jyutping_dryrun,0)
assert len(jyutping_dryrun)==len(vals)
for k,v in zip(jyutping_dryrun,vals):
cache[k]=S(v).replace("7","1").lower() # see below
jyutping_dryrun = set()
if hanzi in cache: jyutping = cache[hanzi]
else: cache[hanzi] = jyutping = S(espeak.transliterate("zhy",hanzi,forPartials=0)).replace("7","1").lower() # .lower() needed because espeak sometimes randomly capitalises e.g. 2nd hanzi of 'hypocrite' (Mandarin xuwei de ren)
if mustWork: assert jyutping.strip(), "No translit. result for "+repr(hanzi)
elif not jyutping.strip(): jyutping=""
return jyutping
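# Typical two-pass batch use (as in the __main__ section below):
#   dryrun_mode = True
#   for h in lines: get_jyutping(h) # collect uncached hanzi
#   dryrun_mode = False
#   for h in lines: print(get_jyutping(h)) # one batched eSpeak run, then cache hits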
espeak = 0
def hanzi_only(unitext): return u"".join(filter(lambda x:0x4e00<=ord(x)<0xa700 or ord(x)>=0x10000, list(unitext)))
def py2nums(pinyin):
if not type(pinyin)==type(u""):
pinyin = pinyin.decode('utf-8')
if not pinyin.strip(): return ""
global pinyin_dryrun
if pinyin_dryrun:
pinyin_dryrun = list(pinyin_dryrun)
vals = espeak.transliterate_multiple("zh",pinyin_dryrun,0)
assert len(pinyin_dryrun)==len(vals)
for i in range(len(pinyin_dryrun)):
cache[(pinyin_dryrun[i],)]=vals[i]
pinyin_dryrun = set()
if (pinyin,) in cache: pyNums = cache[(pinyin,)]
else: pyNums = espeak.transliterate("zh",pinyin,forPartials=0) # (this transliterate just does tone marks to numbers, adds 5, etc; forPartials=0 because we DON'T want to change letters like X into syllables, as that won't happen in jyutping and we're going through it tone-by-tone)
assert pyNums and pyNums.strip(), "espeak.transliterate returned %s for %s" % (repr(pyNums),repr(pinyin))
return re.sub("a$","a5",re.sub("(?<=[a-zA-Z])er([1-5])",r"e\1r5",S(pyNums)))
if type(u"")==type(""): # Python 3
getNext = lambda gen: gen.__next__()
else: getNext = lambda gen: gen.next()
try: unichr
except NameError: unichr = chr # Python 3 (do_song_subst below uses unichr)
def adjust_jyutping_for_pinyin(hanzi,jyutping,pinyin):
# If we have good quality (proof-read etc) Mandarin pinyin, this can sometimes improve the automatic Cantonese transcription
if not type(hanzi)==type(u""): hanzi = hanzi.decode('utf-8')
hanzi = hanzi_only(hanzi)
if not re.search(py2j_chars,hanzi): return jyutping
pinyin = re.findall('[A-Za-z]*[1-5]',py2nums(pinyin))
if not len(pinyin)==len(hanzi): return jyutping # can't fix
jyutping = S(jyutping)
i = 0 ; tones = re.finditer('[1-7]',jyutping) ; j2 = []
for h,p in zip(list(hanzi),pinyin):
try: j = getNext(tones).end()
except StopIteration: return jyutping # one of the hanzi has no Cantonese reading in our data: we'll warn "failed to fix" below
j2.append(jyutping[i:j]) ; i = j
if h in py2j and p.lower() in py2j[h]: j2[-1]=j2[-1][:re.search("[A-Za-z]*[1-7]$",j2[-1]).start()]+py2j[h][p.lower()]
return "".join(j2)+jyutping[i:]
py2j={
u"\u4E2D":{"zhong1":"zung1","zhong4":"zung3"},
u"\u4E3A\u70BA":{"wei2":"wai4","wei4":"wai6"},
u"\u4E50\u6A02":{"le4":"lok6","yue4":"ngok6"},
u"\u4EB2\u89AA":{"qin1":"can1","qing4":"can3"},
u"\u4EC0":{"shen2":"sam6","shi2":"sap6"}, # unless zaap6
u"\u4F20\u50B3":{"chuan2":"cyun4","zhuan4":"zyun6"},
u"\u4FBF":{"bian4":"bin6","pian2":"pin4"},
u"\u5047":{"jia3":"gaa2","jia4":"gaa3"},
u"\u5174\u8208":{"xing1":"hing1","xing4":"hing3"},
# u"\u5207":{"qie4":"cai3","qie1":"cit3"}, # WRONG (rm'd v1.17). It's cit3 in re4qie4. It just wasn't in yiqie4 (which zhy_list has as an exception anyway)
u"\u521B\u5275":{"chuang1":"cong1","chuang4":"cong3"},
u"\u53EA":{"zhi1":"zek3","zhi3":"zi2"},
u"\u53F7\u865F":{"hao4":"hou6","hao2":"hou4"},
u"\u548C":{"he2":"wo4","he4":"wo6"},
u"\u54BD":{"yan1":"jin1","yan4":"jin3","ye4":"jit3"},
u"\u5708":{"juan4":"gyun6","quan1":"hyun1"},
u"\u597D":{"hao3":"hou2","hao4":"hou3"},
u"\u5C06\u5C07":{"jiang1":"zoeng1","jiang4":"zoeng3"},
u"\u5C11":{"shao3":"siu2","shao4":"siu3"},
u"\u5DEE":{"cha4":"caa1","cha1":"caa1","chai1":"caai1"},
u"\u5F37\u5F3A":{"qiang2":"koeng4","qiang3":"koeng5"},
u"\u62C5\u64D4":{"dan1":"daam1","dan4":"daam3"},
u"\u6323\u6399":{"zheng4":"zaang6","zheng1":"zang1"},
u"\u6570\u6578":{"shu3":"sou2","shu4":"sou3"},
u"\u671D":{"chao2":"ciu4","zhao1":"ziu1"},
u"\u6ED1":{"hua2":"waat6","gu3":"gwat1"},
u"\u6F02":{"piao1":"piu1","piao3 piao4":"piu3"},
u"\u76DB":{"sheng4":"sing6","cheng2":"sing4"},
u"\u76F8":{"xiang1":"soeng1","xiang4":"soeng3"},
u"\u770B":{"kan4":"hon3","kan1":"hon1"},
u"\u79CD\u7A2E":{"zhong3":"zung2","zhong4":"zung3"},
u"\u7EF7\u7E43":{"beng1":"bang1","beng3":"maang1"},
u"\u8208":{"xing1":"hing1","xing4":"hing3"},
u"\u843D":{"luo1 luo4 lao4":"lok6","la4":"laai6"},
u"\u8457":{"zhu4":"zyu3","zhuo2":"zoek3","zhuo2 zhao2 zhao1 zhe5":"zoek6"},
u"\u8981":{"yao4":"jiu3","yao1":"jiu1"},
u"\u89C1\u898B":{"jian4":"gin3","xian4":"jin6"},
u"\u89C9\u89BA":{"jue2":"gok3","jiao4":"gaau3"},
u"\u8B58\u8BC6":{"shi2 shi4":"sik1","zhi4":"zi3"},
u"\u8ABF\u8C03":{"diao4":"diu6","tiao2":"tiu4"},
u"\u91CF":{"liang2":"loeng4","liang4":"loeng6"},
u"\u9577\u957F":{"chang2":"coeng4","zhang3":"zoeng2"},
u"\u9593\u95F4":{"jian1":"gaan1","jian4":"gaan3"},
u"\u96BE\u96E3":{"nan2":"naan4","nan4":"naan6"}}
for k in list(py2j.keys()):
if len(k)>1:
for c in list(k): py2j[c]=py2j[k]
del py2j[k]
for _,v in py2j.items():
for k in list(v.keys()):
if len(k.split())>1:
for w in k.split(): v[w]=v[k]
del v[k]
py2j_chars = re.compile(u'['+''.join(list(py2j.keys()))+']')
def jyutping_to_lau(j):
j = S(j).lower().replace("j","y").replace("z","j")
for k,v in jlRep: j=j.replace(k,v)
return j.lower().replace("ohek","euk")
def jyutping_to_lau_java(jyutpingNo=2,lauNo=1):
# for annogen.py 3.29+ --annotation-postprocess to ship Jyutping and generate Lau at runtime
return 'if(annotNo=='+str(jyutpingNo)+'||annotNo=='+str(lauNo)+'){m=Pattern.compile("<rt>(.*?)</rt>").matcher(r);sb=new StringBuffer();while(m.find()){String r2=(annotNo=='+str(jyutpingNo)+'?m.group(1).replaceAll("([1-7])(.)","$1&shy;$2"):(m.group(1)+" ").toLowerCase().replace("j","y").replace("z","j")'+''.join('.replace("'+k+'","'+v+'")' for k,v in jlRep)+'.toLowerCase().replace("ohek","euk").replaceAll("([1-7])","<sup>$1</sup>-").replace("- "," ").replaceAll(" $","")),tmp=m.group(1).substring(0,1);if(annotNo=='+str(lauNo)+'&&tmp.equals(tmp.toUpperCase()))r2=r2.substring(0,1).toUpperCase()+r2.substring(1);m.appendReplacement(sb,"<rt>"+r2+"</rt>");}m.appendTail(sb); r=sb.toString();}' # TODO: can probably go faster with mapping for some of this
def incomplete_lau_to_jyutping(l):
# incomplete: assumes Lau didn't do the "aa" -> "a" rule
l = S(l).lower().replace("euk","ohek")
for k,v in ljRep: l=l.replace(k,v)
return l.lower().replace("j","z").replace("y","j")
def incomplete_lau_to_yale_u8(l): return jyutping_to_yale_u8(incomplete_lau_to_jyutping(l))
jlRep = [(unchanged,unchanged.upper()) for unchanged in "aai aau aam aang aan aap aat aak ai au am ang an ap at ak a ei eng ek e iu im ing in ip it ik i oi ong on ot ok ung uk".split()] + [("eoi","UI"),("eon","UN"),("eot","UT"),("eok","EUK"),("oeng","EUNG"),("oe","EUH"),("c","ch"),("ou","O"),("o","OH"),("yu","UE"),("u","OO")]
jlRep.sort(key=lambda a:-len(a[0])) # longest 1st
# u to oo includes ui to ooi, un to oon, ut to oot
# yu to ue includes yun to uen and yut to uet
# drawing from the table on http://www.omniglot.com/writing/cantonese.htm plus this private communication:
# Jyutping "-oeng" maps to Sidney Lau "-eung".
# Jyutping "jyu" maps to Sidney Lau "yue". (consequence of yu->ue, j->y)
ljRep=[(b.lower(),a.upper()) for a,b in jlRep]
ljRep.sort(key=lambda a:-len(a[0])) # longest 1st
def ping_or_lau_to_syllable_list(j): return re.sub(r"([1-9])(?![0-9])",r"\1 ",re.sub(r"[!-/:-@^-`]"," ",S(j))).split()
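# e.g. ping_or_lau_to_syllable_list("gwong2zau1waa2") -> ["gwong2","zau1","waa2"]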
def hyphenate_ping_or_lau_syl_list(sList,groupLens=None):
if type(sList) in [str,type(u"")]:
sList = ping_or_lau_to_syllable_list(sList)
return hyphenate_syl_list(sList,groupLens)
def hyphenate_yale_syl_list(sList,groupLens=None):
# (if sList is a string, the syllables must be space-separated,
# which will be the case if to_yale functions below are used)
if not type(sList)==list: sList = sList.split()
return hyphenate_syl_list(sList,groupLens)
def hyphenate_syl_list(sList,groupLens=None):
assert type(sList) == list
if '--hyphenate-all' in sys.argv: groupLens = [len(sList)]
elif not groupLens: groupLens = [1]*len(sList) # don't hyphenate at all if we don't know
else: assert sum(groupLens) == len(sList), "sum("+repr(groupLens)+")!=len("+repr(sList)+")"
r = [] ; start = 0
for g in groupLens:
r.append("-".join(S(x) for x in sList[start:start+g]))
start += g
return " ".join(r)
def jyutping_to_yale_TeX(j): # returns space-separated syllables
ret=[]
for syl in ping_or_lau_to_syllable_list(S(j).lower().replace("eo","eu").replace("oe","eu").replace("j","y").replace("yyu","yu").replace("z","j").replace("c","ch")):
vowel=lastVowel=None
for i in range(len(syl)):
if syl[i] in "aeiou":
vowel=i ; break
if vowel==None and re.match(r"h?(m|ng)[456]",syl): # standalone nasal syllables
vowel = syl.find('m')
if vowel<0: vowel = syl.index('n')
lastVowel = syl.find('g')
if lastVowel<0: lastVowel = vowel
if vowel==None:
ret.append(syl.upper()) ; continue # English word or letter in the Chinese?
if syl[vowel:vowel+2] == "aa" and (len(syl)<vowel+2 or syl[vowel+2] in "123456"):
syl=syl[:vowel]+syl[vowel+1:] # final aa -> a
# the tonal 'h' goes after all the vowels but before any consonants:
for i in range(len(syl)-1,-1,-1):
if syl[i] in "aeiou":
lastVowel=i ; break
if syl[-1] in "1234567":
# get_jyutping replaces 7 with 1 because zhy_list is
# more Canton-type than Hong Kong-type Cantonese and
# there is considerable disagreement on which "1"s
# should be "7"s, but if you pass any "7" into the
# jyutping_to_yale functions we can at least process
# it here:
tone = ["\=",r"\'","",r"\`",r"\'","",r"\`"][int(syl[-1])-1]
if syl[-1] in "456":
syl=syl[:lastVowel+1]+"h"+syl[lastVowel+1:]
ret.append((syl[:vowel]+tone+syl[vowel:-1]).replace(r"\=i",r"\=\i{}").replace(r"\=I",r"\=\I{}"))
else: ret.append(syl.upper()) # English word or letter in the Chinese?
return ' '.join(ret)
def jyutping_to_yale_u8(j): # returns space-separated syllables
import unicodedata
def mysub(z,l):
for x,y in l:
z = re.sub(re.escape(x)+r"(.)",r"\1"+y,z)
return z
if type(u"")==type(""): U=str # Python 3
else: # Python 2
def U(x):
try: return x.decode('utf-8') # might be an emoji pass-through
except: return x # already Unicode
return unicodedata.normalize('NFC',mysub(U(jyutping_to_yale_TeX(j).replace(r"\i{}","i").replace(r"\I{}","I")),[(r"\`",u"\u0300"),(r"\'",u"\u0301"),(r"\=",u"\u0304")])).encode('utf-8')
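# e.g. jyutping_to_yale_u8("sam1 sam4") gives "sām sàhm" (as UTF-8 bytes; tones 4-6 gain the tonal 'h')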
def superscript_digits_TeX(j):
# for jyutping and Sidney Lau
j = S(j)
for digit in "123456789": j=j.replace(digit,r"\raisebox{-0.3ex}{$^"+digit+r"$}\hspace{0pt}")
return j
def superscript_digits_HTML(j):
j = S(j)
for digit in "123456789": j=j.replace(digit,"<sup>"+digit+"</sup>")
return j
def superscript_digits_UTF8(j):
# WARNING: not all fonts have all digits; many have only the first 3. superscript_digits_HTML might be better for browsers, even though it does produce more bytes.
j = S(j)
for digit in range(1,10): j=j.replace(str(digit),S(u"¹²³⁴⁵⁶⁷⁸⁹"[digit-1].encode('utf-8')))
if type(j)==type(u""): j=j.encode('utf-8') # Python 3
return j
def import_gradint():
global gradint
try: return gradint
except: pass
# when importing gradint, make sure no command line
tmp,sys.argv = sys.argv,sys.argv[:1]
import gradint
sys.argv = tmp
gradint.espeak_preprocessors = {}
return gradint
def do_song_subst(hanzi_u8): return B(hanzi_u8).replace(unichr(0x4f7f).encode('utf-8'),unichr(0x38c8).encode('utf-8')) # Mandarin shi3 (normally jyutping sai2) is usually si3 in songs, so substitute a rarer character that unambiguously has that reading before sending to get_jyutping
if __name__ == "__main__":
# command-line use: output Lau for each line of stdin
# (or Yale if there's a --yale in sys.argv, or both
# with '#' separators if --yale#lau in sys.argv,
# also --yale#ping and --yale#lau#ping accepted);
# if there's a # in the line, assume it's hanzi#pinyin
# (for annogen.py --reannotator="##python cantonese.py")
lines = sys.stdin.read().replace("\r\n","\n").split("\n")
if lines and not lines[-1]: del lines[-1]
dryrun_mode = True
def songSubst(l):
if '--song-lyrics' in sys.argv: l=do_song_subst(l)
return l
for l in lines:
if '#' in l: l,pinyin = l.split('#')
else: pinyin = None
get_jyutping(songSubst(l))
if pinyin and not type(pinyin)==type(u""):
pinyin = pinyin.decode('utf-8')
if pinyin and not (pinyin,) in cache:
pinyin_dryrun.add(pinyin)
for w in pinyin.split():
for h in w.split('-'):
pinyin_dryrun.add(h)
dryrun_mode = False
for l in lines:
if '#' in l: l,pinyin = l.split('#')
else: pinyin = None
jyutping = get_jyutping(songSubst(l),0)
if not jyutping: groupLens = None # likely a Unihan-only 'fallback readings' zi that has no Cantonese
elif pinyin:
jyutping = adjust_jyutping_for_pinyin(l,jyutping,pinyin)
groupLens = [0]
for syl,space in re.findall('([A-Za-z]*[1-5])( *)',' '.join('-'.join(py2nums(h) for h in w.split('-')) for w in pinyin.split())): # doing it this way so we're not relying on espeak transliterate_multiple to preserve spacing and hyphenation
groupLens[-1] += 1
if space: groupLens.append(0)
if not groupLens[-1]: groupLens=groupLens[:-1]
lenWanted = len(ping_or_lau_to_syllable_list(jyutping))
if sum(groupLens) > lenWanted: # probably silent -r to drop
for i,word in enumerate(py2nums(pinyin).split()):
if re.search("[1-5]r5",word):
groupLens[i] -= 1
if sum(groupLens)==lenWanted: break
if not sum(groupLens)==lenWanted:
sys.stderr.write("WARNING: failed to fix "+pinyin+" ("+py2nums(pinyin)+") to "+jyutping+" ("+repr(ping_or_lau_to_syllable_list(jyutping))+") from "+l+", omitting\n")
groupLens = None ; jyutping = ""
else: groupLens = None
if "--yale#lau" in sys.argv: print (hyphenate_yale_syl_list(jyutping_to_yale_u8(jyutping),groupLens)+"#"+superscript_digits_HTML(hyphenate_ping_or_lau_syl_list(jyutping_to_lau(jyutping),groupLens)))
elif '--yale#ping' in sys.argv: print (hyphenate_yale_syl_list(jyutping_to_yale_u8(jyutping),groupLens)+"#"+jyutping.replace(' ',''))
elif "--yale#lau#ping" in sys.argv: print (hyphenate_yale_syl_list(jyutping_to_yale_u8(jyutping),groupLens)+"#"+superscript_digits_HTML(hyphenate_ping_or_lau_syl_list(jyutping_to_lau(jyutping),groupLens))+"#"+jyutping.replace(' ',''))
elif "--yale" in sys.argv: print (hyphenate_yale_syl_list(jyutping_to_yale_u8(jyutping),groupLens))
else: print (superscript_digits_HTML(hyphenate_ping_or_lau_syl_list(jyutping_to_lau(jyutping),groupLens)))
try: pickle.Pickler(open(cache_fname,"wb"),-1).dump(cache)
except: pass
#!/bin/bash
# email-lesson-archive.sh - archive an old email-lesson user
# (C) 2008,2021-22 Silas S. Brown, License: GPL
if ! pwd|grep email_lesson_users >/dev/null; then
echo "This script should be run from an email_lesson_users directory (see email-lesson.sh)"
@@ -13,29 +13,29 @@ if test "a$1" == a; then
fi
. config
while ! test "a$1" == a; do
if test -e "$1"; then
if [ -e "$1" ]; then
unset U; unset Links
if echo "$1"|grep "^user.0*" >/dev/null; then
# specifying by user.0* id
export U=$1
Links=$(find . -maxdepth 1 -lname "$U")
elif find "$1" -maxdepth 0 -type l|grep . >/dev/null; then
# specifying by symlink
Links=$1
U=$(ls -l --color=none "$1"|sed -e 's/.* -> //')
else echo "Warning: can't make sense of username $1"; fi
if ! test "a$U" == a; then
if test -e $U/lastdate; then
if ! [ "a$U" == a ]; then
if [ -e "$U/lastdate" ]; then
if test "a$Links" == a; then export Shortname=$U; else export Shortname=$Links; fi
if echo "$PUBLIC_HTML" | grep : >/dev/null; then
ssh $PUBLIC_HTML_EXTRA_SSH_OPTIONS "$(echo "$PUBLIC_HTML"|sed -e 's/:.*//')" rm -v "$(echo "$PUBLIC_HTML"|sed -e 's/[^:]*://')/$U-$(cat $U/lastdate).*"
else rm -v "$PUBLIC_HTML/$U-$(cat "$U/lastdate")".*
fi
fi
tar -jcvf "$Shortname.tbz" "$U" $Links
mkdir -p old
mv -v --backup=numbered "$Shortname.tbz" old/
rm -rf "$U" $Links
fi
else echo "Warning: User $1 does not exist"; fi
shift; done