FAQ | This is a LIVE service | Changelog

Skip to content
Snippets Groups Projects
Commit 314b0576 authored by Silas S. Brown's avatar Silas S. Brown
Browse files

Gradint update

git-svn-id: http://svn.code.sf.net/p/e-guidedog/code/ssb22/gradint@1388 29193198-4895-4776-b068-10539e920549
parent 25651231
No related branches found
No related tags found
No related merge requests found
...@@ -118,6 +118,11 @@ if sporadic: ...@@ -118,6 +118,11 @@ if sporadic:
count = 0 ; toMove = [] count = 0 ; toMove = []
def rename(old,new):
# don't use os.rename - can get problems cross-device
open(new,"wb").write(open(old,"rb").read())
os.remove(old)
def maybe_cache(s): def maybe_cache(s):
textToSynth,langToSynth = getTxtLang(s) textToSynth,langToSynth = getTxtLang(s)
if not textToSynth: return if not textToSynth: return
...@@ -130,12 +135,10 @@ def maybe_cache(s): ...@@ -130,12 +135,10 @@ def maybe_cache(s):
k = (textToSynth.lower(),langToSynth) k = (textToSynth.lower(),langToSynth)
if generating.has_key(k): if generating.has_key(k):
if not generating[k]==1: # a file already exists if not generating[k]==1: # a file already exists
# don't use os.rename - can get problems cross-device
fname = textToSynth.lower().encode('utf-8')+'_'+langToSynth+generating[k][generating[k].rindex(gradint.extsep):] fname = textToSynth.lower().encode('utf-8')+'_'+langToSynth+generating[k][generating[k].rindex(gradint.extsep):]
open(gradint.synthCache+os.sep+fname,"wb").write(open(generating[k],"rb").read()) rename(generating[k],gradint.synthCache+os.sep+fname)
scld[fname] = 1 scld[fname] = 1
#open(gradint.synthCache+os.sep+textToSynth.lower().encode('utf-8')+'_'+langToSynth+dottxt,"wb").write(open(generating[k][:generating[k].rindex(gradint.extsep)]+dottxt,"rb").read()) #rename(generating[k][:generating[k].rindex(gradint.extsep)]+dottxt,gradint.synthCache+os.sep+textToSynth.lower().encode('utf-8')+'_'+langToSynth+dottxt)
os.remove(generating[k])
os.remove(generating[k][:generating[k].rindex(gradint.extsep)]+dottxt) os.remove(generating[k][:generating[k].rindex(gradint.extsep)]+dottxt)
generating[k]=1 generating[k]=1
return return
...@@ -167,10 +170,10 @@ if toMove: sys.stderr.write("Renaming\n") ...@@ -167,10 +170,10 @@ if toMove: sys.stderr.write("Renaming\n")
for tmpfile,dest in toMove: for tmpfile,dest in toMove:
oldDest = dest oldDest = dest
try: try:
os.rename(tmpfile,gradint.synthCache+os.sep+dest) rename(tmpfile,gradint.synthCache+os.sep+dest)
except OSError: # not a valid filename except OSError: # not a valid filename
while gradint.fileExists(gradint.synthCache+os.sep+("__file%d" % count)+dotwav) or gradint.fileExists(gradint.synthCache+os.sep+("__file%d" % count)+dotmp3): count += 1 while gradint.fileExists(gradint.synthCache+os.sep+("__file%d" % count)+dotwav) or gradint.fileExists(gradint.synthCache+os.sep+("__file%d" % count)+dotmp3): count += 1
os.rename(tmpfile,gradint.synthCache+os.sep+("__file%d" % count)+dotwav) rename(tmpfile,gradint.synthCache+os.sep+("__file%d" % count)+dotwav)
open(gradint.synthCache+os.sep+gradint.transTbl,"ab").write("__file%d%s %s\n" % (count,dotwav,dest)) open(gradint.synthCache+os.sep+gradint.transTbl,"ab").write("__file%d%s %s\n" % (count,dotwav,dest))
dest = "__file%d%s" % (count,dotwav) dest = "__file%d%s" % (count,dotwav)
if testMode: if testMode:
......
#!/usr/bin/env python
# Script to assist with using TextAloud or similar program
# that can batch-synthesize a collection of text files
# provided it is run interactively to start the batch conversion.
# This script will generate appropriate *.txt files for the
# words in vocab.txt etc, and rename the resulting *.mp3 or *.wav
# files into the synth cache.
# Should be useful if you are on Linux and want to run a
# non-English speech synth in the Windows Emulator (since
# ptts can have trouble, but tools like TextAloud still work).
# Note: This script currently assumes that the filesystem
# can take all the characters used in the strings; that should
# probably be changed on Windows etc. However, if you're on
# Windows and are using a Windows-based synth then you shouldn't
# need this script; use cache-synth.py instead (more fully automated).
# You need to set these variables:
languageToCache = "zh" # the language we are interested in; phrases in
# any other language are skipped
hanziOnly = 1 # 1 or 0. If 1 then only phrases consisting
# entirely of Chinese characters will be listed (could be useful
# for voices like MeiLing which can't really manage anything else)
# (Note: If you need to artificially specify a
# division between two hanzi words, use a hyphen
# (-) to do it. MeiLing and Gradint/Yali will
# both recognise this as a word boundary that is
# not to be pronounced.)
newStuff = "new-stuff" # the directory in which *.txt files
# will be created, and in which to look for the resulting
# *.mp3/*.wav files (it is created if it does not exist)
sporadic = 1 # 1 or 0, whether or not to ask for the cached words
# to be generated in "sporadic" mode (i.e. not used 100% of the time).
# Below, this flag is replaced by the filename prefix "_" or "".
delete_old = 1 # if 1 (and if sporadic) then older cached
# files (that are still marked sporadic) are deleted. This
# requires that you don't delete the .txt files from synthCache when
# this script moves them there, as that's how it identifies its
# "own" mp3/wav files (as opposed to anything else you may have cached).
actually_generate = 0 # if 1, will call gradint to actually
# generate the cached sound using the default voice. Might
# be useful if you need to move it to another machine that
# doesn't have that voice, and you still want to use sporadic
# etc (like a more advanced version of cache-synth.py)
testMode = 0 # if 1 and actually_generate is 1, will also play
# each sound after it has been moved into the cache
# -----------------------------------------
import sys,os,time
# From here on 'sporadic' is no longer a 1/0 flag: it is the prefix that
# is put in front of the text of every sporadic-mode cache entry
# ("_" when enabled, "" when disabled, so it is still falsy when off).
if sporadic:
    sporadic = "_"
else:
    sporadic = ""

# Make sure the working directory exists.  Only filesystem errors are
# ignored (typically "already exists"); a Ctrl-C is no longer swallowed.
try:
    os.mkdir(newStuff)
except OSError:
    pass

# Cleared before gradint is imported -- presumably so that gradint does
# not act on this script's command-line arguments (TODO confirm).
sys.argv = []
import gradint
from gradint import dottxt,dotwav,dotmp3
# Nothing below can work unless gradint has a synth cache directory.
assert gradint.synthCache, "need a synthCache for this to work"
gradint.cache_maintenance_mode = 1

# 'trans' is the synth cache's translation table flattened onto one line
# (newlines become spaces, plus a trailing space) so that a name can be
# looked up with a simple " name " substring test.  A missing or
# unreadable table just means nothing has been translated yet; any other
# kind of error is allowed to propagate instead of being hidden.
try:
    trans = open(gradint.synthCache+os.sep+gradint.transTbl).read().replace("\n"," ")+" "
except EnvironmentError:
    trans = ""

# 'scld' = synth cache listing as a dictionary; it is updated as files
# are added to or removed from the cache further down.
scld = gradint.list2dict(os.listdir(gradint.synthCache))
def synth_fileExists(f):
    """Return whether cache filename *f* is already accounted for.

    A name counts as existing when it is in the synth cache listing
    (scld) or appears as a space-delimited word in the flattened
    translation table (trans).
    """
    return (f in scld) or ((" " + f + " ") in trans)
# Check for previous newStuff .txt's, and any results from them.
# 'generating' maps (text, language) to 1 while a request is outstanding,
# or to the path of the sound file once one has turned up in newStuff.
generating = {}
fname2txt = {}  # request filename without extension -> key in 'generating'
newStuffFiles = os.listdir(newStuff)

# Pass 1: every NUMBER_LANG.txt request written by an earlier run.
for l in newStuffFiles:
    if l.endswith(dottxt) and "_" in l:
        # NOTE(review): the content is UTF-16 but the file is opened in
        # text mode, as before; binary mode looks more robust on Windows,
        # but only once the code that writes these files uses it too.
        f = open(newStuff+os.sep+l)
        try:
            txt = f.read().decode('utf-16')
        finally:
            f.close()
        lang = l[l.rindex("_")+1:l.rindex(gradint.extsep)]
        # Lower-case the text: every lookup in 'generating' elsewhere in
        # this script uses a lower-cased key, so a request containing
        # capital letters was previously never recognised again.
        key = ((sporadic+txt).lower(), lang)
        generating[key] = 1
        fname2txt[l[:l.rindex(gradint.extsep)]] = key

# Pass 2: sound files that share their base name with one of those requests.
for l in newStuffFiles:
    if l.endswith(dotwav) or l.endswith(dotmp3):
        k = l[:l.rindex(gradint.extsep)]
        if k in fname2txt:
            generating[fname2txt[k]] = newStuff+os.sep+l
del fname2txt # now 'generating' maps (txt,lang) to 1 or filename
def getTxtLang(s):
    """Return (text, language) for sample name *s*, or (None, None).

    Two kinds of name are understood: one containing both '!synth:' and
    an underscore, whose text comes from gradint.textof(), and one ending
    in the ".txt" extension, whose text comes from gradint.readText().
    In both cases the text is decoded from UTF-8 and the language comes
    from gradint.languageof().
    """
    is_synth_directive = '!synth:' in s and "_" in s
    if is_synth_directive:
        return gradint.textof(s).decode('utf-8'), gradint.languageof(s)
    if s.endswith(gradint.extsep+"txt"):
        return gradint.readText(s).decode('utf-8'), gradint.languageof(s)
    return None, None
def decache(s):
    """Stop the sporadic-mode entry for *s* from being (re)generated.

    The entry is marked in 'generating' so that no new request is made
    for it.  If delete_old is set and the language is languageToCache,
    any .txt/.wav/.mp3 files for it are also removed from the synth
    cache and from the cache listing (scld).  Names that getTxtLang()
    cannot interpret are ignored.
    """
    text, lang = getTxtLang(s)
    if not text:
        return
    lowered = ("_" + text).lower()  # "_" prefix = sporadic mode
    generating[(lowered, lang)] = 1  # don't re-generate it
    stem = lowered.encode('utf-8') + "_" + lang
    if not (delete_old and lang == languageToCache):
        return
    for ext in [dottxt, dotwav, dotmp3]:
        cached_name = stem + ext
        if cached_name in scld:
            os.remove(gradint.synthCache + os.sep + cached_name)
            del scld[cached_name]
# MUST call before sporadic so variantFiles is populated
samples = gradint.scanSamples()

if sporadic:
    pd = gradint.ProgressDatabase()
    if delete_old:
        print("Checking for old words to remove")
    else:
        print("Sporadic mode: Checking for old words to avoid")
    # Every item at or above the "really known" threshold has both of
    # its sides passed to decache(); a prompt may be one name or a list.
    for t, prompt, target in pd.data:
        if not t >= gradint.reallyKnownThreshold:
            continue
        if type(prompt) == type([]):
            prompts = prompt
        else:
            prompts = [prompt]
        for p in prompts:
            decache(p)
        decache(target)

# count: running number used for new request filenames;
# toMove: [temporary file, destination name] pairs to put in the cache.
count = 0
toMove = []
def maybe_cache(s):
    """Make sure the text behind sample name *s* gets into the synth cache.

    Nothing is done when getTxtLang() cannot interpret *s*, when the
    language is not languageToCache, when hanziOnly is set and the text
    fails the hanzi-only comparison, or when a cached or "__rejected_"
    file for the text (with or without the sporadic prefix) is already
    known to synth_fileExists().

    Otherwise one of three things happens:
    - if an earlier run requested the text and its sound file has turned
      up in newStuff, that file is copied into the synth cache and the
      newStuff sound and request files are deleted;
    - if actually_generate is set, gradint synthesises the sound (encoded
      to MP3 when "lame" is available) and it is queued on toMove;
    - failing both, a new UTF-16 request file NUMBER_LANG.txt is written
      to newStuff for the external synthesizer to convert.
    """
    global count
    textToSynth, langToSynth = getTxtLang(s)
    if not textToSynth: return
    if not langToSynth == languageToCache: return
    if hanziOnly and not gradint.fix_compatibility(textToSynth).replace(" ","")==gradint.hanzi_and_punc(textToSynth).replace(" ",""):
        return

    # Look under both the plain and the sporadic-prefixed name.
    for txt in [textToSynth, sporadic+textToSynth]:
        base = txt.encode('utf-8')+"_"+langToSynth
        if synth_fileExists((base+dotwav).lower()) or synth_fileExists((base+dotmp3).lower()):
            return # it's already been done
        if synth_fileExists(("__rejected_"+base+dotwav).lower()) or synth_fileExists(("__rejected_"+base+dotmp3).lower()):
            return # it's been rejected

    textToSynth = sporadic+textToSynth
    k = (textToSynth.lower(), langToSynth)
    if k in generating:
        if not generating[k] == 1: # a file already exists
            soundFile = generating[k]
            extStart = soundFile.rindex(gradint.extsep)
            fname = textToSynth.lower().encode('utf-8')+'_'+langToSynth+soundFile[extStart:]
            # don't use os.rename - can get problems cross-device
            open(gradint.synthCache+os.sep+fname,"wb").write(open(soundFile,"rb").read())
            scld[fname] = 1
            os.remove(soundFile)
            # the request .txt is deleted, not copied into the cache
            os.remove(soundFile[:extStart]+dottxt)
            generating[k] = 1
        return

    if actually_generate:
        # tm = [temporary sound file, destination name in the cache]
        tm = [gradint.synth_event(langToSynth,textToSynth[len(sporadic):].encode('utf-8')).getSound(),
              (textToSynth.encode('utf-8')+"_"+langToSynth+dotwav).lower()]
        if gradint.got_program("lame"):
            # we can MP3-encode it (TODO make this optional)
            n = tm[0][:-len(dotwav)]+dotmp3
            if not os.system("lame --cbr -h -b 48 -m m \"%s\" \"%s\"" % (tm[0],n)):
                # exit status 0: keep the MP3, drop the WAV
                os.remove(tm[0])
                tm[0] = n
                tm[1] = tm[1][:-len(dotwav)]+dotmp3
        toMove.append(tm)
        scld[textToSynth.lower().encode('utf-8')+'_'+langToSynth+dotwav] = 1
        return

    # Ask for the text to be synthesized externally.
    generating[k] = 1
    while gradint.fileExists(newStuff+os.sep+str(count)+"_"+langToSynth+dottxt): count += 1
    # The content is UTF-16 *bytes*, so the file must be opened in binary
    # mode: in text mode Windows expands every 0x0A byte (part of many
    # characters, e.g. U+4E0A) to CR LF, which corrupts the file.
    request = open(newStuff+os.sep+str(count)+"_"+langToSynth+dottxt,"wb")
    try:
        request.write(textToSynth[len(sporadic):].encode('utf-16'))
    finally:
        request.close()
    count += 1
def _move_into_cache(src, dst):
    """Move file *src* to *dst*, also when they are on different devices.

    os.rename() cannot cross devices; shutil.move() falls back to copying
    the file and deleting the original when a plain rename fails.
    """
    import shutil
    shutil.move(src, dst)

print("Checking for new ones")
for _,s1,s2 in samples+gradint.parseSynthVocab(gradint.vocabFile):
    # the first element may be a single name or a list of names
    if type(s1)==type([]):
        for i in s1: maybe_cache(i)
    else: maybe_cache(s1)
    maybe_cache(s2)

if toMove: sys.stderr.write("Renaming\n")
# Separate counter for fallback names, so that 'count' (tested for the
# final message) only reflects request files written to newStuff.
fallbackNum = 0
for tmpfile,dest in toMove:
    oldDest = dest
    try:
        _move_into_cache(tmpfile, gradint.synthCache+os.sep+dest)
    except EnvironmentError: # not a valid filename
        # (EnvironmentError covers both OSError from the rename and
        # IOError from the copy.)  Store the file under the first free
        # "__fileN" name and record the intended name in the
        # translation table.
        # NOTE(review): the fallback name always gets the .wav extension
        # even if 'dest' is an MP3 name -- confirm this is intended.
        while gradint.fileExists(gradint.synthCache+os.sep+("__file%d" % fallbackNum)+dotwav) or gradint.fileExists(gradint.synthCache+os.sep+("__file%d" % fallbackNum)+dotmp3): fallbackNum += 1
        _move_into_cache(tmpfile, gradint.synthCache+os.sep+("__file%d" % fallbackNum)+dotwav)
        open(gradint.synthCache+os.sep+gradint.transTbl,"ab").write("__file%d%s %s\n" % (fallbackNum,dotwav,dest))
        dest = "__file%d%s" % (fallbackNum,dotwav)
    if testMode:
        print(oldDest)
        e = gradint.SampleEvent(gradint.synthCache+os.sep+dest)
        t = time.time() ; e.play()
        while time.time() < t+e.length: time.sleep(1) # in case play() is asynchronous

if count: print("Now convert the files in "+newStuff+" and re-run this script.\nYou might also want to adjust the volume if appropriate, e.g. mp3gain -r -d 6 -c *.mp3")
elif not toMove: print("No extra files needed to be made.")
else: print("All done")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment