Newer
Older
# espeak.cgi - a CGI script for the eSpeak speech synthesizer
# (c) 2008,2011,2020 Silas S. Brown, License: GPL
version="1.3"
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# With most webservers you should be able to put this
# in your public_html and do chmod +x. You will also need to
# install eSpeak on the system (if you can't do it system-wide
# then add code here to modify PATH and ESPEAK_DATA_PATH, e.g.
# import os ; os.environ["PATH"]="/my/espeak/path:"+os.environ["PATH"]
# )
# cgitb.enable() makes uncaught exceptions render as a traceback page in the response.
# NOTE(review): the cgi and cgitb modules were deprecated in Python 3.11 and removed in
# Python 3.13 (PEP 594), so this script needs an interpreter that still provides them.
import cgi, cgitb ; cgitb.enable()
# Parsed request parameters; individual fields are read below with f.getfirst(name, default)
f = cgi.FieldStorage()
# Language used when the request does not supply an acceptable one
default_language = "en"
# Texts longer than this many characters are streamed through sox instead of
# being written to a temporary WAV file first
stream_if_input_bigger_than = 100
# NOTE(review): presumably the upper bound on the submitted text length - confirm where it is enforced
max_input_size = 1000
# Speech-rate choices offered in the form: minSpeed up to maxSpeed in steps of speedStep
# (NB check defaultSpeed=minSpeed+integer*speedStep)
minSpeed = 80
defaultSpeed = 170
maxSpeed = 370
speedStep = 30
import os,re,sys
try: from commands import getoutput # Python 2
except: from subprocess import getoutput # Python 3
# Pick the synthesizer executable: "espeak" if it is on PATH, otherwise "speak"
if getoutput("which espeak 2>/dev/null"): prog="espeak"
elif getoutput("which speak 2>/dev/null"): prog="speak"
else:
    # Without this branch prog would stay undefined and its first use would fail with a NameError
    print ("Content-type: text/plain\n\nUnable to find an espeak (or speak) executable on PATH")
    raise SystemExit
def S(x):
    """Return x as a text string where possible.

    On Python 3 a UTF-8 byte string is decoded to str. A value that has no
    decode method (e.g. it is already text) or that is not valid UTF-8 is
    returned unchanged. On Python 2, x is always returned unchanged.
    """
    if type("")==type(u""): # Python 3
        try: return x.decode('utf-8') # in case byte-string
        except (AttributeError, UnicodeError): pass # already text, or not valid UTF-8
    return x
# Requested voice: a language code, optionally followed by "+variant"
lang = S(f.getfirst("l",default_language))
# lang is interpolated into shell command lines further down, so only a short
# string of whitelisted characters is accepted. The pattern ends in \Z rather
# than $ because $ also matches just before a trailing newline, which would
# let a value such as "en\n" through.
if len(lang)>10 or not re.match(r"^[a-z0-9+-]*\Z",lang): lang=default_language
# Locate the espeak data directory: an explicit ESPEAK_DATA_PATH wins,
# otherwise the first known install location that exists is used.
if "ESPEAK_DATA_PATH" in os.environ: voiceDir = os.environ["ESPEAK_DATA_PATH"]
else:
    for voiceDir in ["/usr/share/espeak-data", "/usr/lib/x86_64-linux-gnu/espeak-data"]:
        if os.path.exists(voiceDir): break
    else:
        # none of the candidates exists: report it as a plain-text CGI response
        print ("Content-type: text/plain\n\nUnable to find ESPEAK_DATA_PATH")
        raise SystemExit
voiceDir += "/voices"
# Voice variants are the entries of the "!v" subdirectory, sorted, with a
# "default" pseudo-entry placed first.
variants = os.listdir(voiceDir+"/!v")
if "whisper" in variants and "wisper" in variants: variants.remove("wisper") # don't list both spellings
variants.sort() ; variants.insert(0,"default")
if "+" in lang:
    # "language+variant" given in a single parameter: split at the first "+"
    lang, _, variant = lang.partition("+")
    # a value such as "+whisper" would leave the language empty, and an empty
    # language makes the -v option swallow the next command-line argument
    if not lang: lang = default_language
else: variant = f.getfirst("v",variants[0])
if variant not in variants: variant=variants[0]
# Requested speech rate; anything that is not an integer within
# [minSpeed, maxSpeed] falls back to defaultSpeed.
speed = f.getfirst("s",str(defaultSpeed))
try: speed=int(speed)
except (TypeError, ValueError): speed=defaultSpeed
if not minSpeed <= speed <= maxSpeed: speed=defaultSpeed
# Text (or SSML) to speak. The form further down submits it in the field named "t".
t = f.getfirst("t","")
# NOTE(review): over-long input is discarded here, in the same way the checks
# below discard bad input - confirm that discarding (rather than truncating)
# is the intended policy for max_input_size.
if len(t)>max_input_size: t=""
if not type(t)==type(u""):
    # byte string: only accept it if it is valid UTF-8
    try: t.decode('utf-8')
    except (AttributeError, UnicodeError): t="" # not valid utf-8
if chr(0) in t: t="" # just in case
# fname / fname2 stay None unless the non-streaming branch creates a temporary
# directory, so the clean-up at the end of the script can test fname safely.
fname = fname2 = None
if len(t)>stream_if_input_bigger_than:
    # streaming - will need sox to convert
    if not getoutput("which sox 2>/dev/null"): raise Exception("Cannot find sox")
else:
    # not streaming (so can fill in length etc) - will need a writable file in a private tmp directory, preferably in memory
    worked = 0
    for tmp in ["/dev/shm/",os.getenv("TMPDIR"),"/tmp/",""]:
        if tmp is None: continue # TMPDIR is not set
        # os.path.join copes with a TMPDIR that lacks a trailing slash
        candidate = os.path.join(tmp, "espeak-cgi." + str(os.getpid()))
        try:
            os.mkdir(candidate, 0o700) # owner-only: the directory is meant to be private
        except (OSError, IOError): continue # INCLUDING if already exists - avoid symlink attacks
        fname = candidate
        worked = 1 ; break
    if not worked: raise Exception("Can't find anywhere to put temp file")
    fname2=fname+"/"+str(os.getpid())+".wav"
    open(fname2,"w").close() # raising exception if it's unwritable (try changing to a suitable directory)
# Name of the first installed locale ending in "utf8" / "utf-8" (any case),
# or an empty string when the system lists none - in case espeak can't find
# a utf-8 locale by itself.
find_utf8_locale = "locale -a|grep -i 'utf-*8$'|head -1"
loc=getoutput(find_utf8_locale).strip()
# NOTE(review): these three lines look like the tail of the getName(f) helper
# that the form-building code calls later on. Its "def" line and the statement
# that opens the file object "o" are not present here, and the indentation has
# been lost - restore them before relying on this code.
# Reads one line from o as bytes and, if it contains "name", returns its second
# whitespace-separated field as text.
line=getBuf(o).readline()
if u"name".encode('latin1') in line: return S(line.split()[1])
# Otherwise fall back to the last path component of f.
return f[f.rindex("/")+1:] # assumes it'll be a full pathname
def isDirectory(directory):
    """Return 1 if the process can change into directory, else 0.

    The test is done by actually calling os.chdir; the working directory
    that was current on entry is restored before returning.
    """
    startedIn = os.getcwd()
    try:
        os.chdir(directory)
    except: # was except OSError but some Python ports have been known to throw other things
        enterable = 0
    else:
        enterable = 1
    os.chdir(startedIn)
    return enterable
def getBuf(f):
    """Return the object to use for byte-level I/O on f.

    That is f.buffer when f has such an attribute (Python 3 text streams),
    otherwise f itself (Python 2 file objects).
    """
    return getattr(f, "buffer", f)
def doPipe(P,t):
    """Run the shell command P and feed t to its standard input.

    P -- shell command line, passed to os.popen
    t -- text or byte string to send; text is always encoded as UTF-8, so the
         bytes the command receives do not depend on the server's locale

    The pipe is closed (which waits for the command to finish) even if the
    write fails.
    """
    if type(t)==type(u""): t = t.encode('utf-8')
    if type("")==type(u""): # Python 3
        pipe = os.popen(P,"w")
        sink = pipe.buffer # byte-level stream underneath the text wrapper
    else: # Python 2
        pipe = os.popen(P,"wb")
        sink = pipe
    try: sink.write(t)
    finally: pipe.close()
def _header_safe(text):
    """Return text made safe for use inside a quoted HTTP header value.

    Control characters (including CR and LF, which would otherwise let the
    request inject extra response headers), double quotes, backslashes and
    slashes are each replaced by an underscore.
    """
    return re.sub(r'[\x00-\x1f\x7f"\\/]', "_", text)

if t and f.getfirst("qx",""):
    # The "View phonemes" button was used: send espeak's text output, not audio
    sys.stdout.write("Content-Type: text/plain; charset=utf-8\n\n")
    sys.stdout.flush() # help mathopd
    doPipe(prog+" -v "+lang+" -q -X -m 2>/dev/null",t)
elif t:
    prog_with_params = prog+" -v "+lang+"+"+variant+" -s "+str(speed)+" -m"
    # TODO -p 0-99 default 50 (pitch adjustment)
    # TODO -g wordgap * 10mS
    # t comes straight from the request, so it must be sanitised before it is
    # copied into the Content-Disposition header
    download_name = _header_safe(t)+"_"+_header_safe(lang)
    if len(t)>stream_if_input_bigger_than:
        sys.stdout.write("Content-Type: audio/basic\nContent-Disposition: attachment; filename=\""+download_name+".au\"\n\n") # using .au instead of .wav because Windows Media Player doesn't like incorrect length fields in wav. And make sure it's attachment otherwise Mac OS QuickTime etc can have problems when server is slow
        # problem is, WILL NEED CONVERTING for gradint (unless want to use "sox" on the Windows version before playing via winsound) (but the espeak no-length wav files will probably be wrong on that anyway). Should be OK because we're doing this only in the case of len(t)>stream_if_input_bigger_than.
        sys.stdout.flush() # help mathopd
        doPipe(prog_with_params+" --stdout 2>/dev/null | sox -t wav - -t au - 2>/dev/null",t)
    else: # not streaming
        doPipe(prog_with_params+" -w "+fname2+" 2>/dev/null",t)
        sys.stdout.write("Content-Type: audio/wav\nContent-Disposition: attachment; filename=\""+download_name+".wav\"\n\n")
        sys.stdout.flush()
        with open(fname2,"rb") as wav: getBuf(sys.stdout).write(wav.read())
else:
    # No text submitted: show the input form. (This must be an else branch,
    # otherwise the HTML would be appended to the audio or phoneme output above.)
    sys.stdout.write('Content-Type: text/html; charset=utf-8\n\n<HTML><head><meta name="viewport" content="width=device-width"></head><BODY>') # (specify utf-8 here in case accept-charset is not recognised, e.g. some versions of IE6)
    banner = S(getoutput(prog+" --help|head -3").strip())
    sys.stdout.write("This is espeak.cgi version "+version+", using <A HREF=http://espeak.sourceforge.net/>eSpeak</A> "+" ".join(banner.split()[1:]))
    if not loc: sys.stdout.write("<br>Warning: could not find a UTF-8 locale; espeak may malfunction on some languages")
    # NOTE(review): the result is captured but never written to the page - confirm whether it was meant to be shown
    warnings=S(getoutput(prog+" -q -x .").strip()) # make sure any warnings about locales are output
    sys.stdout.write("<FORM method=post accept-charset=UTF-8>Text or SSML: <INPUT TYPE=text NAME=t STYLE='width:80%'><br>Language: <SELECT NAME=l>")
    # Build the language list: top-level voice files, plus the files found one
    # level down in each subdirectory (remembering which subdirectory each came from)
    ld=os.listdir(voiceDir)
    directories = {}
    for entry in ld[:]: # iterate over a copy because ld is modified in the loop
        if entry in ["!v","default","mb"]: ld.remove(entry)
        elif isDirectory(voiceDir+"/"+entry):
            ld.remove(entry)
            for sub in os.listdir(voiceDir+"/"+entry):
                ld.append(sub)
                directories[sub]=entry
    ld.sort()
    for voice in ld:
        sys.stdout.write("<OPTION VALUE="+voice)
        if voice==lang: sys.stdout.write(" SELECTED")
        if voice in directories: name=getName(voiceDir+"/"+directories[voice]+"/"+voice)
        else: name=getName(voiceDir+"/"+voice)
        sys.stdout.write(">"+voice+" ("+name+")</OPTION>")
    sys.stdout.write("</SELECT> Voice: <SELECT NAME=v>")
    for v in variants:
        if v=="default": name="default"
        else: name=getName(voiceDir+"/!v/"+v)
        sys.stdout.write("<OPTION VALUE="+v)
        if v==variant: sys.stdout.write(" SELECTED")
        sys.stdout.write(">"+name+"</OPTION>")
    sys.stdout.write("</SELECT> Speed: <SELECT NAME=s>")
    for ss in list(range(minSpeed,maxSpeed,speedStep))+[maxSpeed]:
        sys.stdout.write("<OPTION VALUE="+str(ss))
        if ss==speed: sys.stdout.write(" SELECTED")
        sys.stdout.write(">"+str(ss)+"</OPTION>")
    sys.stdout.write("</SELECT> <INPUT TYPE=submit NAME=qx VALUE=\"View phonemes\"><center><big><INPUT TYPE=submit VALUE=SPEAK></big></center></FORM></BODY></HTML>")
# Clean up the temporary directory, if one was created. fname is only assigned
# on the non-streaming path, so the name may not exist at all at this point.
try: tmpDirToRemove = fname
except NameError: tmpDirToRemove = None
if tmpDirToRemove:
    import shutil
    shutil.rmtree(tmpDirToRemove, ignore_errors=True) # like "rm -rf", without going through a shell