espeak.cgi

#!/usr/bin/env python
#  (Python 2 or Python 3)

# espeak.cgi - a CGI script for the eSpeak speech synthesizer

# (c) 2008,2011,2020 Silas S. Brown, License: GPL
version="1.3"

#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.

# With most webservers you should be able to put this
# in your public_html and do chmod +x.  You will also need to
# install eSpeak on the system (if you can't do it system-wide
# then add code here to modify PATH and ESPEAK_DATA_PATH, e.g.
# import os ; os.environ["PATH"]="/my/espeak/path:"+os.environ["PATH"]
# )

import cgi, cgitb ; cgitb.enable()
f = cgi.FieldStorage()

default_language = "en"
stream_if_input_bigger_than = 100
max_input_size = 1000

minSpeed, defaultSpeed, maxSpeed, speedStep = 80,170,370,30 # (NB check defaultSpeed=minSpeed+integer*speedStep)

import os,re,sys
try: from commands import getoutput # Python 2
except: from subprocess import getoutput # Python 3
if getoutput("which espeak 2>/dev/null"): prog="espeak"
elif getoutput("which speak 2>/dev/null"): prog="speak"
else: raise Exception("Cannot find espeak")

def S(x):
    if type("")==type(u""): # Python 3
        try: return x.decode('utf-8') # in case byte-string
        except: pass
    return x
lang = S(f.getfirst("l",default_language))
if len(lang)>10 or not re.match("^[a-z0-9-+]*$",lang): lang=default_language

if "ESPEAK_DATA_PATH" in os.environ: voiceDir = os.environ["ESPEAK_DATA_PATH"]
elif os.path.exists("/usr/share/espeak-data"): voiceDir = "/usr/share/espeak-data"
elif os.path.exists("/usr/lib/x86_64-linux-gnu/espeak-data"): voiceDir = "/usr/lib/x86_64-linux-gnu/espeak-data"
else:
    print ("Content-type: text-plain\n\nUnable to find ESPEAK_DATA_PATH")
    raise SystemExit
voiceDir += "/voices"

variants = os.listdir(voiceDir+"/!v")
if "whisper" in variants and "wisper" in variants: variants.remove("wisper")
variants.sort() ; variants.insert(0,"default")
if "+" in lang:
    variant=lang[lang.index("+")+1:]
    lang=lang[:lang.index("+")]
else: variant = f.getfirst("v",variants[0])
if not variant in variants: variant=variants[0]

speed = f.getfirst("s",str(defaultSpeed))
try: speed=int(speed)
except: speed=defaultSpeed
if speed<minSpeed or speed>maxSpeed: speed=defaultSpeed

t = S(f.getfirst("t",""))
if len(t)>max_input_size: t=""
else:
    if not type(t)==type(u""):
        try: t.decode('utf-8')
        except: t="" # not valid utf-8
    if chr(0) in t: t="" # just in case

if len(t)>stream_if_input_bigger_than:
    # streaming - will need sox to convert
    if not getoutput("which sox 2>/dev/null"): raise Exception("Cannot find sox")
    fname=None
else:
    # not streaming (so can fill in length etc) - will need a writable file in a private tmp directory, preferably in memory
    worked = 0
    for tmp in ["/dev/shm/",os.getenv("TMPDIR"),"/tmp/",""]:
        fname = tmp + "espeak-cgi." + str(os.getpid())
        try:
            os.mkdir(fname)
            worked = 1 ; break
        except: pass # INCLUDING if already exists - avoid symlink attacks
    if not worked: raise Exception("Can't find anywhere to put temp file")
    fname2=fname+"/"+str(os.getpid())+".wav"
    open(fname2,"w") # raising exception if it's unwritable (try changing to a suitable directory)

# in case espeak can't find a utf-8 locale
loc=getoutput("locale -a|grep -i 'utf-*8$'|head -1").strip()
if loc: os.environ["LC_CTYPE"]=loc

def getName(f):
    f = S(f)
    o=open(f)
    line=""
    for t in range(10):
        line=getBuf(o).readline()
        if u"name".encode('latin1') in line: return S(line.split()[1])
    return f[f.rindex("/")+1:] # assumes it'll be a full pathname
def isDirectory(directory):
    oldDir = os.getcwd()
    try:
        os.chdir(directory)
        ret = 1
    except: ret = 0 # was except OSError but some Python ports have been known to throw other things
    os.chdir(oldDir)
    return ret

def getBuf(f):
    if hasattr(f,"buffer"): return f.buffer # Python 3
    else: return f # Python 2
def doPipe(P,t):
    if type("")==type(u""): # Python 3
        P = os.popen(P,"w")
        if type(t)==type(u""): P.write(t)
        else: P.buffer.write(t)
        P.close()
    else: os.popen(P,"wb").write(t) # Python 2

if t and f.getfirst("qx",""):
    sys.stdout.write("Content-Type: text/plain; charset=utf-8\n\n")
    sys.stdout.flush() # help mathopd
    doPipe(prog+" -v "+lang+" -q -X -m 2>/dev/null",t)
elif t:
    prog_with_params = prog+" -v "+lang+"+"+variant+" -s "+str(speed)+" -m"
    # TODO -p 0-99 default 50 (pitch adjustment)
    # TODO -g wordgap * 10mS
    if len(t)>stream_if_input_bigger_than:
        sys.stdout.write("Content-Type: audio/basic\nContent-Disposition: attachment; filename=\""+t+"_"+lang+".au\"\n\n") # using .au instead of .wav because Windows Media Player doesn't like incorrect length fields in wav.  And make sure it's attachment otherwise Mac OS QuickTime etc can have problems when server is slow
        # problem is, WILL NEED CONVERTING for gradint (unless want to use "sox" on the Windows version before playing via winsound) (but the espeak no-length wav files will probably be wrong on that anyway).  Should be OK because we're doing this only in the case of len(t)>stream_if_input_bigger_than.
        sys.stdout.flush() # help mathopd
        doPipe(prog_with_params+" --stdout 2>/dev/null | sox -t wav - -t au - 2>/dev/null",t)
    else: # not streaming
        doPipe(prog_with_params+" -w "+fname2+" 2>/dev/null",t)
        sys.stdout.write("Content-Type: audio/wav\nContent-Disposition: attachment; filename=\""+t+"_"+lang+".wav\"\n\n")
        sys.stdout.flush()
        getBuf(sys.stdout).write(open(fname2,"rb").read())
else:
    sys.stdout.write('Content-Type: text/html; charset=utf-8\n\n<HTML><head><meta name="viewport" content="width=device-width"></head><BODY>') # (specify utf-8 here in case accept-charset is not recognised, e.g. some versions of IE6)
    banner = S(getoutput(prog+" --help|head -3").strip())
    sys.stdout.write("This is espeak.cgi version "+version+", using <A HREF=http://espeak.sourceforge.net/>eSpeak</A> "+" ".join(banner.split()[1:]))
    if not loc: sys.stdout.write("<br>Warning: could not find a UTF-8 locale; espeak may malfunction on some languages")
    warnings=S(getoutput(prog+" -q -x .").strip()) # make sure any warnings about locales are output
    if warnings: sys.stdout.write("<br>"+warnings)
    sys.stdout.write("<FORM method=post accept-charset=UTF-8>Text or SSML: <INPUT TYPE=text NAME=t STYLE='width:80%'><br>Language: <SELECT NAME=l>")
    ld=os.listdir(voiceDir)
    directories = {}
    for f in ld[:]:
        if f in ["!v","default","mb"]: ld.remove(f)
        elif isDirectory(voiceDir+"/"+f):
            ld.remove(f)
            for f2 in os.listdir(voiceDir+"/"+f):
                ld.append(f2)
                directories[f2]=f
    ld.sort()
    for f in ld:
        sys.stdout.write("<OPTION VALUE="+f)
        if f==lang: sys.stdout.write(" SELECTED")
        if f in directories: name=getName(voiceDir+"/"+directories[f]+"/"+f)
        else: name=getName(voiceDir+"/"+f)
        sys.stdout.write(">"+f+" ("+name+")</OPTION>")
    sys.stdout.write("</SELECT> Voice: <SELECT NAME=v>")
    for v in variants:
        if v=="default": name="default"
        else: name=getName(voiceDir+"/!v/"+v)
        sys.stdout.write("<OPTION VALUE="+v)
        if v==variant: sys.stdout.write(" SELECTED")
        sys.stdout.write(">"+name+"</OPTION>")
    sys.stdout.write("</SELECT> Speed: <SELECT NAME=s>")
    for ss in list(range(minSpeed,maxSpeed,speedStep))+[maxSpeed]:
        sys.stdout.write("<OPTION VALUE="+str(ss))
        if ss==speed: sys.stdout.write(" SELECTED")
        sys.stdout.write(">"+str(ss)+"</OPTION>")
    sys.stdout.write("</SELECT> <INPUT TYPE=submit NAME=qx VALUE=\"View phonemes\"><center><big><INPUT TYPE=submit VALUE=SPEAK></big></center></FORM></BODY></HTML>")
if fname: os.system("rm -rf \""+fname+"\"") # clean up temp dir