Newer
Older
# vocab2html.py - converts vocab.txt to HTML,
# linking any vocab that is cached
# (resulting html file should be put in synthCache directory for the links to work)
# HTML markup in the comments is OK,
# e.g. to comment out a section, # <!-- ... # -->
# you can run vocab2html.py with command-line arguments
# - these will be passed to gradint
# if you have set the environment variable ESPEAK_CGI_URL, this will
# be used. E.g.: export ESPEAK_CGI_URL="/~userID/espeak.cgi"
# (TODO: this script ignores the possibility of synthesizing phrases from partials)
# Version 1.2, (c) Silas S. Brown, License: GPL
from gradint import *
if not synthCache: synthCache_contents = []
langs=[secondLanguage,firstLanguage]
o=open(vocabFile,"rU")
justHadP=1
print ('<html><HEAD><META HTTP-EQUIV=Content-type CONTENT="text/html; charset=utf-8"><meta name="viewport" content="width=device-width"></HEAD><body>') # (assume utf8 in case there's any hanzi, but TODO what if using another charset for another language?)
for l in o:
l2=l.lower()
if l2.startswith("set language ") or l2.startswith("set languages "): langs=l.split()[2:]
if not l.strip():
# blank line
if l2.startswith("set language ") or l2.startswith("set languages ") or l2.startswith("limit on") or l2.startswith("limit off") or l2.startswith("begin poetry") or l2.startswith("end poetry"):
elif l2.startswith("#"):
# comment (and may be part of multi-line comment)
if not l[1:].strip().startswith("<!--"): print ("<small>#</small> ")
print (l[1:])
else:
# vocab line
langsAndWords=zip(langs,map(lambda x:x.strip(),l.split("=")))
out = []
for lang,word in langsAndWords:
lang,word = S(lang),S(word)
fname=S(synthCache_transtbl.get(word.lower()+"_"+lang+dotwav,word.lower()+"_"+lang+dotwav))
found = 0
for fn2 in [fname,fname.replace(dotwav,dotmp3)]:
if fn2 in synthCache_contents:
out.append("<A HREF=\""+fn2+"\">"+word+"</A>")
found = 1 ; break
if not found:
if os.getenv("ESPEAK_CGI_URL"):
try: from urllib import urlencode # Python 2
except: from urllib.parse import urlencode # Python 3
out.append("<A HREF=\""+os.getenv("ESPEAK_CGI_URL")+"?"+urlencode({"t":word,"l":lang})+"\">"+word+"</A>")