# vocab2html.py

#!/usr/bin/env python

# vocab2html.py - converts vocab.txt to HTML,
# linking any vocab that is cached
# (resulting html file should be put in synthCache directory for the links to work)
# HTML markup in the comments is OK,
# e.g. to comment out a section, # <!-- ... # -->

# you can run vocab2html.py with command-line arguments
# - these will be passed to gradint

# if you have set the environment variable ESPEAK_CGI_URL, this will
# be used.  E.g.: export ESPEAK_CGI_URL="/~userID/espeak.cgi"
# (TODO: this script ignores the possibility of synthesizing phrases from partials)

# Version 1.2, (c) Silas S. Brown, License: GPL

from gradint import *
# Main script body: read gradint's vocabulary file line by line and print an
# HTML rendering of it on standard output.  Words that have a file in the
# synth cache become links to that file; otherwise, if ESPEAK_CGI_URL is set,
# they become links to that CGI; otherwise they are printed as plain text.

# With no synth cache configured, nothing can be linked to a cached file
if not synthCache: synthCache_contents = []

# Languages of the "="-separated columns on each vocab line; can be changed
# part-way through the file by a "set language(s)" directive
langs = [secondLanguage, firstLanguage]

# Loop-invariant setup: read the environment variable (and import urlencode,
# which is needed only when it is set) once, not once per uncached word
espeakCgiUrl = os.getenv("ESPEAK_CGI_URL")
if espeakCgiUrl:
    try: from urllib import urlencode # Python 2
    except ImportError: from urllib.parse import urlencode # Python 3

# "rU" asks Python 2 for universal newlines.  Python 3.11+ rejects the "U"
# flag with ValueError; plain text mode there translates newlines by default.
try: vocabIn = open(vocabFile, "rU")
except ValueError: vocabIn = open(vocabFile, "r")

# try/finally rather than "with", so the file is always closed while the
# script stays usable on old Python 2 interpreters
try:
    justHadP = 1 # true at the start and after a blank line: suppresses the next <BR>/<P>
    print ('<html><HEAD><META HTTP-EQUIV=Content-type CONTENT="text/html; charset=utf-8"><meta name="viewport" content="width=device-width"></HEAD><body>') # (assume utf8 in case there's any hanzi, but TODO what if using another charset for another language?)
    for line in vocabIn:
        lowered = line.lower()
        isSetLang = lowered.startswith("set language ") or lowered.startswith("set languages ")
        if isSetLang: langs = line.split()[2:] # everything after the two directive words
        if not line.strip():
            # blank line: paragraph break (collapsing runs of blank lines)
            if not justHadP: print ("<P>")
            justHadP = 1
            continue
        if not justHadP: print ("<BR>")
        if isSetLang or lowered.startswith("limit on") or lowered.startswith("limit off") or lowered.startswith("begin poetry") or lowered.startswith("end poetry"):
            # directive line: shown emphasised, verbatim
            print ("<EM>%s</EM>" % (line,))
        elif lowered.startswith("#"):
            # comment (and may be part of multi-line comment).  The text is
            # passed through unescaped so that HTML markup in comments works;
            # the visible "#" is omitted when the comment opens an HTML comment.
            if not line[1:].strip().startswith("<!--"): print ("<small>#</small> ")
            print (line[1:])
        else:
            # vocab line: pair each "="-separated field with its language
            # (zip stops at the shorter of the two sequences)
            out = []
            for lang, word in zip(langs, [field.strip() for field in line.split("=")]):
                # S() comes from gradint; presumably it normalises to the
                # native string type - TODO confirm
                lang, word = S(lang), S(word)
                # Default cache filename is <lowercased word>_<lang><dotwav>;
                # synthCache_transtbl may map it to another name, otherwise
                # the default name itself is used
                defaultName = word.lower() + "_" + lang + dotwav
                fname = S(synthCache_transtbl.get(defaultName, defaultName))
                found = 0
                # accept either the dotwav name or its dotmp3 counterpart
                for candidate in [fname, fname.replace(dotwav, dotmp3)]:
                    if candidate in synthCache_contents:
                        out.append("<A HREF=\"" + candidate + "\">" + word + "</A>")
                        found = 1
                        break
                if not found:
                    if espeakCgiUrl:
                        out.append("<A HREF=\"" + espeakCgiUrl + "?" + urlencode({"t":word,"l":lang}) + "\">" + word + "</A>")
                    else: out.append(word)
            print (" = ".join(out))
        justHadP = 0
finally:
    vocabIn.close()
print ("</body></html>")