Compare revisions

10a6ad67 · 10a6ad67 · a2fdf18a · a2fdf18a · a2fdf18a · a2fdf18a
--- a/charlearn/charlearn.py
+++ b/charlearn/charlearn.py
+#!/usr/bin/env python
+# (should work in either Python 2 or Python 3)
+
+# Character-learning support program
+# (C) 2006-2013, 2020 Silas S. Brown.  Version 0.3.
+
+#    This program is free software; you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation; either version 3 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+
+# Where to find history:
+# on GitHub at https://github.com/ssb22/gradint
+# and on GitLab at https://gitlab.com/ssb22/gradint
+# and on BitBucket https://bitbucket.org/ssb22/gradint
+# and at https://gitlab.developers.cam.ac.uk/ssb22/gradint
+# and in China: https://gitee.com/ssb22/gradint
+
+listenAddr='127.0.0.1'
+firstPortNo=9876
+
+tableFile = "characters.txt"  # for first-time setup
+knownFile = "known-chars.txt" # ditto
+dumpFile = "charlearn-data"   # for saving progress
+reviseFile = "revise.txt"     # for requesting more revision next time (will be deleted after integration into progress)
+
+import sys,os.path
+if sys.argv[-1].startswith("--"): gradint = None # (don't need to speak if we're processing options, see at end)
+elif os.path.isfile("gradint.py"): import gradint
+else: gradint = None # won't speak characters
+
+import random,os,time,socket
+try: from subprocess import getoutput
+except: from commands import getoutput
+try: from cPickle import Pickler,Unpickler
+except: from pickle import Pickler,Unpickler
+try: from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
+except: from http.server import BaseHTTPRequestHandler, HTTPServer
+try: import thread
+except: import _thread as thread
+
+# Sort key used throughout: orders character objects by their 'priority'
+# attribute (so a list sorted with it has the lowest number first).
+def byPriority(a):
+  return a.priority
+
+# Priority numbers: the character lists are sorted ascending on priority
+# before choosing what to show, so LOWER numbers come up sooner.
+priorityIfGotWrong = -10 # given to a character answered wrongly (and to characters listed in reviseFile)
+priorityOfOtherCharWrong = -4 # upper limit put on the character it was mistaken for
+priorityOfGroupWrong = 0 # upper limit put on the other members of that similarity group (also the starting value for characters listed in knownFile)
+maxShowInGroup = 5 ; priorityBreakGroup = 10 # "not to be confused with" lists at most maxShowInGroup-1 others; a group member whose priority has reached priorityBreakGroup is dropped from its group
+initSessionLen = sessionLen = 2 ; maxSessionLen = 10 ; sampleConst = 1.5 # sessionLen characters per session (raised by updateSessionLen up to maxSessionLen), sampled from the first int(sessionLen*sampleConst) by priority
+
+def updateSessionLen():
+  # Raise the global sessionLen to the session length suggested by
+  # thechars.countKnown() (its second return value, rounded up from .05)
+  # if that is larger, then cap the result at maxSessionLen.
+  global sessionLen
+  suggested = int(thechars.countKnown()[1]+0.95)
+  # (did have /sampleConst after countKnown()[1] but doesn't seem necessary)
+  if suggested > sessionLen: sessionLen = suggested
+  if sessionLen > maxSessionLen: sessionLen = maxSessionLen
+
+already_spoken = {} # hanzi already passed to SingleChar.speak (which returns early for these); emptied again by htmlString step 3 when no session is in progress
+
+gradint_busy = 0 # set to 1 by SingleChar.speak before it starts a speak_bkg thread (speak waits while it is set); speak_bkg sets it back to 0
+def speak_bkg():
+  # Thread body started by SingleChar.speak: run gradint's synthesis
+  # call, then clear the busy flag that speak() waits on.
+  global gradint_busy
+  gradint.just_synthesize()
+  gradint_busy = 0
+
+class SingleChar:
+  # One character of the table together with its revision state.
+  #   hanzi, pinyin   - the character and its reading, as passed to the constructor
+  #   priority        - revision score; lists are sorted ascending on it, so lower = shown sooner
+  #   similarityGroup - number of the "easily confused" group it belongs to, or None
+  #   supposedToKnow  - set to 1 once a page for the character has been generated
+  def __init__(self,hanzi,pinyin):
+    self.hanzi = hanzi ; self.pinyin = pinyin
+    self.priority = 0 ; self.similarityGroup = None
+    self.supposedToKnow = 0
+  def formatPinyin(self): return self.pinyin.replace("\n","<BR>") # (could make it into actual tone marks also)
+  def htmlString(self,parent,step=1,left=0):
+    # Returns the HTML page (as text, not yet encoded) for this character.
+    #   parent - the CharDbase; its charset, chars, thisSession, processRequest() and save() are used
+    #   step   - 1: ask whether the character is known (Yes -> 2, No -> 0)
+    #            2: show the reading and ask whether the user was right (Yes -> -1, No -> 3)
+    #            3: ask which other character it was mistaken for
+    #            0: got it wrong;  negative: got it right
+    #   left   - number of characters remaining, shown in the step 1 question
+    # Side effects: marks the character as supposedToKnow; steps <= 0 update
+    # the priority and call parent.save(); steps 0 and 2 speak the character.
+    self.supposedToKnow = 1
+    r=u'<html><head><title>hanzi</title><meta http-equiv="Content-Type" content="text/html; charset=%s"></head><body><h1>%s</h1>' % (parent.charset,self.hanzi)
+    if step==1: r+=self.yesno('Do you know what this is? (%d remaining)' % left,2,0)
+    else:
+      r += self.formatPinyin() + "<HR>"
+      if step<=0:
+        if self.similarityGroup:
+          # list the other members of the same group, most urgent first
+          l = []
+          for c in parent.chars:
+            if c.similarityGroup == self.similarityGroup and not id(c)==id(self): l.append(c)
+          l.sort(key=byPriority)
+          r+="Not to be confused with:"
+          for c in l[:maxShowInGroup-1]: r+='<h1>%s</h1>%s' % (c.hanzi,c.formatPinyin())
+          r += '<hr>'
+        if parent.thisSession:
+          r+='<A HREF="/%s">Next character</A>' % str(random.random())
+          if step==-1:
+            # got it right - might as well take that link automatically
+            r=parent.processRequest("/").decode(parent.charset).replace('</body></html>','')
+        else:
+          # session finished: offer to quit or to start another one
+          updateSessionLen()
+          r+='<A HREF="/quit">Quit</A> | <A HREF="/%s">Another %d</A>' % (str(random.random()),sessionLen)
+        if step==0:
+          self.priority=priorityIfGotWrong
+          self.speak(parent.charset)
+        else:
+          # knew it
+          self.priority += 1
+          if self.priority > 0:
+            if self.priority < 25000: self.priority *= 2 # give new characters a chance
+            else: self.priority = 50000 # level off
+          else: self.priority //= 2 # TRY this for a while - will make chars got-wrong recover more quickly (again to give new chars a chance).  Floor division, so that Python 3 gives the same whole numbers as Python 2's "/" on integers
+        parent.save()
+      elif step==2:
+        r+=self.yesno('Did you get it right?',-1,3)
+        self.speak(parent.charset)
+      elif step==3:
+        r+='What did you think it was?<P>'
+        toOut = [] # (pinyin,hanzi,id,is-in-same-group)
+        for c in parent.chars:
+          if c.similarityGroup and c.similarityGroup==self.similarityGroup: sameGrp=True
+          else: sameGrp=False # need to do it this way because Python sometimes returns 'None' from that expression
+          if c.supposedToKnow and not id(c)==id(self): toOut.append((c.pinyin,c.hanzi,id(c),sameGrp)) # NOT formatPinyin, because may want to i-search it
+        toOut.sort()
+        if len(toOut) > 20: r+="(Hint: On some browsers you can use find-as-you-type)<P>"
+        for outSameGroup in [True,False]:
+          oldL=len(r)
+          for p,hanzi,val,sameGrp in toOut:
+            if sameGrp==outSameGroup: r+='%s <A HREF="/%d_%d">%s</A><BR>' % (hanzi,id(self),val,p)
+          if len(r)>oldL and outSameGroup: r += '<HR>' # between chars in same group and others
+        r+='<A HREF="/%d=0">None of the above</A>' % id(self)
+        if not parent.thisSession:
+          global already_spoken ; already_spoken = {} # reset it so "Another N" does speak them
+    return r + '</body></html>'
+  def speak(self,charset):
+    # Speaks the character through gradint (when that module was loaded),
+    # at most once until already_spoken is reset.  charset is used only if
+    # self.hanzi is a byte string that still needs decoding.
+    if self.hanzi in already_spoken: return
+    already_spoken[self.hanzi] = 1 # don't set a self. attribute - it'll get pickled for next session
+    if gradint:
+      text = self.hanzi
+      if not isinstance(text,type(u"")): text = text.decode(charset) # only byte strings need decoding: CharDbase.readTable stores already-decoded text, and calling decode() on that fails
+      gradint.justSynthesize = text.encode('utf-8') # NOTE(review): assumes gradint wants UTF-8 bytes here, as the previous code supplied - confirm against gradint
+      global gradint_busy
+      while gradint_busy: time.sleep(0.5) # wait for any previous speak_bkg thread to finish
+      gradint_busy = 1
+      thread.start_new_thread(speak_bkg,())
+  def yesno(self,question,ifyes,ifno): return question+'<P><A ID="y" HREF="/%d=%d">Yes</A><SCRIPT>document.getElementById("y").focus()</SCRIPT> | <A HREF="/%d=%d">No</A>' % (id(self),ifyes,id(self),ifno) # (don't use the js anywhere except yes/no, because 'next character' etc may have too much on the screen and we don't want the focus() to scroll)
+the_speaker_process = None
+def terminate_server():
+  # portable signal.alarm(1): wait one second, then abort the whole process
+  time.sleep(1)
+  os.abort()
+def B(s):
+    # Returns s as a byte string: encoded as UTF-8 when ordinary string
+    # literals are Unicode (Python 3), otherwise returned unchanged.
+    if type(u"")!=type(""): return s
+    return s.encode('utf-8')
+def S(s):
+    # Returns s as an ordinary string: where ordinary string literals are
+    # Unicode (Python 3), anything of another type is decoded as UTF-8;
+    # everything else is returned unchanged.
+    literalsAreUnicode = type(u"")==type("")
+    if literalsAreUnicode and type(s)!=type(""): return s.decode('utf-8')
+    return s
+class CharDbase:
+  # The character database: every SingleChar (self.chars), the queue of
+  # characters still to show in the current session (self.thisSession,
+  # consumed with pop()), the similarity-group counter and the charset of
+  # the table.  The whole object is pickled to dumpFile by save().
+  def __init__(self):
+    self.counter = 0 ; self.nextPriority = 0
+    self.similarityGroups = 0
+    self.chars = [] ; self.thisSession = []
+    self.readTable() ; self.readKnown() ; self.readRevise()
+  def debug_printKnown(self):
+    # Prints "priority hanzi" for every character marked supposedToKnow
+    print ("-*- coding: %s -*-" % (self.charset,))
+    for c in self.chars:
+      if c.supposedToKnow: print ("%s %s" % (c.priority,c.hanzi))
+  def readTable(self):
+    # Reads tableFile.  An optional first line "charset: NAME" sets
+    # self.charset (otherwise iso-8859-1); every other non-blank line is
+    # "hanzi pinyin".  If characters are already loaded, only new ones are added.
+    addingTo = 0
+    if self.chars: addingTo = 1
+    f = open(tableFile,'rb')
+    try: lines = f.readlines()
+    finally: f.close()
+    if lines and lines[0].startswith(B("charset:")): # "lines and": an empty table file is not an error
+      self.charset = S(lines[0].split()[-1])
+      lines = lines[1:]
+    else: self.charset = "iso-8859-1"
+    for line in lines: self.addCharFromFreqTable(line.decode(self.charset),addingTo)
+  def _firstFields(self,fname):
+    # Returns the first whitespace-separated field of each non-blank line
+    # of fname, or [] if the file cannot be opened.  Lines are decoded with
+    # self.charset so the fields compare equal to the hanzi stored by readTable.
+    try: f = open(fname,'rb')
+    except IOError: return []
+    try: lines = f.readlines()
+    finally: f.close()
+    r = []
+    for line in lines:
+      fields = line.decode(self.charset).split()
+      if fields: r.append(fields[0]) # blank lines have no fields: skip them
+    return r
+  def readKnown(self):
+    # Marks every character listed in knownFile (if it exists) as known
+    for hanzi in self._firstFields(knownFile): self.makeCharKnown(hanzi)
+  def readRevise(self):
+    # Requests revision of every character listed in reviseFile (if it exists)
+    for hanzi in self._firstFields(reviseFile): self.makeCharRevise(hanzi)
+  def makeCharKnown(self,hanzi):
+    if not hanzi: return # blank lines etc
+    for c in self.chars:
+      if c.hanzi==hanzi:
+        if not c.supposedToKnow:
+          c.supposedToKnow = 1
+          c.priority = priorityOfGroupWrong # just to check
+        return
+    print ("WARNING: character '%s' in %s was not in %s - ignoring" % (repr(hanzi),knownFile,tableFile))
+  def makeCharRevise(self,hanzi):
+    if not hanzi: return # blank lines etc
+    for c in self.chars:
+      if c.hanzi==hanzi:
+        c.supposedToKnow = 1
+        c.priority = priorityIfGotWrong
+        return
+    print ("WARNING: character '%s' in %s was not in %s - ignoring" % (repr(hanzi),reviseFile,tableFile))
+  def addCharFromFreqTable(self,line,checkAlreadyThere):
+    # Adds the character described by one (decoded) table line "hanzi pinyin";
+    # a literal \n in the pinyin stands for a line break.  Blank lines are
+    # ignored.  With checkAlreadyThere set, a hanzi already present is not added again.
+    if not line.strip(): return # blank line: nothing to add
+    hanzi,pinyin = line.split(None,1)
+    c=SingleChar(hanzi,pinyin.replace("\\n","\n"))
+    c.priority = self.nextPriority ; self.nextPriority += 1
+    if checkAlreadyThere:
+      for c2 in self.chars:
+        if c2.hanzi == hanzi: return
+    self.chars.append(c)
+  def charIdToChar(self,charId):
+    # Returns the character whose id() is charId (the number used in page links)
+    char = None
+    for c in self.chars:
+      if id(c)==charId:
+        char = c ; break
+    assert char ; return char
+  def processRequest(self,path):
+    # Returns the page for the requested path, encoded in self.charset:
+    #   /ID=STEP        step STEP of the character with that id
+    #   /ID_ID2         character ID was mistaken for character ID2: put them in one similarity group
+    #   /status         list of known characters with their priorities
+    #   /checkallknown  queue every known character, then show the next one
+    #   anything else   show the next character of the session
+    if '=' in path:
+      charId,step = map(lambda x:int(x),path[1:].split('='))
+      char = self.charIdToChar(charId)
+    elif '_' in path: # grouping
+      char,char2 = map(lambda x:self.charIdToChar(int(x)),path[1:].split('_'))
+      if not char.similarityGroup and not char2.similarityGroup: # new group:
+        self.similarityGroups += 1
+        char.similarityGroup = char2.similarityGroup = self.similarityGroups
+      elif not char.similarityGroup: char.similarityGroup = char2.similarityGroup
+      elif not char2.similarityGroup: char2.similarityGroup = char.similarityGroup
+      elif not char.similarityGroup == char2.similarityGroup: # merge 2 different groups:
+        for c in self.chars:
+          if c.similarityGroup == char2.similarityGroup: c.similarityGroup = char.similarityGroup
+      step = 0 # normal got-wrong for this character
+      char.priority = priorityIfGotWrong # here also, for the loop below
+      char2.priority = min(char2.priority,priorityOfOtherCharWrong)
+      for c in self.chars:
+        if c.similarityGroup == char.similarityGroup:
+          if c.priority >= priorityBreakGroup: c.similarityGroup=None
+          elif c.priority > priorityOfGroupWrong: c.priority = priorityOfGroupWrong
+    elif path=="/status":
+      self.chars.sort(key=byPriority)
+      cp=self.chars[:] ; r='<html><head><title>Current Status</title><meta http-equiv="Content-Type" content="text/html; charset=%s"></head><body><h2>Current Status</h2>(score/priority number is shown to the left of each item)<br>' % (self.charset,)
+      while cp:
+        if not cp[0].supposedToKnow:
+          del cp[0] ; continue
+        # thisGrp = indexes (into cp) of the first item plus the members of its group that are still below priorityBreakGroup
+        if cp[0].priority >= priorityBreakGroup: thisGrp=[0]
+        else: thisGrp=list(filter(lambda x:x==0 or (cp[x].similarityGroup and cp[x].similarityGroup==cp[0].similarityGroup and cp[x].priority < priorityBreakGroup),range(len(cp))))
+        if len(thisGrp)>1 and not r.endswith("<hr>"): r+="<hr>"
+        if len(thisGrp)>1: r+="<em>"+str(len(thisGrp))+" similar items:</em><br>"
+        for g in thisGrp: r += str(cp[g].priority)+": "+cp[g].hanzi+" "+cp[g].pinyin+"<br>"
+        if len(thisGrp)>1: r+="<hr>"
+        thisGrp.reverse() # delete from the highest index down, so the remaining indexes stay valid
+        for toDel in thisGrp: del cp[toDel]
+      return (r+"</body></html>").encode(self.charset)
+    else:
+      if path=="/checkallknown": self.thisSession = list(filter(lambda x:x.supposedToKnow,self.chars)) # TODO: Document this URL
+      char,step = self.chooseChar(),1
+    return char.htmlString(self,step,len(self.thisSession)).encode(self.charset)
+  def chooseChar(self):
+    # Returns the next character to show, first filling the session queue
+    # (from the lowest-priority characters) if it is empty
+    if not self.thisSession:
+      self.chars.sort(key=byPriority)
+      if sessionLen==initSessionLen:
+        self.thisSession = self.chars[:sessionLen] # introduce in order the first time (especially if the second one is just a straight line ("yi1"), as one beginner thought the program had gone wrong when he saw this)
+        self.thisSession.reverse() # because taken out by pop()
+      else: self.thisSession = random.sample(self.chars[:int(sessionLen*sampleConst)],sessionLen) # TODO need a better way than that.  NB high priority should be VERY likely, but others should have a chance.  try as-is for now
+    return self.thisSession.pop()
+  def save(self):
+    # Pickles the whole database to dumpFile (and makes sure the file is closed)
+    f = open(dumpFile,"wb")
+    try: Pickler(f,-1).dump(self)
+    finally: f.close()
+  def countKnown(self):
+    # Returns (charsSeen,sessnLen,secure,insecure):
+    #   charsSeen - how many characters are marked supposedToKnow
+    #   sessnLen  - how many of those had been counted (going through the
+    #               characters in priority order) at the last point where
+    #               exactly two not-yet-known characters had been passed
+    #   secure    - hanzi of the known characters with priority > 0
+    #   insecure  - hanzi of the other known characters
+    charsSeen = sessnLen = charsSecure = newChars = 0
+    secure=[] ; insecure=[]
+    self.chars.sort(key=byPriority)
+    for c in self.chars:
+      if c.supposedToKnow:
+        charsSeen += 1
+        if c.priority>0: secure.append(c.hanzi)
+        else: insecure.append(c.hanzi)
+      else: newChars += 1
+      if newChars == 2: sessnLen = charsSeen
+    return charsSeen,sessnLen,secure,insecure
+
+# Start-up: load the saved progress if there is any, otherwise build a
+# fresh database from the table (and known/revise) files.
+try:
+  dumped = open(dumpFile,"rb")
+except IOError: dumped = None
+if dumped:
+  # NOTE: unpickling can run code stored in the file, so dumpFile must be a progress file you trust
+  thechars = Unpickler(dumped).load()
+  dumped.close()
+  thechars.thisSession = [] # don't resume whatever session was in progress when it was saved
+  # re-read any input file that was modified after the progress was last saved
+  if os.stat(tableFile).st_mtime > os.stat(dumpFile).st_mtime: thechars.readTable()
+  try:
+    if os.stat(knownFile).st_mtime > os.stat(dumpFile).st_mtime: thechars.readKnown()
+  except OSError: pass # (knownFile could not be examined, e.g. it does not exist)
+  try:
+    if os.stat(reviseFile).st_mtime > os.stat(dumpFile).st_mtime: thechars.readRevise()
+  except OSError: pass # (likewise for reviseFile)
+  updateSessionLen()
+else:
+  thechars=CharDbase()
+
+class RequestHandler(BaseHTTPRequestHandler):
+  # Serves the pages generated by thechars.processRequest
+  def do_GET(self):
+    # Paths starting /fav get a 404; /quit sends the status page and then
+    # stops the server; every other path is passed to processRequest.
+    requested = self.path
+    if requested.startswith("/fav"):
+      self.send_response(404)
+      self.end_headers()
+      return
+    self.send_response(200)
+    self.send_header("Content-type","text/html; charset="+thechars.charset)
+    self.end_headers()
+    if not requested.startswith("/quit"):
+      self.wfile.write(thechars.processRequest(requested))
+    else:
+      r = thechars.processRequest("/status").decode(thechars.charset)
+      cut = r.index("<body>")+6 # position just after the <body> tag
+      r = r[:cut]+"Server terminating."+r[cut:]
+      self.wfile.write(r.encode(thechars.charset))
+      thread.start_new_thread(terminate_server,()) # can terminate the server after this request
+    self.wfile.close() # needed or will wait for bkg speaking processes etc
+def do_session():
+  # Runs the web interface: starts the HTTP server on the first port it can
+  # get in firstPortNo..firstPortNo+99, tries to open a web browser on it,
+  # prints the address in case that did not work, then serves requests.
+  portNo = firstPortNo ; server = None
+  while portNo < firstPortNo+100:
+    try:
+      server = HTTPServer((listenAddr,portNo),RequestHandler)
+      break
+    except socket.error: portNo += 1 # could not use this port: try the next one
+  assert server, "Couldn't find a port to run the server on"
+  # (The path part of the URLs below is just a random number: any path that processRequest does not recognise shows the next character.  Presumably the number is there to make each URL distinct - confirm.)
+  if ("win" not in sys.platform) and getoutput("which x-www-browser 2>/dev/null"): # (try to find x-www-browser, but not on windows/cygwin/darwin)
+    os.system("x-www-browser http://localhost:%d/%s &" % (portNo,str(random.random()))) # shouldn't need a sleep as should take a while to start anyway
+  else:
+    try:
+      import webbrowser
+      webbrowser.open_new("http://localhost:%d/%s" % (portNo,str(random.random())))
+    except ImportError: pass # fall through to command-line message
+  # Do this as well, in case that command failed:
+  print ("") ; print ("") ; print ("")
+  print ("Server running.  If a web browser does not appear automatically,")
+  print ("please start one yourself and go to")
+  print ("http://localhost:%d/%d" % (portNo,random.randint(1,99999)))
+  print ("") ; print ("") ; print ("")
+  server.serve_forever()
+
+# Command-line handling: only the last argument is examined.
+#   --count        print how many characters are known and how many of those seem secure
+#   --show-secure  print the characters that seem secure
+#   --show-wfx     print all characters as a Wenlin Flashcard XML file
+#   anything else  run the web session
+if sys.argv[-1]=='--count':
+  x,y,sec,insec=thechars.countKnown()
+  print ("%d (of which %d seem secure)" % (x,len(sec)))
+elif sys.argv[-1]=='--show-secure':
+  x,y,sec,insec=thechars.countKnown()
+  print (" ".join(sec))
+elif sys.argv[-1]=='--show-wfx':
+  # the result of this might need charset conversion
+  # (and the conversion of charlearn scores to Wenlin histories is only approximate)
+  print ("""<?xml version='1.0'?>
+<!-- Wenlin Flashcard XML file -->
+<stack owner='Anonymous' reward='points'>""")
+  thechars.chars.sort(key=byPriority)
+  for c in thechars.chars:
+    print ("<card type='d'><question>"+c.hanzi+"</question>")
+    # trials = invented answer history ("n" = wrong, "y" = right); score = number of "y"s
+    trials = "" ; score = 0
+    if c.supposedToKnow:
+        if c.priority < 0:
+            # negative priority: one wrong answer, then one right answer per halving it takes to get from priorityIfGotWrong up to the current priority
+            trials += "n"
+            p = priorityIfGotWrong
+            while p < c.priority:
+                trials += "y" ; score += 1
+                p /= 2
+        # one right answer per doubling it takes to get from 1 up to the current priority (none if the priority is 1 or less)
+        p = 1
+        while p < c.priority:
+            trials += "y" ; score += 1
+            p *= 2
+    print ("<history score='%d' trials='%d' recent='%s'></history></card>" % (score,len(trials),trials))
+  print ("</stack>")
+else: do_session()
--- a/charlearn/jp/characters.txt
+++ b/charlearn/jp/characters.txt
+charset: euc-jp
+あ a
+い i
+う u
+え e
+お o
+か ka
+き ki
+く ku
+け ke
+こ ko
+さ sa
+し shi
+す su
+せ se
+そ so
+た ta
+ち chi
+つ tsu
+て te
+と to
+な na
+に ni
+ぬ nu
+ね ne
+の no
+は ha
+ひ hi
+ふ fu
+へ he
+ほ ho
+ま ma
+み mi
+む mu
+め me
+も mo
+や ya
+ゆ yu
+よ yo
+ら ra
+り ri
+る ru
+れ re
+ろ ro
+わ wa
+を wo
+ん n
+ア a
+イ i
+ウ u
+エ e
+オ o
+カ ka
+キ ki
+ク ku
+ケ ke
+コ ko
+サ sa
+シ shi
+ス su
+セ se
+ソ so
+タ ta
+チ chi
+ツ tsu
+テ te
+ト to
+ナ na
+ニ ni
+ヌ nu
+ネ ne
+ノ no
+ハ ha
+ヒ hi
+フ fu
+ヘ he
+ホ ho
+マ ma
+ミ mi
+ム mu
+メ me
+モ mo
+ヤ ya
+ユ yu
+ヨ yo
+ラ ra
+リ ri
+ル ru
+レ re
+ロ ro
+ワ wa
+ヲ wo
+ン n
--- a/gradint-build/INSTALL.txt
+++ b/gradint-build/INSTALL.txt
-Installing Gradint on Linux systems
-----------------------------------
-
-Gradint does not need to be installed, it can
-just run from the current directory.
-
-If you do want to make a system-wide installation
-(for example if you want to make a package for a
-Linux distribution), I suggest doing the following
-as root:
-
-mkdir /usr/share/gradint
-cp gradint.py /usr/share/gradint/
-cd samples/utils
-for F in *.py *.sh; do
-  export DestFile=/usr/bin/gradint-$(echo $F|sed -e 's/\..*//')
-  cp $F $DestFile
-  chmod +x $DestFile
-done
-cd ../.. ; rm -rf samples/utils
-tar -zcf /usr/share/gradint/new-user.tgz \
-  advanced.txt settings.txt vocab.txt samples
-cat > /usr/bin/gradint <<EOF
-#!/bin/bash
-if ! test -e "$HOME/gradint"; then
-  echo "You will need some prompts and samples in your home directory."
-  echo "Is it OK to unpack an example into $HOME/gradint ?"
-  echo "Ctrl-C to quit or Enter to continue"
-  read
-  echo -n "Unpacking... "
-  mkdir "$HOME/gradint"
-  cd "$HOME/gradint"
-  tar -zxf /usr/share/gradint/new-user.tgz
-  echo "done."
-  echo "Please check the contents of $HOME/gradint"
-  echo "especially the README files."
-  echo "Then you can run gradint again."
-  exit
-fi
-cd "$HOME/gradint"
-python /usr/share/gradint/gradint.py $@
-EOF
-chmod +x /usr/bin/gradint
-
-For a distribution you might also have to write
-man pages and tidy up the help text etc.
-
-Depends: python + a sound player (e.g. alsa-utils)
-Recommends: python-tk python-tksnack sox libsox-fmt-all madplay
--- a/gradint-build/PocketPC/espeak.bgz
+++ b/gradint-build/PocketPC/espeak.bgz
--- a/gradint-build/PocketPC/tkinter.bgz
+++ b/gradint-build/PocketPC/tkinter.bgz
--- a/gradint-build/hanzi-prompts/sayAgain_zh.txt
+++ b/gradint-build/hanzi-prompts/sayAgain_zh.txt
-在说一次
--- a/gradint-build/mac/start-gradint.app/AudioRecorder.zip
+++ b/gradint-build/mac/start-gradint.app/AudioRecorder.zip
--- a/gradint-build/mac/start-gradint.app/Contents/MacOS/start-gradint
+++ b/gradint-build/mac/start-gradint.app/Contents/MacOS/start-gradint
-#!/bin/bash
-if test -e /usr/lib/tkConfig.sh || test -e /usr/local/lib/tkConfig.sh; then
-# run using only the Tk windows:
-cd "$(echo $0 | sed -e 's|start-gradint.app/Contents/MacOS/start-gradint.*$||')"
-exec pythonw gradint.py
-else
-# run in Terminal:
-open -a Terminal.app "$(echo $0 | sed -e 's|start-gradint.app/Contents/MacOS/start-gradint.*$|gradint.py|')"
-fi
--- a/gradint-build/mac/start-gradint.app/espeak-OSX.zip
+++ b/gradint-build/mac/start-gradint.app/espeak-OSX.zip
--- a/gradint-build/mac/start-gradint.app/sox
+++ b/gradint-build/mac/start-gradint.app/sox
--- a/gradint-build/partials-cache.bin
+++ b/gradint-build/partials-cache.bin
-(]q}q}q]qt.
\ No newline at end of file
--- a/gradint-build/riscos.zip
+++ b/gradint-build/riscos.zip
--- a/gradint-build/samples/prompts/en_en.txt
+++ b/gradint-build/samples/prompts/en_en.txt
-English
--- a/gradint-build/samples/prompts/longpause_zh.txt
+++ b/gradint-build/samples/prompts/longpause_zh.txt
-xian4zai4 wo3men5 yao4 deng3, ran2hou4 fu4xi2. zai4 di4 yi1 ke4 wo3men5 hai2 mei2you3 xue2xi2 hen3 duo1 ci2yu3 suo3yi3 ting2dun4 bi3jiao4 chang2. dan4shi4 zai4 wei4lai2 de5 ke4 wo3men5 mei2you3 zhe4yang4 chang2 de5 ting2dun4.
--- a/gradint-build/samples/utils/equalise.py
+++ b/gradint-build/samples/utils/equalise.py
-#!/usr/bin/env python
-
-import os,commands,sys
-
-def equalise():
-    oldDir=os.getcwd()
-    for l in os.listdir(oldDir):
-        isDir = 0
-        try:
-            os.chdir(l)
-            isDir=1
-        except: pass
-        if isDir:
-            equalise()
-            os.chdir(oldDir)
-        elif l.endswith("wav"):
-            vol = commands.getoutput('sox "%s" t.nul stat' % (l,)).split("\n")[-1].split()[-1]
-            os.system('sox -t wav - -t wav __adjusted vol %s < "%s"' % (vol,l))
-            os.remove(l) ; os.rename('__adjusted',l)
-            try: os.remove('t.nul')
-            except: pass
-
-sys.stdout.write("""WARNING - Use this script ONLY if there is a large
-perceptual variation in the volume levels.  Works on all
-samples in current directory and subdirectories.  Really go
-ahead?
-Press Ctrl-C to cancel or Enter to continue\n""")
-raw_input()
-equalise()
--- a/gradint-build/samples/utils/filemove.sh
+++ b/gradint-build/samples/utils/filemove.sh
-#!/bin/bash
-
-export SamplesDir="samples/" # Must include trailing /
-export ProgressFile="progress.txt"
-if ! test -e $SamplesDir; then echo "Error: $SamplesDir does not exist (are you in the right directory?)"; exit 1; fi
-if ! test -e $ProgressFile; then echo "Error: $ProgressFile does not exist (are you in the right directory?)";exit 1;fi
-
-if test "a$1" == a; then
-  echo "Usage: $0 oldname newname"
-  echo "oldname and newname are relative to $SamplesDir, and can be prefixes of several files/directories"
-  echo "Moves files from one samples directory to another, keeping $ProgressFile adjusted.  Make sure gradint is not running (including waiting for start) when in use."
-  exit 1
-fi
-
-export Src=$1
-export Dest=$2
-
-find $SamplesDir -follow -type f | grep ^$SamplesDir$Src | \
-while true; do read || break;
-  export SrcFile=$REPLY
-  export DestFile=$(echo $SrcFile|sed -e "s|^$SamplesDir$Src|$SamplesDir$Dest|")
-  mkdir -p $DestFile ; rmdir $DestFile # ensure parent dirs exist before moving file across
-  mv -b $SrcFile $DestFile
-  export SrcFile=$(echo $SrcFile|sed -e "s|$SamplesDir||")
-  export DestFile=$(echo $DestFile|sed -e "s|$SamplesDir||")
-  gzip -fdc $ProgressFile | sed -e "s|$SrcFile|$DestFile|g" > /tmp/newprog ; mv /tmp/newprog $ProgressFile # (ideally should re-write to batch these changes, but leave like this for now in case need to recover from unfinished operation)
-done
-
-rmdir $SamplesDir$Src 2>/dev/null >/dev/null # IF it's a directory
--- a/gradint-build/samples/utils/log2opl.py
+++ b/gradint-build/samples/utils/log2opl.py
-# log2opl.py (c) 2008 Silas S. Brown.  License: GPL.
-# This is a Python script to translate log.txt into an OPL
-# program for a palmtop or smartphone running EPOC.  The
-# resulting file lesson.opl needs to be imported into Program
-# and translated.  The program will show the log of the lesson
-# in real time, providing a countdown for each item.  This
-# is for use as a speaker's cue when demonstrating the
-# graduated-interval method in an extemporaneous talk (works
-# best with a lesson 1 so there are plenty of gaps to speak in).
-# Make sure you're using vocab.txt or meaningful filenames.
-# It may also be useful to set partialsDirectory=None
-
-# If you have a PDA that can run Gradint by itself, then
-# see ask_teacherMode in advanced.txt for a more flexible approach.
-
-o=open("lesson.opl","wb")
-o.write("PROC m:\r\nfont 8,9\r\n")
-curS = -5 # allow lead-in
-for l in open("log.txt"):
-  m,s = l.split()[0].split(":") ; m,s = int(m),int(s)
-  s=s+60*m
-  o.write("a:("+str(s-curS)+",\""+" ".join(l.split()[1:])+"\")\r\n")
-  curS = s
-
-o.write('PRINT "Finished.":GET\r\nENDP\r\nPROC a:(secs%,a$)\r\nLOCAL i%\r\nPRINT "   ";a$+chr$(13),\r\ni%=secs%\r\nWHILE i%\r\nprint CHR$(13)+GEN$(i%,2)+" ";\r\nPAUSE 20\r\ni%=i%-1\r\nENDWH\r\nPRINT CHR$(13)+" "\r\nENDP\r\n')
--- a/gradint-build/samples/utils/make-smaller
+++ b/gradint-build/samples/utils/make-smaller
-
-
-Note: Now that gradint supports MP3 input, you
-can replace your WAVs with MP3s instead of
-following the instructions here.  See samples/ReadmeMP3.txt
-for notes on getting this to work.  You can
-update all progress.txt's with the change like
-this:
-
-for N in $(find . -name progress.txt); do sed -e "s/\.wav/.mp3/g" < $N > n ; mv n $N; done
-
-and do the encoding itself (in-place) with:
-
-for N in $(find samples|grep wav$); do lame --cbr -b 48 -h -m m $N $(echo $N|sed -e s/.wav$/.mp3) && rm $N; done
-
---------------------
-
-To squash down to 128kbps (16k bytes/s), be in the directory above 'samples' and do:
-
-for Dir in $(find samples/ -type d); do mkdir -p "compressed-$Dir"; done; for F in $(find samples/ -type f|grep wav$); do if test "$F" -nt "compressed-$F"; then sox "$F" -r 16000 -c 1 -b -u test.wav; if test $(wc -c test.wav|sed -e 's/ .*//') -lt $(wc -c "$F"|sed -e 's/ .*//'); then mv test.wav "compressed-$F"; else rm test.wav; cp -p "$F" "compressed-$F"; fi; fi; done; for F in $(find samples/|grep -v wav$); do cp -up "$F" "compressed-$F" 2>/dev/null; done
-
-The result will be in a directory called compressed-samples.  Any samples that were already smaller than the "compressed" versions, or anything that is not a .wav file, will simply be copied into compressed-samples uncompressed.  Any files already in compressed-samples will not be touched unless the "samples" equivalent is newer.  Additionally you may want to delete any samples in compressed-samples that are no longer in samples, in which case do this as well:
-
-for F in $(find compressed-samples/ -type f); do if ! test -e $(echo "$F"|sed -e s/compressed-//); then rm "$F"; fi; done
-
-To compress in place (erasing original files), go into samples directory and do:
-
-for F in $(find . -type f|grep wav$); do sox "$F" -r 16000 -c 1 -b -u test.wav; if test $(wc -c test.wav|sed -e 's/ .*//') -lt $(wc -c "$F"|sed -e 's/ .*//'); then mv test.wav "$F"; else rm test.wav; fi; done
-
-On some systems, 8-bit playback is noisy (e.g. because volume adjustments cause too many of those 8 bits to be lost); if you can't work around this then you could use 16-bit by deleting '-b -u' from the above commands, but the result will be twice as big.
--- a/gradint-build/samples/utils/splitter.py
+++ b/gradint-build/samples/utils/splitter.py
-#!/usr/bin/env python
-
-# Program to support splitting a long sound file into
-# several little ones.
-
-# Needs 'sox' - if Windows, download from
-# sox.sourceforge.net
-# (e.g. http://prdownloads.sourceforge.net/sox/sox12172.zip
-# - note gives a "select a mirror" dialogue) and put sox.exe
-# in the same directory or on the path
-
-# -----------------------
-
-# lowpri: 2nd sort key by length ? (only matters if adding a lot of new words & phrases at same time)
-
-import time,os,sndhdr,sys
-try: import winsound
-except: winsound=None
-macsound = (sys.platform.find("mac")>=0 or sys.platform.find("darwin")>=0)
-if macsound: sys.stderr.write("Warning: You need to have qtplay (from gradint or wherever) in your PATH for this to work\n")
-
-def rawcut(allData,fromSecs,toSecs,rate=22050,bits=16,channels=1):
-    return allData[secbyte(fromSecs,rate,channels,bits):secbyte(toSecs,rate,channels,bits)]
-def secbyte(sec,rate,channels,bits):
-    # Convert a time in seconds to a byte offset in the raw
-    # data
-    # Note: Result MUST be a multiple of bytesPerSample
-    # 'sec' is not necessarily an integer
-    sampleNo = int(0.5+sec*rate) # nearest integer sample no
-    bytesPerSample = channels*int(bits/8)
-    return sampleNo * bytesPerSample
-
-def readTimings(langs):
-    if macsound: time.sleep(1) # OS X hack due to qtplay delay (1sec on an Intel 2GHz Core Duo running OSX 10.5)
-    sys.stdout.write("Starting clock\n")
-    # Now using time.time() rather than time.clock()
-    # due to clock units confusion
-    # Just have to hope the system is accurate enough
-    offset = time.time()
-    ret = [] ; ip=''
-    start = offset
-    while not ip=='q':
-        ip = raw_input(langs[len(ret)%len(langs)]+": ")
-        t = time.time()
-        if ip=="c" and ret: ret[-1]=(ret[-1][0],t-offset)
-        elif not ip: ret.append((start-offset,t-offset))
-        start = t
-    sys.stdout.write("Finishing at %f seconds\n" % (t-offset,))
-    return ret
-
-def instructions():
-    sys.stdout.write("Press Return between samples\n")
-    sys.stdout.write("Enter 'c' to change the time of the last Return to this one\n")
-    sys.stdout.write("Enter 'x' to omit this bit (e.g. silence)\n")
-    sys.stdout.write("Enter 'q' when done (AFTER stopping last sample)\n")
-    sys.stdout.write("PRESS RETURN TO START\n")
-    raw_input()
-
-def getParams():
-    wavFile=raw_input("Enter filename of main recording: ")
-    header = sndhdr.what(wavFile)
-    if not header:
-        sys.stdout.write("Problem opening that file\n")
-        return None
-    (wtype,rate,channels,wframes,bits) = header
-    sys.stdout.write("WAV file is %d-bit\n" % (bits,))
-    if bits==8: soxBits="-b -u" # unsigned
-    elif bits==16: soxBits="-w -s" # signed
-    elif bits==32: soxBits="-l -s" # signed
-    else:
-        sys.stdout.write("Unsupported bits per sample '%s'\n" % (bits,))
-        return None
-    soxParams = "-t raw %s -r %d -c %d" % (soxBits,rate,channels)
-    rawFile = wavFile + ".raw"
-    convertToRaw(soxParams,wavFile,rawFile)
-    lang1=lang2=None
-    while not lang1: lang1=raw_input("Enter first language on recording (e.g. zh): ")
-    interleaved=input("Are two languages interleaved? (1/0): ") # (horrible hack)
-    if interleaved:
-        while not lang2: lang2=raw_input("Enter second language on recording (e.g. en): ")
-    else:
-        lang2=lang1
-        sys.stdout.write("OK - should run this program again for other language's recording\n")
-    return soxParams,wavFile,rawFile,lang1,lang2,rate,bits,channels
-
-def convertToWav(soxParams,rawFile,wavFile):
-    os.system("sox %s \"%s\" \"%s\"" % (soxParams,rawFile,wavFile))
-def convertToRaw(soxParams,wavFile,rawFile):
-    os.system("sox \"%s\" %s \"%s\"" % (wavFile,soxParams,rawFile))
-
-def main():
-    tuple=None
-    while not tuple: tuple=getParams()
-    soxParams,wavFile,rawFile,lang1,lang2,rate,bits,channels = tuple
-    mainLoop(soxParams,wavFile,rawFile,lang1,lang2,rate,bits,channels)
-    os.unlink(rawFile)
-
-# Set lang1 & lang2 equal if not interleaving
-def mainLoop(soxParams,wavFile,rawFile,lang1="zh",lang2="en",rate=22050,bits=16,channels=1):
-    allData=open(rawFile,"rb").read()
-    open(wavFile,"rb").read() # to cache before starting clock and 'play' (especailly because just loaded the separate raw data) (could also play from raw data if got sox)
-    instructions()
-    # Start sound asynchronously - hope for the best that
-    # the first clock reading is near enough to the actual
-    # start of the sound
-    if winsound: winsound.PlaySound(wavFile,winsound.SND_FILENAME | winsound.SND_ASYNC)
-    elif macsound: os.spawnlp(os.P_NOWAIT,"qtplay","qtplay",wavFile)
-    # else: os.spawnlp(os.P_NOWAIT,"play","play",wavFile)
-    # Problem: What if 'play' o/p's at slightly less than the correct rate - will think the cuts are further on in the file than they really are.  (e.g. 16000Hz on a z61p Cygwin, "time play" shows it takes slightly longer than sox thinks the file is)
-    # Better convert to 44100 just to make sure.
-    else: os.system("sox \"%s\" -r 44100 -t wav - | play -t wav - &" % wavFile)
-    # Read timings, cut up, and write out the samples
-    samples = [ rawcut(allData,s,f,rate,bits,channels) for s,f in readTimings([lang1,lang2]) ]
-    formatString = "%0"+str(len(str(int(len(samples)/(2-(lang2==lang1))-1))))+"d_%s"
-    # (pad with 0s as necessary so it's in order)
-    # (len(samples)-1 gives highest number, so len(str(l..))
-    # gives number of digits in it)
-    for i in range(len(samples)):
-        if i%2: lang=lang2
-        else: lang=lang1
-        if lang1==lang2: c=i
-        else: c=int(i/2)
-        fname = formatString % (c,lang)
-        f=open(fname, "wb")
-        f.write(samples[i])
-        f.close()
-        convertToWav(soxParams,fname,fname+".wav")
-        os.unlink(fname)
-        sys.stdout.write("Written %s.wav\n" % (fname,))
-
-if __name__=="__main__":
-    main()
--- a/gradint-build/samples/utils/strip0.py
+++ b/gradint-build/samples/utils/strip0.py
-#!/usr/bin/env python
-
-# Program to strip any silence from the beginning/end of a
-# sound file (must be real 0-bytes not background noise)
-
-# (This is useful as a "splitter" post-processor when
-# getting samples from CD-ROMs e.g. "Colloquial Chinese" -
-# don't use audacity here because some versions of audacity
-# distort 8-bit audio files)
-
-# Needs 'sox' + splitter
-
-from splitter import *
-
-for wavFile in sys.argv[1:]:
-    # Figure out sox parameters
-    header = sndhdr.what(wavFile)
-    if not header: raise IOError("Problem opening %s" % (wavFile,))
-    (wtype,rate,channels,wframes,bits) = header
-    if bits==8: soxBits="-b -u" # unsigned
-    elif bits==16: soxBits="-w -s" # signed
-    elif bits==32: soxBits="-l -s" # signed
-    else: raise Exception("Unsupported bits per sample")
-    soxParams = "-t raw %s -r %d -c %d" % (soxBits,rate,channels)
-    rawFile = wavFile + ".raw"
-    # Now ready to convert to raw, and read it in
-    convertToRaw(soxParams,wavFile,rawFile)
-    o=open(rawFile,"rb")
-    allData=o.read()
-    o.close()
-    # Now figure out how many samples we can take out
-    bytesPerSample = channels*int(bits/8)
-    if bytesPerSample==1: silenceVal=chr(128)
-    else: silenceVal=chr(0)
-    startIdx = 0
-    while startIdx < len(allData):
-        if not allData[startIdx]==silenceVal: break
-        startIdx = startIdx + 1
-    startIdx = int(startIdx/bytesPerSample) * bytesPerSample
-    endIdx = len(allData)
-    while endIdx:
-        if not allData[endIdx-1]==silenceVal: break
-        endIdx = endIdx - 1
-    endIdx = endIdx - len(allData) # put it into -ve notatn
-    endIdx = int(endIdx/bytesPerSample) * bytesPerSample
-    endIdx = endIdx + len(allData) # avoid 0
-    sys.stderr.write("Debugger: Clipping %s to %d:%d\n" % (wavFile,startIdx,endIdx))
-    allData = allData[startIdx:endIdx]
-    # Write back the file, and convert it back to wav
-    o=open(rawFile,"wb")
-    o.write(allData)
-    o.close()
-    convertToWav(soxParams,rawFile,wavFile)
-    # Clean up
-    os.unlink(rawFile)
No results found