From fbe44f96f8e24aaf390ab258d72f5fc795bdfb20 Mon Sep 17 00:00:00 2001
From: "Silas S. Brown" <ssb22@cam.ac.uk>
Date: Wed, 6 May 2015 16:34:14 +0000
Subject: [PATCH] Update adjuster/annogen/termlayout

git-svn-id: http://svn.code.sf.net/p/e-guidedog/code/ssb22/adjuster@2188 29193198-4895-4776-b068-10539e920549
---
 annogen.py | 58 ++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/annogen.py b/annogen.py
index b7ff13a..336cfa4 100755
--- a/annogen.py
+++ b/annogen.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-program_name = "Annotator Generator v0.588 (c) 2012-15 Silas S. Brown"
+program_name = "Annotator Generator v0.589 (c) 2012-15 Silas S. Brown"
 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -152,6 +152,10 @@ parser.add_option("--data-driven",
                   action="store_true",default=False,
                   help="Generate a program that works by interpreting embedded data tables for comparisons, instead of writing these as code.  This can take some load off the compiler (so try it if you get errors like clang's \"section too large\"), as well as compiling faster and reducing the resulting binary's RAM size (by 35-40% is typical), at the expense of a small reduction in execution speed.  Javascript and Python output is always data-driven anyway.") # If the resulting binary is compressed (e.g. in an APK), its compressed size will likely not change much (same information content), so I'm specifically saying "RAM size" i.e. when decompressed
 
+parser.add_option("--zlib",
+                  action="store_true",default=False,
+                  help="Enable --data-driven and compress the embedded data table using zlib, and include code to call zlib to decompress it on load.  Useful if the runtime machine has the zlib library and you need to save disk space but not RAM (the decompressed table is stored separately in RAM, unlike --compress which, although giving less compression, at least works 'in place').  Once --zlib is in use, specifying --compress too will typically give an additional disk space saving of less than 1% (and a runtime RAM saving that's greater but more than offset by zlib's extraction RAM).") # and additional_compact_opcodes typically still helps no matter what the other options are
+
 parser.add_option("--windows-clipboard",
                   action="store_true",default=False,
                   help="Include C code to read the clipboard on Windows or Windows Mobile and to write an annotated HTML file and launch a browser, instead of using the default cross-platform command-line C wrapper.  See the start of the generated C file for instructions on how to compile for Windows or Windows Mobile.")
@@ -285,6 +289,11 @@ elif ios:
   if c_filename.endswith(".c"): c_filename = c_filename[:-2]+".m" # (if the instructions are followed, it'll be ViewController.m, but no need to enforce that here)
 elif ndk:
   if not outcode=="utf-8": errExit("outcode must be utf-8 when using --ndk")
+if zlib:
+  del zlib ; import zlib ; data_driven = True # swap the boolean option for the real zlib module (a module is still truthy, so "if zlib:" tests continue to work); --zlib implies --data-driven
+  if javascript: errExit("--zlib is not yet implemented in Javascript") # C or Python for now
+  if windows_clipboard: sys.stderr.write("WARNING: --zlib with --windows-clipboard is inadvisable because ZLib is not typically present on Windows platforms. If you really want it, you'll need to figure out the compiler options and library setup for it.\n")
+  if ios: sys.stderr.write("WARNING: --zlib with --ios will require -lz to be added to the linker options in XCode, and I don't have instructions for that (it probably differs across XCode versions)\n")
 if data_driven and (c_sharp or java or golang): errExit("--data-driven is not yet implemented in C#, Java or Go")
 elif javascript or python: data_driven = True
 additional_compact_opcodes = data_driven and not (python or javascript) # currently implemented only in the C version of the data-driven runtime
@@ -502,7 +511,7 @@ if compress:
       totSaved += bSaved
       sys.stderr.write("Compress: %d/%d tokens, %d bytes saved%s" % (len(orig_tokens)-len(tokens),len(orig_tokens),totSaved,clear_eol))
     squashStrings = "done"
-    while len(pairs) > 255 and pairs[-1]==chr(0): pairs = pairs[:-1] # 255 not 256 because C will add a chr(0) anyway (however the compression isn't working if it's < 256)
+    while len(pairs) > 256 and pairs[-1]==chr(0): pairs = pairs[:-1]
     sys.stderr.write("\n")
     if totSaved < len(pairs)+50: sys.stderr.write("Warning: --compress on this data made it bigger!  Consider dropping --compress\n") # 50 as rough guess for OutWriteDecompress binary (probably about 12 instructions at 4+ bytes each)
     return c_escapeRawBytes("".join(pairs))
@@ -598,6 +607,7 @@ cat > jni/annotator.c <<"EOF"
 #include <stdlib.h>
 #include <jni.h>
 """
+  if zlib: c_preamble=c_preamble.replace("LOCAL_PATH","LOCAL_LDLIBS := -lz\nLOCAL_PATH",1)
   c_defs = r"""static const char *readPtr, *writePtr, *startPtr;
 static char *outBytes;
 static size_t outWriteLen,outWritePtr;
@@ -792,6 +802,9 @@ enum {
   c_switch3 = "if (annotation_mode == ruby_markup) {"
   c_switch4 = "} else o(numBytes,annot);"
 
+if data_driven and not ndk: c_preamble += '#include <stdlib.h>\n' # for malloc (ndk includes it anyway, above)
+if zlib: c_preamble += '#include "zlib.h"\n'
+
 if ndk: c_start = ""
 # line below: just say 'code generated by', not 'C code' as it might also be Objective-C (if ios is set; TODO: check and say which one?)
 else: c_start = "/* -*- coding: "+outcode+" -*- */\n/* code generated by "+program_name[:program_name.index("(c)")].strip()+" */\n"
@@ -824,8 +837,9 @@ static void o2(int numBytes,const char *annot,const char *title) {"""+c_switch3+
 if not compress: c_start = c_start.replace("OutWriteDecompress","OutWriteStr")
 
 c_end = r"""
-void matchAll() {
-  while(!FINISHED) {
+void matchAll() {"""
+if zlib: c_end += "\n  if(!data) init();" # decompress the table on the first call; the newlines are placed so that output without --zlib keeps its original line break after the brace
+c_end += "\n"+r"""  while(!FINISHED) {
     POSTYPE oldPos=THEPOS;
     topLevelMatch();
     if (oldPos==THEPOS) { needSpace=0; OutWriteByte(NEXTBYTE); COPY_BYTE_SKIP; }
@@ -1699,7 +1713,7 @@ class BytecodeAssembler:
         try:
           lDic = {} # the label dictionary
           for P in [1,2,3]:
-            labelMove = 0 # amount future labels have to move by, due to instructions taking longer than we thought on pass 2
+            labelMove = 0 # amount future labels have to move by, due to instructions taking longer than we thought on pass 2.  NB: this labelMove logic relies on the assumption that, if a short-forward-jump is confirmed in pass 2, then the instructions it jumps over will not have to expand in that pass (otherwise it's possible that the label it jumps to will be moved out of range and the instruction will have to expand on pass 3, causing labels to move on pass 3 which would necessitate another pass; assert should catch this). Assumption should hold in the code we generate ('nested switch' stuff: a 'break' from an inner switch can't possibly refer to a label that occurs after the one referred to by 'break's in the outer switch before that inner switch started, hence if the outer switch is confirmed to be within range of its end label then the inner switch must necessarily be in range of ITS end label) but this might not hold if the generator were to start to emit spaghetti state jumps
             compacted = 0
             labels_seen_this_pass = set() # to avoid backward jumps (as we can't just apply labelMove to them and see if they're behind the program counter, since need to know if they're backward before knowing if labelMove applies)
             r = [chr(numBytes)] ; ll = 1
@@ -1767,9 +1781,15 @@ class BytecodeAssembler:
               if not additional_compact_opcodes: break # need only 2 passes if have fixed-length addressing
             else: assert not labelMove, "Labels move only on pass 2"
             sys.stderr.write('.')
-          if additional_compact_opcodes: sys.stderr.write("%d bytes (opcode compaction saved %d)\n" % (ll,compacted))
+          r = "".join(r) # join the assembled pieces now, so the zlib branch below can compress them as a single string
+          if zlib:
+            self.origLen = ll # uncompressed length, recorded before r is replaced: needed for efficient malloc in the C code later
+            r = zlib.compress(r,9) # 9 = zlib's highest compression level
+            if additional_compact_opcodes: sys.stderr.write("%d bytes (zlib compressed from %d after opcode compaction saved %d)\n" % (len(r),ll,compacted))
+            else: sys.stderr.write("%d bytes (zlib compressed from %d)\n" % (len(r),ll))
+          elif additional_compact_opcodes: sys.stderr.write("%d bytes (opcode compaction saved %d)\n" % (ll,compacted))
           else: sys.stderr.write("%d bytes\n" % ll)
-          return "".join(r)
+          return r
         except TooNarrow: pass
     assert 0, "can't even assemble it with 255-byte addressing !?!"
 
@@ -2024,6 +2044,16 @@ def main():
 if __name__=="__main__": main()
 """
 
+c_zlib = r"""static unsigned char *data=NULL;
+static void init() {
+  z_stream s; memset(&s,0,sizeof(s));
+  s.next_in=origData; s.avail_in=%%ZLIBLEN%%;
+  data=malloc(%%ORIGLEN%%); if(!data) abort(); /* out of memory */
+  s.next_out=data; s.avail_out=%%ORIGLEN%%;
+  if(inflateInit(&s)!=Z_OK || inflate(&s,Z_FINISH)!=Z_STREAM_END) abort(); /* out of memory, or table is corrupt */
+  inflateEnd(&s);
+}
+""" # C template: %%ZLIBLEN%% and %%ORIGLEN%% are substituted with the compressed and uncompressed table sizes before output; Z_FINISH is used because the whole table is inflated in a single call into a buffer of exactly the right size
 c_datadrive = r"""
 static unsigned char *dPtr; static int addrLen;
 
@@ -2755,6 +2785,7 @@ def c_escape(unistr):
 def zapTrigraphs(x): return re.sub(r"\?\?([=/'()<>!-])",r'?""?\1',x) # to get rid of trigraph warnings, TODO might get a marginal efficiency increase if do it to the entire C file at once instead)
 
 def c_escapeRawBytes(s): # as it won't be valid outcode; don't want to crash any editors/viewers of the C file
+  if s.endswith(chr(0)): s=s[:-1] # as the C compiler will add a terminating 0 anyway
   return re.sub(r"(?<!\\)((?:\\\\)*\\x..)([0-9a-fA-F])",r'\1""\2',zapTrigraphs(s.replace('\\','\\\\').decode('unicode_escape').encode('unicode_escape').replace('"','\\"')))
 
 def c_length(unistr): return len(unistr.encode(outcode))
@@ -2906,6 +2937,7 @@ def outputParser(rulesAndConds):
       b = BytecodeAssembler()
       b.addActionDictSwitch(byteSeq_to_action_dict,False)
       print "data=",repr(b.link()) ; del b
+      if zlib: print "import zlib; data=zlib.decompress(data)"
       print py_end
       return
     if java: start = java_src.replace("%%JPACKAGE%%",jPackage)
@@ -2916,8 +2948,12 @@ def outputParser(rulesAndConds):
     if data_driven:
       b = BytecodeAssembler()
       b.addActionDictSwitch(byteSeq_to_action_dict,False)
-      print "static unsigned char data[]=\""+c_escapeRawBytes(b.link())+'\";' ; del b
-      print c_datadrive
+      ddrivn = b.link() # the assembled table as a byte string (already zlib-compressed by link() when --zlib is in use)
+      if zlib: data = "origData" # compressed bytes go in origData; the c_zlib code inflates them into 'data' at runtime
+      else: data = "data"
+      print "static unsigned char "+data+"[]=\""+c_escapeRawBytes(ddrivn)+'\";'
+      if zlib: print c_zlib.replace('%%ORIGLEN%%',str(b.origLen)).replace('%%ZLIBLEN%%',str(len(ddrivn))) # rather than using sizeof() because we might or might not want to include the compiler's terminating nul byte
+      del b,ddrivn ; print c_datadrive
     else:
       subFuncL = []
       ret = stringSwitch(byteSeq_to_action_dict,subFuncL)
@@ -3061,6 +3097,8 @@ if c_filename:
     sys.stdout.close()
     sys.stdout = stdout_old # in case running with python -m cProfile or something
     if c_compiler:
-      cmd = c_compiler+" \""+c_filename+"\"" # (the -o option is part of c_compiler)
+      cmd = c_compiler # should include any -o option
+      cmd += " \""+c_filename+"\""
+      if zlib: cmd += " -lz" # must follow the source file: linkers search libraries in command-line order, so a -lz placed before the code that needs it can be discarded and the link then fails with undefined inflate symbols (user can always simply redirect the C to a file and compile separately)
       sys.stderr.write(cmd+"\n")
       sys.exit(os.system(cmd))
-- 
GitLab