From 00de1ad0f904e05e9299cd2230fd3bc47dcefe2c Mon Sep 17 00:00:00 2001
From: "Silas S. Brown" <ssb22@cam.ac.uk>
Date: Mon, 22 Jan 2018 04:13:46 -0600
Subject: [PATCH] Update Annotator Generator

---
 annogen.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/annogen.py b/annogen.py
index d3b9f1a..511a5c5 100755
--- a/annogen.py
+++ b/annogen.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-program_name = "Annotator Generator v0.6286 (c) 2012-18 Silas S. Brown"
+program_name = "Annotator Generator v0.6287 (c) 2012-18 Silas S. Brown"
 
 # See http://people.ds.cam.ac.uk/ssb22/adjuster/annogen.html
 
@@ -183,6 +183,14 @@ parser.add_option("-j","--javascript",
                   action="store_true",default=False,
                   help="Instead of generating C code, generate JavaScript.  This might be useful if you want to run an annotator on a device that has a JS interpreter but doesn't let you run native code.  The JS will be table-driven to make it load faster (and --no-summary will also be set).  See comments at the start for usage.") # but it's better to use the C version if you're in an environment where 'standard input' makes sense
 
+parser.add_option("-8","--js-octal",
+                  action="store_true",default=False,
+                  help="When generating a Javascript annotator, use octal instead of hexadecimal codes in the data string when doing so would save space. This does not comply with ECMAScript 5 and may give errors in its strict mode.")
+
+parser.add_option("-9","--ignore-ie8",
+                  action="store_true",default=False,
+                  help="When generating a Javascript annotator, do not make it backward-compatible with Microsoft Internet Explorer 8 and below. This may save a few bytes.")
+
 parser.add_option("-Y","--python",
                   action="store_true",default=False,
                   help="Instead of generating C code, generate a Python module.  Similar to the Javascript option, this is for when you can't run native code, and it is table-driven for fast loading.")
@@ -3128,7 +3136,12 @@ def c_escapeRawBytes(s): # as it won't be valid outcode; don't want to crash any
   if s.endswith(chr(0)): s=s[:-1] # as the C compiler will add a terminating 0 anyway
   return re.sub(r"(?<!\\)((?:\\\\)*\\x..)([0-9a-fA-F])",r'\1""\2',zapTrigraphs(s.replace('\\','\\\\').decode('unicode_escape').encode('unicode_escape').replace('"','\\"')))
 
-def js_escapeRawBytes(s): return re.sub("[\x00-\x1f\x7f-\xff]",lambda m:"\\x%02x"%ord(m.group()),re.sub(chr(0)+r"(?![0-9])",r"\\0",s.replace("\\",r"\\").replace('"',r'\"').replace(chr(8),r"\b").replace(chr(9),r"\t").replace(chr(10),r"\n").replace(chr(12),r"\f"))) # TODO: could also convert chars 1-7 (without following digits) to single-digit octal and 11 + 13-31 (without following digits) to double-digit octal, but deprecated in ECMAScript 5 (errors in strict mode); 11 = \v but not in MSIE 8 or below
+def js_escapeRawBytes(s):
+  s = s.replace("\\",r"\\").replace('"',r'\"').replace(chr(8),r"\b").replace(chr(9),r"\t").replace(chr(10),r"\n").replace(chr(12),r"\f").replace(chr(13),r"\r")
+  if ignore_ie8: s = s.replace(chr(11),r"\v")
+  if js_octal: s = re.sub("[\x00-\x1f](?![0-9])",lambda m:r"\%o"%ord(m.group()),s)
+  else: s = re.sub(chr(0)+r"(?![0-9])",r"\0",s) # \0 is allowed even if not js_octal
+  return re.sub("[\x00-\x1f\x7f-\xff]",lambda m:r"\x%02x"%ord(m.group()),s)
 
 def c_length(unistr): return len(unistr.encode(outcode))
 
-- 
GitLab