From afe008df95ec15f33bc5664ae8dd614a45a4e4c5 Mon Sep 17 00:00:00 2001
From: "Silas S. Brown" <ssb22@cam.ac.uk>
Date: Mon, 17 Oct 2016 15:22:00 +0000
Subject: [PATCH] Update Annotator Generator, Web Adjuster

git-svn-id: http://svn.code.sf.net/p/e-guidedog/code/ssb22/adjuster@2600 29193198-4895-4776-b068-10539e920549
---
 adjuster.py | 4 ++--
 annogen.py  | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/adjuster.py b/adjuster.py
index dafb83c..33785de 100755
--- a/adjuster.py
+++ b/adjuster.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-program_name = "Web Adjuster v0.21 (c) 2012-16 Silas S. Brown"
+program_name = "Web Adjuster v0.22 (c) 2012-16 Silas S. Brown"
 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -1524,7 +1524,7 @@ document.forms[0].i.focus()
             codeTextList.append(i.encode('utf-8'))
         def callback(out,err):
             self.add_header("Content-Type","application/json")
-            self.write(json.dumps([i.decode('utf-8') for i in out[1:].split(chr(0))]))
+            self.write(json.dumps([i.decode('utf-8','replace') for i in out[1:].split(chr(0))])) # 'replace' here because we don't want UTF-8 decoding errors to time out the entire request (although hopefully the filter WON'T produce UTF-8 errors...)
             self.finish()
         runFilterOnText(self.getHtmlFilter(filterNo),codeTextList,callback)
 
diff --git a/annogen.py b/annogen.py
index 4f700dd..ce0f7d0 100755
--- a/annogen.py
+++ b/annogen.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-program_name = "Annotator Generator v0.599 (c) 2012-16 Silas S. Brown"
+program_name = "Annotator Generator v0.6 (c) 2012-16 Silas S. Brown"
 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -3049,6 +3049,7 @@ def outputParser(rulesAndConds):
     for l in read_manual_rules(): addRule(l,[],byteSeq_to_action_dict,True)
     write_glossMiss(glossMiss)
     longest_rule_len = max(len(b) for b in byteSeq_to_action_dict.iterkeys())
+    longest_rule_len += ybytes_max # because buffer len is 2*longest_rule_len, we shift half of it when (readPtr-bufStart +ybytes >= bufLen) and we don't want this shift to happen when writePtr-bufStart = Half_Bufsize-1 and readPtr = writePtr + Half_Bufsize-1 (TODO: could we get away with max(0,ybytes_max-1) instead? but check how this interacts with the line below; things should be safe as they are now).  This line's correction was missing in Annogen v0.599 and below, which could therefore emit code that, when running from stdin, occasionally replaced one of the document's bytes with an undefined byte (usually 0) while still emitting correct annotation for the original byte.  (This could result in bad UTF-8 that crashed the bookmarklet feature of Web Adjuster v0.21 and below.)
     longest_rule_len = max(ybytes_max*2, longest_rule_len) # make sure the half-bufsize is at least ybytes_max*2, so that a read-ahead when pos is ybytes_max from the end, resulting in a shift back to the 1st half of the buffer, will still leave ybytes_max from the beginning, so yar() can look ybytes_max-wide in both directions
     if javascript:
       print js_start
-- 
GitLab