From aa180a20fb2b51bdcd62e6283509b564e4f39766 Mon Sep 17 00:00:00 2001
From: "Silas S. Brown" <ssb22@cam.ac.uk>
Date: Tue, 22 Mar 2016 13:05:34 +0000
Subject: [PATCH] Update Annotator Generator, ImapFix

git-svn-id: http://svn.code.sf.net/p/e-guidedog/code/ssb22/adjuster@2462 29193198-4895-4776-b068-10539e920549
---
 annogen.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/annogen.py b/annogen.py
index 0d776f5..1476a36 100755
--- a/annogen.py
+++ b/annogen.py
@@ -2563,9 +2563,6 @@ def within_Nbytes(markedDown,matchEndPos,nbytes):
     # Assumes multibyte codes are self-synchronizing, i.e. if you start in the middle of a multibyte sequence, the first valid character will be the start of the next sequence, ok for utf-8 but TODO might not be the case for some codes
     return markedDown[max(0,matchEndPos-nbytes):matchEndPos].encode(outcode)[-nbytes:].decode(outcode,'ignore')+markedDown[matchEndPos:matchEndPos+nbytes].encode(outcode)[:nbytes].decode(outcode,'ignore')
 
-def occurrences(haystack,needle):
-  # Returns number of occurrences of 'needle' in 'haystack', needs to be fast (TODO: can we do better than this?)
-  return len(re.findall(re.escape(needle),haystack))
 def test_rule(withAnnot_unistr,markedDown,yBytesRet):
     # Tests to see if the rule withAnnot_unistr is
     # ALWAYS right in the examples, i.e.
@@ -2583,7 +2580,7 @@ def test_rule(withAnnot_unistr,markedDown,yBytesRet):
         yBytesRet.append(ybr) # (negate, list of indicators, nbytes)
         return True
     phrase = markDown(withAnnot_unistr)
-    ret = occurrences(markedDown,phrase) == len(getOkStarts(withAnnot_unistr))
+    ret = markedDown.count(phrase) == len(getOkStarts(withAnnot_unistr))
     if diagnose and diagnose==phrase:
       diagnose_write("occurrences(%s)==occurrences(%s) = %s" % (phrase,withAnnot_unistr,ret))
     return ret
-- 
GitLab