From 8f147cb235257c0dd3ee5c16f763f6d05421350d Mon Sep 17 00:00:00 2001 From: "Silas S. Brown" <ssb22@cam.ac.uk> Date: Mon, 3 Oct 2016 14:57:48 +0000 Subject: [PATCH] Update Annotator Generator, ImapFix, Web Adjuster, WebCheck git-svn-id: http://svn.code.sf.net/p/e-guidedog/code/ssb22/adjuster@2588 29193198-4895-4776-b068-10539e920549 --- adjuster.py | 28 ++++++++++++++++++++-------- annogen.py | 3 ++- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/adjuster.py b/adjuster.py index ffa2abb..dafb83c 100755 --- a/adjuster.py +++ b/adjuster.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -program_name = "Web Adjuster v0.209 (c) 2012-16 Silas S. Brown" +program_name = "Web Adjuster v0.21 (c) 2012-16 Silas S. Brown" # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -2919,6 +2919,18 @@ def detect_renderCheck(): return r"""(document.getElementsByTagName && function( # do NOT use fffd, it's sometimes displayed differently to other unrenderable characters # Works even in Opera Mini, which must somehow communicate the client's font metrics to the proxy +def htmlFind(html,markup): + # basically html.lower().find(markup), but we need to be + # aware of things like Tencent's <!--headTrap<body></body><head></head><html></html>--> + # preferably without running a slow full parser + r = html.lower().find(markup) + if r<0: return r + c = html.find("<!--") + if c<0 or c>r: return r + # If gets here, we might have a headTrap situation + def blankOut(m): return " "*(m.end()-m.start()) + return re.sub("<!--.*?-->",blankOut,html,flags=re.DOTALL).lower().find(markup) # TODO: improve efficiency of this? (blankOut doesn't need to go through the entire document) + def html_additions(html,(cssToAdd,attrsToAdd),slow_CSS_switch,cookieHostToSet,jsCookieString,canRender,cookie_host,is_password_domain,addHtmlFilterOptions): # Additions to make to HTML only (not on HTML embedded in JSON) # called from doResponse2 if do_html_process is set @@ -3010,20 +3022,20 @@ if(document.getElementById) { # (Above code works around a bug in MSIE 9 by setting the cookie BEFORE doing the removeChild. Otherwise the cookie does not persist.) if options.headAppendRuby: bodyAppend += rubyEndScript if headAppend: - i=html.lower().find("</head") + i=htmlFind(html,"</head") if i==-1: # no head section? headAppend = "<head>"+headAppend+"</head>" - i=html.lower().find("<body") + i=htmlFind(html,"<body") if i==-1: # no body section either? - i=html.lower().find("<html") + i=htmlFind(html,"<html") if i > -1: i = html.find('>',i) if i==-1: i=html.find('>') i += 1 # 0 if we're still -1, else past the '>' html = html[:i]+headAppend+html[i:] if bodyPrepend: - i=html.lower().find("<body") - if i==-1: i = html.lower().find("</head") - if i==-1: i = html.lower().find("<html") + i=htmlFind(html,"<body") + if i==-1: i = htmlFind(html,"</head") + if i==-1: i = htmlFind(html,"<html") if i>-1: i=html.find(">",i) if i>-1: html=html[:i+1]+bodyPrepend+html[i+1:] @@ -3045,7 +3057,7 @@ if(document.getElementById) { return html def addCssHtmlAttrs(html,attrsToAdd): - i=html.lower().find("<body") + i=htmlFind(html,"<body") if i==-1: return html # TODO: what of HTML documents that lack <body> (and frameset), do we add one somewhere? (after any /head ??) i += 5 # after the "<body" j = html.find('>', i) diff --git a/annogen.py b/annogen.py index d475b78..4f700dd 100755 --- a/annogen.py +++ b/annogen.py @@ -306,7 +306,8 @@ if java or javascript or python or c_sharp or ios or ndk or golang: try: import locale terminal_charset = locale.getdefaultlocale()[1] -except: terminal_charset = "utf-8" +except: terminal_charset = None +if not terminal_charset: terminal_charset = "utf-8" try: import urlparse except: if os.environ.get("ANNOGEN_ANDROID_URLS"): errExit("Need urlparse module for ANNOGEN_ANDROID_URLS") # unless we re-implement -- GitLab