Newer
Older

Silas S. Brown
committed
def setCookie_with_dots(self,kv):
    """Emit the Set-Cookie header `kv` twice, once with Domain= bare
    and once with a leading dot, so all subdomains are covered even in
    browsers (e.g. Safari) that need both forms.  (TODO: the dotted
    form might be skippable when not wildcard_dns, here and in the
    cookie-setting scripts.)"""
    domain = self.cookieHostToSet()
    for dot in ("", "."):
        header_value = "%s; Domain=%s%s; Path=/; Expires=%s" % (kv, dot, domain, cookieExpires)
        self.add_header("Set-Cookie", header_value)

Silas S. Brown
committed
def addCookieFromURL(self):
    """If a cookie value was carried in the URL (self.cookieViaURL),
    emit a Set-Cookie for it.  No with/without-dot variants are needed
    here because this cookie name is non-wildcard."""
    if not self.cookieViaURL:
        return
    self.add_header("Set-Cookie",
                    adjust_domain_cookieName + "=" + quote(self.cookieViaURL)
                    + "; Path=/; Expires=" + cookieExpires)

Silas S. Brown
committed
def removeArgument(self,argName,value):
    """Delete argName=value from self.request.uri's query string,
    whether it occurs after another argument (&...), alone at the end
    (?... at end), or first among several (?...&...)."""
    pair = argName + "=" + value
    uri = self.request.uri
    if "&" + pair in uri:
        uri = uri.replace("&" + pair, "")
    elif uri.endswith("?" + pair):
        uri = uri[:-len("?" + pair)]
    elif "?" + pair + "&" in uri:
        uri = uri.replace("?" + pair + "&", "?")
    self.request.uri = uri
def checkViewsource(self):
    """If the URI ends with .viewsource, strip that suffix from the URI
    (and, for GET/HEAD, from the last value of each request argument)
    and return True.  In js_interpreter mode also recognise .screenshot
    (returning "screenshot") and webdriver_click_code suffixes
    (returning a tuple whose first element is the unquoted click
    target).  Must run before further request processing.

    NOTE(review): this extract appears to have lost lines — `toRemove`
    is never initialised on the plain-viewsource path, `ret` can be
    unbound on the click-code-only path, and no final `return ret`
    after the argument loop is visible.  Confirm against the original
    file before relying on this reconstruction."""
    # if URI ends with .viewsource, return True and take it out of the URI and all arguments (need to do this before further processing)
    # - and in js_interpreter mode, recognise .screenshot too and return "screenshot", also (webdriver_click_code + .*)
    if options.js_interpreter and options.js_links and webdriver_click_code in self.request.uri:
        # everything from the click-code marker onwards is ours, not the site's
        toRemove = self.request.uri[self.request.uri.index(webdriver_click_code):]
        ret2 = unquote(toRemove[len(webdriver_click_code):]) # the click target
    elif not options.viewsource: return False
    else: ret2 = None
    if self.request.uri.endswith(".viewsource"):
        if toRemove: ret2 = ret2[:-len(".viewsource")]
        else: toRemove = ".viewsource"
        ret = True
    elif options.js_interpreter and self.request.uri.endswith(".screenshot"):
        if toRemove: ret2 = ret2[:-len(".screenshot")]
        ret = "screenshot"
    elif not toRemove: return False
    if ret2: ret = (ret2,ret)
    self.request.uri = self.request.uri[:-len(toRemove)]
    if not S(self.request.method).lower() in ['get','head']: return ret # TODO: unless arguments are taken from both url AND body in that case
    # GET/HEAD: the suffix may also be on the last value of an argument
    for k,argList in self.request.arguments.items():
        if argList and argList[-1].endswith(toRemove):
            argList[-1]=argList[-1][:-len(toRemove)]
            break

Silas S. Brown
committed
def cookieHostToSet(self):
    """Return the hostname for the Domain= field of cookies we set:
    the matching --host_suffix entry if the request host is under one,
    otherwise the request host with any known port suffix removed."""
    # for the Domain= field of cookies
    host = S(self.request.host)
    for hs in options.host_suffix.split("/"):
        if host.endswith("."+hs):
            return hs
    # NOTE(review): `pp` is not defined anywhere in this extract —
    # presumably ':'+str(options.publicPort) assigned on a line lost
    # from this view; confirm against the original file.
    if host.endswith(pp): return host[:-len(pp)]
    p = ':'+str(options.port) # possible for local connections, if publicPort is set to something else
    if host.endswith(p): return host[:-len(p)]
    return host

Silas S. Brown
committed
def authenticates_ok(self,host):
    """Return True if this request may pass the --password check:
    no password configured, host outside --password_domain, a correct
    ?p= argument (which also sets the password cookie and is removed
    from the URL), or a matching password cookie already present."""
    if not options.password:
        return True  # no password protection configured
    host = S(host)
    if options.password_domain:
        protected = options.password_domain.split('/')
        if host and not any(host == d or host.endswith("." + d) for d in protected):
            return True  # this host isn't one of the protected domains
        self.is_password_domain = True
    # if they said ?p=(password), it's OK and we can give them a cookie with it
    if B(self.getArg("p")) == B(options.password):
        self.setCookie_with_dots(password_cookie_name + "=" + quote(options.password))
        self.removeArgument("p", options.password)
        return True
    return self.getCookie(password_cookie_name) == quote(options.password)

Silas S. Brown
committed

Silas S. Brown
committed
def decode_argument(self, value, name=None): return value # don't try to UTF8-decode; it might not be UTF8

Silas S. Brown
committed
# All the standard HTTP verbs funnel into the same handler, doReq;
# the set of verbs actually enabled comes from the_supported_methods.
SUPPORTED_METHODS = the_supported_methods
@asynchronous
def get(self, *args, **kwargs): return self.doReq()
@asynchronous
def head(self, *args, **kwargs): return self.doReq()
@asynchronous
def post(self, *args, **kwargs): return self.doReq()
@asynchronous
def put(self, *args, **kwargs): return self.doReq()
@asynchronous
def delete(self, *args, **kwargs): return self.doReq()
@asynchronous
def patch(self, *args, **kwargs): return self.doReq()

Silas S. Brown
committed
@asynchronous
def options(self, *args, **kwargs): return self.doReq()
@asynchronous
def connect(self, *args, **kwargs):
    """Handle an HTTP CONNECT (tunnelling) request.  Tunnelling is
    allowed when running as a real proxy, as a js_interpreter/SSL
    upstream, or to the configured ssh-proxy host:port; any other
    CONNECT gets a 400.  Port-443 tunnels (other than ssh-proxy) are
    rerouted to our own SSL-adjusting port on 127.0.0.1."""
    # NOTE(review): the except-clause for this single-line try is not
    # visible in this extract (scrape loss); confirm against the
    # original file.
    try: host, port = S(self.request.uri).split(':')
    is_sshProxy = (host,port)==(allowConnectHost,allowConnectPort)
    if host and (options.real_proxy or self.isPjsUpstream or self.isSslUpstream or is_sshProxy) and not (self.isPjsUpstream and options.js_interpreter in ["HeadlessFirefox","Firefox"] and host in block_headless_firefox): # support tunnelling if real_proxy (but we might not be able to adjust anything, see below), but at any rate support ssh_proxy if set
        # open the upstream leg of the tunnel
        upstream = tornado.iostream.IOStream(socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0))
        client = self.request.connection.stream
        # See note about Tornado versions in writeAndClose
        if not is_sshProxy and not self.isSslUpstream and int(port)==443:
            # We can change the host/port to ourselves
            # and adjust the SSL site (assuming this CONNECT
            # is for an SSL site)
            # This should result in a huge "no cert" warning
            host,port = "127.0.0.1",port_randomise.get(self.WA_connectPort,self.WA_connectPort)
            debuglog("Rerouting CONNECT to "+host+":"+str(port))
            self.request.suppress_logging = True # no need to log the CONNECT if our other port will be logging the GET
        def callback(*args):
            # connected upstream: pump bytes both ways until either side closes
            readUntilClose(client,lambda data:writeAndClose(upstream,data),lambda data:writeOrError(client,"upstream "+host+":"+str(port)+self.debugExtras(),upstream,data)) # (DO say 'upstream', as if host==localhost it can be confusing (TODO: say 'upstream' only if it's 127.0.0.1?))
            if self.isPjsUpstream: clientErr=None # we won't mind if our js_interpreter client gives up on an upstream fetch
            else: clientErr = "client "+self.request.remote_ip+self.debugExtras()
            readUntilClose(upstream,lambda data:writeAndClose(client,data),lambda data:writeOrError(upstream,clientErr,client,data))
            try:
                client.write(B('HTTP/1.0 200 Connection established\r\n\r\n'))
                debuglog("Connection established")
            except tornado.iostream.StreamClosedError:
                if not self.isPjsUpstream: logging.error("client "+self.request.remote_ip+" closed before we said Established"+self.debugExtras())
        doCallback(self,upstream.connect,callback,(host, int(port)))
        # Tornado _log is not called until finish(); it would be useful to log the in-process connection at this point
        try: self._log()
        except: pass # not all Tornado versions support this?
    else: self.set_status(400),self.myfinish()

Silas S. Brown
committed
def myfinish(self):
    """finish() wrapper that tolerates already-closed connections and
    does our per-request bookkeeping: removes the URI from the
    webdriver in-progress list, honours --one_request_only, and drops
    this request from the in-flight counters."""
    if hasattr(self,"_finished") and self._finished: pass # try to avoid "connection closed" exceptions if browser has already gone away
    else:
        try:
            self.finish()
            self._finished = 1 # (just in case)
        except: pass # belt and braces (depends on Tornado version?)
    if self.isPjsUpstream:
        # this fetch was made on behalf of a js_interpreter instance;
        # it is no longer in progress
        try:
            webdriver_inProgress[self.WA_PjsIndex].remove(self.request.uri)
        except: pass
    elif options.one_request_only and not self.isSslUpstream: stopServer("Stopping after one request")
    try: reqsInFlight.remove(id(self))
    except: pass
    try: origReqInFlight.remove(id(self))
    except: pass

Silas S. Brown
committed

Silas S. Brown
committed
def redirect(self,redir,status=301):
    """Redirect the browser to `redir`, or serve the target directly
    when can_serve_without_redirect allows.  Sends a protocol-relative
    Location when redir_relative_when_possible and the browser is
    known to support it (so an optional HTTPS-terminating front-end
    keeps whatever protocol the browser was using), plus a small HTML
    body with a link for browsers that need one.

    Fixes applied: (1) the Redirect-body link is HTML-escaped with
    &amp;/&quot; — the previous replace() calls substituted the
    characters for themselves, leaving the attribute injectable;
    (2) v() returns an int so the WebKit version comparison is numeric
    (a str/int compare raises TypeError on Python 3 and compared
    lexically-never-true on Python 2)."""
    if self.can_serve_without_redirect(redir):
        return self.doReq0()
    debuglog("Serving redirect ("+repr(status)+" to "+repr(redir)+")"+self.debugExtras())
    try: self.set_status(status)
    except ValueError: self.set_status(status, "Redirect") # e.g. 308 (not all Tornado versions handle it)
    for h in ["Location","Content-Type","Content-Language"]: self.clear_header(h) # clear these here, so redirect() can still be called even after a site's headers were copied in
    if redir_relative_when_possible: url_relative = url_is_ours(redir) # (no need to send the correct cookieHost, just need to know if host gets changed)
    else: url_relative = False
    if url_relative:
        # If we're behind an optional HTTPS-terminating proxy, it would be nice to tell the browser to keep whatever protocol it's currently using, IF the browser would definitely understand this.
        # RFC 7231 from 2014 allows relative redirects in updated HTTP/1.1 based on browser observations, but original 1999 HTTP/1.1 RFC didn't. MSIE 9 from 2011 allows relative.
        if self.checkBrowser(["Lynx/2.8","Gecko/20100101","Trident/7","Trident/8","Trident/9","Edge"]): pass
        else:
            ua = S(self.request.headers.get("User-Agent",""))
            def v(b):
                # numeric version following browser token b, or 0 if absent
                if b in ua:
                    m = re.match("[0-9]+",ua[ua.index(b)+len(b):])
                    if m: return int(m.group()) # int() so the comparison below is numeric
                return 0
            if v("WebKit/") < 537: # TODO: or v("") < ... etc
                # I haven't been able to test it works on these old versions
                url_relative = False
    if url_relative: redir = S(redir).replace("http:","",1)
    self.add_header("Location",S(redir))
    if url_relative: pass # these browsers don't need a body
    else:
        self.add_header("Content-Type","text/html")
        if self.canWriteBody(): self.write(B('<html lang="en"><body><a href="%s">Redirect</a></body></html>' % S(redir).replace('&','&amp;').replace('"','&quot;')))
    self.myfinish()
def can_serve_without_redirect(self,redir):
    """If all links can be rewritten and urlboxPath matters, rewrite
    self.request's uri/host/arguments in place so the redirect target
    can be served directly and return True; otherwise leave the
    request untouched and return None."""
    # Try to serve without redirect if all links can be rewritten and urlboxPath might matter
    if self.isSslUpstream or self.isPjsUpstream or options.wildcard_dns or options.urlboxPath=="/" or not self.htmlOnlyMode(): return # TODO: isProxyRequest argument to htmlOnlyMode? (relevant only if someone configures an adjuster with a non-/ urlbox-path that ALSO accepts real-proxy requests)
    if not hasattr(self.request,"redirCount"):
        self.request.redirCount = 0
    if self.request.redirCount >= 10: return # loop?
    self.request.redirCount += 1
    self.cookieViaURL = None # recalculate:
    oldArgs = self.request.arguments
    (scheme, netloc, path, query, fragment) = urlparse.urlsplit(S(redir))
    self.request.arguments = urlparse.parse_qs(query)
    if not url_is_ours(redir,self.cookie_host()):
        # raise Exception(repr((redir,self.cookie_host()))) # for testing
        self.request.arguments = oldArgs # roll back: we won't serve it ourselves
        return
    if not path.startswith("/"): path="/"+path
    if query: query = "?"+query
    self.request.uri = scheme+"://"+netloc+path+query
    self.request.path = path
    return True
def request_no_external_referer(self):
    """Politely ask the browser to withhold Referer from other
    origins.  Some sites publicly log their Referer headers, so a
    direct link from an adjusted page to a non-adjusted one could put
    a 'deep link' into a public log where bad robots (which ignore
    our robots.txt) might crawl it.  Not every browser implements
    this policy, but asking costs one header.  (For --renderOmitGoAway
    an 'air lock' URL could be used instead, but that wouldn't help
    with --redirectFiles.)"""
    self.add_header("Referrer-Policy", "same-origin")

Silas S. Brown
committed
def add_nocache_headers(self):
    """Add a belt-and-braces set of headers telling browsers and
    proxies, old and new, not to cache this response."""
    nocache = [
        ("Pragma", "no-cache"),
        ("Vary", "*"),
        ("Expires", "Thu Jan 01 00:00:00 1970"),
        ("Cache-Control", "no-cache, no-store, must-revalidate, max-stale=0, post-check=0, pre-check=0"),
    ]
    for name, value in nocache:
        self.add_header(name, value)

Silas S. Brown
committed
def inProgress(self):
    # If appropriate, writes a "conversion in progress" page and returns True, and then self.inProgress_run() should return True.
    # Not on wget or curl (TODO: configurable?)
    if not options.waitpage or not options.pdfepubkeep: return False
    ua = " "+S(self.request.headers.get("User-Agent",""))
    if " curl/" in ua or " Wget/" in ua: return False # (but don't return false for libcurl/)
    self.set_status(200)
    self.add_nocache_headers()
    self.add_header("Refresh","10") # TODO: configurable refresh period? and make sure it does not exceed options.pdfepubkeep
    # serve the waiting page as plain HTML, not as the file attachment
    # the site's own headers may have described
    self.clear_header("Content-Disposition")
    self.clear_header("Content-Type")
    self.add_header("Content-Type","text/html")
    self.inProgress_has_run = True # doResponse2 may set a callback for render, so can't set _finished yet, but do need to set something so txtCallback knows not to write the actual text into this response (TODO could do a "first one there gets it" approach, but it's unlikely to be needed)
    warn=self.checkBrowser(["IEMobile 6","IEMobile 7","Opera Mobi"],"<h3>WARNING: Your browser might not save this file</h3>You are using {B}, which has been known to try to display text attachments in its own window using very small print, giving no option to save to a file. You might get better results in IEMobile 8+ or Opera Mini (although the latter may have a more limited range of font sizes in the browser itself).") # TODO: make this warning configurable? See comment after set_header("Content-Disposition",...) below for details
    self.doResponse2(("""%s<h1>File conversion in progress</h1>The result should start downloading soon. If it does not, try <script><!--
document.write('<a href="javascript:location.reload(true)">refreshing this page</a>')
//--></script><noscript>refreshing this page</noscript>.%s%s<hr>This is %s</body></html>""" % (htmlhead("File conversion in progress"),backScript,warn,serverName_html)),True,False)
    # TODO: if (and only if) refreshing from this page, might then need a final 'conversion finished' page before serving the attachment, so as not to leave an 'in progress' page up afterwards
    return True
def inProgress_run(self): return hasattr(self,"inProgress_has_run") and self.inProgress_has_run

Silas S. Brown
committed
def addToHeader(self,header,toAdd):
    """Append toAdd to a comma-separated request header, unless it is
    already the final item (meaning some front-end in the software
    stack added it for us)."""
    toAdd = S(toAdd)
    existing = S(self.request.headers.get(header, ""))
    if (", " + existing).endswith(", " + toAdd):
        return  # an outer layer already appended it
    if existing:
        self.request.headers[header] = existing + ", " + toAdd
    else:
        self.request.headers[header] = toAdd
def forwardFor(self,server,serverType="ownServer"):
    """Hand this whole connection over to another server (host[:port],
    default port 80): re-send the original request line, headers (with
    X-Real-Ip replaced by the client's address) and body upstream,
    then relay bytes in both directions at the socket level.
    Optionally folds the client IP into User-Agent for own_server."""
    server = S(server)
    if wsgi_mode: raise Exception("Not implemented for WSGI mode") # no .connection
    if server==options.own_server and options.ownServer_useragent_ip:
        # prepend "(client-ip)" to User-Agent for the real server's logs
        r = S(self.request.headers.get("User-Agent",""))
        if r: r=" "+r
        r="("+S(self.request.remote_ip)+")"+r
        self.request.headers["User-Agent"]=r
    upstream = tornado.iostream.IOStream(socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0))
    client = self.request.connection.stream
    if ':' in server: host, port = server.split(':')
    else: host, port = server, 80
    # once connected, pump both directions until either side closes
    doCallback(self,upstream.connect,lambda *args:(readUntilClose(upstream,lambda data:writeAndClose(client,data),lambda data:writeOrError(upstream,serverType+" client",client,data)),readUntilClose(client,lambda data:writeAndClose(upstream,data),lambda data:writeOrError(client,serverType+" upstream",upstream,data))),(host, int(port)))
    # resend the request line exactly as the client originally sent it
    try: self.request.uri = self.request.original_uri
    except: pass
    upstream.write(B(self.request.method)+B(" ")+B(self.request.uri)+B(" ")+B(self.request.version)+B("\r\n")+B("\r\n".join(("%s: %s" % (k,v)) for k,v in (list(h for h in self.request.headers.get_all() if not h[0].lower()=="x-real-ip")+[("X-Real-Ip",self.request.remote_ip)]))+"\r\n\r\n")+B(self.request.body))

Silas S. Brown
committed
def thin_down_headers(self):
    """Shrink the response for ping / SSH-tunnel replies: suppress
    logging and drop what headers we can while still permitting
    keepalive.  (Date is added by Tornado 3, which can also add
    "Vary: Accept-Encoding" after we get here.  Etag can't be cleared,
    but a one-character value beats Tornado's computed one.)"""
    self.request.suppress_logging = True
    for name in ["Server", "Content-Type", "Date"]:
        try:
            self.clear_header(name)
        except:
            pass  # header absent / Tornado version difference
    self.set_header("Etag", "0")
def answerPing(self,newVersion):
    # answer a "ping" request from another machine that's using us as a fasterServer
    self.thin_down_headers()
    # NOTE(review): this extract appears to have lost the conditional
    # (presumably testing newVersion) that chooses between the
    # byte-per-second loop below and the single-byte reply in the
    # trailing `else:`; confirm against the original file.
    # Forget the headers, just write one byte per second for as long as the connection is open
    # TODO: document that it's a bad idea to set up a fasterServer in wsgi_mode (can't do ipTrustReal, must have fasterServerNew=False, ...)
    stream = self.request.connection.stream
    stream.socket.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1) # each 1-byte heartbeat must go out immediately
    def writeBytes():
        try:
            stream.write(B("1"))
            IOLoopInstance().add_timeout(time.time()+1,lambda *args:writeBytes()) # schedule the next heartbeat
        except:
            # logging.info("ping2: disconnected")
            self.myfinish()
    if not options.background: sys.stderr.write("ping2: "+S(self.request.remote_ip)+" connected\n") # (don't bother logging this normally, but might want to know when running in foreground)
    writeBytes()
    else:
        self.write(B("1")) ; self.myfinish()

Silas S. Brown
committed

Silas S. Brown
committed
def answer_load_balancer(self):
    """Reply to load-balancer / uptime-checker probes with a short
    explanatory HTML page, without logging the request.  (These probes
    are identified elsewhere by their cloaked User-Agent.)"""
    self.request.suppress_logging = True
    self.add_header("Content-Type", "text/html")
    if self.canWriteBody():
        page = htmlhead() + "<h1>Web Adjuster load-balancer page</h1>This page should not be shown to normal browsers, only to load balancers and uptime checkers. If you are a human reading this message, <b>it probably means your browser is \"cloaked\"</b> (hidden User-Agent string); please set a browser string to see the top-level page.</body></html>"
        self.write(B(page))
    self.myfinish()

Silas S. Brown
committed
def find_real_IP(self):
    """Replace self.request.remote_ip with the client's real address
    when we're behind a front-end: first X-Forwarded-For if trusted,
    otherwise X-Real-Ip from a trusted upstream, cached on the
    connection object so keepalive requests keep it (X-Real-Ip is set
    only on the first request of a connection)."""
    if wsgi_mode: return # no connection object to work with there
    if options.trust_XForwardedFor:
        xff = self.request.headers.get_list("X-Forwarded-For")
        if xff:
            xff = xff[0].split()
            # (TODO: do we always want FIRST header?)
            if xff:
                self.request.remote_ip = xff[0]
                return
    if not options.ipTrustReal in [S(self.request.remote_ip),'*']: return # connection isn't from the trusted front-end
    try: self.request.remote_ip = self.request.connection.stream.confirmed_ip
    except:
        self.request.remote_ip = self.request.headers.get("X-Real-Ip",self.request.remote_ip)
        try: self.request.connection.stream.confirmed_ip = self.request.remote_ip # keep it for keepalive connections (X-Real-Ip is set only on the 1st request)
        except: pass
    try: del self.request.headers["X-Real-Ip"] # don't leak it to upstream servers
    except: pass
def serveRobots(self):
    """Serve a robots.txt disallowing all crawling.  Deliberately
    returns None — a caller relies on that (see the comment near the
    serveRobots call in the renderOmit handling).
    NOTE(review): no finishing call (myfinish) is visible in this
    extract — confirm whether one was lost from this view."""
    self.add_header("Content-Type","text/plain")
    if self.canWriteBody(): self.write(B("User-agent: *\nDisallow: /\n"))

Silas S. Brown
committed
def serveImage(self,img):
    """Write a rendered character image with far-future caching
    headers (these images never change), suppressing per-request
    logging unless --renderLog is set."""
    if not options.renderLog:
        self.request.suppress_logging = True
    self.add_header("Content-Type", "image/" + options.renderFormat)
    self.add_header("Last-Modified", "Sun, 06 Jul 2008 13:20:05 GMT")
    self.add_header("Expires", "Wed, 1 Dec 2036 23:59:59 GMT") # TODO: S2G (may need Cache-Control with max-age directive instead, drop older browsers)
    # self.clear_header("Server") # save bytes if possible as we could be serving a LOT of these images .. but is this really needed? (TODO)
    if self.canWriteBody():
        self.write(img)
    self.myfinish()

Silas S. Brown
committed
def set_htmlonly_cookie(self):
    """Set (or clear) the HTML-only-mode cookie according to the "pr"
    value entered from the URL box, mirroring the change into this
    request's own Cookie header so htmlOnlyMode() sees it."""
    # Set the cookie according to the value of "pr" entered from the URL box.
    # TODO: option to combine this and other cookie-based settings with enable_adjustDomainCookieName_URL_override so the setting can be bookmarked ? (some users might want that off however, as an address is different from a setting; in the case of htmlOnly the q= URL can already be bookmarked if can stop it before the redirect)
    if options.htmlonly_mode:
        htmlonly_mode = (force_htmlonly_mode or "pr" in self.request.arguments)
        current_setting = htmlmode_cookie_name+"=1" in ';'.join(self.request.headers.get_list("Cookie"))
        if not htmlonly_mode == current_setting:
            if htmlonly_mode: val="1"
            else: val="0"
            self.setCookie_with_dots(htmlmode_cookie_name+"="+val)
            # and also add it to self.request.headers,
            # for the benefit of htmlOnlyMode below
            # which sees the same request
            # (TODO: delete old setting? but it's
            # usually used only by redir)
            self.request.headers.add("Cookie",htmlmode_cookie_name+"="+val)
elif self.isPjsUpstream or self.isSslUpstream:
return False
elif self.auto_htmlOnlyMode(isProxyRequest):
return True
elif isProxyRequest: return False
elif force_htmlonly_mode: return True
elif hasattr(self.request,"old_cookie"): ck = self.request.old_cookie # so this can be called between change_request_headers and restore_request_headers, e.g. at the start of send_request for js_interpreter mode
else: ck = ';'.join(self.request.headers.get_list("Cookie"))
def auto_htmlOnlyMode(self,isProxyRequest): return options.js_interpreter and (isProxyRequest or (not options.wildcard_dns and not can_do_cookie_host()))

Silas S. Brown
committed

Silas S. Brown
committed
def handle_URLbox_query(self,v):
    """Process a URL or search typed into the URL box: set the
    HTML-only cookie, turn bare text into a search or an http:// URL,
    and (when not wildcard_dns) arrange the right cookie_host for the
    requested site, either via a bookmarkable URL argument or a
    Set-Cookie header.
    NOTE(review): this extract appears to have lost the line that
    assigns wanted_host from the scanned hostname (presumably v[i:j]),
    and possibly the tail of the method (the eventual redirect using
    wanted_host); confirm against the original file."""
    self.set_htmlonly_cookie()
    v = B(v)
    if not (v.startswith(B("http://")) or v.startswith(B("https://"))):
        # bare text: spaces or no dot means a search, otherwise assume a hostname
        if B(' ') in v or not B('.') in v: v=getSearchURL(v)
        else: v=B("http://")+v
    if not options.wildcard_dns: # need to use cookie_host
        j = i = v.index(B('/'))+2 # after the http:// or https://
        while j<len(v) and v[j] in B(letters+digits+'.-'): j += 1 # scan to the end of the hostname
        if v[i-4:i-3]==B('s'): wanted_host += B(".0") # HTTPS hack (see protocolAndHost)
        ch = self.cookie_host(checkURL=False) # current cookie hostname
        if B(convert_to_requested_host(wanted_host,ch))==B(wanted_host):
            debuglog("Need to change cookie_host to get "+repr(wanted_host))
            if enable_adjustDomainCookieName_URL_override:
                # do it by URL so they can bookmark it (that is if it doesn't immediately redirect)
                # (TODO: option to also include the password in this link so it can be passed it around? and also in the 'back to URL box' link? but it would be inconsistent because not all links can do that, unless we consistently 302-redirect everything so that they do, but that would reduce the efficiency of the browser's HTTP fetches. Anyway under normal circumstances we probably won't want users accidentally spreading include-password URLs)
                v = addArgument(v,adjust_domain_cookieName+'='+quote(wanted_host))
            else: self.add_header("Set-Cookie",adjust_domain_cookieName+"="+quote(wanted_host)+"; Path=/; Expires="+cookieExpires) # (DON'T do this unconditionally, convert_to_requested_host above might see we already have another fixed domain for it)
            # (TODO: if convert_to_requested_host somehow returns a *different* non-default_site domain, that cookie will be lost. Might need to enforce max 1 non-default_site domain.)
        else: wanted_host = ch
    else: wanted_host=None # not needed if wildcard_dns

Silas S. Brown
committed
def forwardToOtherPid(self):
    """When SSL is handled in a forked helper process (--ssl_fork and
    this is the SSL port), forward the request back to the original
    PID, which owns the webdrivers etc.  No-op otherwise."""
    if not (options.ssl_fork and self.WA_UseSSL):
        return
    origPort = port_randomise.get(self.WA_origPort, self.WA_origPort)
    helperPort = port_randomise.get(self.WA_connectPort, self.WA_connectPort)
    self.forwardFor("127.0.0.1:%d" % origPort, "SSL helper:" + str(helperPort))

Silas S. Brown
committed
def handleFullLocation(self):
    """Normalise the request: rewrite absolute-URI (proxy-style)
    requests into host+path form, mark connections forwarded from the
    forked SSL helper, and append ".0" to the requested host when the
    request came via the SSL helper (so it's treated as an https
    host).  Returns True only after answering an invalid URI with 400."""
    # HTTP 1.1 spec says ANY request can be of form http://...., not just a proxy request. The differentiation of proxy/not-proxy depends on what host is requested. So rewrite all http://... requests to HTTP1.0-style host+uri requests.
    if options.ssl_fork and self.request.headers.get("X-From-Adjuster-Ssl-Helper",""):
        debuglog("Setting isFromSslHelper"+self.debugExtras())
        self.request.connection.stream.isFromSslHelper = True # it doesn't matter if some browser spoofs that header: it'll mean they'll get .0 asked for; however we could check the remote IP is localhost if doing anything more complex with it
        del self.request.headers["X-From-Adjuster-Ssl-Helper"] # don't pass it to upstream servers
    if B(self.request.uri).startswith(B("http://")):
        self.request.original_uri = self.request.uri # kept for forwardFor
        parsed = urlparse.urlparse(S(self.request.uri))
        self.request.host = self.request.headers["Host"] = parsed.netloc
        self.request.uri = urlparse.urlunparse(("","")+parsed[2:]) # path + query etc only
        if not self.request.uri: self.request.uri="/"
    elif not B(self.request.uri).startswith(B("/")): # invalid
        self.set_status(400) ; self.myfinish() ; return True
    if self.WA_UseSSL or (hasattr(self.request,"connection") and hasattr(self.request.connection,"stream") and hasattr(self.request.connection.stream,"isFromSslHelper")): # we're the SSL helper on port+1 and we've been CONNECT'd to, or we're on port+0 and forked SSL helper has forwarded it to us, so the host asked for must be a .0 host for https
        if self.request.host and not B(self.request.host).endswith(B(".0")): self.request.host = S(self.request.host)+".0"

Silas S. Brown
committed
if not B(allowConnectURL)==B("http://")+B(self.request.host)+B(self.request.uri): return
self.thin_down_headers() ; self.add_header("Pragma","no-cache") # hopefully "Pragma: no-cache" is enough and we don't need all of self.add_nocache_headers
global the_ssh_tunnel # TODO: support more than one SSH tunnel? (but will need to use session IDs etc; GNU httptunnel does just 1 tunnel as of 3.x so maybe we're OK)

Silas S. Brown
committed
if B(self.request.body)==B("new connection"):
self.request.body = B("")
the_ssh_tunnel[1].append(None) # if exists
if None in the_ssh_tunnel[1]:
try: the_ssh_tunnel[0].close()
except: pass
raise NameError # as though the_ssh_tunnel didn't yet exist
except NameError: # not yet established
sessionID = time.time() # for now
the_ssh_tunnel = [tornado.iostream.IOStream(socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)),[],sessionID] # upstream connection, data waiting for client, id
def add(data):
if sessionID==the_ssh_tunnel[2]:
the_ssh_tunnel[1].append(data)

Silas S. Brown
committed
doCallback(self,the_ssh_tunnel[0].connect,lambda *args:readUntilClose(the_ssh_tunnel[0],lambda data:(add(data),add(None)),add),(allowConnectHost, int(allowConnectPort)))
# TODO: log the fact we're starting a tunnel?
if self.request.body: the_ssh_tunnel[0].write(self.request.body) # TODO: will this work even when it's not yet established? (not a big problem on SSH because server always speaks first)
def check_ssh_response(startTime,sessionID):
if not the_ssh_tunnel[2]==sessionID: return self.myfinish()

Silas S. Brown
committed
if the_ssh_tunnel[1]==[] and not time.time()>startTime+3: return IOLoopInstance().add_timeout(time.time()+0.2,lambda *args:check_ssh_response(startTime,sessionID)) # keep waiting (up to max 3sec - not too long because if client issues a 'read on timeout' while the SSH layer above is waiting for user input then we still want it to be reasonably responsive to that input; it's the client side that should wait longer between polls)

Silas S. Brown
committed
self.write(B('').join(the_ssh_tunnel[1][:-1]))

Silas S. Brown
committed
self.write(B('').join(the_ssh_tunnel[1]))

Silas S. Brown
committed
IOLoopInstance().add_timeout(time.time()+0.2,lambda *args:check_ssh_response(time.time(),the_ssh_tunnel[2]))

Silas S. Brown
committed
def handleSpecificIPs(self):
    """If the configured IP-message function matches this client's
    address, serve a block page ('*'-prefixed message) or a one-time
    message page (remembered client-side via a hash cookie; a
    '-'-prefixed 'minor edit' is suppressed for anyone who has seen
    any previous message) and return True; return False to let normal
    processing continue."""
    if not ipMatchingFunc: return False
    msg = ipMatchingFunc(self.request.remote_ip)
    if not msg: return False
    if B(msg).startswith(B('*')): # a block
        self.write(B(htmlhead("Blocked"))+B(msg)[1:]+B("</body></html>")) ; self.myfinish() ; return True
    if B(self.request.uri) in [B("/robots.txt"),B("/favicon.ico")]: return False # don't waste the message on robot fetches
    cookies = ';'.join(self.request.headers.get_list("Cookie"))
    if B(msg).startswith(B('-')): # minor edit
        msg = B(msg)[1:]
        if seen_ipMessage_cookieName+"=" in cookies:
            # seen ANY message before (not just this)
            return False
    val = cookieHash(msg)
    if seen_ipMessage_cookieName+"="+val in cookies:
        # seen THIS message before
        return False
    hs = self.cookieHostToSet()
    self.add_nocache_headers()
    # message page; the inline JS sets the seen-message cookie (with and
    # without leading domain dot) and reloads when the user continues
    if self.canWriteBody(): self.write(B(htmlhead("Message"))+B(msg)+(B("<p><form><label><input type=\"checkbox\" name=\"gotit\">Don't show this message again</label><br><input type=\"submit\" value=\"Continue\" onClick=\"var a='%s=%s;domain=',b=(document.forms[0].gotit.checked?'expires=%s;':'')+'path=/',h='%s;';document.cookie=a+'.'+h+b;document.cookie=a+h+b;location.reload(true);return false\"></body></html>" % (seen_ipMessage_cookieName,val,cookieExpires,hs))))
    logging.info("ip_messages: done "+S(self.request.remote_ip))
    self.myfinish() ; return True

Silas S. Brown
committed
if not options.renderOmitGoAway: return False
browser = self.checkBrowser(options.renderOmit)
if not browser: return False
if maybeRobots:
self.serveRobots() # regardless of which browser header it presents
return True # do NOT shorten this by making serveRobots return True: it must return None due to other uses

Silas S. Brown
committed
# TODO: option to redirect immediately without this message? (but then we'd be supplying a general redirection service, which might have issues of its own)

Silas S. Brown
committed
msg = ' and <a rel="noreferrer" href="%s%s">go directly to the original site</a>' % (S(protocolWithHost(realHost)),S(self.request.uri))

Silas S. Brown
committed
else: msg = ''
self.add_nocache_headers()

Silas S. Brown
committed
if self.canWriteBody(): self.write(B("%s<h1>You don't need this!</h1>This installation of Web Adjuster has been set up to change certain characters into pictures, for people using old computers that don't know how to display them themselves. However, <em>you</em> seem to be using %s, which is <noscript>either </noscript>definitely capable of showing these characters by itself<noscript>, or else wouldn't be able to show the pictures anyway<!-- like Lynx --></noscript>. Please save our bandwidth for those who really need it%s. Thank you.</body></html>" % (htmlhead(),S(browser),msg)))

Silas S. Brown
committed
self.myfinish() ; return True
def needCssCookies(self):
    """True if --headAppendCSS offers choices (%s placeholders) and at
    least one adjustCss*s choice-cookie is missing, meaning we should
    redirect to the main page for a CSS selection — unless the request
    looks like an XMLHttpRequest, which doesn't always send cookies."""
    css = options.headAppendCSS
    if not css or '%s' not in css: return False
    numChoices = len(css.split(';')) - 1
    for i in range(numChoices):
        if self.getCookie("adjustCss%ds" % i, ""):
            continue  # this choice is already made
        # Missing cookie: double-check it isn't an XHR before redirecting
        referer = S(self.request.headers.get("Referer", ""))
        if any(suffix in referer for suffix in options.host_suffix.split("/")):
            accept = S(self.request.headers.get("Accept", ""))
            if "application/json" in accept or len(accept.split(",")) == 2:
                return False
        return True
    return False
def cssAndAttrsToAdd(self):
    # Return (stylesheet URL, cssHtmlAttrs value) for this request,
    # substituting the user's cookie-selected choices into the %s
    # placeholders of headAppendCSS.  If any choice cookie is missing,
    # no stylesheet is used.
    # (This body also strips the VCS-viewer line-number noise that had
    # been pasted into the middle of the function.)
    h = options.headAppendCSS ; cha = options.cssHtmlAttrs
    if not h or not '%s' in h: return h, cha # no user-selectable options
    h,opts = h.split(';',1)
    opts=opts.split(';')
    ckCount = N = 0
    for o in opts:
        chosen = self.getCookie("adjustCss" + str(ckCount) + "s", "")
        if not chosen:
            # we don't have all the necessary cookies to choose a stylesheet, so don't have one (TODO: or do we just want to go to the first listed?)
            if cha and ';' in cha: return "", ""
            else: return "", cha
        poss_vals = [re.sub('=.*','',x) for x in o.split(',')]
        if '' in poss_vals: poss_vals[poss_vals.index('')]='-'
        if not chosen in poss_vals: chosen = re.sub('=.*','',o.split(',',1)[0]) # make sure it's an existing option, to protect against cross-site-scripting injection of CSS (as some browsers accept JS in CSS)
        N = poss_vals.index(chosen) # index of the (last) chosen value, used to pick among ';'-separated cssHtmlAttrs below
        if chosen=="-": chosen = "" # TODO: document in headAppendCSS that we use '-' as a placeholder because we need non-empty values in cookies etc
        h=h.replace('%s',chosen,1)
        ckCount += 1
    if cha and ';' in cha: return h, options.cssHtmlAttrs.split(';')[N]
    else: return h, cha
def cssOptionsHtml(self):
    """Build the HTML <select> controls for choosing between the
    alternative stylesheets configured in headAppendCSS, with the
    user's current cookie-based selections pre-selected.  Returns ""
    when headAppendCSS defines no alternatives."""
    css = options.headAppendCSS
    if not css or not '%s' in css: return ""
    _, optStr = css.split(';',1)
    pieces = ["<p>Style:"]
    for idx,group in enumerate(optStr.split(';')):
        cookieName = "adjustCss" + str(idx) + "s"
        pieces.append(' <select name="%s">' % cookieName)
        current = self.getCookie(cookieName, "")
        for item in group.split(','):
            if '=' in item: value,label = item.split('=',1)
            else: value = label = item
            if value=="": value = "-" # TODO: document in headAppendCSS that we use '-' as a placeholder because we need non-empty values in cookies etc
            selected = " selected" if value==current else ""
            pieces.append('<option value="%s"%s>%s</option>' % (value,selected,label))
        pieces.append('</select>')
    return ''.join(pieces)+' <input type="submit" name="try" value="Try"></p>'
def set_css_from_urlbox(self):
    # If the URL-box form submitted adjustCss<N>s stylesheet choices,
    # persist each one as a cookie (and also into the current request,
    # so the 'Try' button takes immediate effect).
    h = options.headAppendCSS
    if not h or not '%s' in h: return # no user-selectable styles configured
    h,opts = h.split(';',1)
    ckCount = 0
    for o in opts.split(';'):
        ckName = "adjustCss" + str(ckCount) + "s"
        ckVal = self.getArg(ckName)
        if ckVal:
            self.setCookie_with_dots(ckName+"="+ckVal) # TODO: do we ever need to quote() ckVal ? (document to be careful when configuring?)
            self.setCookie(ckName,ckVal) # pretend it was already set on THIS request as well (for 'Try' button; URL should be OK as it redirects)
        ckCount += 1

Silas S. Brown
committed
def serve_URLbox(self):
    """Send the main URL-entry page (the "URL box") to the client."""
    if not options.wildcard_dns: self.clearUnrecognisedCookies() # TODO: optional?
    self.addCookieFromURL()
    textBrowser = self.htmlOnlyMode() or self.checkBrowser(["Lynx/"])
    prefill = self.getArg("q") or self.getArg("d")
    page = urlbox_html(textBrowser,self.cssOptionsHtml(),prefill)
    self.doResponse2(page,True,False) # TODO: run htmlFilter on it also? (render etc will be done by doResponse2)

Silas S. Brown
committed
def serve_hostError(self):
    """Tell the client the requested host could not be determined,
    listing whichever access methods this instance is configured for."""
    ways = []
    if options.wildcard_dns: ways.append("prefixing its domain with the one you want to adjust")
    if options.real_proxy: ways.append("setting it as a <b>proxy</b>")
    if ways: err="This adjuster can be used only by "+", or ".join(ways)+"."
    else: err="This adjuster cannot be used. Check the configuration."
    self.doResponse2(htmlhead()+err+'</body></html>',True,False) # TODO: run htmlFilter on it also? (render etc will be done by doResponse2)

Silas S. Brown
committed
def serve_mailtoPage(self):
    # Explain a rewritten mailto: link to the user: show the To /
    # Subject / Body it carries, offer a real mailto: link, and (on
    # phones listed in mailtoSMS) an RFC 5724 sms: link as well.
    # (Reconstructed: interleaved VCS blame noise removed.)
    ua = S(self.request.headers.get("User-Agent",""))
    if any(re.search(x,ua) for x in options.prohibitUA): return self.serveRobots()
    uri = S(self.request.uri)[len(options.mailtoPath):].replace('%%+','%') # we encode % as %%+ to stop browsers and transcoders from arbitrarily decoding e.g. %26 to &
    if '?' in uri:
        addr,rest = uri.split('?',1)
        self.request.arguments = urlparse.parse_qs(rest) # after the above decoding of %'s
    else: addr=uri
    addr = unquote(addr)
    body = self.getArg("body")
    subj = self.getArg("subject")
    r = [] ; smsLink = ""
    if addr: r.append("To: "+ampEncode(addr))
    if subj: r.append("Subject: "+ampEncode(subj))
    if body:
        r.append("Body: "+ampEncode(body))
        # NOTE(review): the sms:-link logic below is nested inside the
        # 'body' branch per the visible line order — confirm against upstream
        if self.checkBrowser(options.mailtoSMS):
            if subj and not body.startswith(subj): smsLink = subj+" "+body
            else: smsLink = body
            if '&' in smsLink:
                smsLink="[Before sending this text, replace -amp- with an ampersand. This substitution has been done in case your phone isn't compliant with RFC 5724.] "+smsLink.replace('&',' -amp- ')
            # RFC 5724 shows we ought to get away with ampersands encoded as %26, but on Windows Mobile (Opera or IE) we don't; the SMS is truncated at that point. TODO: whitelist some other platforms? (test with <a href="sms:?body=test1%26test2">this</a>)
            if self.checkBrowser(["iPhone OS 4","iPhone OS 5","iPhone OS 6","iPhone OS 7"]): sep = ';'
            elif self.checkBrowser(["iPhone OS 8","iPhone OS 9"]): sep = '&'
            else: sep = '?'
            smsLink = B('<br><a href="sms:'+sep+'body=')+quote(rm_u8punc(B(smsLink)))+B('">Send as SMS (text message)</a>')
    if self.checkBrowser(["Windows Mobile"]):
        # TODO: others? configurable?
        # browsers that may also have this problem with EMAIL
        uri = uri.replace("%26","%20-amp-%20")
        if not "body=" in uri: uri += "&body="
        uri = uri.replace("body=","body=[Before%20sending%20this%20text,%20replace%20-amp-%20with%20an%20ampersand.%20This%20substitution%20has%20been%20done%20as%20your%20phone%20isn't%20compliant%20with%20RFC%205724.]%20")
    if len(r)==1: # different format if only 1 item is specified
        if addr: r=["The email will be sent to "+ampEncode(addr)]
        elif subj: r=["The email's Subject will be: "+ampEncode(subj)]
        else: r=["The email's Body will be: "+ampEncode(body)]
    elif not r: r.append("The link does not specify any recognised email details")
    else: r.insert(0,"The following information will be sent to the email client:")
    self.doResponse2(('%s<h3>mailto: link</h3>This link is meant to open an email client.<br>%s<br><a href=\"mailto:%s\">Open in email client</a> (if set up)%s%s<hr>This is %s</body></html>' % (htmlhead("mailto: link - Web Adjuster"),"<br>".join(r),uri,S(smsLink),backScript,serverName_html)),True,False)

Silas S. Brown
committed
def serve_submitPage(self):
    # Serve the "Upload Text" feature (submitPath): show the upload
    # form, dispatch the bookmarklet sub-endpoints (code/json/backend/
    # instructions), or run submitted text through the HTML filter.
    # NOTE(review): this block is interleaved with VCS blame artifacts
    # ("Silas S. Brown"/"committed" lines) and some original lines look
    # lost — e.g. an else-branch setting submit_url when
    # submitBookmarkletDomain is unset, and the line attaching class H
    # to self.request.headers.  Reconcile against upstream before use.

Silas S. Brown
committed
    self.request.suppress_logger_host_convert = True

Silas S. Brown
committed
    if B(self.request.uri)==B("/favicon.ico") or any(re.search(x,self.request.headers.get("User-Agent","")) for x in options.prohibitUA):
        # avoid logging favicon.ico tracebacks when submitPath=="/"
        self.set_status(400) ; self.myfinish() ; return

Silas S. Brown
committed
    if len(self.request.uri) > len(options.submitPath):

Silas S. Brown
committed
        txt = S(self.request.uri[len(options.submitPath):])

Silas S. Brown
committed
        # two-character suffixes select the bookmarklet sub-endpoints:
        if len(txt)==2 and options.submitBookmarklet:
            filterNo = ord(txt[1])-ord('A')
            if txt[0] in 'bB': return self.serve_bookmarklet_code(txt[1],txt[0]=='B')

Silas S. Brown
committed
            elif txt[0]=='j': return self.serve_bookmarklet_json(filterNo)
            elif txt[0]=='u': return self.serve_backend_post(filterNo)
            elif txt[0] in 'iap':
                return self.doResponse2(android_ios_instructions(txt[0],self.request.host,self.request.headers.get("User-Agent",""),filterNo),"noFilterOptions",False) # on Android and iOS, 'def bookmarklet' gives instruction_url#javascript:bookmarklet_code, so serve instructions here

Silas S. Brown
committed
        txt = zlib.decompressobj().decompress(base64.b64decode(txt),16834) # limit to 16k to avoid zip bombs (limit is also in the compress below)

Silas S. Brown
committed
        self.request.uri = "%s (input not logged, len=%d)" % (options.submitPath,len(txt))
    else: txt = self.request.arguments.get("i",None)
    if not txt:
        self.is_password_domain=True # no prominentNotice needed
        # In the markup below, body's height=100% is needed to ensure we can set a percentage height on the textarea consistently across many browsers (otherwise e.g. Safari 6 without user CSS might start making the textarea larger as soon as it contains input, overprinting the rest of the document)
        local_submit_url = "http://"+self.request.host+options.submitPath
        if options.submitBookmarkletDomain: submit_url = "//"+options.submitBookmarkletDomain+options.submitPath
        # NOTE(review): no else-branch for submit_url is visible here; one appears to have been lost
        if (options.password and submitPathIgnorePassword) or options.submitPath=='/' or defaultSite(): urlbox_footer = "" # not much point linking them back to the URL box under the first circumstance, and there isn't one for the other two
        else: urlbox_footer = '<p><a href="http://'+hostSuffix()+publicPortStr()+options.urlboxPath+'">Process a website</a></p>'
        # TODO: what if their browser doesn't submit in the correct charset? for example some versions of Lynx need -display_charset=UTF-8 otherwise they might double-encode pasted-in UTF-8 and remove A0 bytes even though it appears to display correctly (and no, adding accept-charset won't help: that's for if the one to be accepted differs from the document's)
        return self.doResponse2(("""%s<body style="height:100%%;overflow:auto"><form method="post" action="%s"><h3>Upload Text</h3>%s:<p><span style="float:right"><input type="submit" value="Upload"><script><!--

Silas S. Brown
committed
document.write(' (Ctrl-Enter) | <a href="javascript:history.go(-1)">Back</a>')
//--></script></span><br><textarea name="i" style="width:100%%;clear:both;height:60%%" rows="5" cols="20" placeholder="Type or paste your text here"
onKeyDown="if((event.ctrlKey||event.metaKey) && (event.keyCode==13 || event.which==13)) document.forms[0].submit(); else return true;">

Silas S. Brown
committed
</textarea></form>%s<script><!--

Silas S. Brown
committed
document.forms[0].i.focus()
//--></script></body></html>""" % (htmlhead("Upload Text - Web Adjuster").replace("<body>",""),options.submitPath,options.submitPrompt,bookmarklet(submit_url,local_submit_url)+urlbox_footer)),"noFilterOptions",False)

Silas S. Brown
committed
    if type(txt) == list: # came from the POST form
        txt = txt[0].strip()
        # On at least some browsers (e.g. some Safari versions), clicking one of our JS reload links after the POST text has been shown will reload the form (instead of re-submitting the POST text) and can scroll to an awkward position whether the code below calls focus() or not. Could at least translate to GET if it's short enough (don't want to start storing things on the adjuster machine - that would require a shared database if load-balancing)
        if len(txt) <= 16384: # (else we wouldn't decompress all; see comment above)

Silas S. Brown
committed
            enc = base64.b64encode(zlib.compress(B(txt),9))
            if 0 < len(enc) < 2000: return self.redirect(B("http://")+B(hostSuffix())+B(publicPortStr())+B(options.submitPath)+B(enc),303) # POST to GET

Silas S. Brown
committed
    # pretend it was served by a remote site; go through everything including filters (TODO: could bypass most of doResponse instead of rigging it up like this; alternatively keep this as it shows how to feed data to doResponse)
    self.connection_header = None
    self.urlToFetch = "" # for js_process
    class H: # stand-in headers object for the faked remote response

Silas S. Brown
committed
        def get(self,h,d):
            if h=="Content-Type": return "text/html; charset=utf-8"
            else: return d

Silas S. Brown
committed
        def get_all(self): return [("Content-Type","text/html; charset=utf-8")]
    # NOTE(review): the line that installs H (e.g. assigning H() to the
    # response headers) is not visible here — appears to have been lost
    if options.htmlUrl: line1 = "about:submitted\n"
    else: line1 = ""

Silas S. Brown
committed
    runFilterOnText(self,self.getHtmlFilter(),find_text_in_HTML(B(htmlhead("Uploaded Text - Web Adjuster"))+B("<h3>Your text</h3>")+B(txt2html(txt))+B("<hr>This is %s. %s</body></html>" % (serverName_html,backScriptNoBr))),lambda out,err:self.doResponse2(out,True,False),prefix=line1) # backScriptNoBr AFTER the server notice to save vertical space
def serve_bookmarklet_code(self,xtra,forceSameWindow): # (forceSameWindow is used by the "plus" bookmarklets)
    # Serve the bookmarklet's main JavaScript, pointed at our JSON
    # endpoint (submitPath + 'j' + filter letter in xtra).
    self.add_header("Content-Type","application/javascript")
    self.add_header("Access-Control-Allow-Origin","*") # the script is loaded from whatever page the user runs the bookmarklet on
    if options.submitBookmarkletDomain: submit = "//"+options.submitBookmarkletDomain
    else: submit = "http://"+self.request.host
    if self.canWriteBody(): self.write(B(bookmarkletMainScript(submit+options.submitPath+'j'+xtra,forceSameWindow)))
    self.myfinish()
def serve_err(self,err):
    # Report a bookmarklet-related error to the client as plain text
    # with HTTP 500, logging it server-side as well.
    self.set_status(500)
    self.add_header("Content-Type","text/plain")
    logging.error("Bookmarklet error: "+S(err))
    if self.canWriteBody(): self.write(B(err))
    self.myfinish() # NOTE(review): not visible in the corrupted original (the next method's def line is also missing there); added so the response is actually completed — confirm against upstream
try: self.set_status(429,"Too many requests")
except: self.set_status(429)
if retrySecs: self.add_header("Retry-After",str(retrySecs))

Silas S. Brown
committed
if self.canWriteBody(): self.write(B("Too many requests (HTTP 429)"))

Silas S. Brown
committed
try: f = " for "+S(self.urlToFetch)

Silas S. Brown
committed
logging.error("Returning HTTP 429 (too many requests)"+f+" to "+S(self.request.remote_ip))
self.request.suppress_logging = True
self.myfinish()

Silas S. Brown
committed
def serve_bookmarklet_json(self,filterNo):
    # POST endpoint used by the bookmarklet: takes a JSON list of text
    # strings, runs them through htmlFilter number filterNo, and
    # returns the filtered strings as a JSON list.
    # NOTE(review): interleaved VCS blame artifacts below; the loop
    # header that should precede "if l[i]=='':" (e.g. iterating over
    # indices of l), and probably a finish after writing "OK", appear
    # to have been lost — reconcile against upstream.
    self.add_header("Access-Control-Allow-Origin","*")
    self.add_header("Access-Control-Allow-Headers","Content-Type")
    if not self.request.body:
        self.add_header("Content-Type","text/plain")
        self.add_header("Allow","POST") # some browsers send OPTIONS first before POSTing via XMLHttpRequest (TODO: check if OPTIONS really is the request method before sending this?)

Silas S. Brown
committed
        if self.canWriteBody(): self.write(B("OK"))

Silas S. Brown
committed
    try: l = json.loads(self.request.body)
    except: return self.serve_err("Bad JSON")

Silas S. Brown
committed
    if l[i]=='': l[i] = u'' # shouldn't get this (TODO: fix in bookmarkletMainScript? e.g. if submitBookmarkletFilterJS can match empty strings, or conversion to 'cnv' makes it empty, anything else?), but if we do, don't let it trip up the 'wrong data structure' below on Python 2
    if not (type(l)==list and all(((type(i)==unicode or (type(i)==str and all(ord(c)<0x80 for c in i))) and not chr(0) in i) for i in l)): return self.serve_err("Wrong data structure")

Silas S. Brown
committed
    # join the strings with NUL separators so the filter sees one text:
    codeTextList = []
    for i in l:

Silas S. Brown
committed
        codeTextList.append(B(chr(0)))
        if type(i)==bytes: codeTextList.append(i)
        else: codeTextList.append(i.encode('utf-8'))

Silas S. Brown
committed
    def callback(out,err):
        # split the filter's output back on the NUL separators and
        # return it to the bookmarklet as JSON
        self.add_header("Content-Type","application/json")

Silas S. Brown
committed
        if self.canWriteBody(): self.write(B(json.dumps([i.decode('utf-8','replace') for i in B(out)[1:].split(B(chr(0)))]))) # 'replace' here because we don't want utf-8 errors to time-out the entire request (although hopefully the filter WON'T produce utf-8 errors...)

Silas S. Brown
committed
        self.finish()
    if options.htmlUrl: line1 = "about:bookmarklet\n" # TODO: get the bookmarklet to report the location.href of the site (and update htmlUrl help text)
    else: line1 = ""

Silas S. Brown
committed
    runFilterOnText(self,self.getHtmlFilter(filterNo),codeTextList,callback,prefix=line1)
# for another instance's htmlFilter=http://...uA etc

Silas S. Brown
committed
runFilter(self.getHtmlFilter(filterNo),self.request.body,lambda out,err: (self.write(B(out)),self.finish()))

Silas S. Brown
committed

Silas S. Brown
committed
def checkTextCache(self,newext):
    # Check for a PDF/EPUB conversion of this exact (host, uri) that is
    # already cached, or already being produced by another request.
    # Returns True if this request has been (or will be) answered from
    # the cache, False if the caller should start a new conversion.
    # check for PDF/EPUB conversion on other threads or cached
    if not options.pdfepubkeep: return False # we don't guarantee to update kept_tempfiles properly if it's 0 (e.g. pdf can just pipe, so don't need unlinkOutputLater)
    ktkey = (self.request.host, self.request.uri)
    if ktkey in kept_tempfiles:
        def tryRead():
            # kept_tempfiles[ktkey] is 1 while conversion is running and
            # a filename once done, so open() fails until it's ready
            try: txt=open(kept_tempfiles[ktkey],'rb').read() # ('rb' makes it give you a byte-string in Python 3)
            except: txt = None
            if txt:
                if self.canWriteBody():
                    if newext==".mobi": self.write(txt)
                    else: self.write(remove_blanks_add_utf8_BOM(txt))
                self.myfinish()
            elif not self.inProgress(): IOLoopInstance().add_timeout(time.time()+1,lambda *args:tryRead()) # poll again in 1s; NOTE(review): the inProgress() polarity is as in the visible original — confirm against upstream
        tryRead() ; return True
    kept_tempfiles[ktkey] = 1 # conversion in progress
    return False

Silas S. Brown
committed
def getArg(self,arg):
    """Return the named request argument as a single value, or None.
    Tornado may store arguments as lists; in that case only the first
    element is returned."""
    value = self.request.arguments.get(arg,None)
    if type(value)==list: value = value[0]
    return value
def debugExtras(self):
    # One-line summary of this request (method, URI, host and
    # upstream-role flags) appended to debug log messages.
    r = " for "+self.request.method+" "+self.request.uri
    if not self.request.uri.startswith("http"):
        r += " host="+str(self.request.host) # URI is relative, so the host matters
    if self.WA_UseSSL or (hasattr(self.request,"connection") and hasattr(self.request.connection,"stream") and hasattr(self.request.connection.stream,"isFromSslHelper")): r += " WA_UseSSL"
    if self.isPjsUpstream: r += " isPjsUpstream instance "+str(self.WA_PjsIndex+self.WA_PjsStart)
    if self.isSslUpstream: r += " isSslUpstream"
    return r

Silas S. Brown
committed
def canWriteBody(self): return not B(self.request.method) in [B("HEAD"),B("OPTIONS")] and not (hasattr(self,"_finished") and self._finished)
def justMeCheck(self):
    # --just-me option: verify, via a local RFC 1413 ident server (or
    # netstat as a fallback), that the connecting process belongs to
    # the same user account as this server; returns True on success.
    # NOTE(review): corrupted block — the lines creating the socket
    # ('s'), splitting the netstat output line ('l'), and reading the
    # ident reply into 'usr' are not visible among the VCS blame
    # artifacts below; reconcile against upstream before relying on it.
    # Ideally we should do this asynchronously, but as
    # it's only for the --just-me option and we assume a
    # local ident server, we can probably get away with:
    try: s.connect(('localhost',113))
    except:
        import pwd

Silas S. Brown
committed
        for l in getoutput("netstat -tpn").split("\n"):

Silas S. Brown
committed
            # match the peer's port against an ESTABLISHED entry and check the owning process's user
            if len(l)>6 and l[3].endswith(":"+str(self.request.connection.stream.socket.getpeername()[1])) and l[5]=="ESTABLISHED" and "/" in l[6] and S(pwd.getpwuid(os.stat("/proc/"+l[6].split("/",1)[0]).st_uid).pw_name)==myUsername: return True
        logging.error("no ident server and couldn't confirm username with netstat: rejecting this connection")
        return

Silas S. Brown
committed
    s.send(B("%d, %d\r\n" % (self.request.connection.stream.socket.getpeername()[1], port_randomise.get(self.WA_port,self.WA_port))))

Silas S. Brown
committed
    if usr.split(B(':'))[-1]==B(myUsername): return True
    else: logging.error("ident server didn't confirm username: rejecting this connection")
    except Exception as e: logging.error("Trouble connecting to ident server (%s): rejecting this connection" % repr(e))

Silas S. Brown
committed
    if usr: self.write(B(usr+": "))
    self.write(B("Connection from wrong account (ident check failed)\n"))
def doReq(self):
    # Main entry point for every incoming request: performs access
    # checks (just_me, pause, password), normalises URI/host, handles
    # special endpoints (ping, URL box, mailto, submit, images,
    # robots.txt), determines the real upstream host, and finally hands
    # over to sendHead()/sendRequest().
    # NOTE(review): reconstructed from a copy interleaved with VCS
    # blame artifacts; spots whose nesting could not be confirmed are
    # marked below.
    if options.just_me and not self.justMeCheck(): return
    if mainServerPaused and not self.isPjsUpstream and not self.isSslUpstream: return self.serve429()
    debuglog("doReq"+self.debugExtras()) # MUST keep this debuglog call: it also sets profileIdle=False
    if not self.isPjsUpstream and not self.isSslUpstream:
        try: origReqInFlight.add(id(self))
        except: pass # e.g. not options.profile
    if options.one_request_only:
        IOLoopInstance().handle_callback_exception = lambda *args:0 # Tornado 4 raises EBADF in accept_handler if you call server.stop() from a request handler, so disable its handle_callback_exception to reduce log clutter (TODO: handle other errors using the original exception handler if present?)
    if wsgi_mode and B(self.request.path)==B(quote(os.environ.get("SCRIPT_NAME","")+os.environ.get("PATH_INFO",""))) and 'SCRIPT_URL' in os.environ:
        # workaround for Tornado 2.x limitation when used with CGI and htaccess redirects
        self.request.uri = os.environ['SCRIPT_URL']
        qs = os.environ.get("QUERY_STRING","")
        if not qs: qs = os.environ.get("REDIRECT_QUERY_STRING","")
        if qs:
            self.request.uri += "?"+qs
            self.request.arguments = urlparse.parse_qs(qs)
        self.request.path = self.request.uri
    elif sys.version_info[0]==2:
        # HTTP/1.x headers are officially Latin-1 (but usually ASCII), and Tornado versions 2 through 4 decodes the Latin-1 and re-encodes it as UTF-8. This can cause confusion, so let's emulate modern browsers and %-encode any non-ASCII URIs:
        try: self.request.uri = self.request.uri.decode('utf-8').encode('latin1')
        except: pass
    self.request.uri=re.sub("[^!-~]+",lambda m:quote(m.group()),S(self.request.uri)) # %-encode non-printable/non-ASCII bytes in the URI
    self.request.method = S(self.request.method)
    if self.request.host:
        self.request.host = S(self.request.host)
    else: self.request.host = ""
    if self.request.method=="HEAD": self.set_header("Content-Length","-1") # we don't yet the content length, so Tornado please don't add it! (NB this is for HEAD only, not OPTIONS, which should have Content-Length 0 or some browsers time out) (TODO: in non-WSGI mode could call .flush() after writing headers (with callback param), then Content-Length won't be added on .finish())
    if self.request.headers.get("User-Agent","")=="ping":
        if self.request.uri=="/ping2": return self.answerPing(True)
        elif self.request.uri=="/ping": return self.answerPing(False)
    elif options.loadBalancer and B(self.request.headers.get("User-Agent",""))==B("") and self.request.uri=="/": return self.answer_load_balancer()
    self.find_real_IP() # must find real ip BEFORE forwarding to fasterServer, because might also be behind nginx etc
    if fasterServer_up:
        return self.forwardFor(options.fasterServer,"fasterServer")
    if self.handleFullLocation(): return # if returns here, URL is invalid; if not, handleFullLocation has 'normalised' self.request.host and self.request.uri
    if options.js_UA: # NOTE(review): upstream may additionally guard this with isPjsUpstream — confirm
        if options.js_UA.startswith("*"): self.request.headers["User-Agent"] = options.js_UA[1:]
        else: self.request.headers["User-Agent"] = webdriver_UA[self.WA_PjsIndex]
        webdriver_inProgress[self.WA_PjsIndex].add(self.request.uri)
    if self.handleSSHTunnel(): return
    if self.handleSpecificIPs(): return
    # TODO: Slow down heavy users by self.request.remote_ip ?
    if extensions.handle("http://"+self.request.host+self.request.uri,self):
        self.request.suppress_logger_host_convert = self.request.valid_for_whois = True
        return self.myfinish()
    if ownServer_regexp and ownServer_regexp.match(self.request.host+self.request.uri):
        self.request.headers["Connection"] = "close" # MUST use 'Connection: Close' here, as keepalive can go wrong if it subsequently fetches a URL that DOESN'T match ownServer_regexp but comes from the same domain and this goes to ownServer incorrectly. TODO mention it in the help text?, TODO might we occasionally need something similar for ownServer_if_not_root etc?, TODO at lower priority: if we can reasonably repeat the requests then do that insntead of using forwardFor
        return self.forwardFor(options.own_server)
    if cssReload_cookieSuffix and cssReload_cookieSuffix in self.request.uri:
        ruri,rest = self.request.uri.split(cssReload_cookieSuffix,1)
        self.setCookie_with_dots(rest)
        return self.redirect(ruri) # so can set another
    self.cookieViaURL = None
    if self.isPjsUpstream or self.isSslUpstream: realHost = self.request.host
    else: realHost = convert_to_real_host(self.request.host,self.cookie_host(checkReal=False)) # don't need checkReal if return value will be passed to convert_to_real_host anyway
    if realHost == -1:
        return self.forwardFor(options.own_server)
        # (TODO: what if it's keep-alive and some browser figures out our other domains are on the same IP and tries to fetch them through the same connection? is that supposed to be allowed?)
    elif realHost==0 and options.ownServer_if_not_root: realHost=options.own_server # asking by cookie to adjust the same host, so don't forwardFor() it but fetch it normally and adjust it
    if type(realHost)==bytes and not bytes==str:
        realHost = S(realHost) # Python 3: normalise to str
    isProxyRequest = self.isPjsUpstream or self.isSslUpstream or (options.real_proxy and realHost == self.request.host)
    if not isProxyRequest and not self.isPjsUpstream and not self.isSslUpstream and (self.request.host=="localhost" or self.request.host.startswith("localhost:")) and not "localhost" in options.host_suffix: return self.redirect("http://"+hostSuffix(0)+publicPortStr()+self.request.uri) # save confusion later (e.g. set 'HTML-only mode' cookie on 'localhost' but then redirect to host_suffix and cookie is lost). Bugfix 0.314: do not do this redirect if we're a real proxy for another server on localhost
    self.request.valid_for_whois = True # (if options.whois, don't whois unless it gets this far, e.g. don't whois any that didn't even match "/(.*)" etc)
    maybeRobots = (not self.isPjsUpstream and not self.isSslUpstream and not options.robots and self.request.uri=="/robots.txt")
    # don't actually serveRobots yet, because MIGHT want to pass it to own_server (see below)
    self.is_password_domain=False # needed by doResponse2
    if options.password and not options.real_proxy and not self.isPjsUpstream and not self.isSslUpstream:
        # whether or not open_proxy, because might still have password (perhaps on password_domain), anyway the doc for open_proxy says "allow running" not "run"
        # First ensure the wildcard part of the host is de-dotted, so the authentication cookie can be shared across hosts.
        # (This is not done if options.real_proxy because we don't want to touch the hostname for that)
        host = self.request.host
        if host:
            if host.endswith(":"+str(options.publicPort)): host=host[:-len(":"+str(options.publicPort))]
            for hs in options.host_suffix.split("/"):
                ohs = "."+hs
                if host.endswith(ohs) and host.index(".")<len(host)-len(ohs):
                    if maybeRobots: return self.serveRobots()
                    if options.publicPort==80: colPort=""
                    else: colPort=":"+str(options.publicPort)
                    return self.redirect("http://"+dedot(host[:-len(ohs)])+ohs+colPort+self.request.uri)
        # Now OK to check authentication:
        if not self.authenticates_ok(host) and not (submitPathIgnorePassword and self.request.uri.startswith(submitPathForTest)):
            if options.auth_error=="http://":
                if options.own_server: return self.forwardFor(options.own_server)
                elif maybeRobots: return self.serveRobots()
                else: options.auth_error = "auth_error set incorrectly (own_server not set)" # see auth_error help (TODO: is it really a good idea to say this HERE?)
            elif maybeRobots: return self.serveRobots()
            self.add_nocache_headers() # in case they try the exact same request again after authenticating (unlikely if they add &p=..., but they might come back to the other URL later, and refresh is particularly awkward if we redirect)
            if options.auth_error.startswith("http://") or options.auth_error.startswith("https://"): return self.redirect(options.auth_error)
            if options.auth_error.startswith("*"): auth_error = options.auth_error[1:]
            else:
                self.set_status(401)
                auth_error = options.auth_error
            if self.canWriteBody(): self.write(B(htmlhead("")+auth_error+"</body></html>"))
            return self.myfinish()
    # Authentication is now OK
    if not self.isPjsUpstream and not self.isSslUpstream:
        if self.handleGoAway(realHost,maybeRobots): return
        # Now check if it's an image request:
        _olduri = self.request.uri
        self.request.uri=unquote(self.request.uri)
        img = Renderer.getImage(self.request.uri)
        if img: return self.serveImage(img)
        # Not an image:
        if options.mailtoPath and self.request.uri.startswith(options.mailtoPath): return self.serve_mailtoPage()
        if options.submitPath and self.request.uri.startswith(submitPathForTest): return self.serve_submitPage()
        self.request.uri = _olduri
        if realHost=="error" and not maybeRobots:
            return self.serve_hostError()
        if not realHost: # default_site(s) not set
            if options.own_server and options.ownServer_if_not_root and len(self.request.path)>1: return self.forwardFor(options.own_server)
            elif maybeRobots or any(re.search(x,self.request.headers.get("User-Agent","")) for x in options.prohibitUA): return self.serveRobots()
            # Serve URL box
            self.set_css_from_urlbox()
            if self.getArg("try"): return self.serve_URLbox() # we just set the stylesheet
            if options.submitPath and self.getArg("sPath"): return self.redirect("http://"+hostSuffix()+publicPortStr()+options.submitPath)
            v=self.getArg("q")
            if v: return self.handle_URLbox_query(v)
            else: return self.serve_URLbox()
        if maybeRobots: return self.serveRobots()
    viewSource = (not self.isPjsUpstream and not self.isSslUpstream) and self.checkViewsource()
    if not self.isPjsUpstream and not self.isSslUpstream and self.needCssCookies():
        self.add_nocache_headers() # please don't cache this redirect! otherwise user might not be able to leave the URL box after:
        return self.redirect("http://"+hostSuffix()+publicPortStr()+options.urlboxPath+"?d="+quote(protocolWithHost(realHost)+self.request.uri),302) # go to the URL box - need to set more options (and 302 not 301, or some browsers could cache it despite the above)
    if not self.isPjsUpstream and not self.isSslUpstream: self.addCookieFromURL() # for cookie_host
    converterFlags = []
    for opt,suffix,extn,fmt in [ # (loop variable renamed from 'ext' so it no longer shadows the ext() helper defined below)
        (options.pdftotext,pdftotext_suffix,".pdf","pdf"),
        (options.epubtotext,epubtotext_suffix,".epub","epub"),
        (options.epubtozip,epubtozip_suffix,".epub","epub"),
        (options.askBitrate,mp3lofi_suffix,".mp3",None),
        ]:
        if opt and not self.isPjsUpstream and not self.isSslUpstream and self.request.uri.endswith(suffix) and (self.request.uri.lower()[:-len(suffix)].endswith(extn) or guessCMS(self.request.uri,fmt)):
            self.request.uri = self.request.uri[:-len(suffix)]
            converterFlags.append(True)
        else: converterFlags.append(False)
    if upstream_rewrite_ssl and not self.isSslUpstream and not (options.js_interpreter and not self.isPjsUpstream): protocol = "http://" # keep the .0 in and call protocolAndHost again on the isSslUpstream pass
    else: protocol,realHost = protocolAndHost(realHost)
    self.change_request_headers(realHost,isProxyRequest)
    self.urlToFetch = protocol+self.request.headers["Host"]+self.request.uri
    if not isProxyRequest and (any(re.search(x,self.urlToFetch) for x in options.prohibit) or any(re.search(x,self.request.headers.get("User-Agent","")) for x in options.prohibitUA)):
        self.restore_request_headers()
        return self.redirect(self.urlToFetch)
    # TODO: consider adding "not self.request.headers.get('If-Modified-Since','')" to the below list of sendHead() conditions, in case any referer-denying servers decide it's OK to send out "not modified" replies even to the wrong referer (which they arguably shouldn't, and seem not to as of 2013-09, but if they did then adjuster might erroneously redirect the SECOND time a browser displays the image)
    def ext(u):
        # lower-cased filename extension of URL u (query string
        # ignored), or None; mp3 is suppressed when we're transcoding
        # all mp3s at a fixed bitrate
        u = S(u)
        if '?' in u:
            e = ext(u[:u.index('?')])
            if e: return e
        if not '.' in u: return
        e = u[u.rindex('.')+1:].lower()
        if not (e=="mp3" and options.bitrate and not options.askBitrate): return e
    if options.redirectFiles and not (isProxyRequest or any(converterFlags) or viewSource) and ext(self.request.uri) in redirectFiles_Extensions: self.sendHead()
    elif self.isPjsUpstream and "text/html" in self.request.headers.get("Accept","") and not (any(converterFlags) or viewSource): self.sendHead(forPjs=True) # to check it's not a download link
    else: self.sendRequest(converterFlags,viewSource,isProxyRequest,follow_redirects=False) # (DON'T follow redirects - browser needs to know about them!)
def change_request_headers(self,realHost,isProxyRequest):
    # Rewrite the incoming browser request's headers in place so the
    # request can be forwarded upstream: merge/filter cookies, adjust
    # Referer, rewrite embedded URLs in the URI, set Host, and add
    # Via / X-Forwarded-For.  Mutates self.request.headers and sets
    # self.request.old_cookie / self.original_referer as side effects.
    #
    # Params:
    #   realHost       -- host name to place in the upstream Host: header
    #   isProxyRequest -- when true, skip the fixDNS URL-rewriting of
    #                     http:// substrings inside the request URI
    #
    # NOTE(review): this extract is interleaved with VCS blame artifacts
    # ("Silas S. Brown" / "committed" lines, kept verbatim below) and a
    # few original statements appear to have been lost in extraction --
    # see the NOTE(review) comments at each suspected point.

Silas S. Brown
committed
    if options.default_cookies:
        # options.default_cookies: ';'-separated name=value cookies to
        # inject when the browser didn't send one of that name.
        for defaultCookie in options.default_cookies.split(';'):
            defaultCookie = defaultCookie.strip()
            if defaultCookie.startswith("(") and ")" in defaultCookie: # browser-specific
                # "(substr)name=value" form: applies only if substr
                # occurs in the User-Agent header
                if not defaultCookie[1:defaultCookie.index(")")] in self.request.headers.get("User-Agent",""): continue
                defaultCookie=defaultCookie[defaultCookie.index(")")+1:]
            # add if a cookie of that name is not already set
            dcName,dcValue=defaultCookie.strip().split('=',1)
            if not self.getCookie(dcName): self.request.headers.add("Cookie",defaultCookie)
    if self.request.headers.get_list("Cookie"):
        # some sites require them all in one header
        ck = "; ".join(self.request.headers.get_list("Cookie"))

Silas S. Brown
committed
        # keep the original combined cookie so restore_request_headers
        # can put it back after the upstream fetch
        self.request.old_cookie = ck
        def ours(c): # don't forward our own cookies upstream (may confuse some sites, especially if a site uses Web Adjuster their end)
            c = c.strip()
            if not '=' in c: return 0
            c = c[:c.index('=')]

Silas S. Brown
committed
            return c in upstreamGuard or (c==adjust_domain_cookieName and self.cookie_host())

Silas S. Brown
committed
        if options.upstream_guard:

Silas S. Brown
committed
            def maketheirs(c):
                # undo the "name1" renaming upstream_guard applied to
                # the site's own cookies that clash with ours
                for ck in upstreamGuard: c=c.replace(ck+"1",ck)
                return c
            self.request.headers["Cookie"]=";".join(maketheirs(x) for x in ck.split(";") if not ours(x))
            # NOTE(review): when options.upstream_guard is off, ours()
            # is defined but the filtered cookie string is never written
            # back -- an else-branch seems to have been lost in this
            # extract; confirm against upstream history.

Silas S. Brown
committed
    for v in self.request.headers.get_list("Referer"):

Silas S. Brown
committed
        if v:
            # remember the browser's original Referer before rewriting
            self.original_referer = v

Silas S. Brown
committed
            # strip any adjust_domain cookie-override parameter from the
            # Referer's query string before forwarding
            if enable_adjustDomainCookieName_URL_override: v = re.sub(B("[?&]"+re.escape(adjust_domain_cookieName)+"=[^&]*$"),B(""),B(v))
            if S(v) in ["","http://","http:///"]:

Silas S. Brown
committed
                # it must have come from the URL box
                del self.request.headers["Referer"]

Silas S. Brown
committed
            else: self.request.headers["Referer"] = S(v)

Silas S. Brown
committed
    for http in ["http://","http%3A%2F%2F"]: # xyz?q=http://... stuff
        if http in self.request.uri[1:]:
            u=self.request.uri.split(http)
            if not isProxyRequest:
                for i in range(1,len(u)):

Silas S. Brown
committed
                    u[i]=S(fixDNS(http+u[i],self))
            # NOTE(review): the rejoined URI (presumably
            # http.join(u) assigned back to self.request.uri) does not
            # appear in this extract -- looks like a lost line; confirm.

Silas S. Brown
committed
    # NOTE(review): "h" is unbound below -- the enclosing
    # "for h in <headers-to-remove>:" loop header appears to have been
    # lost in extraction, along with the self.removed_headers
    # bookkeeping (restore_request_headers reads self.removed_headers
    # as (name, value) pairs, so something here must record them).
    l = self.request.headers.get_list(h)
    if l:
        del self.request.headers[h]

Silas S. Brown
committed
    self.request.headers["Host"]=realHost
    if options.via and not self.isSslUpstream:

Silas S. Brown
committed
        v = S(self.request.version)

Silas S. Brown
committed
        # reduce e.g. "HTTP/1.1" to "1.1" for the Via header
        if v.startswith("HTTP/"): v=v[5:]

Silas S. Brown
committed
        self.addToHeader("Via",v+" "+convert_to_via_host(self.request.host)+" ("+viaName+")")

Silas S. Brown
committed
        self.addToHeader("X-Forwarded-For",self.request.remote_ip)
    if options.uavia and not self.isSslUpstream: self.addToHeader("User-Agent","via "+convert_to_via_host(self.request.host)+" ("+viaName+")")
    if self.checkBrowser(options.cacheOmit):
        # force revalidation upstream for browsers listed in cacheOmit
        self.request.headers["Cache-Control"] = "max-age=0, must-revalidate"
        self.request.headers["Pragma"] = "no-cache"

# (VCS blame artifact "Silas S. Brown / committed" — extraction noise, not part of the source)
def restore_request_headers(self): # restore the ones Tornado might use (Connection etc)
    """Undo the header changes made before forwarding upstream.

    Re-adds each (name, value) pair recorded in self.removed_headers,
    and puts the original combined Cookie header (saved in
    self.request.old_cookie by change_request_headers) back, so later
    code can refer to our own cookies again.  No-op if
    change_request_headers was never called on this request.
    """
    # Fixed: stray VCS blame-annotation lines ("Silas S. Brown" /
    # "committed") that had been interleaved into this method --
    # breaking the Python syntax -- were removed.
    if not hasattr(self,"removed_headers"): return # haven't called change_request_headers (probably means this is user input)
    for k,v in self.removed_headers: self.request.headers[k]=v
    if hasattr(self.request,"old_cookie"): self.request.headers["Cookie"] = self.request.old_cookie # + put this back so we can refer to our own cookies

# (VCS blame artifact "Silas S. Brown / committed" — extraction noise, not part of the source)
def sendRequest(self,converterFlags,viewSource,isProxyRequest,follow_redirects):
if self.isPjsUpstream and webdriver_prefetched[self.WA_PjsIndex]:
debuglog("sendRequest returning webdriver_prefetched["+str(self.WA_PjsIndex)+"] ("+repr(webdriver_prefetched[self.WA_PjsIndex])+")"+self.debugExtras())
r = webdriver_prefetched[self.WA_PjsIndex]
webdriver_prefetched[self.WA_PjsIndex] = None
return self.doResponse(r,converterFlags,viewSource,isProxyRequest)

# (VCS blame artifact "Silas S. Brown / committed" — extraction noise, not part of the source)
body = self.request.body
if not body: body = None # required by some Tornado versions
if self.isSslUpstream: ph,pp = None,None
else: ph,pp = upstream_proxy_host,upstream_proxy_port
if options.js_interpreter and self.htmlOnlyMode(isProxyRequest) and not follow_redirects and not self.request.uri in ["/favicon.ico","/robots.txt"] and self.canWriteBody():
if options.via: via = self.request.headers["Via"],self.request.headers["X-Forwarded-For"]
else: via = None # they might not be defined