    def setCookie_with_dots(self,kv):
        for dot in ["","."]: self.add_header("Set-Cookie",kv+"; Domain="+dot+self.cookieHostToSet()+"; Path=/; Expires="+cookieExpires) # (at least in Safari, need BOTH with and without the dot to be sure of setting the domain and all subdomains.  TODO: might be able to skip the dot if not wildcard_dns, here and in the cookie-setting scripts.)
        if self.cookieViaURL: self.add_header("Set-Cookie",adjust_domain_cookieName+"="+quote(self.cookieViaURL)+"; Path=/; Expires="+cookieExpires) # don't need dots for this (non-wildcard)

    def removeArgument(self,argName,value):
        if "&"+argName+"="+value in self.request.uri: self.request.uri=self.request.uri.replace("&"+argName+"="+value,"")
        elif self.request.uri.endswith("?"+argName+"="+value): self.request.uri=self.request.uri[:-len("?"+argName+"="+value)]
        elif "?"+argName+"="+value+"&" in self.request.uri: self.request.uri=self.request.uri.replace("?"+argName+"="+value+"&","?")

    def checkViewsource(self):
        # if URI ends with .viewsource, return True and take it out of the URI and all arguments (need to do this before further processing)
        # - and in js_interpreter mode, recognise .screenshot too and return "screenshot", also (webdriver_click_code + .*)
        toRemove = ret = None
        if options.js_interpreter and options.js_links and webdriver_click_code in self.request.uri:
            toRemove = self.request.uri[self.request.uri.index(webdriver_click_code):]
            ret2 = unquote(toRemove[len(webdriver_click_code):])
        elif not options.viewsource: return False
        else: ret2 = None
        if self.request.uri.endswith(".viewsource"):
            if toRemove: ret2 = ret2[:-len(".viewsource")]
            else: toRemove = ".viewsource"
            ret = True
        elif options.js_interpreter and self.request.uri.endswith(".screenshot"):
            if toRemove: ret2 = ret2[:-len(".screenshot")]
            else: toRemove = ".screenshot"
            ret = "screenshot"
        elif not toRemove: return False
        if ret2: ret = (ret2,ret)
        self.request.uri = self.request.uri[:-len(toRemove)]
        if not S(self.request.method).lower() in ['get','head']: return ret # TODO: unless arguments are taken from both url AND body in that case
        for k,argList in self.request.arguments.items():
            if argList and argList[-1].endswith(toRemove):
                argList[-1]=argList[-1][:-len(toRemove)]
        return ret
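    # Sketch of checkViewsource (URI hypothetical, options.viewsource assumed on):
    #   GET /article.viewsource -> returns True, the URI becomes /article, and
    #   any argument value ending in ".viewsource" is trimmed the same way;
    #   GET /article.screenshot -> returns "screenshot" (js_interpreter only).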
    
    def cookieHostToSet(self):
        # for the Domain= field of cookies
        for hs in options.host_suffix.split("/"):
        pp = ':'+str(options.publicPort)
        if host.endswith(pp): return host[:-len(pp)]
        p = ':'+str(options.port) # possible for local connections, if publicPort is set to something else
        if host.endswith(p): return host[:-len(p)]
    
    def authenticates_ok(self,host):
        if not options.password: return True
        if options.password_domain and host and not any((host==p or host.endswith("."+p)) for p in options.password_domain.split('/')): return True
        if options.password_domain: self.is_password_domain=True
        # if they said ?p=(password), it's OK and we can
        # give them a cookie with it
        if B(self.getArg("p")) == B(options.password):
            self.setCookie_with_dots(password_cookie_name+"="+quote(options.password))
            self.removeArgument("p",options.password)
            return True
        return self.getCookie(password_cookie_name)==quote(options.password)
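    # Sketch of the password flow above (password value hypothetical): a request
    # for /page?p=opensesame with options.password=="opensesame" sets the
    # password_cookie_name cookie (with and without the leading dot on Domain),
    # strips "p" from the URI and returns True; later requests then pass via
    # the cookie comparison above.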
    def decode_argument(self, value, name=None): return value # don't try to UTF8-decode; it might not be UTF8
    
    SUPPORTED_METHODS = the_supported_methods
    @asynchronous
    def get(self, *args, **kwargs):     return self.doReq()
    @asynchronous
    def head(self, *args, **kwargs):    return self.doReq()
    @asynchronous
    def post(self, *args, **kwargs):    return self.doReq()
    @asynchronous
    def put(self, *args, **kwargs):     return self.doReq()
    @asynchronous
    def delete(self, *args, **kwargs):  return self.doReq()
    @asynchronous
    def patch(self, *args, **kwargs):   return self.doReq()
    @asynchronous
    def options(self, *args, **kwargs): return self.doReq()

    @asynchronous
    def connect(self, *args, **kwargs):
      try: host, port = S(self.request.uri).split(':')
      except: host,port = None,None
      is_sshProxy = (host,port)==(allowConnectHost,allowConnectPort)
      if host and (options.real_proxy or self.isPjsUpstream or self.isSslUpstream or is_sshProxy) and not (self.isPjsUpstream and options.js_interpreter in ["HeadlessFirefox","Firefox"] and host in block_headless_firefox): # support tunnelling if real_proxy (but we might not be able to adjust anything, see below), but at any rate support ssh_proxy if set
        upstream = tornado.iostream.IOStream(socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0))
        # See note about Tornado versions in writeAndClose
        if not is_sshProxy and not self.isSslUpstream and int(port)==443:
            # We can change the host/port to ourselves
            # and adjust the SSL site (assuming this CONNECT
            # is for an SSL site)
            # This should result in a huge "no cert" warning
            host,port = "127.0.0.1",port_randomise.get(self.WA_connectPort,self.WA_connectPort)
            debuglog("Rerouting CONNECT to "+host+":"+str(port))
            self.request.suppress_logging = True # no need to log the CONNECT if our other port will be logging the GET
        client = self.request.connection.stream
        def callback(*args):
          readUntilClose(client,lambda data:writeAndClose(upstream,data),lambda data:writeOrError(client,"upstream "+host+":"+str(port)+self.debugExtras(),upstream,data)) # (DO say 'upstream', as if host==localhost it can be confusing (TODO: say 'upstream' only if it's 127.0.0.1?))
          if self.isPjsUpstream: clientErr=None # we won't mind if our js_interpreter client gives up on an upstream fetch
          else: clientErr = "client "+self.request.remote_ip+self.debugExtras()
          readUntilClose(upstream,lambda data:writeAndClose(client,data),lambda data:writeOrError(upstream,clientErr,client,data))
          try:
              client.write(B('HTTP/1.0 200 Connection established\r\n\r\n'))
              debuglog("Connection established")
          except tornado.iostream.StreamClosedError:
              if not self.isPjsUpstream: logging.error("client "+self.request.remote_ip+" closed before we said Established"+self.debugExtras())
        doCallback(self,upstream.connect,callback,(host, int(port)))
        # Tornado _log is not called until finish(); it would be useful to log the in-process connection at this point
        try: self._log()
        except: pass # not all Tornado versions support this?
      else: self.set_status(400),self.myfinish()
    def on_connection_close(self): self.myfinish()
    def myfinish(self):
        debuglog("myfinish"+self.debugExtras())
        if hasattr(self,"_finished") and self._finished: pass # try to avoid "connection closed" exceptions if browser has already gone away
        else:
          try:
            self.finish()
            self._finished = 1 # (just in case)
          except: pass # belt and braces (depends on Tornado version?)
        if self.isPjsUpstream:
            try:
                webdriver_inProgress[self.WA_PjsIndex].remove(self.request.uri)
            except: pass
        elif options.one_request_only and not self.isSslUpstream: stopServer("Stopping after one request")
        try: reqsInFlight.remove(id(self))
        except: pass
        try: origReqInFlight.remove(id(self))
        except: pass
        if self.can_serve_without_redirect(redir):
            return self.doReq0()
        debuglog("Serving redirect ("+repr(status)+" to "+repr(redir)+")"+self.debugExtras())
        try: self.set_status(status)
        except ValueError: self.set_status(status, "Redirect") # e.g. 308 (not all Tornado versions handle it)
        for h in ["Location","Content-Type","Content-Language"]: self.clear_header(h) # clear these here, so redirect() can still be called even after a site's headers were copied in
        if redir_relative_when_possible: url_relative = url_is_ours(redir) # (no need to send the correct cookieHost, just need to know if host gets changed)
        else: url_relative = False
        if url_relative:
            # If we're behind an optional HTTPS-terminating proxy, it would be nice to tell the browser to keep whatever protocol it's currently using, IF the browser would definitely understand this.
            # RFC 7231 from 2014 allows relative redirects in updated HTTP/1.1 based on browser observations, but original 1999 HTTP/1.1 RFC didn't.  MSIE 9 from 2011 allows relative.
            if self.checkBrowser(["Lynx/2.8","Gecko/20100101","Trident/7","Trident/8","Trident/9","Edge"]): pass
            else:
                ua = S(self.request.headers.get("User-Agent",""))
                def v(b):
                    if b in ua:
                        m = re.match("[0-9]+",ua[ua.index(b)+len(b):])
                        if m: return int(m.group())
                    return 0
                if v("WebKit/") < 537: # TODO: or v("") < ... etc
                    # I haven't been able to test it works on these old versions
                    url_relative = False
            if url_relative: redir = S(redir).replace("http:","",1)
        self.add_header("Location",S(redir))
        if url_relative: pass # these browsers don't need a body
        else:
            self.add_header("Content-Type","text/html")
            if self.canWriteBody(): self.write(B('<html lang="en"><body><a href="%s">Redirect</a></body></html>' % S(redir).replace('&','&amp;').replace('"','&quot;')))
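        # For example (hypothetical User-Agent): "... AppleWebKit/536.26 ..."
        # gives v("WebKit/") of 536, below the 537 cutoff, so url_relative is
        # dropped: Location keeps the absolute URL and the small HTML fallback
        # body above is written.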
    def can_serve_without_redirect(self,redir):
        # Try to serve without redirect if all links can be rewritten and urlboxPath might matter
        if self.isSslUpstream or self.isPjsUpstream or options.wildcard_dns or options.urlboxPath=="/" or not self.htmlOnlyMode(): return # TODO: isProxyRequest argument to htmlOnlyMode? (relevant only if someone configures an adjuster with a non-/ urlbox-path that ALSO accepts real-proxy requests)
        if not hasattr(self.request,"redirCount"):
            self.request.redirCount = 0
        if self.request.redirCount >= 10: return # loop?
        self.request.redirCount += 1
        self.cookieViaURL = None # recalculate:
        oldArgs = self.request.arguments
        (scheme, netloc, path, query, fragment) = urlparse.urlsplit(S(redir))
        self.request.arguments = urlparse.parse_qs(query)
        if not url_is_ours(redir,self.cookie_host()):
            # raise Exception(repr((redir,self.cookie_host()))) # for testing
            self.request.arguments = oldArgs
            return
        if not path.startswith("/"): path="/"+path
        if query: query = "?"+query
        self.request.uri = scheme+"://"+netloc+path+query
        self.request.path = path
        return True
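    # Sketch of the in-place rewrite above (redirect target hypothetical):
    #   if redir=="http://somehost.example/a/b?x=1" and url_is_ours() accepts it,
    #   the request is re-served with request.uri=="http://somehost.example/a/b?x=1",
    #   request.path=="/a/b" and request.arguments=={'x': ['1']}, and True is
    #   returned so redirect() calls doReq0() instead of sending a 30x.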

    def request_no_external_referer(self):
        # Not all browsers implement this, but we can ask.
        # Some sites publicly log their Referer headers,
        # so if an adjusted page needs to link directly to a
        # non-adjusted page then we could end up with a
        # 'deep link' in a public log, which bad robots (that
        # ignore our robots.txt) might try to crawl.  Try to
        # stop this from happening by politely asking the
        # browser to suppress Referer in this case.
        # (For --renderOmitGoAway, we could redirect to an
        # 'air lock' URL before providing the link out to the
        # site, but that wouldn't help with --redirectFiles)
        self.add_header("Referrer-Policy","same-origin")
    def add_nocache_headers(self):
        self.add_header("Pragma","no-cache")
        self.add_header("Vary","*")
        self.add_header("Expires","Thu Jan 01 00:00:00 1970")
        self.add_header("Cache-Control","no-cache, no-store, must-revalidate, max-stale=0, post-check=0, pre-check=0")
    def inProgress(self):
        # If appropriate, writes a "conversion in progress" page and returns True, and then self.inProgress_run() should return True.
        # Not on wget or curl (TODO: configurable?)
        if not options.waitpage or not options.pdfepubkeep: return False
        ua = " "+S(self.request.headers.get("User-Agent",""))
        if " curl/" in ua or " Wget/" in ua: return False # (but don't return false for libcurl/)
        self.set_status(200)
        self.add_header("Refresh","10") # TODO: configurable refresh period?  and make sure it does not exceed options.pdfepubkeep
        self.clear_header("Content-Disposition")
        self.clear_header("Content-Type")
        self.add_header("Content-Type","text/html")
        self.inProgress_has_run = True # doResponse2 may set a callback for render, so can't set _finished yet, but do need to set something so txtCallback knows not to write the actual text into this response (TODO could do a "first one there gets it" approach, but it's unlikely to be needed)
        warn=self.checkBrowser(["IEMobile 6","IEMobile 7","Opera Mobi"],"<h3>WARNING: Your browser might not save this file</h3>You are using {B}, which has been known to try to display text attachments in its own window using very small print, giving no option to save to a file. You might get better results in IEMobile 8+ or Opera Mini (although the latter may have a more limited range of font sizes in the browser itself).") # TODO: make this warning configurable?  See comment after set_header("Content-Disposition",...) below for details
        self.doResponse2(("""%s<h1>File conversion in progress</h1>The result should start downloading soon. If it does not, try <script><!--
document.write('<a href="javascript:location.reload(true)">refreshing this page</a>')
//--></script><noscript>refreshing this page</noscript>.%s%s<hr>This is %s</body></html>""" % (htmlhead("File conversion in progress"),backScript,warn,serverName_html)),True,False)
        # TODO: if (and only if) refreshing from this page, might then need a final 'conversion finished' page before serving the attachment, so as not to leave an 'in progress' page up afterwards
        return True
    def inProgress_run(self): return hasattr(self,"inProgress_has_run") and self.inProgress_has_run

        val = S(self.request.headers.get(header,""))
        toAdd = S(toAdd)
        if (", "+val).endswith(", "+toAdd): return # seems we're running inside a software stack that already added it
        if val: val += ", "
        self.request.headers[header] = val+toAdd
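    # Sketch of the header-appending logic above (header and value hypothetical):
    #   an existing "Via: 1.1 frontend" plus toAdd=="1.1 adjuster" becomes
    #   "Via: 1.1 frontend, 1.1 adjuster"; if the surrounding stack already
    #   appended "1.1 adjuster" (so ", "+val ends with it), nothing is added.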

    def forwardFor(self,server,serverType="ownServer"):
        debuglog("forwardFor "+server+self.debugExtras())
        if wsgi_mode: raise Exception("Not implemented for WSGI mode") # no .connection
        if server==options.own_server and options.ownServer_useragent_ip:
            r = S(self.request.headers.get("User-Agent",""))
            self.request.headers["User-Agent"]=r
        upstream = tornado.iostream.IOStream(socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0))
        client = self.request.connection.stream
        if ':' in server: host, port = server.split(':')
        else: host, port = server, 80
        doCallback(self,upstream.connect,lambda *args:(readUntilClose(upstream,lambda data:writeAndClose(client,data),lambda data:writeOrError(upstream,serverType+" client",client,data)),readUntilClose(client,lambda data:writeAndClose(upstream,data),lambda data:writeOrError(client,serverType+" upstream",upstream,data))),(host, int(port)))
        try: self.request.uri = self.request.original_uri
        except: pass
        upstream.write(B(self.request.method)+B(" ")+B(self.request.uri)+B(" ")+B(self.request.version)+B("\r\n")+B("\r\n".join(("%s: %s" % (k,v)) for k,v in (list(h for h in self.request.headers.get_all() if not h[0].lower()=="x-real-ip")+[("X-Real-Ip",self.request.remote_ip)]))+"\r\n\r\n")+B(self.request.body))
    def thin_down_headers(self):
        # For ping, and for SSH tunnel.  Need to make the response short, but still allow keepalive
        for h in ["Server","Content-Type","Date"]:
        # (Date is added by Tornado 3, which can also add "Vary: Accept-Encoding" but that's done after we get here, TODO: option to ping via a connect and low-level TCP keepalive bytes?)
        self.set_header("Etag","0") # clear_header won't work with Etag, but at least we can set one that's shorter than Tornado's computed one (TODO: could override RequestHandler's compute_etag and make it return None if we've set somewhere that we don't want Etag on the current request)

    def answerPing(self,newVersion):
        # answer a "ping" request from another machine that's using us as a fasterServer
        self.thin_down_headers()
        if newVersion and not wsgi_mode:
            # Forget the headers, just write one byte per second for as long as the connection is open
            # TODO: document that it's a bad idea to set up a fasterServer in wsgi_mode (can't do ipTrustReal, must have fasterServerNew=False, ...)
            stream = self.request.connection.stream
            stream.socket.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, 1)
            def writeBytes():
                try:
                    stream.write(B("1"))
                    IOLoopInstance().add_timeout(time.time()+1,lambda *args:writeBytes())
                except: pass # stream closed: stop the one-byte-per-second loop
            writeBytes()
            if not options.background: sys.stderr.write("ping2: "+S(self.request.remote_ip)+" connected\n") # (don't bother logging this normally, but might want to know when running in foreground)
    def answer_load_balancer(self):
        self.request.suppress_logging = True
        self.add_header("Content-Type","text/html")
        if self.canWriteBody(): self.write(B(htmlhead()+"<h1>Web Adjuster load-balancer page</h1>This page should not be shown to normal browsers, only to load balancers and uptime checkers. If you are a human reading this message, <b>it probably means your browser is \"cloaked\"</b> (hidden User-Agent string); please set a browser string to see the top-level page.</body></html>"))
        if wsgi_mode: return
        if options.trust_XForwardedFor:
            xff = self.request.headers.get_list("X-Forwarded-For")
            if xff:
                xff = xff[0].split()
                # (TODO: do we always want FIRST header?)
        if not options.ipTrustReal in [S(self.request.remote_ip),'*']: return
        try: self.request.remote_ip = self.request.connection.stream.confirmed_ip
        except:
            self.request.remote_ip = self.request.headers.get("X-Real-Ip",self.request.remote_ip)
            try: self.request.connection.stream.confirmed_ip = self.request.remote_ip # keep it for keepalive connections (X-Real-Ip is set only on the 1st request)
            except: pass
        try: del self.request.headers["X-Real-Ip"]
        except: pass
    
    def serveRobots(self):
        self.add_header("Content-Type","text/plain")
        if self.canWriteBody(): self.write(B("User-agent: *\nDisallow: /\n"))
        self.request.suppress_logger_host_convert = True
        self.myfinish()

    def serveImage(self,img):
        if not options.renderLog:
            self.request.suppress_logging = True
        self.add_header("Content-Type","image/"+options.renderFormat)
        self.add_header("Last-Modified","Sun, 06 Jul 2008 13:20:05 GMT")
        self.add_header("Expires","Wed, 1 Dec 2036 23:59:59 GMT") # TODO: S2G (may need Cache-Control with max-age directive instead, drop older browsers)
        # self.clear_header("Server") # save bytes if possible as we could be serving a LOT of these images .. but is this really needed? (TODO)
        if self.canWriteBody(): self.write(img)
        self.myfinish()

    def set_htmlonly_cookie(self):
        # Set the cookie according to the value of "pr" entered from the URL box.
        # TODO: option to combine this and other cookie-based settings with enable_adjustDomainCookieName_URL_override so the setting can be bookmarked?  (some users might want that off however, as an address is different from a setting; in the case of htmlOnly the q= URL can already be bookmarked if you can stop it before the redirect)
        if options.htmlonly_mode:
            htmlonly_mode = (force_htmlonly_mode or "pr" in self.request.arguments)
            current_setting = htmlmode_cookie_name+"=1" in ';'.join(self.request.headers.get_list("Cookie"))
            if not htmlonly_mode == current_setting:
                if htmlonly_mode: val="1"
                else: val="0"
                self.setCookie_with_dots(htmlmode_cookie_name+"="+val)
                # and also add it to self.request.headers,
                # for the benefit of htmlOnlyMode below
                # which sees the same request
                # (TODO: delete old setting? but it's
                # usually used only by redir)
                self.request.headers.add("Cookie",htmlmode_cookie_name+"="+val)
    def htmlOnlyMode(self,isProxyRequest=False):
        # order is important here
        if not options.htmlonly_mode: return False
        elif self.isPjsUpstream or self.isSslUpstream:
            return False
        elif self.auto_htmlOnlyMode(isProxyRequest):
            return True
        elif isProxyRequest: return False
        elif force_htmlonly_mode: return True
        elif hasattr(self.request,"old_cookie"): ck = self.request.old_cookie # so this can be called between change_request_headers and restore_request_headers, e.g. at the start of send_request for js_interpreter mode
        else: ck = ';'.join(self.request.headers.get_list("Cookie"))
        return htmlmode_cookie_name+"=1" in ck
    def auto_htmlOnlyMode(self,isProxyRequest): return options.js_interpreter and (isProxyRequest or (not options.wildcard_dns and not can_do_cookie_host()))
    def handle_URLbox_query(self,v):
        self.set_htmlonly_cookie()
        v = B(v)
        if not (v.startswith(B("http://")) or v.startswith(B("https://"))):
            if B(' ') in v or not B('.') in v: v=getSearchURL(v)
            else: v=B("http://")+v
        if not options.wildcard_dns: # need to use cookie_host
            j = i = v.index(B('/'))+2 # after the http:// or https://
            while j<len(v) and v[j] in B(letters+digits+'.-'): j += 1
            wanted_host = v[i:j]
            if v[i-4:i-3]==B('s'): wanted_host += B(".0") # HTTPS hack (see protocolAndHost)
            ch = self.cookie_host(checkURL=False) # current cookie hostname
            if B(convert_to_requested_host(wanted_host,ch))==B(wanted_host):
                debuglog("Need to change cookie_host to get "+repr(wanted_host))
                if enable_adjustDomainCookieName_URL_override:
                    # do it by URL so they can bookmark it (that is if it doesn't immediately redirect)
                    # (TODO: option to also include the password in this link so it can be passed around?  and also in the 'back to URL box' link?  but it would be inconsistent because not all links can do that, unless we consistently 302-redirect everything so that they do, but that would reduce the efficiency of the browser's HTTP fetches.  Anyway under normal circumstances we probably won't want users accidentally spreading include-password URLs)
                    v = addArgument(v,adjust_domain_cookieName+'='+quote(wanted_host))
                else: self.add_header("Set-Cookie",adjust_domain_cookieName+"="+quote(wanted_host)+"; Path=/; Expires="+cookieExpires) # (DON'T do this unconditionally, convert_to_requested_host above might see we already have another fixed domain for it)
                # (TODO: if convert_to_requested_host somehow returns a *different* non-default_site domain, that cookie will be lost.  Might need to enforce max 1 non-default_site domain.)
            else: wanted_host = ch
        else: wanted_host=None # not needed if wildcard_dns
        self.redirect(domain_process(v,wanted_host,True))
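    # Sketch of URL-box handling (queries hypothetical):
    #   "example.org/page" -> "http://example.org/page" (has a dot, no space)
    #   "penguin facts"    -> getSearchURL("penguin facts") (contains a space)
    # and, without wildcard_dns, the wanted host is remembered via the
    # adjust_domain cookie or URL argument before self.redirect(domain_process(...)).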
    def forwardToOtherPid(self):
        if not (options.ssl_fork and self.WA_UseSSL): return
        # We're handling SSL in a separate PID, so we have to
        # forward the request back to the original PID in
        # case it needs to do things with webdrivers etc.
        self.request.headers["X-From-Adjuster-Ssl-Helper"] = "1"
        self.forwardFor("127.0.0.1:%d" % (port_randomise.get(self.WA_origPort,self.WA_origPort)),"SSL helper:"+str(port_randomise.get(self.WA_connectPort,self.WA_connectPort)))
        return True
    def handleFullLocation(self):
        # HTTP 1.1 spec says ANY request can be of form http://...., not just a proxy request.  The differentiation of proxy/not-proxy depends on what host is requested.  So rewrite all http://... requests to HTTP1.0-style host+uri requests.
        if options.ssl_fork and self.request.headers.get("X-From-Adjuster-Ssl-Helper",""):
            debuglog("Setting isFromSslHelper"+self.debugExtras())
            self.request.connection.stream.isFromSslHelper = True # it doesn't matter if some browser spoofs that header: it'll mean they'll get .0 asked for; however we could check the remote IP is localhost if doing anything more complex with it
            del self.request.headers["X-From-Adjuster-Ssl-Helper"] # don't pass it to upstream servers
        if B(self.request.uri).startswith(B("http://")):
            self.request.original_uri = self.request.uri
            parsed = urlparse.urlparse(S(self.request.uri))
            self.request.host = self.request.headers["Host"] = parsed.netloc
            self.request.uri = urlparse.urlunparse(("","")+parsed[2:])
            if not self.request.uri: self.request.uri="/"
        elif not B(self.request.uri).startswith(B("/")): # invalid
            self.set_status(400) ; self.myfinish() ; return True
        if self.WA_UseSSL or (hasattr(self.request,"connection") and hasattr(self.request.connection,"stream") and hasattr(self.request.connection.stream,"isFromSslHelper")): # we're the SSL helper on port+1 and we've been CONNECT'd to, or we're on port+0 and forked SSL helper has forwarded it to us, so the host asked for must be a .0 host for https
            if self.request.host and not B(self.request.host).endswith(B(".0")): self.request.host = S(self.request.host)+".0"
    def handleSSHTunnel(self):
        if not B(allowConnectURL)==B("http://")+B(self.request.host)+B(self.request.uri): return
        self.thin_down_headers() ; self.add_header("Pragma","no-cache") # hopefully "Pragma: no-cache" is enough and we don't need all of self.add_nocache_headers
        global the_ssh_tunnel # TODO: support more than one SSH tunnel? (but will need to use session IDs etc; GNU httptunnel does just 1 tunnel as of 3.x so maybe we're OK)
        try:
            if B(self.request.body)==B("new connection"):
                self.request.body = B("")
                the_ssh_tunnel[1].append(None) # if exists
            if None in the_ssh_tunnel[1]:
                try: the_ssh_tunnel[0].close()
                except: pass
                raise NameError # as though the_ssh_tunnel didn't yet exist
        except NameError: # not yet established
            sessionID = time.time() # for now
            the_ssh_tunnel = [tornado.iostream.IOStream(socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)),[],sessionID] # upstream connection, data waiting for client, id
            def add(data):
                if sessionID==the_ssh_tunnel[2]:
                    the_ssh_tunnel[1].append(data)
            doCallback(self,the_ssh_tunnel[0].connect,lambda *args:readUntilClose(the_ssh_tunnel[0],lambda data:(add(data),add(None)),add),(allowConnectHost, int(allowConnectPort)))
            # TODO: log the fact we're starting a tunnel?
        if self.request.body: the_ssh_tunnel[0].write(self.request.body) # TODO: will this work even when it's not yet established? (not a big problem on SSH because server always speaks first)
        def check_ssh_response(startTime,sessionID):
            if not the_ssh_tunnel[2]==sessionID: return self.myfinish()
            if the_ssh_tunnel[1]==[] and not time.time()>startTime+3: return IOLoopInstance().add_timeout(time.time()+0.2,lambda *args:check_ssh_response(startTime,sessionID)) # keep waiting (up to max 3sec - not too long because if client issues a 'read on timeout' while the SSH layer above is waiting for user input then we still want it to be reasonably responsive to that input; it's the client side that should wait longer between polls)
            if None in the_ssh_tunnel[1]:
                self.write(B('').join(the_ssh_tunnel[1][:-1]))
                the_ssh_tunnel[1] = [None]
            else:
                self.write(B('').join(the_ssh_tunnel[1]))
                the_ssh_tunnel[1] = []
            self.myfinish()
        IOLoopInstance().add_timeout(time.time()+0.2,lambda *args:check_ssh_response(time.time(),the_ssh_tunnel[2]))
        return True
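    # Sketch of the tunnel polling above: a client first POSTs "new connection"
    # to allowConnectURL, then repeatedly POSTs raw SSH bytes and reads back
    # whatever check_ssh_response has buffered in the_ssh_tunnel[1] within its
    # ~3-second window; a None marker in that buffer means upstream has closed.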

    def handleSpecificIPs(self):
        if not ipMatchingFunc: return False
        msg = ipMatchingFunc(self.request.remote_ip)
        if not msg: return False
        if B(msg).startswith(B('*')): # a block
            self.write(B(htmlhead("Blocked"))+B(msg)[1:]+B("</body></html>")) ; self.myfinish() ; return True
        if B(self.request.uri) in [B("/robots.txt"),B("/favicon.ico")]: return False
        cookies = ';'.join(self.request.headers.get_list("Cookie"))
        if B(msg).startswith(B('-')): # minor edit
            msg = B(msg)[1:]
            if seen_ipMessage_cookieName+"=" in cookies:
                # seen ANY message before (not just this)
                return False
        val = cookieHash(msg)
        if seen_ipMessage_cookieName+"="+val in cookies:
            # seen THIS message before
            return False
        hs = self.cookieHostToSet()
        self.add_nocache_headers()
        if self.canWriteBody(): self.write(B(htmlhead("Message"))+B(msg)+(B("<p><form><label><input type=\"checkbox\" name=\"gotit\">Don't show this message again</label><br><input type=\"submit\" value=\"Continue\" onClick=\"var a='%s=%s;domain=',b=(document.forms[0].gotit.checked?'expires=%s;':'')+'path=/',h='%s;';document.cookie=a+'.'+h+b;document.cookie=a+h+b;location.reload(true);return false\"></body></html>" % (seen_ipMessage_cookieName,val,cookieExpires,hs))))
        logging.info("ip_messages: done "+S(self.request.remote_ip))
    def handleGoAway(self,realHost,maybeRobots):
        if not options.renderOmitGoAway: return False
        browser = self.checkBrowser(options.renderOmit)
        if not browser: return False
        if maybeRobots:
            self.serveRobots() # regardless of which browser header it presents
            return True # do NOT shorten this by making serveRobots return True: it must return None due to other uses
        # TODO: option to redirect immediately without this message?  (but then we'd be supplying a general redirection service, which might have issues of its own)
        if realHost:
            msg = ' and <a rel="noreferrer" href="%s%s">go directly to the original site</a>' % (S(protocolWithHost(realHost)),S(self.request.uri))
            self.request_no_external_referer()
        else: msg = ""
        if self.canWriteBody(): self.write(B("%s<h1>You don't need this!</h1>This installation of Web Adjuster has been set up to change certain characters into pictures, for people using old computers that don't know how to display them themselves. However, <em>you</em> seem to be using %s, which is <noscript>either </noscript>definitely capable of showing these characters by itself<noscript>, or else wouldn't be able to show the pictures anyway<!-- like Lynx --></noscript>. Please save our bandwidth for those who really need it%s. Thank you.</body></html>" % (htmlhead(),S(browser),msg)))
        self.myfinish() ; return True

    def needCssCookies(self):
        h = options.headAppendCSS
        if not h or not '%s' in h: return False
        for ckCount in range(len(h.split(';'))-1):
            if not self.getCookie("adjustCss" + str(ckCount) + "s", ""):
                # Looks like we need to redirect back to the main page to get a CSS selection.  But just double-check it doesn't look like an XMLHttpRequest, which doesn't always send the cookies:
                if any(h in S(self.request.headers.get("Referer","")) for h in options.host_suffix.split("/")):
                    accept = S(self.request.headers.get("Accept",""))
                    if "application/json" in accept or len(accept.split(","))==2:
                        return False
                return True
        return False
    def cssAndAttrsToAdd(self):
        h = options.headAppendCSS ; cha = options.cssHtmlAttrs
        if not h or not '%s' in h: return h, cha
        h,opts = h.split(';',1)
        opts=opts.split(';')
        ckCount = N = 0
        for o in opts:
            chosen = self.getCookie("adjustCss" + str(ckCount) + "s", "")
            if not chosen:
                # we don't have all the necessary cookies to choose a stylesheet, so don't have one (TODO: or do we just want to go to the first listed?)
                if cha and ';' in cha: return "", ""
                else: return "", cha
            poss_vals = [re.sub('=.*','',x) for x in o.split(',')]
            if '' in poss_vals: poss_vals[poss_vals.index('')]='-'
            if not chosen in poss_vals: chosen = re.sub('=.*','',o.split(',',1)[0]) # make sure it's an existing option, to protect against cross-site-scripting injection of CSS (as some browsers accept JS in CSS)
            N = poss_vals.index(chosen)
            if chosen=="-": chosen = "" # TODO: document in headAppendCSS that we use '-' as a placeholder because we need non-empty values in cookies etc
            h=h.replace('%s',chosen,1)
            ckCount += 1
        if cha and ';' in cha: return h, options.cssHtmlAttrs.split(';')[N]
        else: return h, cha
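    # Sketch of the '%s' substitution above (option string hypothetical):
    #   headAppendCSS=="http://host/print-%s.css;big=Large,small=Small" with
    #   cookie adjustCss0s=="small" yields "http://host/print-small.css";
    #   an unrecognised cookie value falls back to the first option ("big"),
    #   and a missing cookie means no stylesheet is added at all.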
    def cssOptionsHtml(self):
        h = options.headAppendCSS
        if not h or not '%s' in h: return ""
        h,opts = h.split(';',1)
        opts=opts.split(';')
        ckCount = 0
        r = ["<p>Style:"]
        for o in opts:
            ckName = "adjustCss" + str(ckCount) + "s"
            r.append(' <select name="%s">' % ckName)
            chosen = self.getCookie(ckName, "")
            for val in o.split(','):
                if '=' in val: val,desc = val.split('=',1)
                else: desc = val
                if val=="": val = "-" # TODO: document in headAppendCSS that we use '-' as a placeholder because we need non-empty values in cookies etc
                if val==chosen: sel = " selected"
                else: sel = ""
                r.append('<option value="%s"%s>%s</option>' % (val,sel,desc))
            ckCount += 1
            r.append('</select>')
        return ''.join(r)+' <input type="submit" name="try" value="Try"></p>'
    def set_css_from_urlbox(self):
        h = options.headAppendCSS
        if not h or not '%s' in h: return
        h,opts = h.split(';',1)
        opts=opts.split(';')
        ckCount = 0
        for o in opts:
            ckName = "adjustCss" + str(ckCount) + "s"
            ckVal = self.getArg(ckName)
            if ckVal:
                self.setCookie_with_dots(ckName+"="+ckVal) # TODO: do we ever need to quote() ckVal ?  (document to be careful when configuring?)
                self.setCookie(ckName,ckVal) # pretend it was already set on THIS request as well (for 'Try' button; URL should be OK as it redirects)
            ckCount += 1
    
    def serve_URLbox(self):
        if not options.wildcard_dns: self.clearUnrecognisedCookies() # TODO: optional?
        self.addCookieFromURL()
        r = urlbox_html(self.htmlOnlyMode() or self.checkBrowser(["Lynx/"]),self.cssOptionsHtml(),self.getArg("q") or self.getArg("d"))
        self.doResponse2(r,True,False) # TODO: run htmlFilter on it also? (render etc will be done by doResponse2)
    def serve_hostError(self):
        l = []
        if options.wildcard_dns: l.append("prefixing its domain with the one you want to adjust")
        if options.real_proxy: l.append("setting it as a <b>proxy</b>")
        if l: err="This adjuster can be used only by "+", or ".join(l)+"."
        else: err="This adjuster cannot be used. Check the configuration."
        self.doResponse2(htmlhead()+err+'</body></html>',True,False) # TODO: run htmlFilter on it also? (render etc will be done by doResponse2)

        ua = S(self.request.headers.get("User-Agent",""))
        if any(re.search(x,ua) for x in options.prohibitUA): return self.serveRobots()
        uri = S(self.request.uri)[len(options.mailtoPath):].replace('%%+','%') # we encode % as %%+ to stop browsers and transcoders from arbitrarily decoding e.g. %26 to &
        if '?' in uri:
            addr,rest = uri.split('?',1)
            self.request.arguments = urlparse.parse_qs(rest) # after the above decoding of %'s
        else: addr = uri
        body = self.getArg("body")
        subj = self.getArg("subject")
        r = [] ; smsLink = ""
        if addr: r.append("To: "+ampEncode(addr))
        if subj: r.append("Subject: "+ampEncode(subj))
        if body:
            r.append("Body: "+ampEncode(body))
            if self.checkBrowser(options.mailtoSMS):
                if subj and not body.startswith(subj): smsLink = subj+" "+body
                else: smsLink = body
                if '&' in smsLink:
                    smsLink="[Before sending this text, replace -amp- with an ampersand. This substitution has been done in case your phone isn't compliant with RFC 5724.] "+smsLink.replace('&',' -amp- ')
                    # RFC 5724 shows we ought to get away with ampersands encoded as %26, but on Windows Mobile (Opera or IE) we don't; the SMS is truncated at that point.  TODO: whitelist some other platforms? (test with <a href="sms:?body=test1%26test2">this</a>)
                if self.checkBrowser(["iPhone OS 4","iPhone OS 5","iPhone OS 6","iPhone OS 7"]): sep = ';'
                elif self.checkBrowser(["iPhone OS 8","iPhone OS 9"]): sep = '&'
                else: sep = '?'
                smsLink = B('<br><a href="sms:'+sep+'body=')+quote(rm_u8punc(B(smsLink)))+B('">Send as SMS (text message)</a>')
                if self.checkBrowser(["Windows Mobile"]):
                    # TODO: others? configurable?
                    # browsers that may also have this problem with EMAIL
                    uri = uri.replace("%26","%20-amp-%20")
                    if not "body=" in uri: uri += "&body="
                    uri = uri.replace("body=","body=[Before%20sending%20this%20text,%20replace%20-amp-%20with%20an%20ampersand.%20This%20substitution%20has%20been%20done%20as%20your%20phone%20isn't%20compliant%20with%20RFC%205724.]%20")
        if len(r)==1: # different format if only 1 item is specified
            if addr: r=["The email will be sent to "+ampEncode(addr)]
            elif subj: r=["The email's Subject will be: "+ampEncode(subj)]
            else: r=["The email's Body will be: "+ampEncode(body)]
        elif not r: r.append("The link does not specify any recognised email details")
        else: r.insert(0,"The following information will be sent to the email client:")
        self.doResponse2(('%s<h3>mailto: link</h3>This link is meant to open an email client.<br>%s<br><a href=\"mailto:%s\">Open in email client</a> (if set up)%s%s<hr>This is %s</body></html>' % (htmlhead("mailto: link - Web Adjuster"),"<br>".join(r),uri,S(smsLink),backScript,serverName_html)),True,False)
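    # Sketch of the sms: link above (versions hypothetical): iPhone OS 4-7 get
    # "sms:;body=...", iPhone OS 8-9 get "sms:&body=...", everything else gets
    # "sms:?body=..."; ampersands in the body are spelt out as " -amp- "
    # because some handsets truncate the message at a literal %26.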
        self.request.suppress_logger_host_convert = True
        if B(self.request.uri)==B("/favicon.ico") or any(re.search(x,self.request.headers.get("User-Agent","")) for x in options.prohibitUA):
            # avoid logging favicon.ico tracebacks when submitPath=="/"
            self.set_status(400) ; self.myfinish() ; return
        if len(self.request.uri) > len(options.submitPath):
            txt = S(self.request.uri[len(options.submitPath):])
            if len(txt)==2 and options.submitBookmarklet:
                filterNo = ord(txt[1])-ord('A')
                if txt[0] in 'bB': return self.serve_bookmarklet_code(txt[1],txt[0]=='B')
                elif txt[0]=='j': return self.serve_bookmarklet_json(filterNo)
                elif txt[0]=='u': return self.serve_backend_post(filterNo)
                elif txt[0] in 'iap':
                    return self.doResponse2(android_ios_instructions(txt[0],self.request.host,self.request.headers.get("User-Agent",""),filterNo),"noFilterOptions",False) # on Android and iOS, 'def bookmarklet' gives instruction_url#javascript:bookmarklet_code, so serve instructions here
            txt = zlib.decompressobj().decompress(base64.b64decode(txt),16384) # limit to 16k to avoid zip bombs (limit is also in the compress below)
            self.request.uri = "%s (input not logged, len=%d)" % (options.submitPath,len(txt))
        else: txt = self.request.arguments.get("i",None)
        if not txt:
            self.is_password_domain=True # no prominentNotice needed
            # In the markup below, body's height=100% is needed to ensure we can set a percentage height on the textarea consistently across many browsers (otherwise e.g. Safari 6 without user CSS might start making the textarea larger as soon as it contains input, overprinting the rest of the document)
            local_submit_url = "http://"+self.request.host+options.submitPath
            if options.submitBookmarkletDomain: submit_url = "//"+options.submitBookmarkletDomain+options.submitPath
            else: submit_url = local_submit_url
            if (options.password and submitPathIgnorePassword) or options.submitPath=='/' or defaultSite(): urlbox_footer = "" # not much point linking them back to the URL box under the first circumstance, and there isn't one for the other two
            else: urlbox_footer = '<p><a href="http://'+hostSuffix()+publicPortStr()+options.urlboxPath+'">Process a website</a></p>'
            # TODO: what if their browser doesn't submit in the correct charset?  for example some versions of Lynx need -display_charset=UTF-8 otherwise they might double-encode pasted-in UTF-8 and remove A0 bytes even though it appears to display correctly (and no, adding accept-charset won't help: that's for if the one to be accepted differs from the document's)
            return self.doResponse2(("""%s<body style="height:100%%;overflow:auto"><form method="post" action="%s"><h3>Upload Text</h3>%s:<p><span style="float:right"><input type="submit" value="Upload"><script><!--
document.write(' (Ctrl-Enter) | <a href="javascript:history.go(-1)">Back</a>')
//--></script></span><br><textarea name="i" style="width:100%%;clear:both;height:60%%" rows="5" cols="20" placeholder="Type or paste your text here"
onKeyDown="if((event.ctrlKey||event.metaKey) && (event.keyCode==13 || event.which==13)) document.forms[0].submit(); else return true;">
//--></script></body></html>""" % (htmlhead("Upload Text - Web Adjuster").replace("<body>",""),options.submitPath,options.submitPrompt,bookmarklet(submit_url,local_submit_url)+urlbox_footer)),"noFilterOptions",False)
        if type(txt) == list: # came from the POST form
            txt = txt[0].strip()
            # On at least some browsers (e.g. some Safari versions), clicking one of our JS reload links after the POST text has been shown will reload the form (instead of re-submitting the POST text) and can scroll to an awkward position whether the code below calls focus() or not.  Could at least translate to GET if it's short enough (don't want to start storing things on the adjuster machine - that would require a shared database if load-balancing)
            if len(txt) <= 16384: # (else we wouldn't decompress all; see comment above)
                enc = base64.b64encode(zlib.compress(B(txt),9))
                if 0 < len(enc) < 2000: return self.redirect(B("http://")+B(hostSuffix())+B(publicPortStr())+B(options.submitPath)+B(enc),303) # POST to GET
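        # Sketch of the POST-to-GET round-trip above (text hypothetical):
        # zlib.compress(b"some pasted text",9), base64-encoded, goes onto the
        # end of submitPath in a 303 redirect; the handler earlier b64decodes
        # and decompresses it with a 16384-byte cap, so longer texts simply
        # stay as POST bodies.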

        # pretend it was served by a remote site; go through everything including filters (TODO: could bypass most of doResponse instead of rigging it up like this; alternatively keep this as it shows how to feed data to doResponse)
        self.connection_header = None
        self.urlToFetch = "" # for js_process
        class H:
            def get(self,h,d):
                if h=="Content-Type": return "text/html; charset=utf-8"
                else: return d
            def get_all(self): return [("Content-Type","text/html; charset=utf-8")]
        if options.htmlUrl: line1 = "about:submitted\n"
        else: line1 = ""
        runFilterOnText(self,self.getHtmlFilter(),find_text_in_HTML(B(htmlhead("Uploaded Text - Web Adjuster"))+B("<h3>Your text</h3>")+B(txt2html(txt))+B("<hr>This is %s. %s</body></html>" % (serverName_html,backScriptNoBr))),lambda out,err:self.doResponse2(out,True,False),prefix=line1) # backScriptNoBr AFTER the server notice to save vertical space
    def serve_bookmarklet_code(self,xtra,forceSameWindow): # (forceSameWindow is used by the "plus" bookmarklets)
        self.add_header("Content-Type","application/javascript")
        self.add_header("Access-Control-Allow-Origin","*")
        if options.submitBookmarkletDomain: submit = "//"+options.submitBookmarkletDomain
        else: submit = "http://"+self.request.host
        if self.canWriteBody(): self.write(B(bookmarkletMainScript(submit+options.submitPath+'j'+xtra,forceSameWindow)))
        self.myfinish()
    def serve_err(self,err):
        self.set_status(500)
        self.add_header("Content-Type","text/plain")
        logging.error("Bookmarklet error: "+S(err))
        # +' '+repr(self.request.body)
        if self.canWriteBody(): self.write(B(err))
        self.myfinish()
    def serve429(self,retrySecs=0):
        debuglog("serve429"+self.debugExtras())
        try: self.set_status(429,"Too many requests")
        except: self.set_status(429)
        if retrySecs: self.add_header("Retry-After",str(retrySecs))
        if self.canWriteBody(): self.write(B("Too many requests (HTTP 429)"))
        if not self.request.remote_ip in options.ipNoLog:
            except: f = ""
            logging.error("Returning HTTP 429 (too many requests)"+f+" to "+S(self.request.remote_ip))
        self.request.suppress_logging = True
        self.myfinish()
    def serve_bookmarklet_json(self,filterNo):
        self.add_header("Access-Control-Allow-Origin","*")
        self.add_header("Access-Control-Allow-Headers","Content-Type")
        if not self.request.body:
            self.add_header("Content-Type","text/plain")
            self.add_header("Allow","POST") # some browsers send OPTIONS first before POSTing via XMLHttpRequest (TODO: check if OPTIONS really is the request method before sending this?)
            if self.canWriteBody(): self.write(B("OK"))
            return self.myfinish()
        try: l = json.loads(self.request.body)
        except: return self.serve_err("Bad JSON")
        for i in xrange(len(l)):
            if l[i]=='': l[i] = u'' # shouldn't get this (TODO: fix in bookmarkletMainScript? e.g. if submitBookmarkletFilterJS can match empty strings, or conversion to 'cnv' makes it empty, anything else?), but if we do, don't let it trip up the 'wrong data structure' below on Python 2
        if not (type(l)==list and all(((type(i)==unicode or (type(i)==str and all(ord(c)<0x80 for c in i))) and not chr(0) in i) for i in l)): return self.serve_err("Wrong data structure")
        codeTextList = []
        for i in l:
            codeTextList.append(B(chr(0)))
            if type(i)==bytes: codeTextList.append(i)
            else: codeTextList.append(i.encode('utf-8'))
        def callback(out,err):
            self.add_header("Content-Type","application/json")
            if self.canWriteBody(): self.write(B(json.dumps([i.decode('utf-8','replace') for i in B(out)[1:].split(B(chr(0)))]))) # 'replace' here because we don't want utf-8 errors to time-out the entire request (although hopefully the filter WON'T produce utf-8 errors...)
        if options.htmlUrl: line1 = "about:bookmarklet\n" # TODO: get the bookmarklet to report the location.href of the site (and update htmlUrl help text)
        else: line1 = ""
        runFilterOnText(self,self.getHtmlFilter(filterNo),codeTextList,callback,prefix=line1)
    def serve_backend_post(self,filterNo):
        # for another instance's htmlFilter=http://...uA etc
        runFilter(self.getHtmlFilter(filterNo),self.request.body,lambda out,err: (self.write(B(out)),self.finish()))
        # check for PDF/EPUB conversion on other threads or cached
        if not options.pdfepubkeep: return False # we don't guarantee to update kept_tempfiles properly if it's 0 (e.g. pdf can just pipe, so don't need unlinkOutputLater)
        ktkey = (self.request.host, self.request.uri)
        if ktkey in kept_tempfiles:
            def tryRead():
                try: txt=open(kept_tempfiles[ktkey],'rb').read() # ('rb' makes it give you a byte-string in Python 3)
                    if self.canWriteBody():
                        if newext==".mobi": self.write(txt)
                        else: self.write(remove_blanks_add_utf8_BOM(txt))
                elif not self.inProgress(): IOLoopInstance().add_timeout(time.time()+1,lambda *args:tryRead())
            tryRead() ; return True
        kept_tempfiles[ktkey] = 1 # conversion in progress
        return False

    def getArg(self,arg):
        a = self.request.arguments.get(arg,None)
        if type(a)==type([]): a=a[0]
        return a
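    # For example (argument name hypothetical): with request.arguments containing
    # {"q": ["http://example.org"]}, self.getArg("q") returns its first value
    # "http://example.org"; a missing argument returns None.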

    def debugExtras(self):
        r = " for "+self.request.method+" "+self.request.uri
        if not self.request.uri.startswith("http"):
            r += " host="+str(self.request.host)
        if self.WA_UseSSL or (hasattr(self.request,"connection") and hasattr(self.request.connection,"stream") and hasattr(self.request.connection.stream,"isFromSslHelper")): r += " WA_UseSSL"
        if self.isPjsUpstream: r += " isPjsUpstream instance "+str(self.WA_PjsIndex+self.WA_PjsStart)
        if self.isSslUpstream: r += " isSslUpstream"
        return r

    def canWriteBody(self): return not B(self.request.method) in [B("HEAD"),B("OPTIONS")] and not (hasattr(self,"_finished") and self._finished)

    def justMeCheck(self):
        # Ideally we should do this asynchronously, but as
        # it's only for the --just-me option and we assume a
        # local ident server, we can probably get away with:
        usr = None
        try:
            s = socket.socket()
            try: s.connect(('localhost',113))
            except:
                import pwd
                for l in getoutput("netstat -tpn").split("\n"):
                    l = l.split()
                    if len(l)>6 and l[3].endswith(":"+str(self.request.connection.stream.socket.getpeername()[1])) and l[5]=="ESTABLISHED" and "/" in l[6] and S(pwd.getpwuid(os.stat("/proc/"+l[6].split("/",1)[0]).st_uid).pw_name)==myUsername: return True
                logging.error("no ident server and couldn't confirm username with netstat: rejecting this connection")
                return
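            # ident (RFC 1413) query: send the two port numbers of this connection (the caller's port first); the reply's last colon-separated field should be the username owning that socket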
            s.send(B("%d, %d\r\n" % (self.request.connection.stream.socket.getpeername()[1], port_randomise.get(self.WA_port,self.WA_port))))
            usr = s.recv(1024).strip()
            if usr.split(B(':'))[-1]==B(myUsername): return True
            else: logging.error("ident server didn't confirm username: rejecting this connection")
        except Exception as e: logging.error("Trouble connecting to ident server (%s): rejecting this connection" % repr(e))
        self.set_status(401)
        if usr: self.write(B(usr+": "))
        self.write(B("Connection from wrong account (ident check failed)\n"))
        self.myfinish()

    def doReq(self): # (def line not visible in this extract; the name is assumed from doReq0 below and the debuglog "doReq" call)
        if options.just_me and not self.justMeCheck(): return
        if mainServerPaused and not self.isPjsUpstream and not self.isSslUpstream: return self.serve429()
        self.doReq0()
    def doReq0(self):
        debuglog("doReq"+self.debugExtras()) # MUST keep this debuglog call: it also sets profileIdle=False
        try: reqsInFlight.add(id(self)) # for profile
        except: pass # e.g. not options.profile
        if not self.isPjsUpstream and not self.isSslUpstream:
            try: origReqInFlight.add(id(self))
            except: pass # e.g. not options.profile
            if options.one_request_only:
                IOLoopInstance().handle_callback_exception = lambda *args:0 # Tornado 4 raises EBADF in accept_handler if you call server.stop() from a request handler, so disable its handle_callback_exception to reduce log clutter (TODO: handle other errors using the original exception handler if present?)
                mainServer.stop()
        if wsgi_mode and B(self.request.path)==B(quote(os.environ.get("SCRIPT_NAME","")+os.environ.get("PATH_INFO",""))) and 'SCRIPT_URL' in os.environ:
            # workaround for Tornado 2.x limitation when used with CGI and htaccess redirects
            self.request.uri = os.environ['SCRIPT_URL']
            qs = os.environ.get("QUERY_STRING","")
            if not qs: qs = os.environ.get("REDIRECT_QUERY_STRING","")
            if qs:
                self.request.uri += "?"+qs
                self.request.arguments = urlparse.parse_qs(qs)
            self.request.path = self.request.uri
        elif sys.version_info[0]==2:
            # HTTP/1.x headers are officially Latin-1 (but usually ASCII), and Tornado versions 2 through 4 decode the Latin-1 and re-encode it as UTF-8.  This can cause confusion, so let's emulate modern browsers and %-encode any non-ASCII URIs:
            try: self.request.uri = self.request.uri.decode('utf-8').encode('latin1')
            except: pass
        self.request.uri=re.sub("[^!-~]+",lambda m:quote(m.group()),S(self.request.uri))
        self.request.method = S(self.request.method)
        if self.request.host:
            self.request.host = S(self.request.host)
        else: self.request.host = ""
        if self.request.method=="HEAD": self.set_header("Content-Length","-1") # we don't yet the content length, so Tornado please don't add it!  (NB this is for HEAD only, not OPTIONS, which should have Content-Length 0 or some browsers time out) (TODO: in non-WSGI mode could call .flush() after writing headers (with callback param), then Content-Length won't be added on .finish())
        if self.request.headers.get("User-Agent","")=="ping":
            if self.request.uri=="/ping2": return self.answerPing(True)
            elif self.request.uri=="/ping": return self.answerPing(False)
        elif options.loadBalancer and B(self.request.headers.get("User-Agent",""))==B("") and self.request.uri=="/": return self.answer_load_balancer()
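        # (the /ping and /ping2 endpoints above are presumably polled by another instance's fasterServer watcher; the empty-User-Agent case answers a load balancer's health check on /)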
        self.find_real_IP() # must find real ip BEFORE forwarding to fasterServer, because might also be behind nginx etc
        if fasterServer_up: # (assumed: this condition line is not visible in the extract; a background check presumably sets fasterServer_up when options.fasterServer is reachable)
            return self.forwardFor(options.fasterServer,"fasterServer")
        if self.forwardToOtherPid(): return
        if self.handleFullLocation(): return # if returns here, URL is invalid; if not, handleFullLocation has 'normalised' self.request.host and self.request.uri
        if self.isPjsUpstream:
            if options.js_UA:
                if options.js_UA.startswith("*"): self.request.headers["User-Agent"] = options.js_UA[1:]
            else: self.request.headers["User-Agent"] = webdriver_UA[self.WA_PjsIndex]
            webdriver_inProgress[self.WA_PjsIndex].add(self.request.uri)
        elif not self.isSslUpstream:
            if self.handleSSHTunnel(): return
            if self.handleSpecificIPs(): return
            # TODO: Slow down heavy users by self.request.remote_ip ?
            if extensions.handle("http://"+self.request.host+self.request.uri,self):
                self.request.suppress_logger_host_convert = self.request.valid_for_whois = True
                return self.myfinish()
            if ownServer_regexp and ownServer_regexp.match(self.request.host+self.request.uri):
                self.request.headers["Connection"] = "close" # MUST use 'Connection: Close' here, as keepalive can go wrong if it subsequently fetches a URL that DOESN'T match ownServer_regexp but comes from the same domain and this goes to ownServer incorrectly.  TODO mention it in the help text?, TODO might we occasionally need something similar for ownServer_if_not_root etc?, TODO at lower priority: if we can reasonably repeat the requests then do that insntead of using forwardFor
                return self.forwardFor(options.own_server)
            if cssReload_cookieSuffix and cssReload_cookieSuffix in self.request.uri:
                ruri,rest = self.request.uri.split(cssReload_cookieSuffix,1)
                self.setCookie_with_dots(rest)
                return self.redirect(ruri) # so can set another
        if self.isPjsUpstream or self.isSslUpstream: realHost = self.request.host
        else: realHost = convert_to_real_host(self.request.host,self.cookie_host(checkReal=False)) # don't need checkReal if return value will be passed to convert_to_real_host anyway
        if realHost == -1: # (assumed: this condition line is not visible in the extract; convert_to_real_host presumably returns a special value meaning "hand this request to own_server")
            return self.forwardFor(options.own_server)
            # (TODO: what if it's keep-alive and some browser figures out our other domains are on the same IP and tries to fetch them through the same connection?  is that supposed to be allowed?)
        elif realHost==0 and options.ownServer_if_not_root: realHost=options.own_server # asking by cookie to adjust the same host, so don't forwardFor() it but fetch it normally and adjust it
        if type(realHost)==bytes and not bytes==str:
            realHost = S(realHost)
        isProxyRequest = self.isPjsUpstream or self.isSslUpstream or (options.real_proxy and realHost == self.request.host)
        if not isProxyRequest and not self.isPjsUpstream and not self.isSslUpstream and (self.request.host=="localhost" or self.request.host.startswith("localhost:")) and not "localhost" in options.host_suffix: return self.redirect("http://"+hostSuffix(0)+publicPortStr()+self.request.uri) # save confusion later (e.g. set 'HTML-only mode' cookie on 'localhost' but then redirect to host_suffix and cookie is lost).  Bugfix 0.314: do not do this redirect if we're a real proxy for another server on localhost
        self.request.valid_for_whois = True # (if options.whois, don't whois unless it gets this far, e.g. don't whois any that didn't even match "/(.*)" etc)
        maybeRobots = (not self.isPjsUpstream and not self.isSslUpstream and not options.robots and self.request.uri=="/robots.txt")
        # don't actually serveRobots yet, because MIGHT want to pass it to own_server (see below)
        
        self.is_password_domain=False # needed by doResponse2
        if options.password and not options.real_proxy and not self.isPjsUpstream and not self.isSslUpstream:
          # whether or not open_proxy, because might still have password (perhaps on password_domain), anyway the doc for open_proxy says "allow running" not "run"
          # First ensure the wildcard part of the host is de-dotted, so the authentication cookie can be shared across hosts.
          # (This is not done if options.real_proxy because we don't want to touch the hostname for that)
          host = self.request.host
          if host:
            if host.endswith(":"+str(options.publicPort)): host=host[:-len(":"+str(options.publicPort))]
            for hs in options.host_suffix.split("/"):
              ohs = "."+hs
              if host.endswith(ohs) and host.index(".")<len(host)-len(ohs):
                if maybeRobots: return self.serveRobots()
                if options.publicPort==80: colPort=""
                else: colPort=":"+str(options.publicPort)
                return self.redirect("http://"+dedot(host[:-len(ohs)])+ohs+colPort+self.request.uri)
          # Now OK to check authentication:
          if not self.authenticates_ok(host) and not (submitPathIgnorePassword and self.request.uri.startswith(submitPathForTest)):
              self.request.suppress_logger_host_convert = True
              if not options.auth_error: # (assumed: this condition line is not visible in the extract; an empty auth_error presumably means "pass wrong-password requests to own_server")
                  if options.own_server: return self.forwardFor(options.own_server)
                  elif maybeRobots: return self.serveRobots()
                  else: options.auth_error = "auth_error set incorrectly (own_server not set)" # see auth_error help (TODO: is it really a good idea to say this HERE?)
              elif maybeRobots: return self.serveRobots()
              self.add_nocache_headers() # in case they try the exact same request again after authenticating (unlikely if they add &p=..., but they might come back to the other URL later, and refresh is particularly awkward if we redirect)
              if options.auth_error.startswith("http://") or options.auth_error.startswith("https://"): return self.redirect(options.auth_error)
              if options.auth_error.startswith("*"): auth_error = options.auth_error[1:]
              else:
                  self.set_status(401)
                  auth_error = options.auth_error
              if self.canWriteBody(): self.write(B(htmlhead("")+auth_error+"</body></html>"))
              return self.myfinish() # (assumed: stop here so a failed authentication never falls through to fetching the real page)
        fixServerHeader(self)
        if not self.isPjsUpstream and not self.isSslUpstream:
          if self.handleGoAway(realHost,maybeRobots): return
          # Now check if it's an image request:
          _olduri = self.request.uri
          self.request.uri=unquote(self.request.uri)
          img = Renderer.getImage(self.request.uri)
          if img: return self.serveImage(img)
          # Not an image:
          if options.mailtoPath and self.request.uri.startswith(options.mailtoPath): return self.serve_mailtoPage()
          if options.submitPath and self.request.uri.startswith(submitPathForTest): return self.serve_submitPage()
          self.request.uri = _olduri
        if realHost=="error" and not maybeRobots:
            return self.serve_hostError()
        if not realHost: # default_site(s) not set
            if options.own_server and options.ownServer_if_not_root and len(self.request.path)>1: return self.forwardFor(options.own_server)
            elif maybeRobots or any(re.search(x,self.request.headers.get("User-Agent","")) for x in options.prohibitUA): return self.serveRobots()
            if self.getArg("try"): return self.serve_URLbox() # we just set the stylesheet
            v = self.getArg("q") # (assumed: the line defining v is not visible in the extract; "q" is taken to be the URL-box query parameter)
            if options.submitPath and self.getArg("sPath"): return self.redirect("http://"+hostSuffix()+publicPortStr()+options.submitPath)
            if v: return self.handle_URLbox_query(v)
            else: return self.serve_URLbox()
        if maybeRobots: return self.serveRobots()
        viewSource = (not self.isPjsUpstream and not self.isSslUpstream) and self.checkViewsource()
        if not self.isPjsUpstream and not self.isSslUpstream and self.needCssCookies():
            self.add_nocache_headers() # please don't cache this redirect!  otherwise user might not be able to leave the URL box after:
            return self.redirect("http://"+hostSuffix()+publicPortStr()+options.urlboxPath+"?d="+quote(protocolWithHost(realHost)+self.request.uri),302) # go to the URL box - need to set more options (and 302 not 301, or some browsers could cache it despite the above)
        if not self.isPjsUpstream and not self.isSslUpstream: self.addCookieFromURL() # for cookie_host
        converterFlags = []
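        # each tuple below is (option enabling the conversion, URL suffix the user appends, real file extension to expect, CMS format hint for guessCMS); a matching suffix is stripped from the URI and a flag recorded for the response-handling code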
        for opt,suffix,ext,fmt in [
            (options.pdftotext,pdftotext_suffix,".pdf","pdf"),
            (options.epubtotext,epubtotext_suffix,".epub","epub"),
            (options.epubtozip,epubtozip_suffix,".epub","epub"),
            (options.askBitrate,mp3lofi_suffix,".mp3",None),
            ]:
            if opt and not self.isPjsUpstream and not self.isSslUpstream and self.request.uri.endswith(suffix) and (self.request.uri.lower()[:-len(suffix)].endswith(ext) or guessCMS(self.request.uri,fmt)):
                self.request.uri = self.request.uri[:-len(suffix)]
                converterFlags.append(True)
            else: converterFlags.append(False)
        if upstream_rewrite_ssl and not self.isSslUpstream and not (options.js_interpreter and not self.isPjsUpstream): protocol = "http://" # keep the .0 in and call protocolAndHost again on the isSslUpstream pass
        else: protocol,realHost = protocolAndHost(realHost)
        self.change_request_headers(realHost,isProxyRequest)
        self.urlToFetch = protocol+self.request.headers["Host"]+self.request.uri
        if not isProxyRequest and (any(re.search(x,self.urlToFetch) for x in options.prohibit) or any(re.search(x,self.request.headers.get("User-Agent","")) for x in options.prohibitUA)):
            self.restore_request_headers()
            return self.redirect(self.urlToFetch)
        # TODO: consider adding "not self.request.headers.get('If-Modified-Since','')" to the below list of sendHead() conditions, in case any referer-denying servers decide it's OK to send out "not modified" replies even to the wrong referer (which they arguably shouldn't, and seem not to as of 2013-09, but if they did then adjuster might erroneously redirect the SECOND time a browser displays the image)
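        # ext() below: the URL's file extension for the redirectFiles check, ignoring any query string; mp3 is not counted when we would be transcoding it (bitrate set without askBitrate)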
        def ext(u):
            if '?' in u:
                e = ext(u[:u.index('?')])
                if e: return e
            if not '.' in u: return
            e = u[u.rindex('.')+1:].lower()
            if not (e=="mp3" and options.bitrate and not options.askBitrate): return e
        if options.redirectFiles and not (isProxyRequest or any(converterFlags) or viewSource) and ext(self.request.uri) in redirectFiles_Extensions: self.sendHead()
        elif self.isPjsUpstream and "text/html" in self.request.headers.get("Accept","") and not (any(converterFlags) or viewSource): self.sendHead(forPjs=True) # to check it's not a download link
        else: self.sendRequest(converterFlags,viewSource,isProxyRequest,follow_redirects=False) # (DON'T follow redirects - browser needs to know about them!)
    
    def change_request_headers(self,realHost,isProxyRequest):
        if options.default_cookies:
          for defaultCookie in options.default_cookies.split(';'):
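            # a default cookie may be written as "(UA-substring)name=value": it is applied only to browsers whose User-Agent contains that substring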
            defaultCookie = defaultCookie.strip()
            if defaultCookie.startswith("(") and ")" in defaultCookie: # browser-specific
                if not defaultCookie[1:defaultCookie.index(")")] in self.request.headers.get("User-Agent",""): continue
                defaultCookie=defaultCookie[defaultCookie.index(")")+1:]
            # add if a cookie of that name is not already set
            dcName,dcValue=defaultCookie.strip().split('=',1)
            if not self.getCookie(dcName): self.request.headers.add("Cookie",defaultCookie)
        if self.request.headers.get_list("Cookie"):
            # some sites require them all in one header
            ck = "; ".join(self.request.headers.get_list("Cookie"))
            self.request.old_cookie = ck
            def ours(c): # don't forward our own cookies upstream (may confuse some sites, especially if a site uses Web Adjuster their end)
                c = c.strip()
                if not '=' in c: return 0
                c = c[:c.index('=')]
                return c in upstreamGuard or (c==adjust_domain_cookieName and self.cookie_host())
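            # maketheirs below: if the real site uses a cookie name that clashes with one in upstreamGuard, its value is presumably held under name+"1" on our side; rename it back before forwarding upstream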
            def maketheirs(c):
                for ck in upstreamGuard: c=c.replace(ck+"1",ck)
                return c
            self.request.headers["Cookie"]=";".join(maketheirs(x) for x in ck.split(";") if not ours(x))
        for v in self.request.headers.get_list("Referer"):
            if v: # (assumed: this condition line is not visible in the extract; skip empty Referer values)
                if not isProxyRequest: v = fixDNS(v,self)
                if enable_adjustDomainCookieName_URL_override: v = re.sub(B("[?&]"+re.escape(adjust_domain_cookieName)+"=[^&]*$"),B(""),B(v))
                if S(v) in ["","http://","http:///"]:
                    # it must have come from the URL box
                    del self.request.headers["Referer"]
                else: self.request.headers["Referer"] = S(v)
        for http in ["http://","http%3A%2F%2F"]: # xyz?q=http://... stuff
          if http in self.request.uri[1:]:
            u=self.request.uri.split(http)
            if not isProxyRequest:
                for i in range(1,len(u)):
                    u[i] = fixDNS(u[i],self) # (assumed: loop body not visible in the extract; map embedded adjusted hostnames back to their real ones, as done for Referer above)
            for i in range(1,len(u)): u[i] = http+u[i] # (assumed: re-attach the separators removed by split so the "".join below reconstructs the URI)
            self.request.uri="".join(u)
        self.removed_headers = []
        for h in rmClientHeaders:
            l = self.request.headers.get_list(h)
            if l:
                del self.request.headers[h]
                self.removed_headers.append((h,l[0]))
        if options.via and not self.isSslUpstream:
            self.addToHeader("Via",v+" "+convert_to_via_host(self.request.host)+" ("+viaName+")")
            self.addToHeader("X-Forwarded-For",self.request.remote_ip)
        if options.uavia and not self.isSslUpstream: self.addToHeader("User-Agent","via "+convert_to_via_host(self.request.host)+" ("+viaName+")")
        if self.checkBrowser(options.cacheOmit):
            self.request.headers["Cache-Control"] = "max-age=0, must-revalidate"
            self.request.headers["Pragma"] = "no-cache"
    def restore_request_headers(self): # restore the ones Tornado might use (Connection etc)
        if not hasattr(self,"removed_headers"): return # haven't called change_request_headers (probably means this is user input)
        for k,v in self.removed_headers: self.request.headers[k]=v
        if hasattr(self.request,"old_cookie"): self.request.headers["Cookie"] = self.request.old_cookie # + put this back so we can refer to our own cookies
    
    def sendRequest(self,converterFlags,viewSource,isProxyRequest,follow_redirects):
        debuglog("sendRequest"+self.debugExtras())
        if self.isPjsUpstream and webdriver_prefetched[self.WA_PjsIndex]:
            debuglog("sendRequest returning webdriver_prefetched["+str(self.WA_PjsIndex)+"] ("+repr(webdriver_prefetched[self.WA_PjsIndex])+")"+self.debugExtras())
            r = webdriver_prefetched[self.WA_PjsIndex]
            webdriver_prefetched[self.WA_PjsIndex] = None
            return self.doResponse(r,converterFlags,viewSource,isProxyRequest)
        body = self.request.body
        if not body: body = None # required by some Tornado versions
        if self.isSslUpstream: ph,pp = None,None
        else: ph,pp = upstream_proxy_host,upstream_proxy_port
        if options.js_interpreter and self.htmlOnlyMode(isProxyRequest) and not follow_redirects and not self.request.uri in ["/favicon.ico","/robots.txt"] and self.canWriteBody():
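            # (this branch presumably hands the HTML fetch to the js_interpreter / headless-browser pool instead of fetching the URL directly; the rest of the branch is beyond this extract)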
            if options.via: via = self.request.headers["Via"],self.request.headers["X-Forwarded-For"]
            else: via = None # they might not be defined