def IOLoopInstance():
global ioLoopInstance
try: return ioLoopInstance
except: # better call this from the main thread first:
if hasattr(IOLoop,"current"): ioLoopInstance = IOLoop.current() # for Tornado 5+ to work
else: ioLoopInstance = IOLoop.instance() # in Tornado 4 and older, this can be called on-demand from any thread, but we're putting it in a global for forward-compatibility with the above
return ioLoopInstance
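# (Call IOLoopInstance() once from the main thread during startup, as the
# comment above notes: on Tornado 5+, IOLoop.current() binds to the calling
# thread's loop, so worker threads must reuse the main thread's instance.)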
def startServers():
workaround_tornado_fd_issue()
for core,sList in list(theServers.items()):
if core == "all" or core == coreNo:
    for port,s in sList: s.listen(port) # (assumed: the servers were created unstarted and each begins listening on its port here; the original body of this 'if' was lost)
# --------------------------------------------------
# Multicore: pause/restart when a core is overloaded
# --------------------------------------------------
mainServerPaused = mainServerReallyPaused = False
def pauseOrRestartMainServer(shouldRun=True):
if not (options.multicore and options.js_429): return
global mainServerPaused
if (not shouldRun) == mainServerPaused: return
# if shouldRun: return # uncomment this 'once closed, stay closed' line to demonstrate the OS forwards to open cores only
reallyPauseOrRestartMainServer(shouldRun)
mainServerPaused = not mainServerPaused
debuglog("Paused=%s on core %s" % (repr(mainServerPaused),repr(coreNo)))
CrossProcess429.q.put((coreNo,mainServerPaused))
def reallyPauseOrRestartMainServer(shouldRun):
global mainServerReallyPaused
if shouldRun == "IfNotPaused": # called by CrossProcess429 to re-pause if and only if hasn't been reopened by the outer level in the meantime
shouldRun = mainServerPaused
if (not shouldRun) == mainServerReallyPaused: return
for core,sList in theServers.items():
if not (core == "all" or core == coreNo): continue
for port,s in sList:
if not port==options.port: continue
if not hasattr(s,"_sockets"):
logging.error("Cannot pause server: wrong Tornado version?")
return
if shouldRun: s.add_sockets(s._sockets.values())
else:
for fd, sock in s._sockets.items():
if hasattr(s,"io_loop"): s.io_loop.remove_handler(fd) # Tornado 4
else: IOLoopInstance().remove_handler(fd) # Tornado 5, not tested (TODO)
mainServerReallyPaused = not mainServerReallyPaused
debuglog("reallyPaused=%s on core %s" % (repr(mainServerReallyPaused),repr(coreNo)))
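# (Removing the listening sockets' IOLoop handlers above, rather than closing
# the sockets, means the OS stops delivering new connections to this core and
# forwards them to cores that are still accepting: see the 'once closed, stay
# closed' demonstration comment in pauseOrRestartMainServer.)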
# --------------------------------------------------
# Miscellaneous bug workarounds
# --------------------------------------------------
def workaround_raspbian7_IPv6_bug():
"""Old Debian 7 based versions of Raspbian can boot with IPv6 enabled but later fail to configure it, hence tornado/netutil.py's AI_ADDRCONFIG flag is ineffective and socket.socket raises "Address family not supported by protocol" when it tries to listen on IPv6. If that happens, we'll need to set address="0.0.0.0" for IPv4 only. However, if we tried IPv6 and got the error, then at that point Tornado's bind_sockets will likely have ALREADY bound an IPv4 socket but not returned it; the socket does NOT get closed on dealloc, so a retry would get "Address already in use" unless we quit and re-run the application (or somehow try to figure out the socket number so it can be closed). Instead of that, let's try to detect the situation in advance so we can set options.address to IPv4-only the first time."""
if options.address: return # don't need to do this if we're listening on a specific address
flags = socket.AI_PASSIVE
if hasattr(socket, "AI_ADDRCONFIG"): flags |= socket.AI_ADDRCONFIG
for af,socktype,proto,r1,r2 in socket.getaddrinfo(None,options.port,socket.AF_UNSPEC,socket.SOCK_STREAM,0,flags):
try: socket.socket(af,socktype,proto)
except socket.error as e: # probing for the Raspbian bug described above
    if "family not supported" in e.strerror:
        options.address = "0.0.0.0" # use IPv4 only
        return
def workaround_timeWait_problem():
"""Work around listen-port failing to bind when there are still TIME_WAIT connections from the previous run. This at least seems to work around the problem MOST of the time."""
global bind_sockets
bind_sockets = tornado.netutil.bind_sockets
if "win" in sys.platform and not sys.platform=="darwin":
# Don't do this on MS-Windows. It can result in
# 'stealing' a port from another server even while
# that other server is still running.
return
if not hasattr(socket, "SO_REUSEPORT"): return
if getargspec==None: return
if not 'reuse_port' in getargspec(tornado.netutil.bind_sockets).args: return # Tornado version too old
def bind_sockets(*args,**kwargs):
if not args[0]: pass # we're doing port_randomise
elif len(args) < 6: kwargs['reuse_port'] = True
else: args=tuple(args[:6])+(True,)
return tornado.netutil.bind_sockets(*args,**kwargs)
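# After this runs, bind_sockets() is a drop-in for tornado.netutil.bind_sockets
# that forces reuse_port=True, so e.g. bind_sockets(8080,"0.0.0.0") can rebind
# a port still held by TIME_WAIT connections from a previous run.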
def workaround_tornado_fd_issue(): # TODO: is this still needed post-v0.3 now we fixed start-order bug?
if not hasattr(IOLoopInstance(),'handle_callback_exception'):
return # Tornado 6 doesn't have this, let's hope it's not needed
cxFunc = IOLoopInstance().handle_callback_exception
def newCx(callback):
if callback: return cxFunc(callback)
# self._handlers[fd] raised KeyError. This means
# we don't want to keep being told about the fd.
fr = sys.exc_info()[2]
while fr.tb_next: fr = fr.tb_next
fd = fr.tb_frame.f_locals.get("fd",None)
if not fd: return cxFunc("callback="+repr(callback)+" and newCx couldn't get fd from stack")
logging.info("IOLoop has no handler left for fd "+repr(fd)+" but is still getting events from it. Attempting low-level close to avoid loop.")
try: IOLoopInstance().remove_handler(fd)
except: pass
try: os.close(fd)
except: pass
IOLoopInstance().handle_callback_exception = newCx
def check_LXML():
# Might not find ALL problems with lxml installations, but at least we can check some basics
global etree
try:
from lxml import etree
return etree.HTMLParser(target=None) # works on lxml 2.3.2
except ImportError: sys.stderr.write("LXML library not found - ignoring useLXML option\n")
except TypeError: sys.stderr.write("LXML library too old - ignoring useLXML option\n") # no target= option in 1.x
options.useLXML = False
# --------------------------------------------------
# More setup: Unix forking, privileges etc
# --------------------------------------------------
def dropPrivileges():
if options.user and not os.getuid():
# need to drop privileges
import pwd ; pwd=pwd.getpwnam(options.user)
os.setuid(pwd[2])
# and help our external programs so they
# don't try to load root's preferences etc
os.environ['HOME'] = pwd[5]
os.environ['USER']=os.environ['LOGNAME']=options.user
def unixfork():
if os.fork(): sys.exit() # standard Unix daemonisation: first fork (assumed; the fork lines around setsid were lost)
os.setsid()
if os.fork(): sys.exit() # second fork so we can't reacquire a controlling tty (assumed)
devnull = os.open("/dev/null", os.O_RDWR)
for fd in range(3): os.dup2(devnull,fd) # commenting out this loop will let you see stderr after the fork (TODO debug option?)
if options.pidfile:
try: open(options.pidfile,"w").write(str(os.getpid()))
except: pass
def notifyReady():
try: import sdnotify # sudo pip install sdnotify
except ImportError: return
sdnotify.SystemdNotifier().notify("READY=1") # we send READY=1 so you can do an adjuster.service (w/out --background) with Type=notify and ExecStart=/usr/bin/python /path/to/adjuster.py --config=...
# TODO: also send "WATCHDOG=1" so can use WatchdogSec ? (but multicore / js_interpreter could be a problem)
# --------------------------------------------------
# cURL client setup
# --------------------------------------------------
def setupCurl(maxCurls,error=None):
try:
import pycurl # check it's there
curl_async = pycurl.version_info()[4] & (1 << 7) # CURL_VERSION_ASYNCHDNS
if not curl_async: curl_async = ('c-ares' in pycurl.version or 'threaded' in pycurl.version) # older
if not curl_async:
if error: warn("The libcurl on this system might hold up our main thread while it resolves DNS (try building curl with ./configure --enable-ares)")
else:
del pycurl ; return # TODO: and say 'not using'?
if float('.'.join(pycurl.version.split()[0].split('/')[1].rsplit('.')[:2])) < 7.5:
if error: warn("The curl on this system is old and might hang when fetching certain SSL sites") # strace -p (myPID) shows busy looping on poll (TODO: option to not use it if we're not using upstream_proxy)
_oldCurl = pycurl.Curl
def _newCurl(*args,**kwargs):
c = _oldCurl(*args,**kwargs)
so = c.setopt
def mySetopt(k,v):
so(k,v)
if k==pycurl.PROXY: so(pycurl.HTTP_VERSION, pycurl.CURL_HTTP_VERSION_1_0) # workaround 599 "transfer closed with outstanding read data remaining" in Curl 7.55.1 with polipo2 as upstream proxy (TODO: curl-version dependent? 7.43.0 seemed OK in this aspect, although it had the above problem)
c.setopt = mySetopt
return c
pycurl.Curl = _newCurl
curl_max_clients = min(max(maxCurls,10),1000) # constrain curl_max_clients to between 10 and 1000 to work around Tornado issue 2127, and we'll warn about the issue ourselves if we go over:
global curl_inUse_clients ; curl_inUse_clients = 0 # module-level so the nested helpers below can update it
try: AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient",max_clients=curl_max_clients)
except: AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient") # will try in MyAsyncHTTPClient too (different versions of Tornado and all that...) (TODO: if that one also falls back to no max_clients, we might be reduced to 10 and should set curl_max_clients accordingly in order to get appropriate warning messages)
    global MyAsyncHTTPClient,curlFinished # rebind the module-level names to the curl-aware versions below
    def MyAsyncHTTPClient(): # referenced in the comment above: warns when we are about to exhaust curl_max_clients
        try: problem = not len(AsyncHTTPClient()._free_list)
        except:
            global curl_inUse_clients
            curl_inUse_clients += 1
            problem = curl_inUse_clients >= curl_max_clients
        if problem:
            if upstream_rewrite_ssl and not options.ssl_fork: logging.error("curl_max_clients too low; AsyncHTTPClient will queue requests and COULD DEADLOCK due to upstream_rewrite_ssl (try --ssl-fork if you can't increase curl_max_clients)")
            else: logging.info("curl_max_clients too low; AsyncHTTPClient will queue requests")
        try: return AsyncHTTPClient(max_clients=curl_max_clients)
        except: return AsyncHTTPClient() # no max_clients in this Tornado (see comment above)
    def curlFinished(): # for callbacks to call
        global curl_inUse_clients
        curl_inUse_clients -= 1
        if curl_inUse_clients < 0:
            # This shouldn't happen. But if it does, don't let the effect 'run away'.
            curl_inUse_clients = 0
except: # fall back to the pure-Python one
    if error: errExit(error) # (unless it won't do)
try:
    import zlib
    enable_gzip = True # for fetching remote sites
except: # Windows?
    enable_gzip = False
    class zlib:
        def compress(self,s,level): return s
        def decompressobj(self): # (self added: this is called on the instance below)
            class o:
                def decompress(self,s,maxlen): return s
            return o()
    zlib = zlib()
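# (This stub mimics only the zlib calls used elsewhere: zlib.compress(data,level)
# and zlib.decompressobj().decompress(data,maxlen) both become no-ops, so pages
# are simply handled uncompressed when the real zlib is unavailable.)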
# --------------------------------------------------
# Support for showing messages to specific IP addresses
# --------------------------------------------------
try:
import hashlib # Python 2.5+, platforms?
hashlib.md5
except: hashlib = None # (TODO: does this ever happen on a platform that supports Tornado? Cygwin has hashlib with md5)
if hashlib: cookieHash = lambda msg: base64.b64encode(hashlib.md5(B(msg)).digest())[:10]
else: cookieHash = lambda msg: hex(hash(msg))[2:] # this fallback is not portable across different Python versions etc, so no good if you're running a fasterServer
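# (cookieHash only has to produce a short stable token from a message, e.g. for
# the seen-IP-message cookie below, so 10 base64 characters of md5 suffice.)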
ipv4_regexp = re.compile(r'^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)$')
def ipv4_to_int(ip):
m = re.match(ipv4_regexp,S(ip))
if m: return (int(m.group(1))<<24) | (int(m.group(2))<<16) | (int(m.group(3))<<8) | int(m.group(4))
else: return None
def ipv4range_to_ints(ip): # (name from the call in ipv4ranges_func below)
ip = S(ip)
if '-' in ip: return tuple(ipv4_to_int(i) for i in ip.split('-'))
elif '/' in ip:
start,bits = ip.split('/')
start = ipv4_to_int(start)
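# (~(-1 << (32-bits)) is the host-bits mask, e.g. 0xff for a /24, so ORing it
# into the start address gives the top of the range)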
return start, start | ~(-1 << (32-int(bits)))
else: return ipv4_to_int(ip),ipv4_to_int(ip)
def ipv4ranges_func(ipRanges_and_results):
isIP = True ; rangeList=None ; fList = []

Silas S. Brown
committed
for field in S(ipRanges_and_results).split('|'):
if isIP: rangeList = [ipv4range_to_ints(i) for i in field.split(',')]
else: fList.append((rangeList,field))
isIP = not isIP
def f(ip):
ipInt = ipv4_to_int(ip)
for rl,result in fList:
if any((l<=ipInt<=h) for l,h in rl):
return result # else None
return f
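# Example: ipv4ranges_func("127.0.0.1|loopback-msg|10.0.0.0/8|intranet-msg")
# returns a function f with f("10.1.2.3")=="intranet-msg" and f("8.8.8.8")==None
# (fields alternate between comma-separated ranges and the result to return).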
# --------------------------------------------------
# Service routines for CONNECT passing to SSL terminator
# --------------------------------------------------
def myRepr(d): # short representation of data for debug_connections messages (name taken from the calls in writeAndClose below)
    if re.search(B("[\x00-\x09\x0e-\x1f]"),B(d)): return "%d bytes" % len(d)
    else: return repr(d)
def peerName(socket):
    try: return socket.getpeername()
    except: return "(no socket??)"
def writeAndClose(stream,data):
if debug_connections: print ("Writing "+myRepr(data)+" to "+peerName(stream.socket)+" and closing it")
try: stream.write(data,lambda *args:True)
except: pass # ignore errors like client disconnected
if not stream.closed():
try: stream.close()
except: pass
def writeOrError(opposite,name,stream,data): # (signature inferred from the body: 'opposite' is the paired stream to shut down on error)
    if debug_connections: print ("Writing "+myRepr(data)+" to "+peerName(stream.socket))
    try: stream.write(data)
    except:
        if name and not hasattr(stream,"writeOrError_already_complained"): logging.error("Error writing data to "+name)
        stream.writeOrError_already_complained = True
        try: stream.close()
        except: pass
        try: opposite.close() # (try to close the server stream we're reading if the client has gone away, and vice versa)
        except: pass
# --------------------------------------------------
# Miscellaneous variables
# --------------------------------------------------
cookieExpires = "Tue Jan 19 03:14:07 2038" # TODO: S2G (may have to switch to Max-Age and drop support for ~IE8)
set_window_onerror = False # for debugging Javascript on some mobile browsers (TODO make this a config option? but will have to check which browsers do and don't support window.onerror)
# Domain-setting cookie for when we have no wildcard_dns and no default_site:
adjust_domain_cookieName = "_adjusterDN_"
adjust_domain_none = B("0") # not a valid top-level domain (TODO hopefully no user wants this as a local domain...)
enable_adjustDomainCookieName_URL_override = True # TODO: document this! (Allow &_adjusterDN_=0 or &_adjusterDN_=wherever in bookmark URLs, so it doesn't matter what setting the cookie has when the bookmark is activated)
seen_ipMessage_cookieName = "_adjusterIPM_"
htmlmode_cookie_name = "_adjustZJCG_" # zap JS, CSS and Graphics
password_cookie_name = "_pxyAxsP_" # "proxy access password". have to pick something that's unlikely to collide with a site's cookie
redirectFiles_Extensions=set("pdf epub mp3 aac zip gif png jpeg jpg exe tar tgz tbz ttf woff swf txt doc rtf midi mid wav ly c h py".split()) # TODO: make this list configurable + maybe add a "minimum content length before it's worth re-directing" option
# --------------------------------------------------
# Server-side Javascript execution support
# --------------------------------------------------
class WebdriverWrapper:
"Wrapper for webdriver that might or might not be in a separate process without shared memory"
def __init__(self): self.theWebDriver = self.tempDirToDelete = None
def new(self,*args): # "new" command (see WebdriverWrapperController.new below); args[0] is an instance number used in the TMPDIR name
    try: # No coredump, for emergency_zap_pid_and_children
        import resource ; resource.setrlimit(resource.RLIMIT_CORE,(0,0))
    except: pass # oh well, have coredumps then :-(

Silas S. Brown
committed
if options.js_multiprocess:
# we have a whole process to ourselves (not just a thread)
# so we can set the environment here.
# Selenium doesn't always clean up temporary files on exit
# (especially with Firefox), so let's set TMPDIR uniquely so we
# can clean them ourselves.
tmp = os.environ.get("TMPDIR",None)
self.tempDirToDelete=os.environ['TMPDIR']=os.environ.get("TMPDIR","/tmp")+"/"+str(os.getpid())+"."+str(args[0])
try: os.mkdir(self.tempDirToDelete)
except: pass
else: tmp = self.tempDirToDelete = None
self.theWebDriver = get_new_webdriver(*args) # (assumed: the driver is created here, between the TMPDIR setup above and its restoration below)
if tmp: os.environ["TMPDIR"] = tmp
elif options.js_multiprocess: del os.environ["TMPDIR"]
def getTmp(self,*args): return self.tempDirToDelete
def quit(self,*args):
if not self.theWebDriver: return
try: pid = self.theWebDriver.service.process.pid
except: pid = debuglog("WebdriverWrapper: Unable to get self.theWebDriver.service.process.pid")
try: self.theWebDriver.quit()
except: debuglog("WebdriverWrapper: exception on quit") # e.g. sometimes get 'bad fd' in selenium's send_remote_shutdown_command _cookie_temp_file_handle
# Try zapping the process ourselves anyway (even if theWebDriver.quit DIDN'T return error: seems it's sometimes still left around. TODO: this could have unexpected consequences if the system's pid-reuse rate is excessively high.)
self.theWebDriver = None
emergency_zap_pid_and_children(pid)
if self.tempDirToDelete: emergency_zap_tempdir(self.tempDirToDelete)
def current_url(self):
try: return self.theWebDriver.current_url
except: return "" # PhantomJS Issue #13114: unconditional reload for now
def get(self,url):
self.theWebDriver.get(S(url))
if options.logDebug:
try:
for e in self.theWebDriver.get_log('browser'): print ("webdriver log: "+repr(e)) # (log-entry format varies by browser, so just repr it)
except Exception as e: print ("webdriver get_log exception: "+repr(e))
def execute_script(self,script): self.theWebDriver.execute_script(S(script))
def click_id(self,clickElementID): self.theWebDriver.find_element_by_id(S(clickElementID)).click()
def click_xpath(self,xpath): self.theWebDriver.find_element_by_xpath(S(xpath)).click()
def click_linkText(self,clickLinkText): self.theWebDriver.find_element_by_link_text(S(clickLinkText)).click()
def getu8(self):
def f(switchBack):
src = self.theWebDriver.find_element_by_xpath("//*").get_attribute("outerHTML")
if options.js_frames:
for el in ['frame','iframe']:
for frame in self.theWebDriver.find_elements_by_tag_name(el):
self.theWebDriver.switch_to.frame(frame)
src += f(switchBack+[frame])
self.theWebDriver.switch_to.default_content()
for fr in switchBack: self.theWebDriver.switch_to.frame(fr)
return src
return B(f([]))
def getpng(self): # "getpng" command (see controller below): full-page screenshot as PNG bytes
    if options.js_interpreter=="HeadlessChrome": # resize not needed for PhantomJS (but PhantomJS is worse at font configuration and is no longer maintained)
        self.theWebDriver.set_window_size(js_size[0],min(16000,int(self.theWebDriver.execute_script("return document.body.parentNode.scrollHeight")))) # TODO: check the 16000: what is Selenium's limit? (confirmed over 8000)
        time.sleep(1)
    png = self.theWebDriver.get_screenshot_as_png() # (assumed: the capture itself, before the window is resized back)
    if options.js_interpreter=="HeadlessChrome": self.theWebDriver.set_window_size(*js_size)
try: # can we optimise the screenshot image size?
from PIL import Image

Silas S. Brown
committed
s = BytesIO() ; Image.open(BytesIO(png)).save(s,'png',optimize=True) # (BytesIO for the input too: png is bytes)
png = s.getvalue()
except: pass # just return non-optimized
return png
def emergency_zap_pid_and_children(pid):
if not pid: return
try:
import psutil # (import assumed to live in this try: the except below says 'no psutil')
for c in psutil.Process(pid).children(recursive=True):
    try: c.kill() # psutil's kill() takes no signal argument; it always sends SIGKILL
    except: pass
except: pass # no psutil, or process already gone
try: os.kill(pid,9),os.waitpid(pid, 0) # waitpid is necessary to clear it from the process table, but we should NOT use os.WNOHANG, as if we do, there's a race condition with the os.kill taking effect (even -9 isn't instant)
except: pass # process may already be gone
def emergency_zap_tempdir(d):
try:
for f in os.listdir(d): emergency_zap_tempdir(d+os.sep+f)
os.rmdir(d)
except: unlink(d)
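# (recursive delete: listdir raises on a plain file, in which case the except
# branch unlinks it; directories are emptied recursively and then rmdir'd)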
try: from selenium.common.exceptions import TimeoutException
except: # no Selenium or wrong version
class TimeoutException(Exception): pass # placeholder
class SeriousTimeoutException(Exception): pass
def webdriverWrapper_receiver(pipe,timeoutLock):
"Command receiver for WebdriverWrapper for when it's running over IPC (--js-multiprocess). Receives (command,args) and sends (return,exception), releasing the timeoutLock whenever it's ready to return."
CrossProcessLogging.initChild()
try: w = WebdriverWrapper()
except KeyboardInterrupt: return
while True: # command-dispatch loop (structure assumed from the (cmd,args)->(ret,exc) protocol in WebdriverWrapperController.send below)
    try: cmd,args = pipe.recv()
    except (EOFError,KeyboardInterrupt): # shutdown
        try: pipe.send(("INT","INT")) # sentinel the controller checks for
        except: pass
        return
    if cmd=="EOF": return pipe.close()
    try: ret,exc = getattr(w,cmd)(*args),None
    except Exception as e:
        p = find_adjuster_in_traceback()
        if p: # see if we can add it to the message (note p will start with ", " so no need to add a space before it)
            try:
                if hasattr(e,"msg") and e.msg: e.msg += p # should work with WebDriverException
                elif type(e.args[0])==str: e.args=(repr(e.args[0])+p,) + tuple(e.args[1:]) # should work with things like httplib.BadStatusLine that are fussy about the number of arguments they get
                else: e.args += (p,) # works with things like KeyError (although so should the above)
            except: e.message += p # works with base Exception
        ret,exc = None,e
    try: timeoutLock.release()
    except: pass # (may fail if controller's timeoutLock is turned off during quit_wd_atexit)
    try: pipe.send((ret,exc))
    except: pass # if they closed it, we'll get EOFError on next iteration
class WebdriverWrapperController:
"Proxy for WebdriverWrapper if it's running over IPC"
def __init__(self):
self.pipe, cPipe = multiprocessing.Pipe()
self.timeoutLock = multiprocessing.Lock()
self.process = multiprocessing.Process(target=webdriverWrapper_receiver,args=(cPipe,self.timeoutLock))
self.process.start()
def send(self,cmd,args=()):
"Send a command to a WebdriverWrapper over IPC, and either return its result or raise its exception in this process. Also handle the raising of SeriousTimeoutException if needed, in which case the WebdriverWrapper should be stopped."
try:
if not self.timeoutLock.acquire(timeout=0):
logging.error("REALLY serious SeriousTimeout (should never happen). Lock unavailable before sending command.")
raise SeriousTimeoutException()
except AttributeError: pass # self.timeoutLock==None because quit(final=True) called from another thread
try: self.pipe.send((cmd,args))
except IOError: return # already closed
if cmd=="EOF":
return self.pipe.close() # no return code
try:
    if not self.timeoutLock.acquire(timeout=options.js_timeout2): # fallback in case Selenium timeout doesn't catch it (signal.alarm in the child process isn't guaranteed to help, so catch it here)
        try: logging.error("SeriousTimeout: WebdriverWrapper process took over "+str(options.js_timeout2)+"s to respond to "+repr((cmd,args))+". Emergency restarting this process.")
        except: pass # absolutely do not throw anything except SeriousTimeoutException from this branch
        raise SeriousTimeoutException()
    self.timeoutLock.release()
except AttributeError: return # self.timeoutLock==None because quit(final=True) called from another thread
ret,exc = self.pipe.recv()
if ret==exc=="INT": return self.pipe.close()
if exc: raise exc
else: return ret
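# Protocol recap: send() writes (cmd,args) down the pipe and blocks on
# timeoutLock until the receiver answers with (ret,exc); "EOF" closes the
# pipe with no reply, and ("INT","INT") signals the child was interrupted.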
def new(self,*args):
self.send("new",args)
self.tempDirToDelete=self.send("getTmp")
def quit(self,final=False):
    if final: self.timeoutLock = None # quit_wd_atexit could plausibly run while another thread's still processing its last command, so allow these commands to be queued in the pipe from another thread without worrying about timeout when that happens
    self.send("quit")
    if final: self.send("EOF") # then close the pipe (assumed order: quit the webdriver first, then end the receiver loop)
def current_url(self): return self.send("current_url")
def get(self,url): return self.send("get",(url,))
def execute_script(self,script): self.send("execute_script",(script,))
def click_id(self,clickElementID): self.send("click_id",(clickElementID,))
def click_xpath(self,xpath): self.send("click_xpath",(xpath,))
def click_linkText(self,clickLinkText): self.send("click_linkText",(clickLinkText,))
def getu8(self): return self.send("getu8")
def getpng(self): return self.send("getpng")
try: import multiprocessing # Python 2.6
except: multiprocessing = None
class WebdriverRunner: # (class name from init_webdrivers below)
    "Manage a WebdriverWrapperController (or a WebdriverWrapper if we're not using IPC) from a thread of the main process"
def __init__(self,start=0,index=0):
self.start,self.index = start,index
if options.js_multiprocess: self.wrapper = WebdriverWrapperController()
else: self.wrapper = WebdriverWrapper()
self.renew_webdriver_newThread(True) # sets wd_threadStart
def renew_controller(self): # SeriousTimeoutException
emergency_zap_pid_and_children(self.wrapper.process.pid)
emergency_zap_tempdir(self.wrapper.tempDirToDelete)
self.wrapper = WebdriverWrapperController() # start a fresh receiver process (assumed: 'renew' implies re-creating the controller after zapping the old one)
def renew_webdriver_sameThread(self,firstTime=False):
while True:
    try:
        self.wrapper.quit(),self.wrapper.new(self.start+self.index,not firstTime)
        break
    except:
        logging.error("Exception "+exc_logStr()+" while renewing webdriver, retrying")
        time.sleep(1) # just in case
self.usageCount = 0 # (assumed: js_restartAfter counts fetches since the last renewal)
def renew_webdriver_newThread(self,firstTime=False):
self.wd_threadStart = time.time() # cleared in _renew_wd after renew_webdriver_sameThread returns (it loops on exception)
threading.Thread(target=_renew_wd,args=(self,firstTime)).start() ; return
def quit_webdriver(self): self.wrapper.quit(final=True)
def fetch(self,url,prefetched,clickElementID,clickLinkText,asScreenshot,callback,tooLate):
assert not self.wd_threadStart # webdriver_checkServe
self.wd_threadStart = time.time() # cleared in wd_fetch after _wd_fetch returns or throws + possible renew-loop (TODO: if wd_fetch ITSELF somehow throws an exception, should be logged but this JS instance gets tied up until next adjuster restart)
self.maybe_stuck = False
threading.Thread(target=wd_fetch,args=(url,prefetched,clickElementID,clickLinkText,asScreenshot,callback,self,tooLate)).start()
def current_url(self): return self.wrapper.current_url()
def get(self,url): return self.wrapper.get(url)
def execute_script(self,script): self.wrapper.execute_script(script)
def click_id(self,clickElementID): self.wrapper.click_id(clickElementID)
def click_xpath(self,xpath): self.wrapper.click_xpath(xpath)
def click_linkText(self,clickLinkText): self.wrapper.click_linkText(clickLinkText)
def getu8(self): return self.wrapper.getu8()
def getpng(self): return self.wrapper.getpng()
def _renew_wd(wd,firstTime):
wd.renew_webdriver_sameThread(firstTime)
wd.wd_threadStart = False
IOLoopInstance().add_callback(webdriver_checkServe)
def find_adjuster_in_traceback(): # (name from the calls in wd_fetch and exc_logStr)
    ei = sys.exc_info()
    try:
        p = ei[1].args[-1]
        if "adjuster line" in p: return p # for webdriverWrapper_receiver
    except: pass
    try: __file__
    except: return "" # sometimes not defined ??
    l = traceback.extract_tb(ei[2]) # (assumed: l is the extracted traceback, giving the l[i][0] filename / l[i][1] line number used below)
    for i in xrange(len(l)-1,-1,-1): # innermost adjuster frame first
        if __file__ in l[i][0]: return ", adjuster line "+str(l[i][1])
    return ""
def wd_fetch(url,prefetched,clickElementID,clickLinkText,asScreenshot,callback,manager,tooLate):
url = S(url)
need_restart = False
def errHandle(error,extraMsg,prefetched):
    if prefetched: toRet = "non-webdriver page (js_fallback set)"
    else:
        toRet = "error"
        prefetched = wrapResponse("webdriver "+error)
    logging.error(extraMsg+" returning "+toRet) # (assumed: this is where extraMsg and toRet get logged)
    if options.js_fallback:
        try:
            prefetched.headers.add(options.js_fallback,error)
        except: logging.error("Could not add "+repr(options.js_fallback)+" to error response")
    return prefetched
try:
    r = _wd_fetch(manager,url,prefetched,clickElementID,clickLinkText,asScreenshot)
    try:
        if options.js_fallback: r.headers.add(options.js_fallback,"OK")
    except: pass
except TimeoutException: r = errHandle("timeout","webdriver "+str(manager.start+manager.index)+" timeout fetching "+url+find_adjuster_in_traceback()+"; no partial result, so",prefetched) # "webdriver timeout" sent to browser (can't include url here: domain gets rewritten)
except SeriousTimeoutException:
    r = errHandle("serious timeout","lost communication with webdriver "+str(manager.start+manager.index)+" when fetching "+url+"; no partial result, so",prefetched)
    need_restart = "serious"
except:
    logging.info("webdriver error fetching "+url+" ("+exc_logStr()+"); restarting webdriver "+str(manager.start+manager.index)+" for retry") # usually a BadStatusLine
    if options.js_retry: # (assumed option gating the retry: the 'no retry' branch below needs a matching if)
        manager.renew_webdriver_sameThread() # (assumed: the log line above says we restart before retrying)
        if tooLate(): r = errHandle("err","too late")
        else:
            try:
                r = _wd_fetch(manager,url,prefetched,clickElementID,clickLinkText,asScreenshot)
                try:
                    if options.js_fallback: r.headers.add(options.js_fallback,"OK")
                except: pass
            except SeriousTimeoutException:
                r = errHandle("serious timeout","webdriver serious timeout on "+url+" after restart, so re-restarting and",prefetched)
                need_restart = "serious"
            except:
                r = errHandle("error","webdriver error on "+url+" even after restart, so re-restarting and",prefetched)
                need_restart = True
    else: # no retry
        r = errHandle("error","webdriver error on "+url+", so restarting and",prefetched)
        need_restart = True
IOLoopInstance().add_callback(lambda *args:callback(r))
if need_restart or (options.js_restartAfter and manager.usageCount >= options.js_restartAfter):
if need_restart=="serious":manager.renew_controller()
manager.renew_webdriver_sameThread()
else: manager.finishTime = time.time()
manager.wd_threadStart = manager.maybe_stuck = False
IOLoopInstance().add_callback(webdriver_checkServe)
def exc_logStr():
toLog = sys.exc_info()[:2]
if hasattr(toLog[1],"msg") and toLog[1].msg: toLog=(toLog[0],toLog[1].msg) # for WebDriverException
return repr(toLog)+find_adjuster_in_traceback()
def _wd_fetch(manager,url,prefetched,clickElementID,clickLinkText,asScreenshot): # single-user only! (and relies on being called only in htmlOnlyMode so leftover Javascript is removed and doesn't double-execute on JS-enabled browsers)
import tornado.httputil ; url = S(url)
currentUrl = S(manager.current_url())
manager.usageCount += 1 # (assumed: js_restartAfter and webdriver_checkRenew both read this per-fetch counter)
timed_out = False
if prefetched or not re.sub('#.*','',currentUrl) == url:
if prefetched:
debuglog("webdriver %d get about:blank" % (manager.start+manager.index))
manager.get("about:blank") # ensure no race condition with current page's XMLHttpRequests
webdriver_inProgress[manager.index].clear() # race condition with start of next 'get' if we haven't done about:blank, but worst case is we'll wait a bit too long for page to finish
debuglog(("webdriver %d get " % (manager.start+manager.index))+url)
try: manager.get(url) # waits for onload
except TimeoutException:
# we might have got SOMEthing (e.g. on a page bringing in hundreds of scripts from a slow server, but still running some of them before the timeout)
# May also be "Received error page"
if currentUrl == S(manager.current_url()):
debuglog(("webdriver %d get() timeout " % (manager.start+manager.index))+url+" - URL unchanged at "+currentUrl)
raise # treat as "no partial result"
debuglog(("webdriver %d get() timeout " % (manager.start+manager.index))+url+" - extracting partial")
timed_out = True # (needed by the checks below; see 'timed_out = False' above)
if not timed_out:
debuglog(("webdriver %d loaded " % (manager.start+manager.index))+url)
# we want to double-check XMLHttpRequests have gone through (TODO: low-value setTimeout as well? TODO: abort this early if currentUrl has changed and we're just going to issue a redirect? but would then need to ensure it's finished if client comes back to same instance that's still running after it follows the redirect)
if options.js_reproxy:
    wasActive = True # (assumed initial value: require two consecutive quiet polls before breaking)
    for _ in xrange(40): # up to 8+ seconds in steps of 0.2 (on top of the initial load)
        time.sleep(0.2) # unconditional first-wait hopefully long enough to catch XMLHttpRequest delayed-send, very-low-value setTimeout etc, but we don't want to wait a whole second if the page isn't GOING to make any requests (TODO: monitor the js going through the upstream proxy to see if it contains any calls to this? but we'll have to deal with js_interpreter's cache, unless set it to not cache and we cache upstream)
        active = webdriver_inProgress[manager.index]
        if not active and not wasActive: break # TODO: wait longer than 0.2-0.4 to see if it restarts another request?
        wasActive = active
else: time.sleep(1) # can't do much if we're not reproxying, so just sleep 1sec and hope for the best
if (clickElementID or clickLinkText) and not timed_out:
    try: # (a try is needed for the 'find_element exception ignored' handler below)
        manager.execute_script("window.open = window.confirm = function(){return true;}") # in case any link has a "Do you really want to follow this link?" confirmation (webdriver default is usually Cancel), or has 'pop-under' window (TODO: switch to pop-up?)
        if clickElementID: manager.click_id(clickElementID)
        if clickLinkText:
            if not type(clickLinkText)==type(u""): clickLinkText=clickLinkText.decode('utf-8')
            if not '"' in clickLinkText: manager.click_xpath(u'//a[text()="'+clickLinkText+'"]')
            elif not "'" in clickLinkText: manager.click_xpath(u"//a[text()='"+clickLinkText+"']")
            else: manager.click_linkText(clickLinkText) # least reliable
        time.sleep(0.2) # TODO: more? what if the click results in fetching a new URL, had we better wait for XMLHttpRequests to finish? (loop as above but how do we know when they've started?) currentUrl code below should at least show us the new URL even if it hasn't finished loading, and then there's a delay while the client browser is told to fetch it, but that might not be enough
    except: debuglog("js_links find_element exception ignored",False)
    currentUrl = None
if currentUrl == None: # we need to ask for it again
currentUrl = manager.current_url()
if not currentUrl: currentUrl = url # PhantomJS Issue #13114: relative links after a redirect are not likely to work now
if S(currentUrl) == "about:blank":
debuglog("got about:blank instead of "+S(url))
return wrapResponse("webdriver failed to load") # don't return an actual redirect to about:blank, which breaks some versions of Lynx
debuglog("Getting data from webdriver %d (current_url=%s)" % (manager.start+manager.index,S(currentUrl)))
if asScreenshot: return wrapResponse(manager.getpng(),tornado.httputil.HTTPHeaders.parse("Content-type: image/png"),200)
body = get_and_remove_httpequiv_charset(manager.getu8())[1]
if timed_out: manager.get("about:blank") # as the timeout might have been due to a hard-locked script, so interrupting it should save some CPU
if not re.sub(B('#.*'),B(''),B(currentUrl)) == B(url): # we have to ignore anything after a # in this comparison because we have no way of knowing (here) whether the user's browser already includes the # or not: might send it into a redirect loop
# If we redirect, and if we have more than one user session active (and especially if we're multicore) then the second request might not come back to the same webdriver instance (or even the same adjuster process, so we can't even cache it unless shared), and reload is bad, so try to avoid redirect if possible.
# We could set 'base href' instead, seeing as 'document.location' does not have to be right on the user's side as we've already executed the site's scripts here (unless the user has any extensions that require it to be right). Don't use Content-Location header: not all browsers support + might cause caches to tread POST requests as invariant.
# Any in-document "#" links will cause a reload if 'base href' is set, but at least we won't have to reload UNLESS the user follows such a link.
if htmlFind(body,"<base href=") >= 0:
pass # if it already has a <base href> we can leave it as-is, since it won't matter from which URL it was served
else: return wrapResponse(addToHead(body,B('<base href="')+re.sub(B('#.*'),B(''),B(currentUrl))+B('">')),tornado.httputil.HTTPHeaders.parse("Content-type: text/html; charset=utf-8"),200)
return wrapResponse(body,tornado.httputil.HTTPHeaders.parse("Content-type: text/html; charset=utf-8"),200)
def get_new_webdriver(index,renewing=False): # (called by test_init_webdriver and WebdriverWrapper.new)
if options.js_interpreter in ["HeadlessChrome","Chrome"]:
return get_new_Chrome(index,renewing,options.js_interpreter=="HeadlessChrome")
elif options.js_interpreter in ["HeadlessFirefox","Firefox"]:
return get_new_Firefox(index,renewing,options.js_interpreter=="HeadlessFirefox")
else: return get_new_PhantomJS(index,renewing)
def get_new_Chrome(index,renewing,headless):
log_complaints = (index==0 and not renewing)
from selenium.webdriver.chrome.options import Options
# TODO: can set opts.binary_location if needed (e.g. for chromium, if distro's linking doesn't work)
opts = Options()
if headless:
opts.add_argument("--headless")
opts.add_argument("--disable-gpu")
# Specify user-data-dir ourselves, further to Chromium bug 795 comment 12. Include username and port (in case others are running or have run adjuster) as well as index.
global myUsername
try: myUsername
except NameError:
try: import getpass
except ImportError: getpass = None
if getpass: myUsername = S(getpass.getuser())
else: myUsername = "" # (fallback assumed so the user-data-dir path below still works)
extra = ""
while True: # might be restarting from a corrupted user-data-dir state; in worst case might not even be able to cleanly remove it (TODO: what if some processes associated with an older instance somehow took a while to go away and still have named referenc to previous path: increment counter unconditionally? still rm the old one)
path = writable_tmpdir()+"hChrome-"+myUsername+str(options.port)+"."+str(index)+extra
if not os.path.exists(path): break
import shutil ; shutil.rmtree(path,True)
if not os.path.exists(path): break
if extra: extra="-"+str(int(extra[1:])+1)
else: extra = "-0"
opts.add_argument("--user-data-dir="+path)
opts.add_argument("--incognito") # reduce space taken up by above
dc = None # (default assumed so the instantiation calls below work when js_reproxy is off)
if options.js_reproxy: # (assumed gate: the elif below parallels the Firefox version)
    opts.add_argument("--proxy-server=127.0.0.1:%d" % proxyPort(index))
    opts.add_argument("--ignore-certificate-errors") # --ignore-certificate-errors is ignored by Chrome 59 (which was the first version to support Headless) and possibly some earlier versions, but we'll put it in just in case somebody runs an ancient non-headless Chrome in an offline experiment
    opts.add_argument("--allow-insecure-localhost") # Chrome 62+ can at least do *.localhost & 127.* but we'd need to domain-rewrite for this to help (proxy-host doesn't count)
    # Chrome 65 and chromedriver 2.35/2.36? can do:
    dc = wd_DesiredCapabilities(log_complaints)
    if dc:
        dc = dc.CHROME.copy()
        dc['acceptInsecureCerts'] = True
elif options.upstream_proxy: opts.add_argument('--proxy-server='+options.upstream_proxy)
if options.logDebug: opts.add_argument("--verbose")
if options.js_UA and not options.js_UA.startswith("*"): opts.add_argument("--user-agent="+options.js_UA)
if not options.js_images: opts.add_experimental_option("prefs",{"profile.managed_default_content_settings.images":2})
# TODO: do we need to disable Javascript's ability to open new windows and tabs, plus target="_blank" etc, especially if using clickElementID?
if options.via and not options.js_reproxy and log_complaints:
# Oops: how can we put in a Via: header if we don't
# have an upstream proxy to do so? unless you want
# to implement a Chrome extension to do it (TODO?)
warn("--via ignored when running Chrome without --js-reproxy")
if js_size: opts.add_argument("--window-size=%d,%d" % js_size)
if dc: p = wd_instantiateLoop(webdriver.Chrome,index,renewing,chrome_options=opts,desired_capabilities=dc)
else: p = wd_instantiateLoop(webdriver.Chrome,index,renewing,chrome_options=opts)
if options.js_reproxy:
chromeVersion = int(p.capabilities['version'].split(".")[0])
if 59 <= chromeVersion < 65:
if [int(x) for x in p.capabilities['chrome']['chromedriverVersion'].split(".",2)[:2]] < [2,35]: extrawarn = " (and chromedriver 2.35+)"
else: extrawarn = ""
warn("This version of Chrome will hang when used with js_reproxy on https pages. Try upgrading to Chrome 65+"+extrawarn) # TODO: is 59 really the first version to drop --ignore-certificate-errors ?
elif chromeVersion >= 65 and not p.capabilities.get('acceptInsecureCerts',False): warn("This version of chromedriver will hang when used with js_reproxy on https pages. Your Chrome is new enough, but your chromedriver is not. Try downloading chromedriver 2.35/36+")
try: p.set_page_load_timeout(options.js_timeout1) # (same pattern as the Firefox/PhantomJS versions below)
except: logging.info("Couldn't set HeadlessChrome page load timeout")
return p
def get_new_Firefox(index,renewing,headless):
if headless:
os.environ['MOZ_HEADLESS'] = '1' # in case -headless not yet working
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
log_complaints = (index==0 and not renewing) ; op = None
profile = FirefoxProfile() # (needed by the set_proxy / set_preference calls below)
if options.js_reproxy:
from selenium.webdriver.common.proxy import Proxy,ProxyType
import warnings
warnings.filterwarnings("ignore","This method has been deprecated. Please pass in the proxy object to the Driver Object") # set_proxy deprecated, but documentation unclear how it should be replaced
profile.set_proxy(Proxy({'proxyType':ProxyType.MANUAL,'httpProxy':"127.0.0.1:%d" % proxyPort(index),'sslProxy':"127.0.0.1:%d" % proxyPort(index),'ftpProxy':'','noProxy':''}))
profile.accept_untrusted_certs = True # needed for some older versions?
caps = wd_DesiredCapabilities(log_complaints)
if caps:
caps = caps.FIREFOX.copy()
caps['acceptInsecureCerts'] = True
caps['acceptSslCerts'] = True # older versions
elif options.upstream_proxy: profile.set_proxy(options.upstream_proxy)
if options.js_UA and not options.js_UA.startswith("*"): profile.set_preference("general.useragent.override",options.js_UA)
if not options.js_images: profile.set_preference("permissions.default.image", 2)
if options.via and not options.js_reproxy and log_complaints:
# Oops: how can we put in a Via: header if we don't
# have an upstream proxy to do so? unless you want
# to implement a Firefox extension to do it (TODO?)
warn("--via ignored when running Firefox without --js-reproxy")
# TODO: do any other options need to be set? disable plugins, Firefox-update prompts, new windows/tabs with JS, etc? or does Selenium do that?
if options.logDebug: binary=FirefoxBinary(log_file=sys.stderr) # TODO: support logDebug to a file as well
else: binary=FirefoxBinary()
if headless: cmdL = ('-headless','-no-remote')
else: cmdL = ('-no-remote',)
if js_size: cmdL += ("-width",str(js_size[0]),"-height",str(js_size[1]))
cmdL += ("about:blank",) # not Firefox start page
binary.add_command_line_options(*cmdL) # cannot call this more than once
if caps: p = wd_instantiateLoop(webdriver.Firefox,index,renewing,firefox_profile=profile,firefox_binary=binary,capabilities=caps)
else: p = wd_instantiateLoop(webdriver.Firefox,index,renewing,firefox_profile=profile,firefox_binary=binary)
try: p.set_page_load_timeout(options.js_timeout1) # (same pattern as the PhantomJS version below)
except: logging.info("Couldn't set Firefox page load timeout")
return p
block_headless_firefox = [
# servers that Firefox tries to CONNECT to on startup
"push.services.mozilla.com","snippets.cdn.mozilla.net","firefox.settings.services.mozilla.com","location.services.mozilla.com","shavar.services.mozilla.com",
"aus5.mozilla.org","ftp.mozilla.org",
"fonts.googleapis.com", # Fedora version of Firefox connects to this
# "start.fedoraproject.org","fedoraproject.org", # Fedora version of Firefox does this (but what if user actually wants to view one of those pages?)
]
def wd_DesiredCapabilities(log_complaints):
try:
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
return DesiredCapabilities
except:
if log_complaints: warn("Your Selenium installation is too old to set DesiredCapabilities.\nThis is likely to stop some js options from working properly.")
return None
def wd_instantiateLoop(wdClass,index,renewing,**kw):
debuglog("Instantiating "+wdClass.__name__+" "+repr(kw))
if 'chrome_options' in kw:
try: newChromedriver = 'options' in getargspec(webdriver.chrome.webdriver.WebDriver.__init__).args
except: newChromedriver = False
if newChromedriver:
kw['options'] = kw['chrome_options']
del kw['chrome_options']
if not renewing: time.sleep(min(2*(index % js_per_core),int(options.js_timeout2 / 2))) # try not to start them all at once at the beginning (may reduce chance of failure)
p = None
while p == None: # retry loop: the error handler below says "retrying in 2sec"
    try:
        if wdClass==webdriver.Chrome: p = wdClass(getoutput("which chromedriver 2>/dev/null"),**kw) # some versions need to be told explicitly where chromedriver is, rather than looking in PATH themselves, in order to get "wrong version" errors etc (otherwise errors ignored, Selenium looks for a different chromedriver and gives a slightly confusing error about 'none found' rather than the error you should have seen about 'wrong version')
        else: p = wdClass(**kw) # (assumed: the non-Chrome case takes just the keyword arguments)
        if not p.capabilities: raise Exception("Didn't seem to get a p.capabilities")
        elif 'browserVersion' in p.capabilities:
            # Selenium 2.x calls it version, but Selenium
            # 3.x calls it browserVersion. Map this back
            # to 'version' for our other code.
            p.capabilities['version'] = p.capabilities['browserVersion']
        elif not 'version' in p.capabilities: raise Exception("capabilities has no version: "+repr(p.capabilities.items()))
    except:
        logging.error("Unhandled exception "+exc_logStr()+" when instantiating webdriver %d, retrying in 2sec" % index)
        time.sleep(2) ; p = None
return p
def _get_new_PhantomJS(index,renewing): # (wrapped by get_new_PhantomJS below)
    log_complaints = (index==0 and not renewing)
    os.environ["QT_QPA_PLATFORM"]="offscreen"
    sa = [] # PhantomJS service_args (assumed to start empty)
    # if options.logDebug: sa.append("--debug=true") # doesn't work: we don't see the debug output on stdout or stderr
    if options.js_reproxy: sa.append('--proxy=127.0.0.1:%d' % proxyPort(index))
    elif options.upstream_proxy: sa.append('--proxy='+options.upstream_proxy)
dc = wd_DesiredCapabilities(log_complaints)
if dc:
dc = dict(dc.PHANTOMJS)
if options.js_UA and not options.js_UA.startswith("*"): dc["phantomjs.page.settings.userAgent"]=options.js_UA
if not options.js_images: dc["phantomjs.page.settings.loadImages"]=False
dc["phantomjs.page.settings.javascriptCanOpenWindows"]=dc["phantomjs.page.settings.javascriptCanCloseWindows"]=False # TODO: does this cover target="_blank" in clickElementID etc (which could have originated via DOM manipulation, so stripping them on the upstream proxy is insufficient; close/restart the driver every so often?)
if options.via and not options.js_reproxy: dc["phantomjs.page.customHeaders.Via"]="1.0 "+convert_to_via_host("")+" ("+viaName+")" # customHeaders works in PhantomJS 1.5+ (TODO: make it per-request so can include old Via headers & update protocol version, via_host + X-Forwarded-For; will webdriver.DesiredCapabilities.PHANTOMJS[k]=v work before a request?) (don't have to worry about this if js_reproxy)
return wd_instantiateLoop(webdriver.PhantomJS,index,renewing,desired_capabilities=dc,service_args=sa)
else: return wd_instantiateLoop(webdriver.PhantomJS,index,renewing,service_args=sa)
def get_new_PhantomJS(index,renewing=False):
wd = _get_new_PhantomJS(index,renewing)
log_complaints = (index==0 and not renewing)
try: is_v2 = wd.capabilities['version'].startswith("2.")
except: is_v2 = False
if is_v2: warn("You may be affected by PhantomJS issue #13114.\nRelative links may be wrong after a redirect if the site sets Content-Security-Policy.\nTry --js_reproxy, or downgrade your PhantomJS to version 1.9.8")
try: wd.set_window_size(*js_size)
except: logging.info("Couldn't set PhantomJS window size")
try: wd.set_page_load_timeout(options.js_timeout1)
except: logging.info("Couldn't set PhantomJS page load timeout")
return wd
def proxyPort(index): return port_randomise.get(js_proxy_port[index],js_proxy_port[index])
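# (port_randomise is consulted so that, when port randomisation is in use, each
# js_proxy_port entry maps to the port actually bound; otherwise the nominal
# port is returned unchanged.)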
def test_init_webdriver():
"Check that we CAN start a webdriver, before forking to background and starting all of them"
sys.stderr.write("Checking webdriver configuration... "),sys.stderr.flush()
get_new_webdriver(0).quit()
sys.stderr.write("OK\n")
def init_webdrivers(start,N):
informing = not options.background and not start and not (options.multicore and options.ssl_fork) # (if ssl_fork, we don't want the background 'starting N processes' messages to be interleaved with this)
if informing:
sys.stderr.write("Starting %d webdriver%s... " % (options.js_instances,plural(options.js_instances))),sys.stderr.flush()
for i in xrange(N):
webdriver_runner.append(WebdriverRunner(start,len(webdriver_runner)))
webdriver_via.append(None) ; webdriver_UA.append("")
def quit_wd_atexit(*args): # (name from the quitFuncToCall assignment below)
    if informing: sys.stderr.write("Quitting %d webdriver%s... " % (options.js_instances,plural(options.js_instances))),sys.stderr.flush()
    try:
        for i in webdriver_runner:
            try: i.quit_webdriver()
            except: pass
    except: pass
    if informing: sys.stderr.write("done\n")
global quitFuncToCall ; quitFuncToCall = quit_wd_atexit # don't use the real atexit, as we have our own thread-stop logic which might kick in first, leaving a stuck adjusterWDhelp process if js_multiprocess==True, and additionally holding up calling process if --stdio is in use (fixed in v0.2795)
if options.js_restartMins and not options.js_restartAfter==1: IOLoopInstance().add_timeout(time.time()+60,webdriver_checkRenew)
if informing: sys.stderr.write("done\n")
webdriver_maxBusy = 0
def webdriver_allBusy(): # (name, and the webdriver_maxBusy side-effect, are referenced in webdriver_checkServe below)
    busyNow = sum(1 for i in webdriver_runner if i.wd_threadStart)
    global webdriver_maxBusy
    webdriver_maxBusy = max(webdriver_maxBusy,busyNow)
    return busyNow == len(webdriver_runner)
def webdriver_checkServe(*args):
# how many queue items can be served right now?
# (called on IOLoop thread when new item added, or when
# a server is finished)
debuglog("Entering webdriver_checkServe, runners=%d" % len(webdriver_runner))
for i in xrange(len(webdriver_runner)):
if not webdriver_queue: break # just to save a little
if webdriver_runner[i].wd_threadStart: continue # busy: fetch() asserts wd_threadStart is clear, so skip this instance
url,prefetched,ua,clickElementID,clickLinkText,via,asScreenshot,callback,tooLate = webdriver_queue.pop(0)
if tooLate():
debuglog("tooLate() for "+url)
continue
debuglog("Starting fetch of "+url+" on webdriver instance "+str(i+webdriver_runner[i].start))
webdriver_runner[i].fetch(url,prefetched,clickElementID,clickLinkText,asScreenshot,callback,tooLate)
global webdriver_mu ; webdriver_mu += 1
break
if webdriver_allBusy(): pauseOrRestartMainServer(0) # we're "paused" anyway when not in the poll wait, so might as well call this only at end, to depend on the final status (and make sure to call webdriver_allBusy() no matter what, as it has the side-effect of updating webdriver_maxBusy)
else: pauseOrRestartMainServer(1)
debuglog("Finishing webdriver_checkServe, webdriver_queue len=%d" % len(webdriver_queue))
def webdriver_checkRenew(*args):
for i in webdriver_runner:
if not i.wd_threadStart and i.usageCount and i.finishTime + options.js_restartMins < time.time(): i.renew_webdriver_newThread() # safe because we're running in the IOLoop thread, which therefore can't start wd_thread between our test of wd_threadStart and renew_webdriver_newThread
IOLoopInstance().add_timeout(time.time()+60,webdriver_checkRenew)
def webdriver_fetch(url,prefetched,ua,clickElementID,clickLinkText,via,asScreenshot,callback,tooLate):
if tooLate(): return # probably webdriver_queue overload (which will also be logged)
elif prefetched and (not hasattr(prefetched,"code") or prefetched.code >= 500): return callback(prefetched) # don't bother allocating a webdriver if we got a timeout or DNS error or something
elif wsgi_mode: return callback(_wd_fetch(webdriver_runner[0],url,prefetched,clickElementID,clickLinkText,asScreenshot)) # (can't reproxy in wsgi_mode, so can't use via and ua) TODO: if *threaded* wsgi, index 0 might already be in use (we said threadsafe:true in AppEngine instructions but AppEngine can't do js_interpreter anyway; where else might we have threaded wsgi? js_interpreter really is better run in non-wsgi mode anyway, so can js_reproxy)
webdriver_queue.append((url,prefetched,ua,clickElementID,clickLinkText,via,asScreenshot,callback,tooLate))
debuglog("webdriver_queue len=%d after adding %s" % (len(webdriver_queue),url))
webdriver_checkServe() # safe as we're IOLoop thread
# --------------------------------------------------
# Service routines for basic HTTP header rewriting
# --------------------------------------------------
def fixServerHeader(i): # (name from the rmServerHeaders comment below)
    i.set_header("Server",serverName)
# TODO: in "real" proxy mode, "Server" might not be the most appropriate header to set for this
try: i.clear_header("Date") # Date is added by Tornado 3; HTTP 1.1 says it's mandatory but then says don't put it if you're a clockless server (which we might be I suppose) so it seems leaving it out is OK especially if not specifying Age etc, and leaving it out saves bytes. But if the REMOTE server specifies a Date then we should probably pass it on (see comments in doResponse below)
except: pass # (ok if "Date" wasn't there)
rmServerHeaders = set([
# server headers to remove. We'll do our own connection type etc (but don't include "Date" in this list: if the remote server includes a Date it would be useful to propagate that as a reference for its Age headers etc, TODO: unless remote server is broken? see also comment in fixServerHeader re having no Date by default). Many servers' Content-Location is faulty; it DOESN'T necessarily provide the new base href; it might be relative; it might be identical to the actual URL fetched; many browsers ignore it anyway
"connection","content-length","content-encoding","transfer-encoding","etag","content-md5","server","alternate-protocol","strict-transport-security","content-location",
"x-associated-content", # should NOT be sent to browser (should be interpreted by a server's SPDY/push module) but somebody might misread the specs (at least one Wikipedia editor did)
"x-host","x-http-reason", # won't necessarily be the same
"content-security-policy","x-webkit-csp","x-content-security-policy","x-frame-options", # sorry but if we're adjusting the site by adding our own scripts/styles we are likely to be broken by a CSP that restricts which of these we're allowed to do. (Even if we adjust the domains listed on those headers, what if our scripts rely on injecting inline code?) Sites shouldn't *depend* on CSP to prevent XSS: it's just a belt-and-braces that works only in recent browsers. Hopefully our added styles etc will break the XSS-introduced ones if we hit a lazy site.
    # TODO: WebSocket (and Microsoft SM) gets the client to say 'Connection: Upgrade' with a load of Sec-WebSocket-* headers, check what Tornado does with that
    ])
rmClientHeaders = ['Connection','Proxy-Connection','Accept-Charset','Accept-Encoding','X-Forwarded-Host','X-Forwarded-Port','X-Forwarded-Server','X-Forwarded-Proto','X-Request-Start','TE','Upgrade',
'Upgrade-Insecure-Requests', # we'd better remove this from the client headers if we're removing Content-Security-Policy etc from the server's
'Range', # TODO: we can pass Range to remote server if and only if we guarantee not to need to change anything (could also add If-Range and If-None-Match to the list, but these should be harmless to pass to the remote server and If-None-Match might actually help a bit in the case where the document doesn't change)
]
# --------------------------------------------------
# Our main RequestForwarder class. Handles incoming
# HTTP requests, generates requests to upstream servers
# and handles responses. Sorry it's got a bit big :-(
# --------------------------------------------------
the_supported_methods = ("GET", "HEAD", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "CONNECT")
# Don't support PROPFIND (from WebDAV) unless be careful about how to handle image requests with it
# TODO: image requests with OPTIONS ?
class RequestForwarder(RequestHandler):
def get_error_html(self,status,**kwargs): return htmlhead("Web Adjuster error")+options.errorHTML+"</body></html>" # Tornado 2.0
def write_error(self,status,**kwargs): # Tornado 2.1+
if hasattr(self,"_finished") and self._finished: return
msg = self.get_error_html(status,**kwargs)
if "{traceback}" in msg and 'exc_info' in kwargs:
msg = msg.replace("{traceback}","<pre>"+ampEncode("".join(traceback.format_exception(*kwargs["exc_info"])))+"</pre>")
# TODO: what about substituting for {traceback} on pre-2.1 versions of Tornado that relied on get_error_html and put the error into sys.exc_info()? (need to check their source to see how reliable the traceback is in this case; post-2.1 versions re-raise it from write_error itself)
def cookie_host(self,checkReal=True,checkURL=True):
# for cookies telling us what host the user wants
if self.isPjsUpstream or self.isSslUpstream:
return False
if checkReal and convert_to_real_host(self.request.host,None): return # if we can get a real host without the cookie, the cookie does not apply to this host
if enable_adjustDomainCookieName_URL_override and checkURL:
if self.cookieViaURL: v = self.cookieViaURL
else:
v = self.request.arguments.get(adjust_domain_cookieName,None)
if type(v)==type([]): v=v[-1]
if v: self.removeArgument(adjust_domain_cookieName,quote(v))
if v:
self.cookieViaURL = v
if v==adjust_domain_none: return None
else: return v
return self.getCookie(adjust_domain_cookieName,adjust_domain_none)
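# (So cookie_host returns False when we're acting as an upstream or SSL helper,
# None when the sentinel "none" value is set, else the domain the user selected
# via the cookie or the &_adjusterDN_= URL override.)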
def readCookies(self):
if hasattr(self,"_adjuster_cookies_"): return # already OK
self._adjuster_cookies_ = {}
for c in self.request.headers.get_list("Cookie"):
for cc in c.split(';'):
if not '=' in cc: continue # (e.g. Maxthon has been known to append a ; to the end of the cookie string)
n,v = cc.strip().split('=',1)
self._adjuster_cookies_[n] = v
def getCookie(self,cookieName,zeroValue=None):
# zeroValue is a value that the cookie can be set to so as to "clear" it (because some browsers don't seem to understand JS that clears a cookie)
self.readCookies()
v = self._adjuster_cookies_.get(cookieName,None)
if v==zeroValue: v = None
return v
def setCookie(self,cookieName,val):
# This is ONLY for ADJUSTER OPTIONS - does NOT propagate to the server
self.readCookies()
self._adjuster_cookies_[cookieName] = val
def clearUnrecognisedCookies(self):
# When serving via adjust_domain_cookieName, on URL box try to save browser memory (and request time) and improve security by deleting cookies set by previous sites. But can cover only the path=/ ones from here.
self.readCookies()
for n,v in self._adjuster_cookies_.items():
if n in cRecogniseAny or n==adjust_domain_cookieName: continue # don't do adjust_domain_cookieName unless take into account addCookieFromURL (TODO: but would we ever GET here if that happens?)
elif n in cRecognise1 and v=="1": continue
for dot in ["","."]: self.add_header("Set-Cookie",n+"="+v+"; Domain="+dot+self.cookieHostToSet()+"; Path=/; Expires=Thu Jan 01 00:00:00 1970") # to clear it
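# (Sending Set-Cookie with an Expires date in the past is the standard way to
# delete a cookie; both the bare host and the dot-prefixed domain forms are
# cleared because we cannot tell which form the site originally used.)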