From ace65fe6c3c0547e602fc4ffbdcba257dc2262b2 Mon Sep 17 00:00:00 2001 From: "Silas S. Brown" <ssb22@cam.ac.uk> Date: Sat, 11 Oct 2014 10:46:31 +0000 Subject: [PATCH] Update adjuster/annogen/termlayout git-svn-id: http://svn.code.sf.net/p/e-guidedog/code/ssb22/adjuster@2054 29193198-4895-4776-b068-10539e920549 --- termlayout.py | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/termlayout.py b/termlayout.py index 1d70050..117acf4 100644 --- a/termlayout.py +++ b/termlayout.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# TermLayout v0.1 (c) 2014 Silas S. Brown +# TermLayout v0.11 (c) 2014 Silas S. Brown # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -275,7 +275,7 @@ class XYGrid: else: axis = 1 if spaceForBorder: border = nEnd-N-1 # the internal borders in the range else: border = 0 - return border + sum([0]+[max([0]+[contribOf(v[1].getSize(direction),v[0][axis],n == k[axis]+v[0][axis]-1) for k,v in self.items.iteritems() if k[axis] <= n < k[axis]+v[0][axis]]) for n in xrange(N,nEnd)]) + return border + sum(max([0]+[contribOf(v[1].getSize(direction),v[0][axis],n == k[axis]+v[0][axis]-1) for k,v in self.items.iteritems() if k[axis] <= n < k[axis]+v[0][axis]]) for n in xrange(N,nEnd)) def getSize(self,direction='x'): if direction=='x': axis = 0 else: axis = 1 @@ -398,16 +398,23 @@ tagsToIgnore = [ tagsToIgnore = re.compile('(?i)<('+'|'.join(re.escape(t) for t in tagsToIgnore)+')( [^>]*)?>') def parseDoc(html,width=None,attList=None,realWidth=None, - inPre=False,isOL=False,inCentre=False): + inPre=False,isOL=False,inCentre=False, + callback=None): + "if callback is not None, we're reading the top-level document and callback is called to 'flush' lines from it (so don't need to build up entire doc in memory before starting to print)" if width==None: width = screenWidth if realWidth==None: realWidth = width # for lists if attList==None: attList = [] theDoc = StackingRectangle('y',name='doc') realDoc = theDoc # if we go into <li>, THAT becomes the new theDoc (this is a 'messy-HTML' parser) lstrip = True ; liNum = 0 + def flush(): + if callback and theDoc.items: + theDoc.lineBreakAndPadLeading(realWidth) + callback(theDoc.getLines()) + theDoc.items = [] def makeP(): theP=StackingRectangle('x',name='p') - theDoc.items.append(theP) + flush() ; theDoc.items.append(theP) return theP def closeLI(): if not theDoc==realDoc: @@ -439,7 +446,9 @@ def parseDoc(html,width=None,attList=None,realWidth=None, if m: gobbled += m.end() ; html = html[m.end():] lstrip=False ; continue - if re.match('(?i)</p>',html): thisTag = ('p',4,len(html),len(html)) # treat as <p> (see below) + m = re.match('(?i)</?p>',html) + if m: # treat both <p> and </p> as <p> (see below), and don't bother trying to find their closing tags + thisTag = ('p',m.end(),len(html),len(html)) else: thisTag = matchToClosingTag(html) if thisTag: tagName, inStart, inEnd, pastClose = thisTag @@ -454,6 +463,7 @@ def parseDoc(html,width=None,attList=None,realWidth=None, else: thisTag = None # invalid lstrip = False elif tagName in ['ul','ol']: + flush() theDoc.items.append(parseDoc(html[inStart:inEnd],max(min(10,width),width-4),attList,width,isOL=(tagName=='ol'))) # plus we need a new para at the end theP = makeP() @@ -479,6 +489,7 @@ def parseDoc(html,width=None,attList=None,realWidth=None, theP = makeP() ; pastClose = inStart lstrip = True elif tagName in ['address','blockquote','h1','h2','h3','h4','h5','h6','dd']: # TODO: leave space above/below for some of these ? + flush() if tagName.startswith('h'): al2 = attList + ansiAttributesForTag('<b>') elif tagName.startswith('a'): al2 = attList + ansiAttributesForTag('<em>') else: al2 = attList @@ -495,9 +506,11 @@ def parseDoc(html,width=None,attList=None,realWidth=None, theP = makeP() ; lstrip = True elif tagName in ['div','pre','dt','center']: # as above but without the extra margins + flush() theDoc.items.append(parseDoc(html[inStart:inEnd],width,attList,inPre=(tagName=='pre'),inCentre=(tagName=='center'))) theP = makeP() ; lstrip = True elif tagName in ['table']: + flush() theDoc.items.append(parseTable(html[inStart:inEnd],width,attList,'border' in html[:inStart])) theP = makeP() ; lstrip = True elif tagName in ['hr']: @@ -512,7 +525,7 @@ def parseDoc(html,width=None,attList=None,realWidth=None, else: # it's a tag we didn't recognise, or something that's malformed theP.items.append(f('<')) gobbled += 1 ; html = html[1:] ; lstrip=False - closeLI() + closeLI() ; flush() if realDoc==theDoc: realDoc.lineBreakAndPadLeading(realWidth) if inCentre: realDoc.padToSize(desired=realWidth) # TODO: unless we're inside a table or something, in which case leave desired=0 @@ -690,11 +703,8 @@ term = os.environ.get("TERM","") supports_ansi = ("xterm" in term or term in ["screen","linux"]) # TODO: others? if __name__ == "__main__": - ansiLines = parseDoc(htmlPreprocess(sys.stdin.read().decode(terminal_charset))).getLines() - # TODO: although we definitely .encode(terminal_charset) below, the .decode above might have to be something else if there's a META specifying it - toOut = mergeAnsifiedLines(ansiLines,not supports_ansi).encode(terminal_charset) - if sys.stdout.isatty() and len(ansiLines) > screenDim('ROWS')-2 and os.path.exists('/usr/bin/less'): - if '\x1b(0' in toOut: raw = '-r' # because we used ANSI line drawing (not just colours). Customising less's -R option with the LESSANSIMIDCHARS and LESSANSIENDCHARS environment variables won't help us here, because '0' is an "end" character on one control sequence but a "mid" on the other, so we have no choice but to ask for ALL codes to go through raw, which in theory means less loses track of the real line lengths, although it seems to work well enough for the ANSI codes we generate - else: raw = '-R' # just the ANSI colours - os.popen('/usr/bin/less '+raw,'w').write(toOut) - else: sys.stdout.write(toOut) + if sys.stdin.isatty(): sys.stderr.write("termlayout: reading HTML from standard input\n") + if sys.stdout.isatty() and not sys.stdin.isatty() and os.path.exists('/usr/bin/less'): + outstream = os.popen('/usr/bin/less -FrX','w') + else: outstream = sys.stdout + parseDoc(htmlPreprocess(sys.stdin.read().decode(terminal_charset)),callback=lambda lines:(outstream.write(mergeAnsifiedLines(lines,not supports_ansi).encode(terminal_charset)),outstream.flush())) # TODO: although we definitely .encode(terminal_charset), the .decode might have to be something else if there's a META specifying it -- GitLab