GIF89a;
" lines in HTML files). That may be
because this is the only method of the 3 that has a *concept* of
"junk"
"""
_legend = """
"""
class HtmlDiff(object):
"""For producing HTML side by side comparison with change highlights.
This class can be used to create an HTML table (or a complete HTML file
containing the table) showing a side by side, line by line comparison
of text with inter-line and intra-line change highlights. The table can
be generated in either full or contextual difference mode.
The following methods are provided for HTML generation:
make_table -- generates HTML for a single side by side table
make_file -- generates complete HTML file with a single side by side table
See tools/scripts/diff.py for an example usage of this class.
"""
_file_template = _file_template
_styles = _styles
_table_template = _table_template
_legend = _legend
_default_prefix = 0
def __init__(self,tabsize=8,wrapcolumn=None,linejunk=None,
charjunk=IS_CHARACTER_JUNK):
"""HtmlDiff instance initializer
Arguments:
tabsize -- tab stop spacing, defaults to 8.
wrapcolumn -- column number where lines are broken and wrapped,
defaults to None where lines are not wrapped.
linejunk,charjunk -- keyword arguments passed into ndiff() (used to by
HtmlDiff() to generate the side by side HTML differences). See
ndiff() documentation for argument default values and descriptions.
"""
self._tabsize = tabsize
self._wrapcolumn = wrapcolumn
self._linejunk = linejunk
self._charjunk = charjunk
def make_file(self,fromlines,tolines,fromdesc='',todesc='',context=False,
numlines=5):
"""Returns HTML file of side by side comparison with change highlights
Arguments:
fromlines -- list of "from" lines
tolines -- list of "to" lines
fromdesc -- "from" file column header string
todesc -- "to" file column header string
context -- set to True for contextual differences (defaults to False
which shows full differences).
numlines -- number of context lines. When context is set True,
controls number of lines displayed before and after the change.
When context is False, controls the number of lines to place
the "next" link anchors before the next change (so click of
"next" link jumps to just before the change).
"""
return self._file_template % dict(
styles = self._styles,
legend = self._legend,
table = self.make_table(fromlines,tolines,fromdesc,todesc,
context=context,numlines=numlines))
def _tab_newline_replace(self,fromlines,tolines):
"""Returns from/to line lists with tabs expanded and newlines removed.
Instead of tab characters being replaced by the number of spaces
needed to fill in to the next tab stop, this function will fill
the space with tab characters. This is done so that the difference
algorithms can identify changes in a file when tabs are replaced by
spaces and vice versa. At the end of the HTML generation, the tab
characters will be replaced with a nonbreakable space.
"""
def expand_tabs(line):
# hide real spaces
line = line.replace(' ','\0')
# expand tabs into spaces
line = line.expandtabs(self._tabsize)
# replace spaces from expanded tabs back into tab characters
# (we'll replace them with markup after we do differencing)
line = line.replace(' ','\t')
return line.replace('\0',' ').rstrip('\n')
fromlines = [expand_tabs(line) for line in fromlines]
tolines = [expand_tabs(line) for line in tolines]
return fromlines,tolines
def _split_line(self,data_list,line_num,text):
"""Builds list of text lines by splitting text lines at wrap point
This function will determine if the input text line needs to be
wrapped (split) into separate lines. If so, the first wrap point
will be determined and the first line appended to the output
text line list. This function is used recursively to handle
the second part of the split line to further split it.
"""
# if blank line or context separator, just add it to the output list
if not line_num:
data_list.append((line_num,text))
return
# if line text doesn't need wrapping, just add it to the output list
size = len(text)
max = self._wrapcolumn
if (size <= max) or ((size -(text.count('\0')*3)) <= max):
data_list.append((line_num,text))
return
# scan text looking for the wrap point, keeping track if the wrap
# point is inside markers
i = 0
n = 0
mark = ''
while n < max and i < size:
if text[i] == '\0':
i += 1
mark = text[i]
i += 1
elif text[i] == '\1':
i += 1
mark = ''
else:
i += 1
n += 1
# wrap point is inside text, break it up into separate lines
line1 = text[:i]
line2 = text[i:]
# if wrap point is inside markers, place end marker at end of first
# line and start marker at beginning of second line because each
# line will have its own table tag markup around it.
if mark:
line1 = line1 + '\1'
line2 = '\0' + mark + line2
# tack on first line onto the output list
data_list.append((line_num,line1))
# use this routine again to wrap the remaining text
self._split_line(data_list,'>',line2)
def _line_wrapper(self,diffs):
"""Returns iterator that splits (wraps) mdiff text lines"""
# pull from/to data and flags from mdiff iterator
for fromdata,todata,flag in diffs:
# check for context separators and pass them through
if flag is None:
yield fromdata,todata,flag
continue
(fromline,fromtext),(toline,totext) = fromdata,todata
# for each from/to line split it at the wrap column to form
# list of text lines.
fromlist,tolist = [],[]
self._split_line(fromlist,fromline,fromtext)
self._split_line(tolist,toline,totext)
# yield from/to line in pairs inserting blank lines as
# necessary when one side has more wrapped lines
while fromlist or tolist:
if fromlist:
fromdata = fromlist.pop(0)
else:
fromdata = ('',' ')
if tolist:
todata = tolist.pop(0)
else:
todata = ('',' ')
yield fromdata,todata,flag
def _collect_lines(self,diffs):
"""Collects mdiff output into separate lists
Before storing the mdiff from/to data into a list, it is converted
into a single line of text with HTML markup.
"""
fromlist,tolist,flaglist = [],[],[]
# pull from/to data and flags from mdiff style iterator
for fromdata,todata,flag in diffs:
try:
# store HTML markup of the lines into the lists
fromlist.append(self._format_line(0,flag,*fromdata))
tolist.append(self._format_line(1,flag,*todata))
except TypeError:
# exceptions occur for lines where context separators go
fromlist.append(None)
tolist.append(None)
flaglist.append(flag)
return fromlist,tolist,flaglist
def _format_line(self,side,flag,linenum,text):
"""Returns HTML markup of "from" / "to" text lines
side -- 0 or 1 indicating "from" or "to" text
flag -- indicates if difference on line
linenum -- line number (used for line number column)
text -- line text to be marked up
"""
try:
linenum = '%d' % linenum
id = ' id="%s%s"' % (self._prefix[side],linenum)
except TypeError:
# handle blank lines where linenum is '>' or ''
id = ''
# replace those things that would get confused with HTML symbols
text=text.replace("&","&").replace(">",">").replace("<","<")
# make space non-breakable so they don't get compressed or line wrapped
text = text.replace(' ',' ').rstrip()
return '
Legends
Colors Added Changed Deleted
Links (f)irst change (n)ext change (t)op %s %s ' \
% (id,linenum,text)
def _make_prefix(self):
"""Create unique anchor prefixes"""
# Generate a unique anchor prefix so multiple tables
# can exist on the same HTML page without conflicts.
fromprefix = "from%d_" % HtmlDiff._default_prefix
toprefix = "to%d_" % HtmlDiff._default_prefix
HtmlDiff._default_prefix += 1
# store prefixes so line format method has access
self._prefix = [fromprefix,toprefix]
def _convert_flags(self,fromlist,tolist,flaglist,context,numlines):
"""Makes list of "next" links"""
# all anchor names will be generated using the unique "to" prefix
toprefix = self._prefix[1]
# process change flags, generating middle column of next anchors/links
next_id = ['']*len(flaglist)
next_href = ['']*len(flaglist)
num_chg, in_change = 0, False
last = 0
for i,flag in enumerate(flaglist):
if flag:
if not in_change:
in_change = True
last = i
# at the beginning of a change, drop an anchor a few lines
# (the context lines) before the change for the previous
# link
i = max([0,i-numlines])
next_id[i] = ' id="difflib_chg_%s_%d"' % (toprefix,num_chg)
# at the beginning of a change, drop a link to the next
# change
num_chg += 1
next_href[last] = 'n' % (
toprefix,num_chg)
else:
in_change = False
# check for cases where there is no content to avoid exceptions
if not flaglist:
flaglist = [False]
next_id = ['']
next_href = ['']
last = 0
if context:
fromlist = [' No Differences Found ']
tolist = fromlist
else:
fromlist = tolist = [' Empty File ']
# if not a change on first line, drop a link
if not flaglist[0]:
next_href[0] = 'f' % toprefix
# redo the last link to link to the top
next_href[last] = 't' % (toprefix)
return fromlist,tolist,flaglist,next_href,next_id
def make_table(self,fromlines,tolines,fromdesc='',todesc='',context=False,
numlines=5):
"""Returns HTML table of side by side comparison with change highlights
Arguments:
fromlines -- list of "from" lines
tolines -- list of "to" lines
fromdesc -- "from" file column header string
todesc -- "to" file column header string
context -- set to True for contextual differences (defaults to False
which shows full differences).
numlines -- number of context lines. When context is set True,
controls number of lines displayed before and after the change.
When context is False, controls the number of lines to place
the "next" link anchors before the next change (so click of
"next" link jumps to just before the change).
"""
# make unique anchor prefixes so that multiple tables may exist
# on the same page without conflict.
self._make_prefix()
# change tabs to spaces before it gets more difficult after we insert
# markkup
fromlines,tolines = self._tab_newline_replace(fromlines,tolines)
# create diffs iterator which generates side by side from/to data
if context:
context_lines = numlines
else:
context_lines = None
diffs = _mdiff(fromlines,tolines,context_lines,linejunk=self._linejunk,
charjunk=self._charjunk)
# set up iterator to wrap lines that exceed desired width
if self._wrapcolumn:
diffs = self._line_wrapper(diffs)
# collect up from/to lines and flags into lists (also format the lines)
fromlist,tolist,flaglist = self._collect_lines(diffs)
# process change flags, generating middle column of next anchors/links
fromlist,tolist,flaglist,next_href,next_id = self._convert_flags(
fromlist,tolist,flaglist,context,numlines)
s = []
fmt = ' \n'
for i in range(len(flaglist)):
if flaglist[i] is None:
# mdiff yields None on separator lines skip the bogus ones
# generated for the first line
if i > 0:
s.append(' \n \n')
else:
s.append( fmt % (next_id[i],next_href[i],fromlist[i],
next_href[i],tolist[i]))
if fromdesc or todesc:
header_row = '%s %s' + \
'%s %s%s%s%s%s ' % (
' ',
'%s ' % fromdesc,
' ',
'%s ' % todesc)
else:
header_row = ''
table = self._table_template % dict(
data_rows=''.join(s),
header_row=header_row,
prefix=self._prefix[1])
return table.replace('\0+',''). \
replace('\0-',''). \
replace('\0^',''). \
replace('\1',''). \
replace('\t',' ')
del re
def restore(delta, which):
r"""
Generate one of the two sequences that generated a delta.
Given a `delta` produced by `Differ.compare()` or `ndiff()`, extract
lines originating from file 1 or 2 (parameter `which`), stripping off line
prefixes.
Examples:
>>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1),
... 'ore\ntree\nemu\n'.splitlines(1))
>>> diff = list(diff)
>>> print ''.join(restore(diff, 1)),
one
two
three
>>> print ''.join(restore(diff, 2)),
ore
tree
emu
"""
try:
tag = {1: "- ", 2: "+ "}[int(which)]
except KeyError:
raise ValueError, ('unknown delta choice (must be 1 or 2): %r'
% which)
prefixes = (" ", tag)
for line in delta:
if line[:2] in prefixes:
yield line[2:]
def _test():
import doctest, difflib
return doctest.testmod(difflib)
if __name__ == "__main__":
_test()