# rfcdiff-errata.patch -- adds an "--errata" output mode to rfcdiff 1.15
#
# Text before the first "---" header is commentary; patch(1) skips it.
#
# What the patch does (all of this is visible in the hunks below):
#   * documents and parses a new --errata option; output goes to
#     $outbase.errata.html
#   * adds the awk filter erratadiff(), which reads a unified diff on stdin
#     and prints, per diff hunk, an "Old <position>" block and a
#     "New <position>" block whose word-level marks come from wdiff
#   * adds --ctxdepth N, which overrides $prelines (the -U context depth)
#   * in the awk filter near old line 240, counts consecutive blank lines
#     in haveblank and also emits a separator after a page break when more
#     than 5 blank lines were seen
#   * keeps using wdiff when its --version output is unrecognised
#     (assumes 0.5) instead of disabling it
#   * calls diff with "-U N" instead of "-uN", redirects abdiff output to
#     $tempout, and fixes the "rm $tmpout" typo
#
# Corrections relative to the patch as received (only "+" lines changed,
# hunk line counts are unchanged):
#   * chunk text is written with printf "%s" so a "%" in the document is
#     not interpreted as a format directive
#   * the preserved-words percentage is compared numerically
#     (oldStats + 0); after sub() it is a string and "100" < "50" was true
#   * the " ", "-" and "+" rules end with "next", so a line whose text
#     starts with "-" or "+" is not processed twice and the record
#     clobbered by getline inside chunkdiff() is never re-matched
#   * the "++"/"--" header rules only fire before the first "@@" line, so
#     changed lines whose text starts with "+" or "-" are no longer dropped
#   * htmlesc() escapes again and is applied to every chunk line
#   * the literal "\n" printed by echo (no -e) is gone; diagnostics use >&2
#   * "[ $tempout -a ... ]" tests are quoted so empty values are safe
#   * "tail +3" is written "tail -n +3"
#   * getpage() keeps page and p local
#
# NOTE(review): this text was recovered from a copy in which line breaks
#   were collapsed and HTML tags inside string literals were stripped.
#   - Line breaks were re-derived from the hunk counts; indentation and the
#     position of blank context lines are best guesses, so apply with
#     "patch -l" and confirm against the upstream patch.
#   - The <span class=...> wdiff markers, the <span[^>]*> patterns in
#     fixesc() and the <div> wrapper in flush() are reconstructions;
#     only <pre>, </pre> and </span> are directly supported by the
#     recovered text.
#   - --errata leaves opthtml=1, so htmldiff also runs and its output is
#     overwritten; whether the page cache depends on opthtml could not be
#     checked from this patch, so that was left alone.
#   - --ctxdepth is not yet listed in the usage comment at the top.
#
--- rfcdiff.orig	2010-04-19 10:46:14.000000000 -0600
+++ rfcdiff	2010-04-19 11:58:38.000000000 -0600
@@ -46,6 +46,8 @@
 #   --ab-diff       Before/After diff, suitable for rfc-editor
 #   --abdiff
 #
+#   --errata        Before/After .html diff, suitable for errata pages
+#
 #   --stdout        Send output to stdout instead to a file
 #
 #
@@ -212,7 +214,7 @@
 # End:
 #
 
-export version="1.15"
+export version="1.15e"
 export prelines="10"
 export basename=$(basename $0)
 export workdir="/tmp/$basename-$$"
@@ -240,7 +242,7 @@
 /^[^ \t]/       { sentence=1; }
 /[^ \t]/        {
                    if (newpage) {
-                      if (sentence) {
+                      if (sentence || haveblank > 5) {
                          outline++; print "";
                       }
                    } else {
@@ -253,8 +255,8 @@
                    newpage=0;
                 }
 /[.:][ \t]*$/   { sentence=1; }
-/^[ \t]*$/      { haveblank=1; next; }
-                { outline++; print; }
+/^[ \t]*$/      { haveblank++; next; }
+                { outline++; haveblank=0; print; }
 ' $1
 }
 
@@ -639,6 +641,8 @@
 ' $1
 }
 
+
+
 # ----------------------------------------------------------------------
 # Generate before/after text output from a context diff
 # ----------------------------------------------------------------------
@@ -714,6 +718,217 @@
 
 
 # ----------------------------------------------------------------------
+# Generate before/after html errata from a unified diff
+# ----------------------------------------------------------------------
+erratadiff() {
+awk '
+BEGIN {
+   FS = "[ \t,]";
+
+   # Read pagecache1
+   maxpage[1] = 1
+   pageend[1,0] = 2;
+   while ( getline < ENVIRON["pagecache1"] > 0) {
+      pageend[1,$1] = $2;
+      if ($1+0 > maxpage[1]) maxpage[1] = $1+0;
+   }
+
+   # Read pagecache2
+   maxpage[2] = 1
+   pageend[2,0] = 2;
+   while ( getline < ENVIRON["pagecache2"] > 0) {
+      pageend[2,$1] = $2;
+      if ($1+0 > maxpage[2]) maxpage[2] = $1+0;
+   }
+
+   wdiff = ENVIRON["wdiffbin"];
+   base1 = ENVIRON["base1"]
+   base2 = ENVIRON["base2"]
+
+   beforeDiff = "";
+   afterDiff = "";
+   oldchunk = "";
+   newchunk = "";
+
+   chunk = 0;
+}
+
+function getpage(which, line,    page, p) { # borrowed from htmldiff(); page and p are locals
+    line = line + ENVIRON["prelines"];
+    page = "?";
+    for (p=1; p <= maxpage[which]; p++) {
+       if (pageend[which,p] == 0) continue;
+       if (line <= pageend[which,p]) {
+           page = p;
+           break;
+       }
+    }
+    return page;
+}
+
+function getpageline(which, line, page) { # borrowed from htmldiff()
+    if (page == "?") {
+       return line + ENVIRON["prelines"];
+    } else {
+       if (pageend[which,page-1]+0 != 0) {
+           return line + ENVIRON["prelines"] - pageend[which,page-1] + 3; # numbers of header lines stripped
+       } else {
+           return "?";
+       }
+    }
+}
+
+function htmlesc(line) { # borrowed from htmldiff()
+    gsub("&", "\\&amp;", line);
+    gsub("<", "\\&lt;", line);
+    gsub(">", "\\&gt;", line);
+    return line;
+}
+
+function fixesc(line) { # borrowed with mods from htmldiff()
+    line = gensub(/&(<[^>]*>)/, "\\1\\&", "g", line); # borrowed from htmldiff
+
+    # XXX: when wdiff is modified to report significant whitespace changes,
+    # add a check that whitespace changes eliminated below are not significant
+
+    # do not mark whitespace at the beginning of a line
+    line = gensub(/^(<span[^>]*>)( +)/, "\\2\\1", "g", line);
+    # remove empty marks (inserted when newlines are added)
+    line = gensub(/^<span[^>]*><.span>/, "", "g", line);
+
+    return line;
+}
+
+function appendLine(buf, line) {
+    if (length(buf) > 0)
+       return buf "\n" line;
+    else
+       return line;
+}
+
+function chunkdiff() { # borrowed with mods from htmldiff()
+    chunk++;
+    chunkfile11= sprintf("1/chunk%04d", chunk);
+    chunkfile22= sprintf("2/chunk%04d", chunk);
+    printf "%s", oldchunk > chunkfile11;  # "%s": chunk text may contain "%"
+    printf "%s", newchunk > chunkfile22;
+    close(chunkfile11);
+    close(chunkfile22);
+
+    chunkfile12 = chunkfile22;
+    chunkfile21 = chunkfile11;
+
+    # figure out how many old words were preserved (not deleted or changed)
+    oldStats = "";
+    cmd0 = sprintf("%s -n --statistics --no-inserted --no-deleted --no-common %s %s",
+       wdiff, chunkfile11, chunkfile12);
+    if (cmd0 | getline > 0) { oldStats = $6; } # percentage field of the first statistics line; clobbers $0
+    close(cmd0);
+    sub(/\%$/, "", oldStats);
+
+    # do not generate word-based diff if not enough words were preserved
+    if (length(oldStats) > 0 && oldStats + 0 < 50) { # + 0 forces a numeric comparison
+       chunkfile12 = "/dev/null"; # as if everything old was deleted
+       chunkfile21 = "/dev/null"; # as if everything new was added from scratch
+    }
+
+    cmd1 = sprintf("%s -n --no-inserted --start-delete=\"<span class=\\\"delete\\\">\" --end-delete=\"</span>\" %s %s",
+       wdiff, chunkfile11, chunkfile12); # NOTE(review): marker tags were lost in extraction; class names assumed
+
+    cmd2 = sprintf("%s -n --no-deleted --start-insert=\"<span class=\\\"insert\\\">\" --end-insert=\"</span>\" %s %s",
+       wdiff, chunkfile21, chunkfile22); # NOTE(review): same assumption as for cmd1
+
+    while (cmd1 | getline > 0) { beforeDiff = appendLine(beforeDiff, fixesc($0)); }
+    while (cmd2 | getline > 0) { afterDiff = appendLine(afterDiff, fixesc($0)); }
+
+    close(cmd1);
+    close(cmd2);
+}
+
+function push() {
+    if (length(oldchunk) <= 0 && length(newchunk) <= 0)
+       return;
+
+    chunkdiff();
+
+    oldchunk = "";
+    newchunk = "";
+}
+
+function flush() {
+    push();
+    if (length(beforeDiff) <= 0 && length(afterDiff) <= 0)
+       return;
+
+    print "<div>\n"; # NOTE(review): wrapper tag lost in extraction; <div> assumed
+    printf "Old %s:\n", posinfo1;
+    print "<pre>";
+    print beforeDiff;
+    print "</pre>";
+    printf "New %s:\n", posinfo2;
+    print "<pre>";
+    print afterDiff;
+    print "</pre>";
+    print "</div>";
+
+    beforeDiff = "";
+    afterDiff = "";
+}
+
+/^\+\+/ && !inhunk { # file header; only possible before the first @@ line
+    next;
+    }
+/^\-\-/ && !inhunk { # file header; only possible before the first @@ line
+    next;
+    }
+/^@@/ {
+    linenum1 = 0 - $2; inhunk = 1; # fields are read before flush() because getline clobbers $0
+    linenum2 = 0 + $4;
+    flush();
+    if (linenum1 > 1) {
+       page1 = getpage(1,linenum1);
+       page2 = getpage(2,linenum2);
+       if (page1 == "?") {
+           posinfo1 = sprintf("line %s", getpageline(1, linenum1, page1));
+       } else {
+           posinfo1 = sprintf("page %s, line %s", page1, getpageline(1, linenum1, page1));
+       }
+
+       if (page2 == "?") {
+           posinfo2 = sprintf("line %s", getpageline(2, linenum2, page2));
+       } else {
+           posinfo2 = sprintf("page %s, line %s", page2, getpageline(2, linenum2, page2));
+       }
+    }
+    next; # required to avoid $0 being set to last getline in chunkdiff?
+    }
+/^ / {
+    sub(/^./, "");
+    line = htmlesc($0); # saved before push(), which may overwrite $0
+    if (length(line) <= 0) # old empty line
+       push();
+    oldchunk = oldchunk line "\n"
+    newchunk = newchunk line "\n"; next; # do not re-match the stripped or clobbered record
+    }
+/^\-/ {
+    sub(/^./, "");
+    oldchunk = oldchunk htmlesc($0) "\n"; next; # removed text may itself start with + or -
+    }
+/^\+/ {
+    sub(/^./, "");
+    line = htmlesc($0);
+    if (length(line) <= 0) # added an empty line
+       push();
+    newchunk = newchunk line "\n"; next;
+    }
+END {
+    flush();
+    }
+'
+}
+
+
+# ----------------------------------------------------------------------
 # Utility to find an executable
 # ----------------------------------------------------------------------
 lookfor() {
@@ -803,6 +1018,7 @@
 # ----------------------------------------------------------------------
 opthtml=1; optdiff=0; optchbars=0; optwdiff=0; optshow=0; optnowdiff=0;
 optkeep=0; optinfo=0; optwidth=0; optnums=0; optbody=0; optabdiff=0;
+opterrata=0;
 optstrip=1; optstdout=0;
 
 
@@ -817,6 +1033,7 @@
         --abdiff) opthtml=0; optdiff=0; optchbars=0; optwdiff=0; optabdiff=1;;
         --ab-diff)opthtml=0; optdiff=0; optchbars=0; optwdiff=0; optabdiff=1;;
         --rfc-editor-diff)opthtml=0; optdiff=0; optchbars=0; optwdiff=0; optabdiff=1;;
+        --errata) opterrata=1;;
         --version)echo -e "$basename\t$version"; exit 0;;
         --browse) optshow=1;;
         --nowdiff)optnowdiff=1;;
@@ -828,6 +1045,7 @@
         --body)   optbody=1;;
         --nostrip)optstrip=0; optbody=0;;
         --stdout) optstdout=1;;
+        --ctxdepth) prelines=$2; shift;;
         --)       shift; break;;
 
         -r)       options="$options $1 $2"; rev=$2; shift;;
@@ -947,6 +1165,9 @@
     if [ $optabdiff -gt 0 ]; then
         outfile=$outbase.changes
     fi
+    if [ $opterrata -gt 0 ]; then
+        outfile=$outbase.errata.html
+    fi
 fi
 if [ "$outfile" ]; then
     tempout=$(basename $outfile)
@@ -960,11 +1181,12 @@
 if [ -n "$wdiffbin" ]; then
     wdiffver=$($wdiffbin --version 2>/dev/null | grep "wdiff.\+[0-9]\.[0-9]")
     if [ -z "$wdiffver" ]; then
-        wdiffbin="";
-        echo -en "\n  Found wdiff, but it reported no recognisable version."
+        #wdiffbin="";
+        wdiffver='0.5'
+        echo "unrecognisable wdiff version, assuming 0.5." >&2
     fi
 else
-    echo -en "\n  Couldn't find wdiff."
+    echo "  Couldn't find wdiff." >&2
 fi
 if [ -z "$wdiffbin" ]; then echo "  Falling back to builtin diff colouring..."; fi
 export wdiffbin
@@ -997,25 +1219,30 @@
 fi
 
 if [ $opthtml -gt 0 ]; then
-   diff -bBwd -u$prelines 1/$base1 2/$base2 | tee $workdir/diff | htmldiff > $tempout
+   diff -bBwd -U $prelines 1/$base1 2/$base2 | tee $workdir/diff | htmldiff > $tempout
 fi
 if [ $optchbars -gt 0 ]; then
-   diff -bBwd -u10000 1/$base1 2/$base2 | tee $workdir/diff | grep -v "^-" | tail +3 | sed 's/^+/|/' > $tempout
+   diff -bBwd -U 10000 1/$base1 2/$base2 | tee $workdir/diff | grep -v "^-" | tail -n +3 | sed 's/^+/|/' > $tempout
 fi
 if [ $optdiff -gt 0 ]; then
-   diff -bBwd -u$prelines 1/$base1 2/$base2 | tee $workdir/diff > $tempout
+   diff -bBwd -U $prelines 1/$base1 2/$base2 | tee $workdir/diff > $tempout
 fi
 if [ $optabdiff -gt 0 ]; then
-   diff -wd -u1000 1/$base1 2/$base2 | tee $workdir/diff | abdiff
+   diff -wd -U 1000 1/$base1 2/$base2 | tee $workdir/diff | abdiff > $tempout
+fi
+if [ $opterrata -gt 0 ]; then
+   diff -w -U $prelines 1/$base1 2/$base2 | tee $workdir/diff | erratadiff > $tempout
 fi
 if [ $optwdiff -gt 0 ]; then
    wdiff -a 1/$base1 2/$base2
 fi
 
-if [ $optstdout -gt 0 ]; then
-   cat $tempout; rm $tmpout
+if [ -n "$tempout" ] && [ $optstdout -gt 0 ]; then
+   cat "$tempout"; rm "$tempout"
 else
+if [ -n "$tempout" ] && [ -n "$outfile" ]; then
    cd -; if [ -f $workdir/$tempout ]; then mv $workdir/$tempout $outfile; fi
+fi; fi
 
 if [ $optshow -gt 0 ]; then