--- rfcdiff.orig	2010-04-19 10:46:14.000000000 -0600
+++ rfcdiff	2010-04-19 11:58:38.000000000 -0600
@@ -46,6 +46,8 @@
 #	--ab-diff	Before/After diff, suitable for rfc-editor
 #	--abdiff
 #
+#	--errata	Before/After .html diff, suitable for errata pages
+#
 #	--stdout	Send output to stdout instead to a file
 #
 #
@@ -212,7 +214,7 @@
 # End:
 #
 
-export version="1.15"
+export version="1.15e"
 export prelines="10"
 export basename=$(basename $0)
 export workdir="/tmp/$basename-$$"
@@ -240,7 +242,7 @@
 /^[^ \t]/			{ sentence=1; }
 /[^ \t]/			{
 				   if (newpage) {
-				      if (sentence) {
+				      if (sentence || haveblank > 5) {
 					 outline++; print "";
 				      }
 				   } else {
@@ -253,8 +255,8 @@
 				   newpage=0;
 				}
 /[.:][ \t]*$/			{ sentence=1; }
-/^[ \t]*$/			{ haveblank=1; next; }
-				{ outline++; print; }
+/^[ \t]*$/			{ haveblank++; next; }
+				{ outline++; haveblank=0; print; }
 ' $1
 }
 
@@ -639,6 +641,8 @@
 ' $1
 }
 
+
+
 # ----------------------------------------------------------------------
 # Generate before/after text output from a context diff
 # ----------------------------------------------------------------------
@@ -714,6 +718,217 @@
 
 
 # ----------------------------------------------------------------------
+# Generate before/after html errata from a unified diff
+# ----------------------------------------------------------------------
+erratadiff() {
+awk '
+BEGIN	{
+	   FS = "[ \t,]";
+
+	   # Load the page caches of the old (1) and new (2) document; field 1
+	   # of each record is the pageend[] key, field 2 the value stored.
+	   cachefile[1] = ENVIRON["pagecache1"];
+	   cachefile[2] = ENVIRON["pagecache2"];
+	   for (side = 1; side <= 2; side++) {
+	      maxpage[side] = 1;
+	      pageend[side,0] = 2;
+	      while ((getline < cachefile[side]) > 0) {
+		 pageend[side,$1] = $2;
+		 if ($1+0 > maxpage[side])
+		    maxpage[side] = $1+0;
+	      }
+	   }
+
+	   # Values taken from the environment of the calling script.
+	   wdiff = ENVIRON["wdiffbin"];
+	   base1 = ENVIRON["base1"];
+	   base2 = ENVIRON["base2"];
+
+	   # Accumulators: raw text of the chunk being collected, and the
+	   # marked-up before/after output that flush() prints.
+	   oldchunk = newchunk = "";
+	   beforeDiff = afterDiff = "";
+
+	   # Sequence number used to name the temporary chunk files.
+	   chunk = 0;
+}
+
+function getpage(which, line) { # same lookup as htmldiff()
+    # Return the first page of document "which" whose cached end line
+    # is not before the given diff line, or "?" when there is none.
+    # The line is first shifted by ENVIRON["prelines"].
+    line += ENVIRON["prelines"];
+    page = "?";
+    for (p = 1; p <= maxpage[which]; p++) {
+	# pages without a cache entry (or with end 0) are skipped
+	if (pageend[which,p] != 0 && line <= pageend[which,p]) { page = p; break; }
+    }
+    return page;
+}
+
+function getpageline(which, line, page) { # same arithmetic as htmldiff()
+    # Translate a diff line number into a line number within its page.
+    line += ENVIRON["prelines"];
+    if (page == "?")
+	return line;               # page unknown: report the shifted line itself
+    if (pageend[which,page-1]+0 == 0)
+	return "?";                # no usable end recorded for the previous page
+    # Count from the end of the previous page; the +3 is the number of
+    # header lines stripped.
+    return line - pageend[which,page-1] + 3;
+}
+
+function htmlesc(line) { # borrowed from htmldiff(); returns line with &, < and > replaced by HTML entities
+    gsub("&", "\\&amp;", line); # must run first, or the & of the entities added below would be escaped again
+    gsub("<", "\\&lt;", line);
+    gsub(">", "\\&gt;", line);
+    return line;
+}
+
+function fixesc(line) { # borrowed with mods from htmldiff(); cleans up one line of wdiff output
+    line = gensub(/&(<[^>]*>)/, "\\1\\&", "g", line); # borrowed from htmldiff: move a tag that directly follows an & in front of that &
+
+    # XXX: when wdiff is modified to report significant whitespace changes,
+    # add a check that whitespace changes eliminated below are not significant
+
+    # do not mark whitespace at the beginning of a line
+    line = gensub(/^(<span class="diff">)( +)/, "\\2\\1", "g", line);
+    # remove empty marks (inserted when newlines are added)
+    line = gensub(/^<span class="diff"><.span>/, "", "g", line);
+
+    return line;
+}
+
+function appendLine(buf, line) {
+    # Append line to buf, separating the two with a newline.
+    # An empty buf yields line alone, so the result never
+    # begins with a stray newline.
+    return (length(buf) > 0) ? (buf "\n" line) : line;
+}
+
+function chunkdiff() { # borrowed with mods from htmldiff()
+   # Word-diff the pending oldchunk/newchunk text with wdiff and append
+   # the marked-up result to beforeDiff and afterDiff.
+   chunk++;
+   chunkfile11 = sprintf("1/chunk%04d", chunk);
+   chunkfile22 = sprintf("2/chunk%04d", chunk);
+   # chunk text is data, not a format: "%s" keeps a literal % in it intact
+   printf "%s", oldchunk > chunkfile11;
+   printf "%s", newchunk > chunkfile22;
+   close(chunkfile11);
+   close(chunkfile22);
+   chunkfile12 = chunkfile22;
+   chunkfile21 = chunkfile11;
+
+   # figure out how many old words were preserved (not deleted or changed)
+   oldStats = "";
+   cmd0 = sprintf("%s -n --statistics --no-inserted --no-deleted --no-common %s %s",
+	wdiff, chunkfile11, chunkfile12);
+   if ((cmd0 | getline) > 0) { oldStats = $6; } # 6th field of the first output line; expected to be the common-words percentage, e.g. 80%
+   close(cmd0);
+   sub(/%$/, "", oldStats);
+
+   # do not generate word-based diff if not enough words were preserved;
+   # "+ 0" forces a numeric comparison (as strings, "100" < "50" and "9" > "50")
+   if (length(oldStats) > 0 && oldStats + 0 < 50) {
+	chunkfile12 = "/dev/null"; # as if everything old was deleted
+	chunkfile21 = "/dev/null"; # as if everything new was added from scratch
+   }
+
+   cmd1 = sprintf("%s -n --no-inserted --start-delete=\"<span class=\\\"diff\\\">\" --end-delete=\"</span>\" %s %s",
+	wdiff, chunkfile11, chunkfile12);
+   cmd2 = sprintf("%s -n --no-deleted --start-insert=\"<span class=\\\"diff\\\">\" --end-insert=\"</span>\" %s %s",
+	wdiff, chunkfile21, chunkfile22);
+   while ((cmd1 | getline) > 0) { beforeDiff = appendLine(beforeDiff, fixesc($0)); }
+   while ((cmd2 | getline) > 0) { afterDiff = appendLine(afterDiff, fixesc($0)); }
+   close(cmd1);
+   close(cmd2);
+}
+
+function push() {
+   # Diff the pending old/new text, if there is any, and start a fresh
+   # chunk; chunkdiff() appends its result to beforeDiff/afterDiff.
+   if (length(oldchunk) > 0 || length(newchunk) > 0) {
+	chunkdiff();
+	oldchunk = "";
+	newchunk = "";
+   }
+}
+
+function flush() {
+   # Print one <div> holding the accumulated before/after markup,
+   # labelled with posinfo1/posinfo2, then clear both accumulators.
+   push();
+   if (length(beforeDiff) > 0 || length(afterDiff) > 0) {
+	print "<div class=\"diffChunk\">\n";
+	printf "<span class=\"label\">Old %s</span>:\n", posinfo1;
+	print "<pre class=\"before\">";
+	print beforeDiff;
+	print "</pre>";
+	printf "<span class=\"label\">New %s</span>:\n", posinfo2;
+	print "<pre class=\"after\">";
+	print afterDiff;
+	print "</pre>";
+	print "</div>";
+	beforeDiff = "";
+	afterDiff = "";
+   }
+}
+
+!inhunk && /^\+\+\+ / {	# "+++ file" header: only skipped before the first hunk
+	   next;
+	}
+!inhunk && /^--- / {	# "--- file" header; later lines of this shape are removed content
+	   next;
+	}
+/^@@/	{
+	   linenum1 = 0 - $2; inhunk = 1;
+	   for (f = 3; f <= NF; f++) if ($f ~ /^\+/) { linenum2 = 0 + $f; break; } # "+start" is $3, not $4, when ",count" is omitted
+	   flush();
+	   if (linenum1 > 1) {
+	      page1 = getpage(1,linenum1);
+	      page2 = getpage(2,linenum2);
+	      if (page1 == "?") {
+		 posinfo1 = sprintf("line %s", getpageline(1, linenum1, page1));
+	      } else {
+		 posinfo1 = sprintf("page %s, line %s", page1, getpageline(1, linenum1, page1));
+	      }
+
+	      if (page2 == "?") {
+		 posinfo2 = sprintf("line %s", getpageline(2, linenum2, page2));
+	      } else {
+		 posinfo2 = sprintf("page %s, line %s", page2, getpageline(2, linenum2, page2));
+	      }
+	   }
+	   next; # flush() may have replaced $0 through getline; keep later rules from seeing it
+	}
+/^ /	{
+	   sub(/^./, "");
+	   line = htmlesc($0); # text ends up inside <pre>, so &, < and > must be escaped
+	   if (length(line) <= 0) # old empty line
+		push();
+	   oldchunk = oldchunk line "\n"
+	   newchunk = newchunk line "\n"; next; # $0 was rewritten (and maybe clobbered by push): skip the rules below
+	}
+/^-/	{
+	   sub(/^./, "");
+	   oldchunk = oldchunk htmlesc($0) "\n"; next; # a removed line such as "-+x" must not also match the "+" rule
+	}
+/^\+/	{
+	   sub(/^./, "");
+	   line = htmlesc($0);
+	   if (length(line) <= 0) # added an empty line
+		push();
+	   newchunk = newchunk line "\n"; next;
+	}
+END     {
+           flush();
+	}
+'
+}
+
+
+# ----------------------------------------------------------------------
 # Utility to find an executable
 # ----------------------------------------------------------------------
 lookfor() {
@@ -803,6 +1018,7 @@
 # ----------------------------------------------------------------------
 opthtml=1; optdiff=0; optchbars=0; optwdiff=0; optshow=0; optnowdiff=0;
 optkeep=0; optinfo=0; optwidth=0;  optnums=0;  optbody=0; optabdiff=0;
+opterrata=0;
 optstrip=1;
 optstdout=0;
 
@@ -817,6 +1033,7 @@
       --abdiff)	opthtml=0; optdiff=0; optchbars=0; optwdiff=0; optabdiff=1;;
       --ab-diff)opthtml=0; optdiff=0; optchbars=0; optwdiff=0; optabdiff=1;;
       --rfc-editor-diff)opthtml=0; optdiff=0; optchbars=0; optwdiff=0; optabdiff=1;;
+      --errata) opterrata=1;; # NOTE(review): unlike --abdiff this leaves opthtml at its default of 1, so the html diff also runs and erratadiff then rewrites $tempout - confirm this is intended
       --version)echo -e "$basename\t$version"; exit 0;;
       --browse) optshow=1;;
       --nowdiff)optnowdiff=1;;
@@ -828,6 +1045,7 @@
       --body)	optbody=1;;
       --nostrip)optstrip=0; optbody=0;;
       --stdout) optstdout=1;;
+      --ctxdepth) prelines=$2; shift;; # context depth: handed to diff as -U $prelines and read by the awk helpers via ENVIRON["prelines"]
       --)	shift; break;;
 
       -r) options="$options $1 $2"; rev=$2; shift;;
@@ -947,6 +1165,9 @@
     if [ $optabdiff -gt 0 ]; then
       outfile=$outbase.changes
     fi
+    if [ $opterrata -gt 0 ]; then # errata mode: checked last, so this replaces any outfile chosen above
+      outfile=$outbase.errata.html
+    fi
 fi
 if [ "$outfile" ]; then
    tempout=$(basename $outfile)
@@ -960,11 +1181,12 @@
    if [ -n "$wdiffbin" ]; then
       wdiffver=$($wdiffbin --version 2>/dev/null | grep "wdiff.\+[0-9]\.[0-9]")
       if [ -z "$wdiffver" ]; then
-        wdiffbin="";
-	echo -en "\n  Found wdiff, but it reported no recognisable version."
+        # wdiffbin stays set: an unparsable version banner is no longer fatal
+	wdiffver='0.5'
+	echo "unrecognisable wdiff version, assuming 0.5." >&2
       fi
    else
-      echo -en "\n  Couldn't find wdiff."
+      echo -e "\n  Couldn't find wdiff." >&2   # -e so \n is a newline; >&2 does not truncate a redirected stderr file
    fi
    if [ -z "$wdiffbin" ]; then echo " Falling back to builtin diff colouring..."; fi
    export wdiffbin
@@ -997,25 +1219,30 @@
 fi
 
 if [ $opthtml -gt 0 ]; then
-   diff -bBwd -u$prelines 1/$base1 2/$base2 | tee $workdir/diff | htmldiff > $tempout
+   diff -bBwd -U $prelines 1/$base1 2/$base2 | tee $workdir/diff | htmldiff > $tempout
 fi
 if [ $optchbars -gt 0 ]; then
-   diff -bBwd -u10000 1/$base1 2/$base2 | tee $workdir/diff | grep -v "^-" | tail +3 | sed 's/^+/|/' > $tempout
+   diff -bBwd -U 10000 1/$base1 2/$base2 | tee $workdir/diff | grep -v "^-" | tail +3 | sed 's/^+/|/' > $tempout
 fi
 if [ $optdiff -gt 0 ]; then
-   diff -bBwd -u$prelines 1/$base1 2/$base2 | tee $workdir/diff > $tempout
+   diff -bBwd -U $prelines 1/$base1 2/$base2 | tee $workdir/diff > $tempout
 fi
 if [ $optabdiff -gt 0 ]; then
-   diff -wd -u1000 1/$base1 2/$base2 | tee $workdir/diff | abdiff
+   diff -wd -U 1000 1/$base1 2/$base2 | tee $workdir/diff | abdiff > $tempout
+fi
+if [ $opterrata -gt 0 ]; then
+   diff -w -U $prelines 1/$base1 2/$base2 | tee $workdir/diff | erratadiff > $tempout
 fi
 if [ $optwdiff -gt 0 ]; then
    wdiff -a 1/$base1 2/$base2
 fi
 
-if [ $optstdout -gt 0 ]; then
-  cat $tempout; rm $tmpout
+if [ -n "$tempout" ] && [ $optstdout -gt 0 ]; then   # explicit -n: an empty $tempout must not turn the test into "[ -a 0 -gt 0 ]"
+  cat "$tempout"; rm "$tempout"
 else
+if [ -n "$tempout" ] && [ -n "$outfile" ]; then      # "[ $tempout -a $outfile ]" is true when both are empty
   cd -; if [ -f $workdir/$tempout ]; then mv $workdir/$tempout $outfile; fi
+fi;
 fi
 
 if [ $optshow -gt 0 ]; then
