permit escaped text segments which pass through tabwriter

undisturbed and uninterpreted R=rsc DELTA=141 (82 added, 23 deleted, 36 changed) OCL=35747 CL=35769
2024-11-26 04:07:59 -07:00 · 2009-10-15 09:28:52 -07:00 · 2009-10-15 09:28:52 -07:00 · b55e6d1ba5
commit b55e6d1ba5
parent da829d6608
2 changed files with 110 additions and 51 deletions
--- a/src/pkg/tabwriter/tabwriter.go
+++ b/src/pkg/tabwriter/tabwriter.go
@ -51,6 +51,11 @@ type cell struct {
 // terminated by horizontal (or "hard") tabs are not affected by
 // this flag.
 //
 // A segment of text may be escaped by bracketing it with Escape
 // characters. The tabwriter strips the Escape characters but otherwise
 // passes escaped text segments through unchanged. In particular, it
 // does not interpret any tabs or line breaks within the segment.
 //
 // The Writer assumes that all characters have the same width;
 // this may not be true in some fonts, especially with certain
 // UTF-8 characters.
@ -62,8 +67,8 @@ type cell struct {
 // The formfeed character ('\f') acts like a newline but it also
 // terminates all columns in the current line (effectively calling
 // Flush). Cells in the next line start new columns. Unless found
-// inside an HTML tag, formfeed characters appear as newlines in
+// inside an HTML tag or inside an escaped text segment, formfeed
-// the output.
+// characters appear as newlines in the output.
 //
 // The Writer must buffer input internally, because proper spacing
 // of one line may depend on the cells in future lines. Clients must
@ -81,7 +86,7 @@ type Writer struct {
 	buf bytes.Buffer;  // collected text w/o tabs, newlines, or formfeed chars
 	pos int;  // buffer position up to which width of incomplete cell has been computed
 	cell cell;  // current incomplete cell; cell.width is up to buf[pos] w/o ignored sections
-	html_char byte;  // terminating char of html tag/entity, or 0 ('>', ';', or 0)
+	endChar byte;  // terminating char of escaped sequence (Escape for escapes, '>', ';' for HTML tags/entities, or 0)
 	lines vector.Vector;  // list if lines; each line is a list of cells
 	widths vector.IntVector;  // list of column widths in runes - re-used during formatting
 }
@ -102,7 +107,7 @@ func (b *Writer) reset() {
 	b.buf.Reset();
 	b.pos = 0;
 	b.cell = cell{};
-	b.html_char = 0;
+	b.endChar = 0;
 	b.lines.Init(0);
 	b.widths.Init(0);
 	b.addLine();
@ -378,41 +383,53 @@ func (b *Writer) format(pos0 int, line0, line1 int) (pos int, err os.Error) {
 }
-// Append text to current cell. Only update the cell width if updateWidth
+// Append text to current cell.
-// is set (the cell width can only be updated if we know that we cannot be
+func (b *Writer) append(text []byte) {
 // in the middle of a UTF-8 encoded Unicode character).
 //
 func (b *Writer) append(text []byte, updateWidth bool) {
 	b.buf.Write(text);
 	b.cell.size += len(text);
-	if updateWidth {
+}
 // Update the cell width.
 func (b *Writer) updateWidth() {
 	b.cell.width += utf8.RuneCount(b.buf.Bytes()[b.pos : b.buf.Len()]);
 	b.pos = b.buf.Len();
 	}
 }
-// Start HTML-escape mode.
+// To escape a text segment, bracket it with Escape characters.
-func (b *Writer) startHTML(ch byte) {
+// For instance, the tab in this string "Ignore this tab: \xff\t\xff"
-	if ch == '<' {
+// does not terminate a cell and constitutes a single character of
-		b.html_char = '>';
+// width one for formatting purposes.
 	} else {
 		b.html_char = ';';
 	}
 }
 // Terminate HTML-escape mode. If the HTML text was an entity, its width
 // is assumed to be one for formatting purposes; otherwise it assumed to
 // be zero.
 //
-func (b *Writer) terminateHTML() {
+// The value 0xff was chosen because it cannot appear in a valid UTF-8 sequence.
-	if b.html_char == ';' {
+//
-		// was entity, count as one rune
+const Escape ='\xff'
-		b.cell.width++;
+
 // Start escaped mode.
 func (b *Writer) startEscape(ch byte) {
 	switch ch {
 	case Escape: b.endChar = Escape;
 	case '<': b.endChar = '>';
 	case '&': b.endChar = ';';
 	}
 }
 // Terminate escaped mode. If the escaped text was an HTML tag, its width
 // is assumed to be zero for formatting purposes; if it was an HTML entity,
 // its width is assumed to be one. In all other cases, the width is the
 // unicode width of the text.
 //
 func (b *Writer) endEscape() {
 	switch b.endChar {
 	case Escape: b.updateWidth();
 	case '>': // tag of zero width
 	case ';': b.cell.width++;  // entity, count as one rune
 	}
 	b.pos = b.buf.Len();
-	b.html_char = 0;
+	b.endChar = 0;
 }
@ -430,15 +447,15 @@ func (b *Writer) terminateCell(htab bool) int {
 // Flush should be called after the last call to Write to ensure
 // that any data buffered in the Writer is written to output. Any
-// incomplete HTML tag or entity at the end is simply considered
+// incomplete escape sequence at the end is simply considered
 // complete for formatting purposes.
 //
 func (b *Writer) Flush() os.Error {
 	// add current cell if not empty
 	if b.cell.size > 0 {
-		if b.html_char != 0 {
+		if b.endChar != 0 {
-			// inside html tag/entity - terminate it even if incomplete
+			// inside escape - terminate it even if incomplete
-			b.terminateHTML();
+			b.endEscape();
 		}
 		b.terminateCell(false);
 	}
@ -457,17 +474,18 @@ func (b *Writer) Flush() os.Error {
 // The only errors returned are ones encountered
 // while writing to the underlying output stream.
 //
-func (b *Writer) Write(buf []byte) (written int, err os.Error) {
+func (b *Writer) Write(buf []byte) (n int, err os.Error) {
 	// split text into cells
-	i0 := 0;
+	n = 0;
 	for i, ch := range buf {
-		if b.html_char == 0 {
+		if b.endChar == 0 {
-			// outside html tag/entity
+			// outside escape
 			switch ch {
 			case '\t', '\v', '\n', '\f':
 				// end of cell
-				b.append(buf[i0 : i], true);
+				b.append(buf[n : i]);
-				i0 = i+1;  // exclude ch from (next) cell
+				b.updateWidth();
 				n = i+1;  // ch consumed
 				ncells := b.terminateCell(ch == '\t');
 				if ch == '\n' || ch == '\f' {
 					// terminate line
@ -479,35 +497,48 @@ func (b *Writer) Write(buf []byte) (written int, err os.Error) {
 						// line is ignored by format()), thus we can flush the
 						// Writer contents.
 						if err = b.Flush(); err != nil {
-							return i0, err;
+							return;
 						}
 					}
 				}
 			case Escape:
 				// start of escaped sequence
 				b.append(buf[n : i]);
 				b.updateWidth();
 				n = i+1;  // exclude Escape
 				b.startEscape(Escape);
 			case '<', '&':
 				// possibly an html tag/entity
 				if b.flags & FilterHTML != 0 {
 					// begin of tag/entity
-					b.append(buf[i0 : i], true);
+					b.append(buf[n : i]);
-					i0 = i;
+					b.updateWidth();
-					b.startHTML(ch);
+					n = i;
 					b.startEscape(ch);
 				}
 			}
 		} else {
-			// inside html tag/entity
+			// inside escape
-			if ch == b.html_char {
+			if ch == b.endChar {
 				// end of tag/entity
-				b.append(buf[i0 : i+1], false);
+				j := i+1;
-				i0 = i+1;  // exclude ch from (next) cell
+				if ch == Escape {
-				b.terminateHTML();
+					j = i;  // exclude Escape
 				}
 				b.append(buf[n : j]);
 				n = i+1;  // ch consumed
 				b.endEscape();
 			}
 		}
 	}
 	// append leftover text
-	b.append(buf[i0 : len(buf)], false);
+	b.append(buf[n : len(buf)]);
-	return len(buf), nil;
+	n = len(buf);
 	return;
 }
--- a/src/pkg/tabwriter/tabwriter_test.go
+++ b/src/pkg/tabwriter/tabwriter_test.go
@ -113,19 +113,47 @@ type entry struct {
 var tests = []entry {
 	entry{
-		"1",
+		"1a",
 		8, 1, '.', 0,
 		"",
 		""
 	},
 	entry{
-		"1 debug",
+		"1a debug",
 		8, 1, '.', Debug,
 		"",
 		""
 	},
 	entry{
 		"1b esc",
 		8, 1, '.', 0,
 		"\xff\xff",
 		""
 	},
 	entry{
 		"1c esc",
 		8, 1, '.', 0,
 		"\xff\t\xff",
 		"\t"
 	},
 	entry{
 		"1d esc",
 		8, 1, '.', 0,
 		"\xff\"foo\t\n\tbar\"\xff",
 		"\"foo\t\n\tbar\"",
 	},
 	entry{
 		"1e esc",
 		8, 1, '.', 0,
 		"abc\xff\tdef",  // unterminated escape
 		"abc\tdef",
 	},
 	entry{
 		"2",
 		8, 1, '.', 0,