From 1b9734b995f3d79ec9a412d7e33d3d50ad1a32be Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Thu, 18 Jun 2009 17:06:08 -0700 Subject: [PATCH] 1) Fix a problem with tabwriter.Flush: any pending text not yet in a cell makes a final cell in that line (this showed up as occasionally missing single spaces in godoc-formatted declarations that fit on a single line) 2) Cleaned up tabwriter implementation a bit: - replaced local unicodeLen() with utf8.RuneCount() - instead of having 2 parallel arrays for line widths and sizes, have a single array of cells containing a width and size - factored code a bit better - added more comments - added testnames to tabwriter tests - added more test cases and fixed a broken test case that now works correctly R=r DELTA=279 (133 added, 62 deleted, 84 changed) OCL=30509 CL=30514 --- src/pkg/tabwriter/tabwriter.go | 241 ++++++++++++++++------------ src/pkg/tabwriter/tabwriter_test.go | 72 +++++++-- 2 files changed, 192 insertions(+), 121 deletions(-) diff --git a/src/pkg/tabwriter/tabwriter.go b/src/pkg/tabwriter/tabwriter.go index 6799f72d10..aa942d482e 100644 --- a/src/pkg/tabwriter/tabwriter.go +++ b/src/pkg/tabwriter/tabwriter.go @@ -20,6 +20,16 @@ import ( // ---------------------------------------------------------------------------- // Filter implementation +// A cell represents a segment of text delineated by tabs, form-feed, +// or newline chars. The text itself is stored in a separate buffer; +// cell only describes the segment's size in bytes and width in runes. +// +type cell struct { + size int; // cell size in bytes + width int; // cell width in runes +} + + // A Writer is a filter that inserts padding around // tab-delimited columns in its input to align them // in the output. @@ -37,8 +47,8 @@ import ( // UTF-8 characters. // // If a Writer is configured to filter HTML, HTML tags and entities -// are simply passed through and their widths are assumed to be zero -// for formatting purposes. +// are simply passed through. The widths of tags and entities are +// assumed to be zero (tags) and one (entities) for formatting purposes. // // The form feed character ('\f') acts like a newline but it also // terminates all columns in the current line (effectively calling @@ -59,22 +69,42 @@ type Writer struct { flags uint; // current state - html_char byte; // terminating char of html tag/entity, or 0 ('>', ';', or 0) buf io.ByteBuffer; // collected text w/o tabs, newlines, or form feed chars - size int; // size of incomplete cell in bytes - width int; // width of incomplete cell in runes up to buf[pos] w/o ignored sections pos int; // buffer position up to which width of incomplete cell has been computed - lines_size vector.Vector; // list of lines; each line is a list of cell sizes in bytes - lines_width vector.Vector; // list of lines; each line is a list of cell widths in runes + cell cell; // current incomplete cell; cell.width is up to buf[pos] w/o ignored sections + html_char byte; // terminating char of html tag/entity, or 0 ('>', ';', or 0) + lines vector.Vector; // list if lines; each line is a list of cells widths vector.IntVector; // list of column widths in runes - re-used during formatting } +func (b *Writer) addLine() { + b.lines.Push(vector.New(0)); +} + + +func (b *Writer) line(i int) *vector.Vector { + return b.lines.At(i).(*vector.Vector); +} + + +// Reset the current state. +func (b *Writer) reset() { + b.buf.Reset(); + b.pos = 0; + b.cell = cell{}; + b.html_char = 0; + b.lines.Init(0); + b.widths.Init(0); + b.addLine(); +} + + // Internal representation (current state): // // - all text written is appended to buf; form feed chars, tabs and newlines are stripped away // - at any given time there is a (possibly empty) incomplete cell at the end -// (the cell starts after a tab or newline) +// (the cell starts after a tab, form feed, or newline) // - size is the number of bytes belonging to the cell so far // - width is text width in runes of that cell from the start of the cell to // position pos; html tags and entities are excluded from this width if html @@ -94,12 +124,6 @@ type Writer struct { // buf start of incomplete cell pos -func (b *Writer) addLine() { - b.lines_size.Push(vector.NewIntVector(0)); - b.lines_width.Push(vector.NewIntVector(0)); -} - - // Formatting can be controlled with these flags. const ( // Ignore html tags and treat entities (starting with '&' @@ -144,22 +168,12 @@ func (b *Writer) Init(output io.Writer, cellwidth, padding int, padchar byte, fl } b.flags = flags; - b.lines_size.Init(0); - b.lines_width.Init(0); - b.widths.Init(0); - b.addLine(); // the very first line + b.reset(); return b; } -func (b *Writer) line(i int) (*vector.IntVector, *vector.IntVector) { - return - b.lines_size.At(i).(*vector.IntVector), - b.lines_width.At(i).(*vector.IntVector); -} - - // debugging support (keep code around) /* func (b *Writer) dump() { @@ -219,19 +233,19 @@ func (b *Writer) writePadding(textw, cellw int) os.Error { func (b *Writer) writeLines(pos0 int, line0, line1 int) (int, os.Error) { pos := pos0; for i := line0; i < line1; i++ { - line_size, line_width := b.line(i); - for j := 0; j < line_size.Len(); j++ { - s, w := line_size.At(j), line_width.At(j); + line := b.line(i); + for j := 0; j < line.Len(); j++ { + c := line.At(j).(cell); switch { default: // align left - if err := b.write0(b.buf.Data()[pos : pos + s]); err != nil { + if err := b.write0(b.buf.Data()[pos : pos + c.size]); err != nil { return pos, err; } - pos += s; + pos += c.size; if j < b.widths.Len() { - if err := b.writePadding(w, b.widths.At(j)); err != nil { + if err := b.writePadding(c.width, b.widths.At(j)); err != nil { return pos, err; } } @@ -239,24 +253,24 @@ func (b *Writer) writeLines(pos0 int, line0, line1 int) (int, os.Error) { case b.flags & AlignRight != 0: // align right if j < b.widths.Len() { - if err := b.writePadding(w, b.widths.At(j)); err != nil { + if err := b.writePadding(c.width, b.widths.At(j)); err != nil { return pos, err; } } - if err := b.write0(b.buf.Data()[pos : pos + s]); err != nil { + if err := b.write0(b.buf.Data()[pos : pos + c.size]); err != nil { return pos, err; } - pos += s; + pos += c.size; } } - if i+1 == b.lines_size.Len() { + if i+1 == b.lines.Len() { // last buffered line - we don't have a newline, so just write // any outstanding buffered data - if err := b.write0(b.buf.Data()[pos : pos + b.size]); err != nil { + if err := b.write0(b.buf.Data()[pos : pos + b.cell.size]); err != nil { return pos, err; } - pos += b.size; + pos += b.cell.size; } else { // not the last line - write newline if err := b.write0(newline); err != nil { @@ -273,9 +287,9 @@ func (b *Writer) format(pos0 int, line0, line1 int) (pos int, err os.Error) { column := b.widths.Len(); last := line0; for this := line0; this < line1; this++ { - line_size, line_width := b.line(this); + line := b.line(this); - if column < line_size.Len() - 1 { + if column < line.Len() - 1 { // cell exists in this column // (note that the last cell per line is ignored) @@ -289,10 +303,10 @@ func (b *Writer) format(pos0 int, line0, line1 int) (pos int, err os.Error) { // column block begin width := b.cellwidth; // minimal width for ; this < line1; this++ { - line_size, line_width = b.line(this); - if column < line_size.Len() - 1 { + line = b.line(this); + if column < line.Len() - 1 { // cell exists in this column => update width - w := line_width.At(column) + b.padding; + w := line.At(column).(cell).width + b.padding; if w > width { width = w; } @@ -316,40 +330,77 @@ func (b *Writer) format(pos0 int, line0, line1 int) (pos int, err os.Error) { } +// Append text to current cell. Only update the cell width if updateWidth +// is set (the cell width can only be updated if we know that we cannot be +// in the middle of a UTF-8 encoded Unicode character). +// +func (b *Writer) append(text []byte, updateWidth bool) { + b.buf.Write(text); + b.cell.size += len(text); + if updateWidth { + b.cell.width += utf8.RuneCount(b.buf.Data()[b.pos : b.buf.Len()]); + b.pos = b.buf.Len(); + } +} + + +// Start HTML-escape mode. +func (b *Writer) startHTML(ch byte) { + if ch == '<' { + b.html_char = '>'; + } else { + b.html_char = ';'; + } +} + + +// Terminate HTML-escape mode. If the HTML text was an entity, its width +// is assumed to be one for formatting purposes; otherwise it assumed to +// be zero. +// +func (b *Writer) terminateHTML() { + if b.html_char == ';' { + // was entity, count as one rune + b.cell.width++; + } + b.pos = b.buf.Len(); + b.html_char = 0; +} + + +// Terminate the current cell by adding it to the list of cells of the +// current line. Returns the number of cells in that line. +// +func (b *Writer) terminateCell() int { + line := b.line(b.lines.Len() - 1); + line.Push(b.cell); + b.cell = cell{}; + return line.Len(); +} + + // Flush should be called after the last call to Write to ensure -// that any data buffered in the Writer is written to output. +// that any data buffered in the Writer is written to output. Any +// incomplete HTML tag or entity at the end is simply considered +// complete for formatting purposes. // func (b *Writer) Flush() os.Error { - _, err := b.format(0, 0, b.lines_size.Len()); - // reset (even in the presence of errors) - b.buf.Reset(); - b.size, b.width = 0, 0; - b.pos = 0; - b.lines_size.Init(0); - b.lines_width.Init(0); - b.addLine(); - return err; -} - - -func unicodeLen(buf []byte) int { - l := 0; - for i := 0; i < len(buf); { - if buf[i] < utf8.RuneSelf { - i++; - } else { - rune, size := utf8.DecodeRune(buf[i : len(buf)]); - i += size; + // add current cell if not empty + if b.cell.size > 0 { + if b.html_char != 0 { + // inside html tag/entity - terminate it even if incomplete + b.terminateHTML(); } - l++; + b.terminateCell(); } - return l; -} + // format contents of buffer + _, err := b.format(0, 0, b.lines.Len()); -func (b *Writer) append(buf []byte) { - b.buf.Write(buf); - b.size += len(buf); + // reset, even in the presence of errors + b.reset(); + + return err; } @@ -358,31 +409,21 @@ func (b *Writer) append(buf []byte) { // while writing to the underlying output stream. // func (b *Writer) Write(buf []byte) (written int, err os.Error) { - i0, n := 0, len(buf); - // split text into cells - for i := 0; i < n; i++ { - ch := buf[i]; - + i0 := 0; + for i, ch := range buf { if b.html_char == 0 { // outside html tag/entity switch ch { case '\t', '\n', '\f': - b.append(buf[i0 : i]); - i0 = i + 1; // exclude ch from (next) cell - b.width += unicodeLen(b.buf.Data()[b.pos : b.buf.Len()]); - b.pos = b.buf.Len(); - - // terminate cell - last_size, last_width := b.line(b.lines_size.Len() - 1); - last_size.Push(b.size); - last_width.Push(b.width); - b.size, b.width = 0, 0; - + // end of cell + b.append(buf[i0 : i], true); + i0 = i+1; // exclude ch from (next) cell + ncells := b.terminateCell(); if ch != '\t' { // terminate line b.addLine(); - if ch == '\f' || last_size.Len() == 1 { + if ch == '\f' || ncells == 1 { // A '\f' always forces a flush. Otherwise, if the previous // line has only one cell which does not have an impact on // the formatting of the following lines (the last cell per @@ -395,37 +436,29 @@ func (b *Writer) Write(buf []byte) (written int, err os.Error) { } case '<', '&': + // possibly an html tag/entity if b.flags & FilterHTML != 0 { - b.append(buf[i0 : i]); + // begin of tag/entity + b.append(buf[i0 : i], true); i0 = i; - b.width += unicodeLen(b.buf.Data()[b.pos : b.buf.Len()]); - b.pos = -1; // preventative - should not be used (will cause index out of bounds) - if ch == '<' { - b.html_char = '>'; - } else { - b.html_char = ';'; - } + b.startHTML(ch); } } } else { // inside html tag/entity if ch == b.html_char { - // reached the end of tag/entity - b.append(buf[i0 : i + 1]); - i0 = i + 1; - if b.html_char == ';' { - b.width++; // count as one char - } - b.pos = b.buf.Len(); - b.html_char = 0; + // end of tag/entity + b.append(buf[i0 : i+1], false); + i0 = i+1; // exclude ch from (next) cell + b.terminateHTML(); } } } // append leftover text - b.append(buf[i0 : n]); - return n, nil; + b.append(buf[i0 : len(buf)], false); + return len(buf), nil; } diff --git a/src/pkg/tabwriter/tabwriter_test.go b/src/pkg/tabwriter/tabwriter_test.go index 7026446e62..7967a46743 100644 --- a/src/pkg/tabwriter/tabwriter_test.go +++ b/src/pkg/tabwriter/tabwriter_test.go @@ -47,31 +47,31 @@ func (b *buffer) String() string { } -func write(t *testing.T, w *tabwriter.Writer, src string) { +func write(t *testing.T, testname string, w *tabwriter.Writer, src string) { written, err := io.WriteString(w, src); if err != nil { - t.Errorf("--- src:\n%s\n--- write error: %v\n", src, err); + t.Errorf("--- test: %s\n--- src:\n%s\n--- write error: %v\n", testname, src, err); } if written != len(src) { - t.Errorf("--- src:\n%s\n--- written = %d, len(src) = %d\n", src, written, len(src)); + t.Errorf("--- test: %s\n--- src:\n%s\n--- written = %d, len(src) = %d\n", testname, src, written, len(src)); } } -func verify(t *testing.T, w *tabwriter.Writer, b *buffer, src, expected string) { +func verify(t *testing.T, testname string, w *tabwriter.Writer, b *buffer, src, expected string) { err := w.Flush(); if err != nil { - t.Errorf("--- src:\n%s\n--- flush error: %v\n", src, err); + t.Errorf("--- test: %s\n--- src:\n%s\n--- flush error: %v\n", testname, src, err); } res := b.String(); if res != expected { - t.Errorf("--- src:\n%s\n--- found:\n%s\n--- expected:\n%s\n", src, res, expected) + t.Errorf("--- test: %s\n--- src:\n%s\n--- found:\n%s\n--- expected:\n%s\n", testname, src, res, expected) } } -func check(t *testing.T, tabwidth, padding int, padchar byte, flags uint, src, expected string) { +func check(t *testing.T, testname string, tabwidth, padding int, padchar byte, flags uint, src, expected string) { var b buffer; b.init(1000); @@ -80,30 +80,31 @@ func check(t *testing.T, tabwidth, padding int, padchar byte, flags uint, src, e // write all at once b.clear(); - write(t, &w, src); - verify(t, &w, &b, src, expected); + write(t, testname, &w, src); + verify(t, testname, &w, &b, src, expected); // write byte-by-byte b.clear(); for i := 0; i < len(src); i++ { - write(t, &w, src[i : i+1]); + write(t, testname, &w, src[i : i+1]); } - verify(t, &w, &b, src, expected); + verify(t, testname, &w, &b, src, expected); // write using Fibonacci slice sizes b.clear(); for i, d := 0, 0; i < len(src); { - write(t, &w, src[i : i+d]); + write(t, testname, &w, src[i : i+d]); i, d = i+d, d+1; if i+d > len(src) { d = len(src) - i; } } - verify(t, &w, &b, src, expected); + verify(t, testname, &w, &b, src, expected); } type entry struct { + testname string; tabwidth, padding int; padchar byte; flags uint; @@ -113,108 +114,133 @@ type entry struct { var tests = []entry { entry{ + "1", 8, 1, '.', 0, "", "" }, entry{ + "2", 8, 1, '.', 0, "\n\n\n", "\n\n\n" }, entry{ + "3", 8, 1, '.', 0, "a\nb\nc", "a\nb\nc" }, entry{ + "4a", 8, 1, '.', 0, "\t", // '\t' terminates an empty cell on last line - nothing to print "" }, entry{ + "4b", 8, 1, '.', tabwriter.AlignRight, "\t", // '\t' terminates an empty cell on last line - nothing to print "" }, entry{ + "5", 8, 1, '.', 0, "*\t*", - "**" + "*.......*" }, entry{ + "5b", 8, 1, '.', 0, "*\t*\n", "*.......*\n" }, entry{ + "5c", 8, 1, '.', 0, "*\t*\t", "*.......*" }, entry{ + "5d", 8, 1, '.', tabwriter.AlignRight, "*\t*\t", ".......**" }, entry{ + "6", 8, 1, '.', 0, "\t\n", "........\n" }, entry{ + "7a", 8, 1, '.', 0, "a) foo", "a) foo" }, entry{ + "7b", 8, 1, ' ', 0, - "b) foo\tbar", // "bar" is not in any cell - not formatted, just flushed - "b) foobar" + "b) foo\tbar", + "b) foo bar" }, entry{ + "7c", 8, 1, '.', 0, "c) foo\tbar\t", "c) foo..bar" }, entry{ + "7d", 8, 1, '.', 0, "d) foo\tbar\n", "d) foo..bar\n" }, entry{ + "7e", 8, 1, '.', 0, "e) foo\tbar\t\n", "e) foo..bar.....\n" }, entry{ + "7f", 8, 1, '.', tabwriter.FilterHTML, "f) f<o\tbar\t\n", "f) f<o..bar.....\n" }, entry{ + "7g", + 8, 1, '.', tabwriter.FilterHTML, + "g) f<o\tbar\t non-terminated entity &", + "g) f<o..bar..... non-terminated entity &" + }, + + entry{ + "8", 8, 1, '*', 0, "Hello, world!\n", "Hello, world!\n" }, entry{ + "9a", 0, 0, '.', 0, "1\t2\t3\t4\n" "11\t222\t3333\t44444\n", @@ -224,6 +250,7 @@ var tests = []entry { }, entry{ + "9b", 0, 0, '.', tabwriter.FilterHTML, "1\t2\t3\t4\n" // \f inside HTML is ignored "11\t222\t3333\t44444\n", @@ -233,6 +260,7 @@ var tests = []entry { }, entry{ + "9c", 0, 0, '.', 0, "1\t2\t3\t4\f" // \f causes a newline and flush "11\t222\t3333\t44444\n", @@ -242,18 +270,21 @@ var tests = []entry { }, entry{ + "10a", 5, 0, '.', 0, "1\t2\t3\t4\n", "1....2....3....4\n" }, entry{ + "10b", 5, 0, '.', 0, "1\t2\t3\t4\t\n", "1....2....3....4....\n" }, entry{ + "11", 8, 1, '.', 0, "本\tb\tc\n" "aa\t\u672c\u672c\u672c\tcccc\tddddd\n" @@ -265,6 +296,7 @@ var tests = []entry { }, entry{ + "12a", 8, 1, ' ', tabwriter.AlignRight, "a\tè\tc\t\n" "aa\tèèè\tcccc\tddddd\t\n" @@ -276,6 +308,7 @@ var tests = []entry { }, entry{ + "12b", 2, 0, ' ', 0, "a\tb\tc\n" "aa\tbbb\tcccc\n" @@ -287,6 +320,7 @@ var tests = []entry { }, entry{ + "12c", 8, 1, '_', 0, "a\tb\tc\n" "aa\tbbb\tcccc\n" @@ -298,6 +332,7 @@ var tests = []entry { }, entry{ + "13a", 4, 1, '-', 0, "4444\t日本語\t22\t1\t333\n" "999999999\t22\n" @@ -317,6 +352,7 @@ var tests = []entry { }, entry{ + "13b", 4, 3, '.', 0, "4444\t333\t22\t1\t333\n" "999999999\t22\n" @@ -336,6 +372,7 @@ var tests = []entry { }, entry{ + "13c", 8, 1, '\t', tabwriter.FilterHTML, "4444\t333\t22\t1\t333\n" "999999999\t22\n" @@ -355,6 +392,7 @@ var tests = []entry { }, entry{ + "14", 0, 2, ' ', tabwriter.AlignRight, ".0\t.3\t2.4\t-5.1\t\n" "23.0\t12345678.9\t2.4\t-989.4\t\n" @@ -375,6 +413,6 @@ var tests = []entry { func Test(t *testing.T) { for _, e := range tests { - check(t, e.tabwidth, e.padding, e.padchar, e.flags, e.src, e.expected); + check(t, e.testname, e.tabwidth, e.padding, e.padchar, e.flags, e.src, e.expected); } }