diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go index feef431eb1c..2ef90a87321 100644 --- a/src/pkg/html/parse.go +++ b/src/pkg/html/parse.go @@ -371,6 +371,13 @@ func inBodyIM(p *parser) (insertionMode, bool) { } else { p.addElement(p.tok.Data, p.tok.Attr) } + case "h1", "h2", "h3", "h4", "h5", "h6": + // TODO: auto-insert

</p> if necessary. + switch n := p.top(); n.Data { + case "h1", "h2", "h3", "h4", "h5", "h6": + p.pop() + } + p.addElement(p.tok.Data, p.tok.Attr) case "b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u": p.reconstructActiveFormattingElements() p.addFormattingElement(p.tok.Data, p.tok.Attr) diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go index dbfc57f666f..d153533b588 100644 --- a/src/pkg/html/parse_test.go +++ b/src/pkg/html/parse_test.go @@ -11,6 +11,7 @@ import ( "io" "io/ioutil" "os" + "strings" "testing" ) @@ -124,9 +125,14 @@ func TestParser(t *testing.T) { rc := make(chan io.Reader) go readDat(filename, rc) // TODO(nigeltao): Process all test cases, not just a subset. - for i := 0; i < 21; i++ { + for i := 0; i < 22; i++ { // Parse the #data section. - doc, err := Parse(<-rc) + b, err := ioutil.ReadAll(<-rc) + if err != nil { + t.Fatal(err) + } + text := string(b) + doc, err := Parse(strings.NewReader(text)) if err != nil { t.Fatal(err) } @@ -139,13 +145,13 @@ func TestParser(t *testing.T) { t.Fatal(err) } // Compare the parsed tree to the #document section. - b, err := ioutil.ReadAll(<-rc) + b, err = ioutil.ReadAll(<-rc) if err != nil { t.Fatal(err) } expected := string(b) if actual != expected { - t.Errorf("%s test #%d, actual vs expected:\n----\n%s----\n%s----", filename, i, actual, expected) + t.Errorf("%s test #%d %q, actual vs expected:\n----\n%s----\n%s----", filename, i, text, actual, expected) } } } diff --git a/src/pkg/html/token.go b/src/pkg/html/token.go index dc2a6ec5c31..d6388385051 100644 --- a/src/pkg/html/token.go +++ b/src/pkg/html/token.go @@ -277,7 +277,7 @@ func (z *Tokenizer) trim(i int) int { return k } -// lower finds the largest alphabetic [a-zA-Z]* word at the start of z.buf[i:] +// lower finds the largest alphabetic [0-9A-Za-z]* word at the start of z.buf[i:] // and returns that word lower-cased, as well as the trimmed cursor location // after that word. 
func (z *Tokenizer) lower(i int) ([]byte, int) { @@ -285,8 +285,9 @@ func (z *Tokenizer) lower(i int) ([]byte, int) { loop: for ; i < z.p1; i++ { c := z.buf[i] - // TODO(nigeltao): Check what '0' <= c && c <= '9' should do. switch { + case '0' <= c && c <= '9': + // No-op. case 'A' <= c && c <= 'Z': z.buf[i] = c + 'a' - 'A' case 'a' <= c && c <= 'z':