2010-12-06 18:02:36 -07:00
|
|
|
// Copyright 2010 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package html
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"bytes"
|
2011-11-01 20:04:37 -06:00
|
|
|
"errors"
|
2010-12-06 18:02:36 -07:00
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"io/ioutil"
|
|
|
|
"os"
|
2010-12-14 17:39:56 -07:00
|
|
|
"strings"
|
2010-12-06 18:02:36 -07:00
|
|
|
"testing"
|
|
|
|
)
|
|
|
|
|
2011-11-01 20:04:37 -06:00
|
|
|
// pipeErr returns the read half of a pipe whose write half has already been
// closed with err, so that a consumer expecting an io.Reader observes err on
// its first Read instead of receiving any data.
func pipeErr(err error) io.Reader {
	pr, pw := io.Pipe()
	pw.CloseWithError(err)
	return pr
}
|
|
|
|
|
|
|
|
func readDat(filename string, c chan io.Reader) {
|
2011-11-06 15:38:40 -07:00
|
|
|
defer close(c)
|
2011-04-05 00:42:14 -06:00
|
|
|
f, err := os.Open("testdata/webkit/" + filename)
|
2010-12-06 18:02:36 -07:00
|
|
|
if err != nil {
|
|
|
|
c <- pipeErr(err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
|
|
|
|
// Loop through the lines of the file. Each line beginning with "#" denotes
|
|
|
|
// a new section, which is returned as a separate io.Reader.
|
|
|
|
r := bufio.NewReader(f)
|
|
|
|
var pw *io.PipeWriter
|
|
|
|
for {
|
|
|
|
line, err := r.ReadSlice('\n')
|
|
|
|
if err != nil {
|
|
|
|
if pw != nil {
|
|
|
|
pw.CloseWithError(err)
|
|
|
|
pw = nil
|
|
|
|
} else {
|
|
|
|
c <- pipeErr(err)
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if len(line) == 0 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if line[0] == '#' {
|
|
|
|
if pw != nil {
|
|
|
|
pw.Close()
|
|
|
|
}
|
|
|
|
var pr *io.PipeReader
|
|
|
|
pr, pw = io.Pipe()
|
|
|
|
c <- pr
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if line[0] != '|' {
|
|
|
|
// Strip the trailing '\n'.
|
|
|
|
line = line[:len(line)-1]
|
|
|
|
}
|
|
|
|
if pw != nil {
|
|
|
|
if _, err := pw.Write(line); err != nil {
|
|
|
|
pw.CloseWithError(err)
|
|
|
|
pw = nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-10-24 16:33:15 -06:00
|
|
|
// dumpIndent writes the prefix of one line of a tree dump to w: the marker
// "| " followed by two spaces for every level of nesting, which is the
// indentation used by the html5lib tree-construction test format that the
// dump is compared against. A level of zero or less writes only the marker.
// Write errors are not reported.
func dumpIndent(w io.Writer, level int) {
	io.WriteString(w, "| ")
	for i := 0; i < level; i++ {
		io.WriteString(w, "  ")
	}
}
|
|
|
|
|
2011-11-01 20:04:37 -06:00
|
|
|
func dumpLevel(w io.Writer, n *Node, level int) error {
|
2011-10-24 16:33:15 -06:00
|
|
|
dumpIndent(w, level)
|
2010-12-06 18:02:36 -07:00
|
|
|
switch n.Type {
|
|
|
|
case ErrorNode:
|
2011-11-01 20:04:37 -06:00
|
|
|
return errors.New("unexpected ErrorNode")
|
2010-12-06 18:02:36 -07:00
|
|
|
case DocumentNode:
|
2011-11-01 20:04:37 -06:00
|
|
|
return errors.New("unexpected DocumentNode")
|
2010-12-06 18:02:36 -07:00
|
|
|
case ElementNode:
|
2011-10-18 15:03:30 -06:00
|
|
|
fmt.Fprintf(w, "<%s>", n.Data)
|
2011-10-24 16:33:15 -06:00
|
|
|
for _, a := range n.Attr {
|
|
|
|
io.WriteString(w, "\n")
|
|
|
|
dumpIndent(w, level+1)
|
|
|
|
fmt.Fprintf(w, `%s="%s"`, a.Key, a.Val)
|
|
|
|
}
|
2010-12-06 18:02:36 -07:00
|
|
|
case TextNode:
|
2011-10-18 15:03:30 -06:00
|
|
|
fmt.Fprintf(w, "%q", n.Data)
|
2010-12-06 18:02:36 -07:00
|
|
|
case CommentNode:
|
2011-10-19 18:45:30 -06:00
|
|
|
fmt.Fprintf(w, "<!-- %s -->", n.Data)
|
2011-07-31 18:26:46 -06:00
|
|
|
case DoctypeNode:
|
2011-10-18 15:03:30 -06:00
|
|
|
fmt.Fprintf(w, "<!DOCTYPE %s>", n.Data)
|
2011-07-20 19:20:54 -06:00
|
|
|
case scopeMarkerNode:
|
2011-11-01 20:04:37 -06:00
|
|
|
return errors.New("unexpected scopeMarkerNode")
|
2010-12-06 18:02:36 -07:00
|
|
|
default:
|
2011-11-01 20:04:37 -06:00
|
|
|
return errors.New("unknown node type")
|
2010-12-06 18:02:36 -07:00
|
|
|
}
|
|
|
|
io.WriteString(w, "\n")
|
|
|
|
for _, c := range n.Child {
|
|
|
|
if err := dumpLevel(w, c, level+1); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2011-11-01 20:04:37 -06:00
|
|
|
func dump(n *Node) (string, error) {
|
2010-12-06 18:02:36 -07:00
|
|
|
if n == nil || len(n.Child) == 0 {
|
|
|
|
return "", nil
|
|
|
|
}
|
|
|
|
b := bytes.NewBuffer(nil)
|
2010-12-07 14:59:20 -07:00
|
|
|
for _, child := range n.Child {
|
|
|
|
if err := dumpLevel(b, child, 0); err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
2010-12-06 18:02:36 -07:00
|
|
|
}
|
|
|
|
return b.String(), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestParser(t *testing.T) {
|
2011-11-06 15:38:40 -07:00
|
|
|
testFiles := []struct {
|
|
|
|
filename string
|
|
|
|
// n is the number of test cases to run from that file.
|
|
|
|
// -1 means all test cases.
|
|
|
|
n int
|
|
|
|
}{
|
|
|
|
// TODO(nigeltao): Process all the test cases from all the .dat files.
|
2011-11-08 15:43:55 -07:00
|
|
|
{"tests1.dat", 92},
|
2011-11-06 15:38:40 -07:00
|
|
|
{"tests2.dat", 0},
|
|
|
|
{"tests3.dat", 0},
|
2010-12-06 18:02:36 -07:00
|
|
|
}
|
2011-11-06 15:38:40 -07:00
|
|
|
for _, tf := range testFiles {
|
2010-12-06 18:02:36 -07:00
|
|
|
rc := make(chan io.Reader)
|
2011-11-06 15:38:40 -07:00
|
|
|
go readDat(tf.filename, rc)
|
|
|
|
for i := 0; i != tf.n; i++ {
|
2010-12-06 18:02:36 -07:00
|
|
|
// Parse the #data section.
|
2011-11-06 15:38:40 -07:00
|
|
|
dataReader := <-rc
|
|
|
|
if dataReader == nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
b, err := ioutil.ReadAll(dataReader)
|
2010-12-14 17:39:56 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
text := string(b)
|
|
|
|
doc, err := Parse(strings.NewReader(text))
|
2010-12-06 18:02:36 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
2011-10-09 21:44:37 -06:00
|
|
|
got, err := dump(doc)
|
2010-12-06 18:02:36 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
// Skip the #error section.
|
2011-04-27 16:47:04 -06:00
|
|
|
if _, err := io.Copy(ioutil.Discard, <-rc); err != nil {
|
2010-12-06 18:02:36 -07:00
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
// Compare the parsed tree to the #document section.
|
2010-12-14 17:39:56 -07:00
|
|
|
b, err = ioutil.ReadAll(<-rc)
|
2010-12-06 18:02:36 -07:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
2011-10-09 21:44:37 -06:00
|
|
|
if want := string(b); got != want {
|
2011-11-06 15:38:40 -07:00
|
|
|
t.Errorf("%s test #%d %q, got vs want:\n----\n%s----\n%s----", tf.filename, i, text, got, want)
|
2011-10-12 18:53:15 -06:00
|
|
|
continue
|
2011-10-09 21:44:37 -06:00
|
|
|
}
|
2011-11-01 16:42:25 -06:00
|
|
|
if renderTestBlacklist[text] {
|
2011-10-23 01:36:01 -06:00
|
|
|
continue
|
|
|
|
}
|
2011-11-01 16:42:25 -06:00
|
|
|
// Check that rendering and re-parsing results in an identical tree.
|
2011-10-09 21:44:37 -06:00
|
|
|
pr, pw := io.Pipe()
|
|
|
|
go func() {
|
|
|
|
pw.CloseWithError(Render(pw, doc))
|
|
|
|
}()
|
|
|
|
doc1, err := Parse(pr)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
got1, err := dump(doc1)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if got != got1 {
|
2011-11-06 15:38:40 -07:00
|
|
|
t.Errorf("%s test #%d %q, got vs got1:\n----\n%s----\n%s----", tf.filename, i, text, got, got1)
|
2011-10-12 18:53:15 -06:00
|
|
|
continue
|
2010-12-06 18:02:36 -07:00
|
|
|
}
|
|
|
|
}
|
2011-11-06 15:38:40 -07:00
|
|
|
// Drain any untested cases for the test file.
|
|
|
|
for r := range rc {
|
|
|
|
if _, err := ioutil.ReadAll(r); err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
}
|
2010-12-06 18:02:36 -07:00
|
|
|
}
|
|
|
|
}
|
2011-11-01 16:42:25 -06:00
|
|
|
|
|
|
|
// Some test inputs result in parse trees that are not 'well-formed' despite
// following the HTML5 recovery algorithms. Rendering and re-parsing such a
// tree will not result in an exact clone of that tree. We blacklist such
// inputs from the render test.
var renderTestBlacklist = map[string]bool{
	// The second <a> will be reparented to the first <table>'s parent. This
	// results in an <a> whose parent is an <a>, which is not 'well-formed'.
	`<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y`: true,
	// More cases of <a> being reparented:
	`<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true,
	`<a><table><a></table><p><a><div><a>`:                                     true,
}
|