1
0
mirror of https://github.com/golang/go synced 2024-09-24 15:30:13 -06:00

html: a first step at parsing foreign content (MathML, SVG).

Nodes now have a Namespace field.

Pass adoption01.dat, test 12:
<a><svg><tr><input></a>

| <html>
|   <head>
|   <body>
|     <a>
|       <svg svg>
|         <svg tr>
|           <svg input>

The other adoption01.dat tests already passed.

R=andybalholm
CC=golang-dev
https://golang.org/cl/5467075
This commit is contained in:
Nigel Tao 2011-12-13 13:52:47 +11:00
parent 0643aacee9
commit b9064fb132
5 changed files with 127 additions and 12 deletions

View File

@ -11,6 +11,7 @@ GOFILES=\
doctype.go\
entity.go\
escape.go\
foreign.go\
node.go\
parse.go\
render.go\

56
src/pkg/html/foreign.go Normal file
View File

@ -0,0 +1,56 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
// Section 12.2.5.5.
//
// breakout is a set of lower-case HTML tag names, grouped below by initial
// letter. Looking up a member yields true; any other name yields false (the
// map's zero value). Judging by its name and the section it cites, these are
// presumably the start tags that break out of foreign content.
var breakout = map[string]bool{
	"b": true, "big": true, "blockquote": true, "body": true, "br": true,
	"center": true, "code": true,
	"dd": true, "div": true, "dl": true, "dt": true,
	"em": true, "embed": true,
	"font": true,
	"h1": true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true,
	"head": true, "hr": true,
	"i": true, "img": true,
	"li": true, "listing": true,
	"menu": true, "meta": true,
	"nobr": true,
	"ol":   true,
	"p":    true, "pre": true,
	"ruby": true,
	"s":    true, "small": true, "span": true, "strong": true, "strike": true,
	"sub": true, "sup": true,
	"table": true, "tt": true,
	"u": true, "ul": true,
	"var": true,
}
// TODO: add look-up tables for MathML and SVG adjustments.

View File

@ -24,14 +24,15 @@ var scopeMarker = Node{Type: scopeMarkerNode}
// A Node consists of a NodeType and some Data (tag name for element nodes,
// content for text) and are part of a tree of Nodes. Element nodes may also
// contain a slice of Attributes. Data is unescaped, so that it looks like
// "a<b" rather than "a&lt;b".
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
// that it looks like "a<b" rather than "a&lt;b".
type Node struct {
Parent *Node
Child []*Node
Type NodeType
Data string
Attr []Attribute
Parent *Node
Child []*Node
Type NodeType
Data string
Namespace string
Attr []Attribute
}
// Add adds a node as a child of n.

View File

@ -192,9 +192,10 @@ func (p *parser) addText(text string) {
// addElement calls addChild with an element node.
func (p *parser) addElement(tag string, attr []Attribute) {
p.addChild(&Node{
Type: ElementNode,
Data: tag,
Attr: attr,
Type: ElementNode,
Data: tag,
Namespace: p.top().Namespace,
Attr: attr,
})
}
@ -318,7 +319,10 @@ func (p *parser) resetInsertionMode() {
case "html":
p.im = beforeHeadIM
default:
continue
if p.top().Namespace == "" {
continue
}
p.im = inForeignContentIM
}
return
}
@ -792,6 +796,21 @@ func inBodyIM(p *parser) bool {
p.reconstructActiveFormattingElements()
p.framesetOK = false
p.addElement(p.tok.Data, p.tok.Attr)
case "math", "svg":
p.reconstructActiveFormattingElements()
namespace := ""
if p.tok.Data == "math" {
// TODO: adjust MathML attributes.
namespace = "mathml"
} else {
// TODO: adjust SVG attributes.
namespace = "svg"
}
// TODO: adjust foreign attributes.
p.addElement(p.tok.Data, p.tok.Attr)
p.top().Namespace = namespace
p.im = inForeignContentIM
return true
case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
// Ignore the token.
default:
@ -1557,6 +1576,39 @@ func afterAfterFramesetIM(p *parser) bool {
return true
}
// TODO: fix up the other insertion modes' section numbers to match the latest spec.
// Section 12.2.5.5.
//
// inForeignContentIM is the insertion mode for tokens seen inside foreign
// (MathML or SVG) content. It always returns true.
//
//   - A comment token is added as a CommentNode child.
//   - A start tag is added as an element, provided the namespace of the
//     current top node is "mathml" or "svg"; any other namespace is treated
//     as a broken parser invariant and panics.
//   - End tags are not handled yet (TODO) and are dropped.
//   - Every other token type, text included, is ignored.
func inForeignContentIM(p *parser) bool {
	if p.tok.Type == CommentToken {
		p.addChild(&Node{
			Data: p.tok.Data,
			Type: CommentNode,
		})
		return true
	}
	if p.tok.Type != StartTagToken {
		// TODO: handle EndTagToken. For now end tags, like all remaining
		// token types, are ignored.
		return true
	}

	if breakout[p.tok.Data] {
		// TODO.
	}

	// Only the two foreign namespaces are valid here.
	// TODO: adjust MathML attributes (when the namespace is "mathml").
	// TODO: adjust SVG tag names (when the namespace is "svg").
	// TODO: adjust SVG attributes (when the namespace is "svg").
	if ns := p.top().Namespace; ns != "mathml" && ns != "svg" {
		panic("html: bad parser state: unexpected namespace")
	}

	// TODO: adjust foreign attributes.
	p.addElement(p.tok.Data, p.tok.Attr)
	return true
}
func (p *parser) parse() error {
// Iterate until EOF. Any other error will cause an early return.
consumed := true

View File

@ -98,7 +98,11 @@ func dumpLevel(w io.Writer, n *Node, level int) error {
case DocumentNode:
return errors.New("unexpected DocumentNode")
case ElementNode:
fmt.Fprintf(w, "<%s>", n.Data)
if n.Namespace != "" {
fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
} else {
fmt.Fprintf(w, "<%s>", n.Data)
}
for _, a := range n.Attr {
io.WriteString(w, "\n")
dumpIndent(w, level+1)
@ -161,6 +165,7 @@ func TestParser(t *testing.T) {
n int
}{
// TODO(nigeltao): Process all the test cases from all the .dat files.
{"adoption01.dat", -1},
{"doctype01.dat", -1},
{"tests1.dat", -1},
{"tests2.dat", -1},