1
0
mirror of https://github.com/golang/go synced 2024-11-12 02:50:25 -07:00

html: a first step at parsing foreign content (MathML, SVG).

Nodes now have a Namespace field.

Pass adoption01.dat, test 12:
<a><svg><tr><input></a>

| <html>
|   <head>
|   <body>
|     <a>
|       <svg svg>
|         <svg tr>
|           <svg input>

The other adoption01.dat tests already passed.

R=andybalholm
CC=golang-dev
https://golang.org/cl/5467075
This commit is contained in:
Nigel Tao 2011-12-13 13:52:47 +11:00
parent 0643aacee9
commit b9064fb132
5 changed files with 127 additions and 12 deletions

View File

@ -11,6 +11,7 @@ GOFILES=\
doctype.go\ doctype.go\
entity.go\ entity.go\
escape.go\ escape.go\
foreign.go\
node.go\ node.go\
parse.go\ parse.go\
render.go\ render.go\

56
src/pkg/html/foreign.go Normal file
View File

@ -0,0 +1,56 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
// breakout is the set of start tag names that inForeignContentIM looks up
// when it sees a start tag. Every listed name maps to true; any other name
// yields the zero value false.
//
// NOTE(review): "font" is listed unconditionally here; the spec appears to
// treat it as a breakout tag only when it carries certain attributes — confirm
// before implementing the breakout branch.
//
// Section 12.2.5.5.
var breakout = func() map[string]bool {
	names := []string{
		"b", "big", "blockquote", "body", "br",
		"center", "code",
		"dd", "div", "dl", "dt",
		"em", "embed",
		"font",
		"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr",
		"i", "img",
		"li", "listing",
		"menu", "meta",
		"nobr",
		"ol",
		"p", "pre",
		"ruby",
		"s", "small", "span", "strong", "strike", "sub", "sup",
		"table", "tt",
		"u", "ul",
		"var",
	}
	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	return set
}()
// TODO: add look-up tables for MathML and SVG adjustments.

View File

@ -24,14 +24,15 @@ var scopeMarker = Node{Type: scopeMarkerNode}
// A Node consists of a NodeType and some Data (tag name for element nodes, // A Node consists of a NodeType and some Data (tag name for element nodes,
// content for text) and are part of a tree of Nodes. Element nodes may also // content for text) and are part of a tree of Nodes. Element nodes may also
// contain a slice of Attributes. Data is unescaped, so that it looks like // have a Namespace and contain a slice of Attributes. Data is unescaped, so
// "a<b" rather than "a&lt;b". // that it looks like "a<b" rather than "a&lt;b".
type Node struct { type Node struct {
Parent *Node Parent *Node
Child []*Node Child []*Node
Type NodeType Type NodeType
Data string Data string
Attr []Attribute Namespace string
Attr []Attribute
} }
// Add adds a node as a child of n. // Add adds a node as a child of n.

View File

@ -192,9 +192,10 @@ func (p *parser) addText(text string) {
// addElement calls addChild with an element node. // addElement calls addChild with an element node.
func (p *parser) addElement(tag string, attr []Attribute) { func (p *parser) addElement(tag string, attr []Attribute) {
p.addChild(&Node{ p.addChild(&Node{
Type: ElementNode, Type: ElementNode,
Data: tag, Data: tag,
Attr: attr, Namespace: p.top().Namespace,
Attr: attr,
}) })
} }
@ -318,7 +319,10 @@ func (p *parser) resetInsertionMode() {
case "html": case "html":
p.im = beforeHeadIM p.im = beforeHeadIM
default: default:
continue if p.top().Namespace == "" {
continue
}
p.im = inForeignContentIM
} }
return return
} }
@ -792,6 +796,21 @@ func inBodyIM(p *parser) bool {
p.reconstructActiveFormattingElements() p.reconstructActiveFormattingElements()
p.framesetOK = false p.framesetOK = false
p.addElement(p.tok.Data, p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr)
case "math", "svg":
p.reconstructActiveFormattingElements()
namespace := ""
if p.tok.Data == "math" {
// TODO: adjust MathML attributes.
namespace = "mathml"
} else {
// TODO: adjust SVG attributes.
namespace = "svg"
}
// TODO: adjust foreign attributes.
p.addElement(p.tok.Data, p.tok.Attr)
p.top().Namespace = namespace
p.im = inForeignContentIM
return true
case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr": case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
// Ignore the token. // Ignore the token.
default: default:
@ -1557,6 +1576,39 @@ func afterAfterFramesetIM(p *parser) bool {
return true return true
} }
// TODO: fix up the other IM's section numbers to match the latest spec.
// inForeignContentIM is the insertion mode the parser switches to after
// adding a "math" or "svg" element. Comments become comment nodes, start tags
// become child elements, and every other token is currently dropped. It
// always returns true.
//
// It panics if a start tag arrives while the node on top of the stack is in
// neither the "mathml" nor the "svg" namespace.
//
// Section 12.2.5.5.
func inForeignContentIM(p *parser) bool {
	if p.tok.Type == CommentToken {
		p.addChild(&Node{Type: CommentNode, Data: p.tok.Data})
		return true
	}
	if p.tok.Type != StartTagToken {
		// TODO: handle end tags. All remaining token types are ignored.
		return true
	}

	if breakout[p.tok.Data] {
		// TODO.
	}
	// A start tag is only valid here while the top node is foreign content.
	if ns := p.top().Namespace; ns != "mathml" && ns != "svg" {
		panic("html: bad parser state: unexpected namespace")
	}
	// TODO: adjust MathML attributes (namespace "mathml").
	// TODO: adjust SVG tag names and SVG attributes (namespace "svg").
	// TODO: adjust foreign attributes.
	p.addElement(p.tok.Data, p.tok.Attr)
	return true
}
func (p *parser) parse() error { func (p *parser) parse() error {
// Iterate until EOF. Any other error will cause an early return. // Iterate until EOF. Any other error will cause an early return.
consumed := true consumed := true

View File

@ -98,7 +98,11 @@ func dumpLevel(w io.Writer, n *Node, level int) error {
case DocumentNode: case DocumentNode:
return errors.New("unexpected DocumentNode") return errors.New("unexpected DocumentNode")
case ElementNode: case ElementNode:
fmt.Fprintf(w, "<%s>", n.Data) if n.Namespace != "" {
fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
} else {
fmt.Fprintf(w, "<%s>", n.Data)
}
for _, a := range n.Attr { for _, a := range n.Attr {
io.WriteString(w, "\n") io.WriteString(w, "\n")
dumpIndent(w, level+1) dumpIndent(w, level+1)
@ -161,6 +165,7 @@ func TestParser(t *testing.T) {
n int n int
}{ }{
// TODO(nigeltao): Process all the test cases from all the .dat files. // TODO(nigeltao): Process all the test cases from all the .dat files.
{"adoption01.dat", -1},
{"doctype01.dat", -1}, {"doctype01.dat", -1},
{"tests1.dat", -1}, {"tests1.dat", -1},
{"tests2.dat", -1}, {"tests2.dat", -1},