mirror of
https://github.com/golang/go
synced 2024-11-12 02:50:25 -07:00
html: a first step at parsing foreign content (MathML, SVG).
Nodes now have a Namespace field. Pass adoption01.dat, test 12: <a><svg><tr><input></a> | <html> | <head> | <body> | <a> | <svg svg> | <svg tr> | <svg input> The other adoption01.dat tests already passed. R=andybalholm CC=golang-dev https://golang.org/cl/5467075
This commit is contained in:
parent
0643aacee9
commit
b9064fb132
@ -11,6 +11,7 @@ GOFILES=\
|
|||||||
doctype.go\
|
doctype.go\
|
||||||
entity.go\
|
entity.go\
|
||||||
escape.go\
|
escape.go\
|
||||||
|
foreign.go\
|
||||||
node.go\
|
node.go\
|
||||||
parse.go\
|
parse.go\
|
||||||
render.go\
|
render.go\
|
||||||
|
56
src/pkg/html/foreign.go
Normal file
56
src/pkg/html/foreign.go
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
// Copyright 2011 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package html
|
||||||
|
|
||||||
|
// Section 12.2.5.5.
|
||||||
|
var breakout = map[string]bool{
|
||||||
|
"b": true,
|
||||||
|
"big": true,
|
||||||
|
"blockquote": true,
|
||||||
|
"body": true,
|
||||||
|
"br": true,
|
||||||
|
"center": true,
|
||||||
|
"code": true,
|
||||||
|
"dd": true,
|
||||||
|
"div": true,
|
||||||
|
"dl": true,
|
||||||
|
"dt": true,
|
||||||
|
"em": true,
|
||||||
|
"embed": true,
|
||||||
|
"font": true,
|
||||||
|
"h1": true,
|
||||||
|
"h2": true,
|
||||||
|
"h3": true,
|
||||||
|
"h4": true,
|
||||||
|
"h5": true,
|
||||||
|
"h6": true,
|
||||||
|
"head": true,
|
||||||
|
"hr": true,
|
||||||
|
"i": true,
|
||||||
|
"img": true,
|
||||||
|
"li": true,
|
||||||
|
"listing": true,
|
||||||
|
"menu": true,
|
||||||
|
"meta": true,
|
||||||
|
"nobr": true,
|
||||||
|
"ol": true,
|
||||||
|
"p": true,
|
||||||
|
"pre": true,
|
||||||
|
"ruby": true,
|
||||||
|
"s": true,
|
||||||
|
"small": true,
|
||||||
|
"span": true,
|
||||||
|
"strong": true,
|
||||||
|
"strike": true,
|
||||||
|
"sub": true,
|
||||||
|
"sup": true,
|
||||||
|
"table": true,
|
||||||
|
"tt": true,
|
||||||
|
"u": true,
|
||||||
|
"ul": true,
|
||||||
|
"var": true,
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: add look-up tables for MathML and SVG adjustments.
|
@ -24,14 +24,15 @@ var scopeMarker = Node{Type: scopeMarkerNode}
|
|||||||
|
|
||||||
// A Node consists of a NodeType and some Data (tag name for element nodes,
|
// A Node consists of a NodeType and some Data (tag name for element nodes,
|
||||||
// content for text) and are part of a tree of Nodes. Element nodes may also
|
// content for text) and are part of a tree of Nodes. Element nodes may also
|
||||||
// contain a slice of Attributes. Data is unescaped, so that it looks like
|
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
|
||||||
// "a<b" rather than "a&lt;b".
|
// that it looks like "a<b" rather than "a&lt;b".
|
||||||
type Node struct {
|
type Node struct {
|
||||||
Parent *Node
|
Parent *Node
|
||||||
Child []*Node
|
Child []*Node
|
||||||
Type NodeType
|
Type NodeType
|
||||||
Data string
|
Data string
|
||||||
Attr []Attribute
|
Namespace string
|
||||||
|
Attr []Attribute
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add adds a node as a child of n.
|
// Add adds a node as a child of n.
|
||||||
|
@ -192,9 +192,10 @@ func (p *parser) addText(text string) {
|
|||||||
// addElement calls addChild with an element node.
|
// addElement calls addChild with an element node.
|
||||||
func (p *parser) addElement(tag string, attr []Attribute) {
|
func (p *parser) addElement(tag string, attr []Attribute) {
|
||||||
p.addChild(&Node{
|
p.addChild(&Node{
|
||||||
Type: ElementNode,
|
Type: ElementNode,
|
||||||
Data: tag,
|
Data: tag,
|
||||||
Attr: attr,
|
Namespace: p.top().Namespace,
|
||||||
|
Attr: attr,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -318,7 +319,10 @@ func (p *parser) resetInsertionMode() {
|
|||||||
case "html":
|
case "html":
|
||||||
p.im = beforeHeadIM
|
p.im = beforeHeadIM
|
||||||
default:
|
default:
|
||||||
continue
|
if p.top().Namespace == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
p.im = inForeignContentIM
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -792,6 +796,21 @@ func inBodyIM(p *parser) bool {
|
|||||||
p.reconstructActiveFormattingElements()
|
p.reconstructActiveFormattingElements()
|
||||||
p.framesetOK = false
|
p.framesetOK = false
|
||||||
p.addElement(p.tok.Data, p.tok.Attr)
|
p.addElement(p.tok.Data, p.tok.Attr)
|
||||||
|
case "math", "svg":
|
||||||
|
p.reconstructActiveFormattingElements()
|
||||||
|
namespace := ""
|
||||||
|
if p.tok.Data == "math" {
|
||||||
|
// TODO: adjust MathML attributes.
|
||||||
|
namespace = "mathml"
|
||||||
|
} else {
|
||||||
|
// TODO: adjust SVG attributes.
|
||||||
|
namespace = "svg"
|
||||||
|
}
|
||||||
|
// TODO: adjust foreign attributes.
|
||||||
|
p.addElement(p.tok.Data, p.tok.Attr)
|
||||||
|
p.top().Namespace = namespace
|
||||||
|
p.im = inForeignContentIM
|
||||||
|
return true
|
||||||
case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
|
case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
|
||||||
// Ignore the token.
|
// Ignore the token.
|
||||||
default:
|
default:
|
||||||
@ -1557,6 +1576,39 @@ func afterAfterFramesetIM(p *parser) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: fix up the other IM's section numbers to match the latest spec.
|
||||||
|
|
||||||
|
// Section 12.2.5.5.
|
||||||
|
func inForeignContentIM(p *parser) bool {
|
||||||
|
switch p.tok.Type {
|
||||||
|
case CommentToken:
|
||||||
|
p.addChild(&Node{
|
||||||
|
Type: CommentNode,
|
||||||
|
Data: p.tok.Data,
|
||||||
|
})
|
||||||
|
case StartTagToken:
|
||||||
|
if breakout[p.tok.Data] {
|
||||||
|
// TODO.
|
||||||
|
}
|
||||||
|
switch p.top().Namespace {
|
||||||
|
case "mathml":
|
||||||
|
// TODO: adjust MathML attributes.
|
||||||
|
case "svg":
|
||||||
|
// TODO: adjust SVG tag names.
|
||||||
|
// TODO: adjust SVG attributes.
|
||||||
|
default:
|
||||||
|
panic("html: bad parser state: unexpected namespace")
|
||||||
|
}
|
||||||
|
// TODO: adjust foreign attributes.
|
||||||
|
p.addElement(p.tok.Data, p.tok.Attr)
|
||||||
|
case EndTagToken:
|
||||||
|
// TODO.
|
||||||
|
default:
|
||||||
|
// Ignore the token.
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
func (p *parser) parse() error {
|
func (p *parser) parse() error {
|
||||||
// Iterate until EOF. Any other error will cause an early return.
|
// Iterate until EOF. Any other error will cause an early return.
|
||||||
consumed := true
|
consumed := true
|
||||||
|
@ -98,7 +98,11 @@ func dumpLevel(w io.Writer, n *Node, level int) error {
|
|||||||
case DocumentNode:
|
case DocumentNode:
|
||||||
return errors.New("unexpected DocumentNode")
|
return errors.New("unexpected DocumentNode")
|
||||||
case ElementNode:
|
case ElementNode:
|
||||||
fmt.Fprintf(w, "<%s>", n.Data)
|
if n.Namespace != "" {
|
||||||
|
fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(w, "<%s>", n.Data)
|
||||||
|
}
|
||||||
for _, a := range n.Attr {
|
for _, a := range n.Attr {
|
||||||
io.WriteString(w, "\n")
|
io.WriteString(w, "\n")
|
||||||
dumpIndent(w, level+1)
|
dumpIndent(w, level+1)
|
||||||
@ -161,6 +165,7 @@ func TestParser(t *testing.T) {
|
|||||||
n int
|
n int
|
||||||
}{
|
}{
|
||||||
// TODO(nigeltao): Process all the test cases from all the .dat files.
|
// TODO(nigeltao): Process all the test cases from all the .dat files.
|
||||||
|
{"adoption01.dat", -1},
|
||||||
{"doctype01.dat", -1},
|
{"doctype01.dat", -1},
|
||||||
{"tests1.dat", -1},
|
{"tests1.dat", -1},
|
||||||
{"tests2.dat", -1},
|
{"tests2.dat", -1},
|
||||||
|
Loading…
Reference in New Issue
Block a user