mirror of
https://github.com/golang/go
synced 2024-11-12 03:10:22 -07:00
html: a first step at parsing foreign content (MathML, SVG).
Nodes now have a Namespace field. Pass adoption01.dat, test 12: <a><svg><tr><input></a> | <html> | <head> | <body> | <a> | <svg svg> | <svg tr> | <svg input> The other adoption01.dat tests already passed. R=andybalholm CC=golang-dev https://golang.org/cl/5467075
This commit is contained in:
parent
0643aacee9
commit
b9064fb132
@ -11,6 +11,7 @@ GOFILES=\
|
||||
doctype.go\
|
||||
entity.go\
|
||||
escape.go\
|
||||
foreign.go\
|
||||
node.go\
|
||||
parse.go\
|
||||
render.go\
|
||||
|
56
src/pkg/html/foreign.go
Normal file
56
src/pkg/html/foreign.go
Normal file
@ -0,0 +1,56 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
// Section 12.2.5.5.
//
// breakout is the set of tag names that inForeignContentIM looks start tags
// up in. What the parser should do when a start tag is found in this set is
// not implemented yet.
//
// NOTE(review): "font" is listed unconditionally; confirm against the spec
// whether it should only count when it carries particular attributes.
var breakout = map[string]bool{
	"b": true, "big": true, "blockquote": true, "body": true, "br": true,
	"center": true, "code": true,
	"dd": true, "div": true, "dl": true, "dt": true,
	"em": true, "embed": true,
	"font": true,
	"h1":   true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true,
	"head": true, "hr": true,
	"i": true, "img": true,
	"li": true, "listing": true,
	"menu": true, "meta": true,
	"nobr": true,
	"ol":   true,
	"p":    true, "pre": true,
	"ruby": true,
	"s":    true, "small": true, "span": true, "strong": true, "strike": true,
	"sub": true, "sup": true,
	"table": true, "tt": true,
	"u": true, "ul": true,
	"var": true,
}
|
||||
|
||||
// TODO: add look-up tables for MathML and SVG adjustments.
|
@ -24,13 +24,14 @@ var scopeMarker = Node{Type: scopeMarkerNode}
|
||||
|
||||
// A Node consists of a NodeType and some Data (tag name for element nodes,
|
||||
// content for text) and is part of a tree of Nodes. Element nodes may also
|
||||
// contain a slice of Attributes. Data is unescaped, so that it looks like
|
||||
// "a<b" rather than "a&lt;b".
|
||||
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
|
||||
// that it looks like "a<b" rather than "a&lt;b".
|
||||
type Node struct {
|
||||
Parent *Node
|
||||
Child []*Node
|
||||
Type NodeType
|
||||
Data string
|
||||
Namespace string
|
||||
Attr []Attribute
|
||||
}
|
||||
|
||||
|
@ -194,6 +194,7 @@ func (p *parser) addElement(tag string, attr []Attribute) {
|
||||
p.addChild(&Node{
|
||||
Type: ElementNode,
|
||||
Data: tag,
|
||||
Namespace: p.top().Namespace,
|
||||
Attr: attr,
|
||||
})
|
||||
}
|
||||
@ -318,8 +319,11 @@ func (p *parser) resetInsertionMode() {
|
||||
case "html":
|
||||
p.im = beforeHeadIM
|
||||
default:
|
||||
if p.top().Namespace == "" {
|
||||
continue
|
||||
}
|
||||
p.im = inForeignContentIM
|
||||
}
|
||||
return
|
||||
}
|
||||
p.im = inBodyIM
|
||||
@ -792,6 +796,21 @@ func inBodyIM(p *parser) bool {
|
||||
p.reconstructActiveFormattingElements()
|
||||
p.framesetOK = false
|
||||
p.addElement(p.tok.Data, p.tok.Attr)
|
||||
case "math", "svg":
|
||||
p.reconstructActiveFormattingElements()
|
||||
namespace := ""
|
||||
if p.tok.Data == "math" {
|
||||
// TODO: adjust MathML attributes.
|
||||
namespace = "mathml"
|
||||
} else {
|
||||
// TODO: adjust SVG attributes.
|
||||
namespace = "svg"
|
||||
}
|
||||
// TODO: adjust foreign attributes.
|
||||
p.addElement(p.tok.Data, p.tok.Attr)
|
||||
p.top().Namespace = namespace
|
||||
p.im = inForeignContentIM
|
||||
return true
|
||||
case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
|
||||
// Ignore the token.
|
||||
default:
|
||||
@ -1557,6 +1576,39 @@ func afterAfterFramesetIM(p *parser) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// TODO: fix up the other IM's section numbers to match the latest spec.
|
||||
|
||||
// Section 12.2.5.5.
|
||||
func inForeignContentIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case CommentToken:
|
||||
p.addChild(&Node{
|
||||
Type: CommentNode,
|
||||
Data: p.tok.Data,
|
||||
})
|
||||
case StartTagToken:
|
||||
if breakout[p.tok.Data] {
|
||||
// TODO.
|
||||
}
|
||||
switch p.top().Namespace {
|
||||
case "mathml":
|
||||
// TODO: adjust MathML attributes.
|
||||
case "svg":
|
||||
// TODO: adjust SVG tag names.
|
||||
// TODO: adjust SVG attributes.
|
||||
default:
|
||||
panic("html: bad parser state: unexpected namespace")
|
||||
}
|
||||
// TODO: adjust foreign attributes.
|
||||
p.addElement(p.tok.Data, p.tok.Attr)
|
||||
case EndTagToken:
|
||||
// TODO.
|
||||
default:
|
||||
// Ignore the token.
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (p *parser) parse() error {
|
||||
// Iterate until EOF. Any other error will cause an early return.
|
||||
consumed := true
|
||||
|
@ -98,7 +98,11 @@ func dumpLevel(w io.Writer, n *Node, level int) error {
|
||||
case DocumentNode:
|
||||
return errors.New("unexpected DocumentNode")
|
||||
case ElementNode:
|
||||
if n.Namespace != "" {
|
||||
fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
|
||||
} else {
|
||||
fmt.Fprintf(w, "<%s>", n.Data)
|
||||
}
|
||||
for _, a := range n.Attr {
|
||||
io.WriteString(w, "\n")
|
||||
dumpIndent(w, level+1)
|
||||
@ -161,6 +165,7 @@ func TestParser(t *testing.T) {
|
||||
n int
|
||||
}{
|
||||
// TODO(nigeltao): Process all the test cases from all the .dat files.
|
||||
{"adoption01.dat", -1},
|
||||
{"doctype01.dat", -1},
|
||||
{"tests1.dat", -1},
|
||||
{"tests2.dat", -1},
|
||||
|
Loading…
Reference in New Issue
Block a user