mirror of
https://github.com/golang/go
synced 2024-11-20 08:04:42 -07:00
html: implement foster parenting
Implement the foster-parenting algorithm for content that is inside a table but not in a cell. Also fix a bug in reconstructing the active formatting elements. Pass test 30 in tests1.dat: <a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y R=nigeltao CC=golang-dev https://golang.org/cl/5309052
This commit is contained in:
parent
2f352ae48a
commit
2aa589c843
@ -32,6 +32,9 @@ type parser struct {
|
||||
// originalIM is the insertion mode to go back to after completing a text
|
||||
// or inTableText insertion mode.
|
||||
originalIM insertionMode
|
||||
// fosterParenting is whether new elements should be inserted according to
|
||||
// the foster parenting rules (section 11.2.5.3).
|
||||
fosterParenting bool
|
||||
}
|
||||
|
||||
func (p *parser) top() *Node {
|
||||
@ -103,12 +106,56 @@ func (p *parser) elementInScope(stopTags []string, matchTags ...string) bool {
|
||||
// addChild adds a child node n to the top element, and pushes n onto the stack
|
||||
// of open elements if it is an element node.
|
||||
func (p *parser) addChild(n *Node) {
|
||||
p.top().Add(n)
|
||||
if p.fosterParenting {
|
||||
p.fosterParent(n)
|
||||
} else {
|
||||
p.top().Add(n)
|
||||
}
|
||||
|
||||
if n.Type == ElementNode {
|
||||
p.oe = append(p.oe, n)
|
||||
}
|
||||
}
|
||||
|
||||
// fosterParent adds a child node according to the foster parenting rules.
|
||||
// Section 11.2.5.3, "foster parenting".
|
||||
func (p *parser) fosterParent(n *Node) {
|
||||
var table, parent *Node
|
||||
var i int
|
||||
for i = len(p.oe) - 1; i >= 0; i-- {
|
||||
if p.oe[i].Data == "table" {
|
||||
table = p.oe[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if table == nil {
|
||||
// The foster parent is the html element.
|
||||
parent = p.oe[0]
|
||||
} else {
|
||||
parent = table.Parent
|
||||
}
|
||||
if parent == nil {
|
||||
parent = p.oe[i-1]
|
||||
}
|
||||
|
||||
var child *Node
|
||||
for i, child = range parent.Child {
|
||||
if child == table {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if i == len(parent.Child) {
|
||||
parent.Add(n)
|
||||
} else {
|
||||
// Insert n into parent.Child at index i.
|
||||
parent.Child = append(parent.Child[:i+1], parent.Child[i:]...)
|
||||
parent.Child[i] = n
|
||||
n.Parent = parent
|
||||
}
|
||||
}
|
||||
|
||||
// addText adds text to the preceding node if it is a text node, or else it
|
||||
// calls addChild with a new text node.
|
||||
func (p *parser) addText(text string) {
|
||||
@ -170,9 +217,9 @@ func (p *parser) reconstructActiveFormattingElements() {
|
||||
}
|
||||
for {
|
||||
i++
|
||||
n = p.afe[i]
|
||||
p.addChild(n.clone())
|
||||
p.afe[i] = n
|
||||
clone := p.afe[i].clone()
|
||||
p.addChild(clone)
|
||||
p.afe[i] = clone
|
||||
if i == len(p.afe)-1 {
|
||||
break
|
||||
}
|
||||
@ -536,10 +583,7 @@ func inBodyIM(p *parser) (insertionMode, bool) {
|
||||
case "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u":
|
||||
p.inBodyEndTagFormatting(p.tok.Data)
|
||||
default:
|
||||
// TODO: any other end tag
|
||||
if p.tok.Data == p.top().Data {
|
||||
p.oe.pop()
|
||||
}
|
||||
p.inBodyEndTagOther(p.tok.Data)
|
||||
}
|
||||
case CommentToken:
|
||||
p.addChild(&Node{
|
||||
@ -573,6 +617,7 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
|
||||
}
|
||||
}
|
||||
if formattingElement == nil {
|
||||
p.inBodyEndTagOther(tag)
|
||||
return
|
||||
}
|
||||
feIndex := p.oe.index(formattingElement)
|
||||
@ -645,8 +690,7 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
|
||||
}
|
||||
switch commonAncestor.Data {
|
||||
case "table", "tbody", "tfoot", "thead", "tr":
|
||||
// TODO: fix up misnested table nodes; find the foster parent.
|
||||
fallthrough
|
||||
p.fosterParent(lastNode)
|
||||
default:
|
||||
commonAncestor.Add(lastNode)
|
||||
}
|
||||
@ -667,6 +711,19 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
|
||||
}
|
||||
}
|
||||
|
||||
// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
|
||||
func (p *parser) inBodyEndTagOther(tag string) {
|
||||
for i := len(p.oe) - 1; i >= 0; i-- {
|
||||
if p.oe[i].Data == tag {
|
||||
p.oe = p.oe[:i]
|
||||
break
|
||||
}
|
||||
if isSpecialElement[p.oe[i].Data] {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Section 11.2.5.4.8.
|
||||
func textIM(p *parser) (insertionMode, bool) {
|
||||
switch p.tok.Type {
|
||||
@ -683,12 +740,6 @@ func textIM(p *parser) (insertionMode, bool) {
|
||||
|
||||
// Section 11.2.5.4.9.
|
||||
func inTableIM(p *parser) (insertionMode, bool) {
|
||||
var (
|
||||
add bool
|
||||
data string
|
||||
attr []Attribute
|
||||
consumed bool
|
||||
)
|
||||
switch p.tok.Type {
|
||||
case ErrorToken:
|
||||
// Stop parsing.
|
||||
@ -698,13 +749,19 @@ func inTableIM(p *parser) (insertionMode, bool) {
|
||||
case StartTagToken:
|
||||
switch p.tok.Data {
|
||||
case "tbody", "tfoot", "thead":
|
||||
add = true
|
||||
data = p.tok.Data
|
||||
attr = p.tok.Attr
|
||||
consumed = true
|
||||
p.clearStackToTableContext()
|
||||
p.addElement(p.tok.Data, p.tok.Attr)
|
||||
return inTableBodyIM, true
|
||||
case "td", "th", "tr":
|
||||
add = true
|
||||
data = "tbody"
|
||||
p.clearStackToTableContext()
|
||||
p.addElement("tbody", nil)
|
||||
return inTableBodyIM, false
|
||||
case "table":
|
||||
if p.popUntil(tableScopeStopTags, "table") {
|
||||
return p.resetInsertionMode(), false
|
||||
}
|
||||
// Ignore the token.
|
||||
return inTableIM, true
|
||||
default:
|
||||
// TODO.
|
||||
}
|
||||
@ -727,13 +784,23 @@ func inTableIM(p *parser) (insertionMode, bool) {
|
||||
})
|
||||
return inTableIM, true
|
||||
}
|
||||
if add {
|
||||
// TODO: clear the stack back to a table context.
|
||||
p.addElement(data, attr)
|
||||
return inTableBodyIM, consumed
|
||||
|
||||
switch p.top().Data {
|
||||
case "table", "tbody", "tfoot", "thead", "tr":
|
||||
p.fosterParenting = true
|
||||
defer func() { p.fosterParenting = false }()
|
||||
}
|
||||
|
||||
return useTheRulesFor(p, inTableIM, inBodyIM)
|
||||
}
|
||||
|
||||
func (p *parser) clearStackToTableContext() {
|
||||
for i := len(p.oe) - 1; i >= 0; i-- {
|
||||
if x := p.oe[i].Data; x == "table" || x == "html" {
|
||||
p.oe = p.oe[:i+1]
|
||||
return
|
||||
}
|
||||
}
|
||||
// TODO: return useTheRulesFor(inTableIM, inBodyIM, p) unless etc. etc. foster parenting.
|
||||
return inTableIM, true
|
||||
}
|
||||
|
||||
// Section 11.2.5.4.13.
|
||||
|
@ -123,7 +123,7 @@ func TestParser(t *testing.T) {
|
||||
rc := make(chan io.Reader)
|
||||
go readDat(filename, rc)
|
||||
// TODO(nigeltao): Process all test cases, not just a subset.
|
||||
for i := 0; i < 30; i++ {
|
||||
for i := 0; i < 31; i++ {
|
||||
// Parse the #data section.
|
||||
b, err := ioutil.ReadAll(<-rc)
|
||||
if err != nil {
|
||||
@ -152,6 +152,13 @@ func TestParser(t *testing.T) {
|
||||
continue
|
||||
}
|
||||
// Check that rendering and re-parsing results in an identical tree.
|
||||
if filename == "tests1.dat" && i == 30 {
|
||||
// Test 30 in tests1.dat is such messed-up markup that a correct parse
|
||||
// results in a non-conforming tree (one <a> element nested inside another).
|
||||
// Therefore when it is rendered and re-parsed, it isn't the same.
|
||||
// So we skip rendering on that test.
|
||||
continue
|
||||
}
|
||||
pr, pw := io.Pipe()
|
||||
go func() {
|
||||
pw.CloseWithError(Render(pw, doc))
|
||||
|
Loading…
Reference in New Issue
Block a user