mirror of
https://github.com/golang/go
synced 2024-11-20 09:34:52 -07:00
html: improve parsing of tables
When foster parenting, merge adjacent text nodes.
Properly close table row at </tr> tag.

Pass tests1.dat, test 32:
<!-----><font><div>hello<table>excite!<b>me!<th><i>please!</tr><!--X-->

| <!-- - -->
| <html>
|   <head>
|   <body>
|     <font>
|       <div>
|         "helloexcite!"
|         <b>
|           "me!"
|         <table>
|           <tbody>
|             <tr>
|               <th>
|                 <i>
|                   "please!"
|             <!-- X -->

R=nigeltao
CC=golang-dev
https://golang.org/cl/5323048
This commit is contained in:
parent
7959aeb0f9
commit
6e318bda6c
@ -52,6 +52,11 @@ var (
|
|||||||
tableScopeStopTags = []string{"html", "table"}
|
tableScopeStopTags = []string{"html", "table"}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// stopTags for use in clearStackToContext.
|
||||||
|
var (
|
||||||
|
tableRowContextStopTags = []string{"tr", "html"}
|
||||||
|
)
|
||||||
|
|
||||||
// popUntil pops the stack of open elements at the highest element whose tag
|
// popUntil pops the stack of open elements at the highest element whose tag
|
||||||
// is in matchTags, provided there is no higher element in stopTags. It returns
|
// is in matchTags, provided there is no higher element in stopTags. It returns
|
||||||
// whether or not there was such an element. If there was not, popUntil leaves
|
// whether or not there was such an element. If there was not, popUntil leaves
|
||||||
@ -146,6 +151,11 @@ func (p *parser) fosterParent(n *Node) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if i > 0 && parent.Child[i-1].Type == TextNode && n.Type == TextNode {
|
||||||
|
parent.Child[i-1].Data += n.Data
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
if i == len(parent.Child) {
|
if i == len(parent.Child) {
|
||||||
parent.Add(n)
|
parent.Add(n)
|
||||||
} else {
|
} else {
|
||||||
@ -749,11 +759,11 @@ func inTableIM(p *parser) (insertionMode, bool) {
|
|||||||
case StartTagToken:
|
case StartTagToken:
|
||||||
switch p.tok.Data {
|
switch p.tok.Data {
|
||||||
case "tbody", "tfoot", "thead":
|
case "tbody", "tfoot", "thead":
|
||||||
p.clearStackToTableContext()
|
p.clearStackToContext(tableScopeStopTags)
|
||||||
p.addElement(p.tok.Data, p.tok.Attr)
|
p.addElement(p.tok.Data, p.tok.Attr)
|
||||||
return inTableBodyIM, true
|
return inTableBodyIM, true
|
||||||
case "td", "th", "tr":
|
case "td", "th", "tr":
|
||||||
p.clearStackToTableContext()
|
p.clearStackToContext(tableScopeStopTags)
|
||||||
p.addElement("tbody", nil)
|
p.addElement("tbody", nil)
|
||||||
return inTableBodyIM, false
|
return inTableBodyIM, false
|
||||||
case "table":
|
case "table":
|
||||||
@ -794,11 +804,15 @@ func inTableIM(p *parser) (insertionMode, bool) {
|
|||||||
return useTheRulesFor(p, inTableIM, inBodyIM)
|
return useTheRulesFor(p, inTableIM, inBodyIM)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *parser) clearStackToTableContext() {
|
// clearStackToContext pops elements off the stack of open elements
|
||||||
|
// until an element listed in stopTags is found.
|
||||||
|
func (p *parser) clearStackToContext(stopTags []string) {
|
||||||
for i := len(p.oe) - 1; i >= 0; i-- {
|
for i := len(p.oe) - 1; i >= 0; i-- {
|
||||||
if x := p.oe[i].Data; x == "table" || x == "html" {
|
for _, tag := range stopTags {
|
||||||
p.oe = p.oe[:i+1]
|
if p.oe[i].Data == tag {
|
||||||
return
|
p.oe = p.oe[:i+1]
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -877,7 +891,12 @@ func inRowIM(p *parser) (insertionMode, bool) {
|
|||||||
case EndTagToken:
|
case EndTagToken:
|
||||||
switch p.tok.Data {
|
switch p.tok.Data {
|
||||||
case "tr":
|
case "tr":
|
||||||
// TODO.
|
if !p.elementInScope(tableScopeStopTags, "tr") {
|
||||||
|
return inRowIM, true
|
||||||
|
}
|
||||||
|
p.clearStackToContext(tableRowContextStopTags)
|
||||||
|
p.oe.pop()
|
||||||
|
return inTableBodyIM, true
|
||||||
case "table":
|
case "table":
|
||||||
if p.popUntil(tableScopeStopTags, "tr") {
|
if p.popUntil(tableScopeStopTags, "tr") {
|
||||||
return inTableBodyIM, false
|
return inTableBodyIM, false
|
||||||
|
@ -132,7 +132,7 @@ func TestParser(t *testing.T) {
|
|||||||
rc := make(chan io.Reader)
|
rc := make(chan io.Reader)
|
||||||
go readDat(filename, rc)
|
go readDat(filename, rc)
|
||||||
// TODO(nigeltao): Process all test cases, not just a subset.
|
// TODO(nigeltao): Process all test cases, not just a subset.
|
||||||
for i := 0; i < 32; i++ {
|
for i := 0; i < 33; i++ {
|
||||||
// Parse the #data section.
|
// Parse the #data section.
|
||||||
b, err := ioutil.ReadAll(<-rc)
|
b, err := ioutil.ReadAll(<-rc)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
Loading…
Reference in New Issue
Block a user