mirror of
https://github.com/golang/go
synced 2024-11-22 03:34:40 -07:00
html: spin doctype.go out of parse.go.
R=andybalholm CC=golang-dev https://golang.org/cl/5445049
This commit is contained in:
parent
b2329e997b
commit
929290d5a0
@ -8,6 +8,7 @@ TARG=html
|
|||||||
GOFILES=\
|
GOFILES=\
|
||||||
const.go\
|
const.go\
|
||||||
doc.go\
|
doc.go\
|
||||||
|
doctype.go\
|
||||||
entity.go\
|
entity.go\
|
||||||
escape.go\
|
escape.go\
|
||||||
node.go\
|
node.go\
|
||||||
|
156
src/pkg/html/doctype.go
Normal file
156
src/pkg/html/doctype.go
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
// Copyright 2011 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package html
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// parseDoctype parses the data from a DoctypeToken into a name,
|
||||||
|
// public identifier, and system identifier. It returns a Node whose Type
|
||||||
|
// is DoctypeNode, whose Data is the name, and which has attributes
|
||||||
|
// named "system" and "public" for the two identifiers if they were present.
|
||||||
|
// quirks is whether the document should be parsed in "quirks mode".
|
||||||
|
func parseDoctype(s string) (n *Node, quirks bool) {
|
||||||
|
n = &Node{Type: DoctypeNode}
|
||||||
|
|
||||||
|
// Find the name.
|
||||||
|
space := strings.IndexAny(s, whitespace)
|
||||||
|
if space == -1 {
|
||||||
|
space = len(s)
|
||||||
|
}
|
||||||
|
n.Data = s[:space]
|
||||||
|
// The comparison to "html" is case-sensitive.
|
||||||
|
if n.Data != "html" {
|
||||||
|
quirks = true
|
||||||
|
}
|
||||||
|
n.Data = strings.ToLower(n.Data)
|
||||||
|
s = strings.TrimLeft(s[space:], whitespace)
|
||||||
|
|
||||||
|
if len(s) < 6 {
|
||||||
|
// It can't start with "PUBLIC" or "SYSTEM".
|
||||||
|
// Ignore the rest of the string.
|
||||||
|
return n, quirks || s != ""
|
||||||
|
}
|
||||||
|
|
||||||
|
key := strings.ToLower(s[:6])
|
||||||
|
s = s[6:]
|
||||||
|
for key == "public" || key == "system" {
|
||||||
|
s = strings.TrimLeft(s, whitespace)
|
||||||
|
if s == "" {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
quote := s[0]
|
||||||
|
if quote != '"' && quote != '\'' {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
s = s[1:]
|
||||||
|
q := strings.IndexRune(s, rune(quote))
|
||||||
|
var id string
|
||||||
|
if q == -1 {
|
||||||
|
id = s
|
||||||
|
s = ""
|
||||||
|
} else {
|
||||||
|
id = s[:q]
|
||||||
|
s = s[q+1:]
|
||||||
|
}
|
||||||
|
n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
|
||||||
|
if key == "public" {
|
||||||
|
key = "system"
|
||||||
|
} else {
|
||||||
|
key = ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if key != "" || s != "" {
|
||||||
|
quirks = true
|
||||||
|
} else if len(n.Attr) > 0 {
|
||||||
|
if n.Attr[0].Key == "public" {
|
||||||
|
public := strings.ToLower(n.Attr[0].Val)
|
||||||
|
switch public {
|
||||||
|
case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
|
||||||
|
quirks = true
|
||||||
|
default:
|
||||||
|
for _, q := range quirkyIDs {
|
||||||
|
if strings.HasPrefix(public, q) {
|
||||||
|
quirks = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// The following two public IDs only cause quirks mode if there is no system ID.
|
||||||
|
if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
|
||||||
|
strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
|
||||||
|
quirks = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
|
||||||
|
strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
|
||||||
|
quirks = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return n, quirks
|
||||||
|
}
|
||||||
|
|
||||||
|
// quirkyIDs is a list of public doctype identifiers that cause a document
// to be interpreted in quirks mode. parseDoctype lower-cases the public
// identifier it parsed and tests each entry as a prefix of it, so the
// identifiers here must be in lower case.
var quirkyIDs = []string{
	"+//silmaril//dtd html pro v0r11 19970101//",
	"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
	"-//as//dtd html 3.0 aswedit + extensions//",
	"-//ietf//dtd html 2.0 level 1//",
	"-//ietf//dtd html 2.0 level 2//",
	"-//ietf//dtd html 2.0 strict level 1//",
	"-//ietf//dtd html 2.0 strict level 2//",
	"-//ietf//dtd html 2.0 strict//",
	"-//ietf//dtd html 2.0//",
	"-//ietf//dtd html 2.1e//",
	"-//ietf//dtd html 3.0//",
	"-//ietf//dtd html 3.2 final//",
	"-//ietf//dtd html 3.2//",
	"-//ietf//dtd html 3//",
	"-//ietf//dtd html level 0//",
	"-//ietf//dtd html level 1//",
	"-//ietf//dtd html level 2//",
	"-//ietf//dtd html level 3//",
	"-//ietf//dtd html strict level 0//",
	"-//ietf//dtd html strict level 1//",
	"-//ietf//dtd html strict level 2//",
	"-//ietf//dtd html strict level 3//",
	"-//ietf//dtd html strict//",
	"-//ietf//dtd html//",
	"-//metrius//dtd metrius presentational//",
	"-//microsoft//dtd internet explorer 2.0 html strict//",
	"-//microsoft//dtd internet explorer 2.0 html//",
	"-//microsoft//dtd internet explorer 2.0 tables//",
	"-//microsoft//dtd internet explorer 3.0 html strict//",
	"-//microsoft//dtd internet explorer 3.0 html//",
	"-//microsoft//dtd internet explorer 3.0 tables//",
	"-//netscape comm. corp.//dtd html//",
	"-//netscape comm. corp.//dtd strict html//",
	"-//o'reilly and associates//dtd html 2.0//",
	"-//o'reilly and associates//dtd html extended 1.0//",
	"-//o'reilly and associates//dtd html extended relaxed 1.0//",
	"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
	"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
	"-//spyglass//dtd html 2.0 extended//",
	"-//sq//dtd html 2.0 hotmetal + extensions//",
	"-//sun microsystems corp.//dtd hotjava html//",
	"-//sun microsystems corp.//dtd hotjava strict html//",
	"-//w3c//dtd html 3 1995-03-24//",
	"-//w3c//dtd html 3.2 draft//",
	"-//w3c//dtd html 3.2 final//",
	"-//w3c//dtd html 3.2//",
	"-//w3c//dtd html 3.2s draft//",
	"-//w3c//dtd html 4.0 frameset//",
	"-//w3c//dtd html 4.0 transitional//",
	"-//w3c//dtd html experimental 19960712//",
	"-//w3c//dtd html experimental 970421//",
	"-//w3c//dtd w3 html//",
	"-//w3o//dtd w3 html 3.0//",
	"-//webtechs//dtd mozilla html 2.0//",
	"-//webtechs//dtd mozilla html//",
}
|
@ -323,153 +323,6 @@ func (p *parser) resetInsertionMode() {
|
|||||||
|
|
||||||
// whitespace lists the characters treated as whitespace when splitting and
// trimming: space, tab, carriage return, line feed, and form feed.
const whitespace = " \t\r\n\f"
|
||||||
|
|
||||||
// quirkyIDs is a list of public doctype identifiers that cause a document
// to be interpreted in quirks mode. parseDoctype lower-cases the public
// identifier it parsed and tests each entry as a prefix of it, so the
// identifiers here must be in lower case.
var quirkyIDs = []string{
	"+//silmaril//dtd html pro v0r11 19970101//",
	"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
	"-//as//dtd html 3.0 aswedit + extensions//",
	"-//ietf//dtd html 2.0 level 1//",
	"-//ietf//dtd html 2.0 level 2//",
	"-//ietf//dtd html 2.0 strict level 1//",
	"-//ietf//dtd html 2.0 strict level 2//",
	"-//ietf//dtd html 2.0 strict//",
	"-//ietf//dtd html 2.0//",
	"-//ietf//dtd html 2.1e//",
	"-//ietf//dtd html 3.0//",
	"-//ietf//dtd html 3.2 final//",
	"-//ietf//dtd html 3.2//",
	"-//ietf//dtd html 3//",
	"-//ietf//dtd html level 0//",
	"-//ietf//dtd html level 1//",
	"-//ietf//dtd html level 2//",
	"-//ietf//dtd html level 3//",
	"-//ietf//dtd html strict level 0//",
	"-//ietf//dtd html strict level 1//",
	"-//ietf//dtd html strict level 2//",
	"-//ietf//dtd html strict level 3//",
	"-//ietf//dtd html strict//",
	"-//ietf//dtd html//",
	"-//metrius//dtd metrius presentational//",
	"-//microsoft//dtd internet explorer 2.0 html strict//",
	"-//microsoft//dtd internet explorer 2.0 html//",
	"-//microsoft//dtd internet explorer 2.0 tables//",
	"-//microsoft//dtd internet explorer 3.0 html strict//",
	"-//microsoft//dtd internet explorer 3.0 html//",
	"-//microsoft//dtd internet explorer 3.0 tables//",
	"-//netscape comm. corp.//dtd html//",
	"-//netscape comm. corp.//dtd strict html//",
	"-//o'reilly and associates//dtd html 2.0//",
	"-//o'reilly and associates//dtd html extended 1.0//",
	"-//o'reilly and associates//dtd html extended relaxed 1.0//",
	"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
	"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
	"-//spyglass//dtd html 2.0 extended//",
	"-//sq//dtd html 2.0 hotmetal + extensions//",
	"-//sun microsystems corp.//dtd hotjava html//",
	"-//sun microsystems corp.//dtd hotjava strict html//",
	"-//w3c//dtd html 3 1995-03-24//",
	"-//w3c//dtd html 3.2 draft//",
	"-//w3c//dtd html 3.2 final//",
	"-//w3c//dtd html 3.2//",
	"-//w3c//dtd html 3.2s draft//",
	"-//w3c//dtd html 4.0 frameset//",
	"-//w3c//dtd html 4.0 transitional//",
	"-//w3c//dtd html experimental 19960712//",
	"-//w3c//dtd html experimental 970421//",
	"-//w3c//dtd w3 html//",
	"-//w3o//dtd w3 html 3.0//",
	"-//webtechs//dtd mozilla html 2.0//",
	"-//webtechs//dtd mozilla html//",
}
|
|
||||||
|
|
||||||
// parseDoctype parses the data from a DoctypeToken into a name,
|
|
||||||
// public identifier, and system identifier. It returns a Node whose Type
|
|
||||||
// is DoctypeNode, whose Data is the name, and which has attributes
|
|
||||||
// named "system" and "public" for the two identifiers if they were present.
|
|
||||||
// quirks is whether the document should be parsed in "quirks mode".
|
|
||||||
func parseDoctype(s string) (n *Node, quirks bool) {
|
|
||||||
n = &Node{Type: DoctypeNode}
|
|
||||||
|
|
||||||
// Find the name.
|
|
||||||
space := strings.IndexAny(s, whitespace)
|
|
||||||
if space == -1 {
|
|
||||||
space = len(s)
|
|
||||||
}
|
|
||||||
n.Data = s[:space]
|
|
||||||
// The comparison to "html" is case-sensitive.
|
|
||||||
if n.Data != "html" {
|
|
||||||
quirks = true
|
|
||||||
}
|
|
||||||
n.Data = strings.ToLower(n.Data)
|
|
||||||
s = strings.TrimLeft(s[space:], whitespace)
|
|
||||||
|
|
||||||
if len(s) < 6 {
|
|
||||||
// It can't start with "PUBLIC" or "SYSTEM".
|
|
||||||
// Ignore the rest of the string.
|
|
||||||
return n, quirks || s != ""
|
|
||||||
}
|
|
||||||
|
|
||||||
key := strings.ToLower(s[:6])
|
|
||||||
s = s[6:]
|
|
||||||
for key == "public" || key == "system" {
|
|
||||||
s = strings.TrimLeft(s, whitespace)
|
|
||||||
if s == "" {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
quote := s[0]
|
|
||||||
if quote != '"' && quote != '\'' {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
s = s[1:]
|
|
||||||
q := strings.IndexRune(s, rune(quote))
|
|
||||||
var id string
|
|
||||||
if q == -1 {
|
|
||||||
id = s
|
|
||||||
s = ""
|
|
||||||
} else {
|
|
||||||
id = s[:q]
|
|
||||||
s = s[q+1:]
|
|
||||||
}
|
|
||||||
n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
|
|
||||||
if key == "public" {
|
|
||||||
key = "system"
|
|
||||||
} else {
|
|
||||||
key = ""
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if key != "" || s != "" {
|
|
||||||
quirks = true
|
|
||||||
} else if len(n.Attr) > 0 {
|
|
||||||
if n.Attr[0].Key == "public" {
|
|
||||||
public := strings.ToLower(n.Attr[0].Val)
|
|
||||||
switch public {
|
|
||||||
case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
|
|
||||||
quirks = true
|
|
||||||
default:
|
|
||||||
for _, q := range quirkyIDs {
|
|
||||||
if strings.HasPrefix(public, q) {
|
|
||||||
quirks = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// The following two public IDs only cause quirks mode if there is no system ID.
|
|
||||||
if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
|
|
||||||
strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
|
|
||||||
quirks = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
|
|
||||||
strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
|
|
||||||
quirks = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return n, quirks
|
|
||||||
}
|
|
||||||
|
|
||||||
// Section 11.2.5.4.1.
|
// Section 11.2.5.4.1.
|
||||||
func initialIM(p *parser) bool {
|
func initialIM(p *parser) bool {
|
||||||
switch p.tok.Type {
|
switch p.tok.Type {
|
||||||
|
Loading…
Reference in New Issue
Block a user