From cd21eff70520a433f6ee67819e539b2ebe043120 Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Thu, 7 Jun 2012 13:05:35 +1000 Subject: [PATCH] exp/html: make the tokenizer return atoms for tag tokens. This is part 1 of a 2 part changelist. Part 2 contains the mechanical change to parse.go to compare atoms (ints) instead of strings. The overall effect of the two changes is: benchmark old ns/op new ns/op delta BenchmarkParser 4462274 4058254 -9.05% BenchmarkRawLevelTokenizer 913202 912917 -0.03% BenchmarkLowLevelTokenizer 1268626 1267836 -0.06% BenchmarkHighLevelTokenizer 1947305 1968944 +1.11% R=rsc CC=andybalholm, golang-dev, r https://golang.org/cl/6305053 --- src/pkg/exp/html/node.go | 17 +++++++++---- src/pkg/exp/html/parse.go | 46 ++++++++++++++++++---------------- src/pkg/exp/html/parse_test.go | 6 +++-- src/pkg/exp/html/token.go | 24 ++++++++++++------ 4 files changed, 57 insertions(+), 36 deletions(-) diff --git a/src/pkg/exp/html/node.go b/src/pkg/exp/html/node.go index c105a4e709a..65fa558b248 100644 --- a/src/pkg/exp/html/node.go +++ b/src/pkg/exp/html/node.go @@ -4,8 +4,12 @@ package html +import ( + "exp/html/atom" +) + // A NodeType is the type of a Node. -type NodeType int +type NodeType uint32 const ( ErrorNode NodeType = iota @@ -25,7 +29,8 @@ var scopeMarker = Node{Type: scopeMarkerNode} // A Node consists of a NodeType and some Data (tag name for element nodes, // content for text) and are part of a tree of Nodes. Element nodes may also // have a Namespace and contain a slice of Attributes. Data is unescaped, so -// that it looks like "a