mirror of
https://github.com/golang/go
synced 2024-11-24 20:00:10 -07:00
http: add lexing functions
In particular, add field-value tokenizer which respects quoting rules. The code is intended for use in tokenizing the Transfer-Encoding and Trailer fields. The lexing function is not connected to the main parsing code yet (in the next CL). R=rsc CC=golang-dev https://golang.org/cl/190085
This commit is contained in:
parent
a0e6f03add
commit
dd77c63d3d
@ -9,6 +9,7 @@ GOFILES=\
|
||||
chunked.go\
|
||||
client.go\
|
||||
fs.go\
|
||||
lex.go\
|
||||
request.go\
|
||||
response.go\
|
||||
server.go\
|
||||
|
152
src/pkg/http/lex.go
Normal file
152
src/pkg/http/lex.go
Normal file
@ -0,0 +1,152 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package http
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// This file deals with lexical matters of HTTP
|
||||
|
||||
func isSeparator(c byte) bool {
|
||||
switch c {
|
||||
case '(', ')', '<', '>', '@', ',', ';', ':', '\\', '"', '/', '[', ']', '?', '=', '{', '}', ' ', '\t':
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
panic()
|
||||
}
|
||||
|
||||
func isSpace(c byte) bool {
|
||||
switch c {
|
||||
case ' ', '\t', '\r', '\n':
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
panic()
|
||||
}
|
||||
|
||||
func isCtl(c byte) bool { return (0 <= c && c <= 31) || c == 127 }
|
||||
|
||||
func isChar(c byte) bool { return 0 <= c && c <= 127 }
|
||||
|
||||
func isAnyText(c byte) bool { return !isCtl(c) }
|
||||
|
||||
func isQdText(c byte) bool { return isAnyText(c) && c != '"' }
|
||||
|
||||
func isToken(c byte) bool { return isChar(c) && !isCtl(c) && !isSeparator(c) }
|
||||
|
||||
// Valid escaped sequences are not specified in RFC 2616, so for now, we assume
|
||||
// that they coincide with the common sense ones used by GO. Malformed
|
||||
// characters should probably not be treated as errors by a robust (forgiving)
|
||||
// parser, so we replace them with the '?' character.
|
||||
func httpUnquotePair(b byte) byte {
|
||||
// skip the first byte, which should always be '\'
|
||||
switch b {
|
||||
case 'a':
|
||||
return '\a'
|
||||
case 'b':
|
||||
return '\b'
|
||||
case 'f':
|
||||
return '\f'
|
||||
case 'n':
|
||||
return '\n'
|
||||
case 'r':
|
||||
return '\r'
|
||||
case 't':
|
||||
return '\t'
|
||||
case 'v':
|
||||
return '\v'
|
||||
case '\\':
|
||||
return '\\'
|
||||
case '\'':
|
||||
return '\''
|
||||
case '"':
|
||||
return '"'
|
||||
}
|
||||
return '?'
|
||||
}
|
||||
|
||||
// raw must begin with a valid quoted string. Only the first quoted string is
|
||||
// parsed and is unquoted in result. eaten is the number of bytes parsed, or -1
|
||||
// upon failure.
|
||||
func httpUnquote(raw []byte) (eaten int, result string) {
|
||||
buf := make([]byte, len(raw))
|
||||
if raw[0] != '"' {
|
||||
return -1, ""
|
||||
}
|
||||
eaten = 1
|
||||
j := 0 // # of bytes written in buf
|
||||
for i := 1; i < len(raw); i++ {
|
||||
switch b := raw[i]; b {
|
||||
case '"':
|
||||
eaten++
|
||||
buf = buf[0:j]
|
||||
return i + 1, string(buf)
|
||||
case '\\':
|
||||
if len(raw) < i+2 {
|
||||
return -1, ""
|
||||
}
|
||||
buf[j] = httpUnquotePair(raw[i+1])
|
||||
eaten += 2
|
||||
j++
|
||||
i++
|
||||
default:
|
||||
if isQdText(b) {
|
||||
buf[j] = b
|
||||
} else {
|
||||
buf[j] = '?'
|
||||
}
|
||||
eaten++
|
||||
j++
|
||||
}
|
||||
}
|
||||
return -1, ""
|
||||
}
|
||||
|
||||
// This is a best effort parse, so errors are not returned, instead not all of
|
||||
// the input string might be parsed. result is always non-nil.
|
||||
func httpSplitFieldValue(fv string) (eaten int, result []string) {
|
||||
result = make([]string, 0, len(fv))
|
||||
raw := strings.Bytes(fv)
|
||||
i := 0
|
||||
chunk := ""
|
||||
for i < len(raw) {
|
||||
b := raw[i]
|
||||
switch {
|
||||
case b == '"':
|
||||
eaten, unq := httpUnquote(raw[i:len(raw)])
|
||||
if eaten < 0 {
|
||||
return i, result
|
||||
} else {
|
||||
i += eaten
|
||||
chunk += unq
|
||||
}
|
||||
case isSeparator(b):
|
||||
if chunk != "" {
|
||||
result = result[0 : len(result)+1]
|
||||
result[len(result)-1] = chunk
|
||||
chunk = ""
|
||||
}
|
||||
i++
|
||||
case isToken(b):
|
||||
chunk += string(b)
|
||||
i++
|
||||
case b == '\n' || b == '\r':
|
||||
i++
|
||||
default:
|
||||
chunk += "?"
|
||||
i++
|
||||
}
|
||||
}
|
||||
if chunk != "" {
|
||||
result = result[0 : len(result)+1]
|
||||
result[len(result)-1] = chunk
|
||||
chunk = ""
|
||||
}
|
||||
return i, result
|
||||
}
|
70
src/pkg/http/lex_test.go
Normal file
70
src/pkg/http/lex_test.go
Normal file
@ -0,0 +1,70 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package http
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
type lexTest struct {
|
||||
Raw string
|
||||
Parsed int // # of parsed characters
|
||||
Result []string
|
||||
}
|
||||
|
||||
var lexTests = []lexTest{
|
||||
lexTest{
|
||||
Raw: `"abc"def,:ghi`,
|
||||
Parsed: 13,
|
||||
Result: []string{"abcdef", "ghi"},
|
||||
},
|
||||
// My understanding of the RFC is that escape sequences outside of
|
||||
// quotes are not interpreted?
|
||||
lexTest{
|
||||
Raw: `"\t"\t"\t"`,
|
||||
Parsed: 10,
|
||||
Result: []string{"\t", "t\t"},
|
||||
},
|
||||
lexTest{
|
||||
Raw: `"\yab"\r\n`,
|
||||
Parsed: 10,
|
||||
Result: []string{"?ab", "r", "n"},
|
||||
},
|
||||
lexTest{
|
||||
Raw: "ab\f",
|
||||
Parsed: 3,
|
||||
Result: []string{"ab?"},
|
||||
},
|
||||
lexTest{
|
||||
Raw: "\"ab \" c,de f, gh, ij\n\t\r",
|
||||
Parsed: 23,
|
||||
Result: []string{"ab ", "c", "de", "f", "gh", "ij"},
|
||||
},
|
||||
}
|
||||
|
||||
func min(x, y int) int {
|
||||
if x <= y {
|
||||
return x
|
||||
}
|
||||
return y
|
||||
}
|
||||
|
||||
func TestSplitFieldValue(t *testing.T) {
|
||||
for k, l := range lexTests {
|
||||
parsed, result := httpSplitFieldValue(l.Raw)
|
||||
if parsed != l.Parsed {
|
||||
t.Errorf("#%d: Parsed %d, expected %d", k, parsed, l.Parsed)
|
||||
}
|
||||
if len(result) != len(l.Result) {
|
||||
t.Errorf("#%d: Result len %d, expected %d", k, len(result), len(l.Result))
|
||||
}
|
||||
for i := 0; i < min(len(result), len(l.Result)); i++ {
|
||||
if result[i] != l.Result[i] {
|
||||
t.Errorf("#%d: %d-th entry mismatch. Have {%s}, expect {%s}",
|
||||
k, i, result[i], l.Result[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user