diff --git a/src/pkg/http/Makefile b/src/pkg/http/Makefile index 7654de807f..8a45621224 100644 --- a/src/pkg/http/Makefile +++ b/src/pkg/http/Makefile @@ -9,6 +9,7 @@ GOFILES=\ chunked.go\ client.go\ fs.go\ + lex.go\ request.go\ response.go\ server.go\ diff --git a/src/pkg/http/lex.go b/src/pkg/http/lex.go new file mode 100644 index 0000000000..46e0519576 --- /dev/null +++ b/src/pkg/http/lex.go @@ -0,0 +1,152 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http + +import ( + "strings" +) + +// This file deals with lexical matters of HTTP + +func isSeparator(c byte) bool { + switch c { + case '(', ')', '<', '>', '@', ',', ';', ':', '\\', '"', '/', '[', ']', '?', '=', '{', '}', ' ', '\t': + return true + default: + return false + } + panic() +} + +func isSpace(c byte) bool { + switch c { + case ' ', '\t', '\r', '\n': + return true + default: + return false + } + panic() +} + +func isCtl(c byte) bool { return (0 <= c && c <= 31) || c == 127 } + +func isChar(c byte) bool { return 0 <= c && c <= 127 } + +func isAnyText(c byte) bool { return !isCtl(c) } + +func isQdText(c byte) bool { return isAnyText(c) && c != '"' } + +func isToken(c byte) bool { return isChar(c) && !isCtl(c) && !isSeparator(c) } + +// Valid escaped sequences are not specified in RFC 2616, so for now, we assume +// that they coincide with the common sense ones used by GO. Malformed +// characters should probably not be treated as errors by a robust (forgiving) +// parser, so we replace them with the '?' character. +func httpUnquotePair(b byte) byte { + // skip the first byte, which should always be '\' + switch b { + case 'a': + return '\a' + case 'b': + return '\b' + case 'f': + return '\f' + case 'n': + return '\n' + case 'r': + return '\r' + case 't': + return '\t' + case 'v': + return '\v' + case '\\': + return '\\' + case '\'': + return '\'' + case '"': + return '"' + } + return '?' +} + +// raw must begin with a valid quoted string. Only the first quoted string is +// parsed and is unquoted in result. eaten is the number of bytes parsed, or -1 +// upon failure. +func httpUnquote(raw []byte) (eaten int, result string) { + buf := make([]byte, len(raw)) + if raw[0] != '"' { + return -1, "" + } + eaten = 1 + j := 0 // # of bytes written in buf + for i := 1; i < len(raw); i++ { + switch b := raw[i]; b { + case '"': + eaten++ + buf = buf[0:j] + return i + 1, string(buf) + case '\\': + if len(raw) < i+2 { + return -1, "" + } + buf[j] = httpUnquotePair(raw[i+1]) + eaten += 2 + j++ + i++ + default: + if isQdText(b) { + buf[j] = b + } else { + buf[j] = '?' + } + eaten++ + j++ + } + } + return -1, "" +} + +// This is a best effort parse, so errors are not returned, instead not all of +// the input string might be parsed. result is always non-nil. +func httpSplitFieldValue(fv string) (eaten int, result []string) { + result = make([]string, 0, len(fv)) + raw := strings.Bytes(fv) + i := 0 + chunk := "" + for i < len(raw) { + b := raw[i] + switch { + case b == '"': + eaten, unq := httpUnquote(raw[i:len(raw)]) + if eaten < 0 { + return i, result + } else { + i += eaten + chunk += unq + } + case isSeparator(b): + if chunk != "" { + result = result[0 : len(result)+1] + result[len(result)-1] = chunk + chunk = "" + } + i++ + case isToken(b): + chunk += string(b) + i++ + case b == '\n' || b == '\r': + i++ + default: + chunk += "?" + i++ + } + } + if chunk != "" { + result = result[0 : len(result)+1] + result[len(result)-1] = chunk + chunk = "" + } + return i, result +} diff --git a/src/pkg/http/lex_test.go b/src/pkg/http/lex_test.go new file mode 100644 index 0000000000..a67070f5eb --- /dev/null +++ b/src/pkg/http/lex_test.go @@ -0,0 +1,70 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http + +import ( + "testing" +) + +type lexTest struct { + Raw string + Parsed int // # of parsed characters + Result []string +} + +var lexTests = []lexTest{ + lexTest{ + Raw: `"abc"def,:ghi`, + Parsed: 13, + Result: []string{"abcdef", "ghi"}, + }, + // My understanding of the RFC is that escape sequences outside of + // quotes are not interpreted? + lexTest{ + Raw: `"\t"\t"\t"`, + Parsed: 10, + Result: []string{"\t", "t\t"}, + }, + lexTest{ + Raw: `"\yab"\r\n`, + Parsed: 10, + Result: []string{"?ab", "r", "n"}, + }, + lexTest{ + Raw: "ab\f", + Parsed: 3, + Result: []string{"ab?"}, + }, + lexTest{ + Raw: "\"ab \" c,de f, gh, ij\n\t\r", + Parsed: 23, + Result: []string{"ab ", "c", "de", "f", "gh", "ij"}, + }, +} + +func min(x, y int) int { + if x <= y { + return x + } + return y +} + +func TestSplitFieldValue(t *testing.T) { + for k, l := range lexTests { + parsed, result := httpSplitFieldValue(l.Raw) + if parsed != l.Parsed { + t.Errorf("#%d: Parsed %d, expected %d", k, parsed, l.Parsed) + } + if len(result) != len(l.Result) { + t.Errorf("#%d: Result len %d, expected %d", k, len(result), len(l.Result)) + } + for i := 0; i < min(len(result), len(l.Result)); i++ { + if result[i] != l.Result[i] { + t.Errorf("#%d: %d-th entry mismatch. Have {%s}, expect {%s}", + k, i, result[i], l.Result[i]) + } + } + } +}