From 448aa49cfe7f4ee0b983fc4cf31e23787eefaa01 Mon Sep 17 00:00:00 2001 From: Rob Pike Date: Fri, 8 Jan 2010 12:59:20 +1100 Subject: [PATCH] Add a parser to the time package, the inverse of time.Format R=rsc CC=golang-dev https://golang.org/cl/183141 --- src/pkg/time/format.go | 230 ++++++++++++++++++++++++++++++++++++++ src/pkg/time/time_test.go | 116 ++++++++++++++++++- 2 files changed, 345 insertions(+), 1 deletion(-) diff --git a/src/pkg/time/format.go b/src/pkg/time/format.go index 28550aa364f..8f81df980cd 100644 --- a/src/pkg/time/format.go +++ b/src/pkg/time/format.go @@ -2,6 +2,7 @@ package time import ( "bytes" + "os" "strconv" ) @@ -104,6 +105,15 @@ var longMonthNames = []string{ "December", } +func lookup(tab []string, val string) (int, os.Error) { + for i, v := range tab { + if v == val { + return i, nil + } + } + return -1, errBad +} + func charType(c uint8) int { switch { case '0' <= c && c <= '9': @@ -215,3 +225,223 @@ func (t *Time) Format(layout string) string { // String returns a Unix-style representation of the time value. func (t *Time) String() string { return t.Format(UnixDate) } + +var errBad = os.ErrorString("bad") // just a marker; not returned to user + +// ParseError describes a problem parsing a time string. +type ParseError struct { + Layout string + Value string + LayoutElem string + ValueElem string + Message string +} + +// String is the string representation of a ParseError. +func (e *ParseError) String() string { + if e.Message == "" { + return "parsing time " + + strconv.Quote(e.Value) + " as " + + strconv.Quote(e.Layout) + ": cannot parse " + + strconv.Quote(e.ValueElem) + " as " + + strconv.Quote(e.LayoutElem) + } + return "parsing time " + + strconv.Quote(e.Value) + e.Message +} + +// Parse parses a formatted string and returns the time value it represents. +// The layout defines the format by showing the representation of a standard +// time, which is then used to describe the string to be parsed. Predefined +// layouts ANSIC, UnixDate, ISO8601 and others describe standard +// representations. +// +// Only those elements present in the value will be set in the returned time +// structure. Also, if the input string represents an inconsistent time +// (such as having the wrong day of the week), the returned value will also +// be inconsistent. In any case, the elements of the returned time will be +// sane: hours in 0..23, minutes in 0..59, day of month in 0..31, etc. +func Parse(alayout, avalue string) (*Time, os.Error) { + var t Time + const formatErr = ": different format from " + rangeErrString := "" // set if a value is out of range + pmSet := false // do we need to add 12 to the hour? + // Each iteration steps along one piece + nextIsYear := false // whether next item is a Year; means we saw a minus sign. + layout, value := alayout, avalue + for len(layout) > 0 && len(value) > 0 { + c := layout[0] + pieceType := charType(c) + var i int + for i = 0; i < len(layout) && charType(layout[i]) == pieceType; i++ { + } + reference := layout[0:i] + layout = layout[i:] + if reference == "Z" { + // Special case for ISO8601 time zone: "Z" or "-0800" + if value[0] == 'Z' { + i = 1 + } else if len(value) >= 5 { + i = 5 + } else { + return nil, &ParseError{Layout: alayout, Value: avalue, Message: formatErr + alayout} + } + } else { + c = value[0] + if charType(c) != pieceType { + return nil, &ParseError{Layout: alayout, Value: avalue, Message: formatErr + alayout} + } + for i = 0; i < len(value) && charType(value[i]) == pieceType; i++ { + } + } + p := value[0:i] + value = value[i:] + // Separators must match except possibly for a following minus sign (for negative years) + if pieceType == separator { + if len(p) != len(reference) { + // must be exactly a following minus sign + if len(p) != len(reference)+1 || p[len(p)-1] != '-' { + return nil, &ParseError{Layout: alayout, Value: avalue, Message: formatErr + alayout} + } + nextIsYear = true + continue + } + } + var err os.Error + switch reference { + case stdYear: + t.Year, err = strconv.Atoi64(p) + if t.Year >= 69 { // Unix time starts Dec 31 1969 in some time zones + t.Year += 1900 + } else { + t.Year += 2000 + } + case stdLongYear: + t.Year, err = strconv.Atoi64(p) + if nextIsYear { + t.Year = -t.Year + nextIsYear = false + } + case stdMonth: + t.Month, err = lookup(shortMonthNames, p) + case stdLongMonth: + t.Month, err = lookup(longMonthNames, p) + case stdNumMonth, stdZeroMonth: + t.Month, err = strconv.Atoi(p) + if t.Month <= 0 || 12 < t.Month { + rangeErrString = "month" + } + case stdWeekDay: + t.Weekday, err = lookup(shortDayNames, p) + case stdLongWeekDay: + t.Weekday, err = lookup(longDayNames, p) + case stdDay, stdZeroDay: + t.Day, err = strconv.Atoi(p) + if t.Day < 0 || 31 < t.Day { + // TODO: be more thorough in date check? + rangeErrString = "day" + } + case stdHour: + t.Hour, err = strconv.Atoi(p) + if t.Hour < 0 || 24 <= t.Hour { + rangeErrString = "hour" + } + case stdHour12, stdZeroHour12: + t.Hour, err = strconv.Atoi(p) + if t.Hour < 0 || 12 < t.Hour { + rangeErrString = "hour" + } + case stdMinute, stdZeroMinute: + t.Minute, err = strconv.Atoi(p) + if t.Minute < 0 || 60 <= t.Minute { + rangeErrString = "minute" + } + case stdSecond, stdZeroSecond: + t.Second, err = strconv.Atoi(p) + if t.Second < 0 || 60 <= t.Second { + rangeErrString = "second" + } + case stdZulu: + if len(p) != 4 { + err = os.ErrorString("HHMM value must be 4 digits") + break + } + t.Hour, err = strconv.Atoi(p[0:2]) + if err != nil { + t.Minute, err = strconv.Atoi(p[2:4]) + } + case stdISO8601TZ: + if p == "Z" { + t.Zone = "UTC" + break + } + // len(p) known to be 5: "-0800" + var hr, min int + hr, err = strconv.Atoi(p[1:3]) + if err != nil { + min, err = strconv.Atoi(p[3:5]) + } + t.ZoneOffset = (hr*60 + min) * 60 // offset is in seconds + switch p[0] { + case '+': + case '-': + t.ZoneOffset = -t.ZoneOffset + default: + err = errBad + } + case stdPM: + if p == "PM" { + pmSet = true + } else if p != "AM" { + err = errBad + } + case stdpm: + if p == "pm" { + pmSet = true + } else if p != "am" { + err = errBad + } + case stdTZ: + // Does it look like a time zone? + if p == "UTC" { + t.Zone = p + break + } + // All other time zones look like XXT or XXXT. + if len(p) != 3 && len(p) != 4 || p[len(p)-1] != 'T' { + err = errBad + } + for i := 0; i < len(p); i++ { + if p[i] < 'A' || 'Z' < p[i] { + err = errBad + } + } + if err != nil { + break + } + // It's a valid format. + t.Zone = p + // Can we find it in the table? + for _, z := range zones { + if p == z.zone.name { + t.ZoneOffset = z.zone.utcoff + break + } + } + } + if nextIsYear { + // Means we didn't see a year when we were expecting one + return nil, &ParseError{Layout: alayout, Value: value, Message: formatErr + alayout} + } + if rangeErrString != "" { + return nil, &ParseError{alayout, avalue, reference, p, ": " + rangeErrString + " out of range"} + } + if err != nil { + return nil, &ParseError{alayout, avalue, reference, p, ""} + } + } + if pmSet && t.Hour < 12 { + t.Hour += 12 + } + return &t, nil +} diff --git a/src/pkg/time/time_test.go b/src/pkg/time/time_test.go index 8f7c267cb60..dab6d2073aa 100644 --- a/src/pkg/time/time_test.go +++ b/src/pkg/time/time_test.go @@ -6,6 +6,7 @@ package time_test import ( "os" + "strings" "testing" "testing/quick" . "time" @@ -140,7 +141,7 @@ var formatTests = []FormatTest{ } func TestFormat(t *testing.T) { - // The numeric time represents Thu Feb 4 21:00:57 EST 2010 + // The numeric time represents Thu Feb 4 21:00:57 PST 2010 time := SecondsToLocalTime(1265346057) for _, test := range formatTests { result := time.Format(test.format) @@ -150,6 +151,113 @@ func TestFormat(t *testing.T) { } } +type ParseTest struct { + name string + format string + value string + hasTZ bool // contains a time zone + hasWD bool // contains a weekday +} + +var parseTests = []ParseTest{ + ParseTest{"ANSIC", ANSIC, "Thu Feb 4 21:00:57 2010", false, true}, + ParseTest{"UnixDate", UnixDate, "Thu Feb 4 21:00:57 PST 2010", true, true}, + ParseTest{"RFC850", RFC850, "Thursday, 04-Feb-10 21:00:57 PST", true, true}, + ParseTest{"RFC1123", RFC1123, "Thu, 04 Feb 2010 21:00:57 PST", true, true}, + ParseTest{"ISO8601", ISO8601, "2010-02-04T21:00:57-0800", true, false}, +} + +func TestParse(t *testing.T) { + for _, test := range parseTests { + time, err := Parse(test.format, test.value) + if err != nil { + t.Errorf("%s error: %v", test.name, err) + } else { + checkTime(time, &test, t) + } + } +} + +func checkTime(time *Time, test *ParseTest, t *testing.T) { + // The time should be Thu Feb 4 21:00:57 PST 2010 + if time.Year != 2010 { + t.Errorf("%s: bad year: %d not %d\n", test.name, time.Year, 2010) + } + if time.Month != 2 { + t.Errorf("%s: bad month: %d not %d\n", test.name, time.Month, 2) + } + if time.Day != 4 { + t.Errorf("%s: bad day: %d not %d\n", test.name, time.Day, 4) + } + if time.Hour != 21 { + t.Errorf("%s: bad hour: %d not %d\n", test.name, time.Hour, 21) + } + if time.Minute != 0 { + t.Errorf("%s: bad minute: %d not %d\n", test.name, time.Minute, 0) + } + if time.Second != 57 { + t.Errorf("%s: bad second: %d not %d\n", test.name, time.Second, 57) + } + if test.hasTZ && time.ZoneOffset != -28800 { + t.Errorf("%s: bad tz offset: %d not %d\n", test.name, time.ZoneOffset, -28800) + } + if test.hasWD && time.Weekday != 4 { + t.Errorf("%s: bad weekday: %d not %d\n", test.name, time.Weekday, 4) + } +} + +func TestFormatAndParse(t *testing.T) { + const fmt = "Mon MST " + ISO8601 // all fields + f := func(sec int64) bool { + t1 := SecondsToLocalTime(sec) + t2, err := Parse(fmt, t1.Format(fmt)) + if err != nil { + t.Errorf("error: %s", err) + return false + } + if !same(t1, t2) { + t.Errorf("different: %q %q", t1, t2) + return false + } + return true + } + f32 := func(sec int32) bool { return f(int64(sec)) } + cfg := &quick.Config{MaxCount: 10000} + + // Try a reasonable date first, then the huge ones. + if err := quick.Check(f32, cfg); err != nil { + t.Fatal(err) + } + if err := quick.Check(f, cfg); err != nil { + t.Fatal(err) + } +} + +type ParseErrorTest struct { + format string + value string + expect string // must appear within the error +} + +var parseErrorTests = []ParseErrorTest{ + ParseErrorTest{ANSIC, "Feb 4 21:00:60 2010", "parse"}, // cannot parse Feb as Mon + ParseErrorTest{ANSIC, "Thu Feb 4 21:00:57 @2010", "format"}, + ParseErrorTest{ANSIC, "Thu Feb 4 21:00:60 2010", "second out of range"}, + ParseErrorTest{ANSIC, "Thu Feb 4 21:61:57 2010", "minute out of range"}, + ParseErrorTest{ANSIC, "Thu Feb 4 24:00:60 2010", "hour out of range"}, +} + +func TestParseErrors(t *testing.T) { + for _, test := range parseErrorTests { + _, err := Parse(test.format, test.value) + if err == nil { + t.Errorf("expected error for %q %q\n", test.format, test.value) + } else if strings.Index(err.String(), test.expect) < 0 { + t.Errorf("expected error with %q for %q %q; got %s\n", test.expect, test.format, test.value, err) + } + } +} + func BenchmarkSeconds(b *testing.B) { for i := 0; i < b.N; i++ { Seconds() @@ -168,3 +276,9 @@ func BenchmarkFormat(b *testing.B) { time.Format("Mon Jan 2 15:04:05 2006") } } + +func BenchmarkParse(b *testing.B) { + for i := 0; i < b.N; i++ { + Parse(ANSIC, "Mon Jan 2 15:04:05 2006") + } +}