diff --git a/src/pkg/fmt/doc.go b/src/pkg/fmt/doc.go index 15aae50e3d6..f3067eac9f1 100644 --- a/src/pkg/fmt/doc.go +++ b/src/pkg/fmt/doc.go @@ -26,6 +26,7 @@ %o base 8 %x base 16, with lower-case letters for a-f %X base 16, with upper-case letters for A-F + %U unicode format: U+1234; same as "U+%x" with 4 digits default Floating-point and complex constituents: %e scientific notation, e.g. -1234.456e+78 %E scientific notation, e.g. -1234.456E+78 diff --git a/src/pkg/fmt/fmt_test.go b/src/pkg/fmt/fmt_test.go index fbc2536ee15..d87b93a7956 100644 --- a/src/pkg/fmt/fmt_test.go +++ b/src/pkg/fmt/fmt_test.go @@ -161,6 +161,14 @@ var fmttests = []fmtTest{ {"% d", 0, " 0"}, {"% d", 12345, " 12345"}, + // unicode format + {"%U", 0x1, "U+0001"}, + {"%.8U", 0x2, "U+00000002"}, + {"%U", 0x1234, "U+1234"}, + {"%U", 0x12345, "U+12345"}, + {"%10.6U", 0xABC, " U+000ABC"}, + {"%-10.6U", 0xABC, "U+000ABC "}, + // floats {"%+.3e", 0.0, "+0.000e+00"}, {"%+.3e", 1.0, "+1.000e+00"}, diff --git a/src/pkg/fmt/format.go b/src/pkg/fmt/format.go index 010280bf851..0121dda31dc 100644 --- a/src/pkg/fmt/format.go +++ b/src/pkg/fmt/format.go @@ -49,6 +49,7 @@ type fmt struct { plus bool sharp bool space bool + unicode bool zero bool } @@ -61,6 +62,7 @@ func (f *fmt) clearflags() { f.plus = false f.sharp = false f.space = false + f.unicode = false f.zero = false } @@ -213,6 +215,12 @@ func (f *fmt) integer(a int64, base uint64, signedness bool, digits string) { buf[i] = '0' } } + if f.unicode { + i-- + buf[i] = '+' + i-- + buf[i] = 'U' + } if negative { i-- diff --git a/src/pkg/fmt/print.go b/src/pkg/fmt/print.go index 3bb14eeb146..7ac6648c70c 100644 --- a/src/pkg/fmt/print.go +++ b/src/pkg/fmt/print.go @@ -316,6 +316,8 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) { p.fmt.integer(v, 8, signed, ldigits) case 'x': p.fmt.integer(v, 16, signed, ldigits) + case 'U': + p.fmtUnicode(v) case 'X': p.fmt.integer(v, 16, signed, udigits) default: @@ -323,7 +325,7 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) { } } -// fmt_sharpHex64 formats a uint64 in hexadecimal and prefixes it with 0x by +// fmt0x64 formats a uint64 in hexadecimal and prefixes it with 0x by // temporarily turning on the sharp flag. func (p *pp) fmt0x64(v uint64) { sharp := p.fmt.sharp @@ -332,6 +334,23 @@ func (p *pp) fmt0x64(v uint64) { p.fmt.sharp = sharp } +// fmtUnicode formats a uint64 in U+1234 form by +// temporarily turning on the unicode flag and tweaking the precision. +func (p *pp) fmtUnicode(v int64) { + precPresent := p.fmt.precPresent + prec := p.fmt.prec + if !precPresent { + // If prec is already set, leave it alone; otherwise 4 is minimum. + p.fmt.prec = 4 + p.fmt.precPresent = true + } + p.fmt.unicode = true // turn on U+ + p.fmt.integer(int64(v), 16, unsigned, udigits) + p.fmt.unicode = false + p.fmt.prec = prec + p.fmt.precPresent = precPresent +} + func (p *pp) fmtUint64(v uint64, verb int, goSyntax bool, value interface{}) { switch verb { case 'b': diff --git a/src/pkg/fmt/scan.go b/src/pkg/fmt/scan.go index 9b414cb9a7e..dcc42bc92da 100644 --- a/src/pkg/fmt/scan.go +++ b/src/pkg/fmt/scan.go @@ -388,9 +388,9 @@ func (s *ss) typeError(field interface{}, expected string) { var complexError = os.ErrorString("syntax error scanning complex number") var boolError = os.ErrorString("syntax error scanning boolean") -// accepts checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the -// buffer and returns true. Otherwise it return false. -func (s *ss) accept(ok string) bool { +// consume reads the next rune in the input and reports whether it is in the ok string. +// If accept is true, it puts the character into the input token. +func (s *ss) consume(ok string, accept bool) bool { if s.wid >= s.maxWid { return false } @@ -400,17 +400,25 @@ func (s *ss) accept(ok string) bool { } for i := 0; i < len(ok); i++ { if int(ok[i]) == rune { - s.buf.WriteRune(rune) - s.wid++ + if accept { + s.buf.WriteRune(rune) + s.wid++ + } return true } } - if rune != EOF { + if rune != EOF && accept { s.UngetRune() } return false } +// accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the +// buffer and returns true. Otherwise it return false. +func (s *ss) accept(ok string) bool { + return s.consume(ok, true) +} + // okVerb verifies that the verb is present in the list, setting s.err appropriately if not. func (s *ss) okVerb(verb int, okVerbs, typ string) bool { for _, v := range okVerbs { @@ -460,7 +468,7 @@ const ( // getBase returns the numeric base represented by the verb and its digit string. func (s *ss) getBase(verb int) (base int, digits string) { - s.okVerb(verb, "bdoxXv", "integer") // sets s.err + s.okVerb(verb, "bdoUxXv", "integer") // sets s.err base = 10 digits = decimalDigits switch verb { @@ -470,7 +478,7 @@ func (s *ss) getBase(verb int) (base int, digits string) { case 'o': base = 8 digits = octalDigits - case 'x', 'X': + case 'x', 'X', 'U': base = 16 digits = hexadecimalDigits } @@ -506,7 +514,13 @@ func (s *ss) scanInt(verb int, bitSize int) int64 { } base, digits := s.getBase(verb) s.skipSpace(false) - s.accept(sign) // If there's a sign, it will be left in the token buffer. + if verb == 'U' { + if !s.consume("U", false) || !s.consume("+", false) { + s.errorString("bad unicode format ") + } + } else { + s.accept(sign) // If there's a sign, it will be left in the token buffer. + } tok := s.scanNumber(digits) i, err := strconv.Btoi64(tok, base) if err != nil { @@ -528,6 +542,11 @@ func (s *ss) scanUint(verb int, bitSize int) uint64 { } base, digits := s.getBase(verb) s.skipSpace(false) + if verb == 'U' { + if !s.consume("U", false) || !s.consume("+", false) { + s.errorString("bad unicode format ") + } + } tok := s.scanNumber(digits) i, err := strconv.Btoui64(tok, base) if err != nil { diff --git a/src/pkg/fmt/scan_test.go b/src/pkg/fmt/scan_test.go index cf8a3a766f1..7a0baae2455 100644 --- a/src/pkg/fmt/scan_test.go +++ b/src/pkg/fmt/scan_test.go @@ -222,6 +222,8 @@ var scanfTests = []ScanfTest{ {"%o", "075\n", &uintVal, uint(075)}, {"%x", "a75\n", &uintVal, uint(0xa75)}, {"%x", "A75\n", &uintVal, uint(0xa75)}, + {"%U", "U+1234\n", &intVal, int(0x1234)}, + {"%U", "U+4567\n", &uintVal, uint(0x4567)}, // Strings {"%s", "using-%s\n", &stringVal, "using-%s"},