fmt: add %U format for standard Unicode representation of integer values.

fmt.Printf("%U", 1) yields "U+0001" It's essentially "U+%.4x" but lets you override the precision works in scan, too. R=rsc CC=golang-dev https://golang.org/cl/3423043
2024-11-22 01:04:40 -07:00 · 2010-12-06 14:23:37 -05:00 · 2010-12-06 14:23:37 -05:00 · 730e39cd13
commit 730e39cd13
parent e2d1595c81
6 changed files with 67 additions and 10 deletions
--- a/src/pkg/fmt/doc.go
+++ b/src/pkg/fmt/doc.go
@ -26,6 +26,7 @@
 		%o	base 8
 		%x	base 16, with lower-case letters for a-f
 		%X	base 16, with upper-case letters for A-F
 		%U	unicode format: U+1234; same as "U+%x" with 4 digits default
 	Floating-point and complex constituents:
 		%e	scientific notation, e.g. -1234.456e+78
 		%E	scientific notation, e.g. -1234.456E+78
--- a/src/pkg/fmt/fmt_test.go
+++ b/src/pkg/fmt/fmt_test.go
@ -161,6 +161,14 @@ var fmttests = []fmtTest{
 	{"% d", 0, " 0"},
 	{"% d", 12345, " 12345"},
 	// unicode format
 	{"%U", 0x1, "U+0001"},
 	{"%.8U", 0x2, "U+00000002"},
 	{"%U", 0x1234, "U+1234"},
 	{"%U", 0x12345, "U+12345"},
 	{"%10.6U", 0xABC, "  U+000ABC"},
 	{"%-10.6U", 0xABC, "U+000ABC  "},
 	// floats
 	{"%+.3e", 0.0, "+0.000e+00"},
 	{"%+.3e", 1.0, "+1.000e+00"},
--- a/src/pkg/fmt/format.go
+++ b/src/pkg/fmt/format.go
@ -49,6 +49,7 @@ type fmt struct {
 	plus        bool
 	sharp       bool
 	space       bool
 	unicode     bool
 	zero        bool
 }
@ -61,6 +62,7 @@ func (f *fmt) clearflags() {
 	f.plus = false
 	f.sharp = false
 	f.space = false
 	f.unicode = false
 	f.zero = false
 }
@ -213,6 +215,12 @@ func (f *fmt) integer(a int64, base uint64, signedness bool, digits string) {
 			buf[i] = '0'
 		}
 	}
 	if f.unicode {
 		i--
 		buf[i] = '+'
 		i--
 		buf[i] = 'U'
 	}
 	if negative {
 		i--
--- a/src/pkg/fmt/print.go
+++ b/src/pkg/fmt/print.go
@ -316,6 +316,8 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) {
 		p.fmt.integer(v, 8, signed, ldigits)
 	case 'x':
 		p.fmt.integer(v, 16, signed, ldigits)
 	case 'U':
 		p.fmtUnicode(v)
 	case 'X':
 		p.fmt.integer(v, 16, signed, udigits)
 	default:
@ -323,7 +325,7 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) {
 	}
 }
-// fmt_sharpHex64 formats a uint64 in hexadecimal and prefixes it with 0x by
+// fmt0x64 formats a uint64 in hexadecimal and prefixes it with 0x by
 // temporarily turning on the sharp flag.
 func (p *pp) fmt0x64(v uint64) {
 	sharp := p.fmt.sharp
@ -332,6 +334,23 @@ func (p *pp) fmt0x64(v uint64) {
 	p.fmt.sharp = sharp
 }
 // fmtUnicode formats a uint64 in U+1234 form by
 // temporarily turning on the unicode flag and tweaking the precision.
 func (p *pp) fmtUnicode(v int64) {
 	precPresent := p.fmt.precPresent
 	prec := p.fmt.prec
 	if !precPresent {
 		// If prec is already set, leave it alone; otherwise 4 is minimum.
 		p.fmt.prec = 4
 		p.fmt.precPresent = true
 	}
 	p.fmt.unicode = true // turn on U+
 	p.fmt.integer(int64(v), 16, unsigned, udigits)
 	p.fmt.unicode = false
 	p.fmt.prec = prec
 	p.fmt.precPresent = precPresent
 }
 func (p *pp) fmtUint64(v uint64, verb int, goSyntax bool, value interface{}) {
 	switch verb {
 	case 'b':
--- a/src/pkg/fmt/scan.go
+++ b/src/pkg/fmt/scan.go
@ -388,9 +388,9 @@ func (s *ss) typeError(field interface{}, expected string) {
 var complexError = os.ErrorString("syntax error scanning complex number")
 var boolError = os.ErrorString("syntax error scanning boolean")
-// accepts checks the next rune in the input.  If it's a byte (sic) in the string, it puts it in the
+// consume reads the next rune in the input and reports whether it is in the ok string.
-// buffer and returns true. Otherwise it return false.
+// If accept is true, it puts the character into the input token.
-func (s *ss) accept(ok string) bool {
+func (s *ss) consume(ok string, accept bool) bool {
 	if s.wid >= s.maxWid {
 		return false
 	}
@ -400,17 +400,25 @@ func (s *ss) accept(ok string) bool {
 	}
 	for i := 0; i < len(ok); i++ {
 		if int(ok[i]) == rune {
-			s.buf.WriteRune(rune)
+			if accept {
-			s.wid++
+				s.buf.WriteRune(rune)
 				s.wid++
 			}
 			return true
 		}
 	}
-	if rune != EOF {
+	if rune != EOF && accept {
 		s.UngetRune()
 	}
 	return false
 }
 // accept checks the next rune in the input.  If it's a byte (sic) in the string, it puts it in the
 // buffer and returns true. Otherwise it return false.
 func (s *ss) accept(ok string) bool {
 	return s.consume(ok, true)
 }
 // okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
 func (s *ss) okVerb(verb int, okVerbs, typ string) bool {
 	for _, v := range okVerbs {
@ -460,7 +468,7 @@ const (
 // getBase returns the numeric base represented by the verb and its digit string.
 func (s *ss) getBase(verb int) (base int, digits string) {
-	s.okVerb(verb, "bdoxXv", "integer") // sets s.err
+	s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
 	base = 10
 	digits = decimalDigits
 	switch verb {
@ -470,7 +478,7 @@ func (s *ss) getBase(verb int) (base int, digits string) {
 	case 'o':
 		base = 8
 		digits = octalDigits
-	case 'x', 'X':
+	case 'x', 'X', 'U':
 		base = 16
 		digits = hexadecimalDigits
 	}
@ -506,7 +514,13 @@ func (s *ss) scanInt(verb int, bitSize int) int64 {
 	}
 	base, digits := s.getBase(verb)
 	s.skipSpace(false)
-	s.accept(sign) // If there's a sign, it will be left in the token buffer.
+	if verb == 'U' {
 		if !s.consume("U", false) || !s.consume("+", false) {
 			s.errorString("bad unicode format ")
 		}
 	} else {
 		s.accept(sign) // If there's a sign, it will be left in the token buffer.
 	}
 	tok := s.scanNumber(digits)
 	i, err := strconv.Btoi64(tok, base)
 	if err != nil {
@ -528,6 +542,11 @@ func (s *ss) scanUint(verb int, bitSize int) uint64 {
 	}
 	base, digits := s.getBase(verb)
 	s.skipSpace(false)
 	if verb == 'U' {
 		if !s.consume("U", false) || !s.consume("+", false) {
 			s.errorString("bad unicode format ")
 		}
 	}
 	tok := s.scanNumber(digits)
 	i, err := strconv.Btoui64(tok, base)
 	if err != nil {
--- a/src/pkg/fmt/scan_test.go
+++ b/src/pkg/fmt/scan_test.go
@ -222,6 +222,8 @@ var scanfTests = []ScanfTest{
 	{"%o", "075\n", &uintVal, uint(075)},
 	{"%x", "a75\n", &uintVal, uint(0xa75)},
 	{"%x", "A75\n", &uintVal, uint(0xa75)},
 	{"%U", "U+1234\n", &intVal, int(0x1234)},
 	{"%U", "U+4567\n", &uintVal, uint(0x4567)},
 	// Strings
 	{"%s", "using-%s\n", &stringVal, "using-%s"},