mirror of
https://github.com/golang/go
synced 2024-11-22 01:04:40 -07:00
fmt: add %U format for standard Unicode representation of integer values.
fmt.Printf("%U", 1) yields "U+0001". It's essentially "U+%.4x" but lets you override the precision. Works in scan, too. R=rsc CC=golang-dev https://golang.org/cl/3423043
This commit is contained in:
parent
e2d1595c81
commit
730e39cd13
@ -26,6 +26,7 @@
|
|||||||
%o base 8
|
%o base 8
|
||||||
%x base 16, with lower-case letters for a-f
|
%x base 16, with lower-case letters for a-f
|
||||||
%X base 16, with upper-case letters for A-F
|
%X base 16, with upper-case letters for A-F
|
||||||
|
%U unicode format: U+1234; same as "U+%x" with 4 digits default
|
||||||
Floating-point and complex constituents:
|
Floating-point and complex constituents:
|
||||||
%e scientific notation, e.g. -1234.456e+78
|
%e scientific notation, e.g. -1234.456e+78
|
||||||
%E scientific notation, e.g. -1234.456E+78
|
%E scientific notation, e.g. -1234.456E+78
|
||||||
|
@ -161,6 +161,14 @@ var fmttests = []fmtTest{
|
|||||||
{"% d", 0, " 0"},
|
{"% d", 0, " 0"},
|
||||||
{"% d", 12345, " 12345"},
|
{"% d", 12345, " 12345"},
|
||||||
|
|
||||||
|
// unicode format
|
||||||
|
{"%U", 0x1, "U+0001"},
|
||||||
|
{"%.8U", 0x2, "U+00000002"},
|
||||||
|
{"%U", 0x1234, "U+1234"},
|
||||||
|
{"%U", 0x12345, "U+12345"},
|
||||||
|
{"%10.6U", 0xABC, " U+000ABC"},
|
||||||
|
{"%-10.6U", 0xABC, "U+000ABC "},
|
||||||
|
|
||||||
// floats
|
// floats
|
||||||
{"%+.3e", 0.0, "+0.000e+00"},
|
{"%+.3e", 0.0, "+0.000e+00"},
|
||||||
{"%+.3e", 1.0, "+1.000e+00"},
|
{"%+.3e", 1.0, "+1.000e+00"},
|
||||||
|
@ -49,6 +49,7 @@ type fmt struct {
|
|||||||
plus bool
|
plus bool
|
||||||
sharp bool
|
sharp bool
|
||||||
space bool
|
space bool
|
||||||
|
unicode bool
|
||||||
zero bool
|
zero bool
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -61,6 +62,7 @@ func (f *fmt) clearflags() {
|
|||||||
f.plus = false
|
f.plus = false
|
||||||
f.sharp = false
|
f.sharp = false
|
||||||
f.space = false
|
f.space = false
|
||||||
|
f.unicode = false
|
||||||
f.zero = false
|
f.zero = false
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -213,6 +215,12 @@ func (f *fmt) integer(a int64, base uint64, signedness bool, digits string) {
|
|||||||
buf[i] = '0'
|
buf[i] = '0'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if f.unicode {
|
||||||
|
i--
|
||||||
|
buf[i] = '+'
|
||||||
|
i--
|
||||||
|
buf[i] = 'U'
|
||||||
|
}
|
||||||
|
|
||||||
if negative {
|
if negative {
|
||||||
i--
|
i--
|
||||||
|
@ -316,6 +316,8 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) {
|
|||||||
p.fmt.integer(v, 8, signed, ldigits)
|
p.fmt.integer(v, 8, signed, ldigits)
|
||||||
case 'x':
|
case 'x':
|
||||||
p.fmt.integer(v, 16, signed, ldigits)
|
p.fmt.integer(v, 16, signed, ldigits)
|
||||||
|
case 'U':
|
||||||
|
p.fmtUnicode(v)
|
||||||
case 'X':
|
case 'X':
|
||||||
p.fmt.integer(v, 16, signed, udigits)
|
p.fmt.integer(v, 16, signed, udigits)
|
||||||
default:
|
default:
|
||||||
@ -323,7 +325,7 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// fmt_sharpHex64 formats a uint64 in hexadecimal and prefixes it with 0x by
|
// fmt0x64 formats a uint64 in hexadecimal and prefixes it with 0x by
|
||||||
// temporarily turning on the sharp flag.
|
// temporarily turning on the sharp flag.
|
||||||
func (p *pp) fmt0x64(v uint64) {
|
func (p *pp) fmt0x64(v uint64) {
|
||||||
sharp := p.fmt.sharp
|
sharp := p.fmt.sharp
|
||||||
@ -332,6 +334,23 @@ func (p *pp) fmt0x64(v uint64) {
|
|||||||
p.fmt.sharp = sharp
|
p.fmt.sharp = sharp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// fmtUnicode formats a uint64 in U+1234 form by
|
||||||
|
// temporarily turning on the unicode flag and tweaking the precision.
|
||||||
|
func (p *pp) fmtUnicode(v int64) {
|
||||||
|
precPresent := p.fmt.precPresent
|
||||||
|
prec := p.fmt.prec
|
||||||
|
if !precPresent {
|
||||||
|
// If prec is already set, leave it alone; otherwise 4 is minimum.
|
||||||
|
p.fmt.prec = 4
|
||||||
|
p.fmt.precPresent = true
|
||||||
|
}
|
||||||
|
p.fmt.unicode = true // turn on U+
|
||||||
|
p.fmt.integer(int64(v), 16, unsigned, udigits)
|
||||||
|
p.fmt.unicode = false
|
||||||
|
p.fmt.prec = prec
|
||||||
|
p.fmt.precPresent = precPresent
|
||||||
|
}
|
||||||
|
|
||||||
func (p *pp) fmtUint64(v uint64, verb int, goSyntax bool, value interface{}) {
|
func (p *pp) fmtUint64(v uint64, verb int, goSyntax bool, value interface{}) {
|
||||||
switch verb {
|
switch verb {
|
||||||
case 'b':
|
case 'b':
|
||||||
|
@ -388,9 +388,9 @@ func (s *ss) typeError(field interface{}, expected string) {
|
|||||||
var complexError = os.ErrorString("syntax error scanning complex number")
|
var complexError = os.ErrorString("syntax error scanning complex number")
|
||||||
var boolError = os.ErrorString("syntax error scanning boolean")
|
var boolError = os.ErrorString("syntax error scanning boolean")
|
||||||
|
|
||||||
// accepts checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
|
// consume reads the next rune in the input and reports whether it is in the ok string.
|
||||||
// buffer and returns true. Otherwise it return false.
|
// If accept is true, it puts the character into the input token.
|
||||||
func (s *ss) accept(ok string) bool {
|
func (s *ss) consume(ok string, accept bool) bool {
|
||||||
if s.wid >= s.maxWid {
|
if s.wid >= s.maxWid {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@ -400,17 +400,25 @@ func (s *ss) accept(ok string) bool {
|
|||||||
}
|
}
|
||||||
for i := 0; i < len(ok); i++ {
|
for i := 0; i < len(ok); i++ {
|
||||||
if int(ok[i]) == rune {
|
if int(ok[i]) == rune {
|
||||||
s.buf.WriteRune(rune)
|
if accept {
|
||||||
s.wid++
|
s.buf.WriteRune(rune)
|
||||||
|
s.wid++
|
||||||
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if rune != EOF {
|
if rune != EOF && accept {
|
||||||
s.UngetRune()
|
s.UngetRune()
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
|
||||||
|
// buffer and returns true. Otherwise it return false.
|
||||||
|
func (s *ss) accept(ok string) bool {
|
||||||
|
return s.consume(ok, true)
|
||||||
|
}
|
||||||
|
|
||||||
// okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
|
// okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
|
||||||
func (s *ss) okVerb(verb int, okVerbs, typ string) bool {
|
func (s *ss) okVerb(verb int, okVerbs, typ string) bool {
|
||||||
for _, v := range okVerbs {
|
for _, v := range okVerbs {
|
||||||
@ -460,7 +468,7 @@ const (
|
|||||||
|
|
||||||
// getBase returns the numeric base represented by the verb and its digit string.
|
// getBase returns the numeric base represented by the verb and its digit string.
|
||||||
func (s *ss) getBase(verb int) (base int, digits string) {
|
func (s *ss) getBase(verb int) (base int, digits string) {
|
||||||
s.okVerb(verb, "bdoxXv", "integer") // sets s.err
|
s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
|
||||||
base = 10
|
base = 10
|
||||||
digits = decimalDigits
|
digits = decimalDigits
|
||||||
switch verb {
|
switch verb {
|
||||||
@ -470,7 +478,7 @@ func (s *ss) getBase(verb int) (base int, digits string) {
|
|||||||
case 'o':
|
case 'o':
|
||||||
base = 8
|
base = 8
|
||||||
digits = octalDigits
|
digits = octalDigits
|
||||||
case 'x', 'X':
|
case 'x', 'X', 'U':
|
||||||
base = 16
|
base = 16
|
||||||
digits = hexadecimalDigits
|
digits = hexadecimalDigits
|
||||||
}
|
}
|
||||||
@ -506,7 +514,13 @@ func (s *ss) scanInt(verb int, bitSize int) int64 {
|
|||||||
}
|
}
|
||||||
base, digits := s.getBase(verb)
|
base, digits := s.getBase(verb)
|
||||||
s.skipSpace(false)
|
s.skipSpace(false)
|
||||||
s.accept(sign) // If there's a sign, it will be left in the token buffer.
|
if verb == 'U' {
|
||||||
|
if !s.consume("U", false) || !s.consume("+", false) {
|
||||||
|
s.errorString("bad unicode format ")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
s.accept(sign) // If there's a sign, it will be left in the token buffer.
|
||||||
|
}
|
||||||
tok := s.scanNumber(digits)
|
tok := s.scanNumber(digits)
|
||||||
i, err := strconv.Btoi64(tok, base)
|
i, err := strconv.Btoi64(tok, base)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -528,6 +542,11 @@ func (s *ss) scanUint(verb int, bitSize int) uint64 {
|
|||||||
}
|
}
|
||||||
base, digits := s.getBase(verb)
|
base, digits := s.getBase(verb)
|
||||||
s.skipSpace(false)
|
s.skipSpace(false)
|
||||||
|
if verb == 'U' {
|
||||||
|
if !s.consume("U", false) || !s.consume("+", false) {
|
||||||
|
s.errorString("bad unicode format ")
|
||||||
|
}
|
||||||
|
}
|
||||||
tok := s.scanNumber(digits)
|
tok := s.scanNumber(digits)
|
||||||
i, err := strconv.Btoui64(tok, base)
|
i, err := strconv.Btoui64(tok, base)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -222,6 +222,8 @@ var scanfTests = []ScanfTest{
|
|||||||
{"%o", "075\n", &uintVal, uint(075)},
|
{"%o", "075\n", &uintVal, uint(075)},
|
||||||
{"%x", "a75\n", &uintVal, uint(0xa75)},
|
{"%x", "a75\n", &uintVal, uint(0xa75)},
|
||||||
{"%x", "A75\n", &uintVal, uint(0xa75)},
|
{"%x", "A75\n", &uintVal, uint(0xa75)},
|
||||||
|
{"%U", "U+1234\n", &intVal, int(0x1234)},
|
||||||
|
{"%U", "U+4567\n", &uintVal, uint(0x4567)},
|
||||||
|
|
||||||
// Strings
|
// Strings
|
||||||
{"%s", "using-%s\n", &stringVal, "using-%s"},
|
{"%s", "using-%s\n", &stringVal, "using-%s"},
|
||||||
|
Loading…
Reference in New Issue
Block a user