1
0
mirror of https://github.com/golang/go synced 2024-09-23 11:20:17 -06:00

encoding: document that base32 and base64 do not use UTF-8

The invention of base32 and base64 predates the invention of UTF-8
and was never meant to output valid UTF-8.
By default, the output is always valid ASCII (and thus valid UTF-8)
except when the user specifies an alphabet or padding value
that is larger than '\x7f'. If that is done,
then the exact byte symbol is used rather than the UTF-8 encoding.

Fixes #60689

Change-Id: I4ec88d974ec0658ad1a578bbd65a809e27c73ea7
Reviewed-on: https://go-review.googlesource.com/c/go/+/505237
Run-TryBot: Joseph Tsai <joetsai@digital-static.net>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Joe Tsai 2023-06-22 11:10:18 -07:00 committed by Joseph Tsai
parent 8008c0840f
commit 4f36f7e4dd
2 changed files with 8 additions and 2 deletions

View File

@ -50,7 +50,8 @@ const encodeStd = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
const encodeHex = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
// NewEncoding returns a new Encoding defined by the given alphabet,
// which must be a 32-byte string.
// which must be a 32-byte string. The alphabet is treated as sequence
// of byte values without any special treatment for multi-byte UTF-8.
func NewEncoding(encoder string) *Encoding {
if len(encoder) != 32 {
panic("encoding alphabet is not 32-bytes long")
@ -80,6 +81,8 @@ var HexEncoding = NewEncoding(encodeHex)
// The padding character must not be '\r' or '\n', must not
// be contained in the encoding's alphabet and must be a rune equal or
// below '\xff'.
// Padding characters above '\x7f' are encoded as their exact byte value
// rather than using the UTF-8 representation of the codepoint.
func (enc Encoding) WithPadding(padding rune) *Encoding {
if padding == '\r' || padding == '\n' || padding > 0xff {
panic("invalid padding")

View File

@ -54,7 +54,8 @@ const encodeURL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012345678
// NewEncoding returns a new padded Encoding defined by the given alphabet,
// which must be a 64-byte string that does not contain the padding character
// or CR / LF ('\r', '\n').
// or CR / LF ('\r', '\n'). The alphabet is treated as sequence of byte values
// without any special treatment for multi-byte UTF-8.
// The resulting Encoding uses the default padding character ('='),
// which may be changed or disabled via WithPadding.
func NewEncoding(encoder string) *Encoding {
@ -83,6 +84,8 @@ func NewEncoding(encoder string) *Encoding {
// The padding character must not be '\r' or '\n', must not
// be contained in the encoding's alphabet and must be a rune equal or
// below '\xff'.
// Padding characters above '\x7f' are encoded as their exact byte value
// rather than using the UTF-8 representation of the codepoint.
func (enc Encoding) WithPadding(padding rune) *Encoding {
if padding == '\r' || padding == '\n' || padding > 0xff {
panic("invalid padding")