diff --git a/src/archive/zip/writer.go b/src/archive/zip/writer.go index 7b33968618..ad8457c95a 100644 --- a/src/archive/zip/writer.go +++ b/src/archive/zip/writer.go @@ -219,7 +219,9 @@ func (w *Writer) Create(name string) (io.Writer, error) { // must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII, // or any other common encoding). func detectUTF8(s string) (valid, require bool) { - for _, r := range s { + for i := 0; i < len(s); { + r, size := utf8.DecodeRuneInString(s[i:]) + i += size // Officially, ZIP uses CP-437, but many readers use the system's // local character encoding. Most encoding are compatible with a large // subset of CP-437, which itself is ASCII-like. @@ -227,7 +229,7 @@ func detectUTF8(s string) (valid, require bool) { // Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those // characters with localized currency and overline characters. if r < 0x20 || r > 0x7d || r == 0x5c { - if !utf8.ValidRune(r) || r == utf8.RuneError { + if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) { return false, false } require = true diff --git a/src/archive/zip/writer_test.go b/src/archive/zip/writer_test.go index ee5c866310..f217a42e74 100644 --- a/src/archive/zip/writer_test.go +++ b/src/archive/zip/writer_test.go @@ -136,40 +136,45 @@ func TestWriterUTF8(t *testing.T) { var utf8Tests = []struct { name string comment string - expect uint16 nonUTF8 bool + flags uint16 }{ { name: "hi, hello", comment: "in the world", - expect: 0x8, + flags: 0x8, }, { name: "hi, こんにちわ", comment: "in the world", - expect: 0x808, + flags: 0x808, }, { name: "hi, こんにちわ", comment: "in the world", nonUTF8: true, - expect: 0x8, + flags: 0x8, }, { name: "hi, hello", comment: "in the 世界", - expect: 0x808, + flags: 0x808, }, { name: "hi, こんにちわ", comment: "in the 世界", - expect: 0x808, + flags: 0x808, + }, + { + name: "the replacement rune is �", + comment: "the replacement rune is �", + flags: 0x808, }, { // Name is Japanese encoded in Shift JIS. name: "\x93\xfa\x96{\x8c\xea.txt", comment: "in the 世界", - expect: 0x008, // UTF-8 must not be set + flags: 0x008, // UTF-8 must not be set }, } @@ -201,10 +206,9 @@ func TestWriterUTF8(t *testing.T) { t.Fatal(err) } for i, test := range utf8Tests { - got := r.File[i].Flags - t.Logf("name %v, comment %v", test.name, test.comment) - if got != test.expect { - t.Fatalf("Flags: got %v, want %v", got, test.expect) + flags := r.File[i].Flags + if flags != test.flags { + t.Errorf("CreateHeader(name=%q comment=%q nonUTF8=%v): flags=%#x, want %#x", test.name, test.comment, test.nonUTF8, flags, test.flags) } } }