From 414161ebfaa58218ffac7907b4d8ef2babb236cb Mon Sep 17 00:00:00 2001 From: Jes Cok Date: Wed, 7 Feb 2024 04:56:26 +0000 Subject: [PATCH] encoding/xml: rewrite func procInst This CL tries to make function procInst more exact, also adds test cases, however, including tricky ones. Change-Id: If421299fc84d136e56a25dba7a4919c4424702c8 GitHub-Last-Rev: b9a3192718ae5535d66fddd260c27b48d93b4af1 GitHub-Pull-Request: golang/go#64336 Reviewed-on: https://go-review.googlesource.com/c/go/+/544475 LUCI-TryBot-Result: Go LUCI Reviewed-by: Dmitri Shuralyov Auto-Submit: Ian Lance Taylor Reviewed-by: Ian Lance Taylor --- src/encoding/xml/xml.go | 27 +++++++++++++++++++-------- src/encoding/xml/xml_test.go | 7 +++++++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/encoding/xml/xml.go b/src/encoding/xml/xml.go index 73eedad290f..6b8f2e79784 100644 --- a/src/encoding/xml/xml.go +++ b/src/encoding/xml/xml.go @@ -2045,16 +2045,27 @@ func procInst(param, s string) string { // TODO: this parsing is somewhat lame and not exact. // It works for all actual cases, though. param = param + "=" - _, v, _ := strings.Cut(s, param) - if v == "" { + lenp := len(param) + i := 0 + var sep byte + for i < len(s) { + sub := s[i:] + k := strings.Index(sub, param) + if k < 0 || lenp+k >= len(sub) { + return "" + } + i += lenp + k + 1 + if c := sub[lenp+k]; c == '\'' || c == '"' { + sep = c + break + } + } + if sep == 0 { return "" } - if v[0] != '\'' && v[0] != '"' { + j := strings.IndexByte(s[i:], sep) + if j < 0 { return "" } - unquote, _, ok := strings.Cut(v[1:], v[:1]) - if !ok { - return "" - } - return unquote + return s[i : i+j] } diff --git a/src/encoding/xml/xml_test.go b/src/encoding/xml/xml_test.go index 42f5f5f8a60..4bec4e7f1e9 100644 --- a/src/encoding/xml/xml_test.go +++ b/src/encoding/xml/xml_test.go @@ -830,6 +830,13 @@ var procInstTests = []struct { {`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}}, {`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}}, {`encoding="FOO" `, [2]string{"", "FOO"}}, + {`version=2.0 version="1.0" encoding=utf-7 encoding='utf-8'`, [2]string{"1.0", "utf-8"}}, + {`version= encoding=`, [2]string{"", ""}}, + {`encoding="version=1.0"`, [2]string{"", "version=1.0"}}, + {``, [2]string{"", ""}}, + // TODO: what's the right approach to handle these nested cases? + {`encoding="version='1.0'"`, [2]string{"1.0", "version='1.0'"}}, + {`version="encoding='utf-8'"`, [2]string{"encoding='utf-8'", "utf-8"}}, } func TestProcInstEncoding(t *testing.T) {