From ab96ee7da999f6ff9f9d470956bb215e6987073e Mon Sep 17 00:00:00 2001 From: Anuraag Agrawal Date: Wed, 5 Jun 2024 15:55:15 +0900 Subject: [PATCH] regexp: add find test cases for matching edges of multibyte characters There are currently similar tests for Replace but not for Find. Given the nature of the matching implementation, a bug here seems unlikely, but since rune handling is separate for Find vs Replace, it would be good to have explicit regression tests for otherwise easily missed corner cases. --- src/regexp/find_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/regexp/find_test.go b/src/regexp/find_test.go index 2edbe9b86e6..7cc467d85e5 100644 --- a/src/regexp/find_test.go +++ b/src/regexp/find_test.go @@ -99,6 +99,11 @@ var findTests = []FindTest{ {`\B`, "xx yy", build(2, 1, 1, 4, 4)}, {`(|a)*`, "aa", build(3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2)}, + // Multibyte characters -- verify that we don't try to match in the middle + // of a character. + {"[a-c]*", "\u65e5", build(2, 0, 0, 3, 3)}, + {"[^\u65e5]", "abc\u65e5def", build(6, 0, 1, 1, 2, 2, 3, 6, 7, 7, 8, 8, 9)}, + // RE2 tests {`[^\S\s]`, "abcd", nil}, {`[^\S[:space:]]`, "abcd", nil},