From 8530e8ef6566c2345866a1e42b484dbf24c64264 Mon Sep 17 00:00:00 2001 From: Benny Siegert Date: Fri, 12 Nov 2010 12:47:50 -0800 Subject: [PATCH] strings: add LastIndexAny The need for a LastIndexAny function has come up in the discussion for https://golang.org/cl/3008041/. This function is implemented analogously to lastIndexFunc, using functions from the utf8 package. R=r, rsc, PeterGo CC=golang-dev https://golang.org/cl/3057041 --- src/pkg/bytes/bytes.go | 19 +++++++++++++++++++ src/pkg/bytes/bytes_test.go | 31 +++++++++++++++++++++++++------ src/pkg/strings/strings.go | 18 ++++++++++++++++++ src/pkg/strings/strings_test.go | 20 +++++++++++++++++--- 4 files changed, 79 insertions(+), 9 deletions(-) diff --git a/src/pkg/bytes/bytes.go b/src/pkg/bytes/bytes.go index 1939fd56784..e26b29fb553 100644 --- a/src/pkg/bytes/bytes.go +++ b/src/pkg/bytes/bytes.go @@ -165,6 +165,25 @@ func IndexAny(s []byte, chars string) int { return -1 } +// LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code +// points. It returns the byte index of the last occurrence in s of any of +// the Unicode code points in chars. It returns -1 if chars is empty or if +// there is no code point in common. +func LastIndexAny(s []byte, chars string) int { + if len(chars) > 0 { + for i := len(s); i > 0; { + rune, size := utf8.DecodeLastRune(s[0:i]) + i -= size + for _, m := range chars { + if rune == m { + return i + } + } + } + } + return -1 +} + // Generic split: splits after each instance of sep, // including sepSave bytes of sep in the subarrays. func genSplit(s, sep []byte, sepSave, n int) [][]byte { diff --git a/src/pkg/bytes/bytes_test.go b/src/pkg/bytes/bytes_test.go index f3ca371f83e..28e70865291 100644 --- a/src/pkg/bytes/bytes_test.go +++ b/src/pkg/bytes/bytes_test.go @@ -128,6 +128,20 @@ var indexAnyTests = []BinOpTest{ {dots + dots + dots, " ", -1}, } +var lastIndexAnyTests = []BinOpTest{ + {"", "", -1}, + {"", "a", -1}, + {"", "abc", -1}, + {"a", "", -1}, + {"a", "a", 0}, + {"aaa", "a", 2}, + {"abc", "xyz", -1}, + {"abc", "ab", 1}, + {"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")}, + {"a.RegExp*", ".(|)*+?^$[]", 8}, + {dots + dots + dots, " ", -1}, +} + var indexRuneTests = []BinOpTest{ {"", "a", -1}, {"", "☺", -1}, @@ -150,18 +164,23 @@ func runIndexTests(t *testing.T, f func(s, sep []byte) int, funcName string, tes } } -func TestIndex(t *testing.T) { runIndexTests(t, Index, "Index", indexTests) } -func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) } -func TestIndexAny(t *testing.T) { - for _, test := range indexAnyTests { +func runIndexAnyTests(t *testing.T, f func(s []byte, chars string) int, funcName string, testCases []BinOpTest) { + for _, test := range testCases { a := []byte(test.a) - actual := IndexAny(a, test.b) + actual := f(a, test.b) if actual != test.i { - t.Errorf("IndexAny(%q,%q) = %v; want %v", a, test.b, actual, test.i) + t.Errorf("%s(%q,%q) = %v; want %v", funcName, a, test.b, actual, test.i) } } } +func TestIndex(t *testing.T) { runIndexTests(t, Index, "Index", indexTests) } +func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) } +func TestIndexAny(t *testing.T) { runIndexAnyTests(t, IndexAny, "IndexAny", indexAnyTests) } +func TestLastIndexAny(t *testing.T) { + runIndexAnyTests(t, LastIndexAny, "LastIndexAny", lastIndexAnyTests) +} + func TestIndexByte(t *testing.T) { for _, tt := range indexTests { if len(tt.b) != 1 { diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go index f08b855999e..8bf86dadd0a 100644 --- a/src/pkg/strings/strings.go +++ b/src/pkg/strings/strings.go @@ -142,6 +142,24 @@ func IndexAny(s, chars string) int { return -1 } +// LastIndexAny returns the index of the last instance of any Unicode code +// point from chars in s, or -1 if no Unicode code point from chars is +// present in s. +func LastIndexAny(s, chars string) int { + if len(chars) > 0 { + for i := len(s); i > 0; { + rune, size := utf8.DecodeLastRuneInString(s[0:i]) + i -= size + for _, m := range chars { + if rune == m { + return i + } + } + } + } + return -1 +} + // Generic split: splits after each instance of sep, // including sepSave bytes of sep in the subarrays. func genSplit(s, sep string, sepSave, n int) []string { diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go index 657c8e89064..734fdd33daa 100644 --- a/src/pkg/strings/strings_test.go +++ b/src/pkg/strings/strings_test.go @@ -86,6 +86,19 @@ var indexAnyTests = []IndexTest{ {"aRegExp*", ".(|)*+?^$[]", 7}, {dots + dots + dots, " ", -1}, } +var lastIndexAnyTests = []IndexTest{ + {"", "", -1}, + {"", "a", -1}, + {"", "abc", -1}, + {"a", "", -1}, + {"a", "a", 0}, + {"aaa", "a", 2}, + {"abc", "xyz", -1}, + {"abc", "ab", 1}, + {"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")}, + {"a.RegExp*", ".(|)*+?^$[]", 8}, + {dots + dots + dots, " ", -1}, +} // Execute f on each test case. funcName should be the name of f; it's used // in failure reports. @@ -98,9 +111,10 @@ func runIndexTests(t *testing.T, f func(s, sep string) int, funcName string, tes } } -func TestIndex(t *testing.T) { runIndexTests(t, Index, "Index", indexTests) } -func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) } -func TestIndexAny(t *testing.T) { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) } +func TestIndex(t *testing.T) { runIndexTests(t, Index, "Index", indexTests) } +func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) } +func TestIndexAny(t *testing.T) { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) } +func TestLastIndexAny(t *testing.T) { runIndexTests(t, LastIndexAny, "LastIndexAny", lastIndexAnyTests) } type ExplodeTest struct { s string