From c5430dc1d8d96cfd0c85a6d760262676d798a6a9 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Mon, 27 May 2024 16:55:02 +0000 Subject: [PATCH] regexp: allow patterns with no alternates to be one-pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check whether a regex has any 'alt' instructions before rejecting it as one-pass. Previously `^abc` would run the backtrack matcher. I tried to make the comment match what the code does now. Updates #21463 ``` name old time/op new time/op delta Find-8 167ns ± 1% 170ns ± 3% ~ (p=0.500 n=5+5) FindAllNoMatches-8 88.8ns ± 5% 87.3ns ± 0% ~ (p=0.095 n=5+5) FindString-8 166ns ± 3% 164ns ± 0% ~ (p=0.063 n=5+5) FindSubmatch-8 191ns ± 1% 191ns ± 0% ~ (p=0.556 n=4+5) FindStringSubmatch-8 183ns ± 0% 182ns ± 0% -0.43% (p=0.048 n=5+5) Literal-8 50.3ns ± 0% 50.1ns ± 0% -0.40% (p=0.016 n=5+4) NotLiteral-8 914ns ± 0% 927ns ± 7% ~ (p=0.730 n=5+5) MatchClass-8 1.20µs ± 1% 1.22µs ± 6% ~ (p=0.738 n=5+5) MatchClass_InRange-8 1.20µs ± 6% 1.21µs ± 6% ~ (p=0.548 n=5+5) ReplaceAll-8 796ns ± 0% 792ns ± 0% -0.51% (p=0.032 n=5+5) AnchoredLiteralShortNonMatch-8 41.0ns ± 2% 34.2ns ± 2% -16.47% (p=0.008 n=5+5) AnchoredLiteralLongNonMatch-8 53.3ns ± 0% 34.3ns ± 3% -35.74% (p=0.008 n=5+5) AnchoredShortMatch-8 74.0ns ± 2% 75.8ns ± 0% +2.46% (p=0.032 n=5+4) AnchoredLongMatch-8 146ns ± 3% 76ns ± 1% -48.12% (p=0.008 n=5+5) OnePassShortA-8 424ns ± 0% 423ns ± 0% ~ (p=0.222 n=5+4) NotOnePassShortA-8 373ns ± 1% 375ns ± 2% ~ (p=0.690 n=5+5) OnePassShortB-8 315ns ± 2% 308ns ± 0% -2.12% (p=0.008 n=5+5) NotOnePassShortB-8 244ns ± 3% 239ns ± 0% ~ (p=0.476 n=5+5) OnePassLongPrefix-8 61.6ns ± 2% 60.9ns ± 0% -1.13% (p=0.016 n=5+4) OnePassLongNotPrefix-8 236ns ± 3% 230ns ± 0% ~ (p=0.143 n=5+5) ``` Change-Id: I8a94b53bc761cd7ec89923c905ec8baaaa58a5fd GitHub-Last-Rev: e9e0c29b7448c8ab7cb203c1ed58766dc5d91456 GitHub-Pull-Request: golang/go#48748 Reviewed-on: https://go-review.googlesource.com/c/go/+/353711 Reviewed-by: Daniel Martí Reviewed-by: Russ Cox Auto-Submit: Ian Lance Taylor Reviewed-by: Dmitri Shuralyov Auto-Submit: Ian Lance Taylor LUCI-TryBot-Result: Go LUCI Commit-Queue: Ian Lance Taylor Reviewed-by: Brad Fitzpatrick Reviewed-by: Ian Lance Taylor --- src/regexp/onepass.go | 12 ++++++++++-- src/regexp/onepass_test.go | 2 ++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/regexp/onepass.go b/src/regexp/onepass.go index 53cbd958394..96e360661b9 100644 --- a/src/regexp/onepass.go +++ b/src/regexp/onepass.go @@ -465,12 +465,20 @@ func compileOnePass(prog *syntax.Prog) (p *onePassProg) { syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText { return nil } - // every instruction leading to InstMatch must be EmptyEndText + hasAlt := false + for _, inst := range prog.Inst { + if inst.Op == syntax.InstAlt || inst.Op == syntax.InstAltMatch { + hasAlt = true + break + } + } + // If we have alternates, every instruction leading to InstMatch must be EmptyEndText. + // Also, any match on empty text must be $. for _, inst := range prog.Inst { opOut := prog.Inst[inst.Out].Op switch inst.Op { default: - if opOut == syntax.InstMatch { + if opOut == syntax.InstMatch && hasAlt { return nil } case syntax.InstAlt, syntax.InstAltMatch: diff --git a/src/regexp/onepass_test.go b/src/regexp/onepass_test.go index 3f44dc7b150..70eba1f577a 100644 --- a/src/regexp/onepass_test.go +++ b/src/regexp/onepass_test.go @@ -142,6 +142,7 @@ var onePassTests = []struct { {`^(?:(a)|(?:a*))$`, false}, {`^(?:(?:(?:.(?:$))?))$`, true}, {`^abcd$`, true}, + {`^abcd`, true}, {`^(?:(?:a{0,})*?)$`, false}, {`^(?:(?:a+)*)$`, true}, {`^(?:(?:a|(?:aa)))$`, true}, @@ -154,6 +155,7 @@ var onePassTests = []struct { {`^(?:(?:aa)|a)$`, true}, {`^[a-c]*`, false}, {`^...$`, true}, + {`^...`, true}, {`^(?:a|(?:aa))$`, true}, {`^a((b))c$`, true}, {`^a.[l-nA-Cg-j]?e$`, true},