1
0
mirror of https://github.com/golang/go synced 2024-11-26 04:07:59 -07:00

regexp: allow patterns with no alternates to be one-pass

Check whether a regex has any 'alt' instructions before rejecting it as one-pass.
Previously `^abc` would run the backtrack matcher.

I tried to make the comment match what the code does now.

Updates #21463

```
name                            old time/op    new time/op    delta
Find-8                             167ns ± 1%     170ns ± 3%     ~     (p=0.500 n=5+5)
FindAllNoMatches-8                88.8ns ± 5%    87.3ns ± 0%     ~     (p=0.095 n=5+5)
FindString-8                       166ns ± 3%     164ns ± 0%     ~     (p=0.063 n=5+5)
FindSubmatch-8                     191ns ± 1%     191ns ± 0%     ~     (p=0.556 n=4+5)
FindStringSubmatch-8               183ns ± 0%     182ns ± 0%   -0.43%  (p=0.048 n=5+5)
Literal-8                         50.3ns ± 0%    50.1ns ± 0%   -0.40%  (p=0.016 n=5+4)
NotLiteral-8                       914ns ± 0%     927ns ± 7%     ~     (p=0.730 n=5+5)
MatchClass-8                      1.20µs ± 1%    1.22µs ± 6%     ~     (p=0.738 n=5+5)
MatchClass_InRange-8              1.20µs ± 6%    1.21µs ± 6%     ~     (p=0.548 n=5+5)
ReplaceAll-8                       796ns ± 0%     792ns ± 0%   -0.51%  (p=0.032 n=5+5)
AnchoredLiteralShortNonMatch-8    41.0ns ± 2%    34.2ns ± 2%  -16.47%  (p=0.008 n=5+5)
AnchoredLiteralLongNonMatch-8     53.3ns ± 0%    34.3ns ± 3%  -35.74%  (p=0.008 n=5+5)
AnchoredShortMatch-8              74.0ns ± 2%    75.8ns ± 0%   +2.46%  (p=0.032 n=5+4)
AnchoredLongMatch-8                146ns ± 3%      76ns ± 1%  -48.12%  (p=0.008 n=5+5)
OnePassShortA-8                    424ns ± 0%     423ns ± 0%     ~     (p=0.222 n=5+4)
NotOnePassShortA-8                 373ns ± 1%     375ns ± 2%     ~     (p=0.690 n=5+5)
OnePassShortB-8                    315ns ± 2%     308ns ± 0%   -2.12%  (p=0.008 n=5+5)
NotOnePassShortB-8                 244ns ± 3%     239ns ± 0%     ~     (p=0.476 n=5+5)
OnePassLongPrefix-8               61.6ns ± 2%    60.9ns ± 0%   -1.13%  (p=0.016 n=5+4)
OnePassLongNotPrefix-8             236ns ± 3%     230ns ± 0%     ~     (p=0.143 n=5+5)
```

Change-Id: I8a94b53bc761cd7ec89923c905ec8baaaa58a5fd
GitHub-Last-Rev: e9e0c29b74
GitHub-Pull-Request: golang/go#48748
Reviewed-on: https://go-review.googlesource.com/c/go/+/353711
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
Reviewed-by: Russ Cox <rsc@golang.org>
Auto-Submit: Ian Lance Taylor <iant@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Commit-Queue: Ian Lance Taylor <iant@google.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Ian Lance Taylor <iant@google.com>
This commit is contained in:
Bryan Boreham 2024-05-27 16:55:02 +00:00 committed by Gopher Robot
parent 8659ad972f
commit c5430dc1d8
2 changed files with 12 additions and 2 deletions

View File

@ -465,12 +465,20 @@ func compileOnePass(prog *syntax.Prog) (p *onePassProg) {
syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText {
return nil
}
// every instruction leading to InstMatch must be EmptyEndText
hasAlt := false
for _, inst := range prog.Inst {
if inst.Op == syntax.InstAlt || inst.Op == syntax.InstAltMatch {
hasAlt = true
break
}
}
// If we have alternates, every instruction leading to InstMatch must be EmptyEndText.
// Also, any match on empty text must be $.
for _, inst := range prog.Inst {
opOut := prog.Inst[inst.Out].Op
switch inst.Op {
default:
if opOut == syntax.InstMatch {
if opOut == syntax.InstMatch && hasAlt {
return nil
}
case syntax.InstAlt, syntax.InstAltMatch:

View File

@ -142,6 +142,7 @@ var onePassTests = []struct {
{`^(?:(a)|(?:a*))$`, false},
{`^(?:(?:(?:.(?:$))?))$`, true},
{`^abcd$`, true},
{`^abcd`, true},
{`^(?:(?:a{0,})*?)$`, false},
{`^(?:(?:a+)*)$`, true},
{`^(?:(?:a|(?:aa)))$`, true},
@ -154,6 +155,7 @@ var onePassTests = []struct {
{`^(?:(?:aa)|a)$`, true},
{`^[a-c]*`, false},
{`^...$`, true},
{`^...`, true},
{`^(?:a|(?:aa))$`, true},
{`^a((b))c$`, true},
{`^a.[l-nA-Cg-j]?e$`, true},