1
0
mirror of https://github.com/golang/go synced 2024-11-27 01:01:21 -07:00

regexp/syntax: reject large repetitions created by nesting small ones

Fixes #7609.

LGTM=r
R=r
CC=golang-codereviews
https://golang.org/cl/150270043
This commit is contained in:
Russ Cox 2014-09-30 12:08:09 -04:00
parent ac9218f5f0
commit 9b2b0c8c16
2 changed files with 47 additions and 0 deletions

View File

@ -244,6 +244,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
if sub.Op >= opPseudo {
return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
}
re := p.newRegexp(op)
re.Min = min
re.Max = max
@ -251,9 +252,42 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
re.Sub = re.Sub0[:1]
re.Sub[0] = sub
p.stack[n-1] = re
if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
}
return after, nil
}
// repeatIsValid reports whether the repetition re is valid.
// Valid means that the combination of the top-level repetition
// and any inner repetitions does not exceed n copies of the
// innermost thing.
// This function rewalks the regexp tree and is called for every repetition,
// so we have to worry about inducing quadratic behavior in the parser.
// We avoid this by only calling repeatIsValid when min or max >= 2.
// In that case the depth of any >= 2 nesting can only get to 9 without
// triggering a parse error, so each subtree can only be rewalked 9 times.
func repeatIsValid(re *Regexp, n int) bool {
if re.Op == OpRepeat {
m := re.Max
if m < 0 {
m = re.Min
}
if m > n {
return false
}
n /= m
}
for _, sub := range re.Sub {
if !repeatIsValid(sub, n) {
return false
}
}
return true
}
// concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation.
func (p *parser) concat() *Regexp {
p.maybeConcat(-1, 0)

View File

@ -200,6 +200,10 @@ var parseTests = []parseTest{
`cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}`},
{`x{2}y|x{2}[0-9]y`,
`cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}`},
// Valid repetitions.
{`((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))`, ``},
{`((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})`, ``},
}
const testFlags = MatchNL | PerlX | UnicodeGroups
@ -262,6 +266,10 @@ func testParseDump(t *testing.T, tests []parseTest, flags Flags) {
t.Errorf("Parse(%#q): %v", tt.Regexp, err)
continue
}
if tt.Dump == "" {
// It parsed. That's all we care about.
continue
}
d := dump(re)
if d != tt.Dump {
t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
@ -470,6 +478,7 @@ var invalidRegexps = []string{
`(?i)[a-Z]`,
`a{100000}`,
`a{100000,}`,
"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
}
var onlyPerl = []string{
@ -527,6 +536,10 @@ func TestToStringEquivalentParse(t *testing.T) {
t.Errorf("Parse(%#q): %v", tt.Regexp, err)
continue
}
if tt.Dump == "" {
// It parsed. That's all we care about.
continue
}
d := dump(re)
if d != tt.Dump {
t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)