1
0
mirror of https://github.com/golang/go synced 2024-10-02 16:28:34 -06:00

path/filepath: avoid allocation in Clean of cleaned path

Alternative to https://golang.org/cl/6330044.

Fixes #3681.

R=golang-dev, r, hanwen, iant
CC=golang-dev
https://golang.org/cl/6335056
This commit is contained in:
Russ Cox 2012-06-27 16:52:36 -04:00
parent 5a5e698c8f
commit 9525372e31
4 changed files with 149 additions and 49 deletions

View File

@ -13,6 +13,43 @@ import (
"strings"
)
// A lazybuf is a lazily constructed path buffer.
// It supports append, reading previously appended bytes,
// and retrieving the final string. It does not allocate a buffer
// to hold the output until that output diverges from s.
type lazybuf struct {
s string
buf []byte
w int
}
func (b *lazybuf) index(i int) byte {
if b.buf != nil {
return b.buf[i]
}
return b.s[i]
}
func (b *lazybuf) append(c byte) {
if b.buf == nil {
if b.w < len(b.s) && b.s[b.w] == c {
b.w++
return
}
b.buf = make([]byte, len(b.s))
copy(b.buf, b.s[:b.w])
}
b.buf[b.w] = c
b.w++
}
func (b *lazybuf) string() string {
if b.buf == nil {
return b.s[:b.w]
}
return string(b.buf[:b.w])
}
const (
Separator = os.PathSeparator
ListSeparator = os.PathListSeparator
@ -57,11 +94,11 @@ func Clean(path string) string {
// dotdot is index in buf where .. must stop, either because
// it is the leading slash or it is a leading ../../.. prefix.
n := len(path)
buf := []byte(path)
r, w, dotdot := 0, 0, 0
out := lazybuf{s: path}
r, dotdot := 0, 0
if rooted {
buf[0] = Separator
r, w, dotdot = 1, 1, 1
out.append(Separator)
r, dotdot = 1, 1
}
for r < n {
@ -76,46 +113,40 @@ func Clean(path string) string {
// .. element: remove to last separator
r += 2
switch {
case w > dotdot:
case out.w > dotdot:
// can backtrack
w--
for w > dotdot && !os.IsPathSeparator(buf[w]) {
w--
out.w--
for out.w > dotdot && !os.IsPathSeparator(out.index(out.w)) {
out.w--
}
case !rooted:
// cannot backtrack, but not rooted, so append .. element.
if w > 0 {
buf[w] = Separator
w++
if out.w > 0 {
out.append(Separator)
}
buf[w] = '.'
w++
buf[w] = '.'
w++
dotdot = w
out.append('.')
out.append('.')
dotdot = out.w
}
default:
// real path element.
// add slash if needed
if rooted && w != 1 || !rooted && w != 0 {
buf[w] = Separator
w++
if rooted && out.w != 1 || !rooted && out.w != 0 {
out.append(Separator)
}
// copy element
for ; r < n && !os.IsPathSeparator(path[r]); r++ {
buf[w] = path[r]
w++
out.append(path[r])
}
}
}
// Turn empty string into "."
if w == 0 {
buf[w] = '.'
w++
if out.w == 0 {
out.append('.')
}
return FromSlash(vol + string(buf[0:w]))
return FromSlash(vol + out.string())
}
// ToSlash returns the result of replacing each separator character

View File

@ -99,6 +99,24 @@ func TestClean(t *testing.T) {
if s := filepath.Clean(test.path); s != test.result {
t.Errorf("Clean(%q) = %q, want %q", test.path, s, test.result)
}
if s := filepath.Clean(test.result); s != test.result {
t.Errorf("Clean(%q) = %q, want %q", test.result, s, test.result)
}
}
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
allocs := -ms.Mallocs
const rounds = 100
for i := 0; i < rounds; i++ {
for _, test := range tests {
filepath.Clean(test.result)
}
}
runtime.ReadMemStats(&ms)
allocs += ms.Mallocs
if allocs >= rounds {
t.Errorf("Clean cleaned paths: %d allocations per test round, want zero", allocs/rounds)
}
}

View File

@ -10,6 +10,43 @@ import (
"strings"
)
// A lazybuf is a lazily constructed path buffer.
// It supports append, reading previously appended bytes,
// and retrieving the final string. It does not allocate a buffer
// to hold the output until that output diverges from s.
type lazybuf struct {
s string
buf []byte
w int
}
func (b *lazybuf) index(i int) byte {
if b.buf != nil {
return b.buf[i]
}
return b.s[i]
}
func (b *lazybuf) append(c byte) {
if b.buf == nil {
if b.w < len(b.s) && b.s[b.w] == c {
b.w++
return
}
b.buf = make([]byte, len(b.s))
copy(b.buf, b.s[:b.w])
}
b.buf[b.w] = c
b.w++
}
func (b *lazybuf) string() string {
if b.buf == nil {
return b.s[:b.w]
}
return string(b.buf[:b.w])
}
// Clean returns the shortest path name equivalent to path
// by purely lexical processing. It applies the following rules
// iteratively until no further processing can be done:
@ -42,10 +79,11 @@ func Clean(path string) string {
// writing to buf; w is index of next byte to write.
// dotdot is index in buf where .. must stop, either because
// it is the leading slash or it is a leading ../../.. prefix.
buf := []byte(path)
r, w, dotdot := 0, 0, 0
out := lazybuf{s: path}
r, dotdot := 0, 0
if rooted {
r, w, dotdot = 1, 1, 1
out.append('/')
r, dotdot = 1, 1
}
for r < n {
@ -60,46 +98,40 @@ func Clean(path string) string {
// .. element: remove to last /
r += 2
switch {
case w > dotdot:
case out.w > dotdot:
// can backtrack
w--
for w > dotdot && buf[w] != '/' {
w--
out.w--
for out.w > dotdot && out.index(out.w) != '/' {
out.w--
}
case !rooted:
// cannot backtrack, but not rooted, so append .. element.
if w > 0 {
buf[w] = '/'
w++
if out.w > 0 {
out.append('/')
}
buf[w] = '.'
w++
buf[w] = '.'
w++
dotdot = w
out.append('.')
out.append('.')
dotdot = out.w
}
default:
// real path element.
// add slash if needed
if rooted && w != 1 || !rooted && w != 0 {
buf[w] = '/'
w++
if rooted && out.w != 1 || !rooted && out.w != 0 {
out.append('/')
}
// copy element
for ; r < n && path[r] != '/'; r++ {
buf[w] = path[r]
w++
out.append(path[r])
}
}
}
// Turn empty string into "."
if w == 0 {
buf[w] = '.'
w++
if out.w == 0 {
return "."
}
return string(buf[0:w])
return out.string()
}
// Split splits path immediately following the final slash.

View File

@ -5,6 +5,7 @@
package path
import (
"runtime"
"testing"
)
@ -67,6 +68,24 @@ func TestClean(t *testing.T) {
if s := Clean(test.path); s != test.result {
t.Errorf("Clean(%q) = %q, want %q", test.path, s, test.result)
}
if s := Clean(test.result); s != test.result {
t.Errorf("Clean(%q) = %q, want %q", test.result, s, test.result)
}
}
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
allocs := -ms.Mallocs
const rounds = 100
for i := 0; i < rounds; i++ {
for _, test := range cleantests {
Clean(test.result)
}
}
runtime.ReadMemStats(&ms)
allocs += ms.Mallocs
if allocs >= rounds {
t.Errorf("Clean cleaned paths: %d allocations per test round, want zero", allocs/rounds)
}
}