From 1951857ec07c1d491e1836770a647d3902934a67 Mon Sep 17 00:00:00 2001 From: qmuntal Date: Tue, 22 Nov 2022 18:46:35 +0100 Subject: [PATCH] os: use handle based APIs to read directories on windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL updates File.readdir() on windows so it uses GetFileInformationByHandleEx with FILE_ID_BOTH_DIR_INFO instead of Find* APIs. The former is more performant because it allows us to buffer IO calls and reduces the number of system calls, passing from 1 per file to 1 every ~100 files (depending on the size of the file name and the size of the buffer). This change improve performance of File.ReadDir by 20-30%. name old time/op new time/op delta ReadDir-12 562µs ±14% 385µs ± 9% -31.60% (p=0.000 n=9+9) name old alloc/op new alloc/op delta ReadDir-12 29.7kB ± 0% 29.5kB ± 0% -0.88% (p=0.000 n=8+10) name old allocs/op new allocs/op delta ReadDir-12 399 ± 0% 397 ± 0% -0.50% (p=0.000 n=10+10) This change also speeds up calls to os.SameFile when using FileStats returned from File.readdir(), as their file ID can be inferred while reading the directory. Change-Id: Id56a338ee66c39656b564105cac131099218fb5d Reviewed-on: https://go-review.googlesource.com/c/go/+/452995 TryBot-Result: Gopher Robot Reviewed-by: Damien Neil Reviewed-by: Alex Brainman Run-TryBot: Quim Muntal Reviewed-by: Bryan Mills --- .../syscall/windows/syscall_windows.go | 20 +++ .../syscall/windows/zsyscall_windows.go | 73 +++++----- src/os/dir_windows.go | 127 +++++++++++++----- src/os/file_windows.go | 69 ---------- src/os/types_windows.go | 41 ++++-- 5 files changed, 185 insertions(+), 145 deletions(-) diff --git a/src/internal/syscall/windows/syscall_windows.go b/src/internal/syscall/windows/syscall_windows.go index 8ace2a27e79..311d083f45c 100644 --- a/src/internal/syscall/windows/syscall_windows.go +++ b/src/internal/syscall/windows/syscall_windows.go @@ -367,3 +367,23 @@ func LoadGetFinalPathNameByHandle() error { //sys DestroyEnvironmentBlock(block *uint16) (err error) = userenv.DestroyEnvironmentBlock //sys RtlGenRandom(buf []byte) (err error) = advapi32.SystemFunction036 + +type FILE_ID_BOTH_DIR_INFO struct { + NextEntryOffset uint32 + FileIndex uint32 + CreationTime syscall.Filetime + LastAccessTime syscall.Filetime + LastWriteTime syscall.Filetime + ChangeTime syscall.Filetime + EndOfFile uint64 + AllocationSize uint64 + FileAttributes uint32 + FileNameLength uint32 + EaSize uint32 + ShortNameLength uint32 + ShortName [12]uint16 + FileID uint64 + FileName [1]uint16 +} + +//sys GetVolumeInformationByHandle(file syscall.Handle, volumeNameBuffer *uint16, volumeNameSize uint32, volumeNameSerialNumber *uint32, maximumComponentLength *uint32, fileSystemFlags *uint32, fileSystemNameBuffer *uint16, fileSystemNameSize uint32) (err error) = GetVolumeInformationByHandleW diff --git a/src/internal/syscall/windows/zsyscall_windows.go b/src/internal/syscall/windows/zsyscall_windows.go index afd64e318e5..fd2f255b9b4 100644 --- a/src/internal/syscall/windows/zsyscall_windows.go +++ b/src/internal/syscall/windows/zsyscall_windows.go @@ -45,38 +45,39 @@ var ( moduserenv = syscall.NewLazyDLL(sysdll.Add("userenv.dll")) modws2_32 = syscall.NewLazyDLL(sysdll.Add("ws2_32.dll")) - procAdjustTokenPrivileges = modadvapi32.NewProc("AdjustTokenPrivileges") - procDuplicateTokenEx = modadvapi32.NewProc("DuplicateTokenEx") - procImpersonateSelf = modadvapi32.NewProc("ImpersonateSelf") - procLookupPrivilegeValueW = modadvapi32.NewProc("LookupPrivilegeValueW") - procOpenThreadToken = modadvapi32.NewProc("OpenThreadToken") - procRevertToSelf = modadvapi32.NewProc("RevertToSelf") - procSetTokenInformation = modadvapi32.NewProc("SetTokenInformation") - procSystemFunction036 = modadvapi32.NewProc("SystemFunction036") - procGetAdaptersAddresses = modiphlpapi.NewProc("GetAdaptersAddresses") - procGetACP = modkernel32.NewProc("GetACP") - procGetComputerNameExW = modkernel32.NewProc("GetComputerNameExW") - procGetConsoleCP = modkernel32.NewProc("GetConsoleCP") - procGetCurrentThread = modkernel32.NewProc("GetCurrentThread") - procGetFileInformationByHandleEx = modkernel32.NewProc("GetFileInformationByHandleEx") - procGetFinalPathNameByHandleW = modkernel32.NewProc("GetFinalPathNameByHandleW") - procGetModuleFileNameW = modkernel32.NewProc("GetModuleFileNameW") - procLockFileEx = modkernel32.NewProc("LockFileEx") - procModule32FirstW = modkernel32.NewProc("Module32FirstW") - procModule32NextW = modkernel32.NewProc("Module32NextW") - procMoveFileExW = modkernel32.NewProc("MoveFileExW") - procMultiByteToWideChar = modkernel32.NewProc("MultiByteToWideChar") - procSetFileInformationByHandle = modkernel32.NewProc("SetFileInformationByHandle") - procUnlockFileEx = modkernel32.NewProc("UnlockFileEx") - procVirtualQuery = modkernel32.NewProc("VirtualQuery") - procNetShareAdd = modnetapi32.NewProc("NetShareAdd") - procNetShareDel = modnetapi32.NewProc("NetShareDel") - procNetUserGetLocalGroups = modnetapi32.NewProc("NetUserGetLocalGroups") - procGetProcessMemoryInfo = modpsapi.NewProc("GetProcessMemoryInfo") - procCreateEnvironmentBlock = moduserenv.NewProc("CreateEnvironmentBlock") - procDestroyEnvironmentBlock = moduserenv.NewProc("DestroyEnvironmentBlock") - procGetProfilesDirectoryW = moduserenv.NewProc("GetProfilesDirectoryW") - procWSASocketW = modws2_32.NewProc("WSASocketW") + procAdjustTokenPrivileges = modadvapi32.NewProc("AdjustTokenPrivileges") + procDuplicateTokenEx = modadvapi32.NewProc("DuplicateTokenEx") + procImpersonateSelf = modadvapi32.NewProc("ImpersonateSelf") + procLookupPrivilegeValueW = modadvapi32.NewProc("LookupPrivilegeValueW") + procOpenThreadToken = modadvapi32.NewProc("OpenThreadToken") + procRevertToSelf = modadvapi32.NewProc("RevertToSelf") + procSetTokenInformation = modadvapi32.NewProc("SetTokenInformation") + procSystemFunction036 = modadvapi32.NewProc("SystemFunction036") + procGetAdaptersAddresses = modiphlpapi.NewProc("GetAdaptersAddresses") + procGetACP = modkernel32.NewProc("GetACP") + procGetComputerNameExW = modkernel32.NewProc("GetComputerNameExW") + procGetConsoleCP = modkernel32.NewProc("GetConsoleCP") + procGetCurrentThread = modkernel32.NewProc("GetCurrentThread") + procGetFileInformationByHandleEx = modkernel32.NewProc("GetFileInformationByHandleEx") + procGetFinalPathNameByHandleW = modkernel32.NewProc("GetFinalPathNameByHandleW") + procGetModuleFileNameW = modkernel32.NewProc("GetModuleFileNameW") + procGetVolumeInformationByHandleW = modkernel32.NewProc("GetVolumeInformationByHandleW") + procLockFileEx = modkernel32.NewProc("LockFileEx") + procModule32FirstW = modkernel32.NewProc("Module32FirstW") + procModule32NextW = modkernel32.NewProc("Module32NextW") + procMoveFileExW = modkernel32.NewProc("MoveFileExW") + procMultiByteToWideChar = modkernel32.NewProc("MultiByteToWideChar") + procSetFileInformationByHandle = modkernel32.NewProc("SetFileInformationByHandle") + procUnlockFileEx = modkernel32.NewProc("UnlockFileEx") + procVirtualQuery = modkernel32.NewProc("VirtualQuery") + procNetShareAdd = modnetapi32.NewProc("NetShareAdd") + procNetShareDel = modnetapi32.NewProc("NetShareDel") + procNetUserGetLocalGroups = modnetapi32.NewProc("NetUserGetLocalGroups") + procGetProcessMemoryInfo = modpsapi.NewProc("GetProcessMemoryInfo") + procCreateEnvironmentBlock = moduserenv.NewProc("CreateEnvironmentBlock") + procDestroyEnvironmentBlock = moduserenv.NewProc("DestroyEnvironmentBlock") + procGetProfilesDirectoryW = moduserenv.NewProc("GetProfilesDirectoryW") + procWSASocketW = modws2_32.NewProc("WSASocketW") ) func adjustTokenPrivileges(token syscall.Token, disableAllPrivileges bool, newstate *TOKEN_PRIVILEGES, buflen uint32, prevstate *TOKEN_PRIVILEGES, returnlen *uint32) (ret uint32, err error) { @@ -219,6 +220,14 @@ func GetModuleFileName(module syscall.Handle, fn *uint16, len uint32) (n uint32, return } +func GetVolumeInformationByHandle(file syscall.Handle, volumeNameBuffer *uint16, volumeNameSize uint32, volumeNameSerialNumber *uint32, maximumComponentLength *uint32, fileSystemFlags *uint32, fileSystemNameBuffer *uint16, fileSystemNameSize uint32) (err error) { + r1, _, e1 := syscall.Syscall9(procGetVolumeInformationByHandleW.Addr(), 8, uintptr(file), uintptr(unsafe.Pointer(volumeNameBuffer)), uintptr(volumeNameSize), uintptr(unsafe.Pointer(volumeNameSerialNumber)), uintptr(unsafe.Pointer(maximumComponentLength)), uintptr(unsafe.Pointer(fileSystemFlags)), uintptr(unsafe.Pointer(fileSystemNameBuffer)), uintptr(fileSystemNameSize), 0) + if r1 == 0 { + err = errnoErr(e1) + } + return +} + func LockFileEx(file syscall.Handle, flags uint32, reserved uint32, bytesLow uint32, bytesHigh uint32, overlapped *syscall.Overlapped) (err error) { r1, _, e1 := syscall.Syscall6(procLockFileEx.Addr(), 6, uintptr(file), uintptr(flags), uintptr(reserved), uintptr(bytesLow), uintptr(bytesHigh), uintptr(unsafe.Pointer(overlapped))) if r1 == 0 { diff --git a/src/os/dir_windows.go b/src/os/dir_windows.go index 445e4f7c4ff..ab120546c0a 100644 --- a/src/os/dir_windows.go +++ b/src/os/dir_windows.go @@ -5,59 +5,126 @@ package os import ( + "internal/syscall/windows" "io" "runtime" + "sync" "syscall" + "unicode/utf16" + "unsafe" ) +// Auxiliary information if the File describes a directory +type dirInfo struct { + // buf is a slice pointer so the slice header + // does not escape to the heap when returning + // buf to dirBufPool. + buf *[]byte // buffer for directory I/O + bufp int // location of next record in buf + vol uint32 +} + +const ( + // dirBufSize is the size of the dirInfo buffer. + // The buffer must be big enough to hold at least a single entry. + // The filename alone can be 512 bytes (MAX_PATH*2), and the fixed part of + // the FILE_ID_BOTH_DIR_INFO structure is 105 bytes, so dirBufSize + // should not be set below 1024 bytes (512+105+safety buffer). + // Windows 8.1 and earlier only works with buffer sizes up to 64 kB. + dirBufSize = 64 * 1024 // 64kB +) + +var dirBufPool = sync.Pool{ + New: func() any { + // The buffer must be at least a block long. + buf := make([]byte, dirBufSize) + return &buf + }, +} + +func (d *dirInfo) close() { + if d.buf != nil { + dirBufPool.Put(d.buf) + d.buf = nil + } +} + func (file *File) readdir(n int, mode readdirMode) (names []string, dirents []DirEntry, infos []FileInfo, err error) { // If this file has no dirinfo, create one. - needdata := true + var infoClass uint32 = windows.FileIdBothDirectoryInfo if file.dirinfo == nil { - needdata = false - file.dirinfo, err = openDir(file.name) + // vol is used by os.SameFile. + // It is safe to query it once and reuse the value. + // Hard links are not allowed to reference files in other volumes. + // Junctions and symbolic links can reference files and directories in other volumes, + // but the reparse point should still live in the parent volume. + var vol uint32 + err = windows.GetVolumeInformationByHandle(file.pfd.Sysfd, nil, 0, &vol, nil, nil, nil, 0) + runtime.KeepAlive(file) if err != nil { err = &PathError{Op: "readdir", Path: file.name, Err: err} return } + infoClass = windows.FileIdBothDirectoryRestartInfo + file.dirinfo = new(dirInfo) + file.dirinfo.buf = dirBufPool.Get().(*[]byte) + file.dirinfo.vol = vol } + d := file.dirinfo wantAll := n <= 0 if wantAll { n = -1 } - d := &file.dirinfo.data - for n != 0 && !file.dirinfo.isempty { - if needdata { - e := syscall.FindNextFile(file.dirinfo.h, d) + for n != 0 { + // Refill the buffer if necessary + if d.bufp == 0 { + err = windows.GetFileInformationByHandleEx(file.pfd.Sysfd, infoClass, (*byte)(unsafe.Pointer(&(*d.buf)[0])), uint32(len(*d.buf))) runtime.KeepAlive(file) - if e != nil { - if e == syscall.ERROR_NO_MORE_FILES { + if err != nil { + if err == syscall.ERROR_NO_MORE_FILES { break + } + if s, _ := file.Stat(); s != nil && !s.IsDir() { + err = &PathError{Op: "readdir", Path: file.name, Err: syscall.ENOTDIR} } else { - err = &PathError{Op: "FindNextFile", Path: file.name, Err: e} - return + err = &PathError{Op: "GetFileInformationByHandleEx", Path: file.name, Err: err} + } + return + } + infoClass = windows.FileIdBothDirectoryInfo + } + // Drain the buffer + var islast bool + for n != 0 && !islast { + info := (*windows.FILE_ID_BOTH_DIR_INFO)(unsafe.Pointer(&(*d.buf)[d.bufp])) + d.bufp += int(info.NextEntryOffset) + islast = info.NextEntryOffset == 0 + if islast { + d.bufp = 0 + } + nameslice := unsafe.Slice(&info.FileName[0], info.FileNameLength/2) + name := string(utf16.Decode(nameslice)) + if name == "." || name == ".." { // Useless names + continue + } + if mode == readdirName { + names = append(names, name) + } else { + f := newFileStatFromFileIDBothDirInfo(info) + f.name = name + f.vol = d.vol + // f.path is used by os.SameFile to decide if it needs + // to fetch vol, idxhi and idxlo. But these are already set, + // so set f.path to "" to prevent os.SameFile doing it again. + f.path = "" + if mode == readdirDirEntry { + dirents = append(dirents, dirEntry{f}) + } else { + infos = append(infos, f) } } + n-- } - needdata = true - name := syscall.UTF16ToString(d.FileName[0:]) - if name == "." || name == ".." { // Useless names - continue - } - if mode == readdirName { - names = append(names, name) - } else { - f := newFileStatFromWin32finddata(d) - f.name = name - f.path = file.dirinfo.path - f.appendNameToPath = true - if mode == readdirDirEntry { - dirents = append(dirents, dirEntry{f}) - } else { - infos = append(infos, f) - } - } - n-- } if !wantAll && len(names)+len(dirents)+len(infos) == 0 { return nil, nil, nil, io.EOF diff --git a/src/os/file_windows.go b/src/os/file_windows.go index d94b78f5247..a48feca855a 100644 --- a/src/os/file_windows.go +++ b/src/os/file_windows.go @@ -84,18 +84,6 @@ func NewFile(fd uintptr, name string) *File { return newFile(h, name, "file") } -// Auxiliary information if the File describes a directory -type dirInfo struct { - h syscall.Handle // search handle created with FindFirstFile - data syscall.Win32finddata - path string - isempty bool // set if FindFirstFile returns ERROR_FILE_NOT_FOUND -} - -func (d *dirInfo) close() error { - return syscall.FindClose(d.h) -} - func epipecheck(file *File, e error) { } @@ -103,63 +91,6 @@ func epipecheck(file *File, e error) { // On Unix-like systems, it is "/dev/null"; on Windows, "NUL". const DevNull = "NUL" -func openDir(name string) (d *dirInfo, e error) { - var mask string - - path := fixLongPath(name) - - if len(path) == 2 && path[1] == ':' { // it is a drive letter, like C: - mask = path + `*` - } else if len(path) > 0 { - lc := path[len(path)-1] - if lc == '/' || lc == '\\' { - mask = path + `*` - } else { - mask = path + `\*` - } - } else { - mask = `\*` - } - maskp, e := syscall.UTF16PtrFromString(mask) - if e != nil { - return nil, e - } - d = new(dirInfo) - d.h, e = syscall.FindFirstFile(maskp, &d.data) - if e != nil { - // FindFirstFile returns ERROR_FILE_NOT_FOUND when - // no matching files can be found. Then, if directory - // exists, we should proceed. - // If FindFirstFile failed because name does not point - // to a directory, we should return ENOTDIR. - var fa syscall.Win32FileAttributeData - pathp, e1 := syscall.UTF16PtrFromString(path) - if e1 != nil { - return nil, e - } - e1 = syscall.GetFileAttributesEx(pathp, syscall.GetFileExInfoStandard, (*byte)(unsafe.Pointer(&fa))) - if e1 != nil { - return nil, e - } - if fa.FileAttributes&syscall.FILE_ATTRIBUTE_DIRECTORY == 0 { - return nil, syscall.ENOTDIR - } - if e != syscall.ERROR_FILE_NOT_FOUND { - return nil, e - } - d.isempty = true - } - d.path = path - if !isAbs(d.path) { - d.path, e = syscall.FullPath(d.path) - if e != nil { - d.close() - return nil, e - } - } - return d, nil -} - // openFileNolog is the Windows implementation of OpenFile. func openFileNolog(name string, flag int, perm FileMode) (*File, error) { if name == "" { diff --git a/src/os/types_windows.go b/src/os/types_windows.go index 9a3d5087839..d1623f7b17e 100644 --- a/src/os/types_windows.go +++ b/src/os/types_windows.go @@ -16,7 +16,7 @@ import ( type fileStat struct { name string - // from ByHandleFileInformation, Win32FileAttributeData and Win32finddata + // from ByHandleFileInformation, Win32FileAttributeData, Win32finddata, and GetFileInformationByHandleEx FileAttributes uint32 CreationTime syscall.Filetime LastAccessTime syscall.Filetime @@ -24,7 +24,7 @@ type fileStat struct { FileSizeHigh uint32 FileSizeLow uint32 - // from Win32finddata + // from Win32finddata and GetFileInformationByHandleEx ReparseTag uint32 // what syscall.GetFileType returns @@ -32,11 +32,10 @@ type fileStat struct { // used to implement SameFile sync.Mutex - path string - vol uint32 - idxhi uint32 - idxlo uint32 - appendNameToPath bool + path string + vol uint32 + idxhi uint32 + idxlo uint32 } // newFileStatFromGetFileInformationByHandle calls GetFileInformationByHandle @@ -80,6 +79,26 @@ func newFileStatFromGetFileInformationByHandle(path string, h syscall.Handle) (f }, nil } +// newFileStatFromFileIDBothDirInfo copies all required information +// from windows.FILE_ID_BOTH_DIR_INFO d into the newly created fileStat. +func newFileStatFromFileIDBothDirInfo(d *windows.FILE_ID_BOTH_DIR_INFO) *fileStat { + // The FILE_ID_BOTH_DIR_INFO MSDN documentations isn't completely correct. + // FileAttributes can contain any file attributes that is currently set on the file, + // not just the ones documented. + // EaSize contains the reparse tag if the file is a reparse point. + return &fileStat{ + FileAttributes: d.FileAttributes, + CreationTime: d.CreationTime, + LastAccessTime: d.LastAccessTime, + LastWriteTime: d.LastWriteTime, + FileSizeHigh: uint32(d.EndOfFile >> 32), + FileSizeLow: uint32(d.EndOfFile), + ReparseTag: d.EaSize, + idxhi: uint32(d.FileID >> 32), + idxlo: uint32(d.FileID), + } +} + // newFileStatFromWin32finddata copies all required information // from syscall.Win32finddata d into the newly created fileStat. func newFileStatFromWin32finddata(d *syscall.Win32finddata) *fileStat { @@ -169,13 +188,7 @@ func (fs *fileStat) loadFileId() error { // already done return nil } - var path string - if fs.appendNameToPath { - path = fs.path + `\` + fs.name - } else { - path = fs.path - } - pathp, err := syscall.UTF16PtrFromString(path) + pathp, err := syscall.UTF16PtrFromString(fs.path) if err != nil { return err }