1
0
mirror of https://github.com/golang/go synced 2024-09-28 17:14:29 -06:00

runtime: make tidExists more robust

The LockThreadExit tests in the runtime have been observed to fail after
reading /proc/self/task/<tid>/stat and blindly assuming its contents
followed a specific format. The parsing code is also wrong, because
splitting by spaces doesn't work when the comm name contains a space.
It also ignores errors without reporting them, which isn't great.

This change rewrites tidExists to be more robust by using
/proc/self/task/<tid>/status instead. It also modifies tidExists'
signature to report an error to its caller. Its caller then prints that
error.

Ignoring a non-not-exist error with opening this file is the likely but
unconfirmed cause of #65736 (ESRCH). This change also checks for that
error explicitly as an optimistic fix.

Fixes #65736.

Change-Id: Iea560b457d514426da2781b7eb7b8616a91ec23b
Reviewed-on: https://go-review.googlesource.com/c/go/+/567938
Auto-Submit: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
This commit is contained in:
Michael Anthony Knyszek 2024-02-28 23:25:52 +00:00 committed by Gopher Robot
parent 802473cfda
commit aa1b50e179
3 changed files with 43 additions and 9 deletions

View File

@ -90,7 +90,11 @@ func LockOSThreadAlt() {
println("locked thread reused")
os.Exit(1)
}
exists, supported := tidExists(subTID)
exists, supported, err := tidExists(subTID)
if err != nil {
println("error:", err.Error())
return
}
if !supported || !exists {
goto ok
}

View File

@ -8,6 +8,7 @@ import (
"bytes"
"fmt"
"internal/testenv"
"io"
"os"
"syscall"
)
@ -16,14 +17,43 @@ func gettid() int {
return syscall.Gettid()
}
func tidExists(tid int) (exists, supported bool) {
stat, err := os.ReadFile(fmt.Sprintf("/proc/self/task/%d/stat", tid))
if os.IsNotExist(err) {
return false, true
func tidExists(tid int) (exists, supported bool, err error) {
// Open the magic proc status file for reading with the syscall package.
// We want to identify certain valid errors very precisely.
statusFile := fmt.Sprintf("/proc/self/task/%d/status", tid)
fd, err := syscall.Open(statusFile, syscall.O_RDONLY, 0)
if errno, ok := err.(syscall.Errno); ok {
if errno == syscall.ENOENT || errno == syscall.ESRCH {
return false, true, nil
}
}
if err != nil {
return false, false, err
}
status, err := io.ReadAll(os.NewFile(uintptr(fd), statusFile))
if err != nil {
return false, false, err
}
lines := bytes.Split(status, []byte{'\n'})
// Find the State line.
stateLineIdx := -1
for i, line := range lines {
if bytes.HasPrefix(line, []byte("State:")) {
stateLineIdx = i
break
}
}
if stateLineIdx < 0 {
// Malformed status file?
return false, false, fmt.Errorf("unexpected status file format: %s:\n%s", statusFile, status)
}
stateLine := bytes.SplitN(lines[stateLineIdx], []byte{':'}, 2)
if len(stateLine) != 2 {
// Malformed status file?
return false, false, fmt.Errorf("unexpected status file format: %s:\n%s", statusFile, status)
}
// Check if it's a zombie thread.
state := bytes.Fields(stat)[2]
return !(len(state) == 1 && state[0] == 'Z'), true
return !bytes.Contains(stateLine[1], []byte{'Z'}), true, nil
}
func getcwd() (string, error) {

View File

@ -11,8 +11,8 @@ func gettid() int {
return 0
}
func tidExists(tid int) (exists, supported bool) {
return false, false
func tidExists(tid int) (exists, supported bool, err error) {
return false, false, nil
}
func getcwd() (string, error) {