mirror of
https://github.com/golang/go
synced 2024-11-18 06:54:49 -07:00
syscall: use CLONE_VFORK safely
Currently, CLONE_VFORK is used without much regard to the stack. This is dangerous, because anything the child does to the stack is visible to the parent. For example, if the compiler were to reuse named stack slots (which it currently doesn't do), it would be easy for the child running in the same stack frame as the parent to corrupt local variables that the parent then depended on. We're not sure of anything specific going wrong in this code right now, but it is at best a ticking time bomb. CLONE_VFORK can only safely be used if we ensure the child does not execute in any of the active stack frames of the parent. This commit implements this by arranging for the parent to return immediately from the frame the child will operate in, and for the child to never return to the frame the parent will operate in. Fixes #20732. Change-Id: Iad5b4ddc2b994c082bd278bfd52ef53bd38c037f Reviewed-on: https://go-review.googlesource.com/46173 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
parent
5a5ac34133
commit
67e537541c
@ -63,15 +63,46 @@ func runtime_AfterForkInChild()
|
||||
// functions that do not grow the stack.
|
||||
//go:norace
|
||||
func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) {
|
||||
// Set up and fork. This returns immediately in the parent or
|
||||
// if there's an error.
|
||||
r1, err1, p, locked := forkAndExecInChild1(argv0, argv, envv, chroot, dir, attr, sys, pipe)
|
||||
if locked {
|
||||
runtime_AfterFork()
|
||||
}
|
||||
if err1 != 0 {
|
||||
return 0, err1
|
||||
}
|
||||
|
||||
// parent; return PID
|
||||
pid = int(r1)
|
||||
|
||||
if sys.UidMappings != nil || sys.GidMappings != nil {
|
||||
Close(p[0])
|
||||
err := writeUidGidMappings(pid, sys)
|
||||
var err2 Errno
|
||||
if err != nil {
|
||||
err2 = err.(Errno)
|
||||
}
|
||||
RawSyscall(SYS_WRITE, uintptr(p[1]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2))
|
||||
Close(p[1])
|
||||
}
|
||||
|
||||
return pid, 0
|
||||
}
|
||||
|
||||
//go:norace
|
||||
func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (r1 uintptr, err1 Errno, p [2]int, locked bool) {
|
||||
// vfork requires that the child not touch any of the parent's
|
||||
// active stack frames. Hence, the child does all post-fork
|
||||
// processing in this stack frame and never returns, while the
|
||||
// parent returns immediately from this frame and does all
|
||||
// post-fork processing in the outer frame.
|
||||
// Declare all variables at top in case any
|
||||
// declarations require heap allocation (e.g., err1).
|
||||
var (
|
||||
r1 uintptr
|
||||
err1 Errno
|
||||
err2 Errno
|
||||
nextfd int
|
||||
i int
|
||||
p [2]int
|
||||
)
|
||||
|
||||
// Record parent PID so child can test if it has died.
|
||||
@ -94,13 +125,15 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr
|
||||
// synchronizing writing of User ID/Group ID mappings.
|
||||
if sys.UidMappings != nil || sys.GidMappings != nil {
|
||||
if err := forkExecPipe(p[:]); err != nil {
|
||||
return 0, err.(Errno)
|
||||
err1 = err.(Errno)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// About to call fork.
|
||||
// No more allocation or calls of non-assembly functions.
|
||||
runtime_BeforeFork()
|
||||
locked = true
|
||||
switch {
|
||||
case runtime.GOARCH == "amd64" && sys.Cloneflags&CLONE_NEWUSER == 0:
|
||||
r1, err1 = rawVforkSyscall(SYS_CLONE, uintptr(SIGCHLD|CLONE_VFORK|CLONE_VM)|sys.Cloneflags)
|
||||
@ -109,27 +142,14 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr
|
||||
default:
|
||||
r1, _, err1 = RawSyscall6(SYS_CLONE, uintptr(SIGCHLD)|sys.Cloneflags, 0, 0, 0, 0, 0)
|
||||
}
|
||||
if err1 != 0 {
|
||||
runtime_AfterFork()
|
||||
return 0, err1
|
||||
}
|
||||
|
||||
if r1 != 0 {
|
||||
// parent; return PID
|
||||
runtime_AfterFork()
|
||||
pid = int(r1)
|
||||
|
||||
if sys.UidMappings != nil || sys.GidMappings != nil {
|
||||
Close(p[0])
|
||||
err := writeUidGidMappings(pid, sys)
|
||||
if err != nil {
|
||||
err2 = err.(Errno)
|
||||
}
|
||||
RawSyscall(SYS_WRITE, uintptr(p[1]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2))
|
||||
Close(p[1])
|
||||
}
|
||||
|
||||
return pid, 0
|
||||
if err1 != 0 || r1 != 0 {
|
||||
// If we're in the parent, we must return immediately
|
||||
// so we're not in the same stack frame as the child.
|
||||
// This can at most use the return PC, which the child
|
||||
// will not modify, and the results of
|
||||
// rawVforkSyscall, which must have been written after
|
||||
// the child was replaced.
|
||||
return
|
||||
}
|
||||
|
||||
// Fork succeeded, now in child.
|
||||
|
Loading…
Reference in New Issue
Block a user