From 5930c7de933c4d826926396f715bae63333143a2 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 7 Jan 2019 10:18:42 -0800 Subject: [PATCH] syscall: add all ambient capabilities into permitted and inheritable sets According to the prctl man page, each capability from the ambient set must already be present in both the permitted and the inheritable sets of the process. exec_linux_test suggests configuring the capabilities in the parent process. This doesn't look nice, because: * Capabilities are a per-thread attribute, so we need to use LockOSThread. * Need to restore capabilities after creating a process. * Doesn't work with user namespaces, because a process gets capabilities when a namespace is created. Fixes #23152 Change-Id: Iba23e530fc7b9f5182d602fe855f82218f354219 Reviewed-on: https://go-review.googlesource.com/c/go/+/156577 Run-TryBot: Tobias Klauser TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/syscall/exec_linux.go | 51 +++++++++++++++++++++++++++++-- src/syscall/exec_linux_test.go | 56 +++++++++++++++++++++++----------- 2 files changed, 87 insertions(+), 20 deletions(-) diff --git a/src/syscall/exec_linux.go b/src/syscall/exec_linux.go index 79c0d774222..ec8f296bca9 100644 --- a/src/syscall/exec_linux.go +++ b/src/syscall/exec_linux.go @@ -94,6 +94,29 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr return pid, 0 } +const _LINUX_CAPABILITY_VERSION_3 = 0x20080522 + +type capHeader struct { + version uint32 + pid int32 +} + +type capData struct { + effective uint32 + permitted uint32 + inheritable uint32 +} +type caps struct { + hdr capHeader + data [2]capData +} + +// See CAP_TO_INDEX in linux/capability.h: +func capToIndex(cap uintptr) uintptr { return cap >> 5 } + +// See CAP_TO_MASK in linux/capability.h: +func capToMask(cap uintptr) uint32 { return 1 << uint(cap&31) } + // forkAndExecInChild1 implements the body of forkAndExecInChild up to // the parent's post-fork path. This is a separate function so we can // separate the child's and parent's stack frames if we're using @@ -122,6 +145,7 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att err2 Errno nextfd int i int + caps caps ) // Record parent PID so child can test if it has died. @@ -286,11 +310,32 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att } } - for _, c := range sys.AmbientCaps { - _, _, err1 = RawSyscall6(SYS_PRCTL, PR_CAP_AMBIENT, uintptr(PR_CAP_AMBIENT_RAISE), c, 0, 0, 0) - if err1 != 0 { + if len(sys.AmbientCaps) != 0 { + // Ambient capabilities were added in the 4.3 kernel, + // so it is safe to always use _LINUX_CAPABILITY_VERSION_3. + caps.hdr.version = _LINUX_CAPABILITY_VERSION_3 + + if _, _, err1 := RawSyscall(SYS_CAPGET, uintptr(unsafe.Pointer(&caps.hdr)), uintptr(unsafe.Pointer(&caps.data[0])), 0); err1 != 0 { goto childerror } + + for _, c := range sys.AmbientCaps { + // Add the c capability to the permitted and inheritable capability mask, + // otherwise we will not be able to add it to the ambient capability mask. + caps.data[capToIndex(c)].permitted |= capToMask(c) + caps.data[capToIndex(c)].inheritable |= capToMask(c) + } + + if _, _, err1 := RawSyscall(SYS_CAPSET, uintptr(unsafe.Pointer(&caps.hdr)), uintptr(unsafe.Pointer(&caps.data[0])), 0); err1 != 0 { + goto childerror + } + + for _, c := range sys.AmbientCaps { + _, _, err1 = RawSyscall6(SYS_PRCTL, PR_CAP_AMBIENT, uintptr(PR_CAP_AMBIENT_RAISE), c, 0, 0, 0) + if err1 != 0 { + goto childerror + } + } } // Chdir diff --git a/src/syscall/exec_linux_test.go b/src/syscall/exec_linux_test.go index ac5745bc80b..dc16a9d9fe4 100644 --- a/src/syscall/exec_linux_test.go +++ b/src/syscall/exec_linux_test.go @@ -436,7 +436,7 @@ func TestUnshareMountNameSpaceChroot(t *testing.T) { type capHeader struct { version uint32 - pid int + pid int32 } type capData struct { @@ -446,6 +446,7 @@ type capData struct { } const CAP_SYS_TIME = 25 +const CAP_SYSLOG = 34 type caps struct { hdr capHeader @@ -506,15 +507,28 @@ func TestAmbientCapsHelper(*testing.T) { fmt.Fprintln(os.Stderr, "CAP_SYS_TIME unexpectedly not in the effective capability mask") os.Exit(2) } + if caps.data[1].effective&(1<