mirror of
https://github.com/golang/go
synced 2024-11-12 10:00:25 -07:00
runtime: use MADV_FREE on Linux if available
On Linux, sysUnused currently uses madvise(MADV_DONTNEED) to signal the kernel that a range of allocated memory contains unneeded data. After a successful call, the range is still available (although the data it contained before the call to madvise is not), but the first access to that range will unconditionally incur a page fault (needed to 0-fill the range).

A faster alternative is MADV_FREE, available since Linux 4.5. The mechanism is very similar, but the page fault will only be incurred if the kernel, between the call to madvise and the first access, decides to reuse that memory for something else.

In sysUnused, test whether MADV_FREE is supported and fall back to MADV_DONTNEED in case it isn't. This requires making the return value of the madvise syscall available to the caller, so change runtime.madvise to return it.

Fixes #23687

Change-Id: I962c3429000dd9f4a00846461ad128b71201bb04
Reviewed-on: https://go-review.googlesource.com/135395
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
parent
a0f5d5f883
commit
77f9b2728e
@ -58,7 +58,10 @@ const (
|
||||
MAP_PRIVATE = C.MAP_PRIVATE
|
||||
MAP_FIXED = C.MAP_FIXED
|
||||
|
||||
MADV_DONTNEED = C.MADV_DONTNEED
|
||||
MADV_DONTNEED = C.MADV_DONTNEED
|
||||
MADV_FREE = C.MADV_FREE
|
||||
MADV_HUGEPAGE = C.MADV_HUGEPAGE
|
||||
MADV_NOHUGEPAGE = C.MADV_HNOUGEPAGE
|
||||
|
||||
SA_RESTART = C.SA_RESTART
|
||||
SA_ONSTACK = C.SA_ONSTACK
|
||||
|
@ -47,7 +47,10 @@ const (
|
||||
MAP_PRIVATE = C.MAP_PRIVATE
|
||||
MAP_FIXED = C.MAP_FIXED
|
||||
|
||||
MADV_DONTNEED = C.MADV_DONTNEED
|
||||
MADV_DONTNEED = C.MADV_DONTNEED
|
||||
MADV_FREE = C.MADV_FREE
|
||||
MADV_HUGEPAGE = C.MADV_HUGEPAGE
|
||||
MADV_NOHUGEPAGE = C.MADV_HNOUGEPAGE
|
||||
|
||||
SA_RESTART = C.SA_RESTART
|
||||
SA_ONSTACK = C.SA_ONSTACK
|
||||
|
@ -18,6 +18,7 @@ const (
|
||||
_MAP_FIXED = 0x10
|
||||
|
||||
_MADV_DONTNEED = 0x4
|
||||
_MADV_FREE = 0x8
|
||||
_MADV_HUGEPAGE = 0xe
|
||||
_MADV_NOHUGEPAGE = 0xf
|
||||
|
||||
|
@ -18,6 +18,7 @@ const (
|
||||
_MAP_FIXED = 0x10
|
||||
|
||||
_MADV_DONTNEED = 0x4
|
||||
_MADV_FREE = 0x8
|
||||
_MADV_HUGEPAGE = 0xe
|
||||
_MADV_NOHUGEPAGE = 0xf
|
||||
|
||||
|
@ -16,6 +16,7 @@ const (
|
||||
_MAP_FIXED = 0x10
|
||||
|
||||
_MADV_DONTNEED = 0x4
|
||||
_MADV_FREE = 0x8
|
||||
_MADV_HUGEPAGE = 0xe
|
||||
_MADV_NOHUGEPAGE = 0xf
|
||||
|
||||
|
@ -18,6 +18,7 @@ const (
|
||||
_MAP_FIXED = 0x10
|
||||
|
||||
_MADV_DONTNEED = 0x4
|
||||
_MADV_FREE = 0x8
|
||||
_MADV_HUGEPAGE = 0xe
|
||||
_MADV_NOHUGEPAGE = 0xf
|
||||
|
||||
|
@ -18,6 +18,7 @@ const (
|
||||
_MAP_FIXED = 0x10
|
||||
|
||||
_MADV_DONTNEED = 0x4
|
||||
_MADV_FREE = 0x8
|
||||
_MADV_HUGEPAGE = 0xe
|
||||
_MADV_NOHUGEPAGE = 0xf
|
||||
|
||||
|
@ -22,6 +22,7 @@ const (
|
||||
_MAP_FIXED = 0x10
|
||||
|
||||
_MADV_DONTNEED = 0x4
|
||||
_MADV_FREE = 0x8
|
||||
_MADV_HUGEPAGE = 0xe
|
||||
_MADV_NOHUGEPAGE = 0xf
|
||||
|
||||
|
@ -18,6 +18,7 @@ const (
|
||||
_MAP_FIXED = 0x10
|
||||
|
||||
_MADV_DONTNEED = 0x4
|
||||
_MADV_FREE = 0x8
|
||||
_MADV_HUGEPAGE = 0xe
|
||||
_MADV_NOHUGEPAGE = 0xf
|
||||
|
||||
|
@ -18,6 +18,7 @@ const (
|
||||
_MAP_FIXED = 0x10
|
||||
|
||||
_MADV_DONTNEED = 0x4
|
||||
_MADV_FREE = 0x8
|
||||
_MADV_HUGEPAGE = 0xe
|
||||
_MADV_NOHUGEPAGE = 0xf
|
||||
|
||||
|
@ -19,6 +19,7 @@ const (
|
||||
_MAP_FIXED = 0x10
|
||||
|
||||
_MADV_DONTNEED = 0x4
|
||||
_MADV_FREE = 0x8
|
||||
_MADV_HUGEPAGE = 0xe
|
||||
_MADV_NOHUGEPAGE = 0xf
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
package runtime
|
||||
|
||||
import (
|
||||
"runtime/internal/atomic"
|
||||
"runtime/internal/sys"
|
||||
"unsafe"
|
||||
)
|
||||
@ -34,10 +35,12 @@ func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
|
||||
return p
|
||||
}
|
||||
|
||||
var adviseUnused = uint32(_MADV_FREE)
|
||||
|
||||
func sysUnused(v unsafe.Pointer, n uintptr) {
|
||||
// By default, Linux's "transparent huge page" support will
|
||||
// merge pages into a huge page if there's even a single
|
||||
// present regular page, undoing the effects of the DONTNEED
|
||||
// present regular page, undoing the effects of madvise(adviseUnused)
|
||||
// below. On amd64, that means khugepaged can turn a single
|
||||
// 4KB page to 2MB, bloating the process's RSS by as much as
|
||||
// 512X. (See issue #8832 and Linux kernel bug
|
||||
@ -102,7 +105,13 @@ func sysUnused(v unsafe.Pointer, n uintptr) {
|
||||
throw("unaligned sysUnused")
|
||||
}
|
||||
|
||||
madvise(v, n, _MADV_DONTNEED)
|
||||
advise := atomic.Load(&adviseUnused)
|
||||
if errno := madvise(v, n, int32(advise)); advise == _MADV_FREE && errno != 0 {
|
||||
// MADV_FREE was added in Linux 4.5. Fall back to MADV_DONTNEED if it is
|
||||
// not supported.
|
||||
atomic.Store(&adviseUnused, _MADV_DONTNEED)
|
||||
madvise(v, n, _MADV_DONTNEED)
|
||||
}
|
||||
}
|
||||
|
||||
func sysUsed(v unsafe.Pointer, n uintptr) {
|
||||
|
@ -25,7 +25,8 @@ func write(fd uintptr, p unsafe.Pointer, n int32) int32
|
||||
//go:noescape
|
||||
func open(name *byte, mode, perm int32) int32
|
||||
|
||||
func madvise(addr unsafe.Pointer, n uintptr, flags int32)
|
||||
// The return value is used on Linux by sysUnused to detect whether
// MADV_FREE is supported.
|
||||
func madvise(addr unsafe.Pointer, n uintptr, flags int32) int32
|
||||
|
||||
// exitThread terminates the current thread, writing *wait = 0 when
|
||||
// the stack is safe to reclaim.
|
||||
|
@ -260,9 +260,11 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
|
||||
MOVL flags+16(FP), DX
|
||||
MOVQ $75, AX // madvise
|
||||
SYSCALL
|
||||
// ignore failure - maybe pages are locked
|
||||
JCC 2(PC)
|
||||
MOVL $-1, AX
|
||||
MOVL AX, ret+24(FP)
|
||||
RET
|
||||
|
||||
|
||||
TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
|
||||
MOVQ new+0(FP), DI
|
||||
MOVQ old+8(FP), SI
|
||||
|
@ -163,7 +163,9 @@ TEXT runtime·munmap(SB),NOSPLIT,$-4
|
||||
TEXT runtime·madvise(SB),NOSPLIT,$-4
|
||||
MOVL $75, AX // madvise
|
||||
INT $0x80
|
||||
// ignore failure - maybe pages are locked
|
||||
JAE 2(PC)
|
||||
MOVL $-1, AX
|
||||
MOVL AX, ret+12(FP)
|
||||
RET
|
||||
|
||||
TEXT runtime·setitimer(SB), NOSPLIT, $-4
|
||||
|
@ -337,9 +337,11 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
|
||||
MOVL flags+16(FP), DX
|
||||
MOVQ $75, AX // madvise
|
||||
SYSCALL
|
||||
// ignore failure - maybe pages are locked
|
||||
JCC 2(PC)
|
||||
MOVL $-1, AX
|
||||
MOVL AX, ret+24(FP)
|
||||
RET
|
||||
|
||||
|
||||
TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
|
||||
MOVQ new+0(FP), DI
|
||||
MOVQ old+8(FP), SI
|
||||
|
@ -264,14 +264,15 @@ TEXT runtime·munmap(SB),NOSPLIT,$0
|
||||
RET
|
||||
|
||||
TEXT runtime·madvise(SB),NOSPLIT,$0
|
||||
MOVW addr+0(FP), R0 // arg 1 addr
|
||||
MOVW n+4(FP), R1 // arg 2 len
|
||||
MOVW flags+8(FP), R2 // arg 3 flags
|
||||
MOVW $SYS_madvise, R7
|
||||
SWI $0
|
||||
// ignore failure - maybe pages are locked
|
||||
MOVW addr+0(FP), R0 // arg 1 addr
|
||||
MOVW n+4(FP), R1 // arg 2 len
|
||||
MOVW flags+8(FP), R2 // arg 3 flags
|
||||
MOVW $SYS_madvise, R7
|
||||
SWI $0
|
||||
MOVW.CS $-1, R0
|
||||
MOVW R0, ret+12(FP)
|
||||
RET
|
||||
|
||||
|
||||
TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0
|
||||
MOVW new+0(FP), R0
|
||||
MOVW old+4(FP), R1
|
||||
|
@ -427,7 +427,7 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
|
||||
MOVL n+4(FP), CX
|
||||
MOVL flags+8(FP), DX
|
||||
INVOKE_SYSCALL
|
||||
// ignore failure - maybe pages are locked
|
||||
MOVL AX, ret+12(FP)
|
||||
RET
|
||||
|
||||
// int32 futex(int32 *uaddr, int32 op, int32 val,
|
||||
|
@ -519,7 +519,7 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
|
||||
MOVL flags+16(FP), DX
|
||||
MOVQ $SYS_madvise, AX
|
||||
SYSCALL
|
||||
// ignore failure - maybe pages are locked
|
||||
MOVL AX, ret+24(FP)
|
||||
RET
|
||||
|
||||
// int64 futex(int32 *uaddr, int32 op, int32 val,
|
||||
|
@ -195,7 +195,7 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
|
||||
MOVW flags+8(FP), R2
|
||||
MOVW $SYS_madvise, R7
|
||||
SWI $0
|
||||
// ignore failure - maybe pages are locked
|
||||
MOVW R0, ret+12(FP)
|
||||
RET
|
||||
|
||||
TEXT runtime·setitimer(SB),NOSPLIT,$0
|
||||
|
@ -401,7 +401,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
|
||||
MOVW flags+16(FP), R2
|
||||
MOVD $SYS_madvise, R8
|
||||
SVC
|
||||
// ignore failure - maybe pages are locked
|
||||
MOVW R0, ret+24(FP)
|
||||
RET
|
||||
|
||||
// int64 futex(int32 *uaddr, int32 op, int32 val,
|
||||
|
@ -291,7 +291,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
|
||||
MOVW flags+16(FP), R6
|
||||
MOVV $SYS_madvise, R2
|
||||
SYSCALL
|
||||
// ignore failure - maybe pages are locked
|
||||
MOVW R2, ret+24(FP)
|
||||
RET
|
||||
|
||||
// int64 futex(int32 *uaddr, int32 op, int32 val,
|
||||
|
@ -302,13 +302,13 @@ TEXT runtime·munmap(SB),NOSPLIT,$0-8
|
||||
UNDEF // crash
|
||||
RET
|
||||
|
||||
TEXT runtime·madvise(SB),NOSPLIT,$0-12
|
||||
TEXT runtime·madvise(SB),NOSPLIT,$0-16
|
||||
MOVW addr+0(FP), R4
|
||||
MOVW n+4(FP), R5
|
||||
MOVW flags+8(FP), R6
|
||||
MOVW $SYS_madvise, R2
|
||||
SYSCALL
|
||||
// ignore failure - maybe pages are locked
|
||||
MOVW R2, ret+12(FP)
|
||||
RET
|
||||
|
||||
// int32 futex(int32 *uaddr, int32 op, int32 val, struct timespec *timeout, int32 *uaddr2, int32 val2);
|
||||
|
@ -454,7 +454,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
|
||||
MOVD n+8(FP), R4
|
||||
MOVW flags+16(FP), R5
|
||||
SYSCALL $SYS_madvise
|
||||
// ignore failure - maybe pages are locked
|
||||
MOVW R3, ret+24(FP)
|
||||
RET
|
||||
|
||||
// int64 futex(int32 *uaddr, int32 op, int32 val,
|
||||
|
@ -290,7 +290,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
|
||||
MOVW flags+16(FP), R4
|
||||
MOVW $SYS_madvise, R1
|
||||
SYSCALL
|
||||
// ignore failure - maybe pages are locked
|
||||
MOVW R2, ret+24(FP)
|
||||
RET
|
||||
|
||||
// int64 futex(int32 *uaddr, int32 op, int32 val,
|
||||
|
@ -135,7 +135,9 @@ TEXT runtime·munmap(SB),NOSPLIT,$-4
|
||||
TEXT runtime·madvise(SB),NOSPLIT,$-4
|
||||
MOVL $75, AX // sys_madvise
|
||||
INT $0x80
|
||||
// ignore failure - maybe pages are locked
|
||||
JAE 2(PC)
|
||||
MOVL $-1, AX
|
||||
MOVL AX, ret+12(FP)
|
||||
RET
|
||||
|
||||
TEXT runtime·setitimer(SB),NOSPLIT,$-4
|
||||
|
@ -319,7 +319,9 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
|
||||
MOVL flags+16(FP), DX // arg 3 - behav
|
||||
MOVQ $75, AX // sys_madvise
|
||||
SYSCALL
|
||||
// ignore failure - maybe pages are locked
|
||||
JCC 2(PC)
|
||||
MOVL $-1, AX
|
||||
MOVL AX, ret+24(FP)
|
||||
RET
|
||||
|
||||
TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
|
||||
|
@ -284,11 +284,12 @@ TEXT runtime·munmap(SB),NOSPLIT,$0
|
||||
RET
|
||||
|
||||
TEXT runtime·madvise(SB),NOSPLIT,$0
|
||||
MOVW addr+0(FP), R0 // arg 1 - addr
|
||||
MOVW n+4(FP), R1 // arg 2 - len
|
||||
MOVW flags+8(FP), R2 // arg 3 - behav
|
||||
SWI $0xa0004b // sys_madvise
|
||||
// ignore failure - maybe pages are locked
|
||||
MOVW addr+0(FP), R0 // arg 1 - addr
|
||||
MOVW n+4(FP), R1 // arg 2 - len
|
||||
MOVW flags+8(FP), R2 // arg 3 - behav
|
||||
SWI $0xa0004b // sys_madvise
|
||||
MOVW.CS $-1, R0
|
||||
MOVW R0, ret+12(FP)
|
||||
RET
|
||||
|
||||
TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0
|
||||
|
@ -136,7 +136,8 @@ TEXT runtime·madvise(SB),NOSPLIT,$-4
|
||||
MOVL $75, AX // sys_madvise
|
||||
INT $0x80
|
||||
JAE 2(PC)
|
||||
MOVL $0xf1, 0xf1 // crash
|
||||
MOVL $-1, AX
|
||||
MOVL AX, ret+12(FP)
|
||||
RET
|
||||
|
||||
TEXT runtime·setitimer(SB),NOSPLIT,$-4
|
||||
|
@ -305,7 +305,9 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
|
||||
MOVL flags+16(FP), DX // arg 3 - behav
|
||||
MOVQ $75, AX // sys_madvise
|
||||
SYSCALL
|
||||
// ignore failure - maybe pages are locked
|
||||
JCC 2(PC)
|
||||
MOVL $-1, AX
|
||||
MOVL AX, ret+24(FP)
|
||||
RET
|
||||
|
||||
TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
|
||||
|
@ -143,8 +143,8 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
|
||||
MOVW flags+8(FP), R2 // arg 2 - flags
|
||||
MOVW $75, R12 // sys_madvise
|
||||
SWI $0
|
||||
MOVW.CS $0, R8 // crash on syscall failure
|
||||
MOVW.CS R8, (R8)
|
||||
MOVW.CS $-1, R0
|
||||
MOVW R0, ret+12(FP)
|
||||
RET
|
||||
|
||||
TEXT runtime·setitimer(SB),NOSPLIT,$0
|
||||
|
Loading…
Reference in New Issue
Block a user