1
0
mirror of https://github.com/golang/go synced 2024-11-12 10:00:25 -07:00

runtime: use MADV_FREE on Linux if available

On Linux, sysUnused currently uses madvise(MADV_DONTNEED) to signal the
kernel that a range of allocated memory contains unneeded data. After a
successful call, the range (but not the data it contained before the
call to madvise) is still available but the first access to that range
will unconditionally incur a page fault (needed to 0-fill the range).

A faster alternative is MADV_FREE, available since Linux 4.5. The
mechanism is very similar, but the page fault will only be incurred if
the kernel, between the call to madvise and the first access, decides to
reuse that memory for something else.

In sysUnused, test whether MADV_FREE is supported and fall back to
MADV_DONTNEED in case it isn't. This requires making the return value of
the madvise syscall available to the caller, so change runtime.madvise
to return it.

Fixes #23687

Change-Id: I962c3429000dd9f4a00846461ad128b71201bb04
Reviewed-on: https://go-review.googlesource.com/135395
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
Tobias Klauser 2018-09-14 09:57:06 +02:00 committed by Tobias Klauser
parent a0f5d5f883
commit 77f9b2728e
31 changed files with 77 additions and 37 deletions

View File

@ -58,7 +58,10 @@ const (
MAP_PRIVATE = C.MAP_PRIVATE
MAP_FIXED = C.MAP_FIXED
MADV_DONTNEED = C.MADV_DONTNEED
MADV_DONTNEED = C.MADV_DONTNEED
MADV_FREE = C.MADV_FREE
MADV_HUGEPAGE = C.MADV_HUGEPAGE
MADV_NOHUGEPAGE = C.MADV_HNOUGEPAGE
SA_RESTART = C.SA_RESTART
SA_ONSTACK = C.SA_ONSTACK

View File

@ -47,7 +47,10 @@ const (
MAP_PRIVATE = C.MAP_PRIVATE
MAP_FIXED = C.MAP_FIXED
MADV_DONTNEED = C.MADV_DONTNEED
MADV_DONTNEED = C.MADV_DONTNEED
MADV_FREE = C.MADV_FREE
MADV_HUGEPAGE = C.MADV_HUGEPAGE
MADV_NOHUGEPAGE = C.MADV_HNOUGEPAGE
SA_RESTART = C.SA_RESTART
SA_ONSTACK = C.SA_ONSTACK

View File

@ -18,6 +18,7 @@ const (
_MAP_FIXED = 0x10
_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

View File

@ -18,6 +18,7 @@ const (
_MAP_FIXED = 0x10
_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

View File

@ -16,6 +16,7 @@ const (
_MAP_FIXED = 0x10
_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

View File

@ -18,6 +18,7 @@ const (
_MAP_FIXED = 0x10
_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

View File

@ -18,6 +18,7 @@ const (
_MAP_FIXED = 0x10
_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

View File

@ -22,6 +22,7 @@ const (
_MAP_FIXED = 0x10
_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

View File

@ -18,6 +18,7 @@ const (
_MAP_FIXED = 0x10
_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

View File

@ -18,6 +18,7 @@ const (
_MAP_FIXED = 0x10
_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

View File

@ -19,6 +19,7 @@ const (
_MAP_FIXED = 0x10
_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

View File

@ -5,6 +5,7 @@
package runtime
import (
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
@ -34,10 +35,12 @@ func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
return p
}
var adviseUnused = uint32(_MADV_FREE)
func sysUnused(v unsafe.Pointer, n uintptr) {
// By default, Linux's "transparent huge page" support will
// merge pages into a huge page if there's even a single
// present regular page, undoing the effects of the DONTNEED
// present regular page, undoing the effects of madvise(adviseUnused)
// below. On amd64, that means khugepaged can turn a single
// 4KB page to 2MB, bloating the process's RSS by as much as
// 512X. (See issue #8832 and Linux kernel bug
@ -102,7 +105,13 @@ func sysUnused(v unsafe.Pointer, n uintptr) {
throw("unaligned sysUnused")
}
madvise(v, n, _MADV_DONTNEED)
advise := atomic.Load(&adviseUnused)
if errno := madvise(v, n, int32(advise)); advise == _MADV_FREE && errno != 0 {
// MADV_FREE was added in Linux 4.5. Fall back to MADV_DONTNEED if it is
// not supported.
atomic.Store(&adviseUnused, _MADV_DONTNEED)
madvise(v, n, _MADV_DONTNEED)
}
}
func sysUsed(v unsafe.Pointer, n uintptr) {

View File

@ -25,7 +25,8 @@ func write(fd uintptr, p unsafe.Pointer, n int32) int32
//go:noescape
func open(name *byte, mode, perm int32) int32
func madvise(addr unsafe.Pointer, n uintptr, flags int32)
// return value is only set on linux to be used in sysUnused()
func madvise(addr unsafe.Pointer, n uintptr, flags int32) int32
// exitThread terminates the current thread, writing *wait = 0 when
// the stack is safe to reclaim.

View File

@ -260,9 +260,11 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVL flags+16(FP), DX
MOVQ $75, AX // madvise
SYSCALL
// ignore failure - maybe pages are locked
JCC 2(PC)
MOVL $-1, AX
MOVL AX, ret+24(FP)
RET
TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
MOVQ new+0(FP), DI
MOVQ old+8(FP), SI

View File

@ -163,7 +163,9 @@ TEXT runtime·munmap(SB),NOSPLIT,$-4
TEXT runtime·madvise(SB),NOSPLIT,$-4
MOVL $75, AX // madvise
INT $0x80
// ignore failure - maybe pages are locked
JAE 2(PC)
MOVL $-1, AX
MOVL AX, ret+12(FP)
RET
TEXT runtime·setitimer(SB), NOSPLIT, $-4

View File

@ -337,9 +337,11 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVL flags+16(FP), DX
MOVQ $75, AX // madvise
SYSCALL
// ignore failure - maybe pages are locked
JCC 2(PC)
MOVL $-1, AX
MOVL AX, ret+24(FP)
RET
TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
MOVQ new+0(FP), DI
MOVQ old+8(FP), SI

View File

@ -264,14 +264,15 @@ TEXT runtime·munmap(SB),NOSPLIT,$0
RET
TEXT runtime·madvise(SB),NOSPLIT,$0
MOVW addr+0(FP), R0 // arg 1 addr
MOVW n+4(FP), R1 // arg 2 len
MOVW flags+8(FP), R2 // arg 3 flags
MOVW $SYS_madvise, R7
SWI $0
// ignore failure - maybe pages are locked
MOVW addr+0(FP), R0 // arg 1 addr
MOVW n+4(FP), R1 // arg 2 len
MOVW flags+8(FP), R2 // arg 3 flags
MOVW $SYS_madvise, R7
SWI $0
MOVW.CS $-1, R0
MOVW R0, ret+12(FP)
RET
TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0
MOVW new+0(FP), R0
MOVW old+4(FP), R1

View File

@ -427,7 +427,7 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVL n+4(FP), CX
MOVL flags+8(FP), DX
INVOKE_SYSCALL
// ignore failure - maybe pages are locked
MOVL AX, ret+12(FP)
RET
// int32 futex(int32 *uaddr, int32 op, int32 val,

View File

@ -519,7 +519,7 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVL flags+16(FP), DX
MOVQ $SYS_madvise, AX
SYSCALL
// ignore failure - maybe pages are locked
MOVL AX, ret+24(FP)
RET
// int64 futex(int32 *uaddr, int32 op, int32 val,

View File

@ -195,7 +195,7 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVW flags+8(FP), R2
MOVW $SYS_madvise, R7
SWI $0
// ignore failure - maybe pages are locked
MOVW R0, ret+12(FP)
RET
TEXT runtime·setitimer(SB),NOSPLIT,$0

View File

@ -401,7 +401,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
MOVW flags+16(FP), R2
MOVD $SYS_madvise, R8
SVC
// ignore failure - maybe pages are locked
MOVW R0, ret+24(FP)
RET
// int64 futex(int32 *uaddr, int32 op, int32 val,

View File

@ -291,7 +291,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
MOVW flags+16(FP), R6
MOVV $SYS_madvise, R2
SYSCALL
// ignore failure - maybe pages are locked
MOVW R2, ret+24(FP)
RET
// int64 futex(int32 *uaddr, int32 op, int32 val,

View File

@ -302,13 +302,13 @@ TEXT runtime·munmap(SB),NOSPLIT,$0-8
UNDEF // crash
RET
TEXT runtime·madvise(SB),NOSPLIT,$0-12
TEXT runtime·madvise(SB),NOSPLIT,$0-16
MOVW addr+0(FP), R4
MOVW n+4(FP), R5
MOVW flags+8(FP), R6
MOVW $SYS_madvise, R2
SYSCALL
// ignore failure - maybe pages are locked
MOVW R2, ret+12(FP)
RET
// int32 futex(int32 *uaddr, int32 op, int32 val, struct timespec *timeout, int32 *uaddr2, int32 val2);

View File

@ -454,7 +454,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
MOVD n+8(FP), R4
MOVW flags+16(FP), R5
SYSCALL $SYS_madvise
// ignore failure - maybe pages are locked
MOVW R3, ret+24(FP)
RET
// int64 futex(int32 *uaddr, int32 op, int32 val,

View File

@ -290,7 +290,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
MOVW flags+16(FP), R4
MOVW $SYS_madvise, R1
SYSCALL
// ignore failure - maybe pages are locked
MOVW R2, ret+24(FP)
RET
// int64 futex(int32 *uaddr, int32 op, int32 val,

View File

@ -135,7 +135,9 @@ TEXT runtime·munmap(SB),NOSPLIT,$-4
TEXT runtime·madvise(SB),NOSPLIT,$-4
MOVL $75, AX // sys_madvise
INT $0x80
// ignore failure - maybe pages are locked
JAE 2(PC)
MOVL $-1, AX
MOVL AX, ret+12(FP)
RET
TEXT runtime·setitimer(SB),NOSPLIT,$-4

View File

@ -319,7 +319,9 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVL flags+16(FP), DX // arg 3 - behav
MOVQ $75, AX // sys_madvise
SYSCALL
// ignore failure - maybe pages are locked
JCC 2(PC)
MOVL $-1, AX
MOVL AX, ret+24(FP)
RET
TEXT runtime·sigaltstack(SB),NOSPLIT,$-8

View File

@ -284,11 +284,12 @@ TEXT runtime·munmap(SB),NOSPLIT,$0
RET
TEXT runtime·madvise(SB),NOSPLIT,$0
MOVW addr+0(FP), R0 // arg 1 - addr
MOVW n+4(FP), R1 // arg 2 - len
MOVW flags+8(FP), R2 // arg 3 - behav
SWI $0xa0004b // sys_madvise
// ignore failure - maybe pages are locked
MOVW addr+0(FP), R0 // arg 1 - addr
MOVW n+4(FP), R1 // arg 2 - len
MOVW flags+8(FP), R2 // arg 3 - behav
SWI $0xa0004b // sys_madvise
MOVW.CS $-1, R0
MOVW R0, ret+12(FP)
RET
TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0

View File

@ -136,7 +136,8 @@ TEXT runtime·madvise(SB),NOSPLIT,$-4
MOVL $75, AX // sys_madvise
INT $0x80
JAE 2(PC)
MOVL $0xf1, 0xf1 // crash
MOVL $-1, AX
MOVL AX, ret+12(FP)
RET
TEXT runtime·setitimer(SB),NOSPLIT,$-4

View File

@ -305,7 +305,9 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVL flags+16(FP), DX // arg 3 - behav
MOVQ $75, AX // sys_madvise
SYSCALL
// ignore failure - maybe pages are locked
JCC 2(PC)
MOVL $-1, AX
MOVL AX, ret+24(FP)
RET
TEXT runtime·sigaltstack(SB),NOSPLIT,$-8

View File

@ -143,8 +143,8 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVW flags+8(FP), R2 // arg 2 - flags
MOVW $75, R12 // sys_madvise
SWI $0
MOVW.CS $0, R8 // crash on syscall failure
MOVW.CS R8, (R8)
MOVW.CS $-1, R0
MOVW R0, ret+12(FP)
RET
TEXT runtime·setitimer(SB),NOSPLIT,$0