// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"runtime/internal/sys"
	"unsafe"
)

const (
	_EACCES = 13
	_EINVAL = 22
)

// NOTE: vec must be just 1 byte long here.
// Mincore returns ENOMEM if any of the pages are unmapped,
// but we want to know that all of the pages are unmapped.
// To make these the same, we can only ask about one page
// at a time. See golang.org/issue/7476.
var addrspace_vec [1]byte

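// addrspace_free reports whether the n-byte region of address space
// starting at v is unmapped. It probes the region one page at a time with
// mincore, treating ENOMEM as "unmapped" and any other result as "in use".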
func addrspace_free(v unsafe.Pointer, n uintptr) bool {
	for off := uintptr(0); off < n; off += physPageSize {
		// Use a length of 1 byte, which the kernel will round
		// up to one physical page regardless of the true
		// physical page size.
		errval := mincore(unsafe.Pointer(uintptr(v)+off), 1, &addrspace_vec[0])
		if errval == -_EINVAL {
			// Address is not a multiple of the physical
			// page size. Shouldn't happen, but just ignore it.
			continue
		}
		// ENOMEM means unmapped, which is what we want.
		// Anything else we assume means the pages are mapped.
		if errval != -_ENOMEM {
			return false
		}
	}
	return true
}

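// mmap_fixed maps n bytes at address v. It first tries a plain mmap and, if
// the kernel placed the mapping elsewhere even though the requested region
// is free, retries with MAP_FIXED so the mapping lands at v.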
func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) (unsafe.Pointer, int) {
	p, err := mmap(v, n, prot, flags, fd, offset)
	// On some systems, mmap ignores v without
	// MAP_FIXED, so retry if the address space is free.
	if p != v && addrspace_free(v, n) {
		if err == 0 {
			munmap(p, n)
		}
		p, err = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
	}
	return p, err
}

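// sysAlloc obtains an n-byte anonymous read/write mapping from the kernel,
// charges it to sysStat, and returns nil if the memory cannot be obtained.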
// Don't split the stack as this function may be invoked without a valid G, which
// prevents us from allocating more stack.
//go:nosplit
func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
	p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if err != 0 {
		if err == _EACCES {
			print("runtime: mmap: access denied\n")
			exit(2)
		}
		if err == _EAGAIN {
			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
			exit(2)
		}
		return nil
	}
	mSysStatInc(sysStat, n)
	return p
}

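// sysUnused tells the kernel that the n-byte region at v is not needed and
// that its physical pages may be reclaimed (MADV_DONTNEED). It first marks
// the boundary huge pages NOHUGEPAGE so that khugepaged does not merge the
// released pages back into resident huge pages.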
func sysUnused(v unsafe.Pointer, n uintptr) {
	// By default, Linux's "transparent huge page" support will
	// merge pages into a huge page if there's even a single
	// present regular page, undoing the effects of the DONTNEED
	// below. On amd64, that means khugepaged can turn a single
	// 4KB page into 2MB, bloating the process's RSS by as much as
	// 512X. (See issue #8832 and Linux kernel bug
	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
	//
	// To work around this, we explicitly disable transparent huge
	// pages when we release pages of the heap. However, we have
	// to do this carefully because changing this flag tends to
	// split the VMA (memory mapping) containing v into three
	// VMAs in order to track the different values of the
	// MADV_NOHUGEPAGE flag in the different regions. There's a
	// default limit of 65530 VMAs per address space (sysctl
	// vm.max_map_count), so we must be careful not to create too
	// many VMAs (see issue #12233).
	//
	// Since huge pages are huge, there's little use in adjusting
	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
	// exploding the number of VMAs by only adjusting the
	// MADV_NOHUGEPAGE flag on a large granularity. This still
	// gets most of the benefit of huge pages while keeping the
	// number of VMAs under control. With hugePageSize = 2MB, even
	// a pessimal heap can reach 128GB before running out of VMAs.
	if sys.HugePageSize != 0 {
		var s uintptr = sys.HugePageSize // division by constant 0 is a compile-time error :(

		// If it's a large allocation, we want to leave huge
		// pages enabled. Hence, we only adjust the huge page
		// flag on the huge pages containing v and v+n-1, and
		// only if those aren't aligned.
		var head, tail uintptr
		if uintptr(v)%s != 0 {
			// Compute huge page containing v.
			head = uintptr(v) &^ (s - 1)
		}
		if (uintptr(v)+n)%s != 0 {
			// Compute huge page containing v+n-1.
			tail = (uintptr(v) + n - 1) &^ (s - 1)
		}

		// Note that madvise will return EINVAL if the flag is
		// already set, which is quite likely. We ignore
		// errors.
		if head != 0 && head+sys.HugePageSize == tail {
			// head and tail are different but adjacent,
			// so do this in one call.
			madvise(unsafe.Pointer(head), 2*sys.HugePageSize, _MADV_NOHUGEPAGE)
		} else {
			// Advise the huge pages containing v and v+n-1.
			if head != 0 {
				madvise(unsafe.Pointer(head), sys.HugePageSize, _MADV_NOHUGEPAGE)
			}
			if tail != 0 && tail != head {
				madvise(unsafe.Pointer(tail), sys.HugePageSize, _MADV_NOHUGEPAGE)
			}
		}
	}

	if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 {
		// madvise will round this to any physical page
		// *covered* by this range, so an unaligned madvise
		// will release more memory than intended.
		throw("unaligned sysUnused")
	}

	madvise(v, n, _MADV_DONTNEED)
}

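// sysUsed marks the n-byte region at v as in use again, re-enabling
// transparent huge pages (MADV_HUGEPAGE) on the whole huge pages inside
// the range that sysUnused may have marked NOHUGEPAGE.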
func sysUsed(v unsafe.Pointer, n uintptr) {
	if sys.HugePageSize != 0 {
		// Partially undo the NOHUGEPAGE marks from sysUnused
		// for whole huge pages between v and v+n. This may
		// leave huge pages off at the end points v and v+n
		// even though allocations may cover these entire huge
		// pages. We could detect this and undo NOHUGEPAGE on
		// the end points as well, but it's probably not worth
		// the cost because when neighboring allocations are
		// freed sysUnused will just set NOHUGEPAGE again.
		var s uintptr = sys.HugePageSize

		// Round v up to a huge page boundary.
		beg := (uintptr(v) + (s - 1)) &^ (s - 1)
		// Round v+n down to a huge page boundary.
		end := (uintptr(v) + n) &^ (s - 1)

		if beg < end {
			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
		}
	}
}

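// sysFree unmaps the n-byte region at v, returning it to the OS and
// subtracting it from sysStat.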
// Don't split the stack as this function may be invoked without a valid G,
// which prevents us from allocating more stack.
//go:nosplit
func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
	mSysStatDec(sysStat, n)
	munmap(v, n)
}

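// sysFault remaps the n-byte region at v with PROT_NONE so that any access
// to it faults.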
func sysFault(v unsafe.Pointer, n uintptr) {
	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
}

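// sysReserve reserves n bytes of address space at v without committing
// memory. For very large reservations on 64-bit systems it only probes that
// a small mapping at v succeeds and sets *reserved to false; otherwise it
// creates a PROT_NONE mapping and sets *reserved to true.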
func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
	// On 64-bit, people with ulimit -v set complain if we reserve too
	// much address space. Instead, assume that the reservation is okay
	// if we can reserve at least 64K and check the assumption in SysMap.
	// Only user-mode Linux (UML) rejects these requests.
	if sys.PtrSize == 8 && uint64(n) > 1<<32 {
		p, err := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if p != v || err != 0 {
			if err == 0 {
				munmap(p, 64<<10)
			}
			return nil
		}
		munmap(p, 64<<10)
		*reserved = false
		return v
	}

	p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
	if err != 0 {
		return nil
	}
	*reserved = true
	return p
}

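// sysMap makes the n-byte region at v, previously handed out by sysReserve,
// readable and writable, and charges it to sysStat. If the region was not
// actually reserved, it maps it now and throws on an address space conflict.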
func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) {
	mSysStatInc(sysStat, n)

	// On 64-bit, we don't actually have v reserved, so tread carefully.
	if !reserved {
		p, err := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if err == _ENOMEM {
			throw("runtime: out of memory")
		}
		if p != v || err != 0 {
			print("runtime: address space conflict: map(", v, ") = ", p, " (err ", err, ")\n")
			throw("runtime: address space conflict")
		}
		return
	}

	p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
	if err == _ENOMEM {
		throw("runtime: out of memory")
	}
	if p != v || err != 0 {
		throw("runtime: cannot map pages in arena address space")
	}
}
    233