Home | History | Annotate | Download | only in runtime
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Malloc small size classes.
      6 //
      7 // See malloc.go for overview.
      8 //
      9 // The size classes are chosen so that rounding an allocation
     10 // request up to the next size class wastes at most 12.5% (1.125x).
     11 //
     12 // Each size class has its own page count that gets allocated
     13 // and chopped up when new objects of the size class are needed.
     14 // That page count is chosen so that chopping up the run of
     15 // pages into objects of the given size wastes at most 12.5% (1.125x)
     16 // of the memory.  It is not necessary that the cutoff here be
     17 // the same as above.
     18 //
     19 // The two sources of waste multiply, so the worst possible case
     20 // for the above constraints would be that allocations of some
     21 // size might have a 26.6% (1.266x) overhead.
     22 // In practice, only one of the wastes comes into play for a
     23 // given size (sizes < 512 waste mainly on the round-up,
     24 // sizes > 512 waste mainly on the page chopping).
     25 //
     26 // TODO(rsc): Compute max waste for any given size.
     27 
     28 package runtime
     29 
     30 // Size classes.  Computed and initialized by InitSizes.
     31 //
     32 // SizeToClass(0 <= n <= MaxSmallSize) returns the size class,
     33 //	1 <= sizeclass < NumSizeClasses, for n.
     34 //	Size class 0 is reserved to mean "not small".
     35 //
     36 // class_to_size[i] = largest size in class i
     37 // class_to_allocnpages[i] = number of pages to allocate when
     38 //	making new objects in class i
     39 
     40 // The SizeToClass lookup is implemented using two arrays,
     41 // one mapping sizes <= 1024 to their class and one mapping
     42 // sizes >= 1024 and <= MaxSmallSize to their class.
     43 // All objects are 8-aligned, so the first array is indexed by
     44 // the size divided by 8 (rounded up).  Objects >= 1024 bytes
     45 // are 128-aligned, so the second array is indexed by the
     46 // size divided by 128 (rounded up).  The arrays are filled in
     47 // by InitSizes.
     48 
     49 var class_to_size [_NumSizeClasses]int32
     50 var class_to_allocnpages [_NumSizeClasses]int32
     51 var class_to_divmagic [_NumSizeClasses]divMagic
     52 
     53 var size_to_class8 [1024/8 + 1]int8
     54 var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
     55 
     56 func sizeToClass(size int32) int32 {
     57 	if size > _MaxSmallSize {
     58 		throw("SizeToClass - invalid size")
     59 	}
     60 	if size > 1024-8 {
     61 		return int32(size_to_class128[(size-1024+127)>>7])
     62 	}
     63 	return int32(size_to_class8[(size+7)>>3])
     64 }
     65 
     66 func initSizes() {
     67 	// Initialize the runtimeclass_to_size table (and choose class sizes in the process).
     68 	class_to_size[0] = 0
     69 	sizeclass := 1 // 0 means no class
     70 	align := 8
     71 	for size := align; size <= _MaxSmallSize; size += align {
     72 		if size&(size-1) == 0 { // bump alignment once in a while
     73 			if size >= 2048 {
     74 				align = 256
     75 			} else if size >= 128 {
     76 				align = size / 8
     77 			} else if size >= 16 {
     78 				align = 16 // required for x86 SSE instructions, if we want to use them
     79 			}
     80 		}
     81 		if align&(align-1) != 0 {
     82 			throw("InitSizes - bug")
     83 		}
     84 
     85 		// Make the allocnpages big enough that
     86 		// the leftover is less than 1/8 of the total,
     87 		// so wasted space is at most 12.5%.
     88 		allocsize := _PageSize
     89 		for allocsize%size > allocsize/8 {
     90 			allocsize += _PageSize
     91 		}
     92 		npages := allocsize >> _PageShift
     93 
     94 		// If the previous sizeclass chose the same
     95 		// allocation size and fit the same number of
     96 		// objects into the page, we might as well
     97 		// use just this size instead of having two
     98 		// different sizes.
     99 		if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) {
    100 			class_to_size[sizeclass-1] = int32(size)
    101 			continue
    102 		}
    103 
    104 		class_to_allocnpages[sizeclass] = int32(npages)
    105 		class_to_size[sizeclass] = int32(size)
    106 		sizeclass++
    107 	}
    108 	if sizeclass != _NumSizeClasses {
    109 		print("sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
    110 		throw("InitSizes - bad NumSizeClasses")
    111 	}
    112 
    113 	// Initialize the size_to_class tables.
    114 	nextsize := 0
    115 	for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ {
    116 		for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 {
    117 			size_to_class8[nextsize/8] = int8(sizeclass)
    118 		}
    119 		if nextsize >= 1024 {
    120 			for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 {
    121 				size_to_class128[(nextsize-1024)/128] = int8(sizeclass)
    122 			}
    123 		}
    124 	}
    125 
    126 	// Double-check SizeToClass.
    127 	if false {
    128 		for n := int32(0); n < _MaxSmallSize; n++ {
    129 			sizeclass := sizeToClass(n)
    130 			if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n {
    131 				print("size=", n, " sizeclass=", sizeclass, " runtimeclass_to_size=", class_to_size[sizeclass], "\n")
    132 				print("incorrect SizeToClass\n")
    133 				goto dump
    134 			}
    135 			if sizeclass > 1 && class_to_size[sizeclass-1] >= n {
    136 				print("size=", n, " sizeclass=", sizeclass, " runtimeclass_to_size=", class_to_size[sizeclass], "\n")
    137 				print("SizeToClass too big\n")
    138 				goto dump
    139 			}
    140 		}
    141 	}
    142 
    143 	testdefersizes()
    144 
    145 	// Copy out for statistics table.
    146 	for i := 0; i < len(class_to_size); i++ {
    147 		memstats.by_size[i].size = uint32(class_to_size[i])
    148 	}
    149 
    150 	for i := 1; i < len(class_to_size); i++ {
    151 		class_to_divmagic[i] = computeDivMagic(uint32(class_to_size[i]))
    152 	}
    153 
    154 	return
    155 
    156 dump:
    157 	if true {
    158 		print("NumSizeClasses=", _NumSizeClasses, "\n")
    159 		print("runtimeclass_to_size:")
    160 		for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ {
    161 			print(" ", class_to_size[sizeclass], "")
    162 		}
    163 		print("\n\n")
    164 		print("size_to_class8:")
    165 		for i := 0; i < len(size_to_class8); i++ {
    166 			print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n")
    167 		}
    168 		print("\n")
    169 		print("size_to_class128:")
    170 		for i := 0; i < len(size_to_class128); i++ {
    171 			print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n")
    172 		}
    173 		print("\n")
    174 	}
    175 	throw("InitSizes failed")
    176 }
    177 
    178 // Returns size of the memory block that mallocgc will allocate if you ask for the size.
    179 func roundupsize(size uintptr) uintptr {
    180 	if size < _MaxSmallSize {
    181 		if size <= 1024-8 {
    182 			return uintptr(class_to_size[size_to_class8[(size+7)>>3]])
    183 		} else {
    184 			return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]])
    185 		}
    186 	}
    187 	if size+_PageSize < size {
    188 		return size
    189 	}
    190 	return round(size, _PageSize)
    191 }
    192 
    193 // divMagic holds magic constants to implement division
    194 // by a particular constant as a shift, multiply, and shift.
    195 // That is, given
    196 //	m = computeMagic(d)
    197 // then
    198 //	n/d == ((n>>m.shift) * m.mul) >> m.shift2
    199 //
    200 // The magic computation picks m such that
    201 //	d = d*d
    202 //	d= 2^m.shift
    203 //	m.mul = 2^m.shift2 / d
    204 //
    205 // The magic computation here is tailored for malloc block sizes
    206 // and does not handle arbitrary d correctly. Malloc block sizes d are
    207 // always even, so the first shift implements the factors of 2 in d
    208 // and then the mul and second shift implement the odd factor
    209 // that remains. Because the first shift divides n by at least 2 (actually 8)
    210 // before the multiply gets involved, the huge corner cases that
    211 // require additional adjustment are impossible, so the usual
    212 // fixup is not needed.
    213 //
    214 // For more details see Hacker's Delight, Chapter 10, and
    215 // http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
    216 // http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html
    217 type divMagic struct {
    218 	shift    uint8
    219 	mul      uint32
    220 	shift2   uint8
    221 	baseMask uintptr
    222 }
    223 
    224 func computeDivMagic(d uint32) divMagic {
    225 	var m divMagic
    226 
    227 	// If the size is a power of two, heapBitsForObject can divide even faster by masking.
    228 	// Compute this mask.
    229 	if d&(d-1) == 0 {
    230 		// It is a power of 2 (assuming dinptr != 1)
    231 		m.baseMask = ^(uintptr(d) - 1)
    232 	} else {
    233 		m.baseMask = 0
    234 	}
    235 
    236 	// Compute pre-shift by factoring power of 2 out of d.
    237 	for d&1 == 0 {
    238 		m.shift++
    239 		d >>= 1
    240 	}
    241 
    242 	// Compute largest k such that 2^k / d fits in a 32-bit int.
    243 	// This is always a good enough approximation.
    244 	// We could use smaller k for some divisors but there's no point.
    245 	k := uint8(63)
    246 	d64 := uint64(d)
    247 	for ((1<<k)+d64-1)/d64 >= 1<<32 {
    248 		k--
    249 	}
    250 	m.mul = uint32(((1 << k) + d64 - 1) / d64) //  2^k / d
    251 	m.shift2 = k
    252 
    253 	return m
    254 }
    255