1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Malloc small size classes. 6 // 7 // See malloc.go for overview. 8 // 9 // The size classes are chosen so that rounding an allocation 10 // request up to the next size class wastes at most 12.5% (1.125x). 11 // 12 // Each size class has its own page count that gets allocated 13 // and chopped up when new objects of the size class are needed. 14 // That page count is chosen so that chopping up the run of 15 // pages into objects of the given size wastes at most 12.5% (1.125x) 16 // of the memory. It is not necessary that the cutoff here be 17 // the same as above. 18 // 19 // The two sources of waste multiply, so the worst possible case 20 // for the above constraints would be that allocations of some 21 // size might have a 26.6% (1.266x) overhead. 22 // In practice, only one of the wastes comes into play for a 23 // given size (sizes < 512 waste mainly on the round-up, 24 // sizes > 512 waste mainly on the page chopping). 25 // 26 // TODO(rsc): Compute max waste for any given size. 27 28 package runtime 29 30 // Size classes. Computed and initialized by InitSizes. 31 // 32 // SizeToClass(0 <= n <= MaxSmallSize) returns the size class, 33 // 1 <= sizeclass < NumSizeClasses, for n. 34 // Size class 0 is reserved to mean "not small". 35 // 36 // class_to_size[i] = largest size in class i 37 // class_to_allocnpages[i] = number of pages to allocate when 38 // making new objects in class i 39 40 // The SizeToClass lookup is implemented using two arrays, 41 // one mapping sizes <= 1024 to their class and one mapping 42 // sizes >= 1024 and <= MaxSmallSize to their class. 43 // All objects are 8-aligned, so the first array is indexed by 44 // the size divided by 8 (rounded up). Objects >= 1024 bytes 45 // are 128-aligned, so the second array is indexed by the 46 // size divided by 128 (rounded up). The arrays are filled in 47 // by InitSizes. 48 49 var class_to_size [_NumSizeClasses]int32 50 var class_to_allocnpages [_NumSizeClasses]int32 51 var class_to_divmagic [_NumSizeClasses]divMagic 52 53 var size_to_class8 [1024/8 + 1]int8 54 var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8 55 56 func sizeToClass(size int32) int32 { 57 if size > _MaxSmallSize { 58 throw("SizeToClass - invalid size") 59 } 60 if size > 1024-8 { 61 return int32(size_to_class128[(size-1024+127)>>7]) 62 } 63 return int32(size_to_class8[(size+7)>>3]) 64 } 65 66 func initSizes() { 67 // Initialize the runtimeclass_to_size table (and choose class sizes in the process). 68 class_to_size[0] = 0 69 sizeclass := 1 // 0 means no class 70 align := 8 71 for size := align; size <= _MaxSmallSize; size += align { 72 if size&(size-1) == 0 { // bump alignment once in a while 73 if size >= 2048 { 74 align = 256 75 } else if size >= 128 { 76 align = size / 8 77 } else if size >= 16 { 78 align = 16 // required for x86 SSE instructions, if we want to use them 79 } 80 } 81 if align&(align-1) != 0 { 82 throw("InitSizes - bug") 83 } 84 85 // Make the allocnpages big enough that 86 // the leftover is less than 1/8 of the total, 87 // so wasted space is at most 12.5%. 88 allocsize := _PageSize 89 for allocsize%size > allocsize/8 { 90 allocsize += _PageSize 91 } 92 npages := allocsize >> _PageShift 93 94 // If the previous sizeclass chose the same 95 // allocation size and fit the same number of 96 // objects into the page, we might as well 97 // use just this size instead of having two 98 // different sizes. 99 if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) { 100 class_to_size[sizeclass-1] = int32(size) 101 continue 102 } 103 104 class_to_allocnpages[sizeclass] = int32(npages) 105 class_to_size[sizeclass] = int32(size) 106 sizeclass++ 107 } 108 if sizeclass != _NumSizeClasses { 109 print("sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n") 110 throw("InitSizes - bad NumSizeClasses") 111 } 112 113 // Initialize the size_to_class tables. 114 nextsize := 0 115 for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ { 116 for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 { 117 size_to_class8[nextsize/8] = int8(sizeclass) 118 } 119 if nextsize >= 1024 { 120 for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 { 121 size_to_class128[(nextsize-1024)/128] = int8(sizeclass) 122 } 123 } 124 } 125 126 // Double-check SizeToClass. 127 if false { 128 for n := int32(0); n < _MaxSmallSize; n++ { 129 sizeclass := sizeToClass(n) 130 if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n { 131 print("size=", n, " sizeclass=", sizeclass, " runtimeclass_to_size=", class_to_size[sizeclass], "\n") 132 print("incorrect SizeToClass\n") 133 goto dump 134 } 135 if sizeclass > 1 && class_to_size[sizeclass-1] >= n { 136 print("size=", n, " sizeclass=", sizeclass, " runtimeclass_to_size=", class_to_size[sizeclass], "\n") 137 print("SizeToClass too big\n") 138 goto dump 139 } 140 } 141 } 142 143 testdefersizes() 144 145 // Copy out for statistics table. 146 for i := 0; i < len(class_to_size); i++ { 147 memstats.by_size[i].size = uint32(class_to_size[i]) 148 } 149 150 for i := 1; i < len(class_to_size); i++ { 151 class_to_divmagic[i] = computeDivMagic(uint32(class_to_size[i])) 152 } 153 154 return 155 156 dump: 157 if true { 158 print("NumSizeClasses=", _NumSizeClasses, "\n") 159 print("runtimeclass_to_size:") 160 for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ { 161 print(" ", class_to_size[sizeclass], "") 162 } 163 print("\n\n") 164 print("size_to_class8:") 165 for i := 0; i < len(size_to_class8); i++ { 166 print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n") 167 } 168 print("\n") 169 print("size_to_class128:") 170 for i := 0; i < len(size_to_class128); i++ { 171 print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n") 172 } 173 print("\n") 174 } 175 throw("InitSizes failed") 176 } 177 178 // Returns size of the memory block that mallocgc will allocate if you ask for the size. 179 func roundupsize(size uintptr) uintptr { 180 if size < _MaxSmallSize { 181 if size <= 1024-8 { 182 return uintptr(class_to_size[size_to_class8[(size+7)>>3]]) 183 } else { 184 return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]]) 185 } 186 } 187 if size+_PageSize < size { 188 return size 189 } 190 return round(size, _PageSize) 191 } 192 193 // divMagic holds magic constants to implement division 194 // by a particular constant as a shift, multiply, and shift. 195 // That is, given 196 // m = computeMagic(d) 197 // then 198 // n/d == ((n>>m.shift) * m.mul) >> m.shift2 199 // 200 // The magic computation picks m such that 201 // d = d*d 202 // d= 2^m.shift 203 // m.mul = 2^m.shift2 / d 204 // 205 // The magic computation here is tailored for malloc block sizes 206 // and does not handle arbitrary d correctly. Malloc block sizes d are 207 // always even, so the first shift implements the factors of 2 in d 208 // and then the mul and second shift implement the odd factor 209 // that remains. Because the first shift divides n by at least 2 (actually 8) 210 // before the multiply gets involved, the huge corner cases that 211 // require additional adjustment are impossible, so the usual 212 // fixup is not needed. 213 // 214 // For more details see Hacker's Delight, Chapter 10, and 215 // http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html 216 // http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html 217 type divMagic struct { 218 shift uint8 219 mul uint32 220 shift2 uint8 221 baseMask uintptr 222 } 223 224 func computeDivMagic(d uint32) divMagic { 225 var m divMagic 226 227 // If the size is a power of two, heapBitsForObject can divide even faster by masking. 228 // Compute this mask. 229 if d&(d-1) == 0 { 230 // It is a power of 2 (assuming dinptr != 1) 231 m.baseMask = ^(uintptr(d) - 1) 232 } else { 233 m.baseMask = 0 234 } 235 236 // Compute pre-shift by factoring power of 2 out of d. 237 for d&1 == 0 { 238 m.shift++ 239 d >>= 1 240 } 241 242 // Compute largest k such that 2^k / d fits in a 32-bit int. 243 // This is always a good enough approximation. 244 // We could use smaller k for some divisors but there's no point. 245 k := uint8(63) 246 d64 := uint64(d) 247 for ((1<<k)+d64-1)/d64 >= 1<<32 { 248 k-- 249 } 250 m.mul = uint32(((1 << k) + d64 - 1) / d64) // 2^k / d 251 m.shift2 = k 252 253 return m 254 } 255