/*
 *
 * Copyright (c) 1996,1997
 * Silicon Graphics Computer Systems, Inc.
 *
 * Copyright (c) 1997
 * Moscow Center for SPARC Technology
 *
 * Copyright (c) 1999
 * Boris Fomitchev
 *
 * This material is provided "as is", with absolutely no warranty expressed
 * or implied. Any use is at your own risk.
 *
 * Permission to use or copy this software for any purpose is hereby granted
 * without fee, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 *
 */

#include "stlport_prefix.h"

#include <memory>

#if defined (__GNUC__) && (defined (__CYGWIN__) || defined (__MINGW32__))
#  include <malloc.h>
#endif

#if defined (_STLP_PTHREADS) && !defined (_STLP_NO_THREADS)
#  include <pthread_alloc>
#  include <cerrno>
#endif

#include <stl/_threads.h>

#include "lock_free_slist.h"

#if defined (__WATCOMC__)
#  pragma warning 13 9
#  pragma warning 367 9
#  pragma warning 368 9
#endif

#if defined (_STLP_SGI_THREADS)
// We test whether threads are in use before locking.
// Perhaps this should be moved into stl_threads.h, but that
// probably makes it harder to avoid the procedure call when
// it isn't needed.
extern "C" {
  extern int __us_rsthread_malloc;
}
#endif

// Specialised debug form of the new operator which avoids "false"
// memory-leak reports when run with debug CRT libraries.
#if defined (_STLP_MSVC) && (_STLP_MSVC >= 1020 && defined (_STLP_DEBUG_ALLOC)) && !defined (_STLP_WCE)
#  include <crtdbg.h>
inline char* __stlp_new_chunk(size_t __bytes) {
  void *__chunk = _STLP_CHECK_NULL_ALLOC(::operator new(__bytes, __FILE__, __LINE__));
  return __STATIC_CAST(char*, __chunk);
}
inline void __stlp_delete_chunck(void* __p) { ::operator delete(__p, __FILE__, __LINE__); }
#else
#  ifdef _STLP_NODE_ALLOC_USE_MALLOC
#    include <cstdlib>
inline char* __stlp_new_chunk(size_t __bytes) {
  // Do not use _STLP_CHECK_NULL_ALLOC, this macro is dedicated to the new operator.
  void *__chunk = _STLP_VENDOR_CSTD::malloc(__bytes);
  if (__chunk == 0) {
    _STLP_THROW_BAD_ALLOC;
  }
  return __STATIC_CAST(char*, __chunk);
}
inline void __stlp_delete_chunck(void* __p) { _STLP_VENDOR_CSTD::free(__p); }
#  else
inline char* __stlp_new_chunk(size_t __bytes)
{ return __STATIC_CAST(char*, _STLP_STD::__stl_new(__bytes)); }
inline void __stlp_delete_chunck(void* __p) { _STLP_STD::__stl_delete(__p); }
#  endif
#endif

/* This is an additional atomic operation to the ones already defined in
 * stl/_threads.h; platforms should try to support it to improve performance.
 * __add_atomic_t _STLP_ATOMIC_ADD(volatile __add_atomic_t* __target, __add_atomic_t __val) :
 * does *__target = *__target + __val and returns the old *__target value */
typedef long __add_atomic_t;
typedef unsigned long __uadd_atomic_t;

#if defined (__GNUC__) && defined (__i386__)
inline long _STLP_atomic_add_gcc_x86(long volatile* p, long addend) {
  long result;
  __asm__ __volatile__
    ("lock; xaddl %1, %0;"
     :"=m" (*p), "=r" (result)
     :"m"  (*p), "1"  (addend)
     :"cc");
  return result + addend;
}
#  define _STLP_ATOMIC_ADD(__dst, __val)  _STLP_atomic_add_gcc_x86(__dst, __val)
#elif defined (_STLP_WIN32THREADS)
// The Win32 API function InterlockedExchangeAdd is not available on Windows 95.
#  if !defined (_STLP_WIN95_LIKE)
#    if defined (_STLP_NEW_PLATFORM_SDK)
#      define _STLP_ATOMIC_ADD(__dst, __val) InterlockedExchangeAdd(__dst, __val)
#    else
#      define _STLP_ATOMIC_ADD(__dst, __val) InterlockedExchangeAdd(__CONST_CAST(__add_atomic_t*, __dst), __val)
#    endif
#  endif
#endif
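
/* Illustrative sketch, not part of the original implementation: on other GCC
 * targets one could, in principle, provide _STLP_ATOMIC_ADD with the compiler's
 * __sync builtins (available since GCC 4.1), following the contract documented
 * above (return the previous value of *__target):
 *
 *   inline __add_atomic_t
 *   _STLP_atomic_add_gcc_generic(__add_atomic_t volatile* __p, __add_atomic_t __val)
 *   { return __sync_fetch_and_add(__p, __val); }
 *   // #define _STLP_ATOMIC_ADD(__dst, __val) _STLP_atomic_add_gcc_generic(__dst, __val)
 */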

#if defined (__OS400__)
// dums 02/05/2007: is it really necessary ?
enum { _ALIGN = 16, _ALIGN_SHIFT = 4 };
#else
enum { _ALIGN = 2 * sizeof(void*), _ALIGN_SHIFT = 2 + sizeof(void*) / 4 };
#endif

#define _S_FREELIST_INDEX(__bytes) ((__bytes - size_t(1)) >> (int)_ALIGN_SHIFT)

_STLP_BEGIN_NAMESPACE

// malloc_alloc out-of-memory handling
static __oom_handler_type __oom_handler = __STATIC_CAST(__oom_handler_type, 0);

#ifdef _STLP_THREADS
_STLP_mutex __oom_handler_lock;
#endif

void* _STLP_CALL __malloc_alloc::allocate(size_t __n)
{
  void *__result = malloc(__n);
  if ( 0 == __result ) {
    __oom_handler_type __my_malloc_handler;

    for (;;) {
      {
#ifdef _STLP_THREADS
        _STLP_auto_lock _l( __oom_handler_lock );
#endif
        __my_malloc_handler = __oom_handler;
      }
      if ( 0 == __my_malloc_handler) {
        _STLP_THROW_BAD_ALLOC;
      }
      (*__my_malloc_handler)();
      __result = malloc(__n);
      if ( __result )
        return __result;
    }
  }
  return __result;
}

__oom_handler_type _STLP_CALL __malloc_alloc::set_malloc_handler(__oom_handler_type __f)
{
#ifdef _STLP_THREADS
  _STLP_auto_lock _l( __oom_handler_lock );
#endif
  __oom_handler_type __old = __oom_handler;
  __oom_handler = __f;
  return __old;
}
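
/* Illustrative usage sketch, not part of the original source: __malloc_alloc
 * mirrors the std::set_new_handler protocol. Assuming __oom_handler_type is
 * the usual pointer-to-void-function handler type, a client could install a
 * hypothetical handler that releases an application-level cache:
 *
 *   static void __release_cache_on_oom() { // free some memory or throw
 *   }
 *   __oom_handler_type __previous =
 *       __malloc_alloc::set_malloc_handler(&__release_cache_on_oom);
 *
 * allocate() keeps calling the installed handler and retrying malloc() until
 * the request succeeds; if no handler is installed it throws bad_alloc.
 */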

// *******************************************************
// Default node allocator.
// With a reasonable compiler, this should be roughly as fast as the
// original STL class-specific allocators, but with less fragmentation.
//
// Important implementation properties:
// 1. If the client requests an object of size > _MAX_BYTES, the resulting
//    object will be obtained directly from malloc.
// 2. In all other cases, we allocate an object of size exactly
//    _S_round_up(requested_size).  Thus the client has enough size
//    information that we can return the object to the proper free list
//    without permanently losing part of the object.
//

#define _STLP_NFREELISTS 16

#if defined (_STLP_LEAKS_PEDANTIC) && defined (_STLP_USE_DYNAMIC_LIB)
/*
 * We can only do cleanup of the node allocator memory pool if we are
 * sure that the STLport library is used as a shared one, as that guarantees
 * the uniqueness of the node allocator instance.  Without that guarantee,
 * node allocator instances might exchange memory blocks, making the
 * implementation of a cleaning process much more complicated.
 */
#  define _STLP_DO_CLEAN_NODE_ALLOC
#endif

/* When STLport is used without multi-threaded safety we use the node allocator
 * implementation with locks, as the locks then become no-ops. The lock-free
 * implementation always uses system-specific atomic operations, which are
 * slower than 'normal' ones.
 */
#if defined (_STLP_THREADS) && \
    defined (_STLP_HAS_ATOMIC_FREELIST) && defined (_STLP_ATOMIC_ADD)
/*
 * We have an implementation of the atomic freelist (_STLP_atomic_freelist)
 * for this architecture and compiler.  That means we can use the non-blocking
 * implementation of the node-allocation engine.*/
#  define _STLP_USE_LOCK_FREE_IMPLEMENTATION
#endif

#if !defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
#  if defined (_STLP_THREADS)

class _Node_Alloc_Lock {
  static _STLP_STATIC_MUTEX& _S_Mutex() {
    static _STLP_STATIC_MUTEX mutex _STLP_MUTEX_INITIALIZER;
    return mutex;
  }
public:
  _Node_Alloc_Lock() {
#    if defined (_STLP_SGI_THREADS)
    if (__us_rsthread_malloc)
#    endif
      _S_Mutex()._M_acquire_lock();
  }

  ~_Node_Alloc_Lock() {
#    if defined (_STLP_SGI_THREADS)
    if (__us_rsthread_malloc)
#    endif
      _S_Mutex()._M_release_lock();
  }
};

#  else

class _Node_Alloc_Lock {
public:
  _Node_Alloc_Lock() { }
  ~_Node_Alloc_Lock() { }
};

#  endif

struct _Node_alloc_obj {
  _Node_alloc_obj * _M_next;
};
#endif

class __node_alloc_impl {
  static inline size_t _STLP_CALL _S_round_up(size_t __bytes)
  { return (((__bytes) + (size_t)_ALIGN - 1) & ~((size_t)_ALIGN - 1)); }

#if defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
  typedef _STLP_atomic_freelist::item   _Obj;
  typedef _STLP_atomic_freelist         _Freelist;
  typedef _STLP_atomic_freelist         _ChunkList;

  // Header of blocks of memory that have been allocated as part of
  // a larger chunk but have not yet been chopped up into nodes.
  struct _FreeBlockHeader : public _STLP_atomic_freelist::item {
    char* _M_end;     // pointer to end of free memory
  };
#else
  typedef _Node_alloc_obj       _Obj;
  typedef _Obj* _STLP_VOLATILE  _Freelist;
  typedef _Obj*                 _ChunkList;
#endif

private:
  // Returns an object of size __n, and optionally adds to the size __n free list.
  static _Obj* _S_refill(size_t __n);
  // Allocates a chunk for __nobjs objects of size __p_size.  __nobjs may be
  // reduced if it is inconvenient to allocate the requested number.
  static char* _S_chunk_alloc(size_t __p_size, int& __nobjs);
  // Chunk allocation state.
  static _Freelist _S_free_list[_STLP_NFREELISTS];
  // Amount of total allocated memory
#if defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
  static _STLP_VOLATILE __add_atomic_t _S_heap_size;
#else
  static size_t _S_heap_size;
#endif

#if defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
  // List of blocks of free memory
  static _STLP_atomic_freelist  _S_free_mem_blocks;
#else
  // Start of the current free memory buffer
  static char* _S_start_free;
  // End of the current free memory buffer
  static char* _S_end_free;
#endif

#if defined (_STLP_DO_CLEAN_NODE_ALLOC)
public:
  // Methods to report alloc/dealloc calls to the counter system.
#  if defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
  typedef _STLP_VOLATILE __stl_atomic_t _AllocCounter;
#  else
  typedef __stl_atomic_t _AllocCounter;
#  endif
  static _AllocCounter& _STLP_CALL _S_alloc_counter();
  static void _S_alloc_call();
  static void _S_dealloc_call();

private:
  // Free all the allocated chunks of memory
  static void _S_chunk_dealloc();
  // Beginning of the linked list of allocated chunks of memory
  static _ChunkList _S_chunks;
#endif /* _STLP_DO_CLEAN_NODE_ALLOC */

public:
  /* __n must be > 0 */
  static void* _M_allocate(size_t& __n);
  /* __p may not be 0 */
  static void _M_deallocate(void *__p, size_t __n);
};
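
/* Worked example (illustrative, not part of the original source): on a typical
 * 32-bit build sizeof(void*) == 4, so _ALIGN == 8 and _ALIGN_SHIFT == 3.  A
 * request for 13 bytes is rounded up by _S_round_up to 16 and served from
 * _S_free_list[_S_FREELIST_INDEX(16)] == _S_free_list[(16 - 1) >> 3] == _S_free_list[1];
 * the last of the 16 lists then holds 128-byte nodes.  On a 64-bit build
 * _ALIGN == 16 and _ALIGN_SHIFT == 4, so the lists cover node sizes 16 to 256.
 */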

#if !defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
void* __node_alloc_impl::_M_allocate(size_t& __n) {
  __n = _S_round_up(__n);
  _Obj * _STLP_VOLATILE * __my_free_list = _S_free_list + _S_FREELIST_INDEX(__n);
  _Obj *__r;

  // Acquire the lock here with a constructor call.
  // This ensures that it is released in exit or during stack
  // unwinding.
  _Node_Alloc_Lock __lock_instance;

  if ( (__r = *__my_free_list) != 0 ) {
    *__my_free_list = __r->_M_next;
  } else {
    __r = _S_refill(__n);
  }
#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
  _S_alloc_call();
#  endif
  // lock is released here
  return __r;
}

void __node_alloc_impl::_M_deallocate(void *__p, size_t __n) {
  _Obj * _STLP_VOLATILE * __my_free_list = _S_free_list + _S_FREELIST_INDEX(__n);
  _Obj * __pobj = __STATIC_CAST(_Obj*, __p);

  // acquire lock
  _Node_Alloc_Lock __lock_instance;
  __pobj->_M_next = *__my_free_list;
  *__my_free_list = __pobj;

#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
  _S_dealloc_call();
#  endif
  // lock is released here
}

#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
#    define _STLP_OFFSET sizeof(_Obj)
#  else
#    define _STLP_OFFSET 0
#  endif

/* We allocate memory in large chunks in order to avoid fragmenting */
/* the malloc heap too much.                                        */
/* We assume that size is properly aligned.                         */
/* We hold the allocation lock.                                     */
char* __node_alloc_impl::_S_chunk_alloc(size_t _p_size, int& __nobjs) {
  char* __result;
  size_t __total_bytes = _p_size * __nobjs;
  size_t __bytes_left = _S_end_free - _S_start_free;

  if (__bytes_left > 0) {
    if (__bytes_left >= __total_bytes) {
      __result = _S_start_free;
      _S_start_free += __total_bytes;
      return __result;
    }

    if (__bytes_left >= _p_size) {
      __nobjs = (int)(__bytes_left / _p_size);
      __total_bytes = _p_size * __nobjs;
      __result = _S_start_free;
      _S_start_free += __total_bytes;
      return __result;
    }

    // Try to make use of the left-over piece.
    _Obj* _STLP_VOLATILE* __my_free_list = _S_free_list + _S_FREELIST_INDEX(__bytes_left);
    __REINTERPRET_CAST(_Obj*, _S_start_free)->_M_next = *__my_free_list;
    *__my_free_list = __REINTERPRET_CAST(_Obj*, _S_start_free);
    _S_start_free = _S_end_free = 0;
  }

  size_t __bytes_to_get = 2 * __total_bytes + _S_round_up(_S_heap_size) + _STLP_OFFSET;

  _STLP_TRY {
    _S_start_free = __stlp_new_chunk(__bytes_to_get);
  }
#if defined (_STLP_USE_EXCEPTIONS)
  catch (const _STLP_STD::bad_alloc&) {
    _Obj* _STLP_VOLATILE* __my_free_list;
    _Obj* __p;
    // Try to do with what we have.  That can't hurt.
    // We do not try smaller requests, since that tends
    // to result in disaster on multi-process machines.
    for (size_t __i = _p_size; __i <= (size_t)_MAX_BYTES; __i += (size_t)_ALIGN) {
      __my_free_list = _S_free_list + _S_FREELIST_INDEX(__i);
      __p = *__my_free_list;
      if (0 != __p) {
        *__my_free_list = __p->_M_next;
        _S_start_free = __REINTERPRET_CAST(char*, __p);
        _S_end_free = _S_start_free + __i;
        return _S_chunk_alloc(_p_size, __nobjs);
        // Any leftover piece will eventually make it to the
        // right free list.
      }
    }
    __bytes_to_get = __total_bytes + _STLP_OFFSET;
    _S_start_free = __stlp_new_chunk(__bytes_to_get);
  }
#endif

  _S_heap_size += __bytes_to_get >> 4;
#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
  __REINTERPRET_CAST(_Obj*, _S_start_free)->_M_next = _S_chunks;
  _S_chunks = __REINTERPRET_CAST(_Obj*, _S_start_free);
#  endif
  _S_end_free = _S_start_free + __bytes_to_get;
  _S_start_free += _STLP_OFFSET;
  return _S_chunk_alloc(_p_size, __nobjs);
}
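
/* Illustrative arithmetic (not part of the original source): on the first
 * refill of the 8-byte list, __nobjs == 20, so __total_bytes == 160 and,
 * with an empty heap, __bytes_to_get == 2 * 160 + _S_round_up(0) + _STLP_OFFSET
 * == 320 + _STLP_OFFSET.  _S_heap_size then grows by 320 >> 4 == 20, so the
 * _S_round_up(_S_heap_size) term makes every later chunk a little larger than
 * the previous ones; the scaled (>> 4) bookkeeping keeps that growth moderate.
 */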

/* Returns an object of size __n, and optionally adds to the size __n free list.*/
/* We assume that __n is properly aligned.                                      */
/* We hold the allocation lock.                                                 */
_Node_alloc_obj* __node_alloc_impl::_S_refill(size_t __n) {
  int __nobjs = 20;
  char* __chunk = _S_chunk_alloc(__n, __nobjs);

  if (1 == __nobjs) return __REINTERPRET_CAST(_Obj*, __chunk);

  _Obj* _STLP_VOLATILE* __my_free_list = _S_free_list + _S_FREELIST_INDEX(__n);
  _Obj* __result;
  _Obj* __current_obj;
  _Obj* __next_obj;

  /* Build free list in chunk */
  __result = __REINTERPRET_CAST(_Obj*, __chunk);
  *__my_free_list = __next_obj = __REINTERPRET_CAST(_Obj*, __chunk + __n);
  for (--__nobjs; --__nobjs; ) {
    __current_obj = __next_obj;
    __next_obj = __REINTERPRET_CAST(_Obj*, __REINTERPRET_CAST(char*, __next_obj) + __n);
    __current_obj->_M_next = __next_obj;
  }
  __next_obj->_M_next = 0;
  return __result;
}

#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
void __node_alloc_impl::_S_alloc_call()
{ ++_S_alloc_counter(); }

void __node_alloc_impl::_S_dealloc_call() {
  __stl_atomic_t &counter = _S_alloc_counter();
  if (--counter == 0)
  { _S_chunk_dealloc(); }
}

/* We deallocate all the memory chunks */
void __node_alloc_impl::_S_chunk_dealloc() {
  _Obj *__pcur = _S_chunks, *__pnext;
  while (__pcur != 0) {
    __pnext = __pcur->_M_next;
    __stlp_delete_chunck(__pcur);
    __pcur = __pnext;
  }
  _S_chunks = 0;
  _S_start_free = _S_end_free = 0;
  _S_heap_size = 0;
  memset(__REINTERPRET_CAST(char*, __CONST_CAST(_Obj**, &_S_free_list[0])), 0,
         _STLP_NFREELISTS * sizeof(_Obj*));
}
#  endif

#else

void* __node_alloc_impl::_M_allocate(size_t& __n) {
  __n = _S_round_up(__n);
  _Obj* __r = _S_free_list[_S_FREELIST_INDEX(__n)].pop();
  if (__r == 0)
  { __r = _S_refill(__n); }

#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
  _S_alloc_call();
#  endif
  return __r;
}

void __node_alloc_impl::_M_deallocate(void *__p, size_t __n) {
  _S_free_list[_S_FREELIST_INDEX(__n)].push(__STATIC_CAST(_Obj*, __p));

#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
  _S_dealloc_call();
#  endif
}

/* Returns an object of size __n, and optionally adds additional ones to */
/* the freelist of objects of size __n.                                  */
/* We assume that __n is properly aligned.                               */
__node_alloc_impl::_Obj* __node_alloc_impl::_S_refill(size_t __n) {
  int __nobjs = 20;
  char* __chunk = _S_chunk_alloc(__n, __nobjs);

  if (__nobjs <= 1)
    return __REINTERPRET_CAST(_Obj*, __chunk);

  // Push all new nodes (minus first one) onto freelist
  _Obj* __result   = __REINTERPRET_CAST(_Obj*, __chunk);
  _Obj* __cur_item = __result;
  _Freelist* __my_freelist = _S_free_list + _S_FREELIST_INDEX(__n);
  for (--__nobjs; __nobjs != 0; --__nobjs) {
    __cur_item = __REINTERPRET_CAST(_Obj*, __REINTERPRET_CAST(char*, __cur_item) + __n);
    __my_freelist->push(__cur_item);
  }
  return __result;
}
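
/* Illustrative note (not part of the original source): _STLP_atomic_freelist
 * is declared in lock_free_slist.h; only push(), pop() and clear() are used
 * here.  Conceptually, push() is a compare-and-swap loop in the spirit of:
 *
 *   void push(item* __i) {
 *     item* __old;
 *     do {
 *       __old = _M_head;           // current head of the singly linked list
 *       __i->_M_next = __old;      // link the new node in front of it
 *     } while (!compare_and_swap(&_M_head, __old, __i));
 *   }
 *
 * This is only a sketch of the idea (_M_head and compare_and_swap are
 * hypothetical names); the real implementation is platform specific and must
 * also deal with the ABA problem, which a naive CAS loop like this does not.
 */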

#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
#    define _STLP_OFFSET _ALIGN
#  else
#    define _STLP_OFFSET 0
#  endif

/* We allocate memory in large chunks in order to avoid fragmenting */
/* the malloc heap too much.                                        */
/* We assume that size is properly aligned.                         */
char* __node_alloc_impl::_S_chunk_alloc(size_t _p_size, int& __nobjs) {
#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
  // We are going to add a small memory block to keep track of all the
  // allocated block addresses; we need to do so while respecting the memory
  // alignment. The following static assert checks that the reserved block is
  // big enough to store a pointer.
  _STLP_STATIC_ASSERT(sizeof(_Obj) <= _ALIGN)
#  endif
  char* __result = 0;
  __add_atomic_t __total_bytes = __STATIC_CAST(__add_atomic_t, _p_size) * __nobjs;

  _FreeBlockHeader* __block = __STATIC_CAST(_FreeBlockHeader*, _S_free_mem_blocks.pop());
  if (__block != 0) {
    // We checked a block out and can now mess with it with impunity.
    // We'll put the remainder back into the list if we're done with it below.
    char* __buf_start = __REINTERPRET_CAST(char*, __block);
    __add_atomic_t __bytes_left = __block->_M_end - __buf_start;

    if ((__bytes_left < __total_bytes) && (__bytes_left >= __STATIC_CAST(__add_atomic_t, _p_size))) {
      // There's enough left for at least one object, but not as much as we wanted
      __result      = __buf_start;
      __nobjs       = (int)(__bytes_left / _p_size);
      __total_bytes = __STATIC_CAST(__add_atomic_t, _p_size) * __nobjs;
      __bytes_left -= __total_bytes;
      __buf_start  += __total_bytes;
    }
    else if (__bytes_left >= __total_bytes) {
      // The block has enough left to satisfy all that was asked for
      __result      = __buf_start;
      __bytes_left -= __total_bytes;
      __buf_start  += __total_bytes;
    }

    if (__bytes_left != 0) {
      // There is still some memory left over in the block after we satisfied our request.
      if ((__result != 0) && (__bytes_left >= (__add_atomic_t)sizeof(_FreeBlockHeader))) {
        // We were able to allocate at least one object and there is still enough
        // left to put the remainder back into the list.
        _FreeBlockHeader* __newblock = __REINTERPRET_CAST(_FreeBlockHeader*, __buf_start);
        __newblock->_M_end = __block->_M_end;
        _S_free_mem_blocks.push(__newblock);
      }
      else {
        // We were not able to allocate enough for at least one object.
        // Shove into freelist of nearest (rounded-down!) size.
        size_t __rounded_down = _S_round_up(__bytes_left + 1) - (size_t)_ALIGN;
        if (__rounded_down > 0)
          _S_free_list[_S_FREELIST_INDEX(__rounded_down)].push((_Obj*)__buf_start);
      }
    }
    if (__result != 0)
      return __result;
  }

  // We couldn't satisfy it from the list of free blocks, get new memory.
  __add_atomic_t __bytes_to_get = 2 * __total_bytes +
                                  __STATIC_CAST(__add_atomic_t,
                                                _S_round_up(__STATIC_CAST(__uadd_atomic_t, _STLP_ATOMIC_ADD(&_S_heap_size, 0)))) +
                                  _STLP_OFFSET;
  _STLP_TRY {
    __result = __stlp_new_chunk(__bytes_to_get);
  }
#if defined (_STLP_USE_EXCEPTIONS)
  catch (const bad_alloc&) {
    // Allocation failed; try to cannibalize from the freelist of a larger object size.
    for (size_t __i = _p_size; __i <= (size_t)_MAX_BYTES; __i += (size_t)_ALIGN) {
      _Obj* __p = _S_free_list[_S_FREELIST_INDEX(__i)].pop();
      if (0 != __p) {
        if (__i < sizeof(_FreeBlockHeader)) {
          // Not enough to put into the list of free blocks, divvy it up here.
          // Use as much as possible for this request and shove the remainder into the freelist.
          __nobjs = (int)(__i / _p_size);
          __total_bytes = __nobjs * __STATIC_CAST(__add_atomic_t, _p_size);
          size_t __bytes_left = __i - __total_bytes;
          size_t __rounded_down = _S_round_up(__bytes_left + 1) - (size_t)_ALIGN;
          if (__rounded_down > 0) {
            _S_free_list[_S_FREELIST_INDEX(__rounded_down)].push(__REINTERPRET_CAST(_Obj*, __REINTERPRET_CAST(char*, __p) + __total_bytes));
          }
          return __REINTERPRET_CAST(char*, __p);
        }
        else {
          // Add node to list of available blocks and recursively allocate from it.
          _FreeBlockHeader* __newblock = (_FreeBlockHeader*)__p;
          __newblock->_M_end = __REINTERPRET_CAST(char*, __p) + __i;
          _S_free_mem_blocks.push(__newblock);
          return _S_chunk_alloc(_p_size, __nobjs);
        }
      }
    }

    // We were not able to find something in a freelist, try to allocate a smaller amount.
    __bytes_to_get = __total_bytes + _STLP_OFFSET;
    __result = __stlp_new_chunk(__bytes_to_get);

    // This should either throw an exception or remedy the situation.
    // Thus we assume it succeeded.
  }
#endif
  // Alignment check
  _STLP_VERBOSE_ASSERT(((__REINTERPRET_CAST(size_t, __result) & __STATIC_CAST(size_t, _ALIGN - 1)) == 0),
                       _StlMsg_DBA_DELETED_TWICE)
  _STLP_ATOMIC_ADD(&_S_heap_size, __bytes_to_get >> 4);

#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
  // We have to track the allocated memory chunks for release on exit.
  _S_chunks.push(__REINTERPRET_CAST(_Obj*, __result));
  __result       += _ALIGN;
  __bytes_to_get -= _ALIGN;
#  endif

  if (__bytes_to_get > __total_bytes) {
    // Push excess memory allocated in this chunk into the list of free memory blocks
    _FreeBlockHeader* __freeblock = __REINTERPRET_CAST(_FreeBlockHeader*, __result + __total_bytes);
    __freeblock->_M_end = __result + __bytes_to_get;
    _S_free_mem_blocks.push(__freeblock);
  }
  return __result;
}

#  if defined (_STLP_DO_CLEAN_NODE_ALLOC)
void __node_alloc_impl::_S_alloc_call()
{ _STLP_ATOMIC_INCREMENT(&_S_alloc_counter()); }

void __node_alloc_impl::_S_dealloc_call() {
  _STLP_VOLATILE __stl_atomic_t *pcounter = &_S_alloc_counter();
  if (_STLP_ATOMIC_DECREMENT(pcounter) == 0)
    _S_chunk_dealloc();
}

/* We deallocate all the memory chunks */
void __node_alloc_impl::_S_chunk_dealloc() {
  // Note: The _Node_alloc_helper class ensures that this function
  // will only be called when the (shared) library is unloaded or the
  // process is shut down.  It's thus not possible that another thread
  // is currently trying to allocate a node (we're not thread-safe here).
  //

  // Clear the free blocks and all freelists.  This makes sure that, if for
  // some reason more memory is allocated again during shutdown, it is not
  // served from the chunks we are about to release (it'd also be really
  // nasty to leave references to deallocated memory around).
  _S_free_mem_blocks.clear();
  _S_heap_size = 0;

  for (size_t __i = 0; __i < _STLP_NFREELISTS; ++__i) {
    _S_free_list[__i].clear();
  }

  // Detach list of chunks and free them all
  _Obj* __chunk = _S_chunks.clear();
  while (__chunk != 0) {
    _Obj* __next = __chunk->_M_next;
    __stlp_delete_chunck(__chunk);
    __chunk = __next;
  }
}
#  endif

#endif

#if defined (_STLP_DO_CLEAN_NODE_ALLOC)
struct __node_alloc_cleaner {
  ~__node_alloc_cleaner()
  { __node_alloc_impl::_S_dealloc_call(); }
};

#  if defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
_STLP_VOLATILE __stl_atomic_t& _STLP_CALL
#  else
__stl_atomic_t& _STLP_CALL
#  endif
__node_alloc_impl::_S_alloc_counter() {
  static _AllocCounter _S_counter = 1;
  static __node_alloc_cleaner _S_node_alloc_cleaner;
  return _S_counter;
}
#endif

#if !defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
_Node_alloc_obj * _STLP_VOLATILE
__node_alloc_impl::_S_free_list[_STLP_NFREELISTS]
= {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// The 16 zeros are necessary to make version 4.1 of the SunPro
// compiler happy.  Otherwise it appears to allocate too little
// space for the array.
#else
_STLP_atomic_freelist __node_alloc_impl::_S_free_list[_STLP_NFREELISTS];
_STLP_atomic_freelist __node_alloc_impl::_S_free_mem_blocks;
#endif

#if !defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
char *__node_alloc_impl::_S_start_free = 0;
char *__node_alloc_impl::_S_end_free = 0;
#endif

#if defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
_STLP_VOLATILE __add_atomic_t
#else
size_t
#endif
__node_alloc_impl::_S_heap_size = 0;

#if defined (_STLP_DO_CLEAN_NODE_ALLOC)
#  if defined (_STLP_USE_LOCK_FREE_IMPLEMENTATION)
_STLP_atomic_freelist __node_alloc_impl::_S_chunks;
#  else
_Node_alloc_obj* __node_alloc_impl::_S_chunks = 0;
#  endif
#endif

void * _STLP_CALL __node_alloc::_M_allocate(size_t& __n)
{ return __node_alloc_impl::_M_allocate(__n); }

void _STLP_CALL __node_alloc::_M_deallocate(void *__p, size_t __n)
{ __node_alloc_impl::_M_deallocate(__p, __n); }

#if defined (_STLP_PTHREADS) && !defined (_STLP_NO_THREADS)

#  define _STLP_DATA_ALIGNMENT 8

_STLP_MOVE_TO_PRIV_NAMESPACE

// *******************************************************
// __perthread_alloc implementation
union _Pthread_alloc_obj {
  union _Pthread_alloc_obj * __free_list_link;
  char __client_data[_STLP_DATA_ALIGNMENT];    /* The client sees this. */
};
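// Each free node overlays this union on the storage handed to the client:
// while an object sits on a free list its first bytes are reused as the
// __free_list_link pointer, and _STLP_DATA_ALIGNMENT guarantees the union is
// at least as large as that pointer.  This is the same trick the node
// allocator above plays with _Node_alloc_obj::_M_next.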

// Pthread allocators don't appear to the client to have meaningful
// instances.  We do in fact need to associate some state with each
// thread.  That state is represented by _Pthread_alloc_per_thread_state.

struct _Pthread_alloc_per_thread_state {
  typedef _Pthread_alloc_obj __obj;
  enum { _S_NFREELISTS = _MAX_BYTES / _STLP_DATA_ALIGNMENT };

  _Pthread_alloc_per_thread_state() : __next(0)
  { memset((void *)__CONST_CAST(_Pthread_alloc_obj**, __free_list), 0, (size_t)_S_NFREELISTS * sizeof(__obj *)); }
  // Returns an object of size __n, and possibly adds to the size __n free list.
  void *_M_refill(size_t __n);

  _Pthread_alloc_obj* volatile __free_list[_S_NFREELISTS];
  // Free list link for the list of available per thread structures.
  // When one of these becomes available for reuse due to thread
  // termination, any objects in its free list remain associated
  // with it.  The whole structure may then be used by a newly
  // created thread.
  _Pthread_alloc_per_thread_state *__next;
  // This data member is only to be used by per_thread_allocator, which returns memory to the originating thread.
  _STLP_mutex _M_lock;
};

// Pthread-specific allocator.
class _Pthread_alloc_impl {
public: // but only for internal use:
  typedef _Pthread_alloc_per_thread_state __state_type;
  typedef char value_type;

  // Allocates a chunk for __nobjs objects of size __size.  __nobjs may be
  // reduced if it is inconvenient to allocate the requested number.
  static char *_S_chunk_alloc(size_t __size, size_t &__nobjs, __state_type*);

  enum { _S_ALIGN = _STLP_DATA_ALIGNMENT };

  static size_t _S_round_up(size_t __bytes)
  { return (((__bytes) + (int)_S_ALIGN - 1) & ~((int)_S_ALIGN - 1)); }
  static size_t _S_freelist_index(size_t __bytes)
  { return (((__bytes) + (int)_S_ALIGN - 1) / (int)_S_ALIGN - 1); }

private:
  // Chunk allocation state.  And other shared state.
  // Protected by _S_chunk_allocator_lock.
  static _STLP_STATIC_MUTEX _S_chunk_allocator_lock;
  static char *_S_start_free;
  static char *_S_end_free;
  static size_t _S_heap_size;
  // Allocator instances that are currently unclaimed by any thread.
  static __state_type *_S_free_per_thread_states;
  // Pthread key under which per thread state is stored.
  static pthread_key_t _S_key;
  static bool _S_key_initialized;
  // Function to be called on thread exit to reclaim per thread
  // state.
  static void _S_destructor(void *instance);
  // Return a recycled or new per thread state.
  static __state_type *_S_new_per_thread_state();
public:
  // Ensure that the current thread has an associated
  // per thread state, and return it.
  static __state_type *_S_get_per_thread_state();
private:
  class _M_lock;
  friend class _M_lock;
  class _M_lock {
  public:
    _M_lock () { _S_chunk_allocator_lock._M_acquire_lock(); }
    ~_M_lock () { _S_chunk_allocator_lock._M_release_lock(); }
  };

public:

  /* __n must be > 0 */
  static void * allocate(size_t& __n);

  /* __p may not be 0 */
  static void deallocate(void *__p, size_t __n);

  // boris : versions for per_thread_allocator
  /* __n must be > 0 */
  static void * allocate(size_t& __n, __state_type* __a);

  /* __p may not be 0 */
  static void deallocate(void *__p, size_t __n, __state_type* __a);

  static void * reallocate(void *__p, size_t __old_sz, size_t& __new_sz);
};

/* Returns an object of size __n, and optionally adds to the size __n free list.*/
/* We assume that __n is properly aligned.                                      */
/* We hold the allocation lock.                                                 */
void *_Pthread_alloc_per_thread_state::_M_refill(size_t __n) {
  typedef _Pthread_alloc_obj __obj;
  size_t __nobjs = 128;
  char * __chunk = _Pthread_alloc_impl::_S_chunk_alloc(__n, __nobjs, this);
  __obj * volatile * __my_free_list;
  __obj * __result;
  __obj * __current_obj, * __next_obj;
  size_t __i;

  if (1 == __nobjs) {
    return __chunk;
  }

  __my_free_list = __free_list + _Pthread_alloc_impl::_S_freelist_index(__n);

  /* Build free list in chunk */
  __result = (__obj *)__chunk;
  *__my_free_list = __next_obj = (__obj *)(__chunk + __n);
  for (__i = 1; ; ++__i) {
    __current_obj = __next_obj;
    __next_obj = (__obj *)((char *)__next_obj + __n);
    if (__nobjs - 1 == __i) {
      __current_obj->__free_list_link = 0;
      break;
    } else {
      __current_obj->__free_list_link = __next_obj;
    }
  }
  return __result;
}

void _Pthread_alloc_impl::_S_destructor(void *__instance) {
  _M_lock __lock_instance;  // Need to acquire lock here.
  _Pthread_alloc_per_thread_state* __s = (_Pthread_alloc_per_thread_state*)__instance;
  __s->__next = _S_free_per_thread_states;
  _S_free_per_thread_states = __s;
}

_Pthread_alloc_per_thread_state* _Pthread_alloc_impl::_S_new_per_thread_state() {
  /* lock already held here. */
  if (0 != _S_free_per_thread_states) {
    _Pthread_alloc_per_thread_state *__result = _S_free_per_thread_states;
    _S_free_per_thread_states = _S_free_per_thread_states->__next;
    return __result;
  }
  else {
    return new _Pthread_alloc_per_thread_state;
  }
}

_Pthread_alloc_per_thread_state* _Pthread_alloc_impl::_S_get_per_thread_state() {
  int __ret_code;
  __state_type* __result;

  if (_S_key_initialized && (__result = (__state_type*) pthread_getspecific(_S_key)))
    return __result;

  /*REFERENCED*/
  _M_lock __lock_instance;  // Need to acquire lock here.
  if (!_S_key_initialized) {
    if (pthread_key_create(&_S_key, _S_destructor)) {
      _STLP_THROW_BAD_ALLOC;  // failed
    }
    _S_key_initialized = true;
  }

  __result = _S_new_per_thread_state();
  __ret_code = pthread_setspecific(_S_key, __result);
  if (__ret_code) {
    if (__ret_code == ENOMEM) {
      _STLP_THROW_BAD_ALLOC;
    } else {
      // EINVAL
      _STLP_ABORT();
    }
  }
  return __result;
}

/* We allocate memory in large chunks in order to avoid fragmenting */
/* the malloc heap too much.                                        */
/* We assume that size is properly aligned.                         */
char *_Pthread_alloc_impl::_S_chunk_alloc(size_t __p_size, size_t &__nobjs, _Pthread_alloc_per_thread_state *__a) {
  typedef _Pthread_alloc_obj __obj;
  {
    char * __result;
    size_t __total_bytes;
    size_t __bytes_left;
    /*REFERENCED*/
    _M_lock __lock_instance;         // Acquire lock for this routine

    __total_bytes = __p_size * __nobjs;
    __bytes_left = _S_end_free - _S_start_free;
    if (__bytes_left >= __total_bytes) {
      __result = _S_start_free;
      _S_start_free += __total_bytes;
      return __result;
    } else if (__bytes_left >= __p_size) {
      __nobjs = __bytes_left / __p_size;
      __total_bytes = __p_size * __nobjs;
      __result = _S_start_free;
      _S_start_free += __total_bytes;
      return __result;
    } else {
      size_t __bytes_to_get = 2 * __total_bytes + _S_round_up(_S_heap_size);
      // Try to make use of the left-over piece.
      if (__bytes_left > 0) {
        __obj * volatile * __my_free_list = __a->__free_list + _S_freelist_index(__bytes_left);
        ((__obj *)_S_start_free)->__free_list_link = *__my_free_list;
        *__my_free_list = (__obj *)_S_start_free;
      }
#  ifdef _SGI_SOURCE
      // Try to get memory that's aligned on something like a
      // cache line boundary, so as to avoid parceling out
      // parts of the same line to different threads and thus
      // possibly different processors.
      {
        const int __cache_line_size = 128;  // probable upper bound
        __bytes_to_get &= ~(__cache_line_size - 1);
        _S_start_free = (char *)memalign(__cache_line_size, __bytes_to_get);
        if (0 == _S_start_free) {
          _S_start_free = (char *)__malloc_alloc::allocate(__bytes_to_get);
        }
      }
#  else  /* !SGI_SOURCE */
      _S_start_free = (char *)__malloc_alloc::allocate(__bytes_to_get);
#  endif
      _S_heap_size += __bytes_to_get >> 4;
      _S_end_free = _S_start_free + __bytes_to_get;
    }
  }
  // lock is released here
  return _S_chunk_alloc(__p_size, __nobjs, __a);
}


/* __n must be > 0 */
void *_Pthread_alloc_impl::allocate(size_t& __n) {
  typedef _Pthread_alloc_obj __obj;
  __obj * volatile * __my_free_list;
  __obj * __result;
  __state_type* __a;

  if (__n > _MAX_BYTES) {
    return __malloc_alloc::allocate(__n);
  }

  __n = _S_round_up(__n);
  __a = _S_get_per_thread_state();

  __my_free_list = __a->__free_list + _S_freelist_index(__n);
  __result = *__my_free_list;
  if (__result == 0) {
    void *__r = __a->_M_refill(__n);
    return __r;
  }
  *__my_free_list = __result->__free_list_link;
  return __result;
}

/* __p may not be 0 */
void _Pthread_alloc_impl::deallocate(void *__p, size_t __n) {
  typedef _Pthread_alloc_obj __obj;
  __obj *__q = (__obj *)__p;
  __obj * volatile * __my_free_list;
  __state_type* __a;

  if (__n > _MAX_BYTES) {
    __malloc_alloc::deallocate(__p, __n);
    return;
  }

  __a = _S_get_per_thread_state();

  __my_free_list = __a->__free_list + _S_freelist_index(__n);
  __q->__free_list_link = *__my_free_list;
  *__my_free_list = __q;
}

// boris : versions for per_thread_allocator
/* __n must be > 0 */
void *_Pthread_alloc_impl::allocate(size_t& __n, __state_type* __a) {
  typedef _Pthread_alloc_obj __obj;
  __obj * volatile * __my_free_list;
  __obj * __result;

  if (__n > _MAX_BYTES) {
    return __malloc_alloc::allocate(__n);
  }
  __n = _S_round_up(__n);

  // boris : here, we have to lock the per thread state, as we may be getting memory from
  // a different thread's pool.
  _STLP_auto_lock __lock(__a->_M_lock);

  __my_free_list = __a->__free_list + _S_freelist_index(__n);
  __result = *__my_free_list;
  if (__result == 0) {
    void *__r = __a->_M_refill(__n);
    return __r;
  }
  *__my_free_list = __result->__free_list_link;
  return __result;
}

/* __p may not be 0 */
void _Pthread_alloc_impl::deallocate(void *__p, size_t __n, __state_type* __a) {
  typedef _Pthread_alloc_obj __obj;
  __obj *__q = (__obj *)__p;
  __obj * volatile * __my_free_list;

  if (__n > _MAX_BYTES) {
    __malloc_alloc::deallocate(__p, __n);
    return;
  }

  // boris : here, we have to lock the per thread state, as we may be returning memory to
  // a different thread's pool.
  _STLP_auto_lock __lock(__a->_M_lock);

  __my_free_list = __a->__free_list + _S_freelist_index(__n);
  __q->__free_list_link = *__my_free_list;
  *__my_free_list = __q;
}

void *_Pthread_alloc_impl::reallocate(void *__p, size_t __old_sz, size_t& __new_sz) {
  void * __result;
  size_t __copy_sz;

  if (__old_sz > _MAX_BYTES && __new_sz > _MAX_BYTES) {
    return realloc(__p, __new_sz);
  }

  if (_S_round_up(__old_sz) == _S_round_up(__new_sz)) return __p;
  __result = allocate(__new_sz);
  __copy_sz = __new_sz > __old_sz ? __old_sz : __new_sz;
  memcpy(__result, __p, __copy_sz);
  deallocate(__p, __old_sz);
  return __result;
}
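
/* Note on reallocate: __new_sz is taken by reference because allocate() may
 * round it up to the next _S_ALIGN multiple; when both the old and the new
 * request round up to the same size class, the original pointer is returned
 * unchanged and no copy is performed.
 */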

_Pthread_alloc_per_thread_state* _Pthread_alloc_impl::_S_free_per_thread_states = 0;
pthread_key_t _Pthread_alloc_impl::_S_key = 0;
_STLP_STATIC_MUTEX _Pthread_alloc_impl::_S_chunk_allocator_lock _STLP_MUTEX_INITIALIZER;
bool _Pthread_alloc_impl::_S_key_initialized = false;
char *_Pthread_alloc_impl::_S_start_free = 0;
char *_Pthread_alloc_impl::_S_end_free = 0;
size_t _Pthread_alloc_impl::_S_heap_size = 0;

void * _STLP_CALL _Pthread_alloc::allocate(size_t& __n)
{ return _Pthread_alloc_impl::allocate(__n); }
void _STLP_CALL _Pthread_alloc::deallocate(void *__p, size_t __n)
{ _Pthread_alloc_impl::deallocate(__p, __n); }
void * _STLP_CALL _Pthread_alloc::allocate(size_t& __n, __state_type* __a)
{ return _Pthread_alloc_impl::allocate(__n, __a); }
void _STLP_CALL _Pthread_alloc::deallocate(void *__p, size_t __n, __state_type* __a)
{ _Pthread_alloc_impl::deallocate(__p, __n, __a); }
void * _STLP_CALL _Pthread_alloc::reallocate(void *__p, size_t __old_sz, size_t& __new_sz)
{ return _Pthread_alloc_impl::reallocate(__p, __old_sz, __new_sz); }
_Pthread_alloc_per_thread_state* _STLP_CALL _Pthread_alloc::_S_get_per_thread_state()
{ return _Pthread_alloc_impl::_S_get_per_thread_state(); }

_STLP_MOVE_TO_STD_NAMESPACE

#endif

_STLP_END_NAMESPACE

#undef _S_FREELIST_INDEX