/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <pthread.h>

#include <errno.h>
#include <limits.h>
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>
#include <sys/cdefs.h>
#include <sys/mman.h>
#include <unistd.h>

#include "pthread_internal.h"

#include "private/bionic_constants.h"
#include "private/bionic_fortify.h"
#include "private/bionic_futex.h"
#include "private/bionic_systrace.h"
#include "private/bionic_time_conversions.h"
#include "private/bionic_tls.h"

/* A mutex attribute holds the following fields:
 *
 * bits:  name       description
 * 0-3    type       type of mutex
 * 4      shared     process-shared flag
 * 5      protocol   whether it is a priority-inheritance mutex
 */
#define  MUTEXATTR_TYPE_MASK      0x000f
#define  MUTEXATTR_SHARED_MASK    0x0010
#define  MUTEXATTR_PROTOCOL_MASK  0x0020

#define  MUTEXATTR_PROTOCOL_SHIFT 5

int pthread_mutexattr_init(pthread_mutexattr_t *attr)
{
    *attr = PTHREAD_MUTEX_DEFAULT;
    return 0;
}

int pthread_mutexattr_destroy(pthread_mutexattr_t *attr)
{
    *attr = -1;
    return 0;
}

int pthread_mutexattr_gettype(const pthread_mutexattr_t *attr, int *type_p)
{
    int type = (*attr & MUTEXATTR_TYPE_MASK);

    if (type < PTHREAD_MUTEX_NORMAL || type > PTHREAD_MUTEX_ERRORCHECK) {
        return EINVAL;
    }

    *type_p = type;
    return 0;
}

int pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type)
{
    if (type < PTHREAD_MUTEX_NORMAL || type > PTHREAD_MUTEX_ERRORCHECK) {
        return EINVAL;
    }

    *attr = (*attr & ~MUTEXATTR_TYPE_MASK) | type;
    return 0;
}

/* Process-shared mutexes are only partially supported; see the comment in
 * pthread_mutexattr_setpshared() below. */

int pthread_mutexattr_setpshared(pthread_mutexattr_t *attr, int pshared)
{
    switch (pshared) {
    case PTHREAD_PROCESS_PRIVATE:
        *attr &= ~MUTEXATTR_SHARED_MASK;
        return 0;

    case PTHREAD_PROCESS_SHARED:
        /* Our current implementation of pthread actually supports shared
         * mutexes but won't clean up if a process dies with the mutex held.
         * Nevertheless, it's better than nothing. Shared mutexes are used
         * by surfaceflinger and audioflinger.
         */
        *attr |= MUTEXATTR_SHARED_MASK;
        return 0;
    }
    return EINVAL;
}

int pthread_mutexattr_getpshared(const pthread_mutexattr_t* attr, int* pshared) {
    *pshared = (*attr & MUTEXATTR_SHARED_MASK) ? PTHREAD_PROCESS_SHARED : PTHREAD_PROCESS_PRIVATE;
    return 0;
}

int pthread_mutexattr_setprotocol(pthread_mutexattr_t* attr, int protocol) {
    if (protocol != PTHREAD_PRIO_NONE && protocol != PTHREAD_PRIO_INHERIT) {
        return EINVAL;
    }
    *attr = (*attr & ~MUTEXATTR_PROTOCOL_MASK) | (protocol << MUTEXATTR_PROTOCOL_SHIFT);
    return 0;
}

int pthread_mutexattr_getprotocol(const pthread_mutexattr_t* attr, int* protocol) {
    *protocol = (*attr & MUTEXATTR_PROTOCOL_MASK) >> MUTEXATTR_PROTOCOL_SHIFT;
    return 0;
}
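// Illustrative usage sketch (application code, not part of this file): how the
// attribute bits above get packed by the standard POSIX calls. Error handling
// is elided for brevity.
//
//   pthread_mutexattr_t attr;
//   pthread_mutexattr_init(&attr);                               // type = PTHREAD_MUTEX_DEFAULT
//   pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);   // bits 0-3
//   pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);  // bit 5
//   pthread_mutex_t mutex;
//   pthread_mutex_init(&mutex, &attr);
//   pthread_mutexattr_destroy(&attr);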
// Priority Inheritance mutex implementation
struct PIMutex {
  // mutex type, can be 0 (normal), 1 (recursive), 2 (errorcheck), constant during lifetime
  uint8_t type;
  // process-shared flag, constant during lifetime
  bool shared;
  // <number of times a thread holding a recursive PI mutex> - 1
  uint16_t counter;
  // owner_tid is read/written by both userspace code and kernel code. It includes three fields:
  // FUTEX_WAITERS, FUTEX_OWNER_DIED and FUTEX_TID_MASK.
  atomic_int owner_tid;
};

static inline __always_inline int PIMutexTryLock(PIMutex& mutex) {
    pid_t tid = __get_thread()->tid;
    // Handle common case first.
    int old_owner = 0;
    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
                                                               &old_owner, tid,
                                                               memory_order_acquire,
                                                               memory_order_relaxed))) {
        return 0;
    }
    if (tid == (old_owner & FUTEX_TID_MASK)) {
        // We already own this mutex.
        if (mutex.type == PTHREAD_MUTEX_NORMAL) {
            return EBUSY;
        }
        if (mutex.type == PTHREAD_MUTEX_ERRORCHECK) {
            return EDEADLK;
        }
        if (mutex.counter == 0xffff) {
            return EAGAIN;
        }
        mutex.counter++;
        return 0;
    }
    return EBUSY;
}

// Inlining this function in pthread_mutex_lock() adds the cost of stack frame instructions on
// ARM/ARM64, which increases overhead by up to 20 percent. So mark it noinline.
static int __attribute__((noinline)) PIMutexTimedLock(PIMutex& mutex,
                                                      bool use_realtime_clock,
                                                      const timespec* abs_timeout) {
    int ret = PIMutexTryLock(mutex);
    if (__predict_true(ret == 0)) {
        return 0;
    }
    if (ret == EBUSY) {
        ScopedTrace trace("Contending for pthread mutex");
        ret = -__futex_pi_lock_ex(&mutex.owner_tid, mutex.shared, use_realtime_clock, abs_timeout);
    }
    return ret;
}

static int PIMutexUnlock(PIMutex& mutex) {
    pid_t tid = __get_thread()->tid;
    int old_owner = tid;
    // Handle common case first.
    if (__predict_true(mutex.type == PTHREAD_MUTEX_NORMAL)) {
        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
                                                                   &old_owner, 0,
                                                                   memory_order_release,
                                                                   memory_order_relaxed))) {
            return 0;
        }
    } else {
        old_owner = atomic_load_explicit(&mutex.owner_tid, memory_order_relaxed);
    }

    if (tid != (old_owner & FUTEX_TID_MASK)) {
        // The mutex can only be unlocked by the thread that owns it.
        return EPERM;
    }
    if (mutex.type == PTHREAD_MUTEX_RECURSIVE) {
        if (mutex.counter != 0u) {
            --mutex.counter;
            return 0;
        }
    }
    if (old_owner == tid) {
        // No thread is waiting.
        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex.owner_tid,
                                                                   &old_owner, 0,
                                                                   memory_order_release,
                                                                   memory_order_relaxed))) {
            return 0;
        }
    }
    return -__futex_pi_unlock(&mutex.owner_tid, mutex.shared);
}
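// Illustrative sketch (not part of this file): how the PI futex word stored in
// owner_tid decomposes. FUTEX_WAITERS, FUTEX_OWNER_DIED and FUTEX_TID_MASK are
// the kernel-defined fields referenced above.
//
//   int word = atomic_load_explicit(&mutex.owner_tid, memory_order_relaxed);
//   pid_t owner = word & FUTEX_TID_MASK;         // tid of the owner, 0 if unlocked
//   bool waiters = word & FUTEX_WAITERS;         // set by the kernel on contention
//   bool owner_died = word & FUTEX_OWNER_DIED;   // set by the kernel for robust handling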
static int PIMutexDestroy(PIMutex& mutex) {
    // The mutex should be in unlocked state (owner_tid == 0) when destroyed.
    // Store 0xffffffff to make the mutex unusable.
    int old_owner = 0;
    if (atomic_compare_exchange_strong_explicit(&mutex.owner_tid, &old_owner, 0xffffffff,
                                                memory_order_relaxed, memory_order_relaxed)) {
        return 0;
    }
    return EBUSY;
}

#if !defined(__LP64__)

namespace PIMutexAllocator {
// pthread_mutex_t has only 4 bytes in 32-bit programs, which is not enough to hold a PIMutex.
// So we use malloc to allocate PIMutexes and use 16 bits of pthread_mutex_t as an index to find
// the allocated PIMutex. This allows at most 65536 PI mutexes.
// When calling operations like pthread_mutex_lock/unlock, the 16-bit index is mapped to the
// corresponding PIMutex. To make the map operation fast, we use a lockless mapping method:
// once a PIMutex is allocated, the data used to map its index to the PIMutex doesn't change
// until the mutex is destroyed.
// Below are the data structures:
//   // struct Node contains a PIMutex.
//   typedef Node NodeArray[256];
//   typedef NodeArray* NodeArrayP;
//   NodeArrayP nodes[256];
//
// A 16-bit index is mapped to a Node as below:
//   (*nodes[index >> 8])[index & 0xff]
//
// A free list allows O(1) reuse of recycled PIMutexes.

union Node {
    PIMutex mutex;
    int next_free_id;  // If not -1, refers to the next node in the free PIMutex list.
};
typedef Node NodeArray[256];
typedef NodeArray* NodeArrayP;

// lock protects the fields below.
static Lock lock;
static NodeArrayP* nodes;
static int next_to_alloc_id;
static int first_free_id = -1;  // If not -1, refers to the first node in the free PIMutex list.

static inline __always_inline Node& IdToNode(int id) {
    return (*nodes[id >> 8])[id & 0xff];
}

static inline __always_inline PIMutex& IdToPIMutex(int id) {
    return IdToNode(id).mutex;
}

static int AllocIdLocked() {
    if (first_free_id != -1) {
        int result = first_free_id;
        first_free_id = IdToNode(result).next_free_id;
        return result;
    }
    if (next_to_alloc_id >= 0x10000) {
        return -1;
    }
    int array_pos = next_to_alloc_id >> 8;
    int node_pos = next_to_alloc_id & 0xff;
    if (node_pos == 0) {
        if (array_pos == 0) {
            nodes = static_cast<NodeArray**>(calloc(256, sizeof(NodeArray*)));
            if (nodes == nullptr) {
                return -1;
            }
        }
        nodes[array_pos] = static_cast<NodeArray*>(malloc(sizeof(NodeArray)));
        if (nodes[array_pos] == nullptr) {
            return -1;
        }
    }
    return next_to_alloc_id++;
}
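// Illustrative compile-time example (assumption: this assert is not in the
// upstream file): id 0x1234 maps to (*nodes[0x12])[0x34] under the scheme above.
static_assert((0x1234 >> 8) == 0x12 && (0x1234 & 0xff) == 0x34,
              "two-level index mapping example");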
// On success, returns an id referring to a PIMutex; otherwise returns -1.
// A valid id is in the range [0, 0xffff].
static int AllocId() {
    lock.lock();
    int result = AllocIdLocked();
    lock.unlock();
    if (result != -1) {
        memset(&IdToPIMutex(result), 0, sizeof(PIMutex));
    }
    return result;
}

static void FreeId(int id) {
    lock.lock();
    IdToNode(id).next_free_id = first_free_id;
    first_free_id = id;
    lock.unlock();
}

}  // namespace PIMutexAllocator

#endif  // !defined(__LP64__)


/* Convenience macro, creates a mask of 'bits' bits that starts from
 * the 'shift'-th least significant bit in a 32-bit word.
 *
 * Examples: FIELD_MASK(0,4)  -> 0xf
 *           FIELD_MASK(16,9) -> 0x1ff0000
 */
#define  FIELD_MASK(shift,bits)           (((1 << (bits))-1) << (shift))

/* This one is used to create a bit pattern from a given field value */
#define  FIELD_TO_BITS(val,shift,bits)    (((val) & ((1 << (bits))-1)) << (shift))

/* And this one does the opposite, i.e. extracts a field's value from a bit pattern */
#define  FIELD_FROM_BITS(val,shift,bits)  (((val) >> (shift)) & ((1 << (bits))-1))
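// Illustrative compile-time check of the examples above (assumption: these
// asserts are not in the upstream file; they only demonstrate the macros).
static_assert(FIELD_MASK(0, 4) == 0xf, "FIELD_MASK example");
static_assert(FIELD_MASK(16, 9) == 0x1ff0000, "FIELD_MASK example");
static_assert(FIELD_FROM_BITS(0x1ff0000, 16, 9) == 0x1ff, "FIELD_FROM_BITS example");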
/* Convenience macros.
 *
 * These are used to form or modify the bit pattern of a given mutex value.
 */

/* Mutex state:
 *
 * 0 for unlocked
 * 1 for locked, no waiters
 * 2 for locked, maybe waiters
 */
#define  MUTEX_STATE_SHIFT      0
#define  MUTEX_STATE_LEN        2

#define  MUTEX_STATE_MASK           FIELD_MASK(MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
#define  MUTEX_STATE_FROM_BITS(v)   FIELD_FROM_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
#define  MUTEX_STATE_TO_BITS(v)     FIELD_TO_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)

#define  MUTEX_STATE_UNLOCKED            0   /* must be 0 to match PTHREAD_MUTEX_INITIALIZER */
#define  MUTEX_STATE_LOCKED_UNCONTENDED  1   /* must be 1 due to atomic dec in unlock operation */
#define  MUTEX_STATE_LOCKED_CONTENDED    2   /* must be 1 + LOCKED_UNCONTENDED due to atomic dec */

#define  MUTEX_STATE_BITS_UNLOCKED            MUTEX_STATE_TO_BITS(MUTEX_STATE_UNLOCKED)
#define  MUTEX_STATE_BITS_LOCKED_UNCONTENDED  MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_UNCONTENDED)
#define  MUTEX_STATE_BITS_LOCKED_CONTENDED    MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_CONTENDED)

// Return true iff the mutex is unlocked.
#define MUTEX_STATE_BITS_IS_UNLOCKED(v) (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_UNLOCKED)

// Return true iff the mutex is locked with no waiters.
#define MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(v)  (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_UNCONTENDED)

// Return true iff the mutex is locked with maybe waiters.
#define MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(v)   (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_CONTENDED)

/* Used to flip from LOCKED_UNCONTENDED to LOCKED_CONTENDED */
#define  MUTEX_STATE_BITS_FLIP_CONTENTION(v)      ((v) ^ (MUTEX_STATE_BITS_LOCKED_CONTENDED ^ MUTEX_STATE_BITS_LOCKED_UNCONTENDED))

/* Mutex counter:
 *
 * We need to check for overflow before incrementing, and we also need to
 * detect when the counter is 0.
 */
#define  MUTEX_COUNTER_SHIFT         2
#define  MUTEX_COUNTER_LEN           11
#define  MUTEX_COUNTER_MASK          FIELD_MASK(MUTEX_COUNTER_SHIFT, MUTEX_COUNTER_LEN)

#define  MUTEX_COUNTER_BITS_WILL_OVERFLOW(v)    (((v) & MUTEX_COUNTER_MASK) == MUTEX_COUNTER_MASK)
#define  MUTEX_COUNTER_BITS_IS_ZERO(v)          (((v) & MUTEX_COUNTER_MASK) == 0)

/* Used to increment the counter directly after overflow has been checked */
#define  MUTEX_COUNTER_BITS_ONE      FIELD_TO_BITS(1, MUTEX_COUNTER_SHIFT,MUTEX_COUNTER_LEN)

/* Mutex shared bit flag
 *
 * This flag is set to indicate that the mutex is shared among processes.
 * This changes the futex opcode we use for futex wait/wake operations
 * (non-shared operations are much faster).
 */
#define  MUTEX_SHARED_SHIFT    13
#define  MUTEX_SHARED_MASK     FIELD_MASK(MUTEX_SHARED_SHIFT,1)

/* Mutex type:
 * We support normal, recursive and errorcheck mutexes.
 */
#define  MUTEX_TYPE_SHIFT      14
#define  MUTEX_TYPE_LEN        2
#define  MUTEX_TYPE_MASK       FIELD_MASK(MUTEX_TYPE_SHIFT,MUTEX_TYPE_LEN)

#define  MUTEX_TYPE_TO_BITS(t)       FIELD_TO_BITS(t, MUTEX_TYPE_SHIFT, MUTEX_TYPE_LEN)

#define  MUTEX_TYPE_BITS_NORMAL      MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_NORMAL)
#define  MUTEX_TYPE_BITS_RECURSIVE   MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_RECURSIVE)
#define  MUTEX_TYPE_BITS_ERRORCHECK  MUTEX_TYPE_TO_BITS(PTHREAD_MUTEX_ERRORCHECK)
// Use a special mutex type to mark priority inheritance mutexes.
#define  PI_MUTEX_STATE     MUTEX_TYPE_TO_BITS(3)

// A PI mutex contains the following fields:
//   Atomic(uint16_t) state;
//   PIMutex pi_mutex;  // uint16_t pi_mutex_id in 32-bit programs
//
// state holds the following fields:
//
//   bits:   name     description
//   15-14   type     mutex type, should be 3
//   13-0    padding  should be 0
//
// pi_mutex holds the state of a PI mutex.
// pi_mutex_id holds an integer used to find the state of a PI mutex.
//
// A Non-PI mutex contains the following fields:
//   Atomic(uint16_t) state;
//   atomic_int owner_tid;  // Atomic(uint16_t) in 32-bit programs
//
// state holds the following fields:
//
//   bits:   name     description
//   15-14   type     mutex type, can be 0 (normal), 1 (recursive), 2 (errorcheck)
//   13      shared   process-shared flag
//   12-2    counter  <number of times a thread holding a recursive Non-PI mutex> - 1
//   1-0     state    lock state (0, 1 or 2)
//
// bits 15-13 are constant during the lifetime of the mutex.
//
// owner_tid is used only in recursive and errorcheck Non-PI mutexes to hold the mutex owner
// thread id.
//
// PI mutexes and Non-PI mutexes are distinguished by checking the type field in state.
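// Illustrative compile-time example (assumption: not in the upstream file):
// a private recursive mutex, held twice (counter 1), with possible waiters, has
// state 0x4006 = type 01 (bits 15-14) | counter 1 (bits 12-2) | state 2 (bits 1-0).
static_assert((MUTEX_TYPE_BITS_RECURSIVE | MUTEX_COUNTER_BITS_ONE |
               MUTEX_STATE_BITS_LOCKED_CONTENDED) == 0x4006,
              "sample Non-PI state word");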
#if defined(__LP64__)
struct pthread_mutex_internal_t {
    _Atomic(uint16_t) state;
    uint16_t __pad;
    union {
        atomic_int owner_tid;
        PIMutex pi_mutex;
    };
    char __reserved[28];

    PIMutex& ToPIMutex() {
        return pi_mutex;
    }

    void FreePIMutex() {
    }
} __attribute__((aligned(4)));

#else
struct pthread_mutex_internal_t {
    _Atomic(uint16_t) state;
    union {
        _Atomic(uint16_t) owner_tid;
        uint16_t pi_mutex_id;
    };

    PIMutex& ToPIMutex() {
        return PIMutexAllocator::IdToPIMutex(pi_mutex_id);
    }

    void FreePIMutex() {
        PIMutexAllocator::FreeId(pi_mutex_id);
    }
} __attribute__((aligned(4)));
#endif

static_assert(sizeof(pthread_mutex_t) == sizeof(pthread_mutex_internal_t),
              "pthread_mutex_t should actually be pthread_mutex_internal_t in implementation.");

// For binary compatibility with old versions of pthread_mutex_t, we can't use stricter
// than 4-byte alignment.
static_assert(alignof(pthread_mutex_t) == 4,
              "pthread_mutex_t should fulfill the alignment of pthread_mutex_internal_t.");

static inline pthread_mutex_internal_t* __get_internal_mutex(pthread_mutex_t* mutex_interface) {
    return reinterpret_cast<pthread_mutex_internal_t*>(mutex_interface);
}

int pthread_mutex_init(pthread_mutex_t* mutex_interface, const pthread_mutexattr_t* attr) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);

    memset(mutex, 0, sizeof(pthread_mutex_internal_t));

    if (__predict_true(attr == nullptr)) {
        atomic_init(&mutex->state, MUTEX_TYPE_BITS_NORMAL);
        return 0;
    }

    uint16_t state = 0;
    if ((*attr & MUTEXATTR_SHARED_MASK) != 0) {
        state |= MUTEX_SHARED_MASK;
    }

    switch (*attr & MUTEXATTR_TYPE_MASK) {
    case PTHREAD_MUTEX_NORMAL:
        state |= MUTEX_TYPE_BITS_NORMAL;
        break;
    case PTHREAD_MUTEX_RECURSIVE:
        state |= MUTEX_TYPE_BITS_RECURSIVE;
        break;
    case PTHREAD_MUTEX_ERRORCHECK:
        state |= MUTEX_TYPE_BITS_ERRORCHECK;
        break;
    default:
        return EINVAL;
    }

    if (((*attr & MUTEXATTR_PROTOCOL_MASK) >> MUTEXATTR_PROTOCOL_SHIFT) == PTHREAD_PRIO_INHERIT) {
#if !defined(__LP64__)
        if (state & MUTEX_SHARED_MASK) {
            return EINVAL;
        }
        int id = PIMutexAllocator::AllocId();
        if (id == -1) {
            return ENOMEM;
        }
        mutex->pi_mutex_id = id;
#endif
        atomic_init(&mutex->state, PI_MUTEX_STATE);
        PIMutex& pi_mutex = mutex->ToPIMutex();
        pi_mutex.type = *attr & MUTEXATTR_TYPE_MASK;
        pi_mutex.shared = (*attr & MUTEXATTR_SHARED_MASK) != 0;
    } else {
        atomic_init(&mutex->state, state);
        atomic_init(&mutex->owner_tid, 0);
    }
    return 0;
}

// Namespace for Non-PI mutex routines.
namespace NonPI {

static inline __always_inline int NormalMutexTryLock(pthread_mutex_internal_t* mutex,
                                                     uint16_t shared) {
    const uint16_t unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;

    uint16_t old_state = unlocked;
    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
                         locked_uncontended, memory_order_acquire, memory_order_relaxed))) {
        return 0;
    }
    return EBUSY;
}
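// Illustrative fast-path values (assumption: comment added for exposition): for
// a private normal mutex, NormalMutexTryLock is a single CAS of the state word
// from 0x0000 (unlocked) to 0x0001 (locked_uncontended). For a process-shared
// normal mutex the same transition is 0x2000 -> 0x2001, since MUTEX_SHARED_MASK
// (bit 13) stays set throughout.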
/*
 * Lock a normal Non-PI mutex.
 *
 * As noted above, there are three states:
 *   0 (unlocked, no contention)
 *   1 (locked, no contention)
 *   2 (locked, contention)
 *
 * Non-recursive mutexes don't use the thread-id or counter fields, and the
 * "type" value is zero, so the only bits that will be set are the ones in
 * the lock state field.
 */
static inline __always_inline int NormalMutexLock(pthread_mutex_internal_t* mutex,
                                                  uint16_t shared,
                                                  bool use_realtime_clock,
                                                  const timespec* abs_timeout_or_null) {
    if (__predict_true(NormalMutexTryLock(mutex, shared) == 0)) {
        return 0;
    }
    int result = check_timespec(abs_timeout_or_null, true);
    if (result != 0) {
        return result;
    }

    ScopedTrace trace("Contending for pthread mutex");

    const uint16_t unlocked         = shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

    // We want to go to sleep until the mutex is available, which requires
    // promoting it to locked_contended. We need to swap in the new state
    // and then wait until somebody wakes us up.
    // An atomic_exchange is used to compete with other threads for the lock.
    // If it returns unlocked, we have acquired the lock; otherwise another
    // thread still holds the lock and we should wait again.
    // If the lock is acquired, an acquire fence is needed to make all memory accesses
    // made by other threads visible to the current CPU.
    while (atomic_exchange_explicit(&mutex->state, locked_contended,
                                    memory_order_acquire) != unlocked) {
        if (__futex_wait_ex(&mutex->state, shared, locked_contended, use_realtime_clock,
                            abs_timeout_or_null) == -ETIMEDOUT) {
            return ETIMEDOUT;
        }
    }
    return 0;
}
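// Illustrative state machine (assumption: summary comment, not upstream) for a
// private normal mutex, combining the lock path above and the unlock path below:
//
//   0 --CAS--> 1     uncontended lock (NormalMutexTryLock)
//   x --xchg--> 2    contended lock: swap in 2, futex_wait while the old value != 0
//   1 --xchg--> 0    unlock with no waiters: no futex_wake needed
//   2 --xchg--> 0    unlock with possible waiters: futex_wake one thread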
/*
 * Release a normal Non-PI mutex. The caller is responsible for determining
 * that we are in fact the owner of this lock.
 */
static inline __always_inline void NormalMutexUnlock(pthread_mutex_internal_t* mutex,
                                                     uint16_t shared) {
    const uint16_t unlocked         = shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

    // We use an atomic_exchange to release the lock. If the locked_contended
    // state is returned, some thread may be waiting for the lock and we need
    // to wake up one of them.
    // A release fence is required to make previous stores visible to the next
    // lock owner threads.
    if (atomic_exchange_explicit(&mutex->state, unlocked,
                                 memory_order_release) == locked_contended) {
        // Wake up one waiting thread. We don't know which thread will be
        // woken or when it'll start executing -- futexes make no guarantees
        // here. There may not even be a thread waiting.
        //
        // The newly-woken thread will replace the unlocked state we just set above
        // with locked_contended state, which means that when it eventually releases
        // the mutex it will also call FUTEX_WAKE. This results in one extra wake
        // call whenever a lock is contended, but lets us avoid forgetting anyone
        // without requiring us to track the number of sleepers.
        //
        // It's possible for another thread to sneak in and grab the lock between
        // the exchange above and the wake call below. If the new thread is "slow"
        // and holds the lock for a while, we'll wake up a sleeper, which will swap
        // in locked_contended state and then go back to sleep since the lock is
        // still held. If the new thread is "fast", running to completion before
        // we call wake, the thread we eventually wake will find an unlocked mutex
        // and will execute. Either way we have correct behavior and nobody is
        // orphaned on the wait queue.
        __futex_wake_ex(&mutex->state, shared, 1);
    }
}

/* This common inlined function is used to increment the counter of a recursive Non-PI mutex.
 *
 * If the counter would overflow, it returns EAGAIN.
 * Otherwise, it atomically increments the counter and returns 0.
 */
static inline __always_inline int RecursiveIncrement(pthread_mutex_internal_t* mutex,
                                                     uint16_t old_state) {
    // Detect recursive lock overflow and return EAGAIN.
    // This is safe because only the owner thread can modify the
    // counter bits in the mutex value.
    if (MUTEX_COUNTER_BITS_WILL_OVERFLOW(old_state)) {
        return EAGAIN;
    }

    // Other threads are able to change the lower bits (e.g. promoting it to "contended"),
    // but the mutex counter will not overflow. So we use an atomic_fetch_add operation here.
    // The mutex is already locked by the current thread, so we don't need an acquire fence.
    atomic_fetch_add_explicit(&mutex->state, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed);
    return 0;
}

// Wait on a recursive or errorcheck Non-PI mutex.
static inline __always_inline int RecursiveOrErrorcheckMutexWait(pthread_mutex_internal_t* mutex,
                                                                 uint16_t shared,
                                                                 uint16_t old_state,
                                                                 bool use_realtime_clock,
                                                                 const timespec* abs_timeout) {
// __futex_wait always waits on a 32-bit value, but state is only 16 bits. For a normal mutex,
// the owner_tid field in the mutex is not used, and on 64-bit devices the __pad field is not
// used either. But when a recursive or errorcheck mutex is used on 32-bit devices, we need to
// include the owner_tid value in the compare value passed to __futex_wait; otherwise we may
// always get an EAGAIN error.

#if defined(__LP64__)
    return __futex_wait_ex(&mutex->state, shared, old_state, use_realtime_clock, abs_timeout);

#else
    // This implementation works only when the layout of pthread_mutex_internal_t matches the
    // expectation below. It also relies on the assumption that Android only runs on
    // little-endian devices.
    static_assert(offsetof(pthread_mutex_internal_t, state) == 0, "");
    static_assert(offsetof(pthread_mutex_internal_t, owner_tid) == 2, "");

    uint32_t owner_tid = atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed);
    return __futex_wait_ex(&mutex->state, shared, (owner_tid << 16) | old_state,
                           use_realtime_clock, abs_timeout);
#endif
}
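// Illustrative example of the 32-bit compare value above (assumption: numbers
// chosen for exposition): if the owner tid is 1234 (0x4d2) and the 16-bit state
// is 0x4006, the combined value is (0x4d2 << 16) | 0x4006 == 0x04d24006, which
// matches the little-endian layout with state at offset 0 and owner_tid at
// offset 2.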
// Lock a Non-PI mutex.
static int MutexLockWithTimeout(pthread_mutex_internal_t* mutex, bool use_realtime_clock,
                                const timespec* abs_timeout_or_null) {
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    uint16_t shared = (old_state & MUTEX_SHARED_MASK);

    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        return NormalMutexLock(mutex, shared, use_realtime_clock, abs_timeout_or_null);
    }

    // Do we already own this recursive or error-check mutex?
    pid_t tid = __get_thread()->tid;
    if (tid == atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
        if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
            return EDEADLK;
        }
        return RecursiveIncrement(mutex, old_state);
    }

    const uint16_t unlocked           = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
    const uint16_t locked_contended   = mtype | shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;

    // First, if the mutex is unlocked, try to quickly acquire it.
    // In the optimistic case where this works, set the state to locked_uncontended.
    if (old_state == unlocked) {
        // If exchanged successfully, an acquire fence is required to make
        // all memory accesses made by other threads visible to the current CPU.
        if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
                             locked_uncontended, memory_order_acquire, memory_order_relaxed))) {
            atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
            return 0;
        }
    }

    ScopedTrace trace("Contending for pthread mutex");

    while (true) {
        if (old_state == unlocked) {
            // NOTE: We set the state to locked_contended since we _know_ there
            // is contention when we are in this loop. This ensures all waiters
            // will be unlocked.

            // If exchanged successfully, an acquire fence is required to make
            // all memory accesses made by other threads visible to the current CPU.
            if (__predict_true(atomic_compare_exchange_weak_explicit(&mutex->state,
                                                                     &old_state, locked_contended,
                                                                     memory_order_acquire,
                                                                     memory_order_relaxed))) {
                atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
                return 0;
            }
            continue;
        } else if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(old_state)) {
            // We should set it to locked_contended before going to sleep. This makes
            // sure waiters will be woken up eventually.

            int new_state = MUTEX_STATE_BITS_FLIP_CONTENTION(old_state);
            if (__predict_false(!atomic_compare_exchange_weak_explicit(&mutex->state,
                                                                       &old_state, new_state,
                                                                       memory_order_relaxed,
                                                                       memory_order_relaxed))) {
                continue;
            }
            old_state = new_state;
        }

        int result = check_timespec(abs_timeout_or_null, true);
        if (result != 0) {
            return result;
        }
        // We are in locked_contended state; sleep until someone wakes us up.
        if (RecursiveOrErrorcheckMutexWait(mutex, shared, old_state, use_realtime_clock,
                                           abs_timeout_or_null) == -ETIMEDOUT) {
            return ETIMEDOUT;
        }
        old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    }
}

}  // namespace NonPI

static inline __always_inline bool IsMutexDestroyed(uint16_t mutex_state) {
    return mutex_state == 0xffff;
}

// Inlining this function in pthread_mutex_lock() adds the cost of stack frame instructions on
// ARM64. So mark it noinline.
static int __attribute__((noinline)) HandleUsingDestroyedMutex(pthread_mutex_t* mutex,
                                                               const char* function_name) {
    if (android_get_application_target_sdk_version() >= __ANDROID_API_P__) {
        __fortify_fatal("%s called on a destroyed mutex (%p)", function_name, mutex);
    }
    return EBUSY;
}
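// Illustrative compile-time note (assumption: this assert is not in the
// upstream file): the destroyed marker 0xffff cannot collide with a live state
// word, because a live PI mutex stores exactly PI_MUTEX_STATE (0xc000, with
// bits 13-0 zero) and Non-PI mutexes keep a type field of 0-2.
static_assert(PI_MUTEX_STATE == 0xc000, "destroyed marker must differ from PI state");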
int pthread_mutex_lock(pthread_mutex_t* mutex_interface) {
#if !defined(__LP64__)
    // Some apps depend on being able to pass NULL as a mutex and get EINVAL
    // back. We don't need to worry about it for LP64 since the ABI is brand
    // new, but keep compatibility for LP32. http://b/19995172.
    if (mutex_interface == nullptr) {
        return EINVAL;
    }
#endif

    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    // Avoid slowing down the fast path of the normal mutex lock operation.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
        if (__predict_true(NonPI::NormalMutexTryLock(mutex, shared) == 0)) {
            return 0;
        }
    }
    if (old_state == PI_MUTEX_STATE) {
        PIMutex& m = mutex->ToPIMutex();
        // Handle common case first.
        if (__predict_true(PIMutexTryLock(m) == 0)) {
            return 0;
        }
        return PIMutexTimedLock(m, false, nullptr);
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }
    return NonPI::MutexLockWithTimeout(mutex, false, nullptr);
}
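// Illustrative usage (application side, not bionic internals): the common
// statically-initialized case takes only the fast paths above and below.
//
//   pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;  // state 0: normal, unlocked
//   pthread_mutex_lock(&m);    // one CAS (0x0000 -> 0x0001), no syscall
//   pthread_mutex_unlock(&m);  // one atomic exchange back to 0x0000, no futex wake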
int pthread_mutex_unlock(pthread_mutex_t* mutex_interface) {
#if !defined(__LP64__)
    // Some apps depend on being able to pass NULL as a mutex and get EINVAL
    // back. We don't need to worry about it for LP64 since the ABI is brand
    // new, but keep compatibility for LP32. http://b/19995172.
    if (mutex_interface == nullptr) {
        return EINVAL;
    }
#endif

    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype  = (old_state & MUTEX_TYPE_MASK);
    uint16_t shared = (old_state & MUTEX_SHARED_MASK);

    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        NonPI::NormalMutexUnlock(mutex, shared);
        return 0;
    }
    if (old_state == PI_MUTEX_STATE) {
        return PIMutexUnlock(mutex->ToPIMutex());
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }

    // Do we already own this recursive or error-check mutex?
    pid_t tid = __get_thread()->tid;
    if (tid != atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
        return EPERM;
    }

    // If the counter is > 0, we can simply decrement it atomically.
    // Other threads can mutate the lower state bits (and only the lower state
    // bits), but they cannot touch the counter field, so a relaxed
    // atomic_fetch_sub is enough.
    if (!MUTEX_COUNTER_BITS_IS_ZERO(old_state)) {
        // We still own the mutex, so a release fence is not needed.
        atomic_fetch_sub_explicit(&mutex->state, MUTEX_COUNTER_BITS_ONE, memory_order_relaxed);
        return 0;
    }

    // The counter is 0, so we are going to unlock the mutex by resetting its
    // state to unlocked. We need to perform an atomic_exchange in order to read
    // the previous state, which will be locked_contended if there may be waiters
    // to wake.
    // A release fence is required to make previous stores visible to the next
    // lock owner threads.
    atomic_store_explicit(&mutex->owner_tid, 0, memory_order_relaxed);
    const uint16_t unlocked = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
    old_state = atomic_exchange_explicit(&mutex->state, unlocked, memory_order_release);
    if (MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(old_state)) {
        __futex_wake_ex(&mutex->state, shared, 1);
    }

    return 0;
}

int pthread_mutex_trylock(pthread_mutex_t* mutex_interface) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);

    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);

    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
        return NonPI::NormalMutexTryLock(mutex, shared);
    }
    if (old_state == PI_MUTEX_STATE) {
        return PIMutexTryLock(mutex->ToPIMutex());
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }

    // Do we already own this recursive or error-check mutex?
    pid_t tid = __get_thread()->tid;
    if (tid == atomic_load_explicit(&mutex->owner_tid, memory_order_relaxed)) {
        if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
            return EBUSY;
        }
        return NonPI::RecursiveIncrement(mutex, old_state);
    }

    uint16_t shared = (old_state & MUTEX_SHARED_MASK);
    const uint16_t unlocked           = mtype | shared | MUTEX_STATE_BITS_UNLOCKED;
    const uint16_t locked_uncontended = mtype | shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;

    // Same as pthread_mutex_lock, except that we don't want to wait, and
    // the only operation that can succeed is a single compare_exchange to acquire the
    // lock if it is released / not owned by anyone. No need for a complex loop.
    // If exchanged successfully, an acquire fence is required to make
    // all memory accesses made by other threads visible to the current CPU.
    old_state = unlocked;
    if (__predict_true(atomic_compare_exchange_strong_explicit(&mutex->state, &old_state,
                                                               locked_uncontended,
                                                               memory_order_acquire,
                                                               memory_order_relaxed))) {
        atomic_store_explicit(&mutex->owner_tid, tid, memory_order_relaxed);
        return 0;
    }
    return EBUSY;
}

#if !defined(__LP64__)
extern "C" int pthread_mutex_lock_timeout_np(pthread_mutex_t* mutex_interface, unsigned ms) {
    timespec ts;
    timespec_from_ms(ts, ms);
    timespec abs_timeout;
    absolute_timespec_from_timespec(abs_timeout, ts, CLOCK_MONOTONIC);
    int error = NonPI::MutexLockWithTimeout(__get_internal_mutex(mutex_interface), false,
                                            &abs_timeout);
    if (error == ETIMEDOUT) {
        error = EBUSY;
    }
    return error;
}
#endif
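// Illustrative usage of the timed-lock entry points below (assumption: caller
// code, not part of this file):
//
//   timespec deadline;
//   clock_gettime(CLOCK_REALTIME, &deadline);
//   deadline.tv_sec += 1;
//   int rc = pthread_mutex_timedlock(&m, &deadline);  // CLOCK_REALTIME deadline
//   // pthread_mutex_timedlock_monotonic_np takes a CLOCK_MONOTONIC deadline
//   // instead, which is immune to wall-clock adjustments.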
static int __pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, bool use_realtime_clock,
                                     const timespec* abs_timeout, const char* function) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    uint16_t mtype = (old_state & MUTEX_TYPE_MASK);
    // Handle common case first.
    if (__predict_true(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        uint16_t shared = (old_state & MUTEX_SHARED_MASK);
        if (__predict_true(NonPI::NormalMutexTryLock(mutex, shared) == 0)) {
            return 0;
        }
    }
    if (old_state == PI_MUTEX_STATE) {
        return PIMutexTimedLock(mutex->ToPIMutex(), use_realtime_clock, abs_timeout);
    }
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, function);
    }
    return NonPI::MutexLockWithTimeout(mutex, use_realtime_clock, abs_timeout);
}

int pthread_mutex_timedlock(pthread_mutex_t* mutex_interface, const struct timespec* abs_timeout) {
    return __pthread_mutex_timedlock(mutex_interface, true, abs_timeout, __FUNCTION__);
}

int pthread_mutex_timedlock_monotonic_np(pthread_mutex_t* mutex_interface,
                                         const struct timespec* abs_timeout) {
    return __pthread_mutex_timedlock(mutex_interface, false, abs_timeout, __FUNCTION__);
}

int pthread_mutex_destroy(pthread_mutex_t* mutex_interface) {
    pthread_mutex_internal_t* mutex = __get_internal_mutex(mutex_interface);
    uint16_t old_state = atomic_load_explicit(&mutex->state, memory_order_relaxed);
    if (__predict_false(IsMutexDestroyed(old_state))) {
        return HandleUsingDestroyedMutex(mutex_interface, __FUNCTION__);
    }
    if (old_state == PI_MUTEX_STATE) {
        int result = PIMutexDestroy(mutex->ToPIMutex());
        if (result == 0) {
            mutex->FreePIMutex();
            atomic_store(&mutex->state, 0xffff);
        }
        return result;
    }
    // Store 0xffff to make the mutex unusable. Although the POSIX standard says it is undefined
    // behavior to destroy a locked mutex, we prefer not to change mutex->state in that situation.
    if (MUTEX_STATE_BITS_IS_UNLOCKED(old_state) &&
        atomic_compare_exchange_strong_explicit(&mutex->state, &old_state, 0xffff,
                                                memory_order_relaxed, memory_order_relaxed)) {
        return 0;
    }
    return EBUSY;
}
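// Illustrative lifecycle (application side, assumption: example code): a mutex
// must be unlocked before pthread_mutex_destroy(); destroying a locked Non-PI
// mutex returns EBUSY above.
//
//   pthread_mutex_t m;
//   pthread_mutex_init(&m, nullptr);
//   pthread_mutex_lock(&m);
//   pthread_mutex_unlock(&m);
//   pthread_mutex_destroy(&m);  // state becomes 0xffff; further use aborts on API >= P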