1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in 12 * the documentation and/or other materials provided with the 13 * distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <errno.h> 30 #include <stdatomic.h> 31 #include <string.h> 32 33 #include "pthread_internal.h" 34 #include "private/bionic_futex.h" 35 #include "private/bionic_lock.h" 36 #include "private/bionic_time_conversions.h" 37 38 /* Technical note: 39 * 40 * Possible states of a read/write lock: 41 * 42 * - no readers and no writer (unlocked) 43 * - one or more readers sharing the lock at the same time (read-locked) 44 * - one writer holding the lock (write-lock) 45 * 46 * Additionally: 47 * - trying to get the write-lock while there are any readers blocks 48 * - trying to get the read-lock while there is a writer blocks 49 * - a single thread can acquire the lock multiple times in read mode 50 * 51 * - Posix states that behavior is undefined (may deadlock) if a thread tries 52 * to acquire the lock 53 * - in write mode while already holding the lock (whether in read or write mode) 54 * - in read mode while already holding the lock in write mode. 55 * - This implementation will return EDEADLK in "write after write" and "read after 56 * write" cases and will deadlock in write after read case. 57 * 58 */ 59 60 // A rwlockattr is implemented as a 32-bit integer which has following fields: 61 // bits name description 62 // 1 rwlock_kind have rwlock preference like PTHREAD_RWLOCK_PREFER_READER_NP. 63 // 0 process_shared set to 1 if the rwlock is shared between processes. 64 65 #define RWLOCKATTR_PSHARED_SHIFT 0 66 #define RWLOCKATTR_KIND_SHIFT 1 67 68 #define RWLOCKATTR_PSHARED_MASK 1 69 #define RWLOCKATTR_KIND_MASK 2 70 #define RWLOCKATTR_RESERVED_MASK (~3) 71 72 static inline __always_inline __always_inline bool __rwlockattr_getpshared(const pthread_rwlockattr_t* attr) { 73 return (*attr & RWLOCKATTR_PSHARED_MASK) >> RWLOCKATTR_PSHARED_SHIFT; 74 } 75 76 static inline __always_inline __always_inline void __rwlockattr_setpshared(pthread_rwlockattr_t* attr, int pshared) { 77 *attr = (*attr & ~RWLOCKATTR_PSHARED_MASK) | (pshared << RWLOCKATTR_PSHARED_SHIFT); 78 } 79 80 static inline __always_inline int __rwlockattr_getkind(const pthread_rwlockattr_t* attr) { 81 return (*attr & RWLOCKATTR_KIND_MASK) >> RWLOCKATTR_KIND_SHIFT; 82 } 83 84 static inline __always_inline void __rwlockattr_setkind(pthread_rwlockattr_t* attr, int kind) { 85 *attr = (*attr & ~RWLOCKATTR_KIND_MASK) | (kind << RWLOCKATTR_KIND_SHIFT); 86 } 87 88 89 int pthread_rwlockattr_init(pthread_rwlockattr_t* attr) { 90 *attr = 0; 91 return 0; 92 } 93 94 int pthread_rwlockattr_destroy(pthread_rwlockattr_t* attr) { 95 *attr = -1; 96 return 0; 97 } 98 99 int pthread_rwlockattr_getpshared(const pthread_rwlockattr_t* attr, int* pshared) { 100 if (__rwlockattr_getpshared(attr)) { 101 *pshared = PTHREAD_PROCESS_SHARED; 102 } else { 103 *pshared = PTHREAD_PROCESS_PRIVATE; 104 } 105 return 0; 106 } 107 108 int pthread_rwlockattr_setpshared(pthread_rwlockattr_t* attr, int pshared) { 109 switch (pshared) { 110 case PTHREAD_PROCESS_PRIVATE: 111 __rwlockattr_setpshared(attr, 0); 112 return 0; 113 case PTHREAD_PROCESS_SHARED: 114 __rwlockattr_setpshared(attr, 1); 115 return 0; 116 default: 117 return EINVAL; 118 } 119 } 120 121 int pthread_rwlockattr_getkind_np(const pthread_rwlockattr_t* attr, int* pref) { 122 *pref = __rwlockattr_getkind(attr); 123 return 0; 124 } 125 126 int pthread_rwlockattr_setkind_np(pthread_rwlockattr_t* attr, int pref) { 127 switch (pref) { 128 case PTHREAD_RWLOCK_PREFER_READER_NP: // Fall through. 129 case PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP: 130 __rwlockattr_setkind(attr, pref); 131 return 0; 132 default: 133 return EINVAL; 134 } 135 } 136 137 // A rwlock state is implemented as a 32-bit integer which has following rules: 138 // bits name description 139 // 31 owned_by_writer_flag set to 1 if the lock is owned by a writer now. 140 // 30-2 reader_count the count of readers holding the lock. 141 // 1 have_pending_writers set to 1 if having pending writers. 142 // 0 have_pending_readers set to 1 if having pending readers. 143 144 #define STATE_HAVE_PENDING_READERS_SHIFT 0 145 #define STATE_HAVE_PENDING_WRITERS_SHIFT 1 146 #define STATE_READER_COUNT_SHIFT 2 147 #define STATE_OWNED_BY_WRITER_SHIFT 31 148 149 #define STATE_HAVE_PENDING_READERS_FLAG (1 << STATE_HAVE_PENDING_READERS_SHIFT) 150 #define STATE_HAVE_PENDING_WRITERS_FLAG (1 << STATE_HAVE_PENDING_WRITERS_SHIFT) 151 #define STATE_READER_COUNT_CHANGE_STEP (1 << STATE_READER_COUNT_SHIFT) 152 #define STATE_OWNED_BY_WRITER_FLAG (1 << STATE_OWNED_BY_WRITER_SHIFT) 153 154 #define STATE_HAVE_PENDING_READERS_OR_WRITERS_FLAG \ 155 (STATE_HAVE_PENDING_READERS_FLAG | STATE_HAVE_PENDING_WRITERS_FLAG) 156 157 struct pthread_rwlock_internal_t { 158 atomic_int state; 159 atomic_int writer_tid; 160 161 bool pshared; 162 bool writer_nonrecursive_preferred; 163 uint16_t __pad; 164 165 // When a reader thread plans to suspend on the rwlock, it will add STATE_HAVE_PENDING_READERS_FLAG 166 // in state, increase pending_reader_count, and wait on pending_reader_wakeup_serial. After woken 167 // up, the reader thread decreases pending_reader_count, and the last pending reader thread should 168 // remove STATE_HAVE_PENDING_READERS_FLAG in state. A pending writer thread works in a similar way, 169 // except that it uses flag and members for writer threads. 170 171 Lock pending_lock; // All pending members below are protected by pending_lock. 172 uint32_t pending_reader_count; // Count of pending reader threads. 173 uint32_t pending_writer_count; // Count of pending writer threads. 174 uint32_t pending_reader_wakeup_serial; // Pending reader threads wait on this address by futex_wait. 175 uint32_t pending_writer_wakeup_serial; // Pending writer threads wait on this address by futex_wait. 176 177 #if defined(__LP64__) 178 char __reserved[20]; 179 #else 180 char __reserved[4]; 181 #endif 182 }; 183 184 static inline __always_inline bool __state_owned_by_writer(int state) { 185 return state < 0; 186 } 187 188 static inline __always_inline bool __state_owned_by_readers(int state) { 189 // If state >= 0, the owned_by_writer_flag is not set. 190 // And if state >= STATE_READER_COUNT_CHANGE_STEP, the reader_count field is not empty. 191 return state >= STATE_READER_COUNT_CHANGE_STEP; 192 } 193 194 static inline __always_inline bool __state_owned_by_readers_or_writer(int state) { 195 return state < 0 || state >= STATE_READER_COUNT_CHANGE_STEP; 196 } 197 198 static inline __always_inline int __state_add_writer_flag(int state) { 199 return state | STATE_OWNED_BY_WRITER_FLAG; 200 } 201 202 static inline __always_inline bool __state_is_last_reader(int state) { 203 return (state >> STATE_READER_COUNT_SHIFT) == 1; 204 } 205 206 static inline __always_inline bool __state_have_pending_writers(int state) { 207 return state & STATE_HAVE_PENDING_WRITERS_FLAG; 208 } 209 210 static inline __always_inline bool __state_have_pending_readers_or_writers(int state) { 211 return state & STATE_HAVE_PENDING_READERS_OR_WRITERS_FLAG; 212 } 213 214 static_assert(sizeof(pthread_rwlock_t) == sizeof(pthread_rwlock_internal_t), 215 "pthread_rwlock_t should actually be pthread_rwlock_internal_t in implementation."); 216 217 // For binary compatibility with old version of pthread_rwlock_t, we can't use more strict 218 // alignment than 4-byte alignment. 219 static_assert(alignof(pthread_rwlock_t) == 4, 220 "pthread_rwlock_t should fulfill the alignment requirement of pthread_rwlock_internal_t."); 221 222 static inline __always_inline pthread_rwlock_internal_t* __get_internal_rwlock(pthread_rwlock_t* rwlock_interface) { 223 return reinterpret_cast<pthread_rwlock_internal_t*>(rwlock_interface); 224 } 225 226 int pthread_rwlock_init(pthread_rwlock_t* rwlock_interface, const pthread_rwlockattr_t* attr) { 227 pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface); 228 229 memset(rwlock, 0, sizeof(pthread_rwlock_internal_t)); 230 231 if (__predict_false(attr != nullptr)) { 232 rwlock->pshared = __rwlockattr_getpshared(attr); 233 int kind = __rwlockattr_getkind(attr); 234 switch (kind) { 235 case PTHREAD_RWLOCK_PREFER_READER_NP: 236 rwlock->writer_nonrecursive_preferred = false; 237 break; 238 case PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP: 239 rwlock->writer_nonrecursive_preferred = true; 240 break; 241 default: 242 return EINVAL; 243 } 244 if ((*attr & RWLOCKATTR_RESERVED_MASK) != 0) { 245 return EINVAL; 246 } 247 } 248 249 atomic_init(&rwlock->state, 0); 250 rwlock->pending_lock.init(rwlock->pshared); 251 return 0; 252 } 253 254 int pthread_rwlock_destroy(pthread_rwlock_t* rwlock_interface) { 255 pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface); 256 257 if (atomic_load_explicit(&rwlock->state, memory_order_relaxed) != 0) { 258 return EBUSY; 259 } 260 return 0; 261 } 262 263 static inline __always_inline bool __can_acquire_read_lock(int old_state, 264 bool writer_nonrecursive_preferred) { 265 // If writer is preferred with nonrecursive reader, we prevent further readers from acquiring 266 // the lock when there are writers waiting for the lock. 267 bool cannot_apply = __state_owned_by_writer(old_state) || 268 (writer_nonrecursive_preferred && __state_have_pending_writers(old_state)); 269 return !cannot_apply; 270 } 271 272 static inline __always_inline int __pthread_rwlock_tryrdlock(pthread_rwlock_internal_t* rwlock) { 273 int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed); 274 275 while (__predict_true(__can_acquire_read_lock(old_state, rwlock->writer_nonrecursive_preferred))) { 276 277 int new_state = old_state + STATE_READER_COUNT_CHANGE_STEP; 278 if (__predict_false(!__state_owned_by_readers(new_state))) { // Happens when reader count overflows. 279 return EAGAIN; 280 } 281 if (__predict_true(atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state, new_state, 282 memory_order_acquire, memory_order_relaxed))) { 283 return 0; 284 } 285 } 286 return EBUSY; 287 } 288 289 static int __pthread_rwlock_timedrdlock(pthread_rwlock_internal_t* rwlock, bool use_realtime_clock, 290 const timespec* abs_timeout_or_null) { 291 if (atomic_load_explicit(&rwlock->writer_tid, memory_order_relaxed) == __get_thread()->tid) { 292 return EDEADLK; 293 } 294 295 while (true) { 296 int result = __pthread_rwlock_tryrdlock(rwlock); 297 if (result == 0 || result == EAGAIN) { 298 return result; 299 } 300 result = check_timespec(abs_timeout_or_null, true); 301 if (result != 0) { 302 return result; 303 } 304 305 int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed); 306 if (__can_acquire_read_lock(old_state, rwlock->writer_nonrecursive_preferred)) { 307 continue; 308 } 309 310 rwlock->pending_lock.lock(); 311 rwlock->pending_reader_count++; 312 313 // We rely on the fact that all atomic exchange operations on the same object (here it is 314 // rwlock->state) always appear to occur in a single total order. If the pending flag is added 315 // before unlocking, the unlocking thread will wakeup the waiter. Otherwise, we will see the 316 // state is unlocked and will not wait anymore. 317 old_state = atomic_fetch_or_explicit(&rwlock->state, STATE_HAVE_PENDING_READERS_FLAG, 318 memory_order_relaxed); 319 320 int old_serial = rwlock->pending_reader_wakeup_serial; 321 rwlock->pending_lock.unlock(); 322 323 int futex_result = 0; 324 if (!__can_acquire_read_lock(old_state, rwlock->writer_nonrecursive_preferred)) { 325 futex_result = __futex_wait_ex(&rwlock->pending_reader_wakeup_serial, rwlock->pshared, 326 old_serial, use_realtime_clock, abs_timeout_or_null); 327 } 328 329 rwlock->pending_lock.lock(); 330 rwlock->pending_reader_count--; 331 if (rwlock->pending_reader_count == 0) { 332 atomic_fetch_and_explicit(&rwlock->state, ~STATE_HAVE_PENDING_READERS_FLAG, 333 memory_order_relaxed); 334 } 335 rwlock->pending_lock.unlock(); 336 337 if (futex_result == -ETIMEDOUT) { 338 return ETIMEDOUT; 339 } 340 } 341 } 342 343 static inline __always_inline bool __can_acquire_write_lock(int old_state) { 344 return !__state_owned_by_readers_or_writer(old_state); 345 } 346 347 static inline __always_inline int __pthread_rwlock_trywrlock(pthread_rwlock_internal_t* rwlock) { 348 int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed); 349 350 while (__predict_true(__can_acquire_write_lock(old_state))) { 351 if (__predict_true(atomic_compare_exchange_weak_explicit(&rwlock->state, &old_state, 352 __state_add_writer_flag(old_state), memory_order_acquire, memory_order_relaxed))) { 353 354 atomic_store_explicit(&rwlock->writer_tid, __get_thread()->tid, memory_order_relaxed); 355 return 0; 356 } 357 } 358 return EBUSY; 359 } 360 361 static int __pthread_rwlock_timedwrlock(pthread_rwlock_internal_t* rwlock, bool use_realtime_clock, 362 const timespec* abs_timeout_or_null) { 363 if (atomic_load_explicit(&rwlock->writer_tid, memory_order_relaxed) == __get_thread()->tid) { 364 return EDEADLK; 365 } 366 while (true) { 367 int result = __pthread_rwlock_trywrlock(rwlock); 368 if (result == 0) { 369 return result; 370 } 371 result = check_timespec(abs_timeout_or_null, true); 372 if (result != 0) { 373 return result; 374 } 375 376 int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed); 377 if (__can_acquire_write_lock(old_state)) { 378 continue; 379 } 380 381 rwlock->pending_lock.lock(); 382 rwlock->pending_writer_count++; 383 384 old_state = atomic_fetch_or_explicit(&rwlock->state, STATE_HAVE_PENDING_WRITERS_FLAG, 385 memory_order_relaxed); 386 387 int old_serial = rwlock->pending_writer_wakeup_serial; 388 rwlock->pending_lock.unlock(); 389 390 int futex_result = 0; 391 if (!__can_acquire_write_lock(old_state)) { 392 futex_result = __futex_wait_ex(&rwlock->pending_writer_wakeup_serial, rwlock->pshared, 393 old_serial, use_realtime_clock, abs_timeout_or_null); 394 } 395 396 rwlock->pending_lock.lock(); 397 rwlock->pending_writer_count--; 398 if (rwlock->pending_writer_count == 0) { 399 atomic_fetch_and_explicit(&rwlock->state, ~STATE_HAVE_PENDING_WRITERS_FLAG, 400 memory_order_relaxed); 401 } 402 rwlock->pending_lock.unlock(); 403 404 if (futex_result == -ETIMEDOUT) { 405 return ETIMEDOUT; 406 } 407 } 408 } 409 410 int pthread_rwlock_rdlock(pthread_rwlock_t* rwlock_interface) { 411 pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface); 412 // Avoid slowing down fast path of rdlock. 413 if (__predict_true(__pthread_rwlock_tryrdlock(rwlock) == 0)) { 414 return 0; 415 } 416 return __pthread_rwlock_timedrdlock(rwlock, false, nullptr); 417 } 418 419 int pthread_rwlock_timedrdlock(pthread_rwlock_t* rwlock_interface, const timespec* abs_timeout) { 420 pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface); 421 422 return __pthread_rwlock_timedrdlock(rwlock, true, abs_timeout); 423 } 424 425 int pthread_rwlock_timedrdlock_monotonic_np(pthread_rwlock_t* rwlock_interface, 426 const timespec* abs_timeout) { 427 pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface); 428 429 return __pthread_rwlock_timedrdlock(rwlock, false, abs_timeout); 430 } 431 432 int pthread_rwlock_tryrdlock(pthread_rwlock_t* rwlock_interface) { 433 return __pthread_rwlock_tryrdlock(__get_internal_rwlock(rwlock_interface)); 434 } 435 436 int pthread_rwlock_wrlock(pthread_rwlock_t* rwlock_interface) { 437 pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface); 438 // Avoid slowing down fast path of wrlock. 439 if (__predict_true(__pthread_rwlock_trywrlock(rwlock) == 0)) { 440 return 0; 441 } 442 return __pthread_rwlock_timedwrlock(rwlock, false, nullptr); 443 } 444 445 int pthread_rwlock_timedwrlock(pthread_rwlock_t* rwlock_interface, const timespec* abs_timeout) { 446 pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface); 447 448 return __pthread_rwlock_timedwrlock(rwlock, true, abs_timeout); 449 } 450 451 int pthread_rwlock_timedwrlock_monotonic_np(pthread_rwlock_t* rwlock_interface, 452 const timespec* abs_timeout) { 453 pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface); 454 455 return __pthread_rwlock_timedwrlock(rwlock, false, abs_timeout); 456 } 457 458 int pthread_rwlock_trywrlock(pthread_rwlock_t* rwlock_interface) { 459 return __pthread_rwlock_trywrlock(__get_internal_rwlock(rwlock_interface)); 460 } 461 462 int pthread_rwlock_unlock(pthread_rwlock_t* rwlock_interface) { 463 pthread_rwlock_internal_t* rwlock = __get_internal_rwlock(rwlock_interface); 464 465 int old_state = atomic_load_explicit(&rwlock->state, memory_order_relaxed); 466 if (__state_owned_by_writer(old_state)) { 467 if (atomic_load_explicit(&rwlock->writer_tid, memory_order_relaxed) != __get_thread()->tid) { 468 return EPERM; 469 } 470 atomic_store_explicit(&rwlock->writer_tid, 0, memory_order_relaxed); 471 old_state = atomic_fetch_and_explicit(&rwlock->state, ~STATE_OWNED_BY_WRITER_FLAG, 472 memory_order_release); 473 if (!__state_have_pending_readers_or_writers(old_state)) { 474 return 0; 475 } 476 477 } else if (__state_owned_by_readers(old_state)) { 478 old_state = atomic_fetch_sub_explicit(&rwlock->state, STATE_READER_COUNT_CHANGE_STEP, 479 memory_order_release); 480 if (!__state_is_last_reader(old_state) || !__state_have_pending_readers_or_writers(old_state)) { 481 return 0; 482 } 483 484 } else { 485 return EPERM; 486 } 487 488 // Wake up pending readers or writers. 489 rwlock->pending_lock.lock(); 490 if (rwlock->pending_writer_count != 0) { 491 rwlock->pending_writer_wakeup_serial++; 492 rwlock->pending_lock.unlock(); 493 494 __futex_wake_ex(&rwlock->pending_writer_wakeup_serial, rwlock->pshared, 1); 495 496 } else if (rwlock->pending_reader_count != 0) { 497 rwlock->pending_reader_wakeup_serial++; 498 rwlock->pending_lock.unlock(); 499 500 __futex_wake_ex(&rwlock->pending_reader_wakeup_serial, rwlock->pshared, INT_MAX); 501 502 } else { 503 // It happens when waiters are woken up by timeout. 504 rwlock->pending_lock.unlock(); 505 } 506 return 0; 507 } 508