Home | History | Annotate | Download | only in bionic
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  *  * Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  *  * Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in
     12  *    the documentation and/or other materials provided with the
     13  *    distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     18  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     19  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
     22  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
     25  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include <pthread.h>
     30 
     31 #include <errno.h>
     32 #include <string.h>
     33 #include <sys/mman.h>
     34 #include <sys/prctl.h>
     35 #include <sys/random.h>
     36 #include <unistd.h>
     37 
     38 #include "pthread_internal.h"
     39 
     40 #include <async_safe/log.h>
     41 
     42 #include "private/bionic_constants.h"
     43 #include "private/bionic_defs.h"
     44 #include "private/bionic_globals.h"
     45 #include "private/bionic_macros.h"
     46 #include "private/bionic_ssp.h"
     47 #include "private/bionic_systrace.h"
     48 #include "private/bionic_tls.h"
     49 #include "private/ErrnoRestorer.h"
     50 
     51 // x86 uses segment descriptors rather than a direct pointer to TLS.
     52 #if defined(__i386__)
     53 #include <asm/ldt.h>
     54 void __init_user_desc(struct user_desc*, bool, void*);
     55 #endif
     56 
     57 // This code is used both by each new pthread and the code that initializes the main thread.
     58 __attribute__((no_stack_protector))
     59 void __init_tcb(bionic_tcb* tcb, pthread_internal_t* thread) {
     60 #ifdef TLS_SLOT_SELF
     61   // On x86, slot 0 must point to itself so code can read the thread pointer by
     62   // loading %fs:0 or %gs:0.
     63   tcb->tls_slot(TLS_SLOT_SELF) = &tcb->tls_slot(TLS_SLOT_SELF);
     64 #endif
     65   tcb->tls_slot(TLS_SLOT_THREAD_ID) = thread;
     66 }
     67 
     68 __attribute__((no_stack_protector))
     69 void __init_tcb_stack_guard(bionic_tcb* tcb) {
     70   // GCC looks in the TLS for the stack guard on x86, so copy it there from our global.
     71   tcb->tls_slot(TLS_SLOT_STACK_GUARD) = reinterpret_cast<void*>(__stack_chk_guard);
     72 }
     73 
     74 __attribute__((no_stack_protector))
     75 void __init_tcb_dtv(bionic_tcb* tcb) {
     76   // Initialize the DTV slot to a statically-allocated empty DTV. The first
     77   // access to a dynamic TLS variable allocates a new DTV.
     78   static const TlsDtv zero_dtv = {};
     79   __set_tcb_dtv(tcb, const_cast<TlsDtv*>(&zero_dtv));
     80 }
     81 
     82 void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls) {
     83   tcb->thread()->bionic_tls = tls;
     84   tcb->tls_slot(TLS_SLOT_BIONIC_TLS) = tls;
     85 }
     86 
     87 // Allocate a temporary bionic_tls that the dynamic linker's main thread can
     88 // use while it's loading the initial set of ELF modules.
     89 bionic_tls* __allocate_temp_bionic_tls() {
     90   size_t allocation_size = __BIONIC_ALIGN(sizeof(bionic_tls), PAGE_SIZE);
     91   void* allocation = mmap(nullptr, allocation_size,
     92                           PROT_READ | PROT_WRITE,
     93                           MAP_PRIVATE | MAP_ANONYMOUS,
     94                           -1, 0);
     95   if (allocation == MAP_FAILED) {
     96     // Avoid strerror because it might need bionic_tls.
     97     async_safe_fatal("failed to allocate bionic_tls: error %d", errno);
     98   }
     99   return static_cast<bionic_tls*>(allocation);
    100 }
    101 
    102 void __free_temp_bionic_tls(bionic_tls* tls) {
    103   munmap(tls, __BIONIC_ALIGN(sizeof(bionic_tls), PAGE_SIZE));
    104 }
    105 
    106 static void __init_alternate_signal_stack(pthread_internal_t* thread) {
    107   // Create and set an alternate signal stack.
    108   void* stack_base = mmap(nullptr, SIGNAL_STACK_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    109   if (stack_base != MAP_FAILED) {
    110     // Create a guard to catch stack overflows in signal handlers.
    111     if (mprotect(stack_base, PTHREAD_GUARD_SIZE, PROT_NONE) == -1) {
    112       munmap(stack_base, SIGNAL_STACK_SIZE);
    113       return;
    114     }
    115     stack_t ss;
    116     ss.ss_sp = reinterpret_cast<uint8_t*>(stack_base) + PTHREAD_GUARD_SIZE;
    117     ss.ss_size = SIGNAL_STACK_SIZE - PTHREAD_GUARD_SIZE;
    118     ss.ss_flags = 0;
    119     sigaltstack(&ss, nullptr);
    120     thread->alternate_signal_stack = stack_base;
    121 
    122     // We can only use const static allocated string for mapped region name, as Android kernel
    123     // uses the string pointer directly when dumping /proc/pid/maps.
    124     prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ss.ss_sp, ss.ss_size, "thread signal stack");
    125   }
    126 }
    127 
    128 static void __init_shadow_call_stack(pthread_internal_t* thread __unused) {
    129 #ifdef __aarch64__
    130   // Allocate the stack and the guard region.
    131   char* scs_guard_region = reinterpret_cast<char*>(
    132       mmap(nullptr, SCS_GUARD_REGION_SIZE, 0, MAP_PRIVATE | MAP_ANON, -1, 0));
    133   thread->shadow_call_stack_guard_region = scs_guard_region;
    134 
    135   // The address is aligned to SCS_SIZE so that we only need to store the lower log2(SCS_SIZE) bits
    136   // in jmp_buf.
    137   char* scs_aligned_guard_region =
    138       reinterpret_cast<char*>(align_up(reinterpret_cast<uintptr_t>(scs_guard_region), SCS_SIZE));
    139 
    140   // We need to ensure that [scs_offset,scs_offset+SCS_SIZE) is in the guard region and that there
    141   // is at least one unmapped page after the shadow call stack (to catch stack overflows). We can't
    142   // use arc4random_uniform in init because /dev/urandom might not have been created yet.
    143   size_t scs_offset =
    144       (getpid() == 1) ? 0 : (arc4random_uniform(SCS_GUARD_REGION_SIZE / SCS_SIZE - 1) * SCS_SIZE);
    145 
    146   // Make the stack readable and writable and store its address in register x18. This is
    147   // deliberately the only place where the address is stored.
    148   char *scs = scs_aligned_guard_region + scs_offset;
    149   mprotect(scs, SCS_SIZE, PROT_READ | PROT_WRITE);
    150   __asm__ __volatile__("mov x18, %0" ::"r"(scs));
    151 #endif
    152 }
    153 
    154 void __init_additional_stacks(pthread_internal_t* thread) {
    155   __init_alternate_signal_stack(thread);
    156   __init_shadow_call_stack(thread);
    157 }
    158 
    159 int __init_thread(pthread_internal_t* thread) {
    160   thread->cleanup_stack = nullptr;
    161 
    162   if (__predict_true((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) == 0)) {
    163     atomic_init(&thread->join_state, THREAD_NOT_JOINED);
    164   } else {
    165     atomic_init(&thread->join_state, THREAD_DETACHED);
    166   }
    167 
    168   // Set the scheduling policy/priority of the thread if necessary.
    169   bool need_set = true;
    170   int policy;
    171   sched_param param;
    172   if ((thread->attr.flags & PTHREAD_ATTR_FLAG_INHERIT) != 0) {
    173     // Unless the parent has SCHED_RESET_ON_FORK set, we've already inherited from the parent.
    174     policy = sched_getscheduler(0);
    175     need_set = ((policy & SCHED_RESET_ON_FORK) != 0);
    176     if (need_set) {
    177       if (policy == -1) {
    178         async_safe_format_log(ANDROID_LOG_WARN, "libc",
    179                               "pthread_create sched_getscheduler failed: %s", strerror(errno));
    180         return errno;
    181       }
    182       if (sched_getparam(0, &param) == -1) {
    183         async_safe_format_log(ANDROID_LOG_WARN, "libc",
    184                               "pthread_create sched_getparam failed: %s", strerror(errno));
    185         return errno;
    186       }
    187     }
    188   } else {
    189     policy = thread->attr.sched_policy;
    190     param.sched_priority = thread->attr.sched_priority;
    191   }
    192   // Backwards compatibility: before P, Android didn't have pthread_attr_setinheritsched,
    193   // and our behavior was neither of the POSIX behaviors.
    194   if ((thread->attr.flags & (PTHREAD_ATTR_FLAG_INHERIT|PTHREAD_ATTR_FLAG_EXPLICIT)) == 0) {
    195     need_set = (thread->attr.sched_policy != SCHED_NORMAL);
    196   }
    197   if (need_set) {
    198     if (sched_setscheduler(thread->tid, policy, &param) == -1) {
    199       async_safe_format_log(ANDROID_LOG_WARN, "libc",
    200                             "pthread_create sched_setscheduler(%d, {%d}) call failed: %s", policy,
    201                             param.sched_priority, strerror(errno));
    202 #if defined(__LP64__)
    203       // For backwards compatibility reasons, we only report failures on 64-bit devices.
    204       return errno;
    205 #endif
    206     }
    207   }
    208 
    209   return 0;
    210 }
    211 
    212 
    213 // Allocate a thread's primary mapping. This mapping includes static TLS and
    214 // optionally a stack. Static TLS includes ELF TLS segments and the bionic_tls
    215 // struct.
    216 //
    217 // The stack_guard_size must be a multiple of the PAGE_SIZE.
    218 ThreadMapping __allocate_thread_mapping(size_t stack_size, size_t stack_guard_size) {
    219   const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
    220 
    221   // Allocate in order: stack guard, stack, static TLS, guard page.
    222   size_t mmap_size;
    223   if (__builtin_add_overflow(stack_size, stack_guard_size, &mmap_size)) return {};
    224   if (__builtin_add_overflow(mmap_size, layout.size(), &mmap_size)) return {};
    225   if (__builtin_add_overflow(mmap_size, PTHREAD_GUARD_SIZE, &mmap_size)) return {};
    226 
    227   // Align the result to a page size.
    228   const size_t unaligned_size = mmap_size;
    229   mmap_size = __BIONIC_ALIGN(mmap_size, PAGE_SIZE);
    230   if (mmap_size < unaligned_size) return {};
    231 
    232   // Create a new private anonymous map. Make the entire mapping PROT_NONE, then carve out a
    233   // read+write area in the middle.
    234   const int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
    235   char* const space = static_cast<char*>(mmap(nullptr, mmap_size, PROT_NONE, flags, -1, 0));
    236   if (space == MAP_FAILED) {
    237     async_safe_format_log(ANDROID_LOG_WARN,
    238                           "libc",
    239                           "pthread_create failed: couldn't allocate %zu-bytes mapped space: %s",
    240                           mmap_size, strerror(errno));
    241     return {};
    242   }
    243   const size_t writable_size = mmap_size - stack_guard_size - PTHREAD_GUARD_SIZE;
    244   if (mprotect(space + stack_guard_size,
    245                writable_size,
    246                PROT_READ | PROT_WRITE) != 0) {
    247     async_safe_format_log(ANDROID_LOG_WARN, "libc",
    248                           "pthread_create failed: couldn't mprotect R+W %zu-byte thread mapping region: %s",
    249                           writable_size, strerror(errno));
    250     munmap(space, mmap_size);
    251     return {};
    252   }
    253 
    254   ThreadMapping result = {};
    255   result.mmap_base = space;
    256   result.mmap_size = mmap_size;
    257   result.static_tls = space + mmap_size - PTHREAD_GUARD_SIZE - layout.size();
    258   result.stack_base = space;
    259   result.stack_top = result.static_tls;
    260   return result;
    261 }
    262 
    263 static int __allocate_thread(pthread_attr_t* attr, bionic_tcb** tcbp, void** child_stack) {
    264   ThreadMapping mapping;
    265   char* stack_top;
    266   bool stack_clean = false;
    267 
    268   if (attr->stack_base == nullptr) {
    269     // The caller didn't provide a stack, so allocate one.
    270 
    271     // Make sure the guard size is a multiple of PAGE_SIZE.
    272     const size_t unaligned_guard_size = attr->guard_size;
    273     attr->guard_size = __BIONIC_ALIGN(attr->guard_size, PAGE_SIZE);
    274     if (attr->guard_size < unaligned_guard_size) return EAGAIN;
    275 
    276     mapping = __allocate_thread_mapping(attr->stack_size, attr->guard_size);
    277     if (mapping.mmap_base == nullptr) return EAGAIN;
    278 
    279     stack_top = mapping.stack_top;
    280     attr->stack_base = mapping.stack_base;
    281     stack_clean = true;
    282   } else {
    283     mapping = __allocate_thread_mapping(0, PTHREAD_GUARD_SIZE);
    284     if (mapping.mmap_base == nullptr) return EAGAIN;
    285 
    286     stack_top = static_cast<char*>(attr->stack_base) + attr->stack_size;
    287   }
    288 
    289   // Carve out space from the stack for the thread's pthread_internal_t. This
    290   // memory isn't counted in pthread_attr_getstacksize.
    291 
    292   // To safely access the pthread_internal_t and thread stack, we need to find a 16-byte aligned boundary.
    293   stack_top = align_down(stack_top - sizeof(pthread_internal_t), 16);
    294 
    295   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
    296   if (!stack_clean) {
    297     // If thread was not allocated by mmap(), it may not have been cleared to zero.
    298     // So assume the worst and zero it.
    299     memset(thread, 0, sizeof(pthread_internal_t));
    300   }
    301 
    302   // Locate static TLS structures within the mapped region.
    303   const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
    304   auto tcb = reinterpret_cast<bionic_tcb*>(mapping.static_tls + layout.offset_bionic_tcb());
    305   auto tls = reinterpret_cast<bionic_tls*>(mapping.static_tls + layout.offset_bionic_tls());
    306 
    307   // Initialize TLS memory.
    308   __init_static_tls(mapping.static_tls);
    309   __init_tcb(tcb, thread);
    310   __init_tcb_dtv(tcb);
    311   __init_tcb_stack_guard(tcb);
    312   __init_bionic_tls_ptrs(tcb, tls);
    313 
    314   attr->stack_size = stack_top - static_cast<char*>(attr->stack_base);
    315   thread->attr = *attr;
    316   thread->mmap_base = mapping.mmap_base;
    317   thread->mmap_size = mapping.mmap_size;
    318 
    319   *tcbp = tcb;
    320   *child_stack = stack_top;
    321   return 0;
    322 }
    323 
    324 __attribute__((no_sanitize("hwaddress")))
    325 static int __pthread_start(void* arg) {
    326   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(arg);
    327 
    328   __hwasan_thread_enter();
    329 
    330   // Wait for our creating thread to release us. This lets it have time to
    331   // notify gdb about this thread before we start doing anything.
    332   // This also provides the memory barrier needed to ensure that all memory
    333   // accesses previously made by the creating thread are visible to us.
    334   thread->startup_handshake_lock.lock();
    335 
    336   __init_additional_stacks(thread);
    337 
    338   void* result = thread->start_routine(thread->start_routine_arg);
    339   pthread_exit(result);
    340 
    341   return 0;
    342 }
    343 
    344 // A dummy start routine for pthread_create failures where we've created a thread but aren't
    345 // going to run user code on it. We swap out the user's start routine for this and take advantage
    346 // of the regular thread teardown to free up resources.
    347 static void* __do_nothing(void*) {
    348   return nullptr;
    349 }
    350 
    351 
    352 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
    353 int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
    354                    void* (*start_routine)(void*), void* arg) {
    355   ErrnoRestorer errno_restorer;
    356 
    357   pthread_attr_t thread_attr;
    358   ScopedTrace trace("pthread_create");
    359   if (attr == nullptr) {
    360     pthread_attr_init(&thread_attr);
    361   } else {
    362     thread_attr = *attr;
    363     attr = nullptr; // Prevent misuse below.
    364   }
    365 
    366   bionic_tcb* tcb = nullptr;
    367   void* child_stack = nullptr;
    368   int result = __allocate_thread(&thread_attr, &tcb, &child_stack);
    369   if (result != 0) {
    370     return result;
    371   }
    372 
    373   pthread_internal_t* thread = tcb->thread();
    374 
    375   // Create a lock for the thread to wait on once it starts so we can keep
    376   // it from doing anything until after we notify the debugger about it
    377   //
    378   // This also provides the memory barrier we need to ensure that all
    379   // memory accesses previously performed by this thread are visible to
    380   // the new thread.
    381   thread->startup_handshake_lock.init(false);
    382   thread->startup_handshake_lock.lock();
    383 
    384   thread->start_routine = start_routine;
    385   thread->start_routine_arg = arg;
    386 
    387   thread->set_cached_pid(getpid());
    388 
    389   int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
    390       CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
    391   void* tls = &tcb->tls_slot(0);
    392 #if defined(__i386__)
    393   // On x86 (but not x86-64), CLONE_SETTLS takes a pointer to a struct user_desc rather than
    394   // a pointer to the TLS itself.
    395   user_desc tls_descriptor;
    396   __init_user_desc(&tls_descriptor, false, tls);
    397   tls = &tls_descriptor;
    398 #endif
    399   int rc = clone(__pthread_start, child_stack, flags, thread, &(thread->tid), tls, &(thread->tid));
    400   if (rc == -1) {
    401     int clone_errno = errno;
    402     // We don't have to unlock the mutex at all because clone(2) failed so there's no child waiting to
    403     // be unblocked, but we're about to unmap the memory the mutex is stored in, so this serves as a
    404     // reminder that you can't rewrite this function to use a ScopedPthreadMutexLocker.
    405     thread->startup_handshake_lock.unlock();
    406     if (thread->mmap_size != 0) {
    407       munmap(thread->mmap_base, thread->mmap_size);
    408     }
    409     async_safe_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: clone failed: %s",
    410                           strerror(clone_errno));
    411     return clone_errno;
    412   }
    413 
    414   int init_errno = __init_thread(thread);
    415   if (init_errno != 0) {
    416     // Mark the thread detached and replace its start_routine with a no-op.
    417     // Letting the thread run is the easiest way to clean up its resources.
    418     atomic_store(&thread->join_state, THREAD_DETACHED);
    419     __pthread_internal_add(thread);
    420     thread->start_routine = __do_nothing;
    421     thread->startup_handshake_lock.unlock();
    422     return init_errno;
    423   }
    424 
    425   // Publish the pthread_t and unlock the mutex to let the new thread start running.
    426   *thread_out = __pthread_internal_add(thread);
    427   thread->startup_handshake_lock.unlock();
    428 
    429   return 0;
    430 }
    431