/*--------------------------------------------------------------------*/
/*--- Linux ticket lock implementation         ticket-lock-linux.c ---*/
/*---                                                              ---*/
/*--- Guarantees fair scheduling even if multiple threads are      ---*/
/*--- runnable at the same time on a multicore system. Has been    ---*/
/*--- observed to cause a slow-down compared to the generic        ---*/
/*--- scheduler lock with CPU frequency scaling enabled. Makes     ---*/
/*--- Valgrind slightly faster if CPU frequency scaling has been   ---*/
/*--- disabled. See also http://bugs.kde.org/show_bug.cgi?id=270006---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2011 Bart Van Assche <bvanassche@acm.org>.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"     // VG_(memset)()
#include "pub_core_libcprint.h"
#include "pub_core_syscall.h"
#include "pub_core_vki.h"
#include "pub_core_vkiscnums.h"    // __NR_futex
#include "pub_tool_libcproc.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_inner.h"
#if defined(ENABLE_INNER_CLIENT_REQUEST)
#include "helgrind/helgrind.h"
#endif
#include "priv_sched-lock.h"
#include "priv_sched-lock-impl.h"

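/*
 * Overview (a sketch of what the code below implements): a thread that wants
 * the scheduler lock draws a ticket by atomically incrementing 'tail', and
 * tickets are served strictly in order as 'head' is incremented on each
 * release. Serving tickets in FIFO order is what makes the lock fair;
 * sleeping in the futex syscall instead of spinning keeps waiting threads
 * from burning CPU time.
 */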
#define TL_FUTEX_COUNT_LOG2 4
#define TL_FUTEX_COUNT (1U << TL_FUTEX_COUNT_LOG2)
#define TL_FUTEX_MASK (TL_FUTEX_COUNT - 1)
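/*
 * Mapping from tickets to futex words (illustration of the defines above):
 * with TL_FUTEX_COUNT_LOG2 == 4 there are 16 futex words and ticket N waits
 * on futex[N & TL_FUTEX_MASK], so e.g. ticket 17 sleeps on futex[1].
 * Spreading waiters over several words means a release only wakes the
 * threads sleeping on one of the 16 words rather than every waiter.
 */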

struct sched_lock {
   volatile unsigned head;
   volatile unsigned tail;
   volatile unsigned futex[TL_FUTEX_COUNT];
   int owner;
};
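/*
 * Field usage (as implemented below): 'tail' holds the next ticket number to
 * hand out, 'head' holds the ticket currently being served, 'owner' is the
 * tid of the lock holder (0 when the lock is free), and each futex word is
 * incremented before FUTEX_WAKE so that a thread about to sleep on the
 * corresponding slot either sees the changed value or has its FUTEX_WAIT
 * fail with EAGAIN instead of missing the wakeup.
 */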

#if 1
static Bool s_debug;
#else
static Bool s_debug = True;
#endif
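/* Change the "#if 1" above to "#if 0" to enable the verbose tracing in
   acquire_sched_lock() and release_sched_lock(). */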

static const Char *get_sched_lock_name(void)
{
   return "ticket lock";
}

static struct sched_lock *create_sched_lock(void)
{
   struct sched_lock *p;

   p = VG_(malloc)("sched_lock", sizeof(*p));
   if (p) {
      // The futex syscall requires that a futex takes four bytes.
      vg_assert(sizeof(p->futex[0]) == 4);

      p->head = 0;
      p->tail = 0;
      VG_(memset)((void*)p->futex, 0, sizeof(p->futex));
      p->owner = 0;
   }
   INNER_REQUEST(ANNOTATE_RWLOCK_CREATE(p));
   INNER_REQUEST(ANNOTATE_BENIGN_RACE_SIZED(&p->futex, sizeof(p->futex), ""));
   return p;
}

static void destroy_sched_lock(struct sched_lock *p)
{
   INNER_REQUEST(ANNOTATE_RWLOCK_DESTROY(p));
   VG_(free)(p);
}

static int get_sched_lock_owner(struct sched_lock *p)
{
   return p->owner;
}

/*
 * Acquire a ticket lock. Increment the tail of the queue and use the original
 * value as the ticket value. Wait until the head of the queue equals the
 * ticket value. The futex used to wait depends on the ticket value in order
 * to avoid waking up every waiting thread each time a ticket lock is
 * released, an effect sometimes called the "thundering herd" effect.
 *
 * See also Nick Piggin, x86: FIFO ticket spinlocks, Linux kernel mailing list
 * (http://lkml.org/lkml/2007/11/1/125) for more info.
 */
static void acquire_sched_lock(struct sched_lock *p)
{
   unsigned ticket, futex_value;
   volatile unsigned *futex;
   SysRes sres;

   ticket = __sync_fetch_and_add(&p->tail, 1);
   futex = &p->futex[ticket & TL_FUTEX_MASK];
   if (s_debug)
      VG_(printf)("[%d/%d] acquire: ticket %d\n", VG_(getpid)(),
                  VG_(gettid)(), ticket);
   for (;;) {
      futex_value = *futex;
      __sync_synchronize();
      if (ticket == p->head)
         break;
      if (s_debug)
         VG_(printf)("[%d/%d] acquire: ticket %d - waiting until"
                     " futex[%ld] != %d\n", VG_(getpid)(),
                     VG_(gettid)(), ticket, (long)(futex - p->futex),
                     futex_value);
      sres = VG_(do_syscall3)(__NR_futex, (UWord)futex,
                              VKI_FUTEX_WAIT | VKI_FUTEX_PRIVATE_FLAG,
                              futex_value);
      if (sr_isError(sres) && sres._val != VKI_EAGAIN) {
         VG_(printf)("futex_wait() returned error code %ld\n", sres._val);
         vg_assert(False);
      }
   }
   __sync_synchronize();
   INNER_REQUEST(ANNOTATE_RWLOCK_ACQUIRED(p, /*is_w*/1));
   vg_assert(p->owner == 0);
   p->owner = VG_(gettid)();
}
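/*
 * Worked example of the above (illustrative only): with head == tail == 0,
 * thread A acquires and gets ticket 0; ticket == head, so it breaks out of
 * the loop immediately and becomes the owner. Thread B then acquires, gets
 * ticket 1 and, since head is still 0, sleeps via FUTEX_WAIT on futex[1].
 * When A releases, head becomes 1, futex[1] is incremented and FUTEX_WAKE
 * is issued on it, after which B re-checks ticket == head and proceeds.
 */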

/*
 * Release a ticket lock by incrementing the head of the queue. Only generate
 * a thread wakeup signal if at least one thread is waiting. If the queue tail
 * matches the wakeup_ticket value, no threads have to be woken up.
 *
 * Note: tail will only be read after head has been incremented since both are
 * declared as volatile and since the __sync...() functions imply a memory
 * barrier.
 */
static void release_sched_lock(struct sched_lock *p)
{
   unsigned wakeup_ticket, futex_value;
   volatile unsigned *futex;
   SysRes sres;

   vg_assert(p->owner != 0);
   p->owner = 0;
   INNER_REQUEST(ANNOTATE_RWLOCK_RELEASED(p, /*is_w*/1));
   wakeup_ticket = __sync_fetch_and_add(&p->head, 1) + 1;
   if (p->tail != wakeup_ticket) {
      futex = &p->futex[wakeup_ticket & TL_FUTEX_MASK];
      futex_value = __sync_fetch_and_add(futex, 1);
      if (s_debug)
         VG_(printf)("[%d/%d] release: waking up ticket %d (futex[%ld] = %d)"
                     "\n", VG_(getpid)(), VG_(gettid)(), wakeup_ticket,
                     (long)(futex - p->futex), futex_value);
      sres = VG_(do_syscall3)(__NR_futex, (UWord)futex,
                              VKI_FUTEX_WAKE | VKI_FUTEX_PRIVATE_FLAG,
                              0x7fffffff);
      vg_assert(!sr_isError(sres));
   } else {
      if (s_debug)
         VG_(printf)("[%d/%d] release: no thread is waiting for ticket %d\n",
                     VG_(getpid)(), VG_(gettid)(), wakeup_ticket);
   }
}
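/*
 * Note on the wakeup count of 0x7fffffff above: tickets that are
 * TL_FUTEX_COUNT apart share a futex word, so more than one thread may be
 * sleeping on the word being signalled. Waking them all is safe because each
 * woken thread re-checks ticket == head in acquire_sched_lock() and goes
 * back to sleep if it is not its turn.
 */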

const struct sched_lock_ops ML_(linux_ticket_lock_ops) = {
   .get_sched_lock_name  = get_sched_lock_name,
   .create_sched_lock    = create_sched_lock,
   .destroy_sched_lock   = destroy_sched_lock,
   .get_sched_lock_owner = get_sched_lock_owner,
   .acquire_sched_lock   = acquire_sched_lock,
   .release_sched_lock   = release_sched_lock,
};
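/*
 * Illustrative use of the ops table above (a sketch; scheduler code normally
 * reaches these functions through the generic wrappers declared in
 * priv_sched-lock.h rather than calling them directly):
 *
 *    struct sched_lock *sl = ML_(linux_ticket_lock_ops).create_sched_lock();
 *    ML_(linux_ticket_lock_ops).acquire_sched_lock(sl);
 *    ... code that must hold the scheduler lock ...
 *    ML_(linux_ticket_lock_ops).release_sched_lock(sl);
 *    ML_(linux_ticket_lock_ops).destroy_sched_lock(sl);
 */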