Home | History | Annotate | Download | only in drd
      1 /*
      2   This file is part of drd, a thread error detector.
      3 
      4   Copyright (C) 2006-2012 Bart Van Assche <bvanassche (at) acm.org>.
      5 
      6   This program is free software; you can redistribute it and/or
      7   modify it under the terms of the GNU General Public License as
      8   published by the Free Software Foundation; either version 2 of the
      9   License, or (at your option) any later version.
     10 
     11   This program is distributed in the hope that it will be useful, but
     12   WITHOUT ANY WARRANTY; without even the implied warranty of
     13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14   General Public License for more details.
     15 
     16   You should have received a copy of the GNU General Public License
     17   along with this program; if not, write to the Free Software
     18   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     19   02111-1307, USA.
     20 
     21   The GNU General Public License is contained in the file COPYING.
     22 */
     23 
     24 
     25 #include "drd_barrier.h"
     26 #include "drd_clientobj.h"
     27 #include "drd_error.h"
     28 #include "drd_suppression.h"
     29 #include "pub_tool_errormgr.h"    // VG_(maybe_record_error)()
     30 #include "pub_tool_libcassert.h"  // tl_assert()
     31 #include "pub_tool_libcprint.h"   // VG_(printf)()
     32 #include "pub_tool_machine.h"     // VG_(get_IP)()
     33 #include "pub_tool_mallocfree.h"  // VG_(malloc)(), VG_(free)()
     34 #include "pub_tool_oset.h"
     35 #include "pub_tool_threadstate.h" // VG_(get_running_tid)()
     36 
     37 
     38 /* Type definitions. */
     39 
     40 /** Information associated with one thread participating in a barrier. */
     41 struct barrier_thread_info
     42 {
     43    UWord       tid;           // A DrdThreadId declared as UWord because
     44                               // this member variable is the key of an OSet.
     45    Segment*    sg;            // Segment of the last pthread_barrier() call
     46                               // by thread tid.
     47    Segment*    post_wait_sg;  // Segment created after *_barrier_wait() finished
     48    ExeContext* wait_call_ctxt;// call stack for *_barrier_wait() call.
     49    Bool       thread_finished;// Whether thread 'tid' has finished.
     50 };
     51 
     52 
     53 /* Local functions. */
     54 
     55 static void barrier_cleanup(struct barrier_info* p);
     56 static void barrier_delete_thread(struct barrier_info* const p,
     57                                   const DrdThreadId tid);
     58 static const char* barrier_get_typename(struct barrier_info* const p);
     59 static const char* barrier_type_name(const BarrierT bt);
     60 static
     61 void barrier_report_wait_delete_race(const struct barrier_info* const p,
     62                                      const struct barrier_thread_info* const q);
     63 
     64 
     65 /* Local variables. */
     66 
     67 static Bool  s_trace_barrier = False;
     68 static ULong s_barrier_segment_creation_count;
     69 
     70 
     71 /* Function definitions. */
     72 
     73 void DRD_(barrier_set_trace)(const Bool trace_barrier)
     74 {
     75    s_trace_barrier = trace_barrier;
     76 }
     77 
     78 /**
     79  * Initialize the structure *p with the specified thread ID and iteration
     80  * information.
     81  */
     82 static
     83 void DRD_(barrier_thread_initialize)(struct barrier_thread_info* const p,
     84                                      const DrdThreadId tid)
     85 {
     86    p->tid             = tid;
     87    p->sg              = NULL;
     88    p->post_wait_sg    = 0;
     89    p->wait_call_ctxt  = 0;
     90    p->thread_finished = False;
     91 }
     92 
     93 /**
     94  * Deallocate the memory that is owned by members of
     95  * struct barrier_thread_info.
     96  */
     97 static void DRD_(barrier_thread_destroy)(struct barrier_thread_info* const p)
     98 {
     99    tl_assert(p);
    100    DRD_(sg_put)(p->sg);
    101    DRD_(sg_put)(p->post_wait_sg);
    102 }
    103 
    104 /**
    105  * Initialize the structure *p with the specified client-side barrier address,
    106  * barrier object size and number of participants in each barrier.
    107  */
    108 static
    109 void DRD_(barrier_initialize)(struct barrier_info* const p,
    110                               const Addr barrier,
    111                               const BarrierT barrier_type,
    112                               const Word count)
    113 {
    114    int i;
    115 
    116    tl_assert(barrier != 0);
    117    tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
    118    tl_assert(p->a1 == barrier);
    119 
    120    p->cleanup           = (void(*)(DrdClientobj*))barrier_cleanup;
    121    p->delete_thread
    122       = (void(*)(DrdClientobj*, DrdThreadId))barrier_delete_thread;
    123    p->barrier_type      = barrier_type;
    124    p->count             = count;
    125    p->pre_iteration     = 0;
    126    p->post_iteration    = 0;
    127    p->pre_waiters_left  = count;
    128    p->post_waiters_left = count;
    129 
    130    tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) == sizeof(Word));
    131    tl_assert(sizeof(((struct barrier_thread_info*)0)->tid)
    132              >= sizeof(DrdThreadId));
    133    for (i = 0; i < 2; i++) {
    134       p->oset[i] = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1",
    135                                        VG_(free));
    136    }
    137 }
    138 
    139 /**
    140  * Deallocate the memory owned by the struct barrier_info object and also
    141  * all the nodes in the OSet p->oset.
    142  *
    143  * Called by clientobj_destroy().
    144  */
    145 static void barrier_cleanup(struct barrier_info* p)
    146 {
    147    struct barrier_thread_info* q;
    148    Segment* latest_sg = 0;
    149    OSet* oset;
    150    int i;
    151 
    152    tl_assert(p);
    153 
    154    DRD_(thread_get_latest_segment)(&latest_sg, DRD_(thread_get_running_tid)());
    155    tl_assert(latest_sg);
    156 
    157    if (p->pre_waiters_left != p->count) {
    158       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
    159       VG_(maybe_record_error)(VG_(get_running_tid)(),
    160                               BarrierErr,
    161                               VG_(get_IP)(VG_(get_running_tid)()),
    162                               "Destruction of barrier that is being waited"
    163                               " upon",
    164                               &bei);
    165    } else {
    166       oset = p->oset[1 - (p->pre_iteration & 1)];
    167       VG_(OSetGen_ResetIter)(oset);
    168       for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
    169          if (q->post_wait_sg && !DRD_(vc_lte)(&q->post_wait_sg->vc,
    170                                               &latest_sg->vc))
    171          {
    172             barrier_report_wait_delete_race(p, q);
    173          }
    174          DRD_(barrier_thread_destroy)(q);
    175       }
    176    }
    177 
    178    for (i = 0; i < 2; i++) {
    179       VG_(OSetGen_Destroy)(p->oset[i]);
    180       p->oset[i] = NULL;
    181    }
    182 
    183    DRD_(sg_put)(latest_sg);
    184 }
    185 
    186 /**
    187  * Look up the client-side barrier address barrier in s_barrier[]. If not
    188  * found, add it.
    189  */
    190 static
    191 struct barrier_info*
    192 DRD_(barrier_get_or_allocate)(const Addr barrier,
    193                               const BarrierT barrier_type, const Word count)
    194 {
    195    struct barrier_info *p;
    196 
    197    tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
    198 
    199    tl_assert(offsetof(DrdClientobj, barrier) == 0);
    200    p = &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
    201    if (p == 0)
    202    {
    203       p = &(DRD_(clientobj_add)(barrier, ClientBarrier)->barrier);
    204       DRD_(barrier_initialize)(p, barrier, barrier_type, count);
    205    }
    206    return p;
    207 }
    208 
    209 /**
    210  * Look up the address of the struct barrier_info associated with the
    211  * client-side barrier object.
    212  */
    213 static struct barrier_info* DRD_(barrier_get)(const Addr barrier)
    214 {
    215    tl_assert(offsetof(DrdClientobj, barrier) == 0);
    216    return &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
    217 }
    218 
    219 /**
    220  * Initialize a barrier with given client address, barrier type and number of
    221  * participants. The 'reinitialization' argument indicates whether a barrier
    222  * object is being initialized or reinitialized.
    223  *
    224  * Called before pthread_barrier_init().
    225  */
    226 void DRD_(barrier_init)(const Addr barrier,
    227                         const BarrierT barrier_type, const Word count,
    228                         const Bool reinitialization)
    229 {
    230    struct barrier_info* p;
    231 
    232    tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
    233 
    234    if (count == 0)
    235    {
    236       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
    237       VG_(maybe_record_error)(VG_(get_running_tid)(),
    238                               BarrierErr,
    239                               VG_(get_IP)(VG_(get_running_tid)()),
    240                               "pthread_barrier_init: 'count' argument is zero",
    241                               &bei);
    242    }
    243 
    244    if (! reinitialization && barrier_type == pthread_barrier)
    245    {
    246       p = DRD_(barrier_get)(barrier);
    247       if (p)
    248       {
    249          BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
    250          VG_(maybe_record_error)(VG_(get_running_tid)(),
    251                                  BarrierErr,
    252                                  VG_(get_IP)(VG_(get_running_tid)()),
    253                                  "Barrier reinitialization",
    254                                  &bei);
    255       }
    256    }
    257 
    258    p = DRD_(barrier_get_or_allocate)(barrier, barrier_type, count);
    259 
    260    if (s_trace_barrier) {
    261       if (reinitialization)
    262          DRD_(trace_msg)("[%d] barrier_reinit    %s 0x%lx count %ld -> %ld",
    263                          DRD_(thread_get_running_tid)(),
    264                          barrier_get_typename(p), barrier, p->count, count);
    265       else
    266          DRD_(trace_msg)("[%d] barrier_init      %s 0x%lx",
    267                          DRD_(thread_get_running_tid)(),
    268                          barrier_get_typename(p),
    269                          barrier);
    270    }
    271 
    272    if (reinitialization && p->count != count)
    273    {
    274       if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
    275       {
    276          BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
    277          VG_(maybe_record_error)(VG_(get_running_tid)(),
    278                                  BarrierErr,
    279                                  VG_(get_IP)(VG_(get_running_tid)()),
    280                                  "Reinitialization of barrier with active"
    281                                  " waiters",
    282                                  &bei);
    283       }
    284       p->count = count;
    285    }
    286 }
    287 
    288 /** Called after pthread_barrier_destroy() / gomp_barrier_destroy(). */
    289 void DRD_(barrier_destroy)(const Addr barrier, const BarrierT barrier_type)
    290 {
    291    struct barrier_info* p;
    292 
    293    p = DRD_(barrier_get)(barrier);
    294 
    295    if (s_trace_barrier)
    296       DRD_(trace_msg)("[%d] barrier_destroy   %s 0x%lx",
    297                       DRD_(thread_get_running_tid)(),
    298                       barrier_get_typename(p), barrier);
    299 
    300    if (p == 0)
    301    {
    302       GenericErrInfo GEI = {
    303 	 .tid = DRD_(thread_get_running_tid)(),
    304 	 .addr = barrier,
    305       };
    306       VG_(maybe_record_error)(VG_(get_running_tid)(),
    307                               GenericErr,
    308                               VG_(get_IP)(VG_(get_running_tid)()),
    309                               "Not a barrier",
    310                               &GEI);
    311       return;
    312    }
    313 
    314    if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
    315    {
    316       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
    317       VG_(maybe_record_error)(VG_(get_running_tid)(),
    318                               BarrierErr,
    319                               VG_(get_IP)(VG_(get_running_tid)()),
    320                               "Destruction of a barrier with active waiters",
    321                               &bei);
    322    }
    323 
    324    DRD_(clientobj_remove)(p->a1, ClientBarrier);
    325 }
    326 
    327 /** Called before pthread_barrier_wait() / gomp_barrier_wait(). */
    328 void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier,
    329                             const BarrierT barrier_type)
    330 {
    331    struct barrier_info* p;
    332    struct barrier_thread_info* q;
    333    const UWord word_tid = tid;
    334    OSet* oset;
    335 
    336    p = DRD_(barrier_get)(barrier);
    337    if (p == 0 && barrier_type == gomp_barrier) {
    338       /*
    339        * gomp_barrier_wait() call has been intercepted but gomp_barrier_init()
    340        * not. The only cause I know of that can trigger this is that libgomp.so
    341        * has been compiled with --enable-linux-futex.
    342        */
    343       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), 0, 0, 0 };
    344       VG_(maybe_record_error)(VG_(get_running_tid)(),
    345                               BarrierErr,
    346                               VG_(get_IP)(VG_(get_running_tid)()),
    347                               "Please verify whether gcc has been configured"
    348                               " with option --disable-linux-futex. See also"
    349                               " the section about OpenMP in the DRD manual.",
    350                               &bei);
    351    }
    352    tl_assert(p);
    353 
    354    if (s_trace_barrier)
    355       DRD_(trace_msg)("[%d] barrier_pre_wait  %s 0x%lx iteration %ld",
    356                       DRD_(thread_get_running_tid)(),
    357                       barrier_get_typename(p), barrier, p->pre_iteration);
    358 
    359    /* Clean up nodes associated with finished threads. */
    360    oset = p->oset[p->pre_iteration & 1];
    361    tl_assert(oset);
    362    VG_(OSetGen_ResetIter)(oset);
    363    for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
    364       if (q->thread_finished) {
    365          void* r = VG_(OSetGen_Remove)(oset, &q->tid);
    366          tl_assert(r == q);
    367          DRD_(barrier_thread_destroy)(q);
    368          VG_(OSetGen_FreeNode)(oset, q);
    369          VG_(OSetGen_ResetIterAt)(oset, &word_tid);
    370       }
    371    }
    372    /* Allocate the per-thread data structure if necessary. */
    373    q = VG_(OSetGen_Lookup)(oset, &word_tid);
    374    if (q == NULL) {
    375       q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
    376       DRD_(barrier_thread_initialize)(q, tid);
    377       VG_(OSetGen_Insert)(oset, q);
    378       tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
    379    }
    380 
    381    /* Record *_barrier_wait() call context. */
    382    q->wait_call_ctxt = VG_(record_ExeContext)(VG_(get_running_tid)(), 0);
    383 
    384    /*
    385     * Store a pointer to the latest segment of the current thread in the
    386     * per-thread data structure.
    387     */
    388    DRD_(thread_get_latest_segment)(&q->sg, tid);
    389 
    390    /*
    391     * If the same number of threads as the barrier count indicates have
    392     * called the pre *_barrier_wait() wrapper, toggle p->pre_iteration and
    393     * reset the p->pre_waiters_left counter.
    394     */
    395    if (--p->pre_waiters_left <= 0)
    396    {
    397       p->pre_iteration++;
    398       p->pre_waiters_left = p->count;
    399    }
    400 }
    401 
    402 /** Called after pthread_barrier_wait() / gomp_barrier_wait(). */
    403 void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier,
    404                              const BarrierT barrier_type, const Bool waited,
    405                              const Bool serializing)
    406 {
    407    struct barrier_info* p;
    408    const UWord word_tid = tid;
    409    struct barrier_thread_info* q;
    410    struct barrier_thread_info* r;
    411    OSet* oset;
    412 
    413    p = DRD_(barrier_get)(barrier);
    414 
    415    if (s_trace_barrier)
    416       DRD_(trace_msg)("[%d] barrier_post_wait %s 0x%lx iteration %ld%s",
    417                       tid, p ? barrier_get_typename(p) : "(?)",
    418                       barrier, p ? p->post_iteration : -1,
    419                       serializing ? " (serializing)" : "");
    420 
    421    /*
    422     * If p == 0, this means that the barrier has been destroyed after
    423     * *_barrier_wait() returned and before this function was called. Just
    424     * return in that case -- race conditions between *_barrier_wait()
    425     * and *_barrier_destroy() are detected by the *_barrier_destroy() wrapper.
    426     */
    427    if (p == 0)
    428       return;
    429 
    430    /* If the *_barrier_wait() call returned an error code, exit. */
    431    if (! waited)
    432       return;
    433 
    434    oset = p->oset[p->post_iteration & 1];
    435    q = VG_(OSetGen_Lookup)(oset, &word_tid);
    436    if (p->pre_iteration - p->post_iteration > 1) {
    437       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
    438       VG_(maybe_record_error)(VG_(get_running_tid)(),
    439                               BarrierErr,
    440                               VG_(get_IP)(VG_(get_running_tid)()),
    441                               "Number of concurrent pthread_barrier_wait()"
    442                               " calls exceeds the barrier count",
    443                               &bei);
    444    } else if (q == NULL) {
    445       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
    446       VG_(maybe_record_error)(VG_(get_running_tid)(),
    447                               BarrierErr,
    448                               VG_(get_IP)(VG_(get_running_tid)()),
    449                               "Error in barrier implementation"
    450                               " -- barrier_wait() started before"
    451                               " barrier_destroy() and finished after"
    452                               " barrier_destroy()",
    453                               &bei);
    454    }
    455    if (q == NULL) {
    456       q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
    457       DRD_(barrier_thread_initialize)(q, tid);
    458       VG_(OSetGen_Insert)(oset, q);
    459       tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
    460       DRD_(thread_get_latest_segment)(&q->sg, tid);
    461    }
    462 
    463    /* Create a new segment and store a pointer to that segment. */
    464    DRD_(thread_new_segment)(tid);
    465    DRD_(thread_get_latest_segment)(&q->post_wait_sg, tid);
    466    s_barrier_segment_creation_count++;
    467 
    468    /*
    469     * Combine all vector clocks that were stored in the pre_barrier_wait
    470     * wrapper with the vector clock of the current thread.
    471     */
    472    {
    473       VectorClock old_vc;
    474 
    475       DRD_(vc_copy)(&old_vc, DRD_(thread_get_vc)(tid));
    476       VG_(OSetGen_ResetIter)(oset);
    477       for ( ; (r = VG_(OSetGen_Next)(oset)) != 0; )
    478       {
    479          if (r != q)
    480          {
    481             tl_assert(r->sg);
    482             DRD_(vc_combine)(DRD_(thread_get_vc)(tid), &r->sg->vc);
    483          }
    484       }
    485       DRD_(thread_update_conflict_set)(tid, &old_vc);
    486       DRD_(vc_cleanup)(&old_vc);
    487    }
    488 
    489    /*
    490     * If the same number of threads as the barrier count indicates have
    491     * called the post *_barrier_wait() wrapper, toggle p->post_iteration and
    492     * reset the p->post_waiters_left counter.
    493     */
    494    if (--p->post_waiters_left <= 0)
    495    {
    496       p->post_iteration++;
    497       p->post_waiters_left = p->count;
    498    }
    499 }
    500 
    501 /** Called when thread tid stops to exist. */
    502 static void barrier_delete_thread(struct barrier_info* const p,
    503                                   const DrdThreadId tid)
    504 {
    505    struct barrier_thread_info* q;
    506    const UWord word_tid = tid;
    507    int i;
    508 
    509    for (i = 0; i < 2; i++) {
    510       q = VG_(OSetGen_Lookup)(p->oset[i], &word_tid);
    511       if (q)
    512          q->thread_finished = True;
    513    }
    514 }
    515 
    516 /**
    517  * Report that *_barrier_destroy() has been called but that this call was
    518  * not synchronized with the last *_barrier_wait() call on the same barrier.
    519  *
    520  * This topic has been discussed extensively on comp.programming.threads
    521  * (February 3, 2009). See also
    522  * <a href="http://groups.google.com/group/comp.programming.threads/browse_thread/thread/4f65535d6192aa50/a5f4bf1e3b437c4d">Immediately destroying pthread barriers</a>.
    523  */
    524 static
    525 void barrier_report_wait_delete_race(const struct barrier_info* const p,
    526                                      const struct barrier_thread_info* const q)
    527 {
    528    tl_assert(p);
    529    tl_assert(q);
    530 
    531    {
    532       BarrierErrInfo bei
    533          = { DRD_(thread_get_running_tid)(), p->a1, q->tid, q->wait_call_ctxt };
    534       VG_(maybe_record_error)(VG_(get_running_tid)(),
    535                               BarrierErr,
    536                               VG_(get_IP)(VG_(get_running_tid)()),
    537                               "Destruction of barrier not synchronized with"
    538                               " barrier wait call",
    539                               &bei);
    540    }
    541 }
    542 
    543 static const char* barrier_get_typename(struct barrier_info* const p)
    544 {
    545    tl_assert(p);
    546 
    547    return barrier_type_name(p->barrier_type);
    548 }
    549 
    550 static const char* barrier_type_name(const BarrierT bt)
    551 {
    552    switch (bt)
    553    {
    554    case pthread_barrier:
    555       return "pthread barrier";
    556    case gomp_barrier:
    557       return "gomp barrier";
    558    }
    559    return "?";
    560 }
    561 
    562 ULong DRD_(get_barrier_segment_creation_count)(void)
    563 {
    564    return s_barrier_segment_creation_count;
    565 }
    566