Home | History | Annotate | Download | only in drd
      1 /* -*- mode: C; c-basic-offset: 3; indent-tabs-mode: nil; -*- */
      2 /*
      3   This file is part of drd, a thread error detector.
      4 
      5   Copyright (C) 2006-2011 Bart Van Assche <bvanassche (at) acm.org>.
      6 
      7   This program is free software; you can redistribute it and/or
      8   modify it under the terms of the GNU General Public License as
      9   published by the Free Software Foundation; either version 2 of the
     10   License, or (at your option) any later version.
     11 
     12   This program is distributed in the hope that it will be useful, but
     13   WITHOUT ANY WARRANTY; without even the implied warranty of
     14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15   General Public License for more details.
     16 
     17   You should have received a copy of the GNU General Public License
     18   along with this program; if not, write to the Free Software
     19   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     20   02111-1307, USA.
     21 
     22   The GNU General Public License is contained in the file COPYING.
     23 */
     24 
     25 
     26 #include "drd_barrier.h"
     27 #include "drd_clientobj.h"
     28 #include "drd_error.h"
     29 #include "drd_suppression.h"
     30 #include "pub_tool_errormgr.h"    // VG_(maybe_record_error)()
     31 #include "pub_tool_libcassert.h"  // tl_assert()
     32 #include "pub_tool_libcprint.h"   // VG_(printf)()
     33 #include "pub_tool_machine.h"     // VG_(get_IP)()
     34 #include "pub_tool_mallocfree.h"  // VG_(malloc)(), VG_(free)()
     35 #include "pub_tool_oset.h"
     36 #include "pub_tool_threadstate.h" // VG_(get_running_tid)()
     37 
     38 
     39 /* Type definitions. */
     40 
     41 /** Information associated with one thread participating in a barrier. */
     42 struct barrier_thread_info
     43 {
     44    UWord       tid;           // A DrdThreadId declared as UWord because
     45                               // this member variable is the key of an OSet.
     46    Segment*    sg;            // Segment of the last pthread_barrier() call
     47                               // by thread tid.
     48    Segment*    post_wait_sg;  // Segment created after *_barrier_wait() finished
     49    ExeContext* wait_call_ctxt;// call stack for *_barrier_wait() call.
     50    Bool       thread_finished;// Whether thread 'tid' has finished.
     51 };
     52 
     53 
     54 /* Local functions. */
     55 
     56 static void barrier_cleanup(struct barrier_info* p);
     57 static void barrier_delete_thread(struct barrier_info* const p,
     58                                   const DrdThreadId tid);
     59 static const char* barrier_get_typename(struct barrier_info* const p);
     60 static const char* barrier_type_name(const BarrierT bt);
     61 static
     62 void barrier_report_wait_delete_race(const struct barrier_info* const p,
     63                                      const struct barrier_thread_info* const q);
     64 
     65 
     66 /* Local variables. */
     67 
     68 static Bool  s_trace_barrier = False;
     69 static ULong s_barrier_segment_creation_count;
     70 
     71 
     72 /* Function definitions. */
     73 
     74 void DRD_(barrier_set_trace)(const Bool trace_barrier)
     75 {
     76    s_trace_barrier = trace_barrier;
     77 }
     78 
     79 /**
     80  * Initialize the structure *p with the specified thread ID and iteration
     81  * information.
     82  */
     83 static
     84 void DRD_(barrier_thread_initialize)(struct barrier_thread_info* const p,
     85                                      const DrdThreadId tid)
     86 {
     87    p->tid             = tid;
     88    p->sg              = NULL;
     89    p->post_wait_sg    = 0;
     90    p->wait_call_ctxt  = 0;
     91    p->thread_finished = False;
     92 }
     93 
     94 /**
     95  * Deallocate the memory that is owned by members of
     96  * struct barrier_thread_info.
     97  */
     98 static void DRD_(barrier_thread_destroy)(struct barrier_thread_info* const p)
     99 {
    100    tl_assert(p);
    101    DRD_(sg_put)(p->sg);
    102    DRD_(sg_put)(p->post_wait_sg);
    103 }
    104 
    105 /**
    106  * Initialize the structure *p with the specified client-side barrier address,
    107  * barrier object size and number of participants in each barrier.
    108  */
    109 static
    110 void DRD_(barrier_initialize)(struct barrier_info* const p,
    111                               const Addr barrier,
    112                               const BarrierT barrier_type,
    113                               const Word count)
    114 {
    115    int i;
    116 
    117    tl_assert(barrier != 0);
    118    tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
    119    tl_assert(p->a1 == barrier);
    120 
    121    p->cleanup           = (void(*)(DrdClientobj*))barrier_cleanup;
    122    p->delete_thread
    123       = (void(*)(DrdClientobj*, DrdThreadId))barrier_delete_thread;
    124    p->barrier_type      = barrier_type;
    125    p->count             = count;
    126    p->pre_iteration     = 0;
    127    p->post_iteration    = 0;
    128    p->pre_waiters_left  = count;
    129    p->post_waiters_left = count;
    130 
    131    tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) == sizeof(Word));
    132    tl_assert(sizeof(((struct barrier_thread_info*)0)->tid)
    133              >= sizeof(DrdThreadId));
    134    for (i = 0; i < 2; i++) {
    135       p->oset[i] = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1",
    136                                        VG_(free));
    137    }
    138 }
    139 
    140 /**
    141  * Deallocate the memory owned by the struct barrier_info object and also
    142  * all the nodes in the OSet p->oset.
    143  *
    144  * Called by clientobj_destroy().
    145  */
    146 static void barrier_cleanup(struct barrier_info* p)
    147 {
    148    struct barrier_thread_info* q;
    149    Segment* latest_sg = 0;
    150    OSet* oset;
    151    int i;
    152 
    153    tl_assert(p);
    154 
    155    DRD_(thread_get_latest_segment)(&latest_sg, DRD_(thread_get_running_tid)());
    156    tl_assert(latest_sg);
    157 
    158    if (p->pre_waiters_left != p->count) {
    159       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
    160       VG_(maybe_record_error)(VG_(get_running_tid)(),
    161                               BarrierErr,
    162                               VG_(get_IP)(VG_(get_running_tid)()),
    163                               "Destruction of barrier that is being waited"
    164                               " upon",
    165                               &bei);
    166    } else {
    167       oset = p->oset[1 - (p->pre_iteration & 1)];
    168       VG_(OSetGen_ResetIter)(oset);
    169       for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
    170          if (q->post_wait_sg && !DRD_(vc_lte)(&q->post_wait_sg->vc,
    171                                               &latest_sg->vc))
    172          {
    173             barrier_report_wait_delete_race(p, q);
    174          }
    175          DRD_(barrier_thread_destroy)(q);
    176       }
    177    }
    178 
    179    for (i = 0; i < 2; i++) {
    180       VG_(OSetGen_Destroy)(p->oset[i]);
    181       p->oset[i] = NULL;
    182    }
    183 
    184    DRD_(sg_put)(latest_sg);
    185 }
    186 
    187 /**
    188  * Look up the client-side barrier address barrier in s_barrier[]. If not
    189  * found, add it.
    190  */
    191 static
    192 struct barrier_info*
    193 DRD_(barrier_get_or_allocate)(const Addr barrier,
    194                               const BarrierT barrier_type, const Word count)
    195 {
    196    struct barrier_info *p;
    197 
    198    tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
    199 
    200    tl_assert(offsetof(DrdClientobj, barrier) == 0);
    201    p = &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
    202    if (p == 0)
    203    {
    204       p = &(DRD_(clientobj_add)(barrier, ClientBarrier)->barrier);
    205       DRD_(barrier_initialize)(p, barrier, barrier_type, count);
    206    }
    207    return p;
    208 }
    209 
    210 /**
    211  * Look up the address of the struct barrier_info associated with the
    212  * client-side barrier object.
    213  */
    214 static struct barrier_info* DRD_(barrier_get)(const Addr barrier)
    215 {
    216    tl_assert(offsetof(DrdClientobj, barrier) == 0);
    217    return &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
    218 }
    219 
    220 /**
    221  * Initialize a barrier with given client address, barrier type and number of
    222  * participants. The 'reinitialization' argument indicates whether a barrier
    223  * object is being initialized or reinitialized.
    224  *
    225  * Called before pthread_barrier_init().
    226  */
    227 void DRD_(barrier_init)(const Addr barrier,
    228                         const BarrierT barrier_type, const Word count,
    229                         const Bool reinitialization)
    230 {
    231    struct barrier_info* p;
    232 
    233    tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
    234 
    235    if (count == 0)
    236    {
    237       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
    238       VG_(maybe_record_error)(VG_(get_running_tid)(),
    239                               BarrierErr,
    240                               VG_(get_IP)(VG_(get_running_tid)()),
    241                               "pthread_barrier_init: 'count' argument is zero",
    242                               &bei);
    243    }
    244 
    245    if (! reinitialization && barrier_type == pthread_barrier)
    246    {
    247       p = DRD_(barrier_get)(barrier);
    248       if (p)
    249       {
    250          BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
    251          VG_(maybe_record_error)(VG_(get_running_tid)(),
    252                                  BarrierErr,
    253                                  VG_(get_IP)(VG_(get_running_tid)()),
    254                                  "Barrier reinitialization",
    255                                  &bei);
    256       }
    257    }
    258 
    259    p = DRD_(barrier_get_or_allocate)(barrier, barrier_type, count);
    260 
    261    if (s_trace_barrier) {
    262       if (reinitialization)
    263          DRD_(trace_msg)("[%d] barrier_reinit    %s 0x%lx count %ld -> %ld",
    264                          DRD_(thread_get_running_tid)(),
    265                          barrier_get_typename(p), barrier, p->count, count);
    266       else
    267          DRD_(trace_msg)("[%d] barrier_init      %s 0x%lx",
    268                          DRD_(thread_get_running_tid)(),
    269                          barrier_get_typename(p),
    270                          barrier);
    271    }
    272 
    273    if (reinitialization && p->count != count)
    274    {
    275       if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
    276       {
    277          BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
    278          VG_(maybe_record_error)(VG_(get_running_tid)(),
    279                                  BarrierErr,
    280                                  VG_(get_IP)(VG_(get_running_tid)()),
    281                                  "Reinitialization of barrier with active"
    282                                  " waiters",
    283                                  &bei);
    284       }
    285       p->count = count;
    286    }
    287 }
    288 
    289 /** Called after pthread_barrier_destroy() / gomp_barrier_destroy(). */
    290 void DRD_(barrier_destroy)(const Addr barrier, const BarrierT barrier_type)
    291 {
    292    struct barrier_info* p;
    293 
    294    p = DRD_(barrier_get)(barrier);
    295 
    296    if (s_trace_barrier)
    297       DRD_(trace_msg)("[%d] barrier_destroy   %s 0x%lx",
    298                       DRD_(thread_get_running_tid)(),
    299                       barrier_get_typename(p), barrier);
    300 
    301    if (p == 0)
    302    {
    303       GenericErrInfo GEI = {
    304 	 .tid = DRD_(thread_get_running_tid)(),
    305 	 .addr = barrier,
    306       };
    307       VG_(maybe_record_error)(VG_(get_running_tid)(),
    308                               GenericErr,
    309                               VG_(get_IP)(VG_(get_running_tid)()),
    310                               "Not a barrier",
    311                               &GEI);
    312       return;
    313    }
    314 
    315    if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
    316    {
    317       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
    318       VG_(maybe_record_error)(VG_(get_running_tid)(),
    319                               BarrierErr,
    320                               VG_(get_IP)(VG_(get_running_tid)()),
    321                               "Destruction of a barrier with active waiters",
    322                               &bei);
    323    }
    324 
    325    DRD_(clientobj_remove)(p->a1, ClientBarrier);
    326 }
    327 
    328 /** Called before pthread_barrier_wait() / gomp_barrier_wait(). */
    329 void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier,
    330                             const BarrierT barrier_type)
    331 {
    332    struct barrier_info* p;
    333    struct barrier_thread_info* q;
    334    const UWord word_tid = tid;
    335    OSet* oset;
    336 
    337    p = DRD_(barrier_get)(barrier);
    338    if (p == 0 && barrier_type == gomp_barrier) {
    339       /*
    340        * gomp_barrier_wait() call has been intercepted but gomp_barrier_init()
    341        * not. The only cause I know of that can trigger this is that libgomp.so
    342        * has been compiled with --enable-linux-futex.
    343        */
    344       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), 0, 0, 0 };
    345       VG_(maybe_record_error)(VG_(get_running_tid)(),
    346                               BarrierErr,
    347                               VG_(get_IP)(VG_(get_running_tid)()),
    348                               "Please verify whether gcc has been configured"
    349                               " with option --disable-linux-futex. See also"
    350                               " the section about OpenMP in the DRD manual.",
    351                               &bei);
    352    }
    353    tl_assert(p);
    354 
    355    if (s_trace_barrier)
    356       DRD_(trace_msg)("[%d] barrier_pre_wait  %s 0x%lx iteration %ld",
    357                       DRD_(thread_get_running_tid)(),
    358                       barrier_get_typename(p), barrier, p->pre_iteration);
    359 
    360    /* Clean up nodes associated with finished threads. */
    361    oset = p->oset[p->pre_iteration & 1];
    362    tl_assert(oset);
    363    VG_(OSetGen_ResetIter)(oset);
    364    for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
    365       if (q->thread_finished) {
    366          void* r = VG_(OSetGen_Remove)(oset, &q->tid);
    367          tl_assert(r == q);
    368          DRD_(barrier_thread_destroy)(q);
    369          VG_(OSetGen_FreeNode)(oset, q);
    370          VG_(OSetGen_ResetIterAt)(oset, &word_tid);
    371       }
    372    }
    373    /* Allocate the per-thread data structure if necessary. */
    374    q = VG_(OSetGen_Lookup)(oset, &word_tid);
    375    if (q == NULL) {
    376       q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
    377       DRD_(barrier_thread_initialize)(q, tid);
    378       VG_(OSetGen_Insert)(oset, q);
    379       tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
    380    }
    381 
    382    /* Record *_barrier_wait() call context. */
    383    q->wait_call_ctxt = VG_(record_ExeContext)(VG_(get_running_tid)(), 0);
    384 
    385    /*
    386     * Store a pointer to the latest segment of the current thread in the
    387     * per-thread data structure.
    388     */
    389    DRD_(thread_get_latest_segment)(&q->sg, tid);
    390 
    391    /*
    392     * If the same number of threads as the barrier count indicates have
    393     * called the pre *_barrier_wait() wrapper, toggle p->pre_iteration and
    394     * reset the p->pre_waiters_left counter.
    395     */
    396    if (--p->pre_waiters_left <= 0)
    397    {
    398       p->pre_iteration++;
    399       p->pre_waiters_left = p->count;
    400    }
    401 }
    402 
    403 /** Called after pthread_barrier_wait() / gomp_barrier_wait(). */
    404 void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier,
    405                              const BarrierT barrier_type, const Bool waited,
    406                              const Bool serializing)
    407 {
    408    struct barrier_info* p;
    409    const UWord word_tid = tid;
    410    struct barrier_thread_info* q;
    411    struct barrier_thread_info* r;
    412    OSet* oset;
    413 
    414    p = DRD_(barrier_get)(barrier);
    415 
    416    if (s_trace_barrier)
    417       DRD_(trace_msg)("[%d] barrier_post_wait %s 0x%lx iteration %ld%s",
    418                       tid, p ? barrier_get_typename(p) : "(?)",
    419                       barrier, p ? p->post_iteration : -1,
    420                       serializing ? " (serializing)" : "");
    421 
    422    /*
    423     * If p == 0, this means that the barrier has been destroyed after
    424     * *_barrier_wait() returned and before this function was called. Just
    425     * return in that case -- race conditions between *_barrier_wait()
    426     * and *_barrier_destroy() are detected by the *_barrier_destroy() wrapper.
    427     */
    428    if (p == 0)
    429       return;
    430 
    431    /* If the *_barrier_wait() call returned an error code, exit. */
    432    if (! waited)
    433       return;
    434 
    435    oset = p->oset[p->post_iteration & 1];
    436    q = VG_(OSetGen_Lookup)(oset, &word_tid);
    437    if (p->pre_iteration - p->post_iteration > 1) {
    438       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
    439       VG_(maybe_record_error)(VG_(get_running_tid)(),
    440                               BarrierErr,
    441                               VG_(get_IP)(VG_(get_running_tid)()),
    442                               "Number of concurrent pthread_barrier_wait()"
    443                               " calls exceeds the barrier count",
    444                               &bei);
    445    } else if (q == NULL) {
    446       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
    447       VG_(maybe_record_error)(VG_(get_running_tid)(),
    448                               BarrierErr,
    449                               VG_(get_IP)(VG_(get_running_tid)()),
    450                               "Error in barrier implementation"
    451                               " -- barrier_wait() started before"
    452                               " barrier_destroy() and finished after"
    453                               " barrier_destroy()",
    454                               &bei);
    455    }
    456    if (q == NULL) {
    457       q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
    458       DRD_(barrier_thread_initialize)(q, tid);
    459       VG_(OSetGen_Insert)(oset, q);
    460       tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
    461       DRD_(thread_get_latest_segment)(&q->sg, tid);
    462    }
    463 
    464    /* Create a new segment and store a pointer to that segment. */
    465    DRD_(thread_new_segment)(tid);
    466    DRD_(thread_get_latest_segment)(&q->post_wait_sg, tid);
    467    s_barrier_segment_creation_count++;
    468 
    469    /*
    470     * Combine all vector clocks that were stored in the pre_barrier_wait
    471     * wrapper with the vector clock of the current thread.
    472     */
    473    {
    474       VectorClock old_vc;
    475 
    476       DRD_(vc_copy)(&old_vc, &DRD_(g_threadinfo)[tid].last->vc);
    477       VG_(OSetGen_ResetIter)(oset);
    478       for ( ; (r = VG_(OSetGen_Next)(oset)) != 0; )
    479       {
    480          if (r != q)
    481          {
    482             tl_assert(r->sg);
    483             DRD_(vc_combine)(&DRD_(g_threadinfo)[tid].last->vc,
    484                              &r->sg->vc);
    485          }
    486       }
    487       DRD_(thread_update_conflict_set)(tid, &old_vc);
    488       DRD_(vc_cleanup)(&old_vc);
    489    }
    490 
    491    /*
    492     * If the same number of threads as the barrier count indicates have
    493     * called the post *_barrier_wait() wrapper, toggle p->post_iteration and
    494     * reset the p->post_waiters_left counter.
    495     */
    496    if (--p->post_waiters_left <= 0)
    497    {
    498       p->post_iteration++;
    499       p->post_waiters_left = p->count;
    500    }
    501 }
    502 
    503 /** Called when thread tid stops to exist. */
    504 static void barrier_delete_thread(struct barrier_info* const p,
    505                                   const DrdThreadId tid)
    506 {
    507    struct barrier_thread_info* q;
    508    const UWord word_tid = tid;
    509    int i;
    510 
    511    for (i = 0; i < 2; i++) {
    512       q = VG_(OSetGen_Lookup)(p->oset[i], &word_tid);
    513       if (q)
    514          q->thread_finished = True;
    515    }
    516 }
    517 
    518 /**
    519  * Report that *_barrier_destroy() has been called but that this call was
    520  * not synchronized with the last *_barrier_wait() call on the same barrier.
    521  *
    522  * This topic has been discussed extensively on comp.programming.threads
    523  * (February 3, 2009). See also
    524  * <a href="http://groups.google.com/group/comp.programming.threads/browse_thread/thread/4f65535d6192aa50/a5f4bf1e3b437c4d">Immediately destroying pthread barriers</a>.
    525  */
    526 static
    527 void barrier_report_wait_delete_race(const struct barrier_info* const p,
    528                                      const struct barrier_thread_info* const q)
    529 {
    530    tl_assert(p);
    531    tl_assert(q);
    532 
    533    {
    534       BarrierErrInfo bei
    535          = { DRD_(thread_get_running_tid)(), p->a1, q->tid, q->wait_call_ctxt };
    536       VG_(maybe_record_error)(VG_(get_running_tid)(),
    537                               BarrierErr,
    538                               VG_(get_IP)(VG_(get_running_tid)()),
    539                               "Destruction of barrier not synchronized with"
    540                               " barrier wait call",
    541                               &bei);
    542    }
    543 }
    544 
    545 static const char* barrier_get_typename(struct barrier_info* const p)
    546 {
    547    tl_assert(p);
    548 
    549    return barrier_type_name(p->barrier_type);
    550 }
    551 
    552 static const char* barrier_type_name(const BarrierT bt)
    553 {
    554    switch (bt)
    555    {
    556    case pthread_barrier:
    557       return "pthread barrier";
    558    case gomp_barrier:
    559       return "gomp barrier";
    560    }
    561    return "?";
    562 }
    563 
    564 ULong DRD_(get_barrier_segment_creation_count)(void)
    565 {
    566    return s_barrier_segment_creation_count;
    567 }
    568