Home | History | Annotate | Download | only in drd
      1 /* -*- mode: C; c-basic-offset: 3; -*- */
      2 /*
      3   This file is part of drd, a thread error detector.
      4 
      5   Copyright (C) 2006-2010 Bart Van Assche <bvanassche (at) acm.org>.
      6 
      7   This program is free software; you can redistribute it and/or
      8   modify it under the terms of the GNU General Public License as
      9   published by the Free Software Foundation; either version 2 of the
     10   License, or (at your option) any later version.
     11 
     12   This program is distributed in the hope that it will be useful, but
     13   WITHOUT ANY WARRANTY; without even the implied warranty of
     14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15   General Public License for more details.
     16 
     17   You should have received a copy of the GNU General Public License
     18   along with this program; if not, write to the Free Software
     19   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     20   02111-1307, USA.
     21 
     22   The GNU General Public License is contained in the file COPYING.
     23 */
     24 
     25 
     26 #include "drd_barrier.h"
     27 #include "drd_clientobj.h"
     28 #include "drd_error.h"
     29 #include "drd_suppression.h"
     30 #include "pub_tool_errormgr.h"    // VG_(maybe_record_error)()
     31 #include "pub_tool_libcassert.h"  // tl_assert()
     32 #include "pub_tool_libcprint.h"   // VG_(printf)()
     33 #include "pub_tool_machine.h"     // VG_(get_IP)()
     34 #include "pub_tool_mallocfree.h"  // VG_(malloc)(), VG_(free)()
     35 #include "pub_tool_oset.h"
     36 #include "pub_tool_threadstate.h" // VG_(get_running_tid)()
     37 
     38 
     39 /* Type definitions. */
     40 
/**
 * Information associated with one thread participating in a barrier.
 *
 * Objects of this type are the nodes of the OSet stored in a
 * struct barrier_info (member oset); the tid member is the lookup key.
 */
struct barrier_thread_info
{
   UWord       tid;           // A DrdThreadId declared as UWord because
                              // this member variable is the key of an OSet.
   Word        iteration;     // iteration of last *_barrier_wait()
                              // call thread tid participated in.
   Segment*    sg[2];         // Segments of the last two
                              // *_barrier_wait() calls by thread tid,
                              // indexed by the barrier's pre_iteration
                              // value (0 or 1) at the time of the call.
   ExeContext* wait_call_ctxt;// call stack for *_barrier_wait() call.
   Segment*    post_wait_sg;  // Segment created after *_barrier_wait() finished
};
     53 
     54 
/*
 * Local functions.
 *
 * Forward declarations of static helpers that are defined further down in
 * this file but referenced before their definition.
 */

/* Cleanup callback installed in struct barrier_info by barrier_initialize. */
static void barrier_cleanup(struct barrier_info* p);
/* Thread-deletion callback installed by barrier_initialize. */
static void barrier_delete_thread(struct barrier_info* const p,
                                  const DrdThreadId tid);
/* Human-readable name of the type of barrier *p (used in trace messages). */
static const char* barrier_get_typename(struct barrier_info* const p);
/* Human-readable name for a BarrierT value. */
static const char* barrier_type_name(const BarrierT bt);
/* Report a barrier destruction that raced with a barrier wait by thread *q. */
static
void barrier_report_wait_delete_race(const struct barrier_info* const p,
                                     const struct barrier_thread_info* const q);
     65 
     66 
/* Local variables. */

/* When True, the barrier wrappers in this file print trace messages. */
static Bool  s_trace_barrier = False;
/* Incremented once per segment created in DRD_(barrier_post_wait)(). */
static ULong s_barrier_segment_creation_count;
     71 
     72 
     73 /* Function definitions. */
     74 
/**
 * Enable or disable tracing of barrier events.
 *
 * @param trace_barrier  New value of the file-scope flag that makes the
 *                       barrier wrappers in this file print a message for
 *                       each intercepted barrier operation.
 */
void DRD_(barrier_set_trace)(const Bool trace_barrier)
{
   s_trace_barrier = trace_barrier;
}
     79 
     80 /**
     81  * Initialize the structure *p with the specified thread ID and iteration
     82  * information.
     83  */
     84 static
     85 void DRD_(barrier_thread_initialize)(struct barrier_thread_info* const p,
     86                                      const DrdThreadId tid,
     87                                      const Word iteration)
     88 {
     89    p->tid            = tid;
     90    p->iteration      = iteration;
     91    p->sg[0]          = 0;
     92    p->sg[1]          = 0;
     93    p->wait_call_ctxt = 0;
     94    p->post_wait_sg   = 0;
     95 }
     96 
     97 /**
     98  * Deallocate the memory that is owned by members of
     99  * struct barrier_thread_info.
    100  */
    101 static void DRD_(barrier_thread_destroy)(struct barrier_thread_info* const p)
    102 {
    103    tl_assert(p);
    104    DRD_(sg_put)(p->sg[0]);
    105    DRD_(sg_put)(p->sg[1]);
    106    DRD_(sg_put)(p->post_wait_sg);
    107 }
    108 
    109 /**
    110  * Initialize the structure *p with the specified client-side barrier address,
    111  * barrier object size and number of participants in each barrier.
    112  */
    113 static
    114 void DRD_(barrier_initialize)(struct barrier_info* const p,
    115                               const Addr barrier,
    116                               const BarrierT barrier_type,
    117                               const Word count)
    118 {
    119    tl_assert(barrier != 0);
    120    tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
    121    tl_assert(p->a1 == barrier);
    122 
    123    p->cleanup           = (void(*)(DrdClientobj*))barrier_cleanup;
    124    p->delete_thread
    125       = (void(*)(DrdClientobj*, DrdThreadId))barrier_delete_thread;
    126    p->barrier_type      = barrier_type;
    127    p->count             = count;
    128    p->pre_iteration     = 0;
    129    p->post_iteration    = 0;
    130    p->pre_waiters_left  = count;
    131    p->post_waiters_left = count;
    132 
    133    tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) == sizeof(Word));
    134    tl_assert(sizeof(((struct barrier_thread_info*)0)->tid)
    135              >= sizeof(DrdThreadId));
    136    p->oset = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1",
    137                                  VG_(free));
    138 }
    139 
    140 /**
    141  * Deallocate the memory owned by the struct barrier_info object and also
    142  * all the nodes in the OSet p->oset.
    143  *
    144  * Called by clientobj_destroy().
    145  */
    146 static void barrier_cleanup(struct barrier_info* p)
    147 {
    148    struct barrier_thread_info* q;
    149    Segment* latest_sg = 0;
    150 
    151    tl_assert(p);
    152 
    153    if (p->pre_waiters_left != p->count)
    154    {
    155       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
    156       VG_(maybe_record_error)(VG_(get_running_tid)(),
    157                               BarrierErr,
    158                               VG_(get_IP)(VG_(get_running_tid)()),
    159                               "Destruction of barrier that is being waited"
    160                               " upon",
    161                               &bei);
    162    }
    163 
    164    DRD_(thread_get_latest_segment)(&latest_sg, DRD_(thread_get_running_tid)());
    165    tl_assert(latest_sg);
    166 
    167    VG_(OSetGen_ResetIter)(p->oset);
    168    for ( ; (q = VG_(OSetGen_Next)(p->oset)) != 0; )
    169    {
    170       if (q->post_wait_sg
    171           && ! DRD_(vc_lte)(&q->post_wait_sg->vc, &latest_sg->vc))
    172       {
    173          barrier_report_wait_delete_race(p, q);
    174       }
    175 
    176       DRD_(barrier_thread_destroy)(q);
    177    }
    178    VG_(OSetGen_Destroy)(p->oset);
    179 
    180    DRD_(sg_put)(latest_sg);
    181 }
    182 
    183 /**
    184  * Look up the client-side barrier address barrier in s_barrier[]. If not
    185  * found, add it.
    186  */
    187 static
    188 struct barrier_info*
    189 DRD_(barrier_get_or_allocate)(const Addr barrier,
    190                               const BarrierT barrier_type, const Word count)
    191 {
    192    struct barrier_info *p;
    193 
    194    tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
    195 
    196    tl_assert(offsetof(DrdClientobj, barrier) == 0);
    197    p = &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
    198    if (p == 0)
    199    {
    200       p = &(DRD_(clientobj_add)(barrier, ClientBarrier)->barrier);
    201       DRD_(barrier_initialize)(p, barrier, barrier_type, count);
    202    }
    203    return p;
    204 }
    205 
    206 /**
    207  * Look up the address of the information associated with the client-side
    208  * barrier object.
    209  */
    210 static struct barrier_info* DRD_(barrier_get)(const Addr barrier)
    211 {
    212    tl_assert(offsetof(DrdClientobj, barrier) == 0);
    213    return &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
    214 }
    215 
    216 /**
    217  * Initialize a barrier with client address barrier, client size size, and
    218  * where count threads participate in each barrier.
    219  *
    220  * Called before pthread_barrier_init().
    221  */
    222 void DRD_(barrier_init)(const Addr barrier,
    223                         const BarrierT barrier_type, const Word count,
    224                         const Bool reinitialization)
    225 {
    226    struct barrier_info* p;
    227 
    228    tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
    229 
    230    if (count == 0)
    231    {
    232       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
    233       VG_(maybe_record_error)(VG_(get_running_tid)(),
    234                               BarrierErr,
    235                               VG_(get_IP)(VG_(get_running_tid)()),
    236                               "pthread_barrier_init: 'count' argument is zero",
    237                               &bei);
    238    }
    239 
    240    if (! reinitialization && barrier_type == pthread_barrier)
    241    {
    242       p = DRD_(barrier_get)(barrier);
    243       if (p)
    244       {
    245          BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
    246          VG_(maybe_record_error)(VG_(get_running_tid)(),
    247                                  BarrierErr,
    248                                  VG_(get_IP)(VG_(get_running_tid)()),
    249                                  "Barrier reinitialization",
    250                                  &bei);
    251       }
    252    }
    253    p = DRD_(barrier_get_or_allocate)(barrier, barrier_type, count);
    254 
    255    if (s_trace_barrier)
    256    {
    257       if (reinitialization)
    258       {
    259          VG_(message)(Vg_UserMsg,
    260                       "[%d] barrier_reinit    %s 0x%lx count %ld -> %ld\n",
    261                       DRD_(thread_get_running_tid)(),
    262                       barrier_get_typename(p),
    263                       barrier,
    264                       p->count,
    265                       count);
    266       }
    267       else
    268       {
    269          VG_(message)(Vg_UserMsg,
    270                       "[%d] barrier_init      %s 0x%lx\n",
    271                       DRD_(thread_get_running_tid)(),
    272                       barrier_get_typename(p),
    273                       barrier);
    274       }
    275    }
    276 
    277    if (reinitialization && p->count != count)
    278    {
    279       if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
    280       {
    281          BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
    282          VG_(maybe_record_error)(VG_(get_running_tid)(),
    283                                  BarrierErr,
    284                                  VG_(get_IP)(VG_(get_running_tid)()),
    285                                  "Reinitialization of barrier with active"
    286                                  " waiters",
    287                                  &bei);
    288       }
    289       p->count = count;
    290    }
    291 }
    292 
    293 /** Called after pthread_barrier_destroy() / gomp_barrier_destroy(). */
    294 void DRD_(barrier_destroy)(const Addr barrier, const BarrierT barrier_type)
    295 {
    296    struct barrier_info* p;
    297 
    298    p = DRD_(barrier_get)(barrier);
    299 
    300    if (s_trace_barrier)
    301    {
    302       VG_(message)(Vg_UserMsg,
    303                    "[%d] barrier_destroy   %s 0x%lx\n",
    304                    DRD_(thread_get_running_tid)(),
    305                    barrier_get_typename(p),
    306                    barrier);
    307    }
    308 
    309    if (p == 0)
    310    {
    311       GenericErrInfo GEI = {
    312 	 .tid = DRD_(thread_get_running_tid)(),
    313 	 .addr = barrier,
    314       };
    315       VG_(maybe_record_error)(VG_(get_running_tid)(),
    316                               GenericErr,
    317                               VG_(get_IP)(VG_(get_running_tid)()),
    318                               "Not a barrier",
    319                               &GEI);
    320       return;
    321    }
    322 
    323    if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
    324    {
    325       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
    326       VG_(maybe_record_error)(VG_(get_running_tid)(),
    327                               BarrierErr,
    328                               VG_(get_IP)(VG_(get_running_tid)()),
    329                               "Destruction of a barrier with active waiters",
    330                               &bei);
    331    }
    332 
    333    DRD_(clientobj_remove)(p->a1, ClientBarrier);
    334 }
    335 
    336 /** Called before pthread_barrier_wait() / gomp_barrier_wait(). */
    337 void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier,
    338                             const BarrierT barrier_type)
    339 {
    340    struct barrier_info* p;
    341    struct barrier_thread_info* q;
    342    const UWord word_tid = tid;
    343 
    344    p = DRD_(barrier_get)(barrier);
    345    if (p == 0 && barrier_type == gomp_barrier)
    346    {
    347       /*
    348        * gomp_barrier_wait() call has been intercepted but gomp_barrier_init()
    349        * not. The only cause I know of that can trigger this is that libgomp.so
    350        * has been compiled with --enable-linux-futex.
    351        */
    352       VG_(message)(Vg_UserMsg, "\n");
    353       VG_(message)(Vg_UserMsg,
    354                    "Please verify whether gcc has been configured"
    355                    " with option --disable-linux-futex.\n");
    356       VG_(message)(Vg_UserMsg,
    357                    "See also the section about OpenMP in the DRD manual.\n");
    358       VG_(message)(Vg_UserMsg, "\n");
    359    }
    360    tl_assert(p);
    361 
    362    if (s_trace_barrier)
    363    {
    364       VG_(message)(Vg_UserMsg,
    365                    "[%d] barrier_pre_wait  %s 0x%lx iteration %ld\n",
    366                    DRD_(thread_get_running_tid)(),
    367                    barrier_get_typename(p),
    368                    barrier,
    369                    p->pre_iteration);
    370    }
    371 
    372    /* Allocate the per-thread data structure if necessary. */
    373    q = VG_(OSetGen_Lookup)(p->oset, &word_tid);
    374    if (q == 0)
    375    {
    376       q = VG_(OSetGen_AllocNode)(p->oset, sizeof(*q));
    377       DRD_(barrier_thread_initialize)(q, tid, p->pre_iteration);
    378       VG_(OSetGen_Insert)(p->oset, q);
    379       tl_assert(VG_(OSetGen_Lookup)(p->oset, &word_tid) == q);
    380    }
    381 
    382    /* Record *_barrier_wait() call context. */
    383    q->wait_call_ctxt = VG_(record_ExeContext)(VG_(get_running_tid)(), 0);
    384 
    385    /*
    386     * Store a pointer to the latest segment of the current thread in the
    387     * per-thread data structure.
    388     */
    389    DRD_(thread_get_latest_segment)(&q->sg[p->pre_iteration], tid);
    390 
    391    /*
    392     * If the same number of threads as the barrier count indicates have
    393     * called the pre *_barrier_wait() wrapper, toggle p->pre_iteration and
    394     * reset the p->pre_waiters_left counter.
    395     */
    396    if (--p->pre_waiters_left <= 0)
    397    {
    398       p->pre_iteration    = 1 - p->pre_iteration;
    399       p->pre_waiters_left = p->count;
    400    }
    401 }
    402 
    403 /** Called after pthread_barrier_wait() / gomp_barrier_wait(). */
    404 void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier,
    405                              const BarrierT barrier_type, const Bool waited,
    406                              const Bool serializing)
    407 {
    408    struct barrier_info* p;
    409    const UWord word_tid = tid;
    410    struct barrier_thread_info* q;
    411    struct barrier_thread_info* r;
    412 
    413    p = DRD_(barrier_get)(barrier);
    414 
    415    if (s_trace_barrier)
    416    {
    417       VG_(message)(Vg_UserMsg,
    418                    "[%d] barrier_post_wait %s 0x%lx iteration %ld%s\n",
    419                    tid,
    420                    p ? barrier_get_typename(p) : "(?)",
    421                    barrier,
    422                    p ? p->post_iteration : -1,
    423                    serializing ? " (serializing)" : "");
    424    }
    425 
    426    /*
    427     * If p == 0, this means that the barrier has been destroyed after
    428     * *_barrier_wait() returned and before this function was called. Just
    429     * return in that case -- race conditions between *_barrier_wait()
    430     * and *_barrier_destroy() are detected by the *_barrier_destroy() wrapper.
    431     */
    432    if (p == 0)
    433       return;
    434 
    435    /* If the *_barrier_wait() call returned an error code, exit. */
    436    if (! waited)
    437       return;
    438 
    439    q = VG_(OSetGen_Lookup)(p->oset, &word_tid);
    440    if (q == 0)
    441    {
    442       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
    443       VG_(maybe_record_error)(VG_(get_running_tid)(),
    444                               BarrierErr,
    445                               VG_(get_IP)(VG_(get_running_tid)()),
    446                               "Error in barrier implementation"
    447                               " -- barrier_wait() started before"
    448                               " barrier_destroy() and finished after"
    449                               " barrier_destroy()",
    450                               &bei);
    451 
    452       q = VG_(OSetGen_AllocNode)(p->oset, sizeof(*q));
    453       DRD_(barrier_thread_initialize)(q, tid, p->pre_iteration);
    454       VG_(OSetGen_Insert)(p->oset, q);
    455       tl_assert(VG_(OSetGen_Lookup)(p->oset, &word_tid) == q);
    456    }
    457 
    458    /* Create a new segment and store a pointer to that segment. */
    459    DRD_(thread_new_segment)(tid);
    460    DRD_(thread_get_latest_segment)(&q->post_wait_sg, tid);
    461    s_barrier_segment_creation_count++;
    462 
    463    /*
    464     * Combine all vector clocks that were stored in the pre_barrier_wait
    465     * wrapper with the vector clock of the current thread.
    466     */
    467    {
    468       VectorClock old_vc;
    469 
    470       DRD_(vc_copy)(&old_vc, &DRD_(g_threadinfo)[tid].last->vc);
    471       VG_(OSetGen_ResetIter)(p->oset);
    472       for ( ; (r = VG_(OSetGen_Next)(p->oset)) != 0; )
    473       {
    474          if (r != q)
    475          {
    476             tl_assert(r->sg[p->post_iteration]);
    477             DRD_(vc_combine)(&DRD_(g_threadinfo)[tid].last->vc,
    478                              &r->sg[p->post_iteration]->vc);
    479          }
    480       }
    481       DRD_(thread_update_conflict_set)(tid, &old_vc);
    482       DRD_(vc_cleanup)(&old_vc);
    483    }
    484 
    485    /*
    486     * If the same number of threads as the barrier count indicates have
    487     * called the post *_barrier_wait() wrapper, toggle p->post_iteration and
    488     * reset the p->post_waiters_left counter.
    489     */
    490    if (--p->post_waiters_left <= 0)
    491    {
    492       p->post_iteration    = 1 - p->post_iteration;
    493       p->post_waiters_left = p->count;
    494    }
    495 }
    496 
    497 /** Called when thread tid stops to exist. */
    498 static void barrier_delete_thread(struct barrier_info* const p,
    499                                   const DrdThreadId tid)
    500 {
    501    struct barrier_thread_info* q;
    502    const UWord word_tid = tid;
    503 
    504    q = VG_(OSetGen_Remove)(p->oset, &word_tid);
    505 
    506    /*
    507     * q is only non-zero if the barrier object has been used by thread tid
    508     * after the barrier_init() call and before the thread finished.
    509     */
    510    if (q)
    511    {
    512       DRD_(barrier_thread_destroy)(q);
    513       VG_(OSetGen_FreeNode)(p->oset, q);
    514    }
    515 }
    516 
/**
 * Report that *_barrier_destroy() has been called but that this call was
 * not synchronized with the last *_barrier_wait() call on the same barrier.
 *
 * The error record carries the running thread, the barrier address, the ID
 * of the thread whose wait was not synchronized (q->tid) and the call stack
 * of that thread's wait call (q->wait_call_ctxt).
 *
 * This topic has been discussed extensively on comp.programming.threads
 * (February 3, 2009), thread "Immediately destroying pthread barriers":
 * http://groups.google.com/group/comp.programming.threads/browse_thread/thread/4f65535d6192aa50/a5f4bf1e3b437c4d
 *
 * @param p  Barrier being destroyed; must not be null.
 * @param q  Per-thread record of the unsynchronized waiter; must not be null.
 */
static
void barrier_report_wait_delete_race(const struct barrier_info* const p,
                                     const struct barrier_thread_info* const q)
{
   tl_assert(p);
   tl_assert(q);

   /* Nested block so that bei is only initialized after the asserts above. */
   {
      BarrierErrInfo bei
         = { DRD_(thread_get_running_tid)(), p->a1, q->tid, q->wait_call_ctxt };
      VG_(maybe_record_error)(VG_(get_running_tid)(),
                              BarrierErr,
                              VG_(get_IP)(VG_(get_running_tid)()),
                              "Destruction of barrier not synchronized with"
                              " barrier wait call",
                              &bei);
   }
}
    543 
    544 static const char* barrier_get_typename(struct barrier_info* const p)
    545 {
    546    tl_assert(p);
    547 
    548    return barrier_type_name(p->barrier_type);
    549 }
    550 
    551 static const char* barrier_type_name(const BarrierT bt)
    552 {
    553    switch (bt)
    554    {
    555    case pthread_barrier:
    556       return "pthread barrier";
    557    case gomp_barrier:
    558       return "gomp barrier";
    559    }
    560    return "?";
    561 }
    562 
/**
 * Return the value of the file-scope counter that is incremented each time
 * DRD_(barrier_post_wait)() creates a new segment.
 */
ULong DRD_(get_barrier_segment_creation_count)(void)
{
   return s_barrier_segment_creation_count;
}
    567