1 /* -*- mode: C; c-basic-offset: 3; -*- */ 2 /* 3 This file is part of drd, a thread error detector. 4 5 Copyright (C) 2006-2010 Bart Van Assche <bvanassche (at) acm.org>. 6 7 This program is free software; you can redistribute it and/or 8 modify it under the terms of the GNU General Public License as 9 published by the Free Software Foundation; either version 2 of the 10 License, or (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, but 13 WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software 19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 20 02111-1307, USA. 21 22 The GNU General Public License is contained in the file COPYING. 23 */ 24 25 26 #include "drd_barrier.h" 27 #include "drd_clientobj.h" 28 #include "drd_error.h" 29 #include "drd_suppression.h" 30 #include "pub_tool_errormgr.h" // VG_(maybe_record_error)() 31 #include "pub_tool_libcassert.h" // tl_assert() 32 #include "pub_tool_libcprint.h" // VG_(printf)() 33 #include "pub_tool_machine.h" // VG_(get_IP)() 34 #include "pub_tool_mallocfree.h" // VG_(malloc)(), VG_(free)() 35 #include "pub_tool_oset.h" 36 #include "pub_tool_threadstate.h" // VG_(get_running_tid)() 37 38 39 /* Type definitions. */ 40 41 /** Information associated with one thread participating in a barrier. */ 42 struct barrier_thread_info 43 { 44 UWord tid; // A DrdThreadId declared as UWord because 45 // this member variable is the key of an OSet. 46 Word iteration; // iteration of last pthread_barrier_wait() 47 // call thread tid participated in. 48 Segment* sg[2]; // Segments of the last two 49 // pthread_barrier() calls by thread tid. 50 ExeContext* wait_call_ctxt;// call stack for *_barrier_wait() call. 51 Segment* post_wait_sg; // Segment created after *_barrier_wait() finished 52 }; 53 54 55 /* Local functions. */ 56 57 static void barrier_cleanup(struct barrier_info* p); 58 static void barrier_delete_thread(struct barrier_info* const p, 59 const DrdThreadId tid); 60 static const char* barrier_get_typename(struct barrier_info* const p); 61 static const char* barrier_type_name(const BarrierT bt); 62 static 63 void barrier_report_wait_delete_race(const struct barrier_info* const p, 64 const struct barrier_thread_info* const q); 65 66 67 /* Local variables. */ 68 69 static Bool s_trace_barrier = False; 70 static ULong s_barrier_segment_creation_count; 71 72 73 /* Function definitions. */ 74 75 void DRD_(barrier_set_trace)(const Bool trace_barrier) 76 { 77 s_trace_barrier = trace_barrier; 78 } 79 80 /** 81 * Initialize the structure *p with the specified thread ID and iteration 82 * information. 83 */ 84 static 85 void DRD_(barrier_thread_initialize)(struct barrier_thread_info* const p, 86 const DrdThreadId tid, 87 const Word iteration) 88 { 89 p->tid = tid; 90 p->iteration = iteration; 91 p->sg[0] = 0; 92 p->sg[1] = 0; 93 p->wait_call_ctxt = 0; 94 p->post_wait_sg = 0; 95 } 96 97 /** 98 * Deallocate the memory that is owned by members of 99 * struct barrier_thread_info. 100 */ 101 static void DRD_(barrier_thread_destroy)(struct barrier_thread_info* const p) 102 { 103 tl_assert(p); 104 DRD_(sg_put)(p->sg[0]); 105 DRD_(sg_put)(p->sg[1]); 106 DRD_(sg_put)(p->post_wait_sg); 107 } 108 109 /** 110 * Initialize the structure *p with the specified client-side barrier address, 111 * barrier object size and number of participants in each barrier. 112 */ 113 static 114 void DRD_(barrier_initialize)(struct barrier_info* const p, 115 const Addr barrier, 116 const BarrierT barrier_type, 117 const Word count) 118 { 119 tl_assert(barrier != 0); 120 tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier); 121 tl_assert(p->a1 == barrier); 122 123 p->cleanup = (void(*)(DrdClientobj*))barrier_cleanup; 124 p->delete_thread 125 = (void(*)(DrdClientobj*, DrdThreadId))barrier_delete_thread; 126 p->barrier_type = barrier_type; 127 p->count = count; 128 p->pre_iteration = 0; 129 p->post_iteration = 0; 130 p->pre_waiters_left = count; 131 p->post_waiters_left = count; 132 133 tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) == sizeof(Word)); 134 tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) 135 >= sizeof(DrdThreadId)); 136 p->oset = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1", 137 VG_(free)); 138 } 139 140 /** 141 * Deallocate the memory owned by the struct barrier_info object and also 142 * all the nodes in the OSet p->oset. 143 * 144 * Called by clientobj_destroy(). 145 */ 146 static void barrier_cleanup(struct barrier_info* p) 147 { 148 struct barrier_thread_info* q; 149 Segment* latest_sg = 0; 150 151 tl_assert(p); 152 153 if (p->pre_waiters_left != p->count) 154 { 155 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; 156 VG_(maybe_record_error)(VG_(get_running_tid)(), 157 BarrierErr, 158 VG_(get_IP)(VG_(get_running_tid)()), 159 "Destruction of barrier that is being waited" 160 " upon", 161 &bei); 162 } 163 164 DRD_(thread_get_latest_segment)(&latest_sg, DRD_(thread_get_running_tid)()); 165 tl_assert(latest_sg); 166 167 VG_(OSetGen_ResetIter)(p->oset); 168 for ( ; (q = VG_(OSetGen_Next)(p->oset)) != 0; ) 169 { 170 if (q->post_wait_sg 171 && ! DRD_(vc_lte)(&q->post_wait_sg->vc, &latest_sg->vc)) 172 { 173 barrier_report_wait_delete_race(p, q); 174 } 175 176 DRD_(barrier_thread_destroy)(q); 177 } 178 VG_(OSetGen_Destroy)(p->oset); 179 180 DRD_(sg_put)(latest_sg); 181 } 182 183 /** 184 * Look up the client-side barrier address barrier in s_barrier[]. If not 185 * found, add it. 186 */ 187 static 188 struct barrier_info* 189 DRD_(barrier_get_or_allocate)(const Addr barrier, 190 const BarrierT barrier_type, const Word count) 191 { 192 struct barrier_info *p; 193 194 tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier); 195 196 tl_assert(offsetof(DrdClientobj, barrier) == 0); 197 p = &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier); 198 if (p == 0) 199 { 200 p = &(DRD_(clientobj_add)(barrier, ClientBarrier)->barrier); 201 DRD_(barrier_initialize)(p, barrier, barrier_type, count); 202 } 203 return p; 204 } 205 206 /** 207 * Look up the address of the information associated with the client-side 208 * barrier object. 209 */ 210 static struct barrier_info* DRD_(barrier_get)(const Addr barrier) 211 { 212 tl_assert(offsetof(DrdClientobj, barrier) == 0); 213 return &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier); 214 } 215 216 /** 217 * Initialize a barrier with client address barrier, client size size, and 218 * where count threads participate in each barrier. 219 * 220 * Called before pthread_barrier_init(). 221 */ 222 void DRD_(barrier_init)(const Addr barrier, 223 const BarrierT barrier_type, const Word count, 224 const Bool reinitialization) 225 { 226 struct barrier_info* p; 227 228 tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier); 229 230 if (count == 0) 231 { 232 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 }; 233 VG_(maybe_record_error)(VG_(get_running_tid)(), 234 BarrierErr, 235 VG_(get_IP)(VG_(get_running_tid)()), 236 "pthread_barrier_init: 'count' argument is zero", 237 &bei); 238 } 239 240 if (! reinitialization && barrier_type == pthread_barrier) 241 { 242 p = DRD_(barrier_get)(barrier); 243 if (p) 244 { 245 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 }; 246 VG_(maybe_record_error)(VG_(get_running_tid)(), 247 BarrierErr, 248 VG_(get_IP)(VG_(get_running_tid)()), 249 "Barrier reinitialization", 250 &bei); 251 } 252 } 253 p = DRD_(barrier_get_or_allocate)(barrier, barrier_type, count); 254 255 if (s_trace_barrier) 256 { 257 if (reinitialization) 258 { 259 VG_(message)(Vg_UserMsg, 260 "[%d] barrier_reinit %s 0x%lx count %ld -> %ld\n", 261 DRD_(thread_get_running_tid)(), 262 barrier_get_typename(p), 263 barrier, 264 p->count, 265 count); 266 } 267 else 268 { 269 VG_(message)(Vg_UserMsg, 270 "[%d] barrier_init %s 0x%lx\n", 271 DRD_(thread_get_running_tid)(), 272 barrier_get_typename(p), 273 barrier); 274 } 275 } 276 277 if (reinitialization && p->count != count) 278 { 279 if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count) 280 { 281 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; 282 VG_(maybe_record_error)(VG_(get_running_tid)(), 283 BarrierErr, 284 VG_(get_IP)(VG_(get_running_tid)()), 285 "Reinitialization of barrier with active" 286 " waiters", 287 &bei); 288 } 289 p->count = count; 290 } 291 } 292 293 /** Called after pthread_barrier_destroy() / gomp_barrier_destroy(). */ 294 void DRD_(barrier_destroy)(const Addr barrier, const BarrierT barrier_type) 295 { 296 struct barrier_info* p; 297 298 p = DRD_(barrier_get)(barrier); 299 300 if (s_trace_barrier) 301 { 302 VG_(message)(Vg_UserMsg, 303 "[%d] barrier_destroy %s 0x%lx\n", 304 DRD_(thread_get_running_tid)(), 305 barrier_get_typename(p), 306 barrier); 307 } 308 309 if (p == 0) 310 { 311 GenericErrInfo GEI = { 312 .tid = DRD_(thread_get_running_tid)(), 313 .addr = barrier, 314 }; 315 VG_(maybe_record_error)(VG_(get_running_tid)(), 316 GenericErr, 317 VG_(get_IP)(VG_(get_running_tid)()), 318 "Not a barrier", 319 &GEI); 320 return; 321 } 322 323 if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count) 324 { 325 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; 326 VG_(maybe_record_error)(VG_(get_running_tid)(), 327 BarrierErr, 328 VG_(get_IP)(VG_(get_running_tid)()), 329 "Destruction of a barrier with active waiters", 330 &bei); 331 } 332 333 DRD_(clientobj_remove)(p->a1, ClientBarrier); 334 } 335 336 /** Called before pthread_barrier_wait() / gomp_barrier_wait(). */ 337 void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier, 338 const BarrierT barrier_type) 339 { 340 struct barrier_info* p; 341 struct barrier_thread_info* q; 342 const UWord word_tid = tid; 343 344 p = DRD_(barrier_get)(barrier); 345 if (p == 0 && barrier_type == gomp_barrier) 346 { 347 /* 348 * gomp_barrier_wait() call has been intercepted but gomp_barrier_init() 349 * not. The only cause I know of that can trigger this is that libgomp.so 350 * has been compiled with --enable-linux-futex. 351 */ 352 VG_(message)(Vg_UserMsg, "\n"); 353 VG_(message)(Vg_UserMsg, 354 "Please verify whether gcc has been configured" 355 " with option --disable-linux-futex.\n"); 356 VG_(message)(Vg_UserMsg, 357 "See also the section about OpenMP in the DRD manual.\n"); 358 VG_(message)(Vg_UserMsg, "\n"); 359 } 360 tl_assert(p); 361 362 if (s_trace_barrier) 363 { 364 VG_(message)(Vg_UserMsg, 365 "[%d] barrier_pre_wait %s 0x%lx iteration %ld\n", 366 DRD_(thread_get_running_tid)(), 367 barrier_get_typename(p), 368 barrier, 369 p->pre_iteration); 370 } 371 372 /* Allocate the per-thread data structure if necessary. */ 373 q = VG_(OSetGen_Lookup)(p->oset, &word_tid); 374 if (q == 0) 375 { 376 q = VG_(OSetGen_AllocNode)(p->oset, sizeof(*q)); 377 DRD_(barrier_thread_initialize)(q, tid, p->pre_iteration); 378 VG_(OSetGen_Insert)(p->oset, q); 379 tl_assert(VG_(OSetGen_Lookup)(p->oset, &word_tid) == q); 380 } 381 382 /* Record *_barrier_wait() call context. */ 383 q->wait_call_ctxt = VG_(record_ExeContext)(VG_(get_running_tid)(), 0); 384 385 /* 386 * Store a pointer to the latest segment of the current thread in the 387 * per-thread data structure. 388 */ 389 DRD_(thread_get_latest_segment)(&q->sg[p->pre_iteration], tid); 390 391 /* 392 * If the same number of threads as the barrier count indicates have 393 * called the pre *_barrier_wait() wrapper, toggle p->pre_iteration and 394 * reset the p->pre_waiters_left counter. 395 */ 396 if (--p->pre_waiters_left <= 0) 397 { 398 p->pre_iteration = 1 - p->pre_iteration; 399 p->pre_waiters_left = p->count; 400 } 401 } 402 403 /** Called after pthread_barrier_wait() / gomp_barrier_wait(). */ 404 void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier, 405 const BarrierT barrier_type, const Bool waited, 406 const Bool serializing) 407 { 408 struct barrier_info* p; 409 const UWord word_tid = tid; 410 struct barrier_thread_info* q; 411 struct barrier_thread_info* r; 412 413 p = DRD_(barrier_get)(barrier); 414 415 if (s_trace_barrier) 416 { 417 VG_(message)(Vg_UserMsg, 418 "[%d] barrier_post_wait %s 0x%lx iteration %ld%s\n", 419 tid, 420 p ? barrier_get_typename(p) : "(?)", 421 barrier, 422 p ? p->post_iteration : -1, 423 serializing ? " (serializing)" : ""); 424 } 425 426 /* 427 * If p == 0, this means that the barrier has been destroyed after 428 * *_barrier_wait() returned and before this function was called. Just 429 * return in that case -- race conditions between *_barrier_wait() 430 * and *_barrier_destroy() are detected by the *_barrier_destroy() wrapper. 431 */ 432 if (p == 0) 433 return; 434 435 /* If the *_barrier_wait() call returned an error code, exit. */ 436 if (! waited) 437 return; 438 439 q = VG_(OSetGen_Lookup)(p->oset, &word_tid); 440 if (q == 0) 441 { 442 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; 443 VG_(maybe_record_error)(VG_(get_running_tid)(), 444 BarrierErr, 445 VG_(get_IP)(VG_(get_running_tid)()), 446 "Error in barrier implementation" 447 " -- barrier_wait() started before" 448 " barrier_destroy() and finished after" 449 " barrier_destroy()", 450 &bei); 451 452 q = VG_(OSetGen_AllocNode)(p->oset, sizeof(*q)); 453 DRD_(barrier_thread_initialize)(q, tid, p->pre_iteration); 454 VG_(OSetGen_Insert)(p->oset, q); 455 tl_assert(VG_(OSetGen_Lookup)(p->oset, &word_tid) == q); 456 } 457 458 /* Create a new segment and store a pointer to that segment. */ 459 DRD_(thread_new_segment)(tid); 460 DRD_(thread_get_latest_segment)(&q->post_wait_sg, tid); 461 s_barrier_segment_creation_count++; 462 463 /* 464 * Combine all vector clocks that were stored in the pre_barrier_wait 465 * wrapper with the vector clock of the current thread. 466 */ 467 { 468 VectorClock old_vc; 469 470 DRD_(vc_copy)(&old_vc, &DRD_(g_threadinfo)[tid].last->vc); 471 VG_(OSetGen_ResetIter)(p->oset); 472 for ( ; (r = VG_(OSetGen_Next)(p->oset)) != 0; ) 473 { 474 if (r != q) 475 { 476 tl_assert(r->sg[p->post_iteration]); 477 DRD_(vc_combine)(&DRD_(g_threadinfo)[tid].last->vc, 478 &r->sg[p->post_iteration]->vc); 479 } 480 } 481 DRD_(thread_update_conflict_set)(tid, &old_vc); 482 DRD_(vc_cleanup)(&old_vc); 483 } 484 485 /* 486 * If the same number of threads as the barrier count indicates have 487 * called the post *_barrier_wait() wrapper, toggle p->post_iteration and 488 * reset the p->post_waiters_left counter. 489 */ 490 if (--p->post_waiters_left <= 0) 491 { 492 p->post_iteration = 1 - p->post_iteration; 493 p->post_waiters_left = p->count; 494 } 495 } 496 497 /** Called when thread tid stops to exist. */ 498 static void barrier_delete_thread(struct barrier_info* const p, 499 const DrdThreadId tid) 500 { 501 struct barrier_thread_info* q; 502 const UWord word_tid = tid; 503 504 q = VG_(OSetGen_Remove)(p->oset, &word_tid); 505 506 /* 507 * q is only non-zero if the barrier object has been used by thread tid 508 * after the barrier_init() call and before the thread finished. 509 */ 510 if (q) 511 { 512 DRD_(barrier_thread_destroy)(q); 513 VG_(OSetGen_FreeNode)(p->oset, q); 514 } 515 } 516 517 /** 518 * Report that *_barrier_destroy() has been called but that this call was 519 * not synchronized with the last *_barrier_wait() call on the same barrier. 520 * 521 * This topic has been discussed extensively on comp.programming.threads 522 * (February 3, 2009). See also 523 * <a href="http://groups.google.com/group/comp.programming.threads/browse_thread/thread/4f65535d6192aa50/a5f4bf1e3b437c4d">Immediately destroying pthread barriers</a>. 524 */ 525 static 526 void barrier_report_wait_delete_race(const struct barrier_info* const p, 527 const struct barrier_thread_info* const q) 528 { 529 tl_assert(p); 530 tl_assert(q); 531 532 { 533 BarrierErrInfo bei 534 = { DRD_(thread_get_running_tid)(), p->a1, q->tid, q->wait_call_ctxt }; 535 VG_(maybe_record_error)(VG_(get_running_tid)(), 536 BarrierErr, 537 VG_(get_IP)(VG_(get_running_tid)()), 538 "Destruction of barrier not synchronized with" 539 " barrier wait call", 540 &bei); 541 } 542 } 543 544 static const char* barrier_get_typename(struct barrier_info* const p) 545 { 546 tl_assert(p); 547 548 return barrier_type_name(p->barrier_type); 549 } 550 551 static const char* barrier_type_name(const BarrierT bt) 552 { 553 switch (bt) 554 { 555 case pthread_barrier: 556 return "pthread barrier"; 557 case gomp_barrier: 558 return "gomp barrier"; 559 } 560 return "?"; 561 } 562 563 ULong DRD_(get_barrier_segment_creation_count)(void) 564 { 565 return s_barrier_segment_creation_count; 566 } 567