1 /* -*- mode: C; c-basic-offset: 3; indent-tabs-mode: nil; -*- */ 2 /* 3 This file is part of drd, a thread error detector. 4 5 Copyright (C) 2006-2011 Bart Van Assche <bvanassche (at) acm.org>. 6 7 This program is free software; you can redistribute it and/or 8 modify it under the terms of the GNU General Public License as 9 published by the Free Software Foundation; either version 2 of the 10 License, or (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, but 13 WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software 19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 20 02111-1307, USA. 21 22 The GNU General Public License is contained in the file COPYING. 23 */ 24 25 26 #include "drd_barrier.h" 27 #include "drd_clientobj.h" 28 #include "drd_error.h" 29 #include "drd_suppression.h" 30 #include "pub_tool_errormgr.h" // VG_(maybe_record_error)() 31 #include "pub_tool_libcassert.h" // tl_assert() 32 #include "pub_tool_libcprint.h" // VG_(printf)() 33 #include "pub_tool_machine.h" // VG_(get_IP)() 34 #include "pub_tool_mallocfree.h" // VG_(malloc)(), VG_(free)() 35 #include "pub_tool_oset.h" 36 #include "pub_tool_threadstate.h" // VG_(get_running_tid)() 37 38 39 /* Type definitions. */ 40 41 /** Information associated with one thread participating in a barrier. */ 42 struct barrier_thread_info 43 { 44 UWord tid; // A DrdThreadId declared as UWord because 45 // this member variable is the key of an OSet. 46 Segment* sg; // Segment of the last pthread_barrier() call 47 // by thread tid. 48 Segment* post_wait_sg; // Segment created after *_barrier_wait() finished 49 ExeContext* wait_call_ctxt;// call stack for *_barrier_wait() call. 50 Bool thread_finished;// Whether thread 'tid' has finished. 51 }; 52 53 54 /* Local functions. */ 55 56 static void barrier_cleanup(struct barrier_info* p); 57 static void barrier_delete_thread(struct barrier_info* const p, 58 const DrdThreadId tid); 59 static const char* barrier_get_typename(struct barrier_info* const p); 60 static const char* barrier_type_name(const BarrierT bt); 61 static 62 void barrier_report_wait_delete_race(const struct barrier_info* const p, 63 const struct barrier_thread_info* const q); 64 65 66 /* Local variables. */ 67 68 static Bool s_trace_barrier = False; 69 static ULong s_barrier_segment_creation_count; 70 71 72 /* Function definitions. */ 73 74 void DRD_(barrier_set_trace)(const Bool trace_barrier) 75 { 76 s_trace_barrier = trace_barrier; 77 } 78 79 /** 80 * Initialize the structure *p with the specified thread ID and iteration 81 * information. 82 */ 83 static 84 void DRD_(barrier_thread_initialize)(struct barrier_thread_info* const p, 85 const DrdThreadId tid) 86 { 87 p->tid = tid; 88 p->sg = NULL; 89 p->post_wait_sg = 0; 90 p->wait_call_ctxt = 0; 91 p->thread_finished = False; 92 } 93 94 /** 95 * Deallocate the memory that is owned by members of 96 * struct barrier_thread_info. 97 */ 98 static void DRD_(barrier_thread_destroy)(struct barrier_thread_info* const p) 99 { 100 tl_assert(p); 101 DRD_(sg_put)(p->sg); 102 DRD_(sg_put)(p->post_wait_sg); 103 } 104 105 /** 106 * Initialize the structure *p with the specified client-side barrier address, 107 * barrier object size and number of participants in each barrier. 108 */ 109 static 110 void DRD_(barrier_initialize)(struct barrier_info* const p, 111 const Addr barrier, 112 const BarrierT barrier_type, 113 const Word count) 114 { 115 int i; 116 117 tl_assert(barrier != 0); 118 tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier); 119 tl_assert(p->a1 == barrier); 120 121 p->cleanup = (void(*)(DrdClientobj*))barrier_cleanup; 122 p->delete_thread 123 = (void(*)(DrdClientobj*, DrdThreadId))barrier_delete_thread; 124 p->barrier_type = barrier_type; 125 p->count = count; 126 p->pre_iteration = 0; 127 p->post_iteration = 0; 128 p->pre_waiters_left = count; 129 p->post_waiters_left = count; 130 131 tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) == sizeof(Word)); 132 tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) 133 >= sizeof(DrdThreadId)); 134 for (i = 0; i < 2; i++) { 135 p->oset[i] = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1", 136 VG_(free)); 137 } 138 } 139 140 /** 141 * Deallocate the memory owned by the struct barrier_info object and also 142 * all the nodes in the OSet p->oset. 143 * 144 * Called by clientobj_destroy(). 145 */ 146 static void barrier_cleanup(struct barrier_info* p) 147 { 148 struct barrier_thread_info* q; 149 Segment* latest_sg = 0; 150 OSet* oset; 151 int i; 152 153 tl_assert(p); 154 155 DRD_(thread_get_latest_segment)(&latest_sg, DRD_(thread_get_running_tid)()); 156 tl_assert(latest_sg); 157 158 if (p->pre_waiters_left != p->count) { 159 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; 160 VG_(maybe_record_error)(VG_(get_running_tid)(), 161 BarrierErr, 162 VG_(get_IP)(VG_(get_running_tid)()), 163 "Destruction of barrier that is being waited" 164 " upon", 165 &bei); 166 } else { 167 oset = p->oset[1 - (p->pre_iteration & 1)]; 168 VG_(OSetGen_ResetIter)(oset); 169 for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) { 170 if (q->post_wait_sg && !DRD_(vc_lte)(&q->post_wait_sg->vc, 171 &latest_sg->vc)) 172 { 173 barrier_report_wait_delete_race(p, q); 174 } 175 DRD_(barrier_thread_destroy)(q); 176 } 177 } 178 179 for (i = 0; i < 2; i++) { 180 VG_(OSetGen_Destroy)(p->oset[i]); 181 p->oset[i] = NULL; 182 } 183 184 DRD_(sg_put)(latest_sg); 185 } 186 187 /** 188 * Look up the client-side barrier address barrier in s_barrier[]. If not 189 * found, add it. 190 */ 191 static 192 struct barrier_info* 193 DRD_(barrier_get_or_allocate)(const Addr barrier, 194 const BarrierT barrier_type, const Word count) 195 { 196 struct barrier_info *p; 197 198 tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier); 199 200 tl_assert(offsetof(DrdClientobj, barrier) == 0); 201 p = &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier); 202 if (p == 0) 203 { 204 p = &(DRD_(clientobj_add)(barrier, ClientBarrier)->barrier); 205 DRD_(barrier_initialize)(p, barrier, barrier_type, count); 206 } 207 return p; 208 } 209 210 /** 211 * Look up the address of the struct barrier_info associated with the 212 * client-side barrier object. 213 */ 214 static struct barrier_info* DRD_(barrier_get)(const Addr barrier) 215 { 216 tl_assert(offsetof(DrdClientobj, barrier) == 0); 217 return &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier); 218 } 219 220 /** 221 * Initialize a barrier with given client address, barrier type and number of 222 * participants. The 'reinitialization' argument indicates whether a barrier 223 * object is being initialized or reinitialized. 224 * 225 * Called before pthread_barrier_init(). 226 */ 227 void DRD_(barrier_init)(const Addr barrier, 228 const BarrierT barrier_type, const Word count, 229 const Bool reinitialization) 230 { 231 struct barrier_info* p; 232 233 tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier); 234 235 if (count == 0) 236 { 237 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 }; 238 VG_(maybe_record_error)(VG_(get_running_tid)(), 239 BarrierErr, 240 VG_(get_IP)(VG_(get_running_tid)()), 241 "pthread_barrier_init: 'count' argument is zero", 242 &bei); 243 } 244 245 if (! reinitialization && barrier_type == pthread_barrier) 246 { 247 p = DRD_(barrier_get)(barrier); 248 if (p) 249 { 250 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 }; 251 VG_(maybe_record_error)(VG_(get_running_tid)(), 252 BarrierErr, 253 VG_(get_IP)(VG_(get_running_tid)()), 254 "Barrier reinitialization", 255 &bei); 256 } 257 } 258 259 p = DRD_(barrier_get_or_allocate)(barrier, barrier_type, count); 260 261 if (s_trace_barrier) { 262 if (reinitialization) 263 DRD_(trace_msg)("[%d] barrier_reinit %s 0x%lx count %ld -> %ld", 264 DRD_(thread_get_running_tid)(), 265 barrier_get_typename(p), barrier, p->count, count); 266 else 267 DRD_(trace_msg)("[%d] barrier_init %s 0x%lx", 268 DRD_(thread_get_running_tid)(), 269 barrier_get_typename(p), 270 barrier); 271 } 272 273 if (reinitialization && p->count != count) 274 { 275 if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count) 276 { 277 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; 278 VG_(maybe_record_error)(VG_(get_running_tid)(), 279 BarrierErr, 280 VG_(get_IP)(VG_(get_running_tid)()), 281 "Reinitialization of barrier with active" 282 " waiters", 283 &bei); 284 } 285 p->count = count; 286 } 287 } 288 289 /** Called after pthread_barrier_destroy() / gomp_barrier_destroy(). */ 290 void DRD_(barrier_destroy)(const Addr barrier, const BarrierT barrier_type) 291 { 292 struct barrier_info* p; 293 294 p = DRD_(barrier_get)(barrier); 295 296 if (s_trace_barrier) 297 DRD_(trace_msg)("[%d] barrier_destroy %s 0x%lx", 298 DRD_(thread_get_running_tid)(), 299 barrier_get_typename(p), barrier); 300 301 if (p == 0) 302 { 303 GenericErrInfo GEI = { 304 .tid = DRD_(thread_get_running_tid)(), 305 .addr = barrier, 306 }; 307 VG_(maybe_record_error)(VG_(get_running_tid)(), 308 GenericErr, 309 VG_(get_IP)(VG_(get_running_tid)()), 310 "Not a barrier", 311 &GEI); 312 return; 313 } 314 315 if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count) 316 { 317 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; 318 VG_(maybe_record_error)(VG_(get_running_tid)(), 319 BarrierErr, 320 VG_(get_IP)(VG_(get_running_tid)()), 321 "Destruction of a barrier with active waiters", 322 &bei); 323 } 324 325 DRD_(clientobj_remove)(p->a1, ClientBarrier); 326 } 327 328 /** Called before pthread_barrier_wait() / gomp_barrier_wait(). */ 329 void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier, 330 const BarrierT barrier_type) 331 { 332 struct barrier_info* p; 333 struct barrier_thread_info* q; 334 const UWord word_tid = tid; 335 OSet* oset; 336 337 p = DRD_(barrier_get)(barrier); 338 if (p == 0 && barrier_type == gomp_barrier) { 339 /* 340 * gomp_barrier_wait() call has been intercepted but gomp_barrier_init() 341 * not. The only cause I know of that can trigger this is that libgomp.so 342 * has been compiled with --enable-linux-futex. 343 */ 344 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), 0, 0, 0 }; 345 VG_(maybe_record_error)(VG_(get_running_tid)(), 346 BarrierErr, 347 VG_(get_IP)(VG_(get_running_tid)()), 348 "Please verify whether gcc has been configured" 349 " with option --disable-linux-futex. See also" 350 " the section about OpenMP in the DRD manual.", 351 &bei); 352 } 353 tl_assert(p); 354 355 if (s_trace_barrier) 356 DRD_(trace_msg)("[%d] barrier_pre_wait %s 0x%lx iteration %ld", 357 DRD_(thread_get_running_tid)(), 358 barrier_get_typename(p), barrier, p->pre_iteration); 359 360 /* Clean up nodes associated with finished threads. */ 361 oset = p->oset[p->pre_iteration & 1]; 362 tl_assert(oset); 363 VG_(OSetGen_ResetIter)(oset); 364 for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) { 365 if (q->thread_finished) { 366 void* r = VG_(OSetGen_Remove)(oset, &q->tid); 367 tl_assert(r == q); 368 DRD_(barrier_thread_destroy)(q); 369 VG_(OSetGen_FreeNode)(oset, q); 370 VG_(OSetGen_ResetIterAt)(oset, &word_tid); 371 } 372 } 373 /* Allocate the per-thread data structure if necessary. */ 374 q = VG_(OSetGen_Lookup)(oset, &word_tid); 375 if (q == NULL) { 376 q = VG_(OSetGen_AllocNode)(oset, sizeof(*q)); 377 DRD_(barrier_thread_initialize)(q, tid); 378 VG_(OSetGen_Insert)(oset, q); 379 tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q); 380 } 381 382 /* Record *_barrier_wait() call context. */ 383 q->wait_call_ctxt = VG_(record_ExeContext)(VG_(get_running_tid)(), 0); 384 385 /* 386 * Store a pointer to the latest segment of the current thread in the 387 * per-thread data structure. 388 */ 389 DRD_(thread_get_latest_segment)(&q->sg, tid); 390 391 /* 392 * If the same number of threads as the barrier count indicates have 393 * called the pre *_barrier_wait() wrapper, toggle p->pre_iteration and 394 * reset the p->pre_waiters_left counter. 395 */ 396 if (--p->pre_waiters_left <= 0) 397 { 398 p->pre_iteration++; 399 p->pre_waiters_left = p->count; 400 } 401 } 402 403 /** Called after pthread_barrier_wait() / gomp_barrier_wait(). */ 404 void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier, 405 const BarrierT barrier_type, const Bool waited, 406 const Bool serializing) 407 { 408 struct barrier_info* p; 409 const UWord word_tid = tid; 410 struct barrier_thread_info* q; 411 struct barrier_thread_info* r; 412 OSet* oset; 413 414 p = DRD_(barrier_get)(barrier); 415 416 if (s_trace_barrier) 417 DRD_(trace_msg)("[%d] barrier_post_wait %s 0x%lx iteration %ld%s", 418 tid, p ? barrier_get_typename(p) : "(?)", 419 barrier, p ? p->post_iteration : -1, 420 serializing ? " (serializing)" : ""); 421 422 /* 423 * If p == 0, this means that the barrier has been destroyed after 424 * *_barrier_wait() returned and before this function was called. Just 425 * return in that case -- race conditions between *_barrier_wait() 426 * and *_barrier_destroy() are detected by the *_barrier_destroy() wrapper. 427 */ 428 if (p == 0) 429 return; 430 431 /* If the *_barrier_wait() call returned an error code, exit. */ 432 if (! waited) 433 return; 434 435 oset = p->oset[p->post_iteration & 1]; 436 q = VG_(OSetGen_Lookup)(oset, &word_tid); 437 if (p->pre_iteration - p->post_iteration > 1) { 438 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; 439 VG_(maybe_record_error)(VG_(get_running_tid)(), 440 BarrierErr, 441 VG_(get_IP)(VG_(get_running_tid)()), 442 "Number of concurrent pthread_barrier_wait()" 443 " calls exceeds the barrier count", 444 &bei); 445 } else if (q == NULL) { 446 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; 447 VG_(maybe_record_error)(VG_(get_running_tid)(), 448 BarrierErr, 449 VG_(get_IP)(VG_(get_running_tid)()), 450 "Error in barrier implementation" 451 " -- barrier_wait() started before" 452 " barrier_destroy() and finished after" 453 " barrier_destroy()", 454 &bei); 455 } 456 if (q == NULL) { 457 q = VG_(OSetGen_AllocNode)(oset, sizeof(*q)); 458 DRD_(barrier_thread_initialize)(q, tid); 459 VG_(OSetGen_Insert)(oset, q); 460 tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q); 461 DRD_(thread_get_latest_segment)(&q->sg, tid); 462 } 463 464 /* Create a new segment and store a pointer to that segment. */ 465 DRD_(thread_new_segment)(tid); 466 DRD_(thread_get_latest_segment)(&q->post_wait_sg, tid); 467 s_barrier_segment_creation_count++; 468 469 /* 470 * Combine all vector clocks that were stored in the pre_barrier_wait 471 * wrapper with the vector clock of the current thread. 472 */ 473 { 474 VectorClock old_vc; 475 476 DRD_(vc_copy)(&old_vc, &DRD_(g_threadinfo)[tid].last->vc); 477 VG_(OSetGen_ResetIter)(oset); 478 for ( ; (r = VG_(OSetGen_Next)(oset)) != 0; ) 479 { 480 if (r != q) 481 { 482 tl_assert(r->sg); 483 DRD_(vc_combine)(&DRD_(g_threadinfo)[tid].last->vc, 484 &r->sg->vc); 485 } 486 } 487 DRD_(thread_update_conflict_set)(tid, &old_vc); 488 DRD_(vc_cleanup)(&old_vc); 489 } 490 491 /* 492 * If the same number of threads as the barrier count indicates have 493 * called the post *_barrier_wait() wrapper, toggle p->post_iteration and 494 * reset the p->post_waiters_left counter. 495 */ 496 if (--p->post_waiters_left <= 0) 497 { 498 p->post_iteration++; 499 p->post_waiters_left = p->count; 500 } 501 } 502 503 /** Called when thread tid stops to exist. */ 504 static void barrier_delete_thread(struct barrier_info* const p, 505 const DrdThreadId tid) 506 { 507 struct barrier_thread_info* q; 508 const UWord word_tid = tid; 509 int i; 510 511 for (i = 0; i < 2; i++) { 512 q = VG_(OSetGen_Lookup)(p->oset[i], &word_tid); 513 if (q) 514 q->thread_finished = True; 515 } 516 } 517 518 /** 519 * Report that *_barrier_destroy() has been called but that this call was 520 * not synchronized with the last *_barrier_wait() call on the same barrier. 521 * 522 * This topic has been discussed extensively on comp.programming.threads 523 * (February 3, 2009). See also 524 * <a href="http://groups.google.com/group/comp.programming.threads/browse_thread/thread/4f65535d6192aa50/a5f4bf1e3b437c4d">Immediately destroying pthread barriers</a>. 525 */ 526 static 527 void barrier_report_wait_delete_race(const struct barrier_info* const p, 528 const struct barrier_thread_info* const q) 529 { 530 tl_assert(p); 531 tl_assert(q); 532 533 { 534 BarrierErrInfo bei 535 = { DRD_(thread_get_running_tid)(), p->a1, q->tid, q->wait_call_ctxt }; 536 VG_(maybe_record_error)(VG_(get_running_tid)(), 537 BarrierErr, 538 VG_(get_IP)(VG_(get_running_tid)()), 539 "Destruction of barrier not synchronized with" 540 " barrier wait call", 541 &bei); 542 } 543 } 544 545 static const char* barrier_get_typename(struct barrier_info* const p) 546 { 547 tl_assert(p); 548 549 return barrier_type_name(p->barrier_type); 550 } 551 552 static const char* barrier_type_name(const BarrierT bt) 553 { 554 switch (bt) 555 { 556 case pthread_barrier: 557 return "pthread barrier"; 558 case gomp_barrier: 559 return "gomp barrier"; 560 } 561 return "?"; 562 } 563 564 ULong DRD_(get_barrier_segment_creation_count)(void) 565 { 566 return s_barrier_segment_creation_count; 567 } 568