1 /* 2 This file is part of drd, a thread error detector. 3 4 Copyright (C) 2006-2013 Bart Van Assche <bvanassche (at) acm.org>. 5 6 This program is free software; you can redistribute it and/or 7 modify it under the terms of the GNU General Public License as 8 published by the Free Software Foundation; either version 2 of the 9 License, or (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software 18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 19 02111-1307, USA. 20 21 The GNU General Public License is contained in the file COPYING. 22 */ 23 24 25 #include "drd_barrier.h" 26 #include "drd_clientobj.h" 27 #include "drd_error.h" 28 #include "drd_suppression.h" 29 #include "pub_tool_errormgr.h" // VG_(maybe_record_error)() 30 #include "pub_tool_libcassert.h" // tl_assert() 31 #include "pub_tool_libcprint.h" // VG_(printf)() 32 #include "pub_tool_machine.h" // VG_(get_IP)() 33 #include "pub_tool_mallocfree.h" // VG_(malloc)(), VG_(free)() 34 #include "pub_tool_oset.h" 35 #include "pub_tool_threadstate.h" // VG_(get_running_tid)() 36 37 38 /* Type definitions. */ 39 40 /** Information associated with one thread participating in a barrier. */ 41 struct barrier_thread_info 42 { 43 UWord tid; // A DrdThreadId declared as UWord because 44 // this member variable is the key of an OSet. 45 Segment* sg; // Segment of the last pthread_barrier() call 46 // by thread tid. 47 Segment* post_wait_sg; // Segment created after *_barrier_wait() finished 48 ExeContext* wait_call_ctxt;// call stack for *_barrier_wait() call. 49 Bool thread_finished;// Whether thread 'tid' has finished. 50 }; 51 52 53 /* Local functions. */ 54 55 static void barrier_cleanup(struct barrier_info* p); 56 static void barrier_delete_thread(struct barrier_info* const p, 57 const DrdThreadId tid); 58 static const HChar* barrier_get_typename(struct barrier_info* const p); 59 static const HChar* barrier_type_name(const BarrierT bt); 60 static 61 void barrier_report_wait_delete_race(const struct barrier_info* const p, 62 const struct barrier_thread_info* const q); 63 64 65 /* Local variables. */ 66 67 static Bool s_trace_barrier = False; 68 static ULong s_barrier_segment_creation_count; 69 70 71 /* Function definitions. */ 72 73 void DRD_(barrier_set_trace)(const Bool trace_barrier) 74 { 75 s_trace_barrier = trace_barrier; 76 } 77 78 /** 79 * Initialize the structure *p with the specified thread ID and iteration 80 * information. 81 */ 82 static 83 void DRD_(barrier_thread_initialize)(struct barrier_thread_info* const p, 84 const DrdThreadId tid) 85 { 86 p->tid = tid; 87 p->sg = NULL; 88 p->post_wait_sg = 0; 89 p->wait_call_ctxt = 0; 90 p->thread_finished = False; 91 } 92 93 /** 94 * Deallocate the memory that is owned by members of 95 * struct barrier_thread_info. 96 */ 97 static void DRD_(barrier_thread_destroy)(struct barrier_thread_info* const p) 98 { 99 tl_assert(p); 100 DRD_(sg_put)(p->sg); 101 DRD_(sg_put)(p->post_wait_sg); 102 } 103 104 /** 105 * Initialize the structure *p with the specified client-side barrier address, 106 * barrier object size and number of participants in each barrier. 107 */ 108 static 109 void DRD_(barrier_initialize)(struct barrier_info* const p, 110 const Addr barrier, 111 const BarrierT barrier_type, 112 const Word count) 113 { 114 int i; 115 116 tl_assert(barrier != 0); 117 tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier); 118 tl_assert(p->a1 == barrier); 119 120 p->cleanup = (void(*)(DrdClientobj*))barrier_cleanup; 121 p->delete_thread 122 = (void(*)(DrdClientobj*, DrdThreadId))barrier_delete_thread; 123 p->barrier_type = barrier_type; 124 p->count = count; 125 p->pre_iteration = 0; 126 p->post_iteration = 0; 127 p->pre_waiters_left = count; 128 p->post_waiters_left = count; 129 130 tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) == sizeof(Word)); 131 tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) 132 >= sizeof(DrdThreadId)); 133 for (i = 0; i < 2; i++) { 134 p->oset[i] = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1", 135 VG_(free)); 136 } 137 } 138 139 /** 140 * Deallocate the memory owned by the struct barrier_info object and also 141 * all the nodes in the OSet p->oset. 142 * 143 * Called by clientobj_destroy(). 144 */ 145 static void barrier_cleanup(struct barrier_info* p) 146 { 147 struct barrier_thread_info* q; 148 Segment* latest_sg = 0; 149 OSet* oset; 150 int i; 151 152 tl_assert(p); 153 154 DRD_(thread_get_latest_segment)(&latest_sg, DRD_(thread_get_running_tid)()); 155 tl_assert(latest_sg); 156 157 if (p->pre_waiters_left != p->count) { 158 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; 159 VG_(maybe_record_error)(VG_(get_running_tid)(), 160 BarrierErr, 161 VG_(get_IP)(VG_(get_running_tid)()), 162 "Destruction of barrier that is being waited" 163 " upon", 164 &bei); 165 } else { 166 oset = p->oset[1 - (p->pre_iteration & 1)]; 167 VG_(OSetGen_ResetIter)(oset); 168 for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) { 169 if (q->post_wait_sg && !DRD_(vc_lte)(&q->post_wait_sg->vc, 170 &latest_sg->vc)) 171 { 172 barrier_report_wait_delete_race(p, q); 173 } 174 DRD_(barrier_thread_destroy)(q); 175 } 176 } 177 178 for (i = 0; i < 2; i++) { 179 VG_(OSetGen_Destroy)(p->oset[i]); 180 p->oset[i] = NULL; 181 } 182 183 DRD_(sg_put)(latest_sg); 184 } 185 186 /** 187 * Look up the client-side barrier address barrier in s_barrier[]. If not 188 * found, add it. 189 */ 190 static 191 struct barrier_info* 192 DRD_(barrier_get_or_allocate)(const Addr barrier, 193 const BarrierT barrier_type, const Word count) 194 { 195 struct barrier_info *p; 196 197 tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier); 198 199 tl_assert(offsetof(DrdClientobj, barrier) == 0); 200 p = &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier); 201 if (p == 0) 202 { 203 p = &(DRD_(clientobj_add)(barrier, ClientBarrier)->barrier); 204 DRD_(barrier_initialize)(p, barrier, barrier_type, count); 205 } 206 return p; 207 } 208 209 /** 210 * Look up the address of the struct barrier_info associated with the 211 * client-side barrier object. 212 */ 213 static struct barrier_info* DRD_(barrier_get)(const Addr barrier) 214 { 215 tl_assert(offsetof(DrdClientobj, barrier) == 0); 216 return &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier); 217 } 218 219 /** 220 * Initialize a barrier with given client address, barrier type and number of 221 * participants. The 'reinitialization' argument indicates whether a barrier 222 * object is being initialized or reinitialized. 223 * 224 * Called before pthread_barrier_init(). 225 */ 226 void DRD_(barrier_init)(const Addr barrier, 227 const BarrierT barrier_type, const Word count, 228 const Bool reinitialization) 229 { 230 struct barrier_info* p; 231 232 tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier); 233 234 if (count == 0) 235 { 236 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 }; 237 VG_(maybe_record_error)(VG_(get_running_tid)(), 238 BarrierErr, 239 VG_(get_IP)(VG_(get_running_tid)()), 240 "pthread_barrier_init: 'count' argument is zero", 241 &bei); 242 } 243 244 if (! reinitialization && barrier_type == pthread_barrier) 245 { 246 p = DRD_(barrier_get)(barrier); 247 if (p) 248 { 249 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 }; 250 VG_(maybe_record_error)(VG_(get_running_tid)(), 251 BarrierErr, 252 VG_(get_IP)(VG_(get_running_tid)()), 253 "Barrier reinitialization", 254 &bei); 255 } 256 } 257 258 p = DRD_(barrier_get_or_allocate)(barrier, barrier_type, count); 259 260 if (s_trace_barrier) { 261 if (reinitialization) 262 DRD_(trace_msg)("[%d] barrier_reinit %s 0x%lx count %ld -> %ld", 263 DRD_(thread_get_running_tid)(), 264 barrier_get_typename(p), barrier, p->count, count); 265 else 266 DRD_(trace_msg)("[%d] barrier_init %s 0x%lx", 267 DRD_(thread_get_running_tid)(), 268 barrier_get_typename(p), 269 barrier); 270 } 271 272 if (reinitialization && p->count != count) 273 { 274 if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count) 275 { 276 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; 277 VG_(maybe_record_error)(VG_(get_running_tid)(), 278 BarrierErr, 279 VG_(get_IP)(VG_(get_running_tid)()), 280 "Reinitialization of barrier with active" 281 " waiters", 282 &bei); 283 } 284 p->count = count; 285 } 286 } 287 288 /** Called after pthread_barrier_destroy() / gomp_barrier_destroy(). */ 289 void DRD_(barrier_destroy)(const Addr barrier, const BarrierT barrier_type) 290 { 291 struct barrier_info* p; 292 293 p = DRD_(barrier_get)(barrier); 294 295 if (s_trace_barrier) 296 DRD_(trace_msg)("[%d] barrier_destroy %s 0x%lx", 297 DRD_(thread_get_running_tid)(), 298 barrier_get_typename(p), barrier); 299 300 if (p == 0) 301 { 302 GenericErrInfo GEI = { 303 .tid = DRD_(thread_get_running_tid)(), 304 .addr = barrier, 305 }; 306 VG_(maybe_record_error)(VG_(get_running_tid)(), 307 GenericErr, 308 VG_(get_IP)(VG_(get_running_tid)()), 309 "Not a barrier", 310 &GEI); 311 return; 312 } 313 314 if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count) 315 { 316 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; 317 VG_(maybe_record_error)(VG_(get_running_tid)(), 318 BarrierErr, 319 VG_(get_IP)(VG_(get_running_tid)()), 320 "Destruction of a barrier with active waiters", 321 &bei); 322 } 323 324 DRD_(clientobj_remove)(p->a1, ClientBarrier); 325 } 326 327 /** Called before pthread_barrier_wait() / gomp_barrier_wait(). */ 328 void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier, 329 const BarrierT barrier_type) 330 { 331 struct barrier_info* p; 332 struct barrier_thread_info* q; 333 const UWord word_tid = tid; 334 OSet* oset; 335 336 p = DRD_(barrier_get)(barrier); 337 if (p == 0 && barrier_type == gomp_barrier) { 338 /* 339 * gomp_barrier_wait() call has been intercepted but gomp_barrier_init() 340 * not. The only cause I know of that can trigger this is that libgomp.so 341 * has been compiled with --enable-linux-futex. 342 */ 343 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), 0, 0, 0 }; 344 VG_(maybe_record_error)(VG_(get_running_tid)(), 345 BarrierErr, 346 VG_(get_IP)(VG_(get_running_tid)()), 347 "Please verify whether gcc has been configured" 348 " with option --disable-linux-futex. See also" 349 " the section about OpenMP in the DRD manual.", 350 &bei); 351 } 352 tl_assert(p); 353 354 if (s_trace_barrier) 355 DRD_(trace_msg)("[%d] barrier_pre_wait %s 0x%lx iteration %ld", 356 DRD_(thread_get_running_tid)(), 357 barrier_get_typename(p), barrier, p->pre_iteration); 358 359 /* Clean up nodes associated with finished threads. */ 360 oset = p->oset[p->pre_iteration & 1]; 361 tl_assert(oset); 362 VG_(OSetGen_ResetIter)(oset); 363 for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) { 364 if (q->thread_finished) { 365 void* r = VG_(OSetGen_Remove)(oset, &q->tid); 366 tl_assert(r == q); 367 DRD_(barrier_thread_destroy)(q); 368 VG_(OSetGen_FreeNode)(oset, q); 369 VG_(OSetGen_ResetIterAt)(oset, &word_tid); 370 } 371 } 372 /* Allocate the per-thread data structure if necessary. */ 373 q = VG_(OSetGen_Lookup)(oset, &word_tid); 374 if (q == NULL) { 375 q = VG_(OSetGen_AllocNode)(oset, sizeof(*q)); 376 DRD_(barrier_thread_initialize)(q, tid); 377 VG_(OSetGen_Insert)(oset, q); 378 tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q); 379 } 380 381 /* Record *_barrier_wait() call context. */ 382 q->wait_call_ctxt = VG_(record_ExeContext)(VG_(get_running_tid)(), 0); 383 384 /* 385 * Store a pointer to the latest segment of the current thread in the 386 * per-thread data structure. 387 */ 388 DRD_(thread_get_latest_segment)(&q->sg, tid); 389 390 /* 391 * If the same number of threads as the barrier count indicates have 392 * called the pre *_barrier_wait() wrapper, toggle p->pre_iteration and 393 * reset the p->pre_waiters_left counter. 394 */ 395 if (--p->pre_waiters_left <= 0) 396 { 397 p->pre_iteration++; 398 p->pre_waiters_left = p->count; 399 } 400 } 401 402 /** Called after pthread_barrier_wait() / gomp_barrier_wait(). */ 403 void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier, 404 const BarrierT barrier_type, const Bool waited, 405 const Bool serializing) 406 { 407 struct barrier_info* p; 408 const UWord word_tid = tid; 409 struct barrier_thread_info* q; 410 struct barrier_thread_info* r; 411 OSet* oset; 412 413 p = DRD_(barrier_get)(barrier); 414 415 if (s_trace_barrier) 416 DRD_(trace_msg)("[%d] barrier_post_wait %s 0x%lx iteration %ld%s", 417 tid, p ? barrier_get_typename(p) : "(?)", 418 barrier, p ? p->post_iteration : -1, 419 serializing ? " (serializing)" : ""); 420 421 /* 422 * If p == 0, this means that the barrier has been destroyed after 423 * *_barrier_wait() returned and before this function was called. Just 424 * return in that case -- race conditions between *_barrier_wait() 425 * and *_barrier_destroy() are detected by the *_barrier_destroy() wrapper. 426 */ 427 if (p == 0) 428 return; 429 430 /* If the *_barrier_wait() call returned an error code, exit. */ 431 if (! waited) 432 return; 433 434 oset = p->oset[p->post_iteration & 1]; 435 q = VG_(OSetGen_Lookup)(oset, &word_tid); 436 if (p->pre_iteration - p->post_iteration > 1) { 437 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; 438 VG_(maybe_record_error)(VG_(get_running_tid)(), 439 BarrierErr, 440 VG_(get_IP)(VG_(get_running_tid)()), 441 "Number of concurrent pthread_barrier_wait()" 442 " calls exceeds the barrier count", 443 &bei); 444 } else if (q == NULL) { 445 BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; 446 VG_(maybe_record_error)(VG_(get_running_tid)(), 447 BarrierErr, 448 VG_(get_IP)(VG_(get_running_tid)()), 449 "Error in barrier implementation" 450 " -- barrier_wait() started before" 451 " barrier_destroy() and finished after" 452 " barrier_destroy()", 453 &bei); 454 } 455 if (q == NULL) { 456 q = VG_(OSetGen_AllocNode)(oset, sizeof(*q)); 457 DRD_(barrier_thread_initialize)(q, tid); 458 VG_(OSetGen_Insert)(oset, q); 459 tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q); 460 DRD_(thread_get_latest_segment)(&q->sg, tid); 461 } 462 463 /* Create a new segment and store a pointer to that segment. */ 464 DRD_(thread_new_segment)(tid); 465 DRD_(thread_get_latest_segment)(&q->post_wait_sg, tid); 466 s_barrier_segment_creation_count++; 467 468 /* 469 * Combine all vector clocks that were stored in the pre_barrier_wait 470 * wrapper with the vector clock of the current thread. 471 */ 472 { 473 VectorClock old_vc; 474 475 DRD_(vc_copy)(&old_vc, DRD_(thread_get_vc)(tid)); 476 VG_(OSetGen_ResetIter)(oset); 477 for ( ; (r = VG_(OSetGen_Next)(oset)) != 0; ) 478 { 479 if (r != q) 480 { 481 tl_assert(r->sg); 482 DRD_(vc_combine)(DRD_(thread_get_vc)(tid), &r->sg->vc); 483 } 484 } 485 DRD_(thread_update_conflict_set)(tid, &old_vc); 486 DRD_(vc_cleanup)(&old_vc); 487 } 488 489 /* 490 * If the same number of threads as the barrier count indicates have 491 * called the post *_barrier_wait() wrapper, toggle p->post_iteration and 492 * reset the p->post_waiters_left counter. 493 */ 494 if (--p->post_waiters_left <= 0) 495 { 496 p->post_iteration++; 497 p->post_waiters_left = p->count; 498 } 499 } 500 501 /** Called when thread tid stops to exist. */ 502 static void barrier_delete_thread(struct barrier_info* const p, 503 const DrdThreadId tid) 504 { 505 struct barrier_thread_info* q; 506 const UWord word_tid = tid; 507 int i; 508 509 for (i = 0; i < 2; i++) { 510 q = VG_(OSetGen_Lookup)(p->oset[i], &word_tid); 511 if (q) 512 q->thread_finished = True; 513 } 514 } 515 516 /** 517 * Report that *_barrier_destroy() has been called but that this call was 518 * not synchronized with the last *_barrier_wait() call on the same barrier. 519 * 520 * This topic has been discussed extensively on comp.programming.threads 521 * (February 3, 2009). See also 522 * <a href="http://groups.google.com/group/comp.programming.threads/browse_thread/thread/4f65535d6192aa50/a5f4bf1e3b437c4d">Immediately destroying pthread barriers</a>. 523 */ 524 static 525 void barrier_report_wait_delete_race(const struct barrier_info* const p, 526 const struct barrier_thread_info* const q) 527 { 528 tl_assert(p); 529 tl_assert(q); 530 531 { 532 BarrierErrInfo bei 533 = { DRD_(thread_get_running_tid)(), p->a1, q->tid, q->wait_call_ctxt }; 534 VG_(maybe_record_error)(VG_(get_running_tid)(), 535 BarrierErr, 536 VG_(get_IP)(VG_(get_running_tid)()), 537 "Destruction of barrier not synchronized with" 538 " barrier wait call", 539 &bei); 540 } 541 } 542 543 static const HChar* barrier_get_typename(struct barrier_info* const p) 544 { 545 tl_assert(p); 546 547 return barrier_type_name(p->barrier_type); 548 } 549 550 static const HChar* barrier_type_name(const BarrierT bt) 551 { 552 switch (bt) 553 { 554 case pthread_barrier: 555 return "pthread barrier"; 556 case gomp_barrier: 557 return "gomp barrier"; 558 } 559 return "?"; 560 } 561 562 ULong DRD_(get_barrier_segment_creation_count)(void) 563 { 564 return s_barrier_segment_creation_count; 565 } 566