1 #include <gtest/gtest.h> 2 #include <sync/sync.h> 3 #include <sw_sync.h> 4 #include <fcntl.h> 5 #include <vector> 6 #include <string> 7 #include <cassert> 8 #include <iostream> 9 #include <unistd.h> 10 #include <thread> 11 #include <poll.h> 12 #include <mutex> 13 #include <algorithm> 14 #include <tuple> 15 #include <random> 16 #include <unordered_map> 17 18 // TODO: better stress tests? 19 // Handle more than 64 fd's simultaneously, i.e. fix sync_fence_info's 4k limit. 20 // Handle wraparound in timelines like nvidia. 21 22 using namespace std; 23 24 namespace { 25 26 // C++ wrapper class for sync timeline. 27 class SyncTimeline { 28 int m_fd = -1; 29 bool m_fdInitialized = false; 30 public: 31 SyncTimeline(const SyncTimeline &) = delete; 32 SyncTimeline& operator=(SyncTimeline&) = delete; 33 SyncTimeline() noexcept { 34 int fd = sw_sync_timeline_create(); 35 if (fd == -1) 36 return; 37 m_fdInitialized = true; 38 m_fd = fd; 39 } 40 void destroy() { 41 if (m_fdInitialized) { 42 close(m_fd); 43 m_fd = -1; 44 m_fdInitialized = false; 45 } 46 } 47 ~SyncTimeline() { 48 destroy(); 49 } 50 bool isValid() const { 51 if (m_fdInitialized) { 52 int status = fcntl(m_fd, F_GETFD, 0); 53 if (status >= 0) 54 return true; 55 else 56 return false; 57 } 58 else { 59 return false; 60 } 61 } 62 int getFd() const { 63 return m_fd; 64 } 65 int inc(int val = 1) { 66 return sw_sync_timeline_inc(m_fd, val); 67 } 68 }; 69 70 struct SyncPointInfo { 71 std::string driverName; 72 std::string objectName; 73 uint64_t timeStampNs; 74 int status; // 1 sig, 0 active, neg is err 75 }; 76 77 // Wrapper class for sync fence. 78 class SyncFence { 79 int m_fd = -1; 80 bool m_fdInitialized = false; 81 static int s_fenceCount; 82 83 void setFd(int fd) { 84 m_fd = fd; 85 m_fdInitialized = true; 86 } 87 void clearFd() { 88 m_fd = -1; 89 m_fdInitialized = false; 90 } 91 public: 92 bool isValid() const { 93 if (m_fdInitialized) { 94 int status = fcntl(m_fd, F_GETFD, 0); 95 if (status >= 0) 96 return true; 97 else 98 return false; 99 } 100 else { 101 return false; 102 } 103 } 104 SyncFence& operator=(SyncFence &&rhs) noexcept { 105 destroy(); 106 if (rhs.isValid()) { 107 setFd(rhs.getFd()); 108 rhs.clearFd(); 109 } 110 return *this; 111 } 112 SyncFence(SyncFence &&fence) noexcept { 113 if (fence.isValid()) { 114 setFd(fence.getFd()); 115 fence.clearFd(); 116 } 117 } 118 SyncFence(const SyncFence &fence) noexcept { 119 // This is ok, as sync fences are immutable after construction, so a dup 120 // is basically the same thing as a copy. 121 if (fence.isValid()) { 122 int fd = dup(fence.getFd()); 123 if (fd == -1) 124 return; 125 setFd(fd); 126 } 127 } 128 SyncFence(const SyncTimeline &timeline, 129 int value, 130 const char *name = nullptr) noexcept { 131 std::string autoName = "allocFence"; 132 autoName += s_fenceCount; 133 s_fenceCount++; 134 int fd = sw_sync_fence_create(timeline.getFd(), name ? name : autoName.c_str(), value); 135 if (fd == -1) 136 return; 137 setFd(fd); 138 } 139 SyncFence(const SyncFence &a, const SyncFence &b, const char *name = nullptr) noexcept { 140 std::string autoName = "mergeFence"; 141 autoName += s_fenceCount; 142 s_fenceCount++; 143 int fd = sync_merge(name ? name : autoName.c_str(), a.getFd(), b.getFd()); 144 if (fd == -1) 145 return; 146 setFd(fd); 147 } 148 SyncFence(const vector<SyncFence> &sources) noexcept { 149 assert(sources.size()); 150 SyncFence temp(*begin(sources)); 151 for (auto itr = ++begin(sources); itr != end(sources); ++itr) { 152 temp = SyncFence(*itr, temp); 153 } 154 if (temp.isValid()) { 155 setFd(temp.getFd()); 156 temp.clearFd(); 157 } 158 } 159 void destroy() { 160 if (isValid()) { 161 close(m_fd); 162 clearFd(); 163 } 164 } 165 ~SyncFence() { 166 destroy(); 167 } 168 int getFd() const { 169 return m_fd; 170 } 171 int wait(int timeout = -1) { 172 return sync_wait(m_fd, timeout); 173 } 174 vector<SyncPointInfo> getInfo() const { 175 struct sync_pt_info *pointInfo = nullptr; 176 vector<SyncPointInfo> fenceInfo; 177 sync_fence_info_data *info = sync_fence_info(getFd()); 178 if (!info) { 179 return fenceInfo; 180 } 181 while ((pointInfo = sync_pt_info(info, pointInfo))) { 182 fenceInfo.push_back(SyncPointInfo{ 183 pointInfo->driver_name, 184 pointInfo->obj_name, 185 pointInfo->timestamp_ns, 186 pointInfo->status}); 187 } 188 sync_fence_info_free(info); 189 return fenceInfo; 190 } 191 int getSize() const { 192 return getInfo().size(); 193 } 194 int getSignaledCount() const { 195 return countWithStatus(1); 196 } 197 int getActiveCount() const { 198 return countWithStatus(0); 199 } 200 int getErrorCount() const { 201 return countWithStatus(-1); 202 } 203 private: 204 int countWithStatus(int status) const { 205 int count = 0; 206 for (auto &info : getInfo()) { 207 if (info.status == status) { 208 count++; 209 } 210 } 211 return count; 212 } 213 }; 214 215 int SyncFence::s_fenceCount = 0; 216 217 TEST(AllocTest, Timeline) { 218 SyncTimeline timeline; 219 ASSERT_TRUE(timeline.isValid()); 220 } 221 222 TEST(AllocTest, Fence) { 223 SyncTimeline timeline; 224 ASSERT_TRUE(timeline.isValid()); 225 226 SyncFence fence(timeline, 1); 227 ASSERT_TRUE(fence.isValid()); 228 } 229 230 TEST(AllocTest, FenceNegative) { 231 int timeline = sw_sync_timeline_create(); 232 ASSERT_GT(timeline, 0); 233 234 // bad fd. 235 ASSERT_LT(sw_sync_fence_create(-1, "fence", 1), 0); 236 237 // No name - segfaults in user space. 238 // Maybe we should be friendlier here? 239 /* 240 ASSERT_LT(sw_sync_fence_create(timeline, nullptr, 1), 0); 241 */ 242 close(timeline); 243 } 244 245 TEST(FenceTest, OneTimelineWait) { 246 SyncTimeline timeline; 247 ASSERT_TRUE(timeline.isValid()); 248 249 SyncFence fence(timeline, 5); 250 ASSERT_TRUE(fence.isValid()); 251 252 // Wait on fence until timeout. 253 ASSERT_EQ(fence.wait(0), -1); 254 ASSERT_EQ(errno, ETIME); 255 256 // Advance timeline from 0 -> 1 257 ASSERT_EQ(timeline.inc(1), 0); 258 259 // Wait on fence until timeout. 260 ASSERT_EQ(fence.wait(0), -1); 261 ASSERT_EQ(errno, ETIME); 262 263 // Signal the fence. 264 ASSERT_EQ(timeline.inc(4), 0); 265 266 // Wait successfully. 267 ASSERT_EQ(fence.wait(0), 0); 268 269 // Go even futher, and confirm wait still succeeds. 270 ASSERT_EQ(timeline.inc(10), 0); 271 ASSERT_EQ(fence.wait(0), 0); 272 } 273 274 TEST(FenceTest, OneTimelinePoll) { 275 SyncTimeline timeline; 276 ASSERT_TRUE(timeline.isValid()); 277 278 SyncFence fence(timeline, 100); 279 ASSERT_TRUE(fence.isValid()); 280 281 fd_set set; 282 FD_ZERO(&set); 283 FD_SET(fence.getFd(), &set); 284 285 // Poll the fence, and wait till timeout. 286 timeval time = {0}; 287 ASSERT_EQ(select(fence.getFd() + 1, &set, nullptr, nullptr, &time), 0); 288 289 // Advance the timeline. 290 timeline.inc(100); 291 timeline.inc(100); 292 293 // Select should return that the fd is read for reading. 294 FD_ZERO(&set); 295 FD_SET(fence.getFd(), &set); 296 297 ASSERT_EQ(select(fence.getFd() + 1, &set, nullptr, nullptr, &time), 1); 298 ASSERT_TRUE(FD_ISSET(fence.getFd(), &set)); 299 } 300 301 TEST(FenceTest, OneTimelineMerge) { 302 SyncTimeline timeline; 303 ASSERT_TRUE(timeline.isValid()); 304 305 // create fence a,b,c and then merge them all into fence d. 306 SyncFence a(timeline, 1), b(timeline, 2), c(timeline, 3); 307 ASSERT_TRUE(a.isValid()); 308 ASSERT_TRUE(b.isValid()); 309 ASSERT_TRUE(c.isValid()); 310 311 SyncFence d({a,b,c}); 312 ASSERT_TRUE(d.isValid()); 313 314 // confirm all fences have one active point (even d). 315 ASSERT_EQ(a.getActiveCount(), 1); 316 ASSERT_EQ(b.getActiveCount(), 1); 317 ASSERT_EQ(c.getActiveCount(), 1); 318 ASSERT_EQ(d.getActiveCount(), 1); 319 320 // confirm that d is not signaled until the max of a,b,c 321 timeline.inc(1); 322 ASSERT_EQ(a.getSignaledCount(), 1); 323 ASSERT_EQ(d.getActiveCount(), 1); 324 325 timeline.inc(1); 326 ASSERT_EQ(b.getSignaledCount(), 1); 327 ASSERT_EQ(d.getActiveCount(), 1); 328 329 timeline.inc(1); 330 ASSERT_EQ(c.getSignaledCount(), 1); 331 ASSERT_EQ(d.getActiveCount(), 0); 332 ASSERT_EQ(d.getSignaledCount(), 1); 333 } 334 335 TEST(FenceTest, MergeSameFence) { 336 SyncTimeline timeline; 337 ASSERT_TRUE(timeline.isValid()); 338 339 SyncFence fence(timeline, 5); 340 ASSERT_TRUE(fence.isValid()); 341 342 SyncFence selfMergeFence(fence, fence); 343 ASSERT_TRUE(selfMergeFence.isValid()); 344 345 ASSERT_EQ(selfMergeFence.getSignaledCount(), 0); 346 347 timeline.inc(5); 348 ASSERT_EQ(selfMergeFence.getSignaledCount(), 1); 349 } 350 351 TEST(FenceTest, WaitOnDestroyedTimeline) { 352 SyncTimeline timeline; 353 ASSERT_TRUE(timeline.isValid()); 354 355 SyncFence fenceSig(timeline, 100); 356 SyncFence fenceKill(timeline, 200); 357 358 // Spawn a thread to wait on a fence when the timeline is killed. 359 thread waitThread{ 360 [&]() { 361 ASSERT_EQ(timeline.inc(100), 0); 362 363 ASSERT_EQ(fenceKill.wait(-1), -1); 364 ASSERT_EQ(errno, ENOENT); 365 } 366 }; 367 368 // Wait for the thread to spool up. 369 fenceSig.wait(); 370 371 // Kill the timeline. 372 timeline.destroy(); 373 374 // wait for the thread to clean up. 375 waitThread.join(); 376 } 377 378 TEST(FenceTest, PollOnDestroyedTimeline) { 379 SyncTimeline timeline; 380 ASSERT_TRUE(timeline.isValid()); 381 382 SyncFence fenceSig(timeline, 100); 383 SyncFence fenceKill(timeline, 200); 384 385 // Spawn a thread to wait on a fence when the timeline is killed. 386 thread waitThread{ 387 [&]() { 388 ASSERT_EQ(timeline.inc(100), 0); 389 390 // Wait on the fd. 391 struct pollfd fds; 392 fds.fd = fenceKill.getFd(); 393 fds.events = POLLIN | POLLERR; 394 ASSERT_EQ(poll(&fds, 1, -1), 1); 395 ASSERT_TRUE(fds.revents & POLLERR); 396 } 397 }; 398 399 // Wait for the thread to spool up. 400 fenceSig.wait(); 401 402 // Kill the timeline. 403 timeline.destroy(); 404 405 // wait for the thread to clean up. 406 waitThread.join(); 407 } 408 409 TEST(FenceTest, MultiTimelineWait) { 410 SyncTimeline timelineA, timelineB, timelineC; 411 412 SyncFence fenceA(timelineA, 5); 413 SyncFence fenceB(timelineB, 5); 414 SyncFence fenceC(timelineC, 5); 415 416 // Make a larger fence using 3 other fences from different timelines. 417 SyncFence mergedFence({fenceA, fenceB, fenceC}); 418 ASSERT_TRUE(mergedFence.isValid()); 419 420 // Confirm fence isn't signaled 421 ASSERT_EQ(mergedFence.getActiveCount(), 3); 422 ASSERT_EQ(mergedFence.wait(0), -1); 423 ASSERT_EQ(errno, ETIME); 424 425 timelineA.inc(5); 426 ASSERT_EQ(mergedFence.getActiveCount(), 2); 427 ASSERT_EQ(mergedFence.getSignaledCount(), 1); 428 429 timelineB.inc(5); 430 ASSERT_EQ(mergedFence.getActiveCount(), 1); 431 ASSERT_EQ(mergedFence.getSignaledCount(), 2); 432 433 timelineC.inc(5); 434 ASSERT_EQ(mergedFence.getActiveCount(), 0); 435 ASSERT_EQ(mergedFence.getSignaledCount(), 3); 436 437 // confirm you can successfully wait. 438 ASSERT_EQ(mergedFence.wait(100), 0); 439 } 440 441 TEST(StressTest, TwoThreadsSharedTimeline) { 442 const int iterations = 1 << 16; 443 int counter = 0; 444 SyncTimeline timeline; 445 ASSERT_TRUE(timeline.isValid()); 446 447 // Use a single timeline to synchronize two threads 448 // hammmering on the same counter. 449 auto threadMain = [&](int threadId) { 450 for (int i = 0; i < iterations; i++) { 451 SyncFence fence(timeline, i * 2 + threadId); 452 ASSERT_TRUE(fence.isValid()); 453 454 // Wait on the prior thread to complete. 455 ASSERT_EQ(fence.wait(), 0); 456 457 // Confirm the previous thread's writes are visible and then inc. 458 ASSERT_EQ(counter, i * 2 + threadId); 459 counter++; 460 461 // Kick off the other thread. 462 ASSERT_EQ(timeline.inc(), 0); 463 } 464 }; 465 466 thread a{threadMain, 0}; 467 thread b{threadMain, 1}; 468 a.join(); 469 b.join(); 470 471 // make sure the threads did not trample on one another. 472 ASSERT_EQ(counter, iterations * 2); 473 } 474 475 class ConsumerStressTest : public ::testing::TestWithParam<int> {}; 476 477 TEST_P(ConsumerStressTest, MultiProducerSingleConsumer) { 478 mutex lock; 479 int counter = 0; 480 int iterations = 1 << 12; 481 482 vector<SyncTimeline> producerTimelines(GetParam()); 483 vector<thread> threads; 484 SyncTimeline consumerTimeline; 485 486 // Producer threads run this lambda. 487 auto threadMain = [&](int threadId) { 488 for (int i = 0; i < iterations; i++) { 489 SyncFence fence(consumerTimeline, i); 490 ASSERT_TRUE(fence.isValid()); 491 492 // Wait for the consumer to finish. Use alternate 493 // means of waiting on the fence. 494 if ((iterations + threadId) % 8 != 0) { 495 ASSERT_EQ(fence.wait(), 0); 496 } 497 else { 498 while (fence.getSignaledCount() != 1) { 499 ASSERT_EQ(fence.getErrorCount(), 0); 500 } 501 } 502 503 // Every producer increments the counter, the consumer checks + erases it. 504 lock.lock(); 505 counter++; 506 lock.unlock(); 507 508 ASSERT_EQ(producerTimelines[threadId].inc(), 0); 509 } 510 }; 511 512 for (int i = 0; i < GetParam(); i++) { 513 threads.push_back(thread{threadMain, i}); 514 } 515 516 // Consumer thread runs this loop. 517 for (int i = 1; i <= iterations; i++) { 518 // Create a fence representing all producers final timelines. 519 vector<SyncFence> fences; 520 for (auto& timeline : producerTimelines) { 521 fences.push_back(SyncFence(timeline, i)); 522 } 523 SyncFence mergeFence(fences); 524 ASSERT_TRUE(mergeFence.isValid()); 525 526 // Make sure we see an increment from every producer thread. Vary 527 // the means by which we wait. 528 if (iterations % 8 != 0) { 529 ASSERT_EQ(mergeFence.wait(), 0); 530 } 531 else { 532 while (mergeFence.getSignaledCount() != mergeFence.getSize()) { 533 ASSERT_EQ(mergeFence.getErrorCount(), 0); 534 } 535 } 536 ASSERT_EQ(counter, GetParam()*i); 537 538 // Release the producer threads. 539 ASSERT_EQ(consumerTimeline.inc(), 0); 540 } 541 542 for_each(begin(threads), end(threads), [](thread& thread) { thread.join(); }); 543 } 544 INSTANTIATE_TEST_CASE_P( 545 ParameterizedStressTest, 546 ConsumerStressTest, 547 ::testing::Values(2,4,16)); 548 549 class MergeStressTest : public ::testing::TestWithParam<tuple<int, int>> {}; 550 551 template <typename K, typename V> using dict = unordered_map<K,V>; 552 553 TEST_P(MergeStressTest, RandomMerge) { 554 int timelineCount = get<0>(GetParam()); 555 int mergeCount = get<1>(GetParam()); 556 557 vector<SyncTimeline> timelines(timelineCount); 558 559 default_random_engine generator; 560 uniform_int_distribution<int> timelineDist(0, timelines.size()-1); 561 uniform_int_distribution<int> syncPointDist(0, numeric_limits<int>::max()); 562 563 SyncFence fence(timelines[0], 0); 564 ASSERT_TRUE(fence.isValid()); 565 566 unordered_map<int, int> fenceMap; 567 fenceMap.insert(make_tuple(0, 0)); 568 569 // Randomly create syncpoints out of a fixed set of timelines, and merge them together. 570 for (int i = 0; i < mergeCount; i++) { 571 572 // Generate syncpoint. 573 int timelineOffset = timelineDist(generator); 574 const SyncTimeline& timeline = timelines[timelineOffset]; 575 int syncPoint = syncPointDist(generator); 576 577 // Keep track of the latest syncpoint in each timeline. 578 auto itr = fenceMap.find(timelineOffset); 579 if (itr == end(fenceMap)) { 580 fenceMap.insert(tie(timelineOffset, syncPoint)); 581 } 582 else { 583 int oldSyncPoint = itr->second; 584 fenceMap.erase(itr); 585 fenceMap.insert(tie(timelineOffset, max(syncPoint, oldSyncPoint))); 586 } 587 588 // Merge. 589 fence = SyncFence(fence, SyncFence(timeline, syncPoint)); 590 ASSERT_TRUE(fence.isValid()); 591 } 592 593 // Confirm our map matches the fence. 594 ASSERT_EQ(fence.getSize(), fenceMap.size()); 595 596 // Trigger the merged fence. 597 for (auto& item: fenceMap) { 598 ASSERT_EQ(fence.wait(0), -1); 599 ASSERT_EQ(errno, ETIME); 600 601 // Increment the timeline to the last syncpoint. 602 timelines[item.first].inc(item.second); 603 } 604 605 // Check that the fence is triggered. 606 ASSERT_EQ(fence.wait(0), 0); 607 } 608 609 INSTANTIATE_TEST_CASE_P( 610 ParameterizedMergeStressTest, 611 MergeStressTest, 612 ::testing::Combine(::testing::Values(16,32), ::testing::Values(32, 1024, 1024*32))); 613 614 } 615 616