1 // Copyright 2006 Google Inc. All Rights Reserved. 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 7 // http://www.apache.org/licenses/LICENSE-2.0 8 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // worker.cc : individual tasks that can be run in combination to 16 // stress the system 17 18 #include <errno.h> 19 #include <pthread.h> 20 #include <sched.h> 21 #include <signal.h> 22 #include <stdlib.h> 23 #include <stdio.h> 24 #include <stdint.h> 25 #include <string.h> 26 #include <time.h> 27 #include <unistd.h> 28 29 #include <sys/select.h> 30 #include <sys/stat.h> 31 #include <sys/types.h> 32 #include <sys/times.h> 33 34 // These are necessary, but on by default 35 // #define __USE_GNU 36 // #define __USE_LARGEFILE64 37 #include <fcntl.h> 38 #include <sys/socket.h> 39 #include <netdb.h> 40 #include <arpa/inet.h> 41 #include <linux/unistd.h> // for gettid 42 43 // For size of block device 44 #include <sys/ioctl.h> 45 #include <linux/fs.h> 46 // For asynchronous I/O 47 #ifdef HAVE_LIBAIO_H 48 #include <libaio.h> 49 #endif 50 51 #include <sys/syscall.h> 52 53 #include <set> 54 #include <string> 55 56 // This file must work with autoconf on its public version, 57 // so these includes are correct. 58 #include "error_diag.h" // NOLINT 59 #include "os.h" // NOLINT 60 #include "pattern.h" // NOLINT 61 #include "queue.h" // NOLINT 62 #include "sat.h" // NOLINT 63 #include "sattypes.h" // NOLINT 64 #include "worker.h" // NOLINT 65 66 // Syscalls 67 // Why ubuntu, do you hate gettid so bad? 68 #if !defined(__NR_gettid) 69 #define __NR_gettid 224 70 #endif 71 72 #define gettid() syscall(__NR_gettid) 73 #if !defined(CPU_SETSIZE) 74 _syscall3(int, sched_getaffinity, pid_t, pid, 75 unsigned int, len, cpu_set_t*, mask) 76 _syscall3(int, sched_setaffinity, pid_t, pid, 77 unsigned int, len, cpu_set_t*, mask) 78 #endif 79 80 namespace { 81 // Work around the sad fact that there are two (gnu, xsi) incompatible 82 // versions of strerror_r floating around google. Awesome. 83 bool sat_strerror(int err, char *buf, int len) { 84 buf[0] = 0; 85 char *errmsg = reinterpret_cast<char*>(strerror_r(err, buf, len)); 86 int retval = reinterpret_cast<int64>(errmsg); 87 if (retval == 0) 88 return true; 89 if (retval == -1) 90 return false; 91 if (errmsg != buf) { 92 strncpy(buf, errmsg, len); 93 buf[len - 1] = 0; 94 } 95 return true; 96 } 97 98 99 inline uint64 addr_to_tag(void *address) { 100 return reinterpret_cast<uint64>(address); 101 } 102 } // namespace 103 104 #if !defined(O_DIRECT) 105 // Sometimes this isn't available. 106 // Disregard if it's not defined. 107 #define O_DIRECT 0 108 #endif 109 110 // A struct to hold captured errors, for later reporting. 111 struct ErrorRecord { 112 uint64 actual; // This is the actual value read. 113 uint64 reread; // This is the actual value, reread. 114 uint64 expected; // This is what it should have been. 115 uint64 *vaddr; // This is where it was (or wasn't). 116 char *vbyteaddr; // This is byte specific where the data was (or wasn't). 117 uint64 paddr; // This is the bus address, if available. 118 uint64 *tagvaddr; // This holds the tag value if this data was tagged. 119 uint64 tagpaddr; // This holds the physical address corresponding to the tag. 120 }; 121 122 // This is a helper function to create new threads with pthreads. 123 static void *ThreadSpawnerGeneric(void *ptr) { 124 WorkerThread *worker = static_cast<WorkerThread*>(ptr); 125 worker->StartRoutine(); 126 return NULL; 127 } 128 129 void WorkerStatus::Initialize() { 130 sat_assert(0 == pthread_mutex_init(&num_workers_mutex_, NULL)); 131 sat_assert(0 == pthread_rwlock_init(&status_rwlock_, NULL)); 132 #ifdef HAVE_PTHREAD_BARRIERS 133 sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL, 134 num_workers_ + 1)); 135 #endif 136 } 137 138 void WorkerStatus::Destroy() { 139 sat_assert(0 == pthread_mutex_destroy(&num_workers_mutex_)); 140 sat_assert(0 == pthread_rwlock_destroy(&status_rwlock_)); 141 #ifdef HAVE_PTHREAD_BARRIERS 142 sat_assert(0 == pthread_barrier_destroy(&pause_barrier_)); 143 #endif 144 } 145 146 void WorkerStatus::PauseWorkers() { 147 if (SetStatus(PAUSE) != PAUSE) 148 WaitOnPauseBarrier(); 149 } 150 151 void WorkerStatus::ResumeWorkers() { 152 if (SetStatus(RUN) == PAUSE) 153 WaitOnPauseBarrier(); 154 } 155 156 void WorkerStatus::StopWorkers() { 157 if (SetStatus(STOP) == PAUSE) 158 WaitOnPauseBarrier(); 159 } 160 161 bool WorkerStatus::ContinueRunning(bool *paused) { 162 // This loop is an optimization. We use it to immediately re-check the status 163 // after resuming from a pause, instead of returning and waiting for the next 164 // call to this function. 165 if (paused) { 166 *paused = false; 167 } 168 for (;;) { 169 switch (GetStatus()) { 170 case RUN: 171 return true; 172 case PAUSE: 173 // Wait for the other workers to call this function so that 174 // PauseWorkers() can return. 175 WaitOnPauseBarrier(); 176 // Wait for ResumeWorkers() to be called. 177 WaitOnPauseBarrier(); 178 // Indicate that a pause occurred. 179 if (paused) { 180 *paused = true; 181 } 182 break; 183 case STOP: 184 return false; 185 } 186 } 187 } 188 189 bool WorkerStatus::ContinueRunningNoPause() { 190 return (GetStatus() != STOP); 191 } 192 193 void WorkerStatus::RemoveSelf() { 194 // Acquire a read lock on status_rwlock_ while (status_ != PAUSE). 195 for (;;) { 196 AcquireStatusReadLock(); 197 if (status_ != PAUSE) 198 break; 199 // We need to obey PauseWorkers() just like ContinueRunning() would, so that 200 // the other threads won't wait on pause_barrier_ forever. 201 ReleaseStatusLock(); 202 // Wait for the other workers to call this function so that PauseWorkers() 203 // can return. 204 WaitOnPauseBarrier(); 205 // Wait for ResumeWorkers() to be called. 206 WaitOnPauseBarrier(); 207 } 208 209 // This lock would be unnecessary if we held a write lock instead of a read 210 // lock on status_rwlock_, but that would also force all threads calling 211 // ContinueRunning() to wait on this one. Using a separate lock avoids that. 212 AcquireNumWorkersLock(); 213 // Decrement num_workers_ and reinitialize pause_barrier_, which we know isn't 214 // in use because (status != PAUSE). 215 #ifdef HAVE_PTHREAD_BARRIERS 216 sat_assert(0 == pthread_barrier_destroy(&pause_barrier_)); 217 sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL, num_workers_)); 218 #endif 219 --num_workers_; 220 ReleaseNumWorkersLock(); 221 222 // Release status_rwlock_. 223 ReleaseStatusLock(); 224 } 225 226 227 // Parent thread class. 228 WorkerThread::WorkerThread() { 229 status_ = false; 230 pages_copied_ = 0; 231 errorcount_ = 0; 232 runduration_usec_ = 1; 233 priority_ = Normal; 234 worker_status_ = NULL; 235 thread_spawner_ = &ThreadSpawnerGeneric; 236 tag_mode_ = false; 237 } 238 239 WorkerThread::~WorkerThread() {} 240 241 // Constructors. Just init some default values. 242 FillThread::FillThread() { 243 num_pages_to_fill_ = 0; 244 } 245 246 // Initialize file name to empty. 247 FileThread::FileThread() { 248 filename_ = ""; 249 devicename_ = ""; 250 pass_ = 0; 251 page_io_ = true; 252 crc_page_ = -1; 253 local_page_ = NULL; 254 } 255 256 // If file thread used bounce buffer in memory, account for the extra 257 // copy for memory bandwidth calculation. 258 float FileThread::GetMemoryCopiedData() { 259 if (!os_->normal_mem()) 260 return GetCopiedData(); 261 else 262 return 0; 263 } 264 265 // Initialize target hostname to be invalid. 266 NetworkThread::NetworkThread() { 267 snprintf(ipaddr_, sizeof(ipaddr_), "Unknown"); 268 sock_ = 0; 269 } 270 271 // Initialize? 272 NetworkSlaveThread::NetworkSlaveThread() { 273 } 274 275 // Initialize? 276 NetworkListenThread::NetworkListenThread() { 277 } 278 279 // Init member variables. 280 void WorkerThread::InitThread(int thread_num_init, 281 class Sat *sat_init, 282 class OsLayer *os_init, 283 class PatternList *patternlist_init, 284 WorkerStatus *worker_status) { 285 sat_assert(worker_status); 286 worker_status->AddWorkers(1); 287 288 thread_num_ = thread_num_init; 289 sat_ = sat_init; 290 os_ = os_init; 291 patternlist_ = patternlist_init; 292 worker_status_ = worker_status; 293 294 AvailableCpus(&cpu_mask_); 295 tag_ = 0xffffffff; 296 297 tag_mode_ = sat_->tag_mode(); 298 } 299 300 301 // Use pthreads to prioritize a system thread. 302 bool WorkerThread::InitPriority() { 303 // This doesn't affect performance that much, and may not be too safe. 304 305 bool ret = BindToCpus(&cpu_mask_); 306 if (!ret) 307 logprintf(11, "Log: Bind to %s failed.\n", 308 cpuset_format(&cpu_mask_).c_str()); 309 310 logprintf(11, "Log: Thread %d running on core ID %d mask %s (%s).\n", 311 thread_num_, sched_getcpu(), 312 CurrentCpusFormat().c_str(), 313 cpuset_format(&cpu_mask_).c_str()); 314 #if 0 315 if (priority_ == High) { 316 sched_param param; 317 param.sched_priority = 1; 318 // Set the priority; others are unchanged. 319 logprintf(0, "Log: Changing priority to SCHED_FIFO %d\n", 320 param.sched_priority); 321 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 322 char buf[256]; 323 sat_strerror(errno, buf, sizeof(buf)); 324 logprintf(0, "Process Error: sched_setscheduler " 325 "failed - error %d %s\n", 326 errno, buf); 327 } 328 } 329 #endif 330 return true; 331 } 332 333 // Use pthreads to create a system thread. 334 int WorkerThread::SpawnThread() { 335 // Create the new thread. 336 int result = pthread_create(&thread_, NULL, thread_spawner_, this); 337 if (result) { 338 char buf[256]; 339 sat_strerror(result, buf, sizeof(buf)); 340 logprintf(0, "Process Error: pthread_create " 341 "failed - error %d %s\n", result, 342 buf); 343 status_ = false; 344 return false; 345 } 346 347 // 0 is pthreads success. 348 return true; 349 } 350 351 // Kill the worker thread with SIGINT. 352 bool WorkerThread::KillThread() { 353 return (pthread_kill(thread_, SIGINT) == 0); 354 } 355 356 // Block until thread has exited. 357 bool WorkerThread::JoinThread() { 358 int result = pthread_join(thread_, NULL); 359 360 if (result) { 361 logprintf(0, "Process Error: pthread_join failed - error %d\n", result); 362 status_ = false; 363 } 364 365 // 0 is pthreads success. 366 return (!result); 367 } 368 369 370 void WorkerThread::StartRoutine() { 371 InitPriority(); 372 StartThreadTimer(); 373 Work(); 374 StopThreadTimer(); 375 worker_status_->RemoveSelf(); 376 } 377 378 379 // Thread work loop. Execute until marked finished. 380 bool WorkerThread::Work() { 381 do { 382 logprintf(9, "Log: ...\n"); 383 // Sleep for 1 second. 384 sat_sleep(1); 385 } while (IsReadyToRun()); 386 387 return false; 388 } 389 390 391 // Returns CPU mask of CPUs available to this process, 392 // Conceptually, each bit represents a logical CPU, ie: 393 // mask = 3 (11b): cpu0, 1 394 // mask = 13 (1101b): cpu0, 2, 3 395 bool WorkerThread::AvailableCpus(cpu_set_t *cpuset) { 396 CPU_ZERO(cpuset); 397 #ifdef HAVE_SCHED_GETAFFINITY 398 return sched_getaffinity(getppid(), sizeof(*cpuset), cpuset) == 0; 399 #else 400 return 0; 401 #endif 402 } 403 404 405 // Returns CPU mask of CPUs this thread is bound to, 406 // Conceptually, each bit represents a logical CPU, ie: 407 // mask = 3 (11b): cpu0, 1 408 // mask = 13 (1101b): cpu0, 2, 3 409 bool WorkerThread::CurrentCpus(cpu_set_t *cpuset) { 410 CPU_ZERO(cpuset); 411 #ifdef HAVE_SCHED_GETAFFINITY 412 return sched_getaffinity(0, sizeof(*cpuset), cpuset) == 0; 413 #else 414 return 0; 415 #endif 416 } 417 418 419 // Bind worker thread to specified CPU(s) 420 // Args: 421 // thread_mask: cpu_set_t representing CPUs, ie 422 // mask = 1 (01b): cpu0 423 // mask = 3 (11b): cpu0, 1 424 // mask = 13 (1101b): cpu0, 2, 3 425 // 426 // Returns true on success, false otherwise. 427 bool WorkerThread::BindToCpus(const cpu_set_t *thread_mask) { 428 cpu_set_t process_mask; 429 AvailableCpus(&process_mask); 430 if (cpuset_isequal(thread_mask, &process_mask)) 431 return true; 432 433 logprintf(11, "Log: available CPU mask - %s\n", 434 cpuset_format(&process_mask).c_str()); 435 if (!cpuset_issubset(thread_mask, &process_mask)) { 436 // Invalid cpu_mask, ie cpu not allocated to this process or doesn't exist. 437 logprintf(0, "Log: requested CPUs %s not a subset of available %s\n", 438 cpuset_format(thread_mask).c_str(), 439 cpuset_format(&process_mask).c_str()); 440 return false; 441 } 442 #ifdef HAVE_SCHED_GETAFFINITY 443 return (sched_setaffinity(gettid(), sizeof(*thread_mask), thread_mask) == 0); 444 #else 445 return 0; 446 #endif 447 } 448 449 450 // A worker thread can yield itself to give up CPU until it's scheduled again. 451 // Returns true on success, false on error. 452 bool WorkerThread::YieldSelf() { 453 return (sched_yield() == 0); 454 } 455 456 457 // Fill this page with its pattern. 458 bool WorkerThread::FillPage(struct page_entry *pe) { 459 // Error check arguments. 460 if (pe == 0) { 461 logprintf(0, "Process Error: Fill Page entry null\n"); 462 return 0; 463 } 464 465 // Mask is the bitmask of indexes used by the pattern. 466 // It is the pattern size -1. Size is always a power of 2. 467 uint64 *memwords = static_cast<uint64*>(pe->addr); 468 int length = sat_->page_length(); 469 470 if (tag_mode_) { 471 // Select tag or data as appropriate. 472 for (int i = 0; i < length / wordsize_; i++) { 473 datacast_t data; 474 475 if ((i & 0x7) == 0) { 476 data.l64 = addr_to_tag(&memwords[i]); 477 } else { 478 data.l32.l = pe->pattern->pattern(i << 1); 479 data.l32.h = pe->pattern->pattern((i << 1) + 1); 480 } 481 memwords[i] = data.l64; 482 } 483 } else { 484 // Just fill in untagged data directly. 485 for (int i = 0; i < length / wordsize_; i++) { 486 datacast_t data; 487 488 data.l32.l = pe->pattern->pattern(i << 1); 489 data.l32.h = pe->pattern->pattern((i << 1) + 1); 490 memwords[i] = data.l64; 491 } 492 } 493 494 return 1; 495 } 496 497 498 // Tell the thread how many pages to fill. 499 void FillThread::SetFillPages(int64 num_pages_to_fill_init) { 500 num_pages_to_fill_ = num_pages_to_fill_init; 501 } 502 503 // Fill this page with a random pattern. 504 bool FillThread::FillPageRandom(struct page_entry *pe) { 505 // Error check arguments. 506 if (pe == 0) { 507 logprintf(0, "Process Error: Fill Page entry null\n"); 508 return 0; 509 } 510 if ((patternlist_ == 0) || (patternlist_->Size() == 0)) { 511 logprintf(0, "Process Error: No data patterns available\n"); 512 return 0; 513 } 514 515 // Choose a random pattern for this block. 516 pe->pattern = patternlist_->GetRandomPattern(); 517 if (pe->pattern == 0) { 518 logprintf(0, "Process Error: Null data pattern\n"); 519 return 0; 520 } 521 522 // Actually fill the page. 523 return FillPage(pe); 524 } 525 526 527 // Memory fill work loop. Execute until alloted pages filled. 528 bool FillThread::Work() { 529 bool result = true; 530 531 logprintf(9, "Log: Starting fill thread %d\n", thread_num_); 532 533 // We want to fill num_pages_to_fill pages, and 534 // stop when we've filled that many. 535 // We also want to capture early break 536 struct page_entry pe; 537 int64 loops = 0; 538 while (IsReadyToRun() && (loops < num_pages_to_fill_)) { 539 result = result && sat_->GetEmpty(&pe); 540 if (!result) { 541 logprintf(0, "Process Error: fill_thread failed to pop pages, " 542 "bailing\n"); 543 break; 544 } 545 546 // Fill the page with pattern 547 result = result && FillPageRandom(&pe); 548 if (!result) break; 549 550 // Put the page back on the queue. 551 result = result && sat_->PutValid(&pe); 552 if (!result) { 553 logprintf(0, "Process Error: fill_thread failed to push pages, " 554 "bailing\n"); 555 break; 556 } 557 loops++; 558 } 559 560 // Fill in thread status. 561 pages_copied_ = loops; 562 status_ = result; 563 logprintf(9, "Log: Completed %d: Fill thread. Status %d, %d pages filled\n", 564 thread_num_, status_, pages_copied_); 565 return result; 566 } 567 568 569 // Print error information about a data miscompare. 570 void WorkerThread::ProcessError(struct ErrorRecord *error, 571 int priority, 572 const char *message) { 573 char dimm_string[256] = ""; 574 575 int core_id = sched_getcpu(); 576 577 // Determine if this is a write or read error. 578 os_->Flush(error->vaddr); 579 error->reread = *(error->vaddr); 580 581 char *good = reinterpret_cast<char*>(&(error->expected)); 582 char *bad = reinterpret_cast<char*>(&(error->actual)); 583 584 sat_assert(error->expected != error->actual); 585 unsigned int offset = 0; 586 for (offset = 0; offset < (sizeof(error->expected) - 1); offset++) { 587 if (good[offset] != bad[offset]) 588 break; 589 } 590 591 error->vbyteaddr = reinterpret_cast<char*>(error->vaddr) + offset; 592 593 // Find physical address if possible. 594 error->paddr = os_->VirtualToPhysical(error->vbyteaddr); 595 596 // Pretty print DIMM mapping if available. 597 os_->FindDimm(error->paddr, dimm_string, sizeof(dimm_string)); 598 599 // Report parseable error. 600 if (priority < 5) { 601 // Run miscompare error through diagnoser for logging and reporting. 602 os_->error_diagnoser_->AddMiscompareError(dimm_string, 603 reinterpret_cast<uint64> 604 (error->vaddr), 1); 605 606 logprintf(priority, 607 "%s: miscompare on CPU %d(0x%s) at %p(0x%llx:%s): " 608 "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n", 609 message, 610 core_id, 611 CurrentCpusFormat().c_str(), 612 error->vaddr, 613 error->paddr, 614 dimm_string, 615 error->actual, 616 error->reread, 617 error->expected); 618 } 619 620 621 // Overwrite incorrect data with correct data to prevent 622 // future miscompares when this data is reused. 623 *(error->vaddr) = error->expected; 624 os_->Flush(error->vaddr); 625 } 626 627 628 629 // Print error information about a data miscompare. 630 void FileThread::ProcessError(struct ErrorRecord *error, 631 int priority, 632 const char *message) { 633 char dimm_string[256] = ""; 634 635 // Determine if this is a write or read error. 636 os_->Flush(error->vaddr); 637 error->reread = *(error->vaddr); 638 639 char *good = reinterpret_cast<char*>(&(error->expected)); 640 char *bad = reinterpret_cast<char*>(&(error->actual)); 641 642 sat_assert(error->expected != error->actual); 643 unsigned int offset = 0; 644 for (offset = 0; offset < (sizeof(error->expected) - 1); offset++) { 645 if (good[offset] != bad[offset]) 646 break; 647 } 648 649 error->vbyteaddr = reinterpret_cast<char*>(error->vaddr) + offset; 650 651 // Find physical address if possible. 652 error->paddr = os_->VirtualToPhysical(error->vbyteaddr); 653 654 // Pretty print DIMM mapping if available. 655 os_->FindDimm(error->paddr, dimm_string, sizeof(dimm_string)); 656 657 // If crc_page_ is valid, ie checking content read back from file, 658 // track src/dst memory addresses. Otherwise catagorize as general 659 // mememory miscompare for CRC checking everywhere else. 660 if (crc_page_ != -1) { 661 int miscompare_byteoffset = static_cast<char*>(error->vbyteaddr) - 662 static_cast<char*>(page_recs_[crc_page_].dst); 663 os_->error_diagnoser_->AddHDDMiscompareError(devicename_, 664 crc_page_, 665 miscompare_byteoffset, 666 page_recs_[crc_page_].src, 667 page_recs_[crc_page_].dst); 668 } else { 669 os_->error_diagnoser_->AddMiscompareError(dimm_string, 670 reinterpret_cast<uint64> 671 (error->vaddr), 1); 672 } 673 674 logprintf(priority, 675 "%s: miscompare on %s at %p(0x%llx:%s): read:0x%016llx, " 676 "reread:0x%016llx expected:0x%016llx\n", 677 message, 678 devicename_.c_str(), 679 error->vaddr, 680 error->paddr, 681 dimm_string, 682 error->actual, 683 error->reread, 684 error->expected); 685 686 // Overwrite incorrect data with correct data to prevent 687 // future miscompares when this data is reused. 688 *(error->vaddr) = error->expected; 689 os_->Flush(error->vaddr); 690 } 691 692 693 // Do a word by word result check of a region. 694 // Print errors on mismatches. 695 int WorkerThread::CheckRegion(void *addr, 696 class Pattern *pattern, 697 int64 length, 698 int offset, 699 int64 pattern_offset) { 700 uint64 *memblock = static_cast<uint64*>(addr); 701 const int kErrorLimit = 128; 702 int errors = 0; 703 int overflowerrors = 0; // Count of overflowed errors. 704 bool page_error = false; 705 string errormessage("Hardware Error"); 706 struct ErrorRecord 707 recorded[kErrorLimit]; // Queued errors for later printing. 708 709 // For each word in the data region. 710 for (int i = 0; i < length / wordsize_; i++) { 711 uint64 actual = memblock[i]; 712 uint64 expected; 713 714 // Determine the value that should be there. 715 datacast_t data; 716 int index = 2 * i + pattern_offset; 717 data.l32.l = pattern->pattern(index); 718 data.l32.h = pattern->pattern(index + 1); 719 expected = data.l64; 720 // Check tags if necessary. 721 if (tag_mode_ && ((reinterpret_cast<uint64>(&memblock[i]) & 0x3f) == 0)) { 722 expected = addr_to_tag(&memblock[i]); 723 } 724 725 726 // If the value is incorrect, save an error record for later printing. 727 if (actual != expected) { 728 if (errors < kErrorLimit) { 729 recorded[errors].actual = actual; 730 recorded[errors].expected = expected; 731 recorded[errors].vaddr = &memblock[i]; 732 errors++; 733 } else { 734 page_error = true; 735 // If we have overflowed the error queue, just print the errors now. 736 logprintf(10, "Log: Error record overflow, too many miscompares!\n"); 737 errormessage = "Page Error"; 738 break; 739 } 740 } 741 } 742 743 // Find if this is a whole block corruption. 744 if (page_error && !tag_mode_) { 745 int patsize = patternlist_->Size(); 746 for (int pat = 0; pat < patsize; pat++) { 747 class Pattern *altpattern = patternlist_->GetPattern(pat); 748 const int kGood = 0; 749 const int kBad = 1; 750 const int kGoodAgain = 2; 751 const int kNoMatch = 3; 752 int state = kGood; 753 unsigned int badstart = 0; 754 unsigned int badend = 0; 755 756 // Don't match against ourself! 757 if (pattern == altpattern) 758 continue; 759 760 for (int i = 0; i < length / wordsize_; i++) { 761 uint64 actual = memblock[i]; 762 datacast_t expected; 763 datacast_t possible; 764 765 // Determine the value that should be there. 766 int index = 2 * i + pattern_offset; 767 768 expected.l32.l = pattern->pattern(index); 769 expected.l32.h = pattern->pattern(index + 1); 770 771 possible.l32.l = pattern->pattern(index); 772 possible.l32.h = pattern->pattern(index + 1); 773 774 if (state == kGood) { 775 if (actual == expected.l64) { 776 continue; 777 } else if (actual == possible.l64) { 778 badstart = i; 779 badend = i; 780 state = kBad; 781 continue; 782 } else { 783 state = kNoMatch; 784 break; 785 } 786 } else if (state == kBad) { 787 if (actual == possible.l64) { 788 badend = i; 789 continue; 790 } else if (actual == expected.l64) { 791 state = kGoodAgain; 792 continue; 793 } else { 794 state = kNoMatch; 795 break; 796 } 797 } else if (state == kGoodAgain) { 798 if (actual == expected.l64) { 799 continue; 800 } else { 801 state = kNoMatch; 802 break; 803 } 804 } 805 } 806 807 if ((state == kGoodAgain) || (state == kBad)) { 808 unsigned int blockerrors = badend - badstart + 1; 809 errormessage = "Block Error"; 810 // It's okay for the 1st entry to be corrected multiple times, 811 // it will simply be reported twice. Once here and once below 812 // when processing the error queue. 813 ProcessError(&recorded[0], 0, errormessage.c_str()); 814 logprintf(0, "Block Error: (%p) pattern %s instead of %s, " 815 "%d bytes from offset 0x%x to 0x%x\n", 816 &memblock[badstart], 817 altpattern->name(), pattern->name(), 818 blockerrors * wordsize_, 819 offset + badstart * wordsize_, 820 offset + badend * wordsize_); 821 } 822 } 823 } 824 825 826 // Process error queue after all errors have been recorded. 827 for (int err = 0; err < errors; err++) { 828 int priority = 5; 829 if (errorcount_ + err < 30) 830 priority = 0; // Bump up the priority for the first few errors. 831 ProcessError(&recorded[err], priority, errormessage.c_str()); 832 } 833 834 if (page_error) { 835 // For each word in the data region. 836 for (int i = 0; i < length / wordsize_; i++) { 837 uint64 actual = memblock[i]; 838 uint64 expected; 839 datacast_t data; 840 // Determine the value that should be there. 841 int index = 2 * i + pattern_offset; 842 843 data.l32.l = pattern->pattern(index); 844 data.l32.h = pattern->pattern(index + 1); 845 expected = data.l64; 846 847 // Check tags if necessary. 848 if (tag_mode_ && ((reinterpret_cast<uint64>(&memblock[i]) & 0x3f) == 0)) { 849 expected = addr_to_tag(&memblock[i]); 850 } 851 852 // If the value is incorrect, save an error record for later printing. 853 if (actual != expected) { 854 // If we have overflowed the error queue, print the errors now. 855 struct ErrorRecord er; 856 er.actual = actual; 857 er.expected = expected; 858 er.vaddr = &memblock[i]; 859 860 // Do the error printout. This will take a long time and 861 // likely change the machine state. 862 ProcessError(&er, 12, errormessage.c_str()); 863 overflowerrors++; 864 } 865 } 866 } 867 868 // Keep track of observed errors. 869 errorcount_ += errors + overflowerrors; 870 return errors + overflowerrors; 871 } 872 873 float WorkerThread::GetCopiedData() { 874 return pages_copied_ * sat_->page_length() / kMegabyte; 875 } 876 877 // Calculate the CRC of a region. 878 // Result check if the CRC mismatches. 879 int WorkerThread::CrcCheckPage(struct page_entry *srcpe) { 880 const int blocksize = 4096; 881 const int blockwords = blocksize / wordsize_; 882 int errors = 0; 883 884 const AdlerChecksum *expectedcrc = srcpe->pattern->crc(); 885 uint64 *memblock = static_cast<uint64*>(srcpe->addr); 886 int blocks = sat_->page_length() / blocksize; 887 for (int currentblock = 0; currentblock < blocks; currentblock++) { 888 uint64 *memslice = memblock + currentblock * blockwords; 889 890 AdlerChecksum crc; 891 if (tag_mode_) { 892 AdlerAddrCrcC(memslice, blocksize, &crc, srcpe); 893 } else { 894 CalculateAdlerChecksum(memslice, blocksize, &crc); 895 } 896 897 // If the CRC does not match, we'd better look closer. 898 if (!crc.Equals(*expectedcrc)) { 899 logprintf(11, "Log: CrcCheckPage Falling through to slow compare, " 900 "CRC mismatch %s != %s\n", 901 crc.ToHexString().c_str(), 902 expectedcrc->ToHexString().c_str()); 903 int errorcount = CheckRegion(memslice, 904 srcpe->pattern, 905 blocksize, 906 currentblock * blocksize, 0); 907 if (errorcount == 0) { 908 logprintf(0, "Log: CrcCheckPage CRC mismatch %s != %s, " 909 "but no miscompares found.\n", 910 crc.ToHexString().c_str(), 911 expectedcrc->ToHexString().c_str()); 912 } 913 errors += errorcount; 914 } 915 } 916 917 // For odd length transfers, we should never hit this. 918 int leftovers = sat_->page_length() % blocksize; 919 if (leftovers) { 920 uint64 *memslice = memblock + blocks * blockwords; 921 errors += CheckRegion(memslice, 922 srcpe->pattern, 923 leftovers, 924 blocks * blocksize, 0); 925 } 926 return errors; 927 } 928 929 930 // Print error information about a data miscompare. 931 void WorkerThread::ProcessTagError(struct ErrorRecord *error, 932 int priority, 933 const char *message) { 934 char dimm_string[256] = ""; 935 char tag_dimm_string[256] = ""; 936 bool read_error = false; 937 938 int core_id = sched_getcpu(); 939 940 // Determine if this is a write or read error. 941 os_->Flush(error->vaddr); 942 error->reread = *(error->vaddr); 943 944 // Distinguish read and write errors. 945 if (error->actual != error->reread) { 946 read_error = true; 947 } 948 949 sat_assert(error->expected != error->actual); 950 951 error->vbyteaddr = reinterpret_cast<char*>(error->vaddr); 952 953 // Find physical address if possible. 954 error->paddr = os_->VirtualToPhysical(error->vbyteaddr); 955 error->tagpaddr = os_->VirtualToPhysical(error->tagvaddr); 956 957 // Pretty print DIMM mapping if available. 958 os_->FindDimm(error->paddr, dimm_string, sizeof(dimm_string)); 959 // Pretty print DIMM mapping if available. 960 os_->FindDimm(error->tagpaddr, tag_dimm_string, sizeof(tag_dimm_string)); 961 962 // Report parseable error. 963 if (priority < 5) { 964 logprintf(priority, 965 "%s: Tag from %p(0x%llx:%s) (%s) " 966 "miscompare on CPU %d(0x%s) at %p(0x%llx:%s): " 967 "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n", 968 message, 969 error->tagvaddr, error->tagpaddr, 970 tag_dimm_string, 971 read_error ? "read error" : "write error", 972 core_id, 973 CurrentCpusFormat().c_str(), 974 error->vaddr, 975 error->paddr, 976 dimm_string, 977 error->actual, 978 error->reread, 979 error->expected); 980 } 981 982 errorcount_ += 1; 983 984 // Overwrite incorrect data with correct data to prevent 985 // future miscompares when this data is reused. 986 *(error->vaddr) = error->expected; 987 os_->Flush(error->vaddr); 988 } 989 990 991 // Print out and log a tag error. 992 bool WorkerThread::ReportTagError( 993 uint64 *mem64, 994 uint64 actual, 995 uint64 tag) { 996 struct ErrorRecord er; 997 er.actual = actual; 998 999 er.expected = tag; 1000 er.vaddr = mem64; 1001 1002 // Generate vaddr from tag. 1003 er.tagvaddr = reinterpret_cast<uint64*>(actual); 1004 1005 ProcessTagError(&er, 0, "Hardware Error"); 1006 return true; 1007 } 1008 1009 // C implementation of Adler memory copy, with memory tagging. 1010 bool WorkerThread::AdlerAddrMemcpyC(uint64 *dstmem64, 1011 uint64 *srcmem64, 1012 unsigned int size_in_bytes, 1013 AdlerChecksum *checksum, 1014 struct page_entry *pe) { 1015 // Use this data wrapper to access memory with 64bit read/write. 1016 datacast_t data; 1017 datacast_t dstdata; 1018 unsigned int count = size_in_bytes / sizeof(data); 1019 1020 if (count > ((1U) << 19)) { 1021 // Size is too large, must be strictly less than 512 KB. 1022 return false; 1023 } 1024 1025 uint64 a1 = 1; 1026 uint64 a2 = 1; 1027 uint64 b1 = 0; 1028 uint64 b2 = 0; 1029 1030 class Pattern *pattern = pe->pattern; 1031 1032 unsigned int i = 0; 1033 while (i < count) { 1034 // Process 64 bits at a time. 1035 if ((i & 0x7) == 0) { 1036 data.l64 = srcmem64[i]; 1037 dstdata.l64 = dstmem64[i]; 1038 uint64 src_tag = addr_to_tag(&srcmem64[i]); 1039 uint64 dst_tag = addr_to_tag(&dstmem64[i]); 1040 // Detect if tags have been corrupted. 1041 if (data.l64 != src_tag) 1042 ReportTagError(&srcmem64[i], data.l64, src_tag); 1043 if (dstdata.l64 != dst_tag) 1044 ReportTagError(&dstmem64[i], dstdata.l64, dst_tag); 1045 1046 data.l32.l = pattern->pattern(i << 1); 1047 data.l32.h = pattern->pattern((i << 1) + 1); 1048 a1 = a1 + data.l32.l; 1049 b1 = b1 + a1; 1050 a1 = a1 + data.l32.h; 1051 b1 = b1 + a1; 1052 1053 data.l64 = dst_tag; 1054 dstmem64[i] = data.l64; 1055 1056 } else { 1057 data.l64 = srcmem64[i]; 1058 a1 = a1 + data.l32.l; 1059 b1 = b1 + a1; 1060 a1 = a1 + data.l32.h; 1061 b1 = b1 + a1; 1062 dstmem64[i] = data.l64; 1063 } 1064 i++; 1065 1066 data.l64 = srcmem64[i]; 1067 a2 = a2 + data.l32.l; 1068 b2 = b2 + a2; 1069 a2 = a2 + data.l32.h; 1070 b2 = b2 + a2; 1071 dstmem64[i] = data.l64; 1072 i++; 1073 } 1074 checksum->Set(a1, a2, b1, b2); 1075 return true; 1076 } 1077 1078 // x86_64 SSE2 assembly implementation of Adler memory copy, with address 1079 // tagging added as a second step. This is useful for debugging failures 1080 // that only occur when SSE / nontemporal writes are used. 1081 bool WorkerThread::AdlerAddrMemcpyWarm(uint64 *dstmem64, 1082 uint64 *srcmem64, 1083 unsigned int size_in_bytes, 1084 AdlerChecksum *checksum, 1085 struct page_entry *pe) { 1086 // Do ASM copy, ignore checksum. 1087 AdlerChecksum ignored_checksum; 1088 os_->AdlerMemcpyWarm(dstmem64, srcmem64, size_in_bytes, &ignored_checksum); 1089 1090 // Force cache flush of both the source and destination addresses. 1091 // length - length of block to flush in cachelines. 1092 // mem_increment - number of dstmem/srcmem values per cacheline. 1093 int length = size_in_bytes / kCacheLineSize; 1094 int mem_increment = kCacheLineSize / sizeof(*dstmem64); 1095 OsLayer::FastFlushSync(); 1096 for (int i = 0; i < length; ++i) { 1097 OsLayer::FastFlushHint(dstmem64 + (i * mem_increment)); 1098 OsLayer::FastFlushHint(srcmem64 + (i * mem_increment)); 1099 } 1100 OsLayer::FastFlushSync(); 1101 1102 // Check results. 1103 AdlerAddrCrcC(srcmem64, size_in_bytes, checksum, pe); 1104 // Patch up address tags. 1105 TagAddrC(dstmem64, size_in_bytes); 1106 return true; 1107 } 1108 1109 // Retag pages.. 1110 bool WorkerThread::TagAddrC(uint64 *memwords, 1111 unsigned int size_in_bytes) { 1112 // Mask is the bitmask of indexes used by the pattern. 1113 // It is the pattern size -1. Size is always a power of 2. 1114 1115 // Select tag or data as appropriate. 1116 int length = size_in_bytes / wordsize_; 1117 for (int i = 0; i < length; i += 8) { 1118 datacast_t data; 1119 data.l64 = addr_to_tag(&memwords[i]); 1120 memwords[i] = data.l64; 1121 } 1122 return true; 1123 } 1124 1125 // C implementation of Adler memory crc. 1126 bool WorkerThread::AdlerAddrCrcC(uint64 *srcmem64, 1127 unsigned int size_in_bytes, 1128 AdlerChecksum *checksum, 1129 struct page_entry *pe) { 1130 // Use this data wrapper to access memory with 64bit read/write. 1131 datacast_t data; 1132 unsigned int count = size_in_bytes / sizeof(data); 1133 1134 if (count > ((1U) << 19)) { 1135 // Size is too large, must be strictly less than 512 KB. 1136 return false; 1137 } 1138 1139 uint64 a1 = 1; 1140 uint64 a2 = 1; 1141 uint64 b1 = 0; 1142 uint64 b2 = 0; 1143 1144 class Pattern *pattern = pe->pattern; 1145 1146 unsigned int i = 0; 1147 while (i < count) { 1148 // Process 64 bits at a time. 1149 if ((i & 0x7) == 0) { 1150 data.l64 = srcmem64[i]; 1151 uint64 src_tag = addr_to_tag(&srcmem64[i]); 1152 // Check that tags match expected. 1153 if (data.l64 != src_tag) 1154 ReportTagError(&srcmem64[i], data.l64, src_tag); 1155 1156 data.l32.l = pattern->pattern(i << 1); 1157 data.l32.h = pattern->pattern((i << 1) + 1); 1158 a1 = a1 + data.l32.l; 1159 b1 = b1 + a1; 1160 a1 = a1 + data.l32.h; 1161 b1 = b1 + a1; 1162 } else { 1163 data.l64 = srcmem64[i]; 1164 a1 = a1 + data.l32.l; 1165 b1 = b1 + a1; 1166 a1 = a1 + data.l32.h; 1167 b1 = b1 + a1; 1168 } 1169 i++; 1170 1171 data.l64 = srcmem64[i]; 1172 a2 = a2 + data.l32.l; 1173 b2 = b2 + a2; 1174 a2 = a2 + data.l32.h; 1175 b2 = b2 + a2; 1176 i++; 1177 } 1178 checksum->Set(a1, a2, b1, b2); 1179 return true; 1180 } 1181 1182 // Copy a block of memory quickly, while keeping a CRC of the data. 1183 // Result check if the CRC mismatches. 1184 int WorkerThread::CrcCopyPage(struct page_entry *dstpe, 1185 struct page_entry *srcpe) { 1186 int errors = 0; 1187 const int blocksize = 4096; 1188 const int blockwords = blocksize / wordsize_; 1189 int blocks = sat_->page_length() / blocksize; 1190 1191 // Base addresses for memory copy 1192 uint64 *targetmembase = static_cast<uint64*>(dstpe->addr); 1193 uint64 *sourcemembase = static_cast<uint64*>(srcpe->addr); 1194 // Remember the expected CRC 1195 const AdlerChecksum *expectedcrc = srcpe->pattern->crc(); 1196 1197 for (int currentblock = 0; currentblock < blocks; currentblock++) { 1198 uint64 *targetmem = targetmembase + currentblock * blockwords; 1199 uint64 *sourcemem = sourcemembase + currentblock * blockwords; 1200 1201 AdlerChecksum crc; 1202 if (tag_mode_) { 1203 AdlerAddrMemcpyC(targetmem, sourcemem, blocksize, &crc, srcpe); 1204 } else { 1205 AdlerMemcpyC(targetmem, sourcemem, blocksize, &crc); 1206 } 1207 1208 // Investigate miscompares. 1209 if (!crc.Equals(*expectedcrc)) { 1210 logprintf(11, "Log: CrcCopyPage Falling through to slow compare, " 1211 "CRC mismatch %s != %s\n", crc.ToHexString().c_str(), 1212 expectedcrc->ToHexString().c_str()); 1213 int errorcount = CheckRegion(sourcemem, 1214 srcpe->pattern, 1215 blocksize, 1216 currentblock * blocksize, 0); 1217 if (errorcount == 0) { 1218 logprintf(0, "Log: CrcCopyPage CRC mismatch %s != %s, " 1219 "but no miscompares found. Retrying with fresh data.\n", 1220 crc.ToHexString().c_str(), 1221 expectedcrc->ToHexString().c_str()); 1222 if (!tag_mode_) { 1223 // Copy the data originally read from this region back again. 1224 // This data should have any corruption read originally while 1225 // calculating the CRC. 1226 memcpy(sourcemem, targetmem, blocksize); 1227 errorcount = CheckRegion(sourcemem, 1228 srcpe->pattern, 1229 blocksize, 1230 currentblock * blocksize, 0); 1231 if (errorcount == 0) { 1232 int core_id = sched_getcpu(); 1233 logprintf(0, "Process Error: CPU %d(0x%s) CrcCopyPage " 1234 "CRC mismatch %s != %s, " 1235 "but no miscompares found on second pass.\n", 1236 core_id, CurrentCpusFormat().c_str(), 1237 crc.ToHexString().c_str(), 1238 expectedcrc->ToHexString().c_str()); 1239 struct ErrorRecord er; 1240 er.actual = sourcemem[0]; 1241 er.expected = 0x0; 1242 er.vaddr = sourcemem; 1243 ProcessError(&er, 0, "Hardware Error"); 1244 } 1245 } 1246 } 1247 errors += errorcount; 1248 } 1249 } 1250 1251 // For odd length transfers, we should never hit this. 1252 int leftovers = sat_->page_length() % blocksize; 1253 if (leftovers) { 1254 uint64 *targetmem = targetmembase + blocks * blockwords; 1255 uint64 *sourcemem = sourcemembase + blocks * blockwords; 1256 1257 errors += CheckRegion(sourcemem, 1258 srcpe->pattern, 1259 leftovers, 1260 blocks * blocksize, 0); 1261 int leftoverwords = leftovers / wordsize_; 1262 for (int i = 0; i < leftoverwords; i++) { 1263 targetmem[i] = sourcemem[i]; 1264 } 1265 } 1266 1267 // Update pattern reference to reflect new contents. 1268 dstpe->pattern = srcpe->pattern; 1269 1270 // Clean clean clean the errors away. 1271 if (errors) { 1272 // TODO(nsanders): Maybe we should patch rather than fill? Filling may 1273 // cause bad data to be propogated across the page. 1274 FillPage(dstpe); 1275 } 1276 return errors; 1277 } 1278 1279 1280 1281 // Invert a block of memory quickly, traversing downwards. 1282 int InvertThread::InvertPageDown(struct page_entry *srcpe) { 1283 const int blocksize = 4096; 1284 const int blockwords = blocksize / wordsize_; 1285 int blocks = sat_->page_length() / blocksize; 1286 1287 // Base addresses for memory copy 1288 unsigned int *sourcemembase = static_cast<unsigned int *>(srcpe->addr); 1289 1290 for (int currentblock = blocks-1; currentblock >= 0; currentblock--) { 1291 unsigned int *sourcemem = sourcemembase + currentblock * blockwords; 1292 for (int i = blockwords - 32; i >= 0; i -= 32) { 1293 for (int index = i + 31; index >= i; --index) { 1294 unsigned int actual = sourcemem[index]; 1295 sourcemem[index] = ~actual; 1296 } 1297 OsLayer::FastFlush(&sourcemem[i]); 1298 } 1299 } 1300 1301 return 0; 1302 } 1303 1304 // Invert a block of memory, traversing upwards. 1305 int InvertThread::InvertPageUp(struct page_entry *srcpe) { 1306 const int blocksize = 4096; 1307 const int blockwords = blocksize / wordsize_; 1308 int blocks = sat_->page_length() / blocksize; 1309 1310 // Base addresses for memory copy 1311 unsigned int *sourcemembase = static_cast<unsigned int *>(srcpe->addr); 1312 1313 for (int currentblock = 0; currentblock < blocks; currentblock++) { 1314 unsigned int *sourcemem = sourcemembase + currentblock * blockwords; 1315 for (int i = 0; i < blockwords; i += 32) { 1316 for (int index = i; index <= i + 31; ++index) { 1317 unsigned int actual = sourcemem[index]; 1318 sourcemem[index] = ~actual; 1319 } 1320 OsLayer::FastFlush(&sourcemem[i]); 1321 } 1322 } 1323 return 0; 1324 } 1325 1326 // Copy a block of memory quickly, while keeping a CRC of the data. 1327 // Result check if the CRC mismatches. Warm the CPU while running 1328 int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe, 1329 struct page_entry *srcpe) { 1330 int errors = 0; 1331 const int blocksize = 4096; 1332 const int blockwords = blocksize / wordsize_; 1333 int blocks = sat_->page_length() / blocksize; 1334 1335 // Base addresses for memory copy 1336 uint64 *targetmembase = static_cast<uint64*>(dstpe->addr); 1337 uint64 *sourcemembase = static_cast<uint64*>(srcpe->addr); 1338 // Remember the expected CRC 1339 const AdlerChecksum *expectedcrc = srcpe->pattern->crc(); 1340 1341 for (int currentblock = 0; currentblock < blocks; currentblock++) { 1342 uint64 *targetmem = targetmembase + currentblock * blockwords; 1343 uint64 *sourcemem = sourcemembase + currentblock * blockwords; 1344 1345 AdlerChecksum crc; 1346 if (tag_mode_) { 1347 AdlerAddrMemcpyWarm(targetmem, sourcemem, blocksize, &crc, srcpe); 1348 } else { 1349 os_->AdlerMemcpyWarm(targetmem, sourcemem, blocksize, &crc); 1350 } 1351 1352 // Investigate miscompares. 1353 if (!crc.Equals(*expectedcrc)) { 1354 logprintf(11, "Log: CrcWarmCopyPage Falling through to slow compare, " 1355 "CRC mismatch %s != %s\n", crc.ToHexString().c_str(), 1356 expectedcrc->ToHexString().c_str()); 1357 int errorcount = CheckRegion(sourcemem, 1358 srcpe->pattern, 1359 blocksize, 1360 currentblock * blocksize, 0); 1361 if (errorcount == 0) { 1362 logprintf(0, "Log: CrcWarmCopyPage CRC mismatch expected: %s != actual: %s, " 1363 "but no miscompares found. Retrying with fresh data.\n", 1364 expectedcrc->ToHexString().c_str(), 1365 crc.ToHexString().c_str() ); 1366 if (!tag_mode_) { 1367 // Copy the data originally read from this region back again. 1368 // This data should have any corruption read originally while 1369 // calculating the CRC. 1370 memcpy(sourcemem, targetmem, blocksize); 1371 errorcount = CheckRegion(sourcemem, 1372 srcpe->pattern, 1373 blocksize, 1374 currentblock * blocksize, 0); 1375 if (errorcount == 0) { 1376 int core_id = sched_getcpu(); 1377 logprintf(0, "Process Error: CPU %d(0x%s) CrciWarmCopyPage " 1378 "CRC mismatch %s != %s, " 1379 "but no miscompares found on second pass.\n", 1380 core_id, CurrentCpusFormat().c_str(), 1381 crc.ToHexString().c_str(), 1382 expectedcrc->ToHexString().c_str()); 1383 struct ErrorRecord er; 1384 er.actual = sourcemem[0]; 1385 er.expected = 0xbad; 1386 er.vaddr = sourcemem; 1387 ProcessError(&er, 0, "Hardware Error"); 1388 } 1389 } 1390 } 1391 errors += errorcount; 1392 } 1393 } 1394 1395 // For odd length transfers, we should never hit this. 1396 int leftovers = sat_->page_length() % blocksize; 1397 if (leftovers) { 1398 uint64 *targetmem = targetmembase + blocks * blockwords; 1399 uint64 *sourcemem = sourcemembase + blocks * blockwords; 1400 1401 errors += CheckRegion(sourcemem, 1402 srcpe->pattern, 1403 leftovers, 1404 blocks * blocksize, 0); 1405 int leftoverwords = leftovers / wordsize_; 1406 for (int i = 0; i < leftoverwords; i++) { 1407 targetmem[i] = sourcemem[i]; 1408 } 1409 } 1410 1411 // Update pattern reference to reflect new contents. 1412 dstpe->pattern = srcpe->pattern; 1413 1414 // Clean clean clean the errors away. 1415 if (errors) { 1416 // TODO(nsanders): Maybe we should patch rather than fill? Filling may 1417 // cause bad data to be propogated across the page. 1418 FillPage(dstpe); 1419 } 1420 return errors; 1421 } 1422 1423 1424 1425 // Memory check work loop. Execute until done, then exhaust pages. 1426 bool CheckThread::Work() { 1427 struct page_entry pe; 1428 bool result = true; 1429 int64 loops = 0; 1430 1431 logprintf(9, "Log: Starting Check thread %d\n", thread_num_); 1432 1433 // We want to check all the pages, and 1434 // stop when there aren't any left. 1435 while (true) { 1436 result = result && sat_->GetValid(&pe); 1437 if (!result) { 1438 if (IsReadyToRunNoPause()) 1439 logprintf(0, "Process Error: check_thread failed to pop pages, " 1440 "bailing\n"); 1441 else 1442 result = true; 1443 break; 1444 } 1445 1446 // Do the result check. 1447 CrcCheckPage(&pe); 1448 1449 // Push pages back on the valid queue if we are still going, 1450 // throw them out otherwise. 1451 if (IsReadyToRunNoPause()) 1452 result = result && sat_->PutValid(&pe); 1453 else 1454 result = result && sat_->PutEmpty(&pe); 1455 if (!result) { 1456 logprintf(0, "Process Error: check_thread failed to push pages, " 1457 "bailing\n"); 1458 break; 1459 } 1460 loops++; 1461 } 1462 1463 pages_copied_ = loops; 1464 status_ = result; 1465 logprintf(9, "Log: Completed %d: Check thread. Status %d, %d pages checked\n", 1466 thread_num_, status_, pages_copied_); 1467 return result; 1468 } 1469 1470 1471 // Memory copy work loop. Execute until marked done. 1472 bool CopyThread::Work() { 1473 struct page_entry src; 1474 struct page_entry dst; 1475 bool result = true; 1476 int64 loops = 0; 1477 1478 logprintf(9, "Log: Starting copy thread %d: cpu %s, mem %x\n", 1479 thread_num_, cpuset_format(&cpu_mask_).c_str(), tag_); 1480 1481 while (IsReadyToRun()) { 1482 // Pop the needed pages. 1483 result = result && sat_->GetValid(&src, tag_); 1484 result = result && sat_->GetEmpty(&dst, tag_); 1485 if (!result) { 1486 logprintf(0, "Process Error: copy_thread failed to pop pages, " 1487 "bailing\n"); 1488 break; 1489 } 1490 1491 // Force errors for unittests. 1492 if (sat_->error_injection()) { 1493 if (loops == 8) { 1494 char *addr = reinterpret_cast<char*>(src.addr); 1495 int offset = random() % sat_->page_length(); 1496 addr[offset] = 0xba; 1497 } 1498 } 1499 1500 // We can use memcpy, or CRC check while we copy. 1501 if (sat_->warm()) { 1502 CrcWarmCopyPage(&dst, &src); 1503 } else if (sat_->strict()) { 1504 CrcCopyPage(&dst, &src); 1505 } else { 1506 memcpy(dst.addr, src.addr, sat_->page_length()); 1507 dst.pattern = src.pattern; 1508 } 1509 1510 result = result && sat_->PutValid(&dst); 1511 result = result && sat_->PutEmpty(&src); 1512 1513 // Copy worker-threads yield themselves at the end of each copy loop, 1514 // to avoid threads from preempting each other in the middle of the inner 1515 // copy-loop. Cooperations between Copy worker-threads results in less 1516 // unnecessary cache thrashing (which happens when context-switching in the 1517 // middle of the inner copy-loop). 1518 YieldSelf(); 1519 1520 if (!result) { 1521 logprintf(0, "Process Error: copy_thread failed to push pages, " 1522 "bailing\n"); 1523 break; 1524 } 1525 loops++; 1526 } 1527 1528 pages_copied_ = loops; 1529 status_ = result; 1530 logprintf(9, "Log: Completed %d: Copy thread. Status %d, %d pages copied\n", 1531 thread_num_, status_, pages_copied_); 1532 return result; 1533 } 1534 1535 // Memory invert work loop. Execute until marked done. 1536 bool InvertThread::Work() { 1537 struct page_entry src; 1538 bool result = true; 1539 int64 loops = 0; 1540 1541 logprintf(9, "Log: Starting invert thread %d\n", thread_num_); 1542 1543 while (IsReadyToRun()) { 1544 // Pop the needed pages. 1545 result = result && sat_->GetValid(&src); 1546 if (!result) { 1547 logprintf(0, "Process Error: invert_thread failed to pop pages, " 1548 "bailing\n"); 1549 break; 1550 } 1551 1552 if (sat_->strict()) 1553 CrcCheckPage(&src); 1554 1555 // For the same reason CopyThread yields itself (see YieldSelf comment 1556 // in CopyThread::Work(), InvertThread yields itself after each invert 1557 // operation to improve cooperation between different worker threads 1558 // stressing the memory/cache. 1559 InvertPageUp(&src); 1560 YieldSelf(); 1561 InvertPageDown(&src); 1562 YieldSelf(); 1563 InvertPageDown(&src); 1564 YieldSelf(); 1565 InvertPageUp(&src); 1566 YieldSelf(); 1567 1568 if (sat_->strict()) 1569 CrcCheckPage(&src); 1570 1571 result = result && sat_->PutValid(&src); 1572 if (!result) { 1573 logprintf(0, "Process Error: invert_thread failed to push pages, " 1574 "bailing\n"); 1575 break; 1576 } 1577 loops++; 1578 } 1579 1580 pages_copied_ = loops * 2; 1581 status_ = result; 1582 logprintf(9, "Log: Completed %d: Copy thread. Status %d, %d pages copied\n", 1583 thread_num_, status_, pages_copied_); 1584 return result; 1585 } 1586 1587 1588 // Set file name to use for File IO. 1589 void FileThread::SetFile(const char *filename_init) { 1590 filename_ = filename_init; 1591 devicename_ = os_->FindFileDevice(filename_); 1592 } 1593 1594 // Open the file for access. 1595 bool FileThread::OpenFile(int *pfile) { 1596 int flags = O_RDWR | O_CREAT | O_SYNC; 1597 int fd = open(filename_.c_str(), flags | O_DIRECT, 0644); 1598 if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) { 1599 fd = open(filename_.c_str(), flags, 0644); // Try without O_DIRECT 1600 os_->ActivateFlushPageCache(); // Not using O_DIRECT fixed EINVAL 1601 } 1602 if (fd < 0) { 1603 logprintf(0, "Process Error: Failed to create file %s!!\n", 1604 filename_.c_str()); 1605 pages_copied_ = 0; 1606 return false; 1607 } 1608 *pfile = fd; 1609 return true; 1610 } 1611 1612 // Close the file. 1613 bool FileThread::CloseFile(int fd) { 1614 close(fd); 1615 return true; 1616 } 1617 1618 // Check sector tagging. 1619 bool FileThread::SectorTagPage(struct page_entry *src, int block) { 1620 int page_length = sat_->page_length(); 1621 struct FileThread::SectorTag *tag = 1622 (struct FileThread::SectorTag *)(src->addr); 1623 1624 // Tag each sector. 1625 unsigned char magic = ((0xba + thread_num_) & 0xff); 1626 for (int sec = 0; sec < page_length / 512; sec++) { 1627 tag[sec].magic = magic; 1628 tag[sec].block = block & 0xff; 1629 tag[sec].sector = sec & 0xff; 1630 tag[sec].pass = pass_ & 0xff; 1631 } 1632 return true; 1633 } 1634 1635 bool FileThread::WritePageToFile(int fd, struct page_entry *src) { 1636 int page_length = sat_->page_length(); 1637 // Fill the file with our data. 1638 int64 size = write(fd, src->addr, page_length); 1639 1640 if (size != page_length) { 1641 os_->ErrorReport(devicename_.c_str(), "write-error", 1); 1642 errorcount_++; 1643 logprintf(0, "Block Error: file_thread failed to write, " 1644 "bailing\n"); 1645 return false; 1646 } 1647 return true; 1648 } 1649 1650 // Write the data to the file. 1651 bool FileThread::WritePages(int fd) { 1652 int strict = sat_->strict(); 1653 1654 // Start fresh at beginning of file for each batch of pages. 1655 lseek64(fd, 0, SEEK_SET); 1656 for (int i = 0; i < sat_->disk_pages(); i++) { 1657 struct page_entry src; 1658 if (!GetValidPage(&src)) 1659 return false; 1660 // Save expected pattern. 1661 page_recs_[i].pattern = src.pattern; 1662 page_recs_[i].src = src.addr; 1663 1664 // Check data correctness. 1665 if (strict) 1666 CrcCheckPage(&src); 1667 1668 SectorTagPage(&src, i); 1669 1670 bool result = WritePageToFile(fd, &src); 1671 1672 if (!PutEmptyPage(&src)) 1673 return false; 1674 1675 if (!result) 1676 return false; 1677 } 1678 return os_->FlushPageCache(); // If O_DIRECT worked, this will be a NOP. 1679 } 1680 1681 // Copy data from file into memory block. 1682 bool FileThread::ReadPageFromFile(int fd, struct page_entry *dst) { 1683 int page_length = sat_->page_length(); 1684 1685 // Do the actual read. 1686 int64 size = read(fd, dst->addr, page_length); 1687 if (size != page_length) { 1688 os_->ErrorReport(devicename_.c_str(), "read-error", 1); 1689 logprintf(0, "Block Error: file_thread failed to read, " 1690 "bailing\n"); 1691 errorcount_++; 1692 return false; 1693 } 1694 return true; 1695 } 1696 1697 // Check sector tagging. 1698 bool FileThread::SectorValidatePage(const struct PageRec &page, 1699 struct page_entry *dst, int block) { 1700 // Error injection. 1701 static int calls = 0; 1702 calls++; 1703 1704 // Do sector tag compare. 1705 int firstsector = -1; 1706 int lastsector = -1; 1707 bool badsector = false; 1708 int page_length = sat_->page_length(); 1709 1710 // Cast data block into an array of tagged sectors. 1711 struct FileThread::SectorTag *tag = 1712 (struct FileThread::SectorTag *)(dst->addr); 1713 1714 sat_assert(sizeof(*tag) == 512); 1715 1716 // Error injection. 1717 if (sat_->error_injection()) { 1718 if (calls == 2) { 1719 for (int badsec = 8; badsec < 17; badsec++) 1720 tag[badsec].pass = 27; 1721 } 1722 if (calls == 18) { 1723 (static_cast<int32*>(dst->addr))[27] = 0xbadda7a; 1724 } 1725 } 1726 1727 // Check each sector for the correct tag we added earlier, 1728 // then revert the tag to the to normal data pattern. 1729 unsigned char magic = ((0xba + thread_num_) & 0xff); 1730 for (int sec = 0; sec < page_length / 512; sec++) { 1731 // Check magic tag. 1732 if ((tag[sec].magic != magic) || 1733 (tag[sec].block != (block & 0xff)) || 1734 (tag[sec].sector != (sec & 0xff)) || 1735 (tag[sec].pass != (pass_ & 0xff))) { 1736 // Offset calculation for tag location. 1737 int offset = sec * sizeof(SectorTag); 1738 if (tag[sec].block != (block & 0xff)) 1739 offset += 1 * sizeof(uint8); 1740 else if (tag[sec].sector != (sec & 0xff)) 1741 offset += 2 * sizeof(uint8); 1742 else if (tag[sec].pass != (pass_ & 0xff)) 1743 offset += 3 * sizeof(uint8); 1744 1745 // Run sector tag error through diagnoser for logging and reporting. 1746 errorcount_ += 1; 1747 os_->error_diagnoser_->AddHDDSectorTagError(devicename_, tag[sec].block, 1748 offset, 1749 tag[sec].sector, 1750 page.src, page.dst); 1751 1752 logprintf(5, "Sector Error: Sector tag @ 0x%x, pass %d/%d. " 1753 "sec %x/%x, block %d/%d, magic %x/%x, File: %s \n", 1754 block * page_length + 512 * sec, 1755 (pass_ & 0xff), (unsigned int)tag[sec].pass, 1756 sec, (unsigned int)tag[sec].sector, 1757 block, (unsigned int)tag[sec].block, 1758 magic, (unsigned int)tag[sec].magic, 1759 filename_.c_str()); 1760 1761 // Keep track of first and last bad sector. 1762 if (firstsector == -1) 1763 firstsector = (block * page_length / 512) + sec; 1764 lastsector = (block * page_length / 512) + sec; 1765 badsector = true; 1766 } 1767 // Patch tag back to proper pattern. 1768 unsigned int *addr = (unsigned int *)(&tag[sec]); 1769 *addr = dst->pattern->pattern(512 * sec / sizeof(*addr)); 1770 } 1771 1772 // If we found sector errors: 1773 if (badsector == true) { 1774 logprintf(5, "Log: file sector miscompare at offset %x-%x. File: %s\n", 1775 firstsector * 512, 1776 ((lastsector + 1) * 512) - 1, 1777 filename_.c_str()); 1778 1779 // Either exit immediately, or patch the data up and continue. 1780 if (sat_->stop_on_error()) { 1781 exit(1); 1782 } else { 1783 // Patch up bad pages. 1784 for (int block = (firstsector * 512) / page_length; 1785 block <= (lastsector * 512) / page_length; 1786 block++) { 1787 unsigned int *memblock = static_cast<unsigned int *>(dst->addr); 1788 int length = page_length / wordsize_; 1789 for (int i = 0; i < length; i++) { 1790 memblock[i] = dst->pattern->pattern(i); 1791 } 1792 } 1793 } 1794 } 1795 return true; 1796 } 1797 1798 // Get memory for an incoming data transfer.. 1799 bool FileThread::PagePrepare() { 1800 // We can only do direct IO to SAT pages if it is normal mem. 1801 page_io_ = os_->normal_mem(); 1802 1803 // Init a local buffer if we need it. 1804 if (!page_io_) { 1805 #ifdef HAVE_POSIX_MEMALIGN 1806 int result = posix_memalign(&local_page_, 512, sat_->page_length()); 1807 #else 1808 local_page_ = memalign(512, sat_->page_length()); 1809 int result = (local_page_ == 0); 1810 #endif 1811 if (result) { 1812 logprintf(0, "Process Error: disk thread posix_memalign " 1813 "returned %d (fail)\n", 1814 result); 1815 status_ = false; 1816 return false; 1817 } 1818 } 1819 return true; 1820 } 1821 1822 1823 // Remove memory allocated for data transfer. 1824 bool FileThread::PageTeardown() { 1825 // Free a local buffer if we need to. 1826 if (!page_io_) { 1827 free(local_page_); 1828 } 1829 return true; 1830 } 1831 1832 1833 1834 // Get memory for an incoming data transfer.. 1835 bool FileThread::GetEmptyPage(struct page_entry *dst) { 1836 if (page_io_) { 1837 if (!sat_->GetEmpty(dst)) 1838 return false; 1839 } else { 1840 dst->addr = local_page_; 1841 dst->offset = 0; 1842 dst->pattern = 0; 1843 } 1844 return true; 1845 } 1846 1847 // Get memory for an outgoing data transfer.. 1848 bool FileThread::GetValidPage(struct page_entry *src) { 1849 struct page_entry tmp; 1850 if (!sat_->GetValid(&tmp)) 1851 return false; 1852 if (page_io_) { 1853 *src = tmp; 1854 return true; 1855 } else { 1856 src->addr = local_page_; 1857 src->offset = 0; 1858 CrcCopyPage(src, &tmp); 1859 if (!sat_->PutValid(&tmp)) 1860 return false; 1861 } 1862 return true; 1863 } 1864 1865 1866 // Throw out a used empty page. 1867 bool FileThread::PutEmptyPage(struct page_entry *src) { 1868 if (page_io_) { 1869 if (!sat_->PutEmpty(src)) 1870 return false; 1871 } 1872 return true; 1873 } 1874 1875 // Throw out a used, filled page. 1876 bool FileThread::PutValidPage(struct page_entry *src) { 1877 if (page_io_) { 1878 if (!sat_->PutValid(src)) 1879 return false; 1880 } 1881 return true; 1882 } 1883 1884 // Copy data from file into memory blocks. 1885 bool FileThread::ReadPages(int fd) { 1886 int page_length = sat_->page_length(); 1887 int strict = sat_->strict(); 1888 bool result = true; 1889 1890 // Read our data back out of the file, into it's new location. 1891 lseek64(fd, 0, SEEK_SET); 1892 for (int i = 0; i < sat_->disk_pages(); i++) { 1893 struct page_entry dst; 1894 if (!GetEmptyPage(&dst)) 1895 return false; 1896 // Retrieve expected pattern. 1897 dst.pattern = page_recs_[i].pattern; 1898 // Update page recordpage record. 1899 page_recs_[i].dst = dst.addr; 1900 1901 // Read from the file into destination page. 1902 if (!ReadPageFromFile(fd, &dst)) { 1903 PutEmptyPage(&dst); 1904 return false; 1905 } 1906 1907 SectorValidatePage(page_recs_[i], &dst, i); 1908 1909 // Ensure that the transfer ended up with correct data. 1910 if (strict) { 1911 // Record page index currently CRC checked. 1912 crc_page_ = i; 1913 int errors = CrcCheckPage(&dst); 1914 if (errors) { 1915 logprintf(5, "Log: file miscompare at block %d, " 1916 "offset %x-%x. File: %s\n", 1917 i, i * page_length, ((i + 1) * page_length) - 1, 1918 filename_.c_str()); 1919 result = false; 1920 } 1921 crc_page_ = -1; 1922 errorcount_ += errors; 1923 } 1924 if (!PutValidPage(&dst)) 1925 return false; 1926 } 1927 return result; 1928 } 1929 1930 // File IO work loop. Execute until marked done. 1931 bool FileThread::Work() { 1932 bool result = true; 1933 int64 loops = 0; 1934 1935 logprintf(9, "Log: Starting file thread %d, file %s, device %s\n", 1936 thread_num_, 1937 filename_.c_str(), 1938 devicename_.c_str()); 1939 1940 if (!PagePrepare()) { 1941 status_ = false; 1942 return false; 1943 } 1944 1945 // Open the data IO file. 1946 int fd = 0; 1947 if (!OpenFile(&fd)) { 1948 status_ = false; 1949 return false; 1950 } 1951 1952 pass_ = 0; 1953 1954 // Load patterns into page records. 1955 page_recs_ = new struct PageRec[sat_->disk_pages()]; 1956 for (int i = 0; i < sat_->disk_pages(); i++) { 1957 page_recs_[i].pattern = new class Pattern(); 1958 } 1959 1960 // Loop until done. 1961 while (IsReadyToRun()) { 1962 // Do the file write. 1963 if (!(result = result && WritePages(fd))) 1964 break; 1965 1966 // Do the file read. 1967 if (!(result = result && ReadPages(fd))) 1968 break; 1969 1970 loops++; 1971 pass_ = loops; 1972 } 1973 1974 pages_copied_ = loops * sat_->disk_pages(); 1975 1976 // Clean up. 1977 CloseFile(fd); 1978 PageTeardown(); 1979 1980 logprintf(9, "Log: Completed %d: file thread status %d, %d pages copied\n", 1981 thread_num_, status_, pages_copied_); 1982 // Failure to read from device indicates hardware, 1983 // rather than procedural SW error. 1984 status_ = true; 1985 return true; 1986 } 1987 1988 bool NetworkThread::IsNetworkStopSet() { 1989 return !IsReadyToRunNoPause(); 1990 } 1991 1992 bool NetworkSlaveThread::IsNetworkStopSet() { 1993 // This thread has no completion status. 1994 // It finishes whever there is no more data to be 1995 // passed back. 1996 return true; 1997 } 1998 1999 // Set ip name to use for Network IO. 2000 void NetworkThread::SetIP(const char *ipaddr_init) { 2001 strncpy(ipaddr_, ipaddr_init, 256); 2002 } 2003 2004 // Create a socket. 2005 // Return 0 on error. 2006 bool NetworkThread::CreateSocket(int *psocket) { 2007 int sock = socket(AF_INET, SOCK_STREAM, 0); 2008 if (sock == -1) { 2009 logprintf(0, "Process Error: Cannot open socket\n"); 2010 pages_copied_ = 0; 2011 status_ = false; 2012 return false; 2013 } 2014 *psocket = sock; 2015 return true; 2016 } 2017 2018 // Close the socket. 2019 bool NetworkThread::CloseSocket(int sock) { 2020 close(sock); 2021 return true; 2022 } 2023 2024 // Initiate the tcp connection. 2025 bool NetworkThread::Connect(int sock) { 2026 struct sockaddr_in dest_addr; 2027 dest_addr.sin_family = AF_INET; 2028 dest_addr.sin_port = htons(kNetworkPort); 2029 memset(&(dest_addr.sin_zero), '\0', sizeof(dest_addr.sin_zero)); 2030 2031 // Translate dot notation to u32. 2032 if (inet_aton(ipaddr_, &dest_addr.sin_addr) == 0) { 2033 logprintf(0, "Process Error: Cannot resolve %s\n", ipaddr_); 2034 pages_copied_ = 0; 2035 status_ = false; 2036 return false; 2037 } 2038 2039 if (-1 == connect(sock, reinterpret_cast<struct sockaddr *>(&dest_addr), 2040 sizeof(struct sockaddr))) { 2041 logprintf(0, "Process Error: Cannot connect %s\n", ipaddr_); 2042 pages_copied_ = 0; 2043 status_ = false; 2044 return false; 2045 } 2046 return true; 2047 } 2048 2049 // Initiate the tcp connection. 2050 bool NetworkListenThread::Listen() { 2051 struct sockaddr_in sa; 2052 2053 memset(&(sa.sin_zero), '\0', sizeof(sa.sin_zero)); 2054 2055 sa.sin_family = AF_INET; 2056 sa.sin_addr.s_addr = INADDR_ANY; 2057 sa.sin_port = htons(kNetworkPort); 2058 2059 if (-1 == ::bind(sock_, (struct sockaddr*)&sa, sizeof(struct sockaddr))) { 2060 char buf[256]; 2061 sat_strerror(errno, buf, sizeof(buf)); 2062 logprintf(0, "Process Error: Cannot bind socket: %s\n", buf); 2063 pages_copied_ = 0; 2064 status_ = false; 2065 return false; 2066 } 2067 listen(sock_, 3); 2068 return true; 2069 } 2070 2071 // Wait for a connection from a network traffic generation thread. 2072 bool NetworkListenThread::Wait() { 2073 fd_set rfds; 2074 struct timeval tv; 2075 int retval; 2076 2077 // Watch sock_ to see when it has input. 2078 FD_ZERO(&rfds); 2079 FD_SET(sock_, &rfds); 2080 // Wait up to five seconds. 2081 tv.tv_sec = 5; 2082 tv.tv_usec = 0; 2083 2084 retval = select(sock_ + 1, &rfds, NULL, NULL, &tv); 2085 2086 return (retval > 0); 2087 } 2088 2089 // Wait for a connection from a network traffic generation thread. 2090 bool NetworkListenThread::GetConnection(int *pnewsock) { 2091 struct sockaddr_in sa; 2092 socklen_t size = sizeof(struct sockaddr_in); 2093 2094 int newsock = accept(sock_, reinterpret_cast<struct sockaddr *>(&sa), &size); 2095 if (newsock < 0) { 2096 logprintf(0, "Process Error: Did not receive connection\n"); 2097 pages_copied_ = 0; 2098 status_ = false; 2099 return false; 2100 } 2101 *pnewsock = newsock; 2102 return true; 2103 } 2104 2105 // Send a page, return false if a page was not sent. 2106 bool NetworkThread::SendPage(int sock, struct page_entry *src) { 2107 int page_length = sat_->page_length(); 2108 char *address = static_cast<char*>(src->addr); 2109 2110 // Send our data over the network. 2111 int size = page_length; 2112 while (size) { 2113 int transferred = send(sock, address + (page_length - size), size, 0); 2114 if ((transferred == 0) || (transferred == -1)) { 2115 if (!IsNetworkStopSet()) { 2116 char buf[256] = ""; 2117 sat_strerror(errno, buf, sizeof(buf)); 2118 logprintf(0, "Process Error: Thread %d, " 2119 "Network write failed, bailing. (%s)\n", 2120 thread_num_, buf); 2121 status_ = false; 2122 } 2123 return false; 2124 } 2125 size = size - transferred; 2126 } 2127 return true; 2128 } 2129 2130 // Receive a page. Return false if a page was not received. 2131 bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) { 2132 int page_length = sat_->page_length(); 2133 char *address = static_cast<char*>(dst->addr); 2134 2135 // Maybe we will get our data back again, maybe not. 2136 int size = page_length; 2137 while (size) { 2138 int transferred = recv(sock, address + (page_length - size), size, 0); 2139 if ((transferred == 0) || (transferred == -1)) { 2140 // Typically network slave thread should exit as network master 2141 // thread stops sending data. 2142 if (IsNetworkStopSet()) { 2143 int err = errno; 2144 if (transferred == 0 && err == 0) { 2145 // Two system setups will not sync exactly, 2146 // allow early exit, but log it. 2147 logprintf(0, "Log: Net thread did not receive any data, exiting.\n"); 2148 } else { 2149 char buf[256] = ""; 2150 sat_strerror(err, buf, sizeof(buf)); 2151 // Print why we failed. 2152 logprintf(0, "Process Error: Thread %d, " 2153 "Network read failed, bailing (%s).\n", 2154 thread_num_, buf); 2155 status_ = false; 2156 // Print arguments and results. 2157 logprintf(0, "Log: recv(%d, address %x, size %x, 0) == %x, err %d\n", 2158 sock, address + (page_length - size), 2159 size, transferred, err); 2160 if ((transferred == 0) && 2161 (page_length - size < 512) && 2162 (page_length - size > 0)) { 2163 // Print null terminated data received, to see who's been 2164 // sending us supicious unwanted data. 2165 address[page_length - size] = 0; 2166 logprintf(0, "Log: received %d bytes: '%s'\n", 2167 page_length - size, address); 2168 } 2169 } 2170 } 2171 return false; 2172 } 2173 size = size - transferred; 2174 } 2175 return true; 2176 } 2177 2178 // Network IO work loop. Execute until marked done. 2179 // Return true if the thread ran as expected. 2180 bool NetworkThread::Work() { 2181 logprintf(9, "Log: Starting network thread %d, ip %s\n", 2182 thread_num_, 2183 ipaddr_); 2184 2185 // Make a socket. 2186 int sock = 0; 2187 if (!CreateSocket(&sock)) 2188 return false; 2189 2190 // Network IO loop requires network slave thread to have already initialized. 2191 // We will sleep here for awhile to ensure that the slave thread will be 2192 // listening by the time we connect. 2193 // Sleep for 15 seconds. 2194 sat_sleep(15); 2195 logprintf(9, "Log: Starting execution of network thread %d, ip %s\n", 2196 thread_num_, 2197 ipaddr_); 2198 2199 2200 // Connect to a slave thread. 2201 if (!Connect(sock)) 2202 return false; 2203 2204 // Loop until done. 2205 bool result = true; 2206 int strict = sat_->strict(); 2207 int64 loops = 0; 2208 while (IsReadyToRun()) { 2209 struct page_entry src; 2210 struct page_entry dst; 2211 result = result && sat_->GetValid(&src); 2212 result = result && sat_->GetEmpty(&dst); 2213 if (!result) { 2214 logprintf(0, "Process Error: net_thread failed to pop pages, " 2215 "bailing\n"); 2216 break; 2217 } 2218 2219 // Check data correctness. 2220 if (strict) 2221 CrcCheckPage(&src); 2222 2223 // Do the network write. 2224 if (!(result = result && SendPage(sock, &src))) 2225 break; 2226 2227 // Update pattern reference to reflect new contents. 2228 dst.pattern = src.pattern; 2229 2230 // Do the network read. 2231 if (!(result = result && ReceivePage(sock, &dst))) 2232 break; 2233 2234 // Ensure that the transfer ended up with correct data. 2235 if (strict) 2236 CrcCheckPage(&dst); 2237 2238 // Return all of our pages to the queue. 2239 result = result && sat_->PutValid(&dst); 2240 result = result && sat_->PutEmpty(&src); 2241 if (!result) { 2242 logprintf(0, "Process Error: net_thread failed to push pages, " 2243 "bailing\n"); 2244 break; 2245 } 2246 loops++; 2247 } 2248 2249 pages_copied_ = loops; 2250 status_ = result; 2251 2252 // Clean up. 2253 CloseSocket(sock); 2254 2255 logprintf(9, "Log: Completed %d: network thread status %d, " 2256 "%d pages copied\n", 2257 thread_num_, status_, pages_copied_); 2258 return result; 2259 } 2260 2261 // Spawn slave threads for incoming connections. 2262 bool NetworkListenThread::SpawnSlave(int newsock, int threadid) { 2263 logprintf(12, "Log: Listen thread spawning slave\n"); 2264 2265 // Spawn slave thread, to reflect network traffic back to sender. 2266 ChildWorker *child_worker = new ChildWorker; 2267 child_worker->thread.SetSock(newsock); 2268 child_worker->thread.InitThread(threadid, sat_, os_, patternlist_, 2269 &child_worker->status); 2270 child_worker->status.Initialize(); 2271 child_worker->thread.SpawnThread(); 2272 child_workers_.push_back(child_worker); 2273 2274 return true; 2275 } 2276 2277 // Reap slave threads. 2278 bool NetworkListenThread::ReapSlaves() { 2279 bool result = true; 2280 // Gather status and reap threads. 2281 logprintf(12, "Log: Joining all outstanding threads\n"); 2282 2283 for (size_t i = 0; i < child_workers_.size(); i++) { 2284 NetworkSlaveThread& child_thread = child_workers_[i]->thread; 2285 logprintf(12, "Log: Joining slave thread %d\n", i); 2286 child_thread.JoinThread(); 2287 if (child_thread.GetStatus() != 1) { 2288 logprintf(0, "Process Error: Slave Thread %d failed with status %d\n", i, 2289 child_thread.GetStatus()); 2290 result = false; 2291 } 2292 errorcount_ += child_thread.GetErrorCount(); 2293 logprintf(9, "Log: Slave Thread %d found %lld miscompares\n", i, 2294 child_thread.GetErrorCount()); 2295 pages_copied_ += child_thread.GetPageCount(); 2296 } 2297 2298 return result; 2299 } 2300 2301 // Network listener IO work loop. Execute until marked done. 2302 // Return false on fatal software error. 2303 bool NetworkListenThread::Work() { 2304 logprintf(9, "Log: Starting network listen thread %d\n", 2305 thread_num_); 2306 2307 // Make a socket. 2308 sock_ = 0; 2309 if (!CreateSocket(&sock_)) { 2310 status_ = false; 2311 return false; 2312 } 2313 logprintf(9, "Log: Listen thread created sock\n"); 2314 2315 // Allows incoming connections to be queued up by socket library. 2316 int newsock = 0; 2317 Listen(); 2318 logprintf(12, "Log: Listen thread waiting for incoming connections\n"); 2319 2320 // Wait on incoming connections, and spawn worker threads for them. 2321 int threadcount = 0; 2322 while (IsReadyToRun()) { 2323 // Poll for connections that we can accept(). 2324 if (Wait()) { 2325 // Accept those connections. 2326 logprintf(12, "Log: Listen thread found incoming connection\n"); 2327 if (GetConnection(&newsock)) { 2328 SpawnSlave(newsock, threadcount); 2329 threadcount++; 2330 } 2331 } 2332 } 2333 2334 // Gather status and join spawned threads. 2335 ReapSlaves(); 2336 2337 // Delete the child workers. 2338 for (ChildVector::iterator it = child_workers_.begin(); 2339 it != child_workers_.end(); ++it) { 2340 (*it)->status.Destroy(); 2341 delete *it; 2342 } 2343 child_workers_.clear(); 2344 2345 CloseSocket(sock_); 2346 2347 status_ = true; 2348 logprintf(9, 2349 "Log: Completed %d: network listen thread status %d, " 2350 "%d pages copied\n", 2351 thread_num_, status_, pages_copied_); 2352 return true; 2353 } 2354 2355 // Set network reflector socket struct. 2356 void NetworkSlaveThread::SetSock(int sock) { 2357 sock_ = sock; 2358 } 2359 2360 // Network reflector IO work loop. Execute until marked done. 2361 // Return false on fatal software error. 2362 bool NetworkSlaveThread::Work() { 2363 logprintf(9, "Log: Starting network slave thread %d\n", 2364 thread_num_); 2365 2366 // Verify that we have a socket. 2367 int sock = sock_; 2368 if (!sock) { 2369 status_ = false; 2370 return false; 2371 } 2372 2373 // Loop until done. 2374 int64 loops = 0; 2375 // Init a local buffer for storing data. 2376 void *local_page = NULL; 2377 #ifdef HAVE_POSIX_MEMALIGN 2378 int result = posix_memalign(&local_page, 512, sat_->page_length()); 2379 #else 2380 local_page = memalign(512, sat_->page_length()); 2381 int result = (local_page == 0); 2382 #endif 2383 if (result) { 2384 logprintf(0, "Process Error: net slave posix_memalign " 2385 "returned %d (fail)\n", 2386 result); 2387 status_ = false; 2388 return false; 2389 } 2390 2391 struct page_entry page; 2392 page.addr = local_page; 2393 2394 // This thread will continue to run as long as the thread on the other end of 2395 // the socket is still sending and receiving data. 2396 while (1) { 2397 // Do the network read. 2398 if (!ReceivePage(sock, &page)) 2399 break; 2400 2401 // Do the network write. 2402 if (!SendPage(sock, &page)) 2403 break; 2404 2405 loops++; 2406 } 2407 2408 pages_copied_ = loops; 2409 // No results provided from this type of thread. 2410 status_ = true; 2411 2412 // Clean up. 2413 CloseSocket(sock); 2414 2415 logprintf(9, 2416 "Log: Completed %d: network slave thread status %d, " 2417 "%d pages copied\n", 2418 thread_num_, status_, pages_copied_); 2419 return true; 2420 } 2421 2422 // Thread work loop. Execute until marked finished. 2423 bool ErrorPollThread::Work() { 2424 logprintf(9, "Log: Starting system error poll thread %d\n", thread_num_); 2425 2426 // This calls a generic error polling function in the Os abstraction layer. 2427 do { 2428 errorcount_ += os_->ErrorPoll(); 2429 os_->ErrorWait(); 2430 } while (IsReadyToRun()); 2431 2432 logprintf(9, "Log: Finished system error poll thread %d: %d errors\n", 2433 thread_num_, errorcount_); 2434 status_ = true; 2435 return true; 2436 } 2437 2438 // Worker thread to heat up CPU. 2439 // This thread does not evaluate pass/fail or software error. 2440 bool CpuStressThread::Work() { 2441 logprintf(9, "Log: Starting CPU stress thread %d\n", thread_num_); 2442 2443 do { 2444 // Run ludloff's platform/CPU-specific assembly workload. 2445 os_->CpuStressWorkload(); 2446 YieldSelf(); 2447 } while (IsReadyToRun()); 2448 2449 logprintf(9, "Log: Finished CPU stress thread %d:\n", 2450 thread_num_); 2451 status_ = true; 2452 return true; 2453 } 2454 2455 CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data, 2456 int cacheline_count, 2457 int thread_num, 2458 int thread_count, 2459 int inc_count) { 2460 cc_cacheline_data_ = data; 2461 cc_cacheline_count_ = cacheline_count; 2462 cc_thread_num_ = thread_num; 2463 cc_thread_count_ = thread_count; 2464 cc_inc_count_ = inc_count; 2465 } 2466 2467 // A very simple psuedorandom generator. Since the random number is based 2468 // on only a few simple logic operations, it can be done quickly in registers 2469 // and the compiler can inline it. 2470 uint64 CpuCacheCoherencyThread::SimpleRandom(uint64 seed) { 2471 return (seed >> 1) ^ (-(seed & 1) & kRandomPolynomial); 2472 } 2473 2474 // Worked thread to test the cache coherency of the CPUs 2475 // Return false on fatal sw error. 2476 bool CpuCacheCoherencyThread::Work() { 2477 logprintf(9, "Log: Starting the Cache Coherency thread %d\n", 2478 cc_thread_num_); 2479 uint64 time_start, time_end; 2480 struct timeval tv; 2481 2482 // Use a slightly more robust random number for the initial 2483 // value, so the random sequences from the simple generator will 2484 // be more divergent. 2485 #ifdef HAVE_RAND_R 2486 unsigned int seed = static_cast<unsigned int>(gettid()); 2487 uint64 r = static_cast<uint64>(rand_r(&seed)); 2488 r |= static_cast<uint64>(rand_r(&seed)) << 32; 2489 #else 2490 srand(time(NULL)); 2491 uint64 r = static_cast<uint64>(rand()); // NOLINT 2492 r |= static_cast<uint64>(rand()) << 32; // NOLINT 2493 #endif 2494 2495 gettimeofday(&tv, NULL); // Get the timestamp before increments. 2496 time_start = tv.tv_sec * 1000000ULL + tv.tv_usec; 2497 2498 uint64 total_inc = 0; // Total increments done by the thread. 2499 while (IsReadyToRun()) { 2500 for (int i = 0; i < cc_inc_count_; i++) { 2501 // Choose a datastructure in random and increment the appropriate 2502 // member in that according to the offset (which is the same as the 2503 // thread number. 2504 r = SimpleRandom(r); 2505 int cline_num = r % cc_cacheline_count_; 2506 int offset; 2507 // Reverse the order for odd numbered threads in odd numbered cache 2508 // lines. This is designed for massively multi-core systems where the 2509 // number of cores exceeds the bytes in a cache line, so "distant" cores 2510 // get a chance to exercize cache coherency between them. 2511 if (cline_num & cc_thread_num_ & 1) 2512 offset = (cc_thread_count_ & ~1) - cc_thread_num_; 2513 else 2514 offset = cc_thread_num_; 2515 // Increment the member of the randomely selected structure. 2516 (cc_cacheline_data_[cline_num].num[offset])++; 2517 } 2518 2519 total_inc += cc_inc_count_; 2520 2521 // Calculate if the local counter matches with the global value 2522 // in all the cache line structures for this particular thread. 2523 int cc_global_num = 0; 2524 for (int cline_num = 0; cline_num < cc_cacheline_count_; cline_num++) { 2525 int offset; 2526 // Perform the same offset calculation from above. 2527 if (cline_num & cc_thread_num_ & 1) 2528 offset = (cc_thread_count_ & ~1) - cc_thread_num_; 2529 else 2530 offset = cc_thread_num_; 2531 cc_global_num += cc_cacheline_data_[cline_num].num[offset]; 2532 // Reset the cachline member's value for the next run. 2533 cc_cacheline_data_[cline_num].num[offset] = 0; 2534 } 2535 if (sat_->error_injection()) 2536 cc_global_num = -1; 2537 2538 // Since the count is only stored in a byte, to squeeze more into a 2539 // single cache line, only compare it as a byte. In the event that there 2540 // is something detected, the chance that it would be missed by a single 2541 // thread is 1 in 256. If it affects all cores, that makes the chance 2542 // of it being missed terribly minute. It seems unlikely any failure 2543 // case would be off by more than a small number. 2544 if ((cc_global_num & 0xff) != (cc_inc_count_ & 0xff)) { 2545 errorcount_++; 2546 logprintf(0, "Hardware Error: global(%d) and local(%d) do not match\n", 2547 cc_global_num, cc_inc_count_); 2548 } 2549 } 2550 gettimeofday(&tv, NULL); // Get the timestamp at the end. 2551 time_end = tv.tv_sec * 1000000ULL + tv.tv_usec; 2552 2553 uint64 us_elapsed = time_end - time_start; 2554 // inc_rate is the no. of increments per second. 2555 double inc_rate = total_inc * 1e6 / us_elapsed; 2556 2557 logprintf(4, "Stats: CC Thread(%d): Time=%llu us," 2558 " Increments=%llu, Increments/sec = %.6lf\n", 2559 cc_thread_num_, us_elapsed, total_inc, inc_rate); 2560 logprintf(9, "Log: Finished CPU Cache Coherency thread %d:\n", 2561 cc_thread_num_); 2562 status_ = true; 2563 return true; 2564 } 2565 2566 DiskThread::DiskThread(DiskBlockTable *block_table) { 2567 read_block_size_ = kSectorSize; // default 1 sector (512 bytes) 2568 write_block_size_ = kSectorSize; // this assumes read and write block size 2569 // are the same 2570 segment_size_ = -1; // use the entire disk as one segment 2571 cache_size_ = 16 * 1024 * 1024; // assume 16MiB cache by default 2572 // Use a queue such that 3/2 times as much data as the cache can hold 2573 // is written before it is read so that there is little chance the read 2574 // data is in the cache. 2575 queue_size_ = ((cache_size_ / write_block_size_) * 3) / 2; 2576 blocks_per_segment_ = 32; 2577 2578 read_threshold_ = 100000; // 100ms is a reasonable limit for 2579 write_threshold_ = 100000; // reading/writing a sector 2580 2581 read_timeout_ = 5000000; // 5 seconds should be long enough for a 2582 write_timeout_ = 5000000; // timout for reading/writing 2583 2584 device_sectors_ = 0; 2585 non_destructive_ = 0; 2586 2587 #ifdef HAVE_LIBAIO_H 2588 aio_ctx_ = 0; 2589 #endif 2590 block_table_ = block_table; 2591 update_block_table_ = 1; 2592 2593 block_buffer_ = NULL; 2594 2595 blocks_written_ = 0; 2596 blocks_read_ = 0; 2597 } 2598 2599 DiskThread::~DiskThread() { 2600 if (block_buffer_) 2601 free(block_buffer_); 2602 } 2603 2604 // Set filename for device file (in /dev). 2605 void DiskThread::SetDevice(const char *device_name) { 2606 device_name_ = device_name; 2607 } 2608 2609 // Set various parameters that control the behaviour of the test. 2610 // -1 is used as a sentinel value on each parameter (except non_destructive) 2611 // to indicate that the parameter not be set. 2612 bool DiskThread::SetParameters(int read_block_size, 2613 int write_block_size, 2614 int64 segment_size, 2615 int64 cache_size, 2616 int blocks_per_segment, 2617 int64 read_threshold, 2618 int64 write_threshold, 2619 int non_destructive) { 2620 if (read_block_size != -1) { 2621 // Blocks must be aligned to the disk's sector size. 2622 if (read_block_size % kSectorSize != 0) { 2623 logprintf(0, "Process Error: Block size must be a multiple of %d " 2624 "(thread %d).\n", kSectorSize, thread_num_); 2625 return false; 2626 } 2627 2628 read_block_size_ = read_block_size; 2629 } 2630 2631 if (write_block_size != -1) { 2632 // Write blocks must be aligned to the disk's sector size and to the 2633 // block size. 2634 if (write_block_size % kSectorSize != 0) { 2635 logprintf(0, "Process Error: Write block size must be a multiple " 2636 "of %d (thread %d).\n", kSectorSize, thread_num_); 2637 return false; 2638 } 2639 if (write_block_size % read_block_size_ != 0) { 2640 logprintf(0, "Process Error: Write block size must be a multiple " 2641 "of the read block size, which is %d (thread %d).\n", 2642 read_block_size_, thread_num_); 2643 return false; 2644 } 2645 2646 write_block_size_ = write_block_size; 2647 2648 } else { 2649 // Make sure write_block_size_ is still valid. 2650 if (read_block_size_ > write_block_size_) { 2651 logprintf(5, "Log: Assuming write block size equal to read block size, " 2652 "which is %d (thread %d).\n", read_block_size_, 2653 thread_num_); 2654 write_block_size_ = read_block_size_; 2655 } else { 2656 if (write_block_size_ % read_block_size_ != 0) { 2657 logprintf(0, "Process Error: Write block size (defined as %d) must " 2658 "be a multiple of the read block size, which is %d " 2659 "(thread %d).\n", write_block_size_, read_block_size_, 2660 thread_num_); 2661 return false; 2662 } 2663 } 2664 } 2665 2666 if (cache_size != -1) { 2667 cache_size_ = cache_size; 2668 } 2669 2670 if (blocks_per_segment != -1) { 2671 if (blocks_per_segment <= 0) { 2672 logprintf(0, "Process Error: Blocks per segment must be greater than " 2673 "zero.\n (thread %d)", thread_num_); 2674 return false; 2675 } 2676 2677 blocks_per_segment_ = blocks_per_segment; 2678 } 2679 2680 if (read_threshold != -1) { 2681 if (read_threshold <= 0) { 2682 logprintf(0, "Process Error: Read threshold must be greater than " 2683 "zero (thread %d).\n", thread_num_); 2684 return false; 2685 } 2686 2687 read_threshold_ = read_threshold; 2688 } 2689 2690 if (write_threshold != -1) { 2691 if (write_threshold <= 0) { 2692 logprintf(0, "Process Error: Write threshold must be greater than " 2693 "zero (thread %d).\n", thread_num_); 2694 return false; 2695 } 2696 2697 write_threshold_ = write_threshold; 2698 } 2699 2700 if (segment_size != -1) { 2701 // Segments must be aligned to the disk's sector size. 2702 if (segment_size % kSectorSize != 0) { 2703 logprintf(0, "Process Error: Segment size must be a multiple of %d" 2704 " (thread %d).\n", kSectorSize, thread_num_); 2705 return false; 2706 } 2707 2708 segment_size_ = segment_size / kSectorSize; 2709 } 2710 2711 non_destructive_ = non_destructive; 2712 2713 // Having a queue of 150% of blocks that will fit in the disk's cache 2714 // should be enough to force out the oldest block before it is read and hence, 2715 // making sure the data comes form the disk and not the cache. 2716 queue_size_ = ((cache_size_ / write_block_size_) * 3) / 2; 2717 // Updating DiskBlockTable parameters 2718 if (update_block_table_) { 2719 block_table_->SetParameters(kSectorSize, write_block_size_, 2720 device_sectors_, segment_size_, 2721 device_name_); 2722 } 2723 return true; 2724 } 2725 2726 // Open a device, return false on failure. 2727 bool DiskThread::OpenDevice(int *pfile) { 2728 int flags = O_RDWR | O_SYNC | O_LARGEFILE; 2729 int fd = open(device_name_.c_str(), flags | O_DIRECT, 0); 2730 if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) { 2731 fd = open(device_name_.c_str(), flags, 0); // Try without O_DIRECT 2732 os_->ActivateFlushPageCache(); 2733 } 2734 if (fd < 0) { 2735 logprintf(0, "Process Error: Failed to open device %s (thread %d)!!\n", 2736 device_name_.c_str(), thread_num_); 2737 return false; 2738 } 2739 *pfile = fd; 2740 2741 return GetDiskSize(fd); 2742 } 2743 2744 // Retrieves the size (in bytes) of the disk/file. 2745 // Return false on failure. 2746 bool DiskThread::GetDiskSize(int fd) { 2747 struct stat device_stat; 2748 if (fstat(fd, &device_stat) == -1) { 2749 logprintf(0, "Process Error: Unable to fstat disk %s (thread %d).\n", 2750 device_name_.c_str(), thread_num_); 2751 return false; 2752 } 2753 2754 // For a block device, an ioctl is needed to get the size since the size 2755 // of the device file (i.e. /dev/sdb) is 0. 2756 if (S_ISBLK(device_stat.st_mode)) { 2757 uint64 block_size = 0; 2758 2759 if (ioctl(fd, BLKGETSIZE64, &block_size) == -1) { 2760 logprintf(0, "Process Error: Unable to ioctl disk %s (thread %d).\n", 2761 device_name_.c_str(), thread_num_); 2762 return false; 2763 } 2764 2765 // Zero size indicates nonworking device.. 2766 if (block_size == 0) { 2767 os_->ErrorReport(device_name_.c_str(), "device-size-zero", 1); 2768 ++errorcount_; 2769 status_ = true; // Avoid a procedural error. 2770 return false; 2771 } 2772 2773 device_sectors_ = block_size / kSectorSize; 2774 2775 } else if (S_ISREG(device_stat.st_mode)) { 2776 device_sectors_ = device_stat.st_size / kSectorSize; 2777 2778 } else { 2779 logprintf(0, "Process Error: %s is not a regular file or block " 2780 "device (thread %d).\n", device_name_.c_str(), 2781 thread_num_); 2782 return false; 2783 } 2784 2785 logprintf(12, "Log: Device sectors: %lld on disk %s (thread %d).\n", 2786 device_sectors_, device_name_.c_str(), thread_num_); 2787 2788 if (update_block_table_) { 2789 block_table_->SetParameters(kSectorSize, write_block_size_, 2790 device_sectors_, segment_size_, 2791 device_name_); 2792 } 2793 2794 return true; 2795 } 2796 2797 bool DiskThread::CloseDevice(int fd) { 2798 close(fd); 2799 return true; 2800 } 2801 2802 // Return the time in microseconds. 2803 int64 DiskThread::GetTime() { 2804 struct timeval tv; 2805 gettimeofday(&tv, NULL); 2806 return tv.tv_sec * 1000000 + tv.tv_usec; 2807 } 2808 2809 // Do randomized reads and (possibly) writes on a device. 2810 // Return false on fatal SW error, true on SW success, 2811 // regardless of whether HW failed. 2812 bool DiskThread::DoWork(int fd) { 2813 int64 block_num = 0; 2814 int64 num_segments; 2815 2816 if (segment_size_ == -1) { 2817 num_segments = 1; 2818 } else { 2819 num_segments = device_sectors_ / segment_size_; 2820 if (device_sectors_ % segment_size_ != 0) 2821 num_segments++; 2822 } 2823 2824 // Disk size should be at least 3x cache size. See comment later for 2825 // details. 2826 sat_assert(device_sectors_ * kSectorSize > 3 * cache_size_); 2827 2828 // This disk test works by writing blocks with a certain pattern to 2829 // disk, then reading them back and verifying it against the pattern 2830 // at a later time. A failure happens when either the block cannot 2831 // be written/read or when the read block is different than what was 2832 // written. If a block takes too long to write/read, then a warning 2833 // is given instead of an error since taking too long is not 2834 // necessarily an error. 2835 // 2836 // To prevent the read blocks from coming from the disk cache, 2837 // enough blocks are written before read such that a block would 2838 // be ejected from the disk cache by the time it is read. 2839 // 2840 // TODO(amistry): Implement some sort of read/write throttling. The 2841 // flood of asynchronous I/O requests when a drive is 2842 // unplugged is causing the application and kernel to 2843 // become unresponsive. 2844 2845 while (IsReadyToRun()) { 2846 // Write blocks to disk. 2847 logprintf(16, "Log: Write phase %sfor disk %s (thread %d).\n", 2848 non_destructive_ ? "(disabled) " : "", 2849 device_name_.c_str(), thread_num_); 2850 while (IsReadyToRunNoPause() && 2851 in_flight_sectors_.size() < 2852 static_cast<size_t>(queue_size_ + 1)) { 2853 // Confine testing to a particular segment of the disk. 2854 int64 segment = (block_num / blocks_per_segment_) % num_segments; 2855 if (!non_destructive_ && 2856 (block_num % blocks_per_segment_ == 0)) { 2857 logprintf(20, "Log: Starting to write segment %lld out of " 2858 "%lld on disk %s (thread %d).\n", 2859 segment, num_segments, device_name_.c_str(), 2860 thread_num_); 2861 } 2862 block_num++; 2863 2864 BlockData *block = block_table_->GetUnusedBlock(segment); 2865 2866 // If an unused sequence of sectors could not be found, skip to the 2867 // next block to process. Soon, a new segment will come and new 2868 // sectors will be able to be allocated. This effectively puts a 2869 // minumim on the disk size at 3x the stated cache size, or 48MiB 2870 // if a cache size is not given (since the cache is set as 16MiB 2871 // by default). Given that todays caches are at the low MiB range 2872 // and drive sizes at the mid GB, this shouldn't pose a problem. 2873 // The 3x minimum comes from the following: 2874 // 1. In order to allocate 'y' blocks from a segment, the 2875 // segment must contain at least 2y blocks or else an 2876 // allocation may not succeed. 2877 // 2. Assume the entire disk is one segment. 2878 // 3. A full write phase consists of writing blocks corresponding to 2879 // 3/2 cache size. 2880 // 4. Therefore, the one segment must have 2 * 3/2 * cache 2881 // size worth of blocks = 3 * cache size worth of blocks 2882 // to complete. 2883 // In non-destructive mode, don't write anything to disk. 2884 if (!non_destructive_) { 2885 if (!WriteBlockToDisk(fd, block)) { 2886 block_table_->RemoveBlock(block); 2887 return true; 2888 } 2889 blocks_written_++; 2890 } 2891 2892 // Block is either initialized by writing, or in nondestructive case, 2893 // initialized by being added into the datastructure for later reading. 2894 block->initialized(); 2895 2896 in_flight_sectors_.push(block); 2897 } 2898 if (!os_->FlushPageCache()) // If O_DIRECT worked, this will be a NOP. 2899 return false; 2900 2901 // Verify blocks on disk. 2902 logprintf(20, "Log: Read phase for disk %s (thread %d).\n", 2903 device_name_.c_str(), thread_num_); 2904 while (IsReadyToRunNoPause() && !in_flight_sectors_.empty()) { 2905 BlockData *block = in_flight_sectors_.front(); 2906 in_flight_sectors_.pop(); 2907 if (!ValidateBlockOnDisk(fd, block)) 2908 return true; 2909 block_table_->RemoveBlock(block); 2910 blocks_read_++; 2911 } 2912 } 2913 2914 pages_copied_ = blocks_written_ + blocks_read_; 2915 return true; 2916 } 2917 2918 // Do an asynchronous disk I/O operation. 2919 // Return false if the IO is not set up. 2920 bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size, 2921 int64 offset, int64 timeout) { 2922 #ifdef HAVE_LIBAIO_H 2923 // Use the Linux native asynchronous I/O interface for reading/writing. 2924 // A read/write consists of three basic steps: 2925 // 1. create an io context. 2926 // 2. prepare and submit an io request to the context 2927 // 3. wait for an event on the context. 2928 2929 struct { 2930 const int opcode; 2931 const char *op_str; 2932 const char *error_str; 2933 } operations[2] = { 2934 { IO_CMD_PREAD, "read", "disk-read-error" }, 2935 { IO_CMD_PWRITE, "write", "disk-write-error" } 2936 }; 2937 2938 struct iocb cb; 2939 memset(&cb, 0, sizeof(cb)); 2940 2941 cb.aio_fildes = fd; 2942 cb.aio_lio_opcode = operations[op].opcode; 2943 cb.u.c.buf = buf; 2944 cb.u.c.nbytes = size; 2945 cb.u.c.offset = offset; 2946 2947 struct iocb *cbs[] = { &cb }; 2948 if (io_submit(aio_ctx_, 1, cbs) != 1) { 2949 int error = errno; 2950 char buf[256]; 2951 sat_strerror(error, buf, sizeof(buf)); 2952 logprintf(0, "Process Error: Unable to submit async %s " 2953 "on disk %s (thread %d). Error %d, %s\n", 2954 operations[op].op_str, device_name_.c_str(), 2955 thread_num_, error, buf); 2956 return false; 2957 } 2958 2959 struct io_event event; 2960 memset(&event, 0, sizeof(event)); 2961 struct timespec tv; 2962 tv.tv_sec = timeout / 1000000; 2963 tv.tv_nsec = (timeout % 1000000) * 1000; 2964 if (io_getevents(aio_ctx_, 1, 1, &event, &tv) != 1) { 2965 // A ctrl-c from the keyboard will cause io_getevents to fail with an 2966 // EINTR error code. This is not an error and so don't treat it as such, 2967 // but still log it. 2968 int error = errno; 2969 if (error == EINTR) { 2970 logprintf(5, "Log: %s interrupted on disk %s (thread %d).\n", 2971 operations[op].op_str, device_name_.c_str(), 2972 thread_num_); 2973 } else { 2974 os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1); 2975 errorcount_ += 1; 2976 logprintf(0, "Hardware Error: Timeout doing async %s to sectors " 2977 "starting at %lld on disk %s (thread %d).\n", 2978 operations[op].op_str, offset / kSectorSize, 2979 device_name_.c_str(), thread_num_); 2980 } 2981 2982 // Don't bother checking return codes since io_cancel seems to always fail. 2983 // Since io_cancel is always failing, destroying and recreating an I/O 2984 // context is a workaround for canceling an in-progress I/O operation. 2985 // TODO(amistry): Find out why io_cancel isn't working and make it work. 2986 io_cancel(aio_ctx_, &cb, &event); 2987 io_destroy(aio_ctx_); 2988 aio_ctx_ = 0; 2989 if (io_setup(5, &aio_ctx_)) { 2990 int error = errno; 2991 char buf[256]; 2992 sat_strerror(error, buf, sizeof(buf)); 2993 logprintf(0, "Process Error: Unable to create aio context on disk %s" 2994 " (thread %d) Error %d, %s\n", 2995 device_name_.c_str(), thread_num_, error, buf); 2996 } 2997 2998 return false; 2999 } 3000 3001 // event.res contains the number of bytes written/read or 3002 // error if < 0, I think. 3003 if (event.res != static_cast<uint64>(size)) { 3004 errorcount_++; 3005 os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1); 3006 3007 int64 result = static_cast<int64>(event.res); 3008 if (result < 0) { 3009 switch (result) { 3010 case -EIO: 3011 logprintf(0, "Hardware Error: Low-level I/O error while doing %s to " 3012 "sectors starting at %lld on disk %s (thread %d).\n", 3013 operations[op].op_str, offset / kSectorSize, 3014 device_name_.c_str(), thread_num_); 3015 break; 3016 default: 3017 logprintf(0, "Hardware Error: Unknown error while doing %s to " 3018 "sectors starting at %lld on disk %s (thread %d).\n", 3019 operations[op].op_str, offset / kSectorSize, 3020 device_name_.c_str(), thread_num_); 3021 } 3022 } else { 3023 logprintf(0, "Hardware Error: Unable to %s to sectors starting at " 3024 "%lld on disk %s (thread %d).\n", 3025 operations[op].op_str, offset / kSectorSize, 3026 device_name_.c_str(), thread_num_); 3027 } 3028 return false; 3029 } 3030 3031 return true; 3032 #else // !HAVE_LIBAIO_H 3033 return false; 3034 #endif 3035 } 3036 3037 // Write a block to disk. 3038 // Return false if the block is not written. 3039 bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) { 3040 memset(block_buffer_, 0, block->size()); 3041 3042 // Fill block buffer with a pattern 3043 struct page_entry pe; 3044 if (!sat_->GetValid(&pe)) { 3045 // Even though a valid page could not be obatined, it is not an error 3046 // since we can always fill in a pattern directly, albeit slower. 3047 unsigned int *memblock = static_cast<unsigned int *>(block_buffer_); 3048 block->set_pattern(patternlist_->GetRandomPattern()); 3049 3050 logprintf(11, "Log: Warning, using pattern fill fallback in " 3051 "DiskThread::WriteBlockToDisk on disk %s (thread %d).\n", 3052 device_name_.c_str(), thread_num_); 3053 3054 for (unsigned int i = 0; i < block->size()/wordsize_; i++) { 3055 memblock[i] = block->pattern()->pattern(i); 3056 } 3057 } else { 3058 memcpy(block_buffer_, pe.addr, block->size()); 3059 block->set_pattern(pe.pattern); 3060 sat_->PutValid(&pe); 3061 } 3062 3063 logprintf(12, "Log: Writing %lld sectors starting at %lld on disk %s" 3064 " (thread %d).\n", 3065 block->size()/kSectorSize, block->address(), 3066 device_name_.c_str(), thread_num_); 3067 3068 int64 start_time = GetTime(); 3069 3070 if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->size(), 3071 block->address() * kSectorSize, write_timeout_)) { 3072 return false; 3073 } 3074 3075 int64 end_time = GetTime(); 3076 logprintf(12, "Log: Writing time: %lld us (thread %d).\n", 3077 end_time - start_time, thread_num_); 3078 if (end_time - start_time > write_threshold_) { 3079 logprintf(5, "Log: Write took %lld us which is longer than threshold " 3080 "%lld us on disk %s (thread %d).\n", 3081 end_time - start_time, write_threshold_, device_name_.c_str(), 3082 thread_num_); 3083 } 3084 3085 return true; 3086 } 3087 3088 // Verify a block on disk. 3089 // Return true if the block was read, also increment errorcount 3090 // if the block had data errors or performance problems. 3091 bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) { 3092 int64 blocks = block->size() / read_block_size_; 3093 int64 bytes_read = 0; 3094 int64 current_blocks; 3095 int64 current_bytes; 3096 uint64 address = block->address(); 3097 3098 logprintf(20, "Log: Reading sectors starting at %lld on disk %s " 3099 "(thread %d).\n", 3100 address, device_name_.c_str(), thread_num_); 3101 3102 // Read block from disk and time the read. If it takes longer than the 3103 // threshold, complain. 3104 if (lseek64(fd, address * kSectorSize, SEEK_SET) == -1) { 3105 logprintf(0, "Process Error: Unable to seek to sector %lld in " 3106 "DiskThread::ValidateSectorsOnDisk on disk %s " 3107 "(thread %d).\n", address, device_name_.c_str(), thread_num_); 3108 return false; 3109 } 3110 int64 start_time = GetTime(); 3111 3112 // Split a large write-sized block into small read-sized blocks and 3113 // read them in groups of randomly-sized multiples of read block size. 3114 // This assures all data written on disk by this particular block 3115 // will be tested using a random reading pattern. 3116 while (blocks != 0) { 3117 // Test all read blocks in a written block. 3118 current_blocks = (random() % blocks) + 1; 3119 current_bytes = current_blocks * read_block_size_; 3120 3121 memset(block_buffer_, 0, current_bytes); 3122 3123 logprintf(20, "Log: Reading %lld sectors starting at sector %lld on " 3124 "disk %s (thread %d)\n", 3125 current_bytes / kSectorSize, 3126 (address * kSectorSize + bytes_read) / kSectorSize, 3127 device_name_.c_str(), thread_num_); 3128 3129 if (!AsyncDiskIO(ASYNC_IO_READ, fd, block_buffer_, current_bytes, 3130 address * kSectorSize + bytes_read, 3131 write_timeout_)) { 3132 return false; 3133 } 3134 3135 int64 end_time = GetTime(); 3136 logprintf(20, "Log: Reading time: %lld us (thread %d).\n", 3137 end_time - start_time, thread_num_); 3138 if (end_time - start_time > read_threshold_) { 3139 logprintf(5, "Log: Read took %lld us which is longer than threshold " 3140 "%lld us on disk %s (thread %d).\n", 3141 end_time - start_time, read_threshold_, 3142 device_name_.c_str(), thread_num_); 3143 } 3144 3145 // In non-destructive mode, don't compare the block to the pattern since 3146 // the block was never written to disk in the first place. 3147 if (!non_destructive_) { 3148 if (CheckRegion(block_buffer_, block->pattern(), current_bytes, 3149 0, bytes_read)) { 3150 os_->ErrorReport(device_name_.c_str(), "disk-pattern-error", 1); 3151 errorcount_ += 1; 3152 logprintf(0, "Hardware Error: Pattern mismatch in block starting at " 3153 "sector %lld in DiskThread::ValidateSectorsOnDisk on " 3154 "disk %s (thread %d).\n", 3155 address, device_name_.c_str(), thread_num_); 3156 } 3157 } 3158 3159 bytes_read += current_blocks * read_block_size_; 3160 blocks -= current_blocks; 3161 } 3162 3163 return true; 3164 } 3165 3166 // Direct device access thread. 3167 // Return false on software error. 3168 bool DiskThread::Work() { 3169 int fd; 3170 3171 logprintf(9, "Log: Starting disk thread %d, disk %s\n", 3172 thread_num_, device_name_.c_str()); 3173 3174 srandom(time(NULL)); 3175 3176 if (!OpenDevice(&fd)) { 3177 status_ = false; 3178 return false; 3179 } 3180 3181 // Allocate a block buffer aligned to 512 bytes since the kernel requires it 3182 // when using direct IO. 3183 #ifdef HAVE_POSIX_MEMALIGN 3184 int memalign_result = posix_memalign(&block_buffer_, kBufferAlignment, 3185 sat_->page_length()); 3186 #else 3187 block_buffer_ = memalign(kBufferAlignment, sat_->page_length()); 3188 int memalign_result = (block_buffer_ == 0); 3189 #endif 3190 if (memalign_result) { 3191 CloseDevice(fd); 3192 logprintf(0, "Process Error: Unable to allocate memory for buffers " 3193 "for disk %s (thread %d) posix memalign returned %d.\n", 3194 device_name_.c_str(), thread_num_, memalign_result); 3195 status_ = false; 3196 return false; 3197 } 3198 3199 #ifdef HAVE_LIBAIO_H 3200 if (io_setup(5, &aio_ctx_)) { 3201 CloseDevice(fd); 3202 logprintf(0, "Process Error: Unable to create aio context for disk %s" 3203 " (thread %d).\n", 3204 device_name_.c_str(), thread_num_); 3205 status_ = false; 3206 return false; 3207 } 3208 #endif 3209 3210 bool result = DoWork(fd); 3211 3212 status_ = result; 3213 3214 #ifdef HAVE_LIBAIO_H 3215 io_destroy(aio_ctx_); 3216 #endif 3217 CloseDevice(fd); 3218 3219 logprintf(9, "Log: Completed %d (disk %s): disk thread status %d, " 3220 "%d pages copied\n", 3221 thread_num_, device_name_.c_str(), status_, pages_copied_); 3222 return result; 3223 } 3224 3225 RandomDiskThread::RandomDiskThread(DiskBlockTable *block_table) 3226 : DiskThread(block_table) { 3227 update_block_table_ = 0; 3228 } 3229 3230 RandomDiskThread::~RandomDiskThread() { 3231 } 3232 3233 // Workload for random disk thread. 3234 bool RandomDiskThread::DoWork(int fd) { 3235 logprintf(11, "Log: Random phase for disk %s (thread %d).\n", 3236 device_name_.c_str(), thread_num_); 3237 while (IsReadyToRun()) { 3238 BlockData *block = block_table_->GetRandomBlock(); 3239 if (block == NULL) { 3240 logprintf(12, "Log: No block available for device %s (thread %d).\n", 3241 device_name_.c_str(), thread_num_); 3242 } else { 3243 ValidateBlockOnDisk(fd, block); 3244 block_table_->ReleaseBlock(block); 3245 blocks_read_++; 3246 } 3247 } 3248 pages_copied_ = blocks_read_; 3249 return true; 3250 } 3251 3252 MemoryRegionThread::MemoryRegionThread() { 3253 error_injection_ = false; 3254 pages_ = NULL; 3255 } 3256 3257 MemoryRegionThread::~MemoryRegionThread() { 3258 if (pages_ != NULL) 3259 delete pages_; 3260 } 3261 3262 // Set a region of memory or MMIO to be tested. 3263 // Return false if region could not be mapped. 3264 bool MemoryRegionThread::SetRegion(void *region, int64 size) { 3265 int plength = sat_->page_length(); 3266 int npages = size / plength; 3267 if (size % plength) { 3268 logprintf(0, "Process Error: region size is not a multiple of SAT " 3269 "page length\n"); 3270 return false; 3271 } else { 3272 if (pages_ != NULL) 3273 delete pages_; 3274 pages_ = new PageEntryQueue(npages); 3275 char *base_addr = reinterpret_cast<char*>(region); 3276 region_ = base_addr; 3277 for (int i = 0; i < npages; i++) { 3278 struct page_entry pe; 3279 init_pe(&pe); 3280 pe.addr = reinterpret_cast<void*>(base_addr + i * plength); 3281 pe.offset = i * plength; 3282 3283 pages_->Push(&pe); 3284 } 3285 return true; 3286 } 3287 } 3288 3289 // More detailed error printout for hardware errors in memory or MMIO 3290 // regions. 3291 void MemoryRegionThread::ProcessError(struct ErrorRecord *error, 3292 int priority, 3293 const char *message) { 3294 uint32 buffer_offset; 3295 if (phase_ == kPhaseCopy) { 3296 // If the error occurred on the Copy Phase, it means that 3297 // the source data (i.e., the main memory) is wrong. so 3298 // just pass it to the original ProcessError to call a 3299 // bad-dimm error 3300 WorkerThread::ProcessError(error, priority, message); 3301 } else if (phase_ == kPhaseCheck) { 3302 // A error on the Check Phase means that the memory region tested 3303 // has an error. Gathering more information and then reporting 3304 // the error. 3305 // Determine if this is a write or read error. 3306 os_->Flush(error->vaddr); 3307 error->reread = *(error->vaddr); 3308 char *good = reinterpret_cast<char*>(&(error->expected)); 3309 char *bad = reinterpret_cast<char*>(&(error->actual)); 3310 sat_assert(error->expected != error->actual); 3311 unsigned int offset = 0; 3312 for (offset = 0; offset < (sizeof(error->expected) - 1); offset++) { 3313 if (good[offset] != bad[offset]) 3314 break; 3315 } 3316 3317 error->vbyteaddr = reinterpret_cast<char*>(error->vaddr) + offset; 3318 3319 buffer_offset = error->vbyteaddr - region_; 3320 3321 // Find physical address if possible. 3322 error->paddr = os_->VirtualToPhysical(error->vbyteaddr); 3323 logprintf(priority, 3324 "%s: miscompare on %s, CRC check at %p(0x%llx), " 3325 "offset %llx: read:0x%016llx, reread:0x%016llx " 3326 "expected:0x%016llx\n", 3327 message, 3328 identifier_.c_str(), 3329 error->vaddr, 3330 error->paddr, 3331 buffer_offset, 3332 error->actual, 3333 error->reread, 3334 error->expected); 3335 } else { 3336 logprintf(0, "Process Error: memory region thread raised an " 3337 "unexpected error."); 3338 } 3339 } 3340 3341 // Workload for testion memory or MMIO regions. 3342 // Return false on software error. 3343 bool MemoryRegionThread::Work() { 3344 struct page_entry source_pe; 3345 struct page_entry memregion_pe; 3346 bool result = true; 3347 int64 loops = 0; 3348 const uint64 error_constant = 0x00ba00000000ba00LL; 3349 3350 // For error injection. 3351 int64 *addr = 0x0; 3352 int offset = 0; 3353 int64 data = 0; 3354 3355 logprintf(9, "Log: Starting Memory Region thread %d\n", thread_num_); 3356 3357 while (IsReadyToRun()) { 3358 // Getting pages from SAT and queue. 3359 phase_ = kPhaseNoPhase; 3360 result = result && sat_->GetValid(&source_pe); 3361 if (!result) { 3362 logprintf(0, "Process Error: memory region thread failed to pop " 3363 "pages from SAT, bailing\n"); 3364 break; 3365 } 3366 3367 result = result && pages_->PopRandom(&memregion_pe); 3368 if (!result) { 3369 logprintf(0, "Process Error: memory region thread failed to pop " 3370 "pages from queue, bailing\n"); 3371 break; 3372 } 3373 3374 // Error injection for CRC copy. 3375 if ((sat_->error_injection() || error_injection_) && loops == 1) { 3376 addr = reinterpret_cast<int64*>(source_pe.addr); 3377 offset = random() % (sat_->page_length() / wordsize_); 3378 data = addr[offset]; 3379 addr[offset] = error_constant; 3380 } 3381 3382 // Copying SAT page into memory region. 3383 phase_ = kPhaseCopy; 3384 CrcCopyPage(&memregion_pe, &source_pe); 3385 memregion_pe.pattern = source_pe.pattern; 3386 3387 // Error injection for CRC Check. 3388 if ((sat_->error_injection() || error_injection_) && loops == 2) { 3389 addr = reinterpret_cast<int64*>(memregion_pe.addr); 3390 offset = random() % (sat_->page_length() / wordsize_); 3391 data = addr[offset]; 3392 addr[offset] = error_constant; 3393 } 3394 3395 // Checking page content in memory region. 3396 phase_ = kPhaseCheck; 3397 CrcCheckPage(&memregion_pe); 3398 3399 phase_ = kPhaseNoPhase; 3400 // Storing pages on their proper queues. 3401 result = result && sat_->PutValid(&source_pe); 3402 if (!result) { 3403 logprintf(0, "Process Error: memory region thread failed to push " 3404 "pages into SAT, bailing\n"); 3405 break; 3406 } 3407 result = result && pages_->Push(&memregion_pe); 3408 if (!result) { 3409 logprintf(0, "Process Error: memory region thread failed to push " 3410 "pages into queue, bailing\n"); 3411 break; 3412 } 3413 3414 if ((sat_->error_injection() || error_injection_) && 3415 loops >= 1 && loops <= 2) { 3416 addr[offset] = data; 3417 } 3418 3419 loops++; 3420 YieldSelf(); 3421 } 3422 3423 pages_copied_ = loops; 3424 status_ = result; 3425 logprintf(9, "Log: Completed %d: Memory Region thread. Status %d, %d " 3426 "pages checked\n", thread_num_, status_, pages_copied_); 3427 return result; 3428 } 3429 3430 // The list of MSRs to read from each cpu. 3431 const CpuFreqThread::CpuRegisterType CpuFreqThread::kCpuRegisters[] = { 3432 { kMsrTscAddr, "TSC" }, 3433 { kMsrAperfAddr, "APERF" }, 3434 { kMsrMperfAddr, "MPERF" }, 3435 }; 3436 3437 CpuFreqThread::CpuFreqThread(int num_cpus, int freq_threshold, int round) 3438 : num_cpus_(num_cpus), 3439 freq_threshold_(freq_threshold), 3440 round_(round) { 3441 sat_assert(round >= 0); 3442 if (round == 0) { 3443 // If rounding is off, force rounding to the nearest MHz. 3444 round_ = 1; 3445 round_value_ = 0.5; 3446 } else { 3447 round_value_ = round/2.0; 3448 } 3449 } 3450 3451 CpuFreqThread::~CpuFreqThread() { 3452 } 3453 3454 // Compute the difference between the currently read MSR values and the 3455 // previously read values and store the results in delta. If any of the 3456 // values did not increase, or the TSC value is too small, returns false. 3457 // Otherwise, returns true. 3458 bool CpuFreqThread::ComputeDelta(CpuDataType *current, CpuDataType *previous, 3459 CpuDataType *delta) { 3460 // Loop through the msrs. 3461 for (int msr = 0; msr < kMsrLast; msr++) { 3462 if (previous->msrs[msr] > current->msrs[msr]) { 3463 logprintf(0, "Log: Register %s went backwards 0x%llx to 0x%llx " 3464 "skipping interval\n", kCpuRegisters[msr], previous->msrs[msr], 3465 current->msrs[msr]); 3466 return false; 3467 } else { 3468 delta->msrs[msr] = current->msrs[msr] - previous->msrs[msr]; 3469 } 3470 } 3471 3472 // Check for TSC < 1 Mcycles over interval. 3473 if (delta->msrs[kMsrTsc] < (1000 * 1000)) { 3474 logprintf(0, "Log: Insanely slow TSC rate, TSC stops in idle?\n"); 3475 return false; 3476 } 3477 timersub(¤t->tv, &previous->tv, &delta->tv); 3478 3479 return true; 3480 } 3481 3482 // Compute the change in values of the MSRs between current and previous, 3483 // set the frequency in MHz of the cpu. If there is an error computing 3484 // the delta, return false. Othewise, return true. 3485 bool CpuFreqThread::ComputeFrequency(CpuDataType *current, 3486 CpuDataType *previous, int *freq) { 3487 CpuDataType delta; 3488 if (!ComputeDelta(current, previous, &delta)) { 3489 return false; 3490 } 3491 3492 double interval = delta.tv.tv_sec + delta.tv.tv_usec / 1000000.0; 3493 double frequency = 1.0 * delta.msrs[kMsrTsc] / 1000000 3494 * delta.msrs[kMsrAperf] / delta.msrs[kMsrMperf] / interval; 3495 3496 // Use the rounding value to round up properly. 3497 int computed = static_cast<int>(frequency + round_value_); 3498 *freq = computed - (computed % round_); 3499 return true; 3500 } 3501 3502 // This is the task function that the thread executes. 3503 bool CpuFreqThread::Work() { 3504 cpu_set_t cpuset; 3505 if (!AvailableCpus(&cpuset)) { 3506 logprintf(0, "Process Error: Cannot get information about the cpus.\n"); 3507 return false; 3508 } 3509 3510 // Start off indicating the test is passing. 3511 status_ = true; 3512 3513 int curr = 0; 3514 int prev = 1; 3515 uint32 num_intervals = 0; 3516 bool paused = false; 3517 bool valid; 3518 bool pass = true; 3519 3520 vector<CpuDataType> data[2]; 3521 data[0].resize(num_cpus_); 3522 data[1].resize(num_cpus_); 3523 while (IsReadyToRun(&paused)) { 3524 if (paused) { 3525 // Reset the intervals and restart logic after the pause. 3526 num_intervals = 0; 3527 } 3528 if (num_intervals == 0) { 3529 // If this is the first interval, then always wait a bit before 3530 // starting to collect data. 3531 sat_sleep(kStartupDelay); 3532 } 3533 3534 // Get the per cpu counters. 3535 valid = true; 3536 for (int cpu = 0; cpu < num_cpus_; cpu++) { 3537 if (CPU_ISSET(cpu, &cpuset)) { 3538 if (!GetMsrs(cpu, &data[curr][cpu])) { 3539 logprintf(0, "Failed to get msrs on cpu %d.\n", cpu); 3540 valid = false; 3541 break; 3542 } 3543 } 3544 } 3545 if (!valid) { 3546 // Reset the number of collected intervals since something bad happened. 3547 num_intervals = 0; 3548 continue; 3549 } 3550 3551 num_intervals++; 3552 3553 // Only compute a delta when we have at least two intervals worth of data. 3554 if (num_intervals > 2) { 3555 for (int cpu = 0; cpu < num_cpus_; cpu++) { 3556 if (CPU_ISSET(cpu, &cpuset)) { 3557 int freq; 3558 if (!ComputeFrequency(&data[curr][cpu], &data[prev][cpu], 3559 &freq)) { 3560 // Reset the number of collected intervals since an unknown 3561 // error occurred. 3562 logprintf(0, "Log: Cannot get frequency of cpu %d.\n", cpu); 3563 num_intervals = 0; 3564 break; 3565 } 3566 logprintf(15, "Cpu %d Freq %d\n", cpu, freq); 3567 if (freq < freq_threshold_) { 3568 errorcount_++; 3569 pass = false; 3570 logprintf(0, "Log: Cpu %d frequency is too low, frequency %d MHz " 3571 "threshold %d MHz.\n", cpu, freq, freq_threshold_); 3572 } 3573 } 3574 } 3575 } 3576 3577 sat_sleep(kIntervalPause); 3578 3579 // Swap the values in curr and prev (these values flip between 0 and 1). 3580 curr ^= 1; 3581 prev ^= 1; 3582 } 3583 3584 return pass; 3585 } 3586 3587 3588 // Get the MSR values for this particular cpu and save them in data. If 3589 // any error is encountered, returns false. Otherwise, returns true. 3590 bool CpuFreqThread::GetMsrs(int cpu, CpuDataType *data) { 3591 for (int msr = 0; msr < kMsrLast; msr++) { 3592 if (!os_->ReadMSR(cpu, kCpuRegisters[msr].msr, &data->msrs[msr])) { 3593 return false; 3594 } 3595 } 3596 // Save the time at which we acquired these values. 3597 gettimeofday(&data->tv, NULL); 3598 3599 return true; 3600 } 3601 3602 // Returns true if this test can run on the current machine. Otherwise, 3603 // returns false. 3604 bool CpuFreqThread::CanRun() { 3605 #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686) 3606 unsigned int eax, ebx, ecx, edx; 3607 3608 // Check that the TSC feature is supported. 3609 // This check is valid for both Intel and AMD. 3610 eax = 1; 3611 cpuid(&eax, &ebx, &ecx, &edx); 3612 if (!(edx & (1 << 5))) { 3613 logprintf(0, "Process Error: No TSC support.\n"); 3614 return false; 3615 } 3616 3617 // Check the highest extended function level supported. 3618 // This check is valid for both Intel and AMD. 3619 eax = 0x80000000; 3620 cpuid(&eax, &ebx, &ecx, &edx); 3621 if (eax < 0x80000007) { 3622 logprintf(0, "Process Error: No invariant TSC support.\n"); 3623 return false; 3624 } 3625 3626 // Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 3627 // This check is valid for both Intel and AMD. 3628 eax = 0x80000007; 3629 cpuid(&eax, &ebx, &ecx, &edx); 3630 if ((edx & (1 << 8)) == 0) { 3631 logprintf(0, "Process Error: No non-stop TSC support.\n"); 3632 return false; 3633 } 3634 3635 // APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 3636 // This check is valid for both Intel and AMD. 3637 eax = 0x6; 3638 cpuid(&eax, &ebx, &ecx, &edx); 3639 if ((ecx & 1) == 0) { 3640 logprintf(0, "Process Error: No APERF MSR support.\n"); 3641 return false; 3642 } 3643 return true; 3644 #else 3645 logprintf(0, "Process Error: " 3646 "cpu_freq_test is only supported on X86 processors.\n"); 3647 return false; 3648 #endif 3649 } 3650