// Copyright 2006 Google Inc. All Rights Reserved.

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//      http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// worker.cc : individual tasks that can be run in combination to
// stress the system

#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <sys/select.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/times.h>

// These are necessary, but on by default
// #define __USE_GNU
// #define __USE_LARGEFILE64
#include <fcntl.h>
#include <sys/socket.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <linux/unistd.h>  // for gettid

// For size of block device
#include <sys/ioctl.h>
#include <linux/fs.h>
// For asynchronous I/O
#ifdef HAVE_LIBAIO_H
#include <libaio.h>
#endif

#include <sys/syscall.h>

#include <set>
#include <string>

// This file must work with autoconf on its public version,
// so these includes are correct.
#include "error_diag.h"  // NOLINT
#include "os.h"          // NOLINT
#include "pattern.h"     // NOLINT
#include "queue.h"       // NOLINT
#include "sat.h"         // NOLINT
#include "sattypes.h"    // NOLINT
#include "worker.h"      // NOLINT

// Syscalls
// Why ubuntu, do you hate gettid so bad?
#if !defined(__NR_gettid)
#define __NR_gettid 224
#endif

#define gettid() syscall(__NR_gettid)
#if !defined(CPU_SETSIZE)
_syscall3(int, sched_getaffinity, pid_t, pid,
          unsigned int, len, cpu_set_t*, mask)
_syscall3(int, sched_setaffinity, pid_t, pid,
          unsigned int, len, cpu_set_t*, mask)
#endif

namespace {
// Get HW core ID from cpuid instruction.
inline int apicid(void) {
  int cpu;
#if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
  __asm __volatile("cpuid" : "=b" (cpu) : "a" (1) : "cx", "dx");
#elif defined(STRESSAPPTEST_CPU_ARMV7A)
#warning "Unsupported CPU type ARMV7A: unable to determine core ID."
  cpu = 0;
#else
#warning "Unsupported CPU type: unable to determine core ID."
  cpu = 0;
#endif
  return (cpu >> 24);
}

// Work around the sad fact that there are two (gnu, xsi) incompatible
// versions of strerror_r floating around google. Awesome.
bool sat_strerror(int err, char *buf, int len) {
  buf[0] = 0;
  char *errmsg = reinterpret_cast<char*>(strerror_r(err, buf, len));
  int retval = reinterpret_cast<int64>(errmsg);
  if (retval == 0)
    return true;
  if (retval == -1)
    return false;
  if (errmsg != buf) {
    strncpy(buf, errmsg, len);
    buf[len - 1] = 0;
  }
  return true;
}


inline uint64 addr_to_tag(void *address) {
  return reinterpret_cast<uint64>(address);
}
}  // namespace

#if !defined(O_DIRECT)
// Sometimes this isn't available.
// Disregard if it's not defined.
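// Defining it to 0 keeps the open() calls below valid: OR-ing in a zero
// flag is a no-op, and OpenFile() explicitly checks O_DIRECT != 0 before
// relying on direct-IO behavior.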
#define O_DIRECT 0
#endif

// A struct to hold captured errors, for later reporting.
struct ErrorRecord {
  uint64 actual;  // This is the actual value read.
  uint64 reread;  // This is the actual value, reread.
  uint64 expected;  // This is what it should have been.
  uint64 *vaddr;  // This is where it was (or wasn't).
  char *vbyteaddr;  // This is byte specific where the data was (or wasn't).
  uint64 paddr;  // This is the bus address, if available.
  uint64 *tagvaddr;  // This holds the tag value if this data was tagged.
  uint64 tagpaddr;  // This holds the physical address corresponding to the tag.
};

// This is a helper function to create new threads with pthreads.
static void *ThreadSpawnerGeneric(void *ptr) {
  WorkerThread *worker = static_cast<WorkerThread*>(ptr);
  worker->StartRoutine();
  return NULL;
}

void WorkerStatus::Initialize() {
  sat_assert(0 == pthread_mutex_init(&num_workers_mutex_, NULL));
  sat_assert(0 == pthread_rwlock_init(&status_rwlock_, NULL));
#ifdef _POSIX_BARRIERS
  sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL,
                                       num_workers_ + 1));
#endif
}

void WorkerStatus::Destroy() {
  sat_assert(0 == pthread_mutex_destroy(&num_workers_mutex_));
  sat_assert(0 == pthread_rwlock_destroy(&status_rwlock_));
#ifdef _POSIX_BARRIERS
  sat_assert(0 == pthread_barrier_destroy(&pause_barrier_));
#endif
}

void WorkerStatus::PauseWorkers() {
  if (SetStatus(PAUSE) != PAUSE)
    WaitOnPauseBarrier();
}

void WorkerStatus::ResumeWorkers() {
  if (SetStatus(RUN) == PAUSE)
    WaitOnPauseBarrier();
}

void WorkerStatus::StopWorkers() {
  if (SetStatus(STOP) == PAUSE)
    WaitOnPauseBarrier();
}

bool WorkerStatus::ContinueRunning() {
  // This loop is an optimization. We use it to immediately re-check the status
  // after resuming from a pause, instead of returning and waiting for the next
  // call to this function.
  for (;;) {
    switch (GetStatus()) {
      case RUN:
        return true;
      case PAUSE:
        // Wait for the other workers to call this function so that
        // PauseWorkers() can return.
        WaitOnPauseBarrier();
        // Wait for ResumeWorkers() to be called.
        WaitOnPauseBarrier();
        break;
      case STOP:
        return false;
    }
  }
}

bool WorkerStatus::ContinueRunningNoPause() {
  return (GetStatus() != STOP);
}

void WorkerStatus::RemoveSelf() {
  // Acquire a read lock on status_rwlock_ while (status_ != PAUSE).
  for (;;) {
    AcquireStatusReadLock();
    if (status_ != PAUSE)
      break;
    // We need to obey PauseWorkers() just like ContinueRunning() would, so
    // that the other threads won't wait on pause_barrier_ forever.
    ReleaseStatusLock();
    // Wait for the other workers to call this function so that PauseWorkers()
    // can return.
    WaitOnPauseBarrier();
    // Wait for ResumeWorkers() to be called.
    WaitOnPauseBarrier();
  }

  // This lock would be unnecessary if we held a write lock instead of a read
  // lock on status_rwlock_, but that would also force all threads calling
  // ContinueRunning() to wait on this one. Using a separate lock avoids that.
  AcquireNumWorkersLock();
  // Decrement num_workers_ and reinitialize pause_barrier_, which we know
  // isn't in use because (status != PAUSE).
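  // Note: the barrier participant count is the worker count plus one, for
  // the coordinating thread that calls PauseWorkers()/ResumeWorkers(). After
  // this thread removes itself there will be (num_workers_ - 1) workers, so
  // re-initializing with the pre-decrement num_workers_ preserves that
  // invariant: (num_workers_ - 1) + 1 == num_workers_.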
#ifdef _POSIX_BARRIERS
  sat_assert(0 == pthread_barrier_destroy(&pause_barrier_));
  sat_assert(0 == pthread_barrier_init(&pause_barrier_, NULL, num_workers_));
#endif
  --num_workers_;
  ReleaseNumWorkersLock();

  // Release status_rwlock_.
  ReleaseStatusLock();
}


// Parent thread class.
WorkerThread::WorkerThread() {
  status_ = false;
  pages_copied_ = 0;
  errorcount_ = 0;
  runduration_usec_ = 1;
  priority_ = Normal;
  worker_status_ = NULL;
  thread_spawner_ = &ThreadSpawnerGeneric;
  tag_mode_ = false;
}

WorkerThread::~WorkerThread() {}

// Constructors. Just init some default values.
FillThread::FillThread() {
  num_pages_to_fill_ = 0;
}

// Initialize file name to empty.
FileThread::FileThread() {
  filename_ = "";
  devicename_ = "";
  pass_ = 0;
  page_io_ = true;
  crc_page_ = -1;
  local_page_ = NULL;
}

// If the file thread used a bounce buffer in memory, account for the extra
// copy in the memory bandwidth calculation.
float FileThread::GetMemoryCopiedData() {
  if (!os_->normal_mem())
    return GetCopiedData();
  else
    return 0;
}

// Initialize target hostname to be invalid.
NetworkThread::NetworkThread() {
  snprintf(ipaddr_, sizeof(ipaddr_), "Unknown");
  sock_ = 0;
}

// Initialize?
NetworkSlaveThread::NetworkSlaveThread() {
}

// Initialize?
NetworkListenThread::NetworkListenThread() {
}

// Init member variables.
void WorkerThread::InitThread(int thread_num_init,
                              class Sat *sat_init,
                              class OsLayer *os_init,
                              class PatternList *patternlist_init,
                              WorkerStatus *worker_status) {
  sat_assert(worker_status);
  worker_status->AddWorkers(1);

  thread_num_ = thread_num_init;
  sat_ = sat_init;
  os_ = os_init;
  patternlist_ = patternlist_init;
  worker_status_ = worker_status;

  AvailableCpus(&cpu_mask_);
  tag_ = 0xffffffff;

  tag_mode_ = sat_->tag_mode();
}


// Use pthreads to prioritize a system thread.
bool WorkerThread::InitPriority() {
  // This doesn't affect performance that much, and may not be too safe.

  bool ret = BindToCpus(&cpu_mask_);
  if (!ret)
    logprintf(11, "Log: Bind to %s failed.\n",
              cpuset_format(&cpu_mask_).c_str());

  logprintf(11, "Log: Thread %d running on apic ID %d mask %s (%s).\n",
            thread_num_, apicid(),
            CurrentCpusFormat().c_str(),
            cpuset_format(&cpu_mask_).c_str());
#if 0
  if (priority_ == High) {
    sched_param param;
    param.sched_priority = 1;
    // Set the priority; others are unchanged.
    logprintf(0, "Log: Changing priority to SCHED_FIFO %d\n",
              param.sched_priority);
    if (sched_setscheduler(0, SCHED_FIFO, &param)) {
      char buf[256];
      sat_strerror(errno, buf, sizeof(buf));
      logprintf(0, "Process Error: sched_setscheduler "
                "failed - error %d %s\n",
                errno, buf);
    }
  }
#endif
  return true;
}

// Use pthreads to create a system thread.
int WorkerThread::SpawnThread() {
  // Create the new thread.
  int result = pthread_create(&thread_, NULL, thread_spawner_, this);
  if (result) {
    char buf[256];
    sat_strerror(result, buf, sizeof(buf));
    logprintf(0, "Process Error: pthread_create "
              "failed - error %d %s\n", result,
              buf);
    status_ = false;
    return false;
  }

  // 0 is pthreads success.
  return true;
}

// Kill the worker thread with SIGINT.
bool WorkerThread::KillThread() {
  return (pthread_kill(thread_, SIGINT) == 0);
}

// Block until thread has exited.
bool WorkerThread::JoinThread() {
  int result = pthread_join(thread_, NULL);

  if (result) {
    logprintf(0, "Process Error: pthread_join failed - error %d\n", result);
    status_ = false;
  }

  // 0 is pthreads success.
  return (!result);
}


void WorkerThread::StartRoutine() {
  InitPriority();
  StartThreadTimer();
  Work();
  StopThreadTimer();
  worker_status_->RemoveSelf();
}


// Thread work loop. Execute until marked finished.
bool WorkerThread::Work() {
  do {
    logprintf(9, "Log: ...\n");
    // Sleep for 1 second.
    sat_sleep(1);
  } while (IsReadyToRun());

  return false;
}


// Returns CPU mask of CPUs available to this process.
// Conceptually, each bit represents a logical CPU, ie:
//   mask = 3  (11b):   cpu0, 1
//   mask = 13 (1101b): cpu0, 2, 3
bool WorkerThread::AvailableCpus(cpu_set_t *cpuset) {
  CPU_ZERO(cpuset);
#ifdef HAVE_SCHED_GETAFFINITY
  return sched_getaffinity(getppid(), sizeof(*cpuset), cpuset) == 0;
#else
  return false;
#endif
}


// Returns CPU mask of CPUs this thread is bound to.
// Conceptually, each bit represents a logical CPU, ie:
//   mask = 3  (11b):   cpu0, 1
//   mask = 13 (1101b): cpu0, 2, 3
bool WorkerThread::CurrentCpus(cpu_set_t *cpuset) {
  CPU_ZERO(cpuset);
#ifdef HAVE_SCHED_GETAFFINITY
  return sched_getaffinity(0, sizeof(*cpuset), cpuset) == 0;
#else
  return false;
#endif
}


// Bind worker thread to specified CPU(s).
//   Args:
//     thread_mask: cpu_set_t representing CPUs, ie
//       mask = 1  (01b):   cpu0
//       mask = 3  (11b):   cpu0, 1
//       mask = 13 (1101b): cpu0, 2, 3
//
//   Returns true on success, false otherwise.
bool WorkerThread::BindToCpus(const cpu_set_t *thread_mask) {
  cpu_set_t process_mask;
  AvailableCpus(&process_mask);
  if (cpuset_isequal(thread_mask, &process_mask))
    return true;

  logprintf(11, "Log: available CPU mask - %s\n",
            cpuset_format(&process_mask).c_str());
  if (!cpuset_issubset(thread_mask, &process_mask)) {
    // Invalid cpu_mask, ie cpu not allocated to this process or doesn't exist.
    logprintf(0, "Log: requested CPUs %s not a subset of available %s\n",
              cpuset_format(thread_mask).c_str(),
              cpuset_format(&process_mask).c_str());
    return false;
  }
#ifdef HAVE_SCHED_GETAFFINITY
  return (sched_setaffinity(gettid(), sizeof(*thread_mask), thread_mask) == 0);
#else
  return false;
#endif
}


// A worker thread can yield itself to give up CPU until it's scheduled again.
// Returns true on success, false on error.
bool WorkerThread::YieldSelf() {
  return (sched_yield() == 0);
}


// Fill this page with its pattern.
bool WorkerThread::FillPage(struct page_entry *pe) {
  // Error check arguments.
  if (pe == 0) {
    logprintf(0, "Process Error: Fill Page entry null\n");
    return false;
  }

  // Mask is the bitmask of indexes used by the pattern.
  // It is the pattern size -1. Size is always a power of 2.
  uint64 *memwords = static_cast<uint64*>(pe->addr);
  int length = sat_->page_length();

  if (tag_mode_) {
    // Select tag or data as appropriate.
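    // In tag mode, every 8th 64-bit word (the first word of each 64-byte
    // cache line) stores its own virtual address as a tag instead of
    // pattern data, so later checks can detect data written to or fetched
    // from the wrong address.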
    for (int i = 0; i < length / wordsize_; i++) {
      datacast_t data;

      if ((i & 0x7) == 0) {
        data.l64 = addr_to_tag(&memwords[i]);
      } else {
        data.l32.l = pe->pattern->pattern(i << 1);
        data.l32.h = pe->pattern->pattern((i << 1) + 1);
      }
      memwords[i] = data.l64;
    }
  } else {
    // Just fill in untagged data directly.
    for (int i = 0; i < length / wordsize_; i++) {
      datacast_t data;

      data.l32.l = pe->pattern->pattern(i << 1);
      data.l32.h = pe->pattern->pattern((i << 1) + 1);
      memwords[i] = data.l64;
    }
  }

  return true;
}


// Tell the thread how many pages to fill.
void FillThread::SetFillPages(int64 num_pages_to_fill_init) {
  num_pages_to_fill_ = num_pages_to_fill_init;
}

// Fill this page with a random pattern.
bool FillThread::FillPageRandom(struct page_entry *pe) {
  // Error check arguments.
  if (pe == 0) {
    logprintf(0, "Process Error: Fill Page entry null\n");
    return false;
  }
  if ((patternlist_ == 0) || (patternlist_->Size() == 0)) {
    logprintf(0, "Process Error: No data patterns available\n");
    return false;
  }

  // Choose a random pattern for this block.
  pe->pattern = patternlist_->GetRandomPattern();
  if (pe->pattern == 0) {
    logprintf(0, "Process Error: Null data pattern\n");
    return false;
  }

  // Actually fill the page.
  return FillPage(pe);
}


// Memory fill work loop. Execute until the allotted pages are filled.
bool FillThread::Work() {
  bool result = true;

  logprintf(9, "Log: Starting fill thread %d\n", thread_num_);

  // We want to fill num_pages_to_fill pages, and
  // stop when we've filled that many.
  // We also want to capture an early break.
  struct page_entry pe;
  int64 loops = 0;
  while (IsReadyToRun() && (loops < num_pages_to_fill_)) {
    result = result && sat_->GetEmpty(&pe);
    if (!result) {
      logprintf(0, "Process Error: fill_thread failed to pop pages, "
                "bailing\n");
      break;
    }

    // Fill the page with pattern.
    result = result && FillPageRandom(&pe);
    if (!result) break;

    // Put the page back on the queue.
    result = result && sat_->PutValid(&pe);
    if (!result) {
      logprintf(0, "Process Error: fill_thread failed to push pages, "
                "bailing\n");
      break;
    }
    loops++;
  }

  // Fill in thread status.
  pages_copied_ = loops;
  status_ = result;
  logprintf(9, "Log: Completed %d: Fill thread. Status %d, %d pages filled\n",
            thread_num_, status_, pages_copied_);
  return result;
}


// Print error information about a data miscompare.
void WorkerThread::ProcessError(struct ErrorRecord *error,
                                int priority,
                                const char *message) {
  char dimm_string[256] = "";

  int apic_id = apicid();

  // Determine if this is a write or read error.
  os_->Flush(error->vaddr);
  error->reread = *(error->vaddr);

  char *good = reinterpret_cast<char*>(&(error->expected));
  char *bad = reinterpret_cast<char*>(&(error->actual));

  sat_assert(error->expected != error->actual);
  unsigned int offset = 0;
  for (offset = 0; offset < (sizeof(error->expected) - 1); offset++) {
    if (good[offset] != bad[offset])
      break;
  }

  error->vbyteaddr = reinterpret_cast<char*>(error->vaddr) + offset;

  // Find physical address if possible.
  error->paddr = os_->VirtualToPhysical(error->vbyteaddr);

  // Pretty print DIMM mapping if available.
  os_->FindDimm(error->paddr, dimm_string, sizeof(dimm_string));

  // Report parseable error.
  if (priority < 5) {
    // Run miscompare error through diagnoser for logging and reporting.
    os_->error_diagnoser_->AddMiscompareError(dimm_string,
                                              reinterpret_cast<uint64>
                                              (error->vaddr), 1);

    logprintf(priority,
              "%s: miscompare on CPU %d(0x%s) at %p(0x%llx:%s): "
              "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n",
              message,
              apic_id,
              CurrentCpusFormat().c_str(),
              error->vaddr,
              error->paddr,
              dimm_string,
              error->actual,
              error->reread,
              error->expected);
  }


  // Overwrite incorrect data with correct data to prevent
  // future miscompares when this data is reused.
  *(error->vaddr) = error->expected;
  os_->Flush(error->vaddr);
}



// Print error information about a data miscompare.
void FileThread::ProcessError(struct ErrorRecord *error,
                              int priority,
                              const char *message) {
  char dimm_string[256] = "";

  // Determine if this is a write or read error.
  os_->Flush(error->vaddr);
  error->reread = *(error->vaddr);

  char *good = reinterpret_cast<char*>(&(error->expected));
  char *bad = reinterpret_cast<char*>(&(error->actual));

  sat_assert(error->expected != error->actual);
  unsigned int offset = 0;
  for (offset = 0; offset < (sizeof(error->expected) - 1); offset++) {
    if (good[offset] != bad[offset])
      break;
  }

  error->vbyteaddr = reinterpret_cast<char*>(error->vaddr) + offset;

  // Find physical address if possible.
  error->paddr = os_->VirtualToPhysical(error->vbyteaddr);

  // Pretty print DIMM mapping if available.
  os_->FindDimm(error->paddr, dimm_string, sizeof(dimm_string));

  // If crc_page_ is valid, ie checking content read back from file,
  // track src/dst memory addresses. Otherwise categorize as a general
  // memory miscompare for CRC checking everywhere else.
  if (crc_page_ != -1) {
    int miscompare_byteoffset = static_cast<char*>(error->vbyteaddr) -
                                static_cast<char*>(page_recs_[crc_page_].dst);
    os_->error_diagnoser_->AddHDDMiscompareError(devicename_,
                                                 crc_page_,
                                                 miscompare_byteoffset,
                                                 page_recs_[crc_page_].src,
                                                 page_recs_[crc_page_].dst);
  } else {
    os_->error_diagnoser_->AddMiscompareError(dimm_string,
                                              reinterpret_cast<uint64>
                                              (error->vaddr), 1);
  }

  logprintf(priority,
            "%s: miscompare on %s at %p(0x%llx:%s): read:0x%016llx, "
            "reread:0x%016llx expected:0x%016llx\n",
            message,
            devicename_.c_str(),
            error->vaddr,
            error->paddr,
            dimm_string,
            error->actual,
            error->reread,
            error->expected);

  // Overwrite incorrect data with correct data to prevent
  // future miscompares when this data is reused.
  *(error->vaddr) = error->expected;
  os_->Flush(error->vaddr);
}


// Do a word by word result check of a region.
// Print errors on mismatches.
int WorkerThread::CheckRegion(void *addr,
                              class Pattern *pattern,
                              int64 length,
                              int offset,
                              int64 pattern_offset) {
  uint64 *memblock = static_cast<uint64*>(addr);
  const int kErrorLimit = 128;
  int errors = 0;
  int overflowerrors = 0;  // Count of overflowed errors.
  bool page_error = false;
  string errormessage("Hardware Error");
  struct ErrorRecord
      recorded[kErrorLimit];  // Queued errors for later printing.

  // For each word in the data region.
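  // Each 64-bit word holds two consecutive 32-bit pattern entries (low half,
  // then high half), so word i maps to pattern indices 2*i and 2*i + 1,
  // shifted by pattern_offset. In tag mode, words on 64-byte boundaries hold
  // their own address instead.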
  for (int i = 0; i < length / wordsize_; i++) {
    uint64 actual = memblock[i];
    uint64 expected;

    // Determine the value that should be there.
    datacast_t data;
    int index = 2 * i + pattern_offset;
    data.l32.l = pattern->pattern(index);
    data.l32.h = pattern->pattern(index + 1);
    expected = data.l64;
    // Check tags if necessary.
    if (tag_mode_ && ((reinterpret_cast<uint64>(&memblock[i]) & 0x3f) == 0)) {
      expected = addr_to_tag(&memblock[i]);
    }


    // If the value is incorrect, save an error record for later printing.
    if (actual != expected) {
      if (errors < kErrorLimit) {
        recorded[errors].actual = actual;
        recorded[errors].expected = expected;
        recorded[errors].vaddr = &memblock[i];
        errors++;
      } else {
        page_error = true;
        // If we have overflowed the error queue, just print the errors now.
        logprintf(10, "Log: Error record overflow, too many miscompares!\n");
        errormessage = "Page Error";
        break;
      }
    }
  }

  // Find if this is a whole block corruption.
  if (page_error && !tag_mode_) {
    int patsize = patternlist_->Size();
    for (int pat = 0; pat < patsize; pat++) {
      class Pattern *altpattern = patternlist_->GetPattern(pat);
      const int kGood = 0;
      const int kBad = 1;
      const int kGoodAgain = 2;
      const int kNoMatch = 3;
      int state = kGood;
      unsigned int badstart = 0;
      unsigned int badend = 0;

      // Don't match against ourself!
      if (pattern == altpattern)
        continue;

      for (int i = 0; i < length / wordsize_; i++) {
        uint64 actual = memblock[i];
        datacast_t expected;
        datacast_t possible;

        // Determine the value that should be there.
        int index = 2 * i + pattern_offset;

        expected.l32.l = pattern->pattern(index);
        expected.l32.h = pattern->pattern(index + 1);

        possible.l32.l = altpattern->pattern(index);
        possible.l32.h = altpattern->pattern(index + 1);

        if (state == kGood) {
          if (actual == expected.l64) {
            continue;
          } else if (actual == possible.l64) {
            badstart = i;
            badend = i;
            state = kBad;
            continue;
          } else {
            state = kNoMatch;
            break;
          }
        } else if (state == kBad) {
          if (actual == possible.l64) {
            badend = i;
            continue;
          } else if (actual == expected.l64) {
            state = kGoodAgain;
            continue;
          } else {
            state = kNoMatch;
            break;
          }
        } else if (state == kGoodAgain) {
          if (actual == expected.l64) {
            continue;
          } else {
            state = kNoMatch;
            break;
          }
        }
      }

      if ((state == kGoodAgain) || (state == kBad)) {
        unsigned int blockerrors = badend - badstart + 1;
        errormessage = "Block Error";
        ProcessError(&recorded[0], 0, errormessage.c_str());
        logprintf(0, "Block Error: (%p) pattern %s instead of %s, "
                  "%d bytes from offset 0x%x to 0x%x\n",
                  &memblock[badstart],
                  altpattern->name(), pattern->name(),
                  blockerrors * wordsize_,
                  offset + badstart * wordsize_,
                  offset + badend * wordsize_);
        errorcount_ += blockerrors;
        return blockerrors;
      }
    }
  }


  // Process error queue after all errors have been recorded.
  for (int err = 0; err < errors; err++) {
    int priority = 5;
    if (errorcount_ + err < 30)
      priority = 0;  // Bump up the priority for the first few errors.
    ProcessError(&recorded[err], priority, errormessage.c_str());
  }

  if (page_error) {
    // For each word in the data region.
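    // The error queue overflowed, so the scan above stopped recording.
    // Re-walk the entire region: the first kErrorLimit miscompares were
    // already reported, and anything beyond them is reported here at low
    // priority so the log still captures the full extent of the damage.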
    int error_recount = 0;
    for (int i = 0; i < length / wordsize_; i++) {
      uint64 actual = memblock[i];
      uint64 expected;
      datacast_t data;
      // Determine the value that should be there.
      int index = 2 * i + pattern_offset;

      data.l32.l = pattern->pattern(index);
      data.l32.h = pattern->pattern(index + 1);
      expected = data.l64;

      // Check tags if necessary.
      if (tag_mode_ && ((reinterpret_cast<uint64>(&memblock[i]) & 0x3f) == 0)) {
        expected = addr_to_tag(&memblock[i]);
      }

      // If the value is incorrect, save an error record for later printing.
      if (actual != expected) {
        if (error_recount < kErrorLimit) {
          // We already reported these.
          error_recount++;
        } else {
          // If we have overflowed the error queue, print the errors now.
          struct ErrorRecord er;
          er.actual = actual;
          er.expected = expected;
          er.vaddr = &memblock[i];

          // Do the error printout. This will take a long time and
          // likely change the machine state.
          ProcessError(&er, 12, errormessage.c_str());
          overflowerrors++;
        }
      }
    }
  }

  // Keep track of observed errors.
  errorcount_ += errors + overflowerrors;
  return errors + overflowerrors;
}

float WorkerThread::GetCopiedData() {
  return pages_copied_ * sat_->page_length() / kMegabyte;
}

// Calculate the CRC of a region.
// Falls back to a slow result check if the CRC mismatches.
int WorkerThread::CrcCheckPage(struct page_entry *srcpe) {
  const int blocksize = 4096;
  const int blockwords = blocksize / wordsize_;
  int errors = 0;

  const AdlerChecksum *expectedcrc = srcpe->pattern->crc();
  uint64 *memblock = static_cast<uint64*>(srcpe->addr);
  int blocks = sat_->page_length() / blocksize;
  for (int currentblock = 0; currentblock < blocks; currentblock++) {
    uint64 *memslice = memblock + currentblock * blockwords;

    AdlerChecksum crc;
    if (tag_mode_) {
      AdlerAddrCrcC(memslice, blocksize, &crc, srcpe);
    } else {
      CalculateAdlerChecksum(memslice, blocksize, &crc);
    }

    // If the CRC does not match, we'd better look closer.
    if (!crc.Equals(*expectedcrc)) {
      logprintf(11, "Log: CrcCheckPage Falling through to slow compare, "
                "CRC mismatch %s != %s\n",
                crc.ToHexString().c_str(),
                expectedcrc->ToHexString().c_str());
      int errorcount = CheckRegion(memslice,
                                   srcpe->pattern,
                                   blocksize,
                                   currentblock * blocksize, 0);
      if (errorcount == 0) {
        logprintf(0, "Log: CrcCheckPage CRC mismatch %s != %s, "
                  "but no miscompares found.\n",
                  crc.ToHexString().c_str(),
                  expectedcrc->ToHexString().c_str());
      }
      errors += errorcount;
    }
  }

  // Handle any leftover partial block. For page-aligned lengths this
  // should never trigger.
  int leftovers = sat_->page_length() % blocksize;
  if (leftovers) {
    uint64 *memslice = memblock + blocks * blockwords;
    errors += CheckRegion(memslice,
                          srcpe->pattern,
                          leftovers,
                          blocks * blocksize, 0);
  }
  return errors;
}


// Print error information about a data miscompare.
void WorkerThread::ProcessTagError(struct ErrorRecord *error,
                                   int priority,
                                   const char *message) {
  char dimm_string[256] = "";
  char tag_dimm_string[256] = "";
  bool read_error = false;

  int apic_id = apicid();

  // Determine if this is a write or read error.
  os_->Flush(error->vaddr);
  error->reread = *(error->vaddr);

  // Distinguish read and write errors.
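  // Heuristic: if the re-read value differs from the originally read value,
  // memory likely held correct data and the first read was transient (a
  // read error). If the bad value re-reads consistently, the bad data is
  // actually stored, implicating the write path or the storage itself.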
  if (error->actual != error->reread) {
    read_error = true;
  }

  sat_assert(error->expected != error->actual);

  error->vbyteaddr = reinterpret_cast<char*>(error->vaddr);

  // Find physical address if possible.
  error->paddr = os_->VirtualToPhysical(error->vbyteaddr);
  error->tagpaddr = os_->VirtualToPhysical(error->tagvaddr);

  // Pretty print DIMM mapping if available.
  os_->FindDimm(error->paddr, dimm_string, sizeof(dimm_string));
  // Pretty print DIMM mapping if available.
  os_->FindDimm(error->tagpaddr, tag_dimm_string, sizeof(tag_dimm_string));

  // Report parseable error.
  if (priority < 5) {
    logprintf(priority,
              "%s: Tag from %p(0x%llx:%s) (%s) "
              "miscompare on CPU %d(0x%s) at %p(0x%llx:%s): "
              "read:0x%016llx, reread:0x%016llx expected:0x%016llx\n",
              message,
              error->tagvaddr, error->tagpaddr,
              tag_dimm_string,
              read_error ? "read error" : "write error",
              apic_id,
              CurrentCpusFormat().c_str(),
              error->vaddr,
              error->paddr,
              dimm_string,
              error->actual,
              error->reread,
              error->expected);
  }

  errorcount_ += 1;

  // Overwrite incorrect data with correct data to prevent
  // future miscompares when this data is reused.
  *(error->vaddr) = error->expected;
  os_->Flush(error->vaddr);
}


// Print out and log a tag error.
bool WorkerThread::ReportTagError(uint64 *mem64,
                                  uint64 actual,
                                  uint64 tag) {
  struct ErrorRecord er;
  er.actual = actual;

  er.expected = tag;
  er.vaddr = mem64;

  // Generate vaddr from tag.
  er.tagvaddr = reinterpret_cast<uint64*>(actual);

  ProcessTagError(&er, 0, "Hardware Error");
  return true;
}

// C implementation of Adler memory copy, with memory tagging.
bool WorkerThread::AdlerAddrMemcpyC(uint64 *dstmem64,
                                    uint64 *srcmem64,
                                    unsigned int size_in_bytes,
                                    AdlerChecksum *checksum,
                                    struct page_entry *pe) {
  // Use this data wrapper to access memory with 64bit read/write.
  datacast_t data;
  datacast_t dstdata;
  unsigned int count = size_in_bytes / sizeof(data);

  if (count > ((1U) << 19)) {
    // Size is too large for this checksum implementation: refuse anything
    // above 2^19 64-bit words (4 MB).
    return false;
  }

  uint64 a1 = 1;
  uint64 a2 = 1;
  uint64 b1 = 0;
  uint64 b2 = 0;

  class Pattern *pattern = pe->pattern;

  unsigned int i = 0;
  while (i < count) {
    // Process 64 bits at a time.
    if ((i & 0x7) == 0) {
      data.l64 = srcmem64[i];
      dstdata.l64 = dstmem64[i];
      uint64 src_tag = addr_to_tag(&srcmem64[i]);
      uint64 dst_tag = addr_to_tag(&dstmem64[i]);
      // Detect if tags have been corrupted.
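      // A tag mismatch means this cache line's first word does not contain
      // its own address: the line was likely copied to or from the wrong
      // location.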
      if (data.l64 != src_tag)
        ReportTagError(&srcmem64[i], data.l64, src_tag);
      if (dstdata.l64 != dst_tag)
        ReportTagError(&dstmem64[i], dstdata.l64, dst_tag);

      data.l32.l = pattern->pattern(i << 1);
      data.l32.h = pattern->pattern((i << 1) + 1);
      a1 = a1 + data.l32.l;
      b1 = b1 + a1;
      a1 = a1 + data.l32.h;
      b1 = b1 + a1;

      data.l64 = dst_tag;
      dstmem64[i] = data.l64;

    } else {
      data.l64 = srcmem64[i];
      a1 = a1 + data.l32.l;
      b1 = b1 + a1;
      a1 = a1 + data.l32.h;
      b1 = b1 + a1;
      dstmem64[i] = data.l64;
    }
    i++;

    data.l64 = srcmem64[i];
    a2 = a2 + data.l32.l;
    b2 = b2 + a2;
    a2 = a2 + data.l32.h;
    b2 = b2 + a2;
    dstmem64[i] = data.l64;
    i++;
  }
  checksum->Set(a1, a2, b1, b2);
  return true;
}

// x86_64 SSE2 assembly implementation of Adler memory copy, with address
// tagging added as a second step. This is useful for debugging failures
// that only occur when SSE / nontemporal writes are used.
bool WorkerThread::AdlerAddrMemcpyWarm(uint64 *dstmem64,
                                       uint64 *srcmem64,
                                       unsigned int size_in_bytes,
                                       AdlerChecksum *checksum,
                                       struct page_entry *pe) {
  // Do ASM copy, ignore checksum.
  AdlerChecksum ignored_checksum;
  os_->AdlerMemcpyWarm(dstmem64, srcmem64, size_in_bytes, &ignored_checksum);

  // Force cache flush of both buffers (a stride of 8 words covers one
  // 64-byte cache line per flush).
  int length = size_in_bytes / sizeof(*dstmem64);
  for (int i = 0; i < length; i += sizeof(*dstmem64)) {
    os_->FastFlush(dstmem64 + i);
    os_->FastFlush(srcmem64 + i);
  }
  // Check results.
  AdlerAddrCrcC(srcmem64, size_in_bytes, checksum, pe);
  // Patch up address tags.
  TagAddrC(dstmem64, size_in_bytes);
  return true;
}

// Retag pages.
bool WorkerThread::TagAddrC(uint64 *memwords,
                            unsigned int size_in_bytes) {
  // Mask is the bitmask of indexes used by the pattern.
  // It is the pattern size -1. Size is always a power of 2.

  // Select tag or data as appropriate.
  int length = size_in_bytes / wordsize_;
  for (int i = 0; i < length; i += 8) {
    datacast_t data;
    data.l64 = addr_to_tag(&memwords[i]);
    memwords[i] = data.l64;
  }
  return true;
}

// C implementation of Adler memory crc.
bool WorkerThread::AdlerAddrCrcC(uint64 *srcmem64,
                                 unsigned int size_in_bytes,
                                 AdlerChecksum *checksum,
                                 struct page_entry *pe) {
  // Use this data wrapper to access memory with 64bit read/write.
  datacast_t data;
  unsigned int count = size_in_bytes / sizeof(data);

  if (count > ((1U) << 19)) {
    // Size is too large for this checksum implementation: refuse anything
    // above 2^19 64-bit words (4 MB).
    return false;
  }

  uint64 a1 = 1;
  uint64 a2 = 1;
  uint64 b1 = 0;
  uint64 b2 = 0;

  class Pattern *pattern = pe->pattern;

  unsigned int i = 0;
  while (i < count) {
    // Process 64 bits at a time.
    if ((i & 0x7) == 0) {
      data.l64 = srcmem64[i];
      uint64 src_tag = addr_to_tag(&srcmem64[i]);
      // Check that tags match expected.
      if (data.l64 != src_tag)
        ReportTagError(&srcmem64[i], data.l64, src_tag);

      data.l32.l = pattern->pattern(i << 1);
      data.l32.h = pattern->pattern((i << 1) + 1);
      a1 = a1 + data.l32.l;
      b1 = b1 + a1;
      a1 = a1 + data.l32.h;
      b1 = b1 + a1;
    } else {
      data.l64 = srcmem64[i];
      a1 = a1 + data.l32.l;
      b1 = b1 + a1;
      a1 = a1 + data.l32.h;
      b1 = b1 + a1;
    }
    i++;

    data.l64 = srcmem64[i];
    a2 = a2 + data.l32.l;
    b2 = b2 + a2;
    a2 = a2 + data.l32.h;
    b2 = b2 + a2;
    i++;
  }
  checksum->Set(a1, a2, b1, b2);
  return true;
}

// Copy a block of memory quickly, while keeping a CRC of the data.
// Falls back to a slow result check if the CRC mismatches.
int WorkerThread::CrcCopyPage(struct page_entry *dstpe,
                              struct page_entry *srcpe) {
  int errors = 0;
  const int blocksize = 4096;
  const int blockwords = blocksize / wordsize_;
  int blocks = sat_->page_length() / blocksize;

  // Base addresses for memory copy.
  uint64 *targetmembase = static_cast<uint64*>(dstpe->addr);
  uint64 *sourcemembase = static_cast<uint64*>(srcpe->addr);
  // Remember the expected CRC.
  const AdlerChecksum *expectedcrc = srcpe->pattern->crc();

  for (int currentblock = 0; currentblock < blocks; currentblock++) {
    uint64 *targetmem = targetmembase + currentblock * blockwords;
    uint64 *sourcemem = sourcemembase + currentblock * blockwords;

    AdlerChecksum crc;
    if (tag_mode_) {
      AdlerAddrMemcpyC(targetmem, sourcemem, blocksize, &crc, srcpe);
    } else {
      AdlerMemcpyC(targetmem, sourcemem, blocksize, &crc);
    }

    // Investigate miscompares.
    if (!crc.Equals(*expectedcrc)) {
      logprintf(11, "Log: CrcCopyPage Falling through to slow compare, "
                "CRC mismatch %s != %s\n", crc.ToHexString().c_str(),
                expectedcrc->ToHexString().c_str());
      int errorcount = CheckRegion(sourcemem,
                                   srcpe->pattern,
                                   blocksize,
                                   currentblock * blocksize, 0);
      if (errorcount == 0) {
        logprintf(0, "Log: CrcCopyPage CRC mismatch %s != %s, "
                  "but no miscompares found. Retrying with fresh data.\n",
                  crc.ToHexString().c_str(),
                  expectedcrc->ToHexString().c_str());
        if (!tag_mode_) {
          // Copy the data originally read from this region back again.
          // This data should show any corruption read originally while
          // calculating the CRC.
          memcpy(sourcemem, targetmem, blocksize);
          errorcount = CheckRegion(sourcemem,
                                   srcpe->pattern,
                                   blocksize,
                                   currentblock * blocksize, 0);
          if (errorcount == 0) {
            int apic_id = apicid();
            logprintf(0, "Process Error: CPU %d(0x%s) CrcCopyPage "
                      "CRC mismatch %s != %s, "
                      "but no miscompares found on second pass.\n",
                      apic_id, CurrentCpusFormat().c_str(),
                      crc.ToHexString().c_str(),
                      expectedcrc->ToHexString().c_str());
            struct ErrorRecord er;
            er.actual = sourcemem[0];
            er.expected = 0x0;
            er.vaddr = sourcemem;
            ProcessError(&er, 0, "Hardware Error");
          }
        }
      }
      errors += errorcount;
    }
  }

  // Handle any leftover partial block. For page-aligned lengths this
  // should never trigger.
  int leftovers = sat_->page_length() % blocksize;
  if (leftovers) {
    uint64 *targetmem = targetmembase + blocks * blockwords;
    uint64 *sourcemem = sourcemembase + blocks * blockwords;

    errors += CheckRegion(sourcemem,
                          srcpe->pattern,
                          leftovers,
                          blocks * blocksize, 0);
    int leftoverwords = leftovers / wordsize_;
    for (int i = 0; i < leftoverwords; i++) {
      targetmem[i] = sourcemem[i];
    }
  }

  // Update pattern reference to reflect new contents.
  dstpe->pattern = srcpe->pattern;

  // Clean clean clean the errors away.
  if (errors) {
    // TODO(nsanders): Maybe we should patch rather than fill? Filling may
    // cause bad data to be propagated across the page.
    FillPage(dstpe);
  }
  return errors;
}



// Invert a block of memory quickly, traversing downwards.
int InvertThread::InvertPageDown(struct page_entry *srcpe) {
  const int blocksize = 4096;
  const int blockwords = blocksize / wordsize_;
  int blocks = sat_->page_length() / blocksize;

  // Base addresses for memory copy.
  unsigned int *sourcemembase = static_cast<unsigned int *>(srcpe->addr);

  for (int currentblock = blocks - 1; currentblock >= 0; currentblock--) {
    unsigned int *sourcemem = sourcemembase + currentblock * blockwords;
    for (int i = blockwords - 32; i >= 0; i -= 32) {
      for (int index = i + 31; index >= i; --index) {
        unsigned int actual = sourcemem[index];
        sourcemem[index] = ~actual;
      }
      OsLayer::FastFlush(&sourcemem[i]);
    }
  }

  return 0;
}

// Invert a block of memory, traversing upwards.
int InvertThread::InvertPageUp(struct page_entry *srcpe) {
  const int blocksize = 4096;
  const int blockwords = blocksize / wordsize_;
  int blocks = sat_->page_length() / blocksize;

  // Base addresses for memory copy.
  unsigned int *sourcemembase = static_cast<unsigned int *>(srcpe->addr);

  for (int currentblock = 0; currentblock < blocks; currentblock++) {
    unsigned int *sourcemem = sourcemembase + currentblock * blockwords;
    for (int i = 0; i < blockwords; i += 32) {
      for (int index = i; index <= i + 31; ++index) {
        unsigned int actual = sourcemem[index];
        sourcemem[index] = ~actual;
      }
      OsLayer::FastFlush(&sourcemem[i]);
    }
  }
  return 0;
}

// Copy a block of memory quickly, while keeping a CRC of the data.
// Falls back to a slow result check if the CRC mismatches. Warm the CPU
// while running.
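// "Warm" means the copy itself is done with the OS layer's SSE / nontemporal
// memcpy (AdlerMemcpyWarm) to load the CPU harder; in tag mode the checksum
// verification and re-tagging happen as separate passes afterwards.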
int WorkerThread::CrcWarmCopyPage(struct page_entry *dstpe,
                                  struct page_entry *srcpe) {
  int errors = 0;
  const int blocksize = 4096;
  const int blockwords = blocksize / wordsize_;
  int blocks = sat_->page_length() / blocksize;

  // Base addresses for memory copy.
  uint64 *targetmembase = static_cast<uint64*>(dstpe->addr);
  uint64 *sourcemembase = static_cast<uint64*>(srcpe->addr);
  // Remember the expected CRC.
  const AdlerChecksum *expectedcrc = srcpe->pattern->crc();

  for (int currentblock = 0; currentblock < blocks; currentblock++) {
    uint64 *targetmem = targetmembase + currentblock * blockwords;
    uint64 *sourcemem = sourcemembase + currentblock * blockwords;

    AdlerChecksum crc;
    if (tag_mode_) {
      AdlerAddrMemcpyWarm(targetmem, sourcemem, blocksize, &crc, srcpe);
    } else {
      os_->AdlerMemcpyWarm(targetmem, sourcemem, blocksize, &crc);
    }

    // Investigate miscompares.
    if (!crc.Equals(*expectedcrc)) {
      logprintf(11, "Log: CrcWarmCopyPage Falling through to slow compare, "
                "CRC mismatch %s != %s\n", crc.ToHexString().c_str(),
                expectedcrc->ToHexString().c_str());
      int errorcount = CheckRegion(sourcemem,
                                   srcpe->pattern,
                                   blocksize,
                                   currentblock * blocksize, 0);
      if (errorcount == 0) {
        logprintf(0, "Log: CrcWarmCopyPage CRC mismatch %s != %s, "
                  "but no miscompares found. Retrying with fresh data.\n",
                  crc.ToHexString().c_str(),
                  expectedcrc->ToHexString().c_str());
        if (!tag_mode_) {
          // Copy the data originally read from this region back again.
          // This data should show any corruption read originally while
          // calculating the CRC.
          memcpy(sourcemem, targetmem, blocksize);
          errorcount = CheckRegion(sourcemem,
                                   srcpe->pattern,
                                   blocksize,
                                   currentblock * blocksize, 0);
          if (errorcount == 0) {
            int apic_id = apicid();
            logprintf(0, "Process Error: CPU %d(0x%s) CrcWarmCopyPage "
                      "CRC mismatch %s != %s, "
                      "but no miscompares found on second pass.\n",
                      apic_id, CurrentCpusFormat().c_str(),
                      crc.ToHexString().c_str(),
                      expectedcrc->ToHexString().c_str());
            struct ErrorRecord er;
            er.actual = sourcemem[0];
            er.expected = 0x0;
            er.vaddr = sourcemem;
            ProcessError(&er, 0, "Hardware Error");
          }
        }
      }
      errors += errorcount;
    }
  }

  // Handle any leftover partial block. For page-aligned lengths this
  // should never trigger.
  int leftovers = sat_->page_length() % blocksize;
  if (leftovers) {
    uint64 *targetmem = targetmembase + blocks * blockwords;
    uint64 *sourcemem = sourcemembase + blocks * blockwords;

    errors += CheckRegion(sourcemem,
                          srcpe->pattern,
                          leftovers,
                          blocks * blocksize, 0);
    int leftoverwords = leftovers / wordsize_;
    for (int i = 0; i < leftoverwords; i++) {
      targetmem[i] = sourcemem[i];
    }
  }

  // Update pattern reference to reflect new contents.
  dstpe->pattern = srcpe->pattern;

  // Clean clean clean the errors away.
  if (errors) {
    // TODO(nsanders): Maybe we should patch rather than fill? Filling may
    // cause bad data to be propagated across the page.
    FillPage(dstpe);
  }
  return errors;
}



// Memory check work loop. Execute until done, then exhaust pages.
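// The check thread re-verifies pages that other worker threads have already
// marked valid. Once a stop is requested it drains the valid queue, retiring
// checked pages to the empty queue instead of re-queueing them.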
bool CheckThread::Work() {
  struct page_entry pe;
  bool result = true;
  int64 loops = 0;

  logprintf(9, "Log: Starting Check thread %d\n", thread_num_);

  // We want to check all the pages, and
  // stop when there aren't any left.
  while (true) {
    result = result && sat_->GetValid(&pe);
    if (!result) {
      if (IsReadyToRunNoPause())
        logprintf(0, "Process Error: check_thread failed to pop pages, "
                  "bailing\n");
      else
        result = true;
      break;
    }

    // Do the result check.
    CrcCheckPage(&pe);

    // Push pages back on the valid queue if we are still going,
    // throw them out otherwise.
    if (IsReadyToRunNoPause())
      result = result && sat_->PutValid(&pe);
    else
      result = result && sat_->PutEmpty(&pe);
    if (!result) {
      logprintf(0, "Process Error: check_thread failed to push pages, "
                "bailing\n");
      break;
    }
    loops++;
  }

  pages_copied_ = loops;
  status_ = result;
  logprintf(9, "Log: Completed %d: Check thread. Status %d, %d pages checked\n",
            thread_num_, status_, pages_copied_);
  return result;
}


// Memory copy work loop. Execute until marked done.
bool CopyThread::Work() {
  struct page_entry src;
  struct page_entry dst;
  bool result = true;
  int64 loops = 0;

  logprintf(9, "Log: Starting copy thread %d: cpu %s, mem %x\n",
            thread_num_, cpuset_format(&cpu_mask_).c_str(), tag_);

  while (IsReadyToRun()) {
    // Pop the needed pages.
    result = result && sat_->GetValid(&src, tag_);
    result = result && sat_->GetEmpty(&dst, tag_);
    if (!result) {
      logprintf(0, "Process Error: copy_thread failed to pop pages, "
                "bailing\n");
      break;
    }

    // Force errors for unittests.
    if (sat_->error_injection()) {
      if (loops == 8) {
        char *addr = reinterpret_cast<char*>(src.addr);
        int offset = random() % sat_->page_length();
        addr[offset] = 0xba;
      }
    }

    // We can use memcpy, or CRC check while we copy.
    if (sat_->warm()) {
      CrcWarmCopyPage(&dst, &src);
    } else if (sat_->strict()) {
      CrcCopyPage(&dst, &src);
    } else {
      memcpy(dst.addr, src.addr, sat_->page_length());
      dst.pattern = src.pattern;
    }

    result = result && sat_->PutValid(&dst);
    result = result && sat_->PutEmpty(&src);

    // Copy worker threads yield themselves at the end of each copy loop,
    // to avoid preempting each other in the middle of the inner copy loop.
    // Cooperation between copy worker threads results in less unnecessary
    // cache thrashing (which happens when context-switching in the middle
    // of the inner copy loop).
    YieldSelf();

    if (!result) {
      logprintf(0, "Process Error: copy_thread failed to push pages, "
                "bailing\n");
      break;
    }
    loops++;
  }

  pages_copied_ = loops;
  status_ = result;
  logprintf(9, "Log: Completed %d: Copy thread. Status %d, %d pages copied\n",
            thread_num_, status_, pages_copied_);
  return result;
}

// Memory invert work loop. Execute until marked done.
bool InvertThread::Work() {
  struct page_entry src;
  bool result = true;
  int64 loops = 0;

  logprintf(9, "Log: Starting invert thread %d\n", thread_num_);

  while (IsReadyToRun()) {
    // Pop the needed pages.
    result = result && sat_->GetValid(&src);
    if (!result) {
      logprintf(0, "Process Error: invert_thread failed to pop pages, "
                "bailing\n");
      break;
    }

    if (sat_->strict())
      CrcCheckPage(&src);

    // For the same reason CopyThread yields itself (see the YieldSelf comment
    // in CopyThread::Work()), InvertThread yields itself after each invert
    // operation to improve cooperation between different worker threads
    // stressing the memory/cache.
    InvertPageUp(&src);
    YieldSelf();
    InvertPageDown(&src);
    YieldSelf();
    InvertPageDown(&src);
    YieldSelf();
    InvertPageUp(&src);
    YieldSelf();

    if (sat_->strict())
      CrcCheckPage(&src);

    result = result && sat_->PutValid(&src);
    if (!result) {
      logprintf(0, "Process Error: invert_thread failed to push pages, "
                "bailing\n");
      break;
    }
    loops++;
  }

  pages_copied_ = loops * 2;
  status_ = result;
  logprintf(9, "Log: Completed %d: Invert thread. Status %d, "
            "%d pages inverted\n",
            thread_num_, status_, pages_copied_);
  return result;
}


// Set file name to use for File IO.
void FileThread::SetFile(const char *filename_init) {
  filename_ = filename_init;
  devicename_ = os_->FindFileDevice(filename_);
}

// Open the file for access.
bool FileThread::OpenFile(int *pfile) {
  bool no_O_DIRECT = false;
  int flags = O_RDWR | O_CREAT | O_SYNC;
  int fd = open(filename_.c_str(), flags | O_DIRECT, 0644);
  if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) {
    no_O_DIRECT = true;
    fd = open(filename_.c_str(), flags, 0644);  // Try without O_DIRECT.
  }
  if (fd < 0) {
    logprintf(0, "Process Error: Failed to create file %s!!\n",
              filename_.c_str());
    pages_copied_ = 0;
    return false;
  }
  if (no_O_DIRECT)
    os_->ActivateFlushPageCache();  // Not using O_DIRECT fixed EINVAL.
  *pfile = fd;
  return true;
}

// Close the file.
bool FileThread::CloseFile(int fd) {
  close(fd);
  return true;
}

// Check sector tagging.
bool FileThread::SectorTagPage(struct page_entry *src, int block) {
  int page_length = sat_->page_length();
  struct FileThread::SectorTag *tag =
      (struct FileThread::SectorTag *)(src->addr);

  // Tag each sector.
  unsigned char magic = ((0xba + thread_num_) & 0xff);
  for (int sec = 0; sec < page_length / 512; sec++) {
    tag[sec].magic = magic;
    tag[sec].block = block & 0xff;
    tag[sec].sector = sec & 0xff;
    tag[sec].pass = pass_ & 0xff;
  }
  return true;
}

bool FileThread::WritePageToFile(int fd, struct page_entry *src) {
  int page_length = sat_->page_length();
  // Fill the file with our data.
  int64 size = write(fd, src->addr, page_length);

  if (size != page_length) {
    os_->ErrorReport(devicename_.c_str(), "write-error", 1);
    errorcount_++;
    logprintf(0, "Block Error: file_thread failed to write, "
              "bailing\n");
    return false;
  }
  return true;
}

// Write the data to the file.
bool FileThread::WritePages(int fd) {
  int strict = sat_->strict();

  // Start fresh at the beginning of the file for each batch of pages.
  lseek64(fd, 0, SEEK_SET);
  for (int i = 0; i < sat_->disk_pages(); i++) {
    struct page_entry src;
    if (!GetValidPage(&src))
      return false;
    // Save expected pattern.
    page_recs_[i].pattern = src.pattern;
    page_recs_[i].src = src.addr;

    // Check data correctness.
    if (strict)
      CrcCheckPage(&src);

    SectorTagPage(&src, i);

    bool result = WritePageToFile(fd, &src);

    if (!PutEmptyPage(&src))
      return false;

    if (!result)
      return false;
  }
  return os_->FlushPageCache();  // If O_DIRECT worked, this will be a NOP.
}

// Copy data from file into memory block.
bool FileThread::ReadPageFromFile(int fd, struct page_entry *dst) {
  int page_length = sat_->page_length();

  // Do the actual read.
  int64 size = read(fd, dst->addr, page_length);
  if (size != page_length) {
    os_->ErrorReport(devicename_.c_str(), "read-error", 1);
    logprintf(0, "Block Error: file_thread failed to read, "
              "bailing\n");
    errorcount_++;
    return false;
  }
  return true;
}

// Check sector tagging.
bool FileThread::SectorValidatePage(const struct PageRec &page,
                                    struct page_entry *dst, int block) {
  // Error injection.
  static int calls = 0;
  calls++;

  // Do sector tag compare.
  int firstsector = -1;
  int lastsector = -1;
  bool badsector = false;
  int page_length = sat_->page_length();

  // Cast data block into an array of tagged sectors.
  struct FileThread::SectorTag *tag =
      (struct FileThread::SectorTag *)(dst->addr);

  sat_assert(sizeof(*tag) == 512);

  // Error injection.
  if (sat_->error_injection()) {
    if (calls == 2) {
      for (int badsec = 8; badsec < 17; badsec++)
        tag[badsec].pass = 27;
    }
    if (calls == 18) {
      (static_cast<int32*>(dst->addr))[27] = 0xbadda7a;
    }
  }

  // Check each sector for the correct tag we added earlier,
  // then revert the tag to the normal data pattern.
  unsigned char magic = ((0xba + thread_num_) & 0xff);
  for (int sec = 0; sec < page_length / 512; sec++) {
    // Check magic tag.
    if ((tag[sec].magic != magic) ||
        (tag[sec].block != (block & 0xff)) ||
        (tag[sec].sector != (sec & 0xff)) ||
        (tag[sec].pass != (pass_ & 0xff))) {
      // Offset calculation for tag location.
      int offset = sec * sizeof(SectorTag);
      if (tag[sec].block != (block & 0xff))
        offset += 1 * sizeof(uint8);
      else if (tag[sec].sector != (sec & 0xff))
        offset += 2 * sizeof(uint8);
      else if (tag[sec].pass != (pass_ & 0xff))
        offset += 3 * sizeof(uint8);

      // Run sector tag error through diagnoser for logging and reporting.
      errorcount_ += 1;
      os_->error_diagnoser_->AddHDDSectorTagError(devicename_, tag[sec].block,
                                                  offset,
                                                  tag[sec].sector,
                                                  page.src, page.dst);

      logprintf(5, "Sector Error: Sector tag @ 0x%x, pass %d/%d. "
                "sec %x/%x, block %d/%d, magic %x/%x, File: %s \n",
                block * page_length + 512 * sec,
                (pass_ & 0xff), (unsigned int)tag[sec].pass,
                sec, (unsigned int)tag[sec].sector,
                block, (unsigned int)tag[sec].block,
                magic, (unsigned int)tag[sec].magic,
                filename_.c_str());

      // Keep track of first and last bad sector.
      if (firstsector == -1)
        firstsector = (block * page_length / 512) + sec;
      lastsector = (block * page_length / 512) + sec;
      badsector = true;
    }
    // Patch tag back to proper pattern.
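    // Only the first four bytes of each 512-byte sector hold the tag fields
    // (magic, block, sector, pass), so restoring a single 32-bit word of
    // pattern data per sector undoes the tagging.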
    unsigned int *addr = (unsigned int *)(&tag[sec]);
    *addr = dst->pattern->pattern(512 * sec / sizeof(*addr));
  }

  // If we found sector errors:
  if (badsector == true) {
    logprintf(5, "Log: file sector miscompare at offset %x-%x. File: %s\n",
              firstsector * 512,
              ((lastsector + 1) * 512) - 1,
              filename_.c_str());

    // Either exit immediately, or patch the data up and continue.
    if (sat_->stop_on_error()) {
      exit(1);
    } else {
      // Patch up bad pages.
      for (int block = (firstsector * 512) / page_length;
           block <= (lastsector * 512) / page_length;
           block++) {
        unsigned int *memblock = static_cast<unsigned int *>(dst->addr);
        int length = page_length / wordsize_;
        for (int i = 0; i < length; i++) {
          memblock[i] = dst->pattern->pattern(i);
        }
      }
    }
  }
  return true;
}

// Get memory for an incoming data transfer.
bool FileThread::PagePrepare() {
  // We can only do direct IO to SAT pages if it is normal mem.
  page_io_ = os_->normal_mem();

  // Init a local buffer if we need it.
  if (!page_io_) {
#ifdef HAVE_POSIX_MEMALIGN
    int result = posix_memalign(&local_page_, 512, sat_->page_length());
#else
    local_page_ = memalign(512, sat_->page_length());
    int result = (local_page_ == 0);
#endif
    if (result) {
      logprintf(0, "Process Error: disk thread posix_memalign "
                "returned %d (fail)\n",
                result);
      status_ = false;
      return false;
    }
  }
  return true;
}


// Remove memory allocated for data transfer.
bool FileThread::PageTeardown() {
  // Free a local buffer if we need to.
  if (!page_io_) {
    free(local_page_);
  }
  return true;
}



// Get memory for an incoming data transfer.
bool FileThread::GetEmptyPage(struct page_entry *dst) {
  if (page_io_) {
    if (!sat_->GetEmpty(dst))
      return false;
  } else {
    dst->addr = local_page_;
    dst->offset = 0;
    dst->pattern = 0;
  }
  return true;
}

// Get memory for an outgoing data transfer.
bool FileThread::GetValidPage(struct page_entry *src) {
  struct page_entry tmp;
  if (!sat_->GetValid(&tmp))
    return false;
  if (page_io_) {
    *src = tmp;
    return true;
  } else {
    src->addr = local_page_;
    src->offset = 0;
    CrcCopyPage(src, &tmp);
    if (!sat_->PutValid(&tmp))
      return false;
  }
  return true;
}


// Throw out a used empty page.
bool FileThread::PutEmptyPage(struct page_entry *src) {
  if (page_io_) {
    if (!sat_->PutEmpty(src))
      return false;
  }
  return true;
}

// Throw out a used, filled page.
bool FileThread::PutValidPage(struct page_entry *src) {
  if (page_io_) {
    if (!sat_->PutValid(src))
      return false;
  }
  return true;
}

// Copy data from file into memory blocks.
bool FileThread::ReadPages(int fd) {
  int page_length = sat_->page_length();
  int strict = sat_->strict();
  bool result = true;

  // Read our data back out of the file, into its new location.
  lseek64(fd, 0, SEEK_SET);
  for (int i = 0; i < sat_->disk_pages(); i++) {
    struct page_entry dst;
    if (!GetEmptyPage(&dst))
      return false;
    // Retrieve expected pattern.
    dst.pattern = page_recs_[i].pattern;
    // Update page record.

// Copy data from file into memory blocks.
bool FileThread::ReadPages(int fd) {
  int page_length = sat_->page_length();
  int strict = sat_->strict();
  bool result = true;

  // Read our data back out of the file, into its new location.
  lseek64(fd, 0, SEEK_SET);
  for (int i = 0; i < sat_->disk_pages(); i++) {
    struct page_entry dst;
    if (!GetEmptyPage(&dst))
      return false;
    // Retrieve expected pattern.
    dst.pattern = page_recs_[i].pattern;
    // Update page record.
    page_recs_[i].dst = dst.addr;

    // Read from the file into destination page.
    if (!ReadPageFromFile(fd, &dst)) {
      PutEmptyPage(&dst);
      return false;
    }

    SectorValidatePage(page_recs_[i], &dst, i);

    // Ensure that the transfer ended up with correct data.
    if (strict) {
      // Record page index currently CRC checked.
      crc_page_ = i;
      int errors = CrcCheckPage(&dst);
      if (errors) {
        logprintf(5, "Log: file miscompare at block %d, "
                  "offset %x-%x. File: %s\n",
                  i, i * page_length, ((i + 1) * page_length) - 1,
                  filename_.c_str());
        result = false;
      }
      crc_page_ = -1;
      errorcount_ += errors;
    }
    if (!PutValidPage(&dst))
      return false;
  }
  return result;
}

// File IO work loop. Execute until marked done.
bool FileThread::Work() {
  bool result = true;
  int64 loops = 0;

  logprintf(9, "Log: Starting file thread %d, file %s, device %s\n",
            thread_num_,
            filename_.c_str(),
            devicename_.c_str());

  if (!PagePrepare()) {
    status_ = false;
    return false;
  }

  // Open the data IO file.
  int fd = 0;
  if (!OpenFile(&fd)) {
    status_ = false;
    return false;
  }

  pass_ = 0;

  // Load patterns into page records.
  page_recs_ = new struct PageRec[sat_->disk_pages()];
  for (int i = 0; i < sat_->disk_pages(); i++) {
    page_recs_[i].pattern = new struct Pattern();
  }

  // Loop until done.
  while (IsReadyToRun()) {
    // Do the file write.
    if (!(result = result && WritePages(fd)))
      break;

    // Do the file read.
    if (!(result = result && ReadPages(fd)))
      break;

    loops++;
    pass_ = loops;
  }

  pages_copied_ = loops * sat_->disk_pages();

  // Clean up.
  CloseFile(fd);
  PageTeardown();

  logprintf(9, "Log: Completed %d: file thread status %d, %d pages copied\n",
            thread_num_, status_, pages_copied_);
  // Failure to read from device indicates hardware,
  // rather than procedural SW error.
  status_ = true;
  return true;
}

bool NetworkThread::IsNetworkStopSet() {
  return !IsReadyToRunNoPause();
}

bool NetworkSlaveThread::IsNetworkStopSet() {
  // This thread has no completion status.
  // It finishes whenever there is no more data to be
  // passed back.
  return true;
}

// Set IP address to use for network IO.
void NetworkThread::SetIP(const char *ipaddr_init) {
  strncpy(ipaddr_, ipaddr_init, 256);
}

// Create a socket.
// Return false on error.
bool NetworkThread::CreateSocket(int *psocket) {
  int sock = socket(AF_INET, SOCK_STREAM, 0);
  if (sock == -1) {
    logprintf(0, "Process Error: Cannot open socket\n");
    pages_copied_ = 0;
    status_ = false;
    return false;
  }
  *psocket = sock;
  return true;
}

// Close the socket.
bool NetworkThread::CloseSocket(int sock) {
  close(sock);
  return true;
}

// Initiate the tcp connection.
bool NetworkThread::Connect(int sock) {
  struct sockaddr_in dest_addr;
  dest_addr.sin_family = AF_INET;
  dest_addr.sin_port = htons(kNetworkPort);
  memset(&(dest_addr.sin_zero), '\0', sizeof(dest_addr.sin_zero));

  // Translate dot notation to u32.
  if (inet_aton(ipaddr_, &dest_addr.sin_addr) == 0) {
    logprintf(0, "Process Error: Cannot resolve %s\n", ipaddr_);
    pages_copied_ = 0;
    status_ = false;
    return false;
  }

  if (-1 == connect(sock, reinterpret_cast<struct sockaddr *>(&dest_addr),
                    sizeof(struct sockaddr))) {
    logprintf(0, "Process Error: Cannot connect %s\n", ipaddr_);
    pages_copied_ = 0;
    status_ = false;
    return false;
  }
  return true;
}

// Set up a socket to listen for incoming connections.
bool NetworkListenThread::Listen() {
  struct sockaddr_in sa;

  memset(&(sa.sin_zero), '\0', sizeof(sa.sin_zero));

  sa.sin_family = AF_INET;
  sa.sin_addr.s_addr = INADDR_ANY;
  sa.sin_port = htons(kNetworkPort);

  if (-1 == bind(sock_, (struct sockaddr*)&sa, sizeof(struct sockaddr))) {
    char buf[256];
    sat_strerror(errno, buf, sizeof(buf));
    logprintf(0, "Process Error: Cannot bind socket: %s\n", buf);
    pages_copied_ = 0;
    status_ = false;
    return false;
  }
  listen(sock_, 3);
  return true;
}

// Wait for a connection from a network traffic generation thread.
bool NetworkListenThread::Wait() {
  fd_set rfds;
  struct timeval tv;
  int retval;

  // Watch sock_ to see when it has input.
  FD_ZERO(&rfds);
  FD_SET(sock_, &rfds);
  // Wait up to five seconds.
  tv.tv_sec = 5;
  tv.tv_usec = 0;

  retval = select(sock_ + 1, &rfds, NULL, NULL, &tv);

  return (retval > 0);
}

// Accept a connection from a network traffic generation thread.
bool NetworkListenThread::GetConnection(int *pnewsock) {
  struct sockaddr_in sa;
  socklen_t size = sizeof(struct sockaddr_in);

  int newsock = accept(sock_, reinterpret_cast<struct sockaddr *>(&sa), &size);
  if (newsock < 0) {
    logprintf(0, "Process Error: Did not receive connection\n");
    pages_copied_ = 0;
    status_ = false;
    return false;
  }
  *pnewsock = newsock;
  return true;
}

// Send a page, return false if a page was not sent.
bool NetworkThread::SendPage(int sock, struct page_entry *src) {
  int page_length = sat_->page_length();
  char *address = static_cast<char*>(src->addr);

  // Send our data over the network.
  int size = page_length;
  while (size) {
    int transferred = send(sock, address + (page_length - size), size, 0);
    if ((transferred == 0) || (transferred == -1)) {
      if (!IsNetworkStopSet()) {
        char buf[256] = "";
        sat_strerror(errno, buf, sizeof(buf));
        logprintf(0, "Process Error: Thread %d, "
                  "Network write failed, bailing. (%s)\n",
                  thread_num_, buf);
        status_ = false;
      }
      return false;
    }
    size = size - transferred;
  }
  return true;
}
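
// Note on the transfer loops in SendPage() above and ReceivePage() below:
// send() and recv() may move fewer bytes than requested, so each call
// resumes at address + (page_length - size). Worked example (hypothetical
// sizes): with a 1 MiB page, a first send() of 256 KiB leaves size at
// 768 KiB, so the next call starts 256 KiB into the buffer, and so on
// until size reaches zero.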

// Receive a page. Return false if a page was not received.
bool NetworkThread::ReceivePage(int sock, struct page_entry *dst) {
  int page_length = sat_->page_length();
  char *address = static_cast<char*>(dst->addr);

  // Maybe we will get our data back again, maybe not.
  int size = page_length;
  while (size) {
    int transferred = recv(sock, address + (page_length - size), size, 0);
    if ((transferred == 0) || (transferred == -1)) {
      // Typically network slave thread should exit as network master
      // thread stops sending data.
      if (IsNetworkStopSet()) {
        int err = errno;
        if (transferred == 0 && err == 0) {
          // Two system setups will not sync exactly,
          // allow early exit, but log it.
          logprintf(0, "Log: Net thread did not receive any data, exiting.\n");
        } else {
          char buf[256] = "";
          sat_strerror(err, buf, sizeof(buf));
          // Print why we failed.
          logprintf(0, "Process Error: Thread %d, "
                    "Network read failed, bailing (%s).\n",
                    thread_num_, buf);
          status_ = false;
          // Print arguments and results.
          logprintf(0, "Log: recv(%d, address %x, size %x, 0) == %x, err %d\n",
                    sock, address + (page_length - size),
                    size, transferred, err);
          if ((transferred == 0) &&
              (page_length - size < 512) &&
              (page_length - size > 0)) {
            // Print null terminated data received, to see who's been
            // sending us suspicious unwanted data.
            address[page_length - size] = 0;
            logprintf(0, "Log: received %d bytes: '%s'\n",
                      page_length - size, address);
          }
        }
      }
      return false;
    }
    size = size - transferred;
  }
  return true;
}

// Network IO work loop. Execute until marked done.
// Return true if the thread ran as expected.
bool NetworkThread::Work() {
  logprintf(9, "Log: Starting network thread %d, ip %s\n",
            thread_num_,
            ipaddr_);

  // Make a socket.
  int sock = 0;
  if (!CreateSocket(&sock))
    return false;

  // Network IO loop requires network slave thread to have already initialized.
  // We will sleep here for a while to ensure that the slave thread will be
  // listening by the time we connect.
  // Sleep for 15 seconds.
  sat_sleep(15);
  logprintf(9, "Log: Starting execution of network thread %d, ip %s\n",
            thread_num_,
            ipaddr_);


  // Connect to a slave thread.
  if (!Connect(sock))
    return false;

  // Loop until done.
  bool result = true;
  int strict = sat_->strict();
  int64 loops = 0;
  while (IsReadyToRun()) {
    struct page_entry src;
    struct page_entry dst;
    result = result && sat_->GetValid(&src);
    result = result && sat_->GetEmpty(&dst);
    if (!result) {
      logprintf(0, "Process Error: net_thread failed to pop pages, "
                "bailing\n");
      break;
    }

    // Check data correctness.
    if (strict)
      CrcCheckPage(&src);

    // Do the network write.
    if (!(result = result && SendPage(sock, &src)))
      break;

    // Update pattern reference to reflect new contents.
    dst.pattern = src.pattern;

    // Do the network read.
    if (!(result = result && ReceivePage(sock, &dst)))
      break;

    // Ensure that the transfer ended up with correct data.
    if (strict)
      CrcCheckPage(&dst);

    // Return all of our pages to the queue.
    result = result && sat_->PutValid(&dst);
    result = result && sat_->PutEmpty(&src);
    if (!result) {
      logprintf(0, "Process Error: net_thread failed to push pages, "
                "bailing\n");
      break;
    }
    loops++;
  }

  pages_copied_ = loops;
  status_ = result;

  // Clean up.
  CloseSocket(sock);

  logprintf(9, "Log: Completed %d: network thread status %d, "
            "%d pages copied\n",
            thread_num_, status_, pages_copied_);
  return result;
}

// Spawn slave threads for incoming connections.
bool NetworkListenThread::SpawnSlave(int newsock, int threadid) {
  logprintf(12, "Log: Listen thread spawning slave\n");

  // Spawn slave thread, to reflect network traffic back to sender.
  ChildWorker *child_worker = new ChildWorker;
  child_worker->thread.SetSock(newsock);
  child_worker->thread.InitThread(threadid, sat_, os_, patternlist_,
                                  &child_worker->status);
  child_worker->status.Initialize();
  child_worker->thread.SpawnThread();
  child_workers_.push_back(child_worker);

  return true;
}

// Reap slave threads.
bool NetworkListenThread::ReapSlaves() {
  bool result = true;
  // Gather status and reap threads.
  logprintf(12, "Log: Joining all outstanding threads\n");

  for (size_t i = 0; i < child_workers_.size(); i++) {
    NetworkSlaveThread& child_thread = child_workers_[i]->thread;
    logprintf(12, "Log: Joining slave thread %d\n", i);
    child_thread.JoinThread();
    if (child_thread.GetStatus() != 1) {
      logprintf(0, "Process Error: Slave Thread %d failed with status %d\n", i,
                child_thread.GetStatus());
      result = false;
    }
    errorcount_ += child_thread.GetErrorCount();
    logprintf(9, "Log: Slave Thread %d found %lld miscompares\n", i,
              child_thread.GetErrorCount());
    pages_copied_ += child_thread.GetPageCount();
  }

  return result;
}

// Network listener IO work loop. Execute until marked done.
// Return false on fatal software error.
bool NetworkListenThread::Work() {
  logprintf(9, "Log: Starting network listen thread %d\n",
            thread_num_);

  // Make a socket.
  sock_ = 0;
  if (!CreateSocket(&sock_)) {
    status_ = false;
    return false;
  }
  logprintf(9, "Log: Listen thread created sock\n");

  // Allows incoming connections to be queued up by socket library.
  int newsock = 0;
  Listen();
  logprintf(12, "Log: Listen thread waiting for incoming connections\n");

  // Wait on incoming connections, and spawn worker threads for them.
  int threadcount = 0;
  while (IsReadyToRun()) {
    // Poll for connections that we can accept().
    if (Wait()) {
      // Accept those connections.
      logprintf(12, "Log: Listen thread found incoming connection\n");
      if (GetConnection(&newsock)) {
        SpawnSlave(newsock, threadcount);
        threadcount++;
      }
    }
  }

  // Gather status and join spawned threads.
  ReapSlaves();

  // Delete the child workers.
  for (ChildVector::iterator it = child_workers_.begin();
       it != child_workers_.end(); ++it) {
    (*it)->status.Destroy();
    delete *it;
  }
  child_workers_.clear();

  CloseSocket(sock_);

  status_ = true;
  logprintf(9,
            "Log: Completed %d: network listen thread status %d, "
            "%d pages copied\n",
            thread_num_, status_, pages_copied_);
  return true;
}

// Set network reflector socket struct.
void NetworkSlaveThread::SetSock(int sock) {
  sock_ = sock;
}

// Network reflector IO work loop. Execute until marked done.
// Return false on fatal software error.
bool NetworkSlaveThread::Work() {
  logprintf(9, "Log: Starting network slave thread %d\n",
            thread_num_);

  // Verify that we have a socket.
  int sock = sock_;
  if (!sock) {
    status_ = false;
    return false;
  }

  // Loop until done.
  int64 loops = 0;
  // Init a local buffer for storing data.
  void *local_page = NULL;
#ifdef HAVE_POSIX_MEMALIGN
  int result = posix_memalign(&local_page, 512, sat_->page_length());
#else
  local_page = memalign(512, sat_->page_length());
  int result = (local_page == 0);
#endif
  if (result) {
    logprintf(0, "Process Error: net slave posix_memalign "
                 "returned %d (fail)\n",
              result);
    status_ = false;
    return false;
  }

  struct page_entry page;
  page.addr = local_page;

  // This thread will continue to run as long as the thread on the other end of
  // the socket is still sending and receiving data.
  while (1) {
    // Do the network read.
    if (!ReceivePage(sock, &page))
      break;

    // Do the network write.
    if (!SendPage(sock, &page))
      break;

    loops++;
  }

  pages_copied_ = loops;
  // No results provided from this type of thread.
  status_ = true;

  // Clean up.
  CloseSocket(sock);

  logprintf(9,
            "Log: Completed %d: network slave thread status %d, "
            "%d pages copied\n",
            thread_num_, status_, pages_copied_);
  return true;
}

// Thread work loop. Execute until marked finished.
bool ErrorPollThread::Work() {
  logprintf(9, "Log: Starting system error poll thread %d\n", thread_num_);

  // This calls a generic error polling function in the Os abstraction layer.
  do {
    errorcount_ += os_->ErrorPoll();
    os_->ErrorWait();
  } while (IsReadyToRun());

  logprintf(9, "Log: Finished system error poll thread %d: %d errors\n",
            thread_num_, errorcount_);
  status_ = true;
  return true;
}

// Worker thread to heat up CPU.
// This thread does not evaluate pass/fail or software error.
bool CpuStressThread::Work() {
  logprintf(9, "Log: Starting CPU stress thread %d\n", thread_num_);

  do {
    // Run ludloff's platform/CPU-specific assembly workload.
    os_->CpuStressWorkload();
    YieldSelf();
  } while (IsReadyToRun());

  logprintf(9, "Log: Finished CPU stress thread %d:\n",
            thread_num_);
  status_ = true;
  return true;
}

CpuCacheCoherencyThread::CpuCacheCoherencyThread(cc_cacheline_data *data,
                                                 int cacheline_count,
                                                 int thread_num,
                                                 int inc_count) {
  cc_cacheline_data_ = data;
  cc_cacheline_count_ = cacheline_count;
  cc_thread_num_ = thread_num;
  cc_inc_count_ = inc_count;
}

// Worker thread to test the cache coherency of the CPUs.
// Return false on fatal sw error.
bool CpuCacheCoherencyThread::Work() {
  logprintf(9, "Log: Starting the Cache Coherency thread %d\n",
            cc_thread_num_);
  uint64 time_start, time_end;
  struct timeval tv;

  unsigned int seed = static_cast<unsigned int>(gettid());
  gettimeofday(&tv, NULL);  // Get the timestamp before increments.
  time_start = tv.tv_sec * 1000000ULL + tv.tv_usec;

  uint64 total_inc = 0;  // Total increments done by the thread.
  while (IsReadyToRun()) {
    for (int i = 0; i < cc_inc_count_; i++) {
      // Choose a data structure at random and increment the member
      // belonging to this thread (the offset into the structure is the
      // same as the thread number).
#ifdef HAVE_RAND_R
      int r = rand_r(&seed);
#else
      int r = rand();
#endif
      r = cc_cacheline_count_ * (r / (RAND_MAX + 1.0));
      // Increment the member of the randomly selected structure.
      (cc_cacheline_data_[r].num[cc_thread_num_])++;
    }

    total_inc += cc_inc_count_;

    // Check that the local count matches the global value summed across
    // all the cache line structures for this particular thread.
    int cc_global_num = 0;
    for (int cline_num = 0; cline_num < cc_cacheline_count_; cline_num++) {
      cc_global_num += cc_cacheline_data_[cline_num].num[cc_thread_num_];
      // Reset the cacheline member's value for the next run.
      cc_cacheline_data_[cline_num].num[cc_thread_num_] = 0;
    }
    if (sat_->error_injection())
      cc_global_num = -1;

    if (cc_global_num != cc_inc_count_) {
      errorcount_++;
      logprintf(0, "Hardware Error: global(%d) and local(%d) do not match\n",
                cc_global_num, cc_inc_count_);
    }
  }
  gettimeofday(&tv, NULL);  // Get the timestamp at the end.
  time_end = tv.tv_sec * 1000000ULL + tv.tv_usec;

  uint64 us_elapsed = time_end - time_start;
  // inc_rate is the no. of increments per second.
  double inc_rate = total_inc * 1e6 / us_elapsed;

  logprintf(4, "Stats: CC Thread(%d): Time=%llu us,"
            " Increments=%llu, Increments/sec = %.6lf\n",
            cc_thread_num_, us_elapsed, total_inc, inc_rate);
  logprintf(9, "Log: Finished CPU Cache Coherency thread %d:\n",
            cc_thread_num_);
  status_ = true;
  return true;
}
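
// Worked example of the invariant checked above (hypothetical numbers):
// each thread owns slot num[cc_thread_num_] in every cacheline struct, so
// with cc_inc_count_ = 1000 increments scattered over, say, 128 cachelines
// (7 here, 12 there, ...), summing that slot across all lines must give
// back exactly 1000. Any other total means an increment was lost, which
// points at a cache coherency failure between cores.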
DiskThread::DiskThread(DiskBlockTable *block_table) {
  read_block_size_ = kSectorSize;   // default 1 sector (512 bytes)
  write_block_size_ = kSectorSize;  // this assumes read and write block size
                                    // are the same
  segment_size_ = -1;               // use the entire disk as one segment
  cache_size_ = 16 * 1024 * 1024;   // assume 16MiB cache by default
  // Use a queue such that 3/2 times as much data as the cache can hold
  // is written before it is read so that there is little chance the read
  // data is in the cache.
  queue_size_ = ((cache_size_ / write_block_size_) * 3) / 2;
  blocks_per_segment_ = 32;

  read_threshold_ = 100000;   // 100ms is a reasonable limit for
  write_threshold_ = 100000;  // reading/writing a sector

  read_timeout_ = 5000000;    // 5 seconds should be a long enough
  write_timeout_ = 5000000;   // timeout for reading/writing

  device_sectors_ = 0;
  non_destructive_ = 0;

#ifdef HAVE_LIBAIO_H
  aio_ctx_ = 0;
#endif
  block_table_ = block_table;
  update_block_table_ = 1;

  block_buffer_ = NULL;

  blocks_written_ = 0;
  blocks_read_ = 0;
}

DiskThread::~DiskThread() {
  if (block_buffer_)
    free(block_buffer_);
}

// Set filename for device file (in /dev).
void DiskThread::SetDevice(const char *device_name) {
  device_name_ = device_name;
}

// Set various parameters that control the behaviour of the test.
// -1 is used as a sentinel value on each parameter (except non_destructive)
// to indicate that the parameter should be left at its current value.
bool DiskThread::SetParameters(int read_block_size,
                               int write_block_size,
                               int64 segment_size,
                               int64 cache_size,
                               int blocks_per_segment,
                               int64 read_threshold,
                               int64 write_threshold,
                               int non_destructive) {
  if (read_block_size != -1) {
    // Blocks must be aligned to the disk's sector size.
    if (read_block_size % kSectorSize != 0) {
      logprintf(0, "Process Error: Block size must be a multiple of %d "
                "(thread %d).\n", kSectorSize, thread_num_);
      return false;
    }

    read_block_size_ = read_block_size;
  }

  if (write_block_size != -1) {
    // Write blocks must be aligned to the disk's sector size and to the
    // block size.
    if (write_block_size % kSectorSize != 0) {
      logprintf(0, "Process Error: Write block size must be a multiple "
                "of %d (thread %d).\n", kSectorSize, thread_num_);
      return false;
    }
    if (write_block_size % read_block_size_ != 0) {
      logprintf(0, "Process Error: Write block size must be a multiple "
                "of the read block size, which is %d (thread %d).\n",
                read_block_size_, thread_num_);
      return false;
    }

    write_block_size_ = write_block_size;

  } else {
    // Make sure write_block_size_ is still valid.
    if (read_block_size_ > write_block_size_) {
      logprintf(5, "Log: Assuming write block size equal to read block size, "
                "which is %d (thread %d).\n", read_block_size_,
                thread_num_);
      write_block_size_ = read_block_size_;
    } else {
      if (write_block_size_ % read_block_size_ != 0) {
        logprintf(0, "Process Error: Write block size (defined as %d) must "
                  "be a multiple of the read block size, which is %d "
                  "(thread %d).\n", write_block_size_, read_block_size_,
                  thread_num_);
        return false;
      }
    }
  }

  if (cache_size != -1) {
    cache_size_ = cache_size;
  }

  if (blocks_per_segment != -1) {
    if (blocks_per_segment <= 0) {
      logprintf(0, "Process Error: Blocks per segment must be greater than "
                "zero (thread %d).\n", thread_num_);
      return false;
    }

    blocks_per_segment_ = blocks_per_segment;
  }

  if (read_threshold != -1) {
    if (read_threshold <= 0) {
      logprintf(0, "Process Error: Read threshold must be greater than "
                "zero (thread %d).\n", thread_num_);
      return false;
    }

    read_threshold_ = read_threshold;
  }

  if (write_threshold != -1) {
    if (write_threshold <= 0) {
      logprintf(0, "Process Error: Write threshold must be greater than "
                "zero (thread %d).\n", thread_num_);
      return false;
    }

    write_threshold_ = write_threshold;
  }

  if (segment_size != -1) {
    // Segments must be aligned to the disk's sector size.
    if (segment_size % kSectorSize != 0) {
      logprintf(0, "Process Error: Segment size must be a multiple of %d"
                " (thread %d).\n", kSectorSize, thread_num_);
      return false;
    }

    segment_size_ = segment_size / kSectorSize;
  }

  non_destructive_ = non_destructive;

  // Having a queue of 150% of the blocks that will fit in the disk's cache
  // should be enough to force out the oldest block before it is read, making
  // sure the data comes from the disk and not the cache.
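  // Worked example (hypothetical, using the defaults from the constructor):
  // a 16 MiB cache and 512-byte write blocks give
  //   queue_size_ = ((16 * 1024 * 1024 / 512) * 3) / 2 = 49152 blocks,
  // i.e. 24 MiB (3/2 of the cache) is written before the first read.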
  queue_size_ = ((cache_size_ / write_block_size_) * 3) / 2;
  // Update DiskBlockTable parameters.
  if (update_block_table_) {
    block_table_->SetParameters(kSectorSize, write_block_size_,
                                device_sectors_, segment_size_,
                                device_name_);
  }
  return true;
}

// Open a device, return false on failure.
bool DiskThread::OpenDevice(int *pfile) {
  bool no_O_DIRECT = false;
  int flags = O_RDWR | O_SYNC | O_LARGEFILE;
  int fd = open(device_name_.c_str(), flags | O_DIRECT, 0);
  if (O_DIRECT != 0 && fd < 0 && errno == EINVAL) {
    no_O_DIRECT = true;
    fd = open(device_name_.c_str(), flags, 0);  // Try without O_DIRECT.
  }
  if (fd < 0) {
    logprintf(0, "Process Error: Failed to open device %s (thread %d)!!\n",
              device_name_.c_str(), thread_num_);
    return false;
  }
  if (no_O_DIRECT)
    os_->ActivateFlushPageCache();
  *pfile = fd;

  return GetDiskSize(fd);
}

// Retrieves the size (in bytes) of the disk/file.
// Return false on failure.
bool DiskThread::GetDiskSize(int fd) {
  struct stat device_stat;
  if (fstat(fd, &device_stat) == -1) {
    logprintf(0, "Process Error: Unable to fstat disk %s (thread %d).\n",
              device_name_.c_str(), thread_num_);
    return false;
  }

  // For a block device, an ioctl is needed to get the size since the size
  // of the device file (i.e. /dev/sdb) is 0.
  if (S_ISBLK(device_stat.st_mode)) {
    uint64 block_size = 0;

    if (ioctl(fd, BLKGETSIZE64, &block_size) == -1) {
      logprintf(0, "Process Error: Unable to ioctl disk %s (thread %d).\n",
                device_name_.c_str(), thread_num_);
      return false;
    }

    // Zero size indicates a nonworking device.
    if (block_size == 0) {
      os_->ErrorReport(device_name_.c_str(), "device-size-zero", 1);
      ++errorcount_;
      status_ = true;  // Avoid a procedural error.
      return false;
    }

    device_sectors_ = block_size / kSectorSize;

  } else if (S_ISREG(device_stat.st_mode)) {
    device_sectors_ = device_stat.st_size / kSectorSize;

  } else {
    logprintf(0, "Process Error: %s is not a regular file or block "
              "device (thread %d).\n", device_name_.c_str(),
              thread_num_);
    return false;
  }

  logprintf(12, "Log: Device sectors: %lld on disk %s (thread %d).\n",
            device_sectors_, device_name_.c_str(), thread_num_);

  if (update_block_table_) {
    block_table_->SetParameters(kSectorSize, write_block_size_,
                                device_sectors_, segment_size_,
                                device_name_);
  }

  return true;
}

bool DiskThread::CloseDevice(int fd) {
  close(fd);
  return true;
}

// Return the time in microseconds.
int64 DiskThread::GetTime() {
  struct timeval tv;
  gettimeofday(&tv, NULL);
  return tv.tv_sec * 1000000 + tv.tv_usec;
}

// Do randomized reads and (possibly) writes on a device.
// Return false on fatal SW error, true on SW success,
// regardless of whether HW failed.
bool DiskThread::DoWork(int fd) {
  int64 block_num = 0;
  int64 num_segments;

  if (segment_size_ == -1) {
    num_segments = 1;
  } else {
    num_segments = device_sectors_ / segment_size_;
    if (device_sectors_ % segment_size_ != 0)
      num_segments++;
  }

  // Disk size should be at least 3x cache size. See comment later for
  // details.
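  // Worked example of that bound (hypothetical, with the 16 MiB default
  // cache): a full write phase covers 3/2 * cache = 24 MiB, and allocating
  // y blocks from a segment needs a segment of at least 2y blocks, so the
  // disk must hold at least 2 * 24 MiB = 48 MiB = 3 * cache size.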
  sat_assert(device_sectors_ * kSectorSize > 3 * cache_size_);

  // This disk test works by writing blocks with a certain pattern to
  // disk, then reading them back and verifying it against the pattern
  // at a later time. A failure happens when either the block cannot
  // be written/read or when the read block is different than what was
  // written. If a block takes too long to write/read, then a warning
  // is given instead of an error since taking too long is not
  // necessarily an error.
  //
  // To prevent the read blocks from coming from the disk cache,
  // enough blocks are written before read such that a block would
  // be ejected from the disk cache by the time it is read.
  //
  // TODO(amistry): Implement some sort of read/write throttling. The
  //                flood of asynchronous I/O requests when a drive is
  //                unplugged is causing the application and kernel to
  //                become unresponsive.

  while (IsReadyToRun()) {
    // Write blocks to disk.
    logprintf(16, "Log: Write phase %sfor disk %s (thread %d).\n",
              non_destructive_ ? "(disabled) " : "",
              device_name_.c_str(), thread_num_);
    while (IsReadyToRunNoPause() &&
           in_flight_sectors_.size() <
               static_cast<size_t>(queue_size_ + 1)) {
      // Confine testing to a particular segment of the disk.
      int64 segment = (block_num / blocks_per_segment_) % num_segments;
      if (!non_destructive_ &&
          (block_num % blocks_per_segment_ == 0)) {
        logprintf(20, "Log: Starting to write segment %lld out of "
                  "%lld on disk %s (thread %d).\n",
                  segment, num_segments, device_name_.c_str(),
                  thread_num_);
      }
      block_num++;

      BlockData *block = block_table_->GetUnusedBlock(segment);

      // If an unused sequence of sectors could not be found, skip to the
      // next block to process. Soon, a new segment will come and new
      // sectors will be able to be allocated. This effectively puts a
      // minimum on the disk size of 3x the stated cache size, or 48MiB
      // if a cache size is not given (since the cache is set as 16MiB
      // by default). Given that today's caches are at the low MiB range
      // and drive sizes at the mid GB, this shouldn't pose a problem.
      // The 3x minimum comes from the following:
      //   1. In order to allocate 'y' blocks from a segment, the
      //      segment must contain at least 2y blocks or else an
      //      allocation may not succeed.
      //   2. Assume the entire disk is one segment.
      //   3. A full write phase consists of writing blocks corresponding to
      //      3/2 cache size.
      //   4. Therefore, the one segment must have 2 * 3/2 * cache
      //      size worth of blocks = 3 * cache size worth of blocks
      //      to complete.
      // In non-destructive mode, don't write anything to disk.
      if (!non_destructive_) {
        if (!WriteBlockToDisk(fd, block)) {
          block_table_->RemoveBlock(block);
          return true;
        }
        blocks_written_++;
      }

      // Block is either initialized by writing, or in nondestructive case,
      // initialized by being added into the datastructure for later reading.
      block->SetBlockAsInitialized();

      in_flight_sectors_.push(block);
    }
    if (!os_->FlushPageCache())  // If O_DIRECT worked, this will be a NOP.
      return false;

    // Verify blocks on disk.
    logprintf(20, "Log: Read phase for disk %s (thread %d).\n",
              device_name_.c_str(), thread_num_);
    while (IsReadyToRunNoPause() && !in_flight_sectors_.empty()) {
      BlockData *block = in_flight_sectors_.front();
      in_flight_sectors_.pop();
      if (!ValidateBlockOnDisk(fd, block))
        return true;
      block_table_->RemoveBlock(block);
      blocks_read_++;
    }
  }

  pages_copied_ = blocks_written_ + blocks_read_;
  return true;
}

// Do an asynchronous disk I/O operation.
// Return false if the IO is not set up.
bool DiskThread::AsyncDiskIO(IoOp op, int fd, void *buf, int64 size,
                             int64 offset, int64 timeout) {
#ifdef HAVE_LIBAIO_H
  // Use the Linux native asynchronous I/O interface for reading/writing.
  // A read/write consists of three basic steps:
  //    1. create an io context.
  //    2. prepare and submit an io request to the context.
  //    3. wait for an event on the context.

  struct {
    const int opcode;
    const char *op_str;
    const char *error_str;
  } operations[2] = {
    { IO_CMD_PREAD, "read", "disk-read-error" },
    { IO_CMD_PWRITE, "write", "disk-write-error" }
  };

  struct iocb cb;
  memset(&cb, 0, sizeof(cb));

  cb.aio_fildes = fd;
  cb.aio_lio_opcode = operations[op].opcode;
  cb.u.c.buf = buf;
  cb.u.c.nbytes = size;
  cb.u.c.offset = offset;

  struct iocb *cbs[] = { &cb };
  if (io_submit(aio_ctx_, 1, cbs) != 1) {
    int error = errno;
    char buf[256];
    sat_strerror(error, buf, sizeof(buf));
    logprintf(0, "Process Error: Unable to submit async %s "
              "on disk %s (thread %d). Error %d, %s\n",
              operations[op].op_str, device_name_.c_str(),
              thread_num_, error, buf);
    return false;
  }

  struct io_event event;
  memset(&event, 0, sizeof(event));
  struct timespec tv;
  tv.tv_sec = timeout / 1000000;
  tv.tv_nsec = (timeout % 1000000) * 1000;
  if (io_getevents(aio_ctx_, 1, 1, &event, &tv) != 1) {
    // A ctrl-c from the keyboard will cause io_getevents to fail with an
    // EINTR error code. This is not an error and so don't treat it as such,
    // but still log it.
    int error = errno;
    if (error == EINTR) {
      logprintf(5, "Log: %s interrupted on disk %s (thread %d).\n",
                operations[op].op_str, device_name_.c_str(),
                thread_num_);
    } else {
      os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1);
      errorcount_ += 1;
      logprintf(0, "Hardware Error: Timeout doing async %s to sectors "
                "starting at %lld on disk %s (thread %d).\n",
                operations[op].op_str, offset / kSectorSize,
                device_name_.c_str(), thread_num_);
    }

    // Don't bother checking return codes since io_cancel seems to always fail.
    // Since io_cancel is always failing, destroying and recreating an I/O
    // context is a workaround for canceling an in-progress I/O operation.
    // TODO(amistry): Find out why io_cancel isn't working and make it work.
    io_cancel(aio_ctx_, &cb, &event);
    io_destroy(aio_ctx_);
    aio_ctx_ = 0;
    if (io_setup(5, &aio_ctx_)) {
      int error = errno;
      char buf[256];
      sat_strerror(error, buf, sizeof(buf));
      logprintf(0, "Process Error: Unable to create aio context on disk %s"
                " (thread %d) Error %d, %s\n",
                device_name_.c_str(), thread_num_, error, buf);
    }

    return false;
  }

  // event.res contains the number of bytes written/read, or a negative
  // error code.
  if (event.res != static_cast<uint64>(size)) {
    errorcount_++;
    os_->ErrorReport(device_name_.c_str(), operations[op].error_str, 1);

    if (event.res < 0) {
      switch (event.res) {
        case -EIO:
          logprintf(0, "Hardware Error: Low-level I/O error while doing %s to "
                    "sectors starting at %lld on disk %s (thread %d).\n",
                    operations[op].op_str, offset / kSectorSize,
                    device_name_.c_str(), thread_num_);
          break;
        default:
          logprintf(0, "Hardware Error: Unknown error while doing %s to "
                    "sectors starting at %lld on disk %s (thread %d).\n",
                    operations[op].op_str, offset / kSectorSize,
                    device_name_.c_str(), thread_num_);
      }
    } else {
      logprintf(0, "Hardware Error: Unable to %s to sectors starting at "
                "%lld on disk %s (thread %d).\n",
                operations[op].op_str, offset / kSectorSize,
                device_name_.c_str(), thread_num_);
    }
    return false;
  }

  return true;
#else  // !HAVE_LIBAIO_H
  return false;
#endif
}

// Write a block to disk.
// Return false if the block is not written.
bool DiskThread::WriteBlockToDisk(int fd, BlockData *block) {
  memset(block_buffer_, 0, block->GetSize());

  // Fill block buffer with a pattern.
  struct page_entry pe;
  if (!sat_->GetValid(&pe)) {
    // Even though a valid page could not be obtained, it is not an error
    // since we can always fill in a pattern directly, albeit slower.
    unsigned int *memblock = static_cast<unsigned int *>(block_buffer_);
    block->SetPattern(patternlist_->GetRandomPattern());

    logprintf(11, "Log: Warning, using pattern fill fallback in "
              "DiskThread::WriteBlockToDisk on disk %s (thread %d).\n",
              device_name_.c_str(), thread_num_);

    for (int i = 0; i < block->GetSize()/wordsize_; i++) {
      memblock[i] = block->GetPattern()->pattern(i);
    }
  } else {
    memcpy(block_buffer_, pe.addr, block->GetSize());
    block->SetPattern(pe.pattern);
    sat_->PutValid(&pe);
  }

  logprintf(12, "Log: Writing %lld sectors starting at %lld on disk %s"
            " (thread %d).\n",
            block->GetSize()/kSectorSize, block->GetAddress(),
            device_name_.c_str(), thread_num_);

  int64 start_time = GetTime();

  if (!AsyncDiskIO(ASYNC_IO_WRITE, fd, block_buffer_, block->GetSize(),
                   block->GetAddress() * kSectorSize, write_timeout_)) {
    return false;
  }

  int64 end_time = GetTime();
  logprintf(12, "Log: Writing time: %lld us (thread %d).\n",
            end_time - start_time, thread_num_);
  if (end_time - start_time > write_threshold_) {
    logprintf(5, "Log: Write took %lld us which is longer than threshold "
              "%lld us on disk %s (thread %d).\n",
              end_time - start_time, write_threshold_, device_name_.c_str(),
              thread_num_);
  }

  return true;
}
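
// For reference, a minimal sketch of the three-step libaio pattern that
// AsyncDiskIO() above implements (illustrative only, not compiled here;
// fd, buf, size and offset are hypothetical, and the real error handling
// and timeout logic live in AsyncDiskIO()). This variant uses libaio's
// io_prep_pread() helper instead of filling in the iocb fields by hand.
#if 0
  io_context_t ctx = 0;
  io_setup(5, &ctx);                      // 1. Create an io context.
  struct iocb cb;
  io_prep_pread(&cb, fd, buf, size, offset);
  struct iocb *cbs[] = { &cb };
  io_submit(ctx, 1, cbs);                 // 2. Submit a request to it.
  struct io_event event;
  io_getevents(ctx, 1, 1, &event, NULL);  // 3. Wait for the completion event.
  io_destroy(ctx);
#endif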

// Verify a block on disk.
// Return true if the block was read, also increment errorcount
// if the block had data errors or performance problems.
bool DiskThread::ValidateBlockOnDisk(int fd, BlockData *block) {
  int64 blocks = block->GetSize() / read_block_size_;
  int64 bytes_read = 0;
  int64 current_blocks;
  int64 current_bytes;
  uint64 address = block->GetAddress();

  logprintf(20, "Log: Reading sectors starting at %lld on disk %s "
            "(thread %d).\n",
            address, device_name_.c_str(), thread_num_);

  // Read block from disk and time the read. If it takes longer than the
  // threshold, complain.
  if (lseek64(fd, address * kSectorSize, SEEK_SET) == -1) {
    logprintf(0, "Process Error: Unable to seek to sector %lld in "
              "DiskThread::ValidateBlockOnDisk on disk %s "
              "(thread %d).\n", address, device_name_.c_str(), thread_num_);
    return false;
  }
  int64 start_time = GetTime();

  // Split a large write-sized block into small read-sized blocks and
  // read them in groups of randomly-sized multiples of read block size.
  // This assures all data written on disk by this particular block
  // will be tested using a random reading pattern.
  while (blocks != 0) {
    // Test all read blocks in a written block.
    current_blocks = (random() % blocks) + 1;
    current_bytes = current_blocks * read_block_size_;

    memset(block_buffer_, 0, current_bytes);

    logprintf(20, "Log: Reading %lld sectors starting at sector %lld on "
              "disk %s (thread %d)\n",
              current_bytes / kSectorSize,
              (address * kSectorSize + bytes_read) / kSectorSize,
              device_name_.c_str(), thread_num_);

    if (!AsyncDiskIO(ASYNC_IO_READ, fd, block_buffer_, current_bytes,
                     address * kSectorSize + bytes_read,
                     write_timeout_)) {
      return false;
    }

    int64 end_time = GetTime();
    logprintf(20, "Log: Reading time: %lld us (thread %d).\n",
              end_time - start_time, thread_num_);
    if (end_time - start_time > read_threshold_) {
      logprintf(5, "Log: Read took %lld us which is longer than threshold "
                "%lld us on disk %s (thread %d).\n",
                end_time - start_time, read_threshold_,
                device_name_.c_str(), thread_num_);
    }

    // In non-destructive mode, don't compare the block to the pattern since
    // the block was never written to disk in the first place.
    if (!non_destructive_) {
      if (CheckRegion(block_buffer_, block->GetPattern(), current_bytes,
                      0, bytes_read)) {
        os_->ErrorReport(device_name_.c_str(), "disk-pattern-error", 1);
        errorcount_ += 1;
        logprintf(0, "Hardware Error: Pattern mismatch in block starting at "
                  "sector %lld in DiskThread::ValidateBlockOnDisk on "
                  "disk %s (thread %d).\n",
                  address, device_name_.c_str(), thread_num_);
      }
    }

    bytes_read += current_blocks * read_block_size_;
    blocks -= current_blocks;
  }

  return true;
}
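
// Worked example of the random split above (hypothetical sizes): with a
// 4096-byte write block and 512-byte read blocks, blocks starts at 8; the
// loop might draw current_blocks = 3, then 1, then 4, issuing reads of
// 1536, 512 and 2048 bytes that together cover the full block exactly once.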

// Direct device access thread.
// Return false on software error.
bool DiskThread::Work() {
  int fd;

  logprintf(9, "Log: Starting disk thread %d, disk %s\n",
            thread_num_, device_name_.c_str());

  srandom(time(NULL));

  if (!OpenDevice(&fd)) {
    status_ = false;
    return false;
  }

  // Allocate a block buffer aligned to 512 bytes since the kernel requires it
  // when using direct IO.
#ifdef HAVE_POSIX_MEMALIGN
  int memalign_result = posix_memalign(&block_buffer_, kBufferAlignment,
                                       sat_->page_length());
#else
  block_buffer_ = memalign(kBufferAlignment, sat_->page_length());
  int memalign_result = (block_buffer_ == 0);
#endif
  if (memalign_result) {
    CloseDevice(fd);
    logprintf(0, "Process Error: Unable to allocate memory for buffers "
              "for disk %s (thread %d) posix memalign returned %d.\n",
              device_name_.c_str(), thread_num_, memalign_result);
    status_ = false;
    return false;
  }

#ifdef HAVE_LIBAIO_H
  if (io_setup(5, &aio_ctx_)) {
    CloseDevice(fd);
    logprintf(0, "Process Error: Unable to create aio context for disk %s"
              " (thread %d).\n",
              device_name_.c_str(), thread_num_);
    status_ = false;
    return false;
  }
#endif

  bool result = DoWork(fd);

  status_ = result;

#ifdef HAVE_LIBAIO_H
  io_destroy(aio_ctx_);
#endif
  CloseDevice(fd);

  logprintf(9, "Log: Completed %d (disk %s): disk thread status %d, "
            "%d pages copied\n",
            thread_num_, device_name_.c_str(), status_, pages_copied_);
  return result;
}

RandomDiskThread::RandomDiskThread(DiskBlockTable *block_table)
    : DiskThread(block_table) {
  update_block_table_ = 0;
}

RandomDiskThread::~RandomDiskThread() {
}

// Workload for random disk thread.
bool RandomDiskThread::DoWork(int fd) {
  logprintf(11, "Log: Random phase for disk %s (thread %d).\n",
            device_name_.c_str(), thread_num_);
  while (IsReadyToRun()) {
    BlockData *block = block_table_->GetRandomBlock();
    if (block == NULL) {
      logprintf(12, "Log: No block available for device %s (thread %d).\n",
                device_name_.c_str(), thread_num_);
    } else {
      ValidateBlockOnDisk(fd, block);
      block_table_->ReleaseBlock(block);
      blocks_read_++;
    }
  }
  pages_copied_ = blocks_read_;
  return true;
}

MemoryRegionThread::MemoryRegionThread() {
  error_injection_ = false;
  pages_ = NULL;
}

MemoryRegionThread::~MemoryRegionThread() {
  if (pages_ != NULL)
    delete pages_;
}

// Set a region of memory or MMIO to be tested.
// Return false if region could not be mapped.
bool MemoryRegionThread::SetRegion(void *region, int64 size) {
  int plength = sat_->page_length();
  int npages = size / plength;
  if (size % plength) {
    logprintf(0, "Process Error: region size is not a multiple of SAT "
              "page length\n");
    return false;
  } else {
    if (pages_ != NULL)
      delete pages_;
    pages_ = new PageEntryQueue(npages);
    char *base_addr = reinterpret_cast<char*>(region);
    region_ = base_addr;
    for (int i = 0; i < npages; i++) {
      struct page_entry pe;
      init_pe(&pe);
      pe.addr = reinterpret_cast<void*>(base_addr + i * plength);
      pe.offset = i * plength;

      pages_->Push(&pe);
    }
    return true;
  }
}
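
// Usage sketch for SetRegion() (illustrative only, not compiled; the
// region pointer and size here are hypothetical):
#if 0
  MemoryRegionThread *region_thread = new MemoryRegionThread();
  // 'mmio_base' would be an already-mapped region whose size is a
  // multiple of sat_->page_length(); SetRegion carves it into pages.
  if (!region_thread->SetRegion(mmio_base, 16 * sat_->page_length()))
    logprintf(0, "Process Error: could not set test region\n");
#endif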

// More detailed error printout for hardware errors in memory or MMIO
// regions.
void MemoryRegionThread::ProcessError(struct ErrorRecord *error,
                                      int priority,
                                      const char *message) {
  uint32 buffer_offset;
  if (phase_ == kPhaseCopy) {
    // If the error occurred on the Copy Phase, it means that
    // the source data (i.e., the main memory) is wrong, so
    // just pass it to the original ProcessError to report a
    // bad-dimm error.
    WorkerThread::ProcessError(error, priority, message);
  } else if (phase_ == kPhaseCheck) {
    // An error on the Check Phase means that the memory region tested
    // has an error. Gather more information and then report the error.
    // Determine if this is a write or read error.
    os_->Flush(error->vaddr);
    error->reread = *(error->vaddr);
    char *good = reinterpret_cast<char*>(&(error->expected));
    char *bad = reinterpret_cast<char*>(&(error->actual));
    sat_assert(error->expected != error->actual);
    unsigned int offset = 0;
    for (offset = 0; offset < (sizeof(error->expected) - 1); offset++) {
      if (good[offset] != bad[offset])
        break;
    }

    error->vbyteaddr = reinterpret_cast<char*>(error->vaddr) + offset;

    buffer_offset = error->vbyteaddr - region_;

    // Find physical address if possible.
    error->paddr = os_->VirtualToPhysical(error->vbyteaddr);
    logprintf(priority,
              "%s: miscompare on %s, CRC check at %p(0x%llx), "
              "offset %llx: read:0x%016llx, reread:0x%016llx "
              "expected:0x%016llx\n",
              message,
              identifier_.c_str(),
              error->vaddr,
              error->paddr,
              buffer_offset,
              error->actual,
              error->reread,
              error->expected);
  } else {
    logprintf(0, "Process Error: memory region thread raised an "
              "unexpected error.\n");
  }
}

// Workload for testing memory or MMIO regions.
// Return false on software error.
bool MemoryRegionThread::Work() {
  struct page_entry source_pe;
  struct page_entry memregion_pe;
  bool result = true;
  int64 loops = 0;
  const uint64 error_constant = 0x00ba00000000ba00LL;

  // For error injection.
  int64 *addr = 0x0;
  int offset = 0;
  int64 data = 0;

  logprintf(9, "Log: Starting Memory Region thread %d\n", thread_num_);

  while (IsReadyToRun()) {
    // Getting pages from SAT and queue.
    phase_ = kPhaseNoPhase;
    result = result && sat_->GetValid(&source_pe);
    if (!result) {
      logprintf(0, "Process Error: memory region thread failed to pop "
                "pages from SAT, bailing\n");
      break;
    }

    result = result && pages_->PopRandom(&memregion_pe);
    if (!result) {
      logprintf(0, "Process Error: memory region thread failed to pop "
                "pages from queue, bailing\n");
      break;
    }

    // Error injection for CRC copy.
    if ((sat_->error_injection() || error_injection_) && loops == 1) {
      addr = reinterpret_cast<int64*>(source_pe.addr);
      offset = random() % (sat_->page_length() / wordsize_);
      data = addr[offset];
      addr[offset] = error_constant;
    }

    // Copying SAT page into memory region.
    phase_ = kPhaseCopy;
    CrcCopyPage(&memregion_pe, &source_pe);
    memregion_pe.pattern = source_pe.pattern;

    // Error injection for CRC Check.
    if ((sat_->error_injection() || error_injection_) && loops == 2) {
      addr = reinterpret_cast<int64*>(memregion_pe.addr);
      offset = random() % (sat_->page_length() / wordsize_);
      data = addr[offset];
      addr[offset] = error_constant;
    }

    // Checking page content in memory region.
    phase_ = kPhaseCheck;
    CrcCheckPage(&memregion_pe);

    phase_ = kPhaseNoPhase;
    // Storing pages on their proper queues.
    result = result && sat_->PutValid(&source_pe);
    if (!result) {
      logprintf(0, "Process Error: memory region thread failed to push "
                "pages into SAT, bailing\n");
      break;
    }
    result = result && pages_->Push(&memregion_pe);
    if (!result) {
      logprintf(0, "Process Error: memory region thread failed to push "
                "pages into queue, bailing\n");
      break;
    }

    if ((sat_->error_injection() || error_injection_) &&
        loops >= 1 && loops <= 2) {
      addr[offset] = data;
    }

    loops++;
    YieldSelf();
  }

  pages_copied_ = loops;
  status_ = result;
  logprintf(9, "Log: Completed %d: Memory Region thread. Status %d, %d "
            "pages checked\n", thread_num_, status_, pages_copied_);
  return result;
}