Home | History | Annotate | Download | only in src
      1 // Copyright 2006 Google Inc. All Rights Reserved.
      2 // Author: nsanders, menderico
      3 
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 // os.cc : os and machine specific implementation
     17 // This file includes an abstracted interface
     18 // for linux-distro specific and HW specific
     19 // interfaces.
     20 
     21 #include "os.h"
     22 
     23 #include <errno.h>
     24 #include <fcntl.h>
     25 #include <linux/types.h>
     26 #include <malloc.h>
     27 #include <stdio.h>
     28 #include <stdlib.h>
     29 #include <string.h>
     30 #include <sys/mman.h>
     31 #include <sys/ioctl.h>
     32 #include <sys/time.h>
     33 #include <sys/types.h>
     34 #include <sys/ipc.h>
     35 #ifdef HAVE_SYS_SHM_H
     36 #include <sys/shm.h>
     37 #endif
     38 #include <unistd.h>
     39 
     40 #ifndef SHM_HUGETLB
     41 #define SHM_HUGETLB      04000  // remove when glibc defines it
     42 #endif
     43 
     44 #include <string>
     45 #include <list>
     46 
     47 // This file must work with autoconf on its public version,
     48 // so these includes are correct.
     49 #include "sattypes.h"
     50 #include "error_diag.h"
     51 
     52 // OsLayer initialization.
     53 OsLayer::OsLayer() {
     54   testmem_ = 0;
     55   testmemsize_ = 0;
     56   totalmemsize_ = 0;
     57   min_hugepages_bytes_ = 0;
     58   normal_mem_ = true;
     59   use_hugepages_ = false;
     60   use_posix_shm_ = false;
     61   dynamic_mapped_shmem_ = false;
     62   shmid_ = 0;
     63 
     64   time_initialized_ = 0;
     65 
     66   regionsize_ = 0;
     67   regioncount_ = 1;
     68   num_cpus_ = 0;
     69   num_nodes_ = 0;
     70   num_cpus_per_node_ = 0;
     71   error_diagnoser_ = 0;
     72   err_log_callback_ = 0;
     73   error_injection_ = false;
     74 
     75   void *pvoid = 0;
     76   address_mode_ = sizeof(pvoid) * 8;
     77 
     78   has_clflush_ = false;
     79   has_sse2_ = false;
     80 
     81   use_flush_page_cache_ = false;
     82 }
     83 
     84 // OsLayer cleanup.
     85 OsLayer::~OsLayer() {
     86   if (error_diagnoser_)
     87     delete error_diagnoser_;
     88 }
     89 
     90 // OsLayer initialization.
     91 bool OsLayer::Initialize() {
     92   time_initialized_ = time(NULL);
     93   // Detect asm support.
     94   GetFeatures();
     95 
     96   if (num_cpus_ == 0) {
     97     num_nodes_ = 1;
     98     num_cpus_ = sysconf(_SC_NPROCESSORS_ONLN);
     99     num_cpus_per_node_ = num_cpus_ / num_nodes_;
    100   }
    101   logprintf(5, "Log: %d nodes, %d cpus.\n", num_nodes_, num_cpus_);
    102   sat_assert(CPU_SETSIZE >= num_cpus_);
    103   cpu_sets_.resize(num_nodes_);
    104   cpu_sets_valid_.resize(num_nodes_);
    105   // Create error diagnoser.
    106   error_diagnoser_ = new ErrorDiag();
    107   if (!error_diagnoser_->set_os(this))
    108     return false;
    109   return true;
    110 }
    111 
    112 // Machine type detected. Can we implement all these functions correctly?
    113 bool OsLayer::IsSupported() {
    114   if (kOpenSource) {
    115     // There are no explicitly supported systems in open source version.
    116     return true;
    117   }
    118 
    119   // This is the default empty implementation.
    120   // SAT won't report full error information.
    121   return false;
    122 }
    123 
    124 int OsLayer::AddressMode() {
    125   // Detect 32/64 bit binary.
    126   void *pvoid = 0;
    127   return sizeof(pvoid) * 8;
    128 }
    129 
    130 // Translates user virtual to physical address.
    131 uint64 OsLayer::VirtualToPhysical(void *vaddr) {
    132   // Needs platform specific implementation.
    133   return 0;
    134 }
    135 
    136 // Returns the HD device that contains this file.
    137 string OsLayer::FindFileDevice(string filename) {
    138   return "hdUnknown";
    139 }
    140 
    141 // Returns a list of locations corresponding to HD devices.
    142 list<string> OsLayer::FindFileDevices() {
    143   // No autodetection on unknown systems.
    144   list<string> locations;
    145   return locations;
    146 }
    147 
    148 
    149 // Get HW core features from cpuid instruction.
    150 void OsLayer::GetFeatures() {
    151 #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
    152   // CPUID features documented at:
    153   // http://www.sandpile.org/ia32/cpuid.htm
    154   int ax, bx, cx, dx;
    155   __asm__ __volatile__ (
    156       "cpuid": "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (1));
    157   has_clflush_ = (dx >> 19) & 1;
    158   has_sse2_ = (dx >> 26) & 1;
    159 
    160   logprintf(9, "Log: has clflush: %s, has sse2: %s\n",
    161             has_clflush_ ? "true" : "false",
    162             has_sse2_ ? "true" : "false");
    163 #elif defined(STRESSAPPTEST_CPU_PPC)
    164   // All PPC implementations have cache flush instructions.
    165   has_clflush_ = true;
    166 #elif defined(STRESSAPPTEST_CPU_ARMV7A)
    167 #warning "Unsupported CPU type ARMV7A: unable to determine feature set."
    168 #else
    169 #warning "Unsupported CPU type: unable to determine feature set."
    170 #endif
    171 }
    172 
    173 
    174 // Enable FlushPageCache to be functional instead of a NOP.
    175 void OsLayer::ActivateFlushPageCache(void) {
    176   logprintf(9, "Log: page cache will be flushed as needed\n");
    177   use_flush_page_cache_ = true;
    178 }
    179 
    180 // Flush the page cache to ensure reads come from the disk.
    181 bool OsLayer::FlushPageCache(void) {
    182   if (!use_flush_page_cache_)
    183     return true;
    184 
    185   // First, ask the kernel to write the cache to the disk.
    186   sync();
    187 
    188   // Second, ask the kernel to empty the cache by writing "1" to
    189   // "/proc/sys/vm/drop_caches".
    190   static const char *drop_caches_file = "/proc/sys/vm/drop_caches";
    191   int dcfile = open(drop_caches_file, O_WRONLY);
    192   if (dcfile < 0) {
    193     int err = errno;
    194     string errtxt = ErrorString(err);
    195     logprintf(3, "Log: failed to open %s - err %d (%s)\n",
    196               drop_caches_file, err, errtxt.c_str());
    197     return false;
    198   }
    199 
    200   ssize_t bytes_written = write(dcfile, "1", 1);
    201   close(dcfile);
    202 
    203   if (bytes_written != 1) {
    204     int err = errno;
    205     string errtxt = ErrorString(err);
    206     logprintf(3, "Log: failed to write %s - err %d (%s)\n",
    207               drop_caches_file, err, errtxt.c_str());
    208     return false;
    209   }
    210   return true;
    211 }
    212 
    213 
    214 // We need to flush the cacheline here.
    215 void OsLayer::Flush(void *vaddr) {
    216   // Use the generic flush. This function is just so we can override
    217   // this if we are so inclined.
    218   if (has_clflush_)
    219     FastFlush(vaddr);
    220 }
    221 
    222 
    223 // Run C or ASM copy as appropriate..
    224 bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
    225                               unsigned int size_in_bytes,
    226                               AdlerChecksum *checksum) {
    227   if (has_sse2_) {
    228     return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum);
    229   } else {
    230     return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum);
    231   }
    232 }
    233 
    234 
    235 // Translate user virtual to physical address.
    236 int OsLayer::FindDimm(uint64 addr, char *buf, int len) {
    237   char tmpbuf[256];
    238   snprintf(tmpbuf, sizeof(tmpbuf), "DIMM Unknown");
    239   snprintf(buf, len, "%s", tmpbuf);
    240   return 0;
    241 }
    242 
    243 
    244 // Classifies addresses according to "regions"
    245 // This isn't really implemented meaningfully here..
    246 int32 OsLayer::FindRegion(uint64 addr) {
    247   static bool warned = false;
    248 
    249   if (regionsize_ == 0) {
    250     regionsize_ = totalmemsize_ / 8;
    251     if (regionsize_ < 512 * kMegabyte)
    252       regionsize_ = 512 * kMegabyte;
    253     regioncount_ = totalmemsize_ / regionsize_;
    254     if (regioncount_ < 1) regioncount_ = 1;
    255   }
    256 
    257   int32 region_num = addr / regionsize_;
    258   if (region_num >= regioncount_) {
    259     if (!warned) {
    260         logprintf(0, "Log: region number %d exceeds region count %d\n",
    261                   region_num, regioncount_);
    262         warned = true;
    263     }
    264     region_num = region_num % regioncount_;
    265   }
    266   return region_num;
    267 }
    268 
    269 // Report which cores are associated with a given region.
    270 cpu_set_t *OsLayer::FindCoreMask(int32 region) {
    271   sat_assert(region >= 0);
    272   region %= num_nodes_;
    273   if (!cpu_sets_valid_[region]) {
    274     CPU_ZERO(&cpu_sets_[region]);
    275     for (int i = 0; i < num_cpus_per_node_; ++i) {
    276       CPU_SET(i + region * num_cpus_per_node_, &cpu_sets_[region]);
    277     }
    278     cpu_sets_valid_[region] = true;
    279     logprintf(5, "Log: Region %d mask 0x%s\n",
    280                  region, FindCoreMaskFormat(region).c_str());
    281   }
    282   return &cpu_sets_[region];
    283 }
    284 
    285 // Return cores associated with a given region in hex string.
    286 string OsLayer::FindCoreMaskFormat(int32 region) {
    287   cpu_set_t* mask = FindCoreMask(region);
    288   string format = cpuset_format(mask);
    289   if (format.size() < 8)
    290     format = string(8 - format.size(), '0') + format;
    291   return format;
    292 }
    293 
    294 // Report an error in an easily parseable way.
    295 bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
    296   time_t now = time(NULL);
    297   int ttf = now - time_initialized_;
    298   logprintf(0, "Report Error: %s : %s : %d : %ds\n", symptom, part, count, ttf);
    299   return true;
    300 }
    301 
    302 // Read the number of hugepages out of the kernel interface in proc.
    303 int64 OsLayer::FindHugePages() {
    304   char buf[65] = "0";
    305 
    306   // This is a kernel interface to query the numebr of hugepages
    307   // available in the system.
    308   static const char *hugepages_info_file = "/proc/sys/vm/nr_hugepages";
    309   int hpfile = open(hugepages_info_file, O_RDONLY);
    310 
    311   ssize_t bytes_read = read(hpfile, buf, 64);
    312   close(hpfile);
    313 
    314   if (bytes_read <= 0) {
    315     logprintf(12, "Log: /proc/sys/vm/nr_hugepages "
    316                   "read did not provide data\n");
    317     return 0;
    318   }
    319 
    320   if (bytes_read == 64) {
    321     logprintf(0, "Process Error: /proc/sys/vm/nr_hugepages "
    322                  "is surprisingly large\n");
    323     return 0;
    324   }
    325 
    326   // Add a null termintation to be string safe.
    327   buf[bytes_read] = '\0';
    328   // Read the page count.
    329   int64 pages = strtoull(buf, NULL, 10);  // NOLINT
    330 
    331   return pages;
    332 }
    333 
    334 int64 OsLayer::FindFreeMemSize() {
    335   int64 size = 0;
    336   int64 minsize = 0;
    337   if (totalmemsize_ > 0)
    338     return totalmemsize_;
    339 
    340   int64 pages = sysconf(_SC_PHYS_PAGES);
    341   int64 avpages = sysconf(_SC_AVPHYS_PAGES);
    342   int64 pagesize = sysconf(_SC_PAGESIZE);
    343   int64 physsize = pages * pagesize;
    344   int64 avphyssize = avpages * pagesize;
    345 
    346   // Assume 2MB hugepages.
    347   int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
    348 
    349   if ((pages == -1) || (pagesize == -1)) {
    350     logprintf(0, "Process Error: sysconf could not determine memory size.\n");
    351     return 0;
    352   }
    353 
    354   // We want to leave enough stuff for things to run.
    355   // If the user specified a minimum amount of memory to expect, require that.
    356   // Otherwise, if more than 2GB is present, leave 192M + 5% for other stuff.
    357   // If less than 2GB is present use 85% of what's available.
    358   // These are fairly arbitrary numbers that seem to work OK.
    359   //
    360   // TODO(nsanders): is there a more correct way to determine target
    361   // memory size?
    362   if (hugepagesize > 0 && min_hugepages_bytes_ > 0) {
    363     minsize = min_hugepages_bytes_;
    364   } else if (physsize < 2048LL * kMegabyte) {
    365     minsize = ((pages * 85) / 100) * pagesize;
    366   } else {
    367     minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
    368   }
    369 
    370   // Use hugepage sizing if available.
    371   if (hugepagesize > 0) {
    372     if (hugepagesize < minsize) {
    373       logprintf(0, "Procedural Error: Not enough hugepages. "
    374                    "%lldMB available < %lldMB required.\n",
    375                 hugepagesize / kMegabyte,
    376                 minsize / kMegabyte);
    377       // Require the calculated minimum amount of memory.
    378       size = minsize;
    379     } else {
    380       // Require that we get all hugepages.
    381       size = hugepagesize;
    382     }
    383   } else {
    384     // Require the calculated minimum amount of memory.
    385     size = minsize;
    386   }
    387 
    388   logprintf(5, "Log: Total %lld MB. Free %lld MB. Hugepages %lld MB. "
    389                "Targeting %lld MB (%lld%%)\n",
    390             physsize / kMegabyte,
    391             avphyssize / kMegabyte,
    392             hugepagesize / kMegabyte,
    393             size / kMegabyte,
    394             size * 100 / physsize);
    395 
    396   totalmemsize_ = size;
    397   return size;
    398 }
    399 
    400 // Allocates all memory available.
    401 int64 OsLayer::AllocateAllMem() {
    402   int64 length = FindFreeMemSize();
    403   bool retval = AllocateTestMem(length, 0);
    404   if (retval)
    405     return length;
    406   else
    407     return 0;
    408 }
    409 
    410 // Allocate the target memory. This may be from malloc, hugepage pool
    411 // or other platform specific sources.
    412 bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
    413   // Try hugepages first.
    414   void *buf = 0;
    415 
    416   sat_assert(length >= 0);
    417 
    418   if (paddr_base)
    419     logprintf(0, "Process Error: non zero paddr_base %#llx is not supported,"
    420               " ignore.\n", paddr_base);
    421 
    422   // Determine optimal memory allocation path.
    423   bool prefer_hugepages = false;
    424   bool prefer_posix_shm = false;
    425   bool prefer_dynamic_mapping = false;
    426 
    427   // Are there enough hugepages?
    428   int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
    429   // TODO(nsanders): Is there enough /dev/shm? Is there enough free memeory?
    430   if ((length >= 1400LL * kMegabyte) && (address_mode_ == 32)) {
    431     prefer_dynamic_mapping = true;
    432     prefer_posix_shm = true;
    433     logprintf(3, "Log: Prefer POSIX shared memory allocation.\n");
    434     logprintf(3, "Log: You may need to run "
    435                  "'sudo mount -o remount,size=100\% /dev/shm.'\n");
    436   } else if (hugepagesize >= length) {
    437     prefer_hugepages = true;
    438     logprintf(3, "Log: Prefer using hugepace allocation.\n");
    439   } else {
    440     logprintf(3, "Log: Prefer plain malloc memory allocation.\n");
    441   }
    442 
    443 #ifdef HAVE_SYS_SHM_H
    444   // Allocate hugepage mapped memory.
    445   if (prefer_hugepages) {
    446     do { // Allow break statement.
    447       int shmid;
    448       void *shmaddr;
    449 
    450       if ((shmid = shmget(2, length,
    451               SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
    452         int err = errno;
    453         string errtxt = ErrorString(err);
    454         logprintf(3, "Log: failed to allocate shared hugepage "
    455                       "object - err %d (%s)\n",
    456                   err, errtxt.c_str());
    457         logprintf(3, "Log: sysctl -w vm.nr_hugepages=XXX allows hugepages.\n");
    458         break;
    459       }
    460 
    461       shmaddr = shmat(shmid, NULL, NULL);
    462       if (shmaddr == reinterpret_cast<void*>(-1)) {
    463         int err = errno;
    464         string errtxt = ErrorString(err);
    465         logprintf(0, "Log: failed to attach shared "
    466                      "hugepage object - err %d (%s).\n",
    467                   err, errtxt.c_str());
    468         if (shmctl(shmid, IPC_RMID, NULL) < 0) {
    469           int err = errno;
    470           string errtxt = ErrorString(err);
    471           logprintf(0, "Log: failed to remove shared "
    472                        "hugepage object - err %d (%s).\n",
    473                     err, errtxt.c_str());
    474         }
    475         break;
    476       }
    477       use_hugepages_ = true;
    478       shmid_ = shmid;
    479       buf = shmaddr;
    480       logprintf(0, "Log: Using shared hugepage object 0x%x at %p.\n",
    481                 shmid, shmaddr);
    482     } while (0);
    483   }
    484 
    485   if ((!use_hugepages_) && prefer_posix_shm) {
    486     do {
    487       int shm_object;
    488       void *shmaddr = NULL;
    489 
    490       shm_object = shm_open("/stressapptest", O_CREAT | O_RDWR, S_IRWXU);
    491       if (shm_object < 0) {
    492         int err = errno;
    493         string errtxt = ErrorString(err);
    494         logprintf(3, "Log: failed to allocate shared "
    495                       "smallpage object - err %d (%s)\n",
    496                   err, errtxt.c_str());
    497         break;
    498       }
    499 
    500       if (0 > ftruncate(shm_object, length)) {
    501         int err = errno;
    502         string errtxt = ErrorString(err);
    503         logprintf(3, "Log: failed to ftruncate shared "
    504                       "smallpage object - err %d (%s)\n",
    505                   err, errtxt.c_str());
    506         break;
    507       }
    508 
    509       // 32 bit linux apps can only use ~1.4G of address space.
    510       // Use dynamic mapping for allocations larger than that.
    511       // Currently perf hit is ~10% for this.
    512       if (prefer_dynamic_mapping) {
    513         dynamic_mapped_shmem_ = true;
    514       } else {
    515         // Do a full mapping here otherwise.
    516         shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE,
    517                          MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
    518                          shm_object, NULL);
    519         if (shmaddr == reinterpret_cast<void*>(-1)) {
    520           int err = errno;
    521           string errtxt = ErrorString(err);
    522           logprintf(0, "Log: failed to map shared "
    523                        "smallpage object - err %d (%s).\n",
    524                     err, errtxt.c_str());
    525           break;
    526         }
    527       }
    528 
    529       use_posix_shm_ = true;
    530       shmid_ = shm_object;
    531       buf = shmaddr;
    532       char location_message[256] = "";
    533       if (dynamic_mapped_shmem_) {
    534         sprintf(location_message, "mapped as needed");
    535       } else {
    536         sprintf(location_message, "at %p", shmaddr);
    537       }
    538       logprintf(0, "Log: Using posix shared memory object 0x%x %s.\n",
    539                 shm_object, location_message);
    540     } while (0);
    541     shm_unlink("/stressapptest");
    542   }
    543 #endif // HAVE_SYS_SHM_H
    544 
    545   if (!use_hugepages_ && !use_posix_shm_) {
    546     // Use memalign to ensure that blocks are aligned enough for disk direct IO.
    547     buf = static_cast<char*>(memalign(4096, length));
    548     if (buf) {
    549       logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
    550     } else {
    551       logprintf(0, "Process Error: memalign returned 0\n");
    552       if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
    553         logprintf(0, "Log: You are trying to allocate > 1.4G on a 32 "
    554                      "bit process. Please setup shared memory.\n");
    555       }
    556     }
    557   }
    558 
    559   testmem_ = buf;
    560   if (buf || dynamic_mapped_shmem_) {
    561     testmemsize_ = length;
    562   } else {
    563     testmemsize_ = 0;
    564   }
    565 
    566   return (buf != 0) || dynamic_mapped_shmem_;
    567 }
    568 
    569 // Free the test memory.
    570 void OsLayer::FreeTestMem() {
    571   if (testmem_) {
    572     if (use_hugepages_) {
    573 #ifdef HAVE_SYS_SHM_H
    574       shmdt(testmem_);
    575       shmctl(shmid_, IPC_RMID, NULL);
    576 #endif
    577     } else if (use_posix_shm_) {
    578       if (!dynamic_mapped_shmem_) {
    579         munmap(testmem_, testmemsize_);
    580       }
    581       close(shmid_);
    582     } else {
    583       free(testmem_);
    584     }
    585     testmem_ = 0;
    586     testmemsize_ = 0;
    587   }
    588 }
    589 
    590 
    591 // Prepare the target memory. It may requre mapping in, or this may be a noop.
    592 void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) {
    593   sat_assert((offset + length) <= testmemsize_);
    594   if (dynamic_mapped_shmem_) {
    595     // TODO(nsanders): Check if we can support MAP_NONBLOCK,
    596     // and evaluate performance hit from not using it.
    597 #ifdef HAVE_MMAP64
    598     void * mapping = mmap64(NULL, length, PROT_READ | PROT_WRITE,
    599                      MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
    600                      shmid_, offset);
    601 #else
    602     void * mapping = mmap(NULL, length, PROT_READ | PROT_WRITE,
    603                      MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
    604                      shmid_, offset);
    605 #endif
    606     if (mapping == MAP_FAILED) {
    607       string errtxt = ErrorString(errno);
    608       logprintf(0, "Process Error: PrepareTestMem mmap64(%llx, %llx) failed. "
    609                    "error: %s.\n",
    610                 offset, length, errtxt.c_str());
    611       sat_assert(0);
    612     }
    613     return mapping;
    614   }
    615 
    616   return reinterpret_cast<void*>(reinterpret_cast<char*>(testmem_) + offset);
    617 }
    618 
    619 // Release the test memory resources, if any.
    620 void OsLayer::ReleaseTestMem(void *addr, uint64 offset, uint64 length) {
    621   if (dynamic_mapped_shmem_) {
    622     int retval = munmap(addr, length);
    623     if (retval == -1) {
    624       string errtxt = ErrorString(errno);
    625       logprintf(0, "Process Error: ReleaseTestMem munmap(%p, %llx) failed. "
    626                    "error: %s.\n",
    627                 addr, length, errtxt.c_str());
    628       sat_assert(0);
    629     }
    630   }
    631 }
    632 
    633 // No error polling on unknown systems.
    634 int OsLayer::ErrorPoll() {
    635   return 0;
    636 }
    637 
    638 // Generally, poll for errors once per second.
    639 void OsLayer::ErrorWait() {
    640   sat_sleep(1);
    641   return;
    642 }
    643 
    644 // Open a PCI bus-dev-func as a file and return its file descriptor.
    645 // Error is indicated by return value less than zero.
    646 int OsLayer::PciOpen(int bus, int device, int function) {
    647   char dev_file[256];
    648 
    649   snprintf(dev_file, sizeof(dev_file), "/proc/bus/pci/%02x/%02x.%x",
    650            bus, device, function);
    651 
    652   int fd = open(dev_file, O_RDWR);
    653   if (fd == -1) {
    654     logprintf(0, "Process Error: Unable to open PCI bus %d, device %d, "
    655                  "function %d (errno %d).\n",
    656               bus, device, function, errno);
    657     return -1;
    658   }
    659 
    660   return fd;
    661 }
    662 
    663 
    664 // Read and write functions to access PCI config.
    665 uint32 OsLayer::PciRead(int fd, uint32 offset, int width) {
    666   // Strict aliasing rules lawyers will cause data corruption
    667   // on cast pointers in some gccs.
    668   union {
    669     uint32 l32;
    670     uint16 l16;
    671     uint8 l8;
    672   } datacast;
    673   datacast.l32 = 0;
    674   uint32 size = width / 8;
    675 
    676   sat_assert((width == 32) || (width == 16) || (width == 8));
    677   sat_assert(offset <= (256 - size));
    678 
    679   if (lseek(fd, offset, SEEK_SET) < 0) {
    680     logprintf(0, "Process Error: Can't seek %x\n", offset);
    681     return 0;
    682   }
    683   if (read(fd, &datacast, size) != static_cast<ssize_t>(size)) {
    684     logprintf(0, "Process Error: Can't read %x\n", offset);
    685     return 0;
    686   }
    687 
    688   // Extract the data.
    689   switch (width) {
    690     case 8:
    691       sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
    692       return datacast.l8;
    693     case 16:
    694       sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
    695       return datacast.l16;
    696     case 32:
    697       return datacast.l32;
    698   }
    699   return 0;
    700 }
    701 
    702 void OsLayer::PciWrite(int fd, uint32 offset, uint32 value, int width) {
    703   // Strict aliasing rules lawyers will cause data corruption
    704   // on cast pointers in some gccs.
    705   union {
    706     uint32 l32;
    707     uint16 l16;
    708     uint8 l8;
    709   } datacast;
    710   datacast.l32 = 0;
    711   uint32 size = width / 8;
    712 
    713   sat_assert((width == 32) || (width == 16) || (width == 8));
    714   sat_assert(offset <= (256 - size));
    715 
    716   // Cram the data into the right alignment.
    717   switch (width) {
    718     case 8:
    719       sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
    720       datacast.l8 = value;
    721     case 16:
    722       sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
    723       datacast.l16 = value;
    724     case 32:
    725       datacast.l32 = value;
    726   }
    727 
    728   if (lseek(fd, offset, SEEK_SET) < 0) {
    729     logprintf(0, "Process Error: Can't seek %x\n", offset);
    730     return;
    731   }
    732   if (write(fd, &datacast, size) != static_cast<ssize_t>(size)) {
    733     logprintf(0, "Process Error: Can't write %x to %x\n", datacast.l32, offset);
    734     return;
    735   }
    736 
    737   return;
    738 }
    739 
    740 
    741 
    742 // Open dev msr.
    743 int OsLayer::OpenMSR(uint32 core, uint32 address) {
    744   char buf[256];
    745   snprintf(buf, sizeof(buf), "/dev/cpu/%d/msr", core);
    746   int fd = open(buf, O_RDWR);
    747   if (fd < 0)
    748     return fd;
    749 
    750   uint32 pos = lseek(fd, address, SEEK_SET);
    751   if (pos != address) {
    752     close(fd);
    753     logprintf(5, "Log: can't seek to msr %x, cpu %d\n", address, core);
    754     return -1;
    755   }
    756 
    757   return fd;
    758 }
    759 
    760 bool OsLayer::ReadMSR(uint32 core, uint32 address, uint64 *data) {
    761   int fd = OpenMSR(core, address);
    762   if (fd < 0)
    763     return false;
    764 
    765   // Read from the msr.
    766   bool res = (sizeof(*data) == read(fd, data, sizeof(*data)));
    767 
    768   if (!res)
    769     logprintf(5, "Log: Failed to read msr %x core %d\n", address, core);
    770 
    771   close(fd);
    772 
    773   return res;
    774 }
    775 
    776 bool OsLayer::WriteMSR(uint32 core, uint32 address, uint64 *data) {
    777   int fd = OpenMSR(core, address);
    778   if (fd < 0)
    779     return false;
    780 
    781   // Write to the msr
    782   bool res = (sizeof(*data) == write(fd, data, sizeof(*data)));
    783 
    784   if (!res)
    785     logprintf(5, "Log: Failed to write msr %x core %d\n", address, core);
    786 
    787   close(fd);
    788 
    789   return res;
    790 }
    791 
    792 // Extract bits [n+len-1, n] from a 32 bit word.
    793 // so GetBitField(0x0f00, 8, 4) == 0xf.
    794 uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) {
    795   return (val >> n) & ((1<<len) - 1);
    796 }
    797 
    798 // Generic CPU stress workload that would work on any CPU/Platform.
    799 // Float-point array moving average calculation.
    800 bool OsLayer::CpuStressWorkload() {
    801   double float_arr[100];
    802   double sum = 0;
    803   unsigned int seed = 12345;
    804 
    805   // Initialize array with random numbers.
    806   for (int i = 0; i < 100; i++) {
    807 #ifdef HAVE_RAND_R
    808     float_arr[i] = rand_r(&seed);
    809     if (rand_r(&seed) % 2)
    810       float_arr[i] *= -1.0;
    811 #else
    812     float_arr[i] = rand();
    813     if (rand() % 2)
    814       float_arr[i] *= -1.0;
    815 #endif
    816   }
    817 
    818   // Calculate moving average.
    819   for (int i = 0; i < 100000000; i++) {
    820     float_arr[i % 100] =
    821       (float_arr[i % 100] + float_arr[(i + 1) % 100] +
    822        float_arr[(i + 99) % 100]) / 3;
    823     sum += float_arr[i % 100];
    824   }
    825 
    826   // Artificial printf so the loops do not get optimized away.
    827   if (sum == 0.0)
    828     logprintf(12, "Log: I'm Feeling Lucky!\n");
    829   return true;
    830 }
    831 
    832 PCIDevices OsLayer::GetPCIDevices() {
    833   PCIDevices device_list;
    834   DIR *dir;
    835   struct dirent *buf = new struct dirent();
    836   struct dirent *entry;
    837   dir = opendir(kSysfsPath);
    838   if (!dir)
    839     logprintf(0, "Process Error: Cannot open %s", kSysfsPath);
    840   while (readdir_r(dir, buf, &entry) == 0 && entry) {
    841     PCIDevice *device;
    842     unsigned int dev, func;
    843     // ".", ".." or a special non-device perhaps.
    844     if (entry->d_name[0] == '.')
    845       continue;
    846 
    847     device = new PCIDevice();
    848     if (sscanf(entry->d_name, "%04x:%02hx:%02x.%d",
    849                &device->domain, &device->bus, &dev, &func) < 4) {
    850       logprintf(0, "Process Error: Couldn't parse %s", entry->d_name);
    851       free(device);
    852       continue;
    853     }
    854     device->dev = dev;
    855     device->func = func;
    856     device->vendor_id = PCIGetValue(entry->d_name, "vendor");
    857     device->device_id = PCIGetValue(entry->d_name, "device");
    858     PCIGetResources(entry->d_name, device);
    859     device_list.insert(device_list.end(), device);
    860   }
    861   closedir(dir);
    862   delete buf;
    863   return device_list;
    864 }
    865 
    866 int OsLayer::PCIGetValue(string name, string object) {
    867   int fd, len;
    868   char filename[256];
    869   char buf[256];
    870   snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
    871            name.c_str(), object.c_str());
    872   fd = open(filename, O_RDONLY);
    873   if (fd < 0)
    874     return 0;
    875   len = read(fd, buf, 256);
    876   close(fd);
    877   buf[len] = '\0';
    878   return strtol(buf, NULL, 0);  // NOLINT
    879 }
    880 
    881 int OsLayer::PCIGetResources(string name, PCIDevice *device) {
    882   char filename[256];
    883   char buf[256];
    884   FILE *file;
    885   int64 start;
    886   int64 end;
    887   int64 size;
    888   int i;
    889   snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
    890            name.c_str(), "resource");
    891   file = fopen(filename, "r");
    892   if (!file) {
    893     logprintf(0, "Process Error: impossible to find resource file for %s",
    894               filename);
    895     return errno;
    896   }
    897   for (i = 0; i < 6; i++) {
    898     if (!fgets(buf, 256, file))
    899       break;
    900     sscanf(buf, "%llx %llx", &start, &end);  // NOLINT
    901     size = 0;
    902     if (start)
    903       size = end - start + 1;
    904     device->base_addr[i] = start;
    905     device->size[i] = size;
    906   }
    907   fclose(file);
    908   return 0;
    909 }
    910