      1 // Copyright 2006 Google Inc. All Rights Reserved.
      2 // Author: nsanders, menderico
      3 
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 // os.cc : os and machine specific implementation
     17 // This file provides an abstracted interface
     18 // for Linux-distro-specific and hardware-specific
     19 // functionality.
     20 
     21 #include "os.h"
     22 
     23 #include <errno.h>
     24 #include <fcntl.h>
     25 #include <linux/types.h>
     26 #include <malloc.h>
     27 #include <stdio.h>
     28 #include <stdlib.h>
     29 #include <string.h>
     30 #include <sys/mman.h>
     31 #include <sys/ioctl.h>
     32 #include <sys/time.h>
     33 #include <sys/types.h>
     34 #include <sys/ipc.h>
     35 #ifdef HAVE_SYS_SHM_H
     36 #include <sys/shm.h>
     37 #endif
     38 #include <unistd.h>
     39 
     40 #ifndef SHM_HUGETLB
     41 #define SHM_HUGETLB      04000  // remove when glibc defines it
     42 #endif
     43 
     44 #include <string>
     45 #include <list>
     46 
     47 // The public version of this file must build with autoconf,
     48 // so these includes are correct.
     49 #include "sattypes.h"
     50 #include "error_diag.h"
     51 #include "clock.h"
     52 
     53 // OsLayer initialization.
     54 OsLayer::OsLayer() {
     55   testmem_ = 0;
     56   testmemsize_ = 0;
     57   totalmemsize_ = 0;
     58   min_hugepages_bytes_ = 0;
     59   reserve_mb_ = 0;
     60   normal_mem_ = true;
     61   use_hugepages_ = false;
     62   use_posix_shm_ = false;
     63   dynamic_mapped_shmem_ = false;
     64   mmapped_allocation_ = false;
     65   shmid_ = 0;
     66 
     67   time_initialized_ = 0;
     68 
     69   regionsize_ = 0;
     70   regioncount_ = 1;
     71   num_cpus_ = 0;
     72   num_nodes_ = 0;
     73   num_cpus_per_node_ = 0;
     74   error_diagnoser_ = 0;
     75   err_log_callback_ = 0;
     76   error_injection_ = false;
     77 
     78   void *pvoid = 0;
     79   address_mode_ = sizeof(pvoid) * 8;
     80 
     81   has_clflush_ = false;
     82   has_vector_ = false;
     83 
     84   use_flush_page_cache_ = false;
     85 
     86   clock_ = NULL;
     87 }
     88 
     89 // OsLayer cleanup.
     90 OsLayer::~OsLayer() {
     91   if (error_diagnoser_)
     92     delete error_diagnoser_;
     93   if (clock_)
     94     delete clock_;
     95 }
     96 
     97 // OsLayer initialization.
     98 bool OsLayer::Initialize() {
     99   if (!clock_) {
    100     clock_ = new Clock();
    101   }
    102 
    103   time_initialized_ = clock_->Now();
    104   // Detect asm support.
    105   GetFeatures();
    106 
    107   if (num_cpus_ == 0) {
    108     num_nodes_ = 1;
    109     num_cpus_ = sysconf(_SC_NPROCESSORS_ONLN);
    110     num_cpus_per_node_ = num_cpus_ / num_nodes_;
    111   }
    112   logprintf(5, "Log: %d nodes, %d cpus.\n", num_nodes_, num_cpus_);
    113   sat_assert(CPU_SETSIZE >= num_cpus_);
    114   cpu_sets_.resize(num_nodes_);
    115   cpu_sets_valid_.resize(num_nodes_);
    116   // Create error diagnoser.
    117   error_diagnoser_ = new ErrorDiag();
    118   if (!error_diagnoser_->set_os(this))
    119     return false;
    120   return true;
    121 }
    122 
    123 // Machine type detected. Can we implement all these functions correctly?
    124 bool OsLayer::IsSupported() {
    125   if (kOpenSource) {
    126     // There are no explicitly supported systems in the open source version.
    127     return true;
    128   }
    129 
    130   // This is the default empty implementation.
    131   // SAT won't report full error information.
    132   return false;
    133 }
    134 
    135 int OsLayer::AddressMode() {
    136   // Detect 32/64 bit binary.
    137   void *pvoid = 0;
    138   return sizeof(pvoid) * 8;
    139 }
    140 
    141 // Translates user virtual to physical address.
    142 uint64 OsLayer::VirtualToPhysical(void *vaddr) {
    143   uint64 frame, shift;
    144   off64_t off = ((uintptr_t)vaddr) / sysconf(_SC_PAGESIZE) * 8;
    145   int fd = open(kPagemapPath, O_RDONLY);
    146   // /proc/self/pagemap is available in kernel >= 2.6.25
    147   if (fd < 0)
    148     return 0;
    149 
    150   if (lseek64(fd, off, SEEK_SET) != off || read(fd, &frame, 8) != 8) {
    151     int err = errno;
    152     string errtxt = ErrorString(err);
    153     logprintf(0, "Process Error: failed to access %s with errno %d (%s)\n",
    154               kPagemapPath, err, errtxt.c_str());
    155     if (fd >= 0)
    156       close(fd);
    157     return 0;
    158   }
    159   close(fd);
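          // Decode the pagemap entry as this code assumes it (the legacy layout):
          // bit 63 = page present, bit 62 = swapped, bits 60:55 = page size shift,
          // bits 54:0 = page frame number.  The physical address is the frame
          // shifted up by the page size shift, plus the offset within the page.
          // A return value of 0 means "unknown", not physical address zero.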
    160   if (!(frame & (1LL << 63)) || (frame & (1LL << 62)))
    161     return 0;
    162   shift = (frame >> 55) & 0x3f;
    163   frame = (frame & 0x007fffffffffffffLL) << shift;
    164   return frame | ((uintptr_t)vaddr & ((1LL << shift) - 1));
    165 }
    166 
    167 // Returns the HD device that contains this file.
    168 string OsLayer::FindFileDevice(string filename) {
    169   return "hdUnknown";
    170 }
    171 
    172 // Returns a list of locations corresponding to HD devices.
    173 list<string> OsLayer::FindFileDevices() {
    174   // No autodetection on unknown systems.
    175   list<string> locations;
    176   return locations;
    177 }
    178 
    179 
    180 // Get HW core features from cpuid instruction.
    181 void OsLayer::GetFeatures() {
    182 #if defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
    183   unsigned int eax = 1, ebx, ecx, edx;
    184   cpuid(&eax, &ebx, &ecx, &edx);
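          // CPUID leaf 1 (eax = 1): EDX bit 19 reports CLFLUSH support and
          // EDX bit 26 reports SSE2, which are the two bits tested below.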
    185   has_clflush_ = (edx >> 19) & 1;
    186   has_vector_ = (edx >> 26) & 1;  // SSE2 caps bit.
    187 
    188   logprintf(9, "Log: has clflush: %s, has sse2: %s\n",
    189             has_clflush_ ? "true" : "false",
    190             has_vector_ ? "true" : "false");
    191 #elif defined(STRESSAPPTEST_CPU_PPC)
    192   // All PPC implementations have cache flush instructions.
    193   has_clflush_ = true;
    194 #elif defined(STRESSAPPTEST_CPU_ARMV7A)
    195   // TODO(nsanders): add detection from /proc/cpuinfo or /proc/self/auxv.
    196   // For now, assume NEON is present; do not run with -W if your CPU lacks it.
    197   has_vector_ = true;  // NEON.
    198 #warning "Unsupported CPU type ARMV7A: unable to determine feature set."
    199 #else
    200 #warning "Unsupported CPU type: unable to determine feature set."
    201 #endif
    202 }
    203 
    204 
    205 // Enable FlushPageCache to be functional instead of a NOP.
    206 void OsLayer::ActivateFlushPageCache(void) {
    207   logprintf(9, "Log: page cache will be flushed as needed\n");
    208   use_flush_page_cache_ = true;
    209 }
    210 
    211 // Flush the page cache to ensure reads come from the disk.
    212 bool OsLayer::FlushPageCache(void) {
    213   if (!use_flush_page_cache_)
    214     return true;
    215 
    216   // First, ask the kernel to write the cache to the disk.
    217   sync();
    218 
    219   // Second, ask the kernel to empty the cache by writing "1" to
    220   // "/proc/sys/vm/drop_caches".
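          // Writing "1" drops only the page cache; "2" would drop dentries and
          // inodes, and "3" both.  Writing this file requires root privileges.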
    221   static const char *drop_caches_file = "/proc/sys/vm/drop_caches";
    222   int dcfile = open(drop_caches_file, O_WRONLY);
    223   if (dcfile < 0) {
    224     int err = errno;
    225     string errtxt = ErrorString(err);
    226     logprintf(3, "Log: failed to open %s - err %d (%s)\n",
    227               drop_caches_file, err, errtxt.c_str());
    228     return false;
    229   }
    230 
    231   ssize_t bytes_written = write(dcfile, "1", 1);
    232   close(dcfile);
    233 
    234   if (bytes_written != 1) {
    235     int err = errno;
    236     string errtxt = ErrorString(err);
    237     logprintf(3, "Log: failed to write %s - err %d (%s)\n",
    238               drop_caches_file, err, errtxt.c_str());
    239     return false;
    240   }
    241   return true;
    242 }
    243 
    244 
    245 // We need to flush the cacheline here.
    246 void OsLayer::Flush(void *vaddr) {
    247   // Use the generic flush. This function is just so we can override
    248   // this if we are so inclined.
    249   if (has_clflush_) {
    250     OsLayer::FastFlush(vaddr);
    251   }
    252 }
    253 
    254 
    255 // Run C or ASM copy as appropriate.
    256 bool OsLayer::AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
    257                               unsigned int size_in_bytes,
    258                               AdlerChecksum *checksum) {
    259   if (has_vector_) {
    260     return AdlerMemcpyAsm(dstmem, srcmem, size_in_bytes, checksum);
    261   } else {
    262     return AdlerMemcpyWarmC(dstmem, srcmem, size_in_bytes, checksum);
    263   }
    264 }
    265 
    266 
    267 // Translate physical address to memory module/chip name.
    268 // Assumes interleaving between two memory channels based on the XOR of
    269 // all address bits in the 'channel_hash_' mask; each channel consists of
    270 // repeating 'channel_width_'-bit blocks divided evenly among its chips.
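        // Illustrative example (hypothetical values, not a real board mapping):
        // with channel_hash_ = 0x40, channel_width_ = 64 and four chips listed per
        // channel, address 0x44 has parity(0x44 & 0x40) = 1, so channel 1 is used;
        // the byte offset 0x44 % 8 = 4 falls in the range covered by chip 4 / 2 = 2.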
    271 int OsLayer::FindDimm(uint64 addr, char *buf, int len) {
    272   if (!channels_) {
    273     snprintf(buf, len, "DIMM Unknown");
    274     return -1;
    275   }
    276 
    277   // Find channel by XORing address bits in channel_hash mask.
    278   uint32 low = static_cast<uint32>(addr & channel_hash_);
    279   uint32 high = static_cast<uint32>((addr & channel_hash_) >> 32);
    280   vector<string>& channel = (*channels_)[
    281       __builtin_parity(high) ^ __builtin_parity(low)];
    282 
    283   // Find the DRAM chip: take the byte offset within the channel
    284   // (address mod channel width in bytes), then divide the channel
    285   // evenly among the listed DRAM chips. Note: this will not work
    286   // with x4 DRAM.
    287   int chip = (addr % (channel_width_ / 8)) /
    288              ((channel_width_ / 8) / channel.size());
    289   string name = channel[chip];
    290   snprintf(buf, len, "%s", name.c_str());
    291   return 1;
    292 }
    293 
    294 
    295 // Classifies addresses according to "regions".
    296 // This isn't really implemented meaningfully here.
    297 int32 OsLayer::FindRegion(uint64 addr) {
    298   static bool warned = false;
    299 
    300   if (regionsize_ == 0) {
    301     regionsize_ = totalmemsize_ / 8;
    302     if (regionsize_ < 512 * kMegabyte)
    303       regionsize_ = 512 * kMegabyte;
    304     regioncount_ = totalmemsize_ / regionsize_;
    305     if (regioncount_ < 1) regioncount_ = 1;
    306   }
    307 
    308   int32 region_num = addr / regionsize_;
    309   if (region_num >= regioncount_) {
    310     if (!warned) {
    311         logprintf(0, "Log: region number %d exceeds region count %d\n",
    312                   region_num, regioncount_);
    313         warned = true;
    314     }
    315     region_num = region_num % regioncount_;
    316   }
    317   return region_num;
    318 }
    319 
    320 // Report which cores are associated with a given region.
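        // For example, with num_nodes_ = 2 and num_cpus_per_node_ = 4, region 1
        // covers CPUs 4 through 7 (mask bits 4-7 set).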
    321 cpu_set_t *OsLayer::FindCoreMask(int32 region) {
    322   sat_assert(region >= 0);
    323   region %= num_nodes_;
    324   if (!cpu_sets_valid_[region]) {
    325     CPU_ZERO(&cpu_sets_[region]);
    326     for (int i = 0; i < num_cpus_per_node_; ++i) {
    327       CPU_SET(i + region * num_cpus_per_node_, &cpu_sets_[region]);
    328     }
    329     cpu_sets_valid_[region] = true;
    330     logprintf(5, "Log: Region %d mask 0x%s\n",
    331                  region, FindCoreMaskFormat(region).c_str());
    332   }
    333   return &cpu_sets_[region];
    334 }
    335 
    336 // Return cores associated with a given region in hex string.
    337 string OsLayer::FindCoreMaskFormat(int32 region) {
    338   cpu_set_t* mask = FindCoreMask(region);
    339   string format = cpuset_format(mask);
    340   if (format.size() < 8)
    341     format = string(8 - format.size(), '0') + format;
    342   return format;
    343 }
    344 
    345 // Report an error in an easily parseable way.
    346 bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
    347   time_t now = clock_->Now();
    348   int ttf = now - time_initialized_;
    349   if (strlen(symptom) && strlen(part)) {
    350     logprintf(0, "Report Error: %s : %s : %d : %ds\n",
    351               symptom, part, count, ttf);
    352   } else {
    353     // Log something so the error still shows up, but this won't break the
    354     // parser.
    355     logprintf(0, "Warning: Invalid Report Error: "
    356               "%s : %s : %d : %ds\n", symptom, part, count, ttf);
    357   }
    358   return true;
    359 }
    360 
    361 // Read the number of hugepages out of the kernel interface in proc.
    362 int64 OsLayer::FindHugePages() {
    363   char buf[65] = "0";
    364 
    365   // This is a kernel interface to query the number of hugepages
    366   // available in the system.
    367   static const char *hugepages_info_file = "/proc/sys/vm/nr_hugepages";
    368   int hpfile = open(hugepages_info_file, O_RDONLY);
    369   if (hpfile < 0) return 0;  // No hugepage interface exposed by this kernel.
    370   ssize_t bytes_read = read(hpfile, buf, 64);
    371   close(hpfile);
    372 
    373   if (bytes_read <= 0) {
    374     logprintf(12, "Log: /proc/sys/vm/nr_hugepages "
    375                   "read did not provide data\n");
    376     return 0;
    377   }
    378 
    379   if (bytes_read == 64) {
    380     logprintf(0, "Process Error: /proc/sys/vm/nr_hugepages "
    381                  "is surprisingly large\n");
    382     return 0;
    383   }
    384 
    385   // Add a null termination to be string safe.
    386   buf[bytes_read] = '\0';
    387   // Read the page count.
    388   int64 pages = strtoull(buf, NULL, 10);  // NOLINT
    389 
    390   return pages;
    391 }
    392 
    393 int64 OsLayer::FindFreeMemSize() {
    394   int64 size = 0;
    395   int64 minsize = 0;
    396   if (totalmemsize_ > 0)
    397     return totalmemsize_;
    398 
    399   int64 pages = sysconf(_SC_PHYS_PAGES);
    400   int64 avpages = sysconf(_SC_AVPHYS_PAGES);
    401   int64 pagesize = sysconf(_SC_PAGESIZE);
    402   int64 physsize = pages * pagesize;
    403   int64 avphyssize = avpages * pagesize;
    404 
    405   // Assume 2MB hugepages.
    406   int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
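          // (If the system's default hugepage size is not 2MB, as reported by
          // "Hugepagesize" in /proc/meminfo, this estimate will be wrong.)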
    407 
    408   if ((pages == -1) || (pagesize == -1)) {
    409     logprintf(0, "Process Error: sysconf could not determine memory size.\n");
    410     return 0;
    411   }
    412 
    413   // We want to leave enough stuff for things to run.
    414   // If the user specified a minimum amount of memory to expect, require that.
    415   // Otherwise, if more than 2GB is present, leave 192M + 5% for other stuff.
    416   // If less than 2GB is present use 85% of what's available.
    417   // These are fairly arbitrary numbers that seem to work OK.
    418   //
    419   // TODO(nsanders): is there a more correct way to determine target
    420   // memory size?
    421   if (hugepagesize > 0) {
    422     if (min_hugepages_bytes_ > 0) {
    423       minsize = min_hugepages_bytes_;
    424     } else {
    425       minsize = hugepagesize;
    426     }
    427   } else {
    428     if (physsize < 2048LL * kMegabyte) {
    429       minsize = ((pages * 85) / 100) * pagesize;
    430     } else {
    431       minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
    432     }
    433     // Make sure that at least reserve_mb_ is left for the system.
    434     if (reserve_mb_ > 0) {
    435       int64 totalsize = pages * pagesize;
    436       int64 reserve_bytes = reserve_mb_ * kMegabyte;
    437       if (reserve_bytes > totalsize) {
    438         logprintf(0, "Procedural Error: %lld is bigger than the total memory "
    439                   "available %lld\n", reserve_bytes, totalsize);
    440       } else if (reserve_bytes > totalsize - minsize) {
    441         logprintf(5, "Warning: Overriding memory to use: original %lld, "
    442                   "current %lld\n", minsize, totalsize - reserve_bytes);
    443         minsize = totalsize - reserve_bytes;
    444       }
    445     }
    446   }
    447 
    448   // Use hugepage sizing if available.
    449   if (hugepagesize > 0) {
    450     if (hugepagesize < minsize) {
    451       logprintf(0, "Procedural Error: Not enough hugepages. "
    452                    "%lldMB available < %lldMB required.\n",
    453                 hugepagesize / kMegabyte,
    454                 minsize / kMegabyte);
    455       // Require the calculated minimum amount of memory.
    456       size = minsize;
    457     } else {
    458       // Require that we get all hugepages.
    459       size = hugepagesize;
    460     }
    461   } else {
    462     // Require the calculated minimum amount of memory.
    463     size = minsize;
    464   }
    465 
    466   logprintf(5, "Log: Total %lld MB. Free %lld MB. Hugepages %lld MB. "
    467                "Targeting %lld MB (%lld%%)\n",
    468             physsize / kMegabyte,
    469             avphyssize / kMegabyte,
    470             hugepagesize / kMegabyte,
    471             size / kMegabyte,
    472             size * 100 / physsize);
    473 
    474   totalmemsize_ = size;
    475   return size;
    476 }
    477 
    478 // Allocates all memory available.
    479 int64 OsLayer::AllocateAllMem() {
    480   int64 length = FindFreeMemSize();
    481   bool retval = AllocateTestMem(length, 0);
    482   if (retval)
    483     return length;
    484   else
    485     return 0;
    486 }
    487 
    488 // Allocate the target memory. This may be from malloc, hugepage pool
    489 // or other platform specific sources.
    490 bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
    491   // Try hugepages first.
    492   void *buf = 0;
    493 
    494   sat_assert(length >= 0);
    495 
    496   if (paddr_base)
    497     logprintf(0, "Process Error: non-zero paddr_base %#llx is not supported, "
    498               "ignoring it.\n", paddr_base);
    499 
    500   // Determine optimal memory allocation path.
    501   bool prefer_hugepages = false;
    502   bool prefer_posix_shm = false;
    503   bool prefer_dynamic_mapping = false;
    504 
    505   // Are there enough hugepages?
    506   int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
    507   // TODO(nsanders): Is there enough /dev/shm? Is there enough free memory?
    508   if ((length >= 1400LL * kMegabyte) && (address_mode_ == 32)) {
    509     prefer_dynamic_mapping = true;
    510     prefer_posix_shm = true;
    511     logprintf(3, "Log: Prefer POSIX shared memory allocation.\n");
    512     logprintf(3, "Log: You may need to run "
    513                  "'sudo mount -o remount,size=100%% /dev/shm'.\n");
    514   } else if (hugepagesize >= length) {
    515     prefer_hugepages = true;
    516     logprintf(3, "Log: Prefer using hugepage allocation.\n");
    517   } else {
    518     logprintf(3, "Log: Prefer plain malloc memory allocation.\n");
    519   }
    520 
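          // The code below tries, in order: a SysV shared memory segment with
          // SHM_HUGETLB (when hugepages are preferred), a POSIX shared memory
          // object (when preferred), and otherwise anonymous mmap() with a
          // memalign() fallback.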
    521 #ifdef HAVE_SYS_SHM_H
    522   // Allocate hugepage mapped memory.
    523   if (prefer_hugepages) {
    524     do { // Allow break statement.
    525       int shmid;
    526       void *shmaddr;
    527 
    528       if ((shmid = shmget(2, length,
    529               SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
    530         int err = errno;
    531         string errtxt = ErrorString(err);
    532         logprintf(3, "Log: failed to allocate shared hugepage "
    533                       "object - err %d (%s)\n",
    534                   err, errtxt.c_str());
    535         logprintf(3, "Log: sysctl -w vm.nr_hugepages=XXX allows hugepages.\n");
    536         break;
    537       }
    538 
    539       shmaddr = shmat(shmid, NULL, 0);
    540       if (shmaddr == reinterpret_cast<void*>(-1)) {
    541         int err = errno;
    542         string errtxt = ErrorString(err);
    543         logprintf(0, "Log: failed to attach shared "
    544                      "hugepage object - err %d (%s).\n",
    545                   err, errtxt.c_str());
    546         if (shmctl(shmid, IPC_RMID, NULL) < 0) {
    547           int err = errno;
    548           string errtxt = ErrorString(err);
    549           logprintf(0, "Log: failed to remove shared "
    550                        "hugepage object - err %d (%s).\n",
    551                     err, errtxt.c_str());
    552         }
    553         break;
    554       }
    555       use_hugepages_ = true;
    556       shmid_ = shmid;
    557       buf = shmaddr;
    558       logprintf(0, "Log: Using shared hugepage object 0x%x at %p.\n",
    559                 shmid, shmaddr);
    560     } while (0);
    561   }
    562 
    563   if ((!use_hugepages_) && prefer_posix_shm) {
    564     do {
    565       int shm_object;
    566       void *shmaddr = NULL;
    567 
    568       shm_object = shm_open("/stressapptest", O_CREAT | O_RDWR, S_IRWXU);
    569       if (shm_object < 0) {
    570         int err = errno;
    571         string errtxt = ErrorString(err);
    572         logprintf(3, "Log: failed to allocate shared "
    573                       "smallpage object - err %d (%s)\n",
    574                   err, errtxt.c_str());
    575         break;
    576       }
    577 
    578       if (0 > ftruncate(shm_object, length)) {
    579         int err = errno;
    580         string errtxt = ErrorString(err);
    581         logprintf(3, "Log: failed to ftruncate shared "
    582                       "smallpage object - err %d (%s)\n",
    583                   err, errtxt.c_str());
    584         break;
    585       }
    586 
    587       // 32 bit linux apps can only use ~1.4G of address space.
    588       // Use dynamic mapping for allocations larger than that.
    589       // Currently perf hit is ~10% for this.
    590       if (prefer_dynamic_mapping) {
    591         dynamic_mapped_shmem_ = true;
    592       } else {
    593         // Do a full mapping here otherwise.
    594         shmaddr = mmap64(NULL, length, PROT_READ | PROT_WRITE,
    595                          MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
    596                          shm_object, 0);
    597         if (shmaddr == reinterpret_cast<void*>(-1)) {
    598           int err = errno;
    599           string errtxt = ErrorString(err);
    600           logprintf(0, "Log: failed to map shared "
    601                        "smallpage object - err %d (%s).\n",
    602                     err, errtxt.c_str());
    603           break;
    604         }
    605       }
    606 
    607       use_posix_shm_ = true;
    608       shmid_ = shm_object;
    609       buf = shmaddr;
    610       char location_message[256] = "";
    611       if (dynamic_mapped_shmem_) {
    612         sprintf(location_message, "mapped as needed");
    613       } else {
    614         sprintf(location_message, "at %p", shmaddr);
    615       }
    616       logprintf(0, "Log: Using posix shared memory object 0x%x %s.\n",
    617                 shm_object, location_message);
    618     } while (0);
    619     shm_unlink("/stressapptest");
    620   }
    621 #endif  // HAVE_SYS_SHM_H
    622 
    623   if (!use_hugepages_ && !use_posix_shm_) {
    624     // If the page size is at least what SAT expects (4096 bytes), explicitly
    625     // perform mmap() allocation.
    626     if (sysconf(_SC_PAGESIZE) >= 4096) {
    627       void *map_buf = mmap(NULL, length, PROT_READ | PROT_WRITE,
    628                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    629       if (map_buf != MAP_FAILED) {
    630         buf = map_buf;
    631         mmapped_allocation_ = true;
    632         logprintf(0, "Log: Using mmap() allocation at %p.\n", buf);
    633       }
    634     }
    635     if (!mmapped_allocation_) {
    636       // Use memalign to ensure that blocks are aligned enough for disk direct
    637       // IO.
    638       buf = static_cast<char*>(memalign(4096, length));
    639       if (buf) {
    640         logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
    641       } else {
    642         logprintf(0, "Process Error: memalign returned 0\n");
    643         if ((length >= 1499LL * kMegabyte) && (address_mode_ == 32)) {
    644           logprintf(0, "Log: You are trying to allocate > 1.4G in a 32-bit "
    645                        "process. Please set up shared memory.\n");
    646         }
    647       }
    648     }
    649   }
    650 
    651   testmem_ = buf;
    652   if (buf || dynamic_mapped_shmem_) {
    653     testmemsize_ = length;
    654   } else {
    655     testmemsize_ = 0;
    656   }
    657 
    658   return (buf != 0) || dynamic_mapped_shmem_;
    659 }
    660 
    661 // Free the test memory.
    662 void OsLayer::FreeTestMem() {
    663   if (testmem_) {
    664     if (use_hugepages_) {
    665 #ifdef HAVE_SYS_SHM_H
    666       shmdt(testmem_);
    667       shmctl(shmid_, IPC_RMID, NULL);
    668 #endif
    669     } else if (use_posix_shm_) {
    670       if (!dynamic_mapped_shmem_) {
    671         munmap(testmem_, testmemsize_);
    672       }
    673       close(shmid_);
    674     } else if (mmapped_allocation_) {
    675       munmap(testmem_, testmemsize_);
    676     } else {
    677       free(testmem_);
    678     }
    679     testmem_ = 0;
    680     testmemsize_ = 0;
    681   }
    682 }
    683 
    684 
    685 // Prepare the target memory. It may require mapping in, or this may be a no-op.
    686 void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) {
    687   sat_assert((offset + length) <= testmemsize_);
    688   if (dynamic_mapped_shmem_) {
    689     // TODO(nsanders): Check if we can support MAP_NONBLOCK,
    690     // and evaluate performance hit from not using it.
    691 #ifdef HAVE_MMAP64
    692     void * mapping = mmap64(NULL, length, PROT_READ | PROT_WRITE,
    693                      MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
    694                      shmid_, offset);
    695 #else
    696     void * mapping = mmap(NULL, length, PROT_READ | PROT_WRITE,
    697                      MAP_SHARED | MAP_NORESERVE | MAP_LOCKED | MAP_POPULATE,
    698                      shmid_, offset);
    699 #endif
    700     if (mapping == MAP_FAILED) {
    701       string errtxt = ErrorString(errno);
    702       logprintf(0, "Process Error: PrepareTestMem mmap64(%llx, %llx) failed. "
    703                    "error: %s.\n",
    704                 offset, length, errtxt.c_str());
    705       sat_assert(0);
    706     }
    707     return mapping;
    708   }
    709 
    710   return reinterpret_cast<void*>(reinterpret_cast<char*>(testmem_) + offset);
    711 }
    712 
    713 // Release the test memory resources, if any.
    714 void OsLayer::ReleaseTestMem(void *addr, uint64 offset, uint64 length) {
    715   if (dynamic_mapped_shmem_) {
    716     int retval = munmap(addr, length);
    717     if (retval == -1) {
    718       string errtxt = ErrorString(errno);
    719       logprintf(0, "Process Error: ReleaseTestMem munmap(%p, %llx) failed. "
    720                    "error: %s.\n",
    721                 addr, length, errtxt.c_str());
    722       sat_assert(0);
    723     }
    724   }
    725 }
    726 
    727 // No error polling on unknown systems.
    728 int OsLayer::ErrorPoll() {
    729   return 0;
    730 }
    731 
    732 // Generally, poll for errors once per second.
    733 void OsLayer::ErrorWait() {
    734   sat_sleep(1);
    735   return;
    736 }
    737 
    738 // Open a PCI bus-dev-func as a file and return its file descriptor.
    739 // Error is indicated by return value less than zero.
    740 int OsLayer::PciOpen(int bus, int device, int function) {
    741   char dev_file[256];
    742 
    743   snprintf(dev_file, sizeof(dev_file), "/proc/bus/pci/%02x/%02x.%x",
    744            bus, device, function);
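          // For example, bus 0, device 2, function 0 maps to "/proc/bus/pci/00/02.0".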
    745 
    746   int fd = open(dev_file, O_RDWR);
    747   if (fd == -1) {
    748     logprintf(0, "Process Error: Unable to open PCI bus %d, device %d, "
    749                  "function %d (errno %d).\n",
    750               bus, device, function, errno);
    751     return -1;
    752   }
    753 
    754   return fd;
    755 }
    756 
    757 
    758 // Read and write functions to access PCI config.
    759 uint32 OsLayer::PciRead(int fd, uint32 offset, int width) {
    760   // Strict aliasing rules lawyers will cause data corruption
    761   // on cast pointers in some gccs.
    762   union {
    763     uint32 l32;
    764     uint16 l16;
    765     uint8 l8;
    766   } datacast;
    767   datacast.l32 = 0;
    768   uint32 size = width / 8;
    769 
    770   sat_assert((width == 32) || (width == 16) || (width == 8));
    771   sat_assert(offset <= (256 - size));
    772 
    773   if (lseek(fd, offset, SEEK_SET) < 0) {
    774     logprintf(0, "Process Error: Can't seek %x\n", offset);
    775     return 0;
    776   }
    777   if (read(fd, &datacast, size) != static_cast<ssize_t>(size)) {
    778     logprintf(0, "Process Error: Can't read %x\n", offset);
    779     return 0;
    780   }
    781 
    782   // Extract the data.
    783   switch (width) {
    784     case 8:
    785       sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
    786       return datacast.l8;
    787     case 16:
    788       sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
    789       return datacast.l16;
    790     case 32:
    791       return datacast.l32;
    792   }
    793   return 0;
    794 }
    795 
    796 void OsLayer::PciWrite(int fd, uint32 offset, uint32 value, int width) {
    797   // Strict aliasing rules lawyers will cause data corruption
    798   // on cast pointers in some gccs.
    799   union {
    800     uint32 l32;
    801     uint16 l16;
    802     uint8 l8;
    803   } datacast;
    804   datacast.l32 = 0;
    805   uint32 size = width / 8;
    806 
    807   sat_assert((width == 32) || (width == 16) || (width == 8));
    808   sat_assert(offset <= (256 - size));
    809 
    810   // Cram the data into the right alignment.
    811   switch (width) {
    812     case 8:
    813       sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
    814       datacast.l8 = value;
              break;
    815     case 16:
    816       sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
    817       datacast.l16 = value;
              break;
    818     case 32:
    819       datacast.l32 = value;
              break;
    820   }
    821 
    822   if (lseek(fd, offset, SEEK_SET) < 0) {
    823     logprintf(0, "Process Error: Can't seek %x\n", offset);
    824     return;
    825   }
    826   if (write(fd, &datacast, size) != static_cast<ssize_t>(size)) {
    827     logprintf(0, "Process Error: Can't write %x to %x\n", datacast.l32, offset);
    828     return;
    829   }
    830 
    831   return;
    832 }
    833 
    834 
    835 
    836 // Open dev msr.
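        // Note: /dev/cpu/<n>/msr is provided by the "msr" kernel module
        // (modprobe msr) and normally requires root to open.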
    837 int OsLayer::OpenMSR(uint32 core, uint32 address) {
    838   char buf[256];
    839   snprintf(buf, sizeof(buf), "/dev/cpu/%d/msr", core);
    840   int fd = open(buf, O_RDWR);
    841   if (fd < 0)
    842     return fd;
    843 
    844   uint32 pos = lseek(fd, address, SEEK_SET);
    845   if (pos != address) {
    846     close(fd);
    847     logprintf(5, "Log: can't seek to msr %x, cpu %d\n", address, core);
    848     return -1;
    849   }
    850 
    851   return fd;
    852 }
    853 
    854 bool OsLayer::ReadMSR(uint32 core, uint32 address, uint64 *data) {
    855   int fd = OpenMSR(core, address);
    856   if (fd < 0)
    857     return false;
    858 
    859   // Read from the msr.
    860   bool res = (sizeof(*data) == read(fd, data, sizeof(*data)));
    861 
    862   if (!res)
    863     logprintf(5, "Log: Failed to read msr %x core %d\n", address, core);
    864 
    865   close(fd);
    866 
    867   return res;
    868 }
    869 
    870 bool OsLayer::WriteMSR(uint32 core, uint32 address, uint64 *data) {
    871   int fd = OpenMSR(core, address);
    872   if (fd < 0)
    873     return false;
    874 
    875   // Write to the msr
    876   bool res = (sizeof(*data) == write(fd, data, sizeof(*data)));
    877 
    878   if (!res)
    879     logprintf(5, "Log: Failed to write msr %x core %d\n", address, core);
    880 
    881   close(fd);
    882 
    883   return res;
    884 }
    885 
    886 // Extract bits [n+len-1, n] from a 32 bit word.
    887 // so GetBitField(0x0f00, 8, 4) == 0xf.
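        // Note: this assumes len < 32; shifting a 32-bit 1 by 32 bits is undefined.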
    888 uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) {
    889   return (val >> n) & ((1<<len) - 1);
    890 }
    891 
    892 // Generic CPU stress workload that would work on any CPU/Platform.
    893 // Floating-point array moving-average calculation.
    894 bool OsLayer::CpuStressWorkload() {
    895   double float_arr[100];
    896   double sum = 0;
    897 #ifdef HAVE_RAND_R
    898   unsigned int seed = 12345;
    899 #endif
    900 
    901   // Initialize array with random numbers.
    902   for (int i = 0; i < 100; i++) {
    903 #ifdef HAVE_RAND_R
    904     float_arr[i] = rand_r(&seed);
    905     if (rand_r(&seed) % 2)
    906       float_arr[i] *= -1.0;
    907 #else
    908     if (i == 0) srand(time(NULL));  // Seed once, not on every iteration.
    909     float_arr[i] = rand();  // NOLINT
    910     if (rand() % 2)         // NOLINT
    911       float_arr[i] *= -1.0;
    912 #endif
    913   }
    914 
    915   // Calculate moving average.
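          // Each step replaces element (i mod 100) with the mean of itself and its
          // two ring neighbors; the running 'sum' (and the printf below) keeps the
          // compiler from optimizing the loop away.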
    916   for (int i = 0; i < 100000000; i++) {
    917     float_arr[i % 100] =
    918       (float_arr[i % 100] + float_arr[(i + 1) % 100] +
    919        float_arr[(i + 99) % 100]) / 3;
    920     sum += float_arr[i % 100];
    921   }
    922 
    923   // Artificial printf so the loops do not get optimized away.
    924   if (sum == 0.0)
    925     logprintf(12, "Log: I'm Feeling Lucky!\n");
    926   return true;
    927 }
    928