Home | History | Annotate | Download | only in src
      1 // Copyright 2015 Google Inc. All rights reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "sysinfo.h"
     16 #include "internal_macros.h"
     17 
     18 #ifdef BENCHMARK_OS_WINDOWS
     19 #include <Shlwapi.h>
     20 #include <Windows.h>
     21 #include <VersionHelpers.h>
     22 #else
     23 #include <fcntl.h>
     24 #include <sys/resource.h>
     25 #include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
     26 #include <sys/time.h>
     27 #include <unistd.h>
     28 #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX
     29 #include <sys/sysctl.h>
     30 #endif
     31 #endif
     32 
     33 #include <cerrno>
     34 #include <cstdio>
     35 #include <cstdint>
     36 #include <cstdlib>
     37 #include <cstring>
     38 #include <iostream>
     39 #include <limits>
     40 #include <mutex>
     41 
     42 #include "arraysize.h"
     43 #include "check.h"
     44 #include "cycleclock.h"
     45 #include "internal_macros.h"
     46 #include "log.h"
     47 #include "sleep.h"
     48 #include "string_util.h"
     49 
     50 namespace benchmark {
     51 namespace {
     52 std::once_flag cpuinfo_init;
     53 double cpuinfo_cycles_per_second = 1.0;
     54 int cpuinfo_num_cpus = 1;  // Conservative guess
     55 std::mutex cputimens_mutex;
     56 
     57 #if !defined BENCHMARK_OS_MACOSX
     58 const int64_t estimate_time_ms = 1000;
     59 
     60 // Helper function estimates cycles/sec by observing cycles elapsed during
     61 // sleep(). Using small sleep time decreases accuracy significantly.
     62 int64_t EstimateCyclesPerSecond() {
     63   const int64_t start_ticks = cycleclock::Now();
     64   SleepForMilliseconds(estimate_time_ms);
     65   return cycleclock::Now() - start_ticks;
     66 }
     67 #endif
     68 
     69 #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
     70 // Helper function for reading an int from a file. Returns true if successful
     71 // and the memory location pointed to by value is set to the value read.
     72 bool ReadIntFromFile(const char* file, long* value) {
     73   bool ret = false;
     74   int fd = open(file, O_RDONLY);
     75   if (fd != -1) {
     76     char line[1024];
     77     char* err;
     78     memset(line, '\0', sizeof(line));
     79     CHECK(read(fd, line, sizeof(line) - 1));
     80     const long temp_value = strtol(line, &err, 10);
     81     if (line[0] != '\0' && (*err == '\n' || *err == '\0')) {
     82       *value = temp_value;
     83       ret = true;
     84     }
     85     close(fd);
     86   }
     87   return ret;
     88 }
     89 #endif
     90 
     91 void InitializeSystemInfo() {
     92 #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
     93   char line[1024];
     94   char* err;
     95   long freq;
     96 
     97   bool saw_mhz = false;
     98 
     99   // If the kernel is exporting the tsc frequency use that. There are issues
    100   // where cpuinfo_max_freq cannot be relied on because the BIOS may be
    101   // exporintg an invalid p-state (on x86) or p-states may be used to put the
    102   // processor in a new mode (turbo mode). Essentially, those frequencies
    103   // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
    104   // well.
    105   if (!saw_mhz &&
    106       ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
    107     // The value is in kHz (as the file name suggests).  For example, on a
    108     // 2GHz warpstation, the file contains the value "2000000".
    109     cpuinfo_cycles_per_second = freq * 1000.0;
    110     saw_mhz = true;
    111   }
    112 
    113   // If CPU scaling is in effect, we want to use the *maximum* frequency,
    114   // not whatever CPU speed some random processor happens to be using now.
    115   if (!saw_mhz &&
    116       ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
    117                       &freq)) {
    118     // The value is in kHz.  For example, on a 2GHz warpstation, the file
    119     // contains the value "2000000".
    120     cpuinfo_cycles_per_second = freq * 1000.0;
    121     saw_mhz = true;
    122   }
    123 
    124   // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq.
    125   const char* pname = "/proc/cpuinfo";
    126   int fd = open(pname, O_RDONLY);
    127   if (fd == -1) {
    128     perror(pname);
    129     if (!saw_mhz) {
    130       cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
    131     }
    132     return;
    133   }
    134 
    135   double bogo_clock = 1.0;
    136   bool saw_bogo = false;
    137   long max_cpu_id = 0;
    138   int num_cpus = 0;
    139   line[0] = line[1] = '\0';
    140   size_t chars_read = 0;
    141   do {  // we'll exit when the last read didn't read anything
    142     // Move the next line to the beginning of the buffer
    143     const size_t oldlinelen = strlen(line);
    144     if (sizeof(line) == oldlinelen + 1)  // oldlinelen took up entire line
    145       line[0] = '\0';
    146     else  // still other lines left to save
    147       memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1));
    148     // Terminate the new line, reading more if we can't find the newline
    149     char* newline = strchr(line, '\n');
    150     if (newline == nullptr) {
    151       const size_t linelen = strlen(line);
    152       const size_t bytes_to_read = sizeof(line) - 1 - linelen;
    153       CHECK(bytes_to_read > 0);  // because the memmove recovered >=1 bytes
    154       chars_read = read(fd, line + linelen, bytes_to_read);
    155       line[linelen + chars_read] = '\0';
    156       newline = strchr(line, '\n');
    157     }
    158     if (newline != nullptr) *newline = '\0';
    159 
    160     // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
    161     // accept postive values. Some environments (virtual machines) report zero,
    162     // which would cause infinite looping in WallTime_Init.
    163     if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz") - 1) == 0) {
    164       const char* freqstr = strchr(line, ':');
    165       if (freqstr) {
    166         cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0;
    167         if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
    168           saw_mhz = true;
    169       }
    170     } else if (strncasecmp(line, "bogomips", sizeof("bogomips") - 1) == 0) {
    171       const char* freqstr = strchr(line, ':');
    172       if (freqstr) {
    173         bogo_clock = strtod(freqstr + 1, &err) * 1000000.0;
    174         if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
    175           saw_bogo = true;
    176       }
    177     } else if (strncmp(line, "processor", sizeof("processor") - 1) == 0) {
    178       // The above comparison is case-sensitive because ARM kernels often
    179       // include a "Processor" line that tells you about the CPU, distinct
    180       // from the usual "processor" lines that give you CPU ids. No current
    181       // Linux architecture is using "Processor" for CPU ids.
    182       num_cpus++;  // count up every time we see an "processor :" entry
    183       const char* id_str = strchr(line, ':');
    184       if (id_str) {
    185         const long cpu_id = strtol(id_str + 1, &err, 10);
    186         if (id_str[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id)
    187           max_cpu_id = cpu_id;
    188       }
    189     }
    190   } while (chars_read > 0);
    191   close(fd);
    192 
    193   if (!saw_mhz) {
    194     if (saw_bogo) {
    195       // If we didn't find anything better, we'll use bogomips, but
    196       // we're not happy about it.
    197       cpuinfo_cycles_per_second = bogo_clock;
    198     } else {
    199       // If we don't even have bogomips, we'll use the slow estimation.
    200       cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
    201     }
    202   }
    203   if (num_cpus == 0) {
    204     fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n");
    205   } else {
    206     if ((max_cpu_id + 1) != num_cpus) {
    207       fprintf(stderr,
    208               "CPU ID assignments in /proc/cpuinfo seem messed up."
    209               " This is usually caused by a bad BIOS.\n");
    210     }
    211     cpuinfo_num_cpus = num_cpus;
    212   }
    213 
    214 #elif defined BENCHMARK_OS_FREEBSD
    215 // For this sysctl to work, the machine must be configured without
    216 // SMP, APIC, or APM support.  hz should be 64-bit in freebsd 7.0
    217 // and later.  Before that, it's a 32-bit quantity (and gives the
    218 // wrong answer on machines faster than 2^32 Hz).  See
    219 //  http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html
    220 // But also compare FreeBSD 7.0:
    221 //  http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223
    222 //  231         error = sysctl_handle_quad(oidp, &freq, 0, req);
    223 // To FreeBSD 6.3 (it's the same in 6-STABLE):
    224 //  http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131
    225 //  139         error = sysctl_handle_int(oidp, &freq, sizeof(freq), req);
    226 #if __FreeBSD__ >= 7
    227   uint64_t hz = 0;
    228 #else
    229   unsigned int hz = 0;
    230 #endif
    231   size_t sz = sizeof(hz);
    232   const char* sysctl_path = "machdep.tsc_freq";
    233   if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) {
    234     fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
    235             sysctl_path, strerror(errno));
    236     cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
    237   } else {
    238     cpuinfo_cycles_per_second = hz;
    239   }
    240 // TODO: also figure out cpuinfo_num_cpus
    241 
    242 #elif defined BENCHMARK_OS_WINDOWS
    243   // In NT, read MHz from the registry. If we fail to do so or we're in win9x
    244   // then make a crude estimate.
    245   DWORD data, data_size = sizeof(data);
    246   if (IsWindowsXPOrGreater() &&
    247       SUCCEEDED(
    248           SHGetValueA(HKEY_LOCAL_MACHINE,
    249                       "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
    250                       "~MHz", nullptr, &data, &data_size)))
    251     cpuinfo_cycles_per_second = static_cast<double>((int64_t)data * (int64_t)(1000 * 1000));  // was mhz
    252   else
    253     cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
    254 // TODO: also figure out cpuinfo_num_cpus
    255 
    256 #elif defined BENCHMARK_OS_MACOSX
    257   // returning "mach time units" per second. the current number of elapsed
    258   // mach time units can be found by calling uint64 mach_absolute_time();
    259   // while not as precise as actual CPU cycles, it is accurate in the face
    260   // of CPU frequency scaling and multi-cpu/core machines.
    261   // Our mac users have these types of machines, and accuracy
    262   // (i.e. correctness) trumps precision.
    263   // See cycleclock.h: CycleClock::Now(), which returns number of mach time
    264   // units on Mac OS X.
    265   mach_timebase_info_data_t timebase_info;
    266   mach_timebase_info(&timebase_info);
    267   double mach_time_units_per_nanosecond =
    268       static_cast<double>(timebase_info.denom) /
    269       static_cast<double>(timebase_info.numer);
    270   cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9;
    271 
    272   int num_cpus = 0;
    273   size_t size = sizeof(num_cpus);
    274   int numcpus_name[] = {CTL_HW, HW_NCPU};
    275   if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, nullptr, 0) ==
    276           0 &&
    277       (size == sizeof(num_cpus)))
    278     cpuinfo_num_cpus = num_cpus;
    279 
    280 #else
    281   // Generic cycles per second counter
    282   cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond());
    283 #endif
    284 }
    285 }  // end namespace
    286 
    287 // getrusage() based implementation of MyCPUUsage
    288 static double MyCPUUsageRUsage() {
    289 #ifndef BENCHMARK_OS_WINDOWS
    290   struct rusage ru;
    291   if (getrusage(RUSAGE_SELF, &ru) == 0) {
    292     return (static_cast<double>(ru.ru_utime.tv_sec) +
    293             static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
    294             static_cast<double>(ru.ru_stime.tv_sec) +
    295             static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
    296   } else {
    297     return 0.0;
    298   }
    299 #else
    300   HANDLE proc = GetCurrentProcess();
    301   FILETIME creation_time;
    302   FILETIME exit_time;
    303   FILETIME kernel_time;
    304   FILETIME user_time;
    305   ULARGE_INTEGER kernel;
    306   ULARGE_INTEGER user;
    307   GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time);
    308   kernel.HighPart = kernel_time.dwHighDateTime;
    309   kernel.LowPart = kernel_time.dwLowDateTime;
    310   user.HighPart = user_time.dwHighDateTime;
    311   user.LowPart = user_time.dwLowDateTime;
    312   return (static_cast<double>(kernel.QuadPart) +
    313           static_cast<double>(user.QuadPart)) * 1e-7;
    314 #endif  // OS_WINDOWS
    315 }
    316 
    317 #ifndef BENCHMARK_OS_WINDOWS
    318 static bool MyCPUUsageCPUTimeNsLocked(double* cputime) {
    319   static int cputime_fd = -1;
    320   if (cputime_fd == -1) {
    321     cputime_fd = open("/proc/self/cputime_ns", O_RDONLY);
    322     if (cputime_fd < 0) {
    323       cputime_fd = -1;
    324       return false;
    325     }
    326   }
    327   char buff[64];
    328   memset(buff, 0, sizeof(buff));
    329   if (pread(cputime_fd, buff, sizeof(buff) - 1, 0) <= 0) {
    330     close(cputime_fd);
    331     cputime_fd = -1;
    332     return false;
    333   }
    334   unsigned long long result = strtoull(buff, nullptr, 0);
    335   if (result == (std::numeric_limits<unsigned long long>::max)()) {
    336     close(cputime_fd);
    337     cputime_fd = -1;
    338     return false;
    339   }
    340   *cputime = static_cast<double>(result) / 1e9;
    341   return true;
    342 }
    343 #endif  // OS_WINDOWS
    344 
    345 double MyCPUUsage() {
    346 #ifndef BENCHMARK_OS_WINDOWS
    347   {
    348     std::lock_guard<std::mutex> l(cputimens_mutex);
    349     static bool use_cputime_ns = true;
    350     if (use_cputime_ns) {
    351       double value;
    352       if (MyCPUUsageCPUTimeNsLocked(&value)) {
    353         return value;
    354       }
    355       // Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage().
    356       VLOG(1) << "Reading /proc/self/cputime_ns failed. Using getrusage().\n";
    357       use_cputime_ns = false;
    358     }
    359   }
    360 #endif  // OS_WINDOWS
    361   return MyCPUUsageRUsage();
    362 }
    363 
    364 double ChildrenCPUUsage() {
    365 #ifndef BENCHMARK_OS_WINDOWS
    366   struct rusage ru;
    367   if (getrusage(RUSAGE_CHILDREN, &ru) == 0) {
    368     return (static_cast<double>(ru.ru_utime.tv_sec) +
    369             static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 +
    370             static_cast<double>(ru.ru_stime.tv_sec) +
    371             static_cast<double>(ru.ru_stime.tv_usec) * 1e-6);
    372   } else {
    373     return 0.0;
    374   }
    375 #else
    376   // TODO: Not sure what this even means on Windows
    377   return 0.0;
    378 #endif  // OS_WINDOWS
    379 }
    380 
    381 double CyclesPerSecond(void) {
    382   std::call_once(cpuinfo_init, InitializeSystemInfo);
    383   return cpuinfo_cycles_per_second;
    384 }
    385 
    386 int NumCPUs(void) {
    387   std::call_once(cpuinfo_init, InitializeSystemInfo);
    388   return cpuinfo_num_cpus;
    389 }
    390 
    391 // The ""'s catch people who don't pass in a literal for "str"
    392 #define strliterallen(str) (sizeof("" str "") - 1)
    393 
    394 // Must use a string literal for prefix.
    395 #define memprefix(str, len, prefix)                       \
    396   ((((len) >= strliterallen(prefix)) &&                   \
    397     std::memcmp(str, prefix, strliterallen(prefix)) == 0) \
    398        ? str + strliterallen(prefix)                      \
    399        : nullptr)
    400 
    401 bool CpuScalingEnabled() {
    402 #ifndef BENCHMARK_OS_WINDOWS
    403   // On Linux, the CPUfreq subsystem exposes CPU information as files on the
    404   // local file system. If reading the exported files fails, then we may not be
    405   // running on Linux, so we silently ignore all the read errors.
    406   for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) {
    407     std::string governor_file = StrCat("/sys/devices/system/cpu/cpu", cpu,
    408                                        "/cpufreq/scaling_governor");
    409     FILE* file = fopen(governor_file.c_str(), "r");
    410     if (!file) break;
    411     char buff[16];
    412     size_t bytes_read = fread(buff, 1, sizeof(buff), file);
    413     fclose(file);
    414     if (memprefix(buff, bytes_read, "performance") == nullptr) return true;
    415   }
    416 #endif
    417   return false;
    418 }
    419 
    420 }  // end namespace benchmark
    421