1 // Copyright 2015 Google Inc. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "sysinfo.h" 16 #include "internal_macros.h" 17 18 #ifdef BENCHMARK_OS_WINDOWS 19 #include <Shlwapi.h> 20 #include <Windows.h> 21 #include <VersionHelpers.h> 22 #else 23 #include <fcntl.h> 24 #include <sys/resource.h> 25 #include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD 26 #include <sys/time.h> 27 #include <unistd.h> 28 #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX 29 #include <sys/sysctl.h> 30 #endif 31 #endif 32 33 #include <cerrno> 34 #include <cstdio> 35 #include <cstdint> 36 #include <cstdlib> 37 #include <cstring> 38 #include <iostream> 39 #include <limits> 40 #include <mutex> 41 42 #include "arraysize.h" 43 #include "check.h" 44 #include "cycleclock.h" 45 #include "internal_macros.h" 46 #include "log.h" 47 #include "sleep.h" 48 #include "string_util.h" 49 50 namespace benchmark { 51 namespace { 52 std::once_flag cpuinfo_init; 53 double cpuinfo_cycles_per_second = 1.0; 54 int cpuinfo_num_cpus = 1; // Conservative guess 55 std::mutex cputimens_mutex; 56 57 #if !defined BENCHMARK_OS_MACOSX 58 const int64_t estimate_time_ms = 1000; 59 60 // Helper function estimates cycles/sec by observing cycles elapsed during 61 // sleep(). Using small sleep time decreases accuracy significantly. 62 int64_t EstimateCyclesPerSecond() { 63 const int64_t start_ticks = cycleclock::Now(); 64 SleepForMilliseconds(estimate_time_ms); 65 return cycleclock::Now() - start_ticks; 66 } 67 #endif 68 69 #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN 70 // Helper function for reading an int from a file. Returns true if successful 71 // and the memory location pointed to by value is set to the value read. 72 bool ReadIntFromFile(const char* file, long* value) { 73 bool ret = false; 74 int fd = open(file, O_RDONLY); 75 if (fd != -1) { 76 char line[1024]; 77 char* err; 78 memset(line, '\0', sizeof(line)); 79 CHECK(read(fd, line, sizeof(line) - 1)); 80 const long temp_value = strtol(line, &err, 10); 81 if (line[0] != '\0' && (*err == '\n' || *err == '\0')) { 82 *value = temp_value; 83 ret = true; 84 } 85 close(fd); 86 } 87 return ret; 88 } 89 #endif 90 91 void InitializeSystemInfo() { 92 #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN 93 char line[1024]; 94 char* err; 95 long freq; 96 97 bool saw_mhz = false; 98 99 // If the kernel is exporting the tsc frequency use that. There are issues 100 // where cpuinfo_max_freq cannot be relied on because the BIOS may be 101 // exporintg an invalid p-state (on x86) or p-states may be used to put the 102 // processor in a new mode (turbo mode). Essentially, those frequencies 103 // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as 104 // well. 105 if (!saw_mhz && 106 ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) { 107 // The value is in kHz (as the file name suggests). For example, on a 108 // 2GHz warpstation, the file contains the value "2000000". 109 cpuinfo_cycles_per_second = freq * 1000.0; 110 saw_mhz = true; 111 } 112 113 // If CPU scaling is in effect, we want to use the *maximum* frequency, 114 // not whatever CPU speed some random processor happens to be using now. 115 if (!saw_mhz && 116 ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", 117 &freq)) { 118 // The value is in kHz. For example, on a 2GHz warpstation, the file 119 // contains the value "2000000". 120 cpuinfo_cycles_per_second = freq * 1000.0; 121 saw_mhz = true; 122 } 123 124 // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq. 125 const char* pname = "/proc/cpuinfo"; 126 int fd = open(pname, O_RDONLY); 127 if (fd == -1) { 128 perror(pname); 129 if (!saw_mhz) { 130 cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond()); 131 } 132 return; 133 } 134 135 double bogo_clock = 1.0; 136 bool saw_bogo = false; 137 long max_cpu_id = 0; 138 int num_cpus = 0; 139 line[0] = line[1] = '\0'; 140 size_t chars_read = 0; 141 do { // we'll exit when the last read didn't read anything 142 // Move the next line to the beginning of the buffer 143 const size_t oldlinelen = strlen(line); 144 if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line 145 line[0] = '\0'; 146 else // still other lines left to save 147 memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1)); 148 // Terminate the new line, reading more if we can't find the newline 149 char* newline = strchr(line, '\n'); 150 if (newline == nullptr) { 151 const size_t linelen = strlen(line); 152 const size_t bytes_to_read = sizeof(line) - 1 - linelen; 153 CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes 154 chars_read = read(fd, line + linelen, bytes_to_read); 155 line[linelen + chars_read] = '\0'; 156 newline = strchr(line, '\n'); 157 } 158 if (newline != nullptr) *newline = '\0'; 159 160 // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only 161 // accept postive values. Some environments (virtual machines) report zero, 162 // which would cause infinite looping in WallTime_Init. 163 if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz") - 1) == 0) { 164 const char* freqstr = strchr(line, ':'); 165 if (freqstr) { 166 cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0; 167 if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) 168 saw_mhz = true; 169 } 170 } else if (strncasecmp(line, "bogomips", sizeof("bogomips") - 1) == 0) { 171 const char* freqstr = strchr(line, ':'); 172 if (freqstr) { 173 bogo_clock = strtod(freqstr + 1, &err) * 1000000.0; 174 if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0) 175 saw_bogo = true; 176 } 177 } else if (strncmp(line, "processor", sizeof("processor") - 1) == 0) { 178 // The above comparison is case-sensitive because ARM kernels often 179 // include a "Processor" line that tells you about the CPU, distinct 180 // from the usual "processor" lines that give you CPU ids. No current 181 // Linux architecture is using "Processor" for CPU ids. 182 num_cpus++; // count up every time we see an "processor :" entry 183 const char* id_str = strchr(line, ':'); 184 if (id_str) { 185 const long cpu_id = strtol(id_str + 1, &err, 10); 186 if (id_str[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id) 187 max_cpu_id = cpu_id; 188 } 189 } 190 } while (chars_read > 0); 191 close(fd); 192 193 if (!saw_mhz) { 194 if (saw_bogo) { 195 // If we didn't find anything better, we'll use bogomips, but 196 // we're not happy about it. 197 cpuinfo_cycles_per_second = bogo_clock; 198 } else { 199 // If we don't even have bogomips, we'll use the slow estimation. 200 cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond()); 201 } 202 } 203 if (num_cpus == 0) { 204 fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n"); 205 } else { 206 if ((max_cpu_id + 1) != num_cpus) { 207 fprintf(stderr, 208 "CPU ID assignments in /proc/cpuinfo seem messed up." 209 " This is usually caused by a bad BIOS.\n"); 210 } 211 cpuinfo_num_cpus = num_cpus; 212 } 213 214 #elif defined BENCHMARK_OS_FREEBSD 215 // For this sysctl to work, the machine must be configured without 216 // SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0 217 // and later. Before that, it's a 32-bit quantity (and gives the 218 // wrong answer on machines faster than 2^32 Hz). See 219 // http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html 220 // But also compare FreeBSD 7.0: 221 // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223 222 // 231 error = sysctl_handle_quad(oidp, &freq, 0, req); 223 // To FreeBSD 6.3 (it's the same in 6-STABLE): 224 // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131 225 // 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); 226 #if __FreeBSD__ >= 7 227 uint64_t hz = 0; 228 #else 229 unsigned int hz = 0; 230 #endif 231 size_t sz = sizeof(hz); 232 const char* sysctl_path = "machdep.tsc_freq"; 233 if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) { 234 fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", 235 sysctl_path, strerror(errno)); 236 cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond()); 237 } else { 238 cpuinfo_cycles_per_second = hz; 239 } 240 // TODO: also figure out cpuinfo_num_cpus 241 242 #elif defined BENCHMARK_OS_WINDOWS 243 // In NT, read MHz from the registry. If we fail to do so or we're in win9x 244 // then make a crude estimate. 245 DWORD data, data_size = sizeof(data); 246 if (IsWindowsXPOrGreater() && 247 SUCCEEDED( 248 SHGetValueA(HKEY_LOCAL_MACHINE, 249 "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 250 "~MHz", nullptr, &data, &data_size))) 251 cpuinfo_cycles_per_second = static_cast<double>((int64_t)data * (int64_t)(1000 * 1000)); // was mhz 252 else 253 cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond()); 254 // TODO: also figure out cpuinfo_num_cpus 255 256 #elif defined BENCHMARK_OS_MACOSX 257 // returning "mach time units" per second. the current number of elapsed 258 // mach time units can be found by calling uint64 mach_absolute_time(); 259 // while not as precise as actual CPU cycles, it is accurate in the face 260 // of CPU frequency scaling and multi-cpu/core machines. 261 // Our mac users have these types of machines, and accuracy 262 // (i.e. correctness) trumps precision. 263 // See cycleclock.h: CycleClock::Now(), which returns number of mach time 264 // units on Mac OS X. 265 mach_timebase_info_data_t timebase_info; 266 mach_timebase_info(&timebase_info); 267 double mach_time_units_per_nanosecond = 268 static_cast<double>(timebase_info.denom) / 269 static_cast<double>(timebase_info.numer); 270 cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9; 271 272 int num_cpus = 0; 273 size_t size = sizeof(num_cpus); 274 int numcpus_name[] = {CTL_HW, HW_NCPU}; 275 if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, nullptr, 0) == 276 0 && 277 (size == sizeof(num_cpus))) 278 cpuinfo_num_cpus = num_cpus; 279 280 #else 281 // Generic cycles per second counter 282 cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond()); 283 #endif 284 } 285 } // end namespace 286 287 // getrusage() based implementation of MyCPUUsage 288 static double MyCPUUsageRUsage() { 289 #ifndef BENCHMARK_OS_WINDOWS 290 struct rusage ru; 291 if (getrusage(RUSAGE_SELF, &ru) == 0) { 292 return (static_cast<double>(ru.ru_utime.tv_sec) + 293 static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 + 294 static_cast<double>(ru.ru_stime.tv_sec) + 295 static_cast<double>(ru.ru_stime.tv_usec) * 1e-6); 296 } else { 297 return 0.0; 298 } 299 #else 300 HANDLE proc = GetCurrentProcess(); 301 FILETIME creation_time; 302 FILETIME exit_time; 303 FILETIME kernel_time; 304 FILETIME user_time; 305 ULARGE_INTEGER kernel; 306 ULARGE_INTEGER user; 307 GetProcessTimes(proc, &creation_time, &exit_time, &kernel_time, &user_time); 308 kernel.HighPart = kernel_time.dwHighDateTime; 309 kernel.LowPart = kernel_time.dwLowDateTime; 310 user.HighPart = user_time.dwHighDateTime; 311 user.LowPart = user_time.dwLowDateTime; 312 return (static_cast<double>(kernel.QuadPart) + 313 static_cast<double>(user.QuadPart)) * 1e-7; 314 #endif // OS_WINDOWS 315 } 316 317 #ifndef BENCHMARK_OS_WINDOWS 318 static bool MyCPUUsageCPUTimeNsLocked(double* cputime) { 319 static int cputime_fd = -1; 320 if (cputime_fd == -1) { 321 cputime_fd = open("/proc/self/cputime_ns", O_RDONLY); 322 if (cputime_fd < 0) { 323 cputime_fd = -1; 324 return false; 325 } 326 } 327 char buff[64]; 328 memset(buff, 0, sizeof(buff)); 329 if (pread(cputime_fd, buff, sizeof(buff) - 1, 0) <= 0) { 330 close(cputime_fd); 331 cputime_fd = -1; 332 return false; 333 } 334 unsigned long long result = strtoull(buff, nullptr, 0); 335 if (result == (std::numeric_limits<unsigned long long>::max)()) { 336 close(cputime_fd); 337 cputime_fd = -1; 338 return false; 339 } 340 *cputime = static_cast<double>(result) / 1e9; 341 return true; 342 } 343 #endif // OS_WINDOWS 344 345 double MyCPUUsage() { 346 #ifndef BENCHMARK_OS_WINDOWS 347 { 348 std::lock_guard<std::mutex> l(cputimens_mutex); 349 static bool use_cputime_ns = true; 350 if (use_cputime_ns) { 351 double value; 352 if (MyCPUUsageCPUTimeNsLocked(&value)) { 353 return value; 354 } 355 // Once MyCPUUsageCPUTimeNsLocked fails once fall back to getrusage(). 356 VLOG(1) << "Reading /proc/self/cputime_ns failed. Using getrusage().\n"; 357 use_cputime_ns = false; 358 } 359 } 360 #endif // OS_WINDOWS 361 return MyCPUUsageRUsage(); 362 } 363 364 double ChildrenCPUUsage() { 365 #ifndef BENCHMARK_OS_WINDOWS 366 struct rusage ru; 367 if (getrusage(RUSAGE_CHILDREN, &ru) == 0) { 368 return (static_cast<double>(ru.ru_utime.tv_sec) + 369 static_cast<double>(ru.ru_utime.tv_usec) * 1e-6 + 370 static_cast<double>(ru.ru_stime.tv_sec) + 371 static_cast<double>(ru.ru_stime.tv_usec) * 1e-6); 372 } else { 373 return 0.0; 374 } 375 #else 376 // TODO: Not sure what this even means on Windows 377 return 0.0; 378 #endif // OS_WINDOWS 379 } 380 381 double CyclesPerSecond(void) { 382 std::call_once(cpuinfo_init, InitializeSystemInfo); 383 return cpuinfo_cycles_per_second; 384 } 385 386 int NumCPUs(void) { 387 std::call_once(cpuinfo_init, InitializeSystemInfo); 388 return cpuinfo_num_cpus; 389 } 390 391 // The ""'s catch people who don't pass in a literal for "str" 392 #define strliterallen(str) (sizeof("" str "") - 1) 393 394 // Must use a string literal for prefix. 395 #define memprefix(str, len, prefix) \ 396 ((((len) >= strliterallen(prefix)) && \ 397 std::memcmp(str, prefix, strliterallen(prefix)) == 0) \ 398 ? str + strliterallen(prefix) \ 399 : nullptr) 400 401 bool CpuScalingEnabled() { 402 #ifndef BENCHMARK_OS_WINDOWS 403 // On Linux, the CPUfreq subsystem exposes CPU information as files on the 404 // local file system. If reading the exported files fails, then we may not be 405 // running on Linux, so we silently ignore all the read errors. 406 for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) { 407 std::string governor_file = StrCat("/sys/devices/system/cpu/cpu", cpu, 408 "/cpufreq/scaling_governor"); 409 FILE* file = fopen(governor_file.c_str(), "r"); 410 if (!file) break; 411 char buff[16]; 412 size_t bytes_read = fread(buff, 1, sizeof(buff), file); 413 fclose(file); 414 if (memprefix(buff, bytes_read, "performance") == nullptr) return true; 415 } 416 #endif 417 return false; 418 } 419 420 } // end namespace benchmark 421