Home | History | Annotate | Download | only in src
      1 // Copyright 2006 Google Inc. All Rights Reserved.
      2 // Author: nsanders, menderico
      3 
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #ifndef STRESSAPPTEST_OS_H_  // NOLINT
     17 #define STRESSAPPTEST_OS_H_
     18 
     19 #include <dirent.h>
     20 #include <unistd.h>
     21 #include <sys/syscall.h>
     22 
     23 #include <string>
     24 #include <list>
     25 #include <map>
     26 #include <vector>
     27 
     28 // This file must work with autoconf on its public version,
     29 // so these includes are correct.
     30 #include "adler32memcpy.h"  // NOLINT
     31 #include "sattypes.h"       // NOLINT
     32 #include "clock.h"          // NOLINT
     33 
     34 const char kPagemapPath[] = "/proc/self/pagemap";
     35 
     36 struct PCIDevice {
     37   int32 domain;
     38   uint16 bus;
     39   uint8 dev;
     40   uint8 func;
     41   uint16 vendor_id;
     42   uint16 device_id;
     43   uint64 base_addr[6];
     44   uint64 size[6];
     45 };
     46 
     47 typedef vector<PCIDevice*> PCIDevices;
     48 
     49 class ErrorDiag;
     50 
     51 class Clock;
     52 
     53 // This class implements OS/Platform specific funtions.
     54 class OsLayer {
     55  public:
     56   OsLayer();
     57   virtual ~OsLayer();
     58 
     59   // Set the minimum amount of hugepages that should be available for testing.
     60   // Must be set before Initialize().
     61   void SetMinimumHugepagesSize(int64 min_bytes) {
     62     min_hugepages_bytes_ = min_bytes;
     63   }
     64 
     65   // Set the minium amount of memory that should not be allocated. This only
     66   // has any affect if hugepages are not used.
     67   // Must be set before Initialize().
     68   void SetReserveSize(int64 reserve_mb) {
     69     reserve_mb_ = reserve_mb;
     70   }
     71 
     72   // Set parameters needed to translate physical address to memory module.
     73   void SetDramMappingParams(uintptr_t channel_hash, int channel_width,
     74                             vector< vector<string> > *channels) {
     75     channel_hash_ = channel_hash;
     76     channel_width_ = channel_width;
     77     channels_ = channels;
     78   }
     79 
     80   // Initializes data strctures and open files.
     81   // Returns false on error.
     82   virtual bool Initialize();
     83 
     84   // Virtual to physical. This implementation is optional for
     85   // subclasses to implement.
     86   // Takes a pointer, and returns the corresponding bus address.
     87   virtual uint64 VirtualToPhysical(void *vaddr);
     88 
     89   // Prints failed dimm. This implementation is optional for
     90   // subclasses to implement.
     91   // Takes a bus address and string, and prints the DIMM name
     92   // into the string. Returns the DIMM number that corresponds to the
     93   // address given, or -1 if unable to identify the DIMM number.
     94   // Note that subclass implementations of FindDimm() MUST fill
     95   // buf with at LEAST one non-whitespace character (provided len > 0).
     96   virtual int FindDimm(uint64 addr, char *buf, int len);
     97 
     98   // Classifies addresses according to "regions"
     99   // This may mean different things on different platforms.
    100   virtual int32 FindRegion(uint64 paddr);
    101   // Find cpu cores associated with a region. Either NUMA or arbitrary.
    102   virtual cpu_set_t *FindCoreMask(int32 region);
    103   // Return cpu cores associated with a region in a hex string.
    104   virtual string FindCoreMaskFormat(int32 region);
    105 
    106   // Returns the HD device that contains this file.
    107   virtual string FindFileDevice(string filename);
    108 
    109   // Returns a list of paths coresponding to HD devices found on this machine.
    110   virtual list<string> FindFileDevices();
    111 
    112   // Polls for errors. This implementation is optional.
    113   // This will poll once for errors and return zero iff no errors were found.
    114   virtual int ErrorPoll();
    115 
    116   // Delay an appropriate amount of time between polling.
    117   virtual void ErrorWait();
    118 
    119   // Report errors. This implementation is mandatory.
    120   // This will output a machine readable line regarding the error.
    121   virtual bool ErrorReport(const char *part, const char *symptom, int count);
    122 
    123   // Flushes page cache. Used to circumvent the page cache when doing disk
    124   // I/O.  This will be a NOP until ActivateFlushPageCache() is called, which
    125   // is typically done when opening a file with O_DIRECT fails.
    126   // Returns false on error, true on success or NOP.
    127   // Subclasses may implement this in machine specific ways..
    128   virtual bool FlushPageCache(void);
    129   // Enable FlushPageCache() to actually do the flush instead of being a NOP.
    130   virtual void ActivateFlushPageCache(void);
    131 
    132   // Flushes cacheline. Used to distinguish read or write errors.
    133   // Subclasses may implement this in machine specific ways..
    134   // Takes a pointer, and flushed the cacheline containing that pointer.
    135   virtual void Flush(void *vaddr);
    136 
    137   // Fast flush, for use in performance critical code.
    138   // This is bound at compile time, and will not pick up
    139   // any runtime machine configuration info.
    140   inline static void FastFlush(void *vaddr) {
    141 #ifdef STRESSAPPTEST_CPU_PPC
    142     asm volatile("dcbf 0,%0; sync" : : "r" (vaddr));
    143 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
    144     // Put mfence before and after clflush to make sure:
    145     // 1. The write before the clflush is committed to memory bus;
    146     // 2. The read after the clflush is hitting the memory bus.
    147     //
    148     // From Intel manual:
    149     // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
    150     // to be ordered by any other fencing, serializing or other CLFLUSH
    151     // instruction. For example, software can use an MFENCE instruction to
    152     // insure that previous stores are included in the write-back.
    153     asm volatile("mfence");
    154     asm volatile("clflush (%0)" : : "r" (vaddr));
    155     asm volatile("mfence");
    156 #elif defined(STRESSAPPTEST_CPU_ARMV7A) && !defined(__aarch64__)
    157     // ARMv7a cachelines are 8 words (32 bytes).
    158     syscall(__ARM_NR_cacheflush, vaddr, reinterpret_cast<char*>(vaddr) + 32, 0);
    159 #else
    160   #warning "Unsupported CPU type: Unable to force cache flushes."
    161 #endif
    162   }
    163 
    164   // Fast flush, for use in performance critical code.
    165   // This is bound at compile time, and will not pick up
    166   // any runtime machine configuration info.  Takes a NULL-terminated
    167   // array of addresses to flush.
    168   inline static void FastFlushList(void **vaddrs) {
    169 #ifdef STRESSAPPTEST_CPU_PPC
    170     while (*vaddrs) {
    171       asm volatile("dcbf 0,%0" : : "r" (*vaddrs++));
    172     }
    173     asm volatile("sync");
    174 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
    175     // Put mfence before and after clflush to make sure:
    176     // 1. The write before the clflush is committed to memory bus;
    177     // 2. The read after the clflush is hitting the memory bus.
    178     //
    179     // From Intel manual:
    180     // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
    181     // to be ordered by any other fencing, serializing or other CLFLUSH
    182     // instruction. For example, software can use an MFENCE instruction to
    183     // insure that previous stores are included in the write-back.
    184     asm volatile("mfence");
    185     while (*vaddrs) {
    186       asm volatile("clflush (%0)" : : "r" (*vaddrs++));
    187     }
    188     asm volatile("mfence");
    189 #elif defined(STRESSAPPTEST_CPU_ARMV7A)
    190     while (*vaddrs) {
    191       FastFlush(*vaddrs++);
    192     }
    193 #else
    194     #warning "Unsupported CPU type: Unable to force cache flushes."
    195 #endif
    196   }
    197 
    198   // Fast flush hint, for use in performance critical code.
    199   // This is bound at compile time, and will not pick up
    200   // any runtime machine configuration info.  Note that this
    201   // will not guarantee that a flush happens, but will at least
    202   // hint that it should.  This is useful for speeding up
    203   // parallel march algorithms.
    204   inline static void FastFlushHint(void *vaddr) {
    205 #ifdef STRESSAPPTEST_CPU_PPC
    206     asm volatile("dcbf 0,%0" : : "r" (vaddr));
    207 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
    208     // From Intel manual:
    209     // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
    210     // to be ordered by any other fencing, serializing or other CLFLUSH
    211     // instruction. For example, software can use an MFENCE instruction to
    212     // insure that previous stores are included in the write-back.
    213     asm volatile("clflush (%0)" : : "r" (vaddr));
    214 #elif defined(STRESSAPPTEST_CPU_ARMV7A)
    215     FastFlush(vaddr);
    216 #else
    217     #warning "Unsupported CPU type: Unable to force cache flushes."
    218 #endif
    219   }
    220 
    221   // Fast flush, for use in performance critical code.
    222   // This is bound at compile time, and will not pick up
    223   // any runtime machine configuration info.  Sync's any
    224   // transactions for ordering FastFlushHints.
    225   inline static void FastFlushSync() {
    226 #ifdef STRESSAPPTEST_CPU_PPC
    227     asm volatile("sync");
    228 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
    229     // Put mfence before and after clflush to make sure:
    230     // 1. The write before the clflush is committed to memory bus;
    231     // 2. The read after the clflush is hitting the memory bus.
    232     //
    233     // From Intel manual:
    234     // CLFLUSH is only ordered by the MFENCE instruction. It is not guaranteed
    235     // to be ordered by any other fencing, serializing or other CLFLUSH
    236     // instruction. For example, software can use an MFENCE instruction to
    237     // insure that previous stores are included in the write-back.
    238     asm volatile("mfence");
    239 #elif defined(STRESSAPPTEST_CPU_ARMV7A)
    240     // This is a NOP, FastFlushHint() always does a full flush, so there's
    241     // nothing to do for FastFlushSync().
    242 #else
    243   #warning "Unsupported CPU type: Unable to force cache flushes."
    244 #endif
    245   }
    246 
    247   // Get time in cpu timer ticks. Useful for matching MCEs with software
    248   // actions.
    249   inline static uint64 GetTimestamp(void) {
    250     uint64 tsc;
    251 #ifdef STRESSAPPTEST_CPU_PPC
    252     uint32 tbl, tbu, temp;
    253     __asm __volatile(
    254       "1:\n"
    255       "mftbu  %2\n"
    256       "mftb   %0\n"
    257       "mftbu  %1\n"
    258       "cmpw   %2,%1\n"
    259       "bne    1b\n"
    260       : "=r"(tbl), "=r"(tbu), "=r"(temp)
    261       :
    262       : "cc");
    263 
    264     tsc = (static_cast<uint64>(tbu) << 32) | static_cast<uint64>(tbl);
    265 #elif defined(STRESSAPPTEST_CPU_X86_64) || defined(STRESSAPPTEST_CPU_I686)
    266     datacast_t data;
    267     __asm __volatile("rdtsc" : "=a" (data.l32.l), "=d"(data.l32.h));
    268     tsc = data.l64;
    269 #elif defined(STRESSAPPTEST_CPU_ARMV7A)
    270     #warning "Unsupported CPU type ARMV7A: your timer may not function correctly"
    271     tsc = 0;
    272 #else
    273     #warning "Unsupported CPU type: your timer may not function correctly"
    274     tsc = 0;
    275 #endif
    276     return (tsc);
    277   }
    278 
    279   // Find the free memory on the machine.
    280   virtual int64 FindFreeMemSize();
    281 
    282   // Allocates test memory of length bytes.
    283   // Subclasses must implement this.
    284   // Call PepareTestMem to get a pointer.
    285   virtual int64 AllocateAllMem();  // Returns length.
    286   // Returns success.
    287   virtual bool AllocateTestMem(int64 length, uint64 paddr_base);
    288   virtual void FreeTestMem();
    289 
    290   // Prepares the memory for use. You must call this
    291   // before using test memory, and after you are done.
    292   virtual void *PrepareTestMem(uint64 offset, uint64 length);
    293   virtual void ReleaseTestMem(void *addr, uint64 offset, uint64 length);
    294 
    295   // Machine type detected. Can we implement all these functions correctly?
    296   // Returns true if machine type is detected and implemented.
    297   virtual bool IsSupported();
    298 
    299   // Returns 32 for 32-bit, 64 for 64-bit.
    300   virtual int AddressMode();
    301   // Update OsLayer state regarding cpu support for various features.
    302   virtual void GetFeatures();
    303 
    304   // Open, read, write pci cfg through /proc/bus/pci. fd is /proc/pci file.
    305   virtual int PciOpen(int bus, int device, int function);
    306   virtual void PciWrite(int fd, uint32 offset, uint32 value, int width);
    307   virtual uint32 PciRead(int fd, uint32 offset, int width);
    308 
    309   // Read MSRs
    310   virtual bool ReadMSR(uint32 core, uint32 address, uint64 *data);
    311   virtual bool WriteMSR(uint32 core, uint32 address, uint64 *data);
    312 
    313   // Extract bits [n+len-1, n] from a 32 bit word.
    314   // so GetBitField(0x0f00, 8, 4) == 0xf.
    315   virtual uint32 GetBitField(uint32 val, uint32 n, uint32 len);
    316 
    317   // Platform and CPU specific CPU-stressing function.
    318   // Returns true on success, false otherwise.
    319   virtual bool CpuStressWorkload();
    320 
    321   // Causes false errors for unittesting.
    322   // Setting to "true" causes errors to be injected.
    323   void set_error_injection(bool errors) { error_injection_ = errors; }
    324   bool error_injection() const { return error_injection_; }
    325 
    326   // Is SAT using normal malloc'd memory, or exotic mmap'd memory.
    327   bool normal_mem() const { return normal_mem_; }
    328 
    329   // Get numa config, if available..
    330   int num_nodes() const { return num_nodes_; }
    331   int num_cpus() const { return num_cpus_; }
    332 
    333   // Handle to platform-specific error diagnoser.
    334   ErrorDiag *error_diagnoser_;
    335 
    336   // Disambiguate between different "warm" memcopies.
    337   virtual bool AdlerMemcpyWarm(uint64 *dstmem, uint64 *srcmem,
    338                                unsigned int size_in_bytes,
    339                                AdlerChecksum *checksum);
    340 
    341   // Store a callback to use to print
    342   // app-specific info about the last error location.
    343   // This call back is called with a physical address, and the app can fill in
    344   // the most recent transaction that occurred at that address.
    345   typedef bool (*ErrCallback)(uint64 paddr, string *buf);
    346   void set_err_log_callback(
    347     ErrCallback err_log_callback) {
    348     err_log_callback_ = err_log_callback;
    349   }
    350   ErrCallback get_err_log_callback() { return err_log_callback_; }
    351 
    352   // Set a clock object that can be overridden for use with unit tests.
    353   void SetClock(Clock *clock) {
    354     if (clock_) {
    355       delete clock_;
    356     }
    357     clock_ = clock;
    358     time_initialized_ = clock_->Now();
    359   }
    360 
    361  protected:
    362   void *testmem_;                // Location of test memory.
    363   uint64 testmemsize_;           // Size of test memory.
    364   int64 totalmemsize_;           // Size of available memory.
    365   int64 min_hugepages_bytes_;    // Minimum hugepages size.
    366   int64 reserve_mb_;             // Minimum amount of memory to reserve in MB.
    367   bool  error_injection_;        // Do error injection?
    368   bool  normal_mem_;             // Memory DMA capable?
    369   bool  use_hugepages_;          // Use hugepage shmem?
    370   bool  use_posix_shm_;          // Use 4k page shmem?
    371   bool  dynamic_mapped_shmem_;   // Conserve virtual address space.
    372   bool  mmapped_allocation_;     // Was memory allocated using mmap()?
    373   int   shmid_;                  // Handle to shmem
    374   vector< vector<string> > *channels_;  // Memory module names per channel.
    375   uint64 channel_hash_;          // Mask of address bits XORed for channel.
    376   int channel_width_;            // Channel width in bits.
    377 
    378   int64 regionsize_;             // Size of memory "regions"
    379   int   regioncount_;            // Number of memory "regions"
    380   int   num_cpus_;               // Number of cpus in the system.
    381   int   num_nodes_;              // Number of nodes in the system.
    382   int   num_cpus_per_node_;      // Number of cpus per node in the system.
    383   int   address_mode_;           // Are we running 32 or 64 bit?
    384   bool  has_vector_;             // Do we have sse2/neon instructions?
    385   bool  has_clflush_;            // Do we have clflush instructions?
    386   bool  use_flush_page_cache_;   // Do we need to flush the page cache?
    387 
    388 
    389   time_t time_initialized_;      // Start time of test.
    390 
    391   vector<cpu_set_t> cpu_sets_;   // Cache for cpu masks.
    392   vector<bool> cpu_sets_valid_;  // If the cpu mask cache is valid.
    393 
    394   // Get file descriptor for dev msr.
    395   virtual int OpenMSR(uint32 core, uint32 address);
    396 
    397   // Look up how many hugepages there are.
    398   virtual int64 FindHugePages();
    399 
    400   // Link to find last transaction at an error location.
    401   ErrCallback err_log_callback_;
    402 
    403   // Object to wrap the time function.
    404   Clock *clock_;
    405 
    406  private:
    407   DISALLOW_COPY_AND_ASSIGN(OsLayer);
    408 };
    409 
    410 // Selects and returns the proper OS and hardware interface.  Does not call
    411 // OsLayer::Initialize() on the new object.
    412 OsLayer *OsLayerFactory(const std::map<std::string, std::string> &options);
    413 
    414 #endif  // STRESSAPPTEST_OS_H_ NOLINT
    415