Home | History | Annotate | Download | only in src
      1 // Copyright 2006 Google Inc. All Rights Reserved.
      2 
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // sat.h : sat stress test object interface and data structures
     16 
     17 #ifndef STRESSAPPTEST_SAT_H_
     18 #define STRESSAPPTEST_SAT_H_
     19 
     20 #include <signal.h>
     21 
     22 #include <map>
     23 #include <string>
     24 #include <vector>
     25 
     26 // This file must work with autoconf on its public version,
     27 // so these includes are correct.
     28 #include "finelock_queue.h"
     29 #include "queue.h"
     30 #include "sattypes.h"
     31 #include "worker.h"
     32 #include "os.h"
     33 
     34 // SAT stress test class: run configuration, page queues, worker threads.
     35 class Sat {
     36  public:
     37   // Selects the page queue implementation; stored in pe_q_implementation_.
     38   enum PageQueueType { SAT_ONELOCK, SAT_FINELOCK };
     39 
     40   Sat();
     41   virtual ~Sat();
     42 
     43   // Read configuration from arguments. Called first.
     44   bool ParseArgs(int argc, char **argv);
     45   virtual bool CheckGoogleSpecificArgs(int argc, char **argv, int *i);
     46   // Initialize data structures, subclasses, and resources,
     47   // based on command line args.
     48   // Called after ParseArgs().
     49   bool Initialize();
     50 
     51   // Execute the test. Initialize() and ParseArgs() must be called first.
     52   // This must be called from a single-threaded program.
     53   bool Run();
     54 
     55   // Pretty print result summary.
     56   // Called after Run().
     57   // Return value is success or failure of the SAT run, *not* of this function!
     58   bool PrintResults();
     59 
     60   // Pretty print version info.
     61   bool PrintVersion();
     62 
     63   // Pretty print help.
     64   virtual void PrintHelp();
     65 
     66   // Clean up allocations and resources.
     67   // Called last.
     68   bool Cleanup();
     69 
     70   // Abort Run().  Only for use by Run()-installed signal handlers.
     71   void Break() { user_break_ = true; }
     72 
     73   // Fetch pages from, and return pages to, the empty and full (valid) pools.
     74   bool GetValid(struct page_entry *pe);
     75   bool PutValid(struct page_entry *pe);
     76   bool GetEmpty(struct page_entry *pe);
     77   bool PutEmpty(struct page_entry *pe);
     78 
     79   bool GetValid(struct page_entry *pe, int32 tag);  // Overload taking a tag.
     80   bool GetEmpty(struct page_entry *pe, int32 tag);  // Overload taking a tag.
     81 
     82   // Accessor functions.
     83   int verbosity() const { return verbosity_; }
     84   int logfile() const { return logfile_; }
     85   int page_length() const { return page_length_; }
     86   int disk_pages() const { return disk_pages_; }
     87   int strict() const { return strict_; }
     88   int tag_mode() const { return tag_mode_; }
     89   int status() const { return statuscount_; }  // Test error count.
     90   void bad_status() { statuscount_++; }  // Records one more test error.
     91   int errors() const { return errorcount_; }  // NOTE(review): int64 narrowed to int.
     92   int warm() const { return warm_; }
     93   bool stop_on_error() const { return stop_on_error_; }
     94   int32 region_mask() const { return region_mask_; }
     95   // Semi-accessor: bit index of the num-th (0-based) set bit in region_mask_.
     96   int32 region_find(int32 num) const {
     97     for (int i = 0; i < 32; i++) {
     98       if ((1 << i) & region_mask_) {  // NOTE(review): i == 31 shifts into the sign bit of a 32-bit int.
     99         if (num == 0)
    100           return i;
    101         num--;
    102       }
    103     }
    104     return 0;  // No such set bit (also reached when num is negative).
    105   }
    106 
    107   // Causes false errors for unittesting.
    108   // Setting to "true" causes errors to be injected.
    109   void set_error_injection(bool errors) { error_injection_ = errors; }
    110   bool error_injection() const { return error_injection_; }
    111 
    112  protected:
    113   // Opens log file for writing. Returns false on failure.
    114   bool InitializeLogfile();
    115   // Checks for supported environment. Returns false on failure.
    116   bool CheckEnvironment();
    117   // Allocates size_ bytes of test memory.
    118   bool AllocateMemory();
    119   // Initializes datapattern reference structures.
    120   bool InitializePatterns();
    121   // Initializes test memory with datapatterns.
    122   bool InitializePages();
    123 
    124   // Start up worker threads.
    125   virtual void InitializeThreads();
    126   // Spawn worker threads.
    127   void SpawnThreads();
    128   // Reap worker threads.
    129   void JoinThreads();
    130   // Run bandwidth and error analysis.
    131   virtual void RunAnalysis();
    132   // Delete worker threads.
    133   void DeleteThreads();
    134 
    135   // Return the number of cpus in the system.
    136   int CpuCount();
    137   // Return the worst-case (largest) cache line size of the system.
    138   int CacheLineSize();
    139 
    140   // Collect error counts from threads.
    141   int64 GetTotalErrorCount();
    142 
    143   // Command line arguments.
    144   string cmdline_;
    145 
    146   // Memory and test configuration.
    147   int runtime_seconds_;               // Seconds to run.
    148   int page_length_;                   // Length of each memory block.
    149   int64 pages_;                       // Number of memory blocks.
    150   int64 size_;                        // Size of memory tested, in bytes.
    151   int64 size_mb_;                     // Size of memory tested, in MB.
    152   int64 reserve_mb_;                  // Reserve at least this amount of memory
    153                                       // for the system, in MB.
    154   int64 min_hugepages_mbytes_;        // Minimum hugepages size.
    155   int64 freepages_;                   // How many invalid pages we need.
    156   int disk_pages_;                    // Number of pages per temp file.
    157   uint64 paddr_base_;                 // Physical address base.
    158   uint64 channel_hash_;               // Mask of address bits XORed for channel.
    159   int channel_width_;                 // Channel width in bits.
    160   vector< vector<string> > channels_;  // Memory module names per channel.
    161 
    162   // Control flags.
    163   volatile sig_atomic_t user_break_;  // User has signalled early exit.  Used as
    164                                       // a boolean.
    165   int verbosity_;                     // How much to print.
    166   int print_delay_;                   // Chatty update frequency.
    167   int strict_;                        // Check results per transaction.
    168   int warm_;                          // FPU warms CPU while copying.
    169   int address_mode_;                  // 32 or 64 bit binary.
    170   bool stop_on_error_;                // Exit immediately on any error.
    171   bool findfiles_;                    // Autodetect tempfile locations.
    172 
    173   bool error_injection_;              // Simulate errors, for unittests.
    174   bool crazy_error_injection_;        // Simulate lots of errors.
    175   uint64 max_errorcount_;             // Number of errors before forced exit.
    176   int run_on_anything_;               // Ignore unknown machine error.
    177   int use_logfile_;                   // Log to a file.
    178   char logfilename_[255];             // Name of file to log to.
    179   int logfile_;                       // File handle to log to.
    180   bool log_timestamps_;               // Whether to add timestamps to log lines.
    181 
    182   // Disk thread options.
    183   int read_block_size_;               // Size of block to read from disk.
    184   int write_block_size_;              // Size of block to write to disk.
    185   int64 segment_size_;                // Size of segment to split disk into.
    186   int cache_size_;                    // Size of disk cache.
    187   int blocks_per_segment_;            // Number of blocks to test per segment.
    188   int read_threshold_;                // Maximum time (in us) a read should take
    189                                       // before warning of a slow read.
    190   int write_threshold_;               // Maximum time (in us) a write should
    191                                       // take before warning of a slow write.
    192   int non_destructive_;               // Whether to use non-destructive mode for
    193                                       // the disk test.
    194 
    195   // Generic Options.
    196   int monitor_mode_;                  // Switch for monitor-only mode SAT.
    197                                       // This switch trumps most of the other
    198                                       // arguments, as SAT will only run error
    199                                       // polling threads.
    200   int tag_mode_;                      // Do tagging of memory and strict
    201                                       // checking for misplaced cachelines.
    202 
    203   bool do_page_map_;                  // Should we print a list of used pages?
    204   unsigned char *page_bitmap_;        // Store bitmap of physical pages seen.
    205   uint64 page_bitmap_size_;           // Length of physical memory represented.
    206 
    207   // Cpu Cache Coherency Options.
    208   bool cc_test_;                      // Flag to decide whether to start the
    209                                       // cache coherency threads.
    210   int cc_cacheline_count_;            // Number of cache line size structures.
    211   int cc_cacheline_size_;             // Size of a cache line.
    212   int cc_inc_count_;                  // Number of times to increment the shared
    213                                       // cache lines structure members.
    214 
    215   // Cpu Frequency Options.
    216   bool cpu_freq_test_;                // Flag to decide whether to start the
    217                                       // cpu frequency thread.
    218   int cpu_freq_threshold_;            // The MHz threshold which will cause
    219                                       // the test to fail.
    220   int cpu_freq_round_;                // Round the computed frequency to this
    221                                       // value.
    222 
    223   // Thread control.
    224   int file_threads_;                  // Threads of file IO.
    225   int net_threads_;                   // Threads of network IO.
    226   int listen_threads_;                // Threads for network IO to connect.
    227   int memory_threads_;                // Threads of memcpy.
    228   int invert_threads_;                // Threads of invert.
    229   int fill_threads_;                  // Threads of memset.
    230   int check_threads_;                 // Threads of strcmp.
    231   int cpu_stress_threads_;            // Threads of CPU stress workload.
    232   int disk_threads_;                  // Threads of disk test.
    233   int random_threads_;                // Number of random disk threads.
    234   int total_threads_;                 // Total threads used.
    235   bool error_poll_;                   // Poll for system errors.
    236 
    237   // Resources.
    238   cc_cacheline_data *cc_cacheline_data_;  // The cache line sized datastructure
    239                                           // used by the ccache threads
    240                                           // (in worker.h).
    241   vector<string> filename_;           // Filenames for file IO.
    242   vector<string> ipaddrs_;            // Addresses for network IO.
    243   vector<string> diskfilename_;       // Filename for disk IO device.
    244   // Block table for IO device.
    245   vector<DiskBlockTable*> blocktables_;
    246 
    247   int32 region_mask_;                 // Bitmask of available NUMA regions.
    248   int32 region_count_;                // Count of available NUMA regions.
    249   int32 region_[32];                  // Pagecount per region.
    250   int region_mode_;                   // What to do with NUMA hints?
    251   static const int kLocalNuma = 1;    // Target local memory.
    252   static const int kRemoteNuma = 2;   // Target remote memory.
    253 
    254   // Results.
    255   int64 errorcount_;                  // Total hardware incidents seen.
    256   int statuscount_;                   // Total test errors seen.
    257 
    258   // Thread type constants and types.
    259   enum ThreadType {
    260     kMemoryType = 0,
    261     kFileIOType = 1,
    262     kNetIOType = 2,
    263     kNetSlaveType = 3,
    264     kCheckType = 4,
    265     kInvertType = 5,
    266     kDiskType = 6,
    267     kRandomDiskType = 7,
    268     kCPUType = 8,
    269     kErrorType = 9,
    270     kCCType = 10,
    271     kCPUFreqType = 11,
    272   };
    273 
    274   // Helper functions.
    275   virtual void AcquireWorkerLock();
    276   virtual void ReleaseWorkerLock();
    277   pthread_mutex_t worker_lock_;  // Lock access to the worker thread structure.
    278   typedef vector<WorkerThread*> WorkerVector;
    279   typedef map<int, WorkerVector*> WorkerMap;  // presumably keyed by ThreadType - TODO confirm.
    280   // Contains all worker threads.
    281   WorkerMap workers_map_;
    282   // Delay between power spikes.
    283   time_t pause_delay_;
    284   // The duration of each pause (for power spikes).
    285   time_t pause_duration_;
    286   // For the workers we pause and resume to create power spikes.
    287   WorkerStatus power_spike_status_;
    288   // For the workers we never pause.
    289   WorkerStatus continuous_status_;
    290 
    291   class OsLayer *os_;                   // Os abstraction: put hacks here.
    292   class PatternList *patternlist_;      // Access to global data patterns.
    293 
    294   // RunAnalysis methods.
    295   void AnalysisAllStats();              // Summary of all runs.
    296   void MemoryStats();
    297   void FileStats();
    298   void NetStats();
    299   void CheckStats();
    300   void InvertStats();
    301   void DiskStats();
    302 
    303   void QueueStats();
    304 
    305   // Physical page use reporting.
    306   void AddrMapInit();
    307   void AddrMapUpdate(struct page_entry *pe);
    308   void AddrMapPrint();
    309 
    310   // Additional memory data from Google-specific tests.
    311   virtual void GoogleMemoryStats(float *memcopy_data,
    312                                  float *memcopy_bandwidth);
    313 
    314   virtual void GoogleOsOptions(std::map<std::string, std::string> *options);
    315 
    316   // Page queues, only one of (valid_+empty_) or (finelock_q_) will be used
    317   // at a time. A commandline switch controls which queue implementation will
    318   // be used.
    319   class PageEntryQueue *valid_;        // Page queue structure, valid pages.
    320   class PageEntryQueue *empty_;        // Page queue structure, free pages.
    321   class FineLockPEQueue *finelock_q_;  // Page queue with fine-grain locks
    322   Sat::PageQueueType pe_q_implementation_;   // Queue implementation switch
    323 
    324   DISALLOW_COPY_AND_ASSIGN(Sat);
    325 };
    326 
    327 Sat *SatFactory();  // Declaration only; presumably creates the Sat (or subclass) instance - confirm at the definition.
    328 
    329 #endif  // STRESSAPPTEST_SAT_H_
    330