Home | History | Annotate | Download | only in lib
      1 //===-- Clustering.h --------------------------------------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 ///
     10 /// \file
     11 /// Utilities to compute benchmark result clusters.
     12 ///
     13 //===----------------------------------------------------------------------===//
     14 
     15 #ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
     16 #define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
     17 
     18 #include "BenchmarkResult.h"
     19 #include "llvm/Support/Error.h"
     20 #include <vector>
     21 
     22 namespace exegesis {
     23 
     24 class InstructionBenchmarkClustering {
     25 public:
     26   // Clusters `Points` using DBSCAN with the given parameters. See the cc file
     27   // for more explanations on the algorithm.
     28   static llvm::Expected<InstructionBenchmarkClustering>
     29   create(const std::vector<InstructionBenchmark> &Points, size_t MinPts,
     30          double Epsilon);
     31 
     32   class ClusterId {
     33   public:
     34     static ClusterId noise() { return ClusterId(kNoise); }
     35     static ClusterId error() { return ClusterId(kError); }
     36     static ClusterId makeValid(size_t Id) { return ClusterId(Id); }
     37     ClusterId() : Id_(kUndef) {}
     38     bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
     39     bool operator<(const ClusterId &O) const { return Id_ < O.Id_; }
     40 
     41     bool isValid() const { return Id_ <= kMaxValid; }
     42     bool isUndef() const { return Id_ == kUndef; }
     43     bool isNoise() const { return Id_ == kNoise; }
     44     bool isError() const { return Id_ == kError; }
     45 
     46     // Precondition: isValid().
     47     size_t getId() const {
     48       assert(isValid());
     49       return Id_;
     50     }
     51 
     52   private:
     53     explicit ClusterId(size_t Id) : Id_(Id) {}
     54     static constexpr const size_t kMaxValid =
     55         std::numeric_limits<size_t>::max() - 4;
     56     static constexpr const size_t kNoise = kMaxValid + 1;
     57     static constexpr const size_t kError = kMaxValid + 2;
     58     static constexpr const size_t kUndef = kMaxValid + 3;
     59     size_t Id_;
     60   };
     61 
     62   struct Cluster {
     63     Cluster() = delete;
     64     explicit Cluster(const ClusterId &Id) : Id(Id) {}
     65 
     66     const ClusterId Id;
     67     // Indices of benchmarks within the cluster.
     68     std::vector<int> PointIndices;
     69   };
     70 
     71   ClusterId getClusterIdForPoint(size_t P) const {
     72     return ClusterIdForPoint_[P];
     73   }
     74 
     75   const std::vector<InstructionBenchmark> &getPoints() const { return Points_; }
     76 
     77   const Cluster &getCluster(ClusterId Id) const {
     78     assert(!Id.isUndef() && "unlabeled cluster");
     79     if (Id.isNoise()) {
     80       return NoiseCluster_;
     81     }
     82     if (Id.isError()) {
     83       return ErrorCluster_;
     84     }
     85     return Clusters_[Id.getId()];
     86   }
     87 
     88   const std::vector<Cluster> &getValidClusters() const { return Clusters_; }
     89 
     90   // Returns true if the given point is within a distance Epsilon of each other.
     91   bool isNeighbour(const std::vector<BenchmarkMeasure> &P,
     92                    const std::vector<BenchmarkMeasure> &Q) const;
     93 
     94 private:
     95   InstructionBenchmarkClustering(
     96       const std::vector<InstructionBenchmark> &Points, double EpsilonSquared);
     97   llvm::Error validateAndSetup();
     98   void dbScan(size_t MinPts);
     99   std::vector<size_t> rangeQuery(size_t Q) const;
    100 
    101   const std::vector<InstructionBenchmark> &Points_;
    102   const double EpsilonSquared_;
    103   int NumDimensions_ = 0;
    104   // ClusterForPoint_[P] is the cluster id for Points[P].
    105   std::vector<ClusterId> ClusterIdForPoint_;
    106   std::vector<Cluster> Clusters_;
    107   Cluster NoiseCluster_;
    108   Cluster ErrorCluster_;
    109 };
    110 
    111 } // namespace exegesis
    112 
    113 #endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
    114