Home | History | Annotate | Download | only in profile
      1 /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
      2 |*
      3 |*                     The LLVM Compiler Infrastructure
      4 |*
      5 |* This file is distributed under the University of Illinois Open Source
      6 |* License. See LICENSE.TXT for details.
      7 |*
      8 \*===----------------------------------------------------------------------===*/
      9 
     10 #include "InstrProfiling.h"
     11 #include "InstrProfilingInternal.h"
     12 #include "InstrProfilingUtil.h" /* For PS4 getenv shim. */
     13 #include <limits.h>
     14 #include <stdio.h>
     15 #include <stdlib.h>
     16 #include <string.h>
     17 #define INSTR_PROF_VALUE_PROF_DATA
     18 #define INSTR_PROF_COMMON_API_IMPL
     19 #include "InstrProfData.inc"
     20 
     21 static int hasStaticCounters = 1;
     22 static int OutOfNodesWarnings = 0;
     23 static int hasNonDefaultValsPerSite = 0;
     24 #define INSTR_PROF_MAX_VP_WARNS 10
     25 #define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 8
     26 #define INSTR_PROF_VNODE_POOL_SIZE 1024
     27 
     28 #ifndef _MSC_VER
     29 /* A shared static pool in addition to the vnodes statically
     30  * allocated by the compiler.  */
     31 COMPILER_RT_VISIBILITY ValueProfNode
     32     lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
     33        COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME_STR);
     34 #endif
     35 
     36 COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
     37     INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
     38 
     39 COMPILER_RT_VISIBILITY void lprofSetupValueProfiler() {
     40   const char *Str = 0;
     41   Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
     42   if (Str && Str[0]) {
     43     VPMaxNumValsPerSite = atoi(Str);
     44     hasNonDefaultValsPerSite = 1;
     45   }
     46   if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
     47     VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
     48 }
     49 
     50 COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
     51   VPMaxNumValsPerSite = MaxVals;
     52   hasNonDefaultValsPerSite = 1;
     53 }
     54 
     55 /* This method is only used in value profiler mock testing.  */
     56 COMPILER_RT_VISIBILITY void
     57 __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
     58                                    uint32_t ValueKind, uint16_t NumValueSites) {
     59   *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
     60 }
     61 
     62 /* This method is only used in value profiler mock testing.  */
     63 COMPILER_RT_VISIBILITY const __llvm_profile_data *
     64 __llvm_profile_iterate_data(const __llvm_profile_data *Data) {
     65   return Data + 1;
     66 }
     67 
     68 /* This method is only used in value profiler mock testing.  */
     69 COMPILER_RT_VISIBILITY void *
     70 __llvm_get_function_addr(const __llvm_profile_data *Data) {
     71   return Data->FunctionPointer;
     72 }
     73 
     74 /* Allocate an array that holds the pointers to the linked lists of
     75  * value profile counter nodes. The number of element of the array
     76  * is the total number of value profile sites instrumented. Returns
     77  * 0 if allocation fails.
     78  */
     79 
     80 static int allocateValueProfileCounters(__llvm_profile_data *Data) {
     81   uint64_t NumVSites = 0;
     82   uint32_t VKI;
     83 
     84   /* This function will never be called when value site array is allocated
     85      statically at compile time.  */
     86   hasStaticCounters = 0;
     87   /* When dynamic allocation is enabled, allow tracking the max number of
     88    * values allowd.  */
     89   if (!hasNonDefaultValsPerSite)
     90     VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
     91 
     92   for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
     93     NumVSites += Data->NumValueSites[VKI];
     94 
     95   ValueProfNode **Mem =
     96       (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
     97   if (!Mem)
     98     return 0;
     99   if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
    100     free(Mem);
    101     return 0;
    102   }
    103   return 1;
    104 }
    105 
    106 static ValueProfNode *allocateOneNode(__llvm_profile_data *Data, uint32_t Index,
    107                                       uint64_t Value) {
    108   ValueProfNode *Node;
    109 
    110   if (!hasStaticCounters)
    111     return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
    112 
    113   /* Early check to avoid value wrapping around.  */
    114   if (CurrentVNode + 1 > EndVNode) {
    115     if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
    116       PROF_WARN("Unable to track new values: %s. "
    117                 " Consider using option -mllvm -vp-counters-per-site=<n> to "
    118                 "allocate more"
    119                 " value profile counters at compile time. \n",
    120                 "Running out of static counters");
    121     }
    122     return 0;
    123   }
    124   Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
    125   /* Due to section padding, EndVNode point to a byte which is one pass
    126    * an incomplete VNode, so we need to skip the last incomplete node. */
    127   if (Node + 1 > EndVNode)
    128     return 0;
    129 
    130   return Node;
    131 }
    132 
    133 COMPILER_RT_VISIBILITY void
    134 __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
    135                                  uint32_t CounterIndex) {
    136   __llvm_profile_data *PData = (__llvm_profile_data *)Data;
    137   if (!PData)
    138     return;
    139 
    140   if (!PData->Values) {
    141     if (!allocateValueProfileCounters(PData))
    142       return;
    143   }
    144 
    145   ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
    146   ValueProfNode *PrevVNode = NULL;
    147   ValueProfNode *MinCountVNode = NULL;
    148   ValueProfNode *CurVNode = ValueCounters[CounterIndex];
    149   uint64_t MinCount = UINT64_MAX;
    150 
    151   uint8_t VDataCount = 0;
    152   while (CurVNode) {
    153     if (TargetValue == CurVNode->Value) {
    154       CurVNode->Count++;
    155       return;
    156     }
    157     if (CurVNode->Count < MinCount) {
    158       MinCount = CurVNode->Count;
    159       MinCountVNode = CurVNode;
    160     }
    161     PrevVNode = CurVNode;
    162     CurVNode = CurVNode->Next;
    163     ++VDataCount;
    164   }
    165 
    166   if (VDataCount >= VPMaxNumValsPerSite) {
    167     /* Bump down the min count node's count. If it reaches 0,
    168      * evict it. This eviction/replacement policy makes hot
    169      * targets more sticky while cold targets less so. In other
    170      * words, it makes it less likely for the hot targets to be
    171      * prematurally evicted during warmup/establishment period,
    172      * when their counts are still low. In a special case when
    173      * the number of values tracked is reduced to only one, this
    174      * policy will guarantee that the dominating target with >50%
    175      * total count will survive in the end. Note that this scheme
    176      * allows the runtime to track the min count node in an adaptive
    177      * manner. It can correct previous mistakes and eventually
    178      * lock on a cold target that is alread in stable state.
    179      *
    180      * In very rare cases,  this replacement scheme may still lead
    181      * to target loss. For instance, out of \c N value slots, \c N-1
    182      * slots are occupied by luke warm targets during the warmup
    183      * period and the remaining one slot is competed by two or more
    184      * very hot targets. If those hot targets occur in an interleaved
    185      * way, none of them will survive (gain enough weight to throw out
    186      * other established entries) due to the ping-pong effect.
    187      * To handle this situation, user can choose to increase the max
    188      * number of tracked values per value site. Alternatively, a more
    189      * expensive eviction mechanism can be implemented. It requires
    190      * the runtime to track the total number of evictions per-site.
    191      * When the total number of evictions reaches certain threshold,
    192      * the runtime can wipe out more than one lowest count entries
    193      * to give space for hot targets.
    194      */
    195     if (!(--MinCountVNode->Count)) {
    196       CurVNode = MinCountVNode;
    197       CurVNode->Value = TargetValue;
    198       CurVNode->Count++;
    199     }
    200     return;
    201   }
    202 
    203   CurVNode = allocateOneNode(PData, CounterIndex, TargetValue);
    204   if (!CurVNode)
    205     return;
    206   CurVNode->Value = TargetValue;
    207   CurVNode->Count++;
    208 
    209   uint32_t Success = 0;
    210   if (!ValueCounters[CounterIndex])
    211     Success =
    212         COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
    213   else if (PrevVNode && !PrevVNode->Next)
    214     Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);
    215 
    216   if (!Success && !hasStaticCounters) {
    217     free(CurVNode);
    218     return;
    219   }
    220 }
    221 
    222 /*
    223  * A wrapper struct that represents value profile runtime data.
    224  * Like InstrProfRecord class which is used by profiling host tools,
    225  * ValueProfRuntimeRecord also implements the abstract intefaces defined in
    226  * ValueProfRecordClosure so that the runtime data can be serialized using
    227  * shared C implementation.
    228  */
    229 typedef struct ValueProfRuntimeRecord {
    230   const __llvm_profile_data *Data;
    231   ValueProfNode **NodesKind[IPVK_Last + 1];
    232   uint8_t **SiteCountArray;
    233 } ValueProfRuntimeRecord;
    234 
    235 /* ValueProfRecordClosure Interface implementation. */
    236 
    237 static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
    238   return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
    239 }
    240 
    241 static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
    242   uint32_t S = 0, I;
    243   const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
    244   if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
    245     return 0;
    246   for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
    247     S += Record->SiteCountArray[VK][I];
    248   return S;
    249 }
    250 
    251 static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
    252                                          uint32_t S) {
    253   const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
    254   return Record->SiteCountArray[VK][S];
    255 }
    256 
    257 static ValueProfRuntimeRecord RTRecord;
    258 static ValueProfRecordClosure RTRecordClosure = {
    259     &RTRecord,          INSTR_PROF_NULLPTR, /* GetNumValueKinds */
    260     getNumValueSitesRT, getNumValueDataRT,  getNumValueDataForSiteRT,
    261     INSTR_PROF_NULLPTR, /* RemapValueData */
    262     INSTR_PROF_NULLPTR, /* GetValueForSite, */
    263     INSTR_PROF_NULLPTR  /* AllocValueProfData */
    264 };
    265 
    266 static uint32_t
    267 initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
    268                                  uint8_t *SiteCountArray[]) {
    269   unsigned I, J, S = 0, NumValueKinds = 0;
    270   ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
    271   RTRecord.Data = Data;
    272   RTRecord.SiteCountArray = SiteCountArray;
    273   for (I = 0; I <= IPVK_Last; I++) {
    274     uint16_t N = Data->NumValueSites[I];
    275     if (!N)
    276       continue;
    277 
    278     NumValueKinds++;
    279 
    280     RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
    281     for (J = 0; J < N; J++) {
    282       /* Compute value count for each site. */
    283       uint32_t C = 0;
    284       ValueProfNode *Site =
    285           Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
    286       while (Site) {
    287         C++;
    288         Site = Site->Next;
    289       }
    290       if (C > UCHAR_MAX)
    291         C = UCHAR_MAX;
    292       RTRecord.SiteCountArray[I][J] = C;
    293     }
    294     S += N;
    295   }
    296   return NumValueKinds;
    297 }
    298 
    299 static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
    300                                         InstrProfValueData *Dst,
    301                                         ValueProfNode *StartNode, uint32_t N) {
    302   unsigned I;
    303   ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];
    304   for (I = 0; I < N; I++) {
    305     Dst[I].Value = VNode->Value;
    306     Dst[I].Count = VNode->Count;
    307     VNode = VNode->Next;
    308   }
    309   return VNode;
    310 }
    311 
    312 static uint32_t getValueProfDataSizeWrapper(void) {
    313   return getValueProfDataSize(&RTRecordClosure);
    314 }
    315 
    316 static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
    317   return getNumValueDataForSiteRT(&RTRecord, VK, S);
    318 }
    319 
    320 static VPDataReaderType TheVPDataReader = {
    321     initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
    322     getFirstValueProfRecord,          getNumValueDataForSiteWrapper,
    323     getValueProfDataSizeWrapper,      getNextNValueData};
    324 
    325 COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader() {
    326   return &TheVPDataReader;
    327 }
    328