1 /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\ 2 |* 3 |* The LLVM Compiler Infrastructure 4 |* 5 |* This file is distributed under the University of Illinois Open Source 6 |* License. See LICENSE.TXT for details. 7 |* 8 \*===----------------------------------------------------------------------===*/ 9 10 #include "InstrProfiling.h" 11 #include "InstrProfilingInternal.h" 12 #include "InstrProfilingUtil.h" /* For PS4 getenv shim. */ 13 #include <limits.h> 14 #include <stdio.h> 15 #include <stdlib.h> 16 #include <string.h> 17 #define INSTR_PROF_VALUE_PROF_DATA 18 #define INSTR_PROF_COMMON_API_IMPL 19 #include "InstrProfData.inc" 20 21 static int hasStaticCounters = 1; 22 static int OutOfNodesWarnings = 0; 23 static int hasNonDefaultValsPerSite = 0; 24 #define INSTR_PROF_MAX_VP_WARNS 10 25 #define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 8 26 #define INSTR_PROF_VNODE_POOL_SIZE 1024 27 28 #ifndef _MSC_VER 29 /* A shared static pool in addition to the vnodes statically 30 * allocated by the compiler. */ 31 COMPILER_RT_VISIBILITY ValueProfNode 32 lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION( 33 COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME_STR); 34 #endif 35 36 COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite = 37 INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE; 38 39 COMPILER_RT_VISIBILITY void lprofSetupValueProfiler() { 40 const char *Str = 0; 41 Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE"); 42 if (Str && Str[0]) { 43 VPMaxNumValsPerSite = atoi(Str); 44 hasNonDefaultValsPerSite = 1; 45 } 46 if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE) 47 VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE; 48 } 49 50 COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) { 51 VPMaxNumValsPerSite = MaxVals; 52 hasNonDefaultValsPerSite = 1; 53 } 54 55 /* This method is only used in value profiler mock testing. */ 56 COMPILER_RT_VISIBILITY void 57 __llvm_profile_set_num_value_sites(__llvm_profile_data *Data, 58 uint32_t ValueKind, uint16_t NumValueSites) { 59 *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites; 60 } 61 62 /* This method is only used in value profiler mock testing. */ 63 COMPILER_RT_VISIBILITY const __llvm_profile_data * 64 __llvm_profile_iterate_data(const __llvm_profile_data *Data) { 65 return Data + 1; 66 } 67 68 /* This method is only used in value profiler mock testing. */ 69 COMPILER_RT_VISIBILITY void * 70 __llvm_get_function_addr(const __llvm_profile_data *Data) { 71 return Data->FunctionPointer; 72 } 73 74 /* Allocate an array that holds the pointers to the linked lists of 75 * value profile counter nodes. The number of element of the array 76 * is the total number of value profile sites instrumented. Returns 77 * 0 if allocation fails. 78 */ 79 80 static int allocateValueProfileCounters(__llvm_profile_data *Data) { 81 uint64_t NumVSites = 0; 82 uint32_t VKI; 83 84 /* This function will never be called when value site array is allocated 85 statically at compile time. */ 86 hasStaticCounters = 0; 87 /* When dynamic allocation is enabled, allow tracking the max number of 88 * values allowd. */ 89 if (!hasNonDefaultValsPerSite) 90 VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE; 91 92 for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI) 93 NumVSites += Data->NumValueSites[VKI]; 94 95 ValueProfNode **Mem = 96 (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *)); 97 if (!Mem) 98 return 0; 99 if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) { 100 free(Mem); 101 return 0; 102 } 103 return 1; 104 } 105 106 static ValueProfNode *allocateOneNode(__llvm_profile_data *Data, uint32_t Index, 107 uint64_t Value) { 108 ValueProfNode *Node; 109 110 if (!hasStaticCounters) 111 return (ValueProfNode *)calloc(1, sizeof(ValueProfNode)); 112 113 /* Early check to avoid value wrapping around. */ 114 if (CurrentVNode + 1 > EndVNode) { 115 if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) { 116 PROF_WARN("Unable to track new values: %s. " 117 " Consider using option -mllvm -vp-counters-per-site=<n> to " 118 "allocate more" 119 " value profile counters at compile time. \n", 120 "Running out of static counters"); 121 } 122 return 0; 123 } 124 Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1); 125 /* Due to section padding, EndVNode point to a byte which is one pass 126 * an incomplete VNode, so we need to skip the last incomplete node. */ 127 if (Node + 1 > EndVNode) 128 return 0; 129 130 return Node; 131 } 132 133 COMPILER_RT_VISIBILITY void 134 __llvm_profile_instrument_target(uint64_t TargetValue, void *Data, 135 uint32_t CounterIndex) { 136 __llvm_profile_data *PData = (__llvm_profile_data *)Data; 137 if (!PData) 138 return; 139 140 if (!PData->Values) { 141 if (!allocateValueProfileCounters(PData)) 142 return; 143 } 144 145 ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values; 146 ValueProfNode *PrevVNode = NULL; 147 ValueProfNode *MinCountVNode = NULL; 148 ValueProfNode *CurVNode = ValueCounters[CounterIndex]; 149 uint64_t MinCount = UINT64_MAX; 150 151 uint8_t VDataCount = 0; 152 while (CurVNode) { 153 if (TargetValue == CurVNode->Value) { 154 CurVNode->Count++; 155 return; 156 } 157 if (CurVNode->Count < MinCount) { 158 MinCount = CurVNode->Count; 159 MinCountVNode = CurVNode; 160 } 161 PrevVNode = CurVNode; 162 CurVNode = CurVNode->Next; 163 ++VDataCount; 164 } 165 166 if (VDataCount >= VPMaxNumValsPerSite) { 167 /* Bump down the min count node's count. If it reaches 0, 168 * evict it. This eviction/replacement policy makes hot 169 * targets more sticky while cold targets less so. In other 170 * words, it makes it less likely for the hot targets to be 171 * prematurally evicted during warmup/establishment period, 172 * when their counts are still low. In a special case when 173 * the number of values tracked is reduced to only one, this 174 * policy will guarantee that the dominating target with >50% 175 * total count will survive in the end. Note that this scheme 176 * allows the runtime to track the min count node in an adaptive 177 * manner. It can correct previous mistakes and eventually 178 * lock on a cold target that is alread in stable state. 179 * 180 * In very rare cases, this replacement scheme may still lead 181 * to target loss. For instance, out of \c N value slots, \c N-1 182 * slots are occupied by luke warm targets during the warmup 183 * period and the remaining one slot is competed by two or more 184 * very hot targets. If those hot targets occur in an interleaved 185 * way, none of them will survive (gain enough weight to throw out 186 * other established entries) due to the ping-pong effect. 187 * To handle this situation, user can choose to increase the max 188 * number of tracked values per value site. Alternatively, a more 189 * expensive eviction mechanism can be implemented. It requires 190 * the runtime to track the total number of evictions per-site. 191 * When the total number of evictions reaches certain threshold, 192 * the runtime can wipe out more than one lowest count entries 193 * to give space for hot targets. 194 */ 195 if (!(--MinCountVNode->Count)) { 196 CurVNode = MinCountVNode; 197 CurVNode->Value = TargetValue; 198 CurVNode->Count++; 199 } 200 return; 201 } 202 203 CurVNode = allocateOneNode(PData, CounterIndex, TargetValue); 204 if (!CurVNode) 205 return; 206 CurVNode->Value = TargetValue; 207 CurVNode->Count++; 208 209 uint32_t Success = 0; 210 if (!ValueCounters[CounterIndex]) 211 Success = 212 COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode); 213 else if (PrevVNode && !PrevVNode->Next) 214 Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode); 215 216 if (!Success && !hasStaticCounters) { 217 free(CurVNode); 218 return; 219 } 220 } 221 222 /* 223 * A wrapper struct that represents value profile runtime data. 224 * Like InstrProfRecord class which is used by profiling host tools, 225 * ValueProfRuntimeRecord also implements the abstract intefaces defined in 226 * ValueProfRecordClosure so that the runtime data can be serialized using 227 * shared C implementation. 228 */ 229 typedef struct ValueProfRuntimeRecord { 230 const __llvm_profile_data *Data; 231 ValueProfNode **NodesKind[IPVK_Last + 1]; 232 uint8_t **SiteCountArray; 233 } ValueProfRuntimeRecord; 234 235 /* ValueProfRecordClosure Interface implementation. */ 236 237 static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) { 238 return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK]; 239 } 240 241 static uint32_t getNumValueDataRT(const void *R, uint32_t VK) { 242 uint32_t S = 0, I; 243 const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R; 244 if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR) 245 return 0; 246 for (I = 0; I < Record->Data->NumValueSites[VK]; I++) 247 S += Record->SiteCountArray[VK][I]; 248 return S; 249 } 250 251 static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK, 252 uint32_t S) { 253 const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R; 254 return Record->SiteCountArray[VK][S]; 255 } 256 257 static ValueProfRuntimeRecord RTRecord; 258 static ValueProfRecordClosure RTRecordClosure = { 259 &RTRecord, INSTR_PROF_NULLPTR, /* GetNumValueKinds */ 260 getNumValueSitesRT, getNumValueDataRT, getNumValueDataForSiteRT, 261 INSTR_PROF_NULLPTR, /* RemapValueData */ 262 INSTR_PROF_NULLPTR, /* GetValueForSite, */ 263 INSTR_PROF_NULLPTR /* AllocValueProfData */ 264 }; 265 266 static uint32_t 267 initializeValueProfRuntimeRecord(const __llvm_profile_data *Data, 268 uint8_t *SiteCountArray[]) { 269 unsigned I, J, S = 0, NumValueKinds = 0; 270 ValueProfNode **Nodes = (ValueProfNode **)Data->Values; 271 RTRecord.Data = Data; 272 RTRecord.SiteCountArray = SiteCountArray; 273 for (I = 0; I <= IPVK_Last; I++) { 274 uint16_t N = Data->NumValueSites[I]; 275 if (!N) 276 continue; 277 278 NumValueKinds++; 279 280 RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR; 281 for (J = 0; J < N; J++) { 282 /* Compute value count for each site. */ 283 uint32_t C = 0; 284 ValueProfNode *Site = 285 Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR; 286 while (Site) { 287 C++; 288 Site = Site->Next; 289 } 290 if (C > UCHAR_MAX) 291 C = UCHAR_MAX; 292 RTRecord.SiteCountArray[I][J] = C; 293 } 294 S += N; 295 } 296 return NumValueKinds; 297 } 298 299 static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site, 300 InstrProfValueData *Dst, 301 ValueProfNode *StartNode, uint32_t N) { 302 unsigned I; 303 ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site]; 304 for (I = 0; I < N; I++) { 305 Dst[I].Value = VNode->Value; 306 Dst[I].Count = VNode->Count; 307 VNode = VNode->Next; 308 } 309 return VNode; 310 } 311 312 static uint32_t getValueProfDataSizeWrapper(void) { 313 return getValueProfDataSize(&RTRecordClosure); 314 } 315 316 static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) { 317 return getNumValueDataForSiteRT(&RTRecord, VK, S); 318 } 319 320 static VPDataReaderType TheVPDataReader = { 321 initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize, 322 getFirstValueProfRecord, getNumValueDataForSiteWrapper, 323 getValueProfDataSizeWrapper, getNextNValueData}; 324 325 COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader() { 326 return &TheVPDataReader; 327 } 328