1 /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
3 |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 |* See https://llvm.org/LICENSE.txt for license information.
5 |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 \*===----------------------------------------------------------------------===*/
14 #include "InstrProfiling.h"
15 #include "InstrProfilingInternal.h"
16 #include "InstrProfilingUtil.h"
18 #define INSTR_PROF_VALUE_PROF_DATA
19 #define INSTR_PROF_COMMON_API_IMPL
20 #include "profile/InstrProfData.inc"
/* File-scope state and tunables of the value profiler.
 *  - hasStaticCounters: starts at 1 and is cleared by
 *    allocateValueProfileCounters(); allocateOneNode() uses calloc() once it
 *    is 0.
 *  - OutOfNodesWarnings: counts out-of-nodes warning attempts; a warning is
 *    only printed while it is below INSTR_PROF_MAX_VP_WARNS.
 *  - hasNonDefaultValsPerSite: set to 1 when VPMaxNumValsPerSite is
 *    overridden by lprofSetupValueProfiler() or lprofSetMaxValsPerSite().
 *  - INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE: initial value of
 *    VPMaxNumValsPerSite.
 *  - INSTR_PROF_VNODE_POOL_SIZE: element count of lprofValueProfNodes. */
22 static int hasStaticCounters = 1;
23 static int OutOfNodesWarnings = 0;
24 static int hasNonDefaultValsPerSite = 0;
25 #define INSTR_PROF_MAX_VP_WARNS 10
26 #define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 16
27 #define INSTR_PROF_VNODE_POOL_SIZE 1024
/* Array of INSTR_PROF_VNODE_POOL_SIZE ValueProfNode objects. It is placed
 * through COMPILER_RT_SECTION in the section named by
 * COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME. */
30 /* A shared static pool in addition to the vnodes statically
31 * allocated by the compiler. */
32 COMPILER_RT_VISIBILITY ValueProfNode
33 lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
34 COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME);
/* Limit on the number of values tracked per value site;
 * instrumentTargetValueImpl() compares its per-site count against it.
 * Starts at INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE and is written by
 * lprofSetupValueProfiler(), lprofSetMaxValsPerSite() and
 * allocateValueProfileCounters(). */
37 COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
38 INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
/* Initializes the per-site value limit from the environment.
 * Reads LLVM_VP_MAX_NUM_VALS_PER_SITE via getenv(), converts it with atoi()
 * into VPMaxNumValsPerSite and sets hasNonDefaultValsPerSite, then caps the
 * limit at INSTR_PROF_MAX_NUM_VAL_PER_SITE.
 * NOTE(review): atoi() reports no errors; text without a leading number
 * converts to 0.
 * NOTE(review): the declaration of Str, whatever guards the atoi() call and
 * the closing braces are absent from this listing -- confirm against the
 * complete file. */
40 COMPILER_RT_VISIBILITY void lprofSetupValueProfiler() {
42 Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
44 VPMaxNumValsPerSite = atoi(Str);
45 hasNonDefaultValsPerSite = 1;
47 if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
48 VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
/* Sets the per-site value limit to MaxVals and records that a non-default
 * limit is in effect (hasNonDefaultValsPerSite = 1). No upper bound is
 * applied to MaxVals in the lines shown here. */
51 COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
52 VPMaxNumValsPerSite = MaxVals;
53 hasNonDefaultValsPerSite = 1;
/* Overwrites Data->NumValueSites[ValueKind] with NumValueSites.
 * The store goes through a uint16_t * cast of the element's address --
 * presumably to write a field that is declared const; TODO confirm against
 * the __llvm_profile_data declaration. ValueKind is used as an index without
 * a range check. */
56 /* This method is only used in value profiler mock testing. */
57 COMPILER_RT_VISIBILITY void
58 __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
59 uint32_t ValueKind, uint16_t NumValueSites) {
60 *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
/* Takes a pointer to one __llvm_profile_data record and returns a pointer to
 * a const __llvm_profile_data record.
 * NOTE(review): the function body is absent from this listing, so the
 * returned value cannot be described from here. */
63 /* This method is only used in value profiler mock testing. */
64 COMPILER_RT_VISIBILITY const __llvm_profile_data *
65 __llvm_profile_iterate_data(const __llvm_profile_data *Data) {
/* Returns the FunctionPointer field of the given profile data record. */
69 /* This method is only used in value profiler mock testing. */
70 COMPILER_RT_VISIBILITY void *
71 __llvm_get_function_addr(const __llvm_profile_data *Data) {
72 return Data->FunctionPointer;
75 /* Allocate an array that holds the pointers to the linked lists of
76 * value profile counter nodes. The number of elements of the array
77 * is the total number of value profile sites instrumented. Returns
78 * 0 if allocation fails.
 */
/* Dynamically allocates the value-site array for Data.
 * Steps visible here:
 *  - clears hasStaticCounters;
 *  - unless a non-default limit was set, raises VPMaxNumValsPerSite to
 *    INSTR_PROF_MAX_NUM_VAL_PER_SITE;
 *  - sums Data->NumValueSites[] over IPVK_First..IPVK_Last into NumVSites;
 *  - calloc()s NumVSites zeroed ValueProfNode * slots;
 *  - tries to install the array with
 *    COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem).
 * NOTE(review): the declarations of VKI and Mem, the handling of a failed
 * calloc() or compare-exchange and the return statements are absent from
 * this listing -- confirm against the complete file. */
81 static int allocateValueProfileCounters(__llvm_profile_data *Data) {
82 uint64_t NumVSites = 0;
85 /* This function will never be called when value site array is allocated
86 statically at compile time. */
87 hasStaticCounters = 0;
88 /* When dynamic allocation is enabled, allow tracking the max number of
90 if (!hasNonDefaultValsPerSite)
91 VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
93 for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
94 NumVSites += Data->NumValueSites[VKI];
97 (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
100 if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
/* Returns storage for one ValueProfNode.
 * When hasStaticCounters is 0 the node comes zeroed from calloc().
 * Otherwise the static node range is used: if CurrentVNode + 1 is already
 * past EndVNode, a warning is printed through PROF_WARN for the first
 * INSTR_PROF_MAX_VP_WARNS occurrences; else a node is claimed by advancing
 * CurrentVNode with COMPILER_RT_PTR_FETCH_ADD (presumably an atomic
 * fetch-and-add -- confirm against the macro definition) and re-checked
 * against EndVNode.
 * NOTE(review): the declaration of Node and the return statements of the
 * static-pool paths are absent from this listing. */
107 static ValueProfNode *allocateOneNode(void) {
110 if (!hasStaticCounters)
111 return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
113 /* Early check to avoid value wrapping around. */
114 if (CurrentVNode + 1 > EndVNode) {
115 if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
116 PROF_WARN("Unable to track new values: %s. "
117 " Consider using option -mllvm -vp-counters-per-site=<n> to "
119 " value profile counters at compile time. \n",
120 "Running out of static counters");
124 Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
125 /* Due to section padding, EndVNode points to a byte which is one past
126 * an incomplete VNode, so we need to skip the last incomplete node. */
127 if (Node + 1 > EndVNode)
/* Core update routine shared by the public instrumentation entry points.
 *
 * Parameters:
 *  - TargetValue:  the profiled value to record.
 *  - Data:         cast to __llvm_profile_data *.
 *  - CounterIndex: index of the value site inside PData->Values.
 *  - CountValue:   amount added to the matching node's Count.
 *
 * Steps visible here:
 *  - if PData->Values is null, allocateValueProfileCounters(PData) is
 *    called;
 *  - the list headed at ValueCounters[CounterIndex] is examined: a node
 *    whose Value equals TargetValue has CountValue added to its Count;
 *    otherwise the node with the smallest Count is remembered in
 *    MinCountVNode, PrevVNode trails the cursor and the cursor moves to
 *    Next;
 *  - once VDataCount >= VPMaxNumValsPerSite no node is added: if
 *    MinCountVNode->Count <= CountValue that node is reused for TargetValue
 *    with Count = CountValue; the listing also shows
 *    MinCountVNode->Count -= CountValue (presumably the alternative branch);
 *  - otherwise a node from allocateOneNode() gets Value and Count set and is
 *    linked with COMPILER_RT_BOOL_CMPXCHG, either as the list head (when the
 *    head is null) or as PrevVNode->Next (when that is null);
 *  - a final check on !Success && !hasStaticCounters follows; its body is
 *    not shown.
 *
 * NOTE(review): the loop header, the early returns, the update of
 * VDataCount, the terminator of the long eviction-policy comment and the
 * tail of the function are absent from this listing -- confirm against the
 * complete file. */
133 static COMPILER_RT_ALWAYS_INLINE void
134 instrumentTargetValueImpl(uint64_t TargetValue, void *Data,
135 uint32_t CounterIndex, uint64_t CountValue) {
136 __llvm_profile_data *PData = (__llvm_profile_data *)Data;
141 if (!PData->Values) {
142 if (!allocateValueProfileCounters(PData))
146 ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
147 ValueProfNode *PrevVNode = NULL;
148 ValueProfNode *MinCountVNode = NULL;
149 ValueProfNode *CurVNode = ValueCounters[CounterIndex];
150 uint64_t MinCount = UINT64_MAX;
152 uint8_t VDataCount = 0;
154 if (TargetValue == CurVNode->Value) {
155 CurVNode->Count += CountValue;
158 if (CurVNode->Count < MinCount) {
159 MinCount = CurVNode->Count;
160 MinCountVNode = CurVNode;
162 PrevVNode = CurVNode;
163 CurVNode = CurVNode->Next;
167 if (VDataCount >= VPMaxNumValsPerSite) {
168 /* Bump down the min count node's count. If it reaches 0,
169 * evict it. This eviction/replacement policy makes hot
170 * targets more sticky while cold targets less so. In other
171 * words, it makes it less likely for the hot targets to be
172 * prematurally evicted during warmup/establishment period,
173 * when their counts are still low. In a special case when
174 * the number of values tracked is reduced to only one, this
175 * policy will guarantee that the dominating target with >50%
176 * total count will survive in the end. Note that this scheme
177 * allows the runtime to track the min count node in an adaptive
178 * manner. It can correct previous mistakes and eventually
179 * lock on a cold target that is alread in stable state.
181 * In very rare cases, this replacement scheme may still lead
182 * to target loss. For instance, out of \c N value slots, \c N-1
183 * slots are occupied by luke warm targets during the warmup
184 * period and the remaining one slot is competed by two or more
185 * very hot targets. If those hot targets occur in an interleaved
186 * way, none of them will survive (gain enough weight to throw out
187 * other established entries) due to the ping-pong effect.
188 * To handle this situation, user can choose to increase the max
189 * number of tracked values per value site. Alternatively, a more
190 * expensive eviction mechanism can be implemented. It requires
191 * the runtime to track the total number of evictions per-site.
192 * When the total number of evictions reaches certain threshold,
193 * the runtime can wipe out more than one lowest count entries
194 * to give space for hot targets.
196 if (MinCountVNode->Count <= CountValue) {
197 CurVNode = MinCountVNode;
198 CurVNode->Value = TargetValue;
199 CurVNode->Count = CountValue;
201 MinCountVNode->Count -= CountValue;
206 CurVNode = allocateOneNode();
209 CurVNode->Value = TargetValue;
210 CurVNode->Count += CountValue;
212 uint32_t Success = 0;
213 if (!ValueCounters[CounterIndex])
215 COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
216 else if (PrevVNode && !PrevVNode->Next)
217 Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);
219 if (!Success && !hasStaticCounters) {
/* Public entry point: records one occurrence of TargetValue at value site
 * CounterIndex of Data by calling instrumentTargetValueImpl() with a count
 * of 1. */
225 COMPILER_RT_VISIBILITY void
226 __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
227 uint32_t CounterIndex) {
228 instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);
/* Public entry point: records TargetValue at value site CounterIndex of Data
 * with a caller-supplied weight by forwarding all four arguments to
 * instrumentTargetValueImpl(). */
230 COMPILER_RT_VISIBILITY void
231 __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
232 uint32_t CounterIndex,
233 uint64_t CountValue) {
234 instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);
/*
238 * The target values are partitioned into multiple regions/ranges. There is one
239 * contiguous region which is precise -- every value in the range is tracked
240 * individually. A value outside the precise region will be collapsed into one
241 * value depending on the region it falls in.
243 * There are three regions:
244 * 1. (-inf, PreciseRangeStart) and (PreciseRangeLast, LargeValue) belong
245 * to one region -- all values here should be mapped to one value of
246 * "PreciseRangeLast + 1".
247 * 2. [PreciseRangeStart, PreciseRangeLast]
248 * 3. Large values: [LargeValue, +inf) maps to one value of LargeValue.
250 * The range for large values is optional. The default value of INT64_MIN
251 * indicates it is not specified.
 */
/* Range-profiling entry point. TargetValue is compared as int64_t:
 *  - if LargeValue is not INT64_MIN and TargetValue >= LargeValue, it is
 *    recorded as LargeValue;
 *  - else if it lies outside [PreciseRangeStart, PreciseRangeLast], it is
 *    recorded as PreciseRangeLast + 1;
 *  - otherwise it is recorded unchanged.
 * The mapped value is forwarded to __llvm_profile_instrument_target(). */
253 COMPILER_RT_VISIBILITY void __llvm_profile_instrument_range(
254 uint64_t TargetValue, void *Data, uint32_t CounterIndex,
255 int64_t PreciseRangeStart, int64_t PreciseRangeLast, int64_t LargeValue) {
257 if (LargeValue != INT64_MIN && (int64_t)TargetValue >= LargeValue)
258 TargetValue = LargeValue;
259 else if ((int64_t)TargetValue < PreciseRangeStart ||
260 (int64_t)TargetValue > PreciseRangeLast)
261 TargetValue = PreciseRangeLast + 1;
263 __llvm_profile_instrument_target(TargetValue, Data, CounterIndex);
/*
267 * A wrapper struct that represents value profile runtime data.
268 * Like the InstrProfRecord class which is used by profiling host tools,
269 * ValueProfRuntimeRecord also implements the abstract interfaces defined in
270 * ValueProfRecordClosure so that the runtime data can be serialized using
271 * the shared C implementation.
 */
/* Fields:
 *  - Data:           the profile data record being described.
 *  - NodesKind:      one entry per value kind (IPVK_Last + 1 entries);
 *                    initializeValueProfRuntimeRecord() points each entry at
 *                    that kind's first slot inside Data->Values, or at
 *                    INSTR_PROF_NULLPTR when Data->Values is null.
 *  - SiteCountArray: SiteCountArray[VK][Site] holds the value count of that
 *                    site; the storage is supplied by the caller of
 *                    initializeValueProfRuntimeRecord(). */
273 typedef struct ValueProfRuntimeRecord {
274 const __llvm_profile_data *Data;
275 ValueProfNode **NodesKind[IPVK_Last + 1];
276 uint8_t **SiteCountArray;
277 } ValueProfRuntimeRecord;
279 /* ValueProfRecordClosure Interface implementation. */
/* Returns the number of value sites of kind VK for record R, read from
 * Data->NumValueSites[VK]. R is treated as a ValueProfRuntimeRecord. */
281 static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
282 return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
/* Totals the per-site value counts of kind VK for record R by adding
 * SiteCountArray[VK][I] for every site I below Data->NumValueSites[VK].
 * A null SiteCountArray[VK] is tested for first.
 * NOTE(review): the declarations of S and I, the result of the null case and
 * the final return are absent from this listing. */
285 static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
287 const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
288 if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
290 for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
291 S += Record->SiteCountArray[VK][I];
/* Returns the value count stored for site S of kind VK, i.e.
 * SiteCountArray[VK][S] of record R. No null or bounds check is made in the
 * lines shown here.
 * NOTE(review): the line declaring the parameter S is absent from this
 * listing. */
295 static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
297 const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
298 return Record->SiteCountArray[VK][S];
/* The single file-scope runtime record and the closure that exposes it.
 * RTRecordClosure is initialized with &RTRecord, the accessors
 * getNumValueSitesRT, getNumValueDataRT and getNumValueDataForSiteRT, and
 * INSTR_PROF_NULLPTR for the slots labelled GetNumValueKinds,
 * RemapValueData, GetValueForSite and AllocValueProfData.
 * NOTE(review): the closing of the initializer is absent from this
 * listing. */
301 static ValueProfRuntimeRecord RTRecord;
302 static ValueProfRecordClosure RTRecordClosure = {
303 &RTRecord, INSTR_PROF_NULLPTR, /* GetNumValueKinds */
304 getNumValueSitesRT, getNumValueDataRT, getNumValueDataForSiteRT,
305 INSTR_PROF_NULLPTR, /* RemapValueData */
306 INSTR_PROF_NULLPTR, /* GetValueForSite, */
307 INSTR_PROF_NULLPTR /* AllocValueProfData */
/* Prepares the file-scope RTRecord for serializing Data.
 * Stores Data and SiteCountArray in RTRecord, then for every value kind I in
 * [0, IPVK_Last] with N = Data->NumValueSites[I] sites:
 *  - RTRecord.NodesKind[I] is set to &Nodes[S] (Nodes being Data->Values),
 *    or to INSTR_PROF_NULLPTR when Data->Values is null;
 *  - for each site J the per-site value count C is written to
 *    SiteCountArray[I][J].
 * Returns NumValueKinds.
 * NOTE(review): the return type, the computation of C and the updates of S
 * and NumValueKinds are absent from this listing -- confirm against the
 * complete file. */
311 initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
312 uint8_t *SiteCountArray[]) {
313 unsigned I, J, S = 0, NumValueKinds = 0;
314 ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
315 RTRecord.Data = Data;
316 RTRecord.SiteCountArray = SiteCountArray;
317 for (I = 0; I <= IPVK_Last; I++) {
318 uint16_t N = Data->NumValueSites[I];
324 RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
325 for (J = 0; J < N; J++) {
326 /* Compute value count for each site. */
328 ValueProfNode *Site =
329 Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
336 RTRecord.SiteCountArray[I][J] = C;
340 return NumValueKinds;
/* Copies N (Value, Count) pairs from a site's node list into Dst[0..N-1].
 * Starts at StartNode when it is non-null, otherwise at
 * RTRecord.NodesKind[VK][Site].
 * NOTE(review): the declaration of I, the step that advances VNode and the
 * return statement are absent from this listing. */
343 static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
344 InstrProfValueData *Dst,
345 ValueProfNode *StartNode, uint32_t N) {
347 ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];
348 for (I = 0; I < N; I++) {
349 Dst[I].Value = VNode->Value;
350 Dst[I].Count = VNode->Count;
/* Parameterless adapter: returns getValueProfDataSize() evaluated on the
 * file-scope RTRecordClosure. */
356 static uint32_t getValueProfDataSizeWrapper(void) {
357 return getValueProfDataSize(&RTRecordClosure);
/* Adapter: returns getNumValueDataForSiteRT() for site S of kind VK on the
 * file-scope RTRecord. */
360 static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
361 return getNumValueDataForSiteRT(&RTRecord, VK, S);
/* Reader table returned by lprofGetVPDataReader(). Its six entries are, in
 * order: initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
 * getFirstValueProfRecord, getNumValueDataForSiteWrapper,
 * getValueProfDataSizeWrapper and getNextNValueData. */
364 static VPDataReaderType TheVPDataReader = {
365 initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
366 getFirstValueProfRecord, getNumValueDataForSiteWrapper,
367 getValueProfDataSizeWrapper, getNextNValueData};
/* Returns the address of the file-scope TheVPDataReader table. */
369 COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader() {
370 return &TheVPDataReader;