2 #include "sanitizer_common/sanitizer_common.h"
4 #include "sanitizer_common/sanitizer_posix.h"
7 #include "xray_interface_internal.h"
9 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
10 #include <sys/types.h>
13 #include <machine/cpu.h>
15 #include <sys/sysctl.h>
16 #elif SANITIZER_FUCHSIA
17 #include <zircon/syscalls.h>
// Reads from file descriptor Fd into the buffer [Begin, End) by calling read()
// in a loop until the requested number of bytes has been consumed or read()
// returns 0.
//
// Returns a pair (total bytes read, success). The flag is false on the path
// that reports a read error, and true when the loop finishes normally.
32 static std::pair<ssize_t, bool>
33 retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
// Number of bytes still wanted: the size of the destination range.
34 auto BytesToRead = std::distance(Begin, End);
36 ssize_t TotalBytesRead = 0;
// The loop also stops when read() yields 0, since the assignment expression
// then evaluates to false.
37 while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
38 if (BytesRead == -1) {
41 Report("Read error; errno = %d\n", errno);
// Hand back the bytes accumulated before the failure, flagged as unsuccessful.
42 return std::make_pair(TotalBytesRead, false);
45 TotalBytesRead += BytesRead;
46 BytesToRead -= BytesRead;
49 return std::make_pair(TotalBytesRead, true);
// Opens Filename read-only, reads up to BufSize bytes from it and parses the
// beginning of that data as a base-10 integer.
//
//   Filename - path of the file to read.
//   Value    - output parameter for the parsed number.
//              NOTE(review): presumably written only when the validity check
//              at the end of this function passes; confirm.
52 static bool readValueFromFile(const char *Filename,
53 long long *Value) XRAY_NEVER_INSTRUMENT {
// O_CLOEXEC keeps the descriptor from being inherited across exec().
54 int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
57 static constexpr size_t BufSize = 256;
// Zero-initialised, so an empty read leaves Line[0] == '\0'.
58 char Line[BufSize] = {};
61 std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
65 const char *End = nullptr;
66 long long Tmp = internal_simple_strtoll(Line, &End, 10);
// Accept the parse only if the buffer is non-empty and the character that End
// points at after parsing is a newline or the terminating NUL.
68 if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
// Determines the TSC frequency from sysfs. cpu0's tsc_freq_khz file is tried
// first; if that cannot be read, cpu0's cpufreq/cpuinfo_max_freq is tried
// instead.
//
// Returns 0 when TSCFrequency is still at its -1 sentinel, otherwise the
// value converted to uint64_t.
75 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
// -1 marks "not determined".
76 long long TSCFrequency = -1;
77 if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
80 } else if (readValueFromFile(
81 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
85 Report("Unable to determine CPU frequency for TSC accounting.\n");
87 return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
89 #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
// Determines the TSC frequency through sysctl on the BSD / Mac branch of the
// platform conditional.
//
// Three alternative queries appear below: a MIB lookup of
// {CTL_MACHDEP, CPU_TSCFREQ}, and by-name lookups of "machdep.tsc.frequency"
// and "machdep.tsc_freq".
// NOTE(review): which query is compiled is presumably selected per platform by
// preprocessor conditionals; confirm against the full file.
90 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
91 long long TSCFrequency = -1;
// In/out size argument handed to the sysctl calls.
92 size_t tscfreqsz = sizeof(TSCFrequency);
94 int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ };
// For each query, a result other than -1 is treated as success.
95 if (internal_sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) {
97 if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
98 &tscfreqsz, NULL, 0) != -1) {
101 if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
104 return static_cast<uint64_t>(TSCFrequency);
106 Report("Unable to determine CPU frequency for TSC accounting.\n");
111 #elif !SANITIZER_FUCHSIA
// getTSCFrequency() variant for the `#elif !SANITIZER_FUCHSIA` branch, i.e.
// for non-Fuchsia targets that none of the earlier platform conditions matched.
112 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
// Byte values and 16-bit sequences written into sleds by the patch* functions
// below. The uint16_t values are stored with a single 16-bit write, so on this
// little-endian target the low byte lands at the sled's first address.

// Opcode byte stored at sled offset 6 for "call <relative 32bit offset>".
118 static constexpr uint8_t CallOpCode = 0xe8;
// Stored over the first two sled bytes to begin "mov r10d, <function id>".
119 static constexpr uint16_t MovR10Seq = 0xba41;
// Two-byte jump sequences used when unpatching; the high byte is the jump
// distance (0x09 = 9, 0x14 = 20, 0x0f = 15).
120 static constexpr uint16_t Jmp9Seq = 0x09eb;
121 static constexpr uint16_t Jmp20Seq = 0x14eb;
122 static constexpr uint16_t Jmp15Seq = 0x0feb;
// Opcode byte stored at sled offset 6 for "jmp <relative 32bit offset>".
123 static constexpr uint8_t JmpOpCode = 0xe9;
// Single byte written back to the start of an exit sled when unpatching.
124 static constexpr uint8_t RetOpCode = 0xc3;
// Stored over the leading jump of an event sled to turn it into a 'nopw'.
125 static constexpr uint16_t NopwSeq = 0x9066;
// Bounds of a signed 32-bit value, widened to int64_t so that computed
// trampoline offsets can be range-checked before being stored in 4 bytes.
127 static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
128 static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
// Patches or unpatches the function-entry sled located at Sled.address().
//
//   Enable     - NOTE(review): expected to select between the patch sequence
//                (function id, call opcode, offset, then MovR10Seq) and the
//                Jmp9Seq restore; confirm against the full function body.
//   FuncId     - function id embedded as the operand of "mov r10d".
//   Sled       - sled record supplying the address to rewrite.
//   Trampoline - entry trampoline that the patched sled calls.
//
// NOTE(review): the bool result presumably reports whether the sled was
// updated (the out-of-range case reports an error); confirm.
130 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
131 const XRaySledEntry &Sled,
132 void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
133 // Here we do the dance of replacing the following sled:
139 // With the following:
141 // mov r10d, <function id>
142 // call <relative 32bit offset to entry trampoline>
144 // We need to do this in the following order:
146 // 1. Put the function id first, 2 bytes from the start of the sled (just
147 // after the 2-byte jmp instruction).
148 // 2. Put the call opcode 6 bytes from the start of the sled.
149 // 3. Put the relative offset 7 bytes from the start of the sled.
150 // 4. Do an atomic write over the jmp instruction for the "mov r10d"
151 // opcode and first operand.
153 // Prerequisite is to compute the relative offset to the trampoline's address.
154 const uint64_t Address = Sled.address();
// Address + 11 is the first byte after the 4-byte offset written at
// Address + 7, i.e. the end of the patched call instruction, which is the
// point the relative offset is measured from.
155 int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
156 (static_cast<int64_t>(Address) + 11);
// The offset is stored in 32 bits below, so it must fit in an int32_t.
157 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
158 Report("XRay Entry trampoline (%p) too far from sled (%p)\n", Trampoline,
159 reinterpret_cast<void *>(Address));
// Steps 1-3: fill in the operand bytes while the leading jmp still skips them.
163 *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
164 *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
165 *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
// Step 4: a single 16-bit release store replaces the jmp and activates the sled.
166 std::atomic_store_explicit(
167 reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
168 std::memory_order_release);
// Unpatch: put the two-byte jump back over the start of the sled.
170 std::atomic_store_explicit(
171 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
172 std::memory_order_release);
173 // FIXME: Write out the nops still?
// Patches or unpatches the function-exit sled located at Sled.address().
//
//   Enable - NOTE(review): expected to select between the patch sequence
//            (function id, jmp opcode, offset, then MovR10Seq) and the
//            RetOpCode restore; confirm against the full function body.
//   FuncId - function id embedded as the operand of "mov r10d".
//   Sled   - sled record supplying the address to rewrite.
//
// The patched sled jumps to __xray_FunctionExit.
// NOTE(review): the bool result presumably reports whether the sled was
// updated (the out-of-range case reports an error); confirm.
178 bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
179 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
180 // Here we do the dance of replacing the following sled:
186 // With the following:
188 // mov r10d, <function id>
189 // jmp <relative 32bit offset to exit trampoline>
191 // 1. Put the function id first, 2 bytes from the start of the sled (just
192 // after the 1-byte ret instruction).
193 // 2. Put the jmp opcode 6 bytes from the start of the sled.
194 // 3. Put the relative offset 7 bytes from the start of the sled.
195 // 4. Do an atomic write over the jmp instruction for the "mov r10d"
196 // opcode and first operand.
198 // Prerequisite is to compute the relative offset to the
199 // __xray_FunctionExit function's address.
200 const uint64_t Address = Sled.address();
// Address + 11 is the first byte after the 4-byte offset written at
// Address + 7, i.e. the end of the patched jmp instruction, which is the
// point the relative offset is measured from.
201 int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
202 (static_cast<int64_t>(Address) + 11);
// The offset is stored in 32 bits below, so it must fit in an int32_t.
203 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
204 Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
205 __xray_FunctionExit, reinterpret_cast<void *>(Address));
// Steps 1-3: fill in the operand bytes before the sled is activated.
209 *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
210 *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode;
211 *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
// Step 4: a single 16-bit release store activates the sled.
212 std::atomic_store_explicit(
213 reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
214 std::memory_order_release);
// Unpatch: only the first byte is rewritten, restoring the one-byte ret.
216 std::atomic_store_explicit(
217 reinterpret_cast<std::atomic<uint8_t> *>(Address), RetOpCode,
218 std::memory_order_release);
219 // FIXME: Write out the nops still?
// Patches or unpatches the tail-call exit sled located at Sled.address().
// The byte layout written is the same as for the entry sled (function id at
// +2, call opcode at +6, 32-bit offset at +7), but the call target is
// __xray_FunctionTailExit.
//
//   Enable - NOTE(review): expected to select between the patch sequence and
//            the Jmp9Seq restore; confirm against the full function body.
//   FuncId - function id embedded as the operand of "mov r10d".
//   Sled   - sled record supplying the address to rewrite.
//
// NOTE(review): the bool result presumably reports whether the sled was
// updated (the out-of-range case reports an error); confirm.
224 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
225 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
226 // Here we do the dance of replacing the tail call sled with a similar
227 // sequence as the entry sled, but calls the tail exit sled instead.
228 const uint64_t Address = Sled.address();
// Offset is measured from Address + 11, the first byte after the 4-byte
// offset written at Address + 7.
229 int64_t TrampolineOffset =
230 reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
231 (static_cast<int64_t>(Address) + 11);
// The offset is stored in 32 bits below, so it must fit in an int32_t.
232 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
233 Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
234 __xray_FunctionTailExit, reinterpret_cast<void *>(Address));
// Fill in the operand bytes first, then activate with one 16-bit store.
238 *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
239 *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
240 *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
241 std::atomic_store_explicit(
242 reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
243 std::memory_order_release);
// Unpatch: put the two-byte jump back over the start of the sled.
245 std::atomic_store_explicit(
246 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
247 std::memory_order_release);
248 // FIXME: Write out the nops still?
// Patches or unpatches the custom-event sled located at Sled.address().
// Only the first two bytes of the sled are rewritten: NopwSeq to enable it,
// or a two-byte jump (Jmp15Seq or Jmp20Seq, chosen by Sled.Version) to
// restore it.
//
//   Enable - NOTE(review): expected to select between the NopwSeq store and
//            the jump restore; confirm against the full function body.
//   FuncId - not referenced by any of the statements shown in this function.
//   Sled   - sled record supplying the address and version.
253 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
254 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
255 // Here we do the dance of replacing the following sled:
260 // jmp +20 // 2 bytes
263 // With the following:
269 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
273 // In Version 1 or 2:
275 // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
276 // to a jmp, use 15 bytes instead.
278 const uint64_t Address = Sled.address();
// Patch: overwrite the leading jump with a 'nopw' using one 16-bit store.
280 std::atomic_store_explicit(
281 reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
282 std::memory_order_release);
// Unpatch: the jump distance to restore depends on the sled version.
284 switch (Sled.Version) {
287 std::atomic_store_explicit(
288 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq,
289 std::memory_order_release);
293 std::atomic_store_explicit(
294 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
295 std::memory_order_release);
// Patches or unpatches the typed-event sled located at Sled.address().
// Only the first two bytes of the sled are rewritten: NopwSeq to enable it,
// or Jmp20Seq to restore the original 'jmp +20'.
//
//   Enable - NOTE(review): expected to select between the NopwSeq store and
//            the Jmp20Seq restore; confirm against the full function body.
//   FuncId - not referenced by any of the statements shown in this function.
//   Sled   - sled record supplying the address to rewrite.
302 bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
303 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
304 // Here we do the dance of replacing the following sled:
307 // jmp +20 // 2 byte instruction
310 // With the following:
316 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
317 // The 20 byte sled stashes three argument registers, calls the trampoline,
318 // unstashes the registers and returns. If the arguments are already in
319 // the correct registers, the stashing and unstashing become equivalently
321 const uint64_t Address = Sled.address();
// Patch: overwrite the leading jump with a 'nopw' using one 16-bit store.
323 std::atomic_store_explicit(
324 reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
325 std::memory_order_release);
// Unpatch: restore the two-byte 'jmp +20'.
327 std::atomic_store_explicit(
328 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
329 std::memory_order_release);
334 #if !SANITIZER_FUCHSIA
335 // We determine whether the CPU we're running on has the correct features we
336 // need. In x86_64 this will be rdtscp support.
//
// Two checks are made: the rdtscp feature bit reported by cpuid, and whether
// getTSCFrequency() yields a non-zero value. Each failed check is reported.
// NOTE(review): the bool result presumably is false when either check fails;
// confirm against the full function body.
337 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
// Output registers of the cpuid instruction; only EDX is inspected below.
338 unsigned int EAX, EBX, ECX, EDX;
340 // We check whether rdtscp support is enabled. According to the x86_64 manual,
341 // level should be set at 0x80000001, and we should have a look at bit 27 in
342 // EDX. That's 0x8000000 (or 1u << 27).
343 __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
// Bit 27 of EDX clear means rdtscp is not available.
345 if (!(EDX & (1u << 27))) {
346 Report("Missing rdtscp support.\n");
349 // Also check whether we can determine the CPU frequency, since if we cannot,
350 // we should use the emulated TSC instead.
// getTSCFrequency() returning 0 is treated as "frequency unknown".
351 if (!getTSCFrequency()) {
352 Report("Unable to determine CPU frequency.\n");
359 } // namespace __xray