2 #include "sanitizer_common/sanitizer_common.h"
4 #include "xray_interface_internal.h"
6 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD
10 #include <machine/cpu.h>
12 #include <sys/sysctl.h>
27 static std::pair<ssize_t, bool>
28 retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
29 auto BytesToRead = std::distance(Begin, End);
31 ssize_t TotalBytesRead = 0;
32 while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
33 if (BytesRead == -1) {
36 Report("Read error; errno = %d\n", errno);
37 return std::make_pair(TotalBytesRead, false);
40 TotalBytesRead += BytesRead;
41 BytesToRead -= BytesRead;
44 return std::make_pair(TotalBytesRead, true);
47 static bool readValueFromFile(const char *Filename,
48 long long *Value) XRAY_NEVER_INSTRUMENT {
49 int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
52 static constexpr size_t BufSize = 256;
53 char Line[BufSize] = {};
56 std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
60 const char *End = nullptr;
61 long long Tmp = internal_simple_strtoll(Line, &End, 10);
63 if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
70 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
71 long long TSCFrequency = -1;
72 if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
75 } else if (readValueFromFile(
76 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
80 Report("Unable to determine CPU frequency for TSC accounting.\n");
82 return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
84 #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD
85 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
86 long long TSCFrequency = -1;
87 size_t tscfreqsz = sizeof(TSCFrequency);
89 int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ };
90 if (sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) {
93 if (sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
96 return static_cast<uint64_t>(TSCFrequency);
98 Report("Unable to determine CPU frequency for TSC accounting.\n");
104 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
// Raw little-endian x86-64 instruction encodings written into the sleds when
// patching/unpatching. The uint16_t sequences are two-byte instructions
// stored with a single atomic 16-bit write.
static constexpr uint8_t CallOpCode = 0xe8;   // call <rel32>
static constexpr uint16_t MovR10Seq = 0xba41; // 41 ba: mov r10d, <imm32>
static constexpr uint16_t Jmp9Seq = 0x09eb;   // eb 09: jmp +9
static constexpr uint16_t Jmp20Seq = 0x14eb;  // eb 14: jmp +20
static constexpr uint16_t Jmp15Seq = 0x0feb;  // eb 0f: jmp +15
static constexpr uint8_t JmpOpCode = 0xe9;    // jmp <rel32>
static constexpr uint8_t RetOpCode = 0xc3;    // ret
static constexpr uint16_t NopwSeq = 0x9066;   // 66 90: nopw

// A rel32 displacement can only reach this far from the patch site; offsets
// outside [MinOffset, MaxOffset] cannot be encoded in a call/jmp rel32.
static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
122 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
123 const XRaySledEntry &Sled,
124 void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
125 // Here we do the dance of replacing the following sled:
131 // With the following:
133 // mov r10d, <function id>
134 // call <relative 32bit offset to entry trampoline>
136 // We need to do this in the following order:
138 // 1. Put the function id first, 2 bytes from the start of the sled (just
139 // after the 2-byte jmp instruction).
140 // 2. Put the call opcode 6 bytes from the start of the sled.
141 // 3. Put the relative offset 7 bytes from the start of the sled.
142 // 4. Do an atomic write over the jmp instruction for the "mov r10d"
143 // opcode and first operand.
145 // Prerequisite is to compute the relative offset to the trampoline's address.
146 int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
147 (static_cast<int64_t>(Sled.Address) + 11);
148 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
149 Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
150 Trampoline, reinterpret_cast<void *>(Sled.Address));
154 *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
155 *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
156 *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
157 std::atomic_store_explicit(
158 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
159 std::memory_order_release);
161 std::atomic_store_explicit(
162 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
163 std::memory_order_release);
164 // FIXME: Write out the nops still?
169 bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
170 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
171 // Here we do the dance of replacing the following sled:
177 // With the following:
179 // mov r10d, <function id>
180 // jmp <relative 32bit offset to exit trampoline>
182 // 1. Put the function id first, 2 bytes from the start of the sled (just
183 // after the 1-byte ret instruction).
184 // 2. Put the jmp opcode 6 bytes from the start of the sled.
185 // 3. Put the relative offset 7 bytes from the start of the sled.
186 // 4. Do an atomic write over the jmp instruction for the "mov r10d"
187 // opcode and first operand.
189 // Prerequisite is to compute the relative offset fo the
190 // __xray_FunctionExit function's address.
191 int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
192 (static_cast<int64_t>(Sled.Address) + 11);
193 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
194 Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
195 __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address));
199 *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
200 *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
201 *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
202 std::atomic_store_explicit(
203 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
204 std::memory_order_release);
206 std::atomic_store_explicit(
207 reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
208 std::memory_order_release);
209 // FIXME: Write out the nops still?
214 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
215 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
216 // Here we do the dance of replacing the tail call sled with a similar
217 // sequence as the entry sled, but calls the tail exit sled instead.
218 int64_t TrampolineOffset =
219 reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
220 (static_cast<int64_t>(Sled.Address) + 11);
221 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
222 Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
223 __xray_FunctionTailExit, reinterpret_cast<void *>(Sled.Address));
227 *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
228 *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
229 *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
230 std::atomic_store_explicit(
231 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
232 std::memory_order_release);
234 std::atomic_store_explicit(
235 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
236 std::memory_order_release);
237 // FIXME: Write out the nops still?
242 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
243 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
244 // Here we do the dance of replacing the following sled:
249 // jmp +20 // 2 bytes
252 // With the following:
258 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
264 // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
265 // to a jmp, use 15 bytes instead.
268 std::atomic_store_explicit(
269 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
270 std::memory_order_release);
272 switch (Sled.Version) {
274 std::atomic_store_explicit(
275 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp15Seq,
276 std::memory_order_release);
280 std::atomic_store_explicit(
281 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
282 std::memory_order_release);
289 bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
290 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
291 // Here we do the dance of replacing the following sled:
294 // jmp +20 // 2 byte instruction
297 // With the following:
303 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
304 // The 20 byte sled stashes three argument registers, calls the trampoline,
305 // unstashes the registers and returns. If the arguments are already in
306 // the correct registers, the stashing and unstashing become equivalently
309 std::atomic_store_explicit(
310 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
311 std::memory_order_release);
313 std::atomic_store_explicit(
314 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
315 std::memory_order_release);
320 // We determine whether the CPU we're running on has the correct features we
321 // need. In x86_64 this will be rdtscp support.
322 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
323 unsigned int EAX, EBX, ECX, EDX;
325 // We check whether rdtscp support is enabled. According to the x86_64 manual,
326 // level should be set at 0x80000001, and we should have a look at bit 27 in
327 // EDX. That's 0x8000000 (or 1u << 27).
328 __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
330 if (!(EDX & (1u << 27))) {
331 Report("Missing rdtscp support.\n");
334 // Also check whether we can determine the CPU frequency, since if we cannot,
335 // we should use the emulated TSC instead.
336 if (!getTSCFrequency()) {
337 Report("Unable to determine CPU frequency.\n");
343 } // namespace __xray