2 #include "sanitizer_common/sanitizer_common.h"
4 #include "xray_interface_internal.h"
// Reads from Fd into the buffer [Begin, End) and reports how much was read.
//
// Returns a pair {total bytes read, success flag}. The flag is true when the
// loop ends normally (the requested byte count reached zero, or read()
// returned 0) and false when a read() error was reported.
//
// NOTE(review): this listing omits some original lines (the number at the
// start of each line is the original line number). The declaration of
// BytesRead and whatever sits between the -1 check and the Report call are not
// visible here -- confirm against the full file.
17 static std::pair<ssize_t, bool>
18 retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
19 auto BytesToRead = std::distance(Begin, End);
21 ssize_t TotalBytesRead = 0;
// Keep going while bytes are still wanted and read() returns a non-zero value.
22 while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
23 if (BytesRead == -1) {
// Report the errno and hand back what was read so far, flagged as a failure.
26 Report("Read error; errno = %d\n", errno);
27 return std::make_pair(TotalBytesRead, false);
30 TotalBytesRead += BytesRead;
31 BytesToRead -= BytesRead;
34 return std::make_pair(TotalBytesRead, true);
// Opens Filename read-only, reads up to BufSize bytes of it into a local
// buffer and parses the start of that buffer as a base-10 integer.
//
// NOTE(review): the lines that check Fd, close it, store into *Value and
// return are not visible in this listing -- confirm against the full file.
37 static bool readValueFromFile(const char *Filename,
38 long long *Value) XRAY_NEVER_INSTRUMENT {
39 int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
42 static constexpr size_t BufSize = 256;
// Zero-initialised, so a short read leaves the text NUL-terminated.
43 char Line[BufSize] = {};
// NOTE(review): the read range is the whole buffer; a file of BufSize bytes or
// more would leave Line without a terminating NUL before the parse below --
// confirm whether that can happen for the files read here.
46 std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
51 long long Tmp = internal_simple_strtoll(Line, &End, 10);
// Accept the parse only if the buffer is non-empty and the number is followed
// directly by a newline or the end of the text.
53 if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
// Determines a frequency value for TSC accounting by reading sysfs files.
//
// Tries cpu0/tsc_freq_khz first and falls back to
// cpu0/cpufreq/cpuinfo_max_freq. Returns 0 if TSCFrequency is still -1 at the
// end, otherwise TSCFrequency converted to uint64_t.
//
// NOTE(review): the bodies of both branches are not visible in this listing, so
// any scaling applied to the value read (the first file name suggests kHz)
// cannot be confirmed from here.
60 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
// -1 is the "not determined" sentinel checked at the return below.
61 long long TSCFrequency = -1;
62 if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
65 } else if (readValueFromFile(
66 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
70 Report("Unable to determine CPU frequency for TSC accounting.\n");
72 return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
// Byte patterns written into sleds by the patch functions below. The 16-bit
// values are stored with a single 2-byte atomic store; on little-endian x86 the
// low byte lands first in memory (e.g. 0x09eb is the byte sequence eb 09).

// Opcode byte written 6 bytes into entry and tail-exit sleds ("call").
75 static constexpr uint8_t CallOpCode = 0xe8;
// First two bytes of the "mov r10d, <function id>" instruction.
76 static constexpr uint16_t MovR10Seq = 0xba41;
// 2-byte jmp with displacement 9; restored at the start of entry and tail-exit
// sleds when unpatching.
77 static constexpr uint16_t Jmp9Seq = 0x09eb;
// 2-byte jmp with displacement 20 (0x14), used for custom event sleds.
78 static constexpr uint16_t Jmp20Seq = 0x14eb;
// 2-byte jmp with displacement 15 (0x0f), used for custom event sleds.
79 static constexpr uint16_t Jmp15Seq = 0x0feb;
// Opcode byte written 6 bytes into exit sleds ("jmp").
80 static constexpr uint8_t JmpOpCode = 0xe9;
// Single-byte "ret", restored at the start of exit sleds when unpatching.
81 static constexpr uint8_t RetOpCode = 0xc3;
// 2-byte 'nopw', written over the leading jmp of a custom event sled.
82 static constexpr uint16_t NopwSeq = 0x9066;
// Bounds of int32_t, held as int64_t so a 64-bit trampoline offset can be
// range-checked before it is written as a 32-bit value.
84 static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
85 static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
// Patches or unpatches a function entry sled.
//
// Enable     - selects between writing the instrumentation sequence and
//              restoring the leading jmp.
// FuncId     - function id written as the immediate of "mov r10d".
// Sled       - sled descriptor; Sled.Address is the address that gets patched.
// Trampoline - entry trampoline that the patched-in call targets.
//
// NOTE(review): the branch on Enable and the return statements are not visible
// in this listing -- confirm against the full file.
87 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
88 const XRaySledEntry &Sled,
89 void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
90 // Here we do the dance of replacing the following sled:
96 // With the following:
98 // mov r10d, <function id>
99 // call <relative 32bit offset to entry trampoline>
101 // We need to do this in the following order:
103 // 1. Put the function id first, 2 bytes from the start of the sled (just
104 // after the 2-byte jmp instruction).
105 // 2. Put the call opcode 6 bytes from the start of the sled.
106 // 3. Put the relative offset 7 bytes from the start of the sled.
107 // 4. Do an atomic write over the jmp instruction for the "mov r10d"
108 // opcode and first operand.
110 // Prerequisite is to compute the relative offset to the trampoline's address.
// The +11 is the length of the patched sequence: 2 bytes of mov opcode, a
// 4-byte function id, 1 byte of call opcode and a 4-byte offset. The offset is
// therefore measured from the end of the call instruction.
111 int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
112 (static_cast<int64_t>(Sled.Address) + 11);
// The offset is stored as 32 bits below, so it must fit in an int32_t.
113 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
114 Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
115 Trampoline, reinterpret_cast<void *>(Sled.Address));
// Fill in everything after the first two bytes with plain stores first.
119 *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
120 *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
121 *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
// The first two bytes are replaced last, with a release store that follows the
// plain stores above.
122 std::atomic_store_explicit(
123 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
124 std::memory_order_release);
// Restore path: put the 2-byte jmp back at the start of the sled.
126 std::atomic_store_explicit(
127 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
128 std::memory_order_release);
129 // FIXME: Write out the nops still?
// Patches or unpatches a function exit sled.
//
// Enable - selects between writing the instrumentation sequence and restoring
//          the leading ret.
// FuncId - function id written as the immediate of "mov r10d".
// Sled   - sled descriptor; Sled.Address is the address that gets patched.
//
// NOTE(review): the branch on Enable and the return statements are not visible
// in this listing -- confirm against the full file.
134 bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
135 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
136 // Here we do the dance of replacing the following sled:
142 // With the following:
144 // mov r10d, <function id>
145 // jmp <relative 32bit offset to exit trampoline>
147 // 1. Put the function id first, 2 bytes from the start of the sled (just
148 // after the 1-byte ret instruction).
149 // 2. Put the jmp opcode 6 bytes from the start of the sled.
150 // 3. Put the relative offset 7 bytes from the start of the sled.
151 // 4. Do an atomic write over the first two bytes of the sled for the
152 // "mov r10d" opcode and first operand.
154 // Prerequisite is to compute the relative offset to the
155 // __xray_FunctionExit function's address.
// The +11 is the length of the patched sequence (2 + 4 + 1 + 4 bytes), so the
// offset is measured from the end of the jmp instruction.
156 int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
157 (static_cast<int64_t>(Sled.Address) + 11);
// The offset is stored as 32 bits below, so it must fit in an int32_t.
158 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
159 Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
160 __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address));
// Fill in everything after the first two bytes with plain stores first.
164 *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
165 *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
166 *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
// The first two bytes are replaced last, with a release store that follows the
// plain stores above.
167 std::atomic_store_explicit(
168 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
169 std::memory_order_release);
// Restore path: only the first byte is rewritten, back to a ret.
171 std::atomic_store_explicit(
172 reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
173 std::memory_order_release);
174 // FIXME: Write out the nops still?
// Patches or unpatches a tail-call exit sled.
//
// Enable - selects between writing the instrumentation sequence and restoring
//          the leading jmp.
// FuncId - function id written as the immediate of "mov r10d".
// Sled   - sled descriptor; Sled.Address is the address that gets patched.
//
// NOTE(review): the branch on Enable and the return statements are not visible
// in this listing -- confirm against the full file.
179 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
180 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
181 // Here we do the dance of replacing the tail call sled with a similar
182 // sequence as the entry sled, but calls the tail exit trampoline instead.
// The +11 is the length of the patched sequence (2 + 4 + 1 + 4 bytes), so the
// offset is measured from the end of the call instruction.
183 int64_t TrampolineOffset =
184 reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
185 (static_cast<int64_t>(Sled.Address) + 11);
// The offset is stored as 32 bits below, so it must fit in an int32_t. The
// diagnostic names the tail exit trampoline, which is the one checked here.
186 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
187 Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
188 __xray_FunctionTailExit, reinterpret_cast<void *>(Sled.Address));
// Fill in everything after the first two bytes with plain stores first.
192 *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
193 *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
194 *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
// The first two bytes are replaced last, with a release store that follows the
// plain stores above.
195 std::atomic_store_explicit(
196 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
197 std::memory_order_release);
// Restore path: put the 2-byte jmp back at the start of the sled.
199 std::atomic_store_explicit(
200 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
201 std::memory_order_release);
202 // FIXME: Write out the nops still?
// Patches or unpatches a custom event sled by rewriting only its first two
// bytes: a 'nopw' when enabling, a jmp over the sled body when restoring.
//
// FuncId is not used by any line visible in this listing.
//
// NOTE(review): the branch on Enable, the case labels of the switch and the
// return statement are not visible in this listing -- confirm against the full
// file.
207 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
208 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
209 // Here we do the dance of replacing the following sled:
214 // jmp +20 // 2 bytes
217 // With the following:
223 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
229 // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
230 // to a jmp, use 15 bytes instead.
// Overwrite the leading jmp with a 'nopw'.
233 std::atomic_store_explicit(
234 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
235 std::memory_order_release);
// Restore path: the jump distance written back depends on the sled version.
237 switch (Sled.Version) {
239 std::atomic_store_explicit(
240 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp15Seq,
241 std::memory_order_release);
245 std::atomic_store_explicit(
246 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
247 std::memory_order_release);
254 // We determine whether the CPU we're running on has the correct features we
255 // need. In x86_64 this will be rdtscp support.
// Reports a message when rdtscp support is missing or when getTSCFrequency()
// returns 0.
// NOTE(review): the return statements are not visible in this listing --
// confirm against the full file.
256 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
257 unsigned int EAX, EBX, ECX, EDX;
259 // We check whether rdtscp support is enabled. According to the x86_64 manual,
260 // level should be set at 0x80000001, and we should have a look at bit 27 in
261 // EDX. That's 0x8000000 (or 1u << 27).
// NOTE(review): the result of __get_cpuid is discarded here; if the call can
// fail without writing EDX, the test below reads an uninitialised value --
// confirm against the cpuid.h contract.
262 __get_cpuid(0x80000001, &EAX, &EBX, &ECX, &EDX);
263 if (!(EDX & (1u << 27))) {
264 Report("Missing rdtscp support.\n");
267 // Also check whether we can determine the CPU frequency, since if we cannot,
268 // we should use the emulated TSC instead.
269 if (!getTSCFrequency()) {
270 Report("Unable to determine CPU frequency.\n");
276 } // namespace __xray