1 //===-- xray_arm.cc ---------------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file is a part of XRay, a dynamic runtime instrumentation system.
12 // Implementation of ARM-specific routines (32-bit).
14 //===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_common.h"
#include "xray_defs.h"
#include "xray_emulate_tsc.h"
#include "xray_interface_internal.h"
#include <atomic>
#include <cassert>
#include <cstdint>
22 extern "C" void __clear_cache(void* start, void* end);
26 uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
27 // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
28 // not have a constant frequency like TSC on x86[_64]; it may go faster or
29 // slower depending on CPU's turbo or power saving modes. Furthermore, to
30 // read from CP15 on ARM a kernel modification or a driver is needed.
31 // We can not require this from users of compiler-rt.
32 // So on ARM we use clock_gettime(2) which gives the result in nanoseconds.
33 // To get the measurements per second, we scale this by the number of
34 // nanoseconds per second, pretending that the TSC frequency is 1GHz and
35 // one TSC tick is 1 nanosecond.
36 return NanosecondsPerSecond;
// The machine codes (ARM state, condition "always") for the fixed
// instructions used in runtime patching. The MOVW/MOVT instructions are not
// listed here because their encodings depend on the register and immediate.
enum class PatchOpcodes : uint32_t {
  PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr}
  PO_BlxIp = 0xE12FFF3C,    // BLX ip
  PO_PopR0Lr = 0xE8BD4001,  // POP {r0, lr}
  PO_B20 = 0xEA000005       // B #20
};
// Spreads the low 16 bits of |Value| into the immediate fields of a
// MOVW/MOVT encoding: the low 12 bits stay in place and the next nibble is
// moved up by four bits, leaving room for the destination register field.
// The upper 16 bits of |Value| are ignored.
//
// 0xUUUUWXYZ -> 0x000W0XYZ
inline static uint32_t getMovwMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT {
  const uint32_t Imm12 = Value & 0xfff;
  const uint32_t Imm4 = (Value & 0xf000) << 4;
  return Imm12 | Imm4;
}
52 // 0xWXYZUUUU -> 0x000W0XYZ
53 inline static uint32_t getMovtMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT {
54 return getMovwMask(Value >> 16);
57 // Writes the following instructions:
58 // MOVW R<regNo>, #<lower 16 bits of the |Value|>
59 // MOVT R<regNo>, #<higher 16 bits of the |Value|>
60 inline static uint32_t *
61 write32bitLoadReg(uint8_t regNo, uint32_t *Address,
62 const uint32_t Value) XRAY_NEVER_INSTRUMENT {
63 // This is a fatal error: we cannot just report it and continue execution.
64 assert(regNo <= 15 && "Register number must be 0 to 15.");
65 // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ
66 *Address = (0xE3000000 | (uint32_t(regNo) << 12) | getMovwMask(Value));
68 // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ
69 *Address = (0xE3400000 | (uint32_t(regNo) << 12) | getMovtMask(Value));
73 // Writes the following instructions:
74 // MOVW r0, #<lower 16 bits of the |Value|>
75 // MOVT r0, #<higher 16 bits of the |Value|>
76 inline static uint32_t *
77 Write32bitLoadR0(uint32_t *Address,
78 const uint32_t Value) XRAY_NEVER_INSTRUMENT {
79 return write32bitLoadReg(0, Address, Value);
82 // Writes the following instructions:
83 // MOVW ip, #<lower 16 bits of the |Value|>
84 // MOVT ip, #<higher 16 bits of the |Value|>
85 inline static uint32_t *
86 Write32bitLoadIP(uint32_t *Address,
87 const uint32_t Value) XRAY_NEVER_INSTRUMENT {
88 return write32bitLoadReg(12, Address, Value);
91 inline static bool patchSled(const bool Enable, const uint32_t FuncId,
92 const XRaySledEntry &Sled,
93 void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
94 // When |Enable| == true,
95 // We replace the following compile-time stub (sled):
101 // With the following runtime patch:
105 // MOVW r0, #<lower 16 bits of function ID>
106 // MOVT r0, #<higher 16 bits of function ID>
107 // MOVW ip, #<lower 16 bits of address of TracingHook>
108 // MOVT ip, #<higher 16 bits of address of TracingHook>
112 // Replacement of the first 4-byte instruction should be the last and atomic
113 // operation, so that the user code which reaches the sled concurrently
114 // either jumps over the whole sled, or executes the whole sled when the
117 // When |Enable|==false, we set back the first instruction in the sled to be
120 uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
121 uint32_t *CurAddress = FirstAddress + 1;
124 Write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId));
126 Write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook));
127 *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp);
129 *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr);
131 std::atomic_store_explicit(
132 reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
133 uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release);
135 std::atomic_store_explicit(
136 reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
137 uint32_t(PatchOpcodes::PO_B20), std::memory_order_release);
139 __clear_cache(reinterpret_cast<char*>(FirstAddress),
140 reinterpret_cast<char*>(CurAddress));
144 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
145 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
146 return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry);
149 bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
150 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
151 return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
154 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
155 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
156 // FIXME: In the future we'd need to distinguish between non-tail exits and
157 // tail exits for better information preservation.
158 return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
161 } // namespace __xray