llvm.org GIT mirror llvm / 916b366
[XRay] ARM 32-bit no-Thumb support in LLVM This is a port of XRay to ARM 32-bit, without Thumb support yet. The XRay instrumentation support is moving up to AsmPrinter. This is one of 3 commits to different repositories of XRay ARM port. The other 2 are: https://reviews.llvm.org/D23932 (Clang test) https://reviews.llvm.org/D23933 (compiler-rt) Differential Revision: https://reviews.llvm.org/D23931 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@281878 91177308-0d34-0410-b5e6-96231b3b80d8 Dean Michael Berris 4 years ago
17 changed file(s) with 328 addition(s) and 63 deletion(s). Raw diff Collapse all Expand all
183183 MCSymbol *getSymbol(const GlobalValue *GV) const;
184184
185185 //===------------------------------------------------------------------===//
186 // XRay instrumentation implementation.
187 //===------------------------------------------------------------------===//
188 public:
// Identifies what kind of XRay sled an entry in the XRay table refers to.
// Values are stored in a single byte (the underlying type is uint8_t).
enum class SledKind : uint8_t {
  FUNCTION_ENTER = 0, // Sled emitted for PATCHABLE_FUNCTION_ENTER.
  FUNCTION_EXIT = 1,  // Sled emitted for a function exit.
  TAIL_CALL = 2,      // Sled associated with a tail call.
};
195
196 // The table will contain these structs that point to the sled, the function
197 // containing the sled, and what kind of sled (and whether they should always
198 // be instrumented).
199 struct XRayFunctionEntry {
200 const MCSymbol *Sled;
201 const MCSymbol *Function;
202 SledKind Kind;
203 bool AlwaysInstrument;
204 const class Function *Fn;
205 };
206
207 // All the sleds to be emitted.
208 std::vector Sleds;
209
210 // Helper function to record a given XRay sled.
211 void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);
212
213 //===------------------------------------------------------------------===//
186214 // MachineFunctionPass Implementation.
187215 //===------------------------------------------------------------------===//
188216
// Pseudo-instruction that stands in for a return instruction so that it can
// be patched; it is itself marked as a return.
def PATCHABLE_RET : Instruction {
  let OutOperandList = (outs unknown:$dst);
  let InOperandList = (ins variable_ops);
  let AsmString = "# XRay Function Patchable RET.";
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let isReturn = 1;
}
// Marker pseudo-instruction placed in front of a return instruction; it takes
// no operands and is not itself a return.
def PATCHABLE_FUNCTION_EXIT : Instruction {
  let OutOperandList = (outs);
  let InOperandList = (ins);
  let AsmString = "# XRay Function Exit.";
  let usesCustomInserter = 1;
  // The sled this expands to can be patched at run time into a call to a
  // tracing trampoline, so conservatively treat it as having side effects to
  // keep it from being removed or reordered.
  let hasSideEffects = 1;
  let isReturn = 0; // Original return instruction will follow
}
984992 def PATCHABLE_TAIL_CALL : Instruction {
985993 let OutOperandList = (outs unknown:$dst);
152152 /// Wraps a return instruction and its operands to enable adding nop sleds
153153 /// either before or after the return. The nop sleds are useful for inserting
154154 /// instrumentation instructions at runtime.
155 /// The patch here replaces the return instruction.
155156 HANDLE_TARGET_OPCODE(PATCHABLE_RET)
157
158 /// This is a marker instruction which gets translated into a nop sled, useful
159 /// for inserting instrumentation instructions at runtime.
160 /// The patch here prepends the return instruction.
161 /// The same thing as in x86_64 is not possible for ARM because it has multiple
162 /// return instructions. Furthermore, the CPU allows parameterized and even
163 /// conditional return instructions. In the current ARM implementation we are
164 /// making use of the fact that currently LLVM doesn't seem to generate
165 /// conditional return instructions.
166 /// On ARM, the same instruction can be used for popping multiple registers
167 /// from the stack and returning (it just pops pc register too), and LLVM
168 /// generates it sometimes. So we can't insert the sled between this stack
169 /// adjustment and the return without splitting the original instruction into 2
170 /// instructions. So on ARM, rather than jumping into the exit trampoline, we
171 /// call it, it does the tracing, preserves the stack and returns.
172 HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT)
156173
157174 /// Wraps a tail call instruction and its operands to enable adding nop sleds
158175 /// either before or after the tail exit. We use this as a disambiguation from
7070
7171 virtual ~TargetSubtargetInfo();
7272
73 virtual bool isXRaySupported() const { return false; }
74
7375 // Interfaces to the major aspects of target machine information:
7476 //
7577 // -- Instruction opcode and operand information
25992599 AsmPrinterHandler::~AsmPrinterHandler() {}
26002600
26012601 void AsmPrinterHandler::markFunctionEnd() {}
2602
2603 void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
2604 SledKind Kind) {
2605 auto Fn = MI.getParent()->getParent()->getFunction();
2606 auto Attr = Fn->getFnAttribute("function-instrument");
2607 bool AlwaysInstrument =
2608 Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
2609 Sleds.emplace_back(
2610 XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn });
2611 }
3333 }
3434
3535 bool runOnMachineFunction(MachineFunction &MF) override;
36
37 private:
38 // Replace the original RET instruction with the exit sled code ("patchable
39 // ret" pseudo-instruction), so that at runtime XRay can replace the sled
40 // with a code jumping to XRay trampoline, which calls the tracing handler
41 // and, in the end, issues the RET instruction.
42 // This is the approach to go on CPUs which have a single RET instruction,
43 // like x86/x86_64.
44 void replaceRetWithPatchableRet(MachineFunction &MF,
45 const TargetInstrInfo *TII);
46 // Prepend the original return instruction with the exit sled code ("patchable
47 // function exit" pseudo-instruction), preserving the original return
48 // instruction just after the exit sled code.
49 // This is the approach to go on CPUs which have multiple options for the
50 // return instruction, like ARM. For such CPUs we can't just jump into the
51 // XRay trampoline and issue a single return instruction there. We rather
52 // have to call the trampoline and return from it to the original return
53 // instruction of the function being instrumented.
54 void prependRetWithPatchableExit(MachineFunction &MF,
55 const TargetInstrInfo *TII);
3656 };
37 }
57 } // anonymous namespace
3858
39 bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
40 auto &F = *MF.getFunction();
41 auto InstrAttr = F.getFnAttribute("function-instrument");
42 bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) &&
43 InstrAttr.isStringAttribute() &&
44 InstrAttr.getValueAsString() == "xray-always";
45 Attribute Attr = F.getFnAttribute("xray-instruction-threshold");
46 unsigned XRayThreshold = 0;
47 if (!AlwaysInstrument) {
48 if (Attr.hasAttribute(Attribute::None) || !Attr.isStringAttribute())
49 return false; // XRay threshold attribute not found.
50 if (Attr.getValueAsString().getAsInteger(10, XRayThreshold))
51 return false; // Invalid value for threshold.
52 if (F.size() < XRayThreshold)
53 return false; // Function is too small.
54 }
55
56 // FIXME: Do the loop triviality analysis here or in an earlier pass.
57
58 // First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the
59 // MachineFunction.
60 auto &FirstMBB = *MF.begin();
61 auto &FirstMI = *FirstMBB.begin();
62 auto *TII = MF.getSubtarget().getInstrInfo();
63 BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
64 TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
65
66 // Then we look for *all* terminators and returns, then replace those with
59 void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF,
60 const TargetInstrInfo *TII)
61 {
62 // We look for *all* terminators and returns, then replace those with
6763 // PATCHABLE_RET instructions.
6864 SmallVector Terminators;
6965 for (auto &MBB : MF) {
9187
9288 for (auto &I : Terminators)
9389 I->eraseFromParent();
90 }
9491
92 void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF,
93 const TargetInstrInfo *TII)
94 {
95 for (auto &MBB : MF) {
96 for (auto &T : MBB.terminators()) {
97 if (T.isReturn()) {
98 // Prepend the return instruction with PATCHABLE_FUNCTION_EXIT
99 BuildMI(MBB, T, T.getDebugLoc(),
100 TII->get(TargetOpcode::PATCHABLE_FUNCTION_EXIT));
101 }
102 }
103 }
104 }
105
106 bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
107 auto &F = *MF.getFunction();
108 auto InstrAttr = F.getFnAttribute("function-instrument");
109 bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) &&
110 InstrAttr.isStringAttribute() &&
111 InstrAttr.getValueAsString() == "xray-always";
112 Attribute Attr = F.getFnAttribute("xray-instruction-threshold");
113 unsigned XRayThreshold = 0;
114 if (!AlwaysInstrument) {
115 if (Attr.hasAttribute(Attribute::None) || !Attr.isStringAttribute())
116 return false; // XRay threshold attribute not found.
117 if (Attr.getValueAsString().getAsInteger(10, XRayThreshold))
118 return false; // Invalid value for threshold.
119 if (F.size() < XRayThreshold)
120 return false; // Function is too small.
121 }
122
123 auto &FirstMBB = *MF.begin();
124 auto &FirstMI = *FirstMBB.begin();
125
126 if (!MF.getSubtarget().isXRaySupported()) {
127 FirstMI.emitError("An attempt to perform XRay instrumentation for an"
128 " unsupported target.");
129 return false;
130 }
131
132 // FIXME: Do the loop triviality analysis here or in an earlier pass.
133
134 // First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the
135 // MachineFunction.
136 auto *TII = MF.getSubtarget().getInstrInfo();
137 BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
138 TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
139
140 switch (MF.getTarget().getTargetTriple().getArch()) {
141 case Triple::ArchType::arm:
142 case Triple::ArchType::thumb:
143 // For the architectures which don't have a single return instruction
144 prependRetWithPatchableExit(MF, TII);
145 break;
146 default:
147 // For the architectures that have a single return instruction (such as
148 // RETQ on x86_64).
149 replaceRetWithPatchableRet(MF, TII);
150 break;
151 }
95152 return true;
96153 }
97154
162162
163163 // Emit the rest of the function body.
164164 EmitFunctionBody();
165
166 // Emit the XRay table for this function.
167 EmitXRayTable();
165168
166169 // If we need V4T thumb mode Register Indirect Jump pads, emit them.
167170 // These are created per function, rather than per TU, since it's
20182021 .addReg(0));
20192022 return;
20202023 }
2024 case ARM::PATCHABLE_FUNCTION_ENTER:
2025 LowerPATCHABLE_FUNCTION_ENTER(*MI);
2026 return;
2027 case ARM::PATCHABLE_FUNCTION_EXIT:
2028 LowerPATCHABLE_FUNCTION_EXIT(*MI);
2029 return;
20212030 }
20222031
20232032 MCInst TmpInst;
100100 // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
101101 bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
102102
103 //===------------------------------------------------------------------===//
104 // XRay implementation
105 //===------------------------------------------------------------------===//
106 public:
107 // XRay-specific lowering for ARM.
108 void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
109 void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
110 // Helper function that emits the XRay sleds we've collected for a particular
111 // function.
112 void EmitXRayTable();
113
103114 private:
115 void EmitSled(const MachineInstr &MI, SledKind Kind);
104116
105117 // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
106118 void emitAttributes();
9898 public:
9999 // Return whether the target has an explicit NOP encoding.
100100 bool hasNOP() const;
101
102 virtual void getNoopForElfTarget(MCInst &NopInst) const {
103 getNoopForMachoTarget(NopInst);
104 }
101105
102106 // Return the non-pre/post incrementing version of 'Opc'. Return 0
103107 // if there is not such an opcode.
2020 #include "llvm/IR/Mangler.h"
2121 #include "llvm/MC/MCExpr.h"
2222 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCContext.h"
24 #include "llvm/MC/MCSymbolELF.h"
25 #include "llvm/MC/MCSectionELF.h"
26 #include "llvm/MC/MCInstBuilder.h"
27 #include "llvm/MC/MCStreamer.h"
2328 using namespace llvm;
2429
2530
149154 }
150155 }
151156 }
157
158 void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
159 {
160 if (MI.getParent()->getParent()->getInfo()
161 ->isThumbFunction())
162 {
163 MI.emitError("An attempt to perform XRay instrumentation for a"
164 " Thumb function (not supported). Detected when emitting a sled.");
165 return;
166 }
167 static const int8_t NoopsInSledCount = 6;
168 // We want to emit the following pattern:
169 //
170 // .Lxray_sled_N:
171 // ALIGN
172 // B #20
173 // ; 6 NOP instructions (24 bytes)
174 // .tmpN
175 //
176 // We need the 24 bytes (6 instructions) because at runtime, we'd be patching
177 // over the full 28 bytes (7 instructions) with the following pattern:
178 //
179 // PUSH{ r0, lr }
180 // MOVW r0, #
181 // MOVT r0, #
182 // MOVW ip, #
183 // MOVT ip, #
184 // BLX ip
185 // POP{ r0, lr }
186 //
187 OutStreamer->EmitCodeAlignment(4);
188 auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
189 OutStreamer->EmitLabel(CurSled);
190 auto Target = OutContext.createTempSymbol();
191
192 // Emit "B #20" instruction, which jumps over the next 24 bytes (because
193 // register pc is 8 bytes ahead of the jump instruction by the moment CPU
194 // is executing it).
195 // By analogy to ARMAsmPrinter::emitPseudoExpansionLowering() |case ARM::B|.
196 // It is not clear why |addReg(0)| is needed (the last operand).
197 EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc).addImm(20)
198 .addImm(ARMCC::AL).addReg(0));
199
200 MCInst Noop;
201 Subtarget->getInstrInfo()->getNoopForElfTarget(Noop);
202 for (int8_t I = 0; I < NoopsInSledCount; I++)
203 {
204 OutStreamer->EmitInstruction(Noop, getSubtargetInfo());
205 }
206
207 OutStreamer->EmitLabel(Target);
208 recordSled(CurSled, MI, Kind);
209 }
210
211 void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI)
212 {
213 EmitSled(MI, SledKind::FUNCTION_ENTER);
214 }
215
216 void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI)
217 {
218 EmitSled(MI, SledKind::FUNCTION_EXIT);
219 }
220
221 void ARMAsmPrinter::EmitXRayTable()
222 {
223 if (Sleds.empty())
224 return;
225 if (Subtarget->isTargetELF()) {
226 auto *Section = OutContext.getELFSection(
227 "xray_instr_map", ELF::SHT_PROGBITS,
228 ELF::SHF_ALLOC | ELF::SHF_GROUP | ELF::SHF_MERGE, 0,
229 CurrentFnSym->getName());
230 auto PrevSection = OutStreamer->getCurrentSectionOnly();
231 OutStreamer->SwitchSection(Section);
232 for (const auto &Sled : Sleds) {
233 OutStreamer->EmitSymbolValue(Sled.Sled, 4);
234 OutStreamer->EmitSymbolValue(CurrentFnSym, 4);
235 auto Kind = static_cast(Sled.Kind);
236 OutStreamer->EmitBytes(
237 StringRef(reinterpret_cast(&Kind), 1));
238 OutStreamer->EmitBytes(
239 StringRef(reinterpret_cast(&Sled.AlwaysInstrument), 1));
240 OutStreamer->EmitZeros(6);
241 }
242 OutStreamer->SwitchSection(PrevSection);
243 }
244 Sleds.clear();
245 }
100100 : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
101101 TLInfo(TM, *this) {}
102102
103 bool ARMSubtarget::isXRaySupported() const {
104 // We don't currently suppport Thumb, but Windows requires Thumb.
105 return hasV6Ops() && hasARMOps() && !isTargetWindows();
106 }
107
103108 void ARMSubtarget::initializeEnvironment() {
104109 // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this
105110 // directly from it, but we can try to make sure they're consistent when both
539539 }
540540 bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
541541
542 virtual bool isXRaySupported() const override;
543
542544 bool isAPCS_ABI() const;
543545 bool isAAPCS_ABI() const;
544546 bool isAAPCS16_ABI() const;
7070
7171 StackMapShadowTracker SMShadowTracker;
7272
// Identifies what kind of XRay sled an entry in the XRay table refers to.
// Values are stored in a single byte (the underlying type is uint8_t).
enum class SledKind : uint8_t {
  FUNCTION_ENTER = 0, // Sled for a function entry.
  FUNCTION_EXIT = 1,  // Sled for a function exit.
  TAIL_CALL = 2,      // Sled for a tail call.
};
79
80 // The table will contain these structs that point to the sled, the function
81 // containing the sled, and what kind of sled (and whether they should always
82 // be instrumented).
83 struct XRayFunctionEntry {
84 const MCSymbol *Sled;
85 const MCSymbol *Function;
86 SledKind Kind;
87 bool AlwaysInstrument;
88 const class Function *Fn;
89 };
90
91 // All the sleds to be emitted.
92 std::vector Sleds;
93
9473 // All instructions emitted by the X86AsmPrinter should use this helper
9574 // method.
9675 //
11695 // function.
11796 void EmitXRayTable();
11897
119 // Helper function to record a given XRay sled.
120 void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);
12198 public:
12299 explicit X86AsmPrinter(TargetMachine &TM,
123100 std::unique_ptr Streamer)
10171017
10181018 EmitNops(*OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(),
10191019 getSubtargetInfo());
1020 }
1021
1022 void X86AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
1023 SledKind Kind) {
1024 auto Fn = MI.getParent()->getParent()->getFunction();
1025 auto Attr = Fn->getFnAttribute("function-instrument");
1026 bool AlwaysInstrument =
1027 Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
1028 Sleds.emplace_back(
1029 XRayFunctionEntry{Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn});
10301020 }
10311021
10321022 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
459459 bool hasPKU() const { return HasPKU; }
460460 bool hasMPX() const { return HasMPX; }
461461
462 virtual bool isXRaySupported() const override { return is64Bit(); }
463
462464 bool isAtom() const { return X86ProcFamily == IntelAtom; }
463465 bool isSLM() const { return X86ProcFamily == IntelSLM; }
464466 bool useSoftFloat() const { return UseSoftFloat; }
0 ; RUN: llc -filetype=asm -o - -mtriple=armv6-unknown-linux-gnu < %s | FileCheck %s
1
; Checks the XRay sleds llc prints for an always-instrumented function when
; targeting armv6 (see the RUN line). Two sleds are expected: Lxray_sled_0
; ahead of the function body and Lxray_sled_1 ahead of the final `bx lr`.
; Each sled is a `b #20` followed by six no-ops, which for this triple are
; printed as `mov r0, r0`, and is closed by a temporary label.
2 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
3 ; CHECK-LABEL: Lxray_sled_0:
4 ; CHECK-NEXT: b #20
5 ; CHECK-NEXT: mov r0, r0
6 ; CHECK-NEXT: mov r0, r0
7 ; CHECK-NEXT: mov r0, r0
8 ; CHECK-NEXT: mov r0, r0
9 ; CHECK-NEXT: mov r0, r0
10 ; CHECK-NEXT: mov r0, r0
11 ; CHECK-LABEL: Ltmp0:
12 ret i32 0
13 ; CHECK-LABEL: Lxray_sled_1:
14 ; CHECK-NEXT: b #20
15 ; CHECK-NEXT: mov r0, r0
16 ; CHECK-NEXT: mov r0, r0
17 ; CHECK-NEXT: mov r0, r0
18 ; CHECK-NEXT: mov r0, r0
19 ; CHECK-NEXT: mov r0, r0
20 ; CHECK-NEXT: mov r0, r0
21 ; CHECK-LABEL: Ltmp1:
22 ; CHECK-NEXT: bx lr
23 }
0 ; RUN: llc -filetype=asm -o - -mtriple=armv7-unknown-linux-gnu < %s | FileCheck %s
1
; Checks the XRay sleds llc prints for an always-instrumented function when
; targeting armv7 (see the RUN line). Two sleds are expected: Lxray_sled_0
; ahead of the function body and Lxray_sled_1 ahead of the final `bx lr`.
; Each sled is a `b #20` followed by six `nop` instructions and is closed by
; a temporary label.
2 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
3 ; CHECK-LABEL: Lxray_sled_0:
4 ; CHECK-NEXT: b #20
5 ; CHECK-NEXT: nop
6 ; CHECK-NEXT: nop
7 ; CHECK-NEXT: nop
8 ; CHECK-NEXT: nop
9 ; CHECK-NEXT: nop
10 ; CHECK-NEXT: nop
11 ; CHECK-LABEL: Ltmp0:
12 ret i32 0
13 ; CHECK-LABEL: Lxray_sled_1:
14 ; CHECK-NEXT: b #20
15 ; CHECK-NEXT: nop
16 ; CHECK-NEXT: nop
17 ; CHECK-NEXT: nop
18 ; CHECK-NEXT: nop
19 ; CHECK-NEXT: nop
20 ; CHECK-NEXT: nop
21 ; CHECK-LABEL: Ltmp1:
22 ; CHECK-NEXT: bx lr
23 }