llvm.org GIT mirror llvm / 5d42fba
[AMDGPU] Add insert nops pass based on subtarget features instead of cl::opt

Also:
- Skip pass if machine module does not have debug info
- Minor comment changes
- Added test

Differential Revision: http://reviews.llvm.org/D19079
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266626 91177308-0d34-0410-b5e6-96231b3b80d8
Konstantin Zhuravlyov 4 years ago
6 changed file(s) with 118 addition(s) and 14 deletion(s). Raw diff Collapse all Expand all
314314 FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
315315 FeatureSMemRealTime
316316 ]
317 >;
318
319 //===----------------------------------------------------------------------===//
320 // Debugger related subtarget features.
321 //===----------------------------------------------------------------------===//
322
// Debugger subtarget feature, enabled by name on the command line (e.g.
// -mattr=+amdgpu-debugger-insert-nops). The four strings are, in order: the
// feature name, the subtarget field it controls, the value written to that
// field when the feature is enabled, and the human-readable description.
// NOTE(review): the field is presumably the bool AMDGPUSubtarget member of
// the same name, which defaults to false -- confirm against the subtarget.
323 def FeatureDebuggerInsertNops : SubtargetFeature<
324 "amdgpu-debugger-insert-nops",
325 "DebuggerInsertNops",
326 "true",
327 "Insert two nop instructions for each high level source statement"
317328 >;
318329
319330 //===----------------------------------------------------------------------===//
9696 HasSMemRealTime(false), Has16BitInsts(false),
9797 LDSBankCount(0),
9898 IsaVersion(ISAVersion0_0_0),
99 EnableSIScheduler(false), FrameLowering(nullptr),
99 EnableSIScheduler(false),
100 DebuggerInsertNops(false),
101 FrameLowering(nullptr),
100102 GISel(),
101103 InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
102104
9494 int LDSBankCount;
9595 unsigned IsaVersion;
9696 bool EnableSIScheduler;
97 bool DebuggerInsertNops;
9798
9899 std::unique_ptr FrameLowering;
99100 std::unique_ptr TLInfo;
303304 return EnableSIScheduler;
304305 }
305306
/// \returns the DebuggerInsertNops flag. The flag is initialized to false in
/// the constructor; GCNPassConfig::addPreEmitPass() queries it to decide
/// whether the insert-nops pass is added.
307 bool debuggerInsertNops() const {
308 return DebuggerInsertNops;
309 }
310
/// \returns the value of the DumpCode member.
306311 bool dumpCode() const {
307312 return DumpCode;
308313 }
3030 #include "llvm/IR/Verifier.h"
3131 #include "llvm/MC/MCAsmInfo.h"
3232 #include "llvm/IR/LegacyPassManager.h"
33 #include "llvm/Support/CommandLine.h"
3433 #include "llvm/Support/TargetRegistry.h"
3534 #include "llvm/Support/raw_os_ostream.h"
3635 #include "llvm/Transforms/IPO.h"
147146 //===----------------------------------------------------------------------===//
148147
149148 namespace {
150
151 cl::opt InsertNops(
152 "amdgpu-insert-nops",
153 cl::desc("Insert two nop instructions for each high level source statement"),
154 cl::init(false));
155149
156150 class AMDGPUPassConfig : public TargetPassConfig {
157151 public:
// Adds the GCN pre-emit passes in order: the pass created by
// createSIInsertWaitsPass(), then createSILowerControlFlowPass(), and finally
// createSIInsertNopsPass() -- the last one only when the subtarget reports
// debuggerInsertNops().
// NOTE(review): this is a diff rendering. The line numbered only "399"
// (`if (InsertNops) {`) appears to be the removed, pre-change condition, and
// the lines numbered "393"-"395" appear to be its replacement; only one of the
// two `if` headers exists in either version of the file.
396390 void GCNPassConfig::addPreEmitPass() {
397391 addPass(createSIInsertWaitsPass(), false);
398392 addPass(createSILowerControlFlowPass(), false);
399 if (InsertNops) {
393
394 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
395 if (ST.debuggerInsertNops()) {
400396 addPass(createSIInsertNopsPass(), false);
401397 }
402398 }
77 //===----------------------------------------------------------------------===//
88 //
99 /// \file
10 /// \brief Insert two S_NOP instructions for every high level source statement.
10 /// \brief Insert two nop instructions for each high level source statement.
1111 ///
1212 /// Tools, such as debugger, need to pause execution based on user input (i.e.
13 /// breakpoint). In order to do this, two S_NOP instructions are inserted for
14 /// each high level source statement: one before first isa instruction of high
15 /// level source statement, and one after last isa instruction of high level
16 /// source statement. Further, debugger may replace S_NOP instructions with
17 /// S_TRAP instructions based on user input.
13 /// breakpoint). In order to do this, two nop instructions are inserted for each
14 /// high level source statement: one before first isa instruction of high level
15 /// source statement, and one after last isa instruction of high level source
16 /// statement. Further, debugger may replace nop instructions with trap
17 /// instructions based on user input.
1818 //
1919 //===----------------------------------------------------------------------===//
2020
2323 #include "llvm/CodeGen/MachineFunction.h"
2424 #include "llvm/CodeGen/MachineFunctionPass.h"
2525 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineModuleInfo.h"
2627 using namespace llvm;
2728
2829 #define DEBUG_TYPE "si-insert-nops"
5253 }
5354
5455 bool SIInsertNops::runOnMachineFunction(MachineFunction &MF) {
56 // Skip machine functions without debug info.
57 if (!MF.getMMI().hasDebugInfo()) {
58 return false;
59 }
60
61 // Target instruction info.
5562 const SIInstrInfo *TII =
5663 static_cast(MF.getSubtarget().getInstrInfo());
5764
65 // Mapping from high level source statement line number to last corresponding
66 // isa instruction.
5867 DenseMap LineToInst;
68 // Insert nop instruction before first isa instruction of each high level
69 // source statement and collect last isa instruction for each high level
70 // source statement.
5971 for (auto MBB = MF.begin(); MBB != MF.end(); ++MBB) {
6072 for (auto MI = MBB->begin(); MI != MBB->end(); ++MI) {
6173 if (MI->isDebugValue() || !MI->getDebugLoc()) {
7385 }
7486 }
7587 }
88 // Insert nop instruction after last isa instruction of each high level source
89 // statement.
7690 for (auto LineToInstEntry = LineToInst.begin();
7791 LineToInstEntry != LineToInst.end(); ++LineToInstEntry) {
7892 auto MBB = LineToInstEntry->second->getParent();
8498 .addImm(0);
8599 }
86100 }
101 // Insert nop instruction before prologue.
87102 MachineBasicBlock &MBB = MF.front();
88103 MachineInstr &MI = MBB.front();
89104 BuildMI(MBB, MI, DebugLoc(), TII->get(AMDGPU::S_NOP))
0 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s
1
2 ; CHECK: debugger_insert_nops.cl:2:3
3 ; CHECK-NEXT: s_nop 0
4 ; CHECK: debugger_insert_nops.cl:3:3
5 ; CHECK-NEXT: s_nop 0
6 ; CHECK: debugger_insert_nops.cl:4:3
7 ; CHECK-NEXT: s_nop 0
8 ; CHECK: debugger_insert_nops.cl:5:3
9 ; CHECK-NEXT: s_nop 0
10 ; CHECK: debugger_insert_nops.cl:6:1
11 ; CHECK-NEXT: s_nop 0
12 ; CHECK-NEXT: s_endpgm
13
14 ; Function Attrs: nounwind
; Test kernel for the insert-nops pass. It spills the pointer argument %A to a
; stack slot, then performs four load/getelementptr/store sequences that write
; the constants 1, 2, 3 and 4 through the reloaded pointer. Each sequence is
; tagged with !dbg locations on source lines 2 through 5 (!20-!27); the final
; `ret` is on line 6 (!28).
; NOTE(review): the element indices are 0, 1, 2 and then 4 (not 3) --
; presumably this mirrors the original OpenCL source; confirm it is intended.
15 define void @debugger_insert_nops(i32 addrspace(1)* %A) #0 !dbg !12 {
16 entry:
17 %A.addr = alloca i32 addrspace(1)*, align 4
18 store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
19 call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !17, metadata !18), !dbg !19
20 %0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !20
21 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20
22 store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !21
23 %1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !22
24 %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22
25 store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23
26 %2 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !24
27 %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2, !dbg !24
28 store i32 3, i32 addrspace(1)* %arrayidx2, align 4, !dbg !25
29 %3 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !26
30 %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 4, !dbg !26
31 store i32 4, i32 addrspace(1)* %arrayidx3, align 4, !dbg !27
32 ret void, !dbg !28
33 }
34
35 ; Function Attrs: nounwind readnone
36 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
37
38 attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" }
39 attributes #1 = { nounwind readnone }
40
41 !llvm.dbg.cu = !{!0}
42 !opencl.kernels = !{!3}
43 !llvm.module.flags = !{!9, !10}
44 !llvm.ident = !{!11}
45
46 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 266620)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
47 !1 = !DIFile(filename: "debugger_insert_nops.cl", directory: "/home/kzhuravl/Sandbox")
48 !2 = !{}
49 !3 = !{void (i32 addrspace(1)*)* @debugger_insert_nops, !4, !5, !6, !7, !8}
50 !4 = !{!"kernel_arg_addr_space", i32 1}
51 !5 = !{!"kernel_arg_access_qual", !"none"}
52 !6 = !{!"kernel_arg_type", !"int*"}
53 !7 = !{!"kernel_arg_base_type", !"int*"}
54 !8 = !{!"kernel_arg_type_qual", !""}
55 !9 = !{i32 2, !"Dwarf Version", i32 4}
56 !10 = !{i32 2, !"Debug Info Version", i32 3}
57 !11 = !{!"clang version 3.9.0 (trunk 266620)"}
58 !12 = distinct !DISubprogram(name: "debugger_insert_nops", scope: !1, file: !1, line: 1, type: !13, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
59 !13 = !DISubroutineType(types: !14)
60 !14 = !{null, !15}
61 !15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64, align: 32)
62 !16 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
63 !17 = !DILocalVariable(name: "A", arg: 1, scope: !12, file: !1, line: 1, type: !15)
64 !18 = !DIExpression()
65 !19 = !DILocation(line: 1, column: 46, scope: !12)
66 !20 = !DILocation(line: 2, column: 3, scope: !12)
67 !21 = !DILocation(line: 2, column: 8, scope: !12)
68 !22 = !DILocation(line: 3, column: 3, scope: !12)
69 !23 = !DILocation(line: 3, column: 8, scope: !12)
70 !24 = !DILocation(line: 4, column: 3, scope: !12)
71 !25 = !DILocation(line: 4, column: 8, scope: !12)
72 !26 = !DILocation(line: 5, column: 3, scope: !12)
73 !27 = !DILocation(line: 5, column: 8, scope: !12)
74 !28 = !DILocation(line: 6, column: 1, scope: !12)