llvm.org GIT mirror llvm / 3499304
[AMDGPU] Turn s_and_saveexec_b64 into s_and_b64 if result is unused. With SI_END_CF elimination for some nested control flow, we can now eliminate the saved exec register completely by turning the saveexec version of an instruction into just a logical instruction. Differential Revision: https://reviews.llvm.org/D36007 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@309766 91177308-0d34-0410-b5e6-96231b3b80d8 Stanislav Mekhanoshin 2 years ago
4 changed file(s) with 214 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
8181 }
8282 case AMDGPU::S_MOV_B64_term:
8383 llvm_unreachable("should have been replaced");
84 }
85
86 return AMDGPU::NoRegister;
87 }
88
89 /// If \p MI is a logical operation that reads exec as a source operand,
90 /// return the destination register of that operation.
91 static unsigned isLogicalOpOnExec(const MachineInstr &MI) {
92 switch (MI.getOpcode()) {
93 case AMDGPU::S_AND_B64:
94 case AMDGPU::S_OR_B64:
95 case AMDGPU::S_XOR_B64:
96 case AMDGPU::S_ANDN2_B64:
97 case AMDGPU::S_ORN2_B64:
98 case AMDGPU::S_NAND_B64:
99 case AMDGPU::S_NOR_B64:
100 case AMDGPU::S_XNOR_B64: {
101 const MachineOperand &Src1 = MI.getOperand(1);
102 if (Src1.isReg() && Src1.getReg() == AMDGPU::EXEC)
103 return MI.getOperand(0).getReg();
104 const MachineOperand &Src2 = MI.getOperand(2);
105 if (Src2.isReg() && Src2.getReg() == AMDGPU::EXEC)
106 return MI.getOperand(0).getReg();
107 }
84108 }
85109
86110 return AMDGPU::NoRegister;
208232 // Scan backwards to find the def.
209233 auto CopyToExecInst = &*I;
210234 auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
211 if (CopyFromExecInst == E)
212 continue;
235 if (CopyFromExecInst == E) {
236 auto PrepareExecInst = std::next(I);
237 if (PrepareExecInst == E)
238 continue;
239 // Fold exec = COPY (S_AND_B64 reg, exec) -> exec = S_AND_B64 reg, exec
240 if (CopyToExecInst->getOperand(1).isKill() &&
241 isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
242 DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);
243
244 PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
245
246 DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');
247
248 CopyToExecInst->eraseFromParent();
249 }
250
251 continue;
252 }
213253
214254 if (isLiveOut(MBB, CopyToExec)) {
215255 // The copied register is live out and has a second use in another block.
146146 }
147147
148148 Changed = true;
149
150 // If the only use of saved exec in the removed instruction is S_AND_B64,
151 // fold the copy now.
152 auto SaveExec = getOrExecSource(*Lead, *TII, MRI);
153 if (!SaveExec || !SaveExec->isFullCopy())
154 continue;
155
156 unsigned SavedExec = SaveExec->getOperand(0).getReg();
157 bool SafeToReplace = true;
158 for (auto& U : MRI.use_nodbg_instructions(SavedExec)) {
159 if (U.getParent() != SaveExec->getParent()) {
160 SafeToReplace = false;
161 break;
162 }
163
164 DEBUG(dbgs() << "Redundant EXEC COPY: " << *SaveExec << '\n');
165 }
166
167 if (SafeToReplace) {
168 LIS->RemoveMachineInstrFromMaps(*SaveExec);
169 SaveExec->eraseFromParent();
170 MRI.replaceRegWith(SavedExec, AMDGPU::EXEC);
171 LIS->removeInterval(SavedExec);
172 }
149173 }
150174
151175 if (Changed) {
33 ; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
44 ; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
55 ; GCN-NEXT: s_cbranch_execz [[ENDIF]]
6 ; GCN: s_and_saveexec_b64
6 ; GCN: s_and_b64 exec, exec, vcc
77 ; GCN-NEXT: ; mask branch [[ENDIF]]
88 ; GCN-NEXT: {{^BB[0-9_]+}}:
99 ; GCN: store_dword
0 # RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-optimize-exec-masking %s -o - | FileCheck -check-prefix=GCN %s
1
2 ---
3 # GCN-LABEL: name: reduce_and_saveexec
4 # GCN: %exec = S_AND_B64 %exec, killed %vcc
5 # GCN-NEXT: S_ENDPGM
6 name: reduce_and_saveexec
7 tracksRegLiveness: true
8 body: |
9 bb.0:
10 %vcc = IMPLICIT_DEF
11 %sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc, implicit-def %scc
12 %exec = COPY killed %sgpr0_sgpr1
13 S_ENDPGM
14 ...
15 ---
16 # GCN-LABEL: name: reduce_and_saveexec_commuted
17 # GCN: %exec = S_AND_B64 killed %vcc, %exec
18 # GCN-NEXT: S_ENDPGM
19 name: reduce_and_saveexec_commuted
20 tracksRegLiveness: true
21 body: |
22 bb.0:
23 %vcc = IMPLICIT_DEF
24 %sgpr0_sgpr1 = S_AND_B64 killed %vcc, %exec, implicit-def %scc
25 %exec = COPY killed %sgpr0_sgpr1
26 S_ENDPGM
27 ...
28 ---
29 # GCN-LABEL: name: reduce_and_saveexec_liveout
30 # GCN: %sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc
31 # GCN-NEXT: %exec = COPY
32 name: reduce_and_saveexec_liveout
33 tracksRegLiveness: true
34 body: |
35 bb.0:
36 %vcc = IMPLICIT_DEF
37 %sgpr0_sgpr1 = S_AND_B64 %exec, killed %vcc, implicit-def %scc
38 %exec = COPY %sgpr0_sgpr1
39 S_ENDPGM
40 ...
41 ---
42 # GCN-LABEL: name: and_saveexec
43 # GCN: %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %vcc
44 # GCN-NEXT: S_ENDPGM
45 name: and_saveexec
46 tracksRegLiveness: true
47 body: |
48 bb.0:
49 %vcc = IMPLICIT_DEF
50 %sgpr0_sgpr1 = COPY %exec
51 %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc
52 %exec = S_MOV_B64_term %sgpr2_sgpr3
53 S_ENDPGM
54 ...
55 ---
56 # GCN-LABEL: name: reduce_or_saveexec
57 # GCN: %exec = S_OR_B64 %exec, killed %vcc
58 # GCN-NEXT: S_ENDPGM
59 name: reduce_or_saveexec
60 tracksRegLiveness: true
61 body: |
62 bb.0:
63 %vcc = IMPLICIT_DEF
64 %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc
65 %exec = COPY killed %sgpr0_sgpr1
66 S_ENDPGM
67 ...
68 ---
69 # GCN-LABEL: name: reduce_xor_saveexec
70 # GCN: %exec = S_XOR_B64 %exec, killed %vcc
71 # GCN-NEXT: S_ENDPGM
72 name: reduce_xor_saveexec
73 tracksRegLiveness: true
74 body: |
75 bb.0:
76 %vcc = IMPLICIT_DEF
77 %sgpr0_sgpr1 = S_XOR_B64 %exec, killed %vcc, implicit-def %scc
78 %exec = COPY killed %sgpr0_sgpr1
79 S_ENDPGM
80 ...
81 ---
82 # GCN-LABEL: name: reduce_andn2_saveexec
83 # GCN: %exec = S_ANDN2_B64 %exec, killed %vcc
84 # GCN-NEXT: S_ENDPGM
85 name: reduce_andn2_saveexec
86 tracksRegLiveness: true
87 body: |
88 bb.0:
89 %vcc = IMPLICIT_DEF
90 %sgpr0_sgpr1 = S_ANDN2_B64 %exec, killed %vcc, implicit-def %scc
91 %exec = COPY killed %sgpr0_sgpr1
92 S_ENDPGM
93 ...
94 ---
95 # GCN-LABEL: name: reduce_orn2_saveexec
96 # GCN: %exec = S_ORN2_B64 %exec, killed %vcc
97 # GCN-NEXT: S_ENDPGM
98 name: reduce_orn2_saveexec
99 tracksRegLiveness: true
100 body: |
101 bb.0:
102 %vcc = IMPLICIT_DEF
103 %sgpr0_sgpr1 = S_ORN2_B64 %exec, killed %vcc, implicit-def %scc
104 %exec = COPY killed %sgpr0_sgpr1
105 S_ENDPGM
106 ...
107 ---
108 # GCN-LABEL: name: reduce_nand_saveexec
109 # GCN: %exec = S_NAND_B64 %exec, killed %vcc
110 # GCN-NEXT: S_ENDPGM
111 name: reduce_nand_saveexec
112 tracksRegLiveness: true
113 body: |
114 bb.0:
115 %vcc = IMPLICIT_DEF
116 %sgpr0_sgpr1 = S_NAND_B64 %exec, killed %vcc, implicit-def %scc
117 %exec = COPY killed %sgpr0_sgpr1
118 S_ENDPGM
119 ...
120 ---
121 # GCN-LABEL: name: reduce_nor_saveexec
122 # GCN: %exec = S_NOR_B64 %exec, killed %vcc
123 # GCN-NEXT: S_ENDPGM
124 name: reduce_nor_saveexec
125 tracksRegLiveness: true
126 body: |
127 bb.0:
128 %vcc = IMPLICIT_DEF
129 %sgpr0_sgpr1 = S_NOR_B64 %exec, killed %vcc, implicit-def %scc
130 %exec = COPY killed %sgpr0_sgpr1
131 S_ENDPGM
132 ...
133 ---
134 # GCN-LABEL: name: reduce_xnor_saveexec
135 # GCN: %exec = S_XNOR_B64 %exec, killed %vcc
136 # GCN-NEXT: S_ENDPGM
137 name: reduce_xnor_saveexec
138 tracksRegLiveness: true
139 body: |
140 bb.0:
141 %vcc = IMPLICIT_DEF
142 %sgpr0_sgpr1 = S_XNOR_B64 %exec, killed %vcc, implicit-def %scc
143 %exec = COPY killed %sgpr0_sgpr1
144 S_ENDPGM
145 ...
146 ---