llvm.org GIT mirror llvm / 9de32f7
Copy utilities updated and added for MI flags

Summary: This patch adds a GlobalISel copy utility into MI for flags and
updates the instruction emitter for the SDAG path. Some tests show new
behavior, and I added one for GlobalISel which mirrors an SDAG test for
handling nsw/nuw.

Reviewers: spatel, wristow, arsenm

Reviewed By: arsenm

Subscribers: wdng

Differential Revision: https://reviews.llvm.org/D52006

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@342576 91177308-0d34-0410-b5e6-96231b3b80d8

Michael Berg, 1 year, 9 months ago
7 changed files with 284 additions and 3 deletions.
   /// not modify the MIFlags of this MachineInstr.
   uint16_t mergeFlagsWith(const MachineInstr &Other) const;

+  /// Copy all flags to MachineInstr MIFlags
+  void copyIRFlags(const Instruction &I);
+
   /// Break any tie involving OpIdx.
   void untieRegOperand(unsigned OpIdx) {
     MachineOperand &MO = getOperand(OpIdx);
   unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
   unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
   unsigned Res = getOrCreateVReg(U);
-  MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
+  auto FBinOp = MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
+  if (isa<Instruction>(U)) {
+    MachineInstr *FBinOpMI = FBinOp.getInstr();
+    const Instruction &I = cast<Instruction>(U);
+    FBinOpMI->copyIRFlags(I);
+  }
   return true;
 }

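The hunk above is the whole GlobalISel side of the change: build the generic instruction first, then mirror the IR flags onto it with the new utility. As a sanity check, the copied flags can be read back through MachineInstr::getFlag(). The snippet below is a sketch, not part of the patch; it assumes the FBinOpMI and I variables from the hunk and that llvm/IR/Operator.h is included for OverflowingBinaryOperator.

  // Sketch only: on a freshly built instruction, copyIRFlags() should make
  // the MIFlags agree exactly with the IR wrap flags.
  if (const auto *OBO = dyn_cast<OverflowingBinaryOperator>(&I)) {
    assert(FBinOpMI->getFlag(MachineInstr::MIFlag::NoSWrap) ==
               OBO->hasNoSignedWrap() &&
           "nsw should round-trip onto MIFlags");
    assert(FBinOpMI->getFlag(MachineInstr::MIFlag::NoUWrap) ==
               OBO->hasNoUnsignedWrap() &&
           "nuw should round-trip onto MIFlags");
  }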
 #include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSymbol.h"
   // For now, just return the union of the flags. If the flags get more
   // complicated over time, we might need more logic here.
   return getFlags() | Other.getFlags();
+}
+
+void MachineInstr::copyIRFlags(const Instruction &I) {
+  // Copy the wrapping flags.
+  if (const OverflowingBinaryOperator *OB =
+          dyn_cast<OverflowingBinaryOperator>(&I)) {
+    if (OB->hasNoSignedWrap())
+      setFlag(MachineInstr::MIFlag::NoSWrap);
+    if (OB->hasNoUnsignedWrap())
+      setFlag(MachineInstr::MIFlag::NoUWrap);
+  }
+
+  // Copy the exact flag.
+  if (const PossiblyExactOperator *PE = dyn_cast<PossiblyExactOperator>(&I))
+    if (PE->isExact())
+      setFlag(MachineInstr::MIFlag::IsExact);
+
+  // Copy the fast-math flags.
+  if (const FPMathOperator *FP = dyn_cast<FPMathOperator>(&I)) {
+    const FastMathFlags Flags = FP->getFastMathFlags();
+    if (Flags.noNaNs())
+      setFlag(MachineInstr::MIFlag::FmNoNans);
+    if (Flags.noInfs())
+      setFlag(MachineInstr::MIFlag::FmNoInfs);
+    if (Flags.noSignedZeros())
+      setFlag(MachineInstr::MIFlag::FmNsz);
+    if (Flags.allowReciprocal())
+      setFlag(MachineInstr::MIFlag::FmArcp);
+    if (Flags.allowContract())
+      setFlag(MachineInstr::MIFlag::FmContract);
+    if (Flags.approxFunc())
+      setFlag(MachineInstr::MIFlag::FmAfn);
+    if (Flags.allowReassoc())
+      setFlag(MachineInstr::MIFlag::FmReassoc);
+  }
 }

 bool MachineInstr::hasPropertyInBundle(uint64_t Mask, QueryType Type) const {
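Once copyIRFlags() has materialized the flags on the MachineInstr, later MIR passes can gate transforms on them without reaching back to the IR. A minimal consumer might look like the sketch below; canReassociateAdd is a hypothetical helper for illustration, not LLVM API.

  // Sketch: a MIR-level transform keying off the copied wrap flags.
  static bool canReassociateAdd(const MachineInstr &MI) {
    // nsw/nuw license the same wrap-free reasoning the middle end uses.
    return MI.getFlag(MachineInstr::MIFlag::NoSWrap) ||
           MI.getFlag(MachineInstr::MIFlag::NoUWrap);
  }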

   if (Flags.hasAllowReassociation())
     MI->setFlag(MachineInstr::MIFlag::FmReassoc);
+
+  if (Flags.hasNoUnsignedWrap())
+    MI->setFlag(MachineInstr::MIFlag::NoUWrap);
+
+  if (Flags.hasNoSignedWrap())
+    MI->setFlag(MachineInstr::MIFlag::NoSWrap);
+
+  if (Flags.hasExact())
+    MI->setFlag(MachineInstr::MIFlag::IsExact);
 }

 // Emit all of the actual operands of this instruction, adding them to the
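The emitter hunk above only forwards flags that are already present on the SDNode, so they have to be attached when the DAG is built. A typical producer looks roughly like the following sketch; it is not code from this patch, and DAG, DL, VT, LHS, RHS, and an IR BinaryOperator *BO are assumed to be in scope.

  // Sketch: populating SDNodeFlags from IR wrap flags so the emitter
  // has something to copy onto the MachineInstr.
  SDNodeFlags Flags;
  Flags.setNoSignedWrap(BO->hasNoSignedWrap());
  Flags.setNoUnsignedWrap(BO->hasNoUnsignedWrap());
  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, RHS, Flags);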

 ; CHECK: New block
 ; CHECK: %[[REG:([0-9]+)]]:intregs = PHI %{{.*}}, %[[REG1:([0-9]+)]]
-; CHECK: %[[REG1]]:intregs = A2_addi
+; CHECK: %[[REG1]]:intregs = nuw A2_addi
 ; CHECK: epilog:
 ; CHECK: %{{[0-9]+}}:intregs = PHI %{{.*}}, %[[REG]]

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -global-isel | FileCheck %s

; The fundamental problem: an add separated from other arithmetic by a sign or
; zero extension can't be combined with the later instructions. However, if the
; first add is 'nsw' or 'nuw' respectively, then we can promote the extension
; ahead of that add to allow optimizations.

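; Worked example (illustrative comment, not a test): because 'add nsw' cannot
; wrap, sign extension distributes over it:
;   sext i64 (add nsw i32 %i, 5) == add nsw i64 (sext i32 %i to i64), 5
; so the sext can be hoisted above the narrow add, letting the wide adds
; below combine or fold into addressing modes.
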
define i64 @add_nsw_consts(i32 %i) {
; CHECK-LABEL: add_nsw_consts:
; CHECK: # %bb.0:
; CHECK-NEXT: addl $5, %edi
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: addq $7, %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = add i64 %ext, 7
  ret i64 %idx
}

; An x86 bonus: If we promote the sext ahead of the 'add nsw',
; we allow LEA formation and eliminate an add instruction.

define i64 @add_nsw_sext_add(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext_add:
; CHECK: # %bb.0:
; CHECK-NEXT: addl $5, %edi
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = add i64 %x, %ext
  ret i64 %idx
}

; Throw in a scale (left shift) because an LEA can do that too.
; Use a negative constant (LEA displacement) to verify that's handled correctly.

define i64 @add_nsw_sext_lsh_add(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext_lsh_add:
; CHECK: # %bb.0:
; CHECK-NEXT: addl $-5, %edi
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: movq $3, %rcx
; CHECK: retq

  %add = add nsw i32 %i, -5
  %ext = sext i32 %add to i64
  %shl = shl i64 %ext, 3
  %idx = add i64 %x, %shl
  ret i64 %idx
}

; Don't promote the sext if it has no users. The wider add instruction needs an
; extra byte to encode.

define i64 @add_nsw_sext(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext:
; CHECK: # %bb.0:
; CHECK-NEXT: addl $5, %edi
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  ret i64 %ext
}

; The typical use case: a 64-bit system where an 'int' is used as an index into an array.

define i8* @gep8(i32 %i, i8* %x) {
; CHECK-LABEL: gep8:
; CHECK: # %bb.0:
; CHECK-NEXT: addl $5, %edi
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: leaq (%rsi,%rax), %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = getelementptr i8, i8* %x, i64 %ext
  ret i8* %idx
}

define i16* @gep16(i32 %i, i16* %x) {
; CHECK-LABEL: gep16:
; CHECK: # %bb.0:
; CHECK-NEXT: movq $2, %rax
; CHECK-NEXT: addl $-5, %edi
; CHECK-NEXT: movslq %edi, %rcx
; CHECK-NEXT: imulq %rax, %rcx
; CHECK-NEXT: leaq (%rsi,%rcx), %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, -5
  %ext = sext i32 %add to i64
  %idx = getelementptr i16, i16* %x, i64 %ext
  ret i16* %idx
}

define i32* @gep32(i32 %i, i32* %x) {
; CHECK-LABEL: gep32:
; CHECK: # %bb.0:
; CHECK-NEXT: movq $4, %rax
; CHECK-NEXT: addl $5, %edi
; CHECK-NEXT: movslq %edi, %rcx
; CHECK-NEXT: imulq %rax, %rcx
; CHECK-NEXT: leaq (%rsi,%rcx), %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = getelementptr i32, i32* %x, i64 %ext
  ret i32* %idx
}

define i64* @gep64(i32 %i, i64* %x) {
; CHECK-LABEL: gep64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq $8, %rax
; CHECK-NEXT: addl $-5, %edi
; CHECK-NEXT: movslq %edi, %rcx
; CHECK-NEXT: imulq %rax, %rcx
; CHECK-NEXT: leaq (%rsi,%rcx), %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, -5
  %ext = sext i32 %add to i64
  %idx = getelementptr i64, i64* %x, i64 %ext
  ret i64* %idx
}

; LEA can't scale by 16, but the adds can still be combined into an LEA.

define i128* @gep128(i32 %i, i128* %x) {
; CHECK-LABEL: gep128:
; CHECK: # %bb.0:
; CHECK-NEXT: movq $16, %rax
; CHECK-NEXT: addl $5, %edi
; CHECK-NEXT: movslq %edi, %rcx
; CHECK-NEXT: imulq %rax, %rcx
; CHECK-NEXT: leaq (%rsi,%rcx), %rax
; CHECK-NEXT: retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = getelementptr i128, i128* %x, i64 %ext
  ret i128* %idx
}

; A bigger win can be achieved when there is more than one use of the
; sign extended value. In this case, we can eliminate sign extension
; instructions plus use more efficient addressing modes for memory ops.

define void @PR20134(i32* %a, i32 %i) {
; CHECK-LABEL: PR20134:
; CHECK: # %bb.0:
; CHECK: movq $4, %rax
; CHECK-NEXT: leal 1(%rsi), %ecx
; CHECK-NEXT: movslq %ecx, %rcx
; CHECK-NEXT: imulq %rax, %rcx
; CHECK-NEXT: leaq (%rdi,%rcx), %rcx
; CHECK-NEXT: leal 2(%rsi), %edx
; CHECK-NEXT: movslq %edx, %rdx
; CHECK-NEXT: imulq %rax, %rdx
; CHECK-NEXT: leaq (%rdi,%rdx), %rdx
; CHECK-NEXT: movl (%rdx), %edx
; CHECK-NEXT: addl (%rcx), %edx
; CHECK-NEXT: movslq %esi, %rcx
; CHECK-NEXT: imulq %rax, %rcx
; CHECK-NEXT: leaq (%rdi,%rcx), %rax
; CHECK-NEXT: movl %edx, (%rax)
; CHECK-NEXT: retq

  %add1 = add nsw i32 %i, 1
  %idx1 = sext i32 %add1 to i64
  %gep1 = getelementptr i32, i32* %a, i64 %idx1
  %load1 = load i32, i32* %gep1, align 4

  %add2 = add nsw i32 %i, 2
  %idx2 = sext i32 %add2 to i64
  %gep2 = getelementptr i32, i32* %a, i64 %idx2
  %load2 = load i32, i32* %gep2, align 4

  %add3 = add i32 %load1, %load2
  %idx3 = sext i32 %i to i64
  %gep3 = getelementptr i32, i32* %a, i64 %idx3
  store i32 %add3, i32* %gep3, align 4
  ret void
}

; The same as @PR20134 but sign extension is replaced with zero extension.
define void @PR20134_zext(i32* %a, i32 %i) {
; CHECK: # %bb.0:
; CHECK: movq $4, %rax
; CHECK-NEXT: leal 1(%rsi), %ecx
; CHECK-NEXT: imulq %rax, %rcx
; CHECK-NEXT: leaq (%rdi,%rcx), %rcx
; CHECK-NEXT: leal 2(%rsi), %edx
; CHECK-NEXT: imulq %rax, %rdx
; CHECK-NEXT: leaq (%rdi,%rdx), %rdx
; CHECK-NEXT: movl (%rdx), %edx
; CHECK-NEXT: addl (%rcx), %edx
; CHECK-NEXT: imulq %rax, %rsi
; CHECK-NEXT: leaq (%rdi,%rsi), %rax
; CHECK-NEXT: movl %edx, (%rax)
; CHECK-NEXT: retq

  %add1 = add nuw i32 %i, 1
  %idx1 = zext i32 %add1 to i64
  %gep1 = getelementptr i32, i32* %a, i64 %idx1
  %load1 = load i32, i32* %gep1, align 4

  %add2 = add nuw i32 %i, 2
  %idx2 = zext i32 %add2 to i64
  %gep2 = getelementptr i32, i32* %a, i64 %idx2
  %load2 = load i32, i32* %gep2, align 4

  %add3 = add i32 %load1, %load2
  %idx3 = zext i32 %i to i64
  %gep3 = getelementptr i32, i32* %a, i64 %idx3
  store i32 %add3, i32* %gep3, align 4
  ret void
}
 ; CHECK: SUB64rr [[VREG2]], [[VREG1]]
 ; CHECK-NEXT: JNE_1 {{.*}}, debug-location [[DLOC]]{{$}}
 ; CHECK: [[VREG3:%[^ ]+]]:gr64 = PHI [[VREG2]]
-; CHECK: [[VREG4:%[^ ]+]]:gr64 = ADD64ri8 [[VREG3]], 4
+; CHECK: [[VREG4:%[^ ]+]]:gr64 = nuw ADD64ri8 [[VREG3]], 4
 ; CHECK: SUB64rr [[VREG1]], [[VREG4]]
 ; CHECK-NEXT: JNE_1 {{.*}}, debug-location [[DLOC]]{{$}}
 ; CHECK-NEXT: JMP_1 {{.*}}, debug-location [[DLOC]]{{$}}