llvm.org GIT mirror: llvm / commit 153f2e3
[DAGCombine] Improve ReduceLoadWidth for SRL

If the SRL node is only used by an AND, we may be able to set the ExtVT
to the width of the mask, making the AND redundant. To support this,
another check has been added in isLegalNarrowLoad, which queries whether
the load is valid.

Differential Revision: https://reviews.llvm.org/D41350

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@321259 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Sam Parker
3 changed files with 142 additions and 24 deletions.
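At the DAG level the pattern being targeted is (and (srl (load x), C), Mask): when Mask is a low-bit mask covering the shifted-down value and C is a whole number of bytes, the load/shift/and chain collapses into a single narrower load at a byte offset. A minimal stand-alone C++ sketch of the equivalence on little-endian memory (shiftAndMask and narrowLoad are illustrative names, not LLVM APIs):

#include <cstdint>
#include <cstring>

// Reference semantics of the combined pattern: load a 32-bit value,
// shift right, then mask.
uint32_t shiftAndMask(const uint8_t *P, unsigned Shift, uint32_t Mask) {
  uint32_t V;
  std::memcpy(&V, P, sizeof(V));
  return (V >> Shift) & Mask;
}

// Equivalent narrow load, valid when Shift % 8 == 0 and Mask == (1 << N) - 1
// for a legal load width N (8 or 16 in this sketch). On little-endian
// memory the narrowed load simply starts Shift / 8 bytes in.
uint32_t narrowLoad(const uint8_t *P, unsigned Shift, unsigned MaskBits) {
  if (MaskBits == 8)
    return P[Shift / 8];
  uint16_t V; // MaskBits == 16
  std::memcpy(&V, P + Shift / 8, sizeof(V));
  return V;
}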
In isLegalNarrowLoad:

   if (LoadN->getNumValues() > 2)
     return false;
 
+  // Only allow byte offsets.
+  if (ShAmt % 8)
+    return false;
+
+  // Ensure that this isn't going to produce an unsupported unaligned access.
+  if (ShAmt && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+                                       ExtVT, LoadN->getAddressSpace(),
+                                       ShAmt / 8))
+    return false;
+
   // If the load that we're shrinking is an extload and we're not just
   // discarding the extension we can't simply shrink the load. Bail.
   // TODO: It would be possible to merge the extensions in some cases.
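The two new bail-outs are plain arithmetic on the shift amount: the narrowed load can only start on a byte boundary, and a nonzero shift moves the access to byte offset ShAmt / 8, which may no longer satisfy the original load's alignment. A sketch of just these conditions, with allowsAccess as a hypothetical stand-in for the TLI.allowsMemoryAccess query above:

#include <functional>

// Sketch of the new legality checks in isLegalNarrowLoad; allowsAccess
// stands in for TLI.allowsMemoryAccess(Ctx, DL, ExtVT, AddrSpace, Offset).
bool narrowLoadOffsetIsLegal(
    unsigned ShAmt, const std::function<bool(unsigned)> &allowsAccess) {
  // Only allow byte offsets: a narrowed load cannot start mid-byte.
  if (ShAmt % 8)
    return false;
  // The access now happens ShAmt / 8 bytes in; let the target veto a
  // potentially unsupported unaligned access.
  if (ShAmt && !allowsAccess(ShAmt / 8))
    return false;
  return true;
}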
In ReduceLoadWidth:

       //   then the result of the shift+trunc is zero/undef (handled elsewhere).
       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
         return SDValue();
+
+      // If the SRL is only used by a masking AND, we may be able to adjust
+      // the ExtVT to make the AND redundant.
+      SDNode *Mask = *(N->use_begin());
+      if (Mask->getOpcode() == ISD::AND &&
+          isa<ConstantSDNode>(Mask->getOperand(1))) {
+        const APInt &ShiftMask =
+            cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
+        if (ShiftMask.isMask()) {
+          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
+                                           ShiftMask.countTrailingOnes());
+          // Recompute the type.
+          if (TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
+            ExtVT = MaskedVT;
+        }
+      }
     }
   }
 
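The AND is redundant exactly when its constant operand is a low-bit mask, i.e. of the form 2^N - 1; N, the number of trailing ones, then becomes the width of the candidate MaskedVT. A plain-integer sketch of the APInt::isMask and countTrailingOnes tests used above (isLowBitMask and maskWidth are illustrative names):

#include <cstdint>

// A "mask" in APInt::isMask's sense: non-zero, with all set bits
// contiguous starting at bit 0, so V + 1 clears every set bit.
bool isLowBitMask(uint64_t V) {
  return V != 0 && (V & (V + 1)) == 0;
}

// The number of trailing ones is the mask's width N, the candidate
// ExtVT size (MaskedVT in the patch).
unsigned maskWidth(uint64_t V) {
  unsigned N = 0;
  for (; V & 1; V >>= 1)
    ++N;
  return N;
}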
ARM codegen tests: byte-aligned shift/mask pairs now fold to ldrb/ldrh at an offset, while non-byte-aligned ones keep ubfx.

   ret i32 %conv
 }
 
+; CHECK-LABEL: test_shift7_mask8
+; CHECK-BE: ldr r1, [r0]
+; CHECK-COMMON: ldr r1, [r0]
+; CHECK-COMMON: ubfx r1, r1, #7, #8
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift7_mask8(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 7
+  %and = and i32 %shl, 255
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
 ; CHECK-LABEL: test_shift8_mask8
-; CHECK-BE: ldr r1, [r0]
-; CHECK-COMMON: ldr r1, [r0]
-; CHECK-COMMON: ubfx r1, r1, #8, #8
+; CHECK-BE: ldrb r1, [r0, #2]
+; CHECK-COMMON: ldrb r1, [r0, #1]
 ; CHECK-COMMON: str r1, [r0]
 define arm_aapcscc void @test_shift8_mask8(i32* nocapture %p) {
 entry:
   %0 = load i32, i32* %p, align 4
   %shl = lshr i32 %0, 8
   %and = and i32 %shl, 255
   store i32 %and, i32* %p, align 4
   ret void
 }
 
+; CHECK-LABEL: test_shift8_mask7
+; CHECK-BE: ldr r1, [r0]
+; CHECK-COMMON: ldr r1, [r0]
+; CHECK-COMMON: ubfx r1, r1, #8, #7
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift8_mask7(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 8
+  %and = and i32 %shl, 127
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift9_mask8
+; CHECK-BE: ldr r1, [r0]
+; CHECK-COMMON: ldr r1, [r0]
+; CHECK-COMMON: ubfx r1, r1, #9, #8
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift9_mask8(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 9
+  %and = and i32 %shl, 255
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
 ; CHECK-LABEL: test_shift8_mask16
-; CHECK-BE: ldr r1, [r0]
-; CHECK-COMMON: ldr r1, [r0]
-; CHECK-COMMON: ubfx r1, r1, #8, #16
+; CHECK-ALIGN: ldr r1, [r0]
+; CHECK-ALIGN: ubfx r1, r1, #8, #16
+; CHECK-BE: ldrh r1, [r0, #1]
+; CHECK-ARM: ldrh r1, [r0, #1]
+; CHECK-THUMB: ldrh.w r1, [r0, #1]
 ; CHECK-COMMON: str r1, [r0]
 define arm_aapcscc void @test_shift8_mask16(i32* nocapture %p) {
 entry:
   %0 = load i32, i32* %p, align 4
   %shl = lshr i32 %0, 8
+  %and = and i32 %shl, 65535
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift15_mask16
+; CHECK-COMMON: ldr r1, [r0]
+; CHECK-COMMON: ubfx r1, r1, #15, #16
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift15_mask16(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 15
+  %and = and i32 %shl, 65535
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift16_mask15
+; CHECK-BE: ldrh r1, [r0]
+; CHECK-COMMON: ldrh r1, [r0, #2]
+; CHECK-COMMON: bfc r1, #15, #17
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift16_mask15(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 16
+  %and = and i32 %shl, 32767
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift8_mask24
+; CHECK-BE: ldr r1, [r0]
+; CHECK-COMMON: ldr r1, [r0]
+; CHECK-ARM: lsr r1, r1, #8
+; CHECK-THUMB: lsrs r1, r1, #8
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift8_mask24(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 8
+  %and = and i32 %shl, 16777215
+  store i32 %and, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: test_shift24_mask16
+; CHECK-BE: ldrb r1, [r0]
+; CHECK-COMMON: ldrb r1, [r0, #3]
+; CHECK-COMMON: str r1, [r0]
+define arm_aapcscc void @test_shift24_mask16(i32* nocapture %p) {
+entry:
+  %0 = load i32, i32* %p, align 4
+  %shl = lshr i32 %0, 24
   %and = and i32 %shl, 65535
   store i32 %and, i32* %p, align 4
   ret void
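The offsets in the new ldrb/ldrh checks fall directly out of the narrowing: for a 32-bit load narrowed to LoadBytes bytes after a shift of ShiftBits, the starting byte depends on endianness, which is why the CHECK-BE lines differ. A small sketch (narrowOffset is an illustrative helper, not part of the patch):

// Byte offset of the narrowed load within the original 4-byte word.
unsigned narrowOffset(bool BigEndian, unsigned ShiftBits, unsigned LoadBytes) {
  // Little-endian: bits [ShiftBits, ShiftBits + 8*LoadBytes) begin
  // ShiftBits / 8 bytes into the word; big-endian mirrors the offset.
  return BigEndian ? 4 - LoadBytes - ShiftBits / 8 : ShiftBits / 8;
}
// e.g. test_shift8_mask8: narrowOffset(false, 8, 1) == 1 -> ldrb r1, [r0, #1],
//      narrowOffset(true, 8, 1) == 2 -> ldrb r1, [r0, #2] on big-endian.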
X86 codegen tests: the narrowed loads replace a 32-bit stack load plus a high-8-bit register extract with a direct movzbl from the stack.

   ; CHECK-NEXT: movzbl %ah, %eax # NOREX
   ; CHECK-NEXT: movq %rax, %r10
   ; CHECK-NEXT: movzbl %dh, %edx # NOREX
-  ; CHECK-NEXT: movzbl %ch, %eax # NOREX
-  ; CHECK-NEXT: movq %rax, %r11
+  ; CHECK-NEXT: movzbl %ch, %ebp # NOREX
   ; CHECK-NEXT: movq %r8, %rax
   ; CHECK-NEXT: movzbl %ah, %ecx # NOREX
   ; CHECK-NEXT: movq %r9, %rax
-  ; CHECK-NEXT: movzbl %ah, %ebp # NOREX
-  ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
-  ; CHECK-NEXT: movzbl %ah, %eax # NOREX
-  ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebx
-  ; CHECK-NEXT: movzbl %bh, %edi # NOREX
+  ; CHECK-NEXT: movzbl %ah, %ebx # NOREX
+  ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+  ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
   ; CHECK-NEXT: movq %r10, %r8
   ; CHECK-NEXT: addq %r8, %rsi
-  ; CHECK-NEXT: addq %r11, %rdx
+  ; CHECK-NEXT: addq %rbp, %rdx
   ; CHECK-NEXT: addq %rsi, %rdx
-  ; CHECK-NEXT: addq %rbp, %rcx
+  ; CHECK-NEXT: addq %rbx, %rcx
   ; CHECK-NEXT: addq %rdi, %rax
   ; CHECK-NEXT: addq %rcx, %rax
   ; CHECK-NEXT: addq %rdx, %rax

   ; GNUX32-NEXT: movzbl %ah, %eax # NOREX
   ; GNUX32-NEXT: movq %rax, %r10
   ; GNUX32-NEXT: movzbl %dh, %edx # NOREX
-  ; GNUX32-NEXT: movzbl %ch, %eax # NOREX
-  ; GNUX32-NEXT: movq %rax, %r11
+  ; GNUX32-NEXT: movzbl %ch, %ebp # NOREX
   ; GNUX32-NEXT: movq %r8, %rax
   ; GNUX32-NEXT: movzbl %ah, %ecx # NOREX
   ; GNUX32-NEXT: movq %r9, %rax
-  ; GNUX32-NEXT: movzbl %ah, %ebp # NOREX
-  ; GNUX32-NEXT: movl {{[0-9]+}}(%esp), %eax
-  ; GNUX32-NEXT: movzbl %ah, %eax # NOREX
-  ; GNUX32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-  ; GNUX32-NEXT: movzbl %bh, %edi # NOREX
+  ; GNUX32-NEXT: movzbl %ah, %ebx # NOREX
+  ; GNUX32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+  ; GNUX32-NEXT: movzbl {{[0-9]+}}(%esp), %edi
   ; GNUX32-NEXT: movq %r10, %r8
   ; GNUX32-NEXT: addq %r8, %rsi
-  ; GNUX32-NEXT: addq %r11, %rdx
+  ; GNUX32-NEXT: addq %rbp, %rdx
   ; GNUX32-NEXT: addq %rsi, %rdx
-  ; GNUX32-NEXT: addq %rbp, %rcx
+  ; GNUX32-NEXT: addq %rbx, %rcx
   ; GNUX32-NEXT: addq %rdi, %rax
   ; GNUX32-NEXT: addq %rcx, %rax
   ; GNUX32-NEXT: addq %rdx, %rax