llvm.org GIT mirror llvm / f7e48cb
Merging r246937: ------------------------------------------------------------------------ r246937 | hfinkel | 2015-09-06 00:17:30 -0400 (Sun, 06 Sep 2015) | 13 lines [PowerPC] Don't commute trivial rlwimi instructions To commute a trivial rlwimi instruction (meaning one with a full mask and zero shift), we'd need the ability to form an all-zero mask (instead of an all-one mask) using rlwimi. We can't represent this, however, and we'll miscompile code if we try. The code quality problem that this highlights (that SDAG simplification can lead to us generating an ISD::OR node with a constant zero LHS) will be fixed as a follow-up. Fixes PR24719. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_37@252481 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 3 years ago
2 changed file(s) with 97 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
307 307 // Masks.
308 308 unsigned MB = MI->getOperand(4).getImm();
309 309 unsigned ME = MI->getOperand(5).getImm();
310
311 // We can't commute a trivial mask (there is no way to represent an all-zero
312 // mask).
313 if (MB == 0 && ME == 31)
314 return nullptr;
310 315
311 316 if (NewMI) {
312 317 // Create a new instruction.
0 # RUN: llc -start-after=dead-mi-elimination -stop-after=twoaddressinstruction -o /dev/null %s | FileCheck %s
1
2 --- |
3 target datalayout = "E-m:e-i64:64-n32:64"
4 target triple = "powerpc64-unknown-linux-gnu"
5
6 @d = global i32 15, align 4
7 @b = global i32* @d, align 8
8 @a = common global i32 0, align 4
9
10 ; Function Attrs: nounwind
11 define signext i32 @main() #0 {
12 entry:
13 %0 = load i32*, i32** @b, align 8
14 %1 = load i32, i32* @a, align 4
15 %lnot = icmp eq i32 %1, 0
16 %lnot.ext = zext i1 %lnot to i32
17 %shr.i = lshr i32 2072, %lnot.ext
18 %call.lobit = lshr i32 %shr.i, 7
19 %2 = and i32 %call.lobit, 1
20 %3 = load i32, i32* %0, align 4
21 %or = or i32 %2, %3
22 store i32 %or, i32* %0, align 4
23 %4 = load i32, i32* @a, align 4
24 %lnot.1 = icmp eq i32 %4, 0
25 %lnot.ext.1 = zext i1 %lnot.1 to i32
26 %shr.i.1 = lshr i32 2072, %lnot.ext.1
27 %call.lobit.1 = lshr i32 %shr.i.1, 7
28 %5 = and i32 %call.lobit.1, 1
29 %or.1 = or i32 %5, %or
30 store i32 %or.1, i32* %0, align 4
31 ret i32 %or.1
32 }
33
34 attributes #0 = { nounwind "target-cpu"="ppc64" }
35
36 ...
37 ---
38 name: main
39 alignment: 2
40 exposesReturnsTwice: false
41 hasInlineAsm: false
42 isSSA: true
43 tracksRegLiveness: true
44 tracksSubRegLiveness: false
45 registers:
46 - { id: 0, class: g8rc_and_g8rc_nox0 }
47 - { id: 1, class: g8rc_and_g8rc_nox0 }
48 - { id: 2, class: gprc }
49 - { id: 3, class: gprc }
50 - { id: 4, class: gprc }
51 - { id: 5, class: g8rc_and_g8rc_nox0 }
52 - { id: 6, class: g8rc_and_g8rc_nox0 }
53 - { id: 7, class: gprc }
54 - { id: 8, class: gprc }
55 - { id: 9, class: gprc }
56 - { id: 10, class: g8rc }
57 frameInfo:
58 isFrameAddressTaken: false
59 isReturnAddressTaken: false
60 hasStackMap: false
61 hasPatchPoint: false
62 stackSize: 0
63 offsetAdjustment: 0
64 maxAlignment: 0
65 adjustsStack: false
66 hasCalls: false
67 maxCallFrameSize: 0
68 hasOpaqueSPAdjustment: false
69 hasVAStart: false
70 hasMustTailInVarArgFunc: false
71 body: |
72 bb.0.entry:
73 liveins: %x2
74
75 %0 = ADDIStocHA %x2, @b
76 %1 = LD target-flags(ppc-toc-lo) @b, killed %0 :: (load 8 from @b)
77 %2 = LWZ 0, %1 :: (load 4 from %ir.0)
78 %3 = LI 0
79 %4 = RLWIMI %3, killed %2, 0, 0, 31
80 ; CHECK-LABEL: name: main
81 ; CHECK: %[[REG1:[0-9]+]] = LI 0
82 ; CHECK: %[[REG2:[0-9]+]] = COPY %[[REG1]]
83 ; CHECK: %[[REG2]] = RLWIMI %[[REG2]], killed %2, 0, 0, 31
84 %8 = RLWIMI %3, %4, 0, 0, 31
85 STW %4, 0, %1 :: (store 4 into %ir.0)
86 %10 = EXTSW_32_64 %8
87 STW %8, 0, %1 :: (store 4 into %ir.0)
88 %x3 = COPY %10
89 BLR8 implicit %x3, implicit %lr8, implicit %rm
90
91 ...