llvm.org GIT mirror llvm / 8d41610
[PowerPC] Fix erroneous condition for converting uint-to-fp vector conversion

A condition for exiting the legalization of a v4i32 conversion to v2f64 through extract/convert/build erroneously checks for the extract having type i32. This is not adequate, as smaller extracts are actually legalized to i32 as well. Furthermore, an early exit is missing, which means that we only check that both extracts are from the same vector if that type check fails. As a result, both cases in the included test case fail: the first gets a select error and the second generates incorrect code. The culprit commit is r274535.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360043 91177308-0d34-0410-b5e6-96231b3b80d8

Nemanja Ivanovic, 1 year, 5 months ago
2 changed file(s) with 136 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
1247512475 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
1247612476 if (!Ext1Op || !Ext2Op)
1247712477 return SDValue();
12478 if (Ext1.getValueType() != MVT::i32 ||
12479 Ext2.getValueType() != MVT::i32)
12480 if (Ext1.getOperand(0) != Ext2.getOperand(0))
12478 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
12479 Ext1.getOperand(0) != Ext2.getOperand(0))
1248112480 return SDValue();
1248212481
1248312482 int FirstElem = Ext1Op->getZExtValue();
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
2 ; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \
3 ; RUN: -check-prefix=P9BE
4 ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
5 ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \
6 ; RUN: -check-prefix=P9LE
7 ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
8 ; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \
9 ; RUN: -check-prefix=P8BE
10 ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
11 ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \
12 ; RUN: -check-prefix=P8LE
; test1: converts elements 0 and 1 of an <8 x i16> vector to double with
; uitofp and packs the two results into a <2 x double>.
; The source elements are i16 rather than i32; the commit message that
; accompanies this test reports that this case hit a select error before
; the fix.
; The P9BE/P9LE/P8BE/P8LE blocks below are the FileCheck expectations for the
; four RUN configurations (pwr9/pwr8, big/little endian).
13 define dso_local <2 x double> @test1(<8 x i16> %a) {
14 ; P9BE-LABEL: test1:
15 ; P9BE: # %bb.0: # %entry
16 ; P9BE-NEXT: li r3, 0
17 ; P9BE-NEXT: vextuhlx r3, r3, v2
18 ; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31
19 ; P9BE-NEXT: mtvsrwz f0, r3
20 ; P9BE-NEXT: li r3, 2
21 ; P9BE-NEXT: vextuhlx r3, r3, v2
22 ; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31
23 ; P9BE-NEXT: mtvsrwz f1, r3
24 ; P9BE-NEXT: xscvuxddp f0, f0
25 ; P9BE-NEXT: xscvuxddp f1, f1
26 ; P9BE-NEXT: xxmrghd v2, vs0, vs1
27 ; P9BE-NEXT: blr
28 ;
29 ; P9LE-LABEL: test1:
30 ; P9LE: # %bb.0: # %entry
31 ; P9LE-NEXT: li r3, 0
32 ; P9LE-NEXT: vextuhrx r3, r3, v2
33 ; P9LE-NEXT: rlwinm r3, r3, 0, 16, 31
34 ; P9LE-NEXT: mtvsrwz f0, r3
35 ; P9LE-NEXT: li r3, 2
36 ; P9LE-NEXT: vextuhrx r3, r3, v2
37 ; P9LE-NEXT: rlwinm r3, r3, 0, 16, 31
38 ; P9LE-NEXT: mtvsrwz f1, r3
39 ; P9LE-NEXT: xscvuxddp f0, f0
40 ; P9LE-NEXT: xscvuxddp f1, f1
41 ; P9LE-NEXT: xxmrghd v2, vs1, vs0
42 ; P9LE-NEXT: blr
43 ;
44 ; P8BE-LABEL: test1:
45 ; P8BE: # %bb.0: # %entry
46 ; P8BE-NEXT: mfvsrd r3, v2
47 ; P8BE-NEXT: rldicl r4, r3, 16, 48
48 ; P8BE-NEXT: rldicl r3, r3, 32, 48
49 ; P8BE-NEXT: rlwinm r4, r4, 0, 16, 31
50 ; P8BE-NEXT: rlwinm r3, r3, 0, 16, 31
51 ; P8BE-NEXT: mtvsrwz f0, r4
52 ; P8BE-NEXT: mtvsrwz f1, r3
53 ; P8BE-NEXT: xscvuxddp f0, f0
54 ; P8BE-NEXT: xscvuxddp f1, f1
55 ; P8BE-NEXT: xxmrghd v2, vs0, vs1
56 ; P8BE-NEXT: blr
57 ;
58 ; P8LE-LABEL: test1:
59 ; P8LE: # %bb.0: # %entry
60 ; P8LE-NEXT: xxswapd vs0, v2
61 ; P8LE-NEXT: mfvsrd r3, f0
62 ; P8LE-NEXT: clrldi r4, r3, 48
63 ; P8LE-NEXT: rldicl r3, r3, 48, 48
64 ; P8LE-NEXT: rlwinm r4, r4, 0, 16, 31
65 ; P8LE-NEXT: rlwinm r3, r3, 0, 16, 31
66 ; P8LE-NEXT: mtvsrwz f0, r4
67 ; P8LE-NEXT: mtvsrwz f1, r3
68 ; P8LE-NEXT: xscvuxddp f0, f0
69 ; P8LE-NEXT: xscvuxddp f1, f1
70 ; P8LE-NEXT: xxmrghd v2, vs1, vs0
71 ; P8LE-NEXT: blr
72 entry:
; Element 0 of %a, converted unsigned i16 -> double, goes into lane 0.
73 %vecext = extractelement <8 x i16> %a, i32 0
74 %conv = uitofp i16 %vecext to double
75 %vecinit = insertelement <2 x double> undef, double %conv, i32 0
; Element 1 of the same vector %a, converted the same way, goes into lane 1.
76 %vecext1 = extractelement <8 x i16> %a, i32 1
77 %conv2 = uitofp i16 %vecext1 to double
78 %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
79 ret <2 x double> %vecinit3
80 }
81
; test2: converts element 0 of %a and element 1 of %b (both <4 x i32>) to
; double with uitofp and packs the two results into a <2 x double>.
; The two extracts come from different source vectors; the commit message that
; accompanies this test reports that this case produced incorrect code before
; the fix.
; The P9BE/P9LE/P8BE/P8LE blocks below are the FileCheck expectations for the
; four RUN configurations; each of them reads from both v2 and v3.
82 define dso_local <2 x double> @test2(<4 x i32> %a, <4 x i32> %b) {
83 ; P9BE-LABEL: test2:
84 ; P9BE: # %bb.0: # %entry
85 ; P9BE-NEXT: xxextractuw f0, v2, 0
86 ; P9BE-NEXT: xxextractuw f1, v3, 4
87 ; P9BE-NEXT: xscvuxddp f0, f0
88 ; P9BE-NEXT: xscvuxddp f1, f1
89 ; P9BE-NEXT: xxmrghd v2, vs0, vs1
90 ; P9BE-NEXT: blr
91 ;
92 ; P9LE-LABEL: test2:
93 ; P9LE: # %bb.0: # %entry
94 ; P9LE-NEXT: xxextractuw f0, v2, 12
95 ; P9LE-NEXT: xxextractuw f1, v3, 8
96 ; P9LE-NEXT: xscvuxddp f0, f0
97 ; P9LE-NEXT: xscvuxddp f1, f1
98 ; P9LE-NEXT: xxmrghd v2, vs1, vs0
99 ; P9LE-NEXT: blr
100 ;
101 ; P8BE-LABEL: test2:
102 ; P8BE: # %bb.0: # %entry
103 ; P8BE-NEXT: xxsldwi vs0, v2, v2, 3
104 ; P8BE-NEXT: mfvsrwz r4, v3
105 ; P8BE-NEXT: mtvsrwz f1, r4
106 ; P8BE-NEXT: mfvsrwz r3, f0
107 ; P8BE-NEXT: xscvuxddp f1, f1
108 ; P8BE-NEXT: mtvsrwz f0, r3
109 ; P8BE-NEXT: xscvuxddp f0, f0
110 ; P8BE-NEXT: xxmrghd v2, vs0, vs1
111 ; P8BE-NEXT: blr
112 ;
113 ; P8LE-LABEL: test2:
114 ; P8LE: # %bb.0: # %entry
115 ; P8LE-NEXT: xxswapd vs0, v2
116 ; P8LE-NEXT: xxsldwi vs1, v3, v3, 1
117 ; P8LE-NEXT: mfvsrwz r3, f0
118 ; P8LE-NEXT: mfvsrwz r4, f1
119 ; P8LE-NEXT: mtvsrwz f0, r3
120 ; P8LE-NEXT: mtvsrwz f1, r4
121 ; P8LE-NEXT: xscvuxddp f0, f0
122 ; P8LE-NEXT: xscvuxddp f1, f1
123 ; P8LE-NEXT: xxmrghd v2, vs1, vs0
124 ; P8LE-NEXT: blr
125 entry:
; Element 0 of %a, converted unsigned i32 -> double, goes into lane 0.
126 %vecext = extractelement <4 x i32> %a, i32 0
127 %conv = uitofp i32 %vecext to double
128 %vecinit = insertelement <2 x double> undef, double %conv, i32 0
; Element 1 is taken from %b, not %a, then converted and placed in lane 1.
129 %vecext1 = extractelement <4 x i32> %b, i32 1
130 %conv2 = uitofp i32 %vecext1 to double
131 %vecinit3 = insertelement <2 x double> %vecinit, double %conv2, i32 1
132 ret <2 x double> %vecinit3
133 }