llvm.org GIT mirror llvm / af34a3a
[PowerPC] Add codegen for VSX word extract convert to FP. Add codegen for VSX word extract conversion from signed/unsigned to single/double precision. For UINT_TO_FP: Extracting an unsigned word and converting it to float was implemented in https://reviews.llvm.org/D20239. Here we add the missing integer extract and conversion to double. This uses the new P9 instruction xxextractuw to extract an integer element when the result will be converted to double, thereby saving 2 direct moves (VSR <-> GPR). For SINT_TO_FP: We implement the following sequences, which also reduce the number of instructions by saving 2 direct moves. v4i32->f32: xxspltw, xvcvsxwsp, xscvspdpn; v4i32->f64: xxspltw, xvcvsxwdp. Differential Revision: https://reviews.llvm.org/D35859 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310866 91177308-0d34-0410-b5e6-96231b3b80d8 Lei Huang 2 years ago
3 changed file(s) with 245 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
25492549 UseVSXReg;
25502550 } // mayStore
25512551
2552 let Predicates = [IsLittleEndian] in {
2553 def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
2554 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
2555 def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
2556 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
2557 def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
2558 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
2559 def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
2560 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
2561 def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
2562 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
2563 def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
2564 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
2565 def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
2566 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
2567 def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
2568 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
2569 }
2570
2571 let Predicates = [IsBigEndian] in {
2572 def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
2573 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
2574 def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
2575 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
2576 def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
2577 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
2578 def: Pat<(f32 (PPCfcfids (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
2579 (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
2580 def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 0))))),
2581 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
2582 def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 1))))),
2583 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
2584 def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 2))))),
2585 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
2586 def: Pat<(f64 (PPCfcfid (PPCmtvsra (i32 (extractelt v4i32:$A, 3))))),
2587 (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
2588 }
2589
25522590 // Patterns for which instructions from ISA 3.0 are a better match
25532591 let Predicates = [IsLittleEndian, HasP9Vector] in {
25542592 def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
25592597 (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
25602598 def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
25612599 (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
2600 def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
2601 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
2602 def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
2603 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
2604 def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
2605 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
2606 def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
2607 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
25622608 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
25632609 (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
25642610 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
25862632 (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
25872633 def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
25882634 (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
2635 def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))),
2636 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
2637 def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))),
2638 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
2639 def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))),
2640 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
2641 def : Pat<(f64 (PPCfcfidu (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))),
2642 (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
25892643 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
25902644 (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
25912645 def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
438438 ret float %conv
439439 }
440440
441 ; Verify we generate optimal code for an unsigned vector integer element
442 ; extract followed by a conversion to double.
443
444 define double @conv2dlbTestui0(<4 x i32> %a) {
445 entry:
446 ; CHECK-LABEL: conv2dlbTestui0
447 ; CHECK: xxextractuw [[SW:[0-9]+]], 34, 12
448 ; CHECK: xscvuxddp 1, [[SW]]
449 ; CHECK-BE-LABEL: conv2dlbTestui0
450 ; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 0
451 ; CHECK-BE: xscvuxddp 1, [[CP]]
452 %0 = extractelement <4 x i32> %a, i32 0
453 %1 = uitofp i32 %0 to double
454 ret double %1
455 }
456
457 define double @conv2dlbTestui1(<4 x i32> %a) {
458 entry:
459 ; CHECK-LABEL: conv2dlbTestui1
460 ; CHECK: xxextractuw [[SW:[0-9]+]], 34, 8
461 ; CHECK: xscvuxddp 1, [[SW]]
462 ; CHECK-BE-LABEL: conv2dlbTestui1
463 ; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 4
464 ; CHECK-BE: xscvuxddp 1, [[CP]]
465 %0 = extractelement <4 x i32> %a, i32 1
466 %1 = uitofp i32 %0 to double
467 ret double %1
468 }
469
470 define double @conv2dlbTestui2(<4 x i32> %a) {
471 entry:
472 ; CHECK-LABEL: conv2dlbTestui2
473 ; CHECK: xxextractuw [[SW:[0-9]+]], 34, 4
474 ; CHECK: xscvuxddp 1, [[SW]]
475 ; CHECK-BE-LABEL: conv2dlbTestui2
476 ; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 8
477 ; CHECK-BE: xscvuxddp 1, [[CP]]
478 %0 = extractelement <4 x i32> %a, i32 2
479 %1 = uitofp i32 %0 to double
480 ret double %1
481 }
482
483 define double @conv2dlbTestui3(<4 x i32> %a) {
484 entry:
485 ; CHECK-LABEL: conv2dlbTestui3
486 ; CHECK: xxextractuw [[SW:[0-9]+]], 34, 0
487 ; CHECK: xscvuxddp 1, [[SW]]
488 ; CHECK-BE-LABEL: conv2dlbTestui3
489 ; CHECK-BE: xxextractuw [[CP:[0-9]+]], 34, 12
490 ; CHECK-BE: xscvuxddp 1, [[CP]]
491 %0 = extractelement <4 x i32> %a, i32 3
492 %1 = uitofp i32 %0 to double
493 ret double %1
494 }
495
496 ; Verify we don't crash when the extracted element index is variable.
497 define double @conv2dlbTestuiVar(<4 x i32> %a, i32 zeroext %elem) {
498 entry:
499 %vecext = extractelement <4 x i32> %a, i32 %elem
500 %conv = uitofp i32 %vecext to double
501 ret double %conv
502 }
503
441504 define <4 x float> @_Z10testInsEltILj0EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) {
442505 entry:
443506 ; CHECK-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_
104104 %1 = uitofp i64 %0 to float
105105 ret float %1
106106 }
107
108 define float @conv2fltTesti0(<4 x i32> %a) {
109 entry:
110 ; CHECK-LABEL: conv2fltTesti0
111 ; CHECK: xxspltw [[SW:[0-9]+]], 34, 3
112 ; CHECK: xvcvsxwsp [[SW]], [[SW]]
113 ; CHECK: xscvspdpn 1, [[SW]]
114 ; CHECK-BE-LABEL: conv2fltTesti0
115 ; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 0
116 ; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
117 ; CHECK-BE: xscvspdpn 1, [[CP]]
118 %vecext = extractelement <4 x i32> %a, i32 0
119 %conv = sitofp i32 %vecext to float
120 ret float %conv
121 }
122
123 define float @conv2fltTesti1(<4 x i32> %a) {
124 entry:
125 ; CHECK-LABEL: conv2fltTesti1
126 ; CHECK: xxspltw [[SW:[0-9]+]], 34, 2
127 ; CHECK: xvcvsxwsp [[SW]], [[SW]]
128 ; CHECK: xscvspdpn 1, [[SW]]
129 ; CHECK-BE-LABEL: conv2fltTesti1
130 ; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 1
131 ; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
132 ; CHECK-BE: xscvspdpn 1, [[CP]]
133 %vecext = extractelement <4 x i32> %a, i32 1
134 %conv = sitofp i32 %vecext to float
135 ret float %conv
136 }
137
138 define float @conv2fltTesti2(<4 x i32> %a) {
139 entry:
140 ; CHECK-LABEL: conv2fltTesti2
141 ; CHECK: xxspltw [[SW:[0-9]+]], 34, 1
142 ; CHECK: xvcvsxwsp [[SW]], [[SW]]
143 ; CHECK: xscvspdpn 1, [[SW]]
144 ; CHECK-BE-LABEL: conv2fltTesti2
145 ; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 2
146 ; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
147 ; CHECK-BE: xscvspdpn 1, [[CP]]
148 %vecext = extractelement <4 x i32> %a, i32 2
149 %conv = sitofp i32 %vecext to float
150 ret float %conv
151 }
152
153 define float @conv2fltTesti3(<4 x i32> %a) {
154 entry:
155 ; CHECK-LABEL: conv2fltTesti3
156 ; CHECK: xxspltw [[SW:[0-9]+]], 34, 0
157 ; CHECK: xvcvsxwsp [[SW]], [[SW]]
158 ; CHECK: xscvspdpn 1, [[SW]]
159 ; CHECK-BE-LABEL: conv2fltTesti3
160 ; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 3
161 ; CHECK-BE: xvcvsxwsp [[CP]], [[CP]]
162 ; CHECK-BE: xscvspdpn 1, [[CP]]
163 %vecext = extractelement <4 x i32> %a, i32 3
164 %conv = sitofp i32 %vecext to float
165 ret float %conv
166 }
167
168 ; Verify we don't crash when the extracted element index is variable.
169 define float @conv2fltTestiVar(<4 x i32> %a, i32 zeroext %elem) {
170 entry:
171 %vecext = extractelement <4 x i32> %a, i32 %elem
172 %conv = sitofp i32 %vecext to float
173 ret float %conv
174 }
175
176 define double @conv2dblTesti0(<4 x i32> %a) {
177 entry:
178 ; CHECK-LABEL: conv2dblTesti0
179 ; CHECK: xxspltw [[SW:[0-9]+]], 34, 3
180 ; CHECK: xvcvsxwdp 1, [[SW]]
181 ; CHECK-BE-LABEL: conv2dblTesti0
182 ; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 0
183 ; CHECK-BE: xvcvsxwdp 1, [[CP]]
184 %vecext = extractelement <4 x i32> %a, i32 0
185 %conv = sitofp i32 %vecext to double
186 ret double %conv
187 }
188
189 define double @conv2dblTesti1(<4 x i32> %a) {
190 entry:
191 ; CHECK-LABEL: conv2dblTesti1
192 ; CHECK: xxspltw [[SW:[0-9]+]], 34, 2
193 ; CHECK: xvcvsxwdp 1, [[SW]]
194 ; CHECK-BE-LABEL: conv2dblTesti1
195 ; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 1
196 ; CHECK-BE: xvcvsxwdp 1, [[CP]]
197 %vecext = extractelement <4 x i32> %a, i32 1
198 %conv = sitofp i32 %vecext to double
199 ret double %conv
200 }
201
202 define double @conv2dblTesti2(<4 x i32> %a) {
203 entry:
204 ; CHECK-LABEL: conv2dblTesti2
205 ; CHECK: xxspltw [[SW:[0-9]+]], 34, 1
206 ; CHECK: xvcvsxwdp 1, [[SW]]
207 ; CHECK-BE-LABEL: conv2dblTesti2
208 ; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 2
209 ; CHECK-BE: xvcvsxwdp 1, [[CP]]
210 %vecext = extractelement <4 x i32> %a, i32 2
211 %conv = sitofp i32 %vecext to double
212 ret double %conv
213 }
214
215 define double @conv2dblTesti3(<4 x i32> %a) {
216 entry:
217 ; CHECK-LABEL: conv2dblTesti3
218 ; CHECK: xxspltw [[SW:[0-9]+]], 34, 0
219 ; CHECK: xvcvsxwdp 1, [[SW]]
220 ; CHECK-BE-LABEL: conv2dblTesti3
221 ; CHECK-BE: xxspltw [[CP:[0-9]+]], 34, 3
222 ; CHECK-BE: xvcvsxwdp 1, [[CP]]
223 %vecext = extractelement <4 x i32> %a, i32 3
224 %conv = sitofp i32 %vecext to double
225 ret double %conv
226 }
227
228 ; Verify we don't crash when the extracted element index is variable.
229 define double @conv2dblTestiVar(<4 x i32> %a, i32 zeroext %elem) {
230 entry:
231 %vecext = extractelement <4 x i32> %a, i32 %elem
232 %conv = sitofp i32 %vecext to double
233 ret double %conv
234 }