llvm.org GIT mirror llvm / e8af9b4
Merging r339260:
------------------------------------------------------------------------
r339260 | syzaara | 2018-08-08 08:20:43 -0700 (Wed, 08 Aug 2018) | 13 lines

[PowerPC] Improve codegen for vector loads using scalar_to_vector

This patch aims to improve the codegen for vector loads involving the
scalar_to_vector (load X) sequence. Initially, ld->mv instructions were used
for scalar_to_vector (load X), so this patch allows scalar_to_vector (load X)
to utilize:

LXSD and LXSDX for i64 and f64
LXSIWAX for i32 (sign extension to i64)
LXSIWZX for i32 and f64

Committing on behalf of Amy Kwan.
Differential Revision: https://reviews.llvm.org/D48950
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_70@347957 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard 1 year, 10 months ago
15 changed file(s) with 1549 addition(s) and 262 deletion(s). Raw diff Collapse all Expand all
591591 XXPERM,
592592 XXPERMR,
593593 XXSLDWI,
594 XXSLDWIs,
594595 XXSPLTIB,
595596 XXSPLTW,
596597 XXSPLTWs,
84538453 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
84548454 int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
84558455
8456 // If the source for the shuffle is a scalar_to_vector that came from a
8457 // 32-bit load, it will have used LXVWSX so we don't need to splat again.
8458 if (Subtarget.hasP9Vector() &&
8459 ((isLittleEndian && SplatIdx == 3) ||
8460 (!isLittleEndian && SplatIdx == 0))) {
8461 SDValue Src = V1.getOperand(0);
8462 if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
8463 Src.getOperand(0).getOpcode() == ISD::LOAD &&
8464 Src.getOperand(0).hasOneUse())
8465 return V1;
8466 }
84678456 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
84688457 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
84698458 DAG.getConstant(SplatIdx, dl, MVT::i32));
876876 "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm,
877877 [(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB,
878878 imm32SExt16:$SHW))]>;
879
880 let isCodeGenOnly = 1 in
881 def XXSLDWIs : XX3Form_2s<60, 2,
882 (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$SHW),
883 "xxsldwi $XT, $XA, $XA, $SHW", IIC_VecPerm, []>;
884
879885 def XXSPLTW : XX2Form_2<60, 164,
880886 (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
881887 "xxspltw $XT, $XB, $UIM", IIC_VecPerm,
885891 def XXSPLTWs : XX2Form_2<60, 164,
886892 (outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM),
887893 "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
894
888895 } // hasSideEffects
889896 } // UseVSXReg = 1
890897
14651472 (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))),
14661473 (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
14671474 }
1468 def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)),
1469 (v4i32 (XXSPLTWs (LIWAX xoaddr:$src), 1))>;
14701475
14711476 // Instructions for converting float to i64 feeding a store.
14721477 let Predicates = [NoP9Vector] in {
30493054 (STXVX $rS, xoaddr:$dst)>;
30503055 def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
30513056 (STXVX $rS, xoaddr:$dst)>;
3052 def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
3053 (v4i32 (LXVWSX xoaddr:$src))>;
3054 def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
3055 (v4f32 (LXVWSX xoaddr:$src))>;
3056 def : Pat<(v4f32 (scalar_to_vector
3057 (f32 (fpround (f64 (extloadf32 xoaddr:$src)))))),
3058 (v4f32 (LXVWSX xoaddr:$src))>;
3057
3058 let AddedComplexity = 400 in {
3059 // LIWAX - This instruction is used for sign extending i32 -> i64.
3060 // LIWZX - This instruction will be emitted for i32, f32, and when
3061 // zero-extending i32 to i64 (zext i32 -> i64).
3062 let Predicates = [IsLittleEndian] in {
3063
3064 def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
3065 (v2i64 (XXPERMDIs
3066 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC), 2))>;
3067
3068 def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
3069 (v2i64 (XXPERMDIs
3070 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
3071
3072 def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
3073 (v4i32 (XXPERMDIs
3074 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
3075
3076 def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
3077 (v4f32 (XXPERMDIs
3078 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>;
3079 }
3080
3081 let Predicates = [IsBigEndian] in {
3082 def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
3083 (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>;
3084
3085 def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
3086 (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>;
3087
3088 def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
3089 (v4i32 (XXSLDWIs
3090 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
3091
3092 def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
3093 (v4f32 (XXSLDWIs
3094 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
3095 }
3096
3097 }
30593098
30603099 // Build vectors from i8 loads
30613100 def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),
32163255 (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;
32173256 def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),
32183257 (f32 (DFLOADf32 ixaddr:$src))>;
3258
3259
3260 let AddedComplexity = 400 in {
3261 // The following pseudoinstructions are used to ensure the utilization
3262 // of all 64 VSX registers.
3263 let Predicates = [IsLittleEndian, HasP9Vector] in {
3264 def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
3265 (v2i64 (XXPERMDIs
3266 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
3267 def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
3268 (v2i64 (XXPERMDIs
3269 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
3270
3271 def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
3272 (v2f64 (XXPERMDIs
3273 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
3274 def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
3275 (v2f64 (XXPERMDIs
3276 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
3277 }
3278
3279 let Predicates = [IsBigEndian, HasP9Vector] in {
3280 def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
3281 (v2i64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
3282 def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
3283 (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
3284
3285 def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
3286 (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
3287 def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
3288 (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
3289 }
3290 }
32193291
32203292 let Predicates = [IsBigEndian, HasP9Vector] in {
32213293
39314003 (v4i32 (VEXTSH2W $A))>;
39324004 }
39334005 }
4006
0 ; RUN: llc < %s -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
1 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P8
1 ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
2 ; RUN: | FileCheck %s --check-prefix=CHECK-P8
23 ; RUN: llc < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
3 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P9
4 ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
5 ; RUN: | FileCheck %s --check-prefix=CHECK-P9
46
57 @a = external local_unnamed_addr global <4 x i32>, align 16
68 @pb = external local_unnamed_addr global float*, align 8
79
810 define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) {
9 ; CHECK-P8-LABEL: testExpandPostRAPseudo:
10 ; CHECK-P8: lxsiwax 34, 0, 3
11 ; CHECK-P8-NEXT: xxspltw 34, 34, 1
12 ; CHECK-P8-NEXT: stvx 2, 0, 4
13 ; CHECK-P8: #APP
14 ; CHECK-P8-NEXT: #Clobber Rigisters
15 ; CHECK-P8-NEXT: #NO_APP
16 ; CHECK-P8-NEXT: lis 4, 1024
17 ; CHECK-P8-NEXT: lfiwax 0, 0, 3
18 ; CHECK-P8: stfsx 0, 3, 4
19 ; CHECK-P8-NEXT: blr
20
21 ; CHECK-P9-LABEL: testExpandPostRAPseudo:
22 ; CHECK-P9: lxvwsx 0, 0, 3
23 ; CHECK-P9: stxvx 0, 0, 4
24 ; CHECK-P9: #APP
25 ; CHECK-P9-NEXT: #Clobber Rigisters
26 ; CHECK-P9-NEXT: #NO_APP
27 ; CHECK-P9-NEXT: lis 4, 1024
28 ; CHECK-P9-NEXT: lfiwax 0, 0, 3
29 ; CHECK-P9: stfsx 0, 3, 4
30 ; CHECK-P9-NEXT: blr
31
11 ; CHECK-P8-LABEL: testExpandPostRAPseudo:
12 ; CHECK-P8: # %bb.0: # %entry
13 ; CHECK-P8: lfiwzx f0, 0, r3
14 ; CHECK-P8: ld r4, .LC0@toc@l(r4)
15 ; CHECK-P8: xxpermdi vs0, f0, f0, 2
16 ; CHECK-P8: xxspltw v2, vs0, 3
17 ; CHECK-P8: stvx v2, 0, r4
18 ; CHECK-P8: lis r4, 1024
19 ; CHECK-P8: lfiwax f0, 0, r3
20 ; CHECK-P8: addis r3, r2, .LC1@toc@ha
21 ; CHECK-P8: ld r3, .LC1@toc@l(r3)
22 ; CHECK-P8: xscvsxdsp f0, f0
23 ; CHECK-P8: ld r3, 0(r3)
24 ; CHECK-P8: stfsx f0, r3, r4
25 ; CHECK-P8: blr
26 ;
27 ; CHECK-P9-LABEL: testExpandPostRAPseudo:
28 ; CHECK-P9: # %bb.0: # %entry
29 ; CHECK-P9: lfiwzx f0, 0, r3
30 ; CHECK-P9: addis r4, r2, .LC0@toc@ha
31 ; CHECK-P9: ld r4, .LC0@toc@l(r4)
32 ; CHECK-P9: xxpermdi vs0, f0, f0, 2
33 ; CHECK-P9: xxspltw vs0, vs0, 3
34 ; CHECK-P9: stxvx vs0, 0, r4
35 ; CHECK-P9: lis r4, 1024
36 ; CHECK-P9: lfiwax f0, 0, r3
37 ; CHECK-P9: addis r3, r2, .LC1@toc@ha
38 ; CHECK-P9: ld r3, .LC1@toc@l(r3)
39 ; CHECK-P9: xscvsxdsp f0, f0
40 ; CHECK-P9: ld r3, 0(r3)
41 ; CHECK-P9: stfsx f0, r3, r4
42 ; CHECK-P9: blr
3243 entry:
3344 %0 = load i32, i32* %ptr, align 4
3445 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
108108 ;vector int spltRegVali(int val) { //
109109 ; return (vector int) val; //
110110 ;} //
111 ;// P8: lxsiwax, xxspltw //
112 ;// P9: lxvwsx //
111 ;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
112 ;// P9: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
113113 ;vector int spltMemVali(int *ptr) { //
114114 ; return (vector int)*ptr; //
115115 ;} //
283283 ;vector unsigned int spltRegValui(unsigned int val) { //
284284 ; return (vector unsigned int) val; //
285285 ;} //
286 ;// P8: lxsiwax, xxspltw //
287 ;// P9: lxvwsx //
286 ;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
287 ;// P9: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
288288 ;vector unsigned int spltMemValui(unsigned int *ptr) { //
289289 ; return (vector unsigned int)*ptr; //
290290 ;} //
12011201 ; P9LE-LABEL: spltMemVali
12021202 ; P8BE-LABEL: spltMemVali
12031203 ; P8LE-LABEL: spltMemVali
1204 ; P9BE: lxvwsx v2, 0, r3
1205 ; P9BE: blr
1206 ; P9LE: lxvwsx v2, 0, r3
1207 ; P9LE: blr
1208 ; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3
1209 ; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
1210 ; P8BE: blr
1211 ; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3
1212 ; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
1204 ; P9BE: lfiwzx f0, 0, r3
1205 ; P9BE: xxsldwi vs0, f0, f0, 1
1206 ; P9BE: xxspltw v2, vs0, 0
1207 ; P9BE: blr
1208 ; P9LE: lfiwzx f0, 0, r3
1209 ; P9LE: xxpermdi vs0, f0, f0, 2
1210 ; P9LE: xxspltw v2, vs0, 3
1211 ; P9LE: blr
1212 ; P8BE: lfiwzx f0, 0, r3
1213 ; P8BE: xxsldwi vs0, f0, f0, 1
1214 ; P8BE: xxspltw v2, vs0, 0
1215 ; P8BE: blr
1216 ; P8LE: lfiwzx f0, 0, r3
1217 ; P8LE: xxpermdi vs0, f0, f0, 2
1218 ; P8LE: xxspltw v2, vs0, 3
12131219 ; P8LE: blr
12141220 }
12151221
23372343 ; P9LE-LABEL: spltMemValui
23382344 ; P8BE-LABEL: spltMemValui
23392345 ; P8LE-LABEL: spltMemValui
2340 ; P9BE: lxvwsx v2, 0, r3
2341 ; P9BE: blr
2342 ; P9LE: lxvwsx v2, 0, r3
2343 ; P9LE: blr
2344 ; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3
2345 ; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
2346 ; P8BE: blr
2347 ; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3
2348 ; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
2346 ; P9BE: lfiwzx f0, 0, r3
2347 ; P9BE: xxsldwi vs0, f0, f0, 1
2348 ; P9BE: xxspltw v2, vs0, 0
2349 ; P9BE: blr
2350 ; P9LE: lfiwzx f0, 0, r3
2351 ; P9LE: xxpermdi vs0, f0, f0, 2
2352 ; P9LE: xxspltw v2, vs0, 3
2353 ; P9LE: blr
2354 ; P8BE: lfiwzx f0, 0, r3
2355 ; P8BE: xxsldwi vs0, f0, f0, 1
2356 ; P8BE: xxspltw v2, vs0, 0
2357 ; P8BE: blr
2358 ; P8LE: lfiwzx f0, 0, r3
2359 ; P8LE: xxpermdi vs0, f0, f0, 2
2360 ; P8LE: xxspltw v2, vs0, 3
23492361 ; P8LE: blr
23502362 }
23512363
None ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
0 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s \
1 ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck --check-prefix=CHECK-LE \
12 ; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s
2 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck \
3 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s \
4 ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck \
35 ; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s
46
57 define <16 x i8> @test(i32* %s, i32* %t) {
8 ; CHECK-LE-LABEL: test:
9 ; CHECK-LE: # %bb.0: # %entry
10 ; CHECK-LE-NEXT: lfiwzx f0, 0, r3
11 ; CHECK-LE-NEXT: xxpermdi vs0, f0, f0, 2
12 ; CHECK-LE-NEXT: xxspltw v2, vs0, 3
13 ; CHECK-LE-NEXT: blr
14
15 ; CHECK-LABEL: test:
16 ; CHECK: # %bb.0: # %entry
17 ; CHECK-NEXT: lfiwzx f0, 0, r3
18 ; CHECK-NEXT: xxsldwi vs0, f0, f0, 1
19 ; CHECK-NEXT: xxspltw v2, vs0, 0
20 ; CHECK-NEXT: blr
621 entry:
722 %0 = bitcast i32* %s to <4 x i8>*
823 %1 = load <4 x i8>, <4 x i8>* %0, align 4
924 %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32>
1025 ret <16 x i8> %2
11 ; CHECK-LABEL: test
12 ; CHECK: lxsiwax 34, 0, 3
13 ; CHECK: xxspltw 34, 34, 1
1426 }
None ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
1 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \
2 ; RUN: --check-prefix=CHECK-BE
0 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
1 ; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s
2 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu -ppc-vsr-nums-as-vr \
3 ; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=CHECK-BE
34
45 @Globi = external global i32, align 4
56 @Globf = external global float, align 4
67
78 define <2 x i64> @test1(i64 %a, i64 %b) {
9 ; CHECK-LABEL: test1:
10 ; CHECK: # %bb.0: # %entry
11 ; CHECK-NEXT: mtvsrdd v2, r4, r3
12 ; CHECK-NEXT: blr
13
14 ; CHECK-BE-LABEL: test1:
15 ; CHECK-BE: # %bb.0: # %entry
16 ; CHECK-BE-NEXT: mtvsrdd v2, r3, r4
17 ; CHECK-BE-NEXT: blr
818 entry:
919 ; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp
1020 ; which will happen in a subsequent patch.
11 ; CHECK-LABEL: test1
12 ; CHECK: mtvsrdd 34, 4, 3
13 ; CHECK-BE-LABEL: test1
14 ; CHECK-BE: mtvsrdd 34, 3, 4
1521 %vecins = insertelement <2 x i64> undef, i64 %a, i32 0
1622 %vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1
1723 ret <2 x i64> %vecins1
1824 }
1925
2026 define i64 @test2(<2 x i64> %a) {
21 entry:
22 ; CHECK-LABEL: test2
23 ; CHECK: mfvsrld 3, 34
27 ; CHECK-LABEL: test2:
28 ; CHECK: # %bb.0: # %entry
29 ; CHECK-NEXT: mfvsrld r3, v2
30 ; CHECK-NEXT: blr
31
32 ; CHECK-BE-LABEL: test2:
33 ; CHECK-BE: # %bb.0: # %entry
34 ; CHECK-BE-NEXT: mfvsrd r3, v2
35 ; CHECK-BE-NEXT: blr
36 entry:
2437 %0 = extractelement <2 x i64> %a, i32 0
2538 ret i64 %0
2639 }
2740
2841 define i64 @test3(<2 x i64> %a) {
29 entry:
30 ; CHECK-BE-LABEL: test3
31 ; CHECK-BE: mfvsrld 3, 34
42 ; CHECK-LABEL: test3:
43 ; CHECK: # %bb.0: # %entry
44 ; CHECK-NEXT: mfvsrd r3, v2
45 ; CHECK-NEXT: blr
46
47 ; CHECK-BE-LABEL: test3:
48 ; CHECK-BE: # %bb.0: # %entry
49 ; CHECK-BE-NEXT: mfvsrld r3, v2
50 ; CHECK-BE-NEXT: blr
51 entry:
3252 %0 = extractelement <2 x i64> %a, i32 1
3353 ret i64 %0
3454 }
3555
3656 define <4 x i32> @test4(i32* nocapture readonly %in) {
37 entry:
38 ; CHECK-LABEL: test4
39 ; CHECK: lxvwsx 34, 0, 3
40 ; CHECK-NOT: xxspltw
41 ; CHECK-BE-LABEL: test4
42 ; CHECK-BE: lxvwsx 34, 0, 3
43 ; CHECK-BE-NOT: xxspltw
57 ; CHECK-LABEL: test4:
58 ; CHECK: # %bb.0: # %entry
59 ; CHECK-NEXT: lfiwzx f0, 0, r3
60 ; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
61 ; CHECK-NEXT: xxspltw v2, vs0, 3
62 ; CHECK-NEXT: blr
63
64 ; CHECK-BE-LABEL: test4:
65 ; CHECK-BE: # %bb.0: # %entry
66 ; CHECK-BE-NEXT: lfiwzx f0, 0, r3
67 ; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
68 ; CHECK-BE-NEXT: xxspltw v2, vs0, 0
69 ; CHECK-BE-NEXT: blr
70 entry:
4471 %0 = load i32, i32* %in, align 4
4572 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
4673 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
4875 }
4976
5077 define <4 x float> @test5(float* nocapture readonly %in) {
51 entry:
52 ; CHECK-LABEL: test5
53 ; CHECK: lxvwsx 34, 0, 3
54 ; CHECK-NOT: xxspltw
55 ; CHECK-BE-LABEL: test5
56 ; CHECK-BE: lxvwsx 34, 0, 3
57 ; CHECK-BE-NOT: xxspltw
78 ; CHECK-LABEL: test5:
79 ; CHECK: # %bb.0: # %entry
80 ; CHECK-NEXT: lfiwzx f0, 0, r3
81 ; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
82 ; CHECK-NEXT: xxspltw v2, vs0, 3
83 ; CHECK-NEXT: blr
84
85 ; CHECK-BE-LABEL: test5:
86 ; CHECK-BE: # %bb.0: # %entry
87 ; CHECK-BE-NEXT: lfiwzx f0, 0, r3
88 ; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
89 ; CHECK-BE-NEXT: xxspltw v2, vs0, 0
90 ; CHECK-BE-NEXT: blr
91 entry:
5892 %0 = load float, float* %in, align 4
5993 %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
6094 %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
6296 }
6397
6498 define <4 x i32> @test6() {
65 entry:
66 ; CHECK-LABEL: test6
67 ; CHECK: addis
68 ; CHECK: ld [[TOC:[0-9]+]], .LC0
69 ; CHECK: lxvwsx 34, 0, 3
70 ; CHECK-NOT: xxspltw
71 ; CHECK-BE-LABEL: test6
72 ; CHECK-BE: addis
73 ; CHECK-BE: ld [[TOC:[0-9]+]], .LC0
74 ; CHECK-BE: lxvwsx 34, 0, 3
75 ; CHECK-BE-NOT: xxspltw
99 ; CHECK-LABEL: test6:
100 ; CHECK: # %bb.0: # %entry
101 ; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
102 ; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
103 ; CHECK-NEXT: lfiwzx f0, 0, r3
104 ; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
105 ; CHECK-NEXT: xxspltw v2, vs0, 3
106 ; CHECK-NEXT: blr
107
108 ; CHECK-BE-LABEL: test6:
109 ; CHECK-BE: # %bb.0: # %entry
110 ; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha
111 ; CHECK-BE-NEXT: ld r3, .LC0@toc@l(r3)
112 ; CHECK-BE-NEXT: lfiwzx f0, 0, r3
113 ; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
114 ; CHECK-BE-NEXT: xxspltw v2, vs0, 0
115 ; CHECK-BE-NEXT: blr
116 entry:
76117 %0 = load i32, i32* @Globi, align 4
77118 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
78119 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
80121 }
81122
82123 define <4 x float> @test7() {
83 entry:
84 ; CHECK-LABEL: test7
85 ; CHECK: addis
86 ; CHECK: ld [[TOC:[0-9]+]], .LC1
87 ; CHECK: lxvwsx 34, 0, 3
88 ; CHECK-NOT: xxspltw
89 ; CHECK-BE-LABEL: test7
90 ; CHECK-BE: addis
91 ; CHECK-BE: ld [[TOC:[0-9]+]], .LC1
92 ; CHECK-BE: lxvwsx 34, 0, 3
93 ; CHECK-BE-NOT: xxspltw
124 ; CHECK-LABEL: test7:
125 ; CHECK: # %bb.0: # %entry
126 ; CHECK-NEXT: addis r3, r2, .LC1@toc@ha
127 ; CHECK-NEXT: ld r3, .LC1@toc@l(r3)
128 ; CHECK-NEXT: lfiwzx f0, 0, r3
129 ; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
130 ; CHECK-NEXT: xxspltw v2, vs0, 3
131 ; CHECK-NEXT: blr
132
133 ; CHECK-BE-LABEL: test7:
134 ; CHECK-BE: # %bb.0: # %entry
135 ; CHECK-BE-NEXT: addis r3, r2, .LC1@toc@ha
136 ; CHECK-BE-NEXT: ld r3, .LC1@toc@l(r3)
137 ; CHECK-BE-NEXT: lfiwzx f0, 0, r3
138 ; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1
139 ; CHECK-BE-NEXT: xxspltw v2, vs0, 0
140 ; CHECK-BE-NEXT: blr
141 entry:
94142 %0 = load float, float* @Globf, align 4
95143 %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
96144 %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
98146 }
99147
100148 define <16 x i8> @test8() {
101 entry:
102 ; CHECK-LABEL: test8
103 ; CHECK: xxlxor 34, 34, 34
104 ; CHECK-BE-LABEL: test8
105 ; CHECK-BE: xxlxor 34, 34, 34
149 ; CHECK-LABEL: test8:
150 ; CHECK: # %bb.0: # %entry
151 ; CHECK-NEXT: xxlxor v2, v2, v2
152 ; CHECK-NEXT: blr
153
154 ; CHECK-BE-LABEL: test8:
155 ; CHECK-BE: # %bb.0: # %entry
156 ; CHECK-BE-NEXT: xxlxor v2, v2, v2
157 ; CHECK-BE-NEXT: blr
158 entry:
106159 ret <16 x i8> zeroinitializer
107160 }
108161
109162 define <16 x i8> @test9() {
110 entry:
111 ; CHECK-LABEL: test9
112 ; CHECK: xxspltib 34, 1
113 ; CHECK-BE-LABEL: test9
114 ; CHECK-BE: xxspltib 34, 1
163 ; CHECK-LABEL: test9:
164 ; CHECK: # %bb.0: # %entry
165 ; CHECK-NEXT: xxspltib v2, 1
166 ; CHECK-NEXT: blr
167
168 ; CHECK-BE-LABEL: test9:
169 ; CHECK-BE: # %bb.0: # %entry
170 ; CHECK-BE-NEXT: xxspltib v2, 1
171 ; CHECK-BE-NEXT: blr
172 entry:
115173 ret <16 x i8>
116174 }
117175
118176 define <16 x i8> @test10() {
119 entry:
120 ; CHECK-LABEL: test10
121 ; CHECK: xxspltib 34, 127
122 ; CHECK-BE-LABEL: test10
123 ; CHECK-BE: xxspltib 34, 127
177 ; CHECK-LABEL: test10:
178 ; CHECK: # %bb.0: # %entry
179 ; CHECK-NEXT: xxspltib v2, 127
180 ; CHECK-NEXT: blr
181
182 ; CHECK-BE-LABEL: test10:
183 ; CHECK-BE: # %bb.0: # %entry
184 ; CHECK-BE-NEXT: xxspltib v2, 127
185 ; CHECK-BE-NEXT: blr
186 entry:
124187 ret <16 x i8>
125188 }
126189
127190 define <16 x i8> @test11() {
128 entry:
129 ; CHECK-LABEL: test11
130 ; CHECK: xxspltib 34, 128
131 ; CHECK-BE-LABEL: test11
132 ; CHECK-BE: xxspltib 34, 128
191 ; CHECK-LABEL: test11:
192 ; CHECK: # %bb.0: # %entry
193 ; CHECK-NEXT: xxspltib v2, 128
194 ; CHECK-NEXT: blr
195
196 ; CHECK-BE-LABEL: test11:
197 ; CHECK-BE: # %bb.0: # %entry
198 ; CHECK-BE-NEXT: xxspltib v2, 128
199 ; CHECK-BE-NEXT: blr
200 entry:
133201 ret <16 x i8>
134202 }
135203
136204 define <16 x i8> @test12() {
137 entry:
138 ; CHECK-LABEL: test12
139 ; CHECK: xxspltib 34, 255
140 ; CHECK-BE-LABEL: test12
141 ; CHECK-BE: xxspltib 34, 255
205 ; CHECK-LABEL: test12:
206 ; CHECK: # %bb.0: # %entry
207 ; CHECK-NEXT: xxspltib v2, 255
208 ; CHECK-NEXT: blr
209
210 ; CHECK-BE-LABEL: test12:
211 ; CHECK-BE: # %bb.0: # %entry
212 ; CHECK-BE-NEXT: xxspltib v2, 255
213 ; CHECK-BE-NEXT: blr
214 entry:
142215 ret <16 x i8>
143216 }
144217
145218 define <16 x i8> @test13() {
146 entry:
147 ; CHECK-LABEL: test13
148 ; CHECK: xxspltib 34, 129
149 ; CHECK-BE-LABEL: test13
150 ; CHECK-BE: xxspltib 34, 129
219 ; CHECK-LABEL: test13:
220 ; CHECK: # %bb.0: # %entry
221 ; CHECK-NEXT: xxspltib v2, 129
222 ; CHECK-NEXT: blr
223
224 ; CHECK-BE-LABEL: test13:
225 ; CHECK-BE: # %bb.0: # %entry
226 ; CHECK-BE-NEXT: xxspltib v2, 129
227 ; CHECK-BE-NEXT: blr
228 entry:
151229 ret <16 x i8>
152230 }
153231
154232 define <16 x i8> @test13E127() {
155 entry:
156 ; CHECK-LABEL: test13E127
157 ; CHECK: xxspltib 34, 200
158 ; CHECK-BE-LABEL: test13E127
159 ; CHECK-BE: xxspltib 34, 200
233 ; CHECK-LABEL: test13E127:
234 ; CHECK: # %bb.0: # %entry
235 ; CHECK-NEXT: xxspltib v2, 200
236 ; CHECK-NEXT: blr
237
238 ; CHECK-BE-LABEL: test13E127:
239 ; CHECK-BE: # %bb.0: # %entry
240 ; CHECK-BE-NEXT: xxspltib v2, 200
241 ; CHECK-BE-NEXT: blr
242 entry:
160243 ret <16 x i8>
161244 }
162245
163246 define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) {
164 entry:
165 ; CHECK-LABEL: test14
166 ; CHECK: lwz [[LD:[0-9]+]],
167 ; CHECK: mtvsrws 34, [[LD]]
168 ; CHECK-BE-LABEL: test14
169 ; CHECK-BE: lwz [[LD:[0-9]+]],
170 ; CHECK-BE: mtvsrws 34, [[LD]]
247 ; CHECK-LABEL: test14:
248 ; CHECK: # %bb.0: # %entry
249 ; CHECK-NEXT: lwz r3, 0(r5)
250 ; CHECK-NEXT: mtvsrws v2, r3
251 ; CHECK-NEXT: addi r3, r3, 5
252 ; CHECK-NEXT: stw r3, 0(r5)
253 ; CHECK-NEXT: blr
254
255 ; CHECK-BE-LABEL: test14:
256 ; CHECK-BE: # %bb.0: # %entry
257 ; CHECK-BE-NEXT: lwz r3, 0(r5)
258 ; CHECK-BE-NEXT: mtvsrws v2, r3
259 ; CHECK-BE-NEXT: addi r3, r3, 5
260 ; CHECK-BE-NEXT: stw r3, 0(r5)
261 ; CHECK-BE-NEXT: blr
262 entry:
171263 %0 = load i32, i32* %b, align 4
172264 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
173265 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
1010 define void @draw_llvm_vs_variant0() {
1111 ; CHECK-LABEL: draw_llvm_vs_variant0:
1212 ; CHECK: # %bb.0: # %entry
13 ; CHECK-NEXT: ldx r3, 0, r3
14 ; CHECK-NEXT: mtvsrd f0, r3
15 ; CHECK-NEXT: xxswapd v2, vs0
13 ; CHECK-NEXT: lfd f0, 0(r3)
14 ; CHECK-NEXT: xxpermdi v2, f0, f0, 2
1615 ; CHECK-NEXT: vmrglh v2, v2, v2
1716 ; CHECK-NEXT: vextsh2w v2, v2
1817 ; CHECK-NEXT: xvcvsxwsp vs0, v2
None ; RUN: llc -verify-machineinstrs < %s | FileCheck %s
1 target datalayout = "E-m:e-i64:64-n32:64"
2 target triple = "powerpc64-bgq-linux"
0 ; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
1 ; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s
32
43 ; Function Attrs: norecurse nounwind readonly
54 define <4 x double> @foo(double* nocapture readonly %a) #0 {
5 ; CHECK-LABEL: foo:
6 ; CHECK: # %bb.0: # %entry
7 ; CHECK-NEXT: lxvdsx v2, 0, r3
8 ; CHECK-NEXT: vmr v3, v2
9 ; CHECK-NEXT: blr
610 entry:
711 %0 = load double, double* %a, align 8
812 %vecinit.i = insertelement <4 x double> undef, double %0, i32 0
913 %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
1014 ret <4 x double> %shuffle.i
11
12 ; CHECK-LABEL: @foo
13 ; CHECK: lfd 1, 0(3)
14 ; CHECK: blr
1515 }
1616
1717 define <4 x double> @foox(double* nocapture readonly %a, i64 %idx) #0 {
18 ; CHECK-LABEL: foox:
19 ; CHECK: # %bb.0: # %entry
20 ; CHECK-NEXT: sldi r4, r4, 3
21 ; CHECK-NEXT: lxvdsx v2, r3, r4
22 ; CHECK-NEXT: vmr v3, v2
23 ; CHECK-NEXT: blr
1824 entry:
1925 %p = getelementptr double, double* %a, i64 %idx
2026 %0 = load double, double* %p, align 8
2127 %vecinit.i = insertelement <4 x double> undef, double %0, i32 0
2228 %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
2329 ret <4 x double> %shuffle.i
24
25 ; CHECK-LABEL: @foox
26 ; CHECK: sldi [[REG1:[0-9]+]], 4, 3
27 ; CHECK: lfdx 1, 3, [[REG1]]
28 ; CHECK: blr
2930 }
3031
3132 define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 {
33 ; CHECK-LABEL: fooxu:
34 ; CHECK: # %bb.0: # %entry
35 ; CHECK-NEXT: sldi r4, r4, 3
36 ; CHECK-NEXT: lfdux f0, r3, r4
37 ; CHECK-NEXT: xxspltd v2, vs0, 0
38 ; CHECK-NEXT: std r3, 0(r5)
39 ; CHECK-NEXT: vmr v3, v2
40 ; CHECK-NEXT: blr
3241 entry:
3342 %p = getelementptr double, double* %a, i64 %idx
3443 %0 = load double, double* %p, align 8
3645 %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer
3746 store double* %p, double** %pptr, align 8
3847 ret <4 x double> %shuffle.i
39
40 ; CHECK-LABEL: @foox
41 ; CHECK: sldi [[REG1:[0-9]+]], 4, 3
42 ; CHECK: lfdux 1, 3, [[REG1]]
43 ; CHECK: std 3, 0(5)
44 ; CHECK: blr
4548 }
4649
4750 define <4 x float> @foof(float* nocapture readonly %a) #0 {
51 ; CHECK-LABEL: foof:
52 ; CHECK: # %bb.0: # %entry
53 ; CHECK-NEXT: lfiwzx f0, 0, r3
54 ; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
55 ; CHECK-NEXT: xxspltw v2, vs0, 3
56 ; CHECK-NEXT: blr
4857 entry:
4958 %0 = load float, float* %a, align 4
5059 %vecinit.i = insertelement <4 x float> undef, float %0, i32 0
5160 %shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
5261 ret <4 x float> %shuffle.i
53
54 ; CHECK-LABEL: @foof
55 ; CHECK: lfs 1, 0(3)
56 ; CHECK: blr
5762 }
5863
5964 define <4 x float> @foofx(float* nocapture readonly %a, i64 %idx) #0 {
65 ; CHECK-LABEL: foofx:
66 ; CHECK: # %bb.0: # %entry
67 ; CHECK-NEXT: sldi r4, r4, 2
68 ; CHECK-NEXT: lfiwzx f0, r3, r4
69 ; CHECK-NEXT: xxpermdi vs0, f0, f0, 2
70 ; CHECK-NEXT: xxspltw v2, vs0, 3
71 ; CHECK-NEXT: blr
6072 entry:
6173 %p = getelementptr float, float* %a, i64 %idx
6274 %0 = load float, float* %p, align 4
6375 %vecinit.i = insertelement <4 x float> undef, float %0, i32 0
6476 %shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
6577 ret <4 x float> %shuffle.i
66
67 ; CHECK-LABEL: @foofx
68 ; CHECK: sldi [[REG1:[0-9]+]], 4, 2
69 ; CHECK: lfsx 1, 3, [[REG1]]
70 ; CHECK: blr
7178 }
7279
73 attributes #0 = { norecurse nounwind readonly "target-cpu"="a2q" "target-features"="+qpx,-altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" }
7480
0 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
1 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
2 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
3 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
4 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
5 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
6 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
7 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
8
9 ; Function Attrs: norecurse nounwind readonly
10 define <2 x i64> @s2v_test1(i64* nocapture readonly %int64, <2 x i64> %vec) {
11 ; P9LE-LABEL: s2v_test1:
12 ; P9LE: # %bb.0: # %entry
13 ; P9LE-NEXT: lfd f0, 0(r3)
14 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
15 ; P9LE-NEXT: xxpermdi v2, v2, v3, 1
16 ; P9LE-NEXT: blr
17
18 ; P9BE-LABEL: s2v_test1:
19 ; P9BE: # %bb.0: # %entry
20 ; P9BE-NEXT: lfd f0, 0(r3)
21 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
22 ; P9BE-NEXT: blr
23 entry:
24 %0 = load i64, i64* %int64, align 8
25 %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
26 ret <2 x i64> %vecins
27 }
28
29 ; Function Attrs: norecurse nounwind readonly
30 define <2 x i64> @s2v_test2(i64* nocapture readonly %int64, <2 x i64> %vec) {
31 ; P9LE-LABEL: s2v_test2:
32 ; P9LE: # %bb.0: # %entry
33 ; P9LE-NEXT: lfd f0, 8(r3)
34 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
35 ; P9LE-NEXT: xxpermdi v2, v2, v3, 1
36 ; P9LE-NEXT: blr
37
38 ; P9BE-LABEL: s2v_test2:
39 ; P9BE: # %bb.0: # %entry
40 ; P9BE-NEXT: lfd f0, 8(r3)
41 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
42 ; P9BE-NEXT: blr
43 entry:
44 %arrayidx = getelementptr inbounds i64, i64* %int64, i64 1
45 %0 = load i64, i64* %arrayidx, align 8
46 %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
47 ret <2 x i64> %vecins
48 }
49
50 ; Function Attrs: norecurse nounwind readonly
51 define <2 x i64> @s2v_test3(i64* nocapture readonly %int64, <2 x i64> %vec, i32 signext %Idx) {
52 ; P9LE-LABEL: s2v_test3:
53 ; P9LE: # %bb.0: # %entry
54 ; P9LE-NEXT: sldi r4, r7, 3
55 ; P9LE-NEXT: lfdx f0, r3, r4
56 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
57 ; P9LE-NEXT: xxpermdi v2, v2, v3, 1
58 ; P9LE-NEXT: blr
59
60 ; P9BE-LABEL: s2v_test3
61 ; P9BE: # %bb.0: # %entry
62 ; P9BE-NEXT: sldi r4, r7, 3
63 ; P9BE-NEXT: lfdx f0, r3, r4
64 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
65 ; P9BE-NEXT: blr
66 entry:
67 %idxprom = sext i32 %Idx to i64
68 %arrayidx = getelementptr inbounds i64, i64* %int64, i64 %idxprom
69 %0 = load i64, i64* %arrayidx, align 8
70 %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
71 ret <2 x i64> %vecins
72 }
73
74 ; Function Attrs: norecurse nounwind readonly
75 define <2 x i64> @s2v_test4(i64* nocapture readonly %int64, <2 x i64> %vec) {
76 ; P9LE-LABEL: s2v_test4:
77 ; P9LE: # %bb.0: # %entry
78 ; P9LE-NEXT: lfd f0, 8(r3)
79 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
80 ; P9LE-NEXT: xxpermdi v2, v2, v3, 1
81 ; P9LE-NEXT: blr
82
83 ; P9BE-LABEL: s2v_test4:
84 ; P9BE: # %bb.0: # %entry
85 ; P9BE-NEXT: lfd f0, 8(r3)
86 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
87 ; P9BE-NEXT: blr
88 entry:
89 %arrayidx = getelementptr inbounds i64, i64* %int64, i64 1
90 %0 = load i64, i64* %arrayidx, align 8
91 %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
92 ret <2 x i64> %vecins
93 }
94
95 ; Function Attrs: norecurse nounwind readonly
96 define <2 x i64> @s2v_test5(<2 x i64> %vec, i64* nocapture readonly %ptr1) {
97 ; P9LE-LABEL: s2v_test5:
98 ; P9LE: # %bb.0: # %entry
99 ; P9LE-NEXT: lfd f0, 0(r5)
100 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
101 ; P9LE-NEXT: xxpermdi v2, v2, v3, 1
102 ; P9LE-NEXT: blr
103
104 ; P9BE-LABEL: s2v_test5:
105 ; P9BE: # %bb.0: # %entry
106 ; P9BE-NEXT: lfd f0, 0(r5)
107 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
108 ; P9BE-NEXT: blr
109 entry:
110 %0 = load i64, i64* %ptr1, align 8
111 %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0
112 ret <2 x i64> %vecins
113 }
114
115 ; Function Attrs: norecurse nounwind readonly
116 define <2 x double> @s2v_test_f1(double* nocapture readonly %f64, <2 x double> %vec) {
117 ; P9LE-LABEL: s2v_test_f1:
118 ; P9LE: # %bb.0: # %entry
119 ; P9LE-NEXT: lfd f0, 0(r3)
120 ; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
121 ; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
122 ; P9LE-NEXT: blr
123
124 ; P9BE-LABEL: s2v_test_f1:
125 ; P9BE: # %bb.0: # %entry
126 ; P9BE-NEXT: lfd f0, 0(r3)
127 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
128 ; P9BE-NEXT: blr
129
130 ; P8LE-LABEL: s2v_test_f1:
131 ; P8LE: # %bb.0: # %entry
132 ; P8LE-NEXT: lfdx f0, 0, r3
133 ; P8LE-NEXT: xxspltd vs0, vs0, 0
134 ; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
135 ; P8LE-NEXT: blr
136
137 ; P8BE-LABEL: s2v_test_f1:
138 ; P8BE: # %bb.0: # %entry
139 ; P8BE-NEXT: lfdx f0, 0, r3
140 ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
141 ; P8BE-NEXT: blr
142 entry:
143 %0 = load double, double* %f64, align 8
144 %vecins = insertelement <2 x double> %vec, double %0, i32 0
145 ret <2 x double> %vecins
146 }
147
148 ; Function Attrs: norecurse nounwind readonly
149 define <2 x double> @s2v_test_f2(double* nocapture readonly %f64, <2 x double> %vec) {
150 ; P9LE-LABEL: s2v_test_f2:
151 ; P9LE: # %bb.0: # %entry
152 ; P9LE-NEXT: lfd f0, 8(r3)
153 ; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
154 ; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
155 ; P9LE-NEXT: blr
156
157 ; P9BE-LABEL: s2v_test_f2:
158 ; P9BE: # %bb.0: # %entry
159 ; P9BE-NEXT: lfd f0, 8(r3)
160 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
161 ; P9BE-NEXT: blr
162
163 ; P8LE-LABEL: s2v_test_f2:
164 ; P8LE: # %bb.0: # %entry
165 ; P8LE-NEXT: addi r3, r3, 8
166 ; P8LE-NEXT: lfdx f0, 0, r3
167 ; P8LE-NEXT: xxspltd vs0, vs0, 0
168 ; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
169 ; P8LE-NEXT: blr
170
171 ; P8BE-LABEL: s2v_test_f2:
172 ; P8BE: # %bb.0: # %entry
173 ; P8BE-NEXT: addi r3, r3, 8
174 ; P8BE-NEXT: lfdx f0, 0, r3
175 ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
176 ; P8BE-NEXT: blr
177 entry:
178 %arrayidx = getelementptr inbounds double, double* %f64, i64 1
179 %0 = load double, double* %arrayidx, align 8
180 %vecins = insertelement <2 x double> %vec, double %0, i32 0
181 ret <2 x double> %vecins
182 }
183
184 ; Function Attrs: norecurse nounwind readonly
185 define <2 x double> @s2v_test_f3(double* nocapture readonly %f64, <2 x double> %vec, i32 signext %Idx) {
186 ; P9LE-LABEL: s2v_test_f3:
187 ; P9LE: # %bb.0: # %entry
188 ; P9LE-NEXT: sldi r4, r7, 3
189 ; P9LE-NEXT: lfdx f0, r3, r4
190 ; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
191 ; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
192 ; P9LE-NEXT: blr
193
194 ; P9BE-LABEL: s2v_test_f3:
195 ; P9BE: # %bb.0: # %entry
196 ; P9BE-NEXT: sldi r4, r7, 3
197 ; P9BE-NEXT: lfdx f0, r3, r4
198 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
199 ; P9BE-NEXT: blr
200
201 ; P8LE-LABEL: s2v_test_f3:
202 ; P8LE: # %bb.0: # %entry
203 ; P8LE-NEXT: sldi r4, r7, 3
204 ; P8LE-NEXT: lfdx f0, r3, r4
205 ; P8LE-NEXT: xxspltd vs0, vs0, 0
206 ; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
207 ; P8LE-NEXT: blr
208
209 ; P8BE-LABEL: s2v_test_f3:
210 ; P8BE: # %bb.0: # %entry
211 ; P8BE-NEXT: sldi r4, r7, 3
212 ; P8BE-NEXT: lfdx f0, r3, r4
213 ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
214 ; P8BE-NEXT: blr
215 entry:
216 %idxprom = sext i32 %Idx to i64
217 %arrayidx = getelementptr inbounds double, double* %f64, i64 %idxprom
218 %0 = load double, double* %arrayidx, align 8
219 %vecins = insertelement <2 x double> %vec, double %0, i32 0
220 ret <2 x double> %vecins
221 }
222
223 ; Function Attrs: norecurse nounwind readonly
224 define <2 x double> @s2v_test_f4(double* nocapture readonly %f64, <2 x double> %vec) {
225 ; P9LE-LABEL: s2v_test_f4:
226 ; P9LE: # %bb.0: # %entry
227 ; P9LE-NEXT: lfd f0, 8(r3)
228 ; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
229 ; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
230 ; P9LE-NEXT: blr
231
232 ; P9BE-LABEL: s2v_test_f4:
233 ; P9BE: # %bb.0: # %entry
234 ; P9BE-NEXT: lfd f0, 8(r3)
235 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
236 ; P9BE-NEXT: blr
237
238 ; P8LE-LABEL: s2v_test_f4:
239 ; P8LE: # %bb.0: # %entry
240 ; P8LE-NEXT: addi r3, r3, 8
241 ; P8LE-NEXT: lfdx f0, 0, r3
242 ; P8LE-NEXT: xxspltd vs0, vs0, 0
243 ; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
244 ; P8LE-NEXT: blr
245
246 ; P8BE-LABEL: s2v_test_f4:
247 ; P8BE: # %bb.0: # %entry
248 ; P8BE-NEXT: addi r3, r3, 8
249 ; P8BE-NEXT: lfdx f0, 0, r3
250 ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
251 ; P8BE-NEXT: blr
252 entry:
253 %arrayidx = getelementptr inbounds double, double* %f64, i64 1
254 %0 = load double, double* %arrayidx, align 8
255 %vecins = insertelement <2 x double> %vec, double %0, i32 0
256 ret <2 x double> %vecins
257 }
258
259 ; Function Attrs: norecurse nounwind readonly
260 define <2 x double> @s2v_test_f5(<2 x double> %vec, double* nocapture readonly %ptr1) {
261 ; P9LE-LABEL: s2v_test_f5:
262 ; P9LE: # %bb.0: # %entry
263 ; P9LE-NEXT: lfd f0, 0(r5)
264 ; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
265 ; P9LE-NEXT: xxpermdi v2, v2, vs0, 1
266 ; P9LE-NEXT: blr
267
268 ; P9BE-LABEL: s2v_test_f5:
269 ; P9BE: # %bb.0: # %entry
270 ; P9BE-NEXT: lfd f0, 0(r5)
271 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
272 ; P9BE-NEXT: blr
273
274 ; P8LE-LABEL: s2v_test_f5:
275 ; P8LE: # %bb.0: # %entry
276 ; P8LE-NEXT: lfdx f0, 0, r5
277 ; P8LE-NEXT: xxspltd vs0, vs0, 0
278 ; P8LE-NEXT: xxpermdi v2, v2, vs0, 1
279 ; P8LE-NEXT: blr
280
281 ; P8BE-LABEL: s2v_test_f5:
282 ; P8BE: # %bb.0: # %entry
283 ; P8BE-NEXT: lfdx f0, 0, r5
284 ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
285 ; P8BE-NEXT: blr
286 entry:
287 %0 = load double, double* %ptr1, align 8
288 %vecins = insertelement <2 x double> %vec, double %0, i32 0
289 ret <2 x double> %vecins
290 }
291
0 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
1 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
2 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
3 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
4 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
5 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
6 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
7 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
8
9 define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) {
10 ; P9LE-LABEL: test_liwzx1:
11 ; P9LE: # %bb.0:
12 ; P9LE-NEXT: lfiwzx f0, 0, r3
13 ; P9LE-NEXT: lfiwzx f1, 0, r4
14 ; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
15 ; P9LE-NEXT: xxpermdi vs1, f1, f1, 2
16 ; P9LE-NEXT: xvaddsp vs0, vs0, vs1
17 ; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
18 ; P9LE-NEXT: xscvspdpn f0, vs0
19 ; P9LE-NEXT: stfs f0, 0(r5)
20 ; P9LE-NEXT: blr
21
22 ; P9BE-LABEL: test_liwzx1:
23 ; P9BE: # %bb.0:
24 ; P9BE-NEXT: lfiwzx f0, 0, r3
25 ; P9BE-NEXT: lfiwzx f1, 0, r4
26 ; P9BE-NEXT: xxsldwi vs0, f0, f0, 1
27 ; P9BE-NEXT: xxsldwi vs1, f1, f1, 1
28 ; P9BE-NEXT: xvaddsp vs0, vs0, vs1
29 ; P9BE-NEXT: xscvspdpn f0, vs0
30 ; P9BE-NEXT: stfs f0, 0(r5)
31 ; P9BE-NEXT: blr
32
33 ; P8LE-LABEL: test_liwzx1:
34 ; P8LE: # %bb.0:
35 ; P8LE-NEXT: lfiwzx f0, 0, r3
36 ; P8LE-NEXT: lfiwzx f1, 0, r4
37 ; P8LE-NEXT: xxpermdi vs0, f0, f0, 2
38 ; P8LE-NEXT: xxpermdi vs1, f1, f1, 2
39 ; P8LE-NEXT: xvaddsp vs0, vs0, vs1
40 ; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3
41 ; P8LE-NEXT: xscvspdpn f0, vs0
42 ; P8LE-NEXT: stfsx f0, 0, r5
43 ; P8LE-NEXT: blr
44
45 ; P8BE-LABEL: test_liwzx1:
46 ; P8BE: # %bb.0:
47 ; P8BE-NEXT: lfiwzx f0, 0, r3
48 ; P8BE-NEXT: lfiwzx f1, 0, r4
49 ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
50 ; P8BE-NEXT: xxsldwi vs1, f1, f1, 1
51 ; P8BE-NEXT: xvaddsp vs0, vs0, vs1
52 ; P8BE-NEXT: xscvspdpn f0, vs0
53 ; P8BE-NEXT: stfsx f0, 0, r5
54 ; P8BE-NEXT: blr
55 %a = load <1 x float>, <1 x float>* %A
56 %b = load <1 x float>, <1 x float>* %B
57 %X = fadd <1 x float> %a, %b
58 store <1 x float> %X, <1 x float>* %C
59 ret void
60 }
61
62 define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) {
63 ; P9LE-LABEL: test_liwzx2:
64 ; P9LE: # %bb.0:
65 ; P9LE-NEXT: lfiwzx f0, 0, r3
66 ; P9LE-NEXT: lfiwzx f1, 0, r4
67 ; P9LE-NEXT: mr r3, r5
68 ; P9LE-NEXT: xxpermdi vs0, f0, f0, 2
69 ; P9LE-NEXT: xxpermdi vs1, f1, f1, 2
70 ; P9LE-NEXT: xvsubsp vs0, vs0, vs1
71 ; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
72 ; P9LE-NEXT: xscvspdpn f0, vs0
73 ; P9LE-NEXT: stfs f0, 0(r5)
74 ; P9LE-NEXT: blr
75
76 ; P9BE-LABEL: test_liwzx2:
77 ; P9BE: # %bb.0:
78 ; P9BE-NEXT: lfiwzx f0, 0, r3
79 ; P9BE-NEXT: lfiwzx f1, 0, r4
80 ; P9BE-NEXT: mr r3, r5
81 ; P9BE-NEXT: xxsldwi vs0, f0, f0, 1
82 ; P9BE-NEXT: xxsldwi vs1, f1, f1, 1
83 ; P9BE-NEXT: xvsubsp vs0, vs0, vs1
84 ; P9BE-NEXT: xscvspdpn f0, vs0
85 ; P9BE-NEXT: stfs f0, 0(r5)
86 ; P9BE-NEXT: blr
87
88 ; P8LE-LABEL: test_liwzx2:
89 ; P8LE: # %bb.0:
90 ; P8LE-NEXT: lfiwzx f0, 0, r3
91 ; P8LE-NEXT: lfiwzx f1, 0, r4
92 ; P8LE-NEXT: mr r3, r5
93 ; P8LE-NEXT: xxpermdi vs0, f0, f0, 2
94 ; P8LE-NEXT: xxpermdi vs1, f1, f1, 2
95 ; P8LE-NEXT: xvsubsp vs0, vs0, vs1
96 ; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3
97 ; P8LE-NEXT: xscvspdpn f0, vs0
98 ; P8LE-NEXT: stfsx f0, 0, r5
99 ; P8LE-NEXT: blr
100
101 ; P8BE-LABEL: test_liwzx2:
102 ; P8BE: # %bb.0:
103 ; P8BE-NEXT: lfiwzx f0, 0, r3
104 ; P8BE-NEXT: lfiwzx f1, 0, r4
105 ; P8BE-NEXT: mr r3, r5
106 ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
107 ; P8BE-NEXT: xxsldwi vs1, f1, f1, 1
108 ; P8BE-NEXT: xvsubsp vs0, vs0, vs1
109 ; P8BE-NEXT: xscvspdpn f0, vs0
110 ; P8BE-NEXT: stfsx f0, 0, r5
111 ; P8BE-NEXT: blr
112 %a = load <1 x float>, <1 x float>* %A
113 %b = load <1 x float>, <1 x float>* %B
114 %X = fsub <1 x float> %a, %b
115 store <1 x float> %X, <1 x float>* %C
116 ret <1 x float>* %C
117 }
0 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
1 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
2 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
3 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
4 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
5 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
6 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
7 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
8
9 ; Function Attrs: norecurse nounwind readonly
10 define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) {
11 ; P9LE-LABEL: s2v_test1:
12 ; P9LE: # %bb.0: # %entry
13 ; P9LE-NEXT: lfiwax f0, 0, r3
14 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
15 ; P9LE-NEXT: xxpermdi v2, v2, v3, 1
16 ; P9LE-NEXT: blr
17
18 ; P9BE-LABEL: s2v_test1:
19 ; P9BE: # %bb.0: # %entry
20 ; P9BE-NEXT: lfiwax f0, 0, r3
21 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
22 ; P9BE-NEXT: blr
23
24 ; P8LE-LABEL: s2v_test1:
25 ; P8LE: # %bb.0: # %entry
26 ; P8LE-NEXT: lfiwax f0, 0, r3
27 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
28 ; P8LE-NEXT: xxpermdi v2, v2, v3, 1
29 ; P8LE-NEXT: blr
30
31 ; P8BE-LABEL: s2v_test1:
32 ; P8BE: # %bb.0: # %entry
33 ; P8BE-NEXT: lfiwax f0, 0, r3
34 ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
35 ; P8BE-NEXT: blr
36 entry:
37 %0 = load i32, i32* %int32, align 4
38 %conv = sext i32 %0 to i64
39 %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
40 ret <2 x i64> %vecins
41 }
42
43 ; Function Attrs: norecurse nounwind readonly
44 define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) {
45 ; P9LE-LABEL: s2v_test2:
46 ; P9LE: # %bb.0: # %entry
47 ; P9LE-NEXT: addi r3, r3, 4
48 ; P9LE-NEXT: lfiwax f0, 0, r3
49 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
50 ; P9LE-NEXT: xxpermdi v2, v2, v3, 1
51 ; P9LE-NEXT: blr
52
53 ; P9BE-LABEL: s2v_test2:
54 ; P9BE: # %bb.0: # %entry
55 ; P9BE-NEXT: addi r3, r3, 4
56 ; P9BE-NEXT: lfiwax f0, 0, r3
57 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
58 ; P9BE-NEXT: blr
59
60 ; P8LE-LABEL: s2v_test2:
61 ; P8LE: # %bb.0: # %entry
62 ; P8LE-NEXT: addi r3, r3, 4
63 ; P8LE-NEXT: lfiwax f0, 0, r3
64 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
65 ; P8LE-NEXT: xxpermdi v2, v2, v3, 1
66 ; P8LE-NEXT: blr
67
68 ; P8BE-LABEL: s2v_test2:
69 ; P8BE: # %bb.0: # %entry
70 ; P8BE-NEXT: addi r3, r3, 4
71 ; P8BE-NEXT: lfiwax f0, 0, r3
72 ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
73 ; P8BE-NEXT: blr
74 entry:
75 %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
76 %0 = load i32, i32* %arrayidx, align 4
77 %conv = sext i32 %0 to i64
78 %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
79 ret <2 x i64> %vecins
80 }
81
82 ; Function Attrs: norecurse nounwind readonly
83 define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32 signext %Idx) {
84 ; P9LE-LABEL: s2v_test3:
85 ; P9LE: # %bb.0: # %entry
86 ; P9LE-NEXT: sldi r4, r7, 2
87 ; P9LE-NEXT: lfiwax f0, r3, r4
88 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
89 ; P9LE-NEXT: xxpermdi v2, v2, v3, 1
90 ; P9LE-NEXT: blr
91
92 ; P9BE-LABEL: s2v_test3:
93 ; P9BE: # %bb.0: # %entry
94 ; P9BE-NEXT: sldi r4, r7, 2
95 ; P9BE-NEXT: lfiwax f0, r3, r4
96 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
97 ; P9BE-NEXT: blr
98
99 ; P8LE-LABEL: s2v_test3:
100 ; P8LE: # %bb.0: # %entry
101 ; P8LE-NEXT: sldi r4, r7, 2
102 ; P8LE-NEXT: lfiwax f0, r3, r4
103 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
104 ; P8LE-NEXT: xxpermdi v2, v2, v3, 1
105 ; P8LE-NEXT: blr
106
107 ; P8BE-LABEL: s2v_test3:
108 ; P8BE: # %bb.0: # %entry
109 ; P8BE-NEXT: sldi r4, r7, 2
110 ; P8BE-NEXT: lfiwax f0, r3, r4
111 ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
112 ; P8BE-NEXT: blr
113 entry:
114 %idxprom = sext i32 %Idx to i64
115 %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
116 %0 = load i32, i32* %arrayidx, align 4
117 %conv = sext i32 %0 to i64
118 %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
119 ret <2 x i64> %vecins
120 }
121
122 ; Function Attrs: norecurse nounwind readonly
123 define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) {
124 ; P9LE-LABEL: s2v_test4:
125 ; P9LE: # %bb.0: # %entry
126 ; P9LE-NEXT: addi r3, r3, 4
127 ; P9LE-NEXT: lfiwax f0, 0, r3
128 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
129 ; P9LE-NEXT: xxpermdi v2, v2, v3, 1
130 ; P9LE-NEXT: blr
131
132 ; P9BE-LABEL: s2v_test4:
133 ; P9BE: # %bb.0: # %entry
134 ; P9BE-NEXT: addi r3, r3, 4
135 ; P9BE-NEXT: lfiwax f0, 0, r3
136 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
137 ; P9BE-NEXT: blr
138
139 ; P8LE-LABEL: s2v_test4:
140 ; P8LE: # %bb.0: # %entry
141 ; P8LE-NEXT: addi r3, r3, 4
142 ; P8LE-NEXT: lfiwax f0, 0, r3
143 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
144 ; P8LE-NEXT: xxpermdi v2, v2, v3, 1
145 ; P8LE-NEXT: blr
146
147 ; P8BE-LABEL: s2v_test4:
148 ; P8BE: # %bb.0: # %entry
149 ; P8BE-NEXT: addi r3, r3, 4
150 ; P8BE-NEXT: lfiwax f0, 0, r3
151 ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
152 ; P8BE-NEXT: blr
153 entry:
154 %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
155 %0 = load i32, i32* %arrayidx, align 4
156 %conv = sext i32 %0 to i64
157 %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
158 ret <2 x i64> %vecins
159 }
160
161 ; Function Attrs: norecurse nounwind readonly
162 define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) {
163 ; P9LE-LABEL: s2v_test5:
164 ; P9LE: # %bb.0: # %entry
165 ; P9LE-NEXT: lfiwax f0, 0, r5
166 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
167 ; P9LE-NEXT: xxpermdi v2, v2, v3, 1
168 ; P9LE-NEXT: blr
169
170 ; P9BE-LABEL: s2v_test5:
171 ; P9BE: # %bb.0: # %entry
172 ; P9BE-NEXT: lfiwax f0, 0, r5
173 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
174 ; P9BE-NEXT: blr
175
176 ; P8LE-LABEL: s2v_test5:
177 ; P8LE: # %bb.0: # %entry
178 ; P8LE-NEXT: lfiwax f0, 0, r5
179 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
180 ; P8LE-NEXT: xxpermdi v2, v2, v3, 1
181 ; P8LE-NEXT: blr
182
183 ; P8BE-LABEL: s2v_test5:
184 ; P8BE: # %bb.0: # %entry
185 ; P8BE-NEXT: lfiwax f0, 0, r5
186 ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
187 ; P8BE-NEXT: blr
188 entry:
189 %0 = load i32, i32* %ptr1, align 4
190 %conv = sext i32 %0 to i64
191 %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0
192 ret <2 x i64> %vecins
193 }
194
195 ; Function Attrs: norecurse nounwind readonly
196 define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr) {
197 ; P9LE-LABEL: s2v_test6:
198 ; P9LE: # %bb.0: # %entry
199 ; P9LE-NEXT: lfiwax f0, 0, r3
200 ; P9LE-NEXT: xxpermdi v2, f0, f0, 2
201 ; P9LE-NEXT: xxspltd v2, v2, 1
202 ; P9LE-NEXT: blr
203
204 ; P9BE-LABEL: s2v_test6:
205 ; P9BE: # %bb.0: # %entry
206 ; P9BE-NEXT: lfiwax f0, 0, r3
207 ; P9BE-NEXT: xxspltd v2, vs0, 0
208 ; P9BE-NEXT: blr
209
210 ; P8LE-LABEL: s2v_test6:
211 ; P8LE: # %bb.0: # %entry
212 ; P8LE-NEXT: lfiwax f0, 0, r3
213 ; P8LE-NEXT: xxpermdi v2, f0, f0, 2
214 ; P8LE-NEXT: xxspltd v2, v2, 1
215 ; P8LE-NEXT: blr
216
217 ; P8BE-LABEL: s2v_test6:
218 ; P8BE: # %bb.0: # %entry
219 ; P8BE-NEXT: lfiwax f0, 0, r3
220 ; P8BE-NEXT: xxspltd v2, vs0, 0
221 ; P8BE-NEXT: blr
222 entry:
223 %0 = load i32, i32* %ptr, align 4
224 %conv = sext i32 %0 to i64
225 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
226 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
227 ret <2 x i64> %splat.splat
228 }
229
230 ; Function Attrs: norecurse nounwind readonly
231 define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) {
232 ; P9LE-LABEL: s2v_test7:
233 ; P9LE: # %bb.0: # %entry
234 ; P9LE-NEXT: lfiwax f0, 0, r3
235 ; P9LE-NEXT: xxpermdi v2, f0, f0, 2
236 ; P9LE-NEXT: xxspltd v2, v2, 1
237 ; P9LE-NEXT: blr
238
239 ; P9BE-LABEL: s2v_test7:
240 ; P9BE: # %bb.0: # %entry
241 ; P9BE-NEXT: lfiwax f0, 0, r3
242 ; P9BE-NEXT: xxspltd v2, vs0, 0
243 ; P9BE-NEXT: blr
244
245 ; P8LE-LABEL: s2v_test7:
246 ; P8LE: # %bb.0: # %entry
247 ; P8LE-NEXT: lfiwax f0, 0, r3
248 ; P8LE-NEXT: xxpermdi v2, f0, f0, 2
249 ; P8LE-NEXT: xxspltd v2, v2, 1
250 ; P8LE-NEXT: blr
251
252 ; P8BE-LABEL: s2v_test7:
253 ; P8BE: # %bb.0: # %entry
254 ; P8BE-NEXT: lfiwax f0, 0, r3
255 ; P8BE-NEXT: xxspltd v2, vs0, 0
256 ; P8BE-NEXT: blr
257 entry:
258 %0 = load i32, i32* %ptr, align 4
259 %conv = sext i32 %0 to i64
260 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
261 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
262 ret <2 x i64> %splat.splat
263 }
264
0 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
1 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
2 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
3 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
4 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
5 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
6 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
7 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
8
9 ; Function Attrs: norecurse nounwind readonly
10 define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) {
11 ; P8LE-LABEL: s2v_test1:
12 ; P8LE: # %bb.0: # %entry
13 ; P8LE-NEXT: lfiwzx f0, 0, r3
14 ; P8LE-NEXT: addis r4, r2, .LCPI0_0@toc@ha
15 ; P8LE-NEXT: addi r3, r4, .LCPI0_0@toc@l
16 ; P8LE-NEXT: lvx v4, 0, r3
17 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
18 ; P8LE-NEXT: vperm v2, v3, v2, v4
19 ; P8LE-NEXT: blr
20
21 ; P8BE-LABEL: s2v_test1:
22 ; P8BE: # %bb.0: # %entry
23 ; P8BE: lfiwzx f0, 0, r3
24 ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
25 ; P8BE: xxsldwi vs0, v2, vs0, 1
26 ; P8BE: xxsldwi v2, vs0, vs0, 3
27 ; P8BE-NEXT: blr
28 entry:
29 %0 = load i32, i32* %int32, align 4
30 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
31 ret <4 x i32> %vecins
32 }
33
34 ; Function Attrs: norecurse nounwind readonly
35 define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) {
36 ; P8LE-LABEL: s2v_test2:
37 ; P8LE: # %bb.0: # %entry
38 ; P8LE-NEXT: addi r3, r3, 4
39 ; P8LE-NEXT: addis r4, r2, .LCPI1_0@toc@ha
40 ; P8LE-NEXT: lfiwzx f0, 0, r3
41 ; P8LE-NEXT: addi r3, r4, .LCPI1_0@toc@l
42 ; P8LE-NEXT: lvx v4, 0, r3
43 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
44 ; P8LE-NEXT: vperm v2, v3, v2, v4
45 ; P8LE-NEXT: blr
46
47 ; P8BE-LABEL: s2v_test2:
48 ; P8BE: # %bb.0: # %entry
49 ; P8BE: addi r3, r3, 4
50 ; P8BE: lfiwzx f0, 0, r3
51 ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
52 ; P8BE: xxsldwi vs0, v2, vs0, 1
53 ; P8BE: xxsldwi v2, vs0, vs0, 3
54 ; P8BE-NEXT: blr
55 entry:
56 %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
57 %0 = load i32, i32* %arrayidx, align 4
58 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
59 ret <4 x i32> %vecins
60 }
61
62 ; Function Attrs: norecurse nounwind readonly
63 define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx) {
64 ; P8LE-LABEL: s2v_test3:
65 ; P8LE: # %bb.0: # %entry
66 ; P8LE-NEXT: sldi r5, r7, 2
67 ; P8LE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
68 ; P8LE-NEXT: lfiwzx f0, r3, r5
69 ; P8LE-NEXT: addi r3, r4, .LCPI2_0@toc@l
70 ; P8LE-NEXT: lvx v4, 0, r3
71 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
72 ; P8LE-NEXT: vperm v2, v3, v2, v4
73 ; P8LE-NEXT: blr
74
75 ; P8BE-LABEL: s2v_test3:
76 ; P8BE: # %bb.0: # %entry
77 ; P8BE: sldi r4, r7, 2
78 ; P8BE: lfiwzx f0, r3, r4
79 ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
80 ; P8BE: xxsldwi vs0, v2, vs0, 1
81 ; P8BE: xxsldwi v2, vs0, vs0, 3
82 ; P8BE-NEXT: blr
83 entry:
84 %idxprom = sext i32 %Idx to i64
85 %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
86 %0 = load i32, i32* %arrayidx, align 4
87 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
88 ret <4 x i32> %vecins
89 }
90
91 ; Function Attrs: norecurse nounwind readonly
92 define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) {
93 ; P8LE-LABEL: s2v_test4:
94 ; P8LE: # %bb.0: # %entry
95 ; P8LE-NEXT: addi r3, r3, 4
96 ; P8LE-NEXT: addis r4, r2, .LCPI3_0@toc@ha
97 ; P8LE-NEXT: lfiwzx f0, 0, r3
98 ; P8LE-NEXT: addi r3, r4, .LCPI3_0@toc@l
99 ; P8LE-NEXT: lvx v4, 0, r3
100 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
101 ; P8LE-NEXT: vperm v2, v3, v2, v4
102 ; P8LE-NEXT: blr
103
104 ; P8BE-LABEL: s2v_test4:
105 ; P8BE: # %bb.0: # %entry
106 ; P8BE: addi r3, r3, 4
107 ; P8BE: lfiwzx f0, 0, r3
108 ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
109 ; P8BE: xxsldwi vs0, v2, vs0, 1
110 ; P8BE: xxsldwi v2, vs0, vs0, 3
111 ; P8BE-NEXT: blr
112 entry:
113 %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
114 %0 = load i32, i32* %arrayidx, align 4
115 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
116 ret <4 x i32> %vecins
117 }
118
119 ; Function Attrs: norecurse nounwind readonly
120 define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) {
121 ; P8LE-LABEL: s2v_test5:
122 ; P8LE: # %bb.0: # %entry
123 ; P8LE-NEXT: lfiwzx f0, 0, r5
124 ; P8LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
125 ; P8LE-NEXT: addi r3, r3, .LCPI4_0@toc@l
126 ; P8LE-NEXT: lvx v4, 0, r3
127 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
128 ; P8LE-NEXT: vperm v2, v3, v2, v4
129 ; P8LE-NEXT: blr
130
131 ; P8BE-LABEL: s2v_test5:
132 ; P8BE: # %bb.0: # %entry
133 ; P8BE: lfiwzx f0, 0, r5
134 ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
135 ; P8BE: xxsldwi vs0, v2, vs0, 1
136 ; P8BE: xxsldwi v2, vs0, vs0, 3
137 ; P8BE-NEXT: blr
138 entry:
139 %0 = load i32, i32* %ptr1, align 4
140 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
141 ret <4 x i32> %vecins
142 }
143
144 ; Function Attrs: norecurse nounwind readonly
145 define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec) {
146 ; P8LE-LABEL: s2v_test_f1:
147 ; P8LE: # %bb.0: # %entry
148 ; P8LE-NEXT: lfiwzx f0, 0, r3
149 ; P8LE-NEXT: addis r4, r2, .LCPI5_0@toc@ha
150 ; P8LE-NEXT: addi r3, r4, .LCPI5_0@toc@l
151 ; P8LE-NEXT: lvx v4, 0, r3
152 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
153 ; P8LE-NEXT: vperm v2, v3, v2, v4
154 ; P8LE-NEXT: blr
155
156 ; P8BE-LABEL: s2v_test_f1:
157 ; P8BE: # %bb.0: # %entry
158 ; P8BE: lfiwzx f0, 0, r3
159 ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
160 ; P8BE: xxsldwi vs0, v2, vs0, 1
161 ; P8BE: xxsldwi v2, vs0, vs0, 3
162 ; P8BE-NEXT: blr
163 entry:
164 %0 = load float, float* %f64, align 4
165 %vecins = insertelement <4 x float> %vec, float %0, i32 0
166 ret <4 x float> %vecins
167 }
168
169 ; Function Attrs: norecurse nounwind readonly
170 define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec) {
171 ; P9LE-LABEL: s2v_test_f2:
172 ; P9LE: # %bb.0: # %entry
173 ; P9LE-NEXT: addi r3, r3, 4
174 ; P9LE-NEXT: xxspltw v2, v2, 2
175 ; P9LE-NEXT: lfiwzx f0, 0, r3
176 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
177 ; P9LE-NEXT: vmrglw v2, v2, v3
178 ; P9LE-NEXT: blr
179
180 ; P9BE-LABEL: s2v_test_f2:
181 ; P9BE: # %bb.0: # %entry
182 ; P9BE: addi r3, r3, 4
183 ; P9BE: xxspltw v2, v2, 1
184 ; P9BE: lfiwzx f0, 0, r3
185 ; P9BE-NEXT: xxsldwi v3, f0, f0, 1
186 ; P9BE: vmrghw v2, v3, v2
187 ; P9BE-NEXT: blr
188
189 ; P8LE-LABEL: s2v_test_f2:
190 ; P8LE: # %bb.0: # %entry
191 ; P8LE-NEXT: addi r3, r3, 4
192 ; P8LE-NEXT: xxspltw v2, v2, 2
193 ; P8LE-NEXT: lfiwzx f0, 0, r3
194 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
195 ; P8LE-NEXT: vmrglw v2, v2, v3
196 ; P8LE-NEXT: blr
197
198 ; P8BE-LABEL: s2v_test_f2:
199 ; P8BE: # %bb.0: # %entry
200 ; P8BE-NEXT: addi r3, r3, 4
201 ; P8BE-NEXT: xxspltw v2, v2, 1
202 ; P8BE-NEXT: lfiwzx f0, 0, r3
203 ; P8BE-NEXT: xxsldwi v3, f0, f0, 1
204 ; P8BE-NEXT: vmrghw v2, v3, v2
205 ; P8BE-NEXT: blr
206 entry:
207 %arrayidx = getelementptr inbounds float, float* %f64, i64 1
208 %0 = load float, float* %arrayidx, align 8
209 %vecins = insertelement <2 x float> %vec, float %0, i32 0
210 ret <2 x float> %vecins
211 }
212
213 ; Function Attrs: norecurse nounwind readonly
214 define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec, i32 signext %Idx) {
215 ; P9LE-LABEL: s2v_test_f3:
216 ; P9LE: # %bb.0: # %entry
217 ; P9LE-NEXT: sldi r4, r7, 2
218 ; P9LE-NEXT: xxspltw v2, v2, 2
219 ; P9LE-NEXT: lfiwzx f0, r3, r4
220 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
221 ; P9LE-NEXT: vmrglw v2, v2, v3
222 ; P9LE-NEXT: blr
223
224 ; P9BE-LABEL: s2v_test_f3:
225 ; P9BE: # %bb.0: # %entry
226 ; P9BE: sldi r4, r7, 2
227 ; P9BE: xxspltw v2, v2, 1
228 ; P9BE: lfiwzx f0, r3, r4
229 ; P9BE-NEXT: xxsldwi v3, f0, f0, 1
230 ; P9BE: vmrghw v2, v3, v2
231 ; P9BE-NEXT: blr
232
233 ; P8LE-LABEL: s2v_test_f3:
234 ; P8LE: # %bb.0: # %entry
235 ; P8LE-NEXT: sldi r4, r7, 2
236 ; P8LE-NEXT: xxspltw v2, v2, 2
237 ; P8LE-NEXT: lfiwzx f0, r3, r4
238 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
239 ; P8LE-NEXT: vmrglw v2, v2, v3
240 ; P8LE-NEXT: blr
241
242 ; P8BE-LABEL: s2v_test_f3:
243 ; P8BE: # %bb.0: # %entry
244 ; P8BE-NEXT: sldi r4, r7, 2
245 ; P8BE-NEXT: xxspltw v2, v2, 1
246 ; P8BE-NEXT: lfiwzx f0, r3, r4
247 ; P8BE-NEXT: xxsldwi v3, f0, f0, 1
248 ; P8BE-NEXT: vmrghw v2, v3, v2
249 ; P8BE-NEXT: blr
250 entry:
251 %idxprom = sext i32 %Idx to i64
252 %arrayidx = getelementptr inbounds float, float* %f64, i64 %idxprom
253 %0 = load float, float* %arrayidx, align 8
254 %vecins = insertelement <2 x float> %vec, float %0, i32 0
255 ret <2 x float> %vecins
256 }
257
258 ; Function Attrs: norecurse nounwind readonly
259 define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec) {
260 ; P9LE-LABEL: s2v_test_f4:
261 ; P9LE: # %bb.0: # %entry
262 ; P9LE-NEXT: addi r3, r3, 4
263 ; P9LE-NEXT: xxspltw v2, v2, 2
264 ; P9LE-NEXT: lfiwzx f0, 0, r3
265 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
266 ; P9LE-NEXT: vmrglw v2, v2, v3
267 ; P9LE-NEXT: blr
268
269 ; P9BE-LABEL: s2v_test_f4:
270 ; P9BE: # %bb.0: # %entry
271 ; P9BE: addi r3, r3, 4
272 ; P9BE: xxspltw v2, v2, 1
273 ; P9BE: lfiwzx f0, 0, r3
274 ; P9BE-NEXT: xxsldwi v3, f0, f0, 1
275 ; P9BE: vmrghw v2, v3, v2
276 ; P9BE-NEXT: blr
277
278 ; P8LE-LABEL: s2v_test_f4:
279 ; P8LE: # %bb.0: # %entry
280 ; P8LE-NEXT: addi r3, r3, 4
281 ; P8LE-NEXT: xxspltw v2, v2, 2
282 ; P8LE-NEXT: lfiwzx f0, 0, r3
283 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
284 ; P8LE-NEXT: vmrglw v2, v2, v3
285 ; P8LE-NEXT: blr
286
287 ; P8BE-LABEL: s2v_test_f4:
288 ; P8BE: # %bb.0: # %entry
289 ; P8BE-NEXT: addi r3, r3, 4
290 ; P8BE-NEXT: xxspltw v2, v2, 1
291 ; P8BE-NEXT: lfiwzx f0, 0, r3
292 ; P8BE-NEXT: xxsldwi v3, f0, f0, 1
293 ; P8BE-NEXT: vmrghw v2, v3, v2
294 ; P8BE-NEXT: blr
295 entry:
296 %arrayidx = getelementptr inbounds float, float* %f64, i64 1
297 %0 = load float, float* %arrayidx, align 8
298 %vecins = insertelement <2 x float> %vec, float %0, i32 0
299 ret <2 x float> %vecins
300 }
301
302 ; Function Attrs: norecurse nounwind readonly
303 define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1) {
304 ; P9LE-LABEL: s2v_test_f5:
305 ; P9LE: # %bb.0: # %entry
306 ; P9LE-NEXT: lfiwzx f0, 0, r5
307 ; P9LE-NEXT: xxspltw v2, v2, 2
308 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
309 ; P9LE-NEXT: vmrglw v2, v2, v3
310 ; P9LE-NEXT: blr
311
312 ; P9BE-LABEL: s2v_test_f5:
313 ; P9BE: # %bb.0: # %entry
314 ; P9BE: lfiwzx f0, 0, r5
315 ; P9BE: xxspltw v2, v2, 1
316 ; P9BE-NEXT: xxsldwi v3, f0, f0, 1
317 ; P9BE: vmrghw v2, v3, v2
318 ; P9BE-NEXT: blr
319
320 ; P8LE-LABEL: s2v_test_f5:
321 ; P8LE: # %bb.0: # %entry
322 ; P8LE-NEXT: lfiwzx f0, 0, r5
323 ; P8LE-NEXT: xxspltw v2, v2, 2
324 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
325 ; P8LE-NEXT: vmrglw v2, v2, v3
326 ; P8LE-NEXT: blr
327
328 ; P8BE-LABEL: s2v_test_f5:
329 ; P8BE: # %bb.0: # %entry
330 ; P8BE-NEXT: lfiwzx f0, 0, r5
331 ; P8BE-NEXT: xxspltw v2, v2, 1
332 ; P8BE-NEXT: xxsldwi v3, f0, f0, 1
333 ; P8BE-NEXT: vmrghw v2, v3, v2
334 ; P8BE-NEXT: blr
335 entry:
336 %0 = load float, float* %ptr1, align 8
337 %vecins = insertelement <2 x float> %vec, float %0, i32 0
338 ret <2 x float> %vecins
339 }
340
None ; RUN: llc -verify-machineinstrs -mcpu=pwr8 \
1 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s
0 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \
1 ; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \
2 ; RUN: -O3 < %s | FileCheck %s
23
34 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
4 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-P9 \
5 ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
6 ; RUN: < %s | FileCheck %s --check-prefix=CHECK-P9 \
57 ; RUN: --implicit-check-not xxswapd
68
79 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
8 ; RUN: -verify-machineinstrs -mattr=-power9-vector < %s | FileCheck %s
10 ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
11 ; RUN: -mattr=-power9-vector < %s | FileCheck %s
912
1013 ; These tests verify that VSX swap optimization works when loading a scalar
1114 ; into a vector register.
1619 @y = global double 1.780000e+00, align 8
1720
1821 define void @bar0() {
22 ; CHECK-LABEL: bar0:
23 ; CHECK: # %bb.0: # %entry
24 ; CHECK: addis r3, r2, .LC0@toc@ha
25 ; CHECK: addis r4, r2, .LC1@toc@ha
26 ; CHECK: ld r3, .LC0@toc@l(r3)
27 ; CHECK: addis r3, r2, .LC2@toc@ha
28 ; CHECK: ld r3, .LC2@toc@l(r3)
29 ; CHECK: xxpermdi vs0, vs0, vs1, 1
30 ; CHECK: stxvd2x vs0, 0, r3
31 ; CHECK: blr
32 ;
33 ; CHECK-P9-LABEL: bar0:
34 ; CHECK-P9: # %bb.0: # %entry
35 ; CHECK-P9: addis r3, r2, .LC0@toc@ha
36 ; CHECK-P9: addis r4, r2, .LC1@toc@ha
37 ; CHECK-P9: ld r3, .LC0@toc@l(r3)
38 ; CHECK-P9: ld r4, .LC1@toc@l(r4)
39 ; CHECK-P9: lfd f0, 0(r3)
40 ; CHECK-P9: lxvx vs1, 0, r4
41 ; CHECK-P9: addis r3, r2, .LC2@toc@ha
42 ; CHECK-P9: ld r3, .LC2@toc@l(r3)
43 ; CHECK-P9: xxpermdi vs0, f0, f0, 2
44 ; CHECK-P9: xxpermdi vs0, vs1, vs0, 1
45 ; CHECK-P9: stxvx vs0, 0, r3
46 ; CHECK-P9: blr
1947 entry:
2048 %0 = load <2 x double>, <2 x double>* @x, align 16
2149 %1 = load double, double* @y, align 8
2452 ret void
2553 }
2654
27 ; CHECK-LABEL: @bar0
28 ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
29 ; CHECK-DAG: lfdx [[REG2:[0-9]+]]
30 ; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
31 ; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
32 ; CHECK: stxvd2x [[REG5]]
33
34 ; CHECK-P9-LABEL: @bar0
35 ; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
36 ; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
37 ; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
38 ; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG1]], [[REG4]], 1
39 ; CHECK-P9: stxvx [[REG5]]
40
4155 define void @bar1() {
56 ; CHECK-LABEL: bar1:
57 ; CHECK: # %bb.0: # %entry
58 ; CHECK: addis r3, r2, .LC0@toc@ha
59 ; CHECK: addis r4, r2, .LC1@toc@ha
60 ; CHECK: ld r3, .LC0@toc@l(r3)
61 ; CHECK: addis r3, r2, .LC2@toc@ha
62 ; CHECK: ld r3, .LC2@toc@l(r3)
63 ; CHECK: xxmrghd vs0, vs1, vs0
64 ; CHECK: stxvd2x vs0, 0, r3
65 ; CHECK: blr
66 ;
67 ; CHECK-P9-LABEL: bar1:
68 ; CHECK-P9: # %bb.0: # %entry
69 ; CHECK-P9: addis r3, r2, .LC0@toc@ha
70 ; CHECK-P9: addis r4, r2, .LC1@toc@ha
71 ; CHECK-P9: ld r3, .LC0@toc@l(r3)
72 ; CHECK-P9: ld r4, .LC1@toc@l(r4)
73 ; CHECK-P9: lfd f0, 0(r3)
74 ; CHECK-P9: lxvx vs1, 0, r4
75 ; CHECK-P9: addis r3, r2, .LC2@toc@ha
76 ; CHECK-P9: ld r3, .LC2@toc@l(r3)
77 ; CHECK-P9: xxpermdi vs0, f0, f0, 2
78 ; CHECK-P9: xxmrgld vs0, vs0, vs1
79 ; CHECK-P9: stxvx vs0, 0, r3
80 ; CHECK-P9: blr
4281 entry:
4382 %0 = load <2 x double>, <2 x double>* @x, align 16
4483 %1 = load double, double* @y, align 8
4786 ret void
4887 }
4988
50 ; CHECK-LABEL: @bar1
51 ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
52 ; CHECK-DAG: lfdx [[REG2:[0-9]+]]
53 ; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
54 ; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
55 ; CHECK: stxvd2x [[REG5]]
56
57 ; CHECK-P9-LABEL: @bar1
58 ; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
59 ; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
60 ; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
61 ; CHECK-P9: xxmrgld [[REG5:[0-9]+]], [[REG4]], [[REG1]]
62 ; CHECK-P9: stxvx [[REG5]]
63
None ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
1 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
0 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -ppc-vsr-nums-as-vr \
1 ; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
2 ; RUN: | FileCheck %s
23
3 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector \
4 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
4 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector -ppc-vsr-nums-as-vr \
5 ; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
6 ; RUN: | FileCheck --check-prefix=CHECK-P9-VECTOR %s
57
6 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 \
8 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
79 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \
810 ; RUN: --check-prefix=CHECK-P9 --implicit-check-not xxswapd
911
1012 define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
13 ; CHECK-LABEL: testi0:
14 ; CHECK: # %bb.0:
15 ; CHECK-NEXT: lxvd2x vs0, 0, r3
16 ; CHECK-NEXT: lfdx f1, 0, r4
17 ; CHECK-NEXT: xxswapd vs0, vs0
18 ; CHECK-NEXT: xxspltd vs1, vs1, 0
19 ; CHECK-NEXT: xxpermdi v2, vs0, vs1, 1
20 ; CHECK-NEXT: blr
21 ;
22 ; CHECK-P9-VECTOR-LABEL: testi0:
23 ; CHECK-P9-VECTOR: # %bb.0:
24 ; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3
25 ; CHECK-P9-VECTOR-NEXT: lfdx f1, 0, r4
26 ; CHECK-P9-VECTOR-NEXT: xxspltd vs1, vs1, 0
27 ; CHECK-P9-VECTOR-NEXT: xxswapd vs0, vs0
28 ; CHECK-P9-VECTOR-NEXT: xxpermdi v2, vs0, vs1, 1
29 ; CHECK-P9-VECTOR-NEXT: blr
30 ;
31 ; CHECK-P9-LABEL: testi0:
32 ; CHECK-P9: # %bb.0:
33 ; CHECK-P9-NEXT: lfd f0, 0(r4)
34 ; CHECK-P9-NEXT: lxv vs1, 0(r3)
35 ; CHECK-P9-NEXT: xxpermdi vs0, f0, f0, 2
36 ; CHECK-P9-NEXT: xxpermdi v2, vs1, vs0, 1
37 ; CHECK-P9-NEXT: blr
1138 %v = load <2 x double>, <2 x double>* %p1
1239 %s = load double, double* %p2
1340 %r = insertelement <2 x double> %v, double %s, i32 0
1441 ret <2 x double> %r
1542
16 ; CHECK-LABEL: testi0
17 ; CHECK: lxvd2x 0, 0, 3
18 ; CHECK: lfdx 1, 0, 4
19 ; CHECK-DAG: xxspltd 1, 1, 0
20 ; CHECK-DAG: xxswapd 0, 0
21 ; CHECK: xxpermdi 34, 0, 1, 1
2243
23 ; CHECK-P9-LABEL: testi0
24 ; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4)
25 ; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3)
26 ; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0
27 ; CHECK-P9: xxpermdi 34, [[REG2]], [[REG3]], 1
2844 }
2945
3046 define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
47 ; CHECK-LABEL: testi1:
48 ; CHECK: # %bb.0:
49 ; CHECK-NEXT: lxvd2x vs0, 0, r3
50 ; CHECK-NEXT: lfdx f1, 0, r4
51 ; CHECK-NEXT: xxswapd vs0, vs0
52 ; CHECK-NEXT: xxspltd vs1, vs1, 0
53 ; CHECK-NEXT: xxmrgld v2, vs1, vs0
54 ; CHECK-NEXT: blr
55 ;
56 ; CHECK-P9-VECTOR-LABEL: testi1:
57 ; CHECK-P9-VECTOR: # %bb.0:
58 ; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3
59 ; CHECK-P9-VECTOR-NEXT: lfdx f1, 0, r4
60 ; CHECK-P9-VECTOR-NEXT: xxspltd vs1, vs1, 0
61 ; CHECK-P9-VECTOR-NEXT: xxswapd vs0, vs0
62 ; CHECK-P9-VECTOR-NEXT: xxmrgld v2, vs1, vs0
63 ; CHECK-P9-VECTOR-NEXT: blr
64 ;
65 ; CHECK-P9-LABEL: testi1:
66 ; CHECK-P9: # %bb.0:
67 ; CHECK-P9-NEXT: lfd f0, 0(r4)
68 ; CHECK-P9-NEXT: lxv vs1, 0(r3)
69 ; CHECK-P9-NEXT: xxpermdi vs0, f0, f0, 2
70 ; CHECK-P9-NEXT: xxmrgld v2, vs0, vs1
71 ; CHECK-P9-NEXT: blr
3172 %v = load <2 x double>, <2 x double>* %p1
3273 %s = load double, double* %p2
3374 %r = insertelement <2 x double> %v, double %s, i32 1
3475 ret <2 x double> %r
3576
36 ; CHECK-LABEL: testi1
37 ; CHECK: lxvd2x 0, 0, 3
38 ; CHECK: lfdx 1, 0, 4
39 ; CHECK-DAG: xxspltd 1, 1, 0
40 ; CHECK-DAG: xxswapd 0, 0
41 ; CHECK: xxmrgld 34, 1, 0
4277
43 ; CHECK-P9-LABEL: testi1
44 ; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4)
45 ; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3)
46 ; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0
47 ; CHECK-P9: xxmrgld 34, [[REG3]], [[REG2]]
4878 }
4979
5080 define double @teste0(<2 x double>* %p1) {
81 ; CHECK-LABEL: teste0:
82 ; CHECK: # %bb.0:
83 ; CHECK-NEXT: lxvd2x vs1, 0, r3
84 ; CHECK: blr
85 ;
86 ; CHECK-P9-VECTOR-LABEL: teste0:
87 ; CHECK-P9-VECTOR: # %bb.0:
88 ; CHECK-P9-VECTOR-NEXT: lxvd2x vs1, 0, r3
89 ; CHECK-P9-VECTOR: blr
90 ;
91 ; CHECK-P9-LABEL: teste0:
92 ; CHECK-P9: # %bb.0:
93 ; CHECK-P9-NEXT: lfd f1, 0(r3)
94 ; CHECK-P9-NEXT: blr
5195 %v = load <2 x double>, <2 x double>* %p1
5296 %r = extractelement <2 x double> %v, i32 0
5397 ret double %r
5498
55 ; CHECK-LABEL: teste0
56 ; CHECK: lxvd2x 1, 0, 3
5799
58 ; CHECK-P9-LABEL: teste0
59 ; CHECK-P9: lfd 1, 0(3)
60100 }
61101
62102 define double @teste1(<2 x double>* %p1) {
103 ; CHECK-LABEL: teste1:
104 ; CHECK: # %bb.0:
105 ; CHECK-NEXT: lxvd2x vs0, 0, r3
106 ; CHECK-NEXT: xxswapd vs1, vs0
107 ; CHECK: blr
108 ;
109 ; CHECK-P9-VECTOR-LABEL: teste1:
110 ; CHECK-P9-VECTOR: # %bb.0:
111 ; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3
112 ; CHECK-P9-VECTOR-NEXT: xxswapd vs1, vs0
113 ; CHECK-P9-VECTOR: blr
114 ;
115 ; CHECK-P9-LABEL: teste1:
116 ; CHECK-P9: # %bb.0:
117 ; CHECK-P9-NEXT: lfd f1, 8(r3)
118 ; CHECK-P9-NEXT: blr
63119 %v = load <2 x double>, <2 x double>* %p1
64120 %r = extractelement <2 x double> %v, i32 1
65121 ret double %r
66122
67 ; CHECK-LABEL: teste1
68 ; CHECK: lxvd2x 0, 0, 3
69 ; CHECK: xxswapd 1, 0
70123
71 ; CHECK-P9-LABEL: teste1
72 ; CHECK-P9: lfd 1, 8(3)
73124 }