llvm.org GIT mirror llvm / cae9695
[X86, AVX2] Replace inserti128 and extracti128 intrinsics with generic shuffles. This should complete the job started in r231794 and continued in r232045: We want to replace as much custom x86 shuffling via intrinsics as possible because pushing the code down the generic shuffle optimization path allows for better codegen and less complexity in LLVM. AVX2 introduced proper integer variants of the hacked integer insert/extract C intrinsics that were created for this same functionality with AVX1. This should complete the removal of insert/extract128 intrinsics. The Clang precursor patch for this change was checked in at r232109. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232120 91177308-0d34-0410-b5e6-96231b3b80d8 Sanjay Patel 4 years ago
6 changed file(s) with 30 addition(s) and 52 deletion(s). Raw diff Collapse all Expand all
17581758
17591759 // Vector extract and insert
17601760 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1761 def int_x86_avx2_vextracti128 : GCCBuiltin<"__builtin_ia32_extract128i256">,
1762 Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty,
1763 llvm_i8_ty], [IntrNoMem]>;
1764 def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">,
1765 Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
1766 llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
1767
17681761 def int_x86_avx512_mask_vextractf32x4_512 :
17691762 GCCBuiltin<"__builtin_ia32_extractf32x4_mask">,
17701763 Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i8_ty,
49624962 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
49634963 DAG.getConstant(NewIntrinsic, MVT::i32),
49644964 getValue(I.getArgOperand(0)), ShAmt);
4965 setValue(&I, Res);
4966 return nullptr;
4967 }
4968 case Intrinsic::x86_avx2_vinserti128: {
4969 EVT DestVT = TLI.getValueType(I.getType());
4970 EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
4971 uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
4972 ElVT.getVectorNumElements();
4973 Res =
4974 DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT,
4975 getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
4976 DAG.getConstant(Idx, TLI.getVectorIdxTy()));
4977 setValue(&I, Res);
4978 return nullptr;
4979 }
4980 case Intrinsic::x86_avx2_vextracti128: {
4981 EVT DestVT = TLI.getValueType(I.getType());
4982 uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
4983 DestVT.getVectorNumElements();
4984 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT,
4985 getValue(I.getArgOperand(0)),
4986 DAG.getConstant(Idx, TLI.getVectorIdxTy()));
49874965 setValue(&I, Res);
49884966 return nullptr;
49894967 }
160160 Name == "x86.avx.vinsertf128.pd.256" ||
161161 Name == "x86.avx.vinsertf128.ps.256" ||
162162 Name == "x86.avx.vinsertf128.si.256" ||
163 Name == "x86.avx2.vinserti128" ||
163164 Name == "x86.avx.vextractf128.pd.256" ||
164165 Name == "x86.avx.vextractf128.ps.256" ||
165166 Name == "x86.avx.vextractf128.si.256" ||
167 Name == "x86.avx2.vextracti128" ||
166168 Name == "x86.avx.movnt.dq.256" ||
167169 Name == "x86.avx.movnt.pd.256" ||
168170 Name == "x86.avx.movnt.ps.256" ||
633635 Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
634636 } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
635637 Name == "llvm.x86.avx.vinsertf128.ps.256" ||
636 Name == "llvm.x86.avx.vinsertf128.si.256") {
638 Name == "llvm.x86.avx.vinsertf128.si.256" ||
639 Name == "llvm.x86.avx2.vinserti128") {
637640 Value *Op0 = CI->getArgOperand(0);
638641 Value *Op1 = CI->getArgOperand(1);
639642 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
678681 Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
679682 } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
680683 Name == "llvm.x86.avx.vextractf128.ps.256" ||
681 Name == "llvm.x86.avx.vextractf128.si.256") {
684 Name == "llvm.x86.avx.vextractf128.si.256" ||
685 Name == "llvm.x86.avx2.vextracti128") {
682686 Value *Op0 = CI->getArgOperand(0);
683687 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
684688 VectorType *VecTy = cast<VectorType>(CI->getType());
85948594 //
85958595 def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
85968596 (ins VR256:$src1, u8imm:$src2),
8597 "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8598 [(set VR128:$dst,
8599 (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
8597 "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
86008598 Sched<[WriteShuffle256]>, VEX, VEX_L;
86018599 let hasSideEffects = 0, mayStore = 1 in
86028600 def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
None ; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx2 | FileCheck %s
11
22 define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
33 ; CHECK: vpblendw
6161 ret <4 x i64> %res
6262 }
6363 declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
64
65
66 define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
67 ; CHECK-LABEL: test_x86_avx2_vextracti128:
68 ; CHECK: vextracti128
69
70 %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7)
71 ret <2 x i64> %res
72 }
73 declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
74
75
76 define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
77 ; CHECK-LABEL: test_x86_avx2_vinserti128:
78 ; CHECK: vinserti128
79
80 %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7)
81 ret <4 x i64> %res
82 }
83 declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
84
None ; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx2 | FileCheck %s
11
22 define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
33 ; CHECK: vpackssdw
772772 ret <4 x i64> %res
773773 }
774774 declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly
775
776
777 define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
778 ; CHECK: vextracti128
779 %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
780 ret <2 x i64> %res
781 }
782 declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
783
784
785 define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
786 ; CHECK: vinserti128
787 %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7) ; <<4 x i64>> [#uses=1]
788 ret <4 x i64> %res
789 }
790 declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
791775
792776
793777 define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {