llvm.org GIT mirror llvm / 98e0b9c
Add new X86 AVX2 VBROADCAST instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143612 91177308-0d34-0410-b5e6-96231b3b80d8
Craig Topper, 8 years ago
4 changed file(s) with 70 addition(s) and 25 deletion(s). Raw diff Collapse all Expand all
12801280
12811281 // Vector load with broadcast
12821282 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1283 def int_x86_avx_vbroadcastss :
1283 def int_x86_avx_vbroadcast_ss :
12841284 GCCBuiltin<"__builtin_ia32_vbroadcastss">,
12851285 Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
12861286 def int_x86_avx_vbroadcast_sd_256 :
12871287 GCCBuiltin<"__builtin_ia32_vbroadcastsd256">,
12881288 Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>;
1289 def int_x86_avx_vbroadcastss_256 :
1289 def int_x86_avx_vbroadcast_ss_256 :
12901290 GCCBuiltin<"__builtin_ia32_vbroadcastss256">,
12911291 Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
12921292 def int_x86_avx_vbroadcastf128_pd_256 :
16711671
16721672 // Vector load with broadcast
16731673 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1674 def int_x86_avx2_vbroadcast_ss_ps :
1675 GCCBuiltin<"__builtin_ia32_vbroadcastss_ps">,
1676 Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrReadMem]>;
1677 def int_x86_avx2_vbroadcast_sd_pd_256 :
1678 GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">,
1679 Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrReadMem]>;
1680 def int_x86_avx2_vbroadcast_ss_ps_256 :
1681 GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">,
1682 Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrReadMem]>;
16741683 def int_x86_avx2_vbroadcasti128 :
16751684 GCCBuiltin<"__builtin_ia32_vbroadcastsi256">,
16761685 Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
70827082 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
70837083 [(set RC:$dst, (Int addr:$src))]>, VEX;
70847084
7085 def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
7086 int_x86_avx_vbroadcastss>;
7087 def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
7088 int_x86_avx_vbroadcastss_256>;
7089 def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
7090 int_x86_avx_vbroadcast_sd_256>;
7085 class avx_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
7086 Intrinsic Int> :
7087 AVX8I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
7088 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
7089 [(set RC:$dst, (Int VR128:$src))]>, VEX;
7090
7091 def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
7092 int_x86_avx_vbroadcast_ss>;
7093 def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
7094 int_x86_avx_vbroadcast_ss_256>;
7095 def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
7096 int_x86_avx_vbroadcast_sd_256>;
70917097 def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
70927098 int_x86_avx_vbroadcastf128_pd_256>;
70937099
7094 let Predicates = [HasAVX2] in
7100 let Predicates = [HasAVX2] in {
70957101 def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
70967102 int_x86_avx2_vbroadcasti128>;
7103 def VBROADCASTSSrr : avx_broadcast_reg<0x18, "vbroadcastss", VR128,
7104 int_x86_avx2_vbroadcast_ss_ps>;
7105 def VBROADCASTSSYrr : avx_broadcast_reg<0x18, "vbroadcastss", VR256,
7106 int_x86_avx2_vbroadcast_ss_ps_256>;
7107 def VBROADCASTSDrr : avx_broadcast_reg<0x19, "vbroadcastsd", VR256,
7108 int_x86_avx2_vbroadcast_sd_pd_256>;
7109 }
70977110
70987111 def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
70997112 (VBROADCASTF128 addr:$src)>;
71007113
71017114 def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
7102 (VBROADCASTSSY addr:$src)>;
7115 (VBROADCASTSSYrm addr:$src)>;
71037116 def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
7104 (VBROADCASTSD addr:$src)>;
7117 (VBROADCASTSDrm addr:$src)>;
71057118 def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
7106 (VBROADCASTSSY addr:$src)>;
7119 (VBROADCASTSSYrm addr:$src)>;
71077120 def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
7108 (VBROADCASTSD addr:$src)>;
7121 (VBROADCASTSDrm addr:$src)>;
71097122
71107123 def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
7111 (VBROADCASTSS addr:$src)>;
7124 (VBROADCASTSSrm addr:$src)>;
71127125 def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
7113 (VBROADCASTSS addr:$src)>;
7126 (VBROADCASTSSrm addr:$src)>;
71147127
71157128 //===----------------------------------------------------------------------===//
71167129 // VINSERTF128 - Insert packed floating-point values
22912291 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
22922292
22932293
2294 define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) {
2294 define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) {
22952295 ; CHECK: vbroadcastss
2296 %res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1]
2297 ret <4 x float> %res
2298 }
2299 declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly
2300
2301
2302 define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) {
2296 %res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1]
2297 ret <4 x float> %res
2298 }
2299 declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly
2300
2301
2302 define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) {
23032303 ; CHECK: vbroadcastss
2304 %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1]
2304 %res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1]
23052305 ret <8 x float> %res
23062306 }
2307 declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly
2307 declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly
23082308
23092309
23102310 define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
742742 ret <4 x i64> %res
743743 }
744744 declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) nounwind readonly
745
746 define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
747 ; CHECK: vbroadcastsd
748 %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) ; <<4 x double>> [#uses=1]
749 ret <4 x double> %res
750 }
751 declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly
752
753
754 define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
755 ; CHECK: vbroadcastss
756 %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
757 ret <4 x float> %res
758 }
759 declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly
760
761
762 define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
763 ; CHECK: vbroadcastss
764 %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0) ; <<8 x float>> [#uses=1]
765 ret <8 x float> %res
766 }
767 declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly