llvm.org GIT mirror llvm / 4babeee
Add 3DNow! intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129551 91177308-0d34-0410-b5e6-96231b3b80d8 Michael J. Spencer 9 years ago
4 changed file(s) with 451 addition(s) and 51 deletion(s). Raw diff Collapse all Expand all
1414 // Interrupt traps
1515 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1616 def int_x86_int : Intrinsic<[], [llvm_i8_ty]>;
17 }
18
19 //===----------------------------------------------------------------------===//
20 // 3DNow!
21
22 let TargetPrefix = "x86" in {
23 def int_x86_3dnow_pavgusb : GCCBuiltin<"__builtin_ia32_pavgusb">,
24 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
25 [IntrNoMem]>;
26 def int_x86_3dnow_pf2id : GCCBuiltin<"__builtin_ia32_pf2id">,
27 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
28 def int_x86_3dnow_pfacc : GCCBuiltin<"__builtin_ia32_pfacc">,
29 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
30 [IntrNoMem]>;
31 def int_x86_3dnow_pfadd : GCCBuiltin<"__builtin_ia32_pfadd">,
32 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
33 [IntrNoMem]>;
34 def int_x86_3dnow_pfcmpeq : GCCBuiltin<"__builtin_ia32_pfcmpeq">,
35 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
36 [IntrNoMem]>;
37 def int_x86_3dnow_pfcmpge : GCCBuiltin<"__builtin_ia32_pfcmpge">,
38 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
39 [IntrNoMem]>;
40 def int_x86_3dnow_pfcmpgt : GCCBuiltin<"__builtin_ia32_pfcmpgt">,
41 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
42 [IntrNoMem]>;
43 def int_x86_3dnow_pfmax : GCCBuiltin<"__builtin_ia32_pfmax">,
44 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
45 [IntrNoMem]>;
46 def int_x86_3dnow_pfmin : GCCBuiltin<"__builtin_ia32_pfmin">,
47 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
48 [IntrNoMem]>;
49 def int_x86_3dnow_pfmul : GCCBuiltin<"__builtin_ia32_pfmul">,
50 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
51 [IntrNoMem]>;
52 def int_x86_3dnow_pfrcp : GCCBuiltin<"__builtin_ia32_pfrcp">,
53 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
54 def int_x86_3dnow_pfrcpit1 : GCCBuiltin<"__builtin_ia32_pfrcpit1">,
55 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
56 [IntrNoMem]>;
57 def int_x86_3dnow_pfrcpit2 : GCCBuiltin<"__builtin_ia32_pfrcpit2">,
58 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
59 [IntrNoMem]>;
60 def int_x86_3dnow_pfrsqrt : GCCBuiltin<"__builtin_ia32_pfrsqrt">,
61 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
62 def int_x86_3dnow_pfrsqit1 : GCCBuiltin<"__builtin_ia32_pfrsqit1">,
63 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
64 [IntrNoMem]>;
65 def int_x86_3dnow_pfsub : GCCBuiltin<"__builtin_ia32_pfsub">,
66 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
67 [IntrNoMem]>;
68 def int_x86_3dnow_pfsubr : GCCBuiltin<"__builtin_ia32_pfsubr">,
69 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
70 [IntrNoMem]>;
71 def int_x86_3dnow_pi2fd : GCCBuiltin<"__builtin_ia32_pi2fd">,
72 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
73 def int_x86_3dnow_pmulhrw : GCCBuiltin<"__builtin_ia32_pmulhrw">,
74 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
75 [IntrNoMem]>;
76 }
77
78 //===----------------------------------------------------------------------===//
79 // 3DNow! extensions
80
81 let TargetPrefix = "x86" in {
82 def int_x86_3dnowa_pf2iw : GCCBuiltin<"__builtin_ia32_pf2iw">,
83 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
84 def int_x86_3dnowa_pfnacc : GCCBuiltin<"__builtin_ia32_pfnacc">,
85 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
86 [IntrNoMem]>;
87 def int_x86_3dnowa_pfpnacc : GCCBuiltin<"__builtin_ia32_pfpnacc">,
88 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
89 [IntrNoMem]>;
90 def int_x86_3dnowa_pi2fw : GCCBuiltin<"__builtin_ia32_pi2fw">,
91 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
92 def int_x86_3dnowa_pswapd :
93 Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
1794 }
1895
1996 //===----------------------------------------------------------------------===//
4949 "Enable SSE 4.2 instructions",
5050 [FeatureSSE41, FeaturePOPCNT]>;
5151 def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
52 "Enable 3DNow! instructions">;
52 "Enable 3DNow! instructions",
53 [FeatureMMX]>;
5354 def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
5455 "Enable 3DNow! Athlon instructions",
5556 [Feature3DNow]>;
124125 FeatureAES, FeatureCLMUL]>;
125126
126127 def : Proc<"k6", [FeatureMMX]>;
127 def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
128 def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>;
129 def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
130 def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
128 def : Proc<"k6-2", [Feature3DNow]>;
129 def : Proc<"k6-3", [Feature3DNow]>;
130 def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem]>;
131 def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem]>;
131132 def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
132133 def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
133134 def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
155156 Feature3DNowA]>;
156157
157158 def : Proc<"winchip-c6", [FeatureMMX]>;
158 def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;
159 def : Proc<"c3", [FeatureMMX, Feature3DNow]>;
159 def : Proc<"winchip2", [Feature3DNow]>;
160 def : Proc<"c3", [Feature3DNow]>;
160161 def : Proc<"c3-2", [FeatureSSE1]>;
161162
162163 //===----------------------------------------------------------------------===//
1111 //
1212 //===----------------------------------------------------------------------===//
1313
14 // FIXME: We don't support any intrinsics for these instructions yet.
15
16 class I3DNow o, Format F, dag outs, dag ins, string asm,
17 list pattern>
18 : I, TB, Requires<[Has3DNow]> {
14 class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
15 : I<o, F, outs, ins, asm, pat>, TB, Requires<[Has3DNow]> {
1916 }
2017
21 class I3DNow_binop o, Format F, dag ins, string Mnemonic>
22 : I
23 !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), []>,
24 TB, Requires<[Has3DNow]>, Has3DNow0F0FOpcode {
18 class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
19 : I3DNow<o, F, (outs VR64:$dst), ins,
20 !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>,
21 Has3DNow0F0FOpcode {
22 // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
23 let isAsmParserOnly = 1;
24 let Constraints = "$src1 = $dst";
25 }
26
27 class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
28 : I3DNow<o, F, (outs VR64:$dst), ins,
29 !strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>,
30 Has3DNow0F0FOpcode {
2531 // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
2632 let isAsmParserOnly = 1;
2733 }
2834
29
30 let Constraints = "$src1 = $dst" in {
31 // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
32 // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
33 multiclass I3DNow_binop_rm opc, string Mn> {
34 def rr : I3DNow_binop;
35 def rm : I3DNow_binop;
36 }
35 multiclass I3DNow_binop_rm opc, string Mn> {
36 def rr : I3DNow_binop;
37 def rm : I3DNow_binop;
3738 }
3839
39 defm PAVGUSB : I3DNow_binop_rm<0xBF, "pavgusb">;
40 defm PF2ID : I3DNow_binop_rm<0x1D, "pf2id">;
41 defm PFACC : I3DNow_binop_rm<0xAE, "pfacc">;
42 defm PFADD : I3DNow_binop_rm<0x9E, "pfadd">;
43 defm PFCMPEQ : I3DNow_binop_rm<0xB0, "pfcmpeq">;
44 defm PFCMPGE : I3DNow_binop_rm<0x90, "pfcmpge">;
45 defm PFCMPGT : I3DNow_binop_rm<0xA0, "pfcmpgt">;
46 defm PFMAX : I3DNow_binop_rm<0xA4, "pfmax">;
47 defm PFMIN : I3DNow_binop_rm<0x94, "pfmin">;
48 defm PFMUL : I3DNow_binop_rm<0xB4, "pfmul">;
49 defm PFRCP : I3DNow_binop_rm<0x96, "pfrcp">;
50 defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1">;
51 defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2">;
52 defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1">;
53 defm PFRSQRT : I3DNow_binop_rm<0x97, "pfrsqrt">;
54 defm PFSUB : I3DNow_binop_rm<0x9A, "pfsub">;
55 defm PFSUBR : I3DNow_binop_rm<0xAA, "pfsubr">;
56 defm PI2FD : I3DNow_binop_rm<0x0D, "pi2fd">;
57 defm PMULHRW : I3DNow_binop_rm<0xB7, "pmulhrw">;
40 multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> {
41 def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
42 [(set VR64:$dst, (!cast<Intrinsic>(
43 !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
44 def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
45 [(set VR64:$dst, (!cast<Intrinsic>(
46 !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
47 (bitconvert (load_mmx addr:$src2))))]>;
48 }
49
50 multiclass I3DNow_conv_rm opc, string Mn> {
51 def rr : I3DNow_conv;
52 def rm : I3DNow_conv;
53 }
54
55 multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn, string Ver = ""> {
56 def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
57 [(set VR64:$dst, (!cast<Intrinsic>(
58 !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>;
59 def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
60 [(set VR64:$dst, (!cast<Intrinsic>(
61 !strconcat("int_x86_3dnow", Ver, "_", Mn))
62 (bitconvert (load_mmx addr:$src))))]>;
63 }
64
65 defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
66 defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
67 defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
68 defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
69 defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
70 defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
71 defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
72 defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
73 defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
74 defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
75 defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
76 defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
77 defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
78 defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">;
79 defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">;
80 defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
81 defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
82 defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
83 defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
5884
5985
6086 def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
6187
6288 def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
6389 "prefetch $addr", []>;
64
90
6591 // FIXME: Disassembler gets a bogus decode conflict.
66 let isAsmParserOnly = 1 in {
92 let isAsmParserOnly = 1 in
6793 def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
6894 "prefetchw $addr", []>;
69 }
7095
7196 // "3DNowA" instructions
72 defm PF2IW : I3DNow_binop_rm<0x1C, "pf2iw">;
73 defm PI2FW : I3DNow_binop_rm<0x0C, "pi2fw">;
74 defm PFNACC : I3DNow_binop_rm<0x8A, "pfnacc">;
75 defm PFPNACC : I3DNow_binop_rm<0x8E, "pfpnacc">;
76 defm PSWAPD : I3DNow_binop_rm<0xBB, "pswapd">;
97 defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
98 defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
99 defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
100 defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
101 defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;
0 ; RUN: llc < %s -march=x86 -mattr=+3dnow | FileCheck %s
1
2 define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
3 ; CHECK: pavgusb
4 entry:
5 %0 = bitcast x86_mmx %a.coerce to <8 x i8>
6 %1 = bitcast x86_mmx %b.coerce to <8 x i8>
7 %2 = bitcast <8 x i8> %0 to x86_mmx
8 %3 = bitcast <8 x i8> %1 to x86_mmx
9 %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3)
10 %5 = bitcast x86_mmx %4 to <8 x i8>
11 ret <8 x i8> %5
12 }
13
14 declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
15
16 define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone {
17 ; CHECK: pf2id
18 entry:
19 %0 = bitcast <2 x float> %a to x86_mmx
20 %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0)
21 %2 = bitcast x86_mmx %1 to <2 x i32>
22 ret <2 x i32> %2
23 }
24
25 declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
26
27 define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
28 ; CHECK: pfacc
29 entry:
30 %0 = bitcast <2 x float> %a to x86_mmx
31 %1 = bitcast <2 x float> %b to x86_mmx
32 %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1)
33 %3 = bitcast x86_mmx %2 to <2 x float>
34 ret <2 x float> %3
35 }
36
37 declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
38
39 define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone {
40 ; CHECK: pfadd
41 entry:
42 %0 = bitcast <2 x float> %a to x86_mmx
43 %1 = bitcast <2 x float> %b to x86_mmx
44 %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1)
45 %3 = bitcast x86_mmx %2 to <2 x float>
46 ret <2 x float> %3
47 }
48
49 declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
50
51 define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone {
52 ; CHECK: pfcmpeq
53 entry:
54 %0 = bitcast <2 x float> %a to x86_mmx
55 %1 = bitcast <2 x float> %b to x86_mmx
56 %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1)
57 %3 = bitcast x86_mmx %2 to <2 x i32>
58 ret <2 x i32> %3
59 }
60
61 declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
62
63 define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone {
64 ; CHECK: pfcmpge
65 entry:
66 %0 = bitcast <2 x float> %a to x86_mmx
67 %1 = bitcast <2 x float> %b to x86_mmx
68 %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1)
69 %3 = bitcast x86_mmx %2 to <2 x i32>
70 ret <2 x i32> %3
71 }
72
73 declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
74
75 define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone {
76 ; CHECK: pfcmpgt
77 entry:
78 %0 = bitcast <2 x float> %a to x86_mmx
79 %1 = bitcast <2 x float> %b to x86_mmx
80 %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1)
81 %3 = bitcast x86_mmx %2 to <2 x i32>
82 ret <2 x i32> %3
83 }
84
85 declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
86
87 define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone {
88 ; CHECK: pfmax
89 entry:
90 %0 = bitcast <2 x float> %a to x86_mmx
91 %1 = bitcast <2 x float> %b to x86_mmx
92 %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1)
93 %3 = bitcast x86_mmx %2 to <2 x float>
94 ret <2 x float> %3
95 }
96
97 declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
98
99 define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone {
100 ; CHECK: pfmin
101 entry:
102 %0 = bitcast <2 x float> %a to x86_mmx
103 %1 = bitcast <2 x float> %b to x86_mmx
104 %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1)
105 %3 = bitcast x86_mmx %2 to <2 x float>
106 ret <2 x float> %3
107 }
108
109 declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
110
111 define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone {
112 ; CHECK: pfmul
113 entry:
114 %0 = bitcast <2 x float> %a to x86_mmx
115 %1 = bitcast <2 x float> %b to x86_mmx
116 %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1)
117 %3 = bitcast x86_mmx %2 to <2 x float>
118 ret <2 x float> %3
119 }
120
121 declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
122
123 define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone {
124 ; CHECK: pfrcp
125 entry:
126 %0 = bitcast <2 x float> %a to x86_mmx
127 %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0)
128 %2 = bitcast x86_mmx %1 to <2 x float>
129 ret <2 x float> %2
130 }
131
132 declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
133
134 define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
135 ; CHECK: pfrcpit1
136 entry:
137 %0 = bitcast <2 x float> %a to x86_mmx
138 %1 = bitcast <2 x float> %b to x86_mmx
139 %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1)
140 %3 = bitcast x86_mmx %2 to <2 x float>
141 ret <2 x float> %3
142 }
143
144 declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
145
146 define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone {
147 ; CHECK: pfrcpit2
148 entry:
149 %0 = bitcast <2 x float> %a to x86_mmx
150 %1 = bitcast <2 x float> %b to x86_mmx
151 %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1)
152 %3 = bitcast x86_mmx %2 to <2 x float>
153 ret <2 x float> %3
154 }
155
156 declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
157
158 define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone {
159 ; CHECK: pfrsqrt
160 entry:
161 %0 = bitcast <2 x float> %a to x86_mmx
162 %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0)
163 %2 = bitcast x86_mmx %1 to <2 x float>
164 ret <2 x float> %2
165 }
166
167 declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
168
169 define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone {
170 ; CHECK: pfrsqit1
171 entry:
172 %0 = bitcast <2 x float> %a to x86_mmx
173 %1 = bitcast <2 x float> %b to x86_mmx
174 %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1)
175 %3 = bitcast x86_mmx %2 to <2 x float>
176 ret <2 x float> %3
177 }
178
179 declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
180
181 define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone {
182 ; CHECK: pfsub
183 entry:
184 %0 = bitcast <2 x float> %a to x86_mmx
185 %1 = bitcast <2 x float> %b to x86_mmx
186 %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1)
187 %3 = bitcast x86_mmx %2 to <2 x float>
188 ret <2 x float> %3
189 }
190
191 declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
192
193 define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone {
194 ; CHECK: pfsubr
195 entry:
196 %0 = bitcast <2 x float> %a to x86_mmx
197 %1 = bitcast <2 x float> %b to x86_mmx
198 %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1)
199 %3 = bitcast x86_mmx %2 to <2 x float>
200 ret <2 x float> %3
201 }
202
203 declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
204
205 define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone {
206 ; CHECK: pi2fd
207 entry:
208 %0 = bitcast x86_mmx %a.coerce to <2 x i32>
209 %1 = bitcast <2 x i32> %0 to x86_mmx
210 %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
211 %3 = bitcast x86_mmx %2 to <2 x float>
212 ret <2 x float> %3
213 }
214
215 declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
216
217 define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone {
218 ; CHECK: pmulhrw
219 entry:
220 %0 = bitcast x86_mmx %a.coerce to <4 x i16>
221 %1 = bitcast x86_mmx %b.coerce to <4 x i16>
222 %2 = bitcast <4 x i16> %0 to x86_mmx
223 %3 = bitcast <4 x i16> %1 to x86_mmx
224 %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3)
225 %5 = bitcast x86_mmx %4 to <4 x i16>
226 ret <4 x i16> %5
227 }
228
229 declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
230
231 define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone {
232 ; CHECK: pf2iw
233 entry:
234 %0 = bitcast <2 x float> %a to x86_mmx
235 %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0)
236 %2 = bitcast x86_mmx %1 to <2 x i32>
237 ret <2 x i32> %2
238 }
239
240 declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
241
242 define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
243 ; CHECK: pfnacc
244 entry:
245 %0 = bitcast <2 x float> %a to x86_mmx
246 %1 = bitcast <2 x float> %b to x86_mmx
247 %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1)
248 %3 = bitcast x86_mmx %2 to <2 x float>
249 ret <2 x float> %3
250 }
251
252 declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
253
254 define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone {
255 ; CHECK: pfpnacc
256 entry:
257 %0 = bitcast <2 x float> %a to x86_mmx
258 %1 = bitcast <2 x float> %b to x86_mmx
259 %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1)
260 %3 = bitcast x86_mmx %2 to <2 x float>
261 ret <2 x float> %3
262 }
263
264 declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
265
266 define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone {
267 ; CHECK: pi2fw
268 entry:
269 %0 = bitcast x86_mmx %a.coerce to <2 x i32>
270 %1 = bitcast <2 x i32> %0 to x86_mmx
271 %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
272 %3 = bitcast x86_mmx %2 to <2 x float>
273 ret <2 x float> %3
274 }
275
276 declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
277
278 define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
279 ; CHECK: pswapd
280 entry:
281 %0 = bitcast <2 x float> %a to x86_mmx
282 %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
283 %2 = bitcast x86_mmx %1 to <2 x float>
284 ret <2 x float> %2
285 }
286
287 define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
288 ; CHECK: pswapd
289 entry:
290 %0 = bitcast <2 x i32> %a to x86_mmx
291 %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
292 %2 = bitcast x86_mmx %1 to <2 x i32>
293 ret <2 x i32> %2
294 }
295
296 declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone