llvm.org GIT mirror, llvm commit e71cfb9 (r301744)

[X86][SSE] Add initial <2 x half> tests for PR31088

As discussed on D32391, test X86/X64 SSE2 and X64 F16C.

Author: Simon Pilgrim
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@301744 91177308-0d34-0410-b5e6-96231b3b80d8
1 changed file with 105 additions and 0 deletions.
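The diff below exercises fadd on <2 x half>. In the SSE2 runs the lanes are scalarized and each half value is widened to float and narrowed back through the soft-float runtime helpers __gnu_f2h_ieee / __gnu_h2f_ieee, while the F16C run keeps the conversions in vector registers with vcvtps2ph / vcvtph2ps. As a minimal C sketch (not part of the commit; the helper prototypes are the usual compiler-rt/libgcc ones, assumed here, and fadd_f16_lane is a made-up name), a single-lane half addition built from those helpers looks roughly like this:

#include <stdint.h>

/* Assumed prototypes of the runtime conversion helpers that show up in the
   SSE2 checks below (IEEE half carried in a 16-bit integer). */
extern float __gnu_h2f_ieee(uint16_t h);      /* half -> float */
extern uint16_t __gnu_f2h_ieee(float f);      /* float -> half */

uint16_t fadd_f16_lane(uint16_t a, uint16_t b) {
  /* Widen both halves to float, add as float, narrow the result back. */
  return __gnu_f2h_ieee(__gnu_h2f_ieee(a) + __gnu_h2f_ieee(b));
}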
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+f16c | FileCheck %s --check-prefix=F16C

define <2 x half> @ir_fadd_v2f16(<2 x half> %arg0, <2 x half> %arg1) nounwind {
; X86-LABEL: ir_fadd_v2f16:
; X86: # BB#0:
; X86-NEXT: subl $64, %esp
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movss %xmm0, (%esp)
; X86-NEXT: calll __gnu_f2h_ieee
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movss %xmm0, (%esp)
; X86-NEXT: calll __gnu_f2h_ieee
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movss %xmm0, (%esp)
; X86-NEXT: calll __gnu_f2h_ieee
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movss %xmm0, (%esp)
; X86-NEXT: calll __gnu_f2h_ieee
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: calll __gnu_h2f_ieee
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: addss {{[0-9]+}}(%esp), %xmm1
; X86-NEXT: addss {{[0-9]+}}(%esp), %xmm0
; X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; X86-NEXT: flds {{[0-9]+}}(%esp)
; X86-NEXT: flds {{[0-9]+}}(%esp)
; X86-NEXT: addl $64, %esp
; X86-NEXT: retl
;
; X64-LABEL: ir_fadd_v2f16:
; X64: # BB#0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: movss %xmm2, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT: movss %xmm1, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT: movaps %xmm3, %xmm0
; X64-NEXT: callq __gnu_f2h_ieee
; X64-NEXT: movzwl %ax, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT: movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-NEXT: callq __gnu_f2h_ieee
; X64-NEXT: movzwl %ax, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT: movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-NEXT: callq __gnu_f2h_ieee
; X64-NEXT: movzwl %ax, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill
; X64-NEXT: movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-NEXT: callq __gnu_f2h_ieee
; X64-NEXT: movzwl %ax, %edi
; X64-NEXT: callq __gnu_h2f_ieee
; X64-NEXT: addss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
; X64-NEXT: movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload
; X64-NEXT: # xmm1 = mem[0],zero,zero,zero
; X64-NEXT: addss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
;
; F16C-LABEL: ir_fadd_v2f16:
; F16C: # BB#0:
; F16C-NEXT: vcvtps2ph $4, %xmm3, %xmm3
; F16C-NEXT: vcvtph2ps %xmm3, %xmm3
; F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; F16C-NEXT: vcvtps2ph $4, %xmm2, %xmm2
; F16C-NEXT: vcvtph2ps %xmm2, %xmm2
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vaddss %xmm2, %xmm0, %xmm0
; F16C-NEXT: vaddss %xmm3, %xmm1, %xmm1
; F16C-NEXT: retq
  %retval = fadd <2 x half> %arg0, %arg1
  ret <2 x half> %retval
}
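For comparison with the libcall-based SSE2 lowering, here is a minimal intrinsics sketch (not part of the commit; fadd_f16_f16c is a made-up name) of a single-lane half addition built from the same F16C conversion instructions that appear in the checks above. Build with -mf16c.

#include <immintrin.h>
#include <stdint.h>

uint16_t fadd_f16_f16c(uint16_t a, uint16_t b) {
  __m128 fa = _mm_cvtph_ps(_mm_cvtsi32_si128(a));  /* vcvtph2ps: half -> float */
  __m128 fb = _mm_cvtph_ps(_mm_cvtsi32_si128(b));
  __m128 sum = _mm_add_ss(fa, fb);                 /* vaddss */
  /* vcvtps2ph with immediate 4 (_MM_FROUND_CUR_DIRECTION), matching the "$4"
     operand in the F16C checks above. */
  __m128i h = _mm_cvtps_ph(sum, 4);
  return (uint16_t)_mm_cvtsi128_si32(h);
}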