llvm.org GIT mirror llvm / 2a4a956
[X86] Add 64-bit int to float/double conversion with AVX to X86FastISel::X86SelectSIToFP Summary: [X86] Teach fast isel to handle i64 sitofp with AVX. For some reason we only handled i32 sitofp with AVX. But when only SSE is available we already support i64, so we should do the same with AVX. Also add i686 command lines for the 32-bit tests. The 64-bit tests are in a separate file to avoid a fast-isel abort failure in 32-bit mode. Reviewers: RKSimon, zvi Reviewed By: RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D39450 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317102 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 2 years ago
3 changed file(s) with 184 addition(s) and 5 deletion(s). Raw diff Collapse all Expand all
24092409 if (!Subtarget->hasAVX())
24102410 return false;
24112411
2412 if (!I->getOperand(0)->getType()->isIntegerTy(32))
2412 Type *InTy = I->getOperand(0)->getType();
2413 if (!InTy->isIntegerTy(32) && !InTy->isIntegerTy(64))
24132414 return false;
24142415
24152416 // Select integer to float/double conversion.
24222423
24232424 if (I->getType()->isDoubleTy()) {
24242425 // sitofp int -> double
2425 Opcode = X86::VCVTSI2SDrr;
2426 Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI2SD64rr : X86::VCVTSI2SDrr;
24262427 RC = &X86::FR64RegClass;
24272428 } else if (I->getType()->isFloatTy()) {
24282429 // sitofp int -> float
2429 Opcode = X86::VCVTSI2SSrr;
2430 Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI2SS64rr : X86::VCVTSI2SSrr;
24302431 RC = &X86::FR32RegClass;
24312432 } else
24322433 return false;
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
2 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX
3
4
5 define double @long_to_double_rr(i64 %a) {
6 ; SSE2-LABEL: long_to_double_rr:
7 ; SSE2: # BB#0: # %entry
8 ; SSE2-NEXT: cvtsi2sdq %rdi, %xmm0
9 ; SSE2-NEXT: retq
10 ;
11 ; AVX-LABEL: long_to_double_rr:
12 ; AVX: # BB#0: # %entry
13 ; AVX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0
14 ; AVX-NEXT: retq
15 entry:
16 %0 = sitofp i64 %a to double
17 ret double %0
18 }
19
20 define double @long_to_double_rm(i64* %a) {
21 ; SSE2-LABEL: long_to_double_rm:
22 ; SSE2: # BB#0: # %entry
23 ; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0
24 ; SSE2-NEXT: retq
25 ;
26 ; AVX-LABEL: long_to_double_rm:
27 ; AVX: # BB#0: # %entry
28 ; AVX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
29 ; AVX-NEXT: retq
30 entry:
31 %0 = load i64, i64* %a
32 %1 = sitofp i64 %0 to double
33 ret double %1
34 }
35
36 define float @long_to_float_rr(i64 %a) {
37 ; SSE2-LABEL: long_to_float_rr:
38 ; SSE2: # BB#0: # %entry
39 ; SSE2-NEXT: cvtsi2ssq %rdi, %xmm0
40 ; SSE2-NEXT: retq
41 ;
42 ; AVX-LABEL: long_to_float_rr:
43 ; AVX: # BB#0: # %entry
44 ; AVX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0
45 ; AVX-NEXT: retq
46 entry:
47 %0 = sitofp i64 %a to float
48 ret float %0
49 }
50
51 define float @long_to_float_rm(i64* %a) {
52 ; SSE2-LABEL: long_to_float_rm:
53 ; SSE2: # BB#0: # %entry
54 ; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
55 ; SSE2-NEXT: retq
56 ;
57 ; AVX-LABEL: long_to_float_rm:
58 ; AVX: # BB#0: # %entry
59 ; AVX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
60 ; AVX-NEXT: retq
61 entry:
62 %0 = load i64, i64* %a
63 %1 = sitofp i64 %0 to float
64 ret float %1
65 }
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
2 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX
1 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=SSE2
2 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX
3 ; RUN: llc -mtriple=i686-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=SSE2_X86
4 ; RUN: llc -mtriple=i686-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX_X86
35
46
57 define double @int_to_double_rr(i32 %a) {
1214 ; AVX: # BB#0: # %entry
1315 ; AVX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0
1416 ; AVX-NEXT: retq
17 ;
18 ; SSE2_X86-LABEL: int_to_double_rr:
19 ; SSE2_X86: # BB#0: # %entry
20 ; SSE2_X86-NEXT: pushl %ebp
21 ; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
22 ; SSE2_X86-NEXT: .cfi_offset %ebp, -8
23 ; SSE2_X86-NEXT: movl %esp, %ebp
24 ; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp
25 ; SSE2_X86-NEXT: andl $-8, %esp
26 ; SSE2_X86-NEXT: subl $8, %esp
27 ; SSE2_X86-NEXT: movl 8(%ebp), %eax
28 ; SSE2_X86-NEXT: cvtsi2sdl %eax, %xmm0
29 ; SSE2_X86-NEXT: movsd %xmm0, (%esp)
30 ; SSE2_X86-NEXT: fldl (%esp)
31 ; SSE2_X86-NEXT: movl %ebp, %esp
32 ; SSE2_X86-NEXT: popl %ebp
33 ; SSE2_X86-NEXT: retl
34 ;
35 ; AVX_X86-LABEL: int_to_double_rr:
36 ; AVX_X86: # BB#0: # %entry
37 ; AVX_X86-NEXT: pushl %ebp
38 ; AVX_X86-NEXT: .cfi_def_cfa_offset 8
39 ; AVX_X86-NEXT: .cfi_offset %ebp, -8
40 ; AVX_X86-NEXT: movl %esp, %ebp
41 ; AVX_X86-NEXT: .cfi_def_cfa_register %ebp
42 ; AVX_X86-NEXT: andl $-8, %esp
43 ; AVX_X86-NEXT: subl $8, %esp
44 ; AVX_X86-NEXT: vcvtsi2sdl 8(%ebp), %xmm0, %xmm0
45 ; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
46 ; AVX_X86-NEXT: fldl (%esp)
47 ; AVX_X86-NEXT: movl %ebp, %esp
48 ; AVX_X86-NEXT: popl %ebp
49 ; AVX_X86-NEXT: retl
1550 entry:
1651 %0 = sitofp i32 %a to double
1752 ret double %0
2762 ; AVX: # BB#0: # %entry
2863 ; AVX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
2964 ; AVX-NEXT: retq
65 ;
66 ; SSE2_X86-LABEL: int_to_double_rm:
67 ; SSE2_X86: # BB#0: # %entry
68 ; SSE2_X86-NEXT: pushl %ebp
69 ; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
70 ; SSE2_X86-NEXT: .cfi_offset %ebp, -8
71 ; SSE2_X86-NEXT: movl %esp, %ebp
72 ; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp
73 ; SSE2_X86-NEXT: andl $-8, %esp
74 ; SSE2_X86-NEXT: subl $8, %esp
75 ; SSE2_X86-NEXT: movl 8(%ebp), %eax
76 ; SSE2_X86-NEXT: cvtsi2sdl (%eax), %xmm0
77 ; SSE2_X86-NEXT: movsd %xmm0, (%esp)
78 ; SSE2_X86-NEXT: fldl (%esp)
79 ; SSE2_X86-NEXT: movl %ebp, %esp
80 ; SSE2_X86-NEXT: popl %ebp
81 ; SSE2_X86-NEXT: retl
82 ;
83 ; AVX_X86-LABEL: int_to_double_rm:
84 ; AVX_X86: # BB#0: # %entry
85 ; AVX_X86-NEXT: pushl %ebp
86 ; AVX_X86-NEXT: .cfi_def_cfa_offset 8
87 ; AVX_X86-NEXT: .cfi_offset %ebp, -8
88 ; AVX_X86-NEXT: movl %esp, %ebp
89 ; AVX_X86-NEXT: .cfi_def_cfa_register %ebp
90 ; AVX_X86-NEXT: andl $-8, %esp
91 ; AVX_X86-NEXT: subl $8, %esp
92 ; AVX_X86-NEXT: movl 8(%ebp), %eax
93 ; AVX_X86-NEXT: vcvtsi2sdl (%eax), %xmm0, %xmm0
94 ; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
95 ; AVX_X86-NEXT: fldl (%esp)
96 ; AVX_X86-NEXT: movl %ebp, %esp
97 ; AVX_X86-NEXT: popl %ebp
98 ; AVX_X86-NEXT: retl
3099 entry:
31100 %0 = load i32, i32* %a
32101 %1 = sitofp i32 %0 to double
43112 ; AVX: # BB#0: # %entry
44113 ; AVX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0
45114 ; AVX-NEXT: retq
115 ;
116 ; SSE2_X86-LABEL: int_to_float_rr:
117 ; SSE2_X86: # BB#0: # %entry
118 ; SSE2_X86-NEXT: pushl %eax
119 ; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
120 ; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
121 ; SSE2_X86-NEXT: cvtsi2ssl %eax, %xmm0
122 ; SSE2_X86-NEXT: movss %xmm0, (%esp)
123 ; SSE2_X86-NEXT: flds (%esp)
124 ; SSE2_X86-NEXT: popl %eax
125 ; SSE2_X86-NEXT: retl
126 ;
127 ; AVX_X86-LABEL: int_to_float_rr:
128 ; AVX_X86: # BB#0: # %entry
129 ; AVX_X86-NEXT: pushl %eax
130 ; AVX_X86-NEXT: .cfi_def_cfa_offset 8
131 ; AVX_X86-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
132 ; AVX_X86-NEXT: vmovss %xmm0, (%esp)
133 ; AVX_X86-NEXT: flds (%esp)
134 ; AVX_X86-NEXT: popl %eax
135 ; AVX_X86-NEXT: retl
46136 entry:
47137 %0 = sitofp i32 %a to float
48138 ret float %0
58148 ; AVX: # BB#0: # %entry
59149 ; AVX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
60150 ; AVX-NEXT: retq
151 ;
152 ; SSE2_X86-LABEL: int_to_float_rm:
153 ; SSE2_X86: # BB#0: # %entry
154 ; SSE2_X86-NEXT: pushl %eax
155 ; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
156 ; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
157 ; SSE2_X86-NEXT: cvtsi2ssl (%eax), %xmm0
158 ; SSE2_X86-NEXT: movss %xmm0, (%esp)
159 ; SSE2_X86-NEXT: flds (%esp)
160 ; SSE2_X86-NEXT: popl %eax
161 ; SSE2_X86-NEXT: retl
162 ;
163 ; AVX_X86-LABEL: int_to_float_rm:
164 ; AVX_X86: # BB#0: # %entry
165 ; AVX_X86-NEXT: pushl %eax
166 ; AVX_X86-NEXT: .cfi_def_cfa_offset 8
167 ; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
168 ; AVX_X86-NEXT: vcvtsi2ssl (%eax), %xmm0, %xmm0
169 ; AVX_X86-NEXT: vmovss %xmm0, (%esp)
170 ; AVX_X86-NEXT: flds (%esp)
171 ; AVX_X86-NEXT: popl %eax
172 ; AVX_X86-NEXT: retl
61173 entry:
62174 %0 = load i32, i32* %a
63175 %1 = sitofp i32 %0 to float