llvm.org GIT mirror llvm / 4607999
[3.9.1] Merging r280837: [X86] Don't reduce the width of a vector multiply if the target doesn't support SSE2. This fixes PR30298, a regression introduced by rL272694; the fix is to bail out of the narrowing combine when the target lacks SSE2. Differential Revision: https://reviews.llvm.org/D24288 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_39@282753 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 3 years ago
2 changed file(s) with 45 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
2751527515 const X86Subtarget &Subtarget) {
2751627516 // pmulld is supported since SSE41. It is better to use pmulld
2751727517 // instead of pmullw+pmulhw.
27518 if (Subtarget.hasSSE41())
27518 // pmullw/pmulhw are not supported by SSE.
27519 if (Subtarget.hasSSE41() || !Subtarget.hasSSE2())
2751927520 return SDValue();
2752027521
2752127522 ShrinkMode Mode;
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=i386-pc-linux-gnu -mattr=+sse < %s | FileCheck %s
2
3 @c = external global i32*, align 8
4
5 define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) nounwind {
; NOTE(review): Regression test for PR30298. The RUN line above enables only
; +sse (no SSE2), so no vector integer multiply (pmullw/pmulhw/pmulld) is
; available; the <2 x i32> multiply below must therefore be lowered to scalar
; imull instructions, as the CHECK lines pin. Before this fix, the mul-width
; reduction combine fired without SSE2 and miscompiled/crashed (see rL272694).
6 ; CHECK-LABEL: mul_2xi8:
7 ; CHECK: # BB#0: # %entry
8 ; CHECK-NEXT: pushl %ebx
9 ; CHECK-NEXT: pushl %edi
10 ; CHECK-NEXT: pushl %esi
11 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
12 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
13 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
14 ; CHECK-NEXT: movl c, %esi
; Each <2 x i8> load is scalarized into two movzbl zero-extending loads, and
; the two lane products are computed with scalar imull.
15 ; CHECK-NEXT: movzbl 1(%edx,%ecx), %edi
16 ; CHECK-NEXT: movzbl (%edx,%ecx), %edx
17 ; CHECK-NEXT: movzbl 1(%eax,%ecx), %ebx
18 ; CHECK-NEXT: movzbl (%eax,%ecx), %eax
19 ; CHECK-NEXT: imull %edx, %eax
20 ; CHECK-NEXT: imull %edi, %ebx
21 ; CHECK-NEXT: movl %ebx, 4(%esi,%ecx,4)
22 ; CHECK-NEXT: movl %eax, (%esi,%ecx,4)
23 ; CHECK-NEXT: popl %esi
24 ; CHECK-NEXT: popl %edi
25 ; CHECK-NEXT: popl %ebx
26 ; CHECK-NEXT: retl
27 entry:
; IR shape: load <2 x i8> from %a[%index] and %b[%index], zext both to
; <2 x i32>, multiply, and store the product into @c[%index].
28 %pre = load i32*, i32** @c
29 %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
30 %tmp7 = bitcast i8* %tmp6 to <2 x i8>*
31 %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1
32 %tmp8 = zext <2 x i8> %wide.load to <2 x i32>
33 %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
34 %tmp11 = bitcast i8* %tmp10 to <2 x i8>*
35 %wide.load17 = load <2 x i8>, <2 x i8>* %tmp11, align 1
36 %tmp12 = zext <2 x i8> %wide.load17 to <2 x i32>
; nuw/nsw are safe: operands are zero-extended i8 values (max 255*255).
37 %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8
38 %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
39 %tmp15 = bitcast i32* %tmp14 to <2 x i32>*
40 store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
41 ret void
42 }