llvm.org GIT mirror llvm / 7cb1b5f
Replace more uses of sse41 with sse4.1. llc using the host cpu features and *warning* on unknown features is probably not a good thing :-( git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189144 91177308-0d34-0410-b5e6-96231b3b80d8 Rafael Espindola 6 years ago
39 changed file(s) with 46 addition(s) and 46 deletion(s). Raw diff Collapse all Expand all
None ; RUN: llc < %s -mattr=+sse41
0 ; RUN: llc < %s -mattr=+sse4.1
11 ; rdar://5886601
22 ; gcc testsuite: gcc.target/i386/sse4_1-pblendw.c
33 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
0 ; REQUIRES: asserts
1 ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "4 machine-licm"
2 ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s
1 ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "4 machine-licm"
2 ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s
33 ; rdar://6627786
44 ; rdar://7792037
55
0 ; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \
1 ; RUN: -mcpu=generic -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
1 ; RUN: -mcpu=generic -disable-fp-elim -mattr=-sse4.1,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
22 ; RUN: FileCheck %s
33 ; rdar://6808032
44
None ;RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s
0 ;RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s
11
22 ;CHECK: @max
33 ;CHECK: cmplepd
None ; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41
0 ; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1
11
22 ; Make sure we are not crashing on this code.
33
None ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=-sse41
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=-sse4.1
11
22 target triple = "x86_64-unknown-linux-gnu"
33
None ; RUN: llc -march=x86-64 -mattr=+sse41 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
1 ; RUN: llc -march=x86-64 -mattr=-sse41 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
0 ; RUN: llc -march=x86-64 -mattr=+sse4.1 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
1 ; RUN: llc -march=x86-64 -mattr=-sse4.1 -mcpu=penryn < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
22 ; Test case for r146671
33 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
44 target triple = "x86_64-apple-macosx10.7"
None ; RUN: llc -march=x86-64 -mattr=-sse42,+sse41 < %s | FileCheck %s
0 ; RUN: llc -march=x86-64 -mattr=-sse42,+sse4.1 < %s | FileCheck %s
11 ; Make sure we don't load from the location pointed to by %p
22 ; twice: it has non-obvious performance implications, and
33 ; the relevant transformation doesn't know how to update
None ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 | FileCheck %s
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
11
22
33 ; In this test we check that sign-extend of the mask bit is performed by
None ; RUN: llc < %s -mcpu=corei7 -mattr=-sse2,-sse41 -verify-machineinstrs
0 ; RUN: llc < %s -mcpu=corei7 -mattr=-sse2,-sse4.1 -verify-machineinstrs
11 target triple = "x86_64-unknown-linux-gnu"
22
33 ; PR10503
None ; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx -soft-float=0 | FileCheck %s --check-prefix=CHECK-HARD-FLOAT
1 ; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx -soft-float=1 | FileCheck %s --check-prefix=CHECK-SOFT-FLOAT
0 ; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx -soft-float=0 | FileCheck %s --check-prefix=CHECK-HARD-FLOAT
1 ; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx -soft-float=1 | FileCheck %s --check-prefix=CHECK-SOFT-FLOAT
22
33 target triple = "x86_64-unknown-linux-gnu"
44
None ; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s
0 ; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
11
22 ; rdar://12721174
33 ; We should not fold movss into pshufd since pshufd expects m128 while movss
None ; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X32
1 ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64
2 ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64
0 ; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X32
1 ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X64
2 ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X64
33
44 define i32 @test1() nounwind readonly {
55 entry:
None ; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
0 ; RUN: llc < %s -march=x86 -mattr=+sse4.1 -o %t
11 ; RUN: grep xorps %t | count 1
22
33 ; Test that when we don't -enable-unsafe-fp-math, we don't do the optimization
None ; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s -mattr=+sse2,+sse41 | FileCheck %s
0 ; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s -mattr=+sse2,+sse4.1 | FileCheck %s
11
22 ; CHECK: func_4_8
33 ; A single memory write
None ; RUN: llc < %s -march=x86 -mattr=sse41 -mcpu=nehalem -stack-alignment=16 > %t
0 ; RUN: llc < %s -march=x86 -mattr=sse4.1 -mcpu=nehalem -stack-alignment=16 > %t
11 ; RUN: grep pmul %t | count 12
22 ; RUN: grep mov %t | count 11
33
None ; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse41 -asm-verbose=0 | FileCheck %s
1 ; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse41 -asm-verbose=0 | FileCheck %s -check-prefix=WIN64
0 ; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse4.1 -asm-verbose=0 | FileCheck %s
1 ; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse4.1 -asm-verbose=0 | FileCheck %s -check-prefix=WIN64
22
33 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
44 ; CHECK-LABEL: test1:
None ; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
0 ; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1
11
22 ; No check in a crash test
33
None ; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
0 ; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1
11
22 ; No check in a crash test
33
None ; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
0 ; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1
11
22 ; No check in a crash test
33
None ; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse41
0 ; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse4.1
11
22 ; No check in a crash test
33
None ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse41,-avx < %s | FileCheck %s --check-prefix SSE41
0 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1,-avx < %s | FileCheck %s --check-prefix SSE41
11 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2 < %s | FileCheck %s --check-prefix AVX
22
33 define i32 @veccond128(<4 x i32> %input) {
None ; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s
0 ; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse4.1 | FileCheck -check-prefix=CHECK-SSE %s
11 ; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s
22
33 define float @test1(float %x) nounwind {
None ; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s
0 ; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s
11
22 ; CHECK: vsel_float
33 ; CHECK: pandn
None ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse41 | FileCheck %s
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
11
22 ;CHECK-LABEL: vsel_float:
33 ;CHECK: blendvps
None ; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse41 -mcpu=penryn | FileCheck %s -check-prefix=X32
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse41 -mcpu=penryn | FileCheck %s -check-prefix=X64
0 ; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X32
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X64
22
33 @g16 = external global i16
44
None ; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s
0 ; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
11
22 ;CHECK-LABEL: load_2_i8:
33 ; A single 16-bit load
None ; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse41 -o %t
0 ; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse4.1 -o %t
11 ; RUN: not grep extractps %t
22 ; RUN: not grep pextrd %t
33 ; RUN: not grep pshufd %t
None ; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
0 ; RUN: llc < %s -march=x86 -mattr=+sse4.1,-avx | FileCheck %s
11 ; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s
22
33 ; PR11674
None ; RUN: llc < %s -march=x86 -mattr=+sse41 -o %t
0 ; RUN: llc < %s -march=x86 -mattr=+sse4.1 -o %t
11
22 ; tests variable insert and extract of a 4 x i32
33
None ; RUN: llc < %s -march=x86 -mattr=+sse41 > %t
0 ; RUN: llc < %s -march=x86 -mattr=+sse4.1 > %t
11 ; RUN: grep pinsrd %t | count 1
22
33 define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind {
None ; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
0 ; RUN: llc < %s -march=x86 -mattr=+sse4.1 | FileCheck %s
11
22 define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
33 entry:
None ; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
0 ; RUN: llc < %s -march=x86 -mattr=sse4.1 -o %t
11 ; RUN: grep unpcklps %t | count 3
22 ; RUN: grep unpckhps %t | count 1
33
None ; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s
0 ; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse4.1 | FileCheck %s
11
22 ; ModuleID = 'vec_shuffle-27.bc'
33 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
None ; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=sse41 | FileCheck %s
0 ; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=sse4.1 | FileCheck %s
11
22 define <8 x i16> @shuf6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
33 ; CHECK: pshufb
None ; RUN: llc <%s -march=x86 -mcpu=penryn -mattr=sse41 | FileCheck %s
0 ; RUN: llc <%s -march=x86 -mcpu=penryn -mattr=sse4.1 | FileCheck %s
11
22 ; Splat test for v8i16
33 define <8 x i16> @shuf_8i16_0(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
None ; RUN: llc < %s -march=x86 -mattr=+sse,+sse2,+sse41 | FileCheck %s
0 ; RUN: llc < %s -march=x86 -mattr=+sse,+sse2,+sse4.1 | FileCheck %s
11
22 target datalayout = "e-p:32:32"
33 target triple = "i686-apple-darwin8.7.2"
None ; RUN: llc < %s -march=x86-64 -mattr=+sse41
0 ; RUN: llc < %s -march=x86-64 -mattr=+sse4.1
11
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
33 target triple = "x86_64-apple-darwin11.0.0"
None ; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
0 ; RUN: llc < %s -march=x86 -mattr=+sse4.1 | FileCheck %s
11 ; CHECK: movd
22
33 ; Test bit convert that requires widening in the operand.