llvm.org GIT mirror llvm / release_90 test / CodeGen / X86 / commute-blend-sse41.ll
release_90

Tree @release_90 (Download .tar.gz)

commute-blend-sse41.ll @release_90

4a05d35
9ed230b
4ad0654
a593998
4a05d35
ca0df55
4a05d35
 
7c9c6ed
4ad0654
 
 
 
 
a593998
4a05d35
ca0df55
4a05d35
 
7c9c6ed
8df1c6e
4ad0654
 
 
 
a593998
4a05d35
ca0df55
52f615d
4a05d35
7c9c6ed
4ad0654
 
 
 
a593998
 
 
 
ed9ed6e
a593998
 
 
ed9ed6e
a593998
 
 
9ed230b
 
 
 
 
 
 
 
 
75aa1cd
 
 
 
 
9ed230b
 
 
 
 
 
 
 
 
 
 
 
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown -mattr=+sse4.1,-slow-unaligned-mem-16 | FileCheck %s

; pblendw intrinsic called with the loaded vector as its first source and the
; register argument %a as its second, immediate 17. The expected output keeps
; the load folded as the memory operand of the blend: words 0 and 4 come from
; xmm0 and the remaining words come from memory.
define <8 x i16> @commute_fold_pblendw(<8 x i16> %a, <8 x i16>* %b) {
; CHECK-LABEL: commute_fold_pblendw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],mem[1,2,3],xmm0[4],mem[5,6,7]
; CHECK-NEXT:    retq
  %mem = load <8 x i16>, <8 x i16>* %b
  %blend = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %mem, <8 x i16> %a, i8 17)
  ret <8 x i16> %blend
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone

; blendps intrinsic called with the loaded vector as its first source and the
; register argument %a as its second, immediate 5. The expected output keeps
; the load folded as the memory operand: lanes 0 and 2 come from xmm0 and
; lanes 1 and 3 come from memory.
define <4 x float> @commute_fold_blendps(<4 x float> %a, <4 x float>* %b) {
; CHECK-LABEL: commute_fold_blendps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3]
; CHECK-NEXT:    retq
  %mem = load <4 x float>, <4 x float>* %b
  %blend = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %mem, <4 x float> %a, i8 5)
  ret <4 x float> %blend
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone

; blendpd intrinsic called with the loaded vector as its first source and the
; register argument %a as its second, immediate 1. Note that the expected
; output is a blendps over 32-bit lanes (xmm0[0,1], mem[2,3]) rather than a
; blendpd; the load is still folded as the memory operand.
define <2 x double> @commute_fold_blendpd(<2 x double> %a, <2 x double>* %b) {
; CHECK-LABEL: commute_fold_blendpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
; CHECK-NEXT:    retq
  %mem = load <2 x double>, <2 x double>* %b
  %blend = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %mem, <2 x double> %a, i8 1)
  ret <2 x double> %blend
}
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone

; Generic shufflevector (no intrinsic) that takes elements 0-2 from the loaded
; vector and element 3 from %b + %b. The expected output folds the load into a
; pblendw whose memory operand supplies words 0-5 while xmm0 supplies words
; 6 and 7.
define <4 x i32> @commute_fold_blend_v4i32(<4 x i32>* %a, <4 x i32> %b) {
; CHECK-LABEL: commute_fold_blend_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    paddd %xmm0, %xmm0
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = mem[0,1,2,3,4,5],xmm0[6,7]
; CHECK-NEXT:    retq
  %mem = load <4 x i32>, <4 x i32>* %a
  ; The integer add forces the integer execution domain.
  %dbl = add <4 x i32> %b, %b
  %mix = shufflevector <4 x i32> %mem, <4 x i32> %dbl, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %mix
}

; Test case for a crash that occurred when a blendi instruction was commuted
; to movsd during the two-address instruction pass. The change in the number
; of operands caused a bad call to getOperand, which led to the revert in
; r354713.
; Destination layout used by @baz: two consecutive i64 fields.
%struct.spam = type { i64, i64 }

; Loads a <2 x i64> from %arg, then stores lane 0 of the loaded value into
; field 0 of %arg1 and lane 1 of the masked value (loaded value AND <3, 3>)
; into field 1. The expected output below merges the two lanes with a single
; register-register blendps and writes both fields with one movups.
; The IR block name "bb" appears in the expected basic-block comment, so it
; must not be renamed without regenerating the assertions.
define void @baz(<2 x i64>* %arg, %struct.spam* %arg1) optsize {
; CHECK-LABEL: baz:
; CHECK:       # %bb.0: # %bb
; CHECK-NEXT:    movaps (%rdi), %xmm0
; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [3,3]
; CHECK-NEXT:    andps %xmm0, %xmm1
; CHECK-NEXT:    blendps {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3]
; CHECK-NEXT:    movups %xmm1, (%rsi)
; CHECK-NEXT:    retq
bb:
  %tmp = load <2 x i64>, <2 x i64>* %arg, align 16
  ; Mask both lanes with 3; only lane 1 of this result is stored below.
  %tmp2 = and <2 x i64> %tmp, <i64 3, i64 3>
  ; Field 0 receives lane 0 of the unmasked load.
  %tmp3 = getelementptr inbounds %struct.spam, %struct.spam* %arg1, i64 0, i32 0
  %tmp4 = extractelement <2 x i64> %tmp, i32 0
  store i64 %tmp4, i64* %tmp3, align 8
  ; Field 1 receives lane 1 of the masked value.
  %tmp5 = getelementptr inbounds %struct.spam, %struct.spam* %arg1, i64 0, i32 1
  %tmp6 = extractelement <2 x i64> %tmp2, i32 1
  store i64 %tmp6, i64* %tmp5, align 8
  ret void
}