llvm.org GIT mirror llvm / release_50 test / CodeGen / X86 / avx512-rotate.ll
release_50

Tree @release_50 (Download .tar.gz)

avx512-rotate.ll @release_50 · raw · history · blame

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

; Masked 512-bit variable-rotate intrinsics exercised by this file.
; As used by the tests below, the operands are, in order:
;   1. the vector whose elements are rotated,
;   2. the per-element rotate amounts,
;   3. the vector supplying the result for lanes whose mask bit is clear,
;   4. the lane mask (i16 for 16 x i32, i8 for 8 x i64; -1 selects every lane).
; prolv rotates left and prorv rotates right; .d is the 16 x i32 form and .q
; is the 8 x i64 form.
declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; Tests showing replacement of variable rotates with immediate splat versions.

; Rotate-left of <16 x i32> by a splat of 5, issued three times: merge-masked
; into %x1, zero-masked, and with an all-ones mask. Every call is expected to
; be emitted in the immediate form (vprold $5) on both the knl and skx runs.
define <16 x i32> @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_rol_v16i32:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprold $5, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprold $5, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprold $5, %zmm0, %zmm0
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_rol_v16i32:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprold $5, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprold $5, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprold $5, %zmm0, %zmm0
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  ; Merge-masking: lanes not selected by %x2 come from %x1.
  %merged = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
  ; Zero-masking: lanes not selected by %x2 are zero.
  %zeroed = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> zeroinitializer, i16 %x2)
  ; All-ones mask: every lane is selected, so no mask register appears.
  %unmasked = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 -1)
  ; Fold all three results into the return value.
  %sum.masked = add <16 x i32> %merged, %zeroed
  %sum.all = add <16 x i32> %sum.masked, %unmasked
  ret <16 x i32> %sum.all
}

; Rotate-left of <8 x i64> by a splat of 5, issued three times: merge-masked
; into %x1, zero-masked, and with an all-ones mask. Every call is expected to
; be emitted in the immediate form (vprolq $5) on both the knl and skx runs.
define <8 x i64> @test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_rol_v8i64:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprolq $5, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprolq $5, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprolq $5, %zmm0, %zmm0
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_rol_v8i64:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprolq $5, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprolq $5, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprolq $5, %zmm0, %zmm0
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  ; Merge-masking: lanes not selected by %x2 come from %x1.
  %merged = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
  ; Zero-masking: lanes not selected by %x2 are zero.
  %zeroed = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> zeroinitializer, i8 %x2)
  ; All-ones mask: every lane is selected, so no mask register appears.
  %unmasked = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 -1)
  ; Fold all three results into the return value.
  %sum.masked = add <8 x i64> %merged, %zeroed
  %sum.all = add <8 x i64> %sum.masked, %unmasked
  ret <8 x i64> %sum.all
}

; Rotate-right of <16 x i32> by a splat of 5, issued three times: merge-masked
; into %x1, zero-masked, and with an all-ones mask. Every call is expected to
; be emitted in the immediate form (vprord $5) on both the knl and skx runs.
define <16 x i32> @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_ror_v16i32:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprord $5, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprord $5, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprord $5, %zmm0, %zmm0
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_ror_v16i32:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprord $5, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprord $5, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprord $5, %zmm0, %zmm0
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  ; Merge-masking: lanes not selected by %x2 come from %x1.
  %merged = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
  ; Zero-masking: lanes not selected by %x2 are zero.
  %zeroed = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> zeroinitializer, i16 %x2)
  ; All-ones mask: every lane is selected, so no mask register appears.
  %unmasked = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 -1)
  ; Fold all three results into the return value.
  %sum.masked = add <16 x i32> %merged, %zeroed
  %sum.all = add <16 x i32> %sum.masked, %unmasked
  ret <16 x i32> %sum.all
}

; Rotate-right of <8 x i64> by a splat of 5, issued three times: merge-masked
; into %x1, zero-masked, and with an all-ones mask. Every call is expected to
; be emitted in the immediate form (vprorq $5) on both the knl and skx runs.
define <8 x i64> @test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_ror_v8i64:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprorq $5, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprorq $5, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprorq $5, %zmm0, %zmm0
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_ror_v8i64:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprorq $5, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprorq $5, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprorq $5, %zmm0, %zmm0
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  ; Merge-masking: lanes not selected by %x2 come from %x1.
  %merged = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
  ; Zero-masking: lanes not selected by %x2 are zero.
  %zeroed = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> zeroinitializer, i8 %x2)
  ; All-ones mask: every lane is selected, so no mask register appears.
  %unmasked = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 -1)
  ; Fold all three results into the return value.
  %sum.masked = add <8 x i64> %merged, %zeroed
  %sum.all = add <8 x i64> %sum.masked, %unmasked
  ret <8 x i64> %sum.all
}

; Tests showing replacement of out-of-bounds variable rotates with in-bounds immediate splat versions.

; Rotate-left of <16 x i32> by splat amounts that exceed the 32-bit element
; width. The expected immediates are the amounts reduced modulo 32:
; 33 -> 1, -1 -> 31, 65534 -> 30.
define <16 x i32> @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_bounds_rol_v16i32:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprold $1, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprold $31, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprold $30, %zmm0, %zmm0
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_rol_v16i32:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprold $1, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprold $31, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprold $30, %zmm0, %zmm0
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  ; Amount 33, merge-masked into %x1; expected immediate is 1.
  %by33.merged = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
  ; Amount -1, zero-masked; expected immediate is 31.
  %byneg1.zeroed = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
  ; Amount 65534 with an all-ones mask; expected immediate is 30.
  %by65534.unmasked = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
  ; Fold all three results into the return value.
  %sum.masked = add <16 x i32> %by33.merged, %byneg1.zeroed
  %sum.all = add <16 x i32> %sum.masked, %by65534.unmasked
  ret <16 x i32> %sum.all
}

; Rotate-left of <8 x i64> by splat amounts that exceed the 64-bit element
; width. The expected immediates are the amounts reduced modulo 64:
; 65534 -> 62, 65 -> 1, -1 -> 63.
define <8 x i64> @test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_bounds_rol_v8i64:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprolq $62, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprolq $1, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprolq $63, %zmm0, %zmm0
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_rol_v8i64:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprolq $62, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprolq $1, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprolq $63, %zmm0, %zmm0
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  ; Amount 65534, merge-masked into %x1; expected immediate is 62.
  %by65534.merged = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
  ; Amount 65, zero-masked; expected immediate is 1.
  %by65.zeroed = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
  ; Amount -1 with an all-ones mask; expected immediate is 63.
  %byneg1.unmasked = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
  ; Fold all three results into the return value.
  %sum.masked = add <8 x i64> %by65534.merged, %by65.zeroed
  %sum.all = add <8 x i64> %sum.masked, %byneg1.unmasked
  ret <8 x i64> %sum.all
}

; Rotate-right of <16 x i32> by splat amounts that exceed the 32-bit element
; width. The expected immediates are the amounts reduced modulo 32:
; 33 -> 1, -1 -> 31, 65534 -> 30.
define <16 x i32> @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; KNL-LABEL: test_splat_bounds_ror_v16i32:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprord $1, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprord $31, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprord $30, %zmm0, %zmm0
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_ror_v16i32:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprord $1, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprord $31, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprord $30, %zmm0, %zmm0
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  ; Amount 33, merge-masked into %x1; expected immediate is 1.
  %by33.merged = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
  ; Amount -1, zero-masked; expected immediate is 31.
  %byneg1.zeroed = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
  ; Amount 65534 with an all-ones mask; expected immediate is 30.
  %by65534.unmasked = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
  ; Fold all three results into the return value.
  %sum.masked = add <16 x i32> %by33.merged, %byneg1.zeroed
  %sum.all = add <16 x i32> %sum.masked, %by65534.unmasked
  ret <16 x i32> %sum.all
}

; Rotate-right of <8 x i64> by splat amounts that exceed the 64-bit element
; width. The expected immediates are the amounts reduced modulo 64:
; 65534 -> 62, 65 -> 1, -1 -> 63.
define <8 x i64> @test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; KNL-LABEL: test_splat_bounds_ror_v8i64:
; KNL:       # BB#0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vprorq $62, %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vprorq $1, %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; KNL-NEXT:    vprorq $63, %zmm0, %zmm0
; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_splat_bounds_ror_v8i64:
; SKX:       # BB#0:
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vprorq $62, %zmm0, %zmm1 {%k1}
; SKX-NEXT:    vprorq $1, %zmm0, %zmm2 {%k1} {z}
; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
; SKX-NEXT:    vprorq $63, %zmm0, %zmm0
; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  ; Amount 65534, merge-masked into %x1; expected immediate is 62.
  %by65534.merged = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
  ; Amount 65, zero-masked; expected immediate is 1.
  %by65.zeroed = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
  ; Amount -1 with an all-ones mask; expected immediate is 63.
  %byneg1.unmasked = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
  ; Fold all three results into the return value.
  %sum.masked = add <8 x i64> %by65534.merged, %by65.zeroed
  %sum.all = add <8 x i64> %sum.masked, %byneg1.unmasked
  ret <8 x i64> %sum.all
}

; Tests showing constant folding of variable rotates whose operands are all constants.

; Rotate-left with every operand constant: each lane holds 1 and is rotated by
; 0, 1, 2, 63, 65, 65534, 65535 and -1 respectively, with an all-ones mask.
; The expected output is a single constant-vector load with no rotate
; instruction; the out-of-range amounts give the same lanes as 1, 62, 63, 63.
define <8 x i64> @test_fold_rol_v8i64() {
; CHECK-LABEL: test_fold_rol_v8i64:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps {{.*#+}} zmm0 = [1,2,4,9223372036854775808,2,4611686018427387904,9223372036854775808,9223372036854775808]
; CHECK-NEXT:    retq
  %folded = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> <i64 0, i64 1, i64 2, i64 63, i64 65, i64 65534, i64 65535, i64 -1>, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %folded
}

; Rotate-right with every operand constant: each lane holds 1 and is rotated by
; 0, 1, 2, 63, 65, 65534, 65535 and -1 respectively, with an all-ones mask.
; The expected output is a single constant-vector load with no rotate
; instruction; the out-of-range amounts give the same lanes as 1, 62, 63, 63.
define <8 x i64> @test_fold_ror_v8i64() {
; CHECK-LABEL: test_fold_ror_v8i64:
; CHECK:       # BB#0:
; CHECK-NEXT:    vmovaps {{.*#+}} zmm0 = [1,9223372036854775808,4611686018427387904,2,9223372036854775808,4,2,2]
; CHECK-NEXT:    retq
  %folded = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> <i64 0, i64 1, i64 2, i64 63, i64 65, i64 65534, i64 65535, i64 -1>, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %folded
}