; Source: llvm.org GIT mirror, llvm tree @a813b38
; File:   test/CodeGen/AArch64/arm64-popcnt.ll @a813b38

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; RUN: llc < %s -mtriple=aarch64-eabi -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
; RUN: llc < %s -mtriple=armv8a -mattr=+neon | FileCheck %s -check-prefix=CHECK-ARM8A-NEON

; Population count of a scalar i32 via llvm.ctpop.i32.
; Expected lowering per run line:
;  - default AArch64 run: move the value into a vector register, count bits
;    per byte (cnt.8b) and sum the bytes (uaddlv.8b);
;  - AArch64 run with NEON disabled: scalar mask/shift/add/multiply expansion;
;  - armv8a run: the same scalar expansion on 32-bit registers, even though
;    that run passes +neon.
define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
; CHECK-LABEL: cnt32_advsimd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    cnt.8b v0, v0
; CHECK-NEXT:    uaddlv.8b h0, v0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: cnt32_advsimd:
; CHECK-NONEON:       // %bb.0:
; CHECK-NONEON-NEXT:    lsr w8, w0, #1
; CHECK-NONEON-NEXT:    and w8, w8, #0x55555555
; CHECK-NONEON-NEXT:    sub w8, w0, w8
; CHECK-NONEON-NEXT:    and w9, w8, #0x33333333
; CHECK-NONEON-NEXT:    lsr w8, w8, #2
; CHECK-NONEON-NEXT:    and w8, w8, #0x33333333
; CHECK-NONEON-NEXT:    add w8, w9, w8
; CHECK-NONEON-NEXT:    add w8, w8, w8, lsr #4
; CHECK-NONEON-NEXT:    and w8, w8, #0xf0f0f0f
; CHECK-NONEON-NEXT:    mov w9, #16843009
; CHECK-NONEON-NEXT:    mul w8, w8, w9
; CHECK-NONEON-NEXT:    lsr w0, w8, #24
; CHECK-NONEON-NEXT:    ret
;
; CHECK-ARM8A-NEON-LABEL: cnt32_advsimd:
; CHECK-ARM8A-NEON:       @ %bb.0:
; CHECK-ARM8A-NEON-NEXT:    movw r1, #21845
; CHECK-ARM8A-NEON-NEXT:    movt r1, #21845
; CHECK-ARM8A-NEON-NEXT:    and r1, r1, r0, lsr #1
; CHECK-ARM8A-NEON-NEXT:    sub r0, r0, r1
; CHECK-ARM8A-NEON-NEXT:    movw r1, #13107
; CHECK-ARM8A-NEON-NEXT:    movt r1, #13107
; CHECK-ARM8A-NEON-NEXT:    and r2, r0, r1
; CHECK-ARM8A-NEON-NEXT:    and r0, r1, r0, lsr #2
; CHECK-ARM8A-NEON-NEXT:    movw r1, #3855
; CHECK-ARM8A-NEON-NEXT:    add r0, r2, r0
; CHECK-ARM8A-NEON-NEXT:    movt r1, #3855
; CHECK-ARM8A-NEON-NEXT:    add r0, r0, r0, lsr #4
; CHECK-ARM8A-NEON-NEXT:    and r0, r0, r1
; CHECK-ARM8A-NEON-NEXT:    movw r1, #257
; CHECK-ARM8A-NEON-NEXT:    movt r1, #257
; CHECK-ARM8A-NEON-NEXT:    mul r0, r0, r1
; CHECK-ARM8A-NEON-NEXT:    lsr r0, r0, #24
; CHECK-ARM8A-NEON-NEXT:    bx lr
  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
  ret i32 %cnt
}

; Population count of lane 0 extracted from a <2 x i32> argument.
; Expected lowering per run line:
;  - default AArch64 run: the lane is moved out to w0 and back into d0 before
;    the cnt.8b / uaddlv.8b sequence;
;  - AArch64 run with NEON disabled: the scalar expansion reads w0 directly;
;  - armv8a run: the vector is assembled from r0/r1, lane 0 is moved back to
;    r0, then the scalar expansion follows.
define i32 @cnt32_advsimd_2(<2 x i32> %x) {
; CHECK-LABEL: cnt32_advsimd_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    cnt.8b v0, v0
; CHECK-NEXT:    uaddlv.8b h0, v0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: cnt32_advsimd_2:
; CHECK-NONEON:       // %bb.0:
; CHECK-NONEON-NEXT:    lsr w8, w0, #1
; CHECK-NONEON-NEXT:    and w8, w8, #0x55555555
; CHECK-NONEON-NEXT:    sub w8, w0, w8
; CHECK-NONEON-NEXT:    and w9, w8, #0x33333333
; CHECK-NONEON-NEXT:    lsr w8, w8, #2
; CHECK-NONEON-NEXT:    and w8, w8, #0x33333333
; CHECK-NONEON-NEXT:    add w8, w9, w8
; CHECK-NONEON-NEXT:    add w8, w8, w8, lsr #4
; CHECK-NONEON-NEXT:    and w8, w8, #0xf0f0f0f
; CHECK-NONEON-NEXT:    mov w9, #16843009
; CHECK-NONEON-NEXT:    mul w8, w8, w9
; CHECK-NONEON-NEXT:    lsr w0, w8, #24
; CHECK-NONEON-NEXT:    ret
;
; CHECK-ARM8A-NEON-LABEL: cnt32_advsimd_2:
; CHECK-ARM8A-NEON:       @ %bb.0:
; CHECK-ARM8A-NEON-NEXT:    vmov d16, r0, r1
; CHECK-ARM8A-NEON-NEXT:    movw r1, #21845
; CHECK-ARM8A-NEON-NEXT:    movt r1, #21845
; CHECK-ARM8A-NEON-NEXT:    vmov.32 r0, d16[0]
; CHECK-ARM8A-NEON-NEXT:    and r1, r1, r0, lsr #1
; CHECK-ARM8A-NEON-NEXT:    sub r0, r0, r1
; CHECK-ARM8A-NEON-NEXT:    movw r1, #13107
; CHECK-ARM8A-NEON-NEXT:    movt r1, #13107
; CHECK-ARM8A-NEON-NEXT:    and r2, r0, r1
; CHECK-ARM8A-NEON-NEXT:    and r0, r1, r0, lsr #2
; CHECK-ARM8A-NEON-NEXT:    movw r1, #3855
; CHECK-ARM8A-NEON-NEXT:    add r0, r2, r0
; CHECK-ARM8A-NEON-NEXT:    movt r1, #3855
; CHECK-ARM8A-NEON-NEXT:    add r0, r0, r0, lsr #4
; CHECK-ARM8A-NEON-NEXT:    and r0, r0, r1
; CHECK-ARM8A-NEON-NEXT:    movw r1, #257
; CHECK-ARM8A-NEON-NEXT:    movt r1, #257
; CHECK-ARM8A-NEON-NEXT:    mul r0, r0, r1
; CHECK-ARM8A-NEON-NEXT:    lsr r0, r0, #24
; CHECK-ARM8A-NEON-NEXT:    bx lr
  %1 = extractelement <2 x i32> %x, i64 0
  %2 = tail call i32 @llvm.ctpop.i32(i32 %1)
  ret i32 %2
}

; Population count of a scalar i64 via llvm.ctpop.i64.
; Expected lowering per run line:
;  - default AArch64 run: fmov into d0, then cnt.8b / uaddlv.8b;
;  - AArch64 run with NEON disabled: 64-bit scalar mask/shift/add/multiply
;    expansion ending in a shift right by 56;
;  - armv8a run: each 32-bit half (r0, r1) is counted with the scalar
;    expansion, the two counts are added into r0 and r1 is set to 0.
define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
; CHECK-LABEL: cnt64_advsimd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    cnt.8b v0, v0
; CHECK-NEXT:    uaddlv.8b h0, v0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: cnt64_advsimd:
; CHECK-NONEON:       // %bb.0:
; CHECK-NONEON-NEXT:    lsr x8, x0, #1
; CHECK-NONEON-NEXT:    and x8, x8, #0x5555555555555555
; CHECK-NONEON-NEXT:    sub x8, x0, x8
; CHECK-NONEON-NEXT:    and x9, x8, #0x3333333333333333
; CHECK-NONEON-NEXT:    lsr x8, x8, #2
; CHECK-NONEON-NEXT:    and x8, x8, #0x3333333333333333
; CHECK-NONEON-NEXT:    add x8, x9, x8
; CHECK-NONEON-NEXT:    add x8, x8, x8, lsr #4
; CHECK-NONEON-NEXT:    and x8, x8, #0xf0f0f0f0f0f0f0f
; CHECK-NONEON-NEXT:    mov x9, #72340172838076673
; CHECK-NONEON-NEXT:    mul x8, x8, x9
; CHECK-NONEON-NEXT:    lsr x0, x8, #56
; CHECK-NONEON-NEXT:    ret
;
; CHECK-ARM8A-NEON-LABEL: cnt64_advsimd:
; CHECK-ARM8A-NEON:       @ %bb.0:
; CHECK-ARM8A-NEON-NEXT:    push {r11, lr}
; CHECK-ARM8A-NEON-NEXT:    movw r12, #21845
; CHECK-ARM8A-NEON-NEXT:    movw lr, #3855
; CHECK-ARM8A-NEON-NEXT:    movt r12, #21845
; CHECK-ARM8A-NEON-NEXT:    and r3, r12, r0, lsr #1
; CHECK-ARM8A-NEON-NEXT:    sub r0, r0, r3
; CHECK-ARM8A-NEON-NEXT:    movw r3, #13107
; CHECK-ARM8A-NEON-NEXT:    movt r3, #13107
; CHECK-ARM8A-NEON-NEXT:    and r2, r0, r3
; CHECK-ARM8A-NEON-NEXT:    and r0, r3, r0, lsr #2
; CHECK-ARM8A-NEON-NEXT:    movt lr, #3855
; CHECK-ARM8A-NEON-NEXT:    add r0, r2, r0
; CHECK-ARM8A-NEON-NEXT:    and r2, r12, r1, lsr #1
; CHECK-ARM8A-NEON-NEXT:    sub r1, r1, r2
; CHECK-ARM8A-NEON-NEXT:    and r2, r1, r3
; CHECK-ARM8A-NEON-NEXT:    add r0, r0, r0, lsr #4
; CHECK-ARM8A-NEON-NEXT:    and r1, r3, r1, lsr #2
; CHECK-ARM8A-NEON-NEXT:    and r0, r0, lr
; CHECK-ARM8A-NEON-NEXT:    add r1, r2, r1
; CHECK-ARM8A-NEON-NEXT:    movw r2, #257
; CHECK-ARM8A-NEON-NEXT:    movt r2, #257
; CHECK-ARM8A-NEON-NEXT:    add r1, r1, r1, lsr #4
; CHECK-ARM8A-NEON-NEXT:    mul r0, r0, r2
; CHECK-ARM8A-NEON-NEXT:    and r1, r1, lr
; CHECK-ARM8A-NEON-NEXT:    mul r1, r1, r2
; CHECK-ARM8A-NEON-NEXT:    lsr r0, r0, #24
; CHECK-ARM8A-NEON-NEXT:    add r0, r0, r1, lsr #24
; CHECK-ARM8A-NEON-NEXT:    mov r1, #0
; CHECK-ARM8A-NEON-NEXT:    pop {r11, pc}
  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
  ret i64 %cnt
}

; Do not use AdvSIMD when -mno-implicit-float is specified.
; rdar://9473858

; i32 population count in a function marked noimplicitfloat.
; All three run lines expect the scalar mask/shift/add/multiply expansion;
; in particular the default AArch64 run must not contain the cnt.8b /
; uaddlv.8b vector sequence here.
define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
; CHECK-LABEL: cnt32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr w8, w0, #1
; CHECK-NEXT:    and w8, w8, #0x55555555
; CHECK-NEXT:    sub w8, w0, w8
; CHECK-NEXT:    and w9, w8, #0x33333333
; CHECK-NEXT:    lsr w8, w8, #2
; CHECK-NEXT:    and w8, w8, #0x33333333
; CHECK-NEXT:    add w8, w9, w8
; CHECK-NEXT:    add w8, w8, w8, lsr #4
; CHECK-NEXT:    and w8, w8, #0xf0f0f0f
; CHECK-NEXT:    mov w9, #16843009
; CHECK-NEXT:    mul w8, w8, w9
; CHECK-NEXT:    lsr w0, w8, #24
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: cnt32:
; CHECK-NONEON:       // %bb.0:
; CHECK-NONEON-NEXT:    lsr w8, w0, #1
; CHECK-NONEON-NEXT:    and w8, w8, #0x55555555
; CHECK-NONEON-NEXT:    sub w8, w0, w8
; CHECK-NONEON-NEXT:    and w9, w8, #0x33333333
; CHECK-NONEON-NEXT:    lsr w8, w8, #2
; CHECK-NONEON-NEXT:    and w8, w8, #0x33333333
; CHECK-NONEON-NEXT:    add w8, w9, w8
; CHECK-NONEON-NEXT:    add w8, w8, w8, lsr #4
; CHECK-NONEON-NEXT:    and w8, w8, #0xf0f0f0f
; CHECK-NONEON-NEXT:    mov w9, #16843009
; CHECK-NONEON-NEXT:    mul w8, w8, w9
; CHECK-NONEON-NEXT:    lsr w0, w8, #24
; CHECK-NONEON-NEXT:    ret
;
; CHECK-ARM8A-NEON-LABEL: cnt32:
; CHECK-ARM8A-NEON:       @ %bb.0:
; CHECK-ARM8A-NEON-NEXT:    movw r1, #21845
; CHECK-ARM8A-NEON-NEXT:    movt r1, #21845
; CHECK-ARM8A-NEON-NEXT:    and r1, r1, r0, lsr #1
; CHECK-ARM8A-NEON-NEXT:    sub r0, r0, r1
; CHECK-ARM8A-NEON-NEXT:    movw r1, #13107
; CHECK-ARM8A-NEON-NEXT:    movt r1, #13107
; CHECK-ARM8A-NEON-NEXT:    and r2, r0, r1
; CHECK-ARM8A-NEON-NEXT:    and r0, r1, r0, lsr #2
; CHECK-ARM8A-NEON-NEXT:    movw r1, #3855
; CHECK-ARM8A-NEON-NEXT:    add r0, r2, r0
; CHECK-ARM8A-NEON-NEXT:    movt r1, #3855
; CHECK-ARM8A-NEON-NEXT:    add r0, r0, r0, lsr #4
; CHECK-ARM8A-NEON-NEXT:    and r0, r0, r1
; CHECK-ARM8A-NEON-NEXT:    movw r1, #257
; CHECK-ARM8A-NEON-NEXT:    movt r1, #257
; CHECK-ARM8A-NEON-NEXT:    mul r0, r0, r1
; CHECK-ARM8A-NEON-NEXT:    lsr r0, r0, #24
; CHECK-ARM8A-NEON-NEXT:    bx lr
  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
  ret i32 %cnt
}

; i64 population count in a function marked noimplicitfloat.
; Both AArch64 run lines expect the same 64-bit scalar expansion (no cnt.8b /
; uaddlv.8b in the default run); the armv8a run expects the two 32-bit halves
; to be counted separately and summed, with the high result word set to 0.
define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
; CHECK-LABEL: cnt64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr x8, x0, #1
; CHECK-NEXT:    and x8, x8, #0x5555555555555555
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    and x9, x8, #0x3333333333333333
; CHECK-NEXT:    lsr x8, x8, #2
; CHECK-NEXT:    and x8, x8, #0x3333333333333333
; CHECK-NEXT:    add x8, x9, x8
; CHECK-NEXT:    add x8, x8, x8, lsr #4
; CHECK-NEXT:    and x8, x8, #0xf0f0f0f0f0f0f0f
; CHECK-NEXT:    mov x9, #72340172838076673
; CHECK-NEXT:    mul x8, x8, x9
; CHECK-NEXT:    lsr x0, x8, #56
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: cnt64:
; CHECK-NONEON:       // %bb.0:
; CHECK-NONEON-NEXT:    lsr x8, x0, #1
; CHECK-NONEON-NEXT:    and x8, x8, #0x5555555555555555
; CHECK-NONEON-NEXT:    sub x8, x0, x8
; CHECK-NONEON-NEXT:    and x9, x8, #0x3333333333333333
; CHECK-NONEON-NEXT:    lsr x8, x8, #2
; CHECK-NONEON-NEXT:    and x8, x8, #0x3333333333333333
; CHECK-NONEON-NEXT:    add x8, x9, x8
; CHECK-NONEON-NEXT:    add x8, x8, x8, lsr #4
; CHECK-NONEON-NEXT:    and x8, x8, #0xf0f0f0f0f0f0f0f
; CHECK-NONEON-NEXT:    mov x9, #72340172838076673
; CHECK-NONEON-NEXT:    mul x8, x8, x9
; CHECK-NONEON-NEXT:    lsr x0, x8, #56
; CHECK-NONEON-NEXT:    ret
;
; CHECK-ARM8A-NEON-LABEL: cnt64:
; CHECK-ARM8A-NEON:       @ %bb.0:
; CHECK-ARM8A-NEON-NEXT:    push {r11, lr}
; CHECK-ARM8A-NEON-NEXT:    movw r12, #21845
; CHECK-ARM8A-NEON-NEXT:    movw lr, #3855
; CHECK-ARM8A-NEON-NEXT:    movt r12, #21845
; CHECK-ARM8A-NEON-NEXT:    and r3, r12, r0, lsr #1
; CHECK-ARM8A-NEON-NEXT:    sub r0, r0, r3
; CHECK-ARM8A-NEON-NEXT:    movw r3, #13107
; CHECK-ARM8A-NEON-NEXT:    movt r3, #13107
; CHECK-ARM8A-NEON-NEXT:    and r2, r0, r3
; CHECK-ARM8A-NEON-NEXT:    and r0, r3, r0, lsr #2
; CHECK-ARM8A-NEON-NEXT:    movt lr, #3855
; CHECK-ARM8A-NEON-NEXT:    add r0, r2, r0
; CHECK-ARM8A-NEON-NEXT:    and r2, r12, r1, lsr #1
; CHECK-ARM8A-NEON-NEXT:    sub r1, r1, r2
; CHECK-ARM8A-NEON-NEXT:    and r2, r1, r3
; CHECK-ARM8A-NEON-NEXT:    add r0, r0, r0, lsr #4
; CHECK-ARM8A-NEON-NEXT:    and r1, r3, r1, lsr #2
; CHECK-ARM8A-NEON-NEXT:    and r0, r0, lr
; CHECK-ARM8A-NEON-NEXT:    add r1, r2, r1
; CHECK-ARM8A-NEON-NEXT:    movw r2, #257
; CHECK-ARM8A-NEON-NEXT:    movt r2, #257
; CHECK-ARM8A-NEON-NEXT:    add r1, r1, r1, lsr #4
; CHECK-ARM8A-NEON-NEXT:    mul r0, r0, r2
; CHECK-ARM8A-NEON-NEXT:    and r1, r1, lr
; CHECK-ARM8A-NEON-NEXT:    mul r1, r1, r2
; CHECK-ARM8A-NEON-NEXT:    lsr r0, r0, #24
; CHECK-ARM8A-NEON-NEXT:    add r0, r0, r1, lsr #24
; CHECK-ARM8A-NEON-NEXT:    mov r1, #0
; CHECK-ARM8A-NEON-NEXT:    pop {r11, pc}
  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
  ret i64 %cnt
}

; Returns 1 (as i32) when the i64 argument has exactly one bit set, i.e.
; ctpop(x) == 1, and 0 otherwise.
; Expected lowering per run line:
;  - both AArch64 runs: a full popcount (vector or scalar form) followed by
;    cmp #1 / cset eq;
;  - armv8a run: no popcount expansion; the sequence computes x - 1 across the
;    register pair, ANDs it with x, tests that result for zero via clz / lsr #5,
;    and ANDs that with an "x is non-zero" flag produced by orrs / movwne.
define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
; CHECK-LABEL: ctpop_eq_one:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    cnt.8b v0, v0
; CHECK-NEXT:    uaddlv.8b h0, v0
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    cset w0, eq
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: ctpop_eq_one:
; CHECK-NONEON:       // %bb.0:
; CHECK-NONEON-NEXT:    lsr x8, x0, #1
; CHECK-NONEON-NEXT:    and x8, x8, #0x5555555555555555
; CHECK-NONEON-NEXT:    sub x8, x0, x8
; CHECK-NONEON-NEXT:    and x9, x8, #0x3333333333333333
; CHECK-NONEON-NEXT:    lsr x8, x8, #2
; CHECK-NONEON-NEXT:    and x8, x8, #0x3333333333333333
; CHECK-NONEON-NEXT:    add x8, x9, x8
; CHECK-NONEON-NEXT:    add x8, x8, x8, lsr #4
; CHECK-NONEON-NEXT:    and x8, x8, #0xf0f0f0f0f0f0f0f
; CHECK-NONEON-NEXT:    mov x9, #72340172838076673
; CHECK-NONEON-NEXT:    mul x8, x8, x9
; CHECK-NONEON-NEXT:    lsr x8, x8, #56
; CHECK-NONEON-NEXT:    cmp x8, #1 // =1
; CHECK-NONEON-NEXT:    cset w0, eq
; CHECK-NONEON-NEXT:    ret
;
; CHECK-ARM8A-NEON-LABEL: ctpop_eq_one:
; CHECK-ARM8A-NEON:       @ %bb.0:
; CHECK-ARM8A-NEON-NEXT:    subs r2, r0, #1
; CHECK-ARM8A-NEON-NEXT:    sbc r3, r1, #0
; CHECK-ARM8A-NEON-NEXT:    and r2, r0, r2
; CHECK-ARM8A-NEON-NEXT:    and r3, r1, r3
; CHECK-ARM8A-NEON-NEXT:    orrs r0, r0, r1
; CHECK-ARM8A-NEON-NEXT:    orr r2, r2, r3
; CHECK-ARM8A-NEON-NEXT:    movwne r0, #1
; CHECK-ARM8A-NEON-NEXT:    clz r2, r2
; CHECK-ARM8A-NEON-NEXT:    lsr r2, r2, #5
; CHECK-ARM8A-NEON-NEXT:    and r0, r0, r2
; CHECK-ARM8A-NEON-NEXT:    bx lr
  %count = tail call i64 @llvm.ctpop.i64(i64 %x)
  %cmp = icmp eq i64 %count, 1
  %conv = zext i1 %cmp to i32
  ret i32 %conv
}


; Declarations of the population-count intrinsics called by the functions in
; this file (i32 and i64 variants).
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone