llvm.org GIT mirror llvm / 152804e
Fix ctlz and cttz. The LLVM definition requires them to return the number of bits in the src type when the value is zero. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45029 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 12 years ago
3 changed file(s) with 54 addition(s) and 16 deletion(s). Raw diff Collapse all Expand all
53515351
53525352 Op = Op.getOperand(0);
53535353 if (VT == MVT::i8) {
5354 // Zero extend to i32 since there is not an i8 bsr.
53545355 OpVT = MVT::i32;
53555356 Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
53565357 }
5357 if (VT == MVT::i32 || VT == MVT::i64)
5358 return DAG.getNode(ISD::XOR, OpVT, DAG.getNode(X86ISD::BSR, OpVT, Op),
5359 DAG.getConstant(NumBits-1, OpVT));
5360
5361 Op = DAG.getNode(ISD::SUB, OpVT, DAG.getConstant(NumBits-1, OpVT),
5362 DAG.getNode(X86ISD::BSR, OpVT, Op));
5358
5359 // Issue a bsr (scan bits in reverse) which also sets EFLAGS.
5360 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
5361 Op = DAG.getNode(X86ISD::BSR, VTs, Op);
5362
5363 // If src is zero (i.e. bsr sets ZF), returns NumBits.
5364 SmallVector Ops;
5365 Ops.push_back(Op);
5366 Ops.push_back(DAG.getConstant(NumBits+NumBits-1, OpVT));
5367 Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
5368 Ops.push_back(Op.getValue(1));
5369 Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4);
5370
5371 // Finally xor with NumBits-1.
5372 Op = DAG.getNode(ISD::XOR, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
5373
53635374 if (VT == MVT::i8)
53645375 Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
53655376 return Op;
53685379 SDOperand X86TargetLowering::LowerCTTZ(SDOperand Op, SelectionDAG &DAG) {
53695380 MVT::ValueType VT = Op.getValueType();
53705381 MVT::ValueType OpVT = VT;
5382 unsigned NumBits = MVT::getSizeInBits(VT);
53715383
53725384 Op = Op.getOperand(0);
53735385 if (VT == MVT::i8) {
53745386 OpVT = MVT::i32;
53755387 Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
53765388 }
5377 Op = DAG.getNode(X86ISD::BSF, OpVT, Op);
5389
5390 // Issue a bsf (scan bits forward) which also sets EFLAGS.
5391 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
5392 Op = DAG.getNode(X86ISD::BSF, VTs, Op);
5393
5394 // If src is zero (i.e. bsf sets ZF), returns NumBits.
5395 SmallVector Ops;
5396 Ops.push_back(Op);
5397 Ops.push_back(DAG.getConstant(NumBits, OpVT));
5398 Ops.push_back(DAG.getConstant(X86::COND_E, MVT::i8));
5399 Ops.push_back(Op.getValue(1));
5400 Op = DAG.getNode(X86ISD::CMOV, OpVT, &Ops[0], 4);
5401
53785402 if (VT == MVT::i8)
53795403 Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
53805404 return Op;
450450 let Defs = [EFLAGS] in {
451451 def BSF16rr : I<0xBC, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
452452 "bsf{w}\t{$src, $dst||$dst, $src}",
453 [(set GR16:$dst, (X86bsf GR16:$src))]>, TB;
453 [(set GR16:$dst, (X86bsf GR16:$src)), (implicit EFLAGS)]>, TB;
454454 def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
455455 "bsf{w}\t{$src, $dst||$dst, $src}",
456 [(set GR16:$dst, (X86bsf (loadi16 addr:$src)))]>, TB;
456 [(set GR16:$dst, (X86bsf (loadi16 addr:$src))),
457 (implicit EFLAGS)]>, TB;
457458 def BSF32rr : I<0xBC, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
458459 "bsf{l}\t{$src, $dst||$dst, $src}",
459 [(set GR32:$dst, (X86bsf GR32:$src))]>, TB;
460 [(set GR32:$dst, (X86bsf GR32:$src)), (implicit EFLAGS)]>, TB;
460461 def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
461462 "bsf{l}\t{$src, $dst||$dst, $src}",
462 [(set GR32:$dst, (X86bsf (loadi32 addr:$src)))]>, TB;
463 [(set GR32:$dst, (X86bsf (loadi32 addr:$src))),
464 (implicit EFLAGS)]>, TB;
463465
464466 def BSR16rr : I<0xBD, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
465467 "bsr{w}\t{$src, $dst||$dst, $src}",
466 [(set GR16:$dst, (X86bsr GR16:$src))]>, TB;
468 [(set GR16:$dst, (X86bsr GR16:$src)), (implicit EFLAGS)]>, TB;
467469 def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
468470 "bsr{w}\t{$src, $dst||$dst, $src}",
469 [(set GR16:$dst, (X86bsr (loadi16 addr:$src)))]>, TB;
471 [(set GR16:$dst, (X86bsr (loadi16 addr:$src))),
472 (implicit EFLAGS)]>, TB;
470473 def BSR32rr : I<0xBD, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
471474 "bsr{l}\t{$src, $dst||$dst, $src}",
472 [(set GR32:$dst, (X86bsr GR32:$src))]>, TB;
475 [(set GR32:$dst, (X86bsr GR32:$src)), (implicit EFLAGS)]>, TB;
473476 def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
474477 "bsr{l}\t{$src, $dst||$dst, $src}",
475 [(set GR32:$dst, (X86bsr (loadi32 addr:$src)))]>, TB;
478 [(set GR32:$dst, (X86bsr (loadi32 addr:$src))),
479 (implicit EFLAGS)]>, TB;
476480 } // Defs = [EFLAGS]
477481
478482 def LEA16r : I<0x8D, MRMSrcMem,
None ; RUN: llvm-as < %s | llc -march=x86 | grep bsr
0 ; RUN: llvm-as < %s | llc -march=x86 | grep bsr | count 2
11 ; RUN: llvm-as < %s | llc -march=x86 | grep bsf
2 ; RUN: llvm-as < %s | llc -march=x86 | grep cmov | count 3
23
34 define i32 @t1(i32 %x) nounwind {
45 %tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
1314 }
1415
1516 declare i32 @llvm.cttz.i32(i32) nounwind readnone
17
18 define i16 @t3(i16 %x, i16 %y) nounwind {
19 entry:
20 %tmp1 = add i16 %x, %y
21 %tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1 ) ; [#uses=1]
22 ret i16 %tmp2
23 }
24
25 declare i16 @llvm.ctlz.i16(i16) nounwind readnone