llvm.org GIT mirror llvm / 9960990
[SLP] Support unary FNeg vectorization
Differential Revision: https://reviews.llvm.org/D63609
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364219 91177308-0d34-0410-b5e6-96231b3b80d8
Cameron McInally 2 months ago
3 changed file(s) with 40 addition(s) and 24 deletion(s). Raw diff Collapse all Expand all
23892389 return;
23902390 }
23912391 case Instruction::Select:
2392 case Instruction::FNeg:
23922393 case Instruction::Add:
23932394 case Instruction::FAdd:
23942395 case Instruction::Sub:
24082409 case Instruction::Or:
24092410 case Instruction::Xor: {
24102411 auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
2411 LLVM_DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
2412 LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
24122413
24132414 // Sort operands of the instructions so that each side is more likely to
24142415 // have the same opcode.
28802881 int VecCost = TTI->getCmpSelInstrCost(S.getOpcode(), VecTy, MaskTy, VL0);
28812882 return ReuseShuffleCost + VecCost - ScalarCost;
28822883 }
2884 case Instruction::FNeg:
28832885 case Instruction::Add:
28842886 case Instruction::FAdd:
28852887 case Instruction::Sub:
29172919 ConstantInt *CInt0 = nullptr;
29182920 for (unsigned i = 0, e = VL.size(); i < e; ++i) {
29192921 const Instruction *I = cast<Instruction>(VL[i]);
2920 ConstantInt *CInt = dyn_cast<ConstantInt>(I->getOperand(1));
2922 unsigned OpIdx = isa<BinaryOperator>(I) ? 1 : 0;
2923 ConstantInt *CInt = dyn_cast<ConstantInt>(I->getOperand(OpIdx));
29212924 if (!CInt) {
29222925 Op2VK = TargetTransformInfo::OK_AnyValue;
29232926 Op2VP = TargetTransformInfo::OP_None;
36953698 }
36963699 E->VectorizedValue = V;
36973700 ++NumVectorInstructions;
3701 return V;
3702 }
3703 case Instruction::FNeg: {
3704 setInsertPointAfterBundle(E->Scalars, S);
3705
3706 Value *Op = vectorizeTree(E->getOperand(0));
3707
3708 if (E->VectorizedValue) {
3709 LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
3710 return E->VectorizedValue;
3711 }
3712
3713 Value *V = Builder.CreateUnOp(
3714 static_cast<Instruction::UnaryOps>(S.getOpcode()), Op);
3715 propagateIRFlags(V, E->Scalars, VL0);
3716 if (auto *I = dyn_cast<Instruction>(V))
3717 V = propagateMetadata(I, E->Scalars);
3718
3719 if (NeedToShuffleReuses) {
3720 V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
3721 E->ReuseShuffleIndices, "shuffle");
3722 }
3723 E->VectorizedValue = V;
3724 ++NumVectorInstructions;
3725
36983726 return V;
36993727 }
37003728 case Instruction::Add:
5353 define void @Rf_GReset_unary_fneg() {
5454 ; CHECK-LABEL: @Rf_GReset_unary_fneg(
5555 ; CHECK-NEXT: entry:
56 ; CHECK-NEXT: [[SUB:%.*]] = fneg double undef
5756 ; CHECK-NEXT: [[TMP0:%.*]] = load double, double* @d, align 8
58 ; CHECK-NEXT: [[SUB1:%.*]] = fneg double [[TMP0]]
57 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[TMP0]], i32 1
58 ; CHECK-NEXT: [[TMP2:%.*]] = fneg <2 x double> [[TMP1]]
5959 ; CHECK-NEXT: br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]]
6060 ; CHECK: if.then:
61 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[SUB]], i32 0
62 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[SUB1]], i32 1
6361 ; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
6462 ; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], undef
6563 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
558558 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
559559 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
560560 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer
561 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
562 ; CHECK-NEXT: [[SUB1:%.*]] = fneg fast double [[TMP4]]
563 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
564 ; CHECK-NEXT: [[SUB2:%.*]] = fneg fast double [[TMP5]]
565 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> undef, double [[SUB1]], i32 0
566 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[SUB2]], i32 1
567 ; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP7]]
568 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
569 ; CHECK-NEXT: store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8
561 ; CHECK-NEXT: [[TMP4:%.*]] = fneg fast <2 x double> [[TMP2]]
562 ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
563 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
564 ; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
570565 ; CHECK-NEXT: ret void
571566 ;
572567 %idx1 = getelementptr inbounds double, double* %x, i64 0
631626 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
632627 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
633628 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
634 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
635 ; CHECK-NEXT: [[SUB1:%.*]] = fneg double [[TMP4]]
636 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
637 ; CHECK-NEXT: [[SUB2:%.*]] = fneg double [[TMP5]]
638 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> undef, double [[SUB1]], i32 0
639 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[SUB2]], i32 1
640 ; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP7]]
641 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
642 ; CHECK-NEXT: store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8
629 ; CHECK-NEXT: [[TMP4:%.*]] = fneg <2 x double> [[TMP2]]
630 ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
631 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>*
632 ; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
643633 ; CHECK-NEXT: ret void
644634 ;
645635 %idx1 = getelementptr inbounds double, double* %x, i64 0