llvm.org GIT mirror llvm / 8c0b3da
[LoopVectorize] Add FNeg instruction support Differential Revision: https://reviews.llvm.org/D62510 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362124 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 4 months ago
4 changed file(s) with 44 addition(s) and 26 deletion(s). Raw diff Collapse all Expand all
13821382 return Insert(UnOp, Name);
13831383 }
13841384
1385 /// Create either a UnaryOperator or BinaryOperator depending on \p Opc.
1386 /// Correct number of operands must be passed accordingly.
1387 Value *CreateNAryOp(unsigned Opc, ArrayRef<Value *> Ops,
1388 const Twine &Name = "",
1389 MDNode *FPMathTag = nullptr) {
1390 if (Instruction::isBinaryOp(Opc)) {
1391 assert(Ops.size() == 2 && "Invalid number of operands!");
1392 return CreateBinOp(static_cast<Instruction::BinaryOps>(Opc),
1393 Ops[0], Ops[1], Name, FPMathTag);
1394 }
1395 if (Instruction::isUnaryOp(Opc)) {
1396 assert(Ops.size() == 1 && "Invalid number of operands!");
1397 return CreateUnOp(static_cast<Instruction::UnaryOps>(Opc),
1398 Ops[0], Name, FPMathTag);
1399 }
1400 llvm_unreachable("Unexpected opcode!");
1401 }
1402
13851403 //===--------------------------------------------------------------------===//
13861404 // Instruction creation methods: Memory Instructions
13871405 //===--------------------------------------------------------------------===//
39683968 case Instruction::FAdd:
39693969 case Instruction::Sub:
39703970 case Instruction::FSub:
3971 case Instruction::FNeg:
39713972 case Instruction::Mul:
39723973 case Instruction::FMul:
39733974 case Instruction::FDiv:
39783979 case Instruction::And:
39793980 case Instruction::Or:
39803981 case Instruction::Xor: {
3981 // Just widen binops.
3982 auto *BinOp = cast<BinaryOperator>(&I);
3983 setDebugLocFromInst(Builder, BinOp);
3982 // Just widen unops and binops.
3983 setDebugLocFromInst(Builder, &I);
39843984
39853985 for (unsigned Part = 0; Part < UF; ++Part) {
3986 Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part);
3987 Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part);
3988 Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
3989
3990 if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))
3991 VecOp->copyIRFlags(BinOp);
3986 SmallVector<Value *, 2> Ops;
3987 for (Value *Op : I.operands())
3988 Ops.push_back(getOrCreateVectorValue(Op, Part));
3989
3990 Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
3991
3992 if (auto *VecOp = dyn_cast<Instruction>(V))
3993 VecOp->copyIRFlags(&I);
39923994
39933995 // Use this vector value for all users of the original instruction.
39943996 VectorLoopValueMap.setVectorValue(&I, Part, V);
3995 addMetadata(V, BinOp);
3997 addMetadata(V, &I);
39963998 }
39973999
39984000 break;
59595961 I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
59605962 Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);
59615963 }
5964 case Instruction::FNeg: {
5965 unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
5966 return N * TTI.getArithmeticInstrCost(
5967 I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
5968 TargetTransformInfo::OK_AnyValue,
5969 TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
5970 I->getOperand(0));
5971 }
59625972 case Instruction::Select: {
59635973 SelectInst *SI = cast<SelectInst>(I);
59645974 const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
65886598 case Instruction::FCmp:
65896599 case Instruction::FDiv:
65906600 case Instruction::FMul:
6601 case Instruction::FNeg:
65916602 case Instruction::FPExt:
65926603 case Instruction::FPToSI:
65936604 case Instruction::FPToUI:
44 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
55 target triple = "x86_64-apple-macosx10.8.0"
66
7 ; CHECK: Found an estimated cost of 2 for VF 1 For instruction: %neg = fneg float %{{.*}}
8 ; CHECK: Found an estimated cost of 6 for VF 2 For instruction: %neg = fneg float %{{.*}}
9 ; CHECK: Found an estimated cost of 14 for VF 4 For instruction: %neg = fneg float %{{.*}}
7 ; CHECK: Found an estimated cost of 4 for VF 1 For instruction: %neg = fneg float %{{.*}}
8 ; CHECK: Found an estimated cost of 4 for VF 2 For instruction: %neg = fneg float %{{.*}}
9 ; CHECK: Found an estimated cost of 4 for VF 4 For instruction: %neg = fneg float %{{.*}}
1010 define void @fneg_cost(float* %a, i64 %n) {
1111 entry:
1212 br label %for.body
22 define void @foo(float* %a, i64 %n) {
33 ; CHECK: vector.body:
44 ; CHECK: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 4
5 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 0
6 ; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]]
7 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 1
8 ; CHECK-NEXT: [[TMP7:%.*]] = fneg float [[TMP6]]
9 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 2
10 ; CHECK-NEXT: [[TMP9:%.*]] = fneg float [[TMP8]]
11 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 3
12 ; CHECK-NEXT: [[TMP11:%.*]] = fneg float [[TMP10]]
13 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> undef, float [[TMP5]], i32 0
14 ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP7]], i32 1
15 ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP9]], i32 2
16 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i32 3
17 ; CHECK: store <4 x float> [[TMP15]], <4 x float>* {{.*}}, align 4
5 ; CHECK-NEXT: [[TMP4:%.*]] = fneg <4 x float> [[WIDE_LOAD]]
6 ; CHECK: store <4 x float> [[TMP4]], <4 x float>* {{.*}}, align 4
187 ;
198 entry:
209 br label %for.body