llvm.org GIT mirror llvm / e6f7c26
Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112101 91177308-0d34-0410-b5e6-96231b3b80d8 Chris Lattner 10 years ago
5 changed file(s) with 193 addition(s) and 74 deletion(s). Raw diff Collapse all Expand all
213213 /// ValueTypeActions - For each value type, keep a LegalizeAction enum
214214 /// that indicates how instruction selection should deal with the type.
215215 uint8_t ValueTypeActions[MVT::LAST_VALUETYPE];
216
/// getExtendedTypeAction - Choose the LegalizeAction for a type that has no
/// slot in ValueTypeActions; getTypeAction(EVT) forwards here when
/// VT.isExtended() is true.
///  - Non-vector types must be integers (asserted): Promote when the width is
///    below 8 bits or is not a power of two, otherwise Expand.
///  - Vectors with a simple element type and an element count other than 1:
///    Promote if some vector with the same element type and a larger
///    power-of-2 element count is Legal according to getTypeAction(MVT).
///  - Any other vector: Expand when isPow2VectorType() holds, else Promote.
/// NOTE(review): the search loop only terminates because the element count
/// keeps changing until MVT::getVectorVT yields a default-constructed MVT;
/// this assumes NextPowerOf2 returns a value strictly greater than its
/// argument -- confirm.
217 LegalizeAction getExtendedTypeAction(EVT VT) const {
218 // Handle non-vector integers.
219 if (!VT.isVector()) {
220 assert(VT.isInteger() && "Unsupported extended type!");
221 unsigned BitSize = VT.getSizeInBits();
222 // First promote to a power-of-two size, then expand if necessary.
223 if (BitSize < 8 || !isPowerOf2_32(BitSize))
224 return Promote;
225 return Expand;
226 }
227
228 // If this is a type smaller than a legal vector type, promote to that
229 // type, e.g. <2 x float> -> <4 x float>.
230 if (VT.getVectorElementType().isSimple() &&
231 VT.getVectorNumElements() != 1) {
232 MVT EltType = VT.getVectorElementType().getSimpleVT();
233 unsigned NumElts = VT.getVectorNumElements();
234 while (1) {
235 // Move on to the next power-of-2 element count.
236 NumElts = (unsigned)NextPowerOf2(NumElts);
237
238 MVT LargerVector = MVT::getVectorVT(EltType, NumElts);
239 if (LargerVector == MVT()) break;
240
241 // If the larger type is legal, promote to it.
242 if (getTypeAction(LargerVector) == Legal) return Promote;
243 }
244 }
245
246 return VT.isPow2VectorType() ? Expand : Promote;
247 }
216248 public:
/// Default constructor: sets every entry of ValueTypeActions to 0.
/// NOTE(review): presumably 0 is the LegalizeAction value for "Legal" --
/// confirm against the enum definition.
217249 ValueTypeActionImpl() {
218250 std::fill(ValueTypeActions, array_endof(ValueTypeActions), 0);
219251 }
252
253 /// FIXME: This Context argument is now dead, zap it.
220254 LegalizeAction getTypeAction(LLVMContext &Context, EVT VT) const {
221 if (VT.isExtended()) {
222 if (VT.isVector()) {
223 return VT.isPow2VectorType() ? Expand : Promote;
224 }
225 if (VT.isInteger())
226 // First promote to a power-of-two size, then expand if necessary.
227 return VT == VT.getRoundIntegerType(Context) ? Expand : Promote;
228 assert(0 && "Unsupported extended type!");
229 return Legal;
230 }
231 unsigned I = VT.getSimpleVT().SimpleTy;
232 return (LegalizeAction)ValueTypeActions[I];
255 return getTypeAction(VT);
233256 }
257
/// getTypeAction - Return the LegalizeAction for an arbitrary EVT. Simple
/// (non-extended) types are answered by the MVT overload; extended types are
/// answered by getExtendedTypeAction.
258 LegalizeAction getTypeAction(EVT VT) const {
259 if (!VT.isExtended())
260 return getTypeAction(VT.getSimpleVT());
261 return getExtendedTypeAction(VT);
262 }
263
/// getTypeAction - Return the action recorded for a simple value type by
/// reading the ValueTypeActions entry indexed by VT.SimpleTy.
264 LegalizeAction getTypeAction(MVT VT) const {
265 return (LegalizeAction)ValueTypeActions[VT.SimpleTy];
266 }
267
268
234269 void setTypeAction(EVT VT, LegalizeAction Action) {
235270 unsigned I = VT.getSimpleVT().SimpleTy;
236271 ValueTypeActions[I] = Action;
251251 if (PartVT == ValueVT)
252252 return Val;
253253
254 if (PartVT.isVector()) // Vector/Vector bitcast.
254 if (PartVT.isVector()) {
255 // If the element types of the source/dest vectors are the same, but the
256 // part vector has more elements than the value vector, then we have a
257 // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
258 // elements we want.
259 if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
260 assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
261 "Cannot narrow, it would be a lossy transformation");
262 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
263 DAG.getIntPtrConstant(0));
264 }
265
266 // Vector/Vector bitcast.
255267 return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
268 }
256269
257270 assert(ValueVT.getVectorElementType() == PartVT &&
258271 ValueVT.getVectorNumElements() == 1 &&
391404 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
392405
393406 if (NumParts == 1) {
394 if (PartVT != ValueVT) {
395 if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
396 Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
397 } else {
398 assert(ValueVT.getVectorElementType() == PartVT &&
399 ValueVT.getVectorNumElements() == 1 &&
400 "Only trivial vector-to-scalar conversions should get here!");
401 Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
402 PartVT, Val, DAG.getIntPtrConstant(0));
403 }
407 if (PartVT == ValueVT) {
408 // Nothing to do.
409 } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
410 // Bitconvert vector->vector case.
411 Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
412 } else if (PartVT.isVector() &&
413 PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
414 PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
415 EVT ElementVT = PartVT.getVectorElementType();
416 // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
417 // undef elements.
418 SmallVector Ops;
419 for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
420 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
421 ElementVT, Val, DAG.getIntPtrConstant(i)));
422
423 for (unsigned i = ValueVT.getVectorNumElements(),
424 e = PartVT.getVectorNumElements(); i != e; ++i)
425 Ops.push_back(DAG.getUNDEF(ElementVT));
426
427 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
428
429 // FIXME: Use CONCAT for 2x -> 4x.
430
431 //SDValue UndefElts = DAG.getUNDEF(VectorTy);
432 //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
433 } else {
434 // Vector -> scalar conversion.
435 assert(ValueVT.getVectorElementType() == PartVT &&
436 ValueVT.getVectorNumElements() == 1 &&
437 "Only trivial vector-to-scalar conversions should get here!");
438 Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
439 PartVT, Val, DAG.getIntPtrConstant(0));
404440 }
405441
406442 Parts[0] = Val;
427463 DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
428464 else
429465 Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
430 IntermediateVT, Val,
431 DAG.getIntPtrConstant(i));
466 IntermediateVT, Val, DAG.getIntPtrConstant(i));
432467 }
433468
434469 // Split the intermediate operands into legal parts.
696696 return std::make_pair(BestRC, 1);
697697 }
698698
699
699700 /// computeRegisterProperties - Once all of the register classes are added,
700701 /// this allows us to compute derived properties we expose.
701702 void TargetLowering::computeRegisterProperties() {
781782 MVT VT = (MVT::SimpleValueType)i;
782783 if (isTypeLegal(VT)) continue;
783784
785 // Determine if there is a legal wider type. If so, we should promote to
786 // that wider vector type.
787 EVT EltVT = VT.getVectorElementType();
788 unsigned NElts = VT.getVectorNumElements();
789 if (NElts != 1) {
790 bool IsLegalWiderType = false;
791 for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
792 EVT SVT = (MVT::SimpleValueType)nVT;
793 if (SVT.getVectorElementType() == EltVT &&
794 SVT.getVectorNumElements() > NElts &&
795 isTypeSynthesizable(SVT)) {
796 TransformToType[i] = SVT;
797 RegisterTypeForVT[i] = SVT;
798 NumRegistersForVT[i] = 1;
799 ValueTypeActions.setTypeAction(VT, Promote);
800 IsLegalWiderType = true;
801 break;
802 }
803 }
804 if (IsLegalWiderType) continue;
805 }
806
784807 MVT IntermediateVT;
785808 EVT RegisterVT;
786809 unsigned NumIntermediates;
789812 RegisterVT, this);
790813 RegisterTypeForVT[i] = RegisterVT;
791814
792 // Determine if there is a legal wider type.
793 bool IsLegalWiderType = false;
794 EVT EltVT = VT.getVectorElementType();
795 unsigned NElts = VT.getVectorNumElements();
796 for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
797 EVT SVT = (MVT::SimpleValueType)nVT;
798 if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
799 SVT.getVectorNumElements() > NElts && NElts != 1) {
800 TransformToType[i] = SVT;
801 ValueTypeActions.setTypeAction(VT, Promote);
802 IsLegalWiderType = true;
803 break;
804 }
805 }
806 if (!IsLegalWiderType) {
807 EVT NVT = VT.getPow2VectorType();
808 if (NVT == VT) {
809 // Type is already a power of 2. The default action is to split.
810 TransformToType[i] = MVT::Other;
811 ValueTypeActions.setTypeAction(VT, Expand);
812 } else {
813 TransformToType[i] = NVT;
814 ValueTypeActions.setTypeAction(VT, Promote);
815 }
815 EVT NVT = VT.getPow2VectorType();
816 if (NVT == VT) {
817 // Type is already a power of 2. The default action is to split.
818 TransformToType[i] = MVT::Other;
819 ValueTypeActions.setTypeAction(VT, Expand);
820 } else {
821 TransformToType[i] = NVT;
822 ValueTypeActions.setTypeAction(VT, Promote);
816823 }
817824 }
818825
856863 EVT &IntermediateVT,
857864 unsigned &NumIntermediates,
858865 EVT &RegisterVT) const {
866 unsigned NumElts = VT.getVectorNumElements();
867
868 // If there is a wider vector type with the same element type as this one,
869 // we should widen to that legal vector type. This handles things like
870 // <2 x float> -> <4 x float>.
871 if (NumElts != 1 && getTypeAction(Context, VT) == Promote) {
872 RegisterVT = getTypeToTransformTo(Context, VT);
873 if (isTypeLegal(RegisterVT)) {
874 IntermediateVT = RegisterVT;
875 NumIntermediates = 1;
876 return 1;
877 }
878 }
879
859880 // Figure out the right, legal destination reg to copy into.
860 unsigned NumElts = VT.getVectorNumElements();
861881 EVT EltTy = VT.getVectorElementType();
862882
863883 unsigned NumVectorRegs = 1;
886906
887907 EVT DestVT = getRegisterType(Context, NewVT);
888908 RegisterVT = DestVT;
889 if (DestVT.bitsLT(NewVT)) {
890 // Value is expanded, e.g. i64 -> i16.
909 if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
891910 return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
892 } else {
893 // Otherwise, promotion or legal types use the same number of registers as
894 // the vector decimated to the appropriate level.
895 return NumVectorRegs;
896 }
897911
898 return 1;
912 // Otherwise, promotion or legal types use the same number of registers as
913 // the vector decimated to the appropriate level.
914 return NumVectorRegs;
899915 }
900916
901917 /// Get the EVTs and ArgFlags collections that represent the legalized return
99 store float %c, float* %P2
1010 ret void
1111 ; X64: test1:
12 ; X64-NEXT: addss %xmm1, %xmm0
13 ; X64-NEXT: movss %xmm0, (%rdi)
12 ; X64-NEXT: pshufd $1, %xmm0, %xmm1
13 ; X64-NEXT: addss %xmm0, %xmm1
14 ; X64-NEXT: movss %xmm1, (%rdi)
1415 ; X64-NEXT: ret
1516
1617 ; X32: test1:
17 ; X32-NEXT: movss 4(%esp), %xmm0
18 ; X32-NEXT: addss 8(%esp), %xmm0
19 ; X32-NEXT: movl 12(%esp), %eax
20 ; X32-NEXT: movss %xmm0, (%eax)
18 ; X32-NEXT: pshufd $1, %xmm0, %xmm1
19 ; X32-NEXT: addss %xmm0, %xmm1
20 ; X32-NEXT: movl 4(%esp), %eax
21 ; X32-NEXT: movss %xmm1, (%eax)
2122 ; X32-NEXT: ret
2223 }
2324
2728 ret <2 x float> %Z
2829
2930 ; X64: test2:
30 ; X64-NEXT: insertps $0
31 ; X64-NEXT: insertps $16
32 ; X64-NEXT: insertps $0
33 ; X64-NEXT: insertps $16
34 ; X64-NEXT: addps
35 ; X64-NEXT: movaps
36 ; X64-NEXT: pshufd
31 ; X64-NEXT: addps %xmm1, %xmm0
3732 ; X64-NEXT: ret
3833 }
34
35
36 define <2 x float> @test3(<4 x float> %A) nounwind {
37 %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
38 %C = fadd <2 x float> %B, %B
39 ret <2 x float> %C
40 ; CHECK: test3:
41 ; CHECK-NEXT: addps %xmm0, %xmm0
42 ; CHECK-NEXT: ret
43 }
44
45 define <2 x float> @test4(<2 x float> %A) nounwind {
46 %C = fadd <2 x float> %A, %A
47 ret <2 x float> %C
48 ; CHECK: test4:
49 ; CHECK-NEXT: addps %xmm0, %xmm0
50 ; CHECK-NEXT: ret
51 }
52
53 define <4 x float> @test5(<4 x float> %A) nounwind {
54 %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
55 %C = fadd <2 x float> %B, %B
56 br label %BB
57
58 BB:
59 %D = fadd <2 x float> %C, %C
60 %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
61 ret <4 x float> %E
62
63 ; CHECK: _test5:
64 ; CHECK-NEXT: addps %xmm0, %xmm0
65 ; CHECK-NEXT: addps %xmm0, %xmm0
66 ; CHECK-NEXT: ret
67 }
68
69
22 ; widening shuffle v3float and then a add
33 define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
44 entry:
5 ; CHECK: insertps
5 ; CHECK: shuf:
6 ; CHECK: extractps
67 ; CHECK: extractps
78 %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2>
89 %val = fadd <3 x float> %x, %src2
1415 ; widening shuffle v3float with a different mask and then a add
1516 define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
1617 entry:
17 ; CHECK: insertps
18 ; CHECK: shuf2:
19 ; CHECK: extractps
1820 ; CHECK: extractps
1921 %x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2>
2022 %val = fadd <3 x float> %x, %src2
2527 ; Example of when widening a v3float operation causes the DAG to replace a node
2628 ; with the operation that we are currently widening, i.e. when replacing
2729 ; opA with opB, the DAG will produce new operations with opA.
28 define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) {
30 define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
2931 entry:
3032 ; CHECK: pshufd
3133 %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32>