llvm.org GIT mirror llvm / 43afab3
Extended syntax of vector version of getelementptr instruction. The justification of this change is here: http://lists.cs.uiuc.edu/pipermail/llvmdev/2015-March/082989.html According to the current GEP syntax, vector GEP requires that each index must be a vector with the same number of elements. %A = getelementptr i8, <4 x i8*> %ptrs, <4 x i64> %offsets In this implementation I let each index be either a vector or a scalar. All vector indices must have the same number of elements. A scalar value is treated as a splat vector value. (1) %A = getelementptr i8, i8* %ptr, <4 x i64> %offsets or (2) %A = getelementptr i8, <4 x i8*> %ptrs, i64 %offset In all cases the %A type is <4 x i8*> In the case (2) we add the same offset to all pointers. The case (1) covers C[B[i]] case, when we have the same base C and different offsets B[i]. The documentation is updated. http://reviews.llvm.org/D10496 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241788 91177308-0d34-0410-b5e6-96231b3b80d8 Elena Demikhovsky 4 years ago
9 changed file(s) with 155 addition(s) and 39 deletion(s). Raw diff Collapse all Expand all
67176717
67186718 The '``getelementptr``' instruction is used to get the address of a
67196719 subelement of an :ref:`aggregate <t_aggregate>` data structure. It performs
6720 address calculation only and does not access memory.
6720 address calculation only and does not access memory. The instruction can also
6721 be used to calculate a vector of such addresses.
67216722
67226723 Arguments:
67236724 """"""""""
68436844 ; yields i32*:iptr
68446845 %iptr = getelementptr [10 x i32], [10 x i32]* @arr, i16 0, i16 0
68456846
6846 In cases where the pointer argument is a vector of pointers, each index
6847 must be a vector with the same number of elements. For example:
6848
6849 .. code-block:: llvm
6850
6851 %A = getelementptr i8, <4 x i8*> %ptrs, <4 x i64> %offsets,
6847 Vector of pointers:
6848 """""""""""""""""""
6849
6850 The ``getelementptr`` returns a vector of pointers, instead of a single address,
6851 when one or more of its arguments is a vector. In such cases, all vector
6852 arguments should have the same number of elements, and every scalar argument
6853 will be effectively broadcast into a vector during address calculation.
6854
6855 .. code-block:: llvm
6856
6857 ; All arguments are vectors:
6858 ; A[i] = ptrs[i] + offsets[i]*sizeof(i8)
6859 %A = getelementptr i8, <4 x i8*> %ptrs, <4 x i64> %offsets
6860
6861 ; Add the same scalar offset to each pointer of a vector:
6862 ; A[i] = ptrs[i] + offset*sizeof(i8)
6863 %A = getelementptr i8, <4 x i8*> %ptrs, i64 %offset
6864
6865 ; Add distinct offsets to the same pointer:
6866 ; A[i] = ptr + offsets[i]*sizeof(i8)
6867 %A = getelementptr i8, i8* %ptr, <4 x i64> %offsets
6868
6869 ; In all cases described above the type of the result is <4 x i8*>
6870
6871 The two following instructions are equivalent:
6872
6873 .. code-block:: llvm
6874
6875 getelementptr %struct.ST, <4 x %struct.ST*> %s, <4 x i64> %ind1,
6876 <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
6877 <4 x i32> <i32 1, i32 1, i32 1, i32 1>,
6878 <4 x i32> %ind4,
6879 <4 x i64> <i64 13, i64 13, i64 13, i64 13>
6880
6881 getelementptr %struct.ST, <4 x %struct.ST*> %s, <4 x i64> %ind1,
6882 i32 2, i32 1, <4 x i32> %ind4, i64 13
6883
6884 Let's look at the C code, where the vector version of ``getelementptr``
6885 makes sense:
6886
6887 .. code-block:: c
6888
6889 // Let's assume that we vectorize the following loop:
6890 double *A, *B; int *C;
6891 for (int i = 0; i < size; ++i) {
6892 A[i] = B[C[i]];
6893 }
6894
6895 .. code-block:: llvm
6896
6897 ; get pointers for 8 elements from array B
6898 %ptrs = getelementptr double, double* %B, <8 x i32> %C
6899 ; load 8 elements from array B into A
6900 %A = call <8 x double> @llvm.masked.gather.v8f64(<8 x double*> %ptrs,
6901 i32 8, <8 x i1> %mask, <8 x double> %passthru)
68526902
68536903 Conversion Operations
68546904 ---------------------
989989 Ptr->getType()->getPointerAddressSpace());
990990 // Vector GEP
991991 if (Ptr->getType()->isVectorTy()) {
992 unsigned NumElem = cast(Ptr->getType())->getNumElements();
992 unsigned NumElem = Ptr->getType()->getVectorNumElements();
993993 return VectorType::get(PtrTy, NumElem);
994994 }
995
995 for (Value *Index : IdxList)
996 if (Index->getType()->isVectorTy()) {
997 unsigned NumElem = Index->getType()->getVectorNumElements();
998 return VectorType::get(PtrTy, NumElem);
999 }
9961000 // Scalar GEP
9971001 return PtrTy;
9981002 }
28722872 if (ValTy->isVectorTy() != BaseType->isVectorTy())
28732873 return Error(ID.Loc, "getelementptr index type missmatch");
28742874 if (ValTy->isVectorTy()) {
2875 unsigned ValNumEl = cast(ValTy)->getNumElements();
2876 unsigned PtrNumEl = cast(BaseType)->getNumElements();
2875 unsigned ValNumEl = ValTy->getVectorNumElements();
2876 unsigned PtrNumEl = BaseType->getVectorNumElements();
28772877 if (ValNumEl != PtrNumEl)
28782878 return Error(
28792879 ID.Loc,
55715571
55725572 SmallVector Indices;
55735573 bool AteExtraComma = false;
5574 // GEP returns a vector of pointers if at least one of parameters is a vector.
5575 // All vector parameters should have the same vector width.
5576 unsigned GEPWidth = BaseType->isVectorTy() ?
5577 BaseType->getVectorNumElements() : 0;
5578
55745579 while (EatIfPresent(lltok::comma)) {
55755580 if (Lex.getKind() == lltok::MetadataVar) {
55765581 AteExtraComma = true;
55795584 if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
55805585 if (!Val->getType()->getScalarType()->isIntegerTy())
55815586 return Error(EltLoc, "getelementptr index must be an integer");
5582 if (Val->getType()->isVectorTy() != Ptr->getType()->isVectorTy())
5583 return Error(EltLoc, "getelementptr index type missmatch");
5587
55845588 if (Val->getType()->isVectorTy()) {
5585 unsigned ValNumEl = cast(Val->getType())->getNumElements();
5586 unsigned PtrNumEl = cast(Ptr->getType())->getNumElements();
5587 if (ValNumEl != PtrNumEl)
5589 unsigned ValNumEl = Val->getType()->getVectorNumElements();
5590 if (GEPWidth && GEPWidth != ValNumEl)
55885591 return Error(EltLoc,
55895592 "getelementptr vector index has a wrong number of elements");
5593 GEPWidth = ValNumEl;
55905594 }
55915595 Indices.push_back(Val);
55925596 }
27862786 SDValue N = getValue(Op0);
27872787 SDLoc dl = getCurSDLoc();
27882788
2789 // Normalize Vector GEP - all scalar operands should be converted to the
2790 // splat vector.
2791 unsigned VectorWidth = I.getType()->isVectorTy() ?
2792 cast(I.getType())->getVectorNumElements() : 0;
2793
2794 if (VectorWidth && !N.getValueType().isVector()) {
2795 MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth);
2796 SmallVector Ops(VectorWidth, N);
2797 N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
2798 }
27892799 for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
27902800 OI != E; ++OI) {
27912801 const Value *Idx = *OI;
28062816 unsigned PtrSize = PtrTy.getSizeInBits();
28072817 APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty));
28082818
2809 // If this is a constant subscript, handle it quickly.
2810 if (const auto *CI = dyn_cast(Idx)) {
2819 // If this is a scalar constant or a splat vector of constants,
2820 // handle it quickly.
2821 const auto *CI = dyn_cast(Idx);
2822 if (!CI && isa(Idx) &&
2823 cast(Idx)->getSplatValue())
2824 CI = cast(cast(Idx)->getSplatValue());
2825
2826 if (CI) {
28112827 if (CI->isZero())
28122828 continue;
28132829 APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
2814 SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy);
2830 SDValue OffsVal = VectorWidth ?
2831 DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) :
2832 DAG.getConstant(Offs, dl, PtrTy);
28152833 N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal);
28162834 continue;
28172835 }
28192837 // N = N + Idx * ElementSize;
28202838 SDValue IdxN = getValue(Idx);
28212839
2840 if (!IdxN.getValueType().isVector() && VectorWidth) {
2841 MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth);
2842 SmallVector Ops(VectorWidth, IdxN);
2843 IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
2844 }
28222845 // If the index is smaller or larger than intptr_t, truncate or extend
28232846 // it.
28242847 IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
25372537 Assert(isa(TargetTy),
25382538 "GEP base pointer is not a vector or a vector of pointers", &GEP);
25392539 Assert(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP);
2540 Assert(GEP.getPointerOperandType()->isVectorTy() ==
2541 GEP.getType()->isVectorTy(),
2542 "Vector GEP must return a vector value", &GEP);
2543
25442540 SmallVector Idxs(GEP.idx_begin(), GEP.idx_end());
25452541 Type *ElTy =
25462542 GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs);
25502546 GEP.getResultElementType() == ElTy,
25512547 "GEP is not of right type for indices!", &GEP, ElTy);
25522548
2553 if (GEP.getPointerOperandType()->isVectorTy()) {
2549 if (GEP.getType()->isVectorTy()) {
25542550 // Additional checks for vector GEPs.
2555 unsigned GepWidth = GEP.getPointerOperandType()->getVectorNumElements();
2556 Assert(GepWidth == GEP.getType()->getVectorNumElements(),
2557 "Vector GEP result width doesn't match operand's", &GEP);
2551 unsigned GEPWidth = GEP.getType()->getVectorNumElements();
2552 if (GEP.getPointerOperandType()->isVectorTy())
2553 Assert(GEPWidth == GEP.getPointerOperandType()->getVectorNumElements(),
2554 "Vector GEP result width doesn't match operand's", &GEP);
25582555 for (unsigned i = 0, e = Idxs.size(); i != e; ++i) {
25592556 Type *IndexTy = Idxs[i]->getType();
2560 Assert(IndexTy->isVectorTy(), "Vector GEP must have vector indices!",
2561 &GEP);
2562 unsigned IndexWidth = IndexTy->getVectorNumElements();
2563 Assert(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
2557 if (IndexTy->isVectorTy()) {
2558 unsigned IndexWidth = IndexTy->getVectorNumElements();
2559 Assert(IndexWidth == GEPWidth, "Invalid GEP index vector width", &GEP);
2560 }
2561 Assert(IndexTy->getScalarType()->isIntegerTy(),
2562 "All GEP indices should be of integer type");
25642563 }
25652564 }
25662565 visitInstruction(GEP);
0 ; RUN: not llvm-as < %s >/dev/null 2> %t
11 ; RUN: FileCheck %s < %t
2 ; Test that a vector index is only used with a vector pointer.
2 ; Test that a vector GEP may be used with a scalar base, the result is a vector of pointers
33
4 ; CHECK: getelementptr index type missmatch
4 ; CHECK: '%w' defined with type '<2 x i32*>
55
66 define i32 @test(i32* %a) {
77 %w = getelementptr i32, i32* %a, <2 x i32>
0 ; RUN: not llvm-as < %s >/dev/null 2> %t
11 ; RUN: FileCheck %s < %t
2 ; Test that a vector pointer is only used with a vector index.
2 ; Test that a vector pointer may be used with a scalar index.
3 ; Test that a vector pointer and vector index should have the same vector width
34
4 ; CHECK: getelementptr index type missmatch
5 ; This code is correct
6 define <2 x i32*> @test2(<2 x i32*> %a) {
7 %w = getelementptr i32, <2 x i32*> %a, i32 2
8 ret <2 x i32*> %w
9 }
510
6 define <2 x i32> @test(<2 x i32*> %a) {
7 %w = getelementptr i32, <2 x i32*> %a, i32 2
11 ; This code is correct
12 define <2 x i32*> @test3(i32* %a) {
13 %w = getelementptr i32, i32* %a, <2 x i32>
14 ret <2 x i32*> %w
15 }
16
17 ; CHECK: getelementptr vector index has a wrong number of elements
18
19 define <2 x i32> @test1(<2 x i32*> %a) {
20 %w = getelementptr i32, <2 x i32*> %a, <4 x i32>
821 ret <2 x i32> %w
922 }
23
0 ; RUN: not llvm-as < %s >/dev/null 2> %t
11 ; RUN: FileCheck %s < %t
2 ; Test that vector indices have the same number of elements as the pointer.
2 ; Test that a vector GEP may be used with a scalar base, the result is a vector of pointers
33
4 ; CHECK: getelementptr index type missmatch
4 ; CHECK: '%w' defined with type '<2 x <4 x i32>*>'
55
66 define <4 x i32> @test(<4 x i32>* %a) {
77 %w = getelementptr <4 x i32>, <4 x i32>* %a, <2 x i32>
9191 ;CHECK: ret
9292 }
9393
94 ;CHECK-LABEL: AGEP7:
95 define <4 x i8*> @AGEP7(<4 x i8*> %param, i32 %off) nounwind {
96 entry:
97 ;CHECK: vbroadcastss
98 ;CHECK: vpadd
99 %A = getelementptr i8, <4 x i8*> %param, i32 %off
100 ret <4 x i8*> %A
101 ;CHECK: ret
102 }
103
104 ;CHECK-LABEL: AGEP8:
105 define <4 x i16*> @AGEP8(i16* %param, <4 x i32> %off) nounwind {
106 entry:
107 ; Multiply offset by two (add it to itself).
108 ;CHECK: vpadd
109 ; add the base to the offset
110 ;CHECK: vbroadcastss
111 ;CHECK-NEXT: vpadd
112 %A = getelementptr i16, i16* %param, <4 x i32> %off
113 ret <4 x i16*> %A
114 ;CHECK: ret
115 }