llvm.org GIT mirror llvm / ea59a24
Support arbitrary address space pointers in masked gather/scatter intrinsics. Fixes PR31789 - When loop-vectorize tries to use these intrinsics for a non-default address space pointer we fail with a "Calling a function with a bad signature!" assertion. This patch solves this by adding the 'vector of pointers' argument as an overloaded type which will determine the address space. Differential revision: https://reviews.llvm.org/D31490 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302018 91177308-0d34-0410-b5e6-96231b3b80d8 Elad Cohen 3 years ago
19 changed file(s) with 560 addition(s) and 175 deletion(s). Raw diff Collapse all Expand all
79147914 ; get pointers for 8 elements from array B
79157915 %ptrs = getelementptr double, double* %B, <8 x i32> %C
79167916 ; load 8 elements from array B into A
7917 %A = call <8 x double> @llvm.masked.gather.v8f64(<8 x double*> %ptrs,
7917 %A = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> %ptrs,
79187918 i32 8, <8 x i1> %mask, <8 x double> %passthru)
79197919
79207920 Conversion Operations
1202312023
1202412024 ::
1202512025
12026 declare <16 x float> @llvm.masked.gather.v16f32 (<16 x float*> , i32 , <16 x i1> , <16 x float> )
12027 declare <2 x double> @llvm.masked.gather.v2f64 (<2 x double*> , i32 , <2 x i1> , <2 x double> )
12028 declare <8 x float*> @llvm.masked.gather.v8p0f32 (<8 x float**> , i32 , <8 x i1> , <8 x float*> )
12026 declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32 (<16 x float*> , i32 , <16 x i1> , <16 x float> )
12027 declare <2 x double> @llvm.masked.gather.v2f64.v2p1f64 (<2 x double addrspace(1)*> , i32 , <2 x i1> , <2 x double> )
12028 declare <8 x float*> @llvm.masked.gather.v8p0f32.v8p0p0f32 (<8 x float**> , i32 , <8 x i1> , <8 x float*> )
1202912029
1203012030 Overview:
1203112031 """""""""
1204812048
1204912049 ::
1205012050
12051 %res = call <4 x double> @llvm.masked.gather.v4f64 (<4 x double*> %ptrs, i32 8, <4 x i1> , <4 x double> undef)
12051 %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64 (<4 x double*> %ptrs, i32 8, <4 x i1> , <4 x double> undef)
1205212052
1205312053 ;; The gather with all-true mask is equivalent to the following instruction sequence
1205412054 %ptr0 = extractelement <4 x double*> %ptrs, i32 0
1207712077
1207812078 ::
1207912079
12080 declare void @llvm.masked.scatter.v8i32 (<8 x i32> , <8 x i32*> , i32 , <8 x i1> )
12081 declare void @llvm.masked.scatter.v16f32 (<16 x float> , <16 x float*> , i32 , <16 x i1> )
12082 declare void @llvm.masked.scatter.v4p0f64 (<4 x double*> , <4 x double**> , i32 , <4 x i1> )
12080 declare void @llvm.masked.scatter.v8i32.v8p0i32 (<8 x i32> , <8 x i32*> , i32 , <8 x i1> )
12081 declare void @llvm.masked.scatter.v16f32.v16p1f32 (<16 x float> , <16 x float addrspace(1)*> , i32 , <16 x i1> )
12082 declare void @llvm.masked.scatter.v4p0f64.v4p0p0f64 (<4 x double*> , <4 x double**> , i32 , <4 x i1> )
1208312083
1208412084 Overview:
1208512085 """""""""
1210012100 ::
1210112101
1210212102 ;; This instruction unconditionally stores data vector in multiple addresses
12103 call @llvm.masked.scatter.v8i32 (<8 x i32> %value, <8 x i32*> %ptrs, i32 4, <8 x i1> )
12103 call @llvm.masked.scatter.v8i32.v8p0i32 (<8 x i32> %value, <8 x i32*> %ptrs, i32 4, <8 x i1> )
1210412104
1210512105 ;; It is equivalent to a list of scalar stores
1210612106 %val0 = extractelement <8 x i32> %value, i32 0
9999 Void, VarArg, MMX, Token, Metadata, Half, Float, Double,
100100 Integer, Vector, Pointer, Struct,
101101 Argument, ExtendArgument, TruncArgument, HalfVecArgument,
102 SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfPtrsToElt
102 SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt
103103 } Kind;
104104
105105 union {
118118 AK_AnyVector,
119119 AK_AnyPointer
120120 };
121
121122 unsigned getArgumentNumber() const {
122123 assert(Kind == Argument || Kind == ExtendArgument ||
123124 Kind == TruncArgument || Kind == HalfVecArgument ||
124125 Kind == SameVecWidthArgument || Kind == PtrToArgument ||
125 Kind == PtrToElt || Kind == VecOfPtrsToElt);
126 Kind == PtrToElt);
126127 return Argument_Info >> 3;
127128 }
128129 ArgKind getArgumentKind() const {
129130 assert(Kind == Argument || Kind == ExtendArgument ||
130131 Kind == TruncArgument || Kind == HalfVecArgument ||
131 Kind == SameVecWidthArgument || Kind == PtrToArgument ||
132 Kind == VecOfPtrsToElt);
132 Kind == SameVecWidthArgument || Kind == PtrToArgument);
133133 return (ArgKind)(Argument_Info & 7);
134 }
135
136 // VecOfAnyPtrsToElt uses both an overloaded argument (for address space)
137 // and a reference argument (for matching vector width and element types)
138 unsigned getOverloadArgNumber() const {
139 assert(Kind == VecOfAnyPtrsToElt);
140 return Argument_Info >> 16;
141 }
142 unsigned getRefArgNumber() const {
143 assert(Kind == VecOfAnyPtrsToElt);
144 return Argument_Info & 0xFFFF;
134145 }
135146
136147 static IITDescriptor get(IITDescriptorKind K, unsigned Field) {
137148 IITDescriptor Result = { K, { Field } };
149 return Result;
150 }
151
152 static IITDescriptor get(IITDescriptorKind K, unsigned short Hi,
153 unsigned short Lo) {
154 unsigned Field = Hi << 16 | Lo;
155 IITDescriptor Result = {K, {Field}};
138156 return Result;
139157 }
140158 };
154154 }
155155 class LLVMPointerTo : LLVMMatchType;
156156 class LLVMPointerToElt : LLVMMatchType;
157 class LLVMVectorOfPointersToElt : LLVMMatchType;
157 class LLVMVectorOfAnyPointersToElt : LLVMMatchType;
158158
159159 // Match the type of another intrinsic parameter that is expected to be a
160160 // vector type, but change the element count to be half as many
760760 [IntrReadMem, IntrArgMemOnly]>;
761761
762762 def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
763 [LLVMVectorOfPointersToElt<0>, llvm_i32_ty,
763 [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
764764 LLVMVectorSameWidth<0, llvm_i1_ty>,
765765 LLVMMatchType<0>],
766766 [IntrReadMem]>;
767767
768768 def int_masked_scatter: Intrinsic<[],
769769 [llvm_anyvector_ty,
770 LLVMVectorOfPointersToElt<0>, llvm_i32_ty,
770 LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
771771 LLVMVectorSameWidth<0, llvm_i1_ty>]>;
772772
773773 def int_masked_expandload: Intrinsic<[llvm_anyvector_ty],
466466 return true;
467467 }
468468 }
469 // Renaming gather/scatter intrinsics with no address space overloading
470 // to the new overload which includes an address space
471 if (Name.startswith("masked.gather.")) {
472 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
473 if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
474 rename(F);
475 NewFn = Intrinsic::getDeclaration(F->getParent(),
476 Intrinsic::masked_gather, Tys);
477 return true;
478 }
479 }
480 if (Name.startswith("masked.scatter.")) {
481 auto Args = F->getFunctionType()->params();
482 Type *Tys[] = {Args[0], Args[1]};
483 if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
484 rename(F);
485 NewFn = Intrinsic::getDeclaration(F->getParent(),
486 Intrinsic::masked_scatter, Tys);
487 return true;
488 }
489 }
469490 break;
470491 }
471492 case 'n': {
20712092 case Intrinsic::invariant_start:
20722093 case Intrinsic::invariant_end:
20732094 case Intrinsic::masked_load:
2074 case Intrinsic::masked_store: {
2095 case Intrinsic::masked_store:
2096 case Intrinsic::masked_gather:
2097 case Intrinsic::masked_scatter: {
20752098 SmallVector Args(CI->arg_operands().begin(),
20762099 CI->arg_operands().end());
20772100 NewCall = Builder.CreateCall(NewFn, Args);
573573 IIT_SAME_VEC_WIDTH_ARG = 31,
574574 IIT_PTR_TO_ARG = 32,
575575 IIT_PTR_TO_ELT = 33,
576 IIT_VEC_OF_PTRS_TO_ELT = 34,
576 IIT_VEC_OF_ANYPTRS_TO_ELT = 34,
577577 IIT_I128 = 35,
578578 IIT_V512 = 36,
579579 IIT_V1024 = 37
580580 };
581
582581
583582 static void DecodeIITType(unsigned &NextElt, ArrayRef Infos,
584583 SmallVectorImpl &OutputTable) {
715714 OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToElt, ArgInfo));
716715 return;
717716 }
718 case IIT_VEC_OF_PTRS_TO_ELT: {
719 unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
720 OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecOfPtrsToElt,
721 ArgInfo));
717 case IIT_VEC_OF_ANYPTRS_TO_ELT: {
718 unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
719 unsigned short RefNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
720 OutputTable.push_back(
721 IITDescriptor::get(IITDescriptor::VecOfAnyPtrsToElt, ArgNo, RefNo));
722722 return;
723723 }
724724 case IIT_EMPTYSTRUCT:
807807 Elts[i] = DecodeFixedType(Infos, Tys, Context);
808808 return StructType::get(Context, makeArrayRef(Elts,D.Struct_NumElements));
809809 }
810
811810 case IITDescriptor::Argument:
812811 return Tys[D.getArgumentNumber()];
813812 case IITDescriptor::ExtendArgument: {
849848 Type *EltTy = VTy->getVectorElementType();
850849 return PointerType::getUnqual(EltTy);
851850 }
852 case IITDescriptor::VecOfPtrsToElt: {
853 Type *Ty = Tys[D.getArgumentNumber()];
854 VectorType *VTy = dyn_cast(Ty);
855 if (!VTy)
856 llvm_unreachable("Expected an argument of Vector Type");
857 Type *EltTy = VTy->getVectorElementType();
858 return VectorType::get(PointerType::getUnqual(EltTy),
859 VTy->getNumElements());
860 }
851 case IITDescriptor::VecOfAnyPtrsToElt:
852 // Return the overloaded type (which determines the pointers address space)
853 return Tys[D.getOverloadArgNumber()];
861854 }
862855 llvm_unreachable("unhandled");
863856 }
10531046 return (!ThisArgType || !ReferenceType ||
10541047 ThisArgType->getElementType() != ReferenceType->getElementType());
10551048 }
1056 case IITDescriptor::VecOfPtrsToElt: {
1057 if (D.getArgumentNumber() >= ArgTys.size())
1049 case IITDescriptor::VecOfAnyPtrsToElt: {
1050 unsigned RefArgNumber = D.getRefArgNumber();
1051
1052 // This may only be used when referring to a previous argument.
1053 if (RefArgNumber >= ArgTys.size())
10581054 return true;
1059 VectorType * ReferenceType =
1060 dyn_cast (ArgTys[D.getArgumentNumber()]);
1055
1056 // Record the overloaded type
1057 assert(D.getOverloadArgNumber() == ArgTys.size() &&
1058 "Table consistency error");
1059 ArgTys.push_back(Ty);
1060
1061 // Verify the overloaded type "matches" the Ref type.
1062 // i.e. Ty is a vector with the same width as Ref.
1063 // Composed of pointers to the same element type as Ref.
1064 VectorType *ReferenceType = dyn_cast(ArgTys[RefArgNumber]);
10611065 VectorType *ThisArgVecTy = dyn_cast(Ty);
10621066 if (!ThisArgVecTy || !ReferenceType ||
10631067 (ReferenceType->getVectorNumElements() !=
292292 Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context),
293293 NumElts));
294294
295 Type *OverloadedTypes[] = {DataTy, PtrsTy};
295296 Value * Ops[] = {Ptrs, getInt32(Align), Mask, UndefValue::get(DataTy)};
296297
297298 // We specify only one type when we create this intrinsic. Types of other
298299 // arguments are derived from this type.
299 return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, { DataTy }, Name);
300 return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, OverloadedTypes,
301 Name);
300302 }
301303
302304 /// \brief Create a call to a Masked Scatter intrinsic.
322324 if (!Mask)
323325 Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context),
324326 NumElts));
327
328 Type *OverloadedTypes[] = {DataTy, PtrsTy};
325329 Value * Ops[] = {Data, Ptrs, getInt32(Align), Mask};
326330
327331 // We specify only one type when we create this intrinsic. Types of other
328332 // arguments are derived from this type.
329 return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, { DataTy });
333 return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, OverloadedTypes);
330334 }
331335
332336 template
7777 ; SKX-LABEL: test_gather_2f64
7878 ; SKX: Found an estimated cost of 7 {{.*}}.gather
7979
80 %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
80 %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
8181 ret <2 x double> %res
8282 }
83 declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
83 declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
8484
8585 define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) {
8686
9393 ; SKX-LABEL: test_gather_4i32
9494 ; SKX: Found an estimated cost of 6 {{.*}}.gather
9595
96 %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
96 %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
9797 ret <4 x i32> %res
9898 }
9999
108108 ; SKX-LABEL: test_gather_4i32_const_mask
109109 ; SKX: Found an estimated cost of 6 {{.*}}.gather
110110
111 %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0)
111 %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0)
112112 ret <4 x i32> %res
113113 }
114 declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0)
114 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0)
115115
116116 define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) {
117117
127127 %sext_ind = sext <16 x i32> %ind to <16 x i64>
128128 %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
129129
130 %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef)
130 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef)
131131 ret <16 x float>%res
132132 }
133133
145145 %sext_ind = sext <16 x i32> %ind to <16 x i64>
146146 %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
147147
148 %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
148 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
149149 ret <16 x float>%res
150150 }
151151
163163 %sext_ind = sext <16 x i32> %ind to <16 x i64>
164164 %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
165165
166 %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
166 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
167167 ret <16 x float>%res
168168 }
169169
184184 %sext_ind = sext <16 x i32> %ind to <16 x i64>
185185 %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
186186
187 %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef)
187 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef)
188188 ret <16 x float>%res
189189 }
190190
203203
204204 %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
205205 %imask = bitcast i16 %mask to <16 x i1>
206 call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
206 call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
207207 ret void
208208 }
209209
217217 ; SKX-LABEL: test_scatter_8i32
218218 ; SKX: Found an estimated cost of 10 {{.*}}.scatter
219219
220 call void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
221 ret void
222 }
223
224 declare void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask)
220 call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
221 ret void
222 }
223
224 declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask)
225225
226226 define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
227227 ; AVX2-LABEL: test_scatter_4i32
233233 ; SKX-LABEL: test_scatter_4i32
234234 ; SKX: Found an estimated cost of 6 {{.*}}.scatter
235235
236 call void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
236 call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
237237 ret void
238238 }
239239
251251 %sext_ind = sext <4 x i32> %ind to <4 x i64>
252252 %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
253253
254 %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
254 %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
255255 ret <4 x float>%res
256256 }
257257
269269 %sext_ind = sext <4 x i32> %ind to <4 x i64>
270270 %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
271271
272 %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef)
272 %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef)
273273 ret <4 x float>%res
274274 }
275275
276 declare <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> )
277 declare void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask)
278 declare void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask)
279 declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>)
276 declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> )
277 declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask)
278 declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask)
279 declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>)
280280
281281 declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
282282 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
22 %struct.S = type { [1000 x i32] }
33
44
5 declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
5 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
66
77 define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){
88 %temp = insertelement <4 x i64> undef, i64 %base, i32 0
1111 %B = getelementptr inbounds %struct.S, <4 x %struct.S*> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
1212 ;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds [1000 x i32]
1313 %arrayidx = getelementptr inbounds [1000 x i32], <4 x [1000 x i32]*> %B, <4 x i64> zeroinitializer, <4 x i64> %vector
14 %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> , <4 x i32> undef)
14 %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> , <4 x i32> undef)
1515 ret <4 x i32> %res
1616 }
8484 ret void
8585 }
8686
87 declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
88
89 define <2 x double> @tests.masked.gather(<2 x double*> %ptr, <2 x i1> %mask, <2 x double> %passthru) {
90 ; CHECK-LABEL: @tests.masked.gather(
91 ; CHECK: @llvm.masked.gather.v2f64.v2p0f64
92 %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptr, i32 1, <2 x i1> %mask, <2 x double> %passthru)
93 ret <2 x double> %res
94 }
95
96 declare void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)
97
98 define void @tests.masked.scatter(<2 x double*> %ptr, <2 x i1> %mask, <2 x double> %val) {
99 ; CHECK-LABEL: @tests.masked.scatter(
100 ; CHECK: @llvm.masked.scatter.v2f64.v2p0f64
101 call void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptr, i32 3, <2 x i1> %mask)
102 ret void
103 }
87104
88105 declare {}* @llvm.invariant.start(i64, i8* nocapture) nounwind readonly
89106 declare void @llvm.invariant.end({}*, i64, i8* nocapture) nounwind
5353 %sext_ind = sext <16 x i32> %ind to <16 x i64>
5454 %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
5555
56 %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef)
56 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef)
5757 ret <16 x float>%res
5858 }
5959
60 declare <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
61 declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
62 declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> , i32, <8 x i1> , <8 x i32> )
60 declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
61 declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
62 declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> , i32, <8 x i1> , <8 x i32> )
6363
6464
6565 ; SCALAR-LABEL: test2
110110 %sext_ind = sext <16 x i32> %ind to <16 x i64>
111111 %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
112112 %imask = bitcast i16 %mask to <16 x i1>
113 %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> %imask, <16 x float>undef)
113 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> %imask, <16 x float>undef)
114114 ret <16 x float> %res
115115 }
116116
151151 %sext_ind = sext <16 x i32> %ind to <16 x i64>
152152 %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i64> %sext_ind
153153 %imask = bitcast i16 %mask to <16 x i1>
154 %res = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
154 %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
155155 ret <16 x i32> %res
156156 }
157157
204204
205205 %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
206206 %imask = bitcast i16 %mask to <16 x i1>
207 %gt1 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
208 %gt2 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
207 %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
208 %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
209209 %res = add <16 x i32> %gt1, %gt2
210210 ret <16 x i32> %res
211211 }
269269
270270 %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
271271 %imask = bitcast i16 %mask to <16 x i1>
272 call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
273 call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
272 call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
273 call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
274274 ret void
275275 }
276276
277 declare void @llvm.masked.scatter.v8i32(<8 x i32> , <8 x i32*> , i32 , <8 x i1> )
278 declare void @llvm.masked.scatter.v16i32(<16 x i32> , <16 x i32*> , i32 , <16 x i1> )
277 declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> , <8 x i32*> , i32 , <8 x i1> )
278 declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> , <16 x i32*> , i32 , <16 x i1> )
279279
280280
281281 ; SCALAR-LABEL: test6
325325 ; SKX_32-NEXT: vmovdqa %ymm2, %ymm0
326326 ; SKX_32-NEXT: retl
327327
328 %a = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptr, i32 4, <8 x i1> , <8 x i32> undef)
329
330 call void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> )
328 %a = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %ptr, i32 4, <8 x i1> , <8 x i32> undef)
329
330 call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> )
331331 ret <8 x i32>%a
332332 }
333333
383383
384384 %gep.random = getelementptr i32, <8 x i32*> %broadcast.splat, <8 x i32> %ind
385385 %imask = bitcast i8 %mask to <8 x i1>
386 %gt1 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>undef)
387 %gt2 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>%gt1)
386 %gt1 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>undef)
387 %gt2 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>%gt1)
388388 %res = add <8 x i32> %gt1, %gt2
389389 ret <8 x i32> %res
390390 }
443443 ; SKX_32-NEXT: retl
444444
445445 %imask = bitcast i16 %mask to <16 x i1>
446 %gt1 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
447 %gt2 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
446 %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
447 %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
448448 %res = add <16 x i32> %gt1, %gt2
449449 ret <16 x i32> %res
450450 }
521521 %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer
522522
523523 %arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %ind1, <8 x i32> , <8 x i32>, <8 x i32> %ind5, <8 x i64>
524 %res = call <8 x i32 > @llvm.masked.gather.v8i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> , <8 x i32> undef)
524 %res = call <8 x i32 > @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> , <8 x i32> undef)
525525 ret <8 x i32> %res
526526 }
527527
590590 %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer
591591
592592 %arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %i1, i32 2, i32 1, <8 x i32> %ind5, i64 13
593 %res = call <8 x i32 > @llvm.masked.gather.v8i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> , <8 x i32> undef)
593 %res = call <8 x i32 > @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> , <8 x i32> undef)
594594 ret <8 x i32> %res
595595 }
596596
631631
632632 %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind
633633
634 %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef)
634 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef)
635635 ret <16 x float>%res
636636 }
637637
670670 %sext_ind = sext <16 x i32> %ind to <16 x i64>
671671 %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind
672672
673 %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef)
673 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef)
674674 ret <16 x float>%res
675675 }
676676
709709 %sext_ind = sext <16 x i32> %ind to <16 x i64>
710710 %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind
711711
712 %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef)
712 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef)
713713 ret <16 x float>%res
714714 }
715715
771771
772772 %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind
773773
774 %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef)
774 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef)
775775 ret <16 x float>%res
776776 }
777777
778 declare <4 x float> @llvm.masked.gather.v4f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
779 declare <4 x double> @llvm.masked.gather.v4f64(<4 x double*>, i32, <4 x i1>, <4 x double>)
780 declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*>, i32, <2 x i1>, <2 x double>)
778 declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
779 declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32, <4 x i1>, <4 x double>)
780 declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>)
781781
782782 ; Gather smaller than existing instruction
783783 define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) {
830830
831831 %sext_ind = sext <4 x i32> %ind to <4 x i64>
832832 %gep.random = getelementptr float, float* %base, <4 x i64> %sext_ind
833 %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.random, i32 4, <4 x i1> %mask, <4 x float> undef)
833 %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.random, i32 4, <4 x i1> %mask, <4 x float> undef)
834834 ret <4 x float>%res
835835 }
836836
889889
890890 %sext_ind = sext <4 x i32> %ind to <4 x i64>
891891 %gep.random = getelementptr double, double* %base, <4 x i64> %sext_ind
892 %res = call <4 x double> @llvm.masked.gather.v4f64(<4 x double*> %gep.random, i32 4, <4 x i1> %mask, <4 x double> %src0)
892 %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %gep.random, i32 4, <4 x i1> %mask, <4 x double> %src0)
893893 ret <4 x double>%res
894894 }
895895
941941
942942 %sext_ind = sext <2 x i32> %ind to <2 x i64>
943943 %gep.random = getelementptr double, double* %base, <2 x i64> %sext_ind
944 %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %gep.random, i32 4, <2 x i1> %mask, <2 x double> %src0)
944 %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %gep.random, i32 4, <2 x i1> %mask, <2 x double> %src0)
945945 ret <2 x double>%res
946946 }
947947
948 declare void @llvm.masked.scatter.v4i32(<4 x i32> , <4 x i32*> , i32 , <4 x i1> )
949 declare void @llvm.masked.scatter.v4f64(<4 x double> , <4 x double*> , i32 , <4 x i1> )
950 declare void @llvm.masked.scatter.v2i64(<2 x i64> , <2 x i64*> , i32 , <2 x i1> )
951 declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
952 declare void @llvm.masked.scatter.v2f32(<2 x float> , <2 x float*> , i32 , <2 x i1> )
948 declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> , <4 x i32*> , i32 , <4 x i1> )
949 declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> , <4 x double*> , i32 , <4 x i1> )
950 declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> , <2 x i64*> , i32 , <2 x i1> )
951 declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
952 declare void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> , <2 x float*> , i32 , <2 x i1> )
953953
954954 define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
955955 ;
994994 ; SKX_32-NEXT: vptestmd %xmm2, %xmm2, %k1
995995 ; SKX_32-NEXT: vpscatterdd %xmm0, (,%xmm1) {%k1}
996996 ; SKX_32-NEXT: retl
997 call void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
997 call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
998998 ret void
999999 }
10001000
10481048 ; SKX_32-NEXT: vzeroupper
10491049 ; SKX_32-NEXT: retl
10501050 %gep = getelementptr double, double* %ptr, <4 x i64> %ind
1051 call void @llvm.masked.scatter.v4f64(<4 x double> %a1, <4 x double*> %gep, i32 8, <4 x i1> %mask)
1051 call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %a1, <4 x double*> %gep, i32 8, <4 x i1> %mask)
10521052 ret void
10531053 }
10541054
11021102 ; SKX_32-NEXT: kshiftrb $6, %k0, %k1
11031103 ; SKX_32-NEXT: vscatterdps %xmm0, (,%xmm1) {%k1}
11041104 ; SKX_32-NEXT: retl
1105 call void @llvm.masked.scatter.v2f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask)
1105 call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask)
11061106 ret void
11071107 }
11081108
11561156 ; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
11571157 ; SKX_32-NEXT: vzeroupper
11581158 ; SKX_32-NEXT: retl
1159 call void @llvm.masked.scatter.v2i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask)
1159 call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask)
11601160 ret void
11611161 }
11621162
11631163 ; The result type requires widening
1164 declare <2 x float> @llvm.masked.gather.v2f32(<2 x float*>, i32, <2 x i1>, <2 x float>)
1164 declare <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*>, i32, <2 x i1>, <2 x float>)
11651165
11661166 define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x float> %src0) {
11671167 ;
12211221 ; SKX_32-NEXT: retl
12221222 %sext_ind = sext <2 x i32> %ind to <2 x i64>
12231223 %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
1224 %res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0)
1224 %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0)
12251225 ret <2 x float>%res
12261226 }
12271227
1228 declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
1229 declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)
1228 declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
1229 declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)
12301230
12311231 define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %src0) {
12321232 ;
12751275 ; SKX_32-NEXT: retl
12761276 %sext_ind = sext <2 x i32> %ind to <2 x i64>
12771277 %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
1278 %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0)
1278 %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0)
12791279 ret <2 x i32>%res
12801280 }
12811281
13191319 ; SKX_32-NEXT: retl
13201320 %sext_ind = sext <2 x i32> %ind to <2 x i64>
13211321 %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
1322 %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %gep.random, i32 4, <2 x i1> , <2 x i32> undef)
1322 %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> , <2 x i32> undef)
13231323 ret <2 x i32>%res
13241324 }
13251325
13701370 ; SKX_32-NEXT: retl
13711371 %sext_ind = sext <2 x i32> %ind to <2 x i64>
13721372 %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
1373 %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %gep.random, i32 8, <2 x i1> %mask, <2 x i64> %src0)
1373 %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> %mask, <2 x i64> %src0)
13741374 ret <2 x i64>%res
13751375 }
13761376
14171417 ; SKX_32-NEXT: retl
14181418 %sext_ind = sext <2 x i32> %ind to <2 x i64>
14191419 %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
1420 %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %gep.random, i32 8, <2 x i1> , <2 x i64> %src0)
1420 %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> , <2 x i64> %src0)
14211421 ret <2 x i64>%res
14221422 }
14231423
14651465 ; SKX_32-NEXT: retl
14661466 %sext_ind = sext <2 x i32> %ind to <2 x i64>
14671467 %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
1468 %res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %gep.random, i32 4, <2 x i1> , <2 x float> undef)
1468 %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> , <2 x float> undef)
14691469 ret <2 x float>%res
14701470 }
14711471
15141514 ; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
15151515 ; SKX_32-NEXT: vzeroupper
15161516 ; SKX_32-NEXT: retl
1517 call void @llvm.masked.scatter.v2i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> )
1517 call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> )
15181518 ret void
15191519 }
15201520
15671567 %sext_ind = sext <16 x i32> %ind to <16 x i64>
15681568 %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
15691569
1570 %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef)
1570 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef)
15711571 ret <16 x float>%res
15721572 }
15731573
15741574 ; Check non-power-of-2 case. It should be scalarized.
1575 declare <3 x i32> @llvm.masked.gather.v3i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>)
1575 declare <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>)
15761576 define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
15771577 ; ALL-LABEL: test30:
15781578 ; ALL-NOT: gather
15791579
15801580 %sext_ind = sext <3 x i32> %ind to <3 x i64>
15811581 %gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind
1582 %res = call <3 x i32> @llvm.masked.gather.v3i32(<3 x i32*> %gep.random, i32 4, <3 x i1> %mask, <3 x i32> %src0)
1582 %res = call <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*> %gep.random, i32 4, <3 x i1> %mask, <3 x i32> %src0)
15831583 ret <3 x i32>%res
15841584 }
15851585
1586 declare <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>)
1586 declare <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>)
15871587
15881588 ; KNL-LABEL: test31
15891589 ; KNL: vpgatherqq
16251625 ; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm0
16261626 ; SKX_32-NEXT: retl
16271627
1628 %res = call <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> , <16 x float*> undef)
1628 %res = call <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> , <16 x float*> undef)
16291629 ret <16 x float*>%res
16301630 }
16311631
16711671 ; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
16721672 ; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0
16731673 ; SKX_32-NEXT: retl
1674 %res = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <16 x i32> %src0)
1674 %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <16 x i32> %src0)
16751675 ret <16 x i32> %res
16761676 }
16771677 define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
17481748 ; SKX_32-NEXT: movl %ebp, %esp
17491749 ; SKX_32-NEXT: popl %ebp
17501750 ; SKX_32-NEXT: retl
1751 %res = call <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
1751 %res = call <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
17521752 ret <16 x i64> %res
17531753 }
1754 declare <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*> %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)
1754 declare <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)
17551755 define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) {
17561756 ; KNL_64-LABEL: test_gather_16f32:
17571757 ; KNL_64: # BB#0:
17941794 ; SKX_32-NEXT: vgatherdps (,%zmm0), %zmm2 {%k1}
17951795 ; SKX_32-NEXT: vmovaps %zmm2, %zmm0
17961796 ; SKX_32-NEXT: retl
1797 %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
1797 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
17981798 ret <16 x float> %res
17991799 }
18001800 define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) {
18711871 ; SKX_32-NEXT: movl %ebp, %esp
18721872 ; SKX_32-NEXT: popl %ebp
18731873 ; SKX_32-NEXT: retl
1874 %res = call <16 x double> @llvm.masked.gather.v16f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
1874 %res = call <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
18751875 ret <16 x double> %res
18761876 }
1877 declare <16 x double> @llvm.masked.gather.v16f64(<16 x double*> %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)
1877 declare <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)
18781878 define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) {
18791879 ; KNL_64-LABEL: test_scatter_16i32:
18801880 ; KNL_64: # BB#0:
19171917 ; SKX_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1}
19181918 ; SKX_32-NEXT: vzeroupper
19191919 ; SKX_32-NEXT: retl
1920 call void @llvm.masked.scatter.v16i32(<16 x i32> %src0, <16 x i32*> %ptrs, i32 4, <16 x i1> %mask)
1920 call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %src0, <16 x i32*> %ptrs, i32 4, <16 x i1> %mask)
19211921 ret void
19221922 }
19231923 define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
19921992 ; SKX_32-NEXT: popl %ebp
19931993 ; SKX_32-NEXT: vzeroupper
19941994 ; SKX_32-NEXT: retl
1995 call void @llvm.masked.scatter.v16i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask)
1995 call void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask)
19961996 ret void
19971997 }
1998 declare void @llvm.masked.scatter.v16i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32, <16 x i1> %mask)
1998 declare void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32, <16 x i1> %mask)
19991999 define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) {
20002000 ; KNL_64-LABEL: test_scatter_16f32:
20012001 ; KNL_64: # BB#0:
20382038 ; SKX_32-NEXT: vscatterdps %zmm2, (,%zmm0) {%k1}
20392039 ; SKX_32-NEXT: vzeroupper
20402040 ; SKX_32-NEXT: retl
2041 call void @llvm.masked.scatter.v16f32(<16 x float> %src0, <16 x float*> %ptrs, i32 4, <16 x i1> %mask)
2041 call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32 4, <16 x i1> %mask)
20422042 ret void
20432043 }
2044 declare void @llvm.masked.scatter.v16f32(<16 x float> %src0, <16 x float*> %ptrs, i32, <16 x i1> %mask)
2044 declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32, <16 x i1> %mask)
20452045 define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) {
20462046 ; KNL_64-LABEL: test_scatter_16f64:
20472047 ; KNL_64: # BB#0:
21142114 ; SKX_32-NEXT: popl %ebp
21152115 ; SKX_32-NEXT: vzeroupper
21162116 ; SKX_32-NEXT: retl
2117 call void @llvm.masked.scatter.v16f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask)
2117 call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask)
21182118 ret void
21192119 }
2120 declare void @llvm.masked.scatter.v16f64(<16 x double> %src0, <16 x double*> %ptrs, i32, <16 x i1> %mask)
2120 declare void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32, <16 x i1> %mask)
21212121
21222122 define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i64> %d) {
21232123 ; KNL_64-LABEL: test_pr28312:
21922192 ; SKX_32-NEXT: movl %ebp, %esp
21932193 ; SKX_32-NEXT: popl %ebp
21942194 ; SKX_32-NEXT: retl
2195 %g1 = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
2196 %g2 = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
2197 %g3 = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
2195 %g1 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
2196 %g2 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
2197 %g3 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
21982198 %a = add <4 x i64> %g1, %g2
21992199 %b = add <4 x i64> %a, %g3
22002200 ret <4 x i64> %b
22012201 }
2202 declare <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)
2202 declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -functionattrs -S | FileCheck %s
12 ; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(function-attrs)' -S | FileCheck %s
23 @x = global i32 0
6768 }
6869
6970 ; CHECK: declare void @llvm.masked.scatter
70 declare void @llvm.masked.scatter.v4i32(<4 x i32>%val, <4 x i32*>, i32, <4 x i1>)
71 declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*>, i32, <4 x i1>)
7172
7273 ; CHECK-NOT: readnone
7374 ; CHECK-NOT: readonly
7475 ; CHECK: define void @test9
7576 define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) {
76 call void @llvm.masked.scatter.v4i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1>)
77 call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1>)
7778 ret void
7879 }
7980
8081 ; CHECK: declare <4 x i32> @llvm.masked.gather
81 declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
82 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
8283 ; CHECK: readonly
8384 ; CHECK: define <4 x i32> @test10
8485 define <4 x i32> @test10(<4 x i32*> %ptrs) {
85 %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1>, <4 x i32>undef)
86 %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1>, <4 x i32>undef)
8687 ret <4 x i32> %res
8788 }
8889
0 ; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
11
2 declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
3 declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
2 declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
3 declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
44
55 ; This test ensures that masked scatter and gather operations, which take vectors of pointers,
66 ; do not have pointer aliasing ignored when being processed.
1919 %tmp.i = insertelement <2 x i32*> undef, i32* %tmp.0, i32 0
2020 %tmp = insertelement <2 x i32*> %tmp.i, i32* %tmp.1, i32 1
2121 ; Read from in1 and in2
22 %in1.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in1, i32 1, <2 x i1> , <2 x i32> undef) #1
23 %in2.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in2, i32 1, <2 x i1> , <2 x i32> undef) #1
22 %in1.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in1, i32 1, <2 x i1> , <2 x i32> undef) #1
23 %in2.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in2, i32 1, <2 x i1> , <2 x i32> undef) #1
2424 ; Store in1 to the allocas
25 call void @llvm.masked.scatter.v2i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> );
25 call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> );
2626 ; Read in1 from the allocas
2727 ; This gather should alias the scatter we just saw
28 %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> , <2 x i32> undef) #1
28 %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> , <2 x i32> undef) #1
2929 ; Store in2 to the allocas
30 call void @llvm.masked.scatter.v2i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> );
30 call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> );
3131 ; Read in2 from the allocas
3232 ; This gather should alias the scatter we just saw, and not be eliminated
33 %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> , <2 x i32> undef) #1
33 %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> , <2 x i32> undef) #1
3434 ; Store in2 to out for good measure
3535 %tmp.v.1.0 = extractelement <2 x i32> %tmp.v.1, i32 0
3636 %tmp.v.1.1 = extractelement <2 x i32> %tmp.v.1, i32 1
11
22 declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
33 declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask)
4 declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru)
5 declare void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)
4 declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru)
5 declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)
66
77 define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru) {
88 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
4848 }
4949
5050 define <2 x double> @gather_zeromask(<2 x double*> %ptrs, <2 x double> %passthru) {
51 %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 5, <2 x i1> zeroinitializer, <2 x double> %passthru)
51 %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 5, <2 x i1> zeroinitializer, <2 x double> %passthru)
5252 ret <2 x double> %res
5353
5454 ; CHECK-LABEL: @gather_zeromask(
5656 }
5757
5858 define void @scatter_zeromask(<2 x double*> %ptrs, <2 x double> %val) {
59 call void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32 6, <2 x i1> zeroinitializer)
59 call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 6, <2 x i1> zeroinitializer)
6060 ret void
6161
6262 ; CHECK-LABEL: @scatter_zeromask(
3535 ; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <80 x float>, <80 x float>* [[TMP4]], align 4
3636 ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <80 x float> [[WIDE_VEC1]], <80 x float> undef, <16 x i32>
3737 ; CHECK-NEXT: [[TMP5:%.*]] = fadd <16 x float> [[STRIDED_VEC2]], [[TMP2]]
38 ; CHECK-NEXT: call void @llvm.masked.scatter.v16f32(<16 x float> [[TMP5]], <16 x float*> [[TMP3]], i32 4, <16 x i1> )
38 ; CHECK-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[TMP5]], <16 x float*> [[TMP3]], i32 4, <16 x i1> )
3939 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
4040 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]],
4141 ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
1616 ;}
1717
1818 ;AVX512-LABEL: @foo1
19 ;AVX512: llvm.masked.load.v16i32
20 ;AVX512: llvm.masked.gather.v16f32
21 ;AVX512: llvm.masked.store.v16f32
19 ;AVX512: llvm.masked.load.v16i32.p0v16i32
20 ;AVX512: llvm.masked.gather.v16f32.v16p0f32
21 ;AVX512: llvm.masked.store.v16f32.p0v16f32
2222 ;AVX512: ret void
2323
2424 ; Function Attrs: nounwind uwtable
9595
9696 ;AVX512-LABEL: @foo2
9797 ;AVX512: getelementptr inbounds %struct.In, %struct.In* %in, <16 x i64> {{.*}}, i32 1
98 ;AVX512: llvm.masked.gather.v16f32
99 ;AVX512: llvm.masked.scatter.v16f32
98 ;AVX512: llvm.masked.gather.v16f32.v16p0f32
99 ;AVX512: llvm.masked.scatter.v16f32.v16p0f32
100100 ;AVX512: ret void
101101 define void @foo2(%struct.In* noalias %in, float* noalias %out, i32* noalias %trigger, i32* noalias %index) #0 {
102102 entry:
170170
171171 ;AVX512-LABEL: @foo3
172172 ;AVX512: getelementptr inbounds %struct.In, %struct.In* %in, <16 x i64> {{.*}}, i32 1
173 ;AVX512: llvm.masked.gather.v16f32
173 ;AVX512: llvm.masked.gather.v16f32.v16p0f32
174174 ;AVX512: fadd <16 x float>
175175 ;AVX512: getelementptr inbounds %struct.Out, %struct.Out* %out, <16 x i64> {{.*}}, i32 1
176 ;AVX512: llvm.masked.scatter.v16f32
176 ;AVX512: llvm.masked.scatter.v16f32.v16p0f32
177177 ;AVX512: ret void
178178
179179 %struct.Out = type { float, float }
232232 for.end: ; preds = %for.cond
233233 ret void
234234 }
235 declare void @llvm.masked.scatter.v16f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
235 declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
236
237 ; The same as @foo2 but the scatter/gather argument is a vector of ptrs with address space 1
238
239 ;AVX512-LABEL: @foo2_addrspace
240 ;AVX512: getelementptr inbounds %struct.In, %struct.In addrspace(1)* %in, <16 x i64> {{.*}}, i32 1
241 ;AVX512: llvm.masked.gather.v16f32.v16p1f32
242 ;AVX512: llvm.masked.scatter.v16f32.v16p1f32
243 ;AVX512: ret void
244 define void @foo2_addrspace(%struct.In addrspace(1)* noalias %in, float addrspace(1)* noalias %out, i32* noalias %trigger, i32* noalias %index) #0 {
245 entry:
246 %in.addr = alloca %struct.In addrspace(1)*, align 8
247 %out.addr = alloca float addrspace(1)*, align 8
248 %trigger.addr = alloca i32*, align 8
249 %index.addr = alloca i32*, align 8
250 %i = alloca i32, align 4
251 store %struct.In addrspace(1)* %in, %struct.In addrspace(1)** %in.addr, align 8
252 store float addrspace(1)* %out, float addrspace(1)** %out.addr, align 8
253 store i32* %trigger, i32** %trigger.addr, align 8
254 store i32* %index, i32** %index.addr, align 8
255 store i32 0, i32* %i, align 4
256 br label %for.cond
257
258 for.cond: ; preds = %for.inc, %entry
259 %0 = load i32, i32* %i, align 4
260 %cmp = icmp slt i32 %0, 4096
261 br i1 %cmp, label %for.body, label %for.end
262
263 for.body: ; preds = %for.cond
264 %1 = load i32, i32* %i, align 4
265 %idxprom = sext i32 %1 to i64
266 %2 = load i32*, i32** %trigger.addr, align 8
267 %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
268 %3 = load i32, i32* %arrayidx, align 4
269 %cmp1 = icmp sgt i32 %3, 0
270 br i1 %cmp1, label %if.then, label %if.end
271
272 if.then: ; preds = %for.body
273 %4 = load i32, i32* %i, align 4
274 %idxprom2 = sext i32 %4 to i64
275 %5 = load %struct.In addrspace(1)*, %struct.In addrspace(1)** %in.addr, align 8
276 %arrayidx3 = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %5, i64 %idxprom2
277 %b = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %arrayidx3, i32 0, i32 1
278 %6 = load float, float addrspace(1)* %b, align 4
279 %add = fadd float %6, 5.000000e-01
280 %7 = load i32, i32* %i, align 4
281 %idxprom4 = sext i32 %7 to i64
282 %8 = load float addrspace(1)*, float addrspace(1)** %out.addr, align 8
283 %arrayidx5 = getelementptr inbounds float, float addrspace(1)* %8, i64 %idxprom4
284 store float %add, float addrspace(1)* %arrayidx5, align 4
285 br label %if.end
286
287 if.end: ; preds = %if.then, %for.body
288 br label %for.inc
289
290 for.inc: ; preds = %if.end
291 %9 = load i32, i32* %i, align 4
292 %inc = add nsw i32 %9, 16
293 store i32 %inc, i32* %i, align 4
294 br label %for.cond
295
296 for.end: ; preds = %for.cond
297 ret void
298 }
299
300 ; Same as foo2_addrspace but here only the input has the non-default address space.
301
302 ;AVX512-LABEL: @foo2_addrspace2
303 ;AVX512: getelementptr inbounds %struct.In, %struct.In addrspace(1)* %in, <16 x i64> {{.*}}, i32 1
304 ;AVX512: llvm.masked.gather.v16f32.v16p1f32
305 ;AVX512: llvm.masked.scatter.v16f32.v16p0f32
306 ;AVX512: ret void
307 define void @foo2_addrspace2(%struct.In addrspace(1)* noalias %in, float addrspace(0)* noalias %out, i32* noalias %trigger, i32* noalias %index) {
308 entry:
309 %in.addr = alloca %struct.In addrspace(1)*, align 8
310 %out.addr = alloca float addrspace(0)*, align 8
311 %trigger.addr = alloca i32*, align 8
312 %index.addr = alloca i32*, align 8
313 %i = alloca i32, align 4
314 store %struct.In addrspace(1)* %in, %struct.In addrspace(1)** %in.addr, align 8
315 store float addrspace(0)* %out, float addrspace(0)** %out.addr, align 8
316 store i32* %trigger, i32** %trigger.addr, align 8
317 store i32* %index, i32** %index.addr, align 8
318 store i32 0, i32* %i, align 4
319 br label %for.cond
320
321 for.cond: ; preds = %for.inc, %entry
322 %0 = load i32, i32* %i, align 4
323 %cmp = icmp slt i32 %0, 4096
324 br i1 %cmp, label %for.body, label %for.end
325
326 for.body: ; preds = %for.cond
327 %1 = load i32, i32* %i, align 4
328 %idxprom = sext i32 %1 to i64
329 %2 = load i32*, i32** %trigger.addr, align 8
330 %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
331 %3 = load i32, i32* %arrayidx, align 4
332 %cmp1 = icmp sgt i32 %3, 0
333 br i1 %cmp1, label %if.then, label %if.end
334
335 if.then: ; preds = %for.body
336 %4 = load i32, i32* %i, align 4
337 %idxprom2 = sext i32 %4 to i64
338 %5 = load %struct.In addrspace(1)*, %struct.In addrspace(1)** %in.addr, align 8
339 %arrayidx3 = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %5, i64 %idxprom2
340 %b = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %arrayidx3, i32 0, i32 1
341 %6 = load float, float addrspace(1)* %b, align 4
342 %add = fadd float %6, 5.000000e-01
343 %7 = load i32, i32* %i, align 4
344 %idxprom4 = sext i32 %7 to i64
345 %8 = load float addrspace(0)*, float addrspace(0)** %out.addr, align 8
346 %arrayidx5 = getelementptr inbounds float, float addrspace(0)* %8, i64 %idxprom4
347 store float %add, float addrspace(0)* %arrayidx5, align 4
348 br label %if.end
349
350 if.end: ; preds = %if.then, %for.body
351 br label %for.inc
352
353 for.inc: ; preds = %if.end
354 %9 = load i32, i32* %i, align 4
355 %inc = add nsw i32 %9, 16
356 store i32 %inc, i32* %i, align 4
357 br label %for.cond
358
359 for.end: ; preds = %for.cond
360 ret void
361 }
362
363 ; Same as foo2_addrspace but here only the output has the non-default address space.
364
365 ;AVX512-LABEL: @foo2_addrspace3
366 ;AVX512: getelementptr inbounds %struct.In, %struct.In* %in, <16 x i64> {{.*}}, i32 1
367 ;AVX512: llvm.masked.gather.v16f32.v16p0f32
368 ;AVX512: llvm.masked.scatter.v16f32.v16p1f32
369 ;AVX512: ret void
370
371 define void @foo2_addrspace3(%struct.In addrspace(0)* noalias %in, float addrspace(1)* noalias %out, i32* noalias %trigger, i32* noalias %index) {
372 entry:
373 %in.addr = alloca %struct.In addrspace(0)*, align 8
374 %out.addr = alloca float addrspace(1)*, align 8
375 %trigger.addr = alloca i32*, align 8
376 %index.addr = alloca i32*, align 8
377 %i = alloca i32, align 4
378 store %struct.In addrspace(0)* %in, %struct.In addrspace(0)** %in.addr, align 8
379 store float addrspace(1)* %out, float addrspace(1)** %out.addr, align 8
380 store i32* %trigger, i32** %trigger.addr, align 8
381 store i32* %index, i32** %index.addr, align 8
382 store i32 0, i32* %i, align 4
383 br label %for.cond
384
385 for.cond: ; preds = %for.inc, %entry
386 %0 = load i32, i32* %i, align 4
387 %cmp = icmp slt i32 %0, 4096
388 br i1 %cmp, label %for.body, label %for.end
389
390 for.body: ; preds = %for.cond
391 %1 = load i32, i32* %i, align 4
392 %idxprom = sext i32 %1 to i64
393 %2 = load i32*, i32** %trigger.addr, align 8
394 %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
395 %3 = load i32, i32* %arrayidx, align 4
396 %cmp1 = icmp sgt i32 %3, 0
397 br i1 %cmp1, label %if.then, label %if.end
398
399 if.then: ; preds = %for.body
400 %4 = load i32, i32* %i, align 4
401 %idxprom2 = sext i32 %4 to i64
402 %5 = load %struct.In addrspace(0)*, %struct.In addrspace(0)** %in.addr, align 8
403 %arrayidx3 = getelementptr inbounds %struct.In, %struct.In addrspace(0)* %5, i64 %idxprom2
404 %b = getelementptr inbounds %struct.In, %struct.In addrspace(0)* %arrayidx3, i32 0, i32 1
405 %6 = load float, float addrspace(0)* %b, align 4
406 %add = fadd float %6, 5.000000e-01
407 %7 = load i32, i32* %i, align 4
408 %idxprom4 = sext i32 %7 to i64
409 %8 = load float addrspace(1)*, float addrspace(1)** %out.addr, align 8
410 %arrayidx5 = getelementptr inbounds float, float addrspace(1)* %8, i64 %idxprom4
411 store float %add, float addrspace(1)* %arrayidx5, align 4
412 br label %if.end
413
414 if.end: ; preds = %if.then, %for.body
415 br label %for.inc
416
417 for.inc: ; preds = %if.end
418 %9 = load i32, i32* %i, align 4
419 %inc = add nsw i32 %9, 16
420 store i32 %inc, i32* %i, align 4
421 br label %for.cond
422
423 for.end: ; preds = %for.cond
424 ret void
425 }
2222 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, <16 x i64> [[VEC_IND]]
2323 ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
2424 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP11]], <16 x i64> [[TMP12]], i64 0
25 ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32(<16 x i32> , <16 x i32*> [[TMP13]], i32 16, <16 x i1> )
25 ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> , <16 x i32*> [[TMP13]], i32 16, <16 x i1> )
2626 ; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i64> [[VEC_IND3]],
2727 ; CHECK-NEXT: [[TMP15:%.*]] = add nsw <16 x i64> [[TMP10]], [[TMP14]]
2828 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP11]], <16 x i64> [[TMP15]], i64 0
29 ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32(<16 x i32> , <16 x i32*> [[TMP16]], i32 8, <16 x i1> )
29 ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> , <16 x i32*> [[TMP16]], i32 8, <16 x i1> )
3030 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
3131 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]],
3232 ; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <16 x i64> [[VEC_IND3]],
0 ; XFAIL: *
11 ; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
22
3 declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
4 declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
3 declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
4 declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
55
66 ; This test ensures that masked scatter and gather operations, which take vectors of pointers,
77 ; do not have pointer aliasing ignored when being processed.
2020 %tmp.i = insertelement <2 x i32*> undef, i32* %tmp.0, i32 0
2121 %tmp = insertelement <2 x i32*> %tmp.i, i32* %tmp.1, i32 1
2222 ; Read from in1 and in2
23 %in1.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in1, i32 1, <2 x i1> , <2 x i32> undef) #1
24 %in2.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in2, i32 1, <2 x i1> , <2 x i32> undef) #1
23 %in1.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in1, i32 1, <2 x i1> , <2 x i32> undef) #1
24 %in2.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in2, i32 1, <2 x i1> , <2 x i32> undef) #1
2525 ; Store in1 to the allocas
26 call void @llvm.masked.scatter.v2i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> );
26 call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> );
2727 ; Read in1 from the allocas
2828 ; This gather should alias the scatter we just saw
29 %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> , <2 x i32> undef) #1
29 %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> , <2 x i32> undef) #1
3030 ; Store in2 to the allocas
31 call void @llvm.masked.scatter.v2i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> );
31 call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> );
3232 ; Read in2 from the allocas
3333 ; This gather should alias the scatter we just saw, and not be eliminated
34 %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> , <2 x i32> undef) #1
34 %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> , <2 x i32> undef) #1
3535 ; Store in2 to out for good measure
3636 %tmp.v.1.0 = extractelement <2 x i32> %tmp.v.1, i32 0
3737 %tmp.v.1.1 = extractelement <2 x i32> %tmp.v.1, i32 1
0 ; RUN: not opt -verify < %s 2>&1 | FileCheck %s
1
2 ; Mask is not a vector
3 ; CHECK: Intrinsic has incorrect argument type!
4 define <16 x float> @gather2(<16 x float*> %ptrs, <16 x i1>* %mask, <16 x float> %passthru) {
5 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %ptrs, i32 4, <16 x i1>* %mask, <16 x float> %passthru)
6 ret <16 x float> %res
7 }
8 declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>*, <16 x float>)
9
10 ; Mask length != return length
11 ; CHECK: Intrinsic has incorrect argument type!
12 define <8 x float> @gather3(<8 x float*> %ptrs, <16 x i1> %mask, <8 x float> %passthru) {
13 %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %ptrs, i32 4, <16 x i1> %mask, <8 x float> %passthru)
14 ret <8 x float> %res
15 }
16 declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <16 x i1>, <8 x float>)
17
18 ; Return type is not a vector
19 ; CHECK: Intrinsic has incorrect return type!
20 define <8 x float>* @gather4(<8 x float*> %ptrs, <8 x i1> %mask, <8 x float> %passthru) {
21 %res = call <8 x float>* @llvm.masked.gather.p0v8f32.v8p0f32(<8 x float*> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru)
22 ret <8 x float>* %res
23 }
24 declare <8 x float>* @llvm.masked.gather.p0v8f32.v8p0f32(<8 x float*>, i32, <8 x i1>, <8 x float>)
25
26 ; Value type is not a vector
27 ; CHECK: Intrinsic has incorrect argument type!
28 define <8 x float> @gather5(<8 x float*>* %ptrs, <8 x i1> %mask, <8 x float> %passthru) {
29 %res = call <8 x float> @llvm.masked.gather.v8f32.p0v8p0f32(<8 x float*>* %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru)
30 ret <8 x float> %res
31 }
32 declare <8 x float> @llvm.masked.gather.v8f32.p0v8p0f32(<8 x float*>*, i32, <8 x i1>, <8 x float>)
33
34 ; Value type is not a vector of pointers
35 ; CHECK: Intrinsic has incorrect argument type!
36 define <8 x float> @gather6(<8 x float> %ptrs, <8 x i1> %mask, <8 x float> %passthru) {
37 %res = call <8 x float> @llvm.masked.gather.v8f32.v8f32(<8 x float> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru)
38 ret <8 x float> %res
39 }
40 declare <8 x float> @llvm.masked.gather.v8f32.v8f32(<8 x float>, i32, <8 x i1>, <8 x float>)
41
42 ; Value element type != vector of pointers element
43 ; CHECK: Intrinsic has incorrect argument type!
44 define <8 x float> @gather7(<8 x double*> %ptrs, <8 x i1> %mask, <8 x float> %passthru) {
45 %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0f64(<8 x double*> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru)
46 ret <8 x float> %res
47 }
48 declare <8 x float> @llvm.masked.gather.v8f32.v8p0f64(<8 x double*>, i32, <8 x i1>, <8 x float>)
49
50 ; Value length!= vector of pointers length
51 ; CHECK: Intrinsic has incorrect argument type!
52 define <8 x float> @gather8(<16 x float*> %ptrs, <8 x i1> %mask, <8 x float> %passthru) {
53 %res = call <8 x float> @llvm.masked.gather.v8f32.v16p0f32(<16 x float*> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru)
54 ret <8 x float> %res
55 }
56 declare <8 x float> @llvm.masked.gather.v8f32.v16p0f32(<16 x float*>, i32, <8 x i1>, <8 x float>)
57
58 ; Passthru type doesn't match return type
59 ; CHECK: Intrinsic has incorrect argument type!
60 define <16 x i32> @gather9(<16 x i32*> %ptrs, <16 x i1> %mask, <8 x i32> %passthru) {
61 %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <8 x i32> %passthru)
62 ret <16 x i32> %res
63 }
64 declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <8 x i32>)
65
66 ; Mask is not a vector
67 ; CHECK: Intrinsic has incorrect argument type!
68 define void @scatter2(<16 x float> %value, <16 x float*> %ptrs, <16 x i1>* %mask) {
69 call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %value, <16 x float*> %ptrs, i32 4, <16 x i1>* %mask)
70 ret void
71 }
72 declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>*)
73
74 ; Mask length != value length
75 ; CHECK: Intrinsic has incorrect argument type!
76 define void @scatter3(<8 x float> %value, <8 x float*> %ptrs, <16 x i1> %mask) {
77 call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %value, <8 x float*> %ptrs, i32 4, <16 x i1> %mask)
78 ret void
79 }
80 declare void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float>, <8 x float*>, i32, <16 x i1>)
81
82 ; Value type is not a vector
83 ; CHECK: Intrinsic has incorrect argument type!
84 define void @scatter4(<8 x float>* %value, <8 x float*> %ptrs, <8 x i1> %mask) {
85 call void @llvm.masked.scatter.p0v8f32.v8p0f32(<8 x float>* %value, <8 x float*> %ptrs, i32 4, <8 x i1> %mask)
86 ret void
87 }
88 declare void @llvm.masked.scatter.p0v8f32.v8p0f32(<8 x float>*, <8 x float*>, i32, <8 x i1>)
89
90 ; ptrs is not a vector
91 ; CHECK: Intrinsic has incorrect argument type!
92 define void @scatter5(<8 x float> %value, <8 x float*>* %ptrs, <8 x i1> %mask) {
93 call void @llvm.masked.scatter.v8f32.p0v8p0f32(<8 x float> %value, <8 x float*>* %ptrs, i32 4, <8 x i1> %mask)
94 ret void
95 }
96 declare void @llvm.masked.scatter.v8f32.p0v8p0f32(<8 x float>, <8 x float*>*, i32, <8 x i1>)
97
98 ; Value type is not a vector of pointers
99 ; CHECK: Intrinsic has incorrect argument type!
100 define void @scatter6(<8 x float> %value, <8 x float> %ptrs, <8 x i1> %mask) {
101 call void @llvm.masked.scatter.v8f32.v8f32(<8 x float> %value, <8 x float> %ptrs, i32 4, <8 x i1> %mask)
102 ret void
103 }
104 declare void @llvm.masked.scatter.v8f32.v8f32(<8 x float>, <8 x float>, i32, <8 x i1>)
105
106 ; Value element type != vector of pointers element
107 ; CHECK: Intrinsic has incorrect argument type!
108 define void @scatter7(<8 x float> %value, <8 x double*> %ptrs, <8 x i1> %mask) {
109 call void @llvm.masked.scatter.v8f32.v8p0f64(<8 x float> %value, <8 x double*> %ptrs, i32 4, <8 x i1> %mask)
110 ret void
111 }
112 declare void @llvm.masked.scatter.v8f32.v8p0f64(<8 x float>, <8 x double*>, i32, <8 x i1>)
113
114 ; Value length!= vector of pointers length
115 ; CHECK: Intrinsic has incorrect argument type!
116 define void @scatter8(<8 x float> %value, <16 x float*> %ptrs, <8 x i1> %mask) {
117 call void @llvm.masked.scatter.v8f32.v16p0f32(<8 x float> %value, <16 x float*> %ptrs, i32 4, <8 x i1> %mask)
118 ret void
119 }
120 declare void @llvm.masked.scatter.v8f32.v16p0f32(<8 x float>, <16 x float*>, i32, <8 x i1>)
121
210210 IIT_SAME_VEC_WIDTH_ARG = 31,
211211 IIT_PTR_TO_ARG = 32,
212212 IIT_PTR_TO_ELT = 33,
213 IIT_VEC_OF_PTRS_TO_ELT = 34,
213 IIT_VEC_OF_ANYPTRS_TO_ELT = 34,
214214 IIT_I128 = 35,
215215 IIT_V512 = 36,
216216 IIT_V1024 = 37
217217 };
218
219218
220219 static void EncodeFixedValueType(MVT::SimpleValueType VT,
221220 std::vector &Sig) {
272271 }
273272 else if (R->isSubClassOf("LLVMPointerTo"))
274273 Sig.push_back(IIT_PTR_TO_ARG);
275 else if (R->isSubClassOf("LLVMVectorOfPointersToElt"))
276 Sig.push_back(IIT_VEC_OF_PTRS_TO_ELT);
277 else if (R->isSubClassOf("LLVMPointerToElt"))
274 else if (R->isSubClassOf("LLVMVectorOfAnyPointersToElt")) {
275 Sig.push_back(IIT_VEC_OF_ANYPTRS_TO_ELT);
276 unsigned ArgNo = ArgCodes.size();
277 ArgCodes.push_back(3 /*vAny*/);
278 // Encode overloaded ArgNo
279 Sig.push_back(ArgNo);
280 // Encode LLVMMatchType ArgNo
281 Sig.push_back(Number);
282 return;
283 } else if (R->isSubClassOf("LLVMPointerToElt"))
278284 Sig.push_back(IIT_PTR_TO_ELT);
279285 else
280286 Sig.push_back(IIT_ARG);