llvm.org GIT mirror llvm / 6e8fb99
IR: Add immarg attribute This indicates an intrinsic parameter is required to be a constant, and should not be replaced with a non-constant value. Add the attribute to all AMDGPU and generic intrinsics that comments indicate it should apply to. I scanned other target intrinsics, but I don't see any obvious comments indicating which arguments are intended to be only immediates. This breaks one questionable testcase for the autoupgrade. I'm unclear on whether the autoupgrade is supposed to really handle declarations which were never valid. The verifier fails because the attributes now refer to a parameter past the end of the argument list. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355981 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 1 year, 7 months ago
49 changed file(s) with 1565 addition(s) and 687 deletion(s). Raw diff Collapse all Expand all
11761176 These constraints also allow LLVM to assume that a ``swifterror`` argument
11771177 does not alias any other memory visible within a function and that a
11781178 ``swifterror`` alloca passed as an argument does not escape.
1179
1180 ``immarg``
1181 This indicates the parameter is required to be an immediate
1182 value. This must be a trivial immediate integer or floating-point
1183 constant. Undef or constant expressions are not valid. This is
1184 only valid on intrinsic declarations and cannot be applied to a
1185 call site or arbitrary function.
11791186
11801187 .. _gc:
11811188
5656
5757 Changes to the LLVM IR
5858 ----------------------
59
60 * Added ``immarg`` parameter attribute. This indicates an intrinsic
61 parameter is required to be a simple constant. This annotation must
62 be accurate to avoid possible miscompiles.
5963
6064
6165 Changes to the ARM Backend
604604 ATTR_KIND_OPT_FOR_FUZZING = 57,
605605 ATTR_KIND_SHADOWCALLSTACK = 58,
606606 ATTR_KIND_SPECULATIVE_LOAD_HARDENING = 59,
607 ATTR_KIND_IMMARG = 60
607608 };
608609
609610 enum ComdatSelectionKindCodes {
128128
129129 /// Return value is always equal to this argument.
130130 def Returned : EnumAttr<"returned">;
131
132 /// Parameter is required to be a trivial constant.
133 def ImmArg : EnumAttr<"immarg">;
131134
132135 /// Function can return twice.
133136 def ReturnsTwice : EnumAttr<"returns_twice">;
6565 // Returned - The specified argument is always the return value of the
6666 // intrinsic.
6767 class Returned<int argNo> : IntrinsicProperty {
68 int ArgNo = argNo;
69 }
70
71 // ImmArg - The specified argument must be an immediate.
72 class ImmArg<int argNo> : IntrinsicProperty {
6873 int ArgNo = argNo;
6974 }
7075
396401
397402 //===--------------------- Code Generator Intrinsics ----------------------===//
398403 //
399 def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
404 def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
400405 def int_addressofreturnaddress : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
401 def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
406 def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
402407 def int_sponentry : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
403408 def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty],
404409 [IntrReadMem], "llvm.read_register">;
416421 // to an escaped allocation indicated by the index.
417422 def int_localrecover : Intrinsic<[llvm_ptr_ty],
418423 [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty],
419 [IntrNoMem]>;
424 [IntrNoMem, ImmArg<2>]>;
420425
421426 // Given the frame pointer passed into an SEH filter function, returns a
422427 // pointer to the local variable area suitable for use with llvm.localrecover.
442447 // memory while not impeding optimization.
443448 def int_prefetch
444449 : Intrinsic<[], [ llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ],
445 [ IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0> ]>;
450 [ IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0>,
451 ImmArg<1>, ImmArg<2>]>;
446452 def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>;
447453
448454 def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
483489 [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
484490 llvm_i1_ty],
485491 [IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
486 WriteOnly<0>, ReadOnly<1>]>;
492 WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>;
487493 def int_memmove : Intrinsic<[],
488494 [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
489495 llvm_i1_ty],
490496 [IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
491 ReadOnly<1>]>;
497 ReadOnly<1>, ImmArg<3>]>;
492498 def int_memset : Intrinsic<[],
493499 [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty,
494500 llvm_i1_ty],
495 [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
501 [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>,
502 ImmArg<3>]>;
496503
497504 // FIXME: Add version of these floating point intrinsics which allow non-default
498505 // rounding modes and FP exception handling.
559566 def int_objectsize : Intrinsic<[llvm_anyint_ty],
560567 [llvm_anyptr_ty, llvm_i1_ty,
561568 llvm_i1_ty, llvm_i1_ty],
562 [IntrNoMem, IntrSpeculatable]>,
569 [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>, ImmArg<3>]>,
563570 GCCBuiltin<"__builtin_object_size">;
564571
565572 //===--------------- Constrained Floating Point Intrinsics ----------------===//
686693
687694 //===------------------------- Expect Intrinsics --------------------------===//
688695 //
689 def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
690 LLVMMatchType<0>], [IntrNoMem]>;
696 def int_expect : Intrinsic<[llvm_anyint_ty],
697 [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, ImmArg<1>]>;
691698
692699 //===-------------------- Bit Manipulation Intrinsics ---------------------===//
693700 //
696703 let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
697704 def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
698705 def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
699 def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
700 def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
701706 def int_bitreverse : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
702707 def int_fshl : Intrinsic<[llvm_anyint_ty],
703708 [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
704709 def int_fshr : Intrinsic<[llvm_anyint_ty],
705710 [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
711 }
712
713 let IntrProperties = [IntrNoMem, IntrSpeculatable, ImmArg<1>] in {
714 def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
715 def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
706716 }
707717
708718 //===------------------------ Debugger Intrinsics -------------------------===//
847857 //
848858 def int_smul_fix : Intrinsic<[llvm_anyint_ty],
849859 [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
850 [IntrNoMem, IntrSpeculatable, Commutative]>;
860 [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>;
851861
852862 def int_umul_fix : Intrinsic<[llvm_anyint_ty],
853863 [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
854 [IntrNoMem, IntrSpeculatable, Commutative]>;
864 [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>;
855865
856866 //===------------------------- Memory Use Markers -------------------------===//
857867 //
858868 def int_lifetime_start : Intrinsic<[],
859869 [llvm_i64_ty, llvm_anyptr_ty],
860 [IntrArgMemOnly, NoCapture<1>]>;
870 [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>;
861871 def int_lifetime_end : Intrinsic<[],
862872 [llvm_i64_ty, llvm_anyptr_ty],
863 [IntrArgMemOnly, NoCapture<1>]>;
873 [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>;
864874 def int_invariant_start : Intrinsic<[llvm_descriptor_ty],
865875 [llvm_i64_ty, llvm_anyptr_ty],
866 [IntrArgMemOnly, NoCapture<1>]>;
876 [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>;
867877 def int_invariant_end : Intrinsic<[],
868878 [llvm_descriptor_ty, llvm_i64_ty,
869879 llvm_anyptr_ty],
870 [IntrArgMemOnly, NoCapture<2>]>;
880 [IntrArgMemOnly, NoCapture<2>, ImmArg<1>]>;
871881
872882 // launder.invariant.group can't be marked with 'readnone' (IntrNoMem),
873883 // because it would cause CSE of two barriers with the same argument.
914924 [llvm_i64_ty, llvm_i32_ty,
915925 llvm_anyptr_ty, llvm_i32_ty,
916926 llvm_i32_ty, llvm_vararg_ty],
917 [Throws]>;
927 [Throws, ImmArg<0>, ImmArg<1>, ImmArg<3>, ImmArg<4>]>;
918928
919929 def int_experimental_gc_result : Intrinsic<[llvm_any_ty], [llvm_token_ty],
920930 [IntrReadMem]>;
921931 def int_experimental_gc_relocate : Intrinsic<[llvm_any_ty],
922932 [llvm_token_ty, llvm_i32_ty, llvm_i32_ty],
923 [IntrReadMem]>;
933 [IntrReadMem, ImmArg<1>, ImmArg<2>]>;
924934
925935 //===------------------------ Coroutine Intrinsics ---------------===//
926936 // These are documented in docs/Coroutines.rst
10171027 LLVMAnyPointerType<LLVMMatchType<0>>,
10181028 llvm_i32_ty,
10191029 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
1020 [IntrArgMemOnly]>;
1030 [IntrArgMemOnly, ImmArg<2>]>;
10211031
10221032 def int_masked_load : Intrinsic<[llvm_anyvector_ty],
10231033 [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
10241034 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
1025 [IntrReadMem, IntrArgMemOnly]>;
1035 [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
10261036
10271037 def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
10281038 [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
10291039 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
10301040 LLVMMatchType<0>],
1031 [IntrReadMem]>;
1041 [IntrReadMem, ImmArg<1>]>;
10321042
10331043 def int_masked_scatter: Intrinsic<[],
10341044 [llvm_anyvector_ty,
10351045 LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
1036 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>]>;
1046 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
1047 [ImmArg<2>]>;
10371048
10381049 def int_masked_expandload: Intrinsic<[llvm_anyvector_ty],
10391050 [LLVMPointerToElt<0>,
10641075 [IntrReadMem, IntrArgMemOnly]>;
10651076
10661077 def int_hwasan_check_memaccess :
1067 Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOnly]>;
1078 Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOnly, ImmArg<2>]>;
10681079
10691080 // Xray intrinsics
10701081 //===----------------------------------------------------------------------===//
10891100 ],
10901101 [
10911102 IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>,
1092 ReadOnly<1>
1103 ReadOnly<1>, ImmArg<3>
10931104 ]>;
10941105
10951106 // @llvm.memmove.element.unordered.atomic.*(dest, src, length, elementsize)
11001111 ],
11011112 [
11021113 IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>,
1103 ReadOnly<1>
1114 ReadOnly<1>, ImmArg<3>
11041115 ]>;
11051116
11061117 // @llvm.memset.element.unordered.atomic.*(dest, value, length, elementsize)
11071118 def int_memset_element_unordered_atomic
11081119 : Intrinsic<[], [ llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty ],
1109 [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0> ]>;
1120 [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0>, ImmArg<3> ]>;
11101121
11111122 //===------------------------ Reduction Intrinsics ------------------------===//
11121123 //
176176 // This is always moved to the beginning of the basic block.
177177 def int_amdgcn_init_exec : Intrinsic<[],
178178 [llvm_i64_ty], // 64-bit literal constant
179 [IntrConvergent]>;
179 [IntrConvergent, ImmArg<0>]>;
180180
181181 // Set EXEC according to a thread count packed in an SGPR input:
182182 // thread_count = (input >> bitoffset) & 0x7f;
194194 // The first parameter is s_sendmsg immediate (i16),
195195 // the second one is copied to m0
196196 def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
197 Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
197 Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>;
198198 def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
199 Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
199 Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>;
200200
201201 def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
202202 Intrinsic<[], [], [IntrConvergent]>;
205205 Intrinsic<[], [], [IntrConvergent]>;
206206
207207 def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
208 Intrinsic<[], [llvm_i32_ty], []>;
208 Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;
209209
210210 def int_amdgcn_div_scale : Intrinsic<
211211 // 1st parameter: Numerator
214214 // second. (0 = first, 1 = second).
215215 [llvm_anyfloat_ty, llvm_i1_ty],
216216 [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
217 [IntrNoMem, IntrSpeculatable]
217 [IntrNoMem, IntrSpeculatable, ImmArg<2>]
218218 >;
219219
220220 def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty],
372372 llvm_i32_ty, // ordering
373373 llvm_i32_ty, // scope
374374 llvm_i1_ty], // isVolatile
375 [IntrArgMemOnly, NoCapture<0>], "",
375 [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>], "",
376376 [SDNPMemOperand]
377377 >;
378378
387387 llvm_i32_ty, // ordering
388388 llvm_i32_ty, // scope
389389 llvm_i1_ty], // isVolatile
390 [IntrArgMemOnly, NoCapture<0>]
390 [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>]
391391 >;
392392
393393 class AMDGPUDSOrderedIntrinsic : Intrinsic<
402402 llvm_i32_ty, // ordered count index (OA index), also added to the address
403403 llvm_i1_ty, // wave release, usually set to 1
404404 llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
405 [NoCapture<0>]
405 [NoCapture<0>,
406 ImmArg<2>, ImmArg<3>, ImmArg<4>,
407 ImmArg<5>, ImmArg<6>, ImmArg<7>
408 ]
406409 >;
407410
408411 class AMDGPUDSAppendConsumedIntrinsic : Intrinsic<
409412 [llvm_i32_ty],
410413 [llvm_anyptr_ty, // LDS or GDS ptr
411414 llvm_i1_ty], // isVolatile
412 [IntrConvergent, IntrArgMemOnly, NoCapture<0>]
415 [IntrConvergent, IntrArgMemOnly, NoCapture<0>, ImmArg<1>]
413416 >;
414417
415418 def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic;
648651 let LodClampMip = "mip";
649652 }
650653
654 // Helper class for figuring out image intrinsic argument indexes.
655 class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> {
656 int NumDataArgs = !size(P_.DataArgs);
657 int NumDmaskArgs = !if(P_.IsAtomic, 0, 1);
658 int NumVAddrArgs = !size(P_.AddrArgs);
659 int NumRSrcArgs = 1;
660 int NumSampArgs = !if(P_.IsSample, 2, 0);
661 int DmaskArgIndex = NumDataArgs;
662 int UnormArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, 1);
663 int TexFailCtrlArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, NumSampArgs);
664 int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1);
665 }
666
651667 // All dimension-aware intrinsics are derived from this class.
652668 class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
653669 list<IntrinsicProperty> props,
662678 llvm_i1_ty], []), // unorm(imm)
663679 [llvm_i32_ty, // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
664680 llvm_i32_ty]), // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
665 props, "", sdnodeprops>,
681 !listconcat(props,
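682 !if(P_.IsAtomic, [], [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>]),
683 !if(P_.IsSample, [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.UnormArgIndex>], []),
684 [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.TexFailCtrlArgIndex>,
685 ImmArg<AMDGPUImageDimIntrinsicEval<P_>.CachePolicyArgIndex>]),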
686 "", sdnodeprops>,
666687 AMDGPURsrcIntrinsic
667688 !if(P_.IsAtomic, 0, 1)), 1> {
668689 AMDGPUDimProfile P = P_;
824845 llvm_i32_ty, // offset(SGPR/VGPR/imm)
825846 llvm_i1_ty, // glc(imm)
826847 llvm_i1_ty], // slc(imm)
827 [IntrReadMem], "", [SDNPMemOperand]>,
848 [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
828849 AMDGPURsrcIntrinsic<0>;
829850 def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
830851 def int_amdgcn_buffer_load : AMDGPUBufferLoad;
834855 [llvm_v4i32_ty, // rsrc(SGPR)
835856 llvm_i32_ty, // byte offset(SGPR/VGPR/imm)
836857 llvm_i32_ty], // cachepolicy(imm; bit 0 = glc)
837 [IntrNoMem]>,
858 [IntrNoMem, ImmArg<2>]>,
838859 AMDGPURsrcIntrinsic<0>;
839860
840861 class AMDGPUBufferStore : Intrinsic <
845866 llvm_i32_ty, // offset(SGPR/VGPR/imm)
846867 llvm_i1_ty, // glc(imm)
847868 llvm_i1_ty], // slc(imm)
848 [IntrWriteMem], "", [SDNPMemOperand]>,
869 [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
849870 AMDGPURsrcIntrinsic<1>;
850871 def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
851872 def int_amdgcn_buffer_store : AMDGPUBufferStore;
863884 llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
864885 llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
865886 llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
866 [IntrReadMem], "", [SDNPMemOperand]>,
887 [IntrReadMem, ImmArg<3>], "", [SDNPMemOperand]>,
867888 AMDGPURsrcIntrinsic<0>;
868889 def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad;
869890 def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;
875896 llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
876897 llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
877898 llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
878 [IntrReadMem], "", [SDNPMemOperand]>,
899 [IntrReadMem, ImmArg<4>], "", [SDNPMemOperand]>,
879900 AMDGPURsrcIntrinsic<0>;
880901 def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
881902 def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;
887908 llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
888909 llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
889910 llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
890 [IntrWriteMem], "", [SDNPMemOperand]>,
911 [IntrWriteMem, ImmArg<4>], "", [SDNPMemOperand]>,
891912 AMDGPURsrcIntrinsic<1>;
892913 def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore;
893914 def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;
900921 llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
901922 llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
902923 llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
903 [IntrWriteMem], "", [SDNPMemOperand]>,
924 [IntrWriteMem, ImmArg<5>], "", [SDNPMemOperand]>,
904925 AMDGPURsrcIntrinsic<1>;
905926 def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
906927 def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;
912933 llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
913934 llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
914935 llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
915 [], "", [SDNPMemOperand]>,
936 [ImmArg<4>], "", [SDNPMemOperand]>,
916937 AMDGPURsrcIntrinsic<1, 0>;
917938 def int_amdgcn_raw_buffer_atomic_swap : AMDGPURawBufferAtomic;
918939 def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic;
932953 llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
933954 llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
934955 llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
935 [], "", [SDNPMemOperand]>,
956 [ImmArg<5>], "", [SDNPMemOperand]>,
936957 AMDGPURsrcIntrinsic<2, 0>;
937958
938959 class AMDGPUStructBufferAtomic : Intrinsic <
943964 llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
944965 llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
945966 llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
946 [], "", [SDNPMemOperand]>,
967 [ImmArg<5>], "", [SDNPMemOperand]>,
947968 AMDGPURsrcIntrinsic<1, 0>;
948969 def int_amdgcn_struct_buffer_atomic_swap : AMDGPUStructBufferAtomic;
949970 def int_amdgcn_struct_buffer_atomic_add : AMDGPUStructBufferAtomic;
964985 llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
965986 llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
966987 llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
967 [], "", [SDNPMemOperand]>,
988 [ImmArg<6>], "", [SDNPMemOperand]>,
968989 AMDGPURsrcIntrinsic<2, 0>;
969990
970991 // Obsolescent tbuffer intrinsics.
9791000 llvm_i32_ty, // nfmt(imm)
9801001 llvm_i1_ty, // glc(imm)
9811002 llvm_i1_ty], // slc(imm)
982 [IntrReadMem], "", [SDNPMemOperand]>,
1003 [IntrReadMem, ImmArg<4>, ImmArg<5>, ImmArg<6>,
1004 ImmArg<7>, ImmArg<8>], "", [SDNPMemOperand]>,
9831005 AMDGPURsrcIntrinsic<0>;
9841006
9851007 def int_amdgcn_tbuffer_store : Intrinsic <
9941016 llvm_i32_ty, // nfmt(imm)
9951017 llvm_i1_ty, // glc(imm)
9961018 llvm_i1_ty], // slc(imm)
997 [IntrWriteMem], "", [SDNPMemOperand]>,
1019 [IntrWriteMem, ImmArg<5>, ImmArg<6>, ImmArg<7>,
1020 ImmArg<8>, ImmArg<9>], "", [SDNPMemOperand]>,
9981021 AMDGPURsrcIntrinsic<1>;
9991022
10001023 // New tbuffer intrinsics, with:
10081031 llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
10091032 llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
10101033 llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
1011 [IntrReadMem], "", [SDNPMemOperand]>,
1034 [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
10121035 AMDGPURsrcIntrinsic<0>;
10131036
10141037 def int_amdgcn_raw_tbuffer_store : Intrinsic <
10191042 llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
10201043 llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
10211044 llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
1022 [IntrWriteMem], "", [SDNPMemOperand]>,
1045 [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
10231046 AMDGPURsrcIntrinsic<1>;
10241047
10251048 def int_amdgcn_struct_tbuffer_load : Intrinsic <
10301053 llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
10311054 llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
10321055 llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
1033 [IntrReadMem], "", [SDNPMemOperand]>,
1056 [IntrReadMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
10341057 AMDGPURsrcIntrinsic<0>;
10351058
10361059 def int_amdgcn_struct_tbuffer_store : Intrinsic <
10421065 llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
10431066 llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
10441067 llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
1045 [IntrWriteMem], "", [SDNPMemOperand]>,
1068 [IntrWriteMem, ImmArg<5>, ImmArg<6>], "", [SDNPMemOperand]>,
10461069 AMDGPURsrcIntrinsic<1>;
10471070
10481071 class AMDGPUBufferAtomic : Intrinsic <
10521075 llvm_i32_ty, // vindex(VGPR)
10531076 llvm_i32_ty, // offset(SGPR/VGPR/imm)
10541077 llvm_i1_ty], // slc(imm)
1055 [], "", [SDNPMemOperand]>,
1078 [ImmArg<4>], "", [SDNPMemOperand]>,
10561079 AMDGPURsrcIntrinsic<1, 0>;
10571080 def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
10581081 def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
10721095 llvm_i32_ty, // vindex(VGPR)
10731096 llvm_i32_ty, // offset(SGPR/VGPR/imm)
10741097 llvm_i1_ty], // slc(imm)
1075 [], "", [SDNPMemOperand]>,
1098 [ImmArg<5>], "", [SDNPMemOperand]>,
10761099 AMDGPURsrcIntrinsic<2, 0>;
10771100
10781101 } // defset AMDGPUBufferIntrinsics
10891112 llvm_i1_ty, // done
10901113 llvm_i1_ty // vm
10911114 ],
1092 []
1115 [ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>]
10931116 >;
10941117
10951118 // exp with compr bit set.
11001123 LLVMMatchType<0>, // src1
11011124 llvm_i1_ty, // done
11021125 llvm_i1_ty], // vm
1103 []
1126 [ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>]
11041127 >;
11051128
11061129 def int_amdgcn_buffer_wbinvl1_sc :
11211144
11221145 def int_amdgcn_s_sleep :
11231146 GCCBuiltin<"__builtin_amdgcn_s_sleep">,
1124 Intrinsic<[], [llvm_i32_ty], []> {
1147 Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> {
11251148 }
11261149
11271150 def int_amdgcn_s_incperflevel :
11281151 GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
1129 Intrinsic<[], [llvm_i32_ty], []> {
1152 Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> {
11301153 }
11311154
11321155 def int_amdgcn_s_decperflevel :
11331156 GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
1134 Intrinsic<[], [llvm_i32_ty], []> {
1157 Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> {
11351158 }
11361159
11371160 def int_amdgcn_s_getreg :
11381161 GCCBuiltin<"__builtin_amdgcn_s_getreg">,
11391162 Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
1140 [IntrReadMem, IntrSpeculatable]
1163 [IntrReadMem, IntrSpeculatable, ImmArg<0>]
11411164 >;
11421165
11431166 // int_amdgcn_s_getpc is provided to allow a specific style of position
12071230 // llvm.amdgcn.ds.swizzle src offset
12081231 def int_amdgcn_ds_swizzle :
12091232 GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
1210 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
1233 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
1234 [IntrNoMem, IntrConvergent, ImmArg<1>]>;
12111235
12121236 def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
12131237 [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
12751299
12761300 def int_amdgcn_icmp :
12771301 Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
1278 [IntrNoMem, IntrConvergent]>;
1302 [IntrNoMem, IntrConvergent, ImmArg<2>]>;
12791303
12801304 def int_amdgcn_fcmp :
12811305 Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
1282 [IntrNoMem, IntrConvergent]>;
1306 [IntrNoMem, IntrConvergent, ImmArg<2>]>;
12831307
12841308 def int_amdgcn_readfirstlane :
12851309 GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
13691393 def int_amdgcn_mov_dpp :
13701394 Intrinsic<[llvm_anyint_ty],
13711395 [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
1372 llvm_i1_ty], [IntrNoMem, IntrConvergent]>;
1396 llvm_i1_ty], [IntrNoMem, IntrConvergent, ImmArg<1>,
1397 ImmArg<2>, ImmArg<3>, ImmArg<4>]>;
13731398
13741399 // llvm.amdgcn.update.dpp.i32
13751400 // Should be equivalent to:
13771402 // v_mov_b32
13781403 def int_amdgcn_update_dpp :
13791404 Intrinsic<[llvm_anyint_ty],
1380 [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty,
1381 llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent]>;
1405 [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty,
1406 llvm_i32_ty, llvm_i32_ty, llvm_i1_ty],
1407 [IntrNoMem, IntrConvergent,
1408 ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
13821409
13831410 def int_amdgcn_s_dcache_wb :
13841411 GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
14181445 llvm_float_ty, // %c
14191446 llvm_i1_ty // %clamp
14201447 ],
1421 [IntrNoMem, IntrSpeculatable]
1448 [IntrNoMem, IntrSpeculatable, ImmArg<3>]
14221449 >;
14231450
14241451 // i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp)
14331460 llvm_i32_ty, // %c
14341461 llvm_i1_ty // %clamp
14351462 ],
1436 [IntrNoMem, IntrSpeculatable]
1463 [IntrNoMem, IntrSpeculatable, ImmArg<3>]
14371464 >;
14381465
14391466 // u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp)
14481475 llvm_i32_ty, // %c
14491476 llvm_i1_ty // %clamp
14501477 ],
1451 [IntrNoMem, IntrSpeculatable]
1478 [IntrNoMem, IntrSpeculatable, ImmArg<3>]
14521479 >;
14531480
14541481 // i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp)
14631490 llvm_i32_ty, // %c
14641491 llvm_i1_ty // %clamp
14651492 ],
1466 [IntrNoMem, IntrSpeculatable]
1493 [IntrNoMem, IntrSpeculatable, ImmArg<3>]
14671494 >;
14681495
14691496 // u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp)
14781505 llvm_i32_ty, // %c
14791506 llvm_i1_ty // %clamp
14801507 ],
1481 [IntrNoMem, IntrSpeculatable]
1508 [IntrNoMem, IntrSpeculatable, ImmArg<3>]
14821509 >;
14831510
14841511 // i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp)
14941521 llvm_i32_ty, // %c
14951522 llvm_i1_ty // %clamp
14961523 ],
1497 [IntrNoMem, IntrSpeculatable]
1524 [IntrNoMem, IntrSpeculatable, ImmArg<3>]
14981525 >;
14991526
15001527 // u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp)
15101537 llvm_i32_ty, // %c
15111538 llvm_i1_ty // %clamp
15121539 ],
1513 [IntrNoMem, IntrSpeculatable]
1540 [IntrNoMem, IntrSpeculatable, ImmArg<3>]
15141541 >;
15151542
15161543 //===----------------------------------------------------------------------===//
683683 KEYWORD(uwtable);
684684 KEYWORD(writeonly);
685685 KEYWORD(zeroext);
686 KEYWORD(immarg);
686687
687688 KEYWORD(type);
688689 KEYWORD(opaque);
13091309 case lltok::kw_sret:
13101310 case lltok::kw_swifterror:
13111311 case lltok::kw_swiftself:
1312 case lltok::kw_immarg:
13121313 HaveError |=
13131314 Error(Lex.getLoc(),
13141315 "invalid use of parameter-only attribute on a function");
16021603 case lltok::kw_swiftself: B.addAttribute(Attribute::SwiftSelf); break;
16031604 case lltok::kw_writeonly: B.addAttribute(Attribute::WriteOnly); break;
16041605 case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break;
1606 case lltok::kw_immarg: B.addAttribute(Attribute::ImmArg); break;
16051607
16061608 case lltok::kw_alignstack:
16071609 case lltok::kw_alwaysinline:
16961698 case lltok::kw_sret:
16971699 case lltok::kw_swifterror:
16981700 case lltok::kw_swiftself:
1701 case lltok::kw_immarg:
16991702 HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute");
17001703 break;
17011704
225225 kw_uwtable,
226226 kw_writeonly,
227227 kw_zeroext,
228 kw_immarg,
228229
229230 kw_type,
230231 kw_opaque,
11871187 case Attribute::ShadowCallStack: return 1ULL << 59;
11881188 case Attribute::SpeculativeLoadHardening:
11891189 return 1ULL << 60;
1190 case Attribute::ImmArg:
1191 return 1ULL << 61;
11901192 case Attribute::Dereferenceable:
11911193 llvm_unreachable("dereferenceable attribute not supported in raw format");
11921194 break;
14231425 return Attribute::WriteOnly;
14241426 case bitc::ATTR_KIND_Z_EXT:
14251427 return Attribute::ZExt;
1428 case bitc::ATTR_KIND_IMMARG:
1429 return Attribute::ImmArg;
14261430 }
14271431 }
14281432
711711 return bitc::ATTR_KIND_WRITEONLY;
712712 case Attribute::ZExt:
713713 return bitc::ATTR_KIND_Z_EXT;
714 case Attribute::ImmArg:
715 return bitc::ATTR_KIND_IMMARG;
714716 case Attribute::EndAttrKinds:
715717 llvm_unreachable("Can not encode end-attribute kinds marker.");
716718 case Attribute::None:
349349 return "zeroext";
350350 if (hasAttribute(Attribute::Cold))
351351 return "cold";
352 if (hasAttribute(Attribute::ImmArg))
353 return "immarg";
352354
353355 // FIXME: These should be output like this:
354356 //
499499 const Value *V);
500500 void verifyParameterAttrs(AttributeSet Attrs, Type *Ty, const Value *V);
501501 void verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
502 const Value *V);
502 const Value *V, bool IsIntrinsic);
503503 void verifyFunctionMetadata(ArrayRef<std::pair<unsigned, MDNode *>> MDs);
504504
505505 void visitConstantExprsRecursively(const Constant *EntryC);
15611561
15621562 verifyAttributeTypes(Attrs, /*IsFunction=*/false, V);
15631563
1564 if (Attrs.hasAttribute(Attribute::ImmArg)) {
1565 Assert(Attrs.getNumAttributes() == 1,
1566 "Attribute 'immarg' is incompatible with other attributes", V);
1567 }
1568
15641569 // Check for mutually incompatible attributes. Only inreg is compatible with
15651570 // sret.
15661571 unsigned AttrCount = 0;
16481653 // Check parameter attributes against a function type.
16491654 // The value V is printed in error messages.
16501655 void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
1651 const Value *V) {
1656 const Value *V, bool IsIntrinsic) {
16521657 if (Attrs.isEmpty())
16531658 return;
16541659
16851690 Type *Ty = FT->getParamType(i);
16861691 AttributeSet ArgAttrs = Attrs.getParamAttributes(i);
16871692
1693 if (!IsIntrinsic) {
1694 Assert(!ArgAttrs.hasAttribute(Attribute::ImmArg),
1695 "immarg attribute only applies to intrinsics",V);
1696 }
1697
16881698 verifyParameterAttrs(ArgAttrs, Ty, V);
16891699
16901700 if (ArgAttrs.hasAttribute(Attribute::Nest)) {
19031913 "reordering restrictions required by safepoint semantics",
19041914 Call);
19051915
1906 const Value *IDV = Call.getArgOperand(0);
1907 Assert(isa<ConstantInt>(IDV), "gc.statepoint ID must be a constant integer",
1908 Call);
1909
1910 const Value *NumPatchBytesV = Call.getArgOperand(1);
1911 Assert(isa<ConstantInt>(NumPatchBytesV),
1912 "gc.statepoint number of patchable bytes must be a constant integer",
1913 Call);
19141916 const int64_t NumPatchBytes =
1915 cast<ConstantInt>(NumPatchBytesV)->getSExtValue();
1917 cast<ConstantInt>(Call.getArgOperand(1))->getSExtValue();
19161918 assert(isInt<32>(NumPatchBytes) && "NumPatchBytesV is an i32!");
19171919 Assert(NumPatchBytes >= 0,
19181920 "gc.statepoint number of patchable bytes must be "
19251927 "gc.statepoint callee must be of function pointer type", Call, Target);
19261928 FunctionType *TargetFuncType = cast(PT->getElementType());
19271929
1928 const Value *NumCallArgsV = Call.getArgOperand(3);
1929 Assert(isa(NumCallArgsV),
1930 "gc.statepoint number of arguments to underlying call "
1931 "must be constant integer",
1932 Call);
1933 const int NumCallArgs = cast(NumCallArgsV)->getZExtValue();
1930 const int NumCallArgs = cast(Call.getArgOperand(3))->getZExtValue();
19341931 Assert(NumCallArgs >= 0,
19351932 "gc.statepoint number of arguments to underlying call "
19361933 "must be positive",
19491946 Assert(NumCallArgs == NumParams,
19501947 "gc.statepoint mismatch in number of call args", Call);
19511948
1952 const Value *FlagsV = Call.getArgOperand(4);
1953 Assert(isa(FlagsV),
1954 "gc.statepoint flags must be constant integer", Call);
1955 const uint64_t Flags = cast(FlagsV)->getZExtValue();
1949 const uint64_t Flags
1950 = cast(Call.getArgOperand(4))->getZExtValue();
19561951 Assert((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0,
19571952 "unknown flag used in gc.statepoint flags argument", Call);
19581953
21292124 Assert(verifyAttributeCount(Attrs, FT->getNumParams()),
21302125 "Attribute after last parameter!", &F);
21312126
2127 bool isLLVMdotName = F.getName().size() >= 5 &&
2128 F.getName().substr(0, 5) == "llvm.";
2129
21322130 // Check function attributes.
2133 verifyFunctionAttrs(FT, Attrs, &F);
2131 verifyFunctionAttrs(FT, Attrs, &F, isLLVMdotName);
21342132
21352133 // On function declarations/definitions, we do not support the builtin
21362134 // attribute. We do not check this in VerifyFunctionAttrs since that is
21692167 break;
21702168 }
21712169
2172 bool isLLVMdotName = F.getName().size() >= 5 &&
2173 F.getName().substr(0, 5) == "llvm.";
2174
21752170 // Check that the argument values match the function type for this function...
21762171 unsigned i = 0;
21772172 for (const Argument &Arg : F.args()) {
27992794 Assert(verifyAttributeCount(Attrs, Call.arg_size()),
28002795 "Attribute after last parameter!", Call);
28012796
2797 bool IsIntrinsic = Call.getCalledFunction() &&
2798 Call.getCalledFunction()->getName().startswith("llvm.");
2799
2800 Function *Callee
2801 = dyn_cast(Call.getCalledValue()->stripPointerCasts());
2802
28022803 if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::Speculatable)) {
28032804 // Don't allow speculatable on call sites, unless the underlying function
28042805 // declaration is also speculatable.
2805 Function *Callee =
2806 dyn_cast(Call.getCalledValue()->stripPointerCasts());
28072806 Assert(Callee && Callee->isSpeculatable(),
28082807 "speculatable attribute may not apply to call sites", Call);
28092808 }
28102809
28112810 // Verify call attributes.
2812 verifyFunctionAttrs(FTy, Attrs, &Call);
2811 verifyFunctionAttrs(FTy, Attrs, &Call, IsIntrinsic);
28132812
28142813 // Conservatively check the inalloca argument.
28152814 // We have a bug if we can find that there is an underlying alloca without
28242823 // For each argument of the callsite, if it has the swifterror argument,
28252824 // make sure the underlying alloca/parameter it comes from has a swifterror as
28262825 // well.
2827 for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
2826 for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
28282827 if (Call.paramHasAttr(i, Attribute::SwiftError)) {
28292828 Value *SwiftErrorArg = Call.getArgOperand(i);
28302829 if (auto AI = dyn_cast(SwiftErrorArg->stripInBoundsOffsets())) {
28412840 Call);
28422841 }
28432842
2843 if (Attrs.hasParamAttribute(i, Attribute::ImmArg)) {
2844 // Don't allow immarg on call sites, unless the underlying declaration
2845 // also has the matching immarg.
2846 Assert(Callee && Callee->hasParamAttribute(i, Attribute::ImmArg),
2847 "immarg may not apply only to call sites",
2848 Call.getArgOperand(i), Call);
2849 }
2850
2851 if (Call.paramHasAttr(i, Attribute::ImmArg)) {
2852 Value *ArgVal = Call.getArgOperand(i);
2853 Assert(isa(ArgVal) || isa(ArgVal),
2854 "immarg operand has non-immediate parameter", ArgVal, Call);
2855 }
2856 }
2857
28442858 if (FTy->isVarArg()) {
28452859 // FIXME? is 'nest' even legal here?
28462860 bool SawNest = false;
28902904 }
28912905
28922906 // Verify that there's no metadata unless it's a direct call to an intrinsic.
2893 if (!Call.getCalledFunction() ||
2894 !Call.getCalledFunction()->getName().startswith("llvm.")) {
2907 if (!IsIntrinsic) {
28952908 for (Type *ParamTy : FTy->params()) {
28962909 Assert(!ParamTy->isMetadataTy(),
28972910 "Function has metadata parameter but isn't an intrinsic", Call);
41804193 "an array");
41814194 break;
41824195 }
4183 case Intrinsic::ctlz: // llvm.ctlz
4184 case Intrinsic::cttz: // llvm.cttz
4185 Assert(isa(Call.getArgOperand(1)),
4186 "is_zero_undef argument of bit counting intrinsics must be a "
4187 "constant int",
4188 Call);
4189 break;
41904196 case Intrinsic::experimental_constrained_fadd:
41914197 case Intrinsic::experimental_constrained_fsub:
41924198 case Intrinsic::experimental_constrained_fmul:
42424248 "alignment of arg 1 of memory intrinsic must be 0 or a power of 2",
42434249 Call);
42444250 }
4245 Assert(isa(Call.getArgOperand(3)),
4246 "isvolatile argument of memory intrinsics must be a constant int",
4247 Call);
4251
42484252 break;
42494253 }
42504254 case Intrinsic::memcpy_element_unordered_atomic:
42534257 const auto *AMI = cast(&Call);
42544258
42554259 ConstantInt *ElementSizeCI =
4256 dyn_cast(AMI->getRawElementSizeInBytes());
4257 Assert(ElementSizeCI,
4258 "element size of the element-wise unordered atomic memory "
4259 "intrinsic must be a constant int",
4260 Call);
4260 cast(AMI->getRawElementSizeInBytes());
42614261 const APInt &ElementSizeVal = ElementSizeCI->getValue();
42624262 Assert(ElementSizeVal.isPowerOf2(),
42634263 "element size of the element-wise atomic memory intrinsic "
43124312 Call);
43134313 break;
43144314 case Intrinsic::prefetch:
4315 Assert(isa(Call.getArgOperand(1)) &&
4316 isa(Call.getArgOperand(2)) &&
4317 cast(Call.getArgOperand(1))->getZExtValue() < 2 &&
4318 cast(Call.getArgOperand(2))->getZExtValue() < 4,
4315 Assert(cast(Call.getArgOperand(1))->getZExtValue() < 2 &&
4316 cast(Call.getArgOperand(2))->getZExtValue() < 4,
43194317 "invalid arguments to llvm.prefetch", Call);
43204318 break;
43214319 case Intrinsic::stackprotector:
43224320 Assert(isa(Call.getArgOperand(1)->stripPointerCasts()),
43234321 "llvm.stackprotector parameter #2 must resolve to an alloca.", Call);
43244322 break;
4325 case Intrinsic::lifetime_start:
4326 case Intrinsic::lifetime_end:
4327 case Intrinsic::invariant_start:
4328 Assert(isa(Call.getArgOperand(0)),
4329 "size argument of memory use markers must be a constant integer",
4330 Call);
4331 break;
4332 case Intrinsic::invariant_end:
4333 Assert(isa(Call.getArgOperand(1)),
4334 "llvm.invariant.end parameter #2 must be a constant integer", Call);
4335 break;
4336
43374323 case Intrinsic::localescape: {
43384324 BasicBlock *BB = Call.getParent();
43394325 Assert(BB == &BB->getParent()->front(),
43584344 "llvm.localrecover first "
43594345 "argument must be function defined in this module",
43604346 Call);
4361 auto *IdxArg = dyn_cast(Call.getArgOperand(2));
4362 Assert(IdxArg, "idx argument of llvm.localrecover must be a constant int",
4363 Call);
4347 auto *IdxArg = cast(Call.getArgOperand(2));
43644348 auto &Entry = FrameEscapeInfo[Fn];
43654349 Entry.second = unsigned(
43664350 std::max(uint64_t(Entry.second), IdxArg->getLimitedValue(~0U) + 1));
46054589 "second operand of [us]mul_fix must be an int type or vector "
46064590 "of ints");
46074591
4608 auto *Op3 = dyn_cast(Call.getArgOperand(2));
4609 Assert(Op3, "third argument of [us]mul_fix must be a constant integer");
4592 auto *Op3 = cast(Call.getArgOperand(2));
46104593 Assert(Op3->getType()->getBitWidth() <= 32,
46114594 "third argument of [us]mul_fix must fit within 32 bits");
46124595
919919 Info.align = 0;
920920 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
921921
922 const ConstantInt *Vol = dyn_cast(CI.getOperand(4));
923 if (!Vol || !Vol->isZero())
922 const ConstantInt *Vol = cast(CI.getOperand(4));
923 if (!Vol->isZero())
924924 Info.flags |= MachineMemOperand::MOVolatile;
925925
926926 return true;
933933 Info.align = 0;
934934 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
935935
936 const ConstantInt *Vol = dyn_cast(CI.getOperand(1));
937 if (!Vol || !Vol->isZero())
936 const ConstantInt *Vol = cast(CI.getOperand(1));
937 if (!Vol->isZero())
938938 Info.flags |= MachineMemOperand::MOVolatile;
939939
940940 return true;
37403740 static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
37413741 SDNode *N, SelectionDAG &DAG) {
37423742 EVT VT = N->getValueType(0);
3743 const auto *CD = dyn_cast(N->getOperand(3));
3744 if (!CD)
3745 return DAG.getUNDEF(VT);
3746
3743 const auto *CD = cast(N->getOperand(3));
37473744 int CondCode = CD->getSExtValue();
37483745 if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
37493746 CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE)
37743771 static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
37753772 SDNode *N, SelectionDAG &DAG) {
37763773 EVT VT = N->getValueType(0);
3777 const auto *CD = dyn_cast(N->getOperand(3));
3778 if (!CD)
3779 return DAG.getUNDEF(VT);
3774 const auto *CD = cast(N->getOperand(3));
37803775
37813776 int CondCode = CD->getSExtValue();
37823777 if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE ||
46174612
46184613 static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
46194614 SDValue *GLC, SDValue *SLC) {
4620 auto CachePolicyConst = dyn_cast(CachePolicy.getNode());
4621 if (!CachePolicyConst)
4622 return false;
4615 auto CachePolicyConst = cast(CachePolicy.getNode());
46234616
46244617 uint64_t Value = CachePolicyConst->getZExtValue();
46254618 SDLoc DL(CachePolicy);
47204713
47214714 static bool parseTexFail(SDValue TexFailCtrl, SelectionDAG &DAG, SDValue *TFE,
47224715 SDValue *LWE, bool &IsTexFail) {
4723 auto TexFailCtrlConst = dyn_cast(TexFailCtrl.getNode());
4724 if (!TexFailCtrlConst)
4725 return false;
4716 auto TexFailCtrlConst = cast(TexFailCtrl.getNode());
47264717
47274718 uint64_t Value = TexFailCtrlConst->getZExtValue();
47284719 if (Value) {
47854776 }
47864777 } else {
47874778 unsigned DMaskIdx = BaseOpcode->Store ? 3 : isa(Op) ? 2 : 1;
4788 auto DMaskConst = dyn_cast(Op.getOperand(DMaskIdx));
4789 if (!DMaskConst)
4790 return Op;
4779 auto DMaskConst = cast(Op.getOperand(DMaskIdx));
47914780 DMask = DMaskConst->getZExtValue();
47924781 DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask);
47934782
49014890 CtrlIdx = AddrIdx + NumVAddrs + 1;
49024891 } else {
49034892 auto UnormConst =
4904 dyn_cast(Op.getOperand(AddrIdx + NumVAddrs + 2));
4905 if (!UnormConst)
4906 return Op;
4893 cast(Op.getOperand(AddrIdx + NumVAddrs + 2));
49074894
49084895 Unorm = UnormConst->getZExtValue() ? True : False;
49094896 CtrlIdx = AddrIdx + NumVAddrs + 3;
53565343 return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
53575344 Op.getOperand(1), Op.getOperand(2));
53585345 case Intrinsic::amdgcn_div_scale: {
5359 // 3rd parameter required to be a constant.
5360 const ConstantSDNode *Param = dyn_cast(Op.getOperand(3));
5361 if (!Param)
5362 return DAG.getMergeValues({ DAG.getUNDEF(VT), DAG.getUNDEF(MVT::i1) }, DL);
5346 const ConstantSDNode *Param = cast(Op.getOperand(3));
53635347
53645348 // Translate to the operands expected by the machine instruction. The
53655349 // first parameter must be the same as the first instruction.
35753575 }
35763576 case Intrinsic::amdgcn_exp:
35773577 case Intrinsic::amdgcn_exp_compr: {
3578 ConstantInt *En = dyn_cast(II->getArgOperand(1));
3579 if (!En) // Illegal.
3580 break;
3581
3578 ConstantInt *En = cast(II->getArgOperand(1));
35823579 unsigned EnBits = En->getZExtValue();
35833580 if (EnBits == 0xf)
35843581 break; // All inputs enabled.
36683665 }
36693666 case Intrinsic::amdgcn_icmp:
36703667 case Intrinsic::amdgcn_fcmp: {
3671 const ConstantInt *CC = dyn_cast(II->getArgOperand(2));
3672 if (!CC)
3673 break;
3674
3668 const ConstantInt *CC = cast(II->getArgOperand(2));
36753669 // Guard against invalid arguments.
36763670 int64_t CCVal = CC->getZExtValue();
36773671 bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
38213815 case Intrinsic::amdgcn_update_dpp: {
38223816 Value *Old = II->getArgOperand(0);
38233817
3824 auto BC = dyn_cast(II->getArgOperand(5));
3825 auto RM = dyn_cast(II->getArgOperand(3));
3826 auto BM = dyn_cast(II->getArgOperand(4));
3827 if (!BC || !RM || !BM ||
3828 BC->isZeroValue() ||
3818 auto BC = cast(II->getArgOperand(5));
3819 auto RM = cast(II->getArgOperand(3));
3820 auto BM = cast(II->getArgOperand(4));
3821 if (BC->isZeroValue() ||
38293822 RM->getZExtValue() != 0xF ||
38303823 BM->getZExtValue() != 0xF ||
38313824 isa(Old))
981981 // below.
982982 DemandedElts = (1 << DemandedElts.getActiveBits()) - 1;
983983 } else {
984 ConstantInt *DMask = dyn_cast(II->getArgOperand(DMaskIdx));
985 if (!DMask)
986 return nullptr; // non-constant dmask is not supported by codegen
987
984 ConstantInt *DMask = cast(II->getArgOperand(DMaskIdx));
988985 unsigned DMaskVal = DMask->getZExtValue() & 0xf;
989986
990987 // Mask off values that are undefined because the dmask doesn't cover them
16381635 return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
16391636 default: {
16401637 if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID())) {
1641 LLVM_DEBUG(
1642 Value *TFC = II->getArgOperand(II->getNumOperands() - 2);
1643 assert(!isa(TFC) ||
1644 dyn_cast(TFC)->getZExtValue() == 0);
1645 );
1646
1638 assert(cast(
1639 II->getArgOperand(
1640 II->getNumOperands() - 2))->getZExtValue() == 0);
16471641 return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);
16481642 }
16491643
797797 case Attribute::SwiftSelf:
798798 case Attribute::WriteOnly:
799799 case Attribute::ZExt:
800 case Attribute::ImmArg:
800801 case Attribute::EndAttrKinds:
801802 continue;
802803 // Those attributes should be safe to propagate to the extracted function.
145145 ; CHECK: declare i32 @llvm.objectsize.i32.p0i8
146146
147147
148 ; CHECK: declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
149 ; CHECK: declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
148 ; CHECK: declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
149 ; CHECK: declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
0 ; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
1
2 ; Check that remangling code doesn't fail on an intrinsic with wrong signature
3
4 ; CHECK: Attribute after last parameter!
5 ; CHECK-NEXT: void (i8*, i8, i64)* @llvm.memset.i64
6 declare void @llvm.memset.i64(i8* nocapture, i8, i64) nounwind
7
8 ; CHECK: Attribute after last parameter!
9 ; CHECK-NEXT: void (i8*, i8, i64)* @llvm.memcpy.i64
10 declare void @llvm.memcpy.i64(i8* nocapture, i8, i64) nounwind
11
12 ; CHECK: Attribute after last parameter!
13 ; CHECK-NEXT: void (i8*, i8, i64)* @llvm.memmove.i64
14 declare void @llvm.memmove.i64(i8* nocapture, i8, i64) nounwind
0 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
1
2 ; CHECK: declare void @llvm.test.immarg.intrinsic.i32(i32 immarg)
3 declare void @llvm.test.immarg.intrinsic.i32(i32 immarg)
4
5 ; CHECK: declare void @llvm.test.immarg.intrinsic.f32(float immarg)
6 declare void @llvm.test.immarg.intrinsic.f32(float immarg)
7
8 ; CHECK-LABEL: @call_llvm.test.immarg.intrinsic.i32(
9 define void @call_llvm.test.immarg.intrinsic.i32() {
10 ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0)
11 call void @llvm.test.immarg.intrinsic.i32(i32 0)
12
13 ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0)
14 call void @llvm.test.immarg.intrinsic.i32(i32 0)
15
16 ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 1)
17 call void @llvm.test.immarg.intrinsic.i32(i32 1)
18
19 ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 5)
20 call void @llvm.test.immarg.intrinsic.i32(i32 add (i32 2, i32 3))
21
22 ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0)
23 call void @llvm.test.immarg.intrinsic.i32(i32 ptrtoint (i32* null to i32))
24 ret void
25 }
26
27 ; CHECK-LABEL: @call_llvm.test.immarg.intrinsic.f32(
28 define void @call_llvm.test.immarg.intrinsic.f32() {
29 ; CHECK: call void @llvm.test.immarg.intrinsic.f32(float 1.000000e+00)
30 call void @llvm.test.immarg.intrinsic.f32(float 1.0)
31 ret void
32 }
33
34 define void @on_callsite_and_declaration() {
35 ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 immarg 0)
36 call void @llvm.test.immarg.intrinsic.i32(i32 immarg 0)
37 ret void
38 }
0 ; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
1
2 ; CHECK: Attribute 'immarg' is incompatible with other attributes
3 declare void @llvm.immarg.byval(i32* byval immarg)
4
5 ; CHECK: Attribute 'immarg' is incompatible with other attributes
6 declare void @llvm.immarg.inalloca(i32* inalloca immarg)
7
8 ; CHECK: Attribute 'immarg' is incompatible with other attributes
9 declare void @llvm.immarg.inreg(i32 inreg immarg)
10
11 ; CHECK: Attribute 'immarg' is incompatible with other attributes
12 declare void @llvm.immarg.nest(i32* nest immarg)
13
14 ; CHECK: Attribute 'immarg' is incompatible with other attributes
15 declare void @llvm.immarg.sret(i32* sret immarg)
16
17 ; CHECK: Attribute 'immarg' is incompatible with other attributes
18 declare void @llvm.immarg.zeroext(i32 zeroext immarg)
19
20 ; CHECK: Attribute 'immarg' is incompatible with other attributes
21 declare void @llvm.immarg.signext(i32 signext immarg)
22
23 ; CHECK: Attribute 'immarg' is incompatible with other attributes
24 declare void @llvm.immarg.returned(i32 returned immarg)
25
26 ; CHECK: Attribute 'immarg' is incompatible with other attributes
27 declare void @llvm.immarg.noalias(i32* noalias immarg)
28
29 ; CHECK: Attribute 'immarg' is incompatible with other attributes
30 declare void @llvm.immarg.readnone(i32* readnone immarg)
31
32 ; CHECK: Attribute 'immarg' is incompatible with other attributes
33 declare void @llvm.immarg.readonly(i32* readonly immarg)
0 ; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
1
2 ; CHECK: error: invalid use of parameter-only attribute on a function
3 declare void @llvm.immarg.func() immarg
0 ; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
1
2 ; CHECK: error: invalid use of parameter-only attribute
3 declare immarg i32 @llvm.immarg.retattr(i32)
16801680 ret i8** getelementptr inbounds ({ [4 x i8*], [4 x i8*] }, { [4 x i8*], [4 x i8*] }* null, i32 0, inrange i32 1, i32 2)
16811681 }
16821682
1683 ; immarg attribute
1684 declare void @llvm.test.immarg.intrinsic(i32 immarg)
1685 ; CHECK: declare void @llvm.test.immarg.intrinsic(i32 immarg)
1686
16831687 ; CHECK: attributes #0 = { alignstack=4 }
16841688 ; CHECK: attributes #1 = { alignstack=8 }
16851689 ; CHECK: attributes #2 = { alwaysinline }
88 }
99
1010 declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1)
11 ; CHECK: declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1)
11 ; CHECK: declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg)
2626 ret void
2727 }
2828
29 ; CHECK: declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1)
30 ; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
31 ; CHECK: declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
29 ; CHECK: declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
30 ; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg)
31 ; CHECK: declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1 immarg)
3232 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
3333 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly , i8* nocapture readonly, i64, i32, i1)
3434 declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
6969
7070 ; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source:
7171 ; GCN-NOT: store_dword
72 define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
73 %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
72 define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b) #0 {
73 %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1
7474 %bc = bitcast i64 %undef to <2 x i32>
7575 store volatile <2 x i32> %bc, <2 x i32> addrspace(1)* %out
7676 ret void
7878
7979 ; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt:
8080 ; GCN-NOT: store_dword
81 define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 {
82 %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1
81 define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
82 %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1
8383 %bc = bitcast i64 %undef to <2 x i32>
8484 %elt1 = extractelement <2 x i32> %bc, i32 1
8585 store volatile i32 %elt1, i32 addrspace(1)* %out
1010 declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
1111
1212 declare i32 @llvm.amdgcn.workitem.id.x() #1
13
14 ; Make sure no crash on invalid non-constant
15 ; GCN-LABEL: {{^}}invalid_variable_order_lds_atomic_dec_ret_i32:
16 ; CIVI-DAG: s_mov_b32 m0
17 ; GFX9-NOT: m0
18 define amdgpu_kernel void @invalid_variable_order_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %order.var) #0 {
19 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %order.var, i32 0, i1 false)
20 store i32 %result, i32 addrspace(1)* %out
21 ret void
22 }
23
24 ; Make sure no crash on invalid non-constant
25 ; GCN-LABEL: {{^}}invalid_variable_scope_lds_atomic_dec_ret_i32:
26 ; CIVI-DAG: s_mov_b32 m0
27 ; GFX9-NOT: m0
28 define amdgpu_kernel void @invalid_variable_scope_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %scope.var) #0 {
29 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %scope.var, i1 false)
30 store i32 %result, i32 addrspace(1)* %out
31 ret void
32 }
33
34 ; Make sure no crash on invalid non-constant
35 ; GCN-LABEL: {{^}}invalid_variable_volatile_lds_atomic_dec_ret_i32:
36 define amdgpu_kernel void @invalid_variable_volatile_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i1 %volatile.var) #0 {
37 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %volatile.var)
38 store i32 %result, i32 addrspace(1)* %out
39 ret void
40 }
4113
4214 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
4315 ; CIVI-DAG: s_mov_b32 m0
394394 ret void
395395 }
396396
397 ; Undefined selector gets deleted
398 ; SI-LABEL: {{^}}test_div_scale_f32_val_undef_undef:
399 ; SI-NOT: v_div_scale
400 define amdgpu_kernel void @test_div_scale_f32_val_undef_undef(float addrspace(1)* %out) #0 {
401 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 undef)
402 %result0 = extractvalue { float, i1 } %result, 0
403 store float %result0, float addrspace(1)* %out, align 4
404 ret void
405 }
406
407 ; SI-LABEL: {{^}}test_div_scale_f32_undef_undef_undef:
408 ; SI-NOT: v_div_scale
409 define amdgpu_kernel void @test_div_scale_f32_undef_undef_undef(float addrspace(1)* %out) #0 {
410 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 undef)
411 %result0 = extractvalue { float, i1 } %result, 0
412 store float %result0, float addrspace(1)* %out, align 4
413 ret void
414 }
415
416 ; SI-LABEL: {{^}}test_div_scale_f32_val_val_undef:
417 ; SI-NOT: v_div_scale
418 define amdgpu_kernel void @test_div_scale_f32_val_val_undef(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
419 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
420 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
421 %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
422
423 %a = load volatile float, float addrspace(1)* %gep.0, align 4
424 %b = load volatile float, float addrspace(1)* %gep.1, align 4
425
426 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 undef)
427 %result0 = extractvalue { float, i1 } %result, 0
428 store float %result0, float addrspace(1)* %out, align 4
429 ret void
430 }
431
432397 ; SI-LABEL: {{^}}test_div_scale_f64_val_undef_val:
433398 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
434399 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x40200000
66
77 declare i64 @llvm.amdgcn.fcmp.f16(half, half, i32) #0
88 declare half @llvm.fabs.f16(half) #0
9
10 ; GCN-LABEL: {{^}}v_fcmp_f32_dynamic_cc:
11 ; GCN: s_endpgm
12 define amdgpu_kernel void @v_fcmp_f32_dynamic_cc(i64 addrspace(1)* %out, float %src0, float %src1, i32 %cc) {
13 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src0, float %src1, i32 %cc)
14 store i64 %result, i64 addrspace(1)* %out
15 ret void
16 }
179
1810 ; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs:
1911 ; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}|
55 declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
66 declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
77
8 ; No crash on invalid input
9 ; GCN-LABEL: {{^}}v_icmp_i32_dynamic_cc:
10 ; GCN: s_endpgm
11 define amdgpu_kernel void @v_icmp_i32_dynamic_cc(i64 addrspace(1)* %out, i32 %src, i32 %cc) {
12 %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 %cc)
13 store i64 %result, i64 addrspace(1)* %out
14 ret void
15 }
16
178 ; GCN-LABEL: {{^}}v_icmp_i32_eq:
189 ; GCN: v_cmp_eq_u32_e64
1910 define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) {
180171 ret void
181172 }
182173
183 ; GCN-LABEL: {{^}}v_icmp_i16_dynamic_cc:
184 ; GCN: s_endpgm
185 define amdgpu_kernel void @v_icmp_i16_dynamic_cc(i64 addrspace(1)* %out, i16 %src, i32 %cc) {
186 %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 %cc)
187 store i64 %result, i64 addrspace(1)* %out
188 ret void
189 }
190
191 ; GCN-LABEL: {{^}}v_icmp_i16_eq:
192174 ; VI: v_cmp_eq_u16_e64
193175
194176 ; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
6666 declare void @llvm.amdgcn.raw.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32) #0
6767 declare void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32) #0
6868 declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0
69 declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i1, i1) #1
7069
7170 attributes #0 = { nounwind }
7271 attributes #1 = { nounwind readonly }
73
74
121121 declare i8* @__memset_to_buf(i64, i8*, i32, i64) local_unnamed_addr
122122
123123 declare i8* @memset(i8*, i32, i64) local_unnamed_addr
124
125 ; Function Attrs: nounwind readnone speculatable
126 declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1) #1
127124
128125 ; Function Attrs: nounwind readnone speculatable
129126 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
1818 }
1919
2020 declare void @llvm.memset.p0struct.rtx_def.i32(%struct.rtx_def*, i8, i32, i1)
21
22 ; Check that remangling code doesn't fail on an intrinsic with wrong signature
23 declare void @llvm.memset.i64(i8* nocapture, i8, i64) nounwind
124124 ret i64 %ret
125125 }
126126
127 ; CHECK-LABEL: @invalid_variable_volatile_atomicinc_group_to_flat_i64(
128 ; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64*
129 ; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 %volatile.var)
130 define i64 @invalid_variable_volatile_atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y, i1 %volatile.var) #0 {
131 %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
132 %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 %volatile.var)
133 ret i64 %ret
134 }
135
136127 declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1, i1) #1
137128 declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1) #1
138129 declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
319319 ; --------------------------------------------------------------------
320320
321321 ; CHECK-LABEL: @raw_buffer_load_f32(
322 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
323 ; CHECK-NEXT: ret float %data
324 define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
325 %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
322 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
323 ; CHECK-NEXT: ret float %data
324 define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
325 %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
326326 ret float %data
327327 }
328328
329329 ; CHECK-LABEL: @raw_buffer_load_v1f32(
330 ; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
330 ; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
331331 ; CHECK-NEXT: ret <1 x float> %data
332 define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
333 %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
332 define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
333 %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
334334 ret <1 x float> %data
335335 }
336336
337337 ; CHECK-LABEL: @raw_buffer_load_v2f32(
338 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
338 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
339339 ; CHECK-NEXT: ret <2 x float> %data
340 define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
341 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
340 define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
341 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
342342 ret <2 x float> %data
343343 }
344344
345345 ; CHECK-LABEL: @raw_buffer_load_v4f32(
346 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
346 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
347347 ; CHECK-NEXT: ret <4 x float> %data
348 define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
349 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
348 define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
349 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
350350 ret <4 x float> %data
351351 }
352352
353353 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f32(
354 ; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
355 ; CHECK-NEXT: ret float %data
356 define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
357 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
354 ; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
355 ; CHECK-NEXT: ret float %data
356 define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
357 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
358358 %elt0 = extractelement <2 x float> %data, i32 0
359359 ret float %elt0
360360 }
361361
362362 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f32(
363 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
363 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
364364 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
365365 ; CHECK-NEXT: ret float %elt1
366 define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
367 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
366 define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
367 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
368368 %elt1 = extractelement <2 x float> %data, i32 1
369369 ret float %elt1
370370 }
371371
372372 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v4f32(
373 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
374 ; CHECK-NEXT: ret float %data
375 define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
376 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
373 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
374 ; CHECK-NEXT: ret float %data
375 define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
376 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
377377 %elt0 = extractelement <4 x float> %data, i32 0
378378 ret float %elt0
379379 }
380380
381381 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f32(
382 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
382 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
383383 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
384384 ; CHECK-NEXT: ret float %elt1
385 define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
386 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
385 define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
386 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
387387 %elt1 = extractelement <4 x float> %data, i32 1
388388 ret float %elt1
389389 }
390390
391391 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v4f32(
392 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
392 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
393393 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2
394394 ; CHECK-NEXT: ret float %elt1
395 define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
396 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
395 define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
396 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
397397 %elt1 = extractelement <4 x float> %data, i32 2
398398 ret float %elt1
399399 }
400400
401401 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f32(
402 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
402 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
403403 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
404404 ; CHECK-NEXT: ret float %elt1
405 define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
406 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
405 define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
406 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
407407 %elt1 = extractelement <4 x float> %data, i32 3
408408 ret float %elt1
409409 }
410410
411411 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f32(
412 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
412 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
413413 ; CHECK-NEXT: ret <2 x float>
414 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
415 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
414 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
415 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
416416 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
417417 ret <2 x float> %shuf
418418 }
419419
420420 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v4f32(
421 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
421 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
422422 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
423423 ; CHECK-NEXT: ret <2 x float> %shuf
424 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
425 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
424 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
425 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
426426 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
427427 ret <2 x float> %shuf
428428 }
429429
430430 ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_v4f32(
431 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
431 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
432432 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
433433 ; CHECK-NEXT: ret <2 x float> %shuf
434 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
435 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
434 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
435 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
436436 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
437437 ret <2 x float> %shuf
438438 }
439439
440440 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(
441 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
441 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
442442 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
443443 ; CHECK-NEXT: ret <3 x float> %shuf
444 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
445 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
444 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
445 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
446446 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
447447 ret <3 x float> %shuf
448448 }
449449
450450 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(
451 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
451 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
452452 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
453453 ; CHECK-NEXT: ret <3 x float> %shuf
454 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
455 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
454 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
455 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
456456 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
457457 ret <3 x float> %shuf
458458 }
459459
460460 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(
461 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
461 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
462462 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
463463 ; CHECK-NEXT: ret <3 x float> %shuf
464 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
465 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
464 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
465 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
466466 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
467467 ret <3 x float> %shuf
468468 }
469469
470470 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v3f32(
471 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
472 ; CHECK-NEXT: ret float %data
473 define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
474 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
471 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
472 ; CHECK-NEXT: ret float %data
473 define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
474 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
475475 %elt0 = extractelement <3 x float> %data, i32 0
476476 ret float %elt0
477477 }
478478
479479 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f32(
480 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
480 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
481481 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
482482 ; CHECK-NEXT: ret float %elt1
483 define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
484 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
483 define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
484 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
485485 %elt1 = extractelement <3 x float> %data, i32 1
486486 ret float %elt1
487487 }
488488
489489 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v3f32(
490 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
490 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
491491 ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
492492 ; CHECK-NEXT: ret float %elt1
493 define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
494 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
493 define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
494 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
495495 %elt1 = extractelement <3 x float> %data, i32 2
496496 ret float %elt1
497497 }
498498
499499 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v3f32(
500 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
500 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
501501 ; CHECK-NEXT: ret <2 x float>
502 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
503 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
502 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
503 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
504504 %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
505505 ret <2 x float> %shuf
506506 }
507507
508508 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v3f32(
509 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
509 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
510510 ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
511511 ; CHECK-NEXT: ret <2 x float> %shuf
512 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
513 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
512 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
513 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
514514 %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
515515 ret <2 x float> %shuf
516516 }
517517
518518 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4f32(
519 ; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
519 ; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
520520 ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
521521 ; CHECK-NEXT: ret i32 %tmp2
522 define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
523 %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
522 define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
523 %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
524524 %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
525525 %tmp2 = extractelement <4 x i32> %tmp1, i32 0
526526 ret i32 %tmp2
527527 }
528528
529529 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4i32(
530 ; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
530 ; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
531531 ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
532532 ; CHECK-NEXT: ret float %tmp2
533 define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
534 %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
533 define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
534 %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
535535 %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
536536 %tmp2 = extractelement <4 x float> %tmp1, i32 0
537537 ret float %tmp2
538538 }
539539
540540 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(
541 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
542 ; CHECK-NEXT: ret float %data
543 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
544 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
541 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
542 ; CHECK-NEXT: ret float %data
543 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
544 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
545545 %elt0 = extractelement <2 x float> %data, i32 0
546546 ret float %elt0
547547 }
559559 ; --------------------------------------------------------------------
560560
561561 ; CHECK-LABEL: @raw_buffer_load_format_f32(
562 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
563 ; CHECK-NEXT: ret float %data
564 define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
565 %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
562 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
563 ; CHECK-NEXT: ret float %data
564 define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
565 %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
566566 ret float %data
567567 }
568568
569569 ; CHECK-LABEL: @raw_buffer_load_format_v1f32(
570 ; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
570 ; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
571571 ; CHECK-NEXT: ret <1 x float> %data
572 define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
573 %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
572 define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
573 %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
574574 ret <1 x float> %data
575575 }
576576
577577 ; CHECK-LABEL: @raw_buffer_load_format_v2f32(
578 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
578 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
579579 ; CHECK-NEXT: ret <2 x float> %data
580 define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
581 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
580 define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
581 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
582582 ret <2 x float> %data
583583 }
584584
585585 ; CHECK-LABEL: @raw_buffer_load_format_v4f32(
586 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
586 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
587587 ; CHECK-NEXT: ret <4 x float> %data
588 define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
589 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
588 define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
589 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
590590 ret <4 x float> %data
591591 }
592592
593593 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v2f32(
594 ; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
595 ; CHECK-NEXT: ret float %data
596 define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
597 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
594 ; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
595 ; CHECK-NEXT: ret float %data
596 define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
597 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
598598 %elt0 = extractelement <2 x float> %data, i32 0
599599 ret float %elt0
600600 }
601601
602602 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v2f32(
603 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
603 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
604604 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
605605 ; CHECK-NEXT: ret float %elt1
606 define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
607 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
606 define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
607 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
608608 %elt1 = extractelement <2 x float> %data, i32 1
609609 ret float %elt1
610610 }
611611
612612 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v4f32(
613 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
614 ; CHECK-NEXT: ret float %data
615 define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
616 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
613 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
614 ; CHECK-NEXT: ret float %data
615 define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
616 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
617617 %elt0 = extractelement <4 x float> %data, i32 0
618618 ret float %elt0
619619 }
620620
621621 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v4f32(
622 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
622 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
623623 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
624624 ; CHECK-NEXT: ret float %elt1
625 define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
626 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
625 define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
626 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
627627 %elt1 = extractelement <4 x float> %data, i32 1
628628 ret float %elt1
629629 }
630630
631631 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v4f32(
632 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
632 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
633633 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2
634634 ; CHECK-NEXT: ret float %elt1
635 define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
636 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
635 define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
636 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
637637 %elt1 = extractelement <4 x float> %data, i32 2
638638 ret float %elt1
639639 }
640640
641641 ; CHECK-LABEL: @extract_elt3_raw_buffer_load_format_v4f32(
642 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
642 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
643643 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
644644 ; CHECK-NEXT: ret float %elt1
645 define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
646 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
645 define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
646 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
647647 %elt1 = extractelement <4 x float> %data, i32 3
648648 ret float %elt1
649649 }
650650
651651 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v4f32(
652 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
652 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
653653 ; CHECK-NEXT: ret <2 x float>
654 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
655 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
654 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
655 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
656656 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
657657 ret <2 x float> %shuf
658658 }
659659
660660 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v4f32(
661 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
661 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
662662 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
663663 ; CHECK-NEXT: ret <2 x float> %shuf
664 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
665 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
664 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
665 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
666666 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
667667 ret <2 x float> %shuf
668668 }
669669
670670 ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_format_v4f32(
671 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
671 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
672672 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
673673 ; CHECK-NEXT: ret <2 x float> %shuf
674 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
675 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
674 define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
675 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
676676 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
677677 ret <2 x float> %shuf
678678 }
679679
680680 ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(
681 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
681 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
682682 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
683683 ; CHECK-NEXT: ret <3 x float> %shuf
684 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
685 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
684 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
685 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
686686 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
687687 ret <3 x float> %shuf
688688 }
689689
690690 ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(
691 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
691 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
692692 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
693693 ; CHECK-NEXT: ret <3 x float> %shuf
694 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
695 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
694 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
695 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
696696 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
697697 ret <3 x float> %shuf
698698 }
699699
700700 ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(
701 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
701 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
702702 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
703703 ; CHECK-NEXT: ret <3 x float> %shuf
704 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
705 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
704 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
705 %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
706706 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
707707 ret <3 x float> %shuf
708708 }
709709
710710 ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v3f32(
711 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
712 ; CHECK-NEXT: ret float %data
713 define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
714 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
711 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
712 ; CHECK-NEXT: ret float %data
713 define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
714 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
715715 %elt0 = extractelement <3 x float> %data, i32 0
716716 ret float %elt0
717717 }
718718
719719 ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v3f32(
720 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
720 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
721721 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
722722 ; CHECK-NEXT: ret float %elt1
723 define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
724 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
723 define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
724 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
725725 %elt1 = extractelement <3 x float> %data, i32 1
726726 ret float %elt1
727727 }
728728
729729 ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v3f32(
730 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
730 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
731731 ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
732732 ; CHECK-NEXT: ret float %elt1
733 define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
734 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
733 define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
734 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
735735 %elt1 = extractelement <3 x float> %data, i32 2
736736 ret float %elt1
737737 }
738738
739739 ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v3f32(
740 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
740 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
741741 ; CHECK-NEXT: ret <2 x float>
742 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
743 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
742 define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
743 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
744744 %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
745745 ret <2 x float> %shuf
746746 }
747747
748748 ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v3f32(
749 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
749 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
750750 ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
751751 ; CHECK-NEXT: ret <2 x float> %shuf
752 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
753 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
752 define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
753 %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
754754 %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
755755 ret <2 x float> %shuf
756756 }
757757
758758 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4f32(
759 ; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
759 ; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
760760 ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
761761 ; CHECK-NEXT: ret i32 %tmp2
762 define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
763 %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
762 define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
763 %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
764764 %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
765765 %tmp2 = extractelement <4 x i32> %tmp1, i32 0
766766 ret i32 %tmp2
767767 }
768768
769769 ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4i32(
770 ; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.format.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
770 ; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.format.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
771771 ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
772772 ; CHECK-NEXT: ret float %tmp2
773 define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
774 %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent)
773 define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
774 %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0)
775775 %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
776776 %tmp2 = extractelement <4 x float> %tmp1, i32 0
777777 ret float %tmp2
778778 }
779779
780780 ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(
781 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
782 ; CHECK-NEXT: ret float %data
783 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
784 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
781 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
782 ; CHECK-NEXT: ret float %data
783 define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 {
784 %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
785785 %elt0 = extractelement <2 x float> %data, i32 0
786786 ret float %elt0
787787 }
799799 ; --------------------------------------------------------------------
800800
801801 ; CHECK-LABEL: @struct_buffer_load_f32(
802 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
803 ; CHECK-NEXT: ret float %data
804 define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
805 %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
802 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
803 ; CHECK-NEXT: ret float %data
804 define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
805 %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
806806 ret float %data
807807 }
808808
809809 ; CHECK-LABEL: @struct_buffer_load_v1f32(
810 ; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
810 ; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
811811 ; CHECK-NEXT: ret <1 x float> %data
812 define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
813 %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
812 define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
813 %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
814814 ret <1 x float> %data
815815 }
816816
817817 ; CHECK-LABEL: @struct_buffer_load_v2f32(
818 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
818 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
819819 ; CHECK-NEXT: ret <2 x float> %data
820 define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
821 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
820 define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
821 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
822822 ret <2 x float> %data
823823 }
824824
825825 ; CHECK-LABEL: @struct_buffer_load_v4f32(
826 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
826 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
827827 ; CHECK-NEXT: ret <4 x float> %data
828 define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
829 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
828 define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
829 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
830830 ret <4 x float> %data
831831 }
832832
833833 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f32(
834 ; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
835 ; CHECK-NEXT: ret float %data
836 define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
837 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
834 ; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
835 ; CHECK-NEXT: ret float %data
836 define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
837 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
838838 %elt0 = extractelement <2 x float> %data, i32 0
839839 ret float %elt0
840840 }
841841
842842 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f32(
843 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
843 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
844844 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
845845 ; CHECK-NEXT: ret float %elt1
846 define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
847 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
846 define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
847 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
848848 %elt1 = extractelement <2 x float> %data, i32 1
849849 ret float %elt1
850850 }
851851
852852 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v4f32(
853 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
854 ; CHECK-NEXT: ret float %data
855 define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
856 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
853 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
854 ; CHECK-NEXT: ret float %data
855 define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
856 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
857857 %elt0 = extractelement <4 x float> %data, i32 0
858858 ret float %elt0
859859 }
860860
861861 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f32(
862 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
862 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
863863 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
864864 ; CHECK-NEXT: ret float %elt1
865 define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
866 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
865 define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
866 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
867867 %elt1 = extractelement <4 x float> %data, i32 1
868868 ret float %elt1
869869 }
870870
871871 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v4f32(
872 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
872 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
873873 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2
874874 ; CHECK-NEXT: ret float %elt1
875 define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
876 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
875 define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
876 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
877877 %elt1 = extractelement <4 x float> %data, i32 2
878878 ret float %elt1
879879 }
880880
881881 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f32(
882 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
882 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
883883 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
884884 ; CHECK-NEXT: ret float %elt1
885 define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
886 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
885 define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
886 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
887887 %elt1 = extractelement <4 x float> %data, i32 3
888888 ret float %elt1
889889 }
890890
891891 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f32(
892 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
892 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
893893 ; CHECK-NEXT: ret <2 x float>
894 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
895 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
894 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
895 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
896896 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
897897 ret <2 x float> %shuf
898898 }
899899
900900 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v4f32(
901 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
901 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
902902 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
903903 ; CHECK-NEXT: ret <2 x float> %shuf
904 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
905 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
904 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
905 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
906906 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 1, i32 2>
907907 ret <2 x float> %shuf
908908 }
909909
910910 ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_v4f32(
911 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
911 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
912912 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
913913 ; CHECK-NEXT: ret <2 x float> %shuf
914 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
915 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
914 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
915 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
916916 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
917917 ret <2 x float> %shuf
918918 }
919919
920920 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(
921 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
921 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
922922 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
923923 ; CHECK-NEXT: ret <3 x float> %shuf
924 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
925 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
924 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
925 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
926926 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
927927 ret <3 x float> %shuf
928928 }
929929
930930 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(
931 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
931 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
932932 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
933933 ; CHECK-NEXT: ret <3 x float> %shuf
934 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
935 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
934 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
935 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
936936 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
937937 ret <3 x float> %shuf
938938 }
939939
940940 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(
941 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
941 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
942942 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
943943 ; CHECK-NEXT: ret <3 x float> %shuf
944 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
945 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
944 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
945 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
946946 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
947947 ret <3 x float> %shuf
948948 }
949949
950950 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v3f32(
951 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
952 ; CHECK-NEXT: ret float %data
953 define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
954 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
951 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
952 ; CHECK-NEXT: ret float %data
953 define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
954 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
955955 %elt0 = extractelement <3 x float> %data, i32 0
956956 ret float %elt0
957957 }
958958
959959 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f32(
960 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
960 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
961961 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
962962 ; CHECK-NEXT: ret float %elt1
963 define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
964 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
963 define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
964 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
965965 %elt1 = extractelement <3 x float> %data, i32 1
966966 ret float %elt1
967967 }
968968
969969 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v3f32(
970 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
970 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
971971 ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
972972 ; CHECK-NEXT: ret float %elt1
973 define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
974 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
973 define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
974 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
975975 %elt1 = extractelement <3 x float> %data, i32 2
976976 ret float %elt1
977977 }
978978
979979 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v3f32(
980 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
980 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
981981 ; CHECK-NEXT: ret <2 x float>
982 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
983 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
982 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
983 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
984984 %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32>
985985 ret <2 x float> %shuf
986986 }
987987
988988 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v3f32(
989 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
989 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
990990 ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32>
991991 ; CHECK-NEXT: ret <2 x float> %shuf
992 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
993 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
992 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
993 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
994994 %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32>
995995 ret <2 x float> %shuf
996996 }
997997
998998 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4f32(
999 ; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
999 ; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
10001000 ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
10011001 ; CHECK-NEXT: ret i32 %tmp2
1002 define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1003 %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1002 define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1003 %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
10041004 %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
10051005 %tmp2 = extractelement <4 x i32> %tmp1, i32 0
10061006 ret i32 %tmp2
10071007 }
10081008
10091009 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4i32(
1010 ; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1010 ; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
10111011 ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
10121012 ; CHECK-NEXT: ret float %tmp2
1013 define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1014 %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1013 define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1014 %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
10151015 %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
10161016 %tmp2 = extractelement <4 x float> %tmp1, i32 0
10171017 ret float %tmp2
10181018 }
10191019
10201020 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(
1021 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
1022 ; CHECK-NEXT: ret float %data
1023 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1024 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
1021 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1022 ; CHECK-NEXT: ret float %data
1023 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1024 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
10251025 %elt0 = extractelement <2 x float> %data, i32 0
10261026 ret float %elt0
10271027 }
10391039 ; --------------------------------------------------------------------
10401040
10411041 ; CHECK-LABEL: @struct_buffer_load_format_f32(
1042 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1043 ; CHECK-NEXT: ret float %data
1044 define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1045 %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1042 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1043 ; CHECK-NEXT: ret float %data
1044 define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1045 %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
10461046 ret float %data
10471047 }
10481048
10491049 ; CHECK-LABEL: @struct_buffer_load_format_v1f32(
1050 ; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1050 ; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
10511051 ; CHECK-NEXT: ret <1 x float> %data
1052 define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1053 %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1052 define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1053 %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
10541054 ret <1 x float> %data
10551055 }
10561056
10571057 ; CHECK-LABEL: @struct_buffer_load_format_v2f32(
1058 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1058 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
10591059 ; CHECK-NEXT: ret <2 x float> %data
1060 define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1061 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1060 define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1061 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
10621062 ret <2 x float> %data
10631063 }
10641064
10651065 ; CHECK-LABEL: @struct_buffer_load_format_v4f32(
1066 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1066 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
10671067 ; CHECK-NEXT: ret <4 x float> %data
1068 define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1069 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1068 define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1069 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
10701070 ret <4 x float> %data
10711071 }
10721072
10731073 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v2f32(
1074 ; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1075 ; CHECK-NEXT: ret float %data
1076 define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1077 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1074 ; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1075 ; CHECK-NEXT: ret float %data
1076 define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1077 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
10781078 %elt0 = extractelement <2 x float> %data, i32 0
10791079 ret float %elt0
10801080 }
10811081
10821082 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v2f32(
1083 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1083 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
10841084 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
10851085 ; CHECK-NEXT: ret float %elt1
1086 define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1087 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1086 define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1087 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
10881088 %elt1 = extractelement <2 x float> %data, i32 1
10891089 ret float %elt1
10901090 }
10911091
10921092 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v4f32(
1093 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1094 ; CHECK-NEXT: ret float %data
1095 define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1096 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1093 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1094 ; CHECK-NEXT: ret float %data
1095 define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1096 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
10971097 %elt0 = extractelement <4 x float> %data, i32 0
10981098 ret float %elt0
10991099 }
11001100
11011101 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v4f32(
1102 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1102 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11031103 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
11041104 ; CHECK-NEXT: ret float %elt1
1105 define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1106 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1105 define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1106 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11071107 %elt1 = extractelement <4 x float> %data, i32 1
11081108 ret float %elt1
11091109 }
11101110
11111111 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v4f32(
1112 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1112 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11131113 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2
11141114 ; CHECK-NEXT: ret float %elt1
1115 define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1116 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1115 define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1116 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11171117 %elt1 = extractelement <4 x float> %data, i32 2
11181118 ret float %elt1
11191119 }
11201120
11211121 ; CHECK-LABEL: @extract_elt3_struct_buffer_load_format_v4f32(
1122 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1122 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11231123 ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3
11241124 ; CHECK-NEXT: ret float %elt1
1125 define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1126 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1125 define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1126 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11271127 %elt1 = extractelement <4 x float> %data, i32 3
11281128 ret float %elt1
11291129 }
11301130
11311131 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v4f32(
1132 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1132 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11331133 ; CHECK-NEXT: ret <2 x float>
1134 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1135 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1134 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1135 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11361136 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32>
11371137 ret <2 x float> %shuf
11381138 }
11391139
11401140 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v4f32(
1141 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1141 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11421142 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32>
11431143 ; CHECK-NEXT: ret <2 x float> %shuf
1144 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1145 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1144 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1145 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11461146 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32>
11471147 ret <2 x float> %shuf
11481148 }
11491149
11501150 ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_format_v4f32(
1151 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1151 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11521152 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32>
11531153 ; CHECK-NEXT: ret <2 x float> %shuf
1154 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1155 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1154 define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1155 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11561156 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32>
11571157 ret <2 x float> %shuf
11581158 }
11591159
11601160 ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(
1161 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1161 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11621162 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32>
11631163 ; CHECK-NEXT: ret <3 x float> %shuf
1164 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1165 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1164 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1165 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11661166 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32>
11671167 ret <3 x float> %shuf
11681168 }
11691169
11701170 ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(
1171 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1171 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11721172 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32>
11731173 ; CHECK-NEXT: ret <3 x float> %shuf
1174 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1175 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1174 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1175 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11761176 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32>
11771177 ret <3 x float> %shuf
11781178 }
11791179
11801180 ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(
1181 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1181 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11821182 ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32>
11831183 ; CHECK-NEXT: ret <3 x float> %shuf
1184 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1185 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1184 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1185 %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11861186 %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32>
11871187 ret <3 x float> %shuf
11881188 }
11891189
11901190 ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v3f32(
1191 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1192 ; CHECK-NEXT: ret float %data
1193 define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1194 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1191 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
1192 ; CHECK-NEXT: ret float %data
1193 define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1194 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
11951195 %elt0 = extractelement <3 x float> %data, i32 0
11961196 ret float %elt0
11971197 }
11981198
11991199 ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v3f32(
1200 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1200 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
12011201 ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1
12021202 ; CHECK-NEXT: ret float %elt1
1203 define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1204 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1203 define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1204 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
12051205 %elt1 = extractelement <3 x float> %data, i32 1
12061206 ret float %elt1
12071207 }
12081208
12091209 ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v3f32(
1210 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1210 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
12111211 ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2
12121212 ; CHECK-NEXT: ret float %elt1
1213 define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1214 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1213 define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1214 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
12151215 %elt1 = extractelement <3 x float> %data, i32 2
12161216 ret float %elt1
12171217 }
12181218
12191219 ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v3f32(
1220 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1220 ; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
12211221 ; CHECK-NEXT: ret <2 x float>
1222 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1223 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1222 define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1223 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
12241224 %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32>
12251225 ret <2 x float> %shuf
12261226 }
12271227
12281228 ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v3f32(
1229 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1229 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
12301230 ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32>
12311231 ; CHECK-NEXT: ret <2 x float> %shuf
1232 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1233 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1232 define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1233 %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
12341234 %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32>
12351235 ret <2 x float> %shuf
12361236 }
12371237
12381238 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4f32(
1239 ; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1239 ; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
12401240 ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32
12411241 ; CHECK-NEXT: ret i32 %tmp2
1242 define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1243 %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1242 define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1243 %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
12441244 %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
12451245 %tmp2 = extractelement <4 x i32> %tmp1, i32 0
12461246 ret i32 %tmp2
12471247 }
12481248
12491249 ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4i32(
1250 ; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1250 ; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
12511251 ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float
12521252 ; CHECK-NEXT: ret float %tmp2
1253 define float @extract0_bitcast_struct_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1254 %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent)
1253 define float @extract0_bitcast_struct_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1254 %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0)
12551255 %tmp1 = bitcast <4 x i32> %tmp to <4 x float>
12561256 %tmp2 = extractelement <4 x float> %tmp1, i32 0
12571257 ret float %tmp2
12581258 }
12591259
12601260 ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(
1261 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
1262 ; CHECK-NEXT: ret float %data
1263 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 {
1264 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0
1261 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
1262 ; CHECK-NEXT: ret float %data
1263 define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 {
1264 %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0
12651265 %elt0 = extractelement <2 x float> %data, i32 0
12661266 ret float %elt0
12671267 }
13181318 ret float %elt0
13191319 }
13201320
1321 ; CHECK-LABEL: @extract_elt0_invalid_dmask_image_sample_1d_v4f32_f32(
1322 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
1323 ; CHECK-NEXT: %elt0 = extractelement <4 x float> %data, i32 0
1324 ; CHECK-NEXT: ret float %elt0
1325 define amdgpu_ps float @extract_elt0_invalid_dmask_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask) #0 {
1326 %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
1327 %elt0 = extractelement <4 x float> %data, i32 0
1328 ret float %elt0
1329 }
1330
13311321 ; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(
13321322 ; CHECK-NEXT: ret float undef
13331323 define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
23942384 ret float %elt0
23952385 }
23962386
2397 ; Verify that we don't crash on non-constant operand.
2398 define protected <4 x half> @__llvm_amdgcn_image_sample_d_1darray_v4f16_f32_f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 zeroext, i32, i32) local_unnamed_addr {
2399 %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32 %0, float %1, float %2, float %3, float %4, <8 x i32> %5, <4 x i32> %6, i1 zeroext %7, i32 %8, i32 %9) #1
2400 ret <4 x half> %tmp
2401 }
2402
24032387 declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
2404 declare <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
24052388
24062389 ; --------------------------------------------------------------------
24072390 ; TFE / LWE
10771077 ; llvm.amdgcn.exp
10781078 ; --------------------------------------------------------------------
10791079
1080 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind inaccessiblememonly
1081
1082 ; Make sure no crashing on invalid variable params
1083 ; CHECK-LABEL: @exp_invalid_inputs(
1084 ; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
1085 ; CHECK: call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
1086 define void @exp_invalid_inputs(i32 %tgt, i32 %en) {
1087 call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
1088 call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
1089 ret void
1090 }
1080 declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) nounwind inaccessiblememonly
10911081
10921082 ; CHECK-LABEL: @exp_disabled_inputs_to_undef(
10931083 ; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.000000e+00, float undef, float undef, float undef, i1 true, i1 false)
11351125 ; llvm.amdgcn.exp.compr
11361126 ; --------------------------------------------------------------------
11371127
1138 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind inaccessiblememonly
1139
1140 ; CHECK-LABEL: @exp_compr_invalid_inputs(
1141 ; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
1142 ; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
1143 define void @exp_compr_invalid_inputs(i32 %tgt, i32 %en) {
1144 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
1145 call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
1146 ret void
1147 }
1128 declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) nounwind inaccessiblememonly
11481129
11491130 ; CHECK-LABEL: @exp_compr_disabled_inputs_to_undef(
11501131 ; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
14031384 ; llvm.amdgcn.icmp
14041385 ; --------------------------------------------------------------------
14051386
1406 declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) nounwind readnone convergent
1407 declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) nounwind readnone convergent
1408 declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) nounwind readnone convergent
1409
1410 ; Make sure there's no crash for invalid input
1411 ; CHECK-LABEL: @invalid_nonconstant_icmp_code(
1412 ; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
1413 define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) {
1414 %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
1415 ret i64 %result
1416 }
1387 declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32 immarg) nounwind readnone convergent
1388 declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32 immarg) nounwind readnone convergent
1389 declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32 immarg) nounwind readnone convergent
14171390
14181391 ; CHECK-LABEL: @invalid_icmp_code(
14191392 ; CHECK: %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31)
20111984 ; llvm.amdgcn.fcmp
20121985 ; --------------------------------------------------------------------
20131986
2014 declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) nounwind readnone convergent
2015
2016 ; Make sure there's no crash for invalid input
2017 ; CHECK-LABEL: @invalid_nonconstant_fcmp_code(
2018 ; CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
2019 define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) {
2020 %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
2021 ret i64 %result
2022 }
1987 declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32 immarg) nounwind readnone convergent
20231988
20241989 ; CHECK-LABEL: @invalid_fcmp_code(
20251990 ; CHECK: %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1)
66 store i64 %arg, i64* %tmp, align 8
77 %tmp1 = load i64, i64* %tmp, align 8
88 %tmp2 = load i64, i64* %tmp, align 8
9 %tmp3 = call i64 @llvm.expect.i64(i64 %tmp1, i64 %tmp2)
9 %tmp3 = call i64 @llvm.expect.i64(i64 %tmp1, i64 123)
1010 ret i64 %tmp3
1111 }
1212
1313 ; Function Attrs: nounwind readnone
14 declare i64 @llvm.expect.i64(i64, i64)
14 declare i64 @llvm.expect.i64(i64, i64 immarg)
1515
1616
1717 !llvm.module.flags = !{!0}
0 ; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
1
2 declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1)
3 define void @buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i1 %bool) {
4 ; CHECK: immarg operand has non-immediate parameter
5 ; CHECK-NEXT: i1 %bool
6 ; CHECK-NEXT: %data0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 %bool, i1 false)
7 %data0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 %bool, i1 false)
8
9 ; CHECK: immarg operand has non-immediate parameter
10 ; CHECK-NEXT: i1 %bool
11 ; CHECK-NEXT: %data1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 %bool)
12 %data1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 %bool)
13 ret void
14 }
15
16 declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32)
17 define void @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %arg) {
18 ; CHECK: immarg operand has non-immediate parameter
19 ; CHECK-NEXT: i32 %arg
20 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg)
21 %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg)
22 ret void
23 }
24
25 declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32)
26 define void @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %arg) {
27 ; CHECK: immarg operand has non-immediate parameter
28 ; CHECK-NEXT: i32 %arg
29 ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg)
30 %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg)
31 ret void
32 }
33
34 declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32)
35 define void @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) {
36 ; CHECK: immarg operand has non-immediate parameter
37 ; CHECK-NEXT: i32 %arg
38 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg)
39 %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg)
40 ret void
41 }
42
43 declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32)
44 define void @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) {
45 ; CHECK: immarg operand has non-immediate parameter
46 ; CHECK-NEXT: i32 %arg
47 ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg)
48 %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg)
49 ret void
50 }
51
52 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
53 define void @invalid_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask, i1 %bool, i32 %arg) {
54 ; CHECK: immarg operand has non-immediate parameter
55 ; CHECK-NEXT: i32 %dmask
56 ; CHECK-NEXT: %data0 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
57 %data0 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
58
59 ; CHECK: immarg operand has non-immediate parameter
60 ; CHECK-NEXT: i1 %bool
61 ; CHECK-NEXT: %data1 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 %bool, i32 0, i32 0)
62 %data1 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 %bool, i32 0, i32 0)
63
64 ; CHECK: immarg operand has non-immediate parameter
65 ; CHECK-NEXT: i32 %arg
66 ; CHECK-NEXT: %data2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 %arg, i32 0)
67 %data2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 %arg, i32 0)
68
69 ; CHECK: immarg operand has non-immediate parameter
70 ; CHECK-NEXT: i32 %arg
71 ; CHECK-NEXT: %data3 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 %arg)
72 %data3 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 %arg)
73 ret void
74 }
75
76 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
77 define void @exp_invalid_inputs(i32 %tgt, i32 %en, i1 %bool) {
78 ; CHECK: immarg operand has non-immediate parameter
79 ; CHECK-NEXT: i32 %en
80 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
81 call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
82
83 ; CHECK: immarg operand has non-immediate parameter
84 ; CHECK-NEXT: i32 %tgt
85 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
86 call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
87
88 ; CHECK: immarg operand has non-immediate parameter
89 ; CHECK-NEXT: i1 %bool
90 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 %bool, i1 false)
91 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 %bool, i1 false)
92
93 ; CHECK: immarg operand has non-immediate parameter
94 ; CHECK-NEXT: i1 %bool
95 ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 false, i1 %bool)
96 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 %bool)
97 ret void
98 }
99
100 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1)
101
102 define void @exp_compr_invalid_inputs(i32 %tgt, i32 %en, i1 %bool) {
103 ; CHECK: immarg operand has non-immediate parameter
104 ; CHECK-NEXT: i32 %en
105 ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
106 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
107
108 ; CHECK: immarg operand has non-immediate parameter
109 ; CHECK-NEXT: i32 %tgt
110 ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
111 call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
112
113 ; CHECK: immarg operand has non-immediate parameter
114 ; CHECK-NEXT: i1 %bool
115 ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 %bool, i1 false)
116 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 %bool, i1 false)
117
118 ; CHECK: immarg operand has non-immediate parameter
119 ; CHECK-NEXT: i1 %bool
120 ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 false, i1 %bool)
121 call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 false, i1 %bool)
122 ret void
123 }
124
125 declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32)
126
127 define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) {
128 ; CHECK: immarg operand has non-immediate parameter
129 ; CHECK-NEXT: i32 %c
130 ; CHECK-NEXT: %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
131 %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
132 ret i64 %result
133 }
134
135 declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32)
136 define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) {
137 ; CHECK: immarg operand has non-immediate parameter
138 ; CHECK-NEXT: i32 %c
139