llvm.org GIT mirror llvm / 8b6a26c
Implement new way of expanding extloads.

Now that the source and destination types can be specified, allow doing an expansion that doesn't use an EXTLOAD of the result type. Try to do a legal extload to an intermediate type and extend that if possible.

This generalizes the special case custom lowering of extloads R600 has been using to work around this problem.

This also happens to fix a bug that would incorrectly use more aligned loads than should be used.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225925 91177308-0d34-0410-b5e6-96231b3b80d8

Matt Arsenault, 5 years ago
11 changed file(s) with 1452 addition(s) and 58 deletion(s).
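The idea, roughly, is the following (a minimal sketch only, with simplified, hypothetical names and packaging rather than the exact LegalizeDAG code): when an extending load directly to the result type is not legal, first load into an intermediate legal register type for the source type (as a plain load if the source type is itself legal, otherwise as a narrower extload), then extend that value the rest of the way to the result type.

    // Sketch of the expansion. Assumes SrcVT is the in-memory type, DstVT the
    // result type, and TLI/DAG are the usual TargetLowering/SelectionDAG
    // objects; the free-function packaging is illustrative only.
    static SDValue expandExtLoad(SelectionDAG &DAG, const TargetLowering &TLI,
                                 LoadSDNode *LD, ISD::LoadExtType ExtType,
                                 EVT DstVT, EVT SrcVT, SDLoc dl) {
      SDValue Chain = LD->getChain();
      SDValue Ptr = LD->getBasePtr();
      // Pick an intermediate register type for the source type.
      EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
      // If the source type is already legal this is an ordinary load;
      // otherwise keep the original extension kind for the narrower
      // intermediate load.
      ISD::LoadExtType MidExtType =
          (LoadVT == SrcVT) ? ISD::NON_EXTLOAD : ExtType;
      SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr, SrcVT,
                                    LD->getMemOperand());
      // Extend the intermediate value the rest of the way to the result type.
      // (The caller would also take the updated chain from Load.getValue(1).)
      unsigned ExtendOp =
          ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType);
      return DAG.getNode(ExtendOp, dl, DstVT, Load);
    }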
752752 LAST_LOADEXT_TYPE
753753 };
754754
755 NodeType getExtForLoadExtType(LoadExtType);
755 NodeType getExtForLoadExtType(bool IsFP, LoadExtType);
756756
757757 //===--------------------------------------------------------------------===//
758758 /// ISD::CondCode enum - These are ordered carefully to make the bitfields
10891089 break;
10901090 }
10911091 case TargetLowering::Expand:
1092 if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0),
1093 SrcVT) && TLI.isTypeLegal(SrcVT)) {
1094 SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, LD->getMemOperand());
1095 unsigned ExtendOp;
1096 switch (ExtType) {
1097 case ISD::EXTLOAD:
1098 ExtendOp = (SrcVT.isFloatingPoint() ?
1099 ISD::FP_EXTEND : ISD::ANY_EXTEND);
1092 if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) {
1093 // If the source type is not legal, see if there is a legal extload to
1094 // an intermediate type that we can then extend further.
1095 EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
1096 if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT?
1097 TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) {
1098 // If we are loading a legal type, this is a non-extload followed by a
1099 // full extend.
1100 ISD::LoadExtType MidExtType =
1101 (LoadVT == SrcVT) ? ISD::NON_EXTLOAD : ExtType;
1102
1103 SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr,
1104 SrcVT, LD->getMemOperand());
1105 unsigned ExtendOp =
1106 ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType);
1107 Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
1108 Chain = Load.getValue(1);
11001109 break;
1101 case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
1102 case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
1103 default: llvm_unreachable("Unexpected extend load type!");
11041110 }
1105 Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
1106 Chain = Load.getValue(1);
1107 break;
11081111 }
11091112
11101113 assert(!SrcVT.isVector() &&
233233 return true;
234234 }
235235
236 ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) {
236 ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
237237 switch (ExtType) {
238238 case ISD::EXTLOAD:
239 return ISD::ANY_EXTEND;
239 return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
240240 case ISD::SEXTLOAD:
241241 return ISD::SIGN_EXTEND;
242242 case ISD::ZEXTLOAD:
214214 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
215215 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
216216 setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
217
218 // There are no 64-bit extloads. These should be done as a 32-bit extload and
219 // an extension to 64-bit.
220 for (MVT VT : MVT::integer_valuetypes()) {
221 setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand);
222 setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand);
223 setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand);
224 }
217225
218226 for (MVT VT : MVT::integer_vector_valuetypes()) {
219227 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand);
14101418 ISD::LoadExtType ExtType = Load->getExtensionType();
14111419 EVT VT = Op.getValueType();
14121420 EVT MemVT = Load->getMemoryVT();
1413
1414 if (ExtType != ISD::NON_EXTLOAD && !VT.isVector() && VT.getSizeInBits() > 32) {
1415 // We can do the extload to 32-bits, and then need to separately extend to
1416 // 64-bits.
1417
1418 SDValue ExtLoad32 = DAG.getExtLoad(ExtType, DL, MVT::i32,
1419 Load->getChain(),
1420 Load->getBasePtr(),
1421 MemVT,
1422 Load->getMemOperand());
1423
1424 SDValue Ops[] = {
1425 DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32),
1426 ExtLoad32.getValue(1)
1427 };
1428
1429 return DAG.getMergeValues(Ops, DL);
1430 }
14311421
14321422 if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) {
14331423 assert(VT == MVT::i1 && "Only i1 non-extloads expected");
130130 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
131131
132132 for (MVT VT : MVT::integer_valuetypes()) {
133 if (VT == MVT::i64)
134 continue;
135
133136 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
134 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
135 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
137 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Legal);
138 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Legal);
136139 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
137140
138141 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
139 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
140 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
142 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Legal);
143 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Legal);
141144 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
142145
143146 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
144 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
145 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
147 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Legal);
148 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Legal);
146149 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
147150 }
148151
2121 ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
2222 ; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
2323 define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
24 %load = load <2 x i8> addrspace(1)* %in, align 1
24 %load = load <2 x i8> addrspace(1)* %in, align 2
2525 %cvt = uitofp <2 x i8> %load to <2 x float>
2626 store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
2727 ret void
4242 }
4343
4444 ; SI-LABEL: {{^}}load_v4i8_to_v4f32:
45 ; We can't use buffer_load_dword here, because the load is byte aligned, and
46 ; buffer_load_dword requires dword alignment.
47 ; SI: buffer_load_ushort
48 ; SI: buffer_load_ushort
49 ; SI: v_or_b32_e32 [[LOADREG:v[0-9]+]]
45 ; SI: buffer_load_dword [[LOADREG:v[0-9]+]]
5046 ; SI-NOT: bfe
5147 ; SI-NOT: lshr
5248 ; SI-DAG: v_cvt_f32_ubyte3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
5551 ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
5652 ; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
5753 define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
54 %load = load <4 x i8> addrspace(1)* %in, align 4
55 %cvt = uitofp <4 x i8> %load to <4 x float>
56 store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
57 ret void
58 }
59
60 ; This should not be adding instructions to shift into the correct
61 ; position in the word for the component.
62
63 ; SI-LABEL: {{^}}load_v4i8_to_v4f32_unaligned:
64 ; SI: buffer_load_ubyte [[LOADREG0:v[0-9]+]]
65 ; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]]
66 ; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]]
67 ; SI: buffer_load_ubyte [[LOADREG3:v[0-9]+]]
68
69 ; SI: v_lshlrev_b32
70 ; SI: v_or_b32
71 ; SI: v_lshlrev_b32
72 ; SI: v_or_b32
73 ; SI: v_lshlrev_b32
74 ; SI: v_or_b32
75
76 ; XSI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]], [[LOADREG0]]
77 ; XSI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG1]]
78 ; XSI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG2]]
79 ; XSI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG3]]
80
81 ; SI-DAG: v_cvt_f32_ubyte0_e32
82 ; SI-DAG: v_cvt_f32_ubyte1_e32
83 ; SI-DAG: v_cvt_f32_ubyte2_e32
84 ; SI-DAG: v_cvt_f32_ubyte3_e32
85
86 ; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
87 define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
5888 %load = load <4 x i8> addrspace(1)* %in, align 1
5989 %cvt = uitofp <4 x i8> %load to <4 x float>
6090 store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
0 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
2 ; FIXME: Evergreen broken
3
4 ; FUNC-LABEL: {{^}}zextload_global_i1_to_i32:
5 ; SI: buffer_load_ubyte
6 ; SI: buffer_store_dword
7 ; SI: s_endpgm
8 define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
9 %a = load i1 addrspace(1)* %in
10 %ext = zext i1 %a to i32
11 store i32 %ext, i32 addrspace(1)* %out
12 ret void
13 }
14
15 ; FUNC-LABEL: {{^}}sextload_global_i1_to_i32:
16 ; SI: buffer_load_ubyte
17 ; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
18 ; SI: buffer_store_dword
19 ; SI: s_endpgm
20 define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
21 %a = load i1 addrspace(1)* %in
22 %ext = sext i1 %a to i32
23 store i32 %ext, i32 addrspace(1)* %out
24 ret void
25 }
26
27 ; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i32:
28 ; SI: s_endpgm
29 define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
30 %load = load <1 x i1> addrspace(1)* %in
31 %ext = zext <1 x i1> %load to <1 x i32>
32 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
33 ret void
34 }
35
36 ; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i32:
37 ; SI: s_endpgm
38 define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
39 %load = load <1 x i1> addrspace(1)* %in
40 %ext = sext <1 x i1> %load to <1 x i32>
41 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
42 ret void
43 }
44
45 ; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i32:
46 ; SI: s_endpgm
47 define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
48 %load = load <2 x i1> addrspace(1)* %in
49 %ext = zext <2 x i1> %load to <2 x i32>
50 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
51 ret void
52 }
53
54 ; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i32:
55 ; SI: s_endpgm
56 define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
57 %load = load <2 x i1> addrspace(1)* %in
58 %ext = sext <2 x i1> %load to <2 x i32>
59 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
60 ret void
61 }
62
63 ; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i32:
64 ; SI: s_endpgm
65 define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
66 %load = load <4 x i1> addrspace(1)* %in
67 %ext = zext <4 x i1> %load to <4 x i32>
68 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
69 ret void
70 }
71
72 ; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i32:
73 ; SI: s_endpgm
74 define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
75 %load = load <4 x i1> addrspace(1)* %in
76 %ext = sext <4 x i1> %load to <4 x i32>
77 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
78 ret void
79 }
80
81 ; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i32:
82 ; SI: s_endpgm
83 define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
84 %load = load <8 x i1> addrspace(1)* %in
85 %ext = zext <8 x i1> %load to <8 x i32>
86 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
87 ret void
88 }
89
90 ; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i32:
91 ; SI: s_endpgm
92 define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
93 %load = load <8 x i1> addrspace(1)* %in
94 %ext = sext <8 x i1> %load to <8 x i32>
95 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
96 ret void
97 }
98
99 ; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i32:
100 ; SI: s_endpgm
101 define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
102 %load = load <16 x i1> addrspace(1)* %in
103 %ext = zext <16 x i1> %load to <16 x i32>
104 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
105 ret void
106 }
107
108 ; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i32:
109 ; SI: s_endpgm
110 define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
111 %load = load <16 x i1> addrspace(1)* %in
112 %ext = sext <16 x i1> %load to <16 x i32>
113 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
114 ret void
115 }
116
117 ; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i32:
118 ; XSI: s_endpgm
119 ; define void @zextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
120 ; %load = load <32 x i1> addrspace(1)* %in
121 ; %ext = zext <32 x i1> %load to <32 x i32>
122 ; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
123 ; ret void
124 ; }
125
126 ; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i32:
127 ; XSI: s_endpgm
128 ; define void @sextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
129 ; %load = load <32 x i1> addrspace(1)* %in
130 ; %ext = sext <32 x i1> %load to <32 x i32>
131 ; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
132 ; ret void
133 ; }
134
135 ; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i32:
136 ; XSI: s_endpgm
137 ; define void @zextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
138 ; %load = load <64 x i1> addrspace(1)* %in
139 ; %ext = zext <64 x i1> %load to <64 x i32>
140 ; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
141 ; ret void
142 ; }
143
144 ; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i32:
145 ; XSI: s_endpgm
146 ; define void @sextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
147 ; %load = load <64 x i1> addrspace(1)* %in
148 ; %ext = sext <64 x i1> %load to <64 x i32>
149 ; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
150 ; ret void
151 ; }
152
153 ; FUNC-LABEL: {{^}}zextload_global_i1_to_i64:
154 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
155 ; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
156 ; SI: buffer_store_dwordx2
157 define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
158 %a = load i1 addrspace(1)* %in
159 %ext = zext i1 %a to i64
160 store i64 %ext, i64 addrspace(1)* %out
161 ret void
162 }
163
164 ; FUNC-LABEL: {{^}}sextload_global_i1_to_i64:
165 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
166 ; SI: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
167 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
168 ; SI: buffer_store_dwordx2
169 define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
170 %a = load i1 addrspace(1)* %in
171 %ext = sext i1 %a to i64
172 store i64 %ext, i64 addrspace(1)* %out
173 ret void
174 }
175
176 ; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i64:
177 ; SI: s_endpgm
178 define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
179 %load = load <1 x i1> addrspace(1)* %in
180 %ext = zext <1 x i1> %load to <1 x i64>
181 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
182 ret void
183 }
184
185 ; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i64:
186 ; SI: s_endpgm
187 define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
188 %load = load <1 x i1> addrspace(1)* %in
189 %ext = sext <1 x i1> %load to <1 x i64>
190 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
191 ret void
192 }
193
194 ; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i64:
195 ; SI: s_endpgm
196 define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
197 %load = load <2 x i1> addrspace(1)* %in
198 %ext = zext <2 x i1> %load to <2 x i64>
199 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
200 ret void
201 }
202
203 ; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i64:
204 ; SI: s_endpgm
205 define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
206 %load = load <2 x i1> addrspace(1)* %in
207 %ext = sext <2 x i1> %load to <2 x i64>
208 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
209 ret void
210 }
211
212 ; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i64:
213 ; SI: s_endpgm
214 define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
215 %load = load <4 x i1> addrspace(1)* %in
216 %ext = zext <4 x i1> %load to <4 x i64>
217 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
218 ret void
219 }
220
221 ; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i64:
222 ; SI: s_endpgm
223 define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
224 %load = load <4 x i1> addrspace(1)* %in
225 %ext = sext <4 x i1> %load to <4 x i64>
226 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
227 ret void
228 }
229
230 ; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i64:
231 ; SI: s_endpgm
232 define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
233 %load = load <8 x i1> addrspace(1)* %in
234 %ext = zext <8 x i1> %load to <8 x i64>
235 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
236 ret void
237 }
238
239 ; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i64:
240 ; SI: s_endpgm
241 define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
242 %load = load <8 x i1> addrspace(1)* %in
243 %ext = sext <8 x i1> %load to <8 x i64>
244 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
245 ret void
246 }
247
248 ; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i64:
249 ; SI: s_endpgm
250 define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
251 %load = load <16 x i1> addrspace(1)* %in
252 %ext = zext <16 x i1> %load to <16 x i64>
253 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
254 ret void
255 }
256
257 ; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i64:
258 ; SI: s_endpgm
259 define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
260 %load = load <16 x i1> addrspace(1)* %in
261 %ext = sext <16 x i1> %load to <16 x i64>
262 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
263 ret void
264 }
265
266 ; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i64:
267 ; XSI: s_endpgm
268 ; define void @zextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
269 ; %load = load <32 x i1> addrspace(1)* %in
270 ; %ext = zext <32 x i1> %load to <32 x i64>
271 ; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
272 ; ret void
273 ; }
274
275 ; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i64:
276 ; XSI: s_endpgm
277 ; define void @sextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
278 ; %load = load <32 x i1> addrspace(1)* %in
279 ; %ext = sext <32 x i1> %load to <32 x i64>
280 ; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
281 ; ret void
282 ; }
283
284 ; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i64:
285 ; XSI: s_endpgm
286 ; define void @zextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
287 ; %load = load <64 x i1> addrspace(1)* %in
288 ; %ext = zext <64 x i1> %load to <64 x i64>
289 ; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
290 ; ret void
291 ; }
292
293 ; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i64:
294 ; XSI: s_endpgm
295 ; define void @sextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
296 ; %load = load <64 x i1> addrspace(1)* %in
297 ; %ext = sext <64 x i1> %load to <64 x i64>
298 ; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
299 ; ret void
300 ; }
0 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
2 ; FIXME: cypress is broken because the bigger testcases spill and it's not implemented
3
4 ; FUNC-LABEL: {{^}}zextload_global_i16_to_i32:
5 ; SI: buffer_load_ushort
6 ; SI: buffer_store_dword
7 ; SI: s_endpgm
8 define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
9 %a = load i16 addrspace(1)* %in
10 %ext = zext i16 %a to i32
11 store i32 %ext, i32 addrspace(1)* %out
12 ret void
13 }
14
15 ; FUNC-LABEL: {{^}}sextload_global_i16_to_i32:
16 ; SI: buffer_load_sshort
17 ; SI: buffer_store_dword
18 ; SI: s_endpgm
19 define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
20 %a = load i16 addrspace(1)* %in
21 %ext = sext i16 %a to i32
22 store i32 %ext, i32 addrspace(1)* %out
23 ret void
24 }
25
26 ; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i32:
27 ; SI: buffer_load_ushort
28 ; SI: s_endpgm
29 define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
30 %load = load <1 x i16> addrspace(1)* %in
31 %ext = zext <1 x i16> %load to <1 x i32>
32 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
33 ret void
34 }
35
36 ; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i32:
37 ; SI: buffer_load_sshort
38 ; SI: s_endpgm
39 define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
40 %load = load <1 x i16> addrspace(1)* %in
41 %ext = sext <1 x i16> %load to <1 x i32>
42 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
43 ret void
44 }
45
46 ; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32:
47 ; SI: s_endpgm
48 define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
49 %load = load <2 x i16> addrspace(1)* %in
50 %ext = zext <2 x i16> %load to <2 x i32>
51 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
52 ret void
53 }
54
55 ; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32:
56 ; SI: s_endpgm
57 define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
58 %load = load <2 x i16> addrspace(1)* %in
59 %ext = sext <2 x i16> %load to <2 x i32>
60 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
61 ret void
62 }
63
64 ; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32:
65 ; SI: s_endpgm
66 define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
67 %load = load <4 x i16> addrspace(1)* %in
68 %ext = zext <4 x i16> %load to <4 x i32>
69 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
70 ret void
71 }
72
73 ; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32:
74 ; SI: s_endpgm
75 define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
76 %load = load <4 x i16> addrspace(1)* %in
77 %ext = sext <4 x i16> %load to <4 x i32>
78 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
79 ret void
80 }
81
82 ; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32:
83 ; SI: s_endpgm
84 define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
85 %load = load <8 x i16> addrspace(1)* %in
86 %ext = zext <8 x i16> %load to <8 x i32>
87 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
88 ret void
89 }
90
91 ; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32:
92 ; SI: s_endpgm
93 define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
94 %load = load <8 x i16> addrspace(1)* %in
95 %ext = sext <8 x i16> %load to <8 x i32>
96 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
97 ret void
98 }
99
100 ; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32:
101 ; SI: s_endpgm
102 define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
103 %load = load <16 x i16> addrspace(1)* %in
104 %ext = zext <16 x i16> %load to <16 x i32>
105 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
106 ret void
107 }
108
109 ; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i32:
110 ; SI: s_endpgm
111 define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
112 %load = load <16 x i16> addrspace(1)* %in
113 %ext = sext <16 x i16> %load to <16 x i32>
114 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
115 ret void
116 }
117
118 ; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32:
119 ; SI: s_endpgm
120 define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
121 %load = load <32 x i16> addrspace(1)* %in
122 %ext = zext <32 x i16> %load to <32 x i32>
123 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
124 ret void
125 }
126
127 ; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32:
128 ; SI: s_endpgm
129 define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
130 %load = load <32 x i16> addrspace(1)* %in
131 %ext = sext <32 x i16> %load to <32 x i32>
132 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
133 ret void
134 }
135
136 ; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32:
137 ; SI: s_endpgm
138 define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
139 %load = load <64 x i16> addrspace(1)* %in
140 %ext = zext <64 x i16> %load to <64 x i32>
141 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
142 ret void
143 }
144
145 ; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32:
146 ; SI: s_endpgm
147 define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
148 %load = load <64 x i16> addrspace(1)* %in
149 %ext = sext <64 x i16> %load to <64 x i32>
150 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
151 ret void
152 }
153
154 ; FUNC-LABEL: {{^}}zextload_global_i16_to_i64:
155 ; SI: buffer_load_ushort [[LOAD:v[0-9]+]],
156 ; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
157 ; SI: buffer_store_dwordx2
158 define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
159 %a = load i16 addrspace(1)* %in
160 %ext = zext i16 %a to i64
161 store i64 %ext, i64 addrspace(1)* %out
162 ret void
163 }
164
165 ; FUNC-LABEL: {{^}}sextload_global_i16_to_i64:
166 ; SI: buffer_load_sshort [[LOAD:v[0-9]+]],
167 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
168 ; SI: buffer_store_dwordx2
169 define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
170 %a = load i16 addrspace(1)* %in
171 %ext = sext i16 %a to i64
172 store i64 %ext, i64 addrspace(1)* %out
173 ret void
174 }
175
176 ; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64:
177 ; SI: s_endpgm
178 define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
179 %load = load <1 x i16> addrspace(1)* %in
180 %ext = zext <1 x i16> %load to <1 x i64>
181 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
182 ret void
183 }
184
185 ; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64:
186 ; SI: s_endpgm
187 define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
188 %load = load <1 x i16> addrspace(1)* %in
189 %ext = sext <1 x i16> %load to <1 x i64>
190 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
191 ret void
192 }
193
194 ; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64:
195 ; SI: s_endpgm
196 define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
197 %load = load <2 x i16> addrspace(1)* %in
198 %ext = zext <2 x i16> %load to <2 x i64>
199 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
200 ret void
201 }
202
203 ; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64:
204 ; SI: s_endpgm
205 define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
206 %load = load <2 x i16> addrspace(1)* %in
207 %ext = sext <2 x i16> %load to <2 x i64>
208 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
209 ret void
210 }
211
212 ; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64:
213 ; SI: s_endpgm
214 define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
215 %load = load <4 x i16> addrspace(1)* %in
216 %ext = zext <4 x i16> %load to <4 x i64>
217 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
218 ret void
219 }
220
221 ; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64:
222 ; SI: s_endpgm
223 define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
224 %load = load <4 x i16> addrspace(1)* %in
225 %ext = sext <4 x i16> %load to <4 x i64>
226 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
227 ret void
228 }
229
230 ; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64:
231 ; SI: s_endpgm
232 define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
233 %load = load <8 x i16> addrspace(1)* %in
234 %ext = zext <8 x i16> %load to <8 x i64>
235 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
236 ret void
237 }
238
239 ; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64:
240 ; SI: s_endpgm
241 define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
242 %load = load <8 x i16> addrspace(1)* %in
243 %ext = sext <8 x i16> %load to <8 x i64>
244 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
245 ret void
246 }
247
248 ; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64:
249 ; SI: s_endpgm
250 define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
251 %load = load <16 x i16> addrspace(1)* %in
252 %ext = zext <16 x i16> %load to <16 x i64>
253 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
254 ret void
255 }
256
257 ; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64:
258 ; SI: s_endpgm
259 define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
260 %load = load <16 x i16> addrspace(1)* %in
261 %ext = sext <16 x i16> %load to <16 x i64>
262 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
263 ret void
264 }
265
266 ; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64:
267 ; SI: s_endpgm
268 define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
269 %load = load <32 x i16> addrspace(1)* %in
270 %ext = zext <32 x i16> %load to <32 x i64>
271 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
272 ret void
273 }
274
275 ; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64:
276 ; SI: s_endpgm
277 define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
278 %load = load <32 x i16> addrspace(1)* %in
279 %ext = sext <32 x i16> %load to <32 x i64>
280 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
281 ret void
282 }
283
284 ; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64:
285 ; SI: s_endpgm
286 define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
287 %load = load <64 x i16> addrspace(1)* %in
288 %ext = zext <64 x i16> %load to <64 x i64>
289 store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
290 ret void
291 }
292
293 ; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64:
294 ; SI: s_endpgm
295 define void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
296 %load = load <64 x i16> addrspace(1)* %in
297 %ext = sext <64 x i16> %load to <64 x i64>
298 store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
299 ret void
300 }
0 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
2
3 ; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
4 ; SI: buffer_load_dword [[LOAD:v[0-9]+]],
5 ; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
6 ; SI: buffer_store_dwordx2
7 ; SI: s_endpgm
8 define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
9 %a = load i32 addrspace(1)* %in
10 %ext = zext i32 %a to i64
11 store i64 %ext, i64 addrspace(1)* %out
12 ret void
13 }
14
15 ; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
16 ; SI: buffer_load_dword [[LOAD:v[0-9]+]],
17 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
18 ; SI: buffer_store_dwordx2
19 define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
20 %a = load i32 addrspace(1)* %in
21 %ext = sext i32 %a to i64
22 store i64 %ext, i64 addrspace(1)* %out
23 ret void
24 }
25
26 ; FUNC-LABEL: {{^}}zextload_global_v1i32_to_v1i64:
27 ; SI: buffer_load_dword
28 ; SI: buffer_store_dwordx2
29 ; SI: s_endpgm
30 define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
31 %load = load <1 x i32> addrspace(1)* %in
32 %ext = zext <1 x i32> %load to <1 x i64>
33 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
34 ret void
35 }
36
37 ; FUNC-LABEL: {{^}}sextload_global_v1i32_to_v1i64:
38 ; SI: buffer_load_dword
39 ; SI: v_ashrrev_i32
40 ; SI: buffer_store_dwordx2
41 ; SI: s_endpgm
42 define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
43 %load = load <1 x i32> addrspace(1)* %in
44 %ext = sext <1 x i32> %load to <1 x i64>
45 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
46 ret void
47 }
48
49 ; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64:
50 ; SI: buffer_load_dwordx2
51 ; SI: buffer_store_dwordx2
52 ; SI: buffer_store_dwordx2
53 ; SI: s_endpgm
54 define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
55 %load = load <2 x i32> addrspace(1)* %in
56 %ext = zext <2 x i32> %load to <2 x i64>
57 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
58 ret void
59 }
60
61 ; FUNC-LABEL: {{^}}sextload_global_v2i32_to_v2i64:
62 ; SI: buffer_load_dwordx2
63 ; SI-DAG: v_ashrrev_i32
64 ; SI-DAG: v_ashrrev_i32
65 ; SI-DAG: buffer_store_dwordx2
66 ; SI-DAG: buffer_store_dwordx2
67 ; SI: s_endpgm
68 define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
69 %load = load <2 x i32> addrspace(1)* %in
70 %ext = sext <2 x i32> %load to <2 x i64>
71 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
72 ret void
73 }
74
75 ; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64:
76 ; SI: buffer_load_dwordx4
77 ; SI: buffer_store_dwordx2
78 ; SI: buffer_store_dwordx2
79 ; SI: buffer_store_dwordx2
80 ; SI: buffer_store_dwordx2
81 ; SI: s_endpgm
82 define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
83 %load = load <4 x i32> addrspace(1)* %in
84 %ext = zext <4 x i32> %load to <4 x i64>
85 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
86 ret void
87 }
88
89 ; FUNC-LABEL: {{^}}sextload_global_v4i32_to_v4i64:
90 ; SI: buffer_load_dwordx4
91 ; SI-DAG: v_ashrrev_i32
92 ; SI-DAG: v_ashrrev_i32
93 ; SI-DAG: v_ashrrev_i32
94 ; SI-DAG: v_ashrrev_i32
95 ; SI-DAG: buffer_store_dwordx2
96 ; SI-DAG: buffer_store_dwordx2
97 ; SI-DAG: buffer_store_dwordx2
98 ; SI-DAG: buffer_store_dwordx2
99 ; SI: s_endpgm
100 define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
101 %load = load <4 x i32> addrspace(1)* %in
102 %ext = sext <4 x i32> %load to <4 x i64>
103 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
104 ret void
105 }
106
107 ; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64:
108 ; SI: buffer_load_dword
109 ; SI: buffer_load_dword
110 ; SI: buffer_load_dword
111 ; SI: buffer_load_dword
112 ; SI: buffer_load_dword
113 ; SI: buffer_load_dword
114 ; SI: buffer_load_dword
115 ; SI: buffer_load_dword
116 ; SI-DAG: buffer_store_dwordx2
117 ; SI-DAG: buffer_store_dwordx2
118 ; SI-DAG: buffer_store_dwordx2
119 ; SI-DAG: buffer_store_dwordx2
120 ; SI-DAG: buffer_store_dwordx2
121 ; SI-DAG: buffer_store_dwordx2
122 ; SI-DAG: buffer_store_dwordx2
123 ; SI-DAG: buffer_store_dwordx2
124 ; SI: s_endpgm
125 define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
126 %load = load <8 x i32> addrspace(1)* %in
127 %ext = zext <8 x i32> %load to <8 x i64>
128 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
129 ret void
130 }
131
132 ; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64:
133 ; SI: buffer_load_dword
134 ; SI: buffer_load_dword
135 ; SI: buffer_load_dword
136 ; SI: buffer_load_dword
137 ; SI: buffer_load_dword
138 ; SI: buffer_load_dword
139 ; SI: buffer_load_dword
140 ; SI: buffer_load_dword
141
142 ; SI-DAG: v_ashrrev_i32
143 ; SI-DAG: v_ashrrev_i32
144 ; SI-DAG: v_ashrrev_i32
145 ; SI-DAG: v_ashrrev_i32
146 ; SI-DAG: v_ashrrev_i32
147 ; SI-DAG: v_ashrrev_i32
148 ; SI-DAG: v_ashrrev_i32
149 ; SI-DAG: v_ashrrev_i32
150 ; SI-DAG: buffer_store_dwordx2
151 ; SI-DAG: buffer_store_dwordx2
152 ; SI-DAG: buffer_store_dwordx2
153 ; SI-DAG: buffer_store_dwordx2
154 ; SI-DAG: buffer_store_dwordx2
155 ; SI-DAG: buffer_store_dwordx2
156 ; SI-DAG: buffer_store_dwordx2
157 ; SI-DAG: buffer_store_dwordx2
158
159 ; SI: s_endpgm
160 define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
161 %load = load <8 x i32> addrspace(1)* %in
162 %ext = sext <8 x i32> %load to <8 x i64>
163 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
164 ret void
165 }
166
167 ; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64:
168 ; SI: buffer_load_dword
169 ; SI: buffer_load_dword
170 ; SI: buffer_load_dword
171 ; SI: buffer_load_dword
172 ; SI: buffer_load_dword
173 ; SI: buffer_load_dword
174 ; SI: buffer_load_dword
175 ; SI: buffer_load_dword
176 ; SI: buffer_load_dword
177 ; SI: buffer_load_dword
178 ; SI: buffer_load_dword
179 ; SI: buffer_load_dword
180 ; SI: buffer_load_dword
181 ; SI: buffer_load_dword
182 ; SI: buffer_load_dword
183 ; SI: buffer_load_dword
184
185 ; SI-DAG: v_ashrrev_i32
186 ; SI-DAG: v_ashrrev_i32
187 ; SI-DAG: v_ashrrev_i32
188 ; SI-DAG: v_ashrrev_i32
189 ; SI-DAG: buffer_store_dwordx2
190 ; SI-DAG: buffer_store_dwordx2
191
192 ; SI-DAG: v_ashrrev_i32
193 ; SI-DAG: v_ashrrev_i32
194 ; SI-DAG: v_ashrrev_i32
195 ; SI-DAG: v_ashrrev_i32
196 ; SI-DAG: buffer_store_dwordx2
197 ; SI-DAG: buffer_store_dwordx2
198
199 ; SI-DAG: v_ashrrev_i32
200 ; SI-DAG: v_ashrrev_i32
201 ; SI-DAG: v_ashrrev_i32
202 ; SI-DAG: v_ashrrev_i32
203 ; SI-DAG: buffer_store_dwordx2
204 ; SI-DAG: buffer_store_dwordx2
205
206 ; SI-DAG: v_ashrrev_i32
207 ; SI-DAG: v_ashrrev_i32
208 ; SI-DAG: v_ashrrev_i32
209 ; SI-DAG: v_ashrrev_i32
210 ; SI-DAG: buffer_store_dwordx2
211 ; SI-DAG: buffer_store_dwordx2
212 ; SI: s_endpgm
213 define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
214 %load = load <16 x i32> addrspace(1)* %in
215 %ext = sext <16 x i32> %load to <16 x i64>
216 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
217 ret void
218 }
219
220 ; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64
221 ; SI: buffer_load_dword
222 ; SI: buffer_load_dword
223 ; SI: buffer_load_dword
224 ; SI: buffer_load_dword
225 ; SI: buffer_load_dword
226 ; SI: buffer_load_dword
227 ; SI: buffer_load_dword
228 ; SI: buffer_load_dword
229 ; SI: buffer_load_dword
230 ; SI: buffer_load_dword
231 ; SI: buffer_load_dword
232 ; SI: buffer_load_dword
233 ; SI: buffer_load_dword
234 ; SI: buffer_load_dword
235 ; SI: buffer_load_dword
236 ; SI: buffer_load_dword
237
238 ; SI: buffer_store_dwordx2
239 ; SI: buffer_store_dwordx2
240 ; SI: buffer_store_dwordx2
241 ; SI: buffer_store_dwordx2
242 ; SI: buffer_store_dwordx2
243 ; SI: buffer_store_dwordx2
244 ; SI: buffer_store_dwordx2
245 ; SI: buffer_store_dwordx2
246 ; SI: buffer_store_dwordx2
247 ; SI: buffer_store_dwordx2
248 ; SI: buffer_store_dwordx2
249 ; SI: buffer_store_dwordx2
250 ; SI: buffer_store_dwordx2
251 ; SI: buffer_store_dwordx2
252 ; SI: buffer_store_dwordx2
253 ; SI: buffer_store_dwordx2
254
255 ; SI: s_endpgm
256 define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
257 %load = load <16 x i32> addrspace(1)* %in
258 %ext = zext <16 x i32> %load to <16 x i64>
259 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
260 ret void
261 }
262
263 ; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64:
264 ; SI: buffer_load_dword
265 ; SI: buffer_load_dword
266 ; SI: buffer_load_dword
267 ; SI: buffer_load_dword
268 ; SI: buffer_load_dword
269 ; SI: buffer_load_dword
270 ; SI: buffer_load_dword
271 ; SI: buffer_load_dword
272
273 ; SI: buffer_load_dword
274 ; SI: buffer_load_dword
275 ; SI: buffer_load_dword
276 ; SI: buffer_load_dword
277 ; SI: buffer_load_dword
278 ; SI: buffer_load_dword
279 ; SI: buffer_load_dword
280 ; SI: buffer_load_dword
281
282 ; SI: buffer_load_dword
283 ; SI: buffer_load_dword
284 ; SI: buffer_load_dword
285 ; SI: buffer_load_dword
286 ; SI: buffer_load_dword
287 ; SI: buffer_load_dword
288 ; SI: buffer_load_dword
289 ; SI: buffer_load_dword
290
291 ; SI: buffer_load_dword
292 ; SI: buffer_load_dword
293 ; SI: buffer_load_dword
294 ; SI: buffer_load_dword
295 ; SI: buffer_load_dword
296 ; SI: buffer_load_dword
297 ; SI: buffer_load_dword
298 ; SI: buffer_load_dword
299
300 ; SI-DAG: v_ashrrev_i32
301 ; SI-DAG: v_ashrrev_i32
302 ; SI-DAG: v_ashrrev_i32
303 ; SI-DAG: v_ashrrev_i32
304 ; SI-DAG: v_ashrrev_i32
305 ; SI-DAG: v_ashrrev_i32
306 ; SI-DAG: v_ashrrev_i32
307 ; SI-DAG: v_ashrrev_i32
308 ; SI-DAG: v_ashrrev_i32
309 ; SI-DAG: v_ashrrev_i32
310 ; SI-DAG: v_ashrrev_i32
311 ; SI-DAG: v_ashrrev_i32
312 ; SI-DAG: v_ashrrev_i32
313 ; SI-DAG: v_ashrrev_i32
314 ; SI-DAG: v_ashrrev_i32
315 ; SI-DAG: v_ashrrev_i32
316 ; SI-DAG: v_ashrrev_i32
317 ; SI-DAG: v_ashrrev_i32
318 ; SI-DAG: v_ashrrev_i32
319 ; SI-DAG: v_ashrrev_i32
320 ; SI-DAG: v_ashrrev_i32
321 ; SI-DAG: v_ashrrev_i32
322 ; SI-DAG: v_ashrrev_i32
323 ; SI-DAG: v_ashrrev_i32
324 ; SI-DAG: v_ashrrev_i32
325 ; SI-DAG: v_ashrrev_i32
326 ; SI-DAG: v_ashrrev_i32
327 ; SI-DAG: v_ashrrev_i32
328 ; SI-DAG: v_ashrrev_i32
329 ; SI-DAG: v_ashrrev_i32
330 ; SI-DAG: v_ashrrev_i32
331 ; SI-DAG: v_ashrrev_i32
332
333 ; SI-DAG: buffer_store_dwordx2
334 ; SI-DAG: buffer_store_dwordx2
335 ; SI-DAG: buffer_store_dwordx2
336 ; SI-DAG: buffer_store_dwordx2
337 ; SI-DAG: buffer_store_dwordx2
338 ; SI-DAG: buffer_store_dwordx2
339 ; SI-DAG: buffer_store_dwordx2
340 ; SI-DAG: buffer_store_dwordx2
341
342 ; SI-DAG: buffer_store_dwordx2
343 ; SI-DAG: buffer_store_dwordx2
344 ; SI-DAG: buffer_store_dwordx2
345 ; SI-DAG: buffer_store_dwordx2
346 ; SI-DAG: buffer_store_dwordx2
347 ; SI-DAG: buffer_store_dwordx2
348 ; SI-DAG: buffer_store_dwordx2
349 ; SI-DAG: buffer_store_dwordx2
350
351 ; SI-DAG: buffer_store_dwordx2
352 ; SI-DAG: buffer_store_dwordx2
353 ; SI-DAG: buffer_store_dwordx2
354 ; SI-DAG: buffer_store_dwordx2
355 ; SI-DAG: buffer_store_dwordx2
356 ; SI-DAG: buffer_store_dwordx2
357 ; SI-DAG: buffer_store_dwordx2
358 ; SI-DAG: buffer_store_dwordx2
359
360 ; SI-DAG: buffer_store_dwordx2
361 ; SI-DAG: buffer_store_dwordx2
362 ; SI-DAG: buffer_store_dwordx2
363 ; SI-DAG: buffer_store_dwordx2
364 ; SI-DAG: buffer_store_dwordx2
365 ; SI-DAG: buffer_store_dwordx2
366 ; SI-DAG: buffer_store_dwordx2
367 ; SI-DAG: buffer_store_dwordx2
368
369 ; SI: s_endpgm
370 define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
371 %load = load <32 x i32> addrspace(1)* %in
372 %ext = sext <32 x i32> %load to <32 x i64>
373 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
374 ret void
375 }
376
377 ; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64:
378 ; SI: buffer_load_dword
379 ; SI: buffer_load_dword
380 ; SI: buffer_load_dword
381 ; SI: buffer_load_dword
382 ; SI: buffer_load_dword
383 ; SI: buffer_load_dword
384 ; SI: buffer_load_dword
385 ; SI: buffer_load_dword
386
387 ; SI: buffer_load_dword
388 ; SI: buffer_load_dword
389 ; SI: buffer_load_dword
390 ; SI: buffer_load_dword
391 ; SI: buffer_load_dword
392 ; SI: buffer_load_dword
393 ; SI: buffer_load_dword
394 ; SI: buffer_load_dword
395
396 ; SI: buffer_load_dword
397 ; SI: buffer_load_dword
398 ; SI: buffer_load_dword
399 ; SI: buffer_load_dword
400 ; SI: buffer_load_dword
401 ; SI: buffer_load_dword
402 ; SI: buffer_load_dword
403 ; SI: buffer_load_dword
404
405 ; SI: buffer_load_dword
406 ; SI: buffer_load_dword
407 ; SI: buffer_load_dword
408 ; SI: buffer_load_dword
409 ; SI: buffer_load_dword
410 ; SI: buffer_load_dword
411 ; SI: buffer_load_dword
412 ; SI: buffer_load_dword
413
414 ; SI-DAG: buffer_store_dwordx2
415 ; SI-DAG: buffer_store_dwordx2
416 ; SI-DAG: buffer_store_dwordx2
417 ; SI-DAG: buffer_store_dwordx2
418 ; SI-DAG: buffer_store_dwordx2
419 ; SI-DAG: buffer_store_dwordx2
420 ; SI-DAG: buffer_store_dwordx2
421 ; SI-DAG: buffer_store_dwordx2
422
423 ; SI-DAG: buffer_store_dwordx2
424 ; SI-DAG: buffer_store_dwordx2
425 ; SI-DAG: buffer_store_dwordx2
426 ; SI-DAG: buffer_store_dwordx2
427 ; SI-DAG: buffer_store_dwordx2
428 ; SI-DAG: buffer_store_dwordx2
429 ; SI-DAG: buffer_store_dwordx2
430 ; SI-DAG: buffer_store_dwordx2
431
432 ; SI-DAG: buffer_store_dwordx2
433 ; SI-DAG: buffer_store_dwordx2
434 ; SI-DAG: buffer_store_dwordx2
435 ; SI-DAG: buffer_store_dwordx2
436 ; SI-DAG: buffer_store_dwordx2
437 ; SI-DAG: buffer_store_dwordx2
438 ; SI-DAG: buffer_store_dwordx2
439 ; SI-DAG: buffer_store_dwordx2
440
441 ; SI-DAG: buffer_store_dwordx2
442 ; SI-DAG: buffer_store_dwordx2
443 ; SI-DAG: buffer_store_dwordx2
444 ; SI-DAG: buffer_store_dwordx2
445 ; SI-DAG: buffer_store_dwordx2
446 ; SI-DAG: buffer_store_dwordx2
447 ; SI-DAG: buffer_store_dwordx2
448 ; SI-DAG: buffer_store_dwordx2
449
450 ; SI: s_endpgm
451 define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
452 %load = load <32 x i32> addrspace(1)* %in
453 %ext = zext <32 x i32> %load to <32 x i64>
454 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
455 ret void
456 }
0 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
1 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
2
3 ; FUNC-LABEL: {{^}}zextload_global_i8_to_i32:
4 ; SI: buffer_load_ubyte
5 ; SI: buffer_store_dword
6 ; SI: s_endpgm
7 define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
8 %a = load i8 addrspace(1)* %in
9 %ext = zext i8 %a to i32
10 store i32 %ext, i32 addrspace(1)* %out
11 ret void
12 }
13
14 ; FUNC-LABEL: {{^}}sextload_global_i8_to_i32:
15 ; SI: buffer_load_sbyte
16 ; SI: buffer_store_dword
17 ; SI: s_endpgm
18 define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
19 %a = load i8 addrspace(1)* %in
20 %ext = sext i8 %a to i32
21 store i32 %ext, i32 addrspace(1)* %out
22 ret void
23 }
24
25 ; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i32:
26 ; SI: s_endpgm
27 define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
28 %load = load <1 x i8> addrspace(1)* %in
29 %ext = zext <1 x i8> %load to <1 x i32>
30 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
31 ret void
32 }
33
34 ; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i32:
35 ; SI: s_endpgm
36 define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
37 %load = load <1 x i8> addrspace(1)* %in
38 %ext = sext <1 x i8> %load to <1 x i32>
39 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
40 ret void
41 }
42
43 ; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i32:
44 ; SI: s_endpgm
45 define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
46 %load = load <2 x i8> addrspace(1)* %in
47 %ext = zext <2 x i8> %load to <2 x i32>
48 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
49 ret void
50 }
51
52 ; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i32:
53 ; SI: s_endpgm
54 define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
55 %load = load <2 x i8> addrspace(1)* %in
56 %ext = sext <2 x i8> %load to <2 x i32>
57 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
58 ret void
59 }
60
61 ; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i32:
62 ; SI: s_endpgm
63 define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
64 %load = load <4 x i8> addrspace(1)* %in
65 %ext = zext <4 x i8> %load to <4 x i32>
66 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
67 ret void
68 }
69
70 ; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i32:
71 ; SI: s_endpgm
72 define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
73 %load = load <4 x i8> addrspace(1)* %in
74 %ext = sext <4 x i8> %load to <4 x i32>
75 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
76 ret void
77 }
78
79 ; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i32:
80 ; SI: s_endpgm
81 define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
82 %load = load <8 x i8> addrspace(1)* %in
83 %ext = zext <8 x i8> %load to <8 x i32>
84 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
85 ret void
86 }
87
88 ; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i32:
89 ; SI: s_endpgm
90 define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
91 %load = load <8 x i8> addrspace(1)* %in
92 %ext = sext <8 x i8> %load to <8 x i32>
93 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
94 ret void
95 }
96
97 ; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i32:
98 ; SI: s_endpgm
99 define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
100 %load = load <16 x i8> addrspace(1)* %in
101 %ext = zext <16 x i8> %load to <16 x i32>
102 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
103 ret void
104 }
105
106 ; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i32:
107 ; SI: s_endpgm
108 define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
109 %load = load <16 x i8> addrspace(1)* %in
110 %ext = sext <16 x i8> %load to <16 x i32>
111 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
112 ret void
113 }
114
115 ; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i32:
116 ; XSI: s_endpgm
117 ; define void @zextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
118 ; %load = load <32 x i8> addrspace(1)* %in
119 ; %ext = zext <32 x i8> %load to <32 x i32>
120 ; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
121 ; ret void
122 ; }
123
124 ; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i32:
125 ; XSI: s_endpgm
126 ; define void @sextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
127 ; %load = load <32 x i8> addrspace(1)* %in
128 ; %ext = sext <32 x i8> %load to <32 x i32>
129 ; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
130 ; ret void
131 ; }
132
133 ; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i32:
134 ; XSI: s_endpgm
135 ; define void @zextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
136 ; %load = load <64 x i8> addrspace(1)* %in
137 ; %ext = zext <64 x i8> %load to <64 x i32>
138 ; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
139 ; ret void
140 ; }
141
142 ; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i32:
143 ; XSI: s_endpgm
144 ; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
145 ; %load = load <64 x i8> addrspace(1)* %in
146 ; %ext = sext <64 x i8> %load to <64 x i32>
147 ; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
148 ; ret void
149 ; }
150
151 ; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:
152 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
153 ; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
154 ; SI: buffer_store_dwordx2
155 define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
156 %a = load i8 addrspace(1)* %in
157 %ext = zext i8 %a to i64
158 store i64 %ext, i64 addrspace(1)* %out
159 ret void
160 }
161
162 ; FUNC-LABEL: {{^}}sextload_global_i8_to_i64:
163 ; SI: buffer_load_sbyte [[LOAD:v[0-9]+]],
164 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
165 ; SI: buffer_store_dwordx2
166 define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
167 %a = load i8 addrspace(1)* %in
168 %ext = sext i8 %a to i64
169 store i64 %ext, i64 addrspace(1)* %out
170 ret void
171 }
172
173 ; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i64:
174 ; SI: s_endpgm
175 define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
176 %load = load <1 x i8> addrspace(1)* %in
177 %ext = zext <1 x i8> %load to <1 x i64>
178 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
179 ret void
180 }
181
182 ; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i64:
183 ; SI: s_endpgm
184 define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
185 %load = load <1 x i8> addrspace(1)* %in
186 %ext = sext <1 x i8> %load to <1 x i64>
187 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
188 ret void
189 }
190
191 ; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i64:
192 ; SI: s_endpgm
193 define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
194 %load = load <2 x i8> addrspace(1)* %in
195 %ext = zext <2 x i8> %load to <2 x i64>
196 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
197 ret void
198 }
199
200 ; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i64:
201 ; SI: s_endpgm
202 define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
203 %load = load <2 x i8> addrspace(1)* %in
204 %ext = sext <2 x i8> %load to <2 x i64>
205 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
206 ret void
207 }
208
209 ; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i64:
210 ; SI: s_endpgm
211 define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
212 %load = load <4 x i8> addrspace(1)* %in
213 %ext = zext <4 x i8> %load to <4 x i64>
214 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
215 ret void
216 }
217
218 ; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i64:
219 ; SI: s_endpgm
220 define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
221 %load = load <4 x i8> addrspace(1)* %in
222 %ext = sext <4 x i8> %load to <4 x i64>
223 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
224 ret void
225 }
226
227 ; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i64:
228 ; SI: s_endpgm
229 define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
230 %load = load <8 x i8> addrspace(1)* %in
231 %ext = zext <8 x i8> %load to <8 x i64>
232 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
233 ret void
234 }
235
236 ; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i64:
237 ; SI: s_endpgm
238 define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
239 %load = load <8 x i8> addrspace(1)* %in
240 %ext = sext <8 x i8> %load to <8 x i64>
241 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
242 ret void
243 }
244
245 ; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i64:
246 ; SI: s_endpgm
247 define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
248 %load = load <16 x i8> addrspace(1)* %in
249 %ext = zext <16 x i8> %load to <16 x i64>
250 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
251 ret void
252 }
253
254 ; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i64:
255 ; SI: s_endpgm
256 define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
257 %load = load <16 x i8> addrspace(1)* %in
258 %ext = sext <16 x i8> %load to <16 x i64>
259 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
260 ret void
261 }
262
263 ; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64:
264 ; XSI: s_endpgm
265 ; define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
266 ; %load = load <32 x i8> addrspace(1)* %in
267 ; %ext = zext <32 x i8> %load to <32 x i64>
268 ; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
269 ; ret void
270 ; }
271
272 ; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64:
273 ; XSI: s_endpgm
274 ; define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
275 ; %load = load <32 x i8> addrspace(1)* %in
276 ; %ext = sext <32 x i8> %load to <32 x i64>
277 ; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
278 ; ret void
279 ; }
280
281 ; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64:
282 ; XSI: s_endpgm
283 ; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
284 ; %load = load <64 x i8> addrspace(1)* %in
285 ; %ext = zext <64 x i8> %load to <64 x i64>
286 ; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
287 ; ret void
288 ; }
289
290 ; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64:
291 ; XSI: s_endpgm
292 ; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
293 ; %load = load <64 x i8> addrspace(1)* %in
294 ; %ext = sext <64 x i8> %load to <64 x i64>
295 ; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
296 ; ret void
297 ; }
0 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
11
2 ; FIXME: This is probably wrong. This probably needs to expand to 8-bit reads and writes.
32 ; SI-LABEL: {{^}}unaligned_load_store_i32:
4 ; SI: ds_read_u16
5 ; SI: ds_read_u16
3 ; SI: ds_read_u8
4 ; SI: ds_read_u8
65 ; SI: ds_write_b32
76 ; SI: s_endpgm
87 define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
1211 }
1312
1413 ; SI-LABEL: {{^}}unaligned_load_store_v4i32:
15 ; SI: ds_read_u16
16 ; SI: ds_read_u16
17 ; SI: ds_read_u16
18 ; SI: ds_read_u16
19 ; SI: ds_read_u16
20 ; SI: ds_read_u16
21 ; SI: ds_read_u16
22 ; SI: ds_read_u16
14 ; SI: ds_read_u8
15 ; SI: ds_read_u8
16 ; SI: ds_read_u8
17 ; SI: ds_read_u8
18
19 ; SI: ds_read_u8
20 ; SI: ds_read_u8
21 ; SI: ds_read_u8
22 ; SI: ds_read_u8
23
24 ; SI: ds_read_u8
25 ; SI: ds_read_u8
26 ; SI: ds_read_u8
27 ; SI: ds_read_u8
28
29 ; SI: ds_read_u8
30 ; SI: ds_read_u8
31 ; SI: ds_read_u8
32 ; SI: ds_read_u8
33
2334 ; SI: ds_write_b32
2435 ; SI: ds_write_b32
2536 ; SI: ds_write_b32