llvm.org GIT mirror llvm / 5c39a97
ARM: Implement big endian bit-conversion for NEON type git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208538 91177308-0d34-0410-b5e6-96231b3b80d8 Christian Pirker 5 years ago
6 changed file(s) with 538 addition(s) and 60 deletion(s). Raw diff Collapse all Expand all
187187 unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
188188 unsigned ARMSelectCallOp(bool UseReg);
189189 unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
190
191 const TargetLowering *getTargetLowering() { return TM.getTargetLowering(); }
190192
191193 // Call handling routines.
192194 private:
39633963
39643964 // Turn f64->i64 into VMOVRRD.
39653965 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
3966 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
3967 DAG.getVTList(MVT::i32, MVT::i32), Op);
3966 SDValue Cvt;
3967 if (TLI.isBigEndian() && SrcVT.isVector())
3968 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
3969 DAG.getVTList(MVT::i32, MVT::i32),
3970 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
3971 else
3972 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
3973 DAG.getVTList(MVT::i32, MVT::i32), Op);
39683974 // Merge the pieces into a single i64 value.
39693975 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
39703976 }
23652365 def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
23662366 (VST1q64 addrmode6:$addr, QPR:$value)>;
23672367 def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
2368 (VLD1q32 addrmode6:$addr)>;
2368 (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>;
23692369 def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2370 (VST1q32 addrmode6:$addr, QPR:$value)>;
2370 (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
23712371 def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
23722372 (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
23732373 def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
61756175 //===----------------------------------------------------------------------===//
61766176
61776177 // bit_convert
6178 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
6179 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
6180 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
6178 let Predicates = [IsLE] in {
6179 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
6180 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
6181 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
6182 }
61816183 def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
6182 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
6183 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
6184 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
6185 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
6186 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
6184 let Predicates = [IsLE] in {
6185 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
6186 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
6187 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
6188 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
6189 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
6190 }
61876191 def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
6188 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
6189 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
6190 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
6191 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
6192 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
6193 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
6194 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
6195 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
6196 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
6197 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
6192 let Predicates = [IsLE] in {
6193 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
6194 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
6195 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
6196 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
6197 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
6198 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
6199 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
6200 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
6201 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
6202 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
6203 }
61986204 def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
6199 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
6200 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
6201 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
6202 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
6203 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
6204 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
6205 let Predicates = [IsLE] in {
6206 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
6207 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
6208 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
6209 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
6210 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
6211 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
6212 }
62056213 def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
6206 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
6207 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
6208
6209 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
6210 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
6211 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
6214 let Predicates = [IsLE] in {
6215 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
6216 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
6217 }
6218
6219 let Predicates = [IsLE] in {
6220 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
6221 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
6222 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
6223 }
62126224 def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
6213 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
6214 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
6215 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
6216 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
6217 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
6225 let Predicates = [IsLE] in {
6226 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
6227 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
6228 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
6229 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
6230 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
6231 }
62186232 def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
6219 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
6220 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
6221 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
6222 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
6223 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
6224 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
6225 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
6226 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
6227 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
6228 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
6229 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
6233 let Predicates = [IsLE] in {
6234 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
6235 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
6236 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
6237 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
6238 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
6239 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
6240 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
6241 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
6242 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
6243 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
6244 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
6245 }
62306246 def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
6231 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
6232 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
6233 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
6247 let Predicates = [IsLE] in {
6248 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
6249 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
6250 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
6251 }
62346252 def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
6235 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
6236 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
6237 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
6238 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
6253 let Predicates = [IsLE] in {
6254 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
6255 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
6256 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
6257 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
6258 }
6259
6260 let Predicates = [IsBE] in {
6261 // 64 bit conversions
6262 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
6263 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
6264 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
6265 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
6266 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
6267 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
6268 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
6269 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
6270 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
6271 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
6272 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
6273 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
6274 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
6275 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
6276 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
6277 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;
6278 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
6279 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
6280 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
6281 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
6282 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
6283 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
6284 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
6285 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
6286 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
6287 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
6288
6289 // 128 bit conversions
6290 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
6291 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
6292 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
6293 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
6294 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
6295 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
6296 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
6297 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
6298 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
6299 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
6300 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
6301 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
6302 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
6303 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
6304 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
6305 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
6306 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
6307 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
6308 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
6309 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
6310 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
6311 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
6312 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
6313 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
6314 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
6315 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
6316 }
62396317
62406318 // Fold extracting an element out of a v2i32 into a vfp register.
62416319 def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
0 ; RUN: llc < %s -march armeb -mtriple arm-eabi -mattr v7,neon -float-abi soft -o - | FileCheck %s
1 ; RUN: llc < %s -march armeb -mtriple arm-eabi -mattr v7,neon -float-abi hard -o - | FileCheck %s -check-prefix CHECK-HARD
2
3 @v2i64 = global <2 x i64> zeroinitializer
4 @v2i32 = global <2 x i32> zeroinitializer
5 @v4i32 = global <4 x i32> zeroinitializer
6 @v4i16 = global <4 x i16> zeroinitializer
7 @v8i16 = global <8 x i16> zeroinitializer
8 @v8i8 = global <8 x i8> zeroinitializer
9 @v16i8 = global <16 x i8> zeroinitializer
10
11 @v2f32 = global <2 x float> zeroinitializer
12 @v2f64 = global <2 x double> zeroinitializer
13 @v4f32 = global <4 x float> zeroinitializer
14
15
16 ; 64 bit conversions
17 define void @conv_i64_to_v8i8( i64 %val, <8 x i8>* %store ) {
18 ; CHECK-LABEL: conv_i64_to_v8i8:
19 ; CHECK: vrev64.8
20 %v = bitcast i64 %val to <8 x i8>
21 %w = load <8 x i8>* @v8i8
22 %a = add <8 x i8> %v, %w
23 store <8 x i8> %a, <8 x i8>* %store
24 ret void
25 }
26
27 define void @conv_v8i8_to_i64( <8 x i8>* %load, <8 x i8>* %store ) {
28 ; CHECK-LABEL: conv_v8i8_to_i64:
29 ; CHECK: vrev64.8
30 %v = load <8 x i8>* %load
31 %w = load <8 x i8>* @v8i8
32 %a = add <8 x i8> %v, %w
33 %f = bitcast <8 x i8> %a to i64
34 call void @conv_i64_to_v8i8( i64 %f, <8 x i8>* %store )
35 ret void
36 }
37
38 define void @conv_i64_to_v4i16( i64 %val, <4 x i16>* %store ) {
39 ; CHECK-LABEL: conv_i64_to_v4i16:
40 ; CHECK: vrev64.16
41 %v = bitcast i64 %val to <4 x i16>
42 %w = load <4 x i16>* @v4i16
43 %a = add <4 x i16> %v, %w
44 store <4 x i16> %a, <4 x i16>* %store
45 ret void
46 }
47
48 define void @conv_v4i16_to_i64( <4 x i16>* %load, <4 x i16>* %store ) {
49 ; CHECK-LABEL: conv_v4i16_to_i64:
50 ; CHECK: vrev64.16
51 %v = load <4 x i16>* %load
52 %w = load <4 x i16>* @v4i16
53 %a = add <4 x i16> %v, %w
54 %f = bitcast <4 x i16> %a to i64
55 call void @conv_i64_to_v4i16( i64 %f, <4 x i16>* %store )
56 ret void
57 }
58
59 define void @conv_i64_to_v2i32( i64 %val, <2 x i32>* %store ) {
60 ; CHECK-LABEL: conv_i64_to_v2i32:
61 ; CHECK: vrev64.32
62 %v = bitcast i64 %val to <2 x i32>
63 %w = load <2 x i32>* @v2i32
64 %a = add <2 x i32> %v, %w
65 store <2 x i32> %a, <2 x i32>* %store
66 ret void
67 }
68
69 define void @conv_v2i32_to_i64( <2 x i32>* %load, <2 x i32>* %store ) {
70 ; CHECK-LABEL: conv_v2i32_to_i64:
71 ; CHECK: vrev64.32
72 %v = load <2 x i32>* %load
73 %w = load <2 x i32>* @v2i32
74 %a = add <2 x i32> %v, %w
75 %f = bitcast <2 x i32> %a to i64
76 call void @conv_i64_to_v2i32( i64 %f, <2 x i32>* %store )
77 ret void
78 }
79
80 define void @conv_i64_to_v2f32( i64 %val, <2 x float>* %store ) {
81 ; CHECK-LABEL: conv_i64_to_v2f32:
82 ; CHECK: vrev64.32
83 %v = bitcast i64 %val to <2 x float>
84 %w = load <2 x float>* @v2f32
85 %a = fadd <2 x float> %v, %w
86 store <2 x float> %a, <2 x float>* %store
87 ret void
88 }
89
90 define void @conv_v2f32_to_i64( <2 x float>* %load, <2 x float>* %store ) {
91 ; CHECK-LABEL: conv_v2f32_to_i64:
92 ; CHECK: vrev64.32
93 %v = load <2 x float>* %load
94 %w = load <2 x float>* @v2f32
95 %a = fadd <2 x float> %v, %w
96 %f = bitcast <2 x float> %a to i64
97 call void @conv_i64_to_v2f32( i64 %f, <2 x float>* %store )
98 ret void
99 }
100
101 define void @conv_f64_to_v8i8( double %val, <8 x i8>* %store ) {
102 ; CHECK-LABEL: conv_f64_to_v8i8:
103 ; CHECK: vrev64.8
104 %v = bitcast double %val to <8 x i8>
105 %w = load <8 x i8>* @v8i8
106 %a = add <8 x i8> %v, %w
107 store <8 x i8> %a, <8 x i8>* %store
108 ret void
109 }
110
111 define void @conv_v8i8_to_f64( <8 x i8>* %load, <8 x i8>* %store ) {
112 ; CHECK-LABEL: conv_v8i8_to_f64:
113 ; CHECK: vrev64.8
114 %v = load <8 x i8>* %load
115 %w = load <8 x i8>* @v8i8
116 %a = add <8 x i8> %v, %w
117 %f = bitcast <8 x i8> %a to double
118 call void @conv_f64_to_v8i8( double %f, <8 x i8>* %store )
119 ret void
120 }
121
122 define void @conv_f64_to_v4i16( double %val, <4 x i16>* %store ) {
123 ; CHECK-LABEL: conv_f64_to_v4i16:
124 ; CHECK: vrev64.16
125 %v = bitcast double %val to <4 x i16>
126 %w = load <4 x i16>* @v4i16
127 %a = add <4 x i16> %v, %w
128 store <4 x i16> %a, <4 x i16>* %store
129 ret void
130 }
131
132 define void @conv_v4i16_to_f64( <4 x i16>* %load, <4 x i16>* %store ) {
133 ; CHECK-LABEL: conv_v4i16_to_f64:
134 ; CHECK: vrev64.16
135 %v = load <4 x i16>* %load
136 %w = load <4 x i16>* @v4i16
137 %a = add <4 x i16> %v, %w
138 %f = bitcast <4 x i16> %a to double
139 call void @conv_f64_to_v4i16( double %f, <4 x i16>* %store )
140 ret void
141 }
142
143 define void @conv_f64_to_v2i32( double %val, <2 x i32>* %store ) {
144 ; CHECK-LABEL: conv_f64_to_v2i32:
145 ; CHECK: vrev64.32
146 %v = bitcast double %val to <2 x i32>
147 %w = load <2 x i32>* @v2i32
148 %a = add <2 x i32> %v, %w
149 store <2 x i32> %a, <2 x i32>* %store
150 ret void
151 }
152
153 define void @conv_v2i32_to_f64( <2 x i32>* %load, <2 x i32>* %store ) {
154 ; CHECK-LABEL: conv_v2i32_to_f64:
155 ; CHECK: vrev64.32
156 %v = load <2 x i32>* %load
157 %w = load <2 x i32>* @v2i32
158 %a = add <2 x i32> %v, %w
159 %f = bitcast <2 x i32> %a to double
160 call void @conv_f64_to_v2i32( double %f, <2 x i32>* %store )
161 ret void
162 }
163
164 define void @conv_f64_to_v2f32( double %val, <2 x float>* %store ) {
165 ; CHECK-LABEL: conv_f64_to_v2f32:
166 ; CHECK: vrev64.32
167 %v = bitcast double %val to <2 x float>
168 %w = load <2 x float>* @v2f32
169 %a = fadd <2 x float> %v, %w
170 store <2 x float> %a, <2 x float>* %store
171 ret void
172 }
173
174 define void @conv_v2f32_to_f64( <2 x float>* %load, <2 x float>* %store ) {
175 ; CHECK-LABEL: conv_v2f32_to_f64:
176 ; CHECK: vrev64.32
177 %v = load <2 x float>* %load
178 %w = load <2 x float>* @v2f32
179 %a = fadd <2 x float> %v, %w
180 %f = bitcast <2 x float> %a to double
181 call void @conv_f64_to_v2f32( double %f, <2 x float>* %store )
182 ret void
183 }
184
185 ; 128 bit conversions
186
187
188 define void @conv_i128_to_v16i8( i128 %val, <16 x i8>* %store ) {
189 ; CHECK-LABEL: conv_i128_to_v16i8:
190 ; CHECK: vrev32.8
191 %v = bitcast i128 %val to <16 x i8>
192 %w = load <16 x i8>* @v16i8
193 %a = add <16 x i8> %v, %w
194 store <16 x i8> %a, <16 x i8>* %store
195 ret void
196 }
197
198 define void @conv_v16i8_to_i128( <16 x i8>* %load, <16 x i8>* %store ) {
199 ; CHECK-LABEL: conv_v16i8_to_i128:
200 ; CHECK: vrev32.8
201 %v = load <16 x i8>* %load
202 %w = load <16 x i8>* @v16i8
203 %a = add <16 x i8> %v, %w
204 %f = bitcast <16 x i8> %a to i128
205 call void @conv_i128_to_v16i8( i128 %f, <16 x i8>* %store )
206 ret void
207 }
208
209 define void @conv_i128_to_v8i16( i128 %val, <8 x i16>* %store ) {
210 ; CHECK-LABEL: conv_i128_to_v8i16:
211 ; CHECK: vrev32.16
212 %v = bitcast i128 %val to <8 x i16>
213 %w = load <8 x i16>* @v8i16
214 %a = add <8 x i16> %v, %w
215 store <8 x i16> %a, <8 x i16>* %store
216 ret void
217 }
218
219 define void @conv_v8i16_to_i128( <8 x i16>* %load, <8 x i16>* %store ) {
220 ; CHECK-LABEL: conv_v8i16_to_i128:
221 ; CHECK: vrev32.16
222 %v = load <8 x i16>* %load
223 %w = load <8 x i16>* @v8i16
224 %a = add <8 x i16> %v, %w
225 %f = bitcast <8 x i16> %a to i128
226 call void @conv_i128_to_v8i16( i128 %f, <8 x i16>* %store )
227 ret void
228 }
229
230 define void @conv_i128_to_v4i32( i128 %val, <4 x i32>* %store ) {
231 ; CHECK-LABEL: conv_i128_to_v4i32:
232 ; CHECK: vrev64.32
233 %v = bitcast i128 %val to <4 x i32>
234 %w = load <4 x i32>* @v4i32
235 %a = add <4 x i32> %v, %w
236 store <4 x i32> %a, <4 x i32>* %store
237 ret void
238 }
239
240 define void @conv_v4i32_to_i128( <4 x i32>* %load, <4 x i32>* %store ) {
241 ; CHECK-LABEL: conv_v4i32_to_i128:
242 ; CHECK: vrev64.32
243 %v = load <4 x i32>* %load
244 %w = load <4 x i32>* @v4i32
245 %a = add <4 x i32> %v, %w
246 %f = bitcast <4 x i32> %a to i128
247 call void @conv_i128_to_v4i32( i128 %f, <4 x i32>* %store )
248 ret void
249 }
250
251 define void @conv_i128_to_v4f32( i128 %val, <4 x float>* %store ) {
252 ; CHECK-LABEL: conv_i128_to_v4f32:
253 ; CHECK: vrev64.32
254 %v = bitcast i128 %val to <4 x float>
255 %w = load <4 x float>* @v4f32
256 %a = fadd <4 x float> %v, %w
257 store <4 x float> %a, <4 x float>* %store
258 ret void
259 }
260
261 define void @conv_v4f32_to_i128( <4 x float>* %load, <4 x float>* %store ) {
262 ; CHECK-LABEL: conv_v4f32_to_i128:
263 ; CHECK: vrev64.32
264 %v = load <4 x float>* %load
265 %w = load <4 x float>* @v4f32
266 %a = fadd <4 x float> %v, %w
267 %f = bitcast <4 x float> %a to i128
268 call void @conv_i128_to_v4f32( i128 %f, <4 x float>* %store )
269 ret void
270 }
271
272 define void @conv_f128_to_v2f64( fp128 %val, <2 x double>* %store ) {
273 ; CHECK-LABEL: conv_f128_to_v2f64:
274 ; CHECK: vrev64.32
275 %v = bitcast fp128 %val to <2 x double>
276 %w = load <2 x double>* @v2f64
277 %a = fadd <2 x double> %v, %w
278 store <2 x double> %a, <2 x double>* %store
279 ret void
280 }
281
282 define void @conv_v2f64_to_f128( <2 x double>* %load, <2 x double>* %store ) {
283 ; CHECK-LABEL: conv_v2f64_to_f128:
284 ; CHECK: vrev64.32
285 %v = load <2 x double>* %load
286 %w = load <2 x double>* @v2f64
287 %a = fadd <2 x double> %v, %w
288 %f = bitcast <2 x double> %a to fp128
289 call void @conv_f128_to_v2f64( fp128 %f, <2 x double>* %store )
290 ret void
291 }
292
293 define void @conv_f128_to_v16i8( fp128 %val, <16 x i8>* %store ) {
294 ; CHECK-LABEL: conv_f128_to_v16i8:
295 ; CHECK: vrev32.8
296 %v = bitcast fp128 %val to <16 x i8>
297 %w = load <16 x i8>* @v16i8
298 %a = add <16 x i8> %v, %w
299 store <16 x i8> %a, <16 x i8>* %store
300 ret void
301 }
302
303 define void @conv_v16i8_to_f128( <16 x i8>* %load, <16 x i8>* %store ) {
304 ; CHECK-LABEL: conv_v16i8_to_f128:
305 ; CHECK: vrev32.8
306 %v = load <16 x i8>* %load
307 %w = load <16 x i8>* @v16i8
308 %a = add <16 x i8> %v, %w
309 %f = bitcast <16 x i8> %a to fp128
310 call void @conv_f128_to_v16i8( fp128 %f, <16 x i8>* %store )
311 ret void
312 }
313
314 define void @conv_f128_to_v8i16( fp128 %val, <8 x i16>* %store ) {
315 ; CHECK-LABEL: conv_f128_to_v8i16:
316 ; CHECK: vrev32.16
317 %v = bitcast fp128 %val to <8 x i16>
318 %w = load <8 x i16>* @v8i16
319 %a = add <8 x i16> %v, %w
320 store <8 x i16> %a, <8 x i16>* %store
321 ret void
322 }
323
324 define void @conv_v8i16_to_f128( <8 x i16>* %load, <8 x i16>* %store ) {
325 ; CHECK-LABEL: conv_v8i16_to_f128:
326 ; CHECK: vrev32.16
327 %v = load <8 x i16>* %load
328 %w = load <8 x i16>* @v8i16
329 %a = add <8 x i16> %v, %w
330 %f = bitcast <8 x i16> %a to fp128
331 call void @conv_f128_to_v8i16( fp128 %f, <8 x i16>* %store )
332 ret void
333 }
334
335 define void @conv_f128_to_v4f32( fp128 %val, <4 x float>* %store ) {
336 ; CHECK-LABEL: conv_f128_to_v4f32:
337 ; CHECK: vrev64.32
338 %v = bitcast fp128 %val to <4 x float>
339 %w = load <4 x float>* @v4f32
340 %a = fadd <4 x float> %v, %w
341 store <4 x float> %a, <4 x float>* %store
342 ret void
343 }
344
345 define void @conv_v4f32_to_f128( <4 x float>* %load, <4 x float>* %store ) {
346 ; CHECK-LABEL: conv_v4f32_to_f128:
347 ; CHECK: vrev64.32
348 %v = load <4 x float>* %load
349 %w = load <4 x float>* @v4f32
350 %a = fadd <4 x float> %v, %w
351 %f = bitcast <4 x float> %a to fp128
352 call void @conv_f128_to_v4f32( fp128 %f, <4 x float>* %store )
353 ret void
354 }
355
356 define void @arg_v4i32( <4 x i32> %var, <4 x i32>* %store ) {
357 ; CHECK-LABEL: arg_v4i32:
358 ; CHECK: vmov [[REG2:d[0-9]+]], r3, r2
359 ; CHECK: vmov [[REG1:d[0-9]+]], r1, r0
360 ; CHECK: vst1.64 {[[REG1]], [[REG2]]},
361 ; CHECK-HARD-LABEL: arg_v4i32:
362 ; CHECK-HARD-NOT: vmov
363 ; CHECK-HARD: vst1.64 {d0, d1}
364 store <4 x i32> %var, <4 x i32>* %store
365 ret void
366 }
367
368 define void @arg_v8i16( <8 x i16> %var, <8 x i16>* %store ) {
369 ; CHECK-LABEL: arg_v8i16:
370 ; CHECK: vmov [[REG2:d[0-9]+]], r3, r2
371 ; CHECK: vmov [[REG1:d[0-9]+]], r1, r0
372 ; CHECK: vst1.64 {[[REG1]], [[REG2]]},
373 ; CHECK-HARD-LABEL: arg_v8i16:
374 ; CHECK-HARD-NOT: vmov
375 ; CHECK-HARD: vst1.64 {d0, d1}
376 store <8 x i16> %var, <8 x i16>* %store
377 ret void
378 }
379
380 define void @arg_v16i8( <16 x i8> %var, <16 x i8>* %store ) {
381 ; CHECK-LABEL: arg_v16i8:
382 ; CHECK: vmov [[REG2:d[0-9]+]], r3, r2
383 ; CHECK: vmov [[REG1:d[0-9]+]], r1, r0
384 ; CHECK: vst1.64 {[[REG1]], [[REG2]]},
385 ; CHECK-HARD-LABEL: arg_v16i8:
386 ; CHECK-HARD-NOT: vmov
387 ; CHECK-HARD: vst1.64 {d0, d1}
388 store <16 x i8> %var, <16 x i8>* %store
389 ret void
390 }
391
66 ; CHECK-LE-NEXT: vmov {{d[0-9]+}}, r1, r2
77 ; CHECK-LE-NEXT: vmov {{d[0-9]+}}, r3, [[REG]]
88 ; CHECK-BE-NEXT: vmov {{d[0-9]+}}, r2, r1
9 ; CHECK-BE-NEXT: vmov {{d[0-9]+}}, [[REG]], r3
10 ; CHECK-NEXT: vst1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0]
9 ; CHECK-BE: vmov {{d[0-9]+}}, [[REG]], r3
10 ; CHECK: vst1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0]
1111 ; CHECK-NEXT: bx lr
1212 define void @test1(i8* %arg, [4 x i64] %vec.coerce) {
1313 bb:
7777 ; CHECK: vget_high8
7878 ; CHECK-NOT: vst
7979 ; CHECK-LE: vmov r0, r1, d17
80 ; CHECK-BE: vmov r1, r0, d17
80 ; CHECK-BE: vmov r1, r0, d16
8181 %tmp1 = load <16 x i8>* %A
8282 %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32>
8383 ret <8 x i8> %tmp2