llvm.org GIT mirror llvm / 67dbbba
[ARM] Use MQPR not QPR for MVE registers We should be using MQPR, and if we don't we can get COPYs and PHIs created for QPR. These get folded into instructions, failing verification checks. Differential revision: https://reviews.llvm.org/D66214 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@370676 91177308-0d34-0410-b5e6-96231b3b80d8 David Green 1 year, 28 days ago
4 changed file(s) with 214 addition(s) and 99 deletion(s). Raw diff Collapse all Expand all
222222
223223 const TargetRegisterClass *
224224 ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
225 const MachineFunction &) const {
225 const MachineFunction &MF) const {
226226 const TargetRegisterClass *Super = RC;
227227 TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
228228 do {
230230 case ARM::GPRRegClassID:
231231 case ARM::SPRRegClassID:
232232 case ARM::DPRRegClassID:
233 case ARM::GPRPairRegClassID:
234 return Super;
233235 case ARM::QPRRegClassID:
234236 case ARM::QQPRRegClassID:
235237 case ARM::QQQQPRRegClassID:
236 case ARM::GPRPairRegClassID:
237 return Super;
238 if (MF.getSubtarget().hasNEON())
239 return Super;
238240 }
239241 Super = *I++;
240242 } while (Super);
244244 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
245245
246246 for (auto VT : IntTypes) {
247 addRegisterClass(VT, &ARM::QPRRegClass);
247 addRegisterClass(VT, &ARM::MQPRRegClass);
248248 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
249249 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
250250 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
286286
287287 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
288288 for (auto VT : FloatTypes) {
289 addRegisterClass(VT, &ARM::QPRRegClass);
289 addRegisterClass(VT, &ARM::MQPRRegClass);
290290 if (!HasMVEFP)
291291 setAllExpand(VT);
292292
333333 // vector types is inhibited at integer-only level.
334334 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
335335 for (auto VT : LongTypes) {
336 addRegisterClass(VT, &ARM::QPRRegClass);
336 addRegisterClass(VT, &ARM::MQPRRegClass);
337337 setAllExpand(VT);
338338 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
339339 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
50315031 // Bit convert patterns
50325032
50335033 let Predicates = [HasMVEInt] in {
5034 def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
5035 def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
5036
5037 def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
5038 def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
5039
5040 def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>;
5041 def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>;
5034 def : Pat<(v2f64 (bitconvert (v2i64 MQPR:$src))), (v2f64 MQPR:$src)>;
5035 def : Pat<(v2i64 (bitconvert (v2f64 MQPR:$src))), (v2i64 MQPR:$src)>;
5036
5037 def : Pat<(v4i32 (bitconvert (v4f32 MQPR:$src))), (v4i32 MQPR:$src)>;
5038 def : Pat<(v4f32 (bitconvert (v4i32 MQPR:$src))), (v4f32 MQPR:$src)>;
5039
5040 def : Pat<(v8i16 (bitconvert (v8f16 MQPR:$src))), (v8i16 MQPR:$src)>;
5041 def : Pat<(v8f16 (bitconvert (v8i16 MQPR:$src))), (v8f16 MQPR:$src)>;
50425042 }
50435043
50445044 let Predicates = [IsLE,HasMVEInt] in {
5045 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
5046 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
5047 def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
5048 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
5049 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
5050
5051 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
5052 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
5053 def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
5054 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
5055 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
5056
5057 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
5058 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
5059 def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
5060 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
5061 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
5062
5063 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
5064 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
5065 def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
5066 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
5067 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
5068
5069 def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
5070 def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
5071 def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
5072 def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
5073 def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;
5074
5075 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
5076 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
5077 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
5078 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
5079 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
5080
5081 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
5082 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
5083 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
5084 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
5085 def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
5086 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
5045 def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 MQPR:$src)>;
5046 def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 MQPR:$src)>;
5047 def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 MQPR:$src)>;
5048 def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 MQPR:$src)>;
5049 def : Pat<(v2f64 (bitconvert (v16i8 MQPR:$src))), (v2f64 MQPR:$src)>;
5050
5051 def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 MQPR:$src)>;
5052 def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 MQPR:$src)>;
5053 def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 MQPR:$src)>;
5054 def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 MQPR:$src)>;
5055 def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 MQPR:$src)>;
5056
5057 def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 MQPR:$src)>;
5058 def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 MQPR:$src)>;
5059 def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 MQPR:$src)>;
5060 def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 MQPR:$src)>;
5061 def : Pat<(v4f32 (bitconvert (v16i8 MQPR:$src))), (v4f32 MQPR:$src)>;
5062
5063 def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 MQPR:$src)>;
5064 def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 MQPR:$src)>;
5065 def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 MQPR:$src)>;
5066 def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 MQPR:$src)>;
5067 def : Pat<(v4i32 (bitconvert (v16i8 MQPR:$src))), (v4i32 MQPR:$src)>;
5068
5069 def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 MQPR:$src)>;
5070 def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 MQPR:$src)>;
5071 def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 MQPR:$src)>;
5072 def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 MQPR:$src)>;
5073 def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 MQPR:$src)>;
5074
5075 def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 MQPR:$src)>;
5076 def : Pat<(v8i16 (bitconvert (v2i64 MQPR:$src))), (v8i16 MQPR:$src)>;
5077 def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 MQPR:$src)>;
5078 def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 MQPR:$src)>;
5079 def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 MQPR:$src)>;
5080
5081 def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 MQPR:$src)>;
5082 def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 MQPR:$src)>;
5083 def : Pat<(v16i8 (bitconvert (v4f32 MQPR:$src))), (v16i8 MQPR:$src)>;
5084 def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 MQPR:$src)>;
5085 def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 MQPR:$src)>;
5086 def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 MQPR:$src)>;
50875087 }
50885088
50895089 let Predicates = [IsBE,HasMVEInt] in {
5090 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 (MVE_VREV64_32 QPR:$src))>;
5091 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 (MVE_VREV64_32 QPR:$src))>;
5092 def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 (MVE_VREV64_16 QPR:$src))>;
5093 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 (MVE_VREV64_16 QPR:$src))>;
5094 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 (MVE_VREV64_8 QPR:$src))>;
5095
5096 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 (MVE_VREV64_32 QPR:$src))>;
5097 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 (MVE_VREV64_32 QPR:$src))>;
5098 def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 (MVE_VREV64_16 QPR:$src))>;
5099 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 (MVE_VREV64_16 QPR:$src))>;
5100 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 (MVE_VREV64_8 QPR:$src))>;
5101
5102 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 (MVE_VREV64_32 QPR:$src))>;
5103 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 (MVE_VREV64_32 QPR:$src))>;
5104 def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 (MVE_VREV32_16 QPR:$src))>;
5105 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 (MVE_VREV32_16 QPR:$src))>;
5106 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 (MVE_VREV32_8 QPR:$src))>;
5107
5108 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 (MVE_VREV64_32 QPR:$src))>;
5109 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 (MVE_VREV64_32 QPR:$src))>;
5110 def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 (MVE_VREV32_16 QPR:$src))>;
5111 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 (MVE_VREV32_16 QPR:$src))>;
5112 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 (MVE_VREV32_8 QPR:$src))>;
5113
5114 def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 (MVE_VREV64_16 QPR:$src))>;
5115 def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 (MVE_VREV64_16 QPR:$src))>;
5116 def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 (MVE_VREV32_16 QPR:$src))>;
5117 def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 (MVE_VREV32_16 QPR:$src))>;
5118 def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 (MVE_VREV16_8 QPR:$src))>;
5119
5120 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 (MVE_VREV64_16 QPR:$src))>;
5121 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 (MVE_VREV64_16 QPR:$src))>;
5122 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 (MVE_VREV32_16 QPR:$src))>;
5123 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 (MVE_VREV32_16 QPR:$src))>;
5124 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 (MVE_VREV16_8 QPR:$src))>;
5125
5126 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 (MVE_VREV64_8 QPR:$src))>;
5127 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 (MVE_VREV64_8 QPR:$src))>;
5128 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 (MVE_VREV32_8 QPR:$src))>;
5129 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 (MVE_VREV32_8 QPR:$src))>;
5130 def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 (MVE_VREV16_8 QPR:$src))>;
5131 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 (MVE_VREV16_8 QPR:$src))>;
5132 }
5090 def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>;
5091 def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>;
5092 def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>;
5093 def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>;
5094 def : Pat<(v2f64 (bitconvert (v16i8 MQPR:$src))), (v2f64 (MVE_VREV64_8 MQPR:$src))>;
5095
5096 def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>;
5097 def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>;
5098 def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>;
5099 def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>;
5100 def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 (MVE_VREV64_8 MQPR:$src))>;
5101
5102 def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>;
5103 def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>;
5104 def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>;
5105 def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>;
5106 def : Pat<(v4f32 (bitconvert (v16i8 MQPR:$src))), (v4f32 (MVE_VREV32_8 MQPR:$src))>;
5107
5108 def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>;
5109 def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>;
5110 def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>;
5111 def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>;
5112 def : Pat<(v4i32 (bitconvert (v16i8 MQPR:$src))), (v4i32 (MVE_VREV32_8 MQPR:$src))>;
5113
5114 def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>;
5115 def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>;
5116 def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>;
5117 def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>;
5118 def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 (MVE_VREV16_8 MQPR:$src))>;
5119
5120 def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 (MVE_VREV64_16 MQPR:$src))>;
5121 def : Pat<(v8i16 (bitconvert (v2i64 MQPR:$src))), (v8i16 (MVE_VREV64_16 MQPR:$src))>;
5122 def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>;
5123 def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>;
5124 def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 (MVE_VREV16_8 MQPR:$src))>;
5125
5126 def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>;
5127 def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>;
5128 def : Pat<(v16i8 (bitconvert (v4f32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>;
5129 def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>;
5130 def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>;
5131 def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>;
5132 }
0 ; RUN: llc -O3 -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
1
2 ; verify-machineinstrs previously caught the incorrect use of QPR in the stack reloads.
3
4 define arm_aapcs_vfpcc void @k() {
5 ; CHECK-LABEL: k:
6 ; CHECK: vstrw.32
7 ; CHECK: vldrw.u32
8 entry:
9 br label %vector.body
10
11 vector.body: ; preds = %vector.body, %entry
12 %vec.ind = phi <8 x i32> [ , %entry ], [ zeroinitializer, %vector.body ]
13 %0 = and <8 x i32> %vec.ind,
14 %1 = icmp eq <8 x i32> %0, zeroinitializer
15 %2 = select <8 x i1> %1, <8 x i16> , <8 x i16>
16 %3 = bitcast i16* undef to <8 x i16>*
17 store <8 x i16> %2, <8 x i16>* %3, align 2
18 %4 = icmp eq i32 undef, 128
19 br i1 %4, label %for.cond4.preheader, label %vector.body
20
21 for.cond4.preheader: ; preds = %vector.body
22 br i1 undef, label %vector.body105, label %for.body10
23
24 for.cond4.loopexit: ; preds = %for.body10
25 %call5 = call arm_aapcs_vfpcc i32 bitcast (i32 (...)* @l to i32 ()*)()
26 br label %vector.body105
27
28 for.body10: ; preds = %for.body10, %for.cond4.preheader
29 %exitcond88 = icmp eq i32 undef, 7
30 br i1 %exitcond88, label %for.cond4.loopexit, label %for.body10
31
32 vector.body105: ; preds = %vector.body105, %for.cond4.loopexit, %for.cond4.preheader
33 %vec.ind113 = phi <8 x i32> [ %vec.ind.next114, %vector.body105 ], [ , %for.cond4.loopexit ], [ , %for.cond4.preheader ]
34 %5 = and <8 x i32> %vec.ind113,
35 %vec.ind.next114 = add <8 x i32> %vec.ind113,
36 %6 = icmp eq i32 undef, 256
37 br i1 %6, label %vector.body115.ph, label %vector.body105
38
39 vector.body115.ph: ; preds = %vector.body105
40 tail call void asm sideeffect "nop", "~{s0},~{s4},~{s8},~{s12},~{s16},~{s20},~{s24},~{s28},~{memory}"()
41 br label %vector.body115
42
43 vector.body115: ; preds = %vector.body115, %vector.body115.ph
44 %vec.ind123 = phi <4 x i32> [ %vec.ind.next124, %vector.body115 ], [ , %vector.body115.ph ]
45 %7 = icmp eq <4 x i32> %vec.ind123, zeroinitializer
46 %vec.ind.next124 = add <4 x i32> %vec.ind123,
47 br label %vector.body115
48 }
49
50
51 @a = external dso_local global i32, align 4
52 @b = dso_local local_unnamed_addr global i32 ptrtoint (i32* @a to i32), align 4
53 @c = dso_local global i32 2, align 4
54 @d = dso_local global i32 2, align 4
55
56 define dso_local i32 @e() #0 {
57 ; CHECK-LABEL: e:
58 ; CHECK: vstrw.32
59 ; CHECK: vldrw.u32
60 entry:
61 %f = alloca i16, align 2
62 %g = alloca [3 x [8 x [4 x i16*]]], align 4
63 store i16 4, i16* %f, align 2
64 %0 = load i32, i32* @c, align 4
65 %1 = load i32, i32* @d, align 4
66 %arrayinit.element7 = getelementptr inbounds [3 x [8 x [4 x i16*]]], [3 x [8 x [4 x i16*]]]* %g, i32 0, i32 0, i32 1, i32 1
67 %2 = bitcast i16** %arrayinit.element7 to i32*
68 store i32 %0, i32* %2, align 4
69 %arrayinit.element8 = getelementptr inbounds [3 x [8 x [4 x i16*]]], [3 x [8 x [4 x i16*]]]* %g, i32 0, i32 0, i32 1, i32 2
70 store i16* null, i16** %arrayinit.element8, align 4
71 %3 = bitcast i16** undef to i32*
72 store i32 %1, i32* %3, align 4
73 %4 = bitcast i16** undef to i32*
74 store i32 %0, i32* %4, align 4
75 %arrayinit.element13 = getelementptr inbounds [3 x [8 x [4 x i16*]]], [3 x [8 x [4 x i16*]]]* %g, i32 0, i32 0, i32 2, i32 2
76 %5 = bitcast i16** %arrayinit.element13 to <4 x i16*>*
77 store <4 x i16*> , <4 x i16*>* %5, align 4
78 %arrayinit.element24 = getelementptr inbounds [3 x [8 x [4 x i16*]]], [3 x [8 x [4 x i16*]]]* %g, i32 0, i32 0, i32 4, i32 2
79 %6 = bitcast i16** %arrayinit.element24 to <4 x i16*>*
80 store <4 x i16*> , <4 x i16*>* %6, align 4
81 %7 = bitcast i16** undef to <4 x i16*>*
82 store <4 x i16*> , <4 x i16*>* %7, align 4
83 %8 = bitcast i16** undef to <4 x i16*>*
84 store <4 x i16*> , <4 x i16*>* %8, align 4
85 %9 = bitcast i16** undef to <4 x i16*>*
86 store <4 x i16*> , <4 x i16*>* %9, align 4
87 %10 = bitcast i16** undef to <4 x i16*>*
88 store <4 x i16*> , <4 x i16*>* %10, align 4
89 call void @llvm.memset.p0i8.i32(i8* nonnull align 4 dereferenceable(64) undef, i8 0, i32 64, i1 false)
90 %11 = bitcast i16** undef to <4 x i16*>*
91 store <4 x i16*> , <4 x i16*>* %11, align 4
92 %12 = bitcast i16** undef to <4 x i16*>*
93 store <4 x i16*> , <4 x i16*>* %12, align 4
94 %13 = bitcast i16** undef to <4 x i16*>*
95 store <4 x i16*> , <4 x i16*>* %13, align 4
96 %arrayinit.begin78 = getelementptr inbounds [3 x [8 x [4 x i16*]]], [3 x [8 x [4 x i16*]]]* %g, i32 0, i32 2, i32 3, i32 0
97 store i16* inttoptr (i32 4 to i16*), i16** %arrayinit.begin78, align 4
98 store i32 0, i32* @b, align 4
99 br label %for.cond
100
101 for.cond: ; preds = %for.cond, %entry
102 br label %for.cond
103 }
104
105 ; Function Attrs: argmemonly nounwind willreturn
106 declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg) #1
107
108 ; Function Attrs: argmemonly nounwind willreturn
109 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #1
110
111
112 declare arm_aapcs_vfpcc i32 @l(...)