llvm.org GIT mirror llvm / 15b3700
Merging r359834:
------------------------------------------------------------------------
r359834 | evandro | 2019-05-02 15:01:39 -0700 (Thu, 02 May 2019) | 3 lines

[AArch64] Update for Exynos

Fix the forwarding of multiplication results for Exynos M4.
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_80@359946 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard 3 months ago
3 changed file(s) with 18 addition(s) and 82 deletion(s). Raw diff Collapse all Expand all
238238 M4UnitS0]> { let Latency = 5;
239239 let NumMicroOps = 2; }
240240 def M4WriteNEONL : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
241 def M4WriteNEONM : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
242241 def M4WriteNEONN : SchedWriteRes<[M4UnitNMSC,
243242 M4UnitNMSC]> { let Latency = 5;
244243 let NumMicroOps = 2; }
479478 SchedVar]>;
480479 def M4WriteMOVI : SchedWriteVariant<[SchedVar,
481480 SchedVar]>;
482 def M4WriteMULL : SchedWriteVariant<[SchedVar,
483 SchedVar]>;
484481
485482 // Fast forwarding.
486483 def M4ReadAESM1 : SchedReadAdvance<+1, [M4WriteNCRY1]>;
488485 M4WriteFMAC4H,
489486 M4WriteFMAC5]>;
490487 def M4ReadNMULM1 : SchedReadAdvance<+1, [M4WriteNMUL3]>;
491 def M4ReadMULLP2 : SchedReadAdvance<-2, [M4WriteNEONM]>;
488 def M4ReadNMULP2 : SchedReadAdvance<-2, [M4WriteNMUL3]>;
489
492490
493491 //===----------------------------------------------------------------------===//
494492 // Coarse scheduling model.
661659 def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev1f16")>;
662660 def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev1i(32|64)")>;
663661 def : InstRW<[M4WriteNMSC1], (instregex "^FRECPXv1")>;
664 def : InstRW<[M4WriteFMAC4H,
665 M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S16")>;
666 def : InstRW<[M4WriteFMAC4,
667 M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S(32|64)")>;
662 def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)S16")>;
663 def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)S(32|64)")>;
668664
669665 // FP load instructions.
670666 def : InstRW<[WriteVLD], (instregex "^LDR[SDQ]l")>;
735731 def : InstRW<[M4WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>;
736732 def : InstRW<[M4WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>;
737733 def : InstRW<[M4WriteNHAD3], (instregex "^[SU](MIN|MAX)Vv")>;
738 def : InstRW<[M4WriteNMUL3], (instregex "^(SQR?D)?MULH?v")>;
739734 def : InstRW<[M4WriteNMUL3,
740735 M4ReadNMULM1], (instregex "^ML[AS]v")>;
741 def : InstRW<[M4WriteNMUL3], (instregex "^SQRDML[AS]H")>;
742 def : InstRW<[M4WriteMULL,
743 M4ReadMULLP2], (instregex "^(S|U|SQD)ML[AS]Lv")>;
744 def : InstRW<[M4WriteMULL,
745 M4ReadMULLP2], (instregex "^(S|U|SQD)MULLv")>;
736 def : InstRW<[M4WriteNMUL3,
737 M4ReadNMULM1], (instregex "^(SQR?D)?MULH?v")>;
738 def : InstRW<[M4WriteNMUL3,
739 M4ReadNMULM1], (instregex "^SQRDML[AS]H")>;
740 def : InstRW<[M4WriteNMUL3,
741 M4ReadNMULM1], (instregex "^(S|U|SQD)ML[AS]L(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
742 def : InstRW<[M4WriteNMUL3,
743 M4ReadNMULP2], (instregex "^(S|U|SQD)ML[AS]L(v4i32|v8i16|v16i8)")>;
744 def : InstRW<[M4WriteNMUL3,
745 M4ReadNMULM1], (instregex "^(S|U|SQD)MULL(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
746 def : InstRW<[M4WriteNMUL3,
747 M4ReadNMULP2], (instregex "^(S|U|SQD)MULL(v4i32|v8i16|v16i8)")>;
746748 def : InstRW<[M4WriteNMUL3], (instregex "^[SU]DOT(lane)?v")>;
747749 def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ADALPv")>;
748750 def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>;
807809 def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev[248]f16")>;
808810 def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev[248]f(32|64)")>;
809811 def : InstRW<[M4WriteFCVT3], (instregex "^U(RECP|RSQRT)Ev[24]i32")>;
810 def : InstRW<[M4WriteFMAC4H,
811 M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f16")>;
812 def : InstRW<[M4WriteFMAC4,
813 M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
812 def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)Sv.f16")>;
813 def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
814814 def : InstRW<[M4WriteNSHF1], (instregex "^REV(16|32|64)v")>;
815815 def : InstRW<[M4WriteNSHFA], (instregex "^TB[LX]v(8|16)i8One")>;
816816 def : InstRW<[M4WriteNSHFB], (instregex "^TB[LX]v(8|16)i8Two")>;
102102 // Identify FP instructions.
103103 def ExynosFPPred : MCSchedPredicate>;
104104
105 // Identify whether an instruction whose result is a long vector
106 // operates on the upper half of the input registers.
107 def ExynosLongVectorUpperFn : TIIPredicate<
108 "isExynosLongVectorUpper",
109 MCOpcodeSwitchStatement<
110 [MCOpcodeSwitchCase<
111 IsLongVectorUpperOp.ValidOpcodes,
112 MCReturnStatement>],
113 MCReturnStatement>>;
114 def ExynosLongVectorUpperPred : MCSchedPredicate;
115
116105 // Identify 128-bit NEON instructions.
117106 def ExynosQFormPred : MCSchedPredicate;
118107
267267 def IsLoadStoreRegOffsetOp : CheckOpcode
268268 IsStoreRegOffsetOp.ValidOpcodes)>;
269269
270 // Identify whether an instruction whose result is a long vector
271 // operates on the upper half of the input registers.
272 def IsLongVectorUpperOp : CheckOpcode<[FCVTLv8i16, FCVTLv4i32,
273 FCVTNv8i16, FCVTNv4i32,
274 FCVTXNv4f32,
275 PMULLv16i8, PMULLv2i64,
276 RADDHNv8i16_v16i8, RADDHNv4i32_v8i16, RADDHNv2i64_v4i32,
277 RSHRNv16i8_shift, RSHRNv8i16_shift, RSHRNv4i32_shift,
278 RSUBHNv8i16_v16i8, RSUBHNv4i32_v8i16, RSUBHNv2i64_v4i32,
279 SABALv16i8_v8i16, SABALv8i16_v4i32, SABALv4i32_v2i64,
280 SABDLv16i8_v8i16, SABDLv8i16_v4i32, SABDLv4i32_v2i64,
281 SADDLv16i8_v8i16, SADDLv8i16_v4i32, SADDLv4i32_v2i64,
282 SADDWv16i8_v8i16, SADDWv8i16_v4i32, SADDWv4i32_v2i64,
283 SHLLv16i8, SHLLv8i16, SHLLv4i32,
284 SHRNv16i8_shift, SHRNv8i16_shift, SHRNv4i32_shift,
285 SMLALv16i8_v8i16, SMLALv8i16_v4i32, SMLALv4i32_v2i64,
286 SMLALv8i16_indexed, SMLALv4i32_indexed,
287 SMLSLv16i8_v8i16, SMLSLv8i16_v4i32, SMLSLv4i32_v2i64,
288 SMLSLv8i16_indexed, SMLSLv4i32_indexed,
289 SMULLv16i8_v8i16, SMULLv8i16_v4i32, SMULLv4i32_v2i64,
290 SMULLv8i16_indexed, SMULLv4i32_indexed,
291 SQDMLALv8i16_v4i32, SQDMLALv4i32_v2i64,
292 SQDMLALv8i16_indexed, SQDMLALv4i32_indexed,
293 SQDMLSLv8i16_v4i32, SQDMLSLv4i32_v2i64,
294 SQDMLSLv8i16_indexed, SQDMLSLv4i32_indexed,
295 SQDMULLv8i16_v4i32, SQDMULLv4i32_v2i64,
296 SQDMULLv8i16_indexed, SQDMULLv4i32_indexed,
297 SQRSHRNv16i8_shift, SQRSHRNv8i16_shift, SQRSHRNv4i32_shift,
298 SQRSHRUNv16i8_shift, SQRSHRUNv8i16_shift, SQRSHRUNv4i32_shift,
299 SQSHRNv16i8_shift, SQSHRNv8i16_shift, SQSHRNv4i32_shift,
300 SQSHRUNv16i8_shift, SQSHRUNv8i16_shift, SQSHRUNv4i32_shift,
301 SQXTNv16i8, SQXTNv8i16, SQXTNv4i32,
302 SQXTUNv16i8, SQXTUNv8i16, SQXTUNv4i32,
303 SSHLLv16i8_shift, SSHLLv8i16_shift, SSHLLv4i32_shift,
304 SSUBLv16i8_v8i16, SSUBLv8i16_v4i32, SSUBLv4i32_v2i64,
305 SSUBWv16i8_v8i16, SSUBWv8i16_v4i32, SSUBWv4i32_v2i64,
306 UABALv16i8_v8i16, UABALv8i16_v4i32, UABALv4i32_v2i64,
307 UABDLv16i8_v8i16, UABDLv8i16_v4i32, UABDLv4i32_v2i64,
308 UADDLv16i8_v8i16, UADDLv8i16_v4i32, UADDLv4i32_v2i64,
309 UADDWv16i8_v8i16, UADDWv8i16_v4i32, UADDWv4i32_v2i64,
310 UMLALv16i8_v8i16, UMLALv8i16_v4i32, UMLALv4i32_v2i64,
311 UMLALv8i16_indexed, UMLALv4i32_indexed,
312 UMLSLv16i8_v8i16, UMLSLv8i16_v4i32, UMLSLv4i32_v2i64,
313 UMLSLv8i16_indexed, UMLSLv4i32_indexed,
314 UMULLv16i8_v8i16, UMULLv8i16_v4i32, UMULLv4i32_v2i64,
315 UMULLv8i16_indexed, UMULLv4i32_indexed,
316 UQSHRNv16i8_shift, UQSHRNv8i16_shift, UQSHRNv4i32_shift,
317 UQXTNv16i8, UQXTNv8i16, UQXTNv4i32,
318 USHLLv16i8_shift, USHLLv8i16_shift, USHLLv4i32_shift,
319 USUBLv16i8_v8i16, USUBLv8i16_v4i32, USUBLv4i32_v2i64,
320 USUBWv16i8_v8i16, USUBWv8i16_v4i32, USUBWv4i32_v2i64,
321 XTNv16i8, XTNv8i16, XTNv4i32]>;
322
323270 // Target predicates.
324271
325272 // Identify an instruction that effectively transfers a register to another.