llvm.org GIT mirror llvm / 51f2854
[ARM] Add machine scheduler for Cortex-R52 This patch adds the Sched Machine Model for Cortex-R52. Details of the pipeline and descriptions are in comments in file ARMScheduleR52.td included in this patch. Reviewers: rengolin, jmolloy Differential Revision: https://reviews.llvm.org/D26500 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286949 91177308-0d34-0410-b5e6-96231b3b80d8 Javed Absar 3 years ago
4 changed file(s) with 1024 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
822822 FeatureCrypto,
823823 FeatureCRC]>;
824824
825 def : ProcNoItin<"cortex-r52", [ARMv8r, ProcR52,
825 def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52,
826826 FeatureFPAO]>;
827827
828828 //===----------------------------------------------------------------------===//
363363 include "ARMScheduleA8.td"
364364 include "ARMScheduleA9.td"
365365 include "ARMScheduleSwift.td"
366 include "ARMScheduleR52.td"
0 //==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
10 //
11 //===----------------------------------------------------------------------===//
12
13 // ===---------------------------------------------------------------------===//
14 // The Cortex-R52 is an in-order pipelined superscalar microprocessor with
15 // an 8-stage pipeline. It can issue a maximum of two instructions per cycle.
16 // There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
17 // A number of forwarding paths enable results of computations to be input
18 // to subsequent operations before they are written to registers.
19 // This scheduler is a MachineScheduler. See TargetSchedule.td for details.
20
21 def CortexR52Model : SchedMachineModel {
22 let MicroOpBufferSize = 0; // R52 is an in-order processor
23 let IssueWidth = 2; // 2 micro-ops dispatched per cycle
24 let LoadLatency = 1; // Optimistic, assuming no misses
25 let MispredictPenalty = 8; // A branch direction mispredict, including PFU
26 let PostRAScheduler = 1; // Enable PostRA scheduler pass.
27 let CompleteModel = 0; // Partial model: only covers instructions applicable to cortex-r52.
28 }
29
30
31 //===----------------------------------------------------------------------===//
32 // Define each kind of processor resource and number available.
33
34 // Modeling each pipeline as a ProcResource using the BufferSize = 0 since
35 // Cortex-R52 is an in-order processor.
36
37 def R52UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
38 def R52UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC
39 def R52UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division
40 def R52UnitLd : ProcResource<1> { let BufferSize = 0; } // Load/Store
41 def R52UnitB : ProcResource<1> { let BufferSize = 0; } // Branch
42 def R52UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU
43 def R52UnitFPMUL : ProcResource<2> { let BufferSize = 0; } // FP MUL
44 def R52UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP DIV
45
46 // Cortex-R52 specific SchedReads
47 def R52Read_ISS : SchedRead;
48 def R52Read_EX1 : SchedRead;
49 def R52Read_EX2 : SchedRead;
50 def R52Read_WRI : SchedRead;
51 def R52Read_F0 : SchedRead; // F0 maps to ISS stage of integer pipe
52 def R52Read_F1 : SchedRead;
53 def R52Read_F2 : SchedRead;
54
55
56 //===----------------------------------------------------------------------===//
57 // Subtarget-specific SchedWrite types which map ProcResources and set latency.
58
59 let SchedModel = CortexR52Model in {
60
61 // ALU - Write occurs in Late EX2 (independent of whether shift was required)
62 def : WriteRes { let Latency = 3; }
63 def : WriteRes { let Latency = 3; }
64 def : WriteRes { let Latency = 3; }
65 def : WriteRes { let Latency = 3; }
66
67 // Compares
68 def : WriteRes { let Latency = 0; }
69 def : WriteRes { let Latency = 0; }
70 def : WriteRes { let Latency = 0; }
71
72 // Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
73 def : WriteRes {
74 let Latency = 8; let ResourceCycles = [8]; // not pipelined
75 }
76
77 // Loads
78 def : WriteRes { let Latency = 4; }
79 def : WriteRes { let Latency = 4; }
80
81 // Branches - LR written in Late EX2
82 def : WriteRes { let Latency = 0; }
83 def : WriteRes { let Latency = 0; }
84 def : WriteRes { let Latency = 0; }
85
86 // Misc
87 def : WriteRes { let Latency = 0; let NumMicroOps = 0; }
88 def : WriteRes { let Latency = 3; }
89
90 def : ReadAdvance; // Operand needed in EX1 stage
91 def : ReadAdvance; // Shift operands needed in ISS
92
93
94 //===----------------------------------------------------------------------===//
95 // Subtarget-specific SchedReadWrites.
96
97 // Forwarding information - based on when an operand is read
98 def : ReadAdvance;
99 def : ReadAdvance;
100 def : ReadAdvance;
101 def : ReadAdvance;
102 def : ReadAdvance;
103 def : ReadAdvance;
104
105
106 // Cortex-R52 specific SchedWrites for use with InstRW
107 def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
108 def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
109 let Latency = 8; let ResourceCycles = [8]; // not pipelined
110 }
111 def R52WriteLd : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
112 def R52WriteST : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
113 def R52WriteAdr : SchedWriteRes<[]> { let Latency = 0; }
114 def R52WriteCC : SchedWriteRes<[]> { let Latency = 0; }
115 def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]> { let Latency = 2; }
116 def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
117 def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }
118
119 def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
120 def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
121
122 def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
123 def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
124 let Latency = 4;
125 }
126 def R52WriteFPALU_F4 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 5; }
127 def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
128 let Latency = 5;
129 }
130 def R52WriteFPALU_F5 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 6; }
131 def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
132 let Latency = 6;
133 }
134 def R52WriteFPMUL_F5 : SchedWriteRes<[R52UnitFPMUL]> { let Latency = 6; }
135 def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
136 let Latency = 6;
137 }
138 def R52WriteFPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
139 let Latency = 11; // as it is internally two insns (MUL then ADD)
140 }
141 def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
142 R52UnitFPALU, R52UnitFPALU]> {
143 let Latency = 11;
144 }
145
146 def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
147 def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
148
149 def R52WriteFPDIV_SP : SchedWriteRes<[R52UnitFPDIV]> {
150 let Latency = 7; // FP div takes fixed #cycles
151 let ResourceCycles = [7]; // is not pipelined
152 }
153 def R52WriteFPDIV_DP : SchedWriteRes<[R52UnitFPDIV]> {
154 let Latency = 17;
155 let ResourceCycles = [17];
156 }
157
158
159 //===----------------------------------------------------------------------===//
160 // Subtarget-specific - map operands to SchedReadWrites
161
162 def : InstRW<[WriteALU], (instrs COPY)>;
163
164 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
165 (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
166 "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;
167
168 def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
169 (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi",
170 "t2MOVi", "t2MOV_ga_dyn")>;
171 def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
172 (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel")>;
173 def : InstRW<[R52WriteLd,R52Read_ISS],
174 (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
175
176 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>;
177
178 def : InstRW< [R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
179 (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
180 "(t|t2)UBFX", "(t|t2)SBFX")>;
181
182 // Saturating arithmetic
183 def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
184 (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
185 "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
186 "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
187 "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
188 "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
189 "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
190
191 // Parallel arithmetic
192 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
193 (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
194 "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
195 "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
196 "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
197
198 // Flag setting.
199 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
200 (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
201 "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
202 "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
203 "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
204 "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
205 "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
206
207 // Sum of Absolute Difference
208 def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
209 (instregex "USAD8", "t2USAD8", "tUSAD8","USADA8", "t2USADA8", "tUSADA8") >;
210
211 // Integer Multiply
212 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
213 (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
214 "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
215 "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
216 "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
217
218 // Multiply Accumulate
219 // Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
220 // The store pipeline is used partly for 64-bit operations.
221 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
222 (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
223 "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
224 "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
225 "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
226 "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
227 "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
228 "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
229 "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
230 "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
231 "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
232 "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB",
233 "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
234 "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
235
236 def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
237 (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
238
239 // Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
240 // However, that's non-trivial to specify, so we keep it uniform
241 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
242 (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
243 "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
244 "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
245 "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
246 "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
247 "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>;
248 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
249 (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
250 "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
251 "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
252 "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
253 "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T",
254 "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
255
256 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>;
257 def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>;
258
259 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri","ANDS?ri",
260 "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
261 "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
262 "t2ORRri", "t2RSBS?ri", "t2SBCri")>;
263
264 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr",
265 "ANDS?rr", "BICS?rr", "CRC", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
266 "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;
267
268 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi",
269 "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
270 "t2AD(C|D)S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi", "t2RSBrsi", "t2SBCrsi")>;
271
272 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
273 (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
274 "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;
275
276 def : InstRW<[R52WriteALU_EX1],
277 (instregex "ADR", "MOVSi", "MOVSsi", "MOVST?i16*", "MVNS?s?i", "t2MOVS?si")>;
278
279 def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>;
280 def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
281 (instregex "ASRr", "RORS?r", "LSR", "LSL")>;
282
283 def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>;
284 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>;
285 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>;
286 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>;
287
288 def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
289 (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;
290
291 def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
292
293 def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
294 def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
295
296 //def : InstRW<[R52WriteLd, R52Read_ISS], (instregex "^LDRB?(_PRE_IMM|_POST_IMM)", "LDRrs")>;
297 //def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_PRE_REG", "LDRB?rr")>;
298 //def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_POST_REG")>;
299
300 //def : InstRW<[R52WriteST, R52Read_ISS], (instregex "STRi12", "PICSTR")>;
301 //def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_PRE_REG", "STRB?_PRE_REG")>;
302 //def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_POST_REG", "STRB?_POST_REG")>;
303
304
305 // Integer Load, Multiple.
306 foreach Lat = 3-25 in {
307 def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
308 let Latency = Lat;
309 }
310 def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
311 let Latency = Lat;
312 let NumMicroOps = 0;
313 }
314 }
315 foreach NAddr = 1-16 in {
316 def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
317 }
318 def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
319 def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>;
320 def R52WriteILDM : SchedWriteVariant<[
321 SchedVar,
322
323 SchedVar
324 R52WriteILDM6Cy]>,
325 SchedVar
326 R52WriteILDM6Cy, R52WriteILDM7Cy]>,
327
328 SchedVar
329 R52WriteILDM6Cy, R52WriteILDM7Cy,
330 R52WriteILDM8Cy]>,
331 SchedVar
332 R52WriteILDM6Cy, R52WriteILDM7Cy,
333 R52WriteILDM8Cy, R52WriteILDM9Cy]>,
334
335 SchedVar
336 R52WriteILDM6Cy, R52WriteILDM7Cy,
337 R52WriteILDM8Cy, R52WriteILDM9Cy,
338 R52WriteILDM10Cy]>,
339 SchedVar
340 R52WriteILDM6Cy, R52WriteILDM7Cy,
341 R52WriteILDM8Cy, R52WriteILDM9Cy,
342 R52WriteILDM10Cy, R52WriteILDM11Cy]>,
343
344 SchedVar
345 R52WriteILDM6Cy, R52WriteILDM7Cy,
346 R52WriteILDM8Cy, R52WriteILDM9Cy,
347 R52WriteILDM10Cy, R52WriteILDM11Cy,
348 R52WriteILDM12Cy]>,
349 SchedVar
350 R52WriteILDM6Cy, R52WriteILDM7Cy,
351 R52WriteILDM8Cy, R52WriteILDM9Cy,
352 R52WriteILDM10Cy, R52WriteILDM11Cy,
353 R52WriteILDM12Cy, R52WriteILDM13Cy]>,
354
355 SchedVar
356 R52WriteILDM6Cy, R52WriteILDM7Cy,
357 R52WriteILDM8Cy, R52WriteILDM9Cy,
358 R52WriteILDM10Cy, R52WriteILDM11Cy,
359 R52WriteILDM12Cy, R52WriteILDM13Cy,
360 R52WriteILDM14Cy]>,
361 SchedVar
362 R52WriteILDM6Cy, R52WriteILDM7Cy,
363 R52WriteILDM8Cy, R52WriteILDM9Cy,
364 R52WriteILDM10Cy, R52WriteILDM11Cy,
365 R52WriteILDM12Cy, R52WriteILDM13Cy,
366 R52WriteILDM14Cy, R52WriteILDM15Cy]>,
367
368 SchedVar
369 R52WriteILDM6Cy, R52WriteILDM7Cy,
370 R52WriteILDM8Cy, R52WriteILDM9Cy,
371 R52WriteILDM10Cy, R52WriteILDM11Cy,
372 R52WriteILDM12Cy, R52WriteILDM13Cy,
373 R52WriteILDM14Cy, R52WriteILDM15Cy,
374 R52WriteILDM16Cy]>,
375 SchedVar
376 R52WriteILDM6Cy, R52WriteILDM7Cy,
377 R52WriteILDM8Cy, R52WriteILDM9Cy,
378 R52WriteILDM10Cy, R52WriteILDM11Cy,
379 R52WriteILDM12Cy, R52WriteILDM13Cy,
380 R52WriteILDM14Cy, R52WriteILDM15Cy,
381 R52WriteILDM16Cy, R52WriteILDM17Cy]>,
382
383 SchedVar
384 R52WriteILDM6Cy, R52WriteILDM7Cy,
385 R52WriteILDM8Cy, R52WriteILDM9Cy,
386 R52WriteILDM10Cy, R52WriteILDM11Cy,
387 R52WriteILDM12Cy, R52WriteILDM13Cy,
388 R52WriteILDM14Cy, R52WriteILDM15Cy,
389 R52WriteILDM16Cy, R52WriteILDM17Cy,
390 R52WriteILDM18Cy]>,
391 SchedVar
392 R52WriteILDM6Cy, R52WriteILDM7Cy,
393 R52WriteILDM8Cy, R52WriteILDM9Cy,
394 R52WriteILDM10Cy, R52WriteILDM11Cy,
395 R52WriteILDM12Cy, R52WriteILDM13Cy,
396 R52WriteILDM14Cy, R52WriteILDM15Cy,
397 R52WriteILDM16Cy, R52WriteILDM17Cy,
398 R52WriteILDM18Cy, R52WriteILDM19Cy]>,
399
400 // Unknown number of registers, just use resources for two registers.
401 SchedVar
402 R52WriteILDM6CyNo, R52WriteILDM7CyNo,
403 R52WriteILDM8CyNo, R52WriteILDM9CyNo,
404 R52WriteILDM10CyNo, R52WriteILDM11CyNo,
405 R52WriteILDM12CyNo, R52WriteILDM13CyNo,
406 R52WriteILDM14CyNo, R52WriteILDM15CyNo,
407 R52WriteILDM16CyNo, R52WriteILDM17CyNo,
408 R52WriteILDM18Cy, R52WriteILDM19Cy]>
409 ]> { let Variadic=1; }
410
411 // Integer Store, Multiple
412 def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
413 let Latency = 4;
414 let NumMicroOps = 2;
415 }
416 foreach NumAddr = 1-16 in {
417 def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
418 }
419 def R52WriteISTM : SchedWriteVariant<[
420 SchedVar,
421 SchedVar,
422 SchedVar,
423 SchedVar,
424 SchedVar,
425 SchedVar,
426 SchedVar,
427 SchedVar,
428 SchedVar,
429 SchedVar,
430 SchedVar,
431 SchedVar,
432 SchedVar,
433 SchedVar,
434 SchedVar,
435 // Unknown number of registers, just use resources for two registers.
436 SchedVar
437 ]>;
438
439 def : InstRW<[R52WriteILDM, R52Read_ISS],
440 (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
441 "(t|sys)LDM(IA|DA|DB|IB)$")>;
442 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
443 (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
444 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
445 (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
446
447 // Integer Store, Single Element
448 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
449 (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS",
450 "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$",
451 "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
452
453 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
454 (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
455 "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
456 "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
457 "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
458
459 // Integer Store, Dual
460 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
461 (instregex "STRD$", "t2STRDi8", "STL", "t2STRD$", "t2STL")>;
462 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
463 (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
464
465 def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
466 (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
467 def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
468 (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
469 "PUSH", "tPUSH")>;
470
471 // LDRLIT pseudo instructions, they expand to LDR + PICADD
472 def : InstRW<[R52WriteLd],
473 (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>;
474 // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
475 def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>;
476
477
478
479 //===----------------------------------------------------------------------===//
480 // VFP, Floating Point Support
481 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fd|hd)")>;
482 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>;
483
484 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>;
485 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>;
486 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>;
487
488 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>;
489 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>;
490
491 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
492 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
493
494 def : InstRW<[R52WriteFPDIV_SP, R52Read_F0, R52Read_F0], (instregex "VDIV(S|H)")>;
495 def : InstRW<[R52WriteFPDIV_DP, R52Read_F0, R52Read_F0], (instregex "VDIVD")>;
496
497 def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
498 (instregex "(VFMA|VFMS|VFNMA|VFNMS)(D|H|S)")>;
499
500 def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
501 def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
502
503
504 //===----------------------------------------------------------------------===//
505 // Neon Support
506
507 // vector multiple load stores
508 foreach NumAddr = 1-16 in {
509 def R52LMAddrPred#NumAddr :
510 SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
511 }
512 foreach Lat = 1-32 in {
513 def R52WriteLM#Lat#Cy : SchedWriteRes<[]> {
514 let Latency = Lat;
515 }
516 }
517 foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
518 def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
519 let Latency = 0;
520 let NumMicroOps = Num;
521 let ResourceCycles = [Num];
522 }
523 }
524 def R52WriteVLDM : SchedWriteVariant<[
525 // 1 D reg
526 SchedVar
527 R52ReserveLd5Cy]>,
528 SchedVar
529 R52ReserveLd5Cy]>,
530
531 // 2 D reg
532 SchedVar
533 R52ReserveLd6Cy]>,
534 SchedVar
535 R52ReserveLd6Cy]>,
536
537 // 3 D reg
538 SchedVar
539 R52WriteLM7Cy,
540 R52ReserveLd4Cy]>,
541 SchedVar
542 R52WriteLM7Cy,
543 R52ReserveLd7Cy]>,
544
545 // 4 D reg
546 SchedVar
547 R52WriteLM7Cy, R52WriteLM8Cy,
548 R52ReserveLd8Cy]>,
549 SchedVar
550 R52WriteLM7Cy, R52WriteLM8Cy,
551 R52ReserveLd8Cy]>,
552
553 // 5 D reg
554 SchedVar
555 R52WriteLM7Cy, R52WriteLM8Cy,
556 R52WriteLM9Cy,
557 R52ReserveLd9Cy]>,
558 SchedVar
559 R52WriteLM7Cy, R52WriteLM8Cy,
560 R52WriteLM9Cy,
561 R52ReserveLd9Cy]>,
562
563 // 6 D reg
564 SchedVar
565 R52WriteLM7Cy, R52WriteLM8Cy,
566 R52WriteLM9Cy, R52WriteLM10Cy,
567 R52ReserveLd10Cy]>,
568 SchedVar
569 R52WriteLM7Cy, R52WriteLM8Cy,
570 R52WriteLM9Cy, R52WriteLM10Cy,
571 R52ReserveLd10Cy]>,
572
573 // 7 D reg
574 SchedVar
575 R52WriteLM7Cy, R52WriteLM8Cy,
576 R52WriteLM9Cy, R52WriteLM10Cy,
577 R52WriteLM11Cy,
578 R52ReserveLd11Cy]>,
579 SchedVar
580 R52WriteLM7Cy, R52WriteLM8Cy,
581 R52WriteLM9Cy, R52WriteLM10Cy,
582 R52WriteLM11Cy,
583 R52ReserveLd11Cy]>,
584
585 // 8 D reg
586 SchedVar
587 R52WriteLM7Cy, R52WriteLM8Cy,
588 R52WriteLM9Cy, R52WriteLM10Cy,
589 R52WriteLM11Cy, R52WriteLM12Cy,
590 R52ReserveLd12Cy]>,
591 SchedVar
592 R52WriteLM7Cy, R52WriteLM8Cy,
593 R52WriteLM9Cy, R52WriteLM10Cy,
594 R52WriteLM11Cy, R52WriteLM12Cy,
595 R52ReserveLd12Cy]>,
596 // unknown number of reg.
597 SchedVar
598 R52WriteLM7Cy, R52WriteLM8Cy,
599 R52WriteLM9Cy, R52WriteLM10Cy,
600 R52WriteLM11Cy, R52WriteLM12Cy,
601 R52ReserveLd5Cy]>
602 ]> { let Variadic=1;}
603
604 // variable stores. Cannot dual-issue
605 def R52WriteSTM5 : SchedWriteRes<[R52UnitLd]> {
606 let Latency = 5;
607 let NumMicroOps = 2;
608 let ResourceCycles = [1];
609 }
610 def R52WriteSTM6 : SchedWriteRes<[R52UnitLd]> {
611 let Latency = 6;
612 let NumMicroOps = 4;
613 let ResourceCycles = [2];
614 }
615 def R52WriteSTM7 : SchedWriteRes<[R52UnitLd]> {
616 let Latency = 7;
617 let NumMicroOps = 6;
618 let ResourceCycles = [3];
619 }
620 def R52WriteSTM8 : SchedWriteRes<[R52UnitLd]> {
621 let Latency = 8;
622 let NumMicroOps = 8;
623 let ResourceCycles = [4];
624 }
625 def R52WriteSTM9 : SchedWriteRes<[R52UnitLd]> {
626 let Latency = 9;
627 let NumMicroOps = 10;
628 let ResourceCycles = [5];
629 }
630 def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
631 let Latency = 10;
632 let NumMicroOps = 12;
633 let ResourceCycles = [6];
634 }
635 def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
636 let Latency = 11;
637 let NumMicroOps = 14;
638 let ResourceCycles = [7];
639 }
640 def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
641 let Latency = 12;
642 let NumMicroOps = 16;
643 let ResourceCycles = [8];
644 }
645 def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
646 let Latency = 13;
647 let NumMicroOps = 18;
648 let ResourceCycles = [9];
649 }
650 def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
651 let Latency = 14;
652 let NumMicroOps = 20;
653 let ResourceCycles = [10];
654 }
655 def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
656 let Latency = 15;
657 let NumMicroOps = 22;
658 let ResourceCycles = [11];
659 }
660
661 def R52WriteSTM : SchedWriteVariant<[
662 SchedVar,
663 SchedVar,
664 SchedVar,
665 SchedVar,
666 SchedVar,
667 SchedVar,
668 SchedVar,
669 SchedVar,
670 SchedVar,
671 SchedVar,
672 SchedVar,
673 SchedVar,
674 SchedVar,
675 SchedVar,
676 SchedVar,
677 SchedVar,
678 // unknown number of registers, just use resources for two
679 SchedVar
680 ]>;
681
682 // Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
683 // another instruction in slot-1, but only in the last issue.
684 def R52WriteVLD1Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 5;}
685 def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> {
686 let Latency = 6;
687 let NumMicroOps = 3;
688 let ResourceCycles = [2];
689 }
690 def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> {
691 let Latency = 7;
692 let NumMicroOps = 5;
693 let ResourceCycles = [3];
694 }
695 def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> {
696 let Latency = 8;
697 let NumMicroOps = 7;
698 let ResourceCycles = [4];
699 }
700 def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
701 let Latency = 5;
702 let NumMicroOps = 1;
703 let ResourceCycles = [1];
704 }
705 def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]> {
706 let Latency = 6;
707 let NumMicroOps = 3;
708 let ResourceCycles = [2];
709 }
710 def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]> {
711 let Latency = 7;
712 let NumMicroOps = 5;
713 let ResourceCycles = [3];
714 }
715 def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]> {
716 let Latency = 8;
717 let NumMicroOps = 7;
718 let ResourceCycles = [4];
719 }
720 def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]> {
721 let Latency = 9;
722 let NumMicroOps = 9;
723 let ResourceCycles = [5];
724 }
725
726
727 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
728 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
729 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;
730
731 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
732 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
733 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABDL(u|s)(v16i8|v8i16|v4i32)")>;
734
735 def : InstRW<[R52Write2FPALU_F4, R52Read_F1], (instregex "VABS(v16i8|v8i16|v4i32)")>;
736
737 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
738 (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
739 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
740 (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
741 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
742 (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;
743
744 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
745 (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;
746
747 def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>;
748 def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>;
749
750 def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
751 def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
752
753 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
// Per-opcode InstRW overrides for NEON/VFP data-processing instructions.
// Each entry pairs one or more opcode-name regexes with a list that starts
// with the scheduling write(s) and continues with the reads for the operands.
// The R52Write*/R52Read_* records are defined earlier in this file (outside
// this excerpt); presumably the _F<n>/_ISS suffix names a pipeline stage --
// TODO confirm against those definitions.
// Where an opcode has separate entries for its d-register and q-register
// forms, the q-register entry uses the R52Write2* variant of the same write.
// NOTE(review): several patterns here are short prefixes ("VMOV", "VCVT",
// "VMUL", ...); verify they do not unintentionally overlap with the more
// specific entries elsewhere in the model.
754 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
755
756 def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
757
758 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
759 (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
760 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
761 (instregex "VCVT", "VSITO", "VUITO", "VTO")>;
762
763 def : InstRW<[R52WriteFPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)d")>;
764 def : InstRW<[R52Write2FPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)q")>;
765 def : InstRW<[R52WriteFPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)d")>;
766 def : InstRW<[R52Write2FPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)q")>;
767
768 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTd(8|16|32)", "VSEL")>;
769 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTq(8|16|32|64)")>;
770
771 def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)d")>;
772 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)q")>;
773
774 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
775 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
776
777 def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
778 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
779 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VMOV", "VORR", "VORN", "VREV")>;
780 def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
781 def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VMUL", "VNMUL", "VMLA")>;
782 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
783 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
784 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
785 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
786 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
787 def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
788 (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
789 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
790 (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
791 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMLAL", "VQDMLSL")>;
792 def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMUL","VQRDMUL")>;
793 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
794 (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
795 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
796 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
797
798 //---
799 // VLDx. Vector Loads
800 //---
801 // 1-element structure load
// Layout of this group: whole-register forms first, then the single-lane and
// all-lanes (LN/DUP) forms, then the address write-back variants (wb, _UPD,
// WB_..._Asm, PseudoWB) of each. Write-back variants list R52WriteAdr as an
// additional write after the memory write.
// The memory write chosen tracks the opcode suffix: VLD1d -> R52WriteVLD1Mem,
// VLD1q -> R52WriteVLD2Mem, ...T -> R52WriteVLD3Mem, ...Q -> R52WriteVLD4Mem;
// every LN/DUP form uses R52WriteVLD1Mem. All entries have a single
// R52Read_ISS read.
802 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)$")>;
803 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD1q(8|16|32|64)$")>;
804 def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)T$")>;
805 def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Q$")>;
806 def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d64TPseudo$")>;
807 def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d64QPseudo$")>;
808
809 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)d(8|16|32)$")>;
810 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1LNdAsm_(8|16|32)")>;
811 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo$")>;
812
813 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)wb")>;
814 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1q(8|16|32|64)wb")>;
815 def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Twb")>;
816 def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Qwb")>;
817 def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64TPseudoWB")>;
818 def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64QPseudoWB")>;
819
820 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNd(8|16|32)_UPD")>;
821 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
822 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1DUP(d|q)(8|16|32)wb")>;
823 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>;
824
825 // 2-element structure load
// Whole-structure forms: VLD2(d|b) uses R52WriteVLD2Mem and VLD2q (including
// its Pseudo forms) uses R52WriteVLD4Mem. The single-lane and all-lanes
// (LN/DUP) forms all use R52WriteVLD1Mem. Entries whose regex names a
// write-back opcode (wb, _UPD, WB_..._Asm, PseudoWB) add R52WriteAdr as a
// second write. Every entry has a single R52Read_ISS read.
826 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)$")>;
827 def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)$")>;
828 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)wb")>;
829 def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)wb")>;
830 def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)Pseudo$")>;
831 def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)PseudoWB")>;
832
833 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)$")>;
834 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNdAsm_(8|16|32)$")>;
835 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)$")>;
836 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNqAsm_(16|32)$")>;
837 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)$")>;
838 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2$")>;
839 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo")>;
840 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo")>;
841
842 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)_UPD")>;
843 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
844
845 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)_UPD")>;
846 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNqWB_(fixed|register)_Asm_(16|32)")>;
847
848 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)wb")>;
849 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2wb")>;
850 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo_UPD")>;
851 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo_UPD")>;
852
853 // 3-element structure load
// Whole-structure VLD3 forms use R52WriteVLD3Mem; the single-lane and
// all-lanes (LN/DUP) forms use R52WriteVLD2Mem. Entries whose regex names a
// write-back opcode (_UPD, WB_..._Asm, Pseudo_UPD) add R52WriteAdr as a second
// write. Every entry has a single R52Read_ISS read.
// Each opcode pattern is listed exactly once so that no two InstRW records in
// this model cover the same instructions.
854 def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)$")>;
855 def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)Asm_(8|16|32)$")>;
856 def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)_UPD")>;
857 def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
858 def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo")>;
859 def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
860
861 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$")>;
862 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$")>;
863 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
864
865 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
866 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
868 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
869
870 // 4-element structure load
// Whole-structure VLD4 forms use R52WriteVLD4Mem; the single-lane and
// all-lanes (LN/DUP) forms use R52WriteVLD2Mem. Entries whose regex names a
// write-back opcode (_UPD, WB_..._Asm, Pseudo_UPD) add R52WriteAdr as a second
// write. Every entry has a single R52Read_ISS read.
871 def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)$")>;
872 def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)Asm_(8|16|32)$")>;
873 def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo")>;
874 def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)_UPD")>;
875 def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
876 def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
877
878
879 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$")>;
880 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$")>;
881 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4LN(d|q)(8|16|32)Pseudo$")>;
882 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4DUPd(8|16|32)Pseudo$")>;
883 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD")>;
884 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
885 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
886
887 //---
888 // VSTx. Vector Stores
889 //---
890 // 1-element structure store
// Layout of this group: whole-register forms, then single-lane (LN) forms,
// then the address write-back variants (wb, _UPD, WB_..._Asm, PseudoWB) of
// each. Write-back variants list R52WriteAdr as an additional write.
// The memory write chosen tracks the opcode suffix: VST1d -> R52WriteVST1Mem,
// VST1q -> R52WriteVST2Mem, ...T -> R52WriteVST3Mem, ...Q -> R52WriteVST4Mem;
// every LN form uses R52WriteVST1Mem. All store entries list two reads,
// R52Read_ISS followed by R52Read_F2.
891 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)$")>;
892 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)$")>;
893 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)T$")>;
894 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Q$")>;
895 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudo$")>;
896 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudo$")>;
897
898 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)$")>;
899 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNdAsm_(8|16|32)$")>;
900 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo$")>;
901
902 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)wb")>;
903 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)wb")>;
904 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Twb")>;
905 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Qwb")>;
906 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudoWB")>;
907 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudoWB")>;
908
909 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)_UPD")>;
910 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
911 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
912
913 // 2-element structure store
// Whole-structure forms: VST2(d|b) uses R52WriteVST2Mem and VST2q (including
// its Pseudo forms) uses R52WriteVST4Mem. The single-lane (LN) forms all use
// R52WriteVST1Mem. Entries whose regex names a write-back opcode (wb, _UPD,
// WB_..._Asm, PseudoWB) add R52WriteAdr as a second write. Every entry lists
// the reads R52Read_ISS and R52Read_F2.
914 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)$")>;
915 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)$")>;
916 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)Pseudo$")>;
917
918 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)$")>;
919 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNdAsm_(8|16|32)$")>;
920 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo$")>;
921 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)$")>;
922 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNqAsm_(16|32)$")>;
923 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo$")>;
924
925 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)wb")>;
926 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)wb")>;
927 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)PseudoWB")>;
928
929 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)_UPD")>;
930 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
931 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
932 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)_UPD")>;
933 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
934 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo_UPD")>;
935
936 // 3-element structure store
// Whole-structure VST3 forms use R52WriteVST4Mem; the single-lane (LN) forms
// use R52WriteVST2Mem. Entries whose regex names a write-back opcode (_UPD,
// WB_..._Asm, Pseudo_UPD) add R52WriteAdr as a second write. Every entry lists
// the reads R52Read_ISS and R52Read_F2.
// NOTE(review): the non-write-back pseudo pattern below names only the d forms
// ("VST3d...(oddP|P)seudo$"), whereas the write-back pseudo pattern names
// (d|q). Confirm whether q-register pseudos without write-back are
// intentionally left out or need an entry.
937 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)$")>;
938 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)Asm_(8|16|32)$")>;
939 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3d(8|16|32)(oddP|P)seudo$")>;
940
941 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)$")>;
942 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNdAsm_(8|16|32)$")>;
943 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo$")>;
944 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)$")>;
945 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNqAsm_(16|32)$")>;
946 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo$")>;
947
948 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)_UPD$")>;
949 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
950 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
951
952 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)_UPD$")>;
953 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
954 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
955 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)_UPD$")>;
956 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
957 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
958
959 // 4-element structure store
// Whole-structure VST4 forms use R52WriteVST5Mem; the single-lane (LN) forms
// use R52WriteVST2Mem. Entries whose regex names a write-back opcode (_UPD,
// WB_..._Asm, Pseudo_UPD) add R52WriteAdr as a second write. Every entry lists
// the reads R52Read_ISS and R52Read_F2.
// NOTE(review): the non-write-back pseudo pattern below is
// "VST4d(8|16|32)Pseudo$" (d forms only, no oddPseudo), whereas the write-back
// pseudo pattern covers (d|q) and (oddP|P)seudo. Confirm whether the remaining
// pseudos are intentionally left out or need an entry.
960 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)$")>;
961 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)Asm_(8|16|32)$")>;
962 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4d(8|16|32)Pseudo$")>;
963
964 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)$")>;
965 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNdAsm_(8|16|32)$")>;
966 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo$")>;
967 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)$")>;
968 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNqAsm_(16|32)$")>;
969 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo$")>;
970
971 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)_UPD")>;
972 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
973 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
974
975 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)_UPD")>;
976 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
977 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
978 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)_UPD")>;
979 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
980 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo_UPD")>;
981
982 } // R52 SchedModel
0 ; REQUIRES: asserts
1 ; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-r52 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=R52_SCHED
2 ; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=generic -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
3 ;
4 ; Check the latency for instructions for both generic and cortex-r52.
5 ; Cortex-r52 machine model will cause the div to be scheduled before eor
6 ; as div takes more cycles to compute than eor.
7 ;
8 ; CHECK: ********** MI Scheduling **********
9 ; CHECK: foo:BB#0 entry
10 ; CHECK: EORrr
11 ; GENERIC: Latency : 1
12 ; R52_SCHED: Latency : 3
13 ; CHECK: MLA
14 ; GENERIC: Latency : 1
15 ; R52_SCHED: Latency : 4
16 ; CHECK: SDIV
17 ; GENERIC: Latency : 1
18 ; R52_SCHED: Latency : 8
19 ; CHECK: ** Final schedule for BB#0 ***
20 ; GENERIC: EORrr
21 ; GENERIC: SDIV
22 ; R52_SCHED: SDIV
23 ; R52_SCHED: EORrr
24 ; CHECK: ********** INTERVALS **********
25
26 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
27 target triple = "armv8r-arm-none-eabi"
28
; Test input for the scheduler checks above. The function computes
; ((%c xor %b) * %c + %a) - (%a sdiv %b): an xor -> mul -> add chain plus an
; sdiv that does not depend on that chain, joined by the final sub.
; The CHECK lines expect EORrr, MLA and SDIV in the scheduler debug output;
; presumably the mul and add are selected together as the MLA -- verify
; against the llc output if the IR is changed.
29 ; Function Attrs: norecurse nounwind readnone
30 define hidden i32 @foo(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
31 entry:
32 %xor = xor i32 %c, %b
33 %mul = mul nsw i32 %xor, %c
34 %add = add nsw i32 %mul, %a
35 %div = sdiv i32 %a, %b
36 %sub = sub i32 %add, %div
37 ret i32 %sub
38 }