llvm.org GIT mirror llvm / f8d5c73
[ARM] Classification Improvements to ARM Sched-Models. NFCI. This is part of a series of patches that make adding machine scheduler models for ARM processors easier and more compact. The patches define new sched-readwrites for groups of ARM instructions. These have been missing so far and, as a consequence, machine scheduler models for individual sub-targets have tended to be larger than they needed to be. The current patch focuses on floating-point instructions. Reviewers: Diana Picus (rovka), Renato Golin (rengolin) Differential Revision: https://reviews.llvm.org/D28194 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292825 91177308-0d34-0410-b5e6-96231b3b80d8 Javed Absar 3 years ago
6 changed file(s) with 365 addition(s) and 117 deletion(s). Raw diff Collapse all Expand all
335335 def VADDD : ADbI<0b11100, 0b11, 0, 0,
336336 (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
337337 IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
338 [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
338 [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>,
339 Sched<[WriteFPALU64]>;
339340
340341 let TwoOperandAliasConstraint = "$Sn = $Sd" in
341342 def VADDS : ASbIn<0b11100, 0b11, 0, 0,
342343 (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
343344 IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
344 [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
345 [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]>,
346 Sched<[WriteFPALU32]> {
345347 // Some single precision VFP instructions may be executed on both NEON and
346348 // VFP pipelines on A8.
347349 let D = VFPNeonA8Domain;
351353 def VADDH : AHbI<0b11100, 0b11, 0, 0,
352354 (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
353355 IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm",
354 []>;
356 []>,
357 Sched<[WriteFPALU32]>;
355358
356359 let TwoOperandAliasConstraint = "$Dn = $Dd" in
357360 def VSUBD : ADbI<0b11100, 0b11, 1, 0,
358361 (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
359362 IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
360 [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
363 [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>,
364 Sched<[WriteFPALU64]>;
361365
362366 let TwoOperandAliasConstraint = "$Sn = $Sd" in
363367 def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
364368 (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
365369 IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
366 [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
370 [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]>,
371 Sched<[WriteFPALU32]>{
367372 // Some single precision VFP instructions may be executed on both NEON and
368373 // VFP pipelines on A8.
369374 let D = VFPNeonA8Domain;
373378 def VSUBH : AHbI<0b11100, 0b11, 1, 0,
374379 (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
375380 IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm",
376 []>;
381 []>,
382 Sched<[WriteFPALU32]>;
377383
378384 let TwoOperandAliasConstraint = "$Dn = $Dd" in
379385 def VDIVD : ADbI<0b11101, 0b00, 0, 0,
380386 (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
381387 IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
382 [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
388 [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>,
389 Sched<[WriteFPDIV64]>;
383390
384391 let TwoOperandAliasConstraint = "$Sn = $Sd" in
385392 def VDIVS : ASbI<0b11101, 0b00, 0, 0,
386393 (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
387394 IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
388 [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
395 [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>,
396 Sched<[WriteFPDIV32]>;
389397
390398 let TwoOperandAliasConstraint = "$Sn = $Sd" in
391399 def VDIVH : AHbI<0b11101, 0b00, 0, 0,
392400 (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
393401 IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm",
394 []>;
402 []>,
403 Sched<[WriteFPDIV32]>;
395404
396405 let TwoOperandAliasConstraint = "$Dn = $Dd" in
397406 def VMULD : ADbI<0b11100, 0b10, 0, 0,
398407 (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
399408 IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
400 [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
409 [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>,
410 Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
401411
402412 let TwoOperandAliasConstraint = "$Sn = $Sd" in
403413 def VMULS : ASbIn<0b11100, 0b10, 0, 0,
404414 (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
405415 IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
406 [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
416 [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]>,
417 Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
407418 // Some single precision VFP instructions may be executed on both NEON and
408419 // VFP pipelines on A8.
409420 let D = VFPNeonA8Domain;
413424 def VMULH : AHbI<0b11100, 0b10, 0, 0,
414425 (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
415426 IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm",
416 []>;
427 []>,
428 Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
417429
418430 def VNMULD : ADbI<0b11100, 0b10, 1, 0,
419431 (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
420432 IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
421 [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>;
433 [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>,
434 Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
422435
423436 def VNMULS : ASbI<0b11100, 0b10, 1, 0,
424437 (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
425438 IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
426 [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
439 [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]>,
440 Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
427441 // Some single precision VFP instructions may be executed on both NEON and
428442 // VFP pipelines on A8.
429443 let D = VFPNeonA8Domain;
432446 def VNMULH : AHbI<0b11100, 0b10, 1, 0,
433447 (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
434448 IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm",
435 []>;
449 []>,
450 Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
436451
437452 multiclass vsel_inst opc, int CC> {
438453 let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
623638 def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
624639 (outs DPR:$Dd), (ins SPR:$Sm),
625640 IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
626 [(set DPR:$Dd, (fpextend SPR:$Sm))]> {
641 [(set DPR:$Dd, (fpextend SPR:$Sm))]>,
642 Sched<[WriteFPCVT]> {
627643 // Instruction operands.
628644 bits<5> Dd;
629645 bits<5> Sm;
640656 // Special case encoding: bits 11-8 is 0b1011.
641657 def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
642658 IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
643 [(set SPR:$Sd, (fpround DPR:$Dm))]> {
659 [(set SPR:$Sd, (fpround DPR:$Dm))]>,
660 Sched<[WriteFPCVT]> {
644661 // Instruction operands.
645662 bits<5> Sd;
646663 bits<5> Dm;
666683 def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
667684 /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
668685 [/* For disassembly only; pattern left blank */]>,
669 Requires<[HasFP16]>;
686 Requires<[HasFP16]>,
687 Sched<[WriteFPCVT]>;
670688
671689 def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
672690 /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
673691 [/* For disassembly only; pattern left blank */]>,
674 Requires<[HasFP16]>;
692 Requires<[HasFP16]>,
693 Sched<[WriteFPCVT]>;
675694
676695 def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
677696 /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
678697 [/* For disassembly only; pattern left blank */]>,
679 Requires<[HasFP16]>;
698 Requires<[HasFP16]>,
699 Sched<[WriteFPCVT]>;
680700
681701 def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
682702 /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
683703 [/* For disassembly only; pattern left blank */]>,
684 Requires<[HasFP16]>;
704 Requires<[HasFP16]>,
705 Sched<[WriteFPCVT]>;
685706
686707 def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
687708 (outs DPR:$Dd), (ins SPR:$Sm),
688709 NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
689 []>, Requires<[HasFPARMv8, HasDPVFP]> {
710 []>, Requires<[HasFPARMv8, HasDPVFP]>,
711 Sched<[WriteFPCVT]> {
690712 // Instruction operands.
691713 bits<5> Sm;
692714
945967 def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
946968 (outs DPR:$Dd), (ins DPR:$Dm),
947969 IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
948 [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>;
970 [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>,
971 Sched<[WriteFPSQRT64]>;
949972
950973 def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
951974 (outs SPR:$Sd), (ins SPR:$Sm),
952975 IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
953 [(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
976 [(set SPR:$Sd, (fsqrt SPR:$Sm))]>,
977 Sched<[WriteFPSQRT32]>;
954978
955979 def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0,
956980 (outs SPR:$Sd), (ins SPR:$Sm),
9861010 def VMOVRS : AVConv2I<0b11100001, 0b1010,
9871011 (outs GPR:$Rt), (ins SPR:$Sn),
9881012 IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
989 [(set GPR:$Rt, (bitconvert SPR:$Sn))]> {
1013 [(set GPR:$Rt, (bitconvert SPR:$Sn))]>,
1014 Sched<[WriteFPMOV]> {
9901015 // Instruction operands.
9911016 bits<4> Rt;
9921017 bits<5> Sn;
10091034 (outs SPR:$Sn), (ins GPR:$Rt),
10101035 IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
10111036 [(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
1012 Requires<[HasVFP2, UseVMOVSR]> {
1037 Requires<[HasVFP2, UseVMOVSR]>,
1038 Sched<[WriteFPMOV]> {
10131039 // Instruction operands.
10141040 bits<5> Sn;
10151041 bits<4> Rt;
10311057 def VMOVRRD : AVConv3I<0b11000101, 0b1011,
10321058 (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
10331059 IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
1034 [/* FIXME: Can't write pattern for multiple result instr*/]> {
1060 [/* FIXME: Can't write pattern for multiple result instr*/]>,
1061 Sched<[WriteFPMOV]> {
10351062 // Instruction operands.
10361063 bits<5> Dm;
10371064 bits<4> Rt;
10581085 def VMOVRRS : AVConv3I<0b11000101, 0b1010,
10591086 (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
10601087 IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
1061 [/* For disassembly only; pattern left blank */]> {
1088 [/* For disassembly only; pattern left blank */]>,
1089 Sched<[WriteFPMOV]> {
10621090 bits<5> src1;
10631091 bits<4> Rt;
10641092 bits<4> Rt2;
10841112 def VMOVDRR : AVConv5I<0b11000100, 0b1011,
10851113 (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
10861114 IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
1087 [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> {
1115 [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>,
1116 Sched<[WriteFPMOV]> {
10881117 // Instruction operands.
10891118 bits<5> Dm;
10901119 bits<4> Rt;
11271156 def VMOVSRR : AVConv5I<0b11000100, 0b1010,
11281157 (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
11291158 IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
1130 [/* For disassembly only; pattern left blank */]> {
1159 [/* For disassembly only; pattern left blank */]>,
1160 Sched<[WriteFPMOV]> {
11311161 // Instruction operands.
11321162 bits<5> dst1;
11331163 bits<4> src1;
11531183 (outs GPR:$Rt), (ins SPR:$Sn),
11541184 IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
11551185 []>,
1156 Requires<[HasFullFP16]> {
1186 Requires<[HasFullFP16]>,
1187 Sched<[WriteFPMOV]> {
11571188 // Instruction operands.
11581189 bits<4> Rt;
11591190 bits<5> Sn;
11721203 (outs SPR:$Sn), (ins GPR:$Rt),
11731204 IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
11741205 []>,
1175 Requires<[HasFullFP16]> {
1206 Requires<[HasFullFP16]>,
1207 Sched<[WriteFPMOV]> {
11761208 // Instruction operands.
11771209 bits<5> Sn;
11781210 bits<4> Rt;
12531285 def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
12541286 (outs DPR:$Dd), (ins SPR:$Sm),
12551287 IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
1256 []> {
1288 []>,
1289 Sched<[WriteFPCVT]> {
12571290 let Inst{7} = 1; // s32
12581291 }
12591292
12681301 def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
12691302 (outs SPR:$Sd),(ins SPR:$Sm),
12701303 IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
1271 []> {
1304 []>,
1305 Sched<[WriteFPCVT]> {
12721306 let Inst{7} = 1; // s32
12731307
12741308 // Some single precision VFP instructions may be executed on both NEON and
12851319 def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
12861320 (outs SPR:$Sd), (ins SPR:$Sm),
12871321 IIC_fpCVTIH, "vcvt", ".f16.s32\t$Sd, $Sm",
1288 []> {
1322 []>,
1323 Sched<[WriteFPCVT]> {
12891324 let Inst{7} = 1; // s32
12901325 }
12911326
12921327 def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
12931328 (outs DPR:$Dd), (ins SPR:$Sm),
12941329 IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
1295 []> {
1330 []>,
1331 Sched<[WriteFPCVT]> {
12961332 let Inst{7} = 0; // u32
12971333 }
12981334
13071343 def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
13081344 (outs SPR:$Sd), (ins SPR:$Sm),
13091345 IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
1310 []> {
1346 []>,
1347 Sched<[WriteFPCVT]> {
13111348 let Inst{7} = 0; // u32
13121349
13131350 // Some single precision VFP instructions may be executed on both NEON and
13241361 def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
13251362 (outs SPR:$Sd), (ins SPR:$Sm),
13261363 IIC_fpCVTIH, "vcvt", ".f16.u32\t$Sd, $Sm",
1327 []> {
1364 []>,
1365 Sched<[WriteFPCVT]> {
13281366 let Inst{7} = 0; // u32
13291367 }
13301368
13891427 def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
13901428 (outs SPR:$Sd), (ins DPR:$Dm),
13911429 IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
1392 []> {
1430 []>,
1431 Sched<[WriteFPCVT]> {
13931432 let Inst{7} = 1; // Z bit
13941433 }
13951434
14041443 def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
14051444 (outs SPR:$Sd), (ins SPR:$Sm),
14061445 IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
1407 []> {
1446 []>,
1447 Sched<[WriteFPCVT]> {
14081448 let Inst{7} = 1; // Z bit
14091449
14101450 // Some single precision VFP instructions may be executed on both NEON and
14221462 def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
14231463 (outs SPR:$Sd), (ins SPR:$Sm),
14241464 IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm",
1425 []> {
1465 []>,
1466 Sched<[WriteFPCVT]> {
14261467 let Inst{7} = 1; // Z bit
14271468 }
14281469
14291470 def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
14301471 (outs SPR:$Sd), (ins DPR:$Dm),
14311472 IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
1432 []> {
1473 []>,
1474 Sched<[WriteFPCVT]> {
14331475 let Inst{7} = 1; // Z bit
14341476 }
14351477
14441486 def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
14451487 (outs SPR:$Sd), (ins SPR:$Sm),
14461488 IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
1447 []> {
1489 []>,
1490 Sched<[WriteFPCVT]> {
14481491 let Inst{7} = 1; // Z bit
14491492
14501493 // Some single precision VFP instructions may be executed on both NEON and
14621505 def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
14631506 (outs SPR:$Sd), (ins SPR:$Sm),
14641507 IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm",
1465 []> {
1508 []>,
1509 Sched<[WriteFPCVT]> {
14661510 let Inst{7} = 1; // Z bit
14671511 }
14681512
14721516 def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
14731517 (outs SPR:$Sd), (ins DPR:$Dm),
14741518 IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
1475 [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{
1519 [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>,
1520 Sched<[WriteFPCVT]> {
14761521 let Inst{7} = 0; // Z bit
14771522 }
14781523
14791524 def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
14801525 (outs SPR:$Sd), (ins SPR:$Sm),
14811526 IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
1482 [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> {
1527 [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]>,
1528 Sched<[WriteFPCVT]> {
14831529 let Inst{7} = 0; // Z bit
14841530 }
14851531
14861532 def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
14871533 (outs SPR:$Sd), (ins SPR:$Sm),
14881534 IIC_fpCVTHI, "vcvtr", ".s32.f16\t$Sd, $Sm",
1489 []> {
1535 []>,
1536 Sched<[WriteFPCVT]> {
14901537 let Inst{7} = 0; // Z bit
14911538 }
14921539
14931540 def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
14941541 (outs SPR:$Sd), (ins DPR:$Dm),
14951542 IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
1496 [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{
1543 [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>,
1544 Sched<[WriteFPCVT]> {
14971545 let Inst{7} = 0; // Z bit
14981546 }
14991547
15001548 def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
15011549 (outs SPR:$Sd), (ins SPR:$Sm),
15021550 IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
1503 [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
1551 [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]>,
1552 Sched<[WriteFPCVT]> {
15041553 let Inst{7} = 0; // Z bit
15051554 }
15061555
15071556 def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
15081557 (outs SPR:$Sd), (ins SPR:$Sm),
15091558 IIC_fpCVTHI, "vcvtr", ".u32.f16\t$Sd, $Sm",
1510 []> {
1559 []>,
1560 Sched<[WriteFPCVT]> {
15111561 let Inst{7} = 0; // Z bit
15121562 }
15131563 }
15271577 class AVConv1XInsS_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4,
15281578 bit op5, dag oops, dag iops, InstrItinClass itin,
15291579 string opc, string asm, list pattern>
1530 : AVConv1XI,
1531 Sched<[WriteCvtFP]> {
1580 : AVConv1XI> {
15321581 bits<5> dst;
15331582 // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
15341583 let Inst{22} = dst{0};
15391588 class AVConv1XInsD_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4,
15401589 bit op5, dag oops, dag iops, InstrItinClass itin,
15411590 string opc, string asm, list pattern>
1542 : AVConv1XI,
1543 Sched<[WriteCvtFP]> {
1591 : AVConv1XI> {
15441592 bits<5> dst;
15451593 // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
15461594 let Inst{22} = dst{4};
15521600 def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0,
15531601 (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
15541602 IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>,
1555 Requires<[HasFullFP16]>;
1603 Requires<[HasFullFP16]>,
1604 Sched<[WriteFPCVT]>;
15561605
15571606 def VTOUHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 0,
15581607 (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
15591608 IIC_fpCVTHI, "vcvt", ".u16.f16\t$dst, $a, $fbits", []>,
1560 Requires<[HasFullFP16]>;
1609 Requires<[HasFullFP16]>,
1610 Sched<[WriteFPCVT]>;
15611611
15621612 def VTOSLH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 1,
15631613 (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
15641614 IIC_fpCVTHI, "vcvt", ".s32.f16\t$dst, $a, $fbits", []>,
1565 Requires<[HasFullFP16]>;
1615 Requires<[HasFullFP16]>,
1616 Sched<[WriteFPCVT]>;
15661617
15671618 def VTOULH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1,
15681619 (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
15691620 IIC_fpCVTHI, "vcvt", ".u32.f16\t$dst, $a, $fbits", []>,
1570 Requires<[HasFullFP16]>;
1621 Requires<[HasFullFP16]>,
1622 Sched<[WriteFPCVT]>;
15711623
15721624 def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
15731625 (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
1574 IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> {
1626 IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []>,
1627 Sched<[WriteFPCVT]> {
15751628 // Some single precision VFP instructions may be executed on both NEON and
15761629 // VFP pipelines on A8.
15771630 let D = VFPNeonA8Domain;
16031656
16041657 def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
16051658 (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
1606 IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>;
1659 IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>,
1660 Sched<[WriteFPCVT]>;
16071661
16081662 def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
16091663 (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
1610 IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>;
1664 IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>,
1665 Sched<[WriteFPCVT]>;
16111666
16121667 def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
16131668 (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
1614 IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>;
1669 IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>,
1670 Sched<[WriteFPCVT]>;
16151671
16161672 def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
16171673 (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
1618 IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>;
1674 IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>,
1675 Sched<[WriteFPCVT]>;
16191676
16201677 // Fixed-Point to FP:
16211678
16221679 def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0,
16231680 (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
16241681 IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>,
1625 Requires<[HasFullFP16]>;
1682 Requires<[HasFullFP16]>,
1683 Sched<[WriteFPCVT]>;
16261684
16271685 def VUHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 0,
16281686 (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
16291687 IIC_fpCVTIH, "vcvt", ".f16.u16\t$dst, $a, $fbits", []>,
1630 Requires<[HasFullFP16]>;
1688 Requires<[HasFullFP16]>,
1689 Sched<[WriteFPCVT]>;
16311690
16321691 def VSLTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 1,
16331692 (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
16341693 IIC_fpCVTIH, "vcvt", ".f16.s32\t$dst, $a, $fbits", []>,
1635 Requires<[HasFullFP16]>;
1694 Requires<[HasFullFP16]>,
1695 Sched<[WriteFPCVT]>;
16361696
16371697 def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1,
16381698 (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
16391699 IIC_fpCVTIH, "vcvt", ".f16.u32\t$dst, $a, $fbits", []>,
1640 Requires<[HasFullFP16]>;
1700 Requires<[HasFullFP16]>,
1701 Sched<[WriteFPCVT]>;
16411702
16421703 def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
16431704 (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
1644 IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> {
1705 IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []>,
1706 Sched<[WriteFPCVT]> {
16451707 // Some single precision VFP instructions may be executed on both NEON and
16461708 // VFP pipelines on A8.
16471709 let D = VFPNeonA8Domain;
16491711
16501712 def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
16511713 (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
1652 IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> {
1714 IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []>,
1715 Sched<[WriteFPCVT]> {
16531716 // Some single precision VFP instructions may be executed on both NEON and
16541717 // VFP pipelines on A8.
16551718 let D = VFPNeonA8Domain;
16571720
16581721 def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
16591722 (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
1660 IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> {
1723 IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []>,
1724 Sched<[WriteFPCVT]> {
16611725 // Some single precision VFP instructions may be executed on both NEON and
16621726 // VFP pipelines on A8.
16631727 let D = VFPNeonA8Domain;
16651729
16661730 def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
16671731 (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
1668 IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> {
1732 IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []>,
1733 Sched<[WriteFPCVT]> {
16691734 // Some single precision VFP instructions may be executed on both NEON and
16701735 // VFP pipelines on A8.
16711736 let D = VFPNeonA8Domain;
16731738
16741739 def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
16751740 (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
1676 IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>;
1741 IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>,
1742 Sched<[WriteFPCVT]>;
16771743
16781744 def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
16791745 (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
1680 IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>;
1746 IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>,
1747 Sched<[WriteFPCVT]>;
16811748
16821749 def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
16831750 (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
1684 IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>;
1751 IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>,
1752 Sched<[WriteFPCVT]>;
16851753
16861754 def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
16871755 (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
1688 IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>;
1756 IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>,
1757 Sched<[WriteFPCVT]>;
16891758
16901759 } // End of 'let Constraints = "$a = $dst" in'
16911760
16991768 [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
17001769 (f64 DPR:$Ddin)))]>,
17011770 RegConstraint<"$Ddin = $Dd">,
1702 Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
1771 Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
1772 Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
17031773
17041774 def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
17051775 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
17071777 [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
17081778 SPR:$Sdin))]>,
17091779 RegConstraint<"$Sdin = $Sd">,
1710 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
1780 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
1781 Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
17111782 // Some single precision VFP instructions may be executed on both NEON and
17121783 // VFP pipelines on A8.
17131784 let D = VFPNeonA8Domain;
17331804 [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
17341805 (f64 DPR:$Ddin)))]>,
17351806 RegConstraint<"$Ddin = $Dd">,
1736 Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
1807 Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
1808 Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
17371809
17381810 def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
17391811 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
17411813 [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
17421814 SPR:$Sdin))]>,
17431815 RegConstraint<"$Sdin = $Sd">,
1744 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
1816 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
1817 Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
17451818 // Some single precision VFP instructions may be executed on both NEON and
17461819 // VFP pipelines on A8.
17471820 let D = VFPNeonA8Domain;
17671840 [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
17681841 (f64 DPR:$Ddin)))]>,
17691842 RegConstraint<"$Ddin = $Dd">,
1770 Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
1843 Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
1844 Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
17711845
17721846 def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
17731847 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
17751849 [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
17761850 SPR:$Sdin))]>,
17771851 RegConstraint<"$Sdin = $Sd">,
1778 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
1852 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
1853 Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
17791854 // Some single precision VFP instructions may be executed on both NEON and
17801855 // VFP pipelines on A8.
17811856 let D = VFPNeonA8Domain;
18011876 [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
18021877 (f64 DPR:$Ddin)))]>,
18031878 RegConstraint<"$Ddin = $Dd">,
1804 Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
1879 Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
1880 Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
18051881
18061882 def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
18071883 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
18081884 IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
18091885 [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
18101886 RegConstraint<"$Sdin = $Sd">,
1811 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
1887 Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
1888 Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
18121889 // Some single precision VFP instructions may be executed on both NEON and
18131890 // VFP pipelines on A8.
18141891 let D = VFPNeonA8Domain;
18371914 [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
18381915 (f64 DPR:$Ddin)))]>,
18391916 RegConstraint<"$Ddin = $Dd">,
1840 Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
1917 Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
1918 Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
18411919
18421920 def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
18431921 (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
18451923 [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
18461924 SPR:$Sdin))]>,
18471925 RegConstraint<"$Sdin = $Sd">,
1848 Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
1926 Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
1927 Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
18491928 // Some single precision VFP instructions may be executed on both NEON and
18501929 // VFP pipelines.
18511930 }
18551934 IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm",
18561935 []>,
18571936 RegConstraint<"$Sdin = $Sd">,
1858 Requires<[HasFullFP16,UseFusedMAC]>;
1937 Requires<[HasFullFP16,UseFusedMAC]>,
1938 Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
18591939
18601940 def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
18611941 (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
66 //
77 //===----------------------------------------------------------------------===//
88 //===----------------------------------------------------------------------===//
9 // Instruction scheduling annotations for out-of-order CPUs.
9 // Instruction scheduling annotations for in-order and out-of-order CPUs.
1010 // These annotations are independent of the itinerary class defined below.
1111 // Here we define the subtarget independent read/write per-operand resources.
1212 // The subtarget schedule definitions will then map these to the subtarget's
5353 // }
5454 // def : ReadAdvance;
5555
56 //===----------------------------------------------------------------------===//
57 // Sched definitions for integer pipeline instructions
58 //
5659 // Basic ALU operation.
5760 def WriteALU : SchedWrite;
5861 def ReadALU : SchedRead;
8083 def WriteBrL : SchedWrite;
8184 def WriteBrTbl : SchedWrite;
8285
83 // Fixpoint conversions.
84 def WriteCvtFP : SchedWrite;
85
8686 // Noop.
8787 def WriteNoop : SchedWrite;
88
89 //===----------------------------------------------------------------------===//
90 // Sched definitions for floating-point and neon instructions
91 //
92 // Floating point conversions
93 def WriteFPCVT : SchedWrite;
94 def WriteFPMOV : SchedWrite; // FP -> GPR and vice-versa
95
96 // ALU operations (32/64-bit)
97 def WriteFPALU32 : SchedWrite;
98 def WriteFPALU64 : SchedWrite;
99
100 // Multiplication
101 def WriteFPMUL32 : SchedWrite;
102 def WriteFPMUL64 : SchedWrite;
103 def ReadFPMUL : SchedRead; // multiplier read
104 def ReadFPMAC : SchedRead; // accumulator read
105
106 // Multiply-accumulate
107 def WriteFPMAC32 : SchedWrite;
108 def WriteFPMAC64 : SchedWrite;
109
110 // Division
111 def WriteFPDIV32 : SchedWrite;
112 def WriteFPDIV64 : SchedWrite;
113
114 // Square-root
115 def WriteFPSQRT32 : SchedWrite;
116 def WriteFPSQRT64 : SchedWrite;
88117
89118 // Define TII for use in SchedVariant Predicates.
90119 def : PredicateProlog<[{
24702470 def : SchedAlias;
24712471 def : SchedAlias;
24722472 def : SchedAlias;
2473
2474 // ===---------------------------------------------------------------------===//
2475 // Floating-point. Map target defined SchedReadWrite to processor specific ones
2476 //
2477 def : WriteRes { let Latency = 4; }
2478 def : SchedAlias;
2479
2480 def : SchedAlias;
2481 def : SchedAlias;
2482
2483 def : SchedAlias;
2484 def : SchedAlias;
2485
2486 def : SchedAlias;
2487 def : SchedAlias;
2488
2489 def : SchedAlias;
2490 def : SchedAlias;
2491 def : SchedAlias;
2492 def : SchedAlias;
2493
2494 def : ReadAdvance;
2495 def : ReadAdvance;
2496
2497 // ===---------------------------------------------------------------------===//
2498 // Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types.
2499 //
24732500 def : InstRW< [WriteALU],
24742501 (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
24752502 "BICrr")>;
25232550 def : WriteRes;
25242551 def : WriteRes;
25252552 def : WriteRes;
2526 def : SchedAlias;
25272553 def : WriteRes { let Latency = 0; let NumMicroOps = 0; }
25282554 } // SchedModel = CortexA9Model
8585
8686 // Misc
8787 def : WriteRes { let Latency = 0; let NumMicroOps = 0; }
88 def : WriteRes { let Latency = 3; }
89
88
89 // Integer pipeline by-passes
9090 def : ReadAdvance; // Operand needed in EX1 stage
9191 def : ReadAdvance; // Shift operands needed in ISS
92
93
94 // Floating-point. Map target-defined SchedReadWrites to subtarget-specific ones.
95 def : WriteRes { let Latency = 6; }
96
97 def : WriteRes {
98 let Latency = 6;
99 }
100
101 def : WriteRes {
102 let Latency = 11; // as it is internally two insns (MUL then ADD)
103 }
104
105 def : WriteRes
106 R52UnitFPALU, R52UnitFPALU]> {
107 let Latency = 11;
108 }
109
110 def : WriteRes {
111 let Latency = 7; // FP div takes fixed #cycles
112 let ResourceCycles = [7]; // is not pipelined
113 }
114
115 def : WriteRes {
116 let Latency = 17;
117 let ResourceCycles = [17];
118 }
119
120 def : WriteRes { let Latency = 7; }
121 def : WriteRes { let Latency = 17; }
122
123 def : ReadAdvance; // mul operand read in F1
124 def : ReadAdvance; // fp-mac operand read in F1
92125
93126
94127 //===----------------------------------------------------------------------===//
146179 def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
147180 def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
148181
149 def R52WriteFPDIV_SP : SchedWriteRes<[R52UnitFPDIV]> {
150 let Latency = 7; // FP div takes fixed #cycles
151 let ResourceCycles = [7]; // is not pipelined
152 }
153 def R52WriteFPDIV_DP : SchedWriteRes<[R52UnitFPDIV]> {
154 let Latency = 17;
155 let ResourceCycles = [17];
156 }
157
158
159182 //===----------------------------------------------------------------------===//
160 // Subtarget-specific - map operands to SchedReadWrites
161
183 // Floating-point. Map target defined SchedReadWrites to processor specific ones
184 //
185 def : SchedAlias;
186 def : SchedAlias;
187 def : SchedAlias;
188 def : SchedAlias;
189
190 //===----------------------------------------------------------------------===//
191 // Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types.
192 //
162193 def : InstRW<[WriteALU], (instrs COPY)>;
163194
164195 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
490521
491522 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
492523 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
493
494 def : InstRW<[R52WriteFPDIV_SP, R52Read_F0, R52Read_F0], (instregex "VDIV(S|H)")>;
495 def : InstRW<[R52WriteFPDIV_DP, R52Read_F0, R52Read_F0], (instregex "VDIVD")>;
496
497 def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
498 (instregex "(VFMA|VFMS|VFNMA|VFNMS)(D|H|S)")>;
499524
500525 def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
501526 def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
776801
777802 def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
778803 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
779 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VMOV", "VORR", "VORN", "VREV")>;
804 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>;
780805 def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
781 def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VMUL", "VNMUL", "VMLA")>;
782806 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
783807 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
784808 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
596596 def : InstRW<[SwiftWriteP1FourCycle],
597597 (instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH",
598598 "VMULL", "VQDMULL")>;
599 def : InstRW<[SwiftWriteP1SixCycle],
600 (instregex "VMULD", "VNMULD")>;
601599 def : InstRW<[SwiftWriteP1FourCycle],
602600 (instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)",
603601 "VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>;
606604
607605 // 4.2.36 Advanced SIMD and VFP, Convert
608606 def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
609 // Fixed-point conversions.
610 def : WriteRes { let Latency = 4; }
611607
612608 // 4.2.37 Advanced SIMD and VFP, Move
613609 def : InstRW<[SwiftWriteP0TwoCycle],
10351031 def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;
10361032 def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;
10371033
1034 // ===---------------------------------------------------------------------===//
1035 // Floating-point. Map target defined SchedReadWrite to processor specific ones
1036 //
1037 def : SchedAlias;
1038 def : SchedAlias;
1039
1040 def : SchedAlias;
1041 def : SchedAlias;
1042
1043 def : SchedAlias;
1044 def : SchedAlias;
1045
1046 def : SchedAlias;
1047 def : SchedAlias;
1048
1049 def : SchedAlias;
1050 def : SchedAlias;
1051
1052 def : SchedAlias;
1053 def : SchedAlias;
1054
1055 def : ReadAdvance;
1056 def : ReadAdvance;
1057
10381058 // Not specified.
10391059 def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
10401060 // Preload.
0 ; REQUIRES: asserts
1 ; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a9 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \
2 ; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9
3 ; RUN: llc < %s -mtriple=arm-eabi -mcpu=swift -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \
4 ; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT
5 ; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-r52 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \
6 ; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52
7 ;
8 ; Check the latency of instructions for processors with sched-models
9 ;
10 ; Function Attrs: norecurse nounwind readnone
11 define i32 @foo(float %a, float %b, float %c, i32 %d) local_unnamed_addr #0 {
12 entry:
13 ;
14 ; CHECK: ********** MI Scheduling **********
15 ; CHECK_A9: VADDS
16 ; CHECK_SWIFT: VADDfd
17 ; CHECK_R52: VADDS
18 ; CHECK_A9: Latency : 5
19 ; CHECK_SWIFT: Latency : 4
20 ; CHECK_R52: Latency : 6
21 ;
22 ; CHECK_A9: VMULS
23 ; CHECK_SWIFT: VMULfd
24 ; CHECK_R52: VMULS
25 ; CHECK_SWIFT: Latency : 4
26 ; CHECK_A9: Latency : 6
27 ; CHECK_R52: Latency : 6
28 ;
29 ; CHECK: VDIVS
30 ; CHECK_SWIFT: Latency : 17
31 ; CHECK_A9: Latency : 16
32 ; CHECK_R52: Latency : 7
33 ;
34 ; CHECK: VCVTDS
35 ; CHECK_SWIFT: Latency : 4
36 ; CHECK_A9: Latency : 5
37 ; CHECK_R52: Latency : 6
38 ;
39 ; CHECK: VADDD
40 ; CHECK_SWIFT: Latency : 6
41 ; CHECK_A9: Latency : 5
42 ; CHECK_R52: Latency : 6
43 ;
44 ; CHECK: VMULD
45 ; CHECK_SWIFT: Latency : 6
46 ; CHECK_A9: Latency : 7
47 ; CHECK_R52: Latency : 6
48 ;
49 ; CHECK: VDIVD
50 ; CHECK_SWIFT: Latency : 32
51 ; CHECK_A9: Latency : 26
52 ; CHECK_R52: Latency : 17
53 ;
54 ; CHECK: VTOSIZD
55 ; CHECK_SWIFT: Latency : 4
56 ; CHECK_A9: Latency : 5
57 ; CHECK_R52: Latency : 6
58 ;
59 %add = fadd float %a, %b
60 %mul = fmul float %add, %add
61 %div = fdiv float %mul, %b
62 %conv1 = fpext float %div to double
63 %add3 = fadd double %conv1, %conv1
64 %mul4 = fmul double %add3, %add3
65 %div5 = fdiv double %mul4, %conv1
66 %conv6 = fptosi double %div5 to i32
67 ret i32 %conv6
68 }