llvm.org GIT mirror llvm / 1a8f36e
Enhance the InstrStage object to enable the specification of an Itinerary with overlapping stages. The default is to maintain the current behavior that the "next" stage immediately follows the previous one. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78827 91177308-0d34-0410-b5e6-96231b3b80d8 David Goodwin 10 years ago
8 changed file(s) with 170 addition(s) and 118 deletion(s). Raw diff Collapse all Expand all
77 //===----------------------------------------------------------------------===//
88 //
99 // This file describes the structures used for instruction itineraries and
10 // states. This is used by schedulers to determine instruction states and
10 // stages. This is used by schedulers to determine instruction stages and
1111 // latencies.
1212 //
1313 //===----------------------------------------------------------------------===//
1515 #ifndef LLVM_TARGET_TARGETINSTRITINERARIES_H
1616 #define LLVM_TARGET_TARGETINSTRITINERARIES_H
1717
18 #include <algorithm>
19
1820 namespace llvm {
1921
2022 //===----------------------------------------------------------------------===//
21 /// Instruction stage - These values represent a step in the execution of an
22 /// instruction. The latency represents the number of discrete time slots
23 /// needed to complete the stage. Units represent the choice of functional
24 /// units that can be used to complete the stage. Eg. IntUnit1, IntUnit2.
23 /// Instruction stage - These values represent a non-pipelined step in
24 /// the execution of an instruction. Cycles represents the number of
25 /// discrete time slots needed to complete the stage. Units represent
26 /// the choice of functional units that can be used to complete the
27 /// stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many
28 /// cycles should elapse from the start of this stage to the start of
29 /// the next stage in the itinerary. A value of -1 indicates that the
30 /// next stage should start immediately after the current one.
31 /// For example:
32 ///
33 /// { 1, x, -1 }
34 /// indicates that the stage occupies FU x for 1 cycle and that
35 /// the next stage starts immediately after this one.
36 ///
37 /// { 2, x|y, 1 }
38 /// indicates that the stage occupies either FU x or FU y for 2
39 /// consecutive cycles and that the next stage starts one cycle
40 /// after this stage starts. That is, the stage requirements
41 /// overlap in time.
42 ///
43 /// { 1, x, 0 }
44 /// indicates that the stage occupies FU x for 1 cycle and that
45 /// the next stage starts in this same cycle. This can be used to
46 /// indicate that the instruction requires multiple stages at the
47 /// same time.
2548 ///
2649 struct InstrStage {
27 unsigned Cycles; ///< Length of stage in machine cycles
28 unsigned Units; ///< Choice of functional units
50 unsigned Cycles_; ///< Length of stage in machine cycles
51 unsigned Units_; ///< Choice of functional units
52 int NextCycles_; ///< Number of machine cycles to next stage
53
54 /// getCycles - returns the number of cycles the stage is occupied
55 unsigned getCycles() const {
56 return Cycles_;
57 }
58
59 /// getUnits - returns the choice of FUs
60 unsigned getUnits() const {
61 return Units_;
62 }
63
64 /// getNextCycles - returns the number of cycles from the start of
65 /// this stage to the start of the next stage in the itinerary
66 unsigned getNextCycles() const {
67 return (NextCycles_ >= 0) ? (unsigned)NextCycles_ : Cycles_;
68 }
2969 };
3070
3171
83123 if (isEmpty())
84124 return 1;
85125
86 // Just sum the cycle count for each stage. The assumption is that all
87 // inputs are consumed at the start of the first stage and that all
88 // outputs are produced at the end of the last stage.
89 unsigned Latency = 0;
126 // Calculate the maximum completion time for any stage. The
127 // assumption is that all inputs are consumed at the start of the
128 // first stage and that all outputs are produced at the end of the
129 // latest-completing stage.
130 unsigned Latency = 0, StartCycle = 0;
90131 for (const InstrStage *IS = begin(ItinClassIndx), *E = end(ItinClassIndx);
91 IS != E; ++IS)
92 Latency += IS->Cycles;
132 IS != E; ++IS) {
133 Latency = std::max(Latency, StartCycle + IS->getCycles());
134 StartCycle += IS->getNextCycles();
135 }
136
93137 return Latency;
94138 }
95139 };
2222 class FuncUnit;
2323
2424 //===----------------------------------------------------------------------===//
25 // Instruction stage - These values represent a step in the execution of an
26 // instruction. The latency represents the number of discrete time slots used
27 // need to complete the stage. Units represent the choice of functional units
28 // that can be used to complete the stage. Eg. IntUnit1, IntUnit2.
25 // Instruction stage - These values represent a non-pipelined step in
26 // the execution of an instruction. Cycles represents the number of
27 // discrete time slots needed to complete the stage. Units represent
28 // the choice of functional units that can be used to complete the
29 // stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many
30 // cycles should elapse from the start of this stage to the start of
31 // the next stage in the itinerary. For example:
2932 //
30 class InstrStage units> {
33 // A stage is specified in one of two ways:
34 //
35 // InstrStage<1, [FU_x, FU_y]> - TimeInc defaults to Cycles
36 // InstrStage<1, [FU_x, FU_y], 0> - TimeInc explicit
37 //
38 class InstrStage<int cycles, list<FuncUnit> units, int timeinc = -1> {
3139 int Cycles = cycles; // length of stage in machine cycles
3240 list<FuncUnit> Units = units; // choice of functional units
41 int TimeInc = timeinc; // cycles till start of next stage
3342 }
3443
3544 //===----------------------------------------------------------------------===//
3333 // If the begin stage of an itinerary has 0 cycles and units,
3434 // then we have reached the end of the itineraries.
3535 const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
36 if ((IS->Cycles == 0) && (IS->Units == 0))
36 if ((IS->getCycles() == 0) && (IS->getUnits() == 0))
3737 break;
3838
3939 unsigned ItinDepth = 0;
4040 for (; IS != E; ++IS)
41 ItinDepth += std::max(1U, IS->Cycles);
41 ItinDepth += IS->getCycles();
4242
4343 ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth);
4444 }
8888 unsigned idx = SU->getInstr()->getDesc().getSchedClass();
8989 for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
9090 IS != E; ++IS) {
91 // If the stages cycles are 0, then we must have the FU free in
92 // the current cycle, but we don't advance the cycle time .
93 unsigned StageCycles = std::max(1U, IS->Cycles);
91 // We must find one of the stage's units free for every cycle the
92 // stage is occupied. FIXME it would be more accurate to find the
93 // same unit free in all the cycles.
94 for (unsigned int i = 0; i < IS->getCycles(); ++i) {
95 assert(((cycle + i) < ScoreboardDepth) &&
96 "Scoreboard depth exceeded!");
9497
95 // We must find one of the stage's units free for every cycle the
96 // stage is occupied.
97 for (unsigned int i = 0; i < StageCycles; ++i) {
98 assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
99
100 unsigned index = getFutureIndex(cycle);
101 unsigned freeUnits = IS->Units & ~Scoreboard[index];
98 unsigned index = getFutureIndex(cycle + i);
99 unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
102100 if (!freeUnits) {
103 DEBUG(errs() << "*** Hazard in cycle " << cycle << ", ");
101 DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", ");
104102 DEBUG(errs() << "SU(" << SU->NodeNum << "): ");
105103 DEBUG(SU->getInstr()->dump());
106104 return Hazard;
107105 }
106 }
108107
109 if (IS->Cycles > 0)
110 ++cycle;
111 }
108 // Advance the cycle to the next stage.
109 cycle += IS->getNextCycles();
112110 }
113111
114112 return NoHazard;
122120 unsigned idx = SU->getInstr()->getDesc().getSchedClass();
123121 for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
124122 IS != E; ++IS) {
125 // If the stages cycles are 0, then we must reserve the FU in the
126 // current cycle, but we don't advance the cycle time .
127 unsigned StageCycles = std::max(1U, IS->Cycles);
123 // We must reserve one of the stage's units for every cycle the
124 // stage is occupied. FIXME it would be more accurate to reserve
125 // the same unit in all the cycles.
126 for (unsigned int i = 0; i < IS->getCycles(); ++i) {
127 assert(((cycle + i) < ScoreboardDepth) &&
128 "Scoreboard depth exceeded!");
128129
129 // We must reserve one of the stage's units for every cycle the
130 // stage is occupied.
131 for (unsigned int i = 0; i < StageCycles; ++i) {
132 assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
133
134 unsigned index = getFutureIndex(cycle);
135 unsigned freeUnits = IS->Units & ~Scoreboard[index];
130 unsigned index = getFutureIndex(cycle + i);
131 unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
136132
137133 // reduce to a single unit
138134 unsigned freeUnit = 0;
143139
144140 assert(freeUnit && "No function unit available!");
145141 Scoreboard[index] |= freeUnit;
146
147 if (IS->Cycles > 0)
148 ++cycle;
149142 }
143
144 // Advance the cycle to the next stage.
145 cycle += IS->getNextCycles();
150146 }
151147
152148 DEBUG(dumpScoreboard());
614614 []>;
615615
616616 // On non-Darwin platforms R9 is callee-saved.
617 let isCall = 1, Itinerary = IIC_Br,
617 let isCall = 1,
618618 Defs = [R0, R1, R2, R3, R12, LR,
619619 D0, D1, D2, D3, D4, D5, D6, D7,
620620 D16, D17, D18, D19, D20, D21, D22, D23,
651651 }
652652
653653 // On Darwin R9 is call-clobbered.
654 let isCall = 1, Itinerary = IIC_Br,
654 let isCall = 1,
655655 Defs = [R0, R1, R2, R3, R9, R12, LR,
656656 D0, D1, D2, D3, D4, D5, D6, D7,
657657 D16, D17, D18, D19, D20, D21, D22, D23,
684684 }
685685 }
686686
687 let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in {
687 let isBranch = 1, isTerminator = 1 in {
688688 // B is "predicable" since it can be xformed into a Bcc.
689689 let isBarrier = 1 in {
690690 let isPredicable = 1 in
10561056 BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
10571057
10581058 def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
1059 AddrMode1, Size4Bytes, IndexModeNone, DPFrm, NoItinerary,
1059 AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALU,
10601060 "bfc", " $dst, $imm", "$src = $dst",
10611061 [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
10621062 Requires<[IsARM, HasV6T2]> {
10831083 //
10841084
10851085 let isCommutable = 1 in
1086 def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
1086 def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
10871087 "mul", " $dst, $a, $b",
10881088 [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
10891089
10901090 def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
1091 IIC_iALU, "mla", " $dst, $a, $b, $c",
1091 IIC_iMPY, "mla", " $dst, $a, $b, $c",
10921092 [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
10931093
10941094 def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
1095 IIC_iALU, "mls", " $dst, $a, $b, $c",
1095 IIC_iMPY, "mls", " $dst, $a, $b, $c",
10961096 [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>,
10971097 Requires<[IsARM, HasV6T2]>;
10981098
11001100 let neverHasSideEffects = 1 in {
11011101 let isCommutable = 1 in {
11021102 def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
1103 (ins GPR:$a, GPR:$b), IIC_iALU,
1103 (ins GPR:$a, GPR:$b), IIC_iMPY,
11041104 "smull", " $ldst, $hdst, $a, $b", []>;
11051105
11061106 def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
1107 (ins GPR:$a, GPR:$b), IIC_iALU,
1107 (ins GPR:$a, GPR:$b), IIC_iMPY,
11081108 "umull", " $ldst, $hdst, $a, $b", []>;
11091109 }
11101110
11111111 // Multiply + accumulate
11121112 def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst),
1113 (ins GPR:$a, GPR:$b), IIC_iALU,
1113 (ins GPR:$a, GPR:$b), IIC_iMPY,
11141114 "smlal", " $ldst, $hdst, $a, $b", []>;
11151115
11161116 def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst),
1117 (ins GPR:$a, GPR:$b), IIC_iALU,
1117 (ins GPR:$a, GPR:$b), IIC_iMPY,
11181118 "umlal", " $ldst, $hdst, $a, $b", []>;
11191119
11201120 def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
1121 (ins GPR:$a, GPR:$b), IIC_iALU,
1121 (ins GPR:$a, GPR:$b), IIC_iMPY,
11221122 "umaal", " $ldst, $hdst, $a, $b", []>,
11231123 Requires<[IsARM, HasV6]>;
11241124 } // neverHasSideEffects
11251125
11261126 // Most significant word multiply
11271127 def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
1128 IIC_iALU, "smmul", " $dst, $a, $b",
1128 IIC_iMPY, "smmul", " $dst, $a, $b",
11291129 [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>,
11301130 Requires<[IsARM, HasV6]> {
11311131 let Inst{7-4} = 0b0001;
11331133 }
11341134
11351135 def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
1136 IIC_iALU, "smmla", " $dst, $a, $b, $c",
1136 IIC_iMPY, "smmla", " $dst, $a, $b, $c",
11371137 [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
11381138 Requires<[IsARM, HasV6]> {
11391139 let Inst{7-4} = 0b0001;
11411141
11421142
11431143 def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
1144 IIC_iALU, "smmls", " $dst, $a, $b, $c",
1144 IIC_iMPY, "smmls", " $dst, $a, $b, $c",
11451145 [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
11461146 Requires<[IsARM, HasV6]> {
11471147 let Inst{7-4} = 0b1101;
11491149
11501150 multiclass AI_smul {
11511151 def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
1152 IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b",
1152 IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b",
11531153 [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
11541154 (sext_inreg GPR:$b, i16)))]>,
11551155 Requires<[IsARM, HasV5TE]> {
11581158 }
11591159
11601160 def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
1161 IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b",
1161 IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b",
11621162 [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
11631163 (sra GPR:$b, (i32 16))))]>,
11641164 Requires<[IsARM, HasV5TE]> {
11671167 }
11681168
11691169 def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
1170 IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b",
1170 IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b",
11711171 [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
11721172 (sext_inreg GPR:$b, i16)))]>,
11731173 Requires<[IsARM, HasV5TE]> {
11761176 }
11771177
11781178 def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
1179 IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b",
1179 IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b",
11801180 [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
11811181 (sra GPR:$b, (i32 16))))]>,
11821182 Requires<[IsARM, HasV5TE]> {
11851185 }
11861186
11871187 def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
1188 IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b",
1188 IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b",
11891189 [(set GPR:$dst, (sra (opnode GPR:$a,
11901190 (sext_inreg GPR:$b, i16)), (i32 16)))]>,
11911191 Requires<[IsARM, HasV5TE]> {
11941194 }
11951195
11961196 def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
1197 IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b",
1197 IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b",
11981198 [(set GPR:$dst, (sra (opnode GPR:$a,
11991199 (sra GPR:$b, (i32 16))), (i32 16)))]>,
12001200 Requires<[IsARM, HasV5TE]> {
12061206
12071207 multiclass AI_smla {
12081208 def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
1209 IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
1209 IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
12101210 [(set GPR:$dst, (add GPR:$acc,
12111211 (opnode (sext_inreg GPR:$a, i16),
12121212 (sext_inreg GPR:$b, i16))))]>,
12161216 }
12171217
12181218 def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
1219 IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
1219 IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
12201220 [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
12211221 (sra GPR:$b, (i32 16)))))]>,
12221222 Requires<[IsARM, HasV5TE]> {
12251225 }
12261226
12271227 def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
1228 IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
1228 IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
12291229 [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
12301230 (sext_inreg GPR:$b, i16))))]>,
12311231 Requires<[IsARM, HasV5TE]> {
12341234 }
12351235
12361236 def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
1237 IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
1237 IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
12381238 [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
12391239 (sra GPR:$b, (i32 16)))))]>,
12401240 Requires<[IsARM, HasV5TE]> {
12431243 }
12441244
12451245 def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
1246 IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
1246 IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
12471247 [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
12481248 (sext_inreg GPR:$b, i16)), (i32 16))))]>,
12491249 Requires<[IsARM, HasV5TE]> {
12521252 }
12531253
12541254 def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
1255 IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
1255 IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
12561256 [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
12571257 (sra GPR:$b, (i32 16))), (i32 16))))]>,
12581258 Requires<[IsARM, HasV5TE]> {
507507
508508 // multiply register
509509 let isCommutable = 1 in
510 def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALU,
510 def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMPY,
511511 "mul", " $dst, $rhs",
512512 [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>;
513513
807807 // Multiply Instructions.
808808 //
809809 let isCommutable = 1 in
810 def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
810 def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
811811 "mul", " $dst, $a, $b",
812812 [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
813813
814 def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
814 def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
815815 "mla", " $dst, $a, $b, $c",
816816 [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
817817
818 def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
818 def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
819819 "mls", " $dst, $a, $b, $c",
820820 [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>;
821821
822822 // Extra precision multiplies with low / high results
823823 let neverHasSideEffects = 1 in {
824824 let isCommutable = 1 in {
825 def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
825 def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
826826 "smull", " $ldst, $hdst, $a, $b", []>;
827827
828 def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
828 def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
829829 "umull", " $ldst, $hdst, $a, $b", []>;
830830 }
831831
832832 // Multiply + accumulate
833 def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
833 def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
834834 "smlal", " $ldst, $hdst, $a, $b", []>;
835835
836 def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
836 def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
837837 "umlal", " $ldst, $hdst, $a, $b", []>;
838838
839 def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
839 def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
840840 "umaal", " $ldst, $hdst, $a, $b", []>;
841841 } // neverHasSideEffects
842842
843843 // Most significant word multiply
844 def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
844 def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
845845 "smmul", " $dst, $a, $b",
846846 [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>;
847847
848 def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
848 def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
849849 "smmla", " $dst, $a, $b, $c",
850850 [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>;
851851
852852
853 def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
853 def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
854854 "smmls", " $dst, $a, $b, $c",
855855 [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>;
856856
857857 multiclass T2I_smul {
858 def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
858 def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
859859 !strconcat(opc, "bb"), " $dst, $a, $b",
860860 [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
861861 (sext_inreg GPR:$b, i16)))]>;
862862
863 def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
863 def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
864864 !strconcat(opc, "bt"), " $dst, $a, $b",
865865 [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
866866 (sra GPR:$b, (i32 16))))]>;
867867
868 def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
868 def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
869869 !strconcat(opc, "tb"), " $dst, $a, $b",
870870 [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
871871 (sext_inreg GPR:$b, i16)))]>;
872872
873 def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
873 def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
874874 !strconcat(opc, "tt"), " $dst, $a, $b",
875875 [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
876876 (sra GPR:$b, (i32 16))))]>;
877877
878 def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
878 def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
879879 !strconcat(opc, "wb"), " $dst, $a, $b",
880880 [(set GPR:$dst, (sra (opnode GPR:$a,
881881 (sext_inreg GPR:$b, i16)), (i32 16)))]>;
882882
883 def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
883 def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
884884 !strconcat(opc, "wt"), " $dst, $a, $b",
885885 [(set GPR:$dst, (sra (opnode GPR:$a,
886886 (sra GPR:$b, (i32 16))), (i32 16)))]>;
888888
889889
890890 multiclass T2I_smla {
891 def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
891 def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
892892 !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
893893 [(set GPR:$dst, (add GPR:$acc,
894894 (opnode (sext_inreg GPR:$a, i16),
895895 (sext_inreg GPR:$b, i16))))]>;
896896
897 def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
897 def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
898898 !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
899899 [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
900900 (sra GPR:$b, (i32 16)))))]>;
901901
902 def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
902 def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
903903 !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
904904 [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
905905 (sext_inreg GPR:$b, i16))))]>;
906906
907 def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
907 def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
908908 !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
909909 [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
910910 (sra GPR:$b, (i32 16)))))]>;
911911
912 def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
912 def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
913913 !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
914914 [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
915915 (sext_inreg GPR:$b, i16)), (i32 16))))]>;
916916
917 def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
917 def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
918918 !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
919919 [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
920920 (sra GPR:$b, (i32 16))), (i32 16))))]>;
1515 // two fully-pipelined integer ALU pipelines
1616 InstrItinData]>,
1717 // one fully-pipelined integer Multiply pipeline
18 // function units are used in alpha order, so use FU_Pipe1
19 // for the Multiple pipeline
20 InstrItinData]>,
18 // function units are reserved by the scheduler in reverse alpha order,
19 // so use FU_Pipe0 for the Multiply pipeline
20 InstrItinData]>,
2121 // loads have an extra cycle of latency, but are fully pipelined
22 // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
23 InstrItinData,
22 // use FU_Issue to enforce the 1 load/store per cycle limit
23 InstrItinData,
2424 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
2525 InstrStage<1, [FU_LdSt0]>]>,
2626 // fully-pipelined stores
27 // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
28 InstrItinData,
27 // use FU_Issue to enforce the 1 load/store per cycle limit
28 InstrItinData,
2929 InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
3030 // no delay slots, so the latency of a branch is unimportant
3131 InstrItinData]>,
3232
33 // VFP ALU is not pipelined so stall all issues
34 // FIXME assume NFP pipeline and 7 cycle non-pipelined latency
35 InstrItinData]>,
33 // NFP ALU is not pipelined so stall all issues
34 InstrItinData,
35 InstrStage<7, [FU_Pipe1], 0>]>,
3636 // VFP MPY is not pipelined so stall all issues
37 // FIXME assume NFP pipeline and 7 cycle non-pipelined latency
38 InstrItinData, FU_Pipe1]>]>,
37 InstrItinData], 0>,
38 InstrStage<7, [FU_Pipe1], 0>]>,
3939 // loads have an extra cycle of latency, but are fully pipelined
40 // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
41 InstrItinData,
40 // use FU_Issue to enforce the 1 load/store per cycle limit
41 InstrItinData,
4242 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
4343 InstrStage<1, [FU_LdSt0]>]>,
44 // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
45 InstrItinData,
44 // use FU_Issue to enforce the 1 load/store per cycle limit
45 InstrItinData,
4646 InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>
4747 ]>;
4848
214214 // Next stage
215215 const Record *Stage = StageList[i];
216216
217 // Form string as ,{ cycles, u1 | u2 | ... | un }
217 // Form string as ,{ cycles, u1 | u2 | ... | un, timeinc }
218218 int Cycles = Stage->getValueAsInt("Cycles");
219219 ItinString += " { " + itostr(Cycles) + ", ";
220220
228228 if (++j < M) ItinString += " | ";
229229 }
230230
231 int TimeInc = Stage->getValueAsInt("TimeInc");
232 ItinString += ", " + itostr(TimeInc);
233
231234 // Close off stage
232235 ItinString += " }";
233236 if (++i < N) ItinString += ", ";
251254
252255 // Begin stages table
253256 OS << "static const llvm::InstrStage Stages[] = {\n"
254 " { 0, 0 }, // No itinerary\n";
257 " { 0, 0, 0 }, // No itinerary\n";
255258
256259 unsigned StageCount = 1;
257260 unsigned ItinEnum = 1;
288291
289292 // If new itinerary
290293 if (Find == 0) {
291 // Emit as { cycles, u1 | u2 | ... | un }, // index
294 // Emit as { cycles, u1 | u2 | ... | un, timeinc }, // index
292295 OS << ItinString << ", // " << ItinEnum << "\n";
293296 // Record Itin class number.
294297 ItinMap[ItinString] = Find = StageCount;
312315 }
313316
314317 // Closing stage
315 OS << " { 0, 0 } // End itinerary\n";
318 OS << " { 0, 0, 0 } // End itinerary\n";
316319 // End stages table
317320 OS << "};\n";
318321