llvm.org GIT mirror llvm / ab518ce
[llvm-mca] Fix an invalid memory read introduced by r346487. This patch fixes an invalid memory read introduced by r346487. Before this patch, a partial register write had to query the latency of the dependent full register write by calling a method on the full write descriptor. However, if the full write is from an already retired instruction, chances are that the EntryStage already reclaimed its memory. In some partial register write tests, valgrind was reporting an invalid memory read. This change fixes the invalid memory access problem. Writes are now responsible for tracking dependent partial register writes, and for notifying them when the instruction is issued. That means partial register writes no longer need to query their associated full write to check when they are ready to execute. Added test X86/BtVer2/partial-reg-update-7.s git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@347459 91177308-0d34-0410-b5e6-96231b3b80d8 Andrea Di Biagio 9 months ago
4 changed file(s) with 155 addition(s) and 18 deletion(s). Raw diff Collapse all Expand all
0 # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
1 # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=5 < %s | FileCheck %s
2
3 sete %r9b
4 movzbl %al, %eax
5 shll $2, %eax
6 imull %ecx, %eax
7 cmpl $1025, %eax
8
9 # CHECK: Iterations: 100
10 # CHECK-NEXT: Instructions: 500
11 # CHECK-NEXT: Total Cycles: 504
12 # CHECK-NEXT: Total uOps: 600
13
14 # CHECK: Dispatch Width: 2
15 # CHECK-NEXT: uOps Per Cycle: 1.19
16 # CHECK-NEXT: IPC: 0.99
17 # CHECK-NEXT: Block RThroughput: 3.0
18
19 # CHECK: Instruction Info:
20 # CHECK-NEXT: [1]: #uOps
21 # CHECK-NEXT: [2]: Latency
22 # CHECK-NEXT: [3]: RThroughput
23 # CHECK-NEXT: [4]: MayLoad
24 # CHECK-NEXT: [5]: MayStore
25 # CHECK-NEXT: [6]: HasSideEffects (U)
26
27 # CHECK: [1] [2] [3] [4] [5] [6] Instructions:
28 # CHECK-NEXT: 1 1 0.50 sete %r9b
29 # CHECK-NEXT: 1 1 0.50 movzbl %al, %eax
30 # CHECK-NEXT: 1 1 0.50 shll $2, %eax
31 # CHECK-NEXT: 2 3 1.00 imull %ecx, %eax
32 # CHECK-NEXT: 1 1 0.50 cmpl $1025, %eax
33
34 # CHECK: Resources:
35 # CHECK-NEXT: [0] - JALU0
36 # CHECK-NEXT: [1] - JALU1
37 # CHECK-NEXT: [2] - JDiv
38 # CHECK-NEXT: [3] - JFPA
39 # CHECK-NEXT: [4] - JFPM
40 # CHECK-NEXT: [5] - JFPU0
41 # CHECK-NEXT: [6] - JFPU1
42 # CHECK-NEXT: [7] - JLAGU
43 # CHECK-NEXT: [8] - JMul
44 # CHECK-NEXT: [9] - JSAGU
45 # CHECK-NEXT: [10] - JSTC
46 # CHECK-NEXT: [11] - JVALU0
47 # CHECK-NEXT: [12] - JVALU1
48 # CHECK-NEXT: [13] - JVIMUL
49
50 # CHECK: Resource pressure per iteration:
51 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
52 # CHECK-NEXT: 2.00 3.00 - - - - - - 1.00 - - - - -
53
54 # CHECK: Resource pressure by instruction:
55 # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
56 # CHECK-NEXT: 0.99 0.01 - - - - - - - - - - - - sete %r9b
57 # CHECK-NEXT: 0.01 0.99 - - - - - - - - - - - - movzbl %al, %eax
58 # CHECK-NEXT: - 1.00 - - - - - - - - - - - - shll $2, %eax
59 # CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - imull %ecx, %eax
60 # CHECK-NEXT: 1.00 - - - - - - - - - - - - - cmpl $1025, %eax
61
62 # CHECK: Timeline view:
63 # CHECK-NEXT: 0123456789
64 # CHECK-NEXT: Index 0123456789 012345678
65
66 # CHECK: [0,0] DeER . . . . . . sete %r9b
67 # CHECK-NEXT: [0,1] DeER . . . . . . movzbl %al, %eax
68 # CHECK-NEXT: [0,2] .DeER. . . . . . shll $2, %eax
69 # CHECK-NEXT: [0,3] . DeeeER . . . . . imull %ecx, %eax
70 # CHECK-NEXT: [0,4] . D==eER . . . . . cmpl $1025, %eax
71 # CHECK-NEXT: [1,0] . D===eER. . . . . sete %r9b
72 # CHECK-NEXT: [1,1] . D=eE-R. . . . . movzbl %al, %eax
73 # CHECK-NEXT: [1,2] . D==eE-R . . . . shll $2, %eax
74 # CHECK-NEXT: [1,3] . D==eeeER . . . . imull %ecx, %eax
75 # CHECK-NEXT: [1,4] . .D====eER . . . . cmpl $1025, %eax
76 # CHECK-NEXT: [2,0] . .D=====eER. . . . sete %r9b
77 # CHECK-NEXT: [2,1] . . D===eE-R. . . . movzbl %al, %eax
78 # CHECK-NEXT: [2,2] . . D====eE-R . . . shll $2, %eax
79 # CHECK-NEXT: [2,3] . . D====eeeER . . . imull %ecx, %eax
80 # CHECK-NEXT: [2,4] . . D======eER . . . cmpl $1025, %eax
81 # CHECK-NEXT: [3,0] . . D=======eER. . . sete %r9b
82 # CHECK-NEXT: [3,1] . . D=====eE-R. . . movzbl %al, %eax
83 # CHECK-NEXT: [3,2] . . D======eE-R . . shll $2, %eax
84 # CHECK-NEXT: [3,3] . . .D======eeeER . . imull %ecx, %eax
85 # CHECK-NEXT: [3,4] . . . D========eER . . cmpl $1025, %eax
86 # CHECK-NEXT: [4,0] . . . D=========eER. . sete %r9b
87 # CHECK-NEXT: [4,1] . . . D=======eE-R. . movzbl %al, %eax
88 # CHECK-NEXT: [4,2] . . . D========eE-R . shll $2, %eax
89 # CHECK-NEXT: [4,3] . . . D========eeeER. imull %ecx, %eax
90 # CHECK-NEXT: [4,4] . . . D==========eER cmpl $1025, %eax
91
92 # CHECK: Average Wait times (based on the timeline view):
93 # CHECK-NEXT: [0]: Executions
94 # CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
95 # CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
96 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
97
98 # CHECK: [0] [1] [2] [3]
99 # CHECK-NEXT: 0. 5 5.8 0.2 0.0 sete %r9b
100 # CHECK-NEXT: 1. 5 4.2 0.2 0.8 movzbl %al, %eax
101 # CHECK-NEXT: 2. 5 5.0 0.0 0.8 shll $2, %eax
102 # CHECK-NEXT: 3. 5 5.0 0.0 0.0 imull %ecx, %eax
103 # CHECK-NEXT: 4. 5 7.0 0.0 0.0 cmpl $1025, %eax
122122 // that we don't break the WAW, and the two writes can be merged together.
123123 const WriteState *DependentWrite;
124124
125 // Number of writes that are in a WAW dependency with this write.
126 unsigned NumWriteUsers;
125 // A partial write that is in a false dependency with this write.
126 WriteState *PartialWrite;
127
128 unsigned DependentWriteCyclesLeft;
127129
128130 // A list of dependent reads. Users is a set of dependent
129131 // reads. A dependent read is added to the set only if CyclesLeft
138140 bool clearsSuperRegs = false, bool writesZero = false)
139141 : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
140142 PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
141 IsEliminated(false), DependentWrite(nullptr), NumWriteUsers(0U) {}
143 IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr),
144 DependentWriteCyclesLeft(0) {}
142145
143146 WriteState(const WriteState &Other) = default;
144147 WriteState &operator=(const WriteState &Other) = default;
150153 unsigned getLatency() const { return WD->Latency; }
151154
152155 void addUser(ReadState *Use, int ReadAdvance);
153
154 unsigned getNumUsers() const { return Users.size() + NumWriteUsers; }
156 void addUser(WriteState *Use);
157
158 unsigned getDependentWriteCyclesLeft() const { return DependentWriteCyclesLeft; }
159
160 unsigned getNumUsers() const {
161 unsigned NumUsers = Users.size();
162 if (PartialWrite)
163 ++NumUsers;
164 return NumUsers;
165 }
166
155167 bool clearsSuperRegisters() const { return ClearsSuperRegs; }
156168 bool isWriteZero() const { return WritesZero; }
157169 bool isEliminated() const { return IsEliminated; }
160172 }
161173
162174 const WriteState *getDependentWrite() const { return DependentWrite; }
163 void setDependentWrite(WriteState *Other) {
164 DependentWrite = Other;
165 ++Other->NumWriteUsers;
166 }
175 void setDependentWrite(WriteState *Other) { DependentWrite = Other; }
176 void writeStartEvent(unsigned Cycles) {
177 DependentWriteCyclesLeft = Cycles;
178 DependentWrite = nullptr;
179 }
180
167181 void setWriteZero() { WritesZero = true; }
168182 void setEliminated() {
169183 assert(Users.empty() && "Write is in an inconsistent state.");
184184 // register is allocated.
185185 ShouldAllocatePhysRegs = false;
186186
187 if (OtherWrite.getWriteState() &&
188 (OtherWrite.getSourceIndex() != Write.getSourceIndex())) {
187 WriteState *OtherWS = OtherWrite.getWriteState();
188 if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) {
189189 // This partial write has a false dependency on RenameAs.
190190 assert(!IsEliminated && "Unexpected partial update!");
191 WS.setDependentWrite(OtherWrite.getWriteState());
191 OtherWS->addUser(&WS);
192192 }
193193 }
194194 }
4848 unsigned ReadCycles = std::max(0, CyclesLeft - User.second);
4949 RS->writeStartEvent(ReadCycles);
5050 }
51
52 // Notify any writes that are in a false dependency with this write.
53 if (PartialWrite)
54 PartialWrite->writeStartEvent(CyclesLeft);
5155 }
5256
5357 void WriteState::addUser(ReadState *User, int ReadAdvance) {
6468 Users.insert(NewPair);
6569 }
6670
71 void WriteState::addUser(WriteState *User) {
72 if (CyclesLeft != UNKNOWN_CYCLES) {
73 User->writeStartEvent(std::max(0, CyclesLeft));
74 return;
75 }
76
77 assert(!PartialWrite && "PartialWrite already set!");
78 PartialWrite = User;
79 User->setDependentWrite(this);
80 }
81
6782 void WriteState::cycleEvent() {
6883 // Note: CyclesLeft can be a negative number. It is an error to
6984 // make it an unsigned quantity because users of this write may
7085 // specify a negative ReadAdvance.
7186 if (CyclesLeft != UNKNOWN_CYCLES)
7287 CyclesLeft--;
88
89 if (DependentWriteCyclesLeft)
90 DependentWriteCyclesLeft--;
7391 }
7492
7593 void ReadState::cycleEvent() {
142160
143161 // A partial register write cannot complete before a dependent write.
144162 auto IsDefReady = [&](const WriteState &Def) {
145 if (const WriteState *Write = Def.getDependentWrite()) {
146 int WriteLatency = Write->getCyclesLeft();
147 if (WriteLatency == UNKNOWN_CYCLES)
148 return false;
149 return static_cast<unsigned>(WriteLatency) < getLatency();
163 if (!Def.getDependentWrite()) {
164 unsigned CyclesLeft = Def.getDependentWriteCyclesLeft();
165 return !CyclesLeft || CyclesLeft < getLatency();
150166 }
151 return true;
167 return false;
152168 };
153169
154170 if (all_of(getDefs(), IsDefReady))
162178 if (isDispatched()) {
163179 for (ReadState &Use : getUses())
164180 Use.cycleEvent();
181
182 for (WriteState &Def : getDefs())
183 Def.cycleEvent();
165184
166185 update();
167186 return;