[SystemZ] Use MVC to spill loads and stores

Try to use MVC when spilling the destination of a simple load or the source of a simple store. As explained in the comment, this doesn't yet handle the case where the load or store location is also a frame index, since that could lead to two simultaneous scavenger spills, something the backend can't handle yet.

spill-02.py tests that this restriction kicks in, but unfortunately I've not yet found a case that would fail without it. The volatile trick I used for other scavenger tests doesn't work here because we can't use MVC for volatile accesses anyway. I'm planning on relaxing the restriction later, hopefully with a test that does trigger the problem...

Tests @f8 and @f9 also showed that L(G)RL and ST(G)RL were wrongly classified as SimpleBDX{Load,Store}. It wouldn't be easy to test for that bug separately, which is why I didn't split out the fix as a separate patch.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185434 91177308-0d34-0410-b5e6-96231b3b80d8

Richard Sandiford, 6 years ago
6 changed files with 582 additions and 13 deletions.
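As an illustration of what the patch targets, here is a minimal sketch (not part of the commit) of a generator written in the same style as the Python tests below: it prints an IR function with several i64 values live across a call, the same shape as @f3 in the .ll test, where the spill store and reload are expected to become MVC. The function names and the count of nine values are assumptions for illustration only.

# Illustrative sketch only (hypothetical generator, not part of this commit):
# emit an IR function with enough i64 values live across a call that at least
# one must be spilled; with this patch the spill should use MVC (cf. @f3).
count = 9   # assumed to be enough to exhaust the call-saved GPRs

print('declare void @foo()')
print('')
print('define void @f1(i64 *%base) {')
for i in range(count):
    print('  %%ptr%d = getelementptr i64 *%%base, i64 %d' % (i, i * 2))
    print('  %%val%d = load i64 *%%ptr%d' % (i, i))
print('  call void @foo()')
for i in range(count):
    print('  store i64 %%val%d, i64 *%%ptr%d' % (i, i))
print('  ret void')
print('}')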
1212
1313 #include "SystemZInstrInfo.h"
1414 #include "SystemZInstrBuilder.h"
15 #include "llvm/CodeGen/MachineRegisterInfo.h"
1516 #include "llvm/Target/TargetMachine.h"
1617
1718 #define GET_INSTRINFO_CTOR
7980 // Return 0 otherwise.
8081 //
8182 // Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores.
82 static int isSimpleMove(const MachineInstr *MI, int &FrameIndex, int Flag) {
83 static int isSimpleMove(const MachineInstr *MI, int &FrameIndex,
84 unsigned Flag) {
8385 const MCInstrDesc &MCID = MI->getDesc();
8486 if ((MCID.TSFlags & Flag) &&
8587 MI->getOperand(1).isFI() &&
312314 getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode);
313315 addFrameReference(BuildMI(MBB, MBBI, DL, get(LoadOpcode), DestReg),
314316 FrameIdx);
317 }
318
319 // Return true if MI is a simple load or store with a 12-bit displacement
320 // and no index. Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores.
321 static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) {
322 const MCInstrDesc &MCID = MI->getDesc();
323 return ((MCID.TSFlags & Flag) &&
324 isUInt<12>(MI->getOperand(2).getImm()) &&
325 MI->getOperand(3).getReg() == 0);
326 }
327
328 // Return a MachineMemOperand for FrameIndex with flags MMOFlags.
329 // Offset is the byte offset from the start of FrameIndex.
330 static MachineMemOperand *getFrameMMO(MachineFunction &MF, int FrameIndex,
331 uint64_t &Offset, unsigned MMOFlags) {
332 const MachineFrameInfo *MFI = MF.getFrameInfo();
333 const Value *V = PseudoSourceValue::getFixedStack(FrameIndex);
334 return MF.getMachineMemOperand(MachinePointerInfo(V, Offset), MMOFlags,
335 MFI->getObjectSize(FrameIndex),
336 MFI->getObjectAlignment(FrameIndex));
337 }
338
339 MachineInstr *
340 SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
341 MachineInstr *MI,
342 const SmallVectorImpl<unsigned> &Ops,
343 int FrameIndex) const {
344 const MachineFrameInfo *MFI = MF.getFrameInfo();
345 unsigned Size = MFI->getObjectSize(FrameIndex);
346
347 // Early exit for cases we don't care about
348 if (Ops.size() != 1)
349 return 0;
350
351 unsigned OpNum = Ops[0];
352 unsigned Reg = MI->getOperand(OpNum).getReg();
353 unsigned RegSize = MF.getRegInfo().getRegClass(Reg)->getSize();
354 assert(Size == RegSize && "Invalid size combination");
355
356 // Look for cases where the source of a simple store or the destination
357 // of a simple load is being spilled. Try to use MVC instead.
358 //
359 // Although MVC is in practice a fast choice in these cases, it is still
360 // logically a bytewise copy. This means that we cannot use it if the
361 // load or store is volatile. It also means that the transformation is
362 // not valid in cases where the two memories partially overlap; however,
363 // that is not a problem here, because we know that one of the memories
364 // is a full frame index.
365 //
366 // For now we punt if the load or store is also to a frame index.
367 // In that case we might end up eliminating both of them to out-of-range
368 // offsets, which might then force the register scavenger to spill two
369 // other registers. The backend can only handle one such scavenger spill
370 // at a time.
371 if (OpNum == 0 && MI->hasOneMemOperand()) {
372 MachineMemOperand *MMO = *MI->memoperands_begin();
373 if (MMO->getSize() == Size && !MMO->isVolatile()) {
374 // Handle conversion of loads.
375 if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad) &&
376 !MI->getOperand(1).isFI()) {
377 uint64_t Offset = 0;
378 MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset,
379 MachineMemOperand::MOStore);
380 return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC))
381 .addFrameIndex(FrameIndex).addImm(Offset).addImm(Size)
382 .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm())
383 .addMemOperand(FrameMMO).addMemOperand(MMO);
384 }
385 // Handle conversion of stores.
386 if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore) &&
387 !MI->getOperand(1).isFI()) {
388 uint64_t Offset = 0;
389 MachineMemOperand *FrameMMO = getFrameMMO(MF, FrameIndex, Offset,
390 MachineMemOperand::MOLoad);
391 return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC))
392 .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm())
393 .addImm(Size).addFrameIndex(FrameIndex).addImm(Offset)
394 .addMemOperand(MMO).addMemOperand(FrameMMO);
395 }
396 }
397 }
398
399 return 0;
400 }
401
402 MachineInstr *
403 SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI,
404 const SmallVectorImpl<unsigned> &Ops,
405 MachineInstr* LoadMI) const {
406 return 0;
315407 }
316408
317409 bool
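A note on the 12-bit displacement gate in isSimpleBD12Move above: MVC addresses its operands with a base register plus an unsigned 12-bit displacement, so the fold is only attempted when the original access's displacement already fits in the range 0..4095; the Python tests below deliberately push spill slots past this limit. A trivial illustrative check (plain Python standing in for LLVM's isUInt<12>, not part of the commit):

# Illustrative only: the displacement range accepted by the isUInt<12>() test
# in the code above, i.e. what a base+D12 operand (and hence MVC) can encode.
def fits_uint12(disp):
    return 0 <= disp < (1 << 12)

assert fits_uint12(4095)         # largest encodable displacement
assert not fits_uint12(4096)     # first offset that needs an extra LAY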
110110 unsigned DestReg, int FrameIdx,
111111 const TargetRegisterClass *RC,
112112 const TargetRegisterInfo *TRI) const LLVM_OVERRIDE;
113 virtual MachineInstr *
114 foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
115 const SmallVectorImpl<unsigned> &Ops,
116 int FrameIndex) const;
117 virtual MachineInstr *
118 foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI,
119 const SmallVectorImpl<unsigned> &Ops,
120 MachineInstr* LoadMI) const;
113121 virtual bool
114122 expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const LLVM_OVERRIDE;
115123 virtual bool
241241
242242 // Register loads.
243243 let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
244 defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32>;
245 def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>;
246
247 def LG : UnaryRXY<"lg", 0xE304, load, GR64>;
248 def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>;
244 defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32>;
245 def LG : UnaryRXY<"lg", 0xE304, load, GR64>;
249246
250247 // These instructions are split after register allocation, so we don't
251248 // want a custom inserter.
254251 [(set GR128:$dst, (load bdxaddr20only128:$src))]>;
255252 }
256253 }
254 let canFoldAsLoad = 1 in {
255 def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>;
256 def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>;
257 }
257258
258259 // Register stores.
259260 let SimpleBDXStore = 1 in {
260 let isCodeGenOnly = 1 in {
261 defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32>;
262 def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
263 }
264
265 def STG : StoreRXY<"stg", 0xE324, store, GR64>;
266 def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>;
261 let isCodeGenOnly = 1 in
262 defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32>;
263 def STG : StoreRXY<"stg", 0xE324, store, GR64>;
267264
268265 // These instructions are split after register allocation, so we don't
269266 // want a custom inserter.
272269 [(store GR128:$src, bdxaddr20only128:$dst)]>;
273270 }
274271 }
272 let isCodeGenOnly = 1 in
273 def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
274 def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>;
275275
276276 // 8-bit immediate stores to 8-bit fields.
277277 defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>;
0 # Test cases where MVC is used for spill slots that end up being out of range.
1 # RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
2
3 # There are 8 usable call-saved GPRs, two of which are needed for the base
4 # registers. The first 160 bytes of the frame are needed for the ABI
5 # call frame, and a further 8 bytes are needed for the emergency spill slot.
6 # That means we will have at least one out-of-range slot if:
7 #
8 # count == (4096 - 168) / 8 + 6 + 1 == 498
9 #
10 # Add in some extra room and check both %r15+4096 (the first out-of-range slot)
11 # and %r15+4104.
12 #
13 # CHECK: f1:
14 # CHECK: lay [[REG:%r[0-5]]], 4096(%r15)
15 # CHECK: mvc 0(8,[[REG]]), {{[0-9]+}}({{%r[0-9]+}})
16 # CHECK: brasl %r14, foo@PLT
17 # CHECK: lay [[REG:%r[0-5]]], 4096(%r15)
18 # CHECK: mvc {{[0-9]+}}(8,{{%r[0-9]+}}), 8([[REG]])
19 # CHECK: br %r14
20 count = 500
21
22 print 'declare void @foo()'
23 print ''
24 print 'define void @f1(i64 *%base0, i64 *%base1) {'
25
26 for i in range(count):
27 print ' %%ptr%d = getelementptr i64 *%%base%d, i64 %d' % (i, i % 2, i / 2)
28 print ' %%val%d = load i64 *%%ptr%d' % (i, i)
29 print ''
30
31 print ' call void @foo()'
32 print ''
33
34 for i in range(count):
35 print ' store i64 %%val%d, i64 *%%ptr%d' % (i, i)
36
37 print ''
38 print ' ret void'
39 print '}'
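The count chosen in the header comment can be sanity-checked by restating its arithmetic; the constants below (160-byte ABI call frame, 8-byte emergency spill slot, 8-byte spill slots, 6 free call-saved GPRs) are taken directly from that comment, and this check is not run as part of the test.

# Sanity check of the formula in the header comment above.
abi_frame = 160        # ABI call frame at the bottom of the stack
emergency = 8          # emergency spill slot
slot = 8               # each spilled i64 occupies 8 bytes
free_gprs = 6          # 8 call-saved GPRs minus the 2 base registers

in_range = (4096 - abi_frame - emergency) // slot        # 491 slots reachable with D12
assert in_range + free_gprs + 1 == 498
assert abi_frame + emergency + in_range * slot == 4096   # matches the lay 4096(%r15) checks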
0 # Test cases where we spill from one frame index to another, both of which
1 # would be out of range of MVC. At present we don't use MVC in this case.
2 # RUN: python %s | llc -mtriple=s390x-linux-gnu | FileCheck %s
3
4 # There are 8 usable call-saved GPRs. The first 160 bytes of the frame
5 # are needed for the ABI call frame, and a further 8 bytes are needed
6 # for the emergency spill slot. That means we will have at least one
7 # out-of-range slot if:
8 #
9 # count == (4096 - 168) / 8 + 8 + 1 == 500
10 #
11 # Add in some extra just to be sure.
12 #
13 # CHECK: f1:
14 # CHECK-NOT: mvc
15 # CHECK: br %r14
16 count = 510
17
18 print 'declare void @foo(i64 *%base0, i64 *%base1)'
19 print ''
20 print 'define void @f1() {'
21
22 for i in range(2):
23 print ' %%alloc%d = alloca [%d x i64]' % (i, count / 2)
24 print (' %%base%d = getelementptr [%d x i64] * %%alloc%d, i64 0, i64 0'
25 % (i, count / 2, i))
26
27 print ' call void @foo(i64 *%base0, i64 *%base1)'
28 print ''
29
30 for i in range(count):
31 print ' %%ptr%d = getelementptr i64 *%%base%d, i64 %d' % (i, i % 2, i / 2)
32 print ' %%val%d = load i64 *%%ptr%d' % (i, i)
33 print ''
34
35 print ' call void @foo(i64 *%base0, i64 *%base1)'
36 print ''
37
38 for i in range (count):
39 print ' store i64 %%val%d, i64 *%%ptr%d' % (i, i)
40
41 print ''
42 print ' call void @foo(i64 *%base0, i64 *%base1)'
43 print ''
44 print ' ret void'
45 print '}'
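The same style of sanity check applies to this script's count, where the header comment assumes all 8 call-saved GPRs are free for values; the script itself uses 510 for extra margin.

# Sanity check of the formula in the header comment above (not run by the test).
in_range = (4096 - 160 - 8) // 8     # 491 spill slots reachable with a 12-bit displacement
assert in_range + 8 + 1 == 500       # 8 free call-saved GPRs this time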
0 ; Test spilling using MVC.
1 ;
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
3
4 declare void @foo()
5
6 @g0 = global i32 0
7 @g1 = global i32 1
8 @g2 = global i32 2
9 @g3 = global i32 3
10 @g4 = global i32 4
11 @g5 = global i32 5
12 @g6 = global i32 6
13 @g7 = global i32 7
14 @g8 = global i32 8
15 @g9 = global i32 9
16
17 @h0 = global i64 0
18 @h1 = global i64 1
19 @h2 = global i64 2
20 @h3 = global i64 3
21 @h4 = global i64 4
22 @h5 = global i64 5
23 @h6 = global i64 6
24 @h7 = global i64 7
25 @h8 = global i64 8
26 @h9 = global i64 9
27
28 ; This function shouldn't spill anything
29 define void @f1(i32 *%ptr0) {
30 ; CHECK: f1:
31 ; CHECK: stmg
32 ; CHECK: aghi %r15, -160
33 ; CHECK-NOT: %r15
34 ; CHECK: brasl %r14, foo@PLT
35 ; CHECK-NOT: %r15
36 ; CHECK: lmg
37 ; CHECK: br %r14
38 %ptr1 = getelementptr i32 *%ptr0, i32 2
39 %ptr2 = getelementptr i32 *%ptr0, i32 4
40 %ptr3 = getelementptr i32 *%ptr0, i32 6
41 %ptr4 = getelementptr i32 *%ptr0, i32 8
42 %ptr5 = getelementptr i32 *%ptr0, i32 10
43 %ptr6 = getelementptr i32 *%ptr0, i32 12
44
45 %val0 = load i32 *%ptr0
46 %val1 = load i32 *%ptr1
47 %val2 = load i32 *%ptr2
48 %val3 = load i32 *%ptr3
49 %val4 = load i32 *%ptr4
50 %val5 = load i32 *%ptr5
51 %val6 = load i32 *%ptr6
52
53 call void @foo()
54
55 store i32 %val0, i32 *%ptr0
56 store i32 %val1, i32 *%ptr1
57 store i32 %val2, i32 *%ptr2
58 store i32 %val3, i32 *%ptr3
59 store i32 %val4, i32 *%ptr4
60 store i32 %val5, i32 *%ptr5
61 store i32 %val6, i32 *%ptr6
62
63 ret void
64 }
65
66 ; Test a case where at least one i32 load and at least one i32 store
67 ; need spills.
68 define void @f2(i32 *%ptr0) {
69 ; CHECK: f2:
70 ; CHECK: mvc [[OFFSET1:16[04]]](4,%r15), [[OFFSET2:[0-9]+]]({{%r[0-9]+}})
71 ; CHECK: brasl %r14, foo@PLT
72 ; CHECK: mvc [[OFFSET2]](4,{{%r[0-9]+}}), [[OFFSET1]](%r15)
73 ; CHECK: br %r14
74 %ptr1 = getelementptr i32 *%ptr0, i64 2
75 %ptr2 = getelementptr i32 *%ptr0, i64 4
76 %ptr3 = getelementptr i32 *%ptr0, i64 6
77 %ptr4 = getelementptr i32 *%ptr0, i64 8
78 %ptr5 = getelementptr i32 *%ptr0, i64 10
79 %ptr6 = getelementptr i32 *%ptr0, i64 12
80 %ptr7 = getelementptr i32 *%ptr0, i64 14
81 %ptr8 = getelementptr i32 *%ptr0, i64 16
82
83 %val0 = load i32 *%ptr0
84 %val1 = load i32 *%ptr1
85 %val2 = load i32 *%ptr2
86 %val3 = load i32 *%ptr3
87 %val4 = load i32 *%ptr4
88 %val5 = load i32 *%ptr5
89 %val6 = load i32 *%ptr6
90 %val7 = load i32 *%ptr7
91 %val8 = load i32 *%ptr8
92
93 call void @foo()
94
95 store i32 %val0, i32 *%ptr0
96 store i32 %val1, i32 *%ptr1
97 store i32 %val2, i32 *%ptr2
98 store i32 %val3, i32 *%ptr3
99 store i32 %val4, i32 *%ptr4
100 store i32 %val5, i32 *%ptr5
101 store i32 %val6, i32 *%ptr6
102 store i32 %val7, i32 *%ptr7
103 store i32 %val8, i32 *%ptr8
104
105 ret void
106 }
107
108 ; Test a case where at least one i64 load and at least one i64 store
109 ; need spills.
110 define void @f3(i64 *%ptr0) {
111 ; CHECK: f3:
112 ; CHECK: mvc 160(8,%r15), [[OFFSET:[0-9]+]]({{%r[0-9]+}})
113 ; CHECK: brasl %r14, foo@PLT
114 ; CHECK: mvc [[OFFSET]](8,{{%r[0-9]+}}), 160(%r15)
115 ; CHECK: br %r14
116 %ptr1 = getelementptr i64 *%ptr0, i64 2
117 %ptr2 = getelementptr i64 *%ptr0, i64 4
118 %ptr3 = getelementptr i64 *%ptr0, i64 6
119 %ptr4 = getelementptr i64 *%ptr0, i64 8
120 %ptr5 = getelementptr i64 *%ptr0, i64 10
121 %ptr6 = getelementptr i64 *%ptr0, i64 12
122 %ptr7 = getelementptr i64 *%ptr0, i64 14
123 %ptr8 = getelementptr i64 *%ptr0, i64 16
124
125 %val0 = load i64 *%ptr0
126 %val1 = load i64 *%ptr1
127 %val2 = load i64 *%ptr2
128 %val3 = load i64 *%ptr3
129 %val4 = load i64 *%ptr4
130 %val5 = load i64 *%ptr5
131 %val6 = load i64 *%ptr6
132 %val7 = load i64 *%ptr7
133 %val8 = load i64 *%ptr8
134
135 call void @foo()
136
137 store i64 %val0, i64 *%ptr0
138 store i64 %val1, i64 *%ptr1
139 store i64 %val2, i64 *%ptr2
140 store i64 %val3, i64 *%ptr3
141 store i64 %val4, i64 *%ptr4
142 store i64 %val5, i64 *%ptr5
143 store i64 %val6, i64 *%ptr6
144 store i64 %val7, i64 *%ptr7
145 store i64 %val8, i64 *%ptr8
146
147 ret void
148 }
149
150
151 ; Test a case where at least one f32 load and at least one f32 store
152 ; need spills. The 8 call-saved FPRs could be used for 8 of the %vals
153 ; (and are at the time of writing), but it would really be better to use
154 ; MVC for all 10.
155 define void @f4(float *%ptr0) {
156 ; CHECK: f4:
157 ; CHECK: mvc [[OFFSET1:16[04]]](4,%r15), [[OFFSET2:[0-9]+]]({{%r[0-9]+}})
158 ; CHECK: brasl %r14, foo@PLT
159 ; CHECK: mvc [[OFFSET2]](4,{{%r[0-9]+}}), [[OFFSET1]](%r15)
160 ; CHECK: br %r14
161 %ptr1 = getelementptr float *%ptr0, i64 2
162 %ptr2 = getelementptr float *%ptr0, i64 4
163 %ptr3 = getelementptr float *%ptr0, i64 6
164 %ptr4 = getelementptr float *%ptr0, i64 8
165 %ptr5 = getelementptr float *%ptr0, i64 10
166 %ptr6 = getelementptr float *%ptr0, i64 12
167 %ptr7 = getelementptr float *%ptr0, i64 14
168 %ptr8 = getelementptr float *%ptr0, i64 16
169 %ptr9 = getelementptr float *%ptr0, i64 18
170
171 %val0 = load float *%ptr0
172 %val1 = load float *%ptr1
173 %val2 = load float *%ptr2
174 %val3 = load float *%ptr3
175 %val4 = load float *%ptr4
176 %val5 = load float *%ptr5
177 %val6 = load float *%ptr6
178 %val7 = load float *%ptr7
179 %val8 = load float *%ptr8
180 %val9 = load float *%ptr9
181
182 call void @foo()
183
184 store float %val0, float *%ptr0
185 store float %val1, float *%ptr1
186 store float %val2, float *%ptr2
187 store float %val3, float *%ptr3
188 store float %val4, float *%ptr4
189 store float %val5, float *%ptr5
190 store float %val6, float *%ptr6
191 store float %val7, float *%ptr7
192 store float %val8, float *%ptr8
193 store float %val9, float *%ptr9
194
195 ret void
196 }
197
198 ; Similarly for f64.
199 define void @f5(double *%ptr0) {
200 ; CHECK: f5:
201 ; CHECK: mvc 160(8,%r15), [[OFFSET:[0-9]+]]({{%r[0-9]+}})
202 ; CHECK: brasl %r14, foo@PLT
203 ; CHECK: mvc [[OFFSET]](8,{{%r[0-9]+}}), 160(%r15)
204 ; CHECK: br %r14
205 %ptr1 = getelementptr double *%ptr0, i64 2
206 %ptr2 = getelementptr double *%ptr0, i64 4
207 %ptr3 = getelementptr double *%ptr0, i64 6
208 %ptr4 = getelementptr double *%ptr0, i64 8
209 %ptr5 = getelementptr double *%ptr0, i64 10
210 %ptr6 = getelementptr double *%ptr0, i64 12
211 %ptr7 = getelementptr double *%ptr0, i64 14
212 %ptr8 = getelementptr double *%ptr0, i64 16
213 %ptr9 = getelementptr double *%ptr0, i64 18
214
215 %val0 = load double *%ptr0
216 %val1 = load double *%ptr1
217 %val2 = load double *%ptr2
218 %val3 = load double *%ptr3
219 %val4 = load double *%ptr4
220 %val5 = load double *%ptr5
221 %val6 = load double *%ptr6
222 %val7 = load double *%ptr7
223 %val8 = load double *%ptr8
224 %val9 = load double *%ptr9
225
226 call void @foo()
227
228 store double %val0, double *%ptr0
229 store double %val1, double *%ptr1
230 store double %val2, double *%ptr2
231 store double %val3, double *%ptr3
232 store double %val4, double *%ptr4
233 store double %val5, double *%ptr5
234 store double %val6, double *%ptr6
235 store double %val7, double *%ptr7
236 store double %val8, double *%ptr8
237 store double %val9, double *%ptr9
238
239 ret void
240 }
241
242 ; Repeat f2 with atomic accesses. We shouldn't use MVC here.
243 define void @f6(i32 *%ptr0) {
244 ; CHECK: f6:
245 ; CHECK-NOT: mvc
246 ; CHECK: br %r14
247 %ptr1 = getelementptr i32 *%ptr0, i64 2
248 %ptr2 = getelementptr i32 *%ptr0, i64 4
249 %ptr3 = getelementptr i32 *%ptr0, i64 6
250 %ptr4 = getelementptr i32 *%ptr0, i64 8
251 %ptr5 = getelementptr i32 *%ptr0, i64 10
252 %ptr6 = getelementptr i32 *%ptr0, i64 12
253 %ptr7 = getelementptr i32 *%ptr0, i64 14
254 %ptr8 = getelementptr i32 *%ptr0, i64 16
255
256 %val0 = load atomic i32 *%ptr0 unordered, align 4
257 %val1 = load atomic i32 *%ptr1 unordered, align 4
258 %val2 = load atomic i32 *%ptr2 unordered, align 4
259 %val3 = load atomic i32 *%ptr3 unordered, align 4
260 %val4 = load atomic i32 *%ptr4 unordered, align 4
261 %val5 = load atomic i32 *%ptr5 unordered, align 4
262 %val6 = load atomic i32 *%ptr6 unordered, align 4
263 %val7 = load atomic i32 *%ptr7 unordered, align 4
264 %val8 = load atomic i32 *%ptr8 unordered, align 4
265
266 call void @foo()
267
268 store atomic i32 %val0, i32 *%ptr0 unordered, align 4
269 store atomic i32 %val1, i32 *%ptr1 unordered, align 4
270 store atomic i32 %val2, i32 *%ptr2 unordered, align 4
271 store atomic i32 %val3, i32 *%ptr3 unordered, align 4
272 store atomic i32 %val4, i32 *%ptr4 unordered, align 4
273 store atomic i32 %val5, i32 *%ptr5 unordered, align 4
274 store atomic i32 %val6, i32 *%ptr6 unordered, align 4
275 store atomic i32 %val7, i32 *%ptr7 unordered, align 4
276 store atomic i32 %val8, i32 *%ptr8 unordered, align 4
277
278 ret void
279 }
280
281 ; ...likewise volatile accesses.
282 define void @f7(i32 *%ptr0) {
283 ; CHECK: f7:
284 ; CHECK-NOT: mvc
285 ; CHECK: br %r14
286 %ptr1 = getelementptr i32 *%ptr0, i64 2
287 %ptr2 = getelementptr i32 *%ptr0, i64 4
288 %ptr3 = getelementptr i32 *%ptr0, i64 6
289 %ptr4 = getelementptr i32 *%ptr0, i64 8
290 %ptr5 = getelementptr i32 *%ptr0, i64 10
291 %ptr6 = getelementptr i32 *%ptr0, i64 12
292 %ptr7 = getelementptr i32 *%ptr0, i64 14
293 %ptr8 = getelementptr i32 *%ptr0, i64 16
294
295 %val0 = load volatile i32 *%ptr0
296 %val1 = load volatile i32 *%ptr1
297 %val2 = load volatile i32 *%ptr2
298 %val3 = load volatile i32 *%ptr3
299 %val4 = load volatile i32 *%ptr4
300 %val5 = load volatile i32 *%ptr5
301 %val6 = load volatile i32 *%ptr6
302 %val7 = load volatile i32 *%ptr7
303 %val8 = load volatile i32 *%ptr8
304
305 call void @foo()
306
307 store volatile i32 %val0, i32 *%ptr0
308 store volatile i32 %val1, i32 *%ptr1
309 store volatile i32 %val2, i32 *%ptr2
310 store volatile i32 %val3, i32 *%ptr3
311 store volatile i32 %val4, i32 *%ptr4
312 store volatile i32 %val5, i32 *%ptr5
313 store volatile i32 %val6, i32 *%ptr6
314 store volatile i32 %val7, i32 *%ptr7
315 store volatile i32 %val8, i32 *%ptr8
316
317 ret void
318 }
319
320 ; Check that LRL and STRL are not converted.
321 define void @f8() {
322 ; CHECK: f8:
323 ; CHECK-NOT: mvc
324 ; CHECK: br %r14
325 %val0 = load i32 *@g0
326 %val1 = load i32 *@g1
327 %val2 = load i32 *@g2
328 %val3 = load i32 *@g3
329 %val4 = load i32 *@g4
330 %val5 = load i32 *@g5
331 %val6 = load i32 *@g6
332 %val7 = load i32 *@g7
333 %val8 = load i32 *@g8
334 %val9 = load i32 *@g9
335
336 call void @foo()
337
338 store i32 %val0, i32 *@g0
339 store i32 %val1, i32 *@g1
340 store i32 %val2, i32 *@g2
341 store i32 %val3, i32 *@g3
342 store i32 %val4, i32 *@g4
343 store i32 %val5, i32 *@g5
344 store i32 %val6, i32 *@g6
345 store i32 %val7, i32 *@g7
346 store i32 %val8, i32 *@g8
347 store i32 %val9, i32 *@g9
348
349 ret void
350 }
351
352 ; Likewise LGRL and STGRL.
353 define void @f9() {
354 ; CHECK: f9:
355 ; CHECK-NOT: mvc
356 ; CHECK: br %r14
357 %val0 = load i64 *@h0
358 %val1 = load i64 *@h1
359 %val2 = load i64 *@h2
360 %val3 = load i64 *@h3
361 %val4 = load i64 *@h4
362 %val5 = load i64 *@h5
363 %val6 = load i64 *@h6
364 %val7 = load i64 *@h7
365 %val8 = load i64 *@h8
366 %val9 = load i64 *@h9
367
368 call void @foo()
369
370 store i64 %val0, i64 *@h0
371 store i64 %val1, i64 *@h1
372 store i64 %val2, i64 *@h2
373 store i64 %val3, i64 *@h3
374 store i64 %val4, i64 *@h4
375 store i64 %val5, i64 *@h5
376 store i64 %val6, i64 *@h6
377 store i64 %val7, i64 *@h7
378 store i64 %val8, i64 *@h8
379 store i64 %val9, i64 *@h9
380
381 ret void
382 }