llvm.org GIT mirror: llvm / commit 693160b
[X86] Reduce Store Forward Block issues in HW - Recommit after fixing Bug 36346

If a load follows a store and reloads data that the store has written to memory, Intel microarchitectures can in many cases forward the data directly from the store to the load. This "store forwarding" saves cycles by enabling the load to obtain the data directly instead of accessing it from cache or memory.
A "store forward block" occurs in cases where a store cannot be forwarded to the load. The most typical case of a store forward block on the Intel Core microarchitecture is that a small store cannot be forwarded to a large load. The estimated penalty for a store forward block is ~13 cycles.

This pass tries to recognize and handle cases where a "store forward block" is created by the compiler when lowering memcpy calls to a sequence of a load and a store.

The pass currently only handles cases where memcpy is lowered to XMM/YMM registers; it tries to break the memcpy into smaller copies. Breaking the memcpy should be possible since there is no atomicity guarantee for loads and stores to XMM/YMM.

Change-Id: Ic41aa9ade6512e0478db66e07e2fde41b4fb35f9
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@325128 91177308-0d34-0410-b5e6-96231b3b80d8
Lama Saba, 2 years ago
6 changed files with 3889 additions and 0 deletions.
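To make the targeted pattern concrete, here is a minimal C++ sketch (illustrative only; the type and function names are hypothetical, not part of this commit) of source code whose lowering produces the blocked copy described above: a narrow field store followed by a 16-byte struct copy that is lowered to an XMM load/store pair.

struct S {
  int a, b, c, d; // 16 bytes total
};

void update_and_copy(S &dst, S &src, int x) {
  src.b = x;  // 4-byte store into src
  dst = src;  // 16-byte copy: lowered to a 16-byte XMM load of src followed
              // by a 16-byte store to dst. The wide load only partially
              // overlaps the 4 bytes just written, so the store cannot be
              // forwarded (~13 cycle penalty).
}

With the pass enabled, the 16-byte copy is instead emitted as several narrower loads and stores (visible in the CHECK lines of the test below), each narrow enough to receive forwarded data from the blocking store.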
30 30   X86FastISel.cpp
31 31   X86FixupBWInsts.cpp
32 32   X86FixupLEAs.cpp
   33 + X86FixupSFB.cpp
33 34   X86FixupSetCC.cpp
34 35   X86FloatingPoint.cpp
35 36   X86FrameLowering.cpp
69 69   /// Return a pass that transforms setcc + movzx pairs into xor + setcc.
70 70   FunctionPass *createX86FixupSetCC();
71 71
   72 + /// Return a pass that avoids creating store forward block issues in the hardware.
   73 + FunctionPass *createX86FixupSFB();
   74 +
72 75   /// Return a pass that expands WinAlloca pseudo-instructions.
73 76   FunctionPass *createX86WinAllocaExpander();
74 77
0 //===- X86FixupSFB.cpp - Avoid HW Store Forward Block issues -----------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // If a load follows a store and reloads data that the store has written to
10 // memory, Intel microarchitectures can in many cases forward the data directly
11 // from the store to the load. This "store forwarding" saves cycles by enabling
12 // the load to directly obtain the data instead of accessing the data from
13 // cache or memory.
14 // A "store forward block" occurs in cases where a store cannot be forwarded to
15 // the load. The most typical case of a store forward block on the Intel Core
16 // microarchitecture is that a small store cannot be forwarded to a large load.
17 // The estimated penalty for a store forward block is ~13 cycles.
18 //
19 // This pass tries to recognize and handle cases where "store forward block"
20 // is created by the compiler when lowering memcpy calls to a sequence
21 // of a load and a store.
22 //
23 // The pass currently only handles cases where memcpy is lowered to
24 // XMM/YMM registers; it tries to break the memcpy into smaller copies.
25 // Breaking the memcpy should be possible since there is no atomicity
26 // guarantee for loads and stores to XMM/YMM.
27 //
28 // It could be better for performance to solve the problem by loading
29 // to XMM/YMM then inserting the partial store before storing back from XMM/YMM
30 // to memory, but this would result in a more conservative optimization since it
31 // requires that we prove that all memory accesses between the blocking store and
32 // the load must alias/don't alias before we can move the store, whereas the
33 // transformation done here is correct regardless of other memory accesses.
34 //===----------------------------------------------------------------------===//
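// A minimal sketch (hypothetical MIR, for exposition only; not part of the
// original file) of the pattern this pass rewrites, where a 4-byte store
// blocks a 16-byte reload:
//
//   MOV32mr %rdi, 1, %noreg, 4, %noreg, %esi       ; store 4 bytes at rdi+4
//   %xmm0 = MOVUPSrm %rdi, 1, %noreg, 0, %noreg    ; reload 16 bytes (blocked)
//   MOVUPSmr %rsi, 1, %noreg, 0, %noreg, %xmm0     ; store 16 bytes to rsi
//
// The pass replaces the XMM load/store pair with narrower GPR copies covering
// bytes [0,4), [4,8) and [8,16), so each load can receive forwarded data.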
35
36 #include "X86InstrInfo.h"
37 #include "X86Subtarget.h"
38 #include "llvm/CodeGen/MachineBasicBlock.h"
39 #include "llvm/CodeGen/MachineFunction.h"
40 #include "llvm/CodeGen/MachineFunctionPass.h"
41 #include "llvm/CodeGen/MachineInstr.h"
42 #include "llvm/CodeGen/MachineInstrBuilder.h"
43 #include "llvm/CodeGen/MachineOperand.h"
44 #include "llvm/CodeGen/MachineRegisterInfo.h"
45 #include "llvm/IR/DebugInfoMetadata.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/IR/Function.h"
48 #include "llvm/MC/MCInstrDesc.h"
49
50 using namespace llvm;
51
52 #define DEBUG_TYPE "x86-fixup-SFB"
53
54 static cl::opt<bool> DisableX86FixupSFB("disable-fixup-SFB", cl::Hidden,
55 cl::desc("X86: Disable SFB fixup."),
56 cl::init(false));
57 namespace {
58
59 class FixupSFBPass : public MachineFunctionPass {
60 public:
61 FixupSFBPass() : MachineFunctionPass(ID) {}
62
63 StringRef getPassName() const override {
64 return "X86 Fixup Store Forward Block";
65 }
66
67 bool runOnMachineFunction(MachineFunction &MF) override;
68
69 private:
70 MachineRegisterInfo *MRI;
71 const X86InstrInfo *TII;
72 const X86RegisterInfo *TRI;
73 SmallVector<std::pair<MachineInstr *, MachineInstr *>, 2> BlockedLoadsStores;
74 SmallVector<MachineInstr *, 2> ForRemoval;
75 bool Is64Bit;
76
77 /// \brief Returns pairs of a Load followed by a Store to memory which look
78 /// like a memcpy.
79 void findPotentiallylBlockedCopies(MachineFunction &MF);
80 /// \brief Break the memcpy's load and store into smaller copies
81 /// such that each memory load that was blocked by a smaller store
82 /// would now be copied separately.
83 void
84 breakBlockedCopies(MachineInstr *LoadInst, MachineInstr *StoreInst,
85 const std::map<int64_t, unsigned> &BlockingStoresDisp);
86 /// \brief Break a copy of size Size to smaller copies.
87 void buildCopies(int Size, MachineInstr *LoadInst, int64_t LdDispImm,
88 MachineInstr *StoreInst, int64_t StDispImm,
89 int64_t LMMOffset, int64_t SMMOffset);
90
91 void buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode, int64_t LoadDisp,
92 MachineInstr *StoreInst, unsigned NStoreOpcode,
93 int64_t StoreDisp, unsigned Size, int64_t LMMOffset,
94 int64_t SMMOffset);
95
96 unsigned getRegSizeInBytes(MachineInstr *Inst);
97 static char ID;
98 };
99
100 } // end anonymous namespace
101
102 char FixupSFBPass::ID = 0;
103
104 FunctionPass *llvm::createX86FixupSFB() { return new FixupSFBPass(); }
105
106 static bool isXMMLoadOpcode(unsigned Opcode) {
107 return Opcode == X86::MOVUPSrm || Opcode == X86::MOVAPSrm ||
108 Opcode == X86::VMOVUPSrm || Opcode == X86::VMOVAPSrm ||
109 Opcode == X86::VMOVUPDrm || Opcode == X86::VMOVAPDrm ||
110 Opcode == X86::VMOVDQUrm || Opcode == X86::VMOVDQArm ||
111 Opcode == X86::VMOVUPSZ128rm || Opcode == X86::VMOVAPSZ128rm ||
112 Opcode == X86::VMOVUPDZ128rm || Opcode == X86::VMOVAPDZ128rm ||
113 Opcode == X86::VMOVDQU64Z128rm || Opcode == X86::VMOVDQA64Z128rm ||
114 Opcode == X86::VMOVDQU32Z128rm || Opcode == X86::VMOVDQA32Z128rm;
115 }
116 static bool isYMMLoadOpcode(unsigned Opcode) {
117 return Opcode == X86::VMOVUPSYrm || Opcode == X86::VMOVAPSYrm ||
118 Opcode == X86::VMOVUPDYrm || Opcode == X86::VMOVAPDYrm ||
119 Opcode == X86::VMOVDQUYrm || Opcode == X86::VMOVDQAYrm ||
120 Opcode == X86::VMOVUPSZ256rm || Opcode == X86::VMOVAPSZ256rm ||
121 Opcode == X86::VMOVUPDZ256rm || Opcode == X86::VMOVAPDZ256rm ||
122 Opcode == X86::VMOVDQU64Z256rm || Opcode == X86::VMOVDQA64Z256rm ||
123 Opcode == X86::VMOVDQU32Z256rm || Opcode == X86::VMOVDQA32Z256rm;
124 }
125
126 static bool isPotentialBlockedMemCpyLd(unsigned Opcode) {
127 return isXMMLoadOpcode(Opcode) || isYMMLoadOpcode(Opcode);
128 }
129
130 std::map<unsigned, std::pair<unsigned, unsigned>> PotentialBlockedMemCpy{
131 {X86::MOVUPSrm, {X86::MOVUPSmr, X86::MOVAPSmr}},
132 {X86::MOVAPSrm, {X86::MOVUPSmr, X86::MOVAPSmr}},
133 {X86::VMOVUPSrm, {X86::VMOVUPSmr, X86::VMOVAPSmr}},
134 {X86::VMOVAPSrm, {X86::VMOVUPSmr, X86::VMOVAPSmr}},
135 {X86::VMOVUPDrm, {X86::VMOVUPDmr, X86::VMOVAPDmr}},
136 {X86::VMOVAPDrm, {X86::VMOVUPDmr, X86::VMOVAPDmr}},
137 {X86::VMOVDQUrm, {X86::VMOVDQUmr, X86::VMOVDQAmr}},
138 {X86::VMOVDQArm, {X86::VMOVDQUmr, X86::VMOVDQAmr}},
139 {X86::VMOVUPSZ128rm, {X86::VMOVUPSZ128mr, X86::VMOVAPSZ128mr}},
140 {X86::VMOVAPSZ128rm, {X86::VMOVUPSZ128mr, X86::VMOVAPSZ128mr}},
141 {X86::VMOVUPDZ128rm, {X86::VMOVUPDZ128mr, X86::VMOVAPDZ128mr}},
142 {X86::VMOVAPDZ128rm, {X86::VMOVUPDZ128mr, X86::VMOVAPDZ128mr}},
143 {X86::VMOVUPSYrm, {X86::VMOVUPSYmr, X86::VMOVAPSYmr}},
144 {X86::VMOVAPSYrm, {X86::VMOVUPSYmr, X86::VMOVAPSYmr}},
145 {X86::VMOVUPDYrm, {X86::VMOVUPDYmr, X86::VMOVAPDYmr}},
146 {X86::VMOVAPDYrm, {X86::VMOVUPDYmr, X86::VMOVAPDYmr}},
147 {X86::VMOVDQUYrm, {X86::VMOVDQUYmr, X86::VMOVDQAYmr}},
148 {X86::VMOVDQAYrm, {X86::VMOVDQUYmr, X86::VMOVDQAYmr}},
149 {X86::VMOVUPSZ256rm, {X86::VMOVUPSZ256mr, X86::VMOVAPSZ256mr}},
150 {X86::VMOVAPSZ256rm, {X86::VMOVUPSZ256mr, X86::VMOVAPSZ256mr}},
151 {X86::VMOVUPDZ256rm, {X86::VMOVUPDZ256mr, X86::VMOVAPDZ256mr}},
152 {X86::VMOVAPDZ256rm, {X86::VMOVUPDZ256mr, X86::VMOVAPDZ256mr}},
153 {X86::VMOVDQU64Z128rm, {X86::VMOVDQU64Z128mr, X86::VMOVDQA64Z128mr}},
154 {X86::VMOVDQA64Z128rm, {X86::VMOVDQU64Z128mr, X86::VMOVDQA64Z128mr}},
155 {X86::VMOVDQU32Z128rm, {X86::VMOVDQU32Z128mr, X86::VMOVDQA32Z128mr}},
156 {X86::VMOVDQA32Z128rm, {X86::VMOVDQU32Z128mr, X86::VMOVDQA32Z128mr}},
157 {X86::VMOVDQU64Z256rm, {X86::VMOVDQU64Z256mr, X86::VMOVDQA64Z256mr}},
158 {X86::VMOVDQA64Z256rm, {X86::VMOVDQU64Z256mr, X86::VMOVDQA64Z256mr}},
159 {X86::VMOVDQU32Z256rm, {X86::VMOVDQU32Z256mr, X86::VMOVDQA32Z256mr}},
160 {X86::VMOVDQA32Z256rm, {X86::VMOVDQU32Z256mr, X86::VMOVDQA32Z256mr}},
161 };
162
163 static bool isPotentialBlockedMemCpyPair(unsigned LdOpcode, unsigned StOpcode) {
164 auto PotentialStores = PotentialBlockedMemCpy.at(LdOpcode);
165 return PotentialStores.first == StOpcode ||
166 PotentialStores.second == StOpcode;
167 }
168
169 static bool isPotentialBlockingStoreInst(int Opcode, int LoadOpcode) {
170 bool PBlock = false;
171 PBlock |= Opcode == X86::MOV64mr || Opcode == X86::MOV64mi32 ||
172 Opcode == X86::MOV32mr || Opcode == X86::MOV32mi ||
173 Opcode == X86::MOV16mr || Opcode == X86::MOV16mi ||
174 Opcode == X86::MOV8mr || Opcode == X86::MOV8mi;
175 if (isYMMLoadOpcode(LoadOpcode))
176 PBlock |= Opcode == X86::VMOVUPSmr || Opcode == X86::VMOVAPSmr ||
177 Opcode == X86::VMOVUPDmr || Opcode == X86::VMOVAPDmr ||
178 Opcode == X86::VMOVDQUmr || Opcode == X86::VMOVDQAmr ||
179 Opcode == X86::VMOVUPSZ128mr || Opcode == X86::VMOVAPSZ128mr ||
180 Opcode == X86::VMOVUPDZ128mr || Opcode == X86::VMOVAPDZ128mr ||
181 Opcode == X86::VMOVDQU64Z128mr ||
182 Opcode == X86::VMOVDQA64Z128mr ||
183 Opcode == X86::VMOVDQU32Z128mr || Opcode == X86::VMOVDQA32Z128mr;
184 return PBlock;
185 }
186
187 static const int MOV128SZ = 16;
188 static const int MOV64SZ = 8;
189 static const int MOV32SZ = 4;
190 static const int MOV16SZ = 2;
191 static const int MOV8SZ = 1;
192
193 std::map<unsigned, unsigned> YMMtoXMMLoadMap = {
194 {X86::VMOVUPSYrm, X86::VMOVUPSrm},
195 {X86::VMOVAPSYrm, X86::VMOVUPSrm},
196 {X86::VMOVUPDYrm, X86::VMOVUPDrm},
197 {X86::VMOVAPDYrm, X86::VMOVUPDrm},
198 {X86::VMOVDQUYrm, X86::VMOVDQUrm},
199 {X86::VMOVDQAYrm, X86::VMOVDQUrm},
200 {X86::VMOVUPSZ256rm, X86::VMOVUPSZ128rm},
201 {X86::VMOVAPSZ256rm, X86::VMOVUPSZ128rm},
202 {X86::VMOVUPDZ256rm, X86::VMOVUPDZ128rm},
203 {X86::VMOVAPDZ256rm, X86::VMOVUPDZ128rm},
204 {X86::VMOVDQU64Z256rm, X86::VMOVDQU64Z128rm},
205 {X86::VMOVDQA64Z256rm, X86::VMOVDQU64Z128rm},
206 {X86::VMOVDQU32Z256rm, X86::VMOVDQU32Z128rm},
207 {X86::VMOVDQA32Z256rm, X86::VMOVDQU32Z128rm},
208 };
209
210 std::map<unsigned, unsigned> YMMtoXMMStoreMap = {
211 {X86::VMOVUPSYmr, X86::VMOVUPSmr},
212 {X86::VMOVAPSYmr, X86::VMOVUPSmr},
213 {X86::VMOVUPDYmr, X86::VMOVUPDmr},
214 {X86::VMOVAPDYmr, X86::VMOVUPDmr},
215 {X86::VMOVDQUYmr, X86::VMOVDQUmr},
216 {X86::VMOVDQAYmr, X86::VMOVDQUmr},
217 {X86::VMOVUPSZ256mr, X86::VMOVUPSZ128mr},
218 {X86::VMOVAPSZ256mr, X86::VMOVUPSZ128mr},
219 {X86::VMOVUPDZ256mr, X86::VMOVUPDZ128mr},
220 {X86::VMOVAPDZ256mr, X86::VMOVUPDZ128mr},
221 {X86::VMOVDQU64Z256mr, X86::VMOVDQU64Z128mr},
222 {X86::VMOVDQA64Z256mr, X86::VMOVDQU64Z128mr},
223 {X86::VMOVDQU32Z256mr, X86::VMOVDQU32Z128mr},
224 {X86::VMOVDQA32Z256mr, X86::VMOVDQU32Z128mr},
225 };
226
227 static int getAddrOffset(MachineInstr *MI) {
228 const MCInstrDesc &Descl = MI->getDesc();
229 int AddrOffset = X86II::getMemoryOperandNo(Descl.TSFlags);
230 assert(AddrOffset != -1 && "Expected Memory Operand");
231 AddrOffset += X86II::getOperandBias(Descl);
232 return AddrOffset;
233 }
234
235 static MachineOperand &getBaseOperand(MachineInstr *MI) {
236 int AddrOffset = getAddrOffset(MI);
237 return MI->getOperand(AddrOffset + X86::AddrBaseReg);
238 }
239
240 static MachineOperand &getDispOperand(MachineInstr *MI) {
241 int AddrOffset = getAddrOffset(MI);
242 return MI->getOperand(AddrOffset + X86::AddrDisp);
243 }
244
245 // Relevant addressing modes contain only base register and immediate
246 // displacement or frameindex and immediate displacement.
247 // TODO: Consider expanding to other addressing modes in the future
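// For instance (illustrative): MOV32mr %rdi, 1, %noreg, 8, %noreg, %eax,
// i.e. movl %eax, 8(%rdi), qualifies; a scaled-index form such as
// movl %eax, 8(%rdi,%rcx,4) or a segment-relative access does not.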
248 static bool isRelevantAddressingMode(MachineInstr *MI) {
249 int AddrOffset = getAddrOffset(MI);
250 MachineOperand &Base = MI->getOperand(AddrOffset + X86::AddrBaseReg);
251 MachineOperand &Disp = MI->getOperand(AddrOffset + X86::AddrDisp);
252 MachineOperand &Scale = MI->getOperand(AddrOffset + X86::AddrScaleAmt);
253 MachineOperand &Index = MI->getOperand(AddrOffset + X86::AddrIndexReg);
254 MachineOperand &Segment = MI->getOperand(AddrOffset + X86::AddrSegmentReg);
255
256 if (!((Base.isReg() && Base.getReg() != X86::NoRegister) || Base.isFI()))
257 return false;
258 if (!Disp.isImm())
259 return false;
260 if (Scale.getImm() != 1)
261 return false;
262 if (!(Index.isReg() && Index.getReg() == X86::NoRegister))
263 return false;
264 if (!(Segment.isReg() && Segment.getReg() == X86::NoRegister))
265 return false;
266 return true;
267 }
268
269 // Collect potentially blocking stores.
270 // Limit the number of instructions backwards we want to inspect
271 // since the effect of store block won't be visible if the store
272 // and load instructions have enough instructions in between to
273 // keep the core busy.
274 static const unsigned LIMIT = 20;
275 static SmallVector<MachineInstr *, 2>
276 findPotentialBlockers(MachineInstr *LoadInst) {
277 SmallVector<MachineInstr *, 2> PotentialBlockers;
278 unsigned BlockLimit = 0;
279 for (MachineBasicBlock::iterator LI = LoadInst,
280 BB = LoadInst->getParent()->begin();
281 LI != BB; --LI) {
282 BlockLimit++;
283 if (BlockLimit >= LIMIT)
284 break;
285 MachineInstr &MI = *LI;
286 if (MI.getDesc().isCall())
287 break;
288 PotentialBlockers.push_back(&MI);
289 }
290 // If we didn't reach the instruction limit, try the predecessor blocks.
291 // Ideally we should traverse the predecessor blocks in depth with some
292 // coloring algorithm, but for now let's just look at the first-order
293 // predecessors.
294 if (BlockLimit < LIMIT) {
295 MachineBasicBlock *MBB = LoadInst->getParent();
296 int LimitLeft = LIMIT - BlockLimit;
297 for (MachineBasicBlock::pred_iterator PB = MBB->pred_begin(),
298 PE = MBB->pred_end();
299 PB != PE; ++PB) {
300 MachineBasicBlock *PMBB = *PB;
301 int PredLimit = 0;
302 for (MachineBasicBlock::reverse_iterator PMI = PMBB->rbegin(),
303 PME = PMBB->rend();
304 PMI != PME; ++PMI) {
305 PredLimit++;
306 if (PredLimit >= LimitLeft)
307 break;
308 if (PMI->getDesc().isCall())
309 break;
310 PotentialBlockers.push_back(&*PMI);
311 }
312 }
313 }
314 return PotentialBlockers;
315 }
316
317 void FixupSFBPass::buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode,
318 int64_t LoadDisp, MachineInstr *StoreInst,
319 unsigned NStoreOpcode, int64_t StoreDisp,
320 unsigned Size, int64_t LMMOffset,
321 int64_t SMMOffset) {
322 MachineOperand &LoadBase = getBaseOperand(LoadInst);
323 MachineOperand &StoreBase = getBaseOperand(StoreInst);
324 MachineBasicBlock *MBB = LoadInst->getParent();
325 MachineMemOperand *LMMO = *LoadInst->memoperands_begin();
326 MachineMemOperand *SMMO = *StoreInst->memoperands_begin();
327
328 unsigned Reg1 = MRI->createVirtualRegister(
329 TII->getRegClass(TII->get(NLoadOpcode), 0, TRI, *(MBB->getParent())));
330 BuildMI(*MBB, LoadInst, LoadInst->getDebugLoc(), TII->get(NLoadOpcode), Reg1)
331 .add(LoadBase)
332 .addImm(1)
333 .addReg(X86::NoRegister)
334 .addImm(LoadDisp)
335 .addReg(X86::NoRegister)
336 .addMemOperand(
337 MBB->getParent()->getMachineMemOperand(LMMO, LMMOffset, Size));
338 DEBUG(LoadInst->getPrevNode()->dump());
339 // If the load and store are consecutive, use the loadInst location to
340 // reduce register pressure.
341 MachineInstr *StInst = StoreInst;
342 if (StoreInst->getPrevNode() == LoadInst)
343 StInst = LoadInst;
344 BuildMI(*MBB, StInst, StInst->getDebugLoc(), TII->get(NStoreOpcode))
345 .add(StoreBase)
346 .addImm(1)
347 .addReg(X86::NoRegister)
348 .addImm(StoreDisp)
349 .addReg(X86::NoRegister)
350 .addReg(Reg1)
351 .addMemOperand(
352 MBB->getParent()->getMachineMemOperand(SMMO, SMMOffset, Size));
353 DEBUG(StInst->getPrevNode()->dump());
354 }
355
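// An illustrative example (not from the original file) of the greedy size
// division below: buildCopies(10, ...) for an XMM load emits MOV64 (8 bytes)
// + MOV16 (2 bytes) on a 64-bit target, and MOV32 + MOV32 + MOV16 on a
// 32-bit target, where the 64-bit GPR moves are unavailable.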
356 void FixupSFBPass::buildCopies(int Size, MachineInstr *LoadInst,
357 int64_t LdDispImm, MachineInstr *StoreInst,
358 int64_t StDispImm, int64_t LMMOffset,
359 int64_t SMMOffset) {
360 int LdDisp = LdDispImm;
361 int StDisp = StDispImm;
362 while (Size > 0) {
363 if ((Size - MOV128SZ >= 0) && isYMMLoadOpcode(LoadInst->getOpcode())) {
364 Size = Size - MOV128SZ;
365 buildCopy(LoadInst, YMMtoXMMLoadMap.at(LoadInst->getOpcode()), LdDisp,
366 StoreInst, YMMtoXMMStoreMap.at(StoreInst->getOpcode()), StDisp,
367 MOV128SZ, LMMOffset, SMMOffset);
368 LdDisp += MOV128SZ;
369 StDisp += MOV128SZ;
370 LMMOffset += MOV128SZ;
371 SMMOffset += MOV128SZ;
372 continue;
373 }
374 if (Size - MOV64SZ >= 0 && Is64Bit) {
375 Size = Size - MOV64SZ;
376 buildCopy(LoadInst, X86::MOV64rm, LdDisp, StoreInst, X86::MOV64mr, StDisp,
377 MOV64SZ, LMMOffset, SMMOffset);
378 LdDisp += MOV64SZ;
379 StDisp += MOV64SZ;
380 LMMOffset += MOV64SZ;
381 SMMOffset += MOV64SZ;
382 continue;
383 }
384 if (Size - MOV32SZ >= 0) {
385 Size = Size - MOV32SZ;
386 buildCopy(LoadInst, X86::MOV32rm, LdDisp, StoreInst, X86::MOV32mr, StDisp,
387 MOV32SZ, LMMOffset, SMMOffset);
388 LdDisp += MOV32SZ;
389 StDisp += MOV32SZ;
390 LMMOffset += MOV32SZ;
391 SMMOffset += MOV32SZ;
392 continue;
393 }
394 if (Size - MOV16SZ >= 0) {
395 Size = Size - MOV16SZ;
396 buildCopy(LoadInst, X86::MOV16rm, LdDisp, StoreInst, X86::MOV16mr, StDisp,
397 MOV16SZ, LMMOffset, SMMOffset);
398 LdDisp += MOV16SZ;
399 StDisp += MOV16SZ;
400 LMMOffset += MOV16SZ;
401 SMMOffset += MOV16SZ;
402 continue;
403 }
404 if (Size - MOV8SZ >= 0) {
405 Size = Size - MOV8SZ;
406 buildCopy(LoadInst, X86::MOV8rm, LdDisp, StoreInst, X86::MOV8mr, StDisp,
407 MOV8SZ, LMMOffset, SMMOffset);
408 LdDisp += MOV8SZ;
409 StDisp += MOV8SZ;
410 LMMOffset += MOV8SZ;
411 SMMOffset += MOV8SZ;
412 continue;
413 }
414 }
415 assert(Size == 0 && "Wrong size division");
416 }
417
418 static void updateKillStatus(MachineInstr *LoadInst, MachineInstr *StoreInst) {
419 MachineOperand &LoadBase = getBaseOperand(LoadInst);
420 MachineOperand &StoreBase = getBaseOperand(StoreInst);
421 if (LoadBase.isReg()) {
422 MachineInstr *LastLoad = LoadInst->getPrevNode();
423 // If the original load and store to xmm/ymm were consecutive
424 // then the partial copies were also created in
425 // a consecutive order to reduce register pressure,
426 // and the location of the last load is before the last store.
427 if (StoreInst->getPrevNode() == LoadInst)
428 LastLoad = LoadInst->getPrevNode()->getPrevNode();
429 getBaseOperand(LastLoad).setIsKill(LoadBase.isKill());
430 }
431 if (StoreBase.isReg()) {
432 MachineInstr *StInst = StoreInst;
433 if (StoreInst->getPrevNode() == LoadInst)
434 StInst = LoadInst;
435 getBaseOperand(StInst->getPrevNode()).setIsKill(StoreBase.isKill());
436 }
437 }
438
439 void FixupSFBPass::findPotentiallylBlockedCopies(MachineFunction &MF) {
440 for (auto &MBB : MF)
441 for (auto &MI : MBB)
442 if (isPotentialBlockedMemCpyLd(MI.getOpcode())) {
443 int DefVR = MI.getOperand(0).getReg();
444 if (MRI->hasOneUse(DefVR))
445 for (auto UI = MRI->use_nodbg_begin(DefVR), UE = MRI->use_nodbg_end();
446 UI != UE;) {
447 MachineOperand &StoreMO = *UI++;
448 MachineInstr &StoreMI = *StoreMO.getParent();
449 if (isPotentialBlockedMemCpyPair(MI.getOpcode(),
450 StoreMI.getOpcode()) &&
451 (StoreMI.getParent() == MI.getParent()))
452 if (isRelevantAddressingMode(&MI) &&
453 isRelevantAddressingMode(&StoreMI))
454 BlockedLoadsStores.push_back(
455 std::pair<MachineInstr *, MachineInstr *>(&MI, &StoreMI));
456 }
457 }
458 }
459 unsigned FixupSFBPass::getRegSizeInBytes(MachineInstr *LoadInst) {
460 auto TRC = TII->getRegClass(TII->get(LoadInst->getOpcode()), 0, TRI,
461 *LoadInst->getParent()->getParent());
462 return TRI->getRegSizeInBits(*TRC) / 8;
463 }
464
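// A worked example (illustrative, not from the original file): a 16-byte
// load at displacement 0 with one 4-byte blocking store at displacement 4
// (BlockingStoresDisp = {4 -> 4}) is broken into three groups:
//   buildCopies(4, disp 0), buildCopies(4, disp 4), buildCopies(8, disp 8)
// so the previously blocked bytes [4,8) are reloaded by a load no wider
// than the store that wrote them.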
465 void FixupSFBPass::breakBlockedCopies(
466 MachineInstr *LoadInst, MachineInstr *StoreInst,
467 const std::map<int64_t, unsigned> &BlockingStoresDisp) {
468 int64_t LdDispImm = getDispOperand(LoadInst).getImm();
469 int64_t StDispImm = getDispOperand(StoreInst).getImm();
470 int64_t LMMOffset = (*LoadInst->memoperands_begin())->getOffset();
471 int64_t SMMOffset = (*StoreInst->memoperands_begin())->getOffset();
472
473 int64_t LdDisp1 = LdDispImm;
474 int64_t LdDisp2 = 0;
475 int64_t StDisp1 = StDispImm;
476 int64_t StDisp2 = 0;
477 unsigned Size1 = 0;
478 unsigned Size2 = 0;
479 int64_t LdStDelta = StDispImm - LdDispImm;
480 for (auto inst : BlockingStoresDisp) {
481 LdDisp2 = inst.first;
482 StDisp2 = inst.first + LdStDelta;
483 Size1 = std::abs(std::abs(LdDisp2) - std::abs(LdDisp1));
484 Size2 = inst.second;
485 buildCopies(Size1, LoadInst, LdDisp1, StoreInst, StDisp1, LMMOffset,
486 SMMOffset);
487 buildCopies(Size2, LoadInst, LdDisp2, StoreInst, StDisp2, LMMOffset + Size1,
488 SMMOffset + Size1);
489 LdDisp1 = LdDisp2 + Size2;
490 StDisp1 = StDisp2 + Size2;
491 LMMOffset += Size1 + Size2;
492 SMMOffset += Size1 + Size2;
493 }
494 unsigned Size3 = (LdDispImm + getRegSizeInBytes(LoadInst)) - LdDisp1;
495 buildCopies(Size3, LoadInst, LdDisp1, StoreInst, StDisp1, LMMOffset,
496 LMMOffset);
497 }
498
499 bool FixupSFBPass::runOnMachineFunction(MachineFunction &MF) {
500 bool Changed = false;
501
502 if (DisableX86FixupSFB || skipFunction(MF.getFunction()))
503 return false;
504
505 MRI = &MF.getRegInfo();
506 assert(MRI->isSSA() && "Expected MIR to be in SSA form");
507 TII = MF.getSubtarget().getInstrInfo();
508 TRI = MF.getSubtarget().getRegisterInfo();
509 Is64Bit = MF.getSubtarget().is64Bit();
510 DEBUG(dbgs() << "Start X86FixupSFB\n";);
511 // Look for a load then a store to XMM/YMM which look like a memcpy
512 findPotentiallylBlockedCopies(MF);
513
514 for (auto LoadStoreInst : BlockedLoadsStores) {
515 MachineInstr *LoadInst = LoadStoreInst.first;
516 SmallVector<MachineInstr *, 2> PotentialBlockers =
517 findPotentialBlockers(LoadInst);
518
519 MachineOperand &LoadBase = getBaseOperand(LoadInst);
520 int64_t LdDispImm = getDispOperand(LoadInst).getImm();
521 std::map<int64_t, unsigned> BlockingStoresDisp;
522 int LdBaseReg = LoadBase.isReg() ? LoadBase.getReg() : LoadBase.getIndex();
523
524 for (auto PBInst : PotentialBlockers) {
525 if (isPotentialBlockingStoreInst(PBInst->getOpcode(),
526 LoadInst->getOpcode())) {
527 if (!isRelevantAddressingMode(PBInst))
528 continue;
529 MachineOperand &PBstoreBase = getBaseOperand(PBInst);
530 int64_t PBstDispImm = getDispOperand(PBInst).getImm();
531 assert(PBInst->hasOneMemOperand() && "Expected One Memory Operand");
532 unsigned PBstSize = (*PBInst->memoperands_begin())->getSize();
533 int PBstBaseReg =
534 PBstoreBase.isReg() ? PBstoreBase.getReg() : PBstoreBase.getIndex();
535 // This check doesn't cover all cases, but it will suffice for now.
536 // TODO: take branch probability into consideration; if the blocking
537 // store is in an unreached block, breaking the memcpy could lose
538 // performance.
539 if (((LoadBase.isReg() && PBstoreBase.isReg()) ||
540 (LoadBase.isFI() && PBstoreBase.isFI())) &&
541 LdBaseReg == PBstBaseReg &&
542 ((PBstDispImm >= LdDispImm) &&
543 (PBstDispImm <=
544 LdDispImm + (getRegSizeInBytes(LoadInst) - PBstSize)))) {
545 if (BlockingStoresDisp.count(PBstDispImm)) {
546 if (BlockingStoresDisp[PBstDispImm] > PBstSize)
547 BlockingStoresDisp[PBstDispImm] = PBstSize;
548
549 } else
550 BlockingStoresDisp[PBstDispImm] = PBstSize;
551 }
552 }
553 }
554
555 if (BlockingStoresDisp.size() == 0)
556 continue;
557
558 // We found a store forward block, break the memcpy's load and store
559 // into smaller copies such that each smaller store that was causing
560 // a store block would now be copied separately.
561 MachineInstr *StoreInst = LoadStoreInst.second;
562 DEBUG(dbgs() << "Blocked load and store instructions: \n");
563 DEBUG(LoadInst->dump());
564 DEBUG(StoreInst->dump());
565 DEBUG(dbgs() << "Replaced with:\n");
566 breakBlockedCopies(LoadInst, StoreInst, BlockingStoresDisp);
567 updateKillStatus(LoadInst, StoreInst);
568 ForRemoval.push_back(LoadInst);
569 ForRemoval.push_back(StoreInst);
570 }
571 for (auto RemovedInst : ForRemoval) {
572 RemovedInst->eraseFromParent();
573 }
574 ForRemoval.clear();
575 BlockedLoadsStores.clear();
576 DEBUG(dbgs() << "End X86FixupSFB\n";);
577
578 return Changed;
579 }
448 448   addPass(createX86FixupSetCC());
449 449   addPass(createX86OptimizeLEAs());
450 450   addPass(createX86CallFrameOptimization());
    451 + addPass(createX86FixupSFB());
451 452 }
452 453
453 454   addPass(createX86WinAllocaExpander());
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefix=CHECK
2 ; RUN: llc < %s -mtriple=i686-linux --disable-fixup-SFB | FileCheck %s --check-prefix=DISABLED
3 ; RUN: llc < %s -mtriple=i686-linux -mattr +sse4.1 | FileCheck %s -check-prefix=CHECK-AVX2
4 ; RUN: llc < %s -mtriple=i686-linux -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s -check-prefix=CHECK-AVX512
5
6 %struct.S = type { i32, i32, i32, i32 }
7
8 ; Function Attrs: nounwind uwtable
9 define void @test_conditional_block(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4) local_unnamed_addr #0 {
10 ; CHECK-LABEL: test_conditional_block:
11 ; CHECK: # %bb.0: # %entry
12 ; CHECK-NEXT: pushl %edi
13 ; CHECK-NEXT: .cfi_def_cfa_offset 8
14 ; CHECK-NEXT: pushl %esi
15 ; CHECK-NEXT: .cfi_def_cfa_offset 12
16 ; CHECK-NEXT: .cfi_offset %esi, -12
17 ; CHECK-NEXT: .cfi_offset %edi, -8
18 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
19 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
20 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
21 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
22 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
23 ; CHECK-NEXT: cmpl $18, %edi
24 ; CHECK-NEXT: jl .LBB0_2
25 ; CHECK-NEXT: # %bb.1: # %if.then
26 ; CHECK-NEXT: movl %edi, 4(%ecx)
27 ; CHECK-NEXT: .LBB0_2: # %if.end
28 ; CHECK-NEXT: movups (%esi), %xmm0
29 ; CHECK-NEXT: movups %xmm0, (%edx)
30 ; CHECK-NEXT: movl (%ecx), %edx
31 ; CHECK-NEXT: movl %edx, (%eax)
32 ; CHECK-NEXT: movl 4(%ecx), %edx
33 ; CHECK-NEXT: movl %edx, 4(%eax)
34 ; CHECK-NEXT: movl 8(%ecx), %edx
35 ; CHECK-NEXT: movl %edx, 8(%eax)
36 ; CHECK-NEXT: movl 12(%ecx), %ecx
37 ; CHECK-NEXT: movl %ecx, 12(%eax)
38 ; CHECK-NEXT: popl %esi
39 ; CHECK-NEXT: popl %edi
40 ; CHECK-NEXT: retl
41 ;
42 ; DISABLED-LABEL: test_conditional_block:
43 ; DISABLED: # %bb.0: # %entry
44 ; DISABLED-NEXT: pushl %edi
45 ; DISABLED-NEXT: .cfi_def_cfa_offset 8
46 ; DISABLED-NEXT: pushl %esi
47 ; DISABLED-NEXT: .cfi_def_cfa_offset 12
48 ; DISABLED-NEXT: .cfi_offset %esi, -12
49 ; DISABLED-NEXT: .cfi_offset %edi, -8
50 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx
51 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx
52 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edi
53 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax
54 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi
55 ; DISABLED-NEXT: cmpl $18, %edi
56 ; DISABLED-NEXT: jl .LBB0_2
57 ; DISABLED-NEXT: # %bb.1: # %if.then
58 ; DISABLED-NEXT: movl %edi, 4(%esi)
59 ; DISABLED-NEXT: .LBB0_2: # %if.end
60 ; DISABLED-NEXT: movups (%edx), %xmm0
61 ; DISABLED-NEXT: movups %xmm0, (%ecx)
62 ; DISABLED-NEXT: movups (%esi), %xmm0
63 ; DISABLED-NEXT: movups %xmm0, (%eax)
64 ; DISABLED-NEXT: popl %esi
65 ; DISABLED-NEXT: popl %edi
66 ; DISABLED-NEXT: retl
67 ;
68 ; CHECK-AVX2-LABEL: test_conditional_block:
69 ; CHECK-AVX2: # %bb.0: # %entry
70 ; CHECK-AVX2-NEXT: pushl %edi
71 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
72 ; CHECK-AVX2-NEXT: pushl %esi
73 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12
74 ; CHECK-AVX2-NEXT: .cfi_offset %esi, -12
75 ; CHECK-AVX2-NEXT: .cfi_offset %edi, -8
76 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi
77 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
78 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edi
79 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
80 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
81 ; CHECK-AVX2-NEXT: cmpl $18, %edi
82 ; CHECK-AVX2-NEXT: jl .LBB0_2
83 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
84 ; CHECK-AVX2-NEXT: movl %edi, 4(%ecx)
85 ; CHECK-AVX2-NEXT: .LBB0_2: # %if.end
86 ; CHECK-AVX2-NEXT: movups (%esi), %xmm0
87 ; CHECK-AVX2-NEXT: movups %xmm0, (%edx)
88 ; CHECK-AVX2-NEXT: movl (%ecx), %edx
89 ; CHECK-AVX2-NEXT: movl %edx, (%eax)
90 ; CHECK-AVX2-NEXT: movl 4(%ecx), %edx
91 ; CHECK-AVX2-NEXT: movl %edx, 4(%eax)
92 ; CHECK-AVX2-NEXT: movl 8(%ecx), %edx
93 ; CHECK-AVX2-NEXT: movl %edx, 8(%eax)
94 ; CHECK-AVX2-NEXT: movl 12(%ecx), %ecx
95 ; CHECK-AVX2-NEXT: movl %ecx, 12(%eax)
96 ; CHECK-AVX2-NEXT: popl %esi
97 ; CHECK-AVX2-NEXT: popl %edi
98 ; CHECK-AVX2-NEXT: retl
99 ;
100 ; CHECK-AVX512-LABEL: test_conditional_block:
101 ; CHECK-AVX512: # %bb.0: # %entry
102 ; CHECK-AVX512-NEXT: pushl %edi
103 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
104 ; CHECK-AVX512-NEXT: pushl %esi
105 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 12
106 ; CHECK-AVX512-NEXT: .cfi_offset %esi, -12
107 ; CHECK-AVX512-NEXT: .cfi_offset %edi, -8
108 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi
109 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx
110 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edi
111 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
112 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
113 ; CHECK-AVX512-NEXT: cmpl $18, %edi
114 ; CHECK-AVX512-NEXT: jl .LBB0_2
115 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
116 ; CHECK-AVX512-NEXT: movl %edi, 4(%ecx)
117 ; CHECK-AVX512-NEXT: .LBB0_2: # %if.end
118 ; CHECK-AVX512-NEXT: vmovups (%esi), %xmm0
119 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%edx)
120 ; CHECK-AVX512-NEXT: movl (%ecx), %edx
121 ; CHECK-AVX512-NEXT: movl %edx, (%eax)
122 ; CHECK-AVX512-NEXT: movl 4(%ecx), %edx
123 ; CHECK-AVX512-NEXT: movl %edx, 4(%eax)
124 ; CHECK-AVX512-NEXT: movl 8(%ecx), %edx
125 ; CHECK-AVX512-NEXT: movl %edx, 8(%eax)
126 ; CHECK-AVX512-NEXT: movl 12(%ecx), %ecx
127 ; CHECK-AVX512-NEXT: movl %ecx, 12(%eax)
128 ; CHECK-AVX512-NEXT: popl %esi
129 ; CHECK-AVX512-NEXT: popl %edi
130 ; CHECK-AVX512-NEXT: retl
131 entry:
132 %cmp = icmp sgt i32 %x, 17
133 br i1 %cmp, label %if.then, label %if.end
134
135 if.then: ; preds = %entry
136 %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
137 store i32 %x, i32* %b, align 4
138 br label %if.end
139
140 if.end: ; preds = %if.then, %entry
141 %0 = bitcast %struct.S* %s3 to i8*
142 %1 = bitcast %struct.S* %s4 to i8*
143 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
144 %2 = bitcast %struct.S* %s2 to i8*
145 %3 = bitcast %struct.S* %s1 to i8*
146 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
147 ret void
148 }
149
150 ; Function Attrs: nounwind uwtable
151 define void @test_imm_store(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3) local_unnamed_addr #0 {
152 ; CHECK-LABEL: test_imm_store:
153 ; CHECK: # %bb.0: # %entry
154 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
155 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
156 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
157 ; CHECK-NEXT: movl $0, (%edx)
158 ; CHECK-NEXT: movl $1, (%ecx)
159 ; CHECK-NEXT: movl (%edx), %ecx
160 ; CHECK-NEXT: movl %ecx, (%eax)
161 ; CHECK-NEXT: movl 4(%edx), %ecx
162 ; CHECK-NEXT: movl %ecx, 4(%eax)
163 ; CHECK-NEXT: movl 8(%edx), %ecx
164 ; CHECK-NEXT: movl %ecx, 8(%eax)
165 ; CHECK-NEXT: movl 12(%edx), %ecx
166 ; CHECK-NEXT: movl %ecx, 12(%eax)
167 ; CHECK-NEXT: retl
168 ;
169 ; DISABLED-LABEL: test_imm_store:
170 ; DISABLED: # %bb.0: # %entry
171 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax
172 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx
173 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx
174 ; DISABLED-NEXT: movl $0, (%edx)
175 ; DISABLED-NEXT: movl $1, (%ecx)
176 ; DISABLED-NEXT: movups (%edx), %xmm0
177 ; DISABLED-NEXT: movups %xmm0, (%eax)
178 ; DISABLED-NEXT: retl
179 ;
180 ; CHECK-AVX2-LABEL: test_imm_store:
181 ; CHECK-AVX2: # %bb.0: # %entry
182 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
183 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
184 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
185 ; CHECK-AVX2-NEXT: movl $0, (%edx)
186 ; CHECK-AVX2-NEXT: movl $1, (%ecx)
187 ; CHECK-AVX2-NEXT: movl (%edx), %ecx
188 ; CHECK-AVX2-NEXT: movl %ecx, (%eax)
189 ; CHECK-AVX2-NEXT: movl 4(%edx), %ecx
190 ; CHECK-AVX2-NEXT: movl %ecx, 4(%eax)
191 ; CHECK-AVX2-NEXT: movl 8(%edx), %ecx
192 ; CHECK-AVX2-NEXT: movl %ecx, 8(%eax)
193 ; CHECK-AVX2-NEXT: movl 12(%edx), %ecx
194 ; CHECK-AVX2-NEXT: movl %ecx, 12(%eax)
195 ; CHECK-AVX2-NEXT: retl
196 ;
197 ; CHECK-AVX512-LABEL: test_imm_store:
198 ; CHECK-AVX512: # %bb.0: # %entry
199 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
200 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
201 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx
202 ; CHECK-AVX512-NEXT: movl $0, (%edx)
203 ; CHECK-AVX512-NEXT: movl $1, (%ecx)
204 ; CHECK-AVX512-NEXT: movl (%edx), %ecx
205 ; CHECK-AVX512-NEXT: movl %ecx, (%eax)
206 ; CHECK-AVX512-NEXT: movl 4(%edx), %ecx
207 ; CHECK-AVX512-NEXT: movl %ecx, 4(%eax)
208 ; CHECK-AVX512-NEXT: movl 8(%edx), %ecx
209 ; CHECK-AVX512-NEXT: movl %ecx, 8(%eax)
210 ; CHECK-AVX512-NEXT: movl 12(%edx), %ecx
211 ; CHECK-AVX512-NEXT: movl %ecx, 12(%eax)
212 ; CHECK-AVX512-NEXT: retl
213 entry:
214 %a = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 0
215 store i32 0, i32* %a, align 4
216 %a1 = getelementptr inbounds %struct.S, %struct.S* %s3, i64 0, i32 0
217 store i32 1, i32* %a1, align 4
218 %0 = bitcast %struct.S* %s2 to i8*
219 %1 = bitcast %struct.S* %s1 to i8*
220 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
221 ret void
222 }
223
224 ; Function Attrs: nounwind uwtable
225 define void @test_nondirect_br(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
226 ; CHECK-LABEL: test_nondirect_br:
227 ; CHECK: # %bb.0: # %entry
228 ; CHECK-NEXT: pushl %edi
229 ; CHECK-NEXT: .cfi_def_cfa_offset 8
230 ; CHECK-NEXT: pushl %esi
231 ; CHECK-NEXT: .cfi_def_cfa_offset 12
232 ; CHECK-NEXT: .cfi_offset %esi, -12
233 ; CHECK-NEXT: .cfi_offset %edi, -8
234 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
235 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
236 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
237 ; CHECK-NEXT: cmpl $18, %ecx
238 ; CHECK-NEXT: jl .LBB2_2
239 ; CHECK-NEXT: # %bb.1: # %if.then
240 ; CHECK-NEXT: movl %ecx, 4(%eax)
241 ; CHECK-NEXT: .LBB2_2: # %if.end
242 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
243 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
244 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
245 ; CHECK-NEXT: cmpl $14, %edx
246 ; CHECK-NEXT: jl .LBB2_4
247 ; CHECK-NEXT: # %bb.3: # %if.then2
248 ; CHECK-NEXT: movl %edx, 12(%eax)
249 ; CHECK-NEXT: .LBB2_4: # %if.end3
250 ; CHECK-NEXT: movups (%edi), %xmm0
251 ; CHECK-NEXT: movups %xmm0, (%esi)
252 ; CHECK-NEXT: movl (%eax), %edx
253 ; CHECK-NEXT: movl %edx, (%ecx)
254 ; CHECK-NEXT: movl 4(%eax), %edx
255 ; CHECK-NEXT: movl %edx, 4(%ecx)
256 ; CHECK-NEXT: movl 8(%eax), %edx
257 ; CHECK-NEXT: movl %edx, 8(%ecx)
258 ; CHECK-NEXT: movl 12(%eax), %eax
259 ; CHECK-NEXT: movl %eax, 12(%ecx)
260 ; CHECK-NEXT: popl %esi
261 ; CHECK-NEXT: popl %edi
262 ; CHECK-NEXT: retl
263 ;
264 ; DISABLED-LABEL: test_nondirect_br:
265 ; DISABLED: # %bb.0: # %entry
266 ; DISABLED-NEXT: pushl %edi
267 ; DISABLED-NEXT: .cfi_def_cfa_offset 8
268 ; DISABLED-NEXT: pushl %esi
269 ; DISABLED-NEXT: .cfi_def_cfa_offset 12
270 ; DISABLED-NEXT: .cfi_offset %esi, -12
271 ; DISABLED-NEXT: .cfi_offset %edi, -8
272 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx
273 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx
274 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax
275 ; DISABLED-NEXT: cmpl $18, %edx
276 ; DISABLED-NEXT: jl .LBB2_2
277 ; DISABLED-NEXT: # %bb.1: # %if.then
278 ; DISABLED-NEXT: movl %edx, 4(%eax)
279 ; DISABLED-NEXT: .LBB2_2: # %if.end
280 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edi
281 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi
282 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx
283 ; DISABLED-NEXT: cmpl $14, %ecx
284 ; DISABLED-NEXT: jl .LBB2_4
285 ; DISABLED-NEXT: # %bb.3: # %if.then2
286 ; DISABLED-NEXT: movl %ecx, 12(%eax)
287 ; DISABLED-NEXT: .LBB2_4: # %if.end3
288 ; DISABLED-NEXT: movups (%edi), %xmm0
289 ; DISABLED-NEXT: movups %xmm0, (%esi)
290 ; DISABLED-NEXT: movups (%eax), %xmm0
291 ; DISABLED-NEXT: movups %xmm0, (%edx)
292 ; DISABLED-NEXT: popl %esi
293 ; DISABLED-NEXT: popl %edi
294 ; DISABLED-NEXT: retl
295 ;
296 ; CHECK-AVX2-LABEL: test_nondirect_br:
297 ; CHECK-AVX2: # %bb.0: # %entry
298 ; CHECK-AVX2-NEXT: pushl %edi
299 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
300 ; CHECK-AVX2-NEXT: pushl %esi
301 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12
302 ; CHECK-AVX2-NEXT: .cfi_offset %esi, -12
303 ; CHECK-AVX2-NEXT: .cfi_offset %edi, -8
304 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
305 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
306 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
307 ; CHECK-AVX2-NEXT: cmpl $18, %ecx
308 ; CHECK-AVX2-NEXT: jl .LBB2_2
309 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
310 ; CHECK-AVX2-NEXT: movl %ecx, 4(%eax)
311 ; CHECK-AVX2-NEXT: .LBB2_2: # %if.end
312 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edi
313 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi
314 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
315 ; CHECK-AVX2-NEXT: cmpl $14, %edx
316 ; CHECK-AVX2-NEXT: jl .LBB2_4
317 ; CHECK-AVX2-NEXT: # %bb.3: # %if.then2
318 ; CHECK-AVX2-NEXT: movl %edx, 12(%eax)
319 ; CHECK-AVX2-NEXT: .LBB2_4: # %if.end3
320 ; CHECK-AVX2-NEXT: movups (%edi), %xmm0
321 ; CHECK-AVX2-NEXT: movups %xmm0, (%esi)
322 ; CHECK-AVX2-NEXT: movl (%eax), %edx
323 ; CHECK-AVX2-NEXT: movl %edx, (%ecx)
324 ; CHECK-AVX2-NEXT: movl 4(%eax), %edx
325 ; CHECK-AVX2-NEXT: movl %edx, 4(%ecx)
326 ; CHECK-AVX2-NEXT: movl 8(%eax), %edx
327 ; CHECK-AVX2-NEXT: movl %edx, 8(%ecx)
328 ; CHECK-AVX2-NEXT: movl 12(%eax), %eax
329 ; CHECK-AVX2-NEXT: movl %eax, 12(%ecx)
330 ; CHECK-AVX2-NEXT: popl %esi
331 ; CHECK-AVX2-NEXT: popl %edi
332 ; CHECK-AVX2-NEXT: retl
333 ;
334 ; CHECK-AVX512-LABEL: test_nondirect_br:
335 ; CHECK-AVX512: # %bb.0: # %entry
336 ; CHECK-AVX512-NEXT: pushl %edi
337 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
338 ; CHECK-AVX512-NEXT: pushl %esi
339 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 12
340 ; CHECK-AVX512-NEXT: .cfi_offset %esi, -12
341 ; CHECK-AVX512-NEXT: .cfi_offset %edi, -8
342 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx
343 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
344 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
345 ; CHECK-AVX512-NEXT: cmpl $18, %ecx
346 ; CHECK-AVX512-NEXT: jl .LBB2_2
347 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
348 ; CHECK-AVX512-NEXT: movl %ecx, 4(%eax)
349 ; CHECK-AVX512-NEXT: .LBB2_2: # %if.end
350 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edi
351 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi
352 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
353 ; CHECK-AVX512-NEXT: cmpl $14, %edx
354 ; CHECK-AVX512-NEXT: jl .LBB2_4
355 ; CHECK-AVX512-NEXT: # %bb.3: # %if.then2
356 ; CHECK-AVX512-NEXT: movl %edx, 12(%eax)
357 ; CHECK-AVX512-NEXT: .LBB2_4: # %if.end3
358 ; CHECK-AVX512-NEXT: vmovups (%edi), %xmm0
359 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%esi)
360 ; CHECK-AVX512-NEXT: movl (%eax), %edx
361 ; CHECK-AVX512-NEXT: movl %edx, (%ecx)
362 ; CHECK-AVX512-NEXT: movl 4(%eax), %edx
363 ; CHECK-AVX512-NEXT: movl %edx, 4(%ecx)
364 ; CHECK-AVX512-NEXT: movl 8(%eax), %edx
365 ; CHECK-AVX512-NEXT: movl %edx, 8(%ecx)
366 ; CHECK-AVX512-NEXT: movl 12(%eax), %eax
367 ; CHECK-AVX512-NEXT: movl %eax, 12(%ecx)
368 ; CHECK-AVX512-NEXT: popl %esi
369 ; CHECK-AVX512-NEXT: popl %edi
370 ; CHECK-AVX512-NEXT: retl
371 entry:
372 %cmp = icmp sgt i32 %x, 17
373 br i1 %cmp, label %if.then, label %if.end
374
375 if.then: ; preds = %entry
376 %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
377 store i32 %x, i32* %b, align 4
378 br label %if.end
379
380 if.end: ; preds = %if.then, %entry
381 %cmp1 = icmp sgt i32 %x2, 13
382 br i1 %cmp1, label %if.then2, label %if.end3
383
384 if.then2: ; preds = %if.end
385 %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
386 store i32 %x2, i32* %d, align 4
387 br label %if.end3
388
389 if.end3: ; preds = %if.then2, %if.end
390 %0 = bitcast %struct.S* %s3 to i8*
391 %1 = bitcast %struct.S* %s4 to i8*
392 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
393 %2 = bitcast %struct.S* %s2 to i8*
394 %3 = bitcast %struct.S* %s1 to i8*
395 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
396 ret void
397 }
398
399 ; Function Attrs: nounwind uwtable
400 define void @test_2preds_block(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
401 ; CHECK-LABEL: test_2preds_block:
402 ; CHECK: # %bb.0: # %entry
403 ; CHECK-NEXT: pushl %ebx
404 ; CHECK-NEXT: .cfi_def_cfa_offset 8
405 ; CHECK-NEXT: pushl %edi
406 ; CHECK-NEXT: .cfi_def_cfa_offset 12
407 ; CHECK-NEXT: pushl %esi
408 ; CHECK-NEXT: .cfi_def_cfa_offset 16
409 ; CHECK-NEXT: .cfi_offset %esi, -16
410 ; CHECK-NEXT: .cfi_offset %edi, -12
411 ; CHECK-NEXT: .cfi_offset %ebx, -8
412 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
413 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
414 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
415 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
416 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
417 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
418 ; CHECK-NEXT: movl %ebx, 12(%ecx)
419 ; CHECK-NEXT: cmpl $18, %edi
420 ; CHECK-NEXT: jl .LBB3_2
421 ; CHECK-NEXT: # %bb.1: # %if.then
422 ; CHECK-NEXT: movl %edi, 4(%ecx)
423 ; CHECK-NEXT: .LBB3_2: # %if.end
424 ; CHECK-NEXT: movups (%esi), %xmm0
425 ; CHECK-NEXT: movups %xmm0, (%edx)
426 ; CHECK-NEXT: movl (%ecx), %edx
427 ; CHECK-NEXT: movl %edx, (%eax)
428 ; CHECK-NEXT: movl 4(%ecx), %edx
429 ; CHECK-NEXT: movl %edx, 4(%eax)
430 ; CHECK-NEXT: movl 8(%ecx), %edx
431 ; CHECK-NEXT: movl %edx, 8(%eax)
432 ; CHECK-NEXT: movl 12(%ecx), %ecx
433 ; CHECK-NEXT: movl %ecx, 12(%eax)
434 ; CHECK-NEXT: popl %esi
435 ; CHECK-NEXT: popl %edi
436 ; CHECK-NEXT: popl %ebx
437 ; CHECK-NEXT: retl
438 ;
439 ; DISABLED-LABEL: test_2preds_block:
440 ; DISABLED: # %bb.0: # %entry
441 ; DISABLED-NEXT: pushl %ebx
442 ; DISABLED-NEXT: .cfi_def_cfa_offset 8
443 ; DISABLED-NEXT: pushl %edi
444 ; DISABLED-NEXT: .cfi_def_cfa_offset 12
445 ; DISABLED-NEXT: pushl %esi
446 ; DISABLED-NEXT: .cfi_def_cfa_offset 16
447 ; DISABLED-NEXT: .cfi_offset %esi, -16
448 ; DISABLED-NEXT: .cfi_offset %edi, -12
449 ; DISABLED-NEXT: .cfi_offset %ebx, -8
450 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx
451 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx
452 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edi
453 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax
454 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi
455 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ebx
456 ; DISABLED-NEXT: movl %ebx, 12(%esi)
457 ; DISABLED-NEXT: cmpl $18, %edi
458 ; DISABLED-NEXT: jl .LBB3_2
459 ; DISABLED-NEXT: # %bb.1: # %if.then
460 ; DISABLED-NEXT: movl %edi, 4(%esi)
461 ; DISABLED-NEXT: .LBB3_2: # %if.end
462 ; DISABLED-NEXT: movups (%edx), %xmm0
463 ; DISABLED-NEXT: movups %xmm0, (%ecx)
464 ; DISABLED-NEXT: movups (%esi), %xmm0
465 ; DISABLED-NEXT: movups %xmm0, (%eax)
466 ; DISABLED-NEXT: popl %esi
467 ; DISABLED-NEXT: popl %edi
468 ; DISABLED-NEXT: popl %ebx
469 ; DISABLED-NEXT: retl
470 ;
471 ; CHECK-AVX2-LABEL: test_2preds_block:
472 ; CHECK-AVX2: # %bb.0: # %entry
473 ; CHECK-AVX2-NEXT: pushl %ebx
474 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
475 ; CHECK-AVX2-NEXT: pushl %edi
476 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12
477 ; CHECK-AVX2-NEXT: pushl %esi
478 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16
479 ; CHECK-AVX2-NEXT: .cfi_offset %esi, -16
480 ; CHECK-AVX2-NEXT: .cfi_offset %edi, -12
481 ; CHECK-AVX2-NEXT: .cfi_offset %ebx, -8
482 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi
483 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
484 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edi
485 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
486 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
487 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ebx
488 ; CHECK-AVX2-NEXT: movl %ebx, 12(%ecx)
489 ; CHECK-AVX2-NEXT: cmpl $18, %edi
490 ; CHECK-AVX2-NEXT: jl .LBB3_2
491 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
492 ; CHECK-AVX2-NEXT: movl %edi, 4(%ecx)
493 ; CHECK-AVX2-NEXT: .LBB3_2: # %if.end
494 ; CHECK-AVX2-NEXT: movups (%esi), %xmm0
495 ; CHECK-AVX2-NEXT: movups %xmm0, (%edx)
496 ; CHECK-AVX2-NEXT: movl (%ecx), %edx
497 ; CHECK-AVX2-NEXT: movl %edx, (%eax)
498 ; CHECK-AVX2-NEXT: movl 4(%ecx), %edx
499 ; CHECK-AVX2-NEXT: movl %edx, 4(%eax)
500 ; CHECK-AVX2-NEXT: movl 8(%ecx), %edx
501 ; CHECK-AVX2-NEXT: movl %edx, 8(%eax)
502 ; CHECK-AVX2-NEXT: movl 12(%ecx), %ecx
503 ; CHECK-AVX2-NEXT: movl %ecx, 12(%eax)
504 ; CHECK-AVX2-NEXT: popl %esi
505 ; CHECK-AVX2-NEXT: popl %edi
506 ; CHECK-AVX2-NEXT: popl %ebx
507 ; CHECK-AVX2-NEXT: retl
508 ;
509 ; CHECK-AVX512-LABEL: test_2preds_block:
510 ; CHECK-AVX512: # %bb.0: # %entry
511 ; CHECK-AVX512-NEXT: pushl %ebx
512 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
513 ; CHECK-AVX512-NEXT: pushl %edi
514 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 12
515 ; CHECK-AVX512-NEXT: pushl %esi
516 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 16
517 ; CHECK-AVX512-NEXT: .cfi_offset %esi, -16
518 ; CHECK-AVX512-NEXT: .cfi_offset %edi, -12
519 ; CHECK-AVX512-NEXT: .cfi_offset %ebx, -8
520 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi
521 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx
522 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edi
523 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
524 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
525 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ebx
526 ; CHECK-AVX512-NEXT: movl %ebx, 12(%ecx)
527 ; CHECK-AVX512-NEXT: cmpl $18, %edi
528 ; CHECK-AVX512-NEXT: jl .LBB3_2
529 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
530 ; CHECK-AVX512-NEXT: movl %edi, 4(%ecx)
531 ; CHECK-AVX512-NEXT: .LBB3_2: # %if.end
532 ; CHECK-AVX512-NEXT: vmovups (%esi), %xmm0
533 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%edx)
534 ; CHECK-AVX512-NEXT: movl (%ecx), %edx
535 ; CHECK-AVX512-NEXT: movl %edx, (%eax)
536 ; CHECK-AVX512-NEXT: movl 4(%ecx), %edx
537 ; CHECK-AVX512-NEXT: movl %edx, 4(%eax)
538 ; CHECK-AVX512-NEXT: movl 8(%ecx), %edx
539 ; CHECK-AVX512-NEXT: movl %edx, 8(%eax)
540 ; CHECK-AVX512-NEXT: movl 12(%ecx), %ecx
541 ; CHECK-AVX512-NEXT: movl %ecx, 12(%eax)
542 ; CHECK-AVX512-NEXT: popl %esi
543 ; CHECK-AVX512-NEXT: popl %edi
544 ; CHECK-AVX512-NEXT: popl %ebx
545 ; CHECK-AVX512-NEXT: retl
546 entry:
547 %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
548 store i32 %x2, i32* %d, align 4
549 %cmp = icmp sgt i32 %x, 17
550 br i1 %cmp, label %if.then, label %if.end
551
552 if.then: ; preds = %entry
553 %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
554 store i32 %x, i32* %b, align 4
555 br label %if.end
556
557 if.end: ; preds = %if.then, %entry
558 %0 = bitcast %struct.S* %s3 to i8*
559 %1 = bitcast %struct.S* %s4 to i8*
560 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
561 %2 = bitcast %struct.S* %s2 to i8*
562 %3 = bitcast %struct.S* %s1 to i8*
563 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
564 ret void
565 }
566 %struct.S2 = type { i64, i64 }
567
568 ; Function Attrs: nounwind uwtable
569 define void @test_type64(%struct.S2* nocapture %s1, %struct.S2* nocapture %s2, i32 %x, %struct.S2* nocapture %s3, %struct.S2* nocapture readonly %s4) local_unnamed_addr #0 {
570 ; CHECK-LABEL: test_type64:
571 ; CHECK: # %bb.0: # %entry
572 ; CHECK-NEXT: pushl %edi
573 ; CHECK-NEXT: .cfi_def_cfa_offset 8
574 ; CHECK-NEXT: pushl %esi
575 ; CHECK-NEXT: .cfi_def_cfa_offset 12
576 ; CHECK-NEXT: .cfi_offset %esi, -12
577 ; CHECK-NEXT: .cfi_offset %edi, -8
578 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
579 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
580 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
581 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
582 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
583 ; CHECK-NEXT: cmpl $18, %edi
584 ; CHECK-NEXT: jl .LBB4_2
585 ; CHECK-NEXT: # %bb.1: # %if.then
586 ; CHECK-NEXT: movl %edi, 8(%ecx)
587 ; CHECK-NEXT: sarl $31, %edi
588 ; CHECK-NEXT: movl %edi, 12(%ecx)
589 ; CHECK-NEXT: .LBB4_2: # %if.end
590 ; CHECK-NEXT: movups (%esi), %xmm0
591 ; CHECK-NEXT: movups %xmm0, (%edx)
592 ; CHECK-NEXT: movl (%ecx), %edx
593 ; CHECK-NEXT: movl %edx, (%eax)
594 ; CHECK-NEXT: movl 4(%ecx), %edx
595 ; CHECK-NEXT: movl %edx, 4(%eax)
596 ; CHECK-NEXT: movl 8(%ecx), %edx
597 ; CHECK-NEXT: movl %edx, 8(%eax)
598 ; CHECK-NEXT: movl 12(%ecx), %ecx
599 ; CHECK-NEXT: movl %ecx, 12(%eax)
600 ; CHECK-NEXT: popl %esi
601 ; CHECK-NEXT: popl %edi
602 ; CHECK-NEXT: retl
603 ;
604 ; DISABLED-LABEL: test_type64:
605 ; DISABLED: # %bb.0: # %entry
606 ; DISABLED-NEXT: pushl %edi
607 ; DISABLED-NEXT: .cfi_def_cfa_offset 8
608 ; DISABLED-NEXT: pushl %esi
609 ; DISABLED-NEXT: .cfi_def_cfa_offset 12
610 ; DISABLED-NEXT: .cfi_offset %esi, -12
611 ; DISABLED-NEXT: .cfi_offset %edi, -8
612 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx
613 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx
614 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edi
615 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax
616 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi
617 ; DISABLED-NEXT: cmpl $18, %edi
618 ; DISABLED-NEXT: jl .LBB4_2
619 ; DISABLED-NEXT: # %bb.1: # %if.then
620 ; DISABLED-NEXT: movl %edi, 8(%esi)
621 ; DISABLED-NEXT: sarl $31, %edi
622 ; DISABLED-NEXT: movl %edi, 12(%esi)
623 ; DISABLED-NEXT: .LBB4_2: # %if.end
624 ; DISABLED-NEXT: movups (%edx), %xmm0
625 ; DISABLED-NEXT: movups %xmm0, (%ecx)
626 ; DISABLED-NEXT: movups (%esi), %xmm0
627 ; DISABLED-NEXT: movups %xmm0, (%eax)
628 ; DISABLED-NEXT: popl %esi
629 ; DISABLED-NEXT: popl %edi
630 ; DISABLED-NEXT: retl
631 ;
632 ; CHECK-AVX2-LABEL: test_type64:
633 ; CHECK-AVX2: # %bb.0: # %entry
634 ; CHECK-AVX2-NEXT: pushl %edi
635 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
636 ; CHECK-AVX2-NEXT: pushl %esi
637 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12
638 ; CHECK-AVX2-NEXT: .cfi_offset %esi, -12
639 ; CHECK-AVX2-NEXT: .cfi_offset %edi, -8
640 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi
641 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
642 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edi
643 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
644 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
645 ; CHECK-AVX2-NEXT: cmpl $18, %edi
646 ; CHECK-AVX2-NEXT: jl .LBB4_2
647 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
648 ; CHECK-AVX2-NEXT: movl %edi, 8(%ecx)
649 ; CHECK-AVX2-NEXT: sarl $31, %edi
650 ; CHECK-AVX2-NEXT: movl %edi, 12(%ecx)
651 ; CHECK-AVX2-NEXT: .LBB4_2: # %if.end
652 ; CHECK-AVX2-NEXT: movups (%esi), %xmm0
653 ; CHECK-AVX2-NEXT: movups %xmm0, (%edx)
654 ; CHECK-AVX2-NEXT: movl (%ecx), %edx
655 ; CHECK-AVX2-NEXT: movl %edx, (%eax)
656 ; CHECK-AVX2-NEXT: movl 4(%ecx), %edx
657 ; CHECK-AVX2-NEXT: movl %edx, 4(%eax)
658 ; CHECK-AVX2-NEXT: movl 8(%ecx), %edx
659 ; CHECK-AVX2-NEXT: movl %edx, 8(%eax)
660 ; CHECK-AVX2-NEXT: movl 12(%ecx), %ecx
661 ; CHECK-AVX2-NEXT: movl %ecx, 12(%eax)
662 ; CHECK-AVX2-NEXT: popl %esi
663 ; CHECK-AVX2-NEXT: popl %edi
664 ; CHECK-AVX2-NEXT: retl
665 ;
666 ; CHECK-AVX512-LABEL: test_type64:
667 ; CHECK-AVX512: # %bb.0: # %entry
668 ; CHECK-AVX512-NEXT: pushl %edi
669 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
670 ; CHECK-AVX512-NEXT: pushl %esi
671 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 12
672 ; CHECK-AVX512-NEXT: .cfi_offset %esi, -12
673 ; CHECK-AVX512-NEXT: .cfi_offset %edi, -8
674 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi
675 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx
676 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edi
677 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
678 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
679 ; CHECK-AVX512-NEXT: cmpl $18, %edi
680 ; CHECK-AVX512-NEXT: jl .LBB4_2
681 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
682 ; CHECK-AVX512-NEXT: movl %edi, 8(%ecx)
683 ; CHECK-AVX512-NEXT: sarl $31, %edi
684 ; CHECK-AVX512-NEXT: movl %edi, 12(%ecx)
685 ; CHECK-AVX512-NEXT: .LBB4_2: # %if.end
686 ; CHECK-AVX512-NEXT: vmovups (%esi), %xmm0
687 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%edx)
688 ; CHECK-AVX512-NEXT: movl (%ecx), %edx
689 ; CHECK-AVX512-NEXT: movl %edx, (%eax)
690 ; CHECK-AVX512-NEXT: movl 4(%ecx), %edx
691 ; CHECK-AVX512-NEXT: movl %edx, 4(%eax)
692 ; CHECK-AVX512-NEXT: movl 8(%ecx), %edx
693 ; CHECK-AVX512-NEXT: movl %edx, 8(%eax)
694 ; CHECK-AVX512-NEXT: movl 12(%ecx), %ecx
695 ; CHECK-AVX512-NEXT: movl %ecx, 12(%eax)
696 ; CHECK-AVX512-NEXT: popl %esi
697 ; CHECK-AVX512-NEXT: popl %edi
698 ; CHECK-AVX512-NEXT: retl
699 entry:
700 %cmp = icmp sgt i32 %x, 17
701 br i1 %cmp, label %if.then, label %if.end
702
703 if.then: ; preds = %entry
704 %conv = sext i32 %x to i64
705 %b = getelementptr inbounds %struct.S2, %struct.S2* %s1, i64 0, i32 1
706 store i64 %conv, i64* %b, align 8
707 br label %if.end
708
709 if.end: ; preds = %if.then, %entry
710 %0 = bitcast %struct.S2* %s3 to i8*
711 %1 = bitcast %struct.S2* %s4 to i8*
712 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false)
713 %2 = bitcast %struct.S2* %s2 to i8*
714 %3 = bitcast %struct.S2* %s1 to i8*
715 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 8, i1 false)
716 ret void
717 }
718 %struct.S3 = type { i64, i8, i8, i16, i32 }
719
720 ; Function Attrs: noinline nounwind uwtable
721 define void @test_mixed_type(%struct.S3* nocapture %s1, %struct.S3* nocapture %s2, i32 %x, %struct.S3* nocapture readnone %s3, %struct.S3* nocapture readnone %s4) local_unnamed_addr #0 {
722 ; CHECK-LABEL: test_mixed_type:
723 ; CHECK: # %bb.0: # %entry
724 ; CHECK-NEXT: pushl %esi
725 ; CHECK-NEXT: .cfi_def_cfa_offset 8
726 ; CHECK-NEXT: .cfi_offset %esi, -8
727 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
728 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
729 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
730 ; CHECK-NEXT: cmpl $18, %edx
731 ; CHECK-NEXT: jl .LBB5_2
732 ; CHECK-NEXT: # %bb.1: # %if.then
733 ; CHECK-NEXT: movl %edx, %esi
734 ; CHECK-NEXT: sarl $31, %esi
735 ; CHECK-NEXT: movl %edx, (%ecx)
736 ; CHECK-NEXT: movl %esi, 4(%ecx)
737 ; CHECK-NEXT: movb %dl, 8(%ecx)
738 ; CHECK-NEXT: .LBB5_2: # %if.end
739 ; CHECK-NEXT: movl (%ecx), %edx
740 ; CHECK-NEXT: movl %edx, (%eax)
741 ; CHECK-NEXT: movl 4(%ecx), %edx
742 ; CHECK-NEXT: movl %edx, 4(%eax)
743 ; CHECK-NEXT: movb 8(%ecx), %dl
744 ; CHECK-NEXT: movb %dl, 8(%eax)
745 ; CHECK-NEXT: movl 9(%ecx), %edx
746 ; CHECK-NEXT: movl %edx, 9(%eax)
747 ; CHECK-NEXT: movzwl 13(%ecx), %edx
748 ; CHECK-NEXT: movw %dx, 13(%eax)
749 ; CHECK-NEXT: movb 15(%ecx), %cl
750 ; CHECK-NEXT: movb %cl, 15(%eax)
751 ; CHECK-NEXT: popl %esi
752 ; CHECK-NEXT: retl
753 ;
754 ; DISABLED-LABEL: test_mixed_type:
755 ; DISABLED: # %bb.0: # %entry
756 ; DISABLED-NEXT: pushl %esi
757 ; DISABLED-NEXT: .cfi_def_cfa_offset 8
758 ; DISABLED-NEXT: .cfi_offset %esi, -8
759 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx
760 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax
761 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx
762 ; DISABLED-NEXT: cmpl $18, %edx
763 ; DISABLED-NEXT: jl .LBB5_2
764 ; DISABLED-NEXT: # %bb.1: # %if.then
765 ; DISABLED-NEXT: movl %edx, %esi
766 ; DISABLED-NEXT: sarl $31, %esi
767 ; DISABLED-NEXT: movl %edx, (%ecx)
768 ; DISABLED-NEXT: movl %esi, 4(%ecx)
769 ; DISABLED-NEXT: movb %dl, 8(%ecx)
770 ; DISABLED-NEXT: .LBB5_2: # %if.end
771 ; DISABLED-NEXT: movups (%ecx), %xmm0
772 ; DISABLED-NEXT: movups %xmm0, (%eax)
773 ; DISABLED-NEXT: popl %esi
774 ; DISABLED-NEXT: retl
775 ;
776 ; CHECK-AVX2-LABEL: test_mixed_type:
777 ; CHECK-AVX2: # %bb.0: # %entry
778 ; CHECK-AVX2-NEXT: pushl %esi
779 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
780 ; CHECK-AVX2-NEXT: .cfi_offset %esi, -8
781 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
782 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
783 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
784 ; CHECK-AVX2-NEXT: cmpl $18, %edx
785 ; CHECK-AVX2-NEXT: jl .LBB5_2
786 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
787 ; CHECK-AVX2-NEXT: movl %edx, %esi
788 ; CHECK-AVX2-NEXT: sarl $31, %esi
789 ; CHECK-AVX2-NEXT: movl %edx, (%ecx)
790 ; CHECK-AVX2-NEXT: movl %esi, 4(%ecx)
791 ; CHECK-AVX2-NEXT: movb %dl, 8(%ecx)
792 ; CHECK-AVX2-NEXT: .LBB5_2: # %if.end
793 ; CHECK-AVX2-NEXT: movl (%ecx), %edx
794 ; CHECK-AVX2-NEXT: movl %edx, (%eax)
795 ; CHECK-AVX2-NEXT: movl 4(%ecx), %edx
796 ; CHECK-AVX2-NEXT: movl %edx, 4(%eax)
797 ; CHECK-AVX2-NEXT: movb 8(%ecx), %dl
798 ; CHECK-AVX2-NEXT: movb %dl, 8(%eax)
799 ; CHECK-AVX2-NEXT: movl 9(%ecx), %edx
800 ; CHECK-AVX2-NEXT: movl %edx, 9(%eax)
801 ; CHECK-AVX2-NEXT: movzwl 13(%ecx), %edx
802 ; CHECK-AVX2-NEXT: movw %dx, 13(%eax)
803 ; CHECK-AVX2-NEXT: movb 15(%ecx), %cl
804 ; CHECK-AVX2-NEXT: movb %cl, 15(%eax)
805 ; CHECK-AVX2-NEXT: popl %esi
806 ; CHECK-AVX2-NEXT: retl
807 ;
808 ; CHECK-AVX512-LABEL: test_mixed_type:
809 ; CHECK-AVX512: # %bb.0: # %entry
810 ; CHECK-AVX512-NEXT: pushl %esi
811 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
812 ; CHECK-AVX512-NEXT: .cfi_offset %esi, -8
813 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx
814 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
815 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
816 ; CHECK-AVX512-NEXT: cmpl $18, %edx
817 ; CHECK-AVX512-NEXT: jl .LBB5_2
818 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
819 ; CHECK-AVX512-NEXT: movl %edx, %esi
820 ; CHECK-AVX512-NEXT: sarl $31, %esi
821 ; CHECK-AVX512-NEXT: movl %edx, (%ecx)
822 ; CHECK-AVX512-NEXT: movl %esi, 4(%ecx)
823 ; CHECK-AVX512-NEXT: movb %dl, 8(%ecx)
824 ; CHECK-AVX512-NEXT: .LBB5_2: # %if.end
825 ; CHECK-AVX512-NEXT: movl (%ecx), %edx
826 ; CHECK-AVX512-NEXT: movl %edx, (%eax)
827 ; CHECK-AVX512-NEXT: movl 4(%ecx), %edx
828 ; CHECK-AVX512-NEXT: movl %edx, 4(%eax)
829 ; CHECK-AVX512-NEXT: movb 8(%ecx), %dl
830 ; CHECK-AVX512-NEXT: movb %dl, 8(%eax)
831 ; CHECK-AVX512-NEXT: movl 9(%ecx), %edx
832 ; CHECK-AVX512-NEXT: movl %edx, 9(%eax)
833 ; CHECK-AVX512-NEXT: movzwl 13(%ecx), %edx
834 ; CHECK-AVX512-NEXT: movw %dx, 13(%eax)
835 ; CHECK-AVX512-NEXT: movb 15(%ecx), %cl
836 ; CHECK-AVX512-NEXT: movb %cl, 15(%eax)
837 ; CHECK-AVX512-NEXT: popl %esi
838 ; CHECK-AVX512-NEXT: retl
839 entry:
840 %cmp = icmp sgt i32 %x, 17
841 br i1 %cmp, label %if.then, label %if.end
842
843 if.then: ; preds = %entry
844 %conv = sext i32 %x to i64
845 %a = getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 0
846 store i64 %conv, i64* %a, align 8
847 %conv1 = trunc i32 %x to i8
848 %b = getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 1
849 store i8 %conv1, i8* %b, align 8
850 br label %if.end
851
852 if.end: ; preds = %if.then, %entry
853 %0 = bitcast %struct.S3* %s2 to i8*
854 %1 = bitcast %struct.S3* %s1 to i8*
855 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false)
856 ret void
857 }
858 %struct.S4 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
859
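; test_multiple_blocks: two i32 stores into %struct.S4 (offsets 4 and 36)
; precede a 48-byte copy; only the chunks overlapping the stored fields are
; broken into scalar copies, while untouched parts stay vector load/stores.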
860 ; Function Attrs: nounwind uwtable
861 define void @test_multiple_blocks(%struct.S4* nocapture %s1, %struct.S4* nocapture %s2) local_unnamed_addr #0 {
862 ; CHECK-LABEL: test_multiple_blocks:
863 ; CHECK: # %bb.0: # %entry
864 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
865 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
866 ; CHECK-NEXT: movl $0, 4(%ecx)
867 ; CHECK-NEXT: movl $0, 36(%ecx)
868 ; CHECK-NEXT: movups 16(%ecx), %xmm0
869 ; CHECK-NEXT: movups %xmm0, 16(%eax)
870 ; CHECK-NEXT: movl 32(%ecx), %edx
871 ; CHECK-NEXT: movl %edx, 32(%eax)
872 ; CHECK-NEXT: movl 36(%ecx), %edx
873 ; CHECK-NEXT: movl %edx, 36(%eax)
874 ; CHECK-NEXT: movl 40(%ecx), %edx
875 ; CHECK-NEXT: movl %edx, 40(%eax)
876 ; CHECK-NEXT: movl 44(%ecx), %edx
877 ; CHECK-NEXT: movl %edx, 44(%eax)
878 ; CHECK-NEXT: movl (%ecx), %edx
879 ; CHECK-NEXT: movl %edx, (%eax)
880 ; CHECK-NEXT: movl 4(%ecx), %edx
881 ; CHECK-NEXT: movl %edx, 4(%eax)
882 ; CHECK-NEXT: movl 8(%ecx), %edx
883 ; CHECK-NEXT: movl %edx, 8(%eax)
884 ; CHECK-NEXT: movl 12(%ecx), %ecx
885 ; CHECK-NEXT: movl %ecx, 12(%eax)
886 ; CHECK-NEXT: retl
887 ;
888 ; DISABLED-LABEL: test_multiple_blocks:
889 ; DISABLED: # %bb.0: # %entry
890 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax
891 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx
892 ; DISABLED-NEXT: movl $0, 4(%ecx)
893 ; DISABLED-NEXT: movl $0, 36(%ecx)
894 ; DISABLED-NEXT: movups 16(%ecx), %xmm0
895 ; DISABLED-NEXT: movups %xmm0, 16(%eax)
896 ; DISABLED-NEXT: movups 32(%ecx), %xmm0
897 ; DISABLED-NEXT: movups %xmm0, 32(%eax)
898 ; DISABLED-NEXT: movups (%ecx), %xmm0
899 ; DISABLED-NEXT: movups %xmm0, (%eax)
900 ; DISABLED-NEXT: retl
901 ;
902 ; CHECK-AVX2-LABEL: test_multiple_blocks:
903 ; CHECK-AVX2: # %bb.0: # %entry
904 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
905 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
906 ; CHECK-AVX2-NEXT: movl $0, 4(%ecx)
907 ; CHECK-AVX2-NEXT: movl $0, 36(%ecx)
908 ; CHECK-AVX2-NEXT: movups 16(%ecx), %xmm0
909 ; CHECK-AVX2-NEXT: movups %xmm0, 16(%eax)
910 ; CHECK-AVX2-NEXT: movl 32(%ecx), %edx
911 ; CHECK-AVX2-NEXT: movl %edx, 32(%eax)
912 ; CHECK-AVX2-NEXT: movl 36(%ecx), %edx
913 ; CHECK-AVX2-NEXT: movl %edx, 36(%eax)
914 ; CHECK-AVX2-NEXT: movl 40(%ecx), %edx
915 ; CHECK-AVX2-NEXT: movl %edx, 40(%eax)
916 ; CHECK-AVX2-NEXT: movl 44(%ecx), %edx
917 ; CHECK-AVX2-NEXT: movl %edx, 44(%eax)
918 ; CHECK-AVX2-NEXT: movl (%ecx), %edx
919 ; CHECK-AVX2-NEXT: movl %edx, (%eax)
920 ; CHECK-AVX2-NEXT: movl 4(%ecx), %edx
921 ; CHECK-AVX2-NEXT: movl %edx, 4(%eax)
922 ; CHECK-AVX2-NEXT: movl 8(%ecx), %edx
923 ; CHECK-AVX2-NEXT: movl %edx, 8(%eax)
924 ; CHECK-AVX2-NEXT: movl 12(%ecx), %ecx
925 ; CHECK-AVX2-NEXT: movl %ecx, 12(%eax)
926 ; CHECK-AVX2-NEXT: retl
927 ;
928 ; CHECK-AVX512-LABEL: test_multiple_blocks:
929 ; CHECK-AVX512: # %bb.0: # %entry
930 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
931 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
932 ; CHECK-AVX512-NEXT: movl $0, 4(%ecx)
933 ; CHECK-AVX512-NEXT: movl $0, 36(%ecx)
934 ; CHECK-AVX512-NEXT: vmovups 16(%ecx), %xmm0
935 ; CHECK-AVX512-NEXT: vmovups %xmm0, 16(%eax)
936 ; CHECK-AVX512-NEXT: movl 32(%ecx), %edx
937 ; CHECK-AVX512-NEXT: movl %edx, 32(%eax)
938 ; CHECK-AVX512-NEXT: movl 36(%ecx), %edx
939 ; CHECK-AVX512-NEXT: movl %edx, 36(%eax)
940 ; CHECK-AVX512-NEXT: movl 40(%ecx), %edx
941 ; CHECK-AVX512-NEXT: movl %edx, 40(%eax)
942 ; CHECK-AVX512-NEXT: movl 44(%ecx), %edx
943 ; CHECK-AVX512-NEXT: movl %edx, 44(%eax)
944 ; CHECK-AVX512-NEXT: movl (%ecx), %edx
945 ; CHECK-AVX512-NEXT: movl %edx, (%eax)
946 ; CHECK-AVX512-NEXT: movl 4(%ecx), %edx
947 ; CHECK-AVX512-NEXT: movl %edx, 4(%eax)
948 ; CHECK-AVX512-NEXT: vmovups 8(%ecx), %xmm0
949 ; CHECK-AVX512-NEXT: vmovups %xmm0, 8(%eax)
950 ; CHECK-AVX512-NEXT: movl 24(%ecx), %edx
951 ; CHECK-AVX512-NEXT: movl %edx, 24(%eax)
952 ; CHECK-AVX512-NEXT: movl 28(%ecx), %ecx
953 ; CHECK-AVX512-NEXT: movl %ecx, 28(%eax)
954 ; CHECK-AVX512-NEXT: retl
955 entry:
956 %b = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 1
957 store i32 0, i32* %b, align 4
958 %b3 = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 9
959 store i32 0, i32* %b3, align 4
960 %0 = bitcast %struct.S4* %s2 to i8*
961 %1 = bitcast %struct.S4* %s1 to i8*
962 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 48, i32 4, i1 false)
963 ret void
964 }
965 %struct.S5 = type { i16, i16, i16, i16, i16, i16, i16, i16 }
966
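; test_type16: the conditional store is an i16, so the copy of %s1 starts with
; two 2-byte (movzwl/movw) copies before falling back to i32 copies; the
; unrelated %s4 -> %s3 copy remains a single vector load/store.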
967 ; Function Attrs: nounwind uwtable
968 define void @test_type16(%struct.S5* nocapture %s1, %struct.S5* nocapture %s2, i32 %x, %struct.S5* nocapture %s3, %struct.S5* nocapture readonly %s4) local_unnamed_addr #0 {
969 ; CHECK-LABEL: test_type16:
970 ; CHECK: # %bb.0: # %entry
971 ; CHECK-NEXT: pushl %edi
972 ; CHECK-NEXT: .cfi_def_cfa_offset 8
973 ; CHECK-NEXT: pushl %esi
974 ; CHECK-NEXT: .cfi_def_cfa_offset 12
975 ; CHECK-NEXT: .cfi_offset %esi, -12
976 ; CHECK-NEXT: .cfi_offset %edi, -8
977 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
978 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
979 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
980 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
981 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
982 ; CHECK-NEXT: cmpl $18, %edi
983 ; CHECK-NEXT: jl .LBB7_2
984 ; CHECK-NEXT: # %bb.1: # %if.then
985 ; CHECK-NEXT: movw %di, 2(%ecx)
986 ; CHECK-NEXT: .LBB7_2: # %if.end
987 ; CHECK-NEXT: movups (%esi), %xmm0
988 ; CHECK-NEXT: movups %xmm0, (%edx)
989 ; CHECK-NEXT: movzwl (%ecx), %edx
990 ; CHECK-NEXT: movw %dx, (%eax)
991 ; CHECK-NEXT: movzwl 2(%ecx), %edx
992 ; CHECK-NEXT: movw %dx, 2(%eax)
993 ; CHECK-NEXT: movl 4(%ecx), %edx
994 ; CHECK-NEXT: movl %edx, 4(%eax)
995 ; CHECK-NEXT: movl 8(%ecx), %edx
996 ; CHECK-NEXT: movl %edx, 8(%eax)
997 ; CHECK-NEXT: movl 12(%ecx), %ecx
998 ; CHECK-NEXT: movl %ecx, 12(%eax)
999 ; CHECK-NEXT: popl %esi
1000 ; CHECK-NEXT: popl %edi
1001 ; CHECK-NEXT: retl
1002 ;
1003 ; DISABLED-LABEL: test_type16:
1004 ; DISABLED: # %bb.0: # %entry
1005 ; DISABLED-NEXT: pushl %edi
1006 ; DISABLED-NEXT: .cfi_def_cfa_offset 8
1007 ; DISABLED-NEXT: pushl %esi
1008 ; DISABLED-NEXT: .cfi_def_cfa_offset 12
1009 ; DISABLED-NEXT: .cfi_offset %esi, -12
1010 ; DISABLED-NEXT: .cfi_offset %edi, -8
1011 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx
1012 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx
1013 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edi
1014 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax
1015 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi
1016 ; DISABLED-NEXT: cmpl $18, %edi
1017 ; DISABLED-NEXT: jl .LBB7_2
1018 ; DISABLED-NEXT: # %bb.1: # %if.then
1019 ; DISABLED-NEXT: movw %di, 2(%esi)
1020 ; DISABLED-NEXT: .LBB7_2: # %if.end
1021 ; DISABLED-NEXT: movups (%edx), %xmm0
1022 ; DISABLED-NEXT: movups %xmm0, (%ecx)
1023 ; DISABLED-NEXT: movups (%esi), %xmm0
1024 ; DISABLED-NEXT: movups %xmm0, (%eax)
1025 ; DISABLED-NEXT: popl %esi
1026 ; DISABLED-NEXT: popl %edi
1027 ; DISABLED-NEXT: retl
1028 ;
1029 ; CHECK-AVX2-LABEL: test_type16:
1030 ; CHECK-AVX2: # %bb.0: # %entry
1031 ; CHECK-AVX2-NEXT: pushl %edi
1032 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
1033 ; CHECK-AVX2-NEXT: pushl %esi
1034 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12
1035 ; CHECK-AVX2-NEXT: .cfi_offset %esi, -12
1036 ; CHECK-AVX2-NEXT: .cfi_offset %edi, -8
1037 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi
1038 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
1039 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edi
1040 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
1041 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
1042 ; CHECK-AVX2-NEXT: cmpl $18, %edi
1043 ; CHECK-AVX2-NEXT: jl .LBB7_2
1044 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
1045 ; CHECK-AVX2-NEXT: movw %di, 2(%ecx)
1046 ; CHECK-AVX2-NEXT: .LBB7_2: # %if.end
1047 ; CHECK-AVX2-NEXT: movups (%esi), %xmm0
1048 ; CHECK-AVX2-NEXT: movups %xmm0, (%edx)
1049 ; CHECK-AVX2-NEXT: movzwl (%ecx), %edx
1050 ; CHECK-AVX2-NEXT: movw %dx, (%eax)
1051 ; CHECK-AVX2-NEXT: movzwl 2(%ecx), %edx
1052 ; CHECK-AVX2-NEXT: movw %dx, 2(%eax)
1053 ; CHECK-AVX2-NEXT: movl 4(%ecx), %edx
1054 ; CHECK-AVX2-NEXT: movl %edx, 4(%eax)
1055 ; CHECK-AVX2-NEXT: movl 8(%ecx), %edx
1056 ; CHECK-AVX2-NEXT: movl %edx, 8(%eax)
1057 ; CHECK-AVX2-NEXT: movl 12(%ecx), %ecx
1058 ; CHECK-AVX2-NEXT: movl %ecx, 12(%eax)
1059 ; CHECK-AVX2-NEXT: popl %esi
1060 ; CHECK-AVX2-NEXT: popl %edi
1061 ; CHECK-AVX2-NEXT: retl
1062 ;
1063 ; CHECK-AVX512-LABEL: test_type16:
1064 ; CHECK-AVX512: # %bb.0: # %entry
1065 ; CHECK-AVX512-NEXT: pushl %edi
1066 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
1067 ; CHECK-AVX512-NEXT: pushl %esi
1068 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 12
1069 ; CHECK-AVX512-NEXT: .cfi_offset %esi, -12
1070 ; CHECK-AVX512-NEXT: .cfi_offset %edi, -8
1071 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi
1072 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx
1073 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edi
1074 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
1075 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
1076 ; CHECK-AVX512-NEXT: cmpl $18, %edi
1077 ; CHECK-AVX512-NEXT: jl .LBB7_2
1078 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
1079 ; CHECK-AVX512-NEXT: movw %di, 2(%ecx)
1080 ; CHECK-AVX512-NEXT: .LBB7_2: # %if.end
1081 ; CHECK-AVX512-NEXT: vmovups (%esi), %xmm0
1082 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%edx)
1083 ; CHECK-AVX512-NEXT: movzwl (%ecx), %edx
1084 ; CHECK-AVX512-NEXT: movw %dx, (%eax)
1085 ; CHECK-AVX512-NEXT: movzwl 2(%ecx), %edx
1086 ; CHECK-AVX512-NEXT: movw %dx, 2(%eax)
1087 ; CHECK-AVX512-NEXT: movl 4(%ecx), %edx
1088 ; CHECK-AVX512-NEXT: movl %edx, 4(%eax)
1089 ; CHECK-AVX512-NEXT: movl 8(%ecx), %edx
1090 ; CHECK-AVX512-NEXT: movl %edx, 8(%eax)
1091 ; CHECK-AVX512-NEXT: movl 12(%ecx), %ecx
1092 ; CHECK-AVX512-NEXT: movl %ecx, 12(%eax)
1093 ; CHECK-AVX512-NEXT: popl %esi
1094 ; CHECK-AVX512-NEXT: popl %edi
1095 ; CHECK-AVX512-NEXT: retl
1096 entry:
1097 %cmp = icmp sgt i32 %x, 17
1098 br i1 %cmp, label %if.then, label %if.end
1099
1100 if.then: ; preds = %entry
1101 %conv = trunc i32 %x to i16
1102 %b = getelementptr inbounds %struct.S5, %struct.S5* %s1, i64 0, i32 1
1103 store i16 %conv, i16* %b, align 2
1104 br label %if.end
1105
1106 if.end: ; preds = %if.then, %entry
1107 %0 = bitcast %struct.S5* %s3 to i8*
1108 %1 = bitcast %struct.S5* %s4 to i8*
1109 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 2, i1 false)
1110 %2 = bitcast %struct.S5* %s2 to i8*
1111 %3 = bitcast %struct.S5* %s1 to i8*
1112 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 2, i1 false)
1113 ret void
1114 }
1115
1116 %struct.S6 = type { [4 x i32], i32, i32, i32, i32 }
1117
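; test_stack: the store and the copied struct both live in a byval stack
; argument, so the blocked copy here uses stack-relative addressing; the
; second 16-byte half, which contains the i32 store at offset 24, is split
; into i32 copies.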
1118 ; Function Attrs: nounwind uwtable
1119 define void @test_stack(%struct.S6* noalias nocapture sret %agg.result, %struct.S6* byval nocapture readnone align 8 %s1, %struct.S6* byval nocapture align 8 %s2, i32 %x) local_unnamed_addr #0 {
1120 ; CHECK-LABEL: test_stack:
1121 ; CHECK: # %bb.0: # %entry
1122 ; CHECK-NEXT: pushl %eax
1123 ; CHECK-NEXT: .cfi_def_cfa_offset 8
1124 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1125 ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
1126 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1127 ; CHECK-NEXT: movups {{[0-9]+}}(%esp), %xmm0
1128 ; CHECK-NEXT: movups %xmm0, (%eax)
1129 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1130 ; CHECK-NEXT: movl %ecx, 16(%eax)
1131 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1132 ; CHECK-NEXT: movl %ecx, 20(%eax)
1133 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1134 ; CHECK-NEXT: movl %ecx, 24(%eax)
1135 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1136 ; CHECK-NEXT: movl %ecx, 28(%eax)
1137 ; CHECK-NEXT: popl %ecx
1138 ; CHECK-NEXT: retl $4
1139 ;
1140 ; DISABLED-LABEL: test_stack:
1141 ; DISABLED: # %bb.0: # %entry
1142 ; DISABLED-NEXT: pushl %eax
1143 ; DISABLED-NEXT: .cfi_def_cfa_offset 8
1144 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax
1145 ; DISABLED-NEXT: movl %eax, {{[0-9]+}}(%esp)
1146 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax
1147 ; DISABLED-NEXT: movups {{[0-9]+}}(%esp), %xmm0
1148 ; DISABLED-NEXT: movups %xmm0, (%eax)
1149 ; DISABLED-NEXT: movups {{[0-9]+}}(%esp), %xmm0
1150 ; DISABLED-NEXT: movups %xmm0, 16(%eax)
1151 ; DISABLED-NEXT: popl %ecx
1152 ; DISABLED-NEXT: retl $4
1153 ;
1154 ; CHECK-AVX2-LABEL: test_stack:
1155 ; CHECK-AVX2: # %bb.0: # %entry
1156 ; CHECK-AVX2-NEXT: pushl %eax
1157 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
1158 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
1159 ; CHECK-AVX2-NEXT: movl %eax, {{[0-9]+}}(%esp)
1160 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
1161 ; CHECK-AVX2-NEXT: movups {{[0-9]+}}(%esp), %xmm0
1162 ; CHECK-AVX2-NEXT: movups %xmm0, (%eax)
1163 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
1164 ; CHECK-AVX2-NEXT: movl %ecx, 16(%eax)
1165 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
1166 ; CHECK-AVX2-NEXT: movl %ecx, 20(%eax)
1167 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
1168 ; CHECK-AVX2-NEXT: movl %ecx, 24(%eax)
1169 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
1170 ; CHECK-AVX2-NEXT: movl %ecx, 28(%eax)
1171 ; CHECK-AVX2-NEXT: popl %ecx
1172 ; CHECK-AVX2-NEXT: retl $4
1173 ;
1174 ; CHECK-AVX512-LABEL: test_stack:
1175 ; CHECK-AVX512: # %bb.0: # %entry
1176 ; CHECK-AVX512-NEXT: pushl %eax
1177 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
1178 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
1179 ; CHECK-AVX512-NEXT: movl %eax, {{[0-9]+}}(%esp)
1180 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
1181 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
1182 ; CHECK-AVX512-NEXT: movl %ecx, 16(%eax)
1183 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
1184 ; CHECK-AVX512-NEXT: movl %ecx, 20(%eax)
1185 ; CHECK-AVX512-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
1186 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
1187 ; CHECK-AVX512-NEXT: movl %ecx, 24(%eax)
1188 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%eax)
1189 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
1190 ; CHECK-AVX512-NEXT: movl %ecx, 28(%eax)
1191 ; CHECK-AVX512-NEXT: popl %ecx
1192 ; CHECK-AVX512-NEXT: retl $4
1193 entry:
1194 %s6.sroa.0.0..sroa_cast1 = bitcast %struct.S6* %s2 to i8*
1195 %s6.sroa.3.0..sroa_idx4 = getelementptr inbounds %struct.S6, %struct.S6* %s2, i64 0, i32 3
1196 store i32 %x, i32* %s6.sroa.3.0..sroa_idx4, align 8
1197 %0 = bitcast %struct.S6* %agg.result to i8*
1198 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* nonnull %s6.sroa.0.0..sroa_cast1, i64 32, i32 4, i1 false)
1199 ret void
1200 }
1201
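; test_limit_all: @bar is called both before the branch and inside %if.then;
; the pass leaves both 16-byte copies as plain vector load/stores (the CHECK
; and DISABLED sequences match), presumably because the intervening calls
; make it unsafe or unprofitable to pair the stores with the copy.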
1202 ; Function Attrs: nounwind uwtable
1203 define void @test_limit_all(%struct.S* %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
1204 ; CHECK-LABEL: test_limit_all:
1205 ; CHECK: # %bb.0: # %entry
1206 ; CHECK-NEXT: pushl %ebp
1207 ; CHECK-NEXT: .cfi_def_cfa_offset 8
1208 ; CHECK-NEXT: pushl %ebx
1209 ; CHECK-NEXT: .cfi_def_cfa_offset 12
1210 ; CHECK-NEXT: pushl %edi
1211 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1212 ; CHECK-NEXT: pushl %esi
1213 ; CHECK-NEXT: .cfi_def_cfa_offset 20
1214 ; CHECK-NEXT: subl $12, %esp
1215 ; CHECK-NEXT: .cfi_def_cfa_offset 32
1216 ; CHECK-NEXT: .cfi_offset %esi, -20
1217 ; CHECK-NEXT: .cfi_offset %edi, -16
1218 ; CHECK-NEXT: .cfi_offset %ebx, -12
1219 ; CHECK-NEXT: .cfi_offset %ebp, -8
1220 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
1221 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
1222 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
1223 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
1224 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1225 ; CHECK-NEXT: movl %eax, 12(%ebp)
1226 ; CHECK-NEXT: movl %ebp, (%esp)
1227 ; CHECK-NEXT: calll bar
1228 ; CHECK-NEXT: cmpl $18, %esi
1229 ; CHECK-NEXT: jl .LBB9_2
1230 ; CHECK-NEXT: # %bb.1: # %if.then
1231 ; CHECK-NEXT: movl %esi, 4(%ebp)
1232 ; CHECK-NEXT: movl %ebp, (%esp)
1233 ; CHECK-NEXT: calll bar
1234 ; CHECK-NEXT: .LBB9_2: # %if.end
1235 ; CHECK-NEXT: movups (%ebx), %xmm0
1236 ; CHECK-NEXT: movups %xmm0, (%edi)
1237 ; CHECK-NEXT: movups (%ebp), %xmm0
1238 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1239 ; CHECK-NEXT: movups %xmm0, (%eax)
1240 ; CHECK-NEXT: addl $12, %esp
1241 ; CHECK-NEXT: popl %esi
1242 ; CHECK-NEXT: popl %edi
1243 ; CHECK-NEXT: popl %ebx
1244 ; CHECK-NEXT: popl %ebp
1245 ; CHECK-NEXT: retl
1246 ;
1247 ; DISABLED-LABEL: test_limit_all:
1248 ; DISABLED: # %bb.0: # %entry
1249 ; DISABLED-NEXT: pushl %ebp
1250 ; DISABLED-NEXT: .cfi_def_cfa_offset 8
1251 ; DISABLED-NEXT: pushl %ebx
1252 ; DISABLED-NEXT: .cfi_def_cfa_offset 12
1253 ; DISABLED-NEXT: pushl %edi
1254 ; DISABLED-NEXT: .cfi_def_cfa_offset 16
1255 ; DISABLED-NEXT: pushl %esi
1256 ; DISABLED-NEXT: .cfi_def_cfa_offset 20
1257 ; DISABLED-NEXT: subl $12, %esp
1258 ; DISABLED-NEXT: .cfi_def_cfa_offset 32
1259 ; DISABLED-NEXT: .cfi_offset %esi, -20
1260 ; DISABLED-NEXT: .cfi_offset %edi, -16
1261 ; DISABLED-NEXT: .cfi_offset %ebx, -12
1262 ; DISABLED-NEXT: .cfi_offset %ebp, -8
1263 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ebx
1264 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edi
1265 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi
1266 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ebp
1267 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax
1268 ; DISABLED-NEXT: movl %eax, 12(%ebp)
1269 ; DISABLED-NEXT: movl %ebp, (%esp)
1270 ; DISABLED-NEXT: calll bar
1271 ; DISABLED-NEXT: cmpl $18, %esi
1272 ; DISABLED-NEXT: jl .LBB9_2
1273 ; DISABLED-NEXT: # %bb.1: # %if.then
1274 ; DISABLED-NEXT: movl %esi, 4(%ebp)
1275 ; DISABLED-NEXT: movl %ebp, (%esp)
1276 ; DISABLED-NEXT: calll bar
1277 ; DISABLED-NEXT: .LBB9_2: # %if.end
1278 ; DISABLED-NEXT: movups (%ebx), %xmm0
1279 ; DISABLED-NEXT: movups %xmm0, (%edi)
1280 ; DISABLED-NEXT: movups (%ebp), %xmm0
1281 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax
1282 ; DISABLED-NEXT: movups %xmm0, (%eax)
1283 ; DISABLED-NEXT: addl $12, %esp
1284 ; DISABLED-NEXT: popl %esi
1285 ; DISABLED-NEXT: popl %edi
1286 ; DISABLED-NEXT: popl %ebx
1287 ; DISABLED-NEXT: popl %ebp
1288 ; DISABLED-NEXT: retl
1289 ;
1290 ; CHECK-AVX2-LABEL: test_limit_all:
1291 ; CHECK-AVX2: # %bb.0: # %entry
1292 ; CHECK-AVX2-NEXT: pushl %ebp
1293 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
1294 ; CHECK-AVX2-NEXT: pushl %ebx
1295 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12
1296 ; CHECK-AVX2-NEXT: pushl %edi
1297 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16
1298 ; CHECK-AVX2-NEXT: pushl %esi
1299 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 20
1300 ; CHECK-AVX2-NEXT: subl $12, %esp
1301 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32
1302 ; CHECK-AVX2-NEXT: .cfi_offset %esi, -20
1303 ; CHECK-AVX2-NEXT: .cfi_offset %edi, -16
1304 ; CHECK-AVX2-NEXT: .cfi_offset %ebx, -12
1305 ; CHECK-AVX2-NEXT: .cfi_offset %ebp, -8
1306 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ebx
1307 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edi
1308 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi
1309 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ebp
1310 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
1311 ; CHECK-AVX2-NEXT: movl %eax, 12(%ebp)
1312 ; CHECK-AVX2-NEXT: movl %ebp, (%esp)
1313 ; CHECK-AVX2-NEXT: calll bar
1314 ; CHECK-AVX2-NEXT: cmpl $18, %esi
1315 ; CHECK-AVX2-NEXT: jl .LBB9_2
1316 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
1317 ; CHECK-AVX2-NEXT: movl %esi, 4(%ebp)
1318 ; CHECK-AVX2-NEXT: movl %ebp, (%esp)
1319 ; CHECK-AVX2-NEXT: calll bar
1320 ; CHECK-AVX2-NEXT: .LBB9_2: # %if.end
1321 ; CHECK-AVX2-NEXT: movups (%ebx), %xmm0
1322 ; CHECK-AVX2-NEXT: movups %xmm0, (%edi)
1323 ; CHECK-AVX2-NEXT: movups (%ebp), %xmm0
1324 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
1325 ; CHECK-AVX2-NEXT: movups %xmm0, (%eax)
1326 ; CHECK-AVX2-NEXT: addl $12, %esp
1327 ; CHECK-AVX2-NEXT: popl %esi
1328 ; CHECK-AVX2-NEXT: popl %edi
1329 ; CHECK-AVX2-NEXT: popl %ebx
1330 ; CHECK-AVX2-NEXT: popl %ebp
1331 ; CHECK-AVX2-NEXT: retl
1332 ;
1333 ; CHECK-AVX512-LABEL: test_limit_all:
1334 ; CHECK-AVX512: # %bb.0: # %entry
1335 ; CHECK-AVX512-NEXT: pushl %ebp
1336 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
1337 ; CHECK-AVX512-NEXT: pushl %ebx
1338 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 12
1339 ; CHECK-AVX512-NEXT: pushl %edi
1340 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 16
1341 ; CHECK-AVX512-NEXT: pushl %esi
1342 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 20
1343 ; CHECK-AVX512-NEXT: subl $12, %esp
1344 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 32
1345 ; CHECK-AVX512-NEXT: .cfi_offset %esi, -20
1346 ; CHECK-AVX512-NEXT: .cfi_offset %edi, -16
1347 ; CHECK-AVX512-NEXT: .cfi_offset %ebx, -12
1348 ; CHECK-AVX512-NEXT: .cfi_offset %ebp, -8
1349 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ebx
1350 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edi
1351 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi
1352 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ebp
1353 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
1354 ; CHECK-AVX512-NEXT: movl %eax, 12(%ebp)
1355 ; CHECK-AVX512-NEXT: movl %ebp, (%esp)
1356 ; CHECK-AVX512-NEXT: calll bar
1357 ; CHECK-AVX512-NEXT: cmpl $18, %esi
1358 ; CHECK-AVX512-NEXT: jl .LBB9_2
1359 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
1360 ; CHECK-AVX512-NEXT: movl %esi, 4(%ebp)
1361 ; CHECK-AVX512-NEXT: movl %ebp, (%esp)
1362 ; CHECK-AVX512-NEXT: calll bar
1363 ; CHECK-AVX512-NEXT: .LBB9_2: # %if.end
1364 ; CHECK-AVX512-NEXT: vmovups (%ebx), %xmm0
1365 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%edi)
1366 ; CHECK-AVX512-NEXT: vmovups (%ebp), %xmm0
1367 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
1368 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%eax)
1369 ; CHECK-AVX512-NEXT: addl $12, %esp
1370 ; CHECK-AVX512-NEXT: popl %esi
1371 ; CHECK-AVX512-NEXT: popl %edi
1372 ; CHECK-AVX512-NEXT: popl %ebx
1373 ; CHECK-AVX512-NEXT: popl %ebp
1374 ; CHECK-AVX512-NEXT: retl
1375 entry:
1376 %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
1377 store i32 %x2, i32* %d, align 4
1378 tail call void @bar(%struct.S* %s1) #3
1379 %cmp = icmp sgt i32 %x, 17
1380 br i1 %cmp, label %if.then, label %if.end
1381
1382 if.then: ; preds = %entry
1383 %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
1384 store i32 %x, i32* %b, align 4
1385 tail call void @bar(%struct.S* nonnull %s1) #3
1386 br label %if.end
1387
1388 if.end: ; preds = %if.then, %entry
1389 %0 = bitcast %struct.S* %s3 to i8*
1390 %1 = bitcast %struct.S* %s4 to i8*
1391 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
1392 %2 = bitcast %struct.S* %s2 to i8*
1393 %3 = bitcast %struct.S* %s1 to i8*
1394 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
1395 ret void
1396 }
1397
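; test_limit_one_pred: only the %if.then predecessor contains a call to @bar,
; and the copy out of %s1 is still split into four i32 copies, in contrast to
; test_limit_all above where every path to the copy passed through a call.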
1398 ; Function Attrs: nounwind uwtable
1399 define void @test_limit_one_pred(%struct.S* %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
1400 ; CHECK-LABEL: test_limit_one_pred:
1401 ; CHECK: # %bb.0: # %entry
1402 ; CHECK-NEXT: pushl %ebp
1403 ; CHECK-NEXT: .cfi_def_cfa_offset 8
1404 ; CHECK-NEXT: pushl %ebx
1405 ; CHECK-NEXT: .cfi_def_cfa_offset 12
1406 ; CHECK-NEXT: pushl %edi
1407 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1408 ; CHECK-NEXT: pushl %esi
1409 ; CHECK-NEXT: .cfi_def_cfa_offset 20
1410 ; CHECK-NEXT: subl $12, %esp
1411 ; CHECK-NEXT: .cfi_def_cfa_offset 32
1412 ; CHECK-NEXT: .cfi_offset %esi, -20
1413 ; CHECK-NEXT: .cfi_offset %edi, -16
1414 ; CHECK-NEXT: .cfi_offset %ebx, -12
1415 ; CHECK-NEXT: .cfi_offset %ebp, -8
1416 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
1417 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
1418 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1419 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
1420 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
1421 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1422 ; CHECK-NEXT: movl %ecx, 12(%edi)
1423 ; CHECK-NEXT: cmpl $18, %eax
1424 ; CHECK-NEXT: jl .LBB10_2
1425 ; CHECK-NEXT: # %bb.1: # %if.then
1426 ; CHECK-NEXT: movl %eax, 4(%edi)
1427 ; CHECK-NEXT: movl %edi, (%esp)
1428 ; CHECK-NEXT: calll bar
1429 ; CHECK-NEXT: .LBB10_2: # %if.end
1430 ; CHECK-NEXT: movups (%ebp), %xmm0
1431 ; CHECK-NEXT: movups %xmm0, (%ebx)
1432 ; CHECK-NEXT: movl (%edi), %eax
1433 ; CHECK-NEXT: movl %eax, (%esi)
1434 ; CHECK-NEXT: movl 4(%edi), %eax
1435 ; CHECK-NEXT: movl %eax, 4(%esi)
1436 ; CHECK-NEXT: movl 8(%edi), %eax
1437 ; CHECK-NEXT: movl %eax, 8(%esi)
1438 ; CHECK-NEXT: movl 12(%edi), %eax
1439 ; CHECK-NEXT: movl %eax, 12(%esi)
1440 ; CHECK-NEXT: addl $12, %esp
1441 ; CHECK-NEXT: popl %esi
1442 ; CHECK-NEXT: popl %edi
1443 ; CHECK-NEXT: popl %ebx
1444 ; CHECK-NEXT: popl %ebp
1445 ; CHECK-NEXT: retl
1446 ;
1447 ; DISABLED-LABEL: test_limit_one_pred:
1448 ; DISABLED: # %bb.0: # %entry
1449 ; DISABLED-NEXT: pushl %ebp
1450 ; DISABLED-NEXT: .cfi_def_cfa_offset 8
1451 ; DISABLED-NEXT: pushl %ebx
1452 ; DISABLED-NEXT: .cfi_def_cfa_offset 12
1453 ; DISABLED-NEXT: pushl %edi
1454 ; DISABLED-NEXT: .cfi_def_cfa_offset 16
1455 ; DISABLED-NEXT: pushl %esi
1456 ; DISABLED-NEXT: .cfi_def_cfa_offset 20
1457 ; DISABLED-NEXT: subl $12, %esp
1458 ; DISABLED-NEXT: .cfi_def_cfa_offset 32
1459 ; DISABLED-NEXT: .cfi_offset %esi, -20
1460 ; DISABLED-NEXT: .cfi_offset %edi, -16
1461 ; DISABLED-NEXT: .cfi_offset %ebx, -12
1462 ; DISABLED-NEXT: .cfi_offset %ebp, -8
1463 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ebx
1464 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edi
1465 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax
1466 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi
1467 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ebp
1468 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx
1469 ; DISABLED-NEXT: movl %ecx, 12(%ebp)
1470 ; DISABLED-NEXT: cmpl $18, %eax
1471 ; DISABLED-NEXT: jl .LBB10_2
1472 ; DISABLED-NEXT: # %bb.1: # %if.then
1473 ; DISABLED-NEXT: movl %eax, 4(%ebp)
1474 ; DISABLED-NEXT: movl %ebp, (%esp)
1475 ; DISABLED-NEXT: calll bar
1476 ; DISABLED-NEXT: .LBB10_2: # %if.end
1477 ; DISABLED-NEXT: movups (%ebx), %xmm0
1478 ; DISABLED-NEXT: movups %xmm0, (%edi)
1479 ; DISABLED-NEXT: movups (%ebp), %xmm0
1480 ; DISABLED-NEXT: movups %xmm0, (%esi)
1481 ; DISABLED-NEXT: addl $12, %esp
1482 ; DISABLED-NEXT: popl %esi
1483 ; DISABLED-NEXT: popl %edi
1484 ; DISABLED-NEXT: popl %ebx
1485 ; DISABLED-NEXT: popl %ebp
1486 ; DISABLED-NEXT: retl
1487 ;
1488 ; CHECK-AVX2-LABEL: test_limit_one_pred:
1489 ; CHECK-AVX2: # %bb.0: # %entry
1490 ; CHECK-AVX2-NEXT: pushl %ebp
1491 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
1492 ; CHECK-AVX2-NEXT: pushl %ebx
1493 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12
1494 ; CHECK-AVX2-NEXT: pushl %edi
1495 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16
1496 ; CHECK-AVX2-NEXT: pushl %esi
1497 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 20
1498 ; CHECK-AVX2-NEXT: subl $12, %esp
1499 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32
1500 ; CHECK-AVX2-NEXT: .cfi_offset %esi, -20
1501 ; CHECK-AVX2-NEXT: .cfi_offset %edi, -16
1502 ; CHECK-AVX2-NEXT: .cfi_offset %ebx, -12
1503 ; CHECK-AVX2-NEXT: .cfi_offset %ebp, -8
1504 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ebp
1505 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ebx
1506 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
1507 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi
1508 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edi
1509 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
1510 ; CHECK-AVX2-NEXT: movl %ecx, 12(%edi)
1511 ; CHECK-AVX2-NEXT: cmpl $18, %eax
1512 ; CHECK-AVX2-NEXT: jl .LBB10_2
1513 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
1514 ; CHECK-AVX2-NEXT: movl %eax, 4(%edi)
1515 ; CHECK-AVX2-NEXT: movl %edi, (%esp)
1516 ; CHECK-AVX2-NEXT: calll bar
1517 ; CHECK-AVX2-NEXT: .LBB10_2: # %if.end
1518 ; CHECK-AVX2-NEXT: movups (%ebp), %xmm0
1519 ; CHECK-AVX2-NEXT: movups %xmm0, (%ebx)
1520 ; CHECK-AVX2-NEXT: movl (%edi), %eax
1521 ; CHECK-AVX2-NEXT: movl %eax, (%esi)
1522 ; CHECK-AVX2-NEXT: movl 4(%edi), %eax
1523 ; CHECK-AVX2-NEXT: movl %eax, 4(%esi)
1524 ; CHECK-AVX2-NEXT: movl 8(%edi), %eax
1525 ; CHECK-AVX2-NEXT: movl %eax, 8(%esi)
1526 ; CHECK-AVX2-NEXT: movl 12(%edi), %eax
1527 ; CHECK-AVX2-NEXT: movl %eax, 12(%esi)
1528 ; CHECK-AVX2-NEXT: addl $12, %esp
1529 ; CHECK-AVX2-NEXT: popl %esi
1530 ; CHECK-AVX2-NEXT: popl %edi
1531 ; CHECK-AVX2-NEXT: popl %ebx
1532 ; CHECK-AVX2-NEXT: popl %ebp
1533 ; CHECK-AVX2-NEXT: retl
1534 ;
1535 ; CHECK-AVX512-LABEL: test_limit_one_pred:
1536 ; CHECK-AVX512: # %bb.0: # %entry
1537 ; CHECK-AVX512-NEXT: pushl %ebp
1538 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
1539 ; CHECK-AVX512-NEXT: pushl %ebx
1540 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 12
1541 ; CHECK-AVX512-NEXT: pushl %edi
1542 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 16
1543 ; CHECK-AVX512-NEXT: pushl %esi
1544 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 20
1545 ; CHECK-AVX512-NEXT: subl $12, %esp
1546 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 32
1547 ; CHECK-AVX512-NEXT: .cfi_offset %esi, -20
1548 ; CHECK-AVX512-NEXT: .cfi_offset %edi, -16
1549 ; CHECK-AVX512-NEXT: .cfi_offset %ebx, -12
1550 ; CHECK-AVX512-NEXT: .cfi_offset %ebp, -8
1551 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ebp
1552 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ebx
1553 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
1554 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi
1555 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edi
1556 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
1557 ; CHECK-AVX512-NEXT: movl %ecx, 12(%edi)
1558 ; CHECK-AVX512-NEXT: cmpl $18, %eax
1559 ; CHECK-AVX512-NEXT: jl .LBB10_2
1560 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
1561 ; CHECK-AVX512-NEXT: movl %eax, 4(%edi)
1562 ; CHECK-AVX512-NEXT: movl %edi, (%esp)
1563 ; CHECK-AVX512-NEXT: calll bar
1564 ; CHECK-AVX512-NEXT: .LBB10_2: # %if.end
1565 ; CHECK-AVX512-NEXT: vmovups (%ebp), %xmm0
1566 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%ebx)
1567 ; CHECK-AVX512-NEXT: movl (%edi), %eax
1568 ; CHECK-AVX512-NEXT: movl %eax, (%esi)
1569 ; CHECK-AVX512-NEXT: movl 4(%edi), %eax
1570 ; CHECK-AVX512-NEXT: movl %eax, 4(%esi)
1571 ; CHECK-AVX512-NEXT: movl 8(%edi), %eax
1572 ; CHECK-AVX512-NEXT: movl %eax, 8(%esi)
1573 ; CHECK-AVX512-NEXT: movl 12(%edi), %eax
1574 ; CHECK-AVX512-NEXT: movl %eax, 12(%esi)
1575 ; CHECK-AVX512-NEXT: addl $12, %esp
1576 ; CHECK-AVX512-NEXT: popl %esi
1577 ; CHECK-AVX512-NEXT: popl %edi
1578 ; CHECK-AVX512-NEXT: popl %ebx
1579 ; CHECK-AVX512-NEXT: popl %ebp
1580 ; CHECK-AVX512-NEXT: retl
1581 entry:
1582 %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
1583 store i32 %x2, i32* %d, align 4
1584 %cmp = icmp sgt i32 %x, 17
1585 br i1 %cmp, label %if.then, label %if.end
1586
1587 if.then: ; preds = %entry
1588 %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
1589 store i32 %x, i32* %b, align 4
1590 tail call void @bar(%struct.S* nonnull %s1) #3
1591 br label %if.end
1592
1593 if.end: ; preds = %if.then, %entry
1594 %0 = bitcast %struct.S* %s3 to i8*
1595 %1 = bitcast %struct.S* %s4 to i8*
1596 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
1597 %2 = bitcast %struct.S* %s2 to i8*
1598 %3 = bitcast %struct.S* %s1 to i8*
1599 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
1600 ret void
1601 }
1602
1603
1604 declare void @bar(%struct.S*) local_unnamed_addr #1
1605
1606
1607 ; Function Attrs: argmemonly nounwind
1608 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
1609
1610 attributes #0 = { nounwind uwtable "target-cpu"="x86-64" }
1611
1612 %struct.S7 = type { float, float, float, float, float, float, float, float }
1613
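; test_conditional_block_float: the conditional store writes a single float
; (imm 0x3F800000); the SSE runs split the first 16 bytes of the 32-byte copy
; into scalar copies, and the AVX512 run splits the stored field out of a
; YMM-sized copy (two i32 copies, a 16-byte XMM chunk, then two more i32s).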
1614 ; Function Attrs: nounwind uwtable
1615 define void @test_conditional_block_float(%struct.S7* nocapture %s1, %struct.S7* nocapture %s2, i32 %x, %struct.S7* nocapture %s3, %struct.S7* nocapture readonly %s4, float %y) local_unnamed_addr #0 {
1616 ; CHECK-LABEL: test_conditional_block_float:
1617 ; CHECK: # %bb.0: # %entry
1618 ; CHECK-NEXT: pushl %ebx
1619 ; CHECK-NEXT: .cfi_def_cfa_offset 8
1620 ; CHECK-NEXT: pushl %edi
1621 ; CHECK-NEXT: .cfi_def_cfa_offset 12
1622 ; CHECK-NEXT: pushl %esi
1623 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1624 ; CHECK-NEXT: .cfi_offset %esi, -16
1625 ; CHECK-NEXT: .cfi_offset %edi, -12
1626 ; CHECK-NEXT: .cfi_offset %ebx, -8
1627 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
1628 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
1629 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1630 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1631 ; CHECK-NEXT: cmpl $18, {{[0-9]+}}(%esp)
1632 ; CHECK-NEXT: jl .LBB11_2
1633 ; CHECK-NEXT: # %bb.1: # %if.then
1634 ; CHECK-NEXT: movl $1065353216, 4(%ecx) # imm = 0x3F800000
1635 ; CHECK-NEXT: .LBB11_2: # %if.end
1636 ; CHECK-NEXT: movups (%esi), %xmm0
1637 ; CHECK-NEXT: movups 16(%esi), %xmm1
1638 ; CHECK-NEXT: movups %xmm1, 16(%edx)
1639 ; CHECK-NEXT: movups %xmm0, (%edx)
1640 ; CHECK-NEXT: movl (%ecx), %edx
1641 ; CHECK-NEXT: movl 4(%ecx), %esi
1642 ; CHECK-NEXT: movl 8(%ecx), %edi
1643 ; CHECK-NEXT: movl 12(%ecx), %ebx
1644 ; CHECK-NEXT: movups 16(%ecx), %xmm0
1645 ; CHECK-NEXT: movups %xmm0, 16(%eax)
1646 ; CHECK-NEXT: movl %edx, (%eax)
1647 ; CHECK-NEXT: movl %esi, 4(%eax)
1648 ; CHECK-NEXT: movl %edi, 8(%eax)
1649 ; CHECK-NEXT: movl %ebx, 12(%eax)
1650 ; CHECK-NEXT: popl %esi
1651 ; CHECK-NEXT: popl %edi
1652 ; CHECK-NEXT: popl %ebx
1653 ; CHECK-NEXT: retl
1654 ;
1655 ; DISABLED-LABEL: test_conditional_block_float:
1656 ; DISABLED: # %bb.0: # %entry
1657 ; DISABLED-NEXT: pushl %esi
1658 ; DISABLED-NEXT: .cfi_def_cfa_offset 8
1659 ; DISABLED-NEXT: .cfi_offset %esi, -8
1660 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi
1661 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx
1662 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax
1663 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx
1664 ; DISABLED-NEXT: cmpl $18, {{[0-9]+}}(%esp)
1665 ; DISABLED-NEXT: jl .LBB11_2
1666 ; DISABLED-NEXT: # %bb.1: # %if.then
1667 ; DISABLED-NEXT: movl $1065353216, 4(%ecx) # imm = 0x3F800000
1668 ; DISABLED-NEXT: .LBB11_2: # %if.end
1669 ; DISABLED-NEXT: movups (%esi), %xmm0
1670 ; DISABLED-NEXT: movups 16(%esi), %xmm1
1671 ; DISABLED-NEXT: movups %xmm1, 16(%edx)
1672 ; DISABLED-NEXT: movups %xmm0, (%edx)
1673 ; DISABLED-NEXT: movups (%ecx), %xmm0
1674 ; DISABLED-NEXT: movups 16(%ecx), %xmm1
1675 ; DISABLED-NEXT: movups %xmm1, 16(%eax)
1676 ; DISABLED-NEXT: movups %xmm0, (%eax)
1677 ; DISABLED-NEXT: popl %esi
1678 ; DISABLED-NEXT: retl
1679 ;
1680 ; CHECK-AVX2-LABEL: test_conditional_block_float:
1681 ; CHECK-AVX2: # %bb.0: # %entry
1682 ; CHECK-AVX2-NEXT: pushl %ebx
1683 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
1684 ; CHECK-AVX2-NEXT: pushl %edi
1685 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12
1686 ; CHECK-AVX2-NEXT: pushl %esi
1687 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16
1688 ; CHECK-AVX2-NEXT: .cfi_offset %esi, -16
1689 ; CHECK-AVX2-NEXT: .cfi_offset %edi, -12
1690 ; CHECK-AVX2-NEXT: .cfi_offset %ebx, -8
1691 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi
1692 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
1693 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
1694 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
1695 ; CHECK-AVX2-NEXT: cmpl $18, {{[0-9]+}}(%esp)
1696 ; CHECK-AVX2-NEXT: jl .LBB11_2
1697 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
1698 ; CHECK-AVX2-NEXT: movl $1065353216, 4(%ecx) # imm = 0x3F800000
1699 ; CHECK-AVX2-NEXT: .LBB11_2: # %if.end
1700 ; CHECK-AVX2-NEXT: movups (%esi), %xmm0
1701 ; CHECK-AVX2-NEXT: movups 16(%esi), %xmm1
1702 ; CHECK-AVX2-NEXT: movups %xmm1, 16(%edx)
1703 ; CHECK-AVX2-NEXT: movups %xmm0, (%edx)
1704 ; CHECK-AVX2-NEXT: movl (%ecx), %edx
1705 ; CHECK-AVX2-NEXT: movl 4(%ecx), %esi
1706 ; CHECK-AVX2-NEXT: movl 8(%ecx), %edi
1707 ; CHECK-AVX2-NEXT: movl 12(%ecx), %ebx
1708 ; CHECK-AVX2-NEXT: movups 16(%ecx), %xmm0
1709 ; CHECK-AVX2-NEXT: movups %xmm0, 16(%eax)
1710 ; CHECK-AVX2-NEXT: movl %edx, (%eax)
1711 ; CHECK-AVX2-NEXT: movl %esi, 4(%eax)
1712 ; CHECK-AVX2-NEXT: movl %edi, 8(%eax)
1713 ; CHECK-AVX2-NEXT: movl %ebx, 12(%eax)
1714 ; CHECK-AVX2-NEXT: popl %esi
1715 ; CHECK-AVX2-NEXT: popl %edi
1716 ; CHECK-AVX2-NEXT: popl %ebx
1717 ; CHECK-AVX2-NEXT: retl
1718 ;
1719 ; CHECK-AVX512-LABEL: test_conditional_block_float:
1720 ; CHECK-AVX512: # %bb.0: # %entry
1721 ; CHECK-AVX512-NEXT: pushl %esi
1722 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
1723 ; CHECK-AVX512-NEXT: .cfi_offset %esi, -8
1724 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi
1725 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx
1726 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
1727 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
1728 ; CHECK-AVX512-NEXT: cmpl $18, {{[0-9]+}}(%esp)
1729 ; CHECK-AVX512-NEXT: jl .LBB11_2
1730 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
1731 ; CHECK-AVX512-NEXT: movl $1065353216, 4(%ecx) # imm = 0x3F800000
1732 ; CHECK-AVX512-NEXT: .LBB11_2: # %if.end
1733 ; CHECK-AVX512-NEXT: vmovups (%esi), %ymm0
1734 ; CHECK-AVX512-NEXT: vmovups %ymm0, (%edx)
1735 ; CHECK-AVX512-NEXT: movl (%ecx), %edx
1736 ; CHECK-AVX512-NEXT: movl %edx, (%eax)
1737 ; CHECK-AVX512-NEXT: movl 4(%ecx), %edx
1738 ; CHECK-AVX512-NEXT: movl %edx, 4(%eax)
1739 ; CHECK-AVX512-NEXT: vmovups 8(%ecx), %xmm0
1740 ; CHECK-AVX512-NEXT: vmovups %xmm0, 8(%eax)
1741 ; CHECK-AVX512-NEXT: movl 24(%ecx), %edx
1742 ; CHECK-AVX512-NEXT: movl %edx, 24(%eax)
1743 ; CHECK-AVX512-NEXT: movl 28(%ecx), %ecx
1744 ; CHECK-AVX512-NEXT: movl %ecx, 28(%eax)
1745 ; CHECK-AVX512-NEXT: popl %esi
1746 ; CHECK-AVX512-NEXT: vzeroupper
1747 ; CHECK-AVX512-NEXT: retl
1748 entry:
1749 %cmp = icmp sgt i32 %x, 17
1750 br i1 %cmp, label %if.then, label %if.end
1751
1752 if.then: ; preds = %entry
1753 %b = getelementptr inbounds %struct.S7, %struct.S7* %s1, i64 0, i32 1
1754 store float 1.0, float* %b, align 4
1755 br label %if.end
1756
1757 if.end: ; preds = %if.then, %entry
1758 %0 = bitcast %struct.S7* %s3 to i8*
1759 %1 = bitcast %struct.S7* %s4 to i8*
1760 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 4, i1 false)
1761 %2 = bitcast %struct.S7* %s2 to i8*
1762 %3 = bitcast %struct.S7* %s1 to i8*
1763 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 32, i32 4, i1 false)
1764 ret void
1765 }
1766
1767 %struct.S8 = type { i64, i64, i64, i64, i64, i64 }
1768
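; test_conditional_block_ymm: the conditional i64 store (itself lowered to two
; i32 immediate stores) lands in the first 16 bytes of %struct.S8, so the
; 32-byte copy of %s1 is split into four i32 copies plus a 16-byte tail,
; exercising the YMM-sized case on the AVX512 run.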
1769 ; Function Attrs: nounwind uwtable
1770 define void @test_conditional_block_ymm(%struct.S8* nocapture %s1, %struct.S8* nocapture %s2, i32 %x, %struct.S8* nocapture %s3, %struct.S8* nocapture readonly %s4) local_unnamed_addr #0 {
1771 ; CHECK-LABEL: test_conditional_block_ymm:
1772 ; CHECK: # %bb.0: # %entry
1773 ; CHECK-NEXT: pushl %ebx
1774 ; CHECK-NEXT: .cfi_def_cfa_offset 8
1775 ; CHECK-NEXT: pushl %edi
1776 ; CHECK-NEXT: .cfi_def_cfa_offset 12
1777 ; CHECK-NEXT: pushl %esi
1778 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1779 ; CHECK-NEXT: .cfi_offset %esi, -16
1780 ; CHECK-NEXT: .cfi_offset %edi, -12
1781 ; CHECK-NEXT: .cfi_offset %ebx, -8
1782 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
1783 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
1784 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1785 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1786 ; CHECK-NEXT: cmpl $18, {{[0-9]+}}(%esp)
1787 ; CHECK-NEXT: jl .LBB12_2
1788 ; CHECK-NEXT: # %bb.1: # %if.then
1789 ; CHECK-NEXT: movl $0, 12(%ecx)
1790 ; CHECK-NEXT: movl $1, 8(%ecx)
1791 ; CHECK-NEXT: .LBB12_2: # %if.end
1792 ; CHECK-NEXT: movups (%esi), %xmm0
1793 ; CHECK-NEXT: movups 16(%esi), %xmm1
1794 ; CHECK-NEXT: movups %xmm1, 16(%edx)
1795 ; CHECK-NEXT: movups %xmm0, (%edx)
1796 ; CHECK-NEXT: movl (%ecx), %edx
1797 ; CHECK-NEXT: movl 4(%ecx), %esi
1798 ; CHECK-NEXT: movl 8(%ecx), %edi
1799 ; CHECK-NEXT: movl 12(%ecx), %ebx
1800 ; CHECK-NEXT: movups 16(%ecx), %xmm0
1801 ; CHECK-NEXT: movups %xmm0, 16(%eax)
1802 ; CHECK-NEXT: movl %edx, (%eax)
1803 ; CHECK-NEXT: movl %esi, 4(%eax)
1804 ; CHECK-NEXT: movl %edi, 8(%eax)
1805 ; CHECK-NEXT: movl %ebx, 12(%eax)
1806 ; CHECK-NEXT: popl %esi
1807 ; CHECK-NEXT: popl %edi
1808 ; CHECK-NEXT: popl %ebx
1809 ; CHECK-NEXT: retl
1810 ;
1811 ; DISABLED-LABEL: test_conditional_block_ymm:
1812 ; DISABLED: # %bb.0: # %entry
1813 ; DISABLED-NEXT: pushl %esi
1814 ; DISABLED-NEXT: .cfi_def_cfa_offset 8
1815 ; DISABLED-NEXT: .cfi_offset %esi, -8
1816 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %esi
1817 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %edx
1818 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %eax
1819 ; DISABLED-NEXT: movl {{[0-9]+}}(%esp), %ecx
1820 ; DISABLED-NEXT: cmpl $18, {{[0-9]+}}(%esp)
1821 ; DISABLED-NEXT: jl .LBB12_2
1822 ; DISABLED-NEXT: # %bb.1: # %if.then
1823 ; DISABLED-NEXT: movl $0, 12(%ecx)
1824 ; DISABLED-NEXT: movl $1, 8(%ecx)
1825 ; DISABLED-NEXT: .LBB12_2: # %if.end
1826 ; DISABLED-NEXT: movups (%esi), %xmm0
1827 ; DISABLED-NEXT: movups 16(%esi), %xmm1
1828 ; DISABLED-NEXT: movups %xmm1, 16(%edx)
1829 ; DISABLED-NEXT: movups %xmm0, (%edx)
1830 ; DISABLED-NEXT: movups (%ecx), %xmm0
1831 ; DISABLED-NEXT: movups 16(%ecx), %xmm1
1832 ; DISABLED-NEXT: movups %xmm1, 16(%eax)
1833 ; DISABLED-NEXT: movups %xmm0, (%eax)
1834 ; DISABLED-NEXT: popl %esi
1835 ; DISABLED-NEXT: retl
1836 ;
1837 ; CHECK-AVX2-LABEL: test_conditional_block_ymm:
1838 ; CHECK-AVX2: # %bb.0: # %entry
1839 ; CHECK-AVX2-NEXT: pushl %ebx
1840 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
1841 ; CHECK-AVX2-NEXT: pushl %edi
1842 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 12
1843 ; CHECK-AVX2-NEXT: pushl %esi
1844 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16
1845 ; CHECK-AVX2-NEXT: .cfi_offset %esi, -16
1846 ; CHECK-AVX2-NEXT: .cfi_offset %edi, -12
1847 ; CHECK-AVX2-NEXT: .cfi_offset %ebx, -8
1848 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi
1849 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx
1850 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
1851 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx
1852 ; CHECK-AVX2-NEXT: cmpl $18, {{[0-9]+}}(%esp)
1853 ; CHECK-AVX2-NEXT: jl .LBB12_2
1854 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
1855 ; CHECK-AVX2-NEXT: movl $0, 12(%ecx)
1856 ; CHECK-AVX2-NEXT: movl $1, 8(%ecx)
1857 ; CHECK-AVX2-NEXT: .LBB12_2: # %if.end
1858 ; CHECK-AVX2-NEXT: movups (%esi), %xmm0
1859 ; CHECK-AVX2-NEXT: movups 16(%esi), %xmm1
1860 ; CHECK-AVX2-NEXT: movups %xmm1, 16(%edx)
1861 ; CHECK-AVX2-NEXT: movups %xmm0, (%edx)
1862 ; CHECK-AVX2-NEXT: movl (%ecx), %edx
1863 ; CHECK-AVX2-NEXT: movl 4(%ecx), %esi
1864 ; CHECK-AVX2-NEXT: movl 8(%ecx), %edi
1865 ; CHECK-AVX2-NEXT: movl 12(%ecx), %ebx
1866 ; CHECK-AVX2-NEXT: movups 16(%ecx), %xmm0
1867 ; CHECK-AVX2-NEXT: movups %xmm0, 16(%eax)
1868 ; CHECK-AVX2-NEXT: movl %edx, (%eax)
1869 ; CHECK-AVX2-NEXT: movl %esi, 4(%eax)
1870 ; CHECK-AVX2-NEXT: movl %edi, 8(%eax)
1871 ; CHECK-AVX2-NEXT: movl %ebx, 12(%eax)
1872 ; CHECK-AVX2-NEXT: popl %esi
1873 ; CHECK-AVX2-NEXT: popl %edi
1874 ; CHECK-AVX2-NEXT: popl %ebx
1875 ; CHECK-AVX2-NEXT: retl
1876 ;
1877 ; CHECK-AVX512-LABEL: test_conditional_block_ymm:
1878 ; CHECK-AVX512: # %bb.0: # %entry
1879 ; CHECK-AVX512-NEXT: pushl %esi
1880 ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
1881 ; CHECK-AVX512-NEXT: .cfi_offset %esi, -8
1882 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi
1883 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx
1884 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
1885 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
1886 ; CHECK-AVX512-NEXT: cmpl $18, {{[0-9]+}}(%esp)
1887 ; CHECK-AVX512-NEXT: jl .LBB12_2
1888 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
1889 ; CHECK-AVX512-NEXT: movl $0, 12(%ecx)
1890 ; CHECK-AVX512-NEXT: movl $1, 8(%ecx)
1891 ; CHECK-AVX512-NEXT: .LBB12_2: # %if.end
1892 ; CHECK-AVX512-NEXT: vmovups (%esi), %ymm0
1893 ; CHECK-AVX512-NEXT: vmovups %ymm0, (%edx)
1894 ; CHECK-AVX512-NEXT: movl (%ecx), %edx
1895 ; CHECK-AVX512-NEXT: movl %edx, (%eax)
1896 ; CHECK-AVX512-NEXT: movl 4(%ecx), %edx
1897 ; CHECK-AVX512-NEXT: movl %edx, 4(%eax)
1898 ; CHECK-AVX512-NEXT: movl 8(%ecx), %edx
1899 ; CHECK-AVX512-NEXT: movl %edx, 8(%eax)
1900 ; CHECK-AVX512-NEXT: movl 12(%ecx), %edx
1901 ; CHECK-AVX512-NEXT: movl %edx, 12(%eax)
1902 ; CHECK-AVX512-NEXT: vmovups 16(%ecx), %xmm0
1903 ; CHECK-AVX512-NEXT: vmovups %xmm0, 16(%eax)
1904 ; CHECK-AVX512-NEXT: popl %esi
1905 ; CHECK-AVX512-NEXT: vzeroupper
1906 ; CHECK-AVX512-NEXT: retl
1907 entry:
1908 %cmp = icmp sgt i32 %x, 17
1909 br i1 %cmp, label %if.then, label %if.end
1910
1911 if.then: ; preds = %entry
1912 %b = getelementptr inbounds %struct.S8, %struct.S8* %s1, i64 0, i32 1
1913 store i64 1, i64* %b, align 4
1914 br label %if.end
1915
1916 if.end: ; preds = %if.then, %entry
1917 %0 = bitcast %struct.S8* %s3 to i8*
1918 %1 = bitcast %struct.S8* %s4 to i8*
1919 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 4, i1 false)
1920 %2 = bitcast %struct.S8* %s2 to i8*
1921 %3 = bitcast %struct.S8* %s1 to i8*
1922 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 32, i32 4, i1 false)
1923 ret void
1924 }
1925
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK
2 ; RUN: llc < %s -mtriple=x86_64-linux --disable-fixup-SFB | FileCheck %s --check-prefix=DISABLED
3 ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK-AVX2
4 ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx | FileCheck %s -check-prefix=CHECK-AVX512
5
6 ; RUN: llc < %s -mtriple=i686-linux
7 ; RUN: llc < %s -mtriple=i686-linux --disable-fixup-SFB
8 ; RUN: llc < %s -mtriple=i686-linux -mattr sse4
9 ; RUN: llc < %s -mtriple=i686-linux -mattr avx512
10
11 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
12 target triple = "x86_64-unknown-linux-gnu"
13
14 %struct.S = type { i32, i32, i32, i32 }
15
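; test_conditional_block: the basic pattern this pass targets; roughly, as a
; sketch not taken from the original source:
;   if (x > 17) s1->b = x;
;   *s3 = *s4;   /* independent copy, stays a vector load/store */
;   *s2 = *s1;   /* reloads the i32 store, split into 4+4+8 byte copies */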
16 ; Function Attrs: nounwind uwtable
17 define void @test_conditional_block(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4) local_unnamed_addr #0 {
18 ; CHECK-LABEL: test_conditional_block:
19 ; CHECK: # %bb.0: # %entry
20 ; CHECK-NEXT: cmpl $18, %edx
21 ; CHECK-NEXT: jl .LBB0_2
22 ; CHECK-NEXT: # %bb.1: # %if.then
23 ; CHECK-NEXT: movl %edx, 4(%rdi)
24 ; CHECK-NEXT: .LBB0_2: # %if.end
25 ; CHECK-NEXT: movups (%r8), %xmm0
26 ; CHECK-NEXT: movups %xmm0, (%rcx)
27 ; CHECK-NEXT: movl (%rdi), %eax
28 ; CHECK-NEXT: movl %eax, (%rsi)
29 ; CHECK-NEXT: movl 4(%rdi), %eax
30 ; CHECK-NEXT: movl %eax, 4(%rsi)
31 ; CHECK-NEXT: movq 8(%rdi), %rax
32 ; CHECK-NEXT: movq %rax, 8(%rsi)
33 ; CHECK-NEXT: retq
34 ;
35 ; DISABLED-LABEL: test_conditional_block:
36 ; DISABLED: # %bb.0: # %entry
37 ; DISABLED-NEXT: cmpl $18, %edx
38 ; DISABLED-NEXT: jl .LBB0_2
39 ; DISABLED-NEXT: # %bb.1: # %if.then
40 ; DISABLED-NEXT: movl %edx, 4(%rdi)
41 ; DISABLED-NEXT: .LBB0_2: # %if.end
42 ; DISABLED-NEXT: movups (%r8), %xmm0
43 ; DISABLED-NEXT: movups %xmm0, (%rcx)
44 ; DISABLED-NEXT: movups (%rdi), %xmm0
45 ; DISABLED-NEXT: movups %xmm0, (%rsi)
46 ; DISABLED-NEXT: retq
47 ;
48 ; CHECK-AVX2-LABEL: test_conditional_block:
49 ; CHECK-AVX2: # %bb.0: # %entry
50 ; CHECK-AVX2-NEXT: cmpl $18, %edx
51 ; CHECK-AVX2-NEXT: jl .LBB0_2
52 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
53 ; CHECK-AVX2-NEXT: movl %edx, 4(%rdi)
54 ; CHECK-AVX2-NEXT: .LBB0_2: # %if.end
55 ; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0
56 ; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx)
57 ; CHECK-AVX2-NEXT: movl (%rdi), %eax
58 ; CHECK-AVX2-NEXT: movl %eax, (%rsi)
59 ; CHECK-AVX2-NEXT: movl 4(%rdi), %eax
60 ; CHECK-AVX2-NEXT: movl %eax, 4(%rsi)
61 ; CHECK-AVX2-NEXT: movq 8(%rdi), %rax
62 ; CHECK-AVX2-NEXT: movq %rax, 8(%rsi)
63 ; CHECK-AVX2-NEXT: retq
64 ;
65 ; CHECK-AVX512-LABEL: test_conditional_block:
66 ; CHECK-AVX512: # %bb.0: # %entry
67 ; CHECK-AVX512-NEXT: cmpl $18, %edx
68 ; CHECK-AVX512-NEXT: jl .LBB0_2
69 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
70 ; CHECK-AVX512-NEXT: movl %edx, 4(%rdi)
71 ; CHECK-AVX512-NEXT: .LBB0_2: # %if.end
72 ; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0
73 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx)
74 ; CHECK-AVX512-NEXT: movl (%rdi), %eax
75 ; CHECK-AVX512-NEXT: movl %eax, (%rsi)
76 ; CHECK-AVX512-NEXT: movl 4(%rdi), %eax
77 ; CHECK-AVX512-NEXT: movl %eax, 4(%rsi)
78 ; CHECK-AVX512-NEXT: movq 8(%rdi), %rax
79 ; CHECK-AVX512-NEXT: movq %rax, 8(%rsi)
80 ; CHECK-AVX512-NEXT: retq
81 entry:
82 %cmp = icmp sgt i32 %x, 17
83 br i1 %cmp, label %if.then, label %if.end
84
85 if.then: ; preds = %entry
86 %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
87 store i32 %x, i32* %b, align 4
88 br label %if.end
89
90 if.end: ; preds = %if.then, %entry
91 %0 = bitcast %struct.S* %s3 to i8*
92 %1 = bitcast %struct.S* %s4 to i8*
93 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
94 %2 = bitcast %struct.S* %s2 to i8*
95 %3 = bitcast %struct.S* %s1 to i8*
96 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
97 ret void
98 }
99
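; test_imm_store: the blocking store is an immediate store in the same block
; (no branch at all); the copy is split into a 4-byte copy of the stored
; field, then an 8-byte and a 4-byte copy for the remainder.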
100 ; Function Attrs: nounwind uwtable
101 define void @test_imm_store(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3) local_unnamed_addr #0 {
102 ; CHECK-LABEL: test_imm_store:
103 ; CHECK: # %bb.0: # %entry
104 ; CHECK-NEXT: movl $0, (%rdi)
105 ; CHECK-NEXT: movl $1, (%rcx)
106 ; CHECK-NEXT: movl (%rdi), %eax
107 ; CHECK-NEXT: movl %eax, (%rsi)
108 ; CHECK-NEXT: movq 4(%rdi), %rax
109 ; CHECK-NEXT: movq %rax, 4(%rsi)
110 ; CHECK-NEXT: movl 12(%rdi), %eax
111 ; CHECK-NEXT: movl %eax, 12(%rsi)
112 ; CHECK-NEXT: retq
113 ;
114 ; DISABLED-LABEL: test_imm_store:
115 ; DISABLED: # %bb.0: # %entry
116 ; DISABLED-NEXT: movl $0, (%rdi)
117 ; DISABLED-NEXT: movl $1, (%rcx)
118 ; DISABLED-NEXT: movups (%rdi), %xmm0
119 ; DISABLED-NEXT: movups %xmm0, (%rsi)
120 ; DISABLED-NEXT: retq
121 ;
122 ; CHECK-AVX2-LABEL: test_imm_store:
123 ; CHECK-AVX2: # %bb.0: # %entry
124 ; CHECK-AVX2-NEXT: movl $0, (%rdi)
125 ; CHECK-AVX2-NEXT: movl $1, (%rcx)
126 ; CHECK-AVX2-NEXT: movl (%rdi), %eax
127 ; CHECK-AVX2-NEXT: movl %eax, (%rsi)
128 ; CHECK-AVX2-NEXT: movq 4(%rdi), %rax
129 ; CHECK-AVX2-NEXT: movq %rax, 4(%rsi)
130 ; CHECK-AVX2-NEXT: movl 12(%rdi), %eax
131 ; CHECK-AVX2-NEXT: movl %eax, 12(%rsi)
132 ; CHECK-AVX2-NEXT: retq
133 ;
134 ; CHECK-AVX512-LABEL: test_imm_store:
135 ; CHECK-AVX512: # %bb.0: # %entry
136 ; CHECK-AVX512-NEXT: movl $0, (%rdi)
137 ; CHECK-AVX512-NEXT: movl $1, (%rcx)
138 ; CHECK-AVX512-NEXT: movl (%rdi), %eax
139 ; CHECK-AVX512-NEXT: movl %eax, (%rsi)
140 ; CHECK-AVX512-NEXT: movq 4(%rdi), %rax
141 ; CHECK-AVX512-NEXT: movq %rax, 4(%rsi)
142 ; CHECK-AVX512-NEXT: movl 12(%rdi), %eax
143 ; CHECK-AVX512-NEXT: movl %eax, 12(%rsi)
144 ; CHECK-AVX512-NEXT: retq
145 entry:
146 %a = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 0
147 store i32 0, i32* %a, align 4
148 %a1 = getelementptr inbounds %struct.S, %struct.S* %s3, i64 0, i32 0
149 store i32 1, i32* %a1, align 4
150 %0 = bitcast %struct.S* %s2 to i8*
151 %1 = bitcast %struct.S* %s1 to i8*
152 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
153 ret void
154 }
155
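; test_nondirect_br: the stores at offsets 4 and 12 reach %if.end3 through a
; chain of conditional blocks rather than a single direct edge; the copy is
; still split (8+4+4 bytes), isolating the i32 stored at offset 12 in its own
; 4-byte copy.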
156 ; Function Attrs: nounwind uwtable
157 define void @test_nondirect_br(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
158 ; CHECK-LABEL: test_nondirect_br:
159 ; CHECK: # %bb.0: # %entry
160 ; CHECK-NEXT: cmpl $18, %edx
161 ; CHECK-NEXT: jl .LBB2_2
162 ; CHECK-NEXT: # %bb.1: # %if.then
163 ; CHECK-NEXT: movl %edx, 4(%rdi)
164 ; CHECK-NEXT: .LBB2_2: # %if.end
165 ; CHECK-NEXT: cmpl $14, %r9d
166 ; CHECK-NEXT: jl .LBB2_4
167 ; CHECK-NEXT: # %bb.3: # %if.then2
168 ; CHECK-NEXT: movl %r9d, 12(%rdi)
169 ; CHECK-NEXT: .LBB2_4: # %if.end3
170 ; CHECK-NEXT: movups (%r8), %xmm0
171 ; CHECK-NEXT: movups %xmm0, (%rcx)
172 ; CHECK-NEXT: movq (%rdi), %rax
173 ; CHECK-NEXT: movq %rax, (%rsi)
174 ; CHECK-NEXT: movl 8(%rdi), %eax
175 ; CHECK-NEXT: movl %eax, 8(%rsi)
176 ; CHECK-NEXT: movl 12(%rdi), %eax
177 ; CHECK-NEXT: movl %eax, 12(%rsi)
178 ; CHECK-NEXT: retq
179 ;
180 ; DISABLED-LABEL: test_nondirect_br:
181 ; DISABLED: # %bb.0: # %entry
182 ; DISABLED-NEXT: cmpl $18, %edx
183 ; DISABLED-NEXT: jl .LBB2_2
184 ; DISABLED-NEXT: # %bb.1: # %if.then
185 ; DISABLED-NEXT: movl %edx, 4(%rdi)
186 ; DISABLED-NEXT: .LBB2_2: # %if.end
187 ; DISABLED-NEXT: cmpl $14, %r9d
188 ; DISABLED-NEXT: jl .LBB2_4
189 ; DISABLED-NEXT: # %bb.3: # %if.then2
190 ; DISABLED-NEXT: movl %r9d, 12(%rdi)
191 ; DISABLED-NEXT: .LBB2_4: # %if.end3
192 ; DISABLED-NEXT: movups (%r8), %xmm0
193 ; DISABLED-NEXT: movups %xmm0, (%rcx)
194 ; DISABLED-NEXT: movups (%rdi), %xmm0
195 ; DISABLED-NEXT: movups %xmm0, (%rsi)
196 ; DISABLED-NEXT: retq
197 ;
198 ; CHECK-AVX2-LABEL: test_nondirect_br:
199 ; CHECK-AVX2: # %bb.0: # %entry
200 ; CHECK-AVX2-NEXT: cmpl $18, %edx
201 ; CHECK-AVX2-NEXT: jl .LBB2_2
202 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
203 ; CHECK-AVX2-NEXT: movl %edx, 4(%rdi)
204 ; CHECK-AVX2-NEXT: .LBB2_2: # %if.end
205 ; CHECK-AVX2-NEXT: cmpl $14, %r9d
206 ; CHECK-AVX2-NEXT: jl .LBB2_4
207 ; CHECK-AVX2-NEXT: # %bb.3: # %if.then2
208 ; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi)
209 ; CHECK-AVX2-NEXT: .LBB2_4: # %if.end3
210 ; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0
211 ; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx)
212 ; CHECK-AVX2-NEXT: movq (%rdi), %rax
213 ; CHECK-AVX2-NEXT: movq %rax, (%rsi)
214 ; CHECK-AVX2-NEXT: movl 8(%rdi), %eax
215 ; CHECK-AVX2-NEXT: movl %eax, 8(%rsi)
216 ; CHECK-AVX2-NEXT: movl 12(%rdi), %eax
217 ; CHECK-AVX2-NEXT: movl %eax, 12(%rsi)
218 ; CHECK-AVX2-NEXT: retq
219 ;
220 ; CHECK-AVX512-LABEL: test_nondirect_br:
221 ; CHECK-AVX512: # %bb.0: # %entry
222 ; CHECK-AVX512-NEXT: cmpl $18, %edx
223 ; CHECK-AVX512-NEXT: jl .LBB2_2
224 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
225 ; CHECK-AVX512-NEXT: movl %edx, 4(%rdi)
226 ; CHECK-AVX512-NEXT: .LBB2_2: # %if.end
227 ; CHECK-AVX512-NEXT: cmpl $14, %r9d
228 ; CHECK-AVX512-NEXT: jl .LBB2_4
229 ; CHECK-AVX512-NEXT: # %bb.3: # %if.then2
230 ; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi)
231 ; CHECK-AVX512-NEXT: .LBB2_4: # %if.end3
232 ; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0
233 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx)
234 ; CHECK-AVX512-NEXT: movq (%rdi), %rax
235 ; CHECK-AVX512-NEXT: movq %rax, (%rsi)
236 ; CHECK-AVX512-NEXT: movl 8(%rdi), %eax
237 ; CHECK-AVX512-NEXT: movl %eax, 8(%rsi)
238 ; CHECK-AVX512-NEXT: movl 12(%rdi), %eax
239 ; CHECK-AVX512-NEXT: movl %eax, 12(%rsi)
240 ; CHECK-AVX512-NEXT: retq
241 entry:
242 %cmp = icmp sgt i32 %x, 17
243 br i1 %cmp, label %if.then, label %if.end
244
245 if.then: ; preds = %entry
246 %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
247 store i32 %x, i32* %b, align 4
248 br label %if.end
249
250 if.end: ; preds = %if.then, %entry
251 %cmp1 = icmp sgt i32 %x2, 13
252 br i1 %cmp1, label %if.then2, label %if.end3
253
254 if.then2: ; preds = %if.end
255 %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
256 store i32 %x2, i32* %d, align 4
257 br label %if.end3
258
259 if.end3: ; preds = %if.then2, %if.end
260 %0 = bitcast %struct.S* %s3 to i8*
261 %1 = bitcast %struct.S* %s4 to i8*
262 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
263 %2 = bitcast %struct.S* %s2 to i8*
264 %3 = bitcast %struct.S* %s1 to i8*
265 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
266 ret void
267 }
268
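; Here the memcpy block has two predecessors, with a blocking store on each
; path (offset 12 unconditionally in %entry, offset 4 conditionally in
; %if.then); the expected output splits the copy of %s1 around both
; potentially blocking stores.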
269 ; Function Attrs: nounwind uwtable
270 define void @test_2preds_block(%struct.S* nocapture %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
271 ; CHECK-LABEL: test_2preds_block:
272 ; CHECK: # %bb.0: # %entry
273 ; CHECK-NEXT: movl %r9d, 12(%rdi)
274 ; CHECK-NEXT: cmpl $18, %edx
275 ; CHECK-NEXT: jl .LBB3_2
276 ; CHECK-NEXT: # %bb.1: # %if.then
277 ; CHECK-NEXT: movl %edx, 4(%rdi)
278 ; CHECK-NEXT: .LBB3_2: # %if.end
279 ; CHECK-NEXT: movups (%r8), %xmm0
280 ; CHECK-NEXT: movups %xmm0, (%rcx)
281 ; CHECK-NEXT: movl (%rdi), %eax
282 ; CHECK-NEXT: movl %eax, (%rsi)
283 ; CHECK-NEXT: movl 4(%rdi), %eax
284 ; CHECK-NEXT: movl %eax, 4(%rsi)
285 ; CHECK-NEXT: movl 8(%rdi), %eax
286 ; CHECK-NEXT: movl %eax, 8(%rsi)
287 ; CHECK-NEXT: movl 12(%rdi), %eax
288 ; CHECK-NEXT: movl %eax, 12(%rsi)
289 ; CHECK-NEXT: retq
290 ;
291 ; DISABLED-LABEL: test_2preds_block:
292 ; DISABLED: # %bb.0: # %entry
293 ; DISABLED-NEXT: movl %r9d, 12(%rdi)
294 ; DISABLED-NEXT: cmpl $18, %edx
295 ; DISABLED-NEXT: jl .LBB3_2
296 ; DISABLED-NEXT: # %bb.1: # %if.then
297 ; DISABLED-NEXT: movl %edx, 4(%rdi)
298 ; DISABLED-NEXT: .LBB3_2: # %if.end
299 ; DISABLED-NEXT: movups (%r8), %xmm0
300 ; DISABLED-NEXT: movups %xmm0, (%rcx)
301 ; DISABLED-NEXT: movups (%rdi), %xmm0
302 ; DISABLED-NEXT: movups %xmm0, (%rsi)
303 ; DISABLED-NEXT: retq
304 ;
305 ; CHECK-AVX2-LABEL: test_2preds_block:
306 ; CHECK-AVX2: # %bb.0: # %entry
307 ; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi)
308 ; CHECK-AVX2-NEXT: cmpl $18, %edx
309 ; CHECK-AVX2-NEXT: jl .LBB3_2
310 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
311 ; CHECK-AVX2-NEXT: movl %edx, 4(%rdi)
312 ; CHECK-AVX2-NEXT: .LBB3_2: # %if.end
313 ; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0
314 ; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx)
315 ; CHECK-AVX2-NEXT: movl (%rdi), %eax
316 ; CHECK-AVX2-NEXT: movl %eax, (%rsi)
317 ; CHECK-AVX2-NEXT: movl 4(%rdi), %eax
318 ; CHECK-AVX2-NEXT: movl %eax, 4(%rsi)
319 ; CHECK-AVX2-NEXT: movl 8(%rdi), %eax
320 ; CHECK-AVX2-NEXT: movl %eax, 8(%rsi)
321 ; CHECK-AVX2-NEXT: movl 12(%rdi), %eax
322 ; CHECK-AVX2-NEXT: movl %eax, 12(%rsi)
323 ; CHECK-AVX2-NEXT: retq
324 ;
325 ; CHECK-AVX512-LABEL: test_2preds_block:
326 ; CHECK-AVX512: # %bb.0: # %entry
327 ; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi)
328 ; CHECK-AVX512-NEXT: cmpl $18, %edx
329 ; CHECK-AVX512-NEXT: jl .LBB3_2
330 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
331 ; CHECK-AVX512-NEXT: movl %edx, 4(%rdi)
332 ; CHECK-AVX512-NEXT: .LBB3_2: # %if.end
333 ; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0
334 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx)
335 ; CHECK-AVX512-NEXT: movl (%rdi), %eax
336 ; CHECK-AVX512-NEXT: movl %eax, (%rsi)
337 ; CHECK-AVX512-NEXT: movl 4(%rdi), %eax
338 ; CHECK-AVX512-NEXT: movl %eax, 4(%rsi)
339 ; CHECK-AVX512-NEXT: movl 8(%rdi), %eax
340 ; CHECK-AVX512-NEXT: movl %eax, 8(%rsi)
341 ; CHECK-AVX512-NEXT: movl 12(%rdi), %eax
342 ; CHECK-AVX512-NEXT: movl %eax, 12(%rsi)
343 ; CHECK-AVX512-NEXT: retq
344 entry:
345 %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
346 store i32 %x2, i32* %d, align 4
347 %cmp = icmp sgt i32 %x, 17
348 br i1 %cmp, label %if.then, label %if.end
349
350 if.then: ; preds = %entry
351 %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
352 store i32 %x, i32* %b, align 4
353 br label %if.end
354
355 if.end: ; preds = %if.then, %entry
356 %0 = bitcast %struct.S* %s3 to i8*
357 %1 = bitcast %struct.S* %s4 to i8*
358 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
359 %2 = bitcast %struct.S* %s2 to i8*
360 %3 = bitcast %struct.S* %s1 to i8*
361 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
362 ret void
363 }
364 %struct.S2 = type { i64, i64 }
365
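; %struct.S2 holds two i64 fields; the blocking store is 8 bytes at offset 8,
; so the copy is expected to break into two movq load/store pairs rather than
; narrower dword copies.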
366 ; Function Attrs: nounwind uwtable
367 define void @test_type64(%struct.S2* nocapture %s1, %struct.S2* nocapture %s2, i32 %x, %struct.S2* nocapture %s3, %struct.S2* nocapture readonly %s4) local_unnamed_addr #0 {
368 ; CHECK-LABEL: test_type64:
369 ; CHECK: # %bb.0: # %entry
370 ; CHECK-NEXT: cmpl $18, %edx
371 ; CHECK-NEXT: jl .LBB4_2
372 ; CHECK-NEXT: # %bb.1: # %if.then
373 ; CHECK-NEXT: movslq %edx, %rax
374 ; CHECK-NEXT: movq %rax, 8(%rdi)
375 ; CHECK-NEXT: .LBB4_2: # %if.end
376 ; CHECK-NEXT: movups (%r8), %xmm0
377 ; CHECK-NEXT: movups %xmm0, (%rcx)
378 ; CHECK-NEXT: movq (%rdi), %rax
379 ; CHECK-NEXT: movq %rax, (%rsi)
380 ; CHECK-NEXT: movq 8(%rdi), %rax
381 ; CHECK-NEXT: movq %rax, 8(%rsi)
382 ; CHECK-NEXT: retq
383 ;
384 ; DISABLED-LABEL: test_type64:
385 ; DISABLED: # %bb.0: # %entry
386 ; DISABLED-NEXT: cmpl $18, %edx
387 ; DISABLED-NEXT: jl .LBB4_2
388 ; DISABLED-NEXT: # %bb.1: # %if.then
389 ; DISABLED-NEXT: movslq %edx, %rax
390 ; DISABLED-NEXT: movq %rax, 8(%rdi)
391 ; DISABLED-NEXT: .LBB4_2: # %if.end
392 ; DISABLED-NEXT: movups (%r8), %xmm0
393 ; DISABLED-NEXT: movups %xmm0, (%rcx)
394 ; DISABLED-NEXT: movups (%rdi), %xmm0
395 ; DISABLED-NEXT: movups %xmm0, (%rsi)
396 ; DISABLED-NEXT: retq
397 ;
398 ; CHECK-AVX2-LABEL: test_type64:
399 ; CHECK-AVX2: # %bb.0: # %entry
400 ; CHECK-AVX2-NEXT: cmpl $18, %edx
401 ; CHECK-AVX2-NEXT: jl .LBB4_2
402 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
403 ; CHECK-AVX2-NEXT: movslq %edx, %rax
404 ; CHECK-AVX2-NEXT: movq %rax, 8(%rdi)
405 ; CHECK-AVX2-NEXT: .LBB4_2: # %if.end
406 ; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0
407 ; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx)
408 ; CHECK-AVX2-NEXT: movq (%rdi), %rax
409 ; CHECK-AVX2-NEXT: movq %rax, (%rsi)
410 ; CHECK-AVX2-NEXT: movq 8(%rdi), %rax
411 ; CHECK-AVX2-NEXT: movq %rax, 8(%rsi)
412 ; CHECK-AVX2-NEXT: retq
413 ;
414 ; CHECK-AVX512-LABEL: test_type64:
415 ; CHECK-AVX512: # %bb.0: # %entry
416 ; CHECK-AVX512-NEXT: cmpl $18, %edx
417 ; CHECK-AVX512-NEXT: jl .LBB4_2
418 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
419 ; CHECK-AVX512-NEXT: movslq %edx, %rax
420 ; CHECK-AVX512-NEXT: movq %rax, 8(%rdi)
421 ; CHECK-AVX512-NEXT: .LBB4_2: # %if.end
422 ; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0
423 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx)
424 ; CHECK-AVX512-NEXT: movq (%rdi), %rax
425 ; CHECK-AVX512-NEXT: movq %rax, (%rsi)
426 ; CHECK-AVX512-NEXT: movq 8(%rdi), %rax
427 ; CHECK-AVX512-NEXT: movq %rax, 8(%rsi)
428 ; CHECK-AVX512-NEXT: retq
429 entry:
430 %cmp = icmp sgt i32 %x, 17
431 br i1 %cmp, label %if.then, label %if.end
432
433 if.then: ; preds = %entry
434 %conv = sext i32 %x to i64
435 %b = getelementptr inbounds %struct.S2, %struct.S2* %s1, i64 0, i32 1
436 store i64 %conv, i64* %b, align 8
437 br label %if.end
438
439 if.end: ; preds = %if.then, %entry
440 %0 = bitcast %struct.S2* %s3 to i8*
441 %1 = bitcast %struct.S2* %s4 to i8*
442 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false)
443 %2 = bitcast %struct.S2* %s2 to i8*
444 %3 = bitcast %struct.S2* %s1 to i8*
445 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 8, i1 false)
446 ret void
447 }
448 %struct.S3 = type { i64, i8, i8, i16, i32 }
449
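; %struct.S3 mixes field sizes (i64, i8, i8, i16, i32); the 8-byte store at
; offset 0 and the 1-byte store at offset 8 should yield copies of matching
; widths, with the remaining tail bytes (9..16) copied in smaller pieces.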
450 ; Function Attrs: noinline nounwind uwtable
451 define void @test_mixed_type(%struct.S3* nocapture %s1, %struct.S3* nocapture %s2, i32 %x, %struct.S3* nocapture readnone %s3, %struct.S3* nocapture readnone %s4) local_unnamed_addr #0 {
452 ; CHECK-LABEL: test_mixed_type:
453 ; CHECK: # %bb.0: # %entry
454 ; CHECK-NEXT: cmpl $18, %edx
455 ; CHECK-NEXT: jl .LBB5_2
456 ; CHECK-NEXT: # %bb.1: # %if.then
457 ; CHECK-NEXT: movslq %edx, %rax
458 ; CHECK-NEXT: movq %rax, (%rdi)
459 ; CHECK-NEXT: movb %dl, 8(%rdi)
460 ; CHECK-NEXT: .LBB5_2: # %if.end
461 ; CHECK-NEXT: movq (%rdi), %rax
462 ; CHECK-NEXT: movq %rax, (%rsi)
463 ; CHECK-NEXT: movb 8(%rdi), %al
464 ; CHECK-NEXT: movb %al, 8(%rsi)
465 ; CHECK-NEXT: movl 9(%rdi), %eax
466 ; CHECK-NEXT: movl %eax, 9(%rsi)
467 ; CHECK-NEXT: movzwl 13(%rdi), %eax
468 ; CHECK-NEXT: movw %ax, 13(%rsi)
469 ; CHECK-NEXT: movb 15(%rdi), %al
470 ; CHECK-NEXT: movb %al, 15(%rsi)
471 ; CHECK-NEXT: retq
472 ;
473 ; DISABLED-LABEL: test_mixed_type:
474 ; DISABLED: # %bb.0: # %entry
475 ; DISABLED-NEXT: cmpl $18, %edx
476 ; DISABLED-NEXT: jl .LBB5_2
477 ; DISABLED-NEXT: # %bb.1: # %if.then
478 ; DISABLED-NEXT: movslq %edx, %rax
479 ; DISABLED-NEXT: movq %rax, (%rdi)
480 ; DISABLED-NEXT: movb %dl, 8(%rdi)
481 ; DISABLED-NEXT: .LBB5_2: # %if.end
482 ; DISABLED-NEXT: movups (%rdi), %xmm0
483 ; DISABLED-NEXT: movups %xmm0, (%rsi)
484 ; DISABLED-NEXT: retq
485 ;
486 ; CHECK-AVX2-LABEL: test_mixed_type:
487 ; CHECK-AVX2: # %bb.0: # %entry
488 ; CHECK-AVX2-NEXT: cmpl $18, %edx
489 ; CHECK-AVX2-NEXT: jl .LBB5_2
490 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
491 ; CHECK-AVX2-NEXT: movslq %edx, %rax
492 ; CHECK-AVX2-NEXT: movq %rax, (%rdi)
493 ; CHECK-AVX2-NEXT: movb %dl, 8(%rdi)
494 ; CHECK-AVX2-NEXT: .LBB5_2: # %if.end
495 ; CHECK-AVX2-NEXT: movq (%rdi), %rax
496 ; CHECK-AVX2-NEXT: movq %rax, (%rsi)
497 ; CHECK-AVX2-NEXT: movb 8(%rdi), %al
498 ; CHECK-AVX2-NEXT: movb %al, 8(%rsi)
499 ; CHECK-AVX2-NEXT: movl 9(%rdi), %eax
500 ; CHECK-AVX2-NEXT: movl %eax, 9(%rsi)
501 ; CHECK-AVX2-NEXT: movzwl 13(%rdi), %eax
502 ; CHECK-AVX2-NEXT: movw %ax, 13(%rsi)
503 ; CHECK-AVX2-NEXT: movb 15(%rdi), %al
504 ; CHECK-AVX2-NEXT: movb %al, 15(%rsi)
505 ; CHECK-AVX2-NEXT: retq
506 ;
507 ; CHECK-AVX512-LABEL: test_mixed_type:
508 ; CHECK-AVX512: # %bb.0: # %entry
509 ; CHECK-AVX512-NEXT: cmpl $18, %edx
510 ; CHECK-AVX512-NEXT: jl .LBB5_2
511 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
512 ; CHECK-AVX512-NEXT: movslq %edx, %rax
513 ; CHECK-AVX512-NEXT: movq %rax, (%rdi)
514 ; CHECK-AVX512-NEXT: movb %dl, 8(%rdi)
515 ; CHECK-AVX512-NEXT: .LBB5_2: # %if.end
516 ; CHECK-AVX512-NEXT: movq (%rdi), %rax
517 ; CHECK-AVX512-NEXT: movq %rax, (%rsi)
518 ; CHECK-AVX512-NEXT: movb 8(%rdi), %al
519 ; CHECK-AVX512-NEXT: movb %al, 8(%rsi)
520 ; CHECK-AVX512-NEXT: movl 9(%rdi), %eax
521 ; CHECK-AVX512-NEXT: movl %eax, 9(%rsi)
522 ; CHECK-AVX512-NEXT: movzwl 13(%rdi), %eax
523 ; CHECK-AVX512-NEXT: movw %ax, 13(%rsi)
524 ; CHECK-AVX512-NEXT: movb 15(%rdi), %al
525 ; CHECK-AVX512-NEXT: movb %al, 15(%rsi)
526 ; CHECK-AVX512-NEXT: retq
527 entry:
528 %cmp = icmp sgt i32 %x, 17
529 br i1 %cmp, label %if.then, label %if.end
530
531 if.then: ; preds = %entry
532 %conv = sext i32 %x to i64
533 %a = getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 0
534 store i64 %conv, i64* %a, align 8
535 %conv1 = trunc i32 %x to i8
536 %b = getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 1
537 store i8 %conv1, i8* %b, align 8
538 br label %if.end
539
540 if.end: ; preds = %if.then, %entry
541 %0 = bitcast %struct.S3* %s2 to i8*
542 %1 = bitcast %struct.S3* %s1 to i8*
543 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false)
544 ret void
545 }
546 %struct.S4 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
547
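; A 48-byte struct copied after two blocking dword stores (offsets 4 and 36);
; the copy appears to be broken only around the stored offsets, while the
; untouched middle 16 bytes remain a single full XMM copy.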
548 ; Function Attrs: nounwind uwtable
549 define void @test_multiple_blocks(%struct.S4* nocapture %s1, %struct.S4* nocapture %s2) local_unnamed_addr #0 {
550 ; CHECK-LABEL: test_multiple_blocks:
551 ; CHECK: # %bb.0: # %entry
552 ; CHECK-NEXT: movl $0, 4(%rdi)
553 ; CHECK-NEXT: movl $0, 36(%rdi)
554 ; CHECK-NEXT: movups 16(%rdi), %xmm0
555 ; CHECK-NEXT: movups %xmm0, 16(%rsi)
556 ; CHECK-NEXT: movl 32(%rdi), %eax
557 ; CHECK-NEXT: movl %eax, 32(%rsi)
558 ; CHECK-NEXT: movl 36(%rdi), %eax
559 ; CHECK-NEXT: movl %eax, 36(%rsi)
560 ; CHECK-NEXT: movq 40(%rdi), %rax
561 ; CHECK-NEXT: movq %rax, 40(%rsi)
562 ; CHECK-NEXT: movl (%rdi), %eax
563 ; CHECK-NEXT: movl %eax, (%rsi)
564 ; CHECK-NEXT: movl 4(%rdi), %eax
565 ; CHECK-NEXT: movl %eax, 4(%rsi)
566 ; CHECK-NEXT: movq 8(%rdi), %rax
567 ; CHECK-NEXT: movq %rax, 8(%rsi)
568 ; CHECK-NEXT: retq
569 ;
570 ; DISABLED-LABEL: test_multiple_blocks:
571 ; DISABLED: # %bb.0: # %entry
572 ; DISABLED-NEXT: movl $0, 4(%rdi)
573 ; DISABLED-NEXT: movl $0, 36(%rdi)
574 ; DISABLED-NEXT: movups 16(%rdi), %xmm0
575 ; DISABLED-NEXT: movups %xmm0, 16(%rsi)
576 ; DISABLED-NEXT: movups 32(%rdi), %xmm0
577 ; DISABLED-NEXT: movups %xmm0, 32(%rsi)
578 ; DISABLED-NEXT: movups (%rdi), %xmm0
579 ; DISABLED-NEXT: movups %xmm0, (%rsi)
580 ; DISABLED-NEXT: retq
581 ;
582 ; CHECK-AVX2-LABEL: test_multiple_blocks:
583 ; CHECK-AVX2: # %bb.0: # %entry
584 ; CHECK-AVX2-NEXT: movl $0, 4(%rdi)
585 ; CHECK-AVX2-NEXT: movl $0, 36(%rdi)
586 ; CHECK-AVX2-NEXT: vmovups 16(%rdi), %xmm0
587 ; CHECK-AVX2-NEXT: vmovups %xmm0, 16(%rsi)
588 ; CHECK-AVX2-NEXT: movl 32(%rdi), %eax
589 ; CHECK-AVX2-NEXT: movl %eax, 32(%rsi)
590 ; CHECK-AVX2-NEXT: movl 36(%rdi), %eax
591 ; CHECK-AVX2-NEXT: movl %eax, 36(%rsi)
592 ; CHECK-AVX2-NEXT: movq 40(%rdi), %rax
593 ; CHECK-AVX2-NEXT: movq %rax, 40(%rsi)
594 ; CHECK-AVX2-NEXT: movl (%rdi), %eax
595 ; CHECK-AVX2-NEXT: movl %eax, (%rsi)
596 ; CHECK-AVX2-NEXT: movl 4(%rdi), %eax
597 ; CHECK-AVX2-NEXT: movl %eax, 4(%rsi)
598 ; CHECK-AVX2-NEXT: vmovups 8(%rdi), %xmm0
599 ; CHECK-AVX2-NEXT: vmovups %xmm0, 8(%rsi)
600 ; CHECK-AVX2-NEXT: movq 24(%rdi), %rax
601 ; CHECK-AVX2-NEXT: movq %rax, 24(%rsi)
602 ; CHECK-AVX2-NEXT: retq
603 ;
604 ; CHECK-AVX512-LABEL: test_multiple_blocks:
605 ; CHECK-AVX512: # %bb.0: # %entry
606 ; CHECK-AVX512-NEXT: movl $0, 4(%rdi)
607 ; CHECK-AVX512-NEXT: movl $0, 36(%rdi)
608 ; CHECK-AVX512-NEXT: vmovups 16(%rdi), %xmm0
609 ; CHECK-AVX512-NEXT: vmovups %xmm0, 16(%rsi)
610 ; CHECK-AVX512-NEXT: movl 32(%rdi), %eax
611 ; CHECK-AVX512-NEXT: movl %eax, 32(%rsi)
612 ; CHECK-AVX512-NEXT: movl 36(%rdi), %eax
613 ; CHECK-AVX512-NEXT: movl %eax, 36(%rsi)
614 ; CHECK-AVX512-NEXT: movq 40(%rdi), %rax
615 ; CHECK-AVX512-NEXT: movq %rax, 40(%rsi)
616 ; CHECK-AVX512-NEXT: movl (%rdi), %eax
617 ; CHECK-AVX512-NEXT: movl %eax, (%rsi)
618 ; CHECK-AVX512-NEXT: movl 4(%rdi), %eax
619 ; CHECK-AVX512-NEXT: movl %eax, 4(%rsi)
620 ; CHECK-AVX512-NEXT: vmovups 8(%rdi), %xmm0
621 ; CHECK-AVX512-NEXT: vmovups %xmm0, 8(%rsi)
622 ; CHECK-AVX512-NEXT: movq 24(%rdi), %rax
623 ; CHECK-AVX512-NEXT: movq %rax, 24(%rsi)
624 ; CHECK-AVX512-NEXT: retq
625 entry:
626 %b = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 1
627 store i32 0, i32* %b, align 4
628 %b3 = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 9
629 store i32 0, i32* %b3, align 4
630 %0 = bitcast %struct.S4* %s2 to i8*
631 %1 = bitcast %struct.S4* %s1 to i8*
632 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 48, i32 4, i1 false)
633 ret void
634 }
635 %struct.S5 = type { i16, i16, i16, i16, i16, i16, i16, i16 }
636
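; %struct.S5 is eight i16 fields; the 2-byte blocking store at offset 2
; should produce word-sized copies around it (movzwl/movw), with wider
; copies for the rest of the struct.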
637 ; Function Attrs: nounwind uwtable
638 define void @test_type16(%struct.S5* nocapture %s1, %struct.S5* nocapture %s2, i32 %x, %struct.S5* nocapture %s3, %struct.S5* nocapture readonly %s4) local_unnamed_addr #0 {
639 ; CHECK-LABEL: test_type16:
640 ; CHECK: # %bb.0: # %entry
641 ; CHECK-NEXT: cmpl $18, %edx
642 ; CHECK-NEXT: jl .LBB7_2
643 ; CHECK-NEXT: # %bb.1: # %if.then
644 ; CHECK-NEXT: movw %dx, 2(%rdi)
645 ; CHECK-NEXT: .LBB7_2: # %if.end
646 ; CHECK-NEXT: movups (%r8), %xmm0
647 ; CHECK-NEXT: movups %xmm0, (%rcx)
648 ; CHECK-NEXT: movzwl (%rdi), %eax
649 ; CHECK-NEXT: movw %ax, (%rsi)
650 ; CHECK-NEXT: movzwl 2(%rdi), %eax
651 ; CHECK-NEXT: movw %ax, 2(%rsi)
652 ; CHECK-NEXT: movq 4(%rdi), %rax
653 ; CHECK-NEXT: movq %rax, 4(%rsi)
654 ; CHECK-NEXT: movl 12(%rdi), %eax
655 ; CHECK-NEXT: movl %eax, 12(%rsi)
656 ; CHECK-NEXT: retq
657 ;
658 ; DISABLED-LABEL: test_type16:
659 ; DISABLED: # %bb.0: # %entry
660 ; DISABLED-NEXT: cmpl $18, %edx
661 ; DISABLED-NEXT: jl .LBB7_2
662 ; DISABLED-NEXT: # %bb.1: # %if.then
663 ; DISABLED-NEXT: movw %dx, 2(%rdi)
664 ; DISABLED-NEXT: .LBB7_2: # %if.end
665 ; DISABLED-NEXT: movups (%r8), %xmm0
666 ; DISABLED-NEXT: movups %xmm0, (%rcx)
667 ; DISABLED-NEXT: movups (%rdi), %xmm0
668 ; DISABLED-NEXT: movups %xmm0, (%rsi)
669 ; DISABLED-NEXT: retq
670 ;
671 ; CHECK-AVX2-LABEL: test_type16:
672 ; CHECK-AVX2: # %bb.0: # %entry
673 ; CHECK-AVX2-NEXT: cmpl $18, %edx
674 ; CHECK-AVX2-NEXT: jl .LBB7_2
675 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
676 ; CHECK-AVX2-NEXT: movw %dx, 2(%rdi)
677 ; CHECK-AVX2-NEXT: .LBB7_2: # %if.end
678 ; CHECK-AVX2-NEXT: vmovups (%r8), %xmm0
679 ; CHECK-AVX2-NEXT: vmovups %xmm0, (%rcx)
680 ; CHECK-AVX2-NEXT: movzwl (%rdi), %eax
681 ; CHECK-AVX2-NEXT: movw %ax, (%rsi)
682 ; CHECK-AVX2-NEXT: movzwl 2(%rdi), %eax
683 ; CHECK-AVX2-NEXT: movw %ax, 2(%rsi)
684 ; CHECK-AVX2-NEXT: movq 4(%rdi), %rax
685 ; CHECK-AVX2-NEXT: movq %rax, 4(%rsi)
686 ; CHECK-AVX2-NEXT: movl 12(%rdi), %eax
687 ; CHECK-AVX2-NEXT: movl %eax, 12(%rsi)
688 ; CHECK-AVX2-NEXT: retq
689 ;
690 ; CHECK-AVX512-LABEL: test_type16:
691 ; CHECK-AVX512: # %bb.0: # %entry
692 ; CHECK-AVX512-NEXT: cmpl $18, %edx
693 ; CHECK-AVX512-NEXT: jl .LBB7_2
694 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then
695 ; CHECK-AVX512-NEXT: movw %dx, 2(%rdi)
696 ; CHECK-AVX512-NEXT: .LBB7_2: # %if.end
697 ; CHECK-AVX512-NEXT: vmovups (%r8), %xmm0
698 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%rcx)
699 ; CHECK-AVX512-NEXT: movzwl (%rdi), %eax
700 ; CHECK-AVX512-NEXT: movw %ax, (%rsi)
701 ; CHECK-AVX512-NEXT: movzwl 2(%rdi), %eax
702 ; CHECK-AVX512-NEXT: movw %ax, 2(%rsi)
703 ; CHECK-AVX512-NEXT: movq 4(%rdi), %rax
704 ; CHECK-AVX512-NEXT: movq %rax, 4(%rsi)
705 ; CHECK-AVX512-NEXT: movl 12(%rdi), %eax
706 ; CHECK-AVX512-NEXT: movl %eax, 12(%rsi)
707 ; CHECK-AVX512-NEXT: retq
708 entry:
709 %cmp = icmp sgt i32 %x, 17
710 br i1 %cmp, label %if.then, label %if.end
711
712 if.then: ; preds = %entry
713 %conv = trunc i32 %x to i16
714 %b = getelementptr inbounds %struct.S5, %struct.S5* %s1, i64 0, i32 1
715 store i16 %conv, i16* %b, align 2
716 br label %if.end
717
718 if.end: ; preds = %if.then, %entry
719 %0 = bitcast %struct.S5* %s3 to i8*
720 %1 = bitcast %struct.S5* %s4 to i8*
721 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 2, i1 false)
722 %2 = bitcast %struct.S5* %s2 to i8*
723 %3 = bitcast %struct.S5* %s1 to i8*
724 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 2, i1 false)
725 ret void
726 }
727
728 %struct.S6 = type { [4 x i32], i32, i32, i32, i32 }
729
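; The copied struct is a byval argument living on the stack; this appears to
; check that stack-relative (%rsp) accesses are handled, splitting the copy
; around the dword store at offset 24 of %s2.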
730 ; Function Attrs: nounwind uwtable
731 define void @test_stack(%struct.S6* noalias nocapture sret %agg.result, %struct.S6* byval nocapture readnone align 8 %s1, %struct.S6* byval nocapture align 8 %s2, i32 %x) local_unnamed_addr #0 {
732 ; CHECK-LABEL: test_stack:
733 ; CHECK: # %bb.0: # %entry
734 ; CHECK-NEXT: movl %esi, {{[0-9]+}}(%rsp)
735 ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
736 ; CHECK-NEXT: movups %xmm0, (%rdi)
737 ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
738 ; CHECK-NEXT: movq %rax, 16(%rdi)
739 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
740 ; CHECK-NEXT: movl %eax, 24(%rdi)
741 ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
742 ; CHECK-NEXT: movl %eax, 28(%rdi)
743 ; CHECK-NEXT: movq %rdi, %rax
744 ; CHECK-NEXT: retq
745 ;
746 ; DISABLED-LABEL: test_stack:
747 ; DISABLED: # %bb.0: # %entry
748 ; DISABLED-NEXT: movl %esi, {{[0-9]+}}(%rsp)
749 ; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
750 ; DISABLED-NEXT: movups %xmm0, (%rdi)
751 ; DISABLED-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
752 ; DISABLED-NEXT: movups %xmm0, 16(%rdi)
753 ; DISABLED-NEXT: movq %rdi, %rax
754 ; DISABLED-NEXT: retq
755 ;
756 ; CHECK-AVX2-LABEL: test_stack:
757 ; CHECK-AVX2: # %bb.0: # %entry
758 ; CHECK-AVX2-NEXT: movl %esi, {{[0-9]+}}(%rsp)
759 ; CHECK-AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0
760 ; CHECK-AVX2-NEXT: vmovups %xmm0, (%rdi)
761 ; CHECK-AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
762 ; CHECK-AVX2-NEXT: movq %rax, 16(%rdi)
763 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
764 ; CHECK-AVX2-NEXT: movl %eax, 24(%rdi)
765 ; CHECK-AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax
766 ; CHECK-AVX2-NEXT: movl %eax, 28(%rdi)
767 ; CHECK-AVX2-NEXT: movq %rdi, %rax
768 ; CHECK-AVX2-NEXT: retq
769 ;
770 ; CHECK-AVX512-LABEL: test_stack:
771 ; CHECK-AVX512: # %bb.0: # %entry
772 ; CHECK-AVX512-NEXT: movl %esi, {{[0-9]+}}(%rsp)
773 ; CHECK-AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %xmm0
774 ; CHECK-AVX512-NEXT: vmovups %xmm0, (%rdi)
775 ; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rax
776 ; CHECK-AVX512-NEXT: movq %rax, 16(%rdi)
777 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %eax
778 ; CHECK-AVX512-NEXT: movl %eax, 24(%rdi)
779 ; CHECK-AVX512-NEXT: movl {{[0-9]+}}(%rsp), %eax
780 ; CHECK-AVX512-NEXT: movl %eax, 28(%rdi)
781 ; CHECK-AVX512-NEXT: movq %rdi, %rax
782 ; CHECK-AVX512-NEXT: retq
783 entry:
784 %s6.sroa.0.0..sroa_cast1 = bitcast %struct.S6* %s2 to i8*
785 %s6.sroa.3.0..sroa_idx4 = getelementptr inbounds %struct.S6, %struct.S6* %s2, i64 0, i32 3
786 store i32 %x, i32* %s6.sroa.3.0..sroa_idx4, align 8
787 %0 = bitcast %struct.S6* %agg.result to i8*
788 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* nonnull %s6.sroa.0.0..sroa_cast1, i64 32, i32 4, i1 false)
789 ret void
790 }
791
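; Calls to bar() sit between the blocking stores and the memcpy; presumably
; this checks that the transformation is suppressed (the copy stays a single
; movups pair, matching the DISABLED output) when the intervening
; instructions exceed what the pass is willing to inspect.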
792 ; Function Attrs: nounwind uwtable
793 define void @test_limit_all(%struct.S* %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
794 ; CHECK-LABEL: test_limit_all:
795 ; CHECK: # %bb.0: # %entry
796 ; CHECK-NEXT: pushq %rbp
797 ; CHECK-NEXT: .cfi_def_cfa_offset 16
798 ; CHECK-NEXT: pushq %r15
799 ; CHECK-NEXT: .cfi_def_cfa_offset 24
800 ; CHECK-NEXT: pushq %r14
801 ; CHECK-NEXT: .cfi_def_cfa_offset 32
802 ; CHECK-NEXT: pushq %r12
803 ; CHECK-NEXT: .cfi_def_cfa_offset 40
804 ; CHECK-NEXT: pushq %rbx
805 ; CHECK-NEXT: .cfi_def_cfa_offset 48
806 ; CHECK-NEXT: .cfi_offset %rbx, -48
807 ; CHECK-NEXT: .cfi_offset %r12, -40
808 ; CHECK-NEXT: .cfi_offset %r14, -32
809 ; CHECK-NEXT: .cfi_offset %r15, -24
810 ; CHECK-NEXT: .cfi_offset %rbp, -16
811 ; CHECK-NEXT: movq %r8, %r15
812 ; CHECK-NEXT: movq %rcx, %r14
813 ; CHECK-NEXT: movl %edx, %ebp
814 ; CHECK-NEXT: movq %rsi, %r12
815 ; CHECK-NEXT: movq %rdi, %rbx
816 ; CHECK-NEXT: movl %r9d, 12(%rdi)
817 ; CHECK-NEXT: callq bar
818 ; CHECK-NEXT: cmpl $18, %ebp
819 ; CHECK-NEXT: jl .LBB9_2
820 ; CHECK-NEXT: # %bb.1: # %if.then
821 ; CHECK-NEXT: movl %ebp, 4(%rbx)
822 ; CHECK-NEXT: movq %rbx, %rdi
823 ; CHECK-NEXT: callq bar
824 ; CHECK-NEXT: .LBB9_2: # %if.end
825 ; CHECK-NEXT: movups (%r15), %xmm0
826 ; CHECK-NEXT: movups %xmm0, (%r14)
827 ; CHECK-NEXT: movups (%rbx), %xmm0
828 ; CHECK-NEXT: movups %xmm0, (%r12)
829 ; CHECK-NEXT: popq %rbx
830 ; CHECK-NEXT: popq %r12
831 ; CHECK-NEXT: popq %r14
832 ; CHECK-NEXT: popq %r15
833 ; CHECK-NEXT: popq %rbp
834 ; CHECK-NEXT: retq
835 ;
836 ; DISABLED-LABEL: test_limit_all:
837 ; DISABLED: # %bb.0: # %entry
838 ; DISABLED-NEXT: pushq %rbp
839 ; DISABLED-NEXT: .cfi_def_cfa_offset 16
840 ; DISABLED-NEXT: pushq %r15
841 ; DISABLED-NEXT: .cfi_def_cfa_offset 24
842 ; DISABLED-NEXT: pushq %r14
843 ; DISABLED-NEXT: .cfi_def_cfa_offset 32
844 ; DISABLED-NEXT: pushq %r12
845 ; DISABLED-NEXT: .cfi_def_cfa_offset 40
846 ; DISABLED-NEXT: pushq %rbx
847 ; DISABLED-NEXT: .cfi_def_cfa_offset 48
848 ; DISABLED-NEXT: .cfi_offset %rbx, -48
849 ; DISABLED-NEXT: .cfi_offset %r12, -40
850 ; DISABLED-NEXT: .cfi_offset %r14, -32
851 ; DISABLED-NEXT: .cfi_offset %r15, -24
852 ; DISABLED-NEXT: .cfi_offset %rbp, -16
853 ; DISABLED-NEXT: movq %r8, %r15
854 ; DISABLED-NEXT: movq %rcx, %r14
855 ; DISABLED-NEXT: movl %edx, %ebp
856 ; DISABLED-NEXT: movq %rsi, %r12
857 ; DISABLED-NEXT: movq %rdi, %rbx
858 ; DISABLED-NEXT: movl %r9d, 12(%rdi)
859 ; DISABLED-NEXT: callq bar
860 ; DISABLED-NEXT: cmpl $18, %ebp
861 ; DISABLED-NEXT: jl .LBB9_2
862 ; DISABLED-NEXT: # %bb.1: # %if.then
863 ; DISABLED-NEXT: movl %ebp, 4(%rbx)
864 ; DISABLED-NEXT: movq %rbx, %rdi
865 ; DISABLED-NEXT: callq bar
866 ; DISABLED-NEXT: .LBB9_2: # %if.end
867 ; DISABLED-NEXT: movups (%r15), %xmm0
868 ; DISABLED-NEXT: movups %xmm0, (%r14)
869 ; DISABLED-NEXT: movups (%rbx), %xmm0
870 ; DISABLED-NEXT: movups %xmm0, (%r12)
871 ; DISABLED-NEXT: popq %rbx
872 ; DISABLED-NEXT: popq %r12
873 ; DISABLED-NEXT: popq %r14
874 ; DISABLED-NEXT: popq %r15
875 ; DISABLED-NEXT: popq %rbp
876 ; DISABLED-NEXT: retq
877 ;
878 ; CHECK-AVX2-LABEL: test_limit_all:
879 ; CHECK-AVX2: # %bb.0: # %entry
880 ; CHECK-AVX2-NEXT: pushq %rbp
881 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 16
882 ; CHECK-AVX2-NEXT: pushq %r15
883 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 24
884 ; CHECK-AVX2-NEXT: pushq %r14
885 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32
886 ; CHECK-AVX2-NEXT: pushq %r12
887 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 40
888 ; CHECK-AVX2-NEXT: pushq %rbx
889 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 48
890 ; CHECK-AVX2-NEXT: .cfi_offset %rbx, -48
891 ; CHECK-AVX2-NEXT: .cfi_offset %r12, -40
892 ; CHECK-AVX2-NEXT: .cfi_offset %r14, -32
893 ; CHECK-AVX2-NEXT: .cfi_offset %r15, -24
894 ; CHECK-AVX2-NEXT: .cfi_offset %rbp, -16
895 ; CHECK-AVX2-NEXT: movq %r8, %r15
896 ; CHECK-AVX2-NEXT: movq %rcx, %r14
897 ; CHECK-AVX2-NEXT: movl %edx, %ebp
898 ; CHECK-AVX2-NEXT: movq %rsi, %r12
899 ; CHECK-AVX2-NEXT: movq %rdi, %rbx
900 ; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi)
901 ; CHECK-AVX2-NEXT: callq bar
902 ; CHECK-AVX2-NEXT: cmpl $18, %ebp
903 ; CHECK-AVX2-NEXT: jl .LBB9_2
904 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then
905 ; CHECK-AVX2-NEXT: movl %ebp, 4(%rbx)
906 ; CHECK-AVX2-NEXT: movq %rbx, %rdi
907 ; CHECK-AVX2-NEXT: callq bar
908 ; CHECK-AVX2-NEXT: .LBB9_2: # %if.end
909 ; CHECK-AVX2-NEXT: vmovups (%r15), %xmm0
910 ; CHECK-AVX2-NEXT: vmovups %xmm0, (%r14)
911 ; CHECK-AVX2-NEXT: vmovups (%rbx), %xmm0
912 ; CHECK-AVX2-NEXT: vmovups %xmm0, (%r12)
913 ; CHECK-AVX2-NEXT:    popq %rbx