llvm.org GIT mirror llvm / 0028f6a
MachineInstr: Reason locally about some memory objects before going to AA. This addresses a FIXME in MachineInstr::mayAlias. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310825 91177308-0d34-0410-b5e6-96231b3b80d8 Balaram Makam 2 years ago
12 changed file(s) with 103 addition(s) and 80 deletion(s). Raw diff Collapse all Expand all
16621662 bool UseTBAA) {
16631663 const MachineFunction *MF = getParent()->getParent();
16641664 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1665 const MachineFrameInfo &MFI = MF->getFrameInfo();
16651666
16661667 // If neither instruction stores to memory, they can't alias in any
16671668 // meaningful way, even if they read from the same address.
16721673 if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA))
16731674 return false;
16741675
1675 if (!AA)
1676 return true;
1677
16781676 // FIXME: Need to handle multiple memory operands to support all targets.
16791677 if (!hasOneMemOperand() || !Other.hasOneMemOperand())
16801678 return true;
16811679
16821680 MachineMemOperand *MMOa = *memoperands_begin();
16831681 MachineMemOperand *MMOb = *Other.memoperands_begin();
1684
1685 if (!MMOa->getValue() || !MMOb->getValue())
1686 return true;
16871682
16881683 // The following interface to AA is fashioned after DAGCombiner::isAlias
16891684 // and operates with MachineMemOperand offset with some important
16971692 // - There should never be any negative offsets here.
16981693 //
16991694 // FIXME: Modify API to hide this math from "user"
1700 // FIXME: Even before we go to AA we can reason locally about some
1695 // Even before we go to AA we can reason locally about some
17011696 // memory objects. It can save compile time, and possibly catch some
17021697 // corner cases not currently covered.
17031698
1704 assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
1705 assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
1706
1707 int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
1708 int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
1709 int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
1710
1711 AliasResult AAResult =
1712 AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
1713 UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
1714 MemoryLocation(MMOb->getValue(), Overlapb,
1715 UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
1699 int64_t OffsetA = MMOa->getOffset();
1700 int64_t OffsetB = MMOb->getOffset();
1701
1702 assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
1703 assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
1704
1705 int64_t MinOffset = std::min(OffsetA, OffsetB);
1706 int64_t WidthA = MMOa->getSize();
1707 int64_t WidthB = MMOb->getSize();
1708 const Value *ValA = MMOa->getValue();
1709 const Value *ValB = MMOb->getValue();
1710 bool SameVal = (ValA && ValB && (ValA == ValB));
1711 if (!SameVal) {
1712 const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
1713 const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
1714 if (PSVa && PSVa->isConstant(&MFI))
1715 return false;
1716 if (PSVb && PSVb->isConstant(&MFI))
1717 return false;
1718 if (PSVa && PSVb && (PSVa == PSVb))
1719 SameVal = true;
1720 }
1721
1722 if (SameVal) {
1723 int64_t MaxOffset = std::max(OffsetA, OffsetB);
1724 int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
1725 return (MinOffset + LowWidth > MaxOffset);
1726 }
1727
1728 if (!AA)
1729 return true;
1730
1731 if (!ValA || !ValB)
1732 return true;
1733
1734 int64_t Overlapa = WidthA + OffsetA - MinOffset;
1735 int64_t Overlapb = WidthB + OffsetB - MinOffset;
1736
1737 AliasResult AAResult = AA->alias(
1738 MemoryLocation(ValA, Overlapa, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
1739 MemoryLocation(ValB, Overlapb,
1740 UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
17161741
17171742 return (AAResult != NoAlias);
17181743 }
15301530 ; CHECK-LABEL: merge_zr64_unalign:
15311531 ; CHECK: // %entry
15321532 ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}]
1533 ; STRICTALIGN: strb wzr,
15341533 ; STRICTALIGN: strb
15351534 ; STRICTALIGN: strb
15361535 ; STRICTALIGN: strb
15461545 ; STRICTALIGN: strb
15471546 ; STRICTALIGN: strb
15481547 ; STRICTALIGN: strb
1548 ; STRICTALIGN: strb
15491549 ; CHECK-NEXT: ret
15501550 entry:
15511551 store <2 x i64> zeroinitializer, <2 x i64>* %p, align 1
451451 ; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], s33 offset:8
452452 ; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], s33 offset:12
453453
454 ; HSA: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]] offset:4
454455 ; HSA: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:8
455 ; HSA: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]] offset:4
456456
457457
458458 ; MESA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[36:39], s33 offset:8
459459 ; MESA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[36:39], s33 offset:12
460460
461 ; MESA: buffer_store_dword [[RELOAD_VAL0]], off, s[36:39], [[SP]] offset:4
461462 ; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8
462 ; MESA: buffer_store_dword [[RELOAD_VAL0]], off, s[36:39], [[SP]] offset:4
463463
464464 ; GCN-NEXT: s_swappc_b64
465465 ; GCN-NEXT: s_sub_u32 [[SP]], [[SP]], 0x200
486486 ; GCN-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:12
487487
488488 ; GCN-DAG: s_add_u32 [[SP]], [[SP]], 0x200
489 ; GCN: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4
489490 ; GCN: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8
490 ; GCN: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4
491491 ; GCN-NEXT: s_swappc_b64
492492 ; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:16
493493 ; GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:20
178178 ; GCN-NOHSA: buffer_load_dwordx2
179179 ; GCN-HSA: flat_load_dwordx2
180180
181 ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}}
181182 ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}
182 ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}}
183183 ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}},
184184 ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}},
185185 ; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1
187187 ; TODO: This should use DST, but for some reason there are redundant MOVs
188188 ; EGCM: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal
189189 ; EGCM: 16
190 ; EGCM: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal
191 ; EGCM: AND_INT {{[* ]*}}[[ST_HI]].X, [[DST_HI]], literal
192190 define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
193191 entry:
194192 %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
201199 ; GCN-NOHSA: buffer_load_dwordx2
202200 ; GCN-HSA: flat_load_dwordx2
203201
202 ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}}
204203 ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}
205 ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}}
206204 ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}},
207205 ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}},
208206 ; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9].[XYZW]}}, 0, #1
351351
352352 ; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
353353 ; TODO: These should use DST, but for some reason there are redundant MOVs
354 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
355 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
356 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
357 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
358 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
359 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
360 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
361 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
362 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
363 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
364 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
365 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
366 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
367 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
368 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
369 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
354 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
355 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
356 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
357 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
358 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
359 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
360 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
361 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
362 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
363 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
364 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
365 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
366 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
367 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
368 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
369 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
370370 ; EG-DAG: 8
371371 ; EG-DAG: 8
372372 ; EG-DAG: 8
529529 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
530530 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
531531 ; EG-DAG: LDS_WRITE
532 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
533532 define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
534533 %a = load i16, i16 addrspace(3)* %in
535534 %ext = zext i16 %a to i64
571570 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
572571 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
573572 ; EG-DAG: LDS_WRITE
574 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
575573 define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
576574 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
577575 %ext = zext <1 x i16> %load to <1 x i64>
None ; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
1 ; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s
0 ; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s --check-prefix=NOREGALLOC
1 ; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s --check-prefix=REGALLOC
22
33 @.str = private constant [1 x i8] zeroinitializer, align 1
44
55 define void @g() {
66 entry:
77 ;CHECK: [sp, #8]
8 ;CHECK: [sp, #12]
9 ;CHECK: [sp]
8 ;NOREGALLOC: [sp, #12]
9 ;NOREGALLOC: [sp]
10 ;REGALLOC: [sp]
11 ;REGALLOC: [sp, #12]
1012 tail call void (i8*, ...) @f(i8* getelementptr ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00)
1113 ret void
1214 }
123123 ; BE-LABEL: i56_and_or:
124124 ; BE: @ BB#0:
125125 ; BE-NEXT: mov r1, r0
126 ; BE-NEXT: ldr r12, [r0]
127 ; BE-NEXT: ldrh r2, [r1, #4]!
126128 ; BE-NEXT: mov r3, #128
127 ; BE-NEXT: ldrh r2, [r1, #4]!
128129 ; BE-NEXT: strb r3, [r1, #2]
129 ; BE-NEXT: ldr r12, [r0]
130130 ; BE-NEXT: lsl r2, r2, #8
131131 ; BE-NEXT: orr r2, r2, r12, lsl #24
132132 ; BE-NEXT: orr r2, r2, #384
117117 ; X64: # BB#0:
118118 ; X64-NEXT: movzwl 4(%rdi), %eax
119119 ; X64-NEXT: movzbl 6(%rdi), %ecx
120 ; X64-NEXT: movl (%rdi), %edx
121120 ; X64-NEXT: movb %cl, 6(%rdi)
122121 ; X64-NEXT: # kill: %ECX %ECX %RCX %RCX
123122 ; X64-NEXT: shll $16, %ecx
124123 ; X64-NEXT: orl %eax, %ecx
125124 ; X64-NEXT: shlq $32, %rcx
126 ; X64-NEXT: orq %rcx, %rdx
127 ; X64-NEXT: orq $384, %rdx # imm = 0x180
128 ; X64-NEXT: movl %edx, (%rdi)
129 ; X64-NEXT: shrq $32, %rdx
130 ; X64-NEXT: movw %dx, 4(%rdi)
125 ; X64-NEXT: movl (%rdi), %eax
126 ; X64-NEXT: orq %rcx, %rax
127 ; X64-NEXT: orq $384, %rax # imm = 0x180
128 ; X64-NEXT: movl %eax, (%rdi)
129 ; X64-NEXT: shrq $32, %rax
130 ; X64-NEXT: movw %ax, 4(%rdi)
131131 ; X64-NEXT: retq
132132 %aa = load i56, i56* %a, align 1
133133 %b = or i56 %aa, 384
149149 ; X64: # BB#0:
150150 ; X64-NEXT: movzwl 4(%rdi), %eax
151151 ; X64-NEXT: movzbl 6(%rdi), %ecx
152 ; X64-NEXT: movl (%rdi), %edx
153152 ; X64-NEXT: movb %cl, 6(%rdi)
154153 ; X64-NEXT: # kill: %ECX %ECX %RCX %RCX
155154 ; X64-NEXT: shll $16, %ecx
156155 ; X64-NEXT: orl %eax, %ecx
157156 ; X64-NEXT: shlq $32, %rcx
158 ; X64-NEXT: orq %rcx, %rdx
159 ; X64-NEXT: orq $384, %rdx # imm = 0x180
160 ; X64-NEXT: movabsq $72057594037927808, %rax # imm = 0xFFFFFFFFFFFF80
161 ; X64-NEXT: andq %rdx, %rax
162 ; X64-NEXT: movl %eax, (%rdi)
163 ; X64-NEXT: shrq $32, %rax
164 ; X64-NEXT: movw %ax, 4(%rdi)
157 ; X64-NEXT: movl (%rdi), %eax
158 ; X64-NEXT: orq %rcx, %rax
159 ; X64-NEXT: orq $384, %rax # imm = 0x180
160 ; X64-NEXT: movabsq $72057594037927808, %rcx # imm = 0xFFFFFFFFFFFF80
161 ; X64-NEXT: andq %rax, %rcx
162 ; X64-NEXT: movl %ecx, (%rdi)
163 ; X64-NEXT: shrq $32, %rcx
164 ; X64-NEXT: movw %cx, 4(%rdi)
165165 ; X64-NEXT: retq
166166 %b = load i56, i56* %a, align 1
167167 %c = and i56 %b, -128
187187 ; X64-NEXT: movzbl %sil, %eax
188188 ; X64-NEXT: movzwl 4(%rdi), %ecx
189189 ; X64-NEXT: movzbl 6(%rdi), %edx
190 ; X64-NEXT: movl (%rdi), %esi
191190 ; X64-NEXT: movb %dl, 6(%rdi)
192191 ; X64-NEXT: # kill: %EDX %EDX %RDX %RDX
193192 ; X64-NEXT: shll $16, %edx
194193 ; X64-NEXT: orl %ecx, %edx
195194 ; X64-NEXT: shlq $32, %rdx
196 ; X64-NEXT: orq %rdx, %rsi
195 ; X64-NEXT: movl (%rdi), %ecx
196 ; X64-NEXT: orq %rdx, %rcx
197197 ; X64-NEXT: shlq $13, %rax
198 ; X64-NEXT: movabsq $72057594037919743, %rcx # imm = 0xFFFFFFFFFFDFFF
199 ; X64-NEXT: andq %rsi, %rcx
200 ; X64-NEXT: orq %rax, %rcx
201 ; X64-NEXT: movl %ecx, (%rdi)
202 ; X64-NEXT: shrq $32, %rcx
203 ; X64-NEXT: movw %cx, 4(%rdi)
198 ; X64-NEXT: movabsq $72057594037919743, %rdx # imm = 0xFFFFFFFFFFDFFF
199 ; X64-NEXT: andq %rcx, %rdx
200 ; X64-NEXT: orq %rax, %rdx
201 ; X64-NEXT: movl %edx, (%rdi)
202 ; X64-NEXT: shrq $32, %rdx
203 ; X64-NEXT: movw %dx, 4(%rdi)
204204 ; X64-NEXT: retq
205205 %extbit = zext i1 %bit to i56
206206 %b = load i56, i56* %a, align 1
1111 define void @t1(i32 %argc, i8** %argv) nounwind {
1212 entry:
1313 ; SSE2-Darwin-LABEL: t1:
14 ; SSE2-Darwin: movaps _.str, %xmm0
15 ; SSE2-Darwin: movaps %xmm0
1416 ; SSE2-Darwin: movsd _.str+16, %xmm0
1517 ; SSE2-Darwin: movsd %xmm0, 16(%esp)
16 ; SSE2-Darwin: movaps _.str, %xmm0
17 ; SSE2-Darwin: movaps %xmm0
1818 ; SSE2-Darwin: movb $0, 24(%esp)
1919
2020 ; SSE2-Mingw32-LABEL: t1:
21 ; SSE2-Mingw32: movaps _.str, %xmm0
22 ; SSE2-Mingw32: movups %xmm0
2123 ; SSE2-Mingw32: movsd _.str+16, %xmm0
2224 ; SSE2-Mingw32: movsd %xmm0, 16(%esp)
23 ; SSE2-Mingw32: movaps _.str, %xmm0
24 ; SSE2-Mingw32: movups %xmm0
2525 ; SSE2-Mingw32: movb $0, 24(%esp)
2626
2727 ; SSE1-LABEL: t1:
2828 ; SSE1: movaps _.str, %xmm0
29 ; SSE1: movb $0, 24(%esp)
2930 ; SSE1: movaps %xmm0
30 ; SSE1: movb $0, 24(%esp)
3131 ; SSE1: movl $0, 20(%esp)
3232 ; SSE1: movl $0, 16(%esp)
3333
2424 ; CHECK-NEXT: xorl %eax, %eax
2525 ; CHECK-NEXT: movaps %xmm0, (%esp)
2626 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
27 ; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD
2728 ; CHECK-NEXT: movaps %xmm1, (%esp)
28 ; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD
2929 ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
3030 ; CHECK-NEXT: movl %ebp, %esp
3131 ; CHECK-NEXT: popl %ebp
1515 ; CHECK-NEXT: movl {{\.LCPI.*}}, %eax
1616 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
1717 ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
18 ; CHECK-NEXT: movw $1, {{[0-9]+}}(%esp)
1819 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
1920 ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
20 ; CHECK-NEXT: movw $1, {{[0-9]+}}(%esp)
2121 ; CHECK-NEXT: jmp .LBB0_1
2222 ; CHECK-NEXT: .p2align 4, 0x90
2323 ; CHECK-NEXT: .LBB0_2: # %forbody