llvm.org GIT mirror llvm / cc47f49
[X86] Add missing properties on llvm.x86.sse.{st,ld}mxcsr Summary: llvm.x86.sse.stmxcsr only writes to memory. llvm.x86.sse.ldmxcsr only reads from memory, and might generate an FPE. Reviewers: craig.topper, RKSimon Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62896 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363773 91177308-0d34-0410-b5e6-96231b3b80d8 Clement Courbet 30 days ago
24 changed file(s) with 60 addition(s) and 48 deletion(s). Raw diff Collapse all Expand all
276276 // Control register.
277277 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
278278 def int_x86_sse_stmxcsr :
279 Intrinsic<[], [llvm_ptr_ty], []>;
279 Intrinsic<[], [llvm_ptr_ty],
280 [IntrWriteMem, IntrArgMemOnly,
281 // This prevents reordering with ldmxcsr
282 IntrHasSideEffects]>;
280283 def int_x86_sse_ldmxcsr :
281 Intrinsic<[], [llvm_ptr_ty], []>;
284 Intrinsic<[], [llvm_ptr_ty],
285 [IntrReadMem, IntrArgMemOnly, IntrHasSideEffects,
286 // FIXME: LDMXCSR does not actually write to memory,
287 // but Fast and DAG Isel both use writing to memory
288 // as a proxy for having side effects.
289 IntrWriteMem]>;
282290 }
283291
284292 // Misc.
31733173 // SSE 1 & 2 - Load/Store XCSR register
31743174 //===----------------------------------------------------------------------===//
31753175
3176 let mayLoad=1, hasSideEffects=1 in
31763177 def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
31773178 "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
31783179 VEX, Sched<[WriteLDMXCSR]>, VEX_WIG;
3180 let mayStore=1, hasSideEffects=1 in
31793181 def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
31803182 "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
31813183 VEX, Sched<[WriteSTMXCSR]>, VEX_WIG;
31823184
3185 let mayLoad=1, hasSideEffects=1 in
31833186 def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
31843187 "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
31853188 TB, Sched<[WriteLDMXCSR]>;
3189 let mayStore=1, hasSideEffects=1 in
31863190 def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
31873191 "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
31883192 TB, Sched<[WriteSTMXCSR]>;
229229 # CHECK-NEXT: 1 70 35.00 * divps (%rax), %xmm2
230230 # CHECK-NEXT: 1 34 17.00 divss %xmm0, %xmm2
231231 # CHECK-NEXT: 1 34 17.00 * divss (%rax), %xmm2
232 # CHECK-NEXT: 1 5 2.50 * * U ldmxcsr (%rax)
232 # CHECK-NEXT: 1 5 2.50 * U ldmxcsr (%rax)
233233 # CHECK-NEXT: 1 1 1.00 * * U maskmovq %mm0, %mm1
234234 # CHECK-NEXT: 1 5 5.00 maxps %xmm0, %xmm2
235235 # CHECK-NEXT: 1 5 5.00 * maxps (%rax), %xmm2
304304 # CHECK-NEXT: 1 70 35.00 * sqrtps (%rax), %xmm2
305305 # CHECK-NEXT: 1 34 17.00 sqrtss %xmm0, %xmm2
306306 # CHECK-NEXT: 1 34 17.00 * sqrtss (%rax), %xmm2
307 # CHECK-NEXT: 1 15 7.50 * * U stmxcsr (%rax)
307 # CHECK-NEXT: 1 15 7.50 * U stmxcsr (%rax)
308308 # CHECK-NEXT: 1 5 5.00 subps %xmm0, %xmm2
309309 # CHECK-NEXT: 1 5 5.00 * subps (%rax), %xmm2
310310 # CHECK-NEXT: 1 5 5.00 subss %xmm0, %xmm2
2323
2424 # CHECK: [1] [2] [3] [4] [5] [6] Instructions:
2525 # CHECK-NEXT: 1 100 0.50 * * U int3
26 # CHECK-NEXT: 2 1 18.00 * * U stmxcsr (%rsp)
26 # CHECK-NEXT: 2 1 18.00 * U stmxcsr (%rsp)
2727
2828 # CHECK: Timeline view:
2929 # CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789
3939 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
4040
4141 # CHECK: [0] [1] [2] [3]
42 # CHECK-NEXT: 0. 2 1.0 0.5 0.0 int3
43 # CHECK-NEXT: 1. 2 100.5 0.0 0.0 stmxcsr (%rsp)
42 # CHECK-NEXT: 0. 2 51.5 0.5 0.0 int3
43 # CHECK-NEXT: 1. 2 151.0 0.0 0.0 stmxcsr (%rsp)
12171217 # CHECK-NEXT: 1 7 1.50 * vinsertps $1, (%rax), %xmm1, %xmm2
12181218 # CHECK-NEXT: 1 5 1.50 * vlddqu (%rax), %xmm2
12191219 # CHECK-NEXT: 2 5 1.50 * vlddqu (%rax), %ymm2
1220 # CHECK-NEXT: 1 5 0.50 * * U vldmxcsr (%rax)
1220 # CHECK-NEXT: 1 5 0.50 * U vldmxcsr (%rax)
12211221 # CHECK-NEXT: 1 1 1.50 * * U vmaskmovdqu %xmm0, %xmm1
12221222 # CHECK-NEXT: 1 6 2.00 * vmaskmovpd (%rax), %xmm0, %xmm2
12231223 # CHECK-NEXT: 2 6 2.00 * vmaskmovpd (%rax), %ymm0, %ymm2
16691669 # CHECK-NEXT: 1 14 4.50 * vsqrtsd (%rax), %xmm1, %xmm2
16701670 # CHECK-NEXT: 1 9 4.50 vsqrtss %xmm0, %xmm1, %xmm2
16711671 # CHECK-NEXT: 1 14 4.50 * vsqrtss (%rax), %xmm1, %xmm2
1672 # CHECK-NEXT: 2 1 18.00 * * U vstmxcsr (%rax)
1672 # CHECK-NEXT: 2 1 18.00 * U vstmxcsr (%rax)
16731673 # CHECK-NEXT: 1 5 1.00 vsubpd %xmm0, %xmm1, %xmm2
16741674 # CHECK-NEXT: 1 10 1.50 * vsubpd (%rax), %xmm1, %xmm2
16751675 # CHECK-NEXT: 2 5 1.00 vsubpd %ymm0, %ymm1, %ymm2
229229 # CHECK-NEXT: 1 14 4.50 * divps (%rax), %xmm2
230230 # CHECK-NEXT: 1 9 4.50 divss %xmm0, %xmm2
231231 # CHECK-NEXT: 1 14 4.50 * divss (%rax), %xmm2
232 # CHECK-NEXT: 1 5 0.50 * * U ldmxcsr (%rax)
232 # CHECK-NEXT: 1 5 0.50 * U ldmxcsr (%rax)
233233 # CHECK-NEXT: 1 2 1.00 * * U maskmovq %mm0, %mm1
234234 # CHECK-NEXT: 1 2 1.00 maxps %xmm0, %xmm2
235235 # CHECK-NEXT: 1 7 1.50 * maxps (%rax), %xmm2
304304 # CHECK-NEXT: 1 14 4.50 * sqrtps (%rax), %xmm2
305305 # CHECK-NEXT: 1 9 4.50 sqrtss %xmm0, %xmm2
306306 # CHECK-NEXT: 1 14 4.50 * sqrtss (%rax), %xmm2
307 # CHECK-NEXT: 2 1 18.00 * * U stmxcsr (%rax)
307 # CHECK-NEXT: 2 1 18.00 * U stmxcsr (%rax)
308308 # CHECK-NEXT: 1 5 1.00 subps %xmm0, %xmm2
309309 # CHECK-NEXT: 1 10 1.50 * subps (%rax), %xmm2
310310 # CHECK-NEXT: 1 5 1.00 subss %xmm0, %xmm2
12171217 # CHECK-NEXT: 2 6 1.00 * vinsertps $1, (%rax), %xmm1, %xmm2
12181218 # CHECK-NEXT: 1 5 0.50 * vlddqu (%rax), %xmm2
12191219 # CHECK-NEXT: 1 6 0.50 * vlddqu (%rax), %ymm2
1220 # CHECK-NEXT: 3 7 1.00 * * U vldmxcsr (%rax)
1220 # CHECK-NEXT: 3 7 1.00 * U vldmxcsr (%rax)
12211221 # CHECK-NEXT: 2 1 1.00 * * U vmaskmovdqu %xmm0, %xmm1
12221222 # CHECK-NEXT: 3 7 2.00 * vmaskmovpd (%rax), %xmm0, %xmm2
12231223 # CHECK-NEXT: 3 8 2.00 * vmaskmovpd (%rax), %ymm0, %ymm2
16691669 # CHECK-NEXT: 2 21 14.00 * vsqrtsd (%rax), %xmm1, %xmm2
16701670 # CHECK-NEXT: 1 11 4.00 vsqrtss %xmm0, %xmm1, %xmm2
16711671 # CHECK-NEXT: 2 16 7.00 * vsqrtss (%rax), %xmm1, %xmm2
1672 # CHECK-NEXT: 3 2 1.00 * * U vstmxcsr (%rax)
1672 # CHECK-NEXT: 3 2 1.00 * U vstmxcsr (%rax)
16731673 # CHECK-NEXT: 1 3 1.00 vsubpd %xmm0, %xmm1, %xmm2
16741674 # CHECK-NEXT: 2 8 1.00 * vsubpd (%rax), %xmm1, %xmm2
16751675 # CHECK-NEXT: 1 3 1.00 vsubpd %ymm0, %ymm1, %ymm2
229229 # CHECK-NEXT: 2 16 5.00 * divps (%rax), %xmm2
230230 # CHECK-NEXT: 1 11 3.00 divss %xmm0, %xmm2
231231 # CHECK-NEXT: 2 16 5.00 * divss (%rax), %xmm2
232 # CHECK-NEXT: 3 7 1.00 * * U ldmxcsr (%rax)
232 # CHECK-NEXT: 3 7 1.00 * U ldmxcsr (%rax)
233233 # CHECK-NEXT: 1 1 1.00 * * U maskmovq %mm0, %mm1
234234 # CHECK-NEXT: 1 3 1.00 maxps %xmm0, %xmm2
235235 # CHECK-NEXT: 2 8 1.00 * maxps (%rax), %xmm2
304304 # CHECK-NEXT: 2 16 7.00 * sqrtps (%rax), %xmm2
305305 # CHECK-NEXT: 1 11 4.00 sqrtss %xmm0, %xmm2
306306 # CHECK-NEXT: 2 16 7.00 * sqrtss (%rax), %xmm2
307 # CHECK-NEXT: 3 2 1.00 * * U stmxcsr (%rax)
307 # CHECK-NEXT: 3 2 1.00 * U stmxcsr (%rax)
308308 # CHECK-NEXT: 1 3 1.00 subps %xmm0, %xmm2
309309 # CHECK-NEXT: 2 8 1.00 * subps (%rax), %xmm2
310310 # CHECK-NEXT: 1 3 1.00 subss %xmm0, %xmm2
2323
2424 # CHECK: [1] [2] [3] [4] [5] [6] Instructions:
2525 # CHECK-NEXT: 1 100 0.50 * * U int3
26 # CHECK-NEXT: 1 1 1.00 * * U stmxcsr (%rsp)
26 # CHECK-NEXT: 1 1 1.00 * U stmxcsr (%rsp)
2727
2828 # CHECK: Timeline view:
2929 # CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789
3939 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
4040
4141 # CHECK: [0] [1] [2] [3]
42 # CHECK-NEXT: 0. 2 1.0 0.5 0.0 int3
43 # CHECK-NEXT: 1. 2 100.5 0.0 0.0 stmxcsr (%rsp)
42 # CHECK-NEXT: 0. 2 51.0 0.5 0.0 int3
43 # CHECK-NEXT: 1. 2 151.0 0.0 0.0 stmxcsr (%rsp)
12171217 # CHECK-NEXT: 1 6 1.00 * vinsertps $1, (%rax), %xmm1, %xmm2
12181218 # CHECK-NEXT: 1 5 1.00 * vlddqu (%rax), %xmm2
12191219 # CHECK-NEXT: 1 5 1.00 * vlddqu (%rax), %ymm2
1220 # CHECK-NEXT: 1 3 1.00 * * U vldmxcsr (%rax)
1220 # CHECK-NEXT: 1 3 1.00 * U vldmxcsr (%rax)
12211221 # CHECK-NEXT: 1 1 1.00 * * U vmaskmovdqu %xmm0, %xmm1
12221222 # CHECK-NEXT: 1 6 1.00 * vmaskmovpd (%rax), %xmm0, %xmm2
12231223 # CHECK-NEXT: 2 6 2.00 * vmaskmovpd (%rax), %ymm0, %ymm2
16691669 # CHECK-NEXT: 1 32 27.00 * vsqrtsd (%rax), %xmm1, %xmm2
16701670 # CHECK-NEXT: 1 21 21.00 vsqrtss %xmm0, %xmm1, %xmm2
16711671 # CHECK-NEXT: 1 26 21.00 * vsqrtss (%rax), %xmm1, %xmm2
1672 # CHECK-NEXT: 1 1 1.00 * * U vstmxcsr (%rax)
1672 # CHECK-NEXT: 1 1 1.00 * U vstmxcsr (%rax)
16731673 # CHECK-NEXT: 1 3 1.00 vsubpd %xmm0, %xmm1, %xmm2
16741674 # CHECK-NEXT: 1 8 1.00 * vsubpd (%rax), %xmm1, %xmm2
16751675 # CHECK-NEXT: 2 3 2.00 vsubpd %ymm0, %ymm1, %ymm2
229229 # CHECK-NEXT: 1 24 19.00 * divps (%rax), %xmm2
230230 # CHECK-NEXT: 1 19 19.00 divss %xmm0, %xmm2
231231 # CHECK-NEXT: 1 24 19.00 * divss (%rax), %xmm2
232 # CHECK-NEXT: 1 3 1.00 * * U ldmxcsr (%rax)
232 # CHECK-NEXT: 1 3 1.00 * U ldmxcsr (%rax)
233233 # CHECK-NEXT: 1 1 0.50 * * U maskmovq %mm0, %mm1
234234 # CHECK-NEXT: 1 2 1.00 maxps %xmm0, %xmm2
235235 # CHECK-NEXT: 1 7 1.00 * maxps (%rax), %xmm2
304304 # CHECK-NEXT: 1 26 21.00 * sqrtps (%rax), %xmm2
305305 # CHECK-NEXT: 1 21 21.00 sqrtss %xmm0, %xmm2
306306 # CHECK-NEXT: 1 26 21.00 * sqrtss (%rax), %xmm2
307 # CHECK-NEXT: 1 1 1.00 * * U stmxcsr (%rax)
307 # CHECK-NEXT: 1 1 1.00 * U stmxcsr (%rax)
308308 # CHECK-NEXT: 1 3 1.00 subps %xmm0, %xmm2
309309 # CHECK-NEXT: 1 8 1.00 * subps (%rax), %xmm2
310310 # CHECK-NEXT: 1 3 1.00 subss %xmm0, %xmm2
12171217 # CHECK-NEXT: 2 7 1.00 * vinsertps $1, (%rax), %xmm1, %xmm2
12181218 # CHECK-NEXT: 1 6 0.50 * vlddqu (%rax), %xmm2
12191219 # CHECK-NEXT: 1 7 0.50 * vlddqu (%rax), %ymm2
1220 # CHECK-NEXT: 4 5 1.00 * * U vldmxcsr (%rax)
1220 # CHECK-NEXT: 4 5 1.00 * U vldmxcsr (%rax)
12211221 # CHECK-NEXT: 1 1 1.00 * * U vmaskmovdqu %xmm0, %xmm1
12221222 # CHECK-NEXT: 3 8 1.00 * vmaskmovpd (%rax), %xmm0, %xmm2
12231223 # CHECK-NEXT: 3 9 1.00 * vmaskmovpd (%rax), %ymm0, %ymm2
16691669 # CHECK-NEXT: 2 27 21.00 * vsqrtsd (%rax), %xmm1, %xmm2
16701670 # CHECK-NEXT: 1 14 14.00 vsqrtss %xmm0, %xmm1, %xmm2
16711671 # CHECK-NEXT: 2 20 14.00 * vsqrtss (%rax), %xmm1, %xmm2
1672 # CHECK-NEXT: 4 5 1.00 * * U vstmxcsr (%rax)
1672 # CHECK-NEXT: 4 5 1.00 * U vstmxcsr (%rax)
16731673 # CHECK-NEXT: 1 3 1.00 vsubpd %xmm0, %xmm1, %xmm2
16741674 # CHECK-NEXT: 2 9 1.00 * vsubpd (%rax), %xmm1, %xmm2
16751675 # CHECK-NEXT: 1 3 1.00 vsubpd %ymm0, %ymm1, %ymm2
229229 # CHECK-NEXT: 2 20 14.00 * divps (%rax), %xmm2
230230 # CHECK-NEXT: 1 14 14.00 divss %xmm0, %xmm2
231231 # CHECK-NEXT: 2 20 14.00 * divss (%rax), %xmm2
232 # CHECK-NEXT: 4 5 1.00 * * U ldmxcsr (%rax)
232 # CHECK-NEXT: 4 5 1.00 * U ldmxcsr (%rax)
233233 # CHECK-NEXT: 1 1 1.00 * * U maskmovq %mm0, %mm1
234234 # CHECK-NEXT: 1 3 1.00 maxps %xmm0, %xmm2
235235 # CHECK-NEXT: 2 9 1.00 * maxps (%rax), %xmm2
304304 # CHECK-NEXT: 2 20 14.00 * sqrtps (%rax), %xmm2
305305 # CHECK-NEXT: 1 14 14.00 sqrtss %xmm0, %xmm2
306306 # CHECK-NEXT: 2 20 14.00 * sqrtss (%rax), %xmm2
307 # CHECK-NEXT: 4 5 1.00 * * U stmxcsr (%rax)
307 # CHECK-NEXT: 4 5 1.00 * U stmxcsr (%rax)
308308 # CHECK-NEXT: 1 3 1.00 subps %xmm0, %xmm2
309309 # CHECK-NEXT: 2 9 1.00 * subps (%rax), %xmm2
310310 # CHECK-NEXT: 1 3 1.00 subss %xmm0, %xmm2
12171217 # CHECK-NEXT: 2 7 1.00 * vinsertps $1, (%rax), %xmm1, %xmm2
12181218 # CHECK-NEXT: 1 6 0.50 * vlddqu (%rax), %xmm2
12191219 # CHECK-NEXT: 1 7 0.50 * vlddqu (%rax), %ymm2
1220 # CHECK-NEXT: 3 7 1.00 * * U vldmxcsr (%rax)
1220 # CHECK-NEXT: 3 7 1.00 * U vldmxcsr (%rax)
12211221 # CHECK-NEXT: 2 1 1.00 * * U vmaskmovdqu %xmm0, %xmm1
12221222 # CHECK-NEXT: 3 8 2.00 * vmaskmovpd (%rax), %xmm0, %xmm2
12231223 # CHECK-NEXT: 3 9 2.00 * vmaskmovpd (%rax), %ymm0, %ymm2
16691669 # CHECK-NEXT: 2 21 14.00 * vsqrtsd (%rax), %xmm1, %xmm2
16701670 # CHECK-NEXT: 1 11 7.00 vsqrtss %xmm0, %xmm1, %xmm2
16711671 # CHECK-NEXT: 2 16 7.00 * vsqrtss (%rax), %xmm1, %xmm2
1672 # CHECK-NEXT: 3 2 1.00 * * U vstmxcsr (%rax)
1672 # CHECK-NEXT: 3 2 1.00 * U vstmxcsr (%rax)
16731673 # CHECK-NEXT: 1 3 1.00 vsubpd %xmm0, %xmm1, %xmm2
16741674 # CHECK-NEXT: 2 9 1.00 * vsubpd (%rax), %xmm1, %xmm2
16751675 # CHECK-NEXT: 1 3 1.00 vsubpd %ymm0, %ymm1, %ymm2
229229 # CHECK-NEXT: 2 19 7.00 * divps (%rax), %xmm2
230230 # CHECK-NEXT: 1 13 7.00 divss %xmm0, %xmm2
231231 # CHECK-NEXT: 2 18 7.00 * divss (%rax), %xmm2
232 # CHECK-NEXT: 3 7 1.00 * * U ldmxcsr (%rax)
232 # CHECK-NEXT: 3 7 1.00 * U ldmxcsr (%rax)
233233 # CHECK-NEXT: 1 1 1.00 * * U maskmovq %mm0, %mm1
234234 # CHECK-NEXT: 1 3 1.00 maxps %xmm0, %xmm2
235235 # CHECK-NEXT: 2 9 1.00 * maxps (%rax), %xmm2
304304 # CHECK-NEXT: 2 17 7.00 * sqrtps (%rax), %xmm2
305305 # CHECK-NEXT: 1 11 7.00 sqrtss %xmm0, %xmm2
306306 # CHECK-NEXT: 2 16 7.00 * sqrtss (%rax), %xmm2
307 # CHECK-NEXT: 3 2 1.00 * * U stmxcsr (%rax)
307 # CHECK-NEXT: 3 2 1.00 * U stmxcsr (%rax)
308308 # CHECK-NEXT: 1 3 1.00 subps %xmm0, %xmm2
309309 # CHECK-NEXT: 2 9 1.00 * subps (%rax), %xmm2
310310 # CHECK-NEXT: 1 3 1.00 subss %xmm0, %xmm2
229229 # CHECK-NEXT: 1 42 39.00 * divps (%rax), %xmm2
230230 # CHECK-NEXT: 1 19 17.00 divss %xmm0, %xmm2
231231 # CHECK-NEXT: 1 22 17.00 * divss (%rax), %xmm2
232 # CHECK-NEXT: 1 3 1.00 * * U ldmxcsr (%rax)
232 # CHECK-NEXT: 1 3 1.00 * U ldmxcsr (%rax)
233233 # CHECK-NEXT: 1 1 1.00 * * U maskmovq %mm0, %mm1
234234 # CHECK-NEXT: 1 3 1.00 maxps %xmm0, %xmm2
235235 # CHECK-NEXT: 1 6 1.00 * maxps (%rax), %xmm2
304304 # CHECK-NEXT: 1 44 40.00 * sqrtps (%rax), %xmm2
305305 # CHECK-NEXT: 1 20 20.00 sqrtss %xmm0, %xmm2
306306 # CHECK-NEXT: 1 23 20.00 * sqrtss (%rax), %xmm2
307 # CHECK-NEXT: 1 1 1.00 * * U stmxcsr (%rax)
307 # CHECK-NEXT: 1 1 1.00 * U stmxcsr (%rax)
308308 # CHECK-NEXT: 1 3 1.00 subps %xmm0, %xmm2
309309 # CHECK-NEXT: 1 6 1.00 * subps (%rax), %xmm2
310310 # CHECK-NEXT: 1 3 1.00 subss %xmm0, %xmm2
12171217 # CHECK-NEXT: 2 7 1.00 * vinsertps $1, (%rax), %xmm1, %xmm2
12181218 # CHECK-NEXT: 1 6 0.50 * vlddqu (%rax), %xmm2
12191219 # CHECK-NEXT: 1 7 0.50 * vlddqu (%rax), %ymm2
1220 # CHECK-NEXT: 4 5 1.00 * * U vldmxcsr (%rax)
1220 # CHECK-NEXT: 4 5 1.00 * U vldmxcsr (%rax)
12211221 # CHECK-NEXT: 1 1 1.00 * * U vmaskmovdqu %xmm0, %xmm1
12221222 # CHECK-NEXT: 3 8 1.00 * vmaskmovpd (%rax), %xmm0, %xmm2
12231223 # CHECK-NEXT: 3 9 1.00 * vmaskmovpd (%rax), %ymm0, %ymm2
16691669 # CHECK-NEXT: 2 27 21.00 * vsqrtsd (%rax), %xmm1, %xmm2
16701670 # CHECK-NEXT: 1 14 14.00 vsqrtss %xmm0, %xmm1, %xmm2
16711671 # CHECK-NEXT: 2 20 14.00 * vsqrtss (%rax), %xmm1, %xmm2
1672 # CHECK-NEXT: 4 5 1.00 * * U vstmxcsr (%rax)
1672 # CHECK-NEXT: 4 5 1.00 * U vstmxcsr (%rax)
16731673 # CHECK-NEXT: 1 3 1.00 vsubpd %xmm0, %xmm1, %xmm2
16741674 # CHECK-NEXT: 2 9 1.00 * vsubpd (%rax), %xmm1, %xmm2
16751675 # CHECK-NEXT: 1 3 1.00 vsubpd %ymm0, %ymm1, %ymm2
229229 # CHECK-NEXT: 2 20 14.00 * divps (%rax), %xmm2
230230 # CHECK-NEXT: 1 14 14.00 divss %xmm0, %xmm2
231231 # CHECK-NEXT: 2 20 14.00 * divss (%rax), %xmm2
232 # CHECK-NEXT: 4 5 1.00 * * U ldmxcsr (%rax)
232 # CHECK-NEXT: 4 5 1.00 * U ldmxcsr (%rax)
233233 # CHECK-NEXT: 1 1 1.00 * * U maskmovq %mm0, %mm1
234234 # CHECK-NEXT: 1 3 1.00 maxps %xmm0, %xmm2
235235 # CHECK-NEXT: 2 9 1.00 * maxps (%rax), %xmm2
304304 # CHECK-NEXT: 2 20 14.00 * sqrtps (%rax), %xmm2
305305 # CHECK-NEXT: 1 14 14.00 sqrtss %xmm0, %xmm2
306306 # CHECK-NEXT: 2 20 14.00 * sqrtss (%rax), %xmm2
307 # CHECK-NEXT: 4 5 1.00 * * U stmxcsr (%rax)
307 # CHECK-NEXT: 4 5 1.00 * U stmxcsr (%rax)
308308 # CHECK-NEXT: 1 3 1.00 subps %xmm0, %xmm2
309309 # CHECK-NEXT: 2 9 1.00 * subps (%rax), %xmm2
310310 # CHECK-NEXT: 1 3 1.00 subss %xmm0, %xmm2
12171217 # CHECK-NEXT: 2 7 1.00 * vinsertps $1, (%rax), %xmm1, %xmm2
12181218 # CHECK-NEXT: 1 6 0.50 * vlddqu (%rax), %xmm2
12191219 # CHECK-NEXT: 1 7 0.50 * vlddqu (%rax), %ymm2
1220 # CHECK-NEXT: 3 7 1.00 * * U vldmxcsr (%rax)
1220 # CHECK-NEXT: 3 7 1.00 * U vldmxcsr (%rax)
12211221 # CHECK-NEXT: 2 1 1.00 * * U vmaskmovdqu %xmm0, %xmm1
12221222 # CHECK-NEXT: 2 7 0.50 * vmaskmovpd (%rax), %xmm0, %xmm2
12231223 # CHECK-NEXT: 2 8 0.50 * vmaskmovpd (%rax), %ymm0, %ymm2
16691669 # CHECK-NEXT: 2 23 6.00 * vsqrtsd (%rax), %xmm1, %xmm2
16701670 # CHECK-NEXT: 1 12 3.00 vsqrtss %xmm0, %xmm1, %xmm2
16711671 # CHECK-NEXT: 2 17 3.00 * vsqrtss (%rax), %xmm1, %xmm2
1672 # CHECK-NEXT: 3 2 1.00 * * U vstmxcsr (%rax)
1672 # CHECK-NEXT: 3 2 1.00 * U vstmxcsr (%rax)
16731673 # CHECK-NEXT: 1 4 0.50 vsubpd %xmm0, %xmm1, %xmm2
16741674 # CHECK-NEXT: 2 10 0.50 * vsubpd (%rax), %xmm1, %xmm2
16751675 # CHECK-NEXT: 1 4 0.50 vsubpd %ymm0, %ymm1, %ymm2
229229 # CHECK-NEXT: 2 17 5.00 * divps (%rax), %xmm2
230230 # CHECK-NEXT: 1 11 3.00 divss %xmm0, %xmm2
231231 # CHECK-NEXT: 2 16 3.00 * divss (%rax), %xmm2
232 # CHECK-NEXT: 3 7 1.00 * * U ldmxcsr (%rax)
232 # CHECK-NEXT: 3 7 1.00 * U ldmxcsr (%rax)
233233 # CHECK-NEXT: 1 1 1.00 * * U maskmovq %mm0, %mm1
234234 # CHECK-NEXT: 1 4 0.50 maxps %xmm0, %xmm2
235235 # CHECK-NEXT: 2 10 0.50 * maxps (%rax), %xmm2
304304 # CHECK-NEXT: 2 18 3.00 * sqrtps (%rax), %xmm2
305305 # CHECK-NEXT: 1 12 3.00 sqrtss %xmm0, %xmm2
306306 # CHECK-NEXT: 2 17 3.00 * sqrtss (%rax), %xmm2
307 # CHECK-NEXT: 3 2 1.00 * * U stmxcsr (%rax)
307 # CHECK-NEXT: 3 2 1.00 * U stmxcsr (%rax)
308308 # CHECK-NEXT: 1 4 0.50 subps %xmm0, %xmm2
309309 # CHECK-NEXT: 2 10 0.50 * subps (%rax), %xmm2
310310 # CHECK-NEXT: 1 4 0.50 subss %xmm0, %xmm2
12171217 # CHECK-NEXT: 2 7 1.00 * vinsertps $1, (%rax), %xmm1, %xmm2
12181218 # CHECK-NEXT: 1 6 0.50 * vlddqu (%rax), %xmm2
12191219 # CHECK-NEXT: 1 7 0.50 * vlddqu (%rax), %ymm2
1220 # CHECK-NEXT: 3 7 1.00 * * U vldmxcsr (%rax)
1220 # CHECK-NEXT: 3 7 1.00 * U vldmxcsr (%rax)
12211221 # CHECK-NEXT: 2 1 1.00 * * U vmaskmovdqu %xmm0, %xmm1
12221222 # CHECK-NEXT: 2 7 0.50 * vmaskmovpd (%rax), %xmm0, %xmm2
12231223 # CHECK-NEXT: 2 8 0.50 * vmaskmovpd (%rax), %ymm0, %ymm2
16691669 # CHECK-NEXT: 2 23 6.00 * vsqrtsd (%rax), %xmm1, %xmm2
16701670 # CHECK-NEXT: 1 12 3.00 vsqrtss %xmm0, %xmm1, %xmm2
16711671 # CHECK-NEXT: 2 17 3.00 * vsqrtss (%rax), %xmm1, %xmm2
1672 # CHECK-NEXT: 3 2 1.00 * * U vstmxcsr (%rax)
1672 # CHECK-NEXT: 3 2 1.00 * U vstmxcsr (%rax)
16731673 # CHECK-NEXT: 1 4 0.50 vsubpd %xmm0, %xmm1, %xmm2
16741674 # CHECK-NEXT: 2 10 0.50 * vsubpd (%rax), %xmm1, %xmm2
16751675 # CHECK-NEXT: 1 4 0.50 vsubpd %ymm0, %ymm1, %ymm2
229229 # CHECK-NEXT: 2 17 5.00 * divps (%rax), %xmm2
230230 # CHECK-NEXT: 1 11 3.00 divss %xmm0, %xmm2
231231 # CHECK-NEXT: 2 16 3.00 * divss (%rax), %xmm2
232 # CHECK-NEXT: 3 7 1.00 * * U ldmxcsr (%rax)
232 # CHECK-NEXT: 3 7 1.00 * U ldmxcsr (%rax)
233233 # CHECK-NEXT: 1 1 1.00 * * U maskmovq %mm0, %mm1
234234 # CHECK-NEXT: 1 4 0.50 maxps %xmm0, %xmm2
235235 # CHECK-NEXT: 2 10 0.50 * maxps (%rax), %xmm2
304304 # CHECK-NEXT: 2 18 3.00 * sqrtps (%rax), %xmm2
305305 # CHECK-NEXT: 1 12 3.00 sqrtss %xmm0, %xmm2
306306 # CHECK-NEXT: 2 17 3.00 * sqrtss (%rax), %xmm2
307 # CHECK-NEXT: 3 2 1.00 * * U stmxcsr (%rax)
307 # CHECK-NEXT: 3 2 1.00 * U stmxcsr (%rax)
308308 # CHECK-NEXT: 1 4 0.50 subps %xmm0, %xmm2
309309 # CHECK-NEXT: 2 10 0.50 * subps (%rax), %xmm2
310310 # CHECK-NEXT: 1 4 0.50 subss %xmm0, %xmm2
12171217 # CHECK-NEXT: 1 8 0.50 * vinsertps $1, (%rax), %xmm1, %xmm2
12181218 # CHECK-NEXT: 1 8 0.50 * vlddqu (%rax), %xmm2
12191219 # CHECK-NEXT: 1 8 0.50 * vlddqu (%rax), %ymm2
1220 # CHECK-NEXT: 1 100 0.25 * * U vldmxcsr (%rax)
1220 # CHECK-NEXT: 1 100 0.25 * U vldmxcsr (%rax)
12211221 # CHECK-NEXT: 1 100 0.25 * * U vmaskmovdqu %xmm0, %xmm1
12221222 # CHECK-NEXT: 1 8 0.50 * vmaskmovpd (%rax), %xmm0, %xmm2
12231223 # CHECK-NEXT: 2 8 1.00 * vmaskmovpd (%rax), %ymm0, %ymm2
16691669 # CHECK-NEXT: 1 27 20.00 * vsqrtsd (%rax), %xmm1, %xmm2
16701670 # CHECK-NEXT: 1 20 20.00 vsqrtss %xmm0, %xmm1, %xmm2
16711671 # CHECK-NEXT: 1 27 20.00 * vsqrtss (%rax), %xmm1, %xmm2
1672 # CHECK-NEXT: 1 100 0.25 * * U vstmxcsr (%rax)
1672 # CHECK-NEXT: 1 100 0.25 * U vstmxcsr (%rax)
16731673 # CHECK-NEXT: 1 3 1.00 vsubpd %xmm0, %xmm1, %xmm2
16741674 # CHECK-NEXT: 1 10 1.00 * vsubpd (%rax), %xmm1, %xmm2
16751675 # CHECK-NEXT: 1 3 1.00 vsubpd %ymm0, %ymm1, %ymm2
229229 # CHECK-NEXT: 1 22 1.00 * divps (%rax), %xmm2
230230 # CHECK-NEXT: 1 15 1.00 divss %xmm0, %xmm2
231231 # CHECK-NEXT: 1 22 1.00 * divss (%rax), %xmm2
232 # CHECK-NEXT: 1 100 0.25 * * U ldmxcsr (%rax)
232 # CHECK-NEXT: 1 100 0.25 * U ldmxcsr (%rax)
233233 # CHECK-NEXT: 1 100 0.25 * * U maskmovq %mm0, %mm1
234234 # CHECK-NEXT: 1 3 1.00 maxps %xmm0, %xmm2
235235 # CHECK-NEXT: 1 10 1.00 * maxps (%rax), %xmm2
304304 # CHECK-NEXT: 1 27 20.00 * sqrtps (%rax), %xmm2
305305 # CHECK-NEXT: 1 20 20.00 sqrtss %xmm0, %xmm2
306306 # CHECK-NEXT: 1 27 20.00 * sqrtss (%rax), %xmm2
307 # CHECK-NEXT: 1 100 0.25 * * U stmxcsr (%rax)
307 # CHECK-NEXT: 1 100 0.25 * U stmxcsr (%rax)
308308 # CHECK-NEXT: 1 3 1.00 subps %xmm0, %xmm2
309309 # CHECK-NEXT: 1 10 1.00 * subps (%rax), %xmm2
310310 # CHECK-NEXT: 1 3 1.00 subss %xmm0, %xmm2