llvm.org GIT mirror llvm / 57b2164
[DAG] Refactor DAGCombiner::ReassociateOps

Summary:
Extract the logic for doing reassociations from DAGCombiner::reassociateOps into a helper function DAGCombiner::reassociateOpsCommutative, and use that helper to trigger reassociation on the original operand order, or the commuted operand order.

Codegen is not identical since the operand order will be different when doing the reassociations for the commuted case. That causes some unfortunate churn in some test cases. Apart from that this should be NFC.

Reviewers: spatel, craig.topper, tstellar
Reviewed By: spatel
Subscribers: dmgreen, dschuff, jvesely, nhaehnle, javed.absar, sbc100, jgravelle-google, hiraditya, aheejin, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61199

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359476 91177308-0d34-0410-b5e6-96231b3b80d8

Bjorn Pettersson, 1 year, 4 months ago
19 changed file(s) with 204 addition(s) and 204 deletion(s). Raw diff Collapse all Expand all
457457 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
458458
459459 SDValue XformToShuffleWithZero(SDNode *N);
460 SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
460 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
461 SDValue N1);
462 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
461463 SDValue N1, SDNodeFlags Flags);
462464
463465 SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
9991001 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
10001002 }
10011003
1002 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1004 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1005 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1006 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1007 SDValue N0, SDValue N1) {
1008 EVT VT = N0.getValueType();
1009
1010 if (N0.getOpcode() != Opc)
1011 return SDValue();
1012
1013 // Don't reassociate reductions.
1014 if (N0->getFlags().hasVectorReduction())
1015 return SDValue();
1016
1017 if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1018 if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1019 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1020 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
1021 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1022 return SDValue();
1023 }
1024 if (N0.hasOneUse()) {
1025 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1026 // iff (op x, c1) has one use
1027 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1028 if (!OpNode.getNode())
1029 return SDValue();
1030 AddToWorklist(OpNode.getNode());
1031 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1032 }
1033 }
1034 return SDValue();
1035 }
1036
1037 // Try to reassociate commutative binops.
1038 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
10031039 SDValue N1, SDNodeFlags Flags) {
1040 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
10041041 // Don't reassociate reductions.
10051042 if (Flags.hasVectorReduction())
10061043 return SDValue();
1007
1008 EVT VT = N0.getValueType();
1009 if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
1010 if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1011 if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1012 // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
1013 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
1014 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1015 return SDValue();
1016 }
1017 if (N0.hasOneUse()) {
1018 // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
1019 // use
1020 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1021 if (!OpNode.getNode())
1022 return SDValue();
1023 AddToWorklist(OpNode.getNode());
1024 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1025 }
1026 }
1027 }
1028
1029 if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
1030 if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
1031 if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
1032 // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
1033 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
1034 return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
1035 return SDValue();
1036 }
1037 if (N1.hasOneUse()) {
1038 // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
1039 // use
1040 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
1041 if (!OpNode.getNode())
1042 return SDValue();
1043 AddToWorklist(OpNode.getNode());
1044 return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
1045 }
1046 }
1047 }
1048
1044 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1045 return Combined;
1046 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1047 return Combined;
10491048 return SDValue();
10501049 }
10511050
21922191 return NewSel;
21932192
21942193 // reassociate add
2195 if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2194 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
21962195 return RADD;
21972196
21982197 // fold ((0-A) + B) -> B-A
32743273 N0.getOperand(1), N1));
32753274
32763275 // reassociate mul
3277 if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3276 if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
32783277 return RMUL;
32793278
32803279 return SDValue();
47984797 return NewSel;
47994798
48004799 // reassociate and
4801 if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4800 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
48024801 return RAND;
48034802
48044803 // Try to convert a constant mask AND into a shuffle clear mask.
55245523 return BSwap;
55255524
55265525 // reassociate or
5527 if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
5526 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
55285527 return ROR;
55295528
55305529 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
64116410 return NewSel;
64126411
64136412 // reassociate xor
6414 if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
6413 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
64156414 return RXOR;
64166415
64176416 // fold !(x cc y) -> (x !cc y)
1818 ; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]]
1919 ; CHECK-NEXT: b.ne
2020 ; Next BB
21 ; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], [[I2]]
22 ; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], [[I1]]
21 ; CHECK: add [[BLOCKBASE1:x[0-9]+]], [[I1]], [[BLOCKBASE]]
22 ; CHECK-NEXT: add [[BLOCKBASE2:x[0-9]+]], [[I2]], [[BLOCKBASE]]
2323 ; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1]
2424 ; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1]
2525 ; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]]
183183 ; VI: s_and_b32 s1, s0, 0xffff0000
184184 ; VI: s_add_i32 s0, s0, 1
185185 ; VI: s_and_b32 s0, s0, 0xffff
186 ; VI: s_or_b32 s0, s0, s1
186 ; VI: s_or_b32 s0, s1, s0
187187 ; VI: s_add_i32 s0, s0, 0x10000
188188 ; VI: v_mov_b32_e32 v0, s0
189189
190190 ; SI: s_lshl_b32 s1, s1, 16
191191 ; SI: s_add_i32 s0, s0, 1
192192 ; SI: s_and_b32 s0, s0, 0xffff
193 ; SI: s_or_b32 s0, s0, s1
193 ; SI: s_or_b32 s0, s1, s0
194194 ; SI: s_add_i32 s0, s0, 0x10000
195195 define amdgpu_ps void @ps_mesa_inreg_v2i16(<2 x i16> inreg %arg0) {
196196 %add = add <2 x i16> %arg0,
288288 ; SI-NEXT: v_cvt_f32_ubyte2_e32 v2, v1
289289 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v1
290290 ; SI-NEXT: v_cvt_f32_ubyte1_e32 v1, v6
291 ; SI-NEXT: v_and_b32_e32 v7, s12, v7
291292 ; SI-NEXT: v_add_i32_e32 v4, vcc, 9, v4
292 ; SI-NEXT: v_and_b32_e32 v7, s12, v7
293293 ; SI-NEXT: s_waitcnt lgkmcnt(0)
294294 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
295295 ; SI-NEXT: s_waitcnt expcnt(0)
296 ; SI-NEXT: v_or_b32_e32 v1, v7, v6
296 ; SI-NEXT: v_or_b32_e32 v0, v6, v7
297297 ; SI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
298 ; SI-NEXT: v_and_b32_e32 v0, s12, v4
299 ; SI-NEXT: v_or_b32_e32 v0, v0, v5
300 ; SI-NEXT: v_add_i32_e32 v1, vcc, 0x900, v1
301 ; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
302 ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
298 ; SI-NEXT: v_and_b32_e32 v1, s12, v4
299 ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x900, v0
300 ; SI-NEXT: v_or_b32_e32 v1, v5, v1
301 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
302 ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
303303 ; SI-NEXT: v_or_b32_e32 v0, v1, v0
304304 ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x9000000, v0
305305 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
334334 ; VI-NEXT: v_add_u16_e32 v9, 9, v5
335335 ; VI-NEXT: v_add_u16_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
336336 ; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v7
337 ; VI-NEXT: v_or_b32_sdwa v0, v9, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
338 ; VI-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
337 ; VI-NEXT: v_or_b32_sdwa v0, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
338 ; VI-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
339339 ; VI-NEXT: v_add_u16_e32 v0, s8, v0
340340 ; VI-NEXT: v_add_u16_sdwa v1, v1, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
341341 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
7070 ; FUNC-LABEL: {{^}}test_add_shl_add_constant_inv:
7171 ; SI-DAG: s_load_dwordx2 s{{\[}}[[X:[0-9]+]]:[[Y:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x13
7272 ; SI: s_lshl_b32 [[SHL3:s[0-9]+]], s[[X]], 3
73 ; SI: s_add_i32 [[TMP:s[0-9]+]], s[[Y]], [[SHL3]]
73 ; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], s[[Y]]
7474 ; SI: s_addk_i32 [[TMP]], 0x3d8
7575 ; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[TMP]]
7676 ; SI: buffer_store_dword [[VRESULT]]
215215 ; SI-NEXT: s_add_i32 s0, s0, 12
216216 ; SI-NEXT: s_or_b32 s0, s0, 4
217217 ; SI-NEXT: s_and_b32 s0, s0, 0xff
218 ; SI-NEXT: s_or_b32 s0, s0, s1
218 ; SI-NEXT: s_or_b32 s0, s1, s0
219219 ; SI-NEXT: s_addk_i32 s0, 0x2c00
220220 ; SI-NEXT: s_or_b32 s0, s0, 0x300
221221 ; SI-NEXT: v_mov_b32_e32 v0, s0
413413 define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture readonly %a, i32* nocapture readonly %b) {
414414 ; ARM-LABEL: cmp_and8_short_int:
415415 ; ARM: @ %bb.0: @ %entry
416 ; ARM-NEXT: ldrb r1, [r1]
416417 ; ARM-NEXT: ldrb r0, [r0]
417 ; ARM-NEXT: ldrb r1, [r1]
418 ; ARM-NEXT: and r0, r1, r0
418 ; ARM-NEXT: and r0, r0, r1
419419 ; ARM-NEXT: clz r0, r0
420420 ; ARM-NEXT: lsr r0, r0, #5
421421 ; ARM-NEXT: bx lr
422422 ;
423423 ; ARMEB-LABEL: cmp_and8_short_int:
424424 ; ARMEB: @ %bb.0: @ %entry
425 ; ARMEB-NEXT: ldrb r1, [r1, #3]
425426 ; ARMEB-NEXT: ldrb r0, [r0, #1]
426 ; ARMEB-NEXT: ldrb r1, [r1, #3]
427 ; ARMEB-NEXT: and r0, r1, r0
427 ; ARMEB-NEXT: and r0, r0, r1
428428 ; ARMEB-NEXT: clz r0, r0
429429 ; ARMEB-NEXT: lsr r0, r0, #5
430430 ; ARMEB-NEXT: bx lr
431431 ;
432432 ; THUMB1-LABEL: cmp_and8_short_int:
433433 ; THUMB1: @ %bb.0: @ %entry
434 ; THUMB1-NEXT: ldrb r0, [r0]
435434 ; THUMB1-NEXT: ldrb r1, [r1]
436 ; THUMB1-NEXT: ands r1, r0
437 ; THUMB1-NEXT: rsbs r0, r1, #0
438 ; THUMB1-NEXT: adcs r0, r1
435 ; THUMB1-NEXT: ldrb r2, [r0]
436 ; THUMB1-NEXT: ands r2, r1
437 ; THUMB1-NEXT: rsbs r0, r2, #0
438 ; THUMB1-NEXT: adcs r0, r2
439439 ; THUMB1-NEXT: bx lr
440440 ;
441441 ; THUMB2-LABEL: cmp_and8_short_int:
442442 ; THUMB2: @ %bb.0: @ %entry
443 ; THUMB2-NEXT: ldrb r1, [r1]
443444 ; THUMB2-NEXT: ldrb r0, [r0]
444 ; THUMB2-NEXT: ldrb r1, [r1]
445445 ; THUMB2-NEXT: ands r0, r1
446446 ; THUMB2-NEXT: clz r0, r0
447447 ; THUMB2-NEXT: lsrs r0, r0, #5
845845 ; ARM-LABEL: test6:
846846 ; ARM: @ %bb.0: @ %entry
847847 ; ARM-NEXT: ldrb r0, [r0]
848 ; ARM-NEXT: and r0, r0, r1
848 ; ARM-NEXT: and r0, r1, r0
849849 ; ARM-NEXT: uxtb r1, r2
850850 ; ARM-NEXT: sub r0, r0, r1
851851 ; ARM-NEXT: clz r0, r0
855855 ; ARMEB-LABEL: test6:
856856 ; ARMEB: @ %bb.0: @ %entry
857857 ; ARMEB-NEXT: ldrb r0, [r0]
858 ; ARMEB-NEXT: and r0, r0, r1
858 ; ARMEB-NEXT: and r0, r1, r0
859859 ; ARMEB-NEXT: uxtb r1, r2
860860 ; ARMEB-NEXT: sub r0, r0, r1
861861 ; ARMEB-NEXT: clz r0, r0
892892 ; ARM-LABEL: test7:
893893 ; ARM: @ %bb.0: @ %entry
894894 ; ARM-NEXT: ldrb r0, [r0]
895 ; ARM-NEXT: and r0, r0, r1
895 ; ARM-NEXT: and r0, r1, r0
896896 ; ARM-NEXT: uxtb r1, r2
897897 ; ARM-NEXT: sub r0, r0, r1
898898 ; ARM-NEXT: clz r0, r0
902902 ; ARMEB-LABEL: test7:
903903 ; ARMEB: @ %bb.0: @ %entry
904904 ; ARMEB-NEXT: ldrb r0, [r0, #1]
905 ; ARMEB-NEXT: and r0, r0, r1
905 ; ARMEB-NEXT: and r0, r1, r0
906906 ; ARMEB-NEXT: uxtb r1, r2
907907 ; ARMEB-NEXT: sub r0, r0, r1
908908 ; ARMEB-NEXT: clz r0, r0
15491549 ret i64 %and
15501550 }
15511551
1552 define void @test27(i32* nocapture %ptr) {
15521553 ; ARM-LABEL: test27:
1553 ; ARM: @ %bb.0:
1554 ; ARM: @ %bb.0: @ %entry
15541555 ; ARM-NEXT: ldrb r1, [r0, #1]
15551556 ; ARM-NEXT: lsl r1, r1, #16
15561557 ; ARM-NEXT: str r1, [r0]
15571558 ; ARM-NEXT: bx lr
15581559 ;
15591560 ; ARMEB-LABEL: test27:
1560 ; ARMEB: @ %bb.0:
1561 ; ARMEB-NEXT: ldrb r1, [r0, #2]
1562 ; ARMEB-NEXT: lsl r1, r1, #16
1563 ; ARMEB-NEXT: str r1, [r0]
1564 ; ARMEB-NEXT: bx lr
1561 ; ARMEB: @ %bb.0: @ %entry
1562 ; ARMEB-NEXT: ldrb r1, [r0, #2]
1563 ; ARMEB-NEXT: lsl r1, r1, #16
1564 ; ARMEB-NEXT: str r1, [r0]
1565 ; ARMEB-NEXT: bx lr
15651566 ;
15661567 ; THUMB1-LABEL: test27:
1567 ; THUMB1: @ %bb.0:
1568 ; THUMB1-NEXT: ldrb r1, [r0, #1]
1569 ; THUMB1-NEXT: lsls r1, r1, #16
1570 ; THUMB1-NEXT: str r1, [r0]
1571 ; THUMB1-NEXT: bx lr
1568 ; THUMB1: @ %bb.0: @ %entry
1569 ; THUMB1-NEXT: ldrb r1, [r0, #1]
1570 ; THUMB1-NEXT: lsls r1, r1, #16
1571 ; THUMB1-NEXT: str r1, [r0]
1572 ; THUMB1-NEXT: bx lr
15721573 ;
15731574 ; THUMB2-LABEL: test27:
1574 ; THUMB2: @ %bb.0:
1575 ; THUMB2: @ %bb.0: @ %entry
15751576 ; THUMB2-NEXT: ldrb r1, [r0, #1]
15761577 ; THUMB2-NEXT: lsls r1, r1, #16
15771578 ; THUMB2-NEXT: str r1, [r0]
15781579 ; THUMB2-NEXT: bx lr
1579 define void @test27(i32* nocapture %ptr) {
15801580 entry:
15811581 %0 = load i32, i32* %ptr, align 4
15821582 %and = and i32 %0, 65280
527527 ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
528528 define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
529529 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
530 ; CHECK: add r0, r0, r1
530 ; CHECK: add r0, r1, r0
531531 ; CHECK-NEXT: mov r1, #65280
532532 ; CHECK-NEXT: mov r2, #16711680
533533 ; CHECK-NEXT: ldr r0, [r0, #13]
539539 ; CHECK-NEXT: mov pc, lr
540540 ;
541541 ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
542 ; CHECK-ARMv6: add r0, r0, r1
542 ; CHECK-ARMv6: add r0, r1, r0
543543 ; CHECK-ARMv6-NEXT: ldr r0, [r0, #13]
544544 ; CHECK-ARMv6-NEXT: rev r0, r0
545545 ; CHECK-ARMv6-NEXT: bx lr
478478 ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
479479 define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
480480 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
481 ; CHECK: add r0, r0, r1
481 ; CHECK: add r0, r1, r0
482482 ; CHECK-NEXT: ldr r0, [r0, #13]
483483 ; CHECK-NEXT: mov pc, lr
484484 ;
485485 ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
486 ; CHECK-ARMv6: add r0, r0, r1
486 ; CHECK-ARMv6: add r0, r1, r0
487487 ; CHECK-ARMv6-NEXT: ldr r0, [r0, #13]
488488 ; CHECK-ARMv6-NEXT: bx lr
489489 %tmp = add nuw nsw i32 %i, 4
1212 ; CHECK-NEXT: vn %v0, %v0, %v0
1313 ; CHECK-NEXT: vno %v2, %v2, %v2
1414 ; CHECK-NEXT: vceqg %v0, %v0, %v1
15 ; CHECK-NEXT: vx %v0, %v2, %v0
15 ; CHECK-NEXT: vx %v0, %v0, %v2
1616 ; CHECK-NEXT: vnc %v0, %v2, %v0
1717 ; CHECK-NEXT: vlgvf %r0, %v0, 1
1818 ; CHECK-NEXT: tmll %r0, 1
1616 ; CHECK-V6M-NEXT: adds r0, r1, r0
1717 ; CHECK-V6M-NEXT: bx lr
1818 ; CHECK-V6M-NEXT: .LBB0_5:
19 ; CHECK-V6M-NEXT: adds r0, r1, r0
19 ; CHECK-V6M-NEXT: adds r0, r0, r1
2020 ; CHECK-V6M-NEXT: adds r0, r0, #4
2121 ; CHECK-V6M-NEXT: .LBB0_6:
2222 ; CHECK-V6M-NEXT: bx lr
2323 ; CHECK-V6M-NEXT: .LBB0_7:
24 ; CHECK-V6M-NEXT: adds r0, r1, r0
24 ; CHECK-V6M-NEXT: adds r0, r0, r1
2525 ; CHECK-V6M-NEXT: adds r0, r0, #1
2626 ; CHECK-V6M-NEXT: bx lr
2727 ; CHECK-V6M-NEXT: .LBB0_8:
28 ; CHECK-V6M-NEXT: adds r0, r1, r0
28 ; CHECK-V6M-NEXT: adds r0, r0, r1
2929 ; CHECK-V6M-NEXT: adds r0, r0, #2
3030 ; CHECK-V6M-NEXT: bx lr
3131 ; CHECK-V6M-NEXT: .p2align 2
164164 ; NON-PIC-NEXT: i32.load $push4=, 0($pop3){{$}}
165165 ; NON-PIC-NEXT: return $pop4{{$}}
166166
167 ; PIC-NEXT: global.get $push2=, g@GOT{{$}}
168167 ; PIC-NEXT: i32.const $push0=, 2{{$}}
169168 ; PIC-NEXT: i32.shl $push1=, $0, $pop0{{$}}
170 ; PIC-NEXT: i32.add $push3=, $pop2, $pop1{{$}}
169 ; PIC-NEXT: global.get $push2=, g@GOT{{$}}
170 ; PIC-NEXT: i32.add $push3=, $pop1, $pop2{{$}}
171171 ; PIC-NEXT: i32.const $push4=, -40{{$}}
172172 ; PIC-NEXT: i32.add $push5=, $pop3, $pop4{{$}}
173173 ; PIC-NEXT: i32.load $push6=, 0($pop5){{$}}
205205 ; CHECK-NEXT: .functype load_test12 (i32, i32) -> (i32){{$}}
206206 ; CHECK-NEXT: i32.const $push0=, 2{{$}}
207207 ; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}}
208 ; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}}
208 ; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}}
209209 ; CHECK-NEXT: i32.const $push3=, 40{{$}}
210210 ; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
211211 ; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}}
221221 ; CHECK-NEXT: .functype load_test13 (i32, i32) -> (i32){{$}}
222222 ; CHECK-NEXT: i32.const $push0=, 2{{$}}
223223 ; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}}
224 ; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}}
224 ; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}}
225225 ; CHECK-NEXT: i32.const $push3=, 40{{$}}
226226 ; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
227227 ; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}}
283283 ; CHECK-NEXT: .functype load_test17 (i32, i32) -> (i32){{$}}
284284 ; CHECK-NEXT: i32.const $push0=, 2{{$}}
285285 ; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}}
286 ; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}}
286 ; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}}
287287 ; CHECK-NEXT: i32.const $push3=, 40{{$}}
288288 ; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
289289 ; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}}
313313 ; CHECK-NEXT: .functype load_test19 (i32, i32) -> (i32){{$}}
314314 ; CHECK-NEXT: i32.const $push0=, 2{{$}}
315315 ; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}}
316 ; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}}
316 ; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}}
317317 ; CHECK-NEXT: i32.const $push3=, 40{{$}}
318318 ; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
319319 ; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}}
341341 ; CHECK-NEXT: .functype load_test21 (i32, i32) -> (i32){{$}}
342342 ; CHECK-NEXT: i32.const $push0=, 2{{$}}
343343 ; CHECK-NEXT: i32.shl $push1=, $1, $pop0{{$}}
344 ; CHECK-NEXT: i32.add $push2=, $0, $pop1{{$}}
344 ; CHECK-NEXT: i32.add $push2=, $pop1, $0{{$}}
345345 ; CHECK-NEXT: i32.const $push3=, -40{{$}}
346346 ; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
347347 ; CHECK-NEXT: i32.load $push5=, 0($pop4){{$}}
500500 ; NON-PIC-NEXT: i32.const $push2=, g-40{{$}}
501501 ; NON-PIC-NEXT: i32.add $push3=, $pop1, $pop2{{$}}
502502 ; NON-PIC-NEXT: i32.store 0($pop3), $1{{$}}
503 ; PIC-NEXT: global.get $push2=, g@GOT{{$}}
504503 ; PIC-NEXT: i32.const $push0=, 2{{$}}
505504 ; PIC-NEXT: i32.shl $push1=, $0, $pop0{{$}}
506 ; PIC-NEXT: i32.add $push3=, $pop2, $pop1{{$}}
505 ; PIC-NEXT: global.get $push2=, g@GOT{{$}}
506 ; PIC-NEXT: i32.add $push3=, $pop1, $pop2{{$}}
507507 ; PIC-NEXT: i32.const $push4=, -40{{$}}
508508 ; PIC-NEXT: i32.add $push5=, $pop3, $pop4{{$}}
509509 ; PIC-NEXT: i32.store 0($pop5), $1{{$}}
541541 ; CHECK-NEXT: .functype store_test12 (i32, i32, i32) -> (){{$}}
542542 ; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
543543 ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
544 ; NON-PIC-NEXT: i32.add $push2=, $pop1, $0{{$}}
545 ; NON-PIC-NEXT: i32.const $push3=, 40{{$}}
546 ; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
547 ; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}}
548 ; NON-PIC-NEXT: return{{$}}
549 define void @store_test12(i32* %p, i32 %n, i32 %i) {
550 %add = add nsw i32 %n, 10
551 %arrayidx = getelementptr inbounds i32, i32* %p, i32 %add
552 store i32 %i, i32* %arrayidx, align 4
553 ret void
554 }
555
556 ; CHECK-LABEL: store_test13:
557 ; CHECK-NEXT: .functype store_test13 (i32, i32, i32) -> (){{$}}
558 ; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
559 ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
560 ; NON-PIC-NEXT: i32.add $push2=, $pop1, $0{{$}}
561 ; NON-PIC-NEXT: i32.const $push3=, 40{{$}}
562 ; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
563 ; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}}
564 ; NON-PIC-NEXT: return{{$}}
565 define void @store_test13(i32* %p, i32 %n, i32 %i) {
566 %add = add nsw i32 10, %n
567 %arrayidx = getelementptr inbounds i32, i32* %p, i32 %add
568 store i32 %i, i32* %arrayidx, align 4
569 ret void
570 }
571
572 ; CHECK-LABEL: store_test14:
573 ; CHECK-NEXT: .functype store_test14 (i32, i32, i32) -> (){{$}}
574 ; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
575 ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
576 ; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}}
577 ; NON-PIC-NEXT: i32.store 40($pop2), $2{{$}}
578 ; NON-PIC-NEXT: return{{$}}
579 define void @store_test14(i32* %p, i32 %n, i32 %i) {
580 %add.ptr = getelementptr inbounds i32, i32* %p, i32 %n
581 %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 10
582 store i32 %i, i32* %add.ptr1, align 4
583 ret void
584 }
585
586 ; CHECK-LABEL: store_test15:
587 ; CHECK-NEXT: .functype store_test15 (i32, i32, i32) -> (){{$}}
588 ; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
589 ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
544590 ; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}}
545591 ; NON-PIC-NEXT: i32.const $push3=, 40{{$}}
546592 ; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
547593 ; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}}
548594 ; NON-PIC-NEXT: return{{$}}
549 define void @store_test12(i32* %p, i32 %n, i32 %i) {
550 %add = add nsw i32 %n, 10
551 %arrayidx = getelementptr inbounds i32, i32* %p, i32 %add
552 store i32 %i, i32* %arrayidx, align 4
553 ret void
554 }
555
556 ; CHECK-LABEL: store_test13:
557 ; CHECK-NEXT: .functype store_test13 (i32, i32, i32) -> (){{$}}
595 define void @store_test15(i32* %p, i32 %n, i32 %i) {
596 %add.ptr = getelementptr inbounds i32, i32* %p, i32 10
597 %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %n
598 store i32 %i, i32* %add.ptr1, align 4
599 ret void
600 }
601
602 ; CHECK-LABEL: store_test16:
603 ; CHECK-NEXT: .functype store_test16 (i32, i32, i32) -> (){{$}}
558604 ; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
559605 ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
560606 ; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}}
562608 ; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
563609 ; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}}
564610 ; NON-PIC-NEXT: return{{$}}
565 define void @store_test13(i32* %p, i32 %n, i32 %i) {
566 %add = add nsw i32 10, %n
567 %arrayidx = getelementptr inbounds i32, i32* %p, i32 %add
568 store i32 %i, i32* %arrayidx, align 4
569 ret void
570 }
571
572 ; CHECK-LABEL: store_test14:
573 ; CHECK-NEXT: .functype store_test14 (i32, i32, i32) -> (){{$}}
574 ; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
575 ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
576 ; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}}
577 ; NON-PIC-NEXT: i32.store 40($pop2), $2{{$}}
578 ; NON-PIC-NEXT: return{{$}}
579 define void @store_test14(i32* %p, i32 %n, i32 %i) {
580 %add.ptr = getelementptr inbounds i32, i32* %p, i32 %n
581 %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 10
582 store i32 %i, i32* %add.ptr1, align 4
583 ret void
584 }
585
586 ; CHECK-LABEL: store_test15:
587 ; CHECK-NEXT: .functype store_test15 (i32, i32, i32) -> (){{$}}
588 ; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
589 ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
590 ; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}}
591 ; NON-PIC-NEXT: i32.const $push3=, 40{{$}}
592 ; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
593 ; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}}
594 ; NON-PIC-NEXT: return{{$}}
595 define void @store_test15(i32* %p, i32 %n, i32 %i) {
596 %add.ptr = getelementptr inbounds i32, i32* %p, i32 10
597 %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %n
598 store i32 %i, i32* %add.ptr1, align 4
599 ret void
600 }
601
602 ; CHECK-LABEL: store_test16:
603 ; CHECK-NEXT: .functype store_test16 (i32, i32, i32) -> (){{$}}
604 ; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
605 ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
606 ; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}}
607 ; NON-PIC-NEXT: i32.const $push3=, 40{{$}}
608 ; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
609 ; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}}
610 ; NON-PIC-NEXT: return{{$}}
611611 define void @store_test16(i32* %p, i32 %n, i32 %i) {
612612 %add.ptr = getelementptr inbounds i32, i32* %p, i32 10
613613 %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %n
619619 ; CHECK-NEXT: .functype store_test17 (i32, i32, i32) -> (){{$}}
620620 ; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
621621 ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
622 ; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}}
622 ; NON-PIC-NEXT: i32.add $push2=, $pop1, $0{{$}}
623623 ; NON-PIC-NEXT: i32.const $push3=, 40{{$}}
624624 ; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
625625 ; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}}
649649 ; CHECK-NEXT: .functype store_test19 (i32, i32, i32) -> (){{$}}
650650 ; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
651651 ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
652 ; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}}
652 ; NON-PIC-NEXT: i32.add $push2=, $pop1, $0{{$}}
653653 ; NON-PIC-NEXT: i32.const $push3=, 40{{$}}
654654 ; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
655655 ; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}}
677677 ; CHECK-NEXT: .functype store_test21 (i32, i32, i32) -> (){{$}}
678678 ; NON-PIC-NEXT: i32.const $push0=, 2{{$}}
679679 ; NON-PIC-NEXT: i32.shl $push1=, $1, $pop0{{$}}
680 ; NON-PIC-NEXT: i32.add $push2=, $0, $pop1{{$}}
680 ; NON-PIC-NEXT: i32.add $push2=, $pop1, $0{{$}}
681681 ; NON-PIC-NEXT: i32.const $push3=, -40{{$}}
682682 ; NON-PIC-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
683683 ; NON-PIC-NEXT: i32.store 0($pop4), $2{{$}}
2525 ; CHECK-LABEL: add_nsw_sext_add:
2626 ; CHECK: # %bb.0:
2727 ; CHECK-NEXT: movslq %edi, %rax
28 ; CHECK-NEXT: leaq 5(%rsi,%rax), %rax
28 ; CHECK-NEXT: leaq 5(%rax,%rsi), %rax
2929 ; CHECK-NEXT: retq
3030
3131 %add = add nsw i32 %i, 5
7272 ; CHECK-LABEL: gep8:
7373 ; CHECK: # %bb.0:
7474 ; CHECK-NEXT: movslq %edi, %rax
75 ; CHECK-NEXT: leaq 5(%rsi,%rax), %rax
75 ; CHECK-NEXT: leaq 5(%rax,%rsi), %rax
7676 ; CHECK-NEXT: retq
7777
7878 %add = add nsw i32 %i, 5
127127 ; CHECK: # %bb.0:
128128 ; CHECK-NEXT: movslq %edi, %rax
129129 ; CHECK-NEXT: shlq $4, %rax
130 ; CHECK-NEXT: leaq 80(%rsi,%rax), %rax
130 ; CHECK-NEXT: leaq 80(%rax,%rsi), %rax
131131 ; CHECK-NEXT: retq
132132
133133 %add = add nsw i32 %i, 5
168168
169169 ; The same as @PR20134 but sign extension is replaced with zero extension
170170 define void @PR20134_zext(i32* %a, i32 %i) {
171 ; CHECK: # %bb.0:
172 ; CHECK-NEXT: movl %esi, %eax
173 ; CHECK-NEXT: movl 4(%rdi,%rax,4), %ecx
174 ; CHECK-NEXT: addl 8(%rdi,%rax,4), %ecx
175 ; CHECK-NEXT: movl %ecx, (%rdi,%rax,4)
176 ; CHECK-NEXT: retq
171 ; CHECK-LABEL: PR20134_zext:
172 ; CHECK: # %bb.0:
173 ; CHECK-NEXT: movl %esi, %eax
174 ; CHECK-NEXT: movl 4(%rdi,%rax,4), %ecx
175 ; CHECK-NEXT: addl 8(%rdi,%rax,4), %ecx
176 ; CHECK-NEXT: movl %ecx, (%rdi,%rax,4)
177 ; CHECK-NEXT: retq
177178
178179 %add1 = add nuw i32 %i, 1
179180 %idx1 = zext i32 %add1 to i64
3737 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
3838 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
3939 ; CHECK-NEXT: imull $400, %ecx, %edx # imm = 0x190
40 ; CHECK-NEXT: leal (%eax,%edx), %esi
40 ; CHECK-NEXT: leal (%edx,%eax), %esi
4141 ; CHECK-NEXT: movl $11, 2020(%esi,%ecx,4)
42 ; CHECK-NEXT: movl $22, 2080(%eax,%edx)
43 ; CHECK-NEXT: movl $33, 10080(%eax,%edx)
42 ; CHECK-NEXT: movl $22, 2080(%edx,%eax)
43 ; CHECK-NEXT: movl $33, 10080(%edx,%eax)
4444 ; CHECK-NEXT: popl %esi
4545 ; CHECK-NEXT: retl
4646 entry:
965965 ; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
966966 ; CHECK64: # %bb.0:
967967 ; CHECK64-NEXT: movl %esi, %eax
968 ; CHECK64-NEXT: movl 13(%rdi,%rax), %eax
968 ; CHECK64-NEXT: movl 13(%rax,%rdi), %eax
969969 ; CHECK64-NEXT: retq
970970 %tmp = add nuw nsw i32 %i, 4
971971 %tmp2 = add nuw nsw i32 %i, 3
10151015 ; CHECK: # %bb.0:
10161016 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
10171017 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1018 ; CHECK-NEXT: movl 12(%eax,%ecx), %eax
1018 ; CHECK-NEXT: movl 12(%ecx,%eax), %eax
10191019 ; CHECK-NEXT: retl
10201020 ;
10211021 ; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
10711071 ; CHECK: # %bb.0:
10721072 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
10731073 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1074 ; CHECK-NEXT: movl 12(%eax,%ecx), %eax
1074 ; CHECK-NEXT: movl 12(%ecx,%eax), %eax
10751075 ; CHECK-NEXT: retl
10761076 ;
10771077 ; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
6565 ; GENERIC-NEXT: movzbl 2(%r8,%rbx,4), %ebx
6666 ; GENERIC-NEXT: shll $16, %ebx
6767 ; GENERIC-NEXT: orl %eax, %ebx
68 ; GENERIC-NEXT: xorl 16(%rdx,%rcx), %ebx
68 ; GENERIC-NEXT: xorl 16(%rcx,%rdx), %ebx
6969 ; GENERIC-NEXT: shrl $8, %edi
7070 ; GENERIC-NEXT: movzbl 3(%r9,%rdi,4), %eax
7171 ; GENERIC-NEXT: shll $24, %eax
7373 ; GENERIC-NEXT: movzbl 2(%r8,%rdi,4), %edi
7474 ; GENERIC-NEXT: shll $16, %edi
7575 ; GENERIC-NEXT: orl %eax, %edi
76 ; GENERIC-NEXT: xorl 20(%rdx,%rcx), %edi
76 ; GENERIC-NEXT: xorl 20(%rcx,%rdx), %edi
7777 ; GENERIC-NEXT: movl %ebx, %eax
7878 ; GENERIC-NEXT: shrl $24, %eax
7979 ; GENERIC-NEXT: movb %al, (%rsi)
155155 ; ATOM-NEXT: shll $16, %eax
156156 ; ATOM-NEXT: orl %edi, %ebp
157157 ; ATOM-NEXT: orl %r15d, %eax
158 ; ATOM-NEXT: xorl 20(%rdx,%rcx), %ebp
159 ; ATOM-NEXT: xorl 16(%rdx,%rcx), %eax
158 ; ATOM-NEXT: xorl 20(%rcx,%rdx), %ebp
159 ; ATOM-NEXT: xorl 16(%rcx,%rdx), %eax
160160 ; ATOM-NEXT: movl %eax, %edi
161161 ; ATOM-NEXT: shrl $16, %eax
162162 ; ATOM-NEXT: shrl $24, %edi
4343 define void @indexed_store_merge(i64 %p, i8* %v) {
4444 ; CHECK-LABEL: indexed_store_merge:
4545 ; CHECK: # %bb.0: # %entry
46 ; CHECK-NEXT: movl $0, 2(%rsi,%rdi)
46 ; CHECK-NEXT: movl $0, 2(%rdi,%rsi)
4747 ; CHECK-NEXT: movb $0, (%rsi)
4848 ; CHECK-NEXT: retq
4949 entry:
14021402 ; SSE2-NEXT: movdqu (%rdi), %xmm0
14031403 ; SSE2-NEXT: movdqu (%rsi), %xmm1
14041404 ; SSE2-NEXT: psadbw %xmm0, %xmm1
1405 ; SSE2-NEXT: movdqu (%rdx), %xmm0
1406 ; SSE2-NEXT: movdqu (%rcx), %xmm2
1407 ; SSE2-NEXT: psadbw %xmm0, %xmm2
14051408 ; SSE2-NEXT: movl $1, %eax
14061409 ; SSE2-NEXT: movd %eax, %xmm0
1410 ; SSE2-NEXT: paddd %xmm2, %xmm0
14071411 ; SSE2-NEXT: paddd %xmm1, %xmm0
1408 ; SSE2-NEXT: movdqu (%rdx), %xmm1
1409 ; SSE2-NEXT: movdqu (%rcx), %xmm2
1410 ; SSE2-NEXT: psadbw %xmm1, %xmm2
1411 ; SSE2-NEXT: paddd %xmm0, %xmm2
1412 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
1413 ; SSE2-NEXT: paddd %xmm2, %xmm0
1414 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1412 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
14151413 ; SSE2-NEXT: paddd %xmm0, %xmm1
1416 ; SSE2-NEXT: movd %xmm1, %eax
1414 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1415 ; SSE2-NEXT: paddd %xmm1, %xmm0
1416 ; SSE2-NEXT: movd %xmm0, %eax
14171417 ; SSE2-NEXT: retq
14181418 ;
14191419 ; AVX1-LABEL: sad_unroll_nonzero_initial:
14241424 ; AVX1-NEXT: vpsadbw (%rcx), %xmm1, %xmm1
14251425 ; AVX1-NEXT: movl $1, %eax
14261426 ; AVX1-NEXT: vmovd %eax, %xmm2
1427 ; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
1428 ; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
1427 ; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1428 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
14291429 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
14301430 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
14311431 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
14371437 ; AVX2: # %bb.0: # %bb
14381438 ; AVX2-NEXT: vmovdqu (%rdi), %xmm0
14391439 ; AVX2-NEXT: vpsadbw (%rsi), %xmm0, %xmm0
1440 ; AVX2-NEXT: movl $1, %eax
1441 ; AVX2-NEXT: vmovd %eax, %xmm1
1442 ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
14431440 ; AVX2-NEXT: vmovdqu (%rdx), %xmm1
14441441 ; AVX2-NEXT: vpsadbw (%rcx), %xmm1, %xmm1
1445 ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
1442 ; AVX2-NEXT: movl $1, %eax
1443 ; AVX2-NEXT: vmovd %eax, %xmm2
1444 ; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
1445 ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
14461446 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
14471447 ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
14481448 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
14571457 ; AVX512: # %bb.0: # %bb
14581458 ; AVX512-NEXT: vmovdqu (%rdi), %xmm0
14591459 ; AVX512-NEXT: vpsadbw (%rsi), %xmm0, %xmm0
1460 ; AVX512-NEXT: movl $1, %eax
1461 ; AVX512-NEXT: vmovd %eax, %xmm1
1462 ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
14631460 ; AVX512-NEXT: vmovdqu (%rdx), %xmm1
14641461 ; AVX512-NEXT: vpsadbw (%rcx), %xmm1, %xmm1
1465 ; AVX512-NEXT: vpaddd %zmm0, %zmm1, %zmm0
1462 ; AVX512-NEXT: movl $1, %eax
1463 ; AVX512-NEXT: vmovd %eax, %xmm2
1464 ; AVX512-NEXT: vpaddd %zmm2, %zmm1, %zmm1
1465 ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
14661466 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
14671467 ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
14681468 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
145145 ;
146146 ; AVX2-LABEL: zext_and_v8i16:
147147 ; AVX2: # %bb.0:
148 ; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
148 ; AVX2-NEXT: vandps %xmm0, %xmm1, %xmm0
149149 ; AVX2-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
150150 ; AVX2-NEXT: retq
151151 %xz = zext <8 x i8> %x to <8 x i16>