llvm.org GIT mirror llvm / b7b7394
[X86] Add load folding support to the custom isel we do for X86ISD::UMUL/SMUL. The peephole pass isn't always able to fold the load because it can't commute the implicit usage of AL/AX/EAX/RAX. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350272 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 8 months ago
3 changed file(s) with 77 addition(s) and 38 deletion(s). Raw diff Collapse all Expand all
34533453 SDValue N0 = Node->getOperand(0);
34543454 SDValue N1 = Node->getOperand(1);
34553455
3456 unsigned LoReg, Opc;
3456 unsigned LoReg, ROpc, MOpc;
34573457 switch (NVT.SimpleTy) {
34583458 default: llvm_unreachable("Unsupported VT!");
34593459 case MVT::i8:
34603460 LoReg = X86::AL;
3461 Opc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
3461 ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
3462 MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
34623463 break;
3463 case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
3464 case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
3465 case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
3464 case MVT::i16:
3465 LoReg = X86::AX;
3466 ROpc = X86::MUL16r;
3467 MOpc = X86::MUL16m;
3468 break;
3469 case MVT::i32:
3470 LoReg = X86::EAX;
3471 ROpc = X86::MUL32r;
3472 MOpc = X86::MUL32m;
3473 break;
3474 case MVT::i64:
3475 LoReg = X86::RAX;
3476 ROpc = X86::MUL64r;
3477 MOpc = X86::MUL64m;
3478 break;
3479 }
3480
3481 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
3482 bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3483 // Multiply is commutative.
3484 if (!FoldedLoad) {
3485 FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
3486 if (FoldedLoad)
3487 std::swap(N0, N1);
34663488 }
34673489
34683490 SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
34693491 N0, SDValue()).getValue(1);
34703492
3471 // i16/i32/i64 use an instruction that produces a low and high result even
3472 // though only the low result is used.
3473 SDVTList VTs;
3474 if (NVT == MVT::i8)
3475 VTs = CurDAG->getVTList(NVT, MVT::i32);
3476 else
3477 VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
3478
3479 SDValue Ops[] = {N1, InFlag};
3480 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3493 MachineSDNode *CNode;
3494 if (FoldedLoad) {
3495 // i16/i32/i64 use an instruction that produces a low and high result even
3496 // though only the low result is used.
3497 SDVTList VTs;
3498 if (NVT == MVT::i8)
3499 VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
3500 else
3501 VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
3502
3503 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
3504 InFlag };
3505 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3506
3507 // Update the chain.
3508 ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
3509 // Record the mem-refs
3510 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
3511 } else {
3512 // i16/i32/i64 use an instruction that produces a low and high result even
3513 // though only the low result is used.
3514 SDVTList VTs;
3515 if (NVT == MVT::i8)
3516 VTs = CurDAG->getVTList(NVT, MVT::i32);
3517 else
3518 VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
3519
3520 CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
3521 }
3522
34813523 ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
34823524 ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
34833525 CurDAG->RemoveDeadNode(Node);
66 define zeroext i1 @a(i32 %x) nounwind {
77 ; X86-LABEL: a:
88 ; X86: # %bb.0:
9 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
10 ; X86-NEXT: movl $3, %ecx
11 ; X86-NEXT: mull %ecx
9 ; X86-NEXT: movl $3, %eax
10 ; X86-NEXT: mull {{[0-9]+}}(%esp)
1211 ; X86-NEXT: seto %al
1312 ; X86-NEXT: retl
1413 ;
724724 define zeroext i1 @smuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
725725 ; SDAG-LABEL: smuloi8_load:
726726 ; SDAG: ## %bb.0:
727 ; SDAG-NEXT: movb (%rdi), %al
728 ; SDAG-NEXT: imulb %sil
727 ; SDAG-NEXT: movl %esi, %eax
728 ; SDAG-NEXT: ## kill: def $al killed $al killed $eax
729 ; SDAG-NEXT: imulb (%rdi)
729730 ; SDAG-NEXT: seto %cl
730731 ; SDAG-NEXT: movb %al, (%rdx)
731732 ; SDAG-NEXT: movl %ecx, %eax
752753 ; SDAG-LABEL: smuloi8_load2:
753754 ; SDAG: ## %bb.0:
754755 ; SDAG-NEXT: movl %edi, %eax
755 ; SDAG-NEXT: movb (%rsi), %cl
756756 ; SDAG-NEXT: ## kill: def $al killed $al killed $eax
757 ; SDAG-NEXT: imulb %cl
757 ; SDAG-NEXT: imulb (%rsi)
758758 ; SDAG-NEXT: seto %cl
759759 ; SDAG-NEXT: movb %al, (%rdx)
760760 ; SDAG-NEXT: movl %ecx, %eax
925925 define zeroext i1 @umuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
926926 ; SDAG-LABEL: umuloi8_load:
927927 ; SDAG: ## %bb.0:
928 ; SDAG-NEXT: movb (%rdi), %al
929 ; SDAG-NEXT: mulb %sil
928 ; SDAG-NEXT: movl %esi, %eax
929 ; SDAG-NEXT: ## kill: def $al killed $al killed $eax
930 ; SDAG-NEXT: mulb (%rdi)
930931 ; SDAG-NEXT: seto %cl
931932 ; SDAG-NEXT: movb %al, (%rdx)
932933 ; SDAG-NEXT: movl %ecx, %eax
953954 ; SDAG-LABEL: umuloi8_load2:
954955 ; SDAG: ## %bb.0:
955956 ; SDAG-NEXT: movl %edi, %eax
956 ; SDAG-NEXT: movb (%rsi), %cl
957957 ; SDAG-NEXT: ## kill: def $al killed $al killed $eax
958 ; SDAG-NEXT: mulb %cl
958 ; SDAG-NEXT: mulb (%rsi)
959959 ; SDAG-NEXT: seto %cl
960960 ; SDAG-NEXT: movb %al, (%rdx)
961961 ; SDAG-NEXT: movl %ecx, %eax
983983 ; SDAG-LABEL: umuloi16_load:
984984 ; SDAG: ## %bb.0:
985985 ; SDAG-NEXT: movq %rdx, %rcx
986 ; SDAG-NEXT: movzwl (%rdi), %eax
987 ; SDAG-NEXT: mulw %si
986 ; SDAG-NEXT: movl %esi, %eax
987 ; SDAG-NEXT: ## kill: def $ax killed $ax killed $eax
988 ; SDAG-NEXT: mulw (%rdi)
988989 ; SDAG-NEXT: seto %dl
989990 ; SDAG-NEXT: movw %ax, (%rcx)
990991 ; SDAG-NEXT: movl %edx, %eax
10131014 ; SDAG: ## %bb.0:
10141015 ; SDAG-NEXT: movq %rdx, %rcx
10151016 ; SDAG-NEXT: movl %edi, %eax
1016 ; SDAG-NEXT: movzwl (%rsi), %edx
10171017 ; SDAG-NEXT: ## kill: def $ax killed $ax killed $eax
1018 ; SDAG-NEXT: mulw %dx
1018 ; SDAG-NEXT: mulw (%rsi)
10191019 ; SDAG-NEXT: seto %dl
10201020 ; SDAG-NEXT: movw %ax, (%rcx)
10211021 ; SDAG-NEXT: movl %edx, %eax
10441044 ; SDAG-LABEL: umuloi32_load:
10451045 ; SDAG: ## %bb.0:
10461046 ; SDAG-NEXT: movq %rdx, %rcx
1047 ; SDAG-NEXT: movl (%rdi), %eax
1048 ; SDAG-NEXT: mull %esi
1047 ; SDAG-NEXT: movl %esi, %eax
1048 ; SDAG-NEXT: mull (%rdi)
10491049 ; SDAG-NEXT: seto %dl
10501050 ; SDAG-NEXT: movl %eax, (%rcx)
10511051 ; SDAG-NEXT: movl %edx, %eax
10741074 ; SDAG: ## %bb.0:
10751075 ; SDAG-NEXT: movq %rdx, %rcx
10761076 ; SDAG-NEXT: movl %edi, %eax
1077 ; SDAG-NEXT: movl (%rsi), %edx
1078 ; SDAG-NEXT: mull %edx
1077 ; SDAG-NEXT: mull (%rsi)
10791078 ; SDAG-NEXT: seto %dl
10801079 ; SDAG-NEXT: movl %eax, (%rcx)
10811080 ; SDAG-NEXT: movl %edx, %eax
11031102 ; SDAG-LABEL: umuloi64_load:
11041103 ; SDAG: ## %bb.0:
11051104 ; SDAG-NEXT: movq %rdx, %rcx
1106 ; SDAG-NEXT: movq (%rdi), %rax
1107 ; SDAG-NEXT: mulq %rsi
1105 ; SDAG-NEXT: movq %rsi, %rax
1106 ; SDAG-NEXT: mulq (%rdi)
11081107 ; SDAG-NEXT: seto %dl
11091108 ; SDAG-NEXT: movq %rax, (%rcx)
11101109 ; SDAG-NEXT: movl %edx, %eax
11331132 ; SDAG: ## %bb.0:
11341133 ; SDAG-NEXT: movq %rdx, %rcx
11351134 ; SDAG-NEXT: movq %rdi, %rax
1136 ; SDAG-NEXT: movq (%rsi), %rdx
1137 ; SDAG-NEXT: mulq %rdx
1135 ; SDAG-NEXT: mulq (%rsi)
11381136 ; SDAG-NEXT: seto %dl
11391137 ; SDAG-NEXT: movq %rax, (%rcx)
11401138 ; SDAG-NEXT: movl %edx, %eax