llvm.org GIT mirror llvm / commit 977c10d (Matt Arsenault, 2 months ago)

Merging r359891:
------------------------------------------------------------------------
r359891 | arsenm | 2019-05-03 07:40:10 -0700 (Fri, 03 May 2019) | 9 lines

AMDGPU: Replace shrunk instruction with dummy implicit_def

This was broken if the original operand was killed. The kill flag
would appear on both instructions, and fail the verifier. Keep the
kill flag, but remove the operands from the old instruction. This has
an added benefit of really reducing the use count for future folds.

Ideally the pass would be structured more like what PeepholeOptimizer
does to avoid this hack to avoid breaking instruction iterators.
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_80@362634 91177308-0d34-0410-b5e6-96231b3b80d8
2 changed file(s) with 64 addition(s) and 4 deletion(s).
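Before the diff, here is a minimal C++ sketch (not part of the commit) of the trick the message describes: instead of erasing the superseded 64-bit instruction, the pass keeps it in place so iterators stay valid, points its result at a throwaway register, strips the remaining operands (and with them the duplicated kill flag), and rewrites the opcode to IMPLICIT_DEF. The helper name and signature below are illustrative only; in the patch the equivalent lines appear inline in the fold-operands code, as the first hunk shows.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// Illustrative helper (hypothetical name): neutralize an instruction that
// has already been replaced by a shrunk copy, without erasing it.
static void replaceWithDummyImplicitDef(MachineInstr &MI,
                                        MachineRegisterInfo &MRI,
                                        const TargetInstrInfo &TII) {
  // Retarget the def to a fresh virtual register, so the real result is now
  // defined only by the new, shrunk instruction.
  MachineOperand &Dst0 = MI.getOperand(0);
  unsigned NewReg0 = MRI.createVirtualRegister(MRI.getRegClass(Dst0.getReg()));
  Dst0.setReg(NewReg0);

  // Drop every operand except the def. This removes the old instruction's
  // uses, and with them any kill flags, so the kill flag kept on the shrunk
  // instruction is no longer duplicated and the machine verifier is
  // satisfied. It also lowers the use counts seen by later folds.
  for (unsigned I = MI.getNumOperands() - 1; I > 0; --I)
    MI.RemoveOperand(I);

  // Turn what is left into a harmless IMPLICIT_DEF of the dummy register.
  MI.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
}

The new MIR tests at the end of the diff check that the kill flag survives on the shrunk V_ADD_I32_e32 regardless of which source operand was killed.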
@@ -217,8 +217,6 @@
 
     const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
     unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
-    const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
-    unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
 
     MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
 
@@ -228,9 +226,15 @@
     }
 
     // Keep the old instruction around to avoid breaking iterators, but
-    // replace the outputs with dummy registers.
+    // replace it with a dummy instruction to remove uses.
+    //
+    // FIXME: We should not invert how this pass looks at operands to avoid
+    // this. Should track set of foldable movs instead of looking for uses
+    // when looking at a use.
     Dst0.setReg(NewReg0);
-    Dst1.setReg(NewReg1);
+    for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
+      MI->RemoveOperand(I);
+    MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
 
     if (Fold.isCommuted())
       TII.commuteInstruction(*Inst32, false);

@@ -589,3 +589,59 @@
     S_ENDPGM implicit %2
 
 ...
+
+---
+name: shrink_add_kill_flags_src0
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: shrink_add_kill_flags_src0
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 killed [[V_MOV_B32_e32_]], [[COPY]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 killed %1, %0, 0, implicit $exec
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: shrink_add_kill_flags_src1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: shrink_add_kill_flags_src1
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+    ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_MOV_B32_e32_]], killed [[COPY]], implicit-def $vcc, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_ADD_I32_e32_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %1, killed %0, 0, implicit $exec
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: shrink_addc_kill_flags_src2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vcc
+    ; GCN-LABEL: name: shrink_addc_kill_flags_src2
+    ; GCN: liveins: $vgpr0, $vcc
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $vcc
+    ; GCN: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[V_MOV_B32_e32_]], [[COPY]], [[COPY1]], 0, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 518144, implicit $exec
+    %2:sreg_64_xexec = COPY $vcc
+    %3:vgpr_32, %4:sreg_64_xexec = V_ADDC_U32_e64 %1, %0, %2, 0, implicit $exec
+    S_ENDPGM 0, implicit %3
+...