llvm.org GIT mirror: llvm / c71a1c3
Author: Nicolai Haehnle

AMDGPU: Make fixing i1 copies robust against re-ordering

Summary: The new test case, whose block ordering visits a use of an i1 copy before its def, led to incorrect code.

Change-Id: Ief48b227e97aa662dd3535c9bafb27d4a184efca

Reviewers: arsenm, david-salinas

Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63871

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364566 91177308-0d34-0410-b5e6-96231b3b80d8
2 changed files, 64 insertions(+), 12 deletions(-)
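The crux of the patch is a small predicate that recognizes registers still in the i1 placeholder class, so that the check sites no longer assume the defining copy has already been visited and lowered. A minimal sketch of that helper, lifted out of the diff below for readability (MRI is the pass's MachineRegisterInfo member, as in the surrounding code):

    // A register is a "vreg_1" if it is a virtual register whose class is
    // still the i1 placeholder class AMDGPU::VReg_1RegClass, i.e. the pass
    // has not yet rewritten it to a lane-mask register.
    bool isVreg1(unsigned Reg) const {
      return TargetRegisterInfo::isVirtualRegister(Reg) &&
             MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
    }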
--- a/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -94,6 +94,11 @@
   MachineBasicBlock::iterator
   getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
 
+  bool isVreg1(unsigned Reg) const {
+    return TargetRegisterInfo::isVirtualRegister(Reg) &&
+           MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
+  }
+
   bool isLaneMaskReg(unsigned Reg) const {
     return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
            TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
@@ -493,13 +498,10 @@
 
       unsigned DstReg = MI.getOperand(0).getReg();
       unsigned SrcReg = MI.getOperand(1).getReg();
-      if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
-          MRI->getRegClass(SrcReg) != &AMDGPU::VReg_1RegClass)
-        continue;
-
-      if (isLaneMaskReg(DstReg) ||
-          (TargetRegisterInfo::isVirtualRegister(DstReg) &&
-           MRI->getRegClass(DstReg) == &AMDGPU::VReg_1RegClass))
+      if (!isVreg1(SrcReg))
+        continue;
+
+      if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
         continue;
 
       // Copy into a 32-bit vector register.
@@ -542,7 +544,7 @@
 
     for (MachineInstr &MI : MBB.phis()) {
       unsigned DstReg = MI.getOperand(0).getReg();
-      if (MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+      if (!isVreg1(DstReg))
         continue;
 
       LLVM_DEBUG(dbgs() << "Lower PHI: " << MI);
@@ -559,7 +561,7 @@
 
       if (IncomingDef->getOpcode() == AMDGPU::COPY) {
         IncomingReg = IncomingDef->getOperand(1).getReg();
-        assert(isLaneMaskReg(IncomingReg));
+        assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
         assert(!IncomingDef->getOperand(1).getSubReg());
       } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
         continue;
@@ -667,8 +669,7 @@
         continue;
 
       unsigned DstReg = MI.getOperand(0).getReg();
-      if (!TargetRegisterInfo::isVirtualRegister(DstReg) ||
-          MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+      if (!isVreg1(DstReg))
         continue;
 
       if (MRI->use_empty(DstReg)) {
@@ -688,7 +689,7 @@
       assert(!MI.getOperand(1).getSubReg());
 
       if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
-          !isLaneMaskReg(SrcReg)) {
+          (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
         assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
         unsigned TmpReg = createLaneMaskReg(*MF);
         BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
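The net effect in the hunk above: a lane mask is materialized with V_CMP_NE_U32_e64 only when the copy's source is a plain 32-bit value. A source that is still a vreg_1, that is, one whose defining copy simply has not been visited yet under this block ordering, is now left alone. A condensed view of the post-patch logic, with the trailing BuildMI operands elided here as they are in the diff:

    if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
        (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
      // Source is a genuine 32-bit value: compare it against zero to
      // produce a lane mask in a fresh lane-mask register.
      assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
      unsigned TmpReg = createLaneMaskReg(*MF);
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
          /* ...operands comparing SrcReg with zero, as in the source... */;
    }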
New test (MIR):

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-i1-copies -o - %s | FileCheck %s

# The strange block ordering visits the use before the def.
---
name: inserted_cmp_operand_class_rpo
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  ; CHECK-LABEL: name: inserted_cmp_operand_class_rpo
  ; CHECK: bb.0:
  ; CHECK:   successors: %bb.3(0x80000000)
  ; CHECK:   S_BRANCH %bb.3
  ; CHECK: bb.1:
  ; CHECK:   successors: %bb.2(0x80000000)
  ; CHECK:   [[COPY:%[0-9]+]]:sreg_64 = COPY %1
  ; CHECK: bb.2:
  ; CHECK:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK:   [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]]
  ; CHECK:   S_ENDPGM 0
  ; CHECK: bb.3:
  ; CHECK:   successors: %bb.1(0x80000000)
  ; CHECK:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK:   [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
  ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_MOV_B32_e32_1]], killed [[S_MOV_B32_]], implicit $exec
  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_64 = COPY [[V_CMP_EQ_U32_e64_]]
  ; CHECK:   S_BRANCH %bb.1
  bb.0:
    successors: %bb.3

    S_BRANCH %bb.3

  bb.1:
    successors: %bb.2

    %0:vreg_1 = COPY %1

  bb.2:
    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %3:sreg_64_xexec = COPY %0
    S_ENDPGM 0

  bb.3:
    successors: %bb.1

    %4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %5:sreg_32_xm0 = S_MOV_B32 0
    %6:sreg_64 = V_CMP_EQ_U32_e64 killed %4, killed %5, implicit $exec
    %1:vreg_1 = COPY %6
    S_BRANCH %bb.1
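Why this input exercises the fix: in layout order the pass reaches %0:vreg_1 = COPY %1 in bb.1 before it has seen bb.3, where %1:vreg_1 = COPY %6 is defined, even though control flow runs bb.0 -> bb.3 -> bb.1 -> bb.2. Before the patch, the still-unlowered vreg_1 source was treated like a 32-bit value and a compare was inserted with an operand of the wrong register class, which is presumably what the test name alludes to. With the isVreg1 checks, the copy survives untouched and, as the CHECK lines in bb.1 show, simply becomes an sreg_64 lane-mask copy once the pass rewrites vreg_1 register classes.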