llvm.org GIT mirror llvm / add2e2e
R600/SI: Fix extra mov from legalizing 64-bit SALU ops. Check the register class of each operand individually to avoid an extra copy to a VGPR. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204662 91177308-0d34-0410-b5e6-96231b3b80d8 (Matt Arsenault, 6 years ago)
2 changed file(s) with 32 addition(s) and 20 deletion(s). Raw diff Collapse all Expand all
10271027 MachineBasicBlock::iterator MII = Inst;
10281028
10291029 const MCInstrDesc &InstDesc = get(Opcode);
1030 const TargetRegisterClass *RC = MRI.getRegClass(Src0.getReg());
1031 const TargetRegisterClass *SubRC = RI.getSubRegClass(RC, AMDGPU::sub0);
1032 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, RC,
1033 AMDGPU::sub0, SubRC);
1034 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, RC,
1035 AMDGPU::sub0, SubRC);
1036
1037 unsigned DestSub0 = MRI.createVirtualRegister(SubRC);
1030 const TargetRegisterClass *Src0RC = Src0.isReg() ?
1031 MRI.getRegClass(Src0.getReg()) :
1032 &AMDGPU::SGPR_32RegClass;
1033
1034 const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
1035 const TargetRegisterClass *Src1RC = Src1.isReg() ?
1036 MRI.getRegClass(Src1.getReg()) :
1037 &AMDGPU::SGPR_32RegClass;
1038
1039 const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
1040
1041 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
1042 AMDGPU::sub0, Src0SubRC);
1043 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
1044 AMDGPU::sub0, Src1SubRC);
1045
1046 const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
1047 const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
1048
1049 unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
10381050 MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
10391051 .addOperand(SrcReg0Sub0)
10401052 .addOperand(SrcReg1Sub0);
10411053
1042 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, RC,
1043 AMDGPU::sub1, SubRC);
1044 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, RC,
1045 AMDGPU::sub1, SubRC);
1046
1047 unsigned DestSub1 = MRI.createVirtualRegister(SubRC);
1054 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
1055 AMDGPU::sub1, Src0SubRC);
1056 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
1057 AMDGPU::sub1, Src1SubRC);
1058
1059 unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
10481060 MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
10491061 .addOperand(SrcReg0Sub1)
10501062 .addOperand(SrcReg1Sub1);
10511063
1052 unsigned FullDestReg = MRI.createVirtualRegister(RC);
1064 unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
10531065 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
10541066 .addReg(DestSub0)
10551067 .addImm(AMDGPU::sub0)
8888 }
8989
9090 ; SI-LABEL: @vector_or_i64_loadimm
91 ; SI-DAG: S_MOV_B32
92 ; SI-DAG: S_MOV_B32
93 ; SI-DAG: BUFFER_LOAD_DWORDX2
94 ; SI: V_OR_B32_e32
95 ; SI: V_OR_B32_e32
91 ; SI-DAG: S_MOV_B32 [[LO_S_IMM:s[0-9]+]], -545810305
92 ; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 5231
93 ; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
94 ; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
95 ; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
9696 ; SI: S_ENDPGM
9797 define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
9898 %loada = load i64 addrspace(1)* %a, align 8