llvm.org GIT mirror: llvm / db18071
R600/SI: Fix 64-bit bit ops that require the VALU.

Try to match the scalar and first, like the other instructions. Expand 64-bit ands into a pair of 32-bit ands, since a 64-bit and is not available on the VALU.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204660 91177308-0d34-0410-b5e6-96231b3b80d8
Matt Arsenault, 6 years ago
4 changed files with 108 additions and 8 deletions.
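For intuition before the diff: a 64-bit bitwise op can be computed as the same 32-bit op applied independently to the low and high halves. A minimal standalone C++ sketch of that identity (plain integer code, not the MachineInstr-based splitting the commit adds):

#include <cstdint>

// 64-bit AND as two independent 32-bit ANDs. splitScalar64BitOp does the
// same thing at the MachineInstr level: one 32-bit op per sub0/sub1 half,
// with the final shift/or here playing the role of REG_SEQUENCE.
uint64_t and64_via_two_and32(uint64_t a, uint64_t b) {
  uint32_t Lo = uint32_t(a) & uint32_t(b);             // sub0 (low) half
  uint32_t Hi = uint32_t(a >> 32) & uint32_t(b >> 32); // sub1 (high) half
  return (uint64_t(Hi) << 32) | Lo;                    // recombine
}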
lib/Target/R600/SIInstrInfo.cpp:

       Inst->eraseFromParent();
       continue;
     }
+    case AMDGPU::S_AND_B64:
+      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_AND_B32);
+      Inst->eraseFromParent();
+      continue;
+
+    case AMDGPU::S_OR_B64:
+      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_OR_B32);
+      Inst->eraseFromParent();
+      continue;
+
+    case AMDGPU::S_XOR_B64:
+      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+      Inst->eraseFromParent();
+      continue;
+
+    case AMDGPU::S_NOT_B64:
+      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+      Inst->eraseFromParent();
+      continue;
+
+    case AMDGPU::S_BFE_U64:
+    case AMDGPU::S_BFE_I64:
+    case AMDGPU::S_BFM_B64:
+      llvm_unreachable("Moving this op to VALU not implemented");
     }

     unsigned NewOpcode = getVALUOp(*Inst);
   return &AMDGPU::VReg_32RegClass;
 }

+void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
+                                     MachineInstr *Inst,
+                                     unsigned Opcode) const {
+  MachineBasicBlock &MBB = *Inst->getParent();
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+  // We shouldn't need to worry about immediate operands here.
+  MachineOperand &Dest = Inst->getOperand(0);
+  MachineOperand &Src0 = Inst->getOperand(1);
+  MachineOperand &Src1 = Inst->getOperand(2);
+  DebugLoc DL = Inst->getDebugLoc();
+
+  MachineBasicBlock::iterator MII = Inst;
+
+  const MCInstrDesc &InstDesc = get(Opcode);
+  const TargetRegisterClass *RC = MRI.getRegClass(Src0.getReg());
+  const TargetRegisterClass *SubRC = RI.getSubRegClass(RC, AMDGPU::sub0);
+  unsigned SrcReg0Sub0 = buildExtractSubReg(MII, MRI, Src0, RC,
+                                            AMDGPU::sub0, SubRC);
+  unsigned SrcReg1Sub0 = buildExtractSubReg(MII, MRI, Src1, RC,
+                                            AMDGPU::sub0, SubRC);
+
+  unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+    .addReg(SrcReg0Sub0)
+    .addReg(SrcReg1Sub0);
+
+  unsigned SrcReg0Sub1 = buildExtractSubReg(MII, MRI, Src0, RC,
+                                            AMDGPU::sub1, SubRC);
+  unsigned SrcReg1Sub1 = buildExtractSubReg(MII, MRI, Src1, RC,
+                                            AMDGPU::sub1, SubRC);
+
+  unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+    .addReg(SrcReg0Sub1)
+    .addReg(SrcReg1Sub1);
+
+  unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+    .addReg(DestSub0)
+    .addImm(AMDGPU::sub0)
+    .addReg(DestSub1)
+    .addImm(AMDGPU::sub1);
+
+  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+
+  // Try to legalize the operands in case we need to swap the order to keep it
+  // valid.
+  Worklist.push_back(LoHalf);
+  Worklist.push_back(HiHalf);
+}
+
 MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
   MachineBasicBlock *MBB,
   MachineBasicBlock::iterator I,
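Note that splitScalar64BitOp deliberately does not legalize the LoHalf/HiHalf instructions it creates; it pushes them onto the caller's worklist instead. A minimal sketch of that worklist pattern, using hypothetical stand-in types rather than the actual SIInstrInfo driver:

#include <utility>
#include <vector>

// Hypothetical stand-in for a machine instruction; illustration only.
struct Instr {
  bool NeedsSplit = false;
};

// Splitting a 64-bit op yields its two 32-bit halves as new instructions.
std::pair<Instr, Instr> splitOp(const Instr &) { return {Instr{}, Instr{}}; }

void legalizeOperands(Instr &) { /* e.g. swap operands to keep them valid */ }

// Worklist-driven fixup: instead of recursing, newly created halves are
// queued so they get legalized (and possibly split further) on a later
// iteration. This is why LoHalf and HiHalf are push_back'ed above.
void drainWorklist(std::vector<Instr> &Worklist) {
  while (!Worklist.empty()) {
    Instr I = Worklist.back();
    Worklist.pop_back();
    if (I.NeedsSplit) {
      auto [Lo, Hi] = splitOp(I);
      Worklist.push_back(Lo);
      Worklist.push_back(Hi);
      continue;
    }
    legalizeOperands(I);
  }
}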
lib/Target/R600/SIInstrInfo.h:

                               MachineRegisterInfo &MRI,
                               const TargetRegisterClass *RC,
                               const MachineOperand &Op) const;
+
+  void splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
+                          MachineInstr *Inst, unsigned Opcode) const;
+

 public:
   explicit SIInstrInfo(AMDGPUTargetMachine &tm);

   bool isSALUInstr(const MachineInstr &MI) const;
   static unsigned getVALUOp(const MachineInstr &MI);
+
   bool isSALUOpSupportedOnVALU(const MachineInstr &MI) const;

   /// \brief Return the correct register class for \p OpNo. For target-specific
lib/Target/R600/SIInstructions.td:

 >;

 def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64",
-  []
+  [(set i64:$dst, (or i64:$src0, i64:$src1))]
 >;

 def : Pat <
test/CodeGen/R600/or.ll:

   ret void
 }

-; EG-CHECK-LABEL: @or_i64
+; EG-CHECK-LABEL: @scalar_or_i64
 ; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
 ; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
-; SI-CHECK-LABEL: @or_i64
+; SI-CHECK-LABEL: @scalar_or_i64
+; SI-CHECK: S_OR_B64
+define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %or = or i64 %a, %b
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-CHECK-LABEL: @vector_or_i64
 ; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
 ; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
-define void @or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
-entry:
-  %0 = or i64 %a, %b
-  store i64 %0, i64 addrspace(1)* %out
-  ret void
+define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 8
+  %loadb = load i64 addrspace(1)* %b, align 8
+  %or = or i64 %loada, %loadb
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
 }
+
+; SI-CHECK-LABEL: @scalar_vector_or_i64
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
+define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
+  %loada = load i64 addrspace(1)* %a
+  %or = or i64 %loada, %b
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}