llvm.org GIT mirror llvm / 3a96e61
R600/SI: Sub-optimal fix for 64-bit immediates with SALU ops. No longer asserts, but now you get moves loading legal immediates into the split 32-bit operations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204661 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 6 years ago
3 changed file(s) with 108 addition(s) and 54 deletion(s). Raw diff Collapse all Expand all
590590 return SubReg;
591591 }
592592
// Return a MachineOperand holding the requested 32-bit half (sub0 = low,
// sub1 = high) of a 64-bit operand. For an immediate operand this avoids
// building an extract: it returns a fresh immediate with the selected half.
// For a register operand it falls through to buildExtractSubReg and wraps
// the resulting subregister in a (use, non-kill) register operand.
593 MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
594 MachineBasicBlock::iterator MII,
595 MachineRegisterInfo &MRI,
596 MachineOperand &Op,
597 const TargetRegisterClass *SuperRC,
598 unsigned SubIdx,
599 const TargetRegisterClass *SubRC) const {
600 if (Op.isImm()) {
601 // XXX - Is there a better way to do this?
602 if (SubIdx == AMDGPU::sub0)
603 return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
604 if (SubIdx == AMDGPU::sub1)
// NOTE(review): getImm() is signed, so >> 32 is an arithmetic shift; the
// low 32 bits of the result are the high half, which is what the split
// 32-bit instruction consumes. Confirm sign-extension is harmless here.
605 return MachineOperand::CreateImm(Op.getImm() >> 32);
606
607 llvm_unreachable("Unhandled register index for immediate");
608 }
609
// Register case: materialize a COPY of the subregister and hand back a
// plain use operand for it.
610 unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
611 SubIdx, SubRC);
612 return MachineOperand::CreateReg(SubReg, false);
613 }
614
593615 unsigned SIInstrInfo::split64BitImm(SmallVectorImpl &Worklist,
594616 MachineBasicBlock::iterator MI,
595617 MachineRegisterInfo &MRI,
9971019 MachineBasicBlock &MBB = *Inst->getParent();
9981020 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
9991021
1000 // We shouldn't need to worry about immediate operands here.
10011022 MachineOperand &Dest = Inst->getOperand(0);
10021023 MachineOperand &Src0 = Inst->getOperand(1);
10031024 MachineOperand &Src1 = Inst->getOperand(2);
10081029 const MCInstrDesc &InstDesc = get(Opcode);
10091030 const TargetRegisterClass *RC = MRI.getRegClass(Src0.getReg());
10101031 const TargetRegisterClass *SubRC = RI.getSubRegClass(RC, AMDGPU::sub0);
1011 unsigned SrcReg0Sub0 = buildExtractSubReg(MII, MRI, Src0, RC,
1012 AMDGPU::sub0, SubRC);
1013 unsigned SrcReg1Sub0 = buildExtractSubReg(MII, MRI, Src1, RC,
1014 AMDGPU::sub0, SubRC);
1015
1016 unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
1032 MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, RC,
1033 AMDGPU::sub0, SubRC);
1034 MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, RC,
1035 AMDGPU::sub0, SubRC);
1036
1037 unsigned DestSub0 = MRI.createVirtualRegister(SubRC);
10171038 MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
1018 .addReg(SrcReg0Sub0)
1019 .addReg(SrcReg1Sub0);
1020
1021 unsigned SrcReg0Sub1 = buildExtractSubReg(MII, MRI, Src0, RC,
1022 AMDGPU::sub1, SubRC);
1023 unsigned SrcReg1Sub1 = buildExtractSubReg(MII, MRI, Src1, RC,
1024 AMDGPU::sub1, SubRC);
1025
1026 unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
1039 .addOperand(SrcReg0Sub0)
1040 .addOperand(SrcReg1Sub0);
1041
1042 MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, RC,
1043 AMDGPU::sub1, SubRC);
1044 MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, RC,
1045 AMDGPU::sub1, SubRC);
1046
1047 unsigned DestSub1 = MRI.createVirtualRegister(SubRC);
10271048 MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
1028 .addReg(SrcReg0Sub1)
1029 .addReg(SrcReg1Sub1);
1030
1031 unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
1049 .addOperand(SrcReg0Sub1)
1050 .addOperand(SrcReg1Sub1);
1051
1052 unsigned FullDestReg = MRI.createVirtualRegister(RC);
10321053 BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
10331054 .addReg(DestSub0)
10341055 .addImm(AMDGPU::sub0)
3030 const TargetRegisterClass *SuperRC,
3131 unsigned SubIdx,
3232 const TargetRegisterClass *SubRC) const;
33 MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
34 MachineRegisterInfo &MRI,
35 MachineOperand &SuperReg,
36 const TargetRegisterClass *SuperRC,
37 unsigned SubIdx,
38 const TargetRegisterClass *SubRC) const;
3339
3440 unsigned split64BitImm(SmallVectorImpl &Worklist,
3541 MachineBasicBlock::iterator MI,
3743 const TargetRegisterClass *RC,
3844 const MachineOperand &Op) const;
3945
40 void splitScalar64BitOp(SmallVectorImpl &Worklist,
46 void splitScalar64BitOp(SmallVectorImpl & Worklist,
4147 MachineInstr *Inst, unsigned Opcode) const;
4248
4349
None ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
1 ;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
0 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
1 ;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
22
3 ; EG-CHECK-LABEL: @or_v2i32
4 ; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
5 ; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
3 ; EG-LABEL: @or_v2i32
4 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
5 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
66
7 ;SI-CHECK-LABEL: @or_v2i32
8 ;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
9 ;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
7 ; SI-LABEL: @or_v2i32
8 ; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
9 ; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
1010
1111 define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
1212 %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
1717 ret void
1818 }
1919
20 ; EG-CHECK-LABEL: @or_v4i32
21 ; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
22 ; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
23 ; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
24 ; EG-CHECK: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
20 ; EG-LABEL: @or_v4i32
21 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
22 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
23 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
24 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2525
26 ;SI-CHECK-LABEL: @or_v4i32
27 ;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
28 ;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
29 ;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
30 ;SI-CHECK: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
26 ; SI-LABEL: @or_v4i32
27 ; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
28 ; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
29 ; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
30 ; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
3131
3232 define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
3333 %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
3838 ret void
3939 }
4040
41 ; SI-CHECK-LABEL: @scalar_or_i32
42 ; SI-CHECK: S_OR_B32
41 ; SI-LABEL: @scalar_or_i32
42 ; SI: S_OR_B32
; OR of two scalar (kernel-argument) i32 values stored to global memory;
; the SI check above expects this to select the scalar ALU op S_OR_B32.
4343 define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
4444 %or = or i32 %a, %b
4545 store i32 %or, i32 addrspace(1)* %out
4646 ret void
4747 }
4848
49 ; SI-CHECK-LABEL: @vector_or_i32
50 ; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
49 ; SI-LABEL: @vector_or_i32
50 ; SI: V_OR_B32_e32 v{{[0-9]}}
5151 define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
5252 %loada = load i32 addrspace(1)* %a
5353 %or = or i32 %loada, %b
5555 ret void
5656 }
5757
58 ; EG-CHECK-LABEL: @scalar_or_i64
59 ; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
60 ; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
61 ; SI-CHECK-LABEL: @scalar_or_i64
62 ; SI-CHECK: S_OR_B64
58 ; EG-LABEL: @scalar_or_i64
59 ; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
60 ; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
61 ; SI-LABEL: @scalar_or_i64
62 ; SI: S_OR_B64
; 64-bit OR of two kernel arguments; per the checks above, EG splits it into
; two OR_INT ops on the kernel-constant banks while SI selects S_OR_B64.
6363 define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
6464 %or = or i64 %a, %b
6565 store i64 %or, i64 addrspace(1)* %out
6666 ret void
6767 }
6868
69 ; SI-CHECK-LABEL: @vector_or_i64
70 ; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
71 ; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
69 ; SI-LABEL: @vector_or_i64
70 ; SI: V_OR_B32_e32 v{{[0-9]}}
71 ; SI: V_OR_B32_e32 v{{[0-9]}}
7272 define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
7373 %loada = load i64 addrspace(1)* %a, align 8
7474 %loadb = load i64 addrspace(1)* %a, align 8
7777 ret void
7878 }
7979
80 ; SI-CHECK-LABEL: @scalar_vector_or_i64
81 ; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
82 ; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
80 ; SI-LABEL: @scalar_vector_or_i64
81 ; SI: V_OR_B32_e32 v{{[0-9]}}
82 ; SI: V_OR_B32_e32 v{{[0-9]}}
; Mixed operands: one i64 loaded from memory (vector/VGPR value) OR'd with a
; scalar kernel argument; the SI checks above expect two V_OR_B32_e32, i.e.
; the 64-bit op is split into per-half 32-bit vector ops.
8383 define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
8484 %loada = load i64 addrspace(1)* %a
8585 %or = or i64 %loada, %b
8686 store i64 %or, i64 addrspace(1)* %out
8787 ret void
8888 }
89
90 ; SI-LABEL: @vector_or_i64_loadimm
91 ; SI-DAG: S_MOV_B32
92 ; SI-DAG: S_MOV_B32
93 ; SI-DAG: BUFFER_LOAD_DWORDX2
94 ; SI: V_OR_B32_e32
95 ; SI: V_OR_B32_e32
96 ; SI: S_ENDPGM
; New test for this commit: OR of a loaded i64 with a wide 64-bit immediate
; whose halves are not inline constants. Per the SI-DAG checks above, each
; half is materialized with an S_MOV_B32 and consumed by a split V_OR_B32_e32
; (previously this case asserted — see the commit message).
97 define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
98 %loada = load i64 addrspace(1)* %a, align 8
99 %or = or i64 %loada, 22470723082367
100 store i64 %or, i64 addrspace(1)* %out
101 ret void
102 }
103
104 ; FIXME: The or 0 should really be removed.
105 ; SI-LABEL: @vector_or_i64_imm
106 ; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
107 ; SI: V_OR_B32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]]
108 ; SI: V_OR_B32_e32 {{v[0-9]+}}, 0, {{.*}}
109 ; SI: S_ENDPGM
; New test for this commit: OR with a small immediate (8). The low half uses
; the inline constant 8 directly; the high half is OR'd with 0, which the
; FIXME above notes should ideally be folded away rather than emitted.
110 define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
111 %loada = load i64 addrspace(1)* %a, align 8
112 %or = or i64 %loada, 8
113 store i64 %or, i64 addrspace(1)* %out
114 ret void
115 }