llvm.org GIT mirror llvm / c64ec29
Merging r235641: ------------------------------------------------------------------------ r235641 | thomas.stellard | 2015-04-23 16:32:01 -0400 (Thu, 23 Apr 2015) | 9 lines R600/SI: Fix indirect addressing with a negative constant offset When the base register index of the vector plus the constant offset was less than zero, we were passing the wrong base register to the indirect addressing instruction. In this case, we need to set the base register to v0 and then add the computed (negative) index to m0. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@236449 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 4 years ago
2 changed file(s) with 127 addition(s) and 16 deletion(s). Raw diff Collapse all Expand all
8787 void Kill(MachineInstr &MI);
8888 void Branch(MachineInstr &MI);
8989
90 void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
90 void LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0);
91 void computeIndirectRegAndOffset(unsigned VecReg, unsigned &Reg, int &Offset);
9192 void IndirectSrc(MachineInstr &MI);
9293 void IndirectDst(MachineInstr &MI);
9394
322323 MI.eraseFromParent();
323324 }
324325
325 void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
326 void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
326327
327328 MachineBasicBlock &MBB = *MI.getParent();
328329 DebugLoc DL = MI.getDebugLoc();
332333 unsigned Idx = MI.getOperand(3).getReg();
333334
334335 if (AMDGPU::SReg_32RegClass.contains(Idx)) {
335 BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
336 .addReg(Idx);
336 if (Offset) {
337 BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
338 .addReg(Idx)
339 .addImm(Offset);
340 } else {
341 BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
342 .addReg(Idx);
343 }
337344 MBB.insert(I, MovRel);
338345 } else {
339346
362369 BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
363370 .addReg(AMDGPU::VCC);
364371
372 if (Offset) {
373 BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
374 .addReg(AMDGPU::M0)
375 .addImm(Offset);
376 }
365377 // Do the actual move
366378 MBB.insert(I, MovRel);
367379
383395 MI.eraseFromParent();
384396 }
385397
398 /// Pick the base register for an indirect access and the residual M0 offset.
399 /// \param VecReg The register which holds element zero of the addressed vector.
400 /// \param[out] Reg The base register to use in the indirect addressing instruction.
401 /// \param[in,out] Offset As an input, this is the constant offset part of the
402 /// indirect Index. e.g. v0 = v[VecReg + Offset]
403 /// As an output, this is a constant value that needs
404 /// to be added to the value stored in M0.
405 void SILowerControlFlowPass::computeIndirectRegAndOffset(unsigned VecReg,
406 unsigned &Reg,
407 int &Offset) {
408 unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0);
409 if (!SubReg) // no sub0 sub-register was returned: use VecReg itself
410 SubReg = VecReg;
411
412 const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg);
413 int RegIdx = TRI->getHWRegIndex(SubReg) + Offset;
414
415 if (RegIdx < 0) { // the constant offset points below register index 0
416 Offset = RegIdx; // hand the negative remainder back to the caller
417 RegIdx = 0;
418 } else {
419 Offset = 0; // constant offset is fully folded into the base register
420 }
421
422 Reg = RC->getRegister(RegIdx);
423 }
424
386425 void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
387426
388427 MachineBasicBlock &MBB = *MI.getParent();
390429
391430 unsigned Dst = MI.getOperand(0).getReg();
392431 unsigned Vec = MI.getOperand(2).getReg();
393 unsigned Off = MI.getOperand(4).getImm();
394 unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
395 if (!SubReg)
396 SubReg = Vec;
432 int Off = MI.getOperand(4).getImm();
433 unsigned Reg;
434
435 computeIndirectRegAndOffset(Vec, Reg, Off);
397436
398437 MachineInstr *MovRel =
399438 BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
400 .addReg(SubReg + Off)
439 .addReg(Reg)
401440 .addReg(AMDGPU::M0, RegState::Implicit)
402441 .addReg(Vec, RegState::Implicit);
403442
404 LoadM0(MI, MovRel);
443 LoadM0(MI, MovRel, Off);
405444 }
406445
407446 void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
410449 DebugLoc DL = MI.getDebugLoc();
411450
412451 unsigned Dst = MI.getOperand(0).getReg();
413 unsigned Off = MI.getOperand(4).getImm();
452 int Off = MI.getOperand(4).getImm();
414453 unsigned Val = MI.getOperand(5).getReg();
415 unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
416 if (!SubReg)
417 SubReg = Dst;
454 unsigned Reg;
455
456 computeIndirectRegAndOffset(Dst, Reg, Off);
418457
419458 MachineInstr *MovRel =
420459 BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
421 .addReg(SubReg + Off, RegState::Define)
460 .addReg(Reg, RegState::Define)
422461 .addReg(Val)
423462 .addReg(AMDGPU::M0, RegState::Implicit)
424463 .addReg(Dst, RegState::Implicit);
425464
426 LoadM0(MI, MovRel);
465 LoadM0(MI, MovRel, Off);
427466 }
428467
429468 bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
2424 ret void
2525 }
2626
27 ; CHECK-LABEL: {{^}}extract_neg_offset_sgpr:
28 ; The offset depends on the register that holds the first element of the vector.
29 ; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
30 ; CHECK: v_movrels_b32_e32 v{{[0-9]}}, v0
31 define void @extract_neg_offset_sgpr(i32 addrspace(1)* %out, i32 %offset) {
32 entry:
33 %index = add i32 %offset, -512
34 %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
35 store i32 %value, i32 addrspace(1)* %out
36 ret void
37 }
38
39 ; CHECK-LABEL: {{^}}extract_neg_offset_vgpr:
40 ; The offset depends on the register that holds the first element of the vector.
41 ; CHECK: v_readfirstlane_b32
42 ; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
43 ; CHECK-NEXT: v_movrels_b32_e32 v{{[0-9]}}, v0
44 ; CHECK: s_cbranch_execnz
45 define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) {
46 entry:
47 %id = call i32 @llvm.r600.read.tidig.x() #1
48 %index = add i32 %id, -512
49 %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
50 store i32 %value, i32 addrspace(1)* %out
51 ret void
52 }
53
2754 ; CHECK-LABEL: {{^}}insert_w_offset:
2855 ; CHECK: s_mov_b32 m0
2956 ; CHECK-NEXT: v_movreld_b32_e32
4673 store float %1, float addrspace(1)* %out
4774 ret void
4875 }
76
77 ; CHECK-LABEL: {{^}}insert_neg_offset_sgpr:
78 ; The offset depends on the register that holds the first element of the vector.
79 ; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
80 ; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}}
81 define void @insert_neg_offset_sgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, i32 %offset) {
82 entry:
83 %index = add i32 %offset, -512
84 %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
85 store <4 x i32> %value, <4 x i32> addrspace(1)* %out
86 ret void
87 }
88
89 ; CHECK-LABEL: {{^}}insert_neg_offset_vgpr:
90 ; The offset depends on the register that holds the first element of the vector.
91 ; CHECK: v_readfirstlane_b32
92 ; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
93 ; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}}
94 ; CHECK: s_cbranch_execnz
95 define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
96 entry:
97 %id = call i32 @llvm.r600.read.tidig.x() #1
98 %index = add i32 %id, -512
99 %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
100 store <4 x i32> %value, <4 x i32> addrspace(1)* %out
101 ret void
102 }
103
104 ; CHECK-LABEL: {{^}}insert_neg_inline_offset_vgpr:
105 ; The offset depends on the register that holds the first element of the vector.
106 ; CHECK: v_readfirstlane_b32
107 ; CHECK: s_add_i32 m0, m0, -{{[0-9]+}}
108 ; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}}
109 ; CHECK: s_cbranch_execnz
110 define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
111 entry:
112 %id = call i32 @llvm.r600.read.tidig.x() #1
113 %index = add i32 %id, -16
114 %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
115 store <4 x i32> %value, <4 x i32> addrspace(1)* %out
116 ret void
117 }
118
119 declare i32 @llvm.r600.read.tidig.x() #1
120 attributes #1 = { nounwind readnone }