llvm.org GIT mirror llvm / c15c355
Merging r263441:
------------------------------------------------------------------------
r263441 | marek.olsak | 2016-03-14 08:57:14 -0700 (Mon, 14 Mar 2016) | 8 lines

AMDGPU/SI: Incomplete shader binaries need to finish execution at the end

Reviewers: tstellarAMD, arsenm

Subscribers: arsenm

Differential Revision: http://reviews.llvm.org/D18058
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@271723 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard 3 years ago
3 changed file(s) with 81 addition(s) and 8 deletion(s). Raw diff Collapse all Expand all
504504
505505 // Wait for everything at the end of the MBB
506506 Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
507
508 // Functions returning something shouldn't contain S_ENDPGM, because other
509 // bytecode will be appended after it.
510 if (!ReturnsVoid) {
511 MachineBasicBlock::iterator I = MBB.getFirstTerminator();
512 if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
513 I->eraseFromParent();
514 }
515507 }
516508
517509 return Changes;
485485 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
486486 BI != BE; ++BI) {
487487
488 MachineBasicBlock *EmptyMBBAtEnd = NULL;
488489 MachineBasicBlock &MBB = *BI;
489490 MachineBasicBlock::iterator I, Next;
490491 for (I = MBB.begin(); I != MBB.end(); I = Next) {
561562 case AMDGPU::SI_INDIRECT_DST_V16:
562563 IndirectDst(MI);
563564 break;
565
566 case AMDGPU::S_ENDPGM: {
567 if (MF.getInfo()->returnsVoid())
568 break;
569
570 // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
571 // because external bytecode will be appended at the end.
572 if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
573 // S_ENDPGM is not the last instruction. Add an empty block at
574 // the end and jump there.
575 if (!EmptyMBBAtEnd) {
576 EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
577 MF.insert(MF.end(), EmptyMBBAtEnd);
578 }
579
580 MBB.addSuccessor(EmptyMBBAtEnd);
581 BuildMI(*BI, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
582 .addMBB(EmptyMBBAtEnd);
583 }
584
585 I->eraseFromParent();
586 break;
587 }
564588 }
565589 }
566590 }
0 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2
3 target triple = "amdgcn--"
4
5 ; GCN-LABEL: {{^}}main:
6 ; GCN: BB0_3:
7 ; GCN-NEXT: s_branch [[LASTBB:BB[0-9]*_[0-9]*]]
8 ; GCN-NEXT: BB0_
9 ; GCN: [[LASTBB]]
10 ; GCN-NEXT: .Lfunc_end0:
11 ; ModuleID = 'bugpoint-reduced-simplified.bc'
12 target triple = "amdgcn--"
13
; @main returns a non-void packed struct. Its CFG has three exits from the
; entry path: main_body and ELSE can both branch to ENDIF69, which holds the
; only `ret`, while ELSE41 ends in `unreachable` and never returns a value.
; NOTE(review): presumably the `unreachable` block is what produces an
; S_ENDPGM that is not the last instruction of the function, which the GCN
; checks above expect to be replaced by an s_branch to the final block -
; confirm against the backend.
14 define <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <8 x i32>] addrspace(2)* byval, i32 addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
15 main_body:
16 %p83 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
17 %p87 = fmul float undef, %p83
18 %p88 = fadd float %p87, undef
19 %p93 = fadd float %p88, undef
20 %p97 = fmul float %p93, undef
21 %p102 = fsub float %p97, undef
22 %p104 = fmul float %p102, undef
23 %p106 = fadd float 0.000000e+00, %p104
24 %p108 = fadd float undef, %p106
25 br i1 undef, label %ENDIF69, label %ELSE
26
27 ELSE: ; preds = %main_body
28 %p124 = fmul float %p108, %p108
29 %p125 = fsub float %p124, undef
30 %p126 = fcmp olt float %p125, 0.000000e+00
31 br i1 %p126, label %ENDIF69, label %ELSE41
32
; The only block that does not reach the `ret` in ENDIF69.
33 ELSE41: ; preds = %ELSE
34 unreachable
35
36 ENDIF69: ; preds = %ELSE, %main_body
37 ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef
38 }
39
40 ; Function Attrs: nounwind readnone
41 declare float @llvm.SI.load.const(<16 x i8>, i32) #1
42
43 ; Function Attrs: nounwind readnone
44 declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
45
46 ; Function Attrs: nounwind readnone
47 declare float @llvm.fabs.f32(float) #1
48
49 ; Function Attrs: nounwind readnone
50 declare float @llvm.sqrt.f32(float) #1
51
52 ; Function Attrs: nounwind readnone
53 declare float @llvm.floor.f32(float) #1
54
55 attributes #0 = { "InitialPSInputAddr"="36983" "ShaderType"="0" }
56 attributes #1 = { nounwind readnone }