llvm.org GIT mirror llvm / 3ff2fb8
[AMDGPU] ImmArg and SourceOfDivergence for permlane/dpp. Added missing ImmArg and SourceOfDivergence to the crosslane intrinsics. Differential Revision: https://reviews.llvm.org/D63216. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363276 91177308-0d34-0410-b5e6-96231b3b80d8. Author: Stanislav Mekhanoshin, 4 months ago.
4 changed file(s) with 76 addition(s) and 3 deletion(s). Raw diff Collapse all Expand all
14401440 def int_amdgcn_permlane16 :
14411441 Intrinsic<[llvm_i32_ty],
14421442 [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
1443 [IntrNoMem, IntrConvergent]>;
1443 [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>;
14441444
14451445 // llvm.amdgcn.permlanex16
14461446 def int_amdgcn_permlanex16 :
14471447 Intrinsic<[llvm_i32_ty],
14481448 [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
1449 [IntrNoMem, IntrConvergent]>;
1449 [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>;
14501450
14511451 // llvm.amdgcn.mov.dpp8.i32 <src> <sel>
14521452 // <sel> is a 32-bit constant whose high 8 bits must be zero which selects
14531453 // the lanes to read from.
14541454 def int_amdgcn_mov_dpp8 :
14551455 Intrinsic<[llvm_anyint_ty],
14561456 [LLVMMatchType<0>, llvm_i32_ty],
1457 [IntrNoMem, IntrConvergent]>;
1457 [IntrNoMem, IntrConvergent, ImmArg<1>]>;
14581458
14591459 def int_amdgcn_s_get_waveid_in_workgroup :
14601460 GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">,
9696 def : SourceOfDivergence;
9797 def : SourceOfDivergence;
9898 def : SourceOfDivergence;
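99 def : SourceOfDivergence<int_amdgcn_permlane16>;
100 def : SourceOfDivergence<int_amdgcn_permlanex16>;
101 def : SourceOfDivergence<int_amdgcn_mov_dpp>;
102 def : SourceOfDivergence<int_amdgcn_mov_dpp8>;
103 def : SourceOfDivergence<int_amdgcn_update_dpp>;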
99104
100105 foreach intr = AMDGPUImageDimAtomicIntrinsics in
101106 def : SourceOfDivergence<intr>;
66 ret void
77 }
88
9 ; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
10 define amdgpu_kernel void @v_permlane16_b32(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
11 %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
12 store i32 %v, i32 addrspace(1)* %out
13 ret void
14 }
15
16 ; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
17 define amdgpu_kernel void @v_permlanex16_b32(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
18 %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
19 store i32 %v, i32 addrspace(1)* %out
20 ret void
21 }
22
23 ; CHECK: DIVERGENT: %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false) #0
24 define amdgpu_kernel void @update_dpp(i32 addrspace(1)* %out, i32 %in1, i32 %in2) #0 {
25 %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false) #0
26 store i32 %tmp0, i32 addrspace(1)* %out
27 ret void
28 }
29
30 ; CHECK: DIVERGENT: %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 true) #0
31 define amdgpu_kernel void @mov_dpp(i32 addrspace(1)* %out, i32 %in) #0 {
32 %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 true) #0
33 store i32 %tmp0, i32 addrspace(1)* %out
34 ret void
35 }
36
37 ; CHECK: DIVERGENT: %tmp0 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %in, i32 1) #0
38 define amdgpu_kernel void @mov_dpp8(i32 addrspace(1)* %out, i32 %in) #0 {
39 %tmp0 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %in, i32 1) #0
40 store i32 %tmp0, i32 addrspace(1)* %out
41 ret void
42 }
43
944 declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #1
45 declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1, i1) #1
46 declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1, i1) #1
47 declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1) #1
48 declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) #1
49 declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #1
1050
1151 attributes #0 = { nounwind convergent }
1252 attributes #1 = { nounwind readnone convergent }
549549 %val = call i32 @llvm.amdgcn.udot4(i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3)
550550 ret i32 %val
551551 }
552
553 declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1, i1)
554 define i32 @test_permlane16(i32 addrspace(1)* %out, i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3, i1 %arg4) {
555 ; CHECK: immarg operand has non-immediate parameter
556 ; CHECK-NEXT: i1 %arg3
557 ; CHECK-NEXT: %v1 = call i32 @llvm.amdgcn.permlane16(i32 %arg0, i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3, i1 false)
558 %v1 = call i32 @llvm.amdgcn.permlane16(i32 %arg0, i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3, i1 false)
559
560 ; CHECK: immarg operand has non-immediate parameter
561 ; CHECK-NEXT: i1 %arg4
562 ; CHECK-NEXT: call i32 @llvm.amdgcn.permlane16(i32 %v2, i32 %arg0, i32 %arg1, i32 %arg2, i1 false, i1 %arg4)
563 %v2 = call i32 @llvm.amdgcn.permlane16(i32 %v2, i32 %arg0, i32 %arg1, i32 %arg2, i1 false, i1 %arg4)
564 ret i32 %v2
565 }
566
567 declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1, i1)
568 define i32 @test_permlanex16(i32 addrspace(1)* %out, i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3, i1 %arg4) {
569 ; CHECK: immarg operand has non-immediate parameter
570 ; CHECK-NEXT: i1 %arg3
571 ; CHECK-NEXT: %v1 = call i32 @llvm.amdgcn.permlanex16(i32 %arg0, i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3, i1 false)
572 %v1 = call i32 @llvm.amdgcn.permlanex16(i32 %arg0, i32 %arg0, i32 %arg1, i32 %arg2, i1 %arg3, i1 false)
573
574 ; CHECK: immarg operand has non-immediate parameter
575 ; CHECK-NEXT: i1 %arg4
576 ; CHECK-NEXT: call i32 @llvm.amdgcn.permlanex16(i32 %v2, i32 %arg0, i32 %arg1, i32 %arg2, i1 false, i1 %arg4)
577 %v2 = call i32 @llvm.amdgcn.permlanex16(i32 %v2, i32 %arg0, i32 %arg1, i32 %arg2, i1 false, i1 %arg4)
578 ret i32 %v2
579 }