llvm.org GIT mirror llvm / a7eea05
R600: Use KCache for kernel arguments Reviewed-by: Vincent Lejeune <vljn at ovi.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186918 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 6 years ago
21 changed file(s) with 109 addition(s) and 140 deletion(s). Raw diff Collapse all Expand all
7373 ADDRESS_NONE = 5, ///< Address space for unknown memory.
7474 PARAM_D_ADDRESS = 6, ///< Address space for direct addressable parameter memory (CONST0)
7575 PARAM_I_ADDRESS = 7, ///< Address space for indirect addressable parameter memory (VTX1)
76
77 // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on this
78 // order to be able to dynamically index a constant buffer, for example:
79 //
80 // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
81
7682 CONSTANT_BUFFER_0 = 8,
7783 CONSTANT_BUFFER_1 = 9,
7884 CONSTANT_BUFFER_2 = 10,
564564 return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
565565 }
566566
567 bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) const {
568 if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
569 return true;
570 }
571
572 const DataLayout *DL = TM.getDataLayout();
573 MachineMemOperand *MMO = N->getMemOperand();
574 const Value *V = MMO->getValue();
575 const Value *BV = GetUnderlyingObject(V, DL, 0);
576 if (MMO
577 && MMO->getValue()
578 && ((V && dyn_cast(V))
579 || (BV && dyn_cast(
580 GetUnderlyingObject(MMO->getValue(), DL, 0))))) {
581 return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
582 } else {
583 return false;
584 }
567 bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
568 if (CbId == -1) {
569 return checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS);
570 }
571 return checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
585572 }
586573
587574 bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
7171 setOperationAction(ISD::LOAD, MVT::i32, Custom);
7272 setOperationAction(ISD::LOAD, MVT::v2i32, Expand);
7373 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
74 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
75 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
76 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Expand);
77 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
74 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
75 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
76 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
77 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
7878 setOperationAction(ISD::STORE, MVT::i8, Custom);
7979 setOperationAction(ISD::STORE, MVT::i32, Custom);
8080 setOperationAction(ISD::STORE, MVT::v2i32, Expand);
774774 unsigned DwordOffset) const {
775775 unsigned ByteOffset = DwordOffset * 4;
776776 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
777 AMDGPUAS::PARAM_I_ADDRESS);
777 AMDGPUAS::CONSTANT_BUFFER_0);
778778
779779 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
780780 assert(isInt<16>(ByteOffset));
12181218
12191219 AnalyzeFormalArguments(CCInfo, Ins);
12201220
1221 Function::const_arg_iterator FuncArg =
1222 DAG.getMachineFunction().getFunction()->arg_begin();
1223 for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
1221 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
12241222 CCValAssign &VA = ArgLocs[i];
12251223 EVT VT = VA.getLocVT();
1226 Type *ArgType = FuncArg->getType();
1227 unsigned ArgSizeInBits = ArgType->isPointerTy() ?
1228 32 : ArgType->getPrimitiveSizeInBits();
1229 unsigned ArgBytes = ArgSizeInBits >> 3;
1230 EVT ArgVT;
1231 if (ArgSizeInBits < VT.getSizeInBits()) {
1232 assert(!ArgType->isFloatTy() &&
1233 "Extending floating point arguments not supported yet");
1234 ArgVT = MVT::getIntegerVT(ArgSizeInBits);
1235 } else {
1236 ArgVT = VT;
1237 }
1238
1239 ISD::LoadExtType LoadType = ISD::EXTLOAD;
1240 if (Ins[i].Flags.isZExt()) {
1241 LoadType = ISD::ZEXTLOAD;
1242 } else if (Ins[i].Flags.isSExt()) {
1243 LoadType = ISD::SEXTLOAD;
1244 }
12451224
12461225 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
1247 AMDGPUAS::PARAM_I_ADDRESS);
1226 AMDGPUAS::CONSTANT_BUFFER_0);
12481227
12491228 // The first 36 bytes of the input buffer contains information about
12501229 // thread group and global sizes.
1251 SDValue Arg = DAG.getExtLoad(LoadType, DL, VT, DAG.getRoot(),
1230 SDValue Arg = DAG.getLoad(VT, DL, Chain,
12521231 DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
1253 MachinePointerInfo(UndefValue::get(PtrTy)),
1254 ArgVT, false, false, ArgBytes);
1232 MachinePointerInfo(UndefValue::get(PtrTy)), false,
1233 false, false, 4); // 4 is the preferred alignment for
1234 // the CONSTANT memory space.
12551235 InVals.push_back(Arg);
12561236 }
12571237 return Chain;
312312
313313 class LoadParamFrag : PatFrag <
314314 (ops node:$ptr), (load_type node:$ptr),
315 [{ return isParamLoad(dyn_cast(N)); }]
315 [{ return isConstantLoad(dyn_cast(N), 0); }]
316316 >;
317317
318318 def load_param : LoadParamFrag;
11 ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
22
33 ; R600-CHECK: @v4i32_kernel_arg
4 ; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 52
4 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
5 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
6 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
7 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
58 ; SI-CHECK: @v4i32_kernel_arg
69 ; SI-CHECK: BUFFER_STORE_DWORDX4
710 define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
1114 }
1215
1316 ; R600-CHECK: @v4f32_kernel_arg
14 ; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 52
17 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
18 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
19 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
20 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
1521 ; SI-CHECK: @v4f32_kernel_arg
1622 ; SI-CHECK: BUFFER_STORE_DWORDX4
1723 define void @v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float> %in) {
11 ; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
22
33 ;EG-CHECK: @test2
4 ;EG-CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
5 ;EG-CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal\.[xyzw]}}
4 ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
5 ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
66
77 ;SI-CHECK: @test2
88 ;SI-CHECK: V_ADD_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
1818 }
1919
2020 ;EG-CHECK: @test4
21 ;EG-CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
22 ;EG-CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
23 ;EG-CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
24 ;EG-CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
21 ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
22 ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
23 ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
24 ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2525
2626 ;SI-CHECK: @test4
2727 ;SI-CHECK: V_ADD_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
3535 ; SHA-256 Ma function
3636 ; ((x & z) | (y & (x | z)))
3737 ; R600-CHECK: @bfi_sha256_ma
38 ; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
39 ; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
38 ; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]],
39 ; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
4040 ; SI-CHECK: V_XOR_B32_e64 [[DST:VGPR[0-9]+]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}
4141 ; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}
4242
0 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
11
2 ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
3 ;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
4 ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
5 ;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
6 ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
7 ;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
8 ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
9 ;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
2 ;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
3 ;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
4 ;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
5 ;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
6 ;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
7 ;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
8 ;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
9 ;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
1010
11 define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
12 %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
13 %a = load <4 x float> addrspace(1) * %in
14 %b = load <4 x float> addrspace(1) * %b_ptr
15 %result = fdiv <4 x float> %a, %b
16 store <4 x float> %result, <4 x float> addrspace(1)* %out
11 define void @test(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
12 entry:
13 %0 = fdiv <4 x float> %a, %b
14 store <4 x float> %0, <4 x float> addrspace(1)* %out
1715 ret void
1816 }
0 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
11
22 ; CHECK: @fp_to_sint_v4i32
3 ; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
4 ; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
5 ; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
6 ; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
3 ; CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
4 ; CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
5 ; CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
6 ; CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
77
88 define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
99 %value = load <4 x float> addrspace(1) * %in
0 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
11
2 ;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
2 ;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33
44 define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) {
55 %1 = load i8 addrspace(1)* %in
11
22 ; Test using an integer literal constant.
33 ; Generated ASM should be:
4 ; ADD_INT REG literal.x, 5
4 ; ADD_INT KC0[2].Z literal.x, 5
55 ; or
6 ; ADD_INT literal.x REG, 5
6 ; ADD_INT literal.x KC0[2].Z, 5
77
88 ; CHECK: @i32_literal
9 ; CHECK: ADD_INT * {{[A-Z0-9,. ]*}}literal.x
9 ; CHECK: ADD_INT * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
1010 ; CHECK-NEXT: 5
1111 define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
1212 entry:
1717
1818 ; Test using a float literal constant.
1919 ; Generated ASM should be:
20 ; ADD REG literal.x, 5.0
20 ; ADD KC0[2].Z literal.x, 5.0
2121 ; or
22 ; ADD literal.x REG, 5.0
22 ; ADD literal.x KC0[2].Z, 5.0
2323
2424 ; CHECK: @float_literal
25 ; CHECK: ADD * {{[A-Z0-9,. ]*}}literal.x
25 ; CHECK: ADD * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
2626 ; CHECK-NEXT: 1084227584(5.0
2727 define void @float_literal(float addrspace(1)* %out, float %in) {
2828 entry:
11 ; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
22
33 ; R600-CHECK: @amdgpu_trunc
4 ; R600-CHECK: TRUNC * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
4 ; R600-CHECK: TRUNC * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
55 ; SI-CHECK: @amdgpu_trunc
66 ; SI-CHECK: V_TRUNC_F32
77
22
33 ; load a v2i32 value from the global address space.
44 ; EG-CHECK: @load_v2i32
5 ; EG-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4
6 ; EG-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
5 ; EG-CHECK-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4
6 ; EG-CHECK-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
77 ; SI-CHECK: @load_v2i32
88 ; SI-CHECK: BUFFER_LOAD_DWORDX2 VGPR{{[0-9]+}}
99 define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
0 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
11
2 ;CHECK: TEX
32 ;CHECK: ALU_PUSH
4 ;CHECK: JUMP @15
5 ;CHECK: TEX
6 ;CHECK: LOOP_START_DX10 @14
7 ;CHECK: LOOP_BREAK @13
8 ;CHECK: POP @15
3 ;CHECK: LOOP_START_DX10 @11
4 ;CHECK: LOOP_BREAK @10
5 ;CHECK: POP @10
96
107 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
118 target triple = "r600--"
1818 ; R600-CHECK: @rotl
1919 ; R600-CHECK: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x
2020 ; R600-CHECK-NEXT: 32
21 ; R600-CHECK: BIT_ALIGN_INT {{\** T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PV.[XYZW]}}
21 ; R600-CHECK: BIT_ALIGN_INT {{\** T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}}
2222
2323 ; SI-CHECK: @rotl
2424 ; SI-CHECK: V_SUB_I32_e64 [[DST:VGPR[0-9]+]], 32, {{[SV]GPR[0-9]+}}
2828 ; for the icmp instruction
2929
3030 ; CHECK: @test_b
31 ; CHECK: VTX_READ
3231 ; CHECK: SET{{[GTEQN]+}}_DX10
3332 ; CHECK-NEXT: PRED_
3433 ; CHECK-NEXT: ALU clause starting
44 ; SET*DX10 instructions.
55
66 ; CHECK: @fcmp_une_select_fptosi
7 ; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
7 ; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
88 ; CHECK-NEXT: 1084227584(5.000000e+00)
99 define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
1010 entry:
1717 }
1818
1919 ; CHECK: @fcmp_une_select_i32
20 ; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
20 ; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
2121 ; CHECK-NEXT: 1084227584(5.000000e+00)
2222 define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
2323 entry:
2828 }
2929
3030 ; CHECK: @fcmp_ueq_select_fptosi
31 ; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
31 ; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
3232 ; CHECK-NEXT: 1084227584(5.000000e+00)
3333 define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {
3434 entry:
4141 }
4242
4343 ; CHECK: @fcmp_ueq_select_i32
44 ; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
44 ; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
4545 ; CHECK-NEXT: 1084227584(5.000000e+00)
4646 define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {
4747 entry:
5252 }
5353
5454 ; CHECK: @fcmp_ugt_select_fptosi
55 ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
55 ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
5656 ; CHECK-NEXT: 1084227584(5.000000e+00)
5757 define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {
5858 entry:
6565 }
6666
6767 ; CHECK: @fcmp_ugt_select_i32
68 ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
68 ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
6969 ; CHECK-NEXT: 1084227584(5.000000e+00)
7070 define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {
7171 entry:
7676 }
7777
7878 ; CHECK: @fcmp_uge_select_fptosi
79 ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
79 ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
8080 ; CHECK-NEXT: 1084227584(5.000000e+00)
8181 define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {
8282 entry:
8989 }
9090
9191 ; CHECK: @fcmp_uge_select_i32
92 ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
92 ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
9393 ; CHECK-NEXT: 1084227584(5.000000e+00)
9494 define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {
9595 entry:
100100 }
101101
102102 ; CHECK: @fcmp_ule_select_fptosi
103 ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
103 ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
104104 ; CHECK-NEXT: 1084227584(5.000000e+00)
105105 define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {
106106 entry:
113113 }
114114
115115 ; CHECK: @fcmp_ule_select_i32
116 ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
116 ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
117117 ; CHECK-NEXT: 1084227584(5.000000e+00)
118118 define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {
119119 entry:
124124 }
125125
126126 ; CHECK: @fcmp_ult_select_fptosi
127 ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
127 ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
128128 ; CHECK-NEXT: 1084227584(5.000000e+00)
129129 define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {
130130 entry:
137137 }
138138
139139 ; CHECK: @fcmp_ult_select_i32
140 ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
140 ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
141141 ; CHECK-NEXT: 1084227584(5.000000e+00)
142142 define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {
143143 entry:
11 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
22
33 ; CHECK: @i8_arg
4 ; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
4 ; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
55
66 define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
77 entry:
1111 }
1212
1313 ; CHECK: @i8_zext_arg
14 ; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
14 ; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
1515
1616 define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
1717 entry:
2121 }
2222
2323 ; CHECK: @i8_sext_arg
24 ; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
24 ; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
2525 define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
2626 entry:
2727 %0 = sext i8 %in to i32
3030 }
3131
3232 ; CHECK: @i16_arg
33 ; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
33 ; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
3434
3535 define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
3636 entry:
4040 }
4141
4242 ; CHECK: @i16_zext_arg
43 ; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
43 ; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
4444
4545 define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
4646 entry:
5050 }
5151
5252 ; CHECK: @i16_sext_arg
53 ; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
53 ; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
5454
5555 define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
5656 entry:
22 ; These tests are for condition codes that are not supported by the hardware
33
44 ; CHECK: @slt
5 ; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
5 ; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
66 ; CHECK-NEXT: 5(7.006492e-45)
77 define void @slt(i32 addrspace(1)* %out, i32 %in) {
88 entry:
1313 }
1414
1515 ; CHECK: @ult_i32
16 ; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
16 ; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
1717 ; CHECK-NEXT: 5(7.006492e-45)
1818 define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
1919 entry:
2424 }
2525
2626 ; CHECK: @ult_float
27 ; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
27 ; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
2828 ; CHECK-NEXT: 1084227584(5.000000e+00)
2929 define void @ult_float(float addrspace(1)* %out, float %in) {
3030 entry:
3535 }
3636
3737 ; CHECK: @olt
38 ; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
38 ; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
3939 ;CHECK-NEXT: 1084227584(5.000000e+00)
4040 define void @olt(float addrspace(1)* %out, float %in) {
4141 entry:
4646 }
4747
4848 ; CHECK: @sle
49 ; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
49 ; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
5050 ; CHECK-NEXT: 6(8.407791e-45)
5151 define void @sle(i32 addrspace(1)* %out, i32 %in) {
5252 entry:
5757 }
5858
5959 ; CHECK: @ule_i32
60 ; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
60 ; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
6161 ; CHECK-NEXT: 6(8.407791e-45)
6262 define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
6363 entry:
6868 }
6969
7070 ; CHECK: @ule_float
71 ; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
71 ; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
7272 ; CHECK-NEXT: 1084227584(5.000000e+00)
7373 define void @ule_float(float addrspace(1)* %out, float %in) {
7474 entry:
7979 }
8080
8181 ; CHECK: @ole
82 ; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
82 ; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
8383 ; CHECK-NEXT:1084227584(5.000000e+00)
8484 define void @ole(float addrspace(1)* %out, float %in) {
8585 entry:
55
66 ; CHECK: @test
77 ; CHECK: Fetch clause
8 ; CHECK_VTX_READ_32 [[IN0:T[0-9]+\.X]], [[IN0]], 40
9 ; CHECK_VTX_READ_32 [[IN1:T[0-9]+\.X]], [[IN1]], 44
8 ; CHECK_VTX_READ_32 [[IN0:T[0-9]+\.X]], [[IN0]], 0
109 ; CHECK: Fetch clause
11 ; CHECK_VTX_READ_32 [[IN0:T[0-9]+\.X]], [[IN0]], 0
1210 ; CHECK_VTX_READ_32 [[IN1:T[0-9]+\.X]], [[IN1]], 0
13 define void @test(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in0, i32 addrspace(1)* nocapture %in1) {
11 define void @test(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* addrspace(1)* nocapture %in0) {
1412 entry:
15 %0 = load i32 addrspace(1)* %in0, align 4
16 %1 = load i32 addrspace(1)* %in1, align 4
17 %cmp.i = icmp slt i32 %0, %1
18 %cond.i = select i1 %cmp.i, i32 %0, i32 %1
19 store i32 %cond.i, i32 addrspace(1)* %out, align 4
13 %0 = load i32 addrspace(1)* addrspace(1)* %in0
14 %1 = load i32 addrspace(1)* %0
15 store i32 %1, i32 addrspace(1)* %out
2016 ret void
2117 }
22
33 ; R600-CHECK: @ngroups_x
44 ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
5 ; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 0
5 ; R600-CHECK: MOV * [[VAL]], KC0[0].X
66 ; SI-CHECK: @ngroups_x
77 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 0
88 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
1616
1717 ; R600-CHECK: @ngroups_y
1818 ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
19 ; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 4
19 ; R600-CHECK: MOV * [[VAL]], KC0[0].Y
2020 ; SI-CHECK: @ngroups_y
2121 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 1
2222 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
3030
3131 ; R600-CHECK: @ngroups_z
3232 ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
33 ; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 8
33 ; R600-CHECK: MOV * [[VAL]], KC0[0].Z
3434 ; SI-CHECK: @ngroups_z
3535 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 2
3636 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
4444
4545 ; R600-CHECK: @global_size_x
4646 ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
47 ; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 12
47 ; R600-CHECK: MOV * [[VAL]], KC0[0].W
4848 ; SI-CHECK: @global_size_x
4949 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 3
5050 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
5858
5959 ; R600-CHECK: @global_size_y
6060 ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
61 ; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 16
61 ; R600-CHECK: MOV * [[VAL]], KC0[1].X
6262 ; SI-CHECK: @global_size_y
6363 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 4
6464 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
7272
7373 ; R600-CHECK: @global_size_z
7474 ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
75 ; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 20
75 ; R600-CHECK: MOV * [[VAL]], KC0[1].Y
7676 ; SI-CHECK: @global_size_z
7777 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 5
7878 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
8686
8787 ; R600-CHECK: @local_size_x
8888 ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
89 ; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 24
89 ; R600-CHECK: MOV * [[VAL]], KC0[1].Z
9090 ; SI-CHECK: @local_size_x
9191 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 6
9292 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
100100
101101 ; R600-CHECK: @local_size_y
102102 ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
103 ; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 28
103 ; R600-CHECK: MOV * [[VAL]], KC0[1].W
104104 ; SI-CHECK: @local_size_y
105105 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 7
106106 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
114114
115115 ; R600-CHECK: @local_size_z
116116 ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
117 ; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 32
117 ; R600-CHECK: MOV * [[VAL]], KC0[2].X
118118 ; SI-CHECK: @local_size_z
119119 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 8
120120 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]