llvm.org GIT mirror llvm / 70a7d5d
R600: Use function inputs to represent data stored in gpr git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194425 91177308-0d34-0410-b5e6-96231b3b80d8 Vincent Lejeune 6 years ago
29 changed file(s) with 286 addition(s) and 322 deletion(s). Raw diff Collapse all Expand all
4141
4242 ]>;
4343
44 // Calling convention for R600
45 def CC_R600 : CallingConv<[
46 CCIfInReg
47 T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
48 T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
49 T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
50 T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
51 T30_XYZW, T31_XYZW, T32_XYZW
52 ]>>>
53 ]>;
54
4455 // Calling convention for compute kernels
4556 def CC_AMDGPU_Kernel : CallingConv<[
4657 CCCustom<"allocateStack">
5667 "State.getMachineFunction().getInfo()->"
5768 "ShaderType == ShaderType::COMPUTE", CCDelegateTo>,
5869 CCIf<"State.getTarget().getSubtarget()"#
59 ".getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo>
70 ".getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo>,
71 CCIf<"State.getTarget().getSubtarget()"#
72 ".getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo>
6073 ]>;
553553 SDLoc DL(Op);
554554 switch(IntrinsicID) {
555555 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
556 case AMDGPUIntrinsic::R600_load_input: {
557 int64_t RegIndex = cast(Op.getOperand(1))->getZExtValue();
558 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
559 MachineFunction &MF = DAG.getMachineFunction();
560 MachineRegisterInfo &MRI = MF.getRegInfo();
561 MRI.addLiveIn(Reg);
562 return DAG.getCopyFromReg(DAG.getEntryNode(),
563 SDLoc(DAG.getEntryNode()), Reg, VT);
564 }
565
566 case AMDGPUIntrinsic::R600_interp_input: {
556 case AMDGPUIntrinsic::R600_interp_xy:
557 case AMDGPUIntrinsic::R600_interp_zw: {
567558 int slot = cast(Op.getOperand(1))->getZExtValue();
568 int ijb = cast(Op.getOperand(2))->getSExtValue();
569559 MachineSDNode *interp;
570 if (ijb < 0) {
571 const MachineFunction &MF = DAG.getMachineFunction();
572 const R600InstrInfo *TII =
573 static_cast(MF.getTarget().getInstrInfo());
574 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
575 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
576 return DAG.getTargetExtractSubreg(
577 TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
578 DL, MVT::f32, SDValue(interp, 0));
579 }
580
581 MachineFunction &MF = DAG.getMachineFunction();
582 MachineRegisterInfo &MRI = MF.getRegInfo();
583 unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
584 unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
585 MRI.addLiveIn(RegisterI);
586 MRI.addLiveIn(RegisterJ);
587 SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
588 SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
589 SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
590 SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
591
592 if (slot % 4 < 2)
560 SDValue RegisterINode = Op.getOperand(2);
561 SDValue RegisterJNode = Op.getOperand(3);
562
563 if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
593564 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
594 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
565 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
595566 RegisterJNode, RegisterINode);
596567 else
597568 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
598 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
569 MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
599570 RegisterJNode, RegisterINode);
600 return SDValue(interp, slot % 2);
571 return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
572 SDValue(interp, 0), SDValue(interp, 1));
601573 }
602574 case AMDGPUIntrinsic::R600_tex:
603575 case AMDGPUIntrinsic::R600_texc:
13381310 SmallVector ArgLocs;
13391311 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
13401312 getTargetMachine(), ArgLocs, *DAG.getContext());
1313 MachineFunction &MF = DAG.getMachineFunction();
1314 unsigned ShaderType = MF.getInfo()->ShaderType;
13411315
13421316 SmallVector LocalIns;
13431317
13501324 CCValAssign &VA = ArgLocs[i];
13511325 EVT VT = Ins[i].VT;
13521326 EVT MemVT = LocalIns[i].VT;
1327
1328 if (ShaderType != ShaderType::COMPUTE) {
1329 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1330 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1331 InVals.push_back(Register);
1332 continue;
1333 }
13531334
13541335 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
13551336 AMDGPUAS::CONSTANT_BUFFER_0);
417417 (outs R600_Reg128:$dst),
418418 (ins i32imm:$src0),
419419 "INTERP_LOAD $src0 : $dst",
420 []>;
420 [(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>;
421421
422422 def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
423423 let bank_swizzle = 5;
3838 llvm_i32_ty // coord_type_w
3939 ], [IntrNoMem]>;
4040
41 def int_R600_load_input :
42 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
4341 def int_R600_interp_input :
4442 Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
43 def int_R600_interp_const :
44 Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
45 def int_R600_interp_xy :
46 Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
47 def int_R600_interp_zw :
48 Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
4549 def int_R600_load_texbuf :
4650 Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
4751 def int_R600_tex : TextureIntrinsicFloatInput;
33 ;This test ensures that R600 backend can handle ifcvt properly
44 ;and do not generate ALU clauses with more than 128 instructions.
55
6 define void @main() #0 {
6 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) #0 {
77 main_body:
8 %0 = call float @llvm.R600.load.input(i32 0)
9 %1 = call float @llvm.R600.load.input(i32 1)
10 %2 = call float @llvm.R600.load.input(i32 2)
11 %3 = call float @llvm.R600.load.input(i32 3)
12 %4 = call float @llvm.R600.load.input(i32 4)
13 %5 = call float @llvm.R600.load.input(i32 36)
14 %6 = call float @llvm.R600.load.input(i32 32)
8 %0 = extractelement <4 x float> %reg0, i32 0
9 %1 = extractelement <4 x float> %reg0, i32 1
10 %2 = extractelement <4 x float> %reg0, i32 2
11 %3 = extractelement <4 x float> %reg0, i32 3
12 %4 = extractelement <4 x float> %reg1, i32 0
13 %5 = extractelement <4 x float> %reg9, i32 0
14 %6 = extractelement <4 x float> %reg8, i32 0
1515 %7 = fcmp ugt float %6, 0.000000e+00
1616 %8 = select i1 %7, float %4, float %5
17 %9 = call float @llvm.R600.load.input(i32 5)
18 %10 = call float @llvm.R600.load.input(i32 37)
19 %11 = call float @llvm.R600.load.input(i32 32)
17 %9 = extractelement <4 x float> %reg1, i32 1
18 %10 = extractelement <4 x float> %reg9, i32 1
19 %11 = extractelement <4 x float> %reg8, i32 0
2020 %12 = fcmp ugt float %11, 0.000000e+00
2121 %13 = select i1 %12, float %9, float %10
22 %14 = call float @llvm.R600.load.input(i32 6)
23 %15 = call float @llvm.R600.load.input(i32 38)
24 %16 = call float @llvm.R600.load.input(i32 32)
22 %14 = extractelement <4 x float> %reg1, i32 2
23 %15 = extractelement <4 x float> %reg9, i32 2
24 %16 = extractelement <4 x float> %reg8, i32 0
2525 %17 = fcmp ugt float %16, 0.000000e+00
2626 %18 = select i1 %17, float %14, float %15
27 %19 = call float @llvm.R600.load.input(i32 7)
28 %20 = call float @llvm.R600.load.input(i32 39)
29 %21 = call float @llvm.R600.load.input(i32 32)
30 %22 = call float @llvm.R600.load.input(i32 8)
31 %23 = call float @llvm.R600.load.input(i32 9)
32 %24 = call float @llvm.R600.load.input(i32 10)
33 %25 = call float @llvm.R600.load.input(i32 11)
34 %26 = call float @llvm.R600.load.input(i32 12)
35 %27 = call float @llvm.R600.load.input(i32 13)
36 %28 = call float @llvm.R600.load.input(i32 14)
37 %29 = call float @llvm.R600.load.input(i32 15)
38 %30 = call float @llvm.R600.load.input(i32 16)
39 %31 = call float @llvm.R600.load.input(i32 17)
40 %32 = call float @llvm.R600.load.input(i32 18)
41 %33 = call float @llvm.R600.load.input(i32 19)
42 %34 = call float @llvm.R600.load.input(i32 20)
43 %35 = call float @llvm.R600.load.input(i32 21)
44 %36 = call float @llvm.R600.load.input(i32 22)
45 %37 = call float @llvm.R600.load.input(i32 23)
46 %38 = call float @llvm.R600.load.input(i32 24)
47 %39 = call float @llvm.R600.load.input(i32 25)
48 %40 = call float @llvm.R600.load.input(i32 26)
49 %41 = call float @llvm.R600.load.input(i32 27)
50 %42 = call float @llvm.R600.load.input(i32 28)
51 %43 = call float @llvm.R600.load.input(i32 29)
52 %44 = call float @llvm.R600.load.input(i32 30)
53 %45 = call float @llvm.R600.load.input(i32 31)
27 %19 = extractelement <4 x float> %reg1, i32 3
28 %20 = extractelement <4 x float> %reg9, i32 3
29 %21 = extractelement <4 x float> %reg8, i32 0
30 %22 = extractelement <4 x float> %reg2, i32 0
31 %23 = extractelement <4 x float> %reg2, i32 1
32 %24 = extractelement <4 x float> %reg2, i32 2
33 %25 = extractelement <4 x float> %reg2, i32 3
34 %26 = extractelement <4 x float> %reg3, i32 0
35 %27 = extractelement <4 x float> %reg3, i32 1
36 %28 = extractelement <4 x float> %reg3, i32 2
37 %29 = extractelement <4 x float> %reg3, i32 3
38 %30 = extractelement <4 x float> %reg4, i32 0
39 %31 = extractelement <4 x float> %reg4, i32 1
40 %32 = extractelement <4 x float> %reg4, i32 2
41 %33 = extractelement <4 x float> %reg4, i32 3
42 %34 = extractelement <4 x float> %reg5, i32 0
43 %35 = extractelement <4 x float> %reg5, i32 1
44 %36 = extractelement <4 x float> %reg5, i32 2
45 %37 = extractelement <4 x float> %reg5, i32 3
46 %38 = extractelement <4 x float> %reg6, i32 0
47 %39 = extractelement <4 x float> %reg6, i32 1
48 %40 = extractelement <4 x float> %reg6, i32 2
49 %41 = extractelement <4 x float> %reg6, i32 3
50 %42 = extractelement <4 x float> %reg7, i32 0
51 %43 = extractelement <4 x float> %reg7, i32 1
52 %44 = extractelement <4 x float> %reg7, i32 2
53 %45 = extractelement <4 x float> %reg7, i32 3
5454 %46 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
5555 %47 = extractelement <4 x float> %46, i32 0
5656 %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
11461146 }
11471147
11481148 ; Function Attrs: readnone
1149 declare float @llvm.R600.load.input(i32) #1
1150
1151 ; Function Attrs: readnone
11521149 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
11531150
11541151 ; Function Attrs: readnone
11
22 ; CHECK: @main
33 ; CHECK-NOT: MOV
4 define void @main() {
4 define void @main(<4 x float> inreg %reg0) #0 {
55 entry:
6 %0 = call float @llvm.R600.load.input(i32 0)
6 %0 = extractelement <4 x float> %reg0, i32 0
77 %1 = call float @fabs(float %0)
88 %2 = fptoui float %1 to i32
99 %3 = bitcast i32 %2 to float
1212 ret void
1313 }
1414
15 declare float @llvm.R600.load.input(i32) readnone
1615 declare float @fabs(float ) readnone
17 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
16 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
17
18 attributes #0 = { "ShaderType"="0" }
11
22 ;CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33
4 define void @test() {
5 %r0 = call float @llvm.R600.load.input(i32 0)
4 define void @test(<4 x float> inreg %reg0) #0 {
5 %r0 = extractelement <4 x float> %reg0, i32 0
66 %r1 = call float @floor(float %r0)
7 call void @llvm.AMDGPU.store.output(float %r1, i32 0)
7 %vec = insertelement <4 x float> undef, float %r1, i32 0
8 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
89 ret void
910 }
1011
11 declare float @llvm.R600.load.input(i32) readnone
12 declare float @floor(float) readonly
13 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
1214
13 declare void @llvm.AMDGPU.store.output(float, i32)
14
15 declare float @floor(float) readonly
15 attributes #0 = { "ShaderType"="0" }
11
22 ;CHECK: MULADD_IEEE * {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33
4 define void @test() {
5 %r0 = call float @llvm.R600.load.input(i32 0)
6 %r1 = call float @llvm.R600.load.input(i32 1)
7 %r2 = call float @llvm.R600.load.input(i32 2)
4 define void @test(<4 x float> inreg %reg0) #0 {
5 %r0 = extractelement <4 x float> %reg0, i32 0
6 %r1 = extractelement <4 x float> %reg0, i32 1
7 %r2 = extractelement <4 x float> %reg0, i32 2
88 %r3 = fmul float %r0, %r1
9 %r4 = fadd float %r3, %r2
10 call void @llvm.AMDGPU.store.output(float %r4, i32 0)
9 %r4 = fadd float %r3, %r2
10 %vec = insertelement <4 x float> undef, float %r4, i32 0
11 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
1112 ret void
1213 }
1314
14 declare float @llvm.R600.load.input(i32) readnone
15 declare float @fabs(float ) readnone
16 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
1517
16 declare void @llvm.AMDGPU.store.output(float, i32)
17
18 declare float @fabs(float ) readnone
18 attributes #0 = { "ShaderType"="0" }
11
22 ;CHECK: MAX * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33
4 define void @test() {
5 %r0 = call float @llvm.R600.load.input(i32 0)
6 %r1 = call float @llvm.R600.load.input(i32 1)
4 define void @test(<4 x float> inreg %reg0) #0 {
5 %r0 = extractelement <4 x float> %reg0, i32 0
6 %r1 = extractelement <4 x float> %reg0, i32 1
77 %r2 = fcmp oge float %r0, %r1
88 %r3 = select i1 %r2, float %r0, float %r1
9 call void @llvm.AMDGPU.store.output(float %r3, i32 0)
9 %vec = insertelement <4 x float> undef, float %r3, i32 0
10 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
1011 ret void
1112 }
1213
13 declare float @llvm.R600.load.input(i32) readnone
14 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
1415
15 declare void @llvm.AMDGPU.store.output(float, i32)
16 attributes #0 = { "ShaderType"="0" }
11
22 ;CHECK: MIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33
4 define void @test() {
5 %r0 = call float @llvm.R600.load.input(i32 0)
6 %r1 = call float @llvm.R600.load.input(i32 1)
4 define void @test(<4 x float> inreg %reg0) #0 {
5 %r0 = extractelement <4 x float> %reg0, i32 0
6 %r1 = extractelement <4 x float> %reg0, i32 1
77 %r2 = fcmp uge float %r0, %r1
88 %r3 = select i1 %r2, float %r1, float %r0
9 call void @llvm.AMDGPU.store.output(float %r3, i32 0)
9 %vec = insertelement <4 x float> undef, float %r3, i32 0
10 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
1011 ret void
1112 }
1213
13 declare float @llvm.R600.load.input(i32) readnone
14 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
1415
15 declare void @llvm.AMDGPU.store.output(float, i32)
16 attributes #0 = { "ShaderType"="0" }
11
22 ;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33
4 define void @test() {
5 %r0 = call float @llvm.R600.load.input(i32 0)
6 %r1 = call float @llvm.R600.load.input(i32 1)
4 define void @test(<4 x float> inreg %reg0) #0 {
5 %r0 = extractelement <4 x float> %reg0, i32 0
6 %r1 = extractelement <4 x float> %reg0, i32 1
77 %r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1)
8 call void @llvm.AMDGPU.store.output(float %r2, i32 0)
8 %vec = insertelement <4 x float> undef, float %r2, i32 0
9 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
910 ret void
1011 }
1112
12 declare float @llvm.R600.load.input(i32) readnone
13 declare float @llvm.AMDGPU.mul(float ,float ) readnone
14 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
1315
14 declare void @llvm.AMDGPU.store.output(float, i32)
15
16 declare float @llvm.AMDGPU.mul(float ,float ) readnone
16 attributes #0 = { "ShaderType"="0" }
44 ;CHECK: ADD *
55 ;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
66
7 define void @test() {
8 %r0 = call float @llvm.R600.load.input(i32 0)
7 define void @test(<4 x float> inreg %reg0) #0 {
8 %r0 = extractelement <4 x float> %reg0, i32 0
99 %r1 = call float @llvm.cos.f32(float %r0)
10 call void @llvm.AMDGPU.store.output(float %r1, i32 0)
10 %vec = insertelement <4 x float> undef, float %r1, i32 0
11 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
1112 ret void
1213 }
1314
1415 declare float @llvm.cos.f32(float) readnone
16 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
1517
16 declare float @llvm.R600.load.input(i32) readnone
17
18 declare void @llvm.AMDGPU.store.output(float, i32)
18 attributes #0 = { "ShaderType"="0" }
33 ;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
44 ;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
55
6 define void @test() {
7 %r0 = call float @llvm.R600.load.input(i32 0)
8 %r1 = call float @llvm.R600.load.input(i32 1)
6 define void @test(<4 x float> inreg %reg0) #0 {
7 %r0 = extractelement <4 x float> %reg0, i32 0
8 %r1 = extractelement <4 x float> %reg0, i32 1
99 %r2 = call float @llvm.pow.f32( float %r0, float %r1)
10 call void @llvm.AMDGPU.store.output(float %r2, i32 0)
10 %vec = insertelement <4 x float> undef, float %r2, i32 0
11 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
1112 ret void
1213 }
1314
14 declare float @llvm.R600.load.input(i32) readnone
15 declare float @llvm.pow.f32(float ,float ) readonly
16 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
1517
16 declare void @llvm.AMDGPU.store.output(float, i32)
17
18 declare float @llvm.pow.f32(float ,float ) readonly
18 attributes #0 = { "ShaderType"="0" }
44 ;CHECK: ADD *
55 ;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
66
7 define void @test() {
8 %r0 = call float @llvm.R600.load.input(i32 0)
7 define void @test(<4 x float> inreg %reg0) #0 {
8 %r0 = extractelement <4 x float> %reg0, i32 0
99 %r1 = call float @llvm.sin.f32( float %r0)
10 call void @llvm.AMDGPU.store.output(float %r1, i32 0)
10 %vec = insertelement <4 x float> undef, float %r1, i32 0
11 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
1112 ret void
1213 }
1314
1415 declare float @llvm.sin.f32(float) readnone
16 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
1517
16 declare float @llvm.R600.load.input(i32) readnone
17
18 declare void @llvm.AMDGPU.store.output(float, i32)
18 attributes #0 = { "ShaderType"="0" }
0 ;RUN: llc < %s -march=r600 -mcpu=cayman
11 ;REQUIRES: asserts
22
3 define void @main() #0 {
3 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
44 main_body:
5 %0 = call float @llvm.R600.load.input(i32 4)
6 %1 = call float @llvm.R600.load.input(i32 5)
7 %2 = call float @llvm.R600.load.input(i32 6)
8 %3 = call float @llvm.R600.load.input(i32 7)
9 %4 = call float @llvm.R600.load.input(i32 8)
10 %5 = call float @llvm.R600.load.input(i32 9)
11 %6 = call float @llvm.R600.load.input(i32 10)
12 %7 = call float @llvm.R600.load.input(i32 11)
13 %8 = call float @llvm.R600.load.input(i32 12)
14 %9 = call float @llvm.R600.load.input(i32 13)
15 %10 = call float @llvm.R600.load.input(i32 14)
16 %11 = call float @llvm.R600.load.input(i32 15)
5 %0 = extractelement <4 x float> %reg1, i32 0
6 %1 = extractelement <4 x float> %reg1, i32 1
7 %2 = extractelement <4 x float> %reg1, i32 2
8 %3 = extractelement <4 x float> %reg1, i32 3
9 %4 = extractelement <4 x float> %reg2, i32 0
10 %5 = extractelement <4 x float> %reg2, i32 1
11 %6 = extractelement <4 x float> %reg2, i32 2
12 %7 = extractelement <4 x float> %reg2, i32 3
13 %8 = extractelement <4 x float> %reg3, i32 0
14 %9 = extractelement <4 x float> %reg3, i32 1
15 %10 = extractelement <4 x float> %reg3, i32 2
16 %11 = extractelement <4 x float> %reg3, i32 3
1717 %12 = load <4 x float> addrspace(8)* null
1818 %13 = extractelement <4 x float> %12, i32 0
1919 %14 = fmul float %0, %13
9595 }
9696
9797 ; Function Attrs: readnone
98 declare float @llvm.R600.load.input(i32) #1
99
100 ; Function Attrs: readnone
10198 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
10299
103100 ; Function Attrs: readonly
22 ; CHECK: @main
33 ; CHECK: ADD *
44
5 define void @main() #0 {
5 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
66 main_body:
7 %0 = call float @llvm.R600.load.input(i32 4)
8 %1 = call float @llvm.R600.load.input(i32 5)
9 %2 = call float @llvm.R600.load.input(i32 6)
10 %3 = call float @llvm.R600.load.input(i32 7)
11 %4 = call float @llvm.R600.load.input(i32 8)
7 %0 = extractelement <4 x float> %reg1, i32 0
8 %1 = extractelement <4 x float> %reg1, i32 1
9 %2 = extractelement <4 x float> %reg1, i32 2
10 %3 = extractelement <4 x float> %reg1, i32 3
11 %4 = extractelement <4 x float> %reg2, i32 0
1212 %5 = fadd float %0, 2.0
1313 %6 = fadd float %1, 3.0
1414 %7 = fadd float %2, 4.0
3131 ; CHECK: @main
3232 ; CHECK-NOT: ADD *
3333
34 define void @main2() #0 {
34 define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
3535 main_body:
36 %0 = call float @llvm.R600.load.input(i32 4)
37 %1 = call float @llvm.R600.load.input(i32 5)
38 %2 = call float @llvm.R600.load.input(i32 6)
39 %3 = call float @llvm.R600.load.input(i32 7)
40 %4 = call float @llvm.R600.load.input(i32 8)
36 %0 = extractelement <4 x float> %reg1, i32 0
37 %1 = extractelement <4 x float> %reg1, i32 1
38 %2 = extractelement <4 x float> %reg1, i32 2
39 %3 = extractelement <4 x float> %reg1, i32 3
40 %4 = extractelement <4 x float> %reg2, i32 0
4141 %5 = fadd float %0, 2.0
4242 %6 = fadd float %1, 3.0
4343 %7 = fadd float %2, 4.0
5858 }
5959
6060 ; Function Attrs: readnone
61 declare float @llvm.R600.load.input(i32) #1
6261 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
6362
6463 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
22 ;CHECK: DOT4 T{{[0-9]\.X}}
33 ;CHECK: MULADD_IEEE * T{{[0-9]\.W}}
44
5 define void @main() #0 {
5 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
66 main_body:
7 %0 = call float @llvm.R600.load.input(i32 4)
8 %1 = call float @llvm.R600.load.input(i32 5)
9 %2 = call float @llvm.R600.load.input(i32 6)
10 %3 = call float @llvm.R600.load.input(i32 8)
11 %4 = call float @llvm.R600.load.input(i32 9)
12 %5 = call float @llvm.R600.load.input(i32 10)
13 %6 = call float @llvm.R600.load.input(i32 12)
14 %7 = call float @llvm.R600.load.input(i32 13)
15 %8 = call float @llvm.R600.load.input(i32 14)
7 %0 = extractelement <4 x float> %reg1, i32 0
8 %1 = extractelement <4 x float> %reg1, i32 1
9 %2 = extractelement <4 x float> %reg1, i32 2
10 %3 = extractelement <4 x float> %reg2, i32 0
11 %4 = extractelement <4 x float> %reg2, i32 1
12 %5 = extractelement <4 x float> %reg2, i32 2
13 %6 = extractelement <4 x float> %reg3, i32 0
14 %7 = extractelement <4 x float> %reg3, i32 1
15 %8 = extractelement <4 x float> %reg3, i32 2
1616 %9 = load <4 x float> addrspace(8)* null
1717 %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
1818 %11 = call float @llvm.AMDGPU.dp4(<4 x float> %9, <4 x float> %9)
3535 }
3636
3737 ; Function Attrs: readnone
38 declare float @llvm.R600.load.input(i32) #1
39
40 ; Function Attrs: readnone
4138 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
4239
4340
4542
4643 attributes #0 = { "ShaderType"="1" }
4744 attributes #1 = { readnone }
48 attributes #2 = { readonly }
49 attributes #3 = { nounwind readonly }
22 ;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
33 ;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
44
5 define void @main() #0 {
5 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 {
66 main_body:
7 %0 = call float @llvm.R600.load.input(i32 4)
8 %1 = call float @llvm.R600.load.input(i32 5)
9 %2 = call float @llvm.R600.load.input(i32 6)
10 %3 = call float @llvm.R600.load.input(i32 7)
11 %4 = call float @llvm.R600.load.input(i32 8)
12 %5 = call float @llvm.R600.load.input(i32 9)
13 %6 = call float @llvm.R600.load.input(i32 10)
14 %7 = call float @llvm.R600.load.input(i32 11)
15 %8 = call float @llvm.R600.load.input(i32 12)
16 %9 = call float @llvm.R600.load.input(i32 13)
17 %10 = call float @llvm.R600.load.input(i32 14)
18 %11 = call float @llvm.R600.load.input(i32 15)
19 %12 = call float @llvm.R600.load.input(i32 16)
20 %13 = call float @llvm.R600.load.input(i32 17)
21 %14 = call float @llvm.R600.load.input(i32 18)
22 %15 = call float @llvm.R600.load.input(i32 19)
23 %16 = call float @llvm.R600.load.input(i32 20)
24 %17 = call float @llvm.R600.load.input(i32 21)
25 %18 = call float @llvm.R600.load.input(i32 22)
26 %19 = call float @llvm.R600.load.input(i32 23)
27 %20 = call float @llvm.R600.load.input(i32 24)
28 %21 = call float @llvm.R600.load.input(i32 25)
29 %22 = call float @llvm.R600.load.input(i32 26)
30 %23 = call float @llvm.R600.load.input(i32 27)
31 %24 = call float @llvm.R600.load.input(i32 28)
32 %25 = call float @llvm.R600.load.input(i32 29)
33 %26 = call float @llvm.R600.load.input(i32 30)
34 %27 = call float @llvm.R600.load.input(i32 31)
7 %0 = extractelement <4 x float> %reg1, i32 0
8 %1 = extractelement <4 x float> %reg1, i32 1
9 %2 = extractelement <4 x float> %reg1, i32 2
10 %3 = extractelement <4 x float> %reg1, i32 3
11 %4 = extractelement <4 x float> %reg2, i32 0
12 %5 = extractelement <4 x float> %reg2, i32 1
13 %6 = extractelement <4 x float> %reg2, i32 2
14 %7 = extractelement <4 x float> %reg2, i32 3
15 %8 = extractelement <4 x float> %reg3, i32 0
16 %9 = extractelement <4 x float> %reg3, i32 1
17 %10 = extractelement <4 x float> %reg3, i32 2
18 %11 = extractelement <4 x float> %reg3, i32 3
19 %12 = extractelement <4 x float> %reg4, i32 0
20 %13 = extractelement <4 x float> %reg4, i32 1
21 %14 = extractelement <4 x float> %reg4, i32 2
22 %15 = extractelement <4 x float> %reg4, i32 3
23 %16 = extractelement <4 x float> %reg5, i32 0
24 %17 = extractelement <4 x float> %reg5, i32 1
25 %18 = extractelement <4 x float> %reg5, i32 2
26 %19 = extractelement <4 x float> %reg5, i32 3
27 %20 = extractelement <4 x float> %reg6, i32 0
28 %21 = extractelement <4 x float> %reg6, i32 1
29 %22 = extractelement <4 x float> %reg6, i32 2
30 %23 = extractelement <4 x float> %reg6, i32 3
31 %24 = extractelement <4 x float> %reg7, i32 0
32 %25 = extractelement <4 x float> %reg7, i32 1
33 %26 = extractelement <4 x float> %reg7, i32 2
34 %27 = extractelement <4 x float> %reg7, i32 3
3535 %28 = load <4 x float> addrspace(8)* null
3636 %29 = extractelement <4 x float> %28, i32 0
3737 %30 = fmul float %0, %29
218218 }
219219
220220 ; Function Attrs: readnone
221 declare float @llvm.R600.load.input(i32) #1
222
223 ; Function Attrs: readnone
224221 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
225222
226223 ; Function Attrs: readonly
99 ; R600-CHECK: @test
1010 ; R600-CHECK: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}]
1111
12 define void @test() {
12 define void @test(<4 x float> inreg %reg0) #0 {
1313 entry:
14 %0 = call float @llvm.R600.load.input(i32 0)
15 %1 = call float @llvm.R600.load.input(i32 1)
16 %2 = fmul float %0, %1
17 call void @llvm.AMDGPU.store.output(float %2, i32 0)
14 %r0 = extractelement <4 x float> %reg0, i32 0
15 %r1 = extractelement <4 x float> %reg0, i32 1
16 %r2 = fmul float %r0, %r1
17 %vec = insertelement <4 x float> undef, float %r2, i32 0
18 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
1819 ret void
1920 }
2021
21 declare float @llvm.R600.load.input(i32) readnone
22 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
2223
23 declare void @llvm.AMDGPU.store.output(float, i32)
24 attributes #0 = { "ShaderType"="0" }
99 ;CHECK: EXPORT T{{[0-9]}}.0000
1010
1111
12 define void @main() #0 {
12 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
1313 main_body:
14 %0 = call float @llvm.R600.load.input(i32 4)
15 %1 = call float @llvm.R600.load.input(i32 5)
16 %2 = call float @llvm.R600.load.input(i32 6)
17 %3 = call float @llvm.R600.load.input(i32 7)
14 %0 = extractelement <4 x float> %reg1, i32 0
15 %1 = extractelement <4 x float> %reg1, i32 1
16 %2 = extractelement <4 x float> %reg1, i32 2
17 %3 = extractelement <4 x float> %reg1, i32 3
1818 %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
1919 %5 = extractelement <4 x float> %4, i32 0
2020 %6 = fmul float %5, %0
136136 ret void
137137 }
138138
139 ; Function Attrs: readnone
140 declare float @llvm.R600.load.input(i32) #1
141
142139 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
143140
144141 attributes #0 = { "ShaderType"="1" }
145 attributes #1 = { readnone }
0 ;RUN: llc < %s -march=r600 -mcpu=redwood
11 ;REQUIRES: asserts
22
3 define void @main() #0 {
3 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
44 main_body:
5 %0 = call float @llvm.R600.load.input(i32 4)
6 %1 = call float @llvm.R600.load.input(i32 5)
7 %2 = call float @llvm.R600.load.input(i32 6)
8 %3 = call float @llvm.R600.load.input(i32 7)
5 %0 = extractelement <4 x float> %reg1, i32 0
6 %1 = extractelement <4 x float> %reg1, i32 1
7 %2 = extractelement <4 x float> %reg1, i32 2
8 %3 = extractelement <4 x float> %reg1, i32 3
99 %4 = bitcast float %0 to i32
1010 %5 = icmp eq i32 %4, 0
1111 %6 = sext i1 %5 to i32
112112 br label %LOOP47
113113 }
114114
115 ; Function Attrs: readnone
116 declare float @llvm.R600.load.input(i32) #1
117
118115 declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)
119116
120117 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
121118
122119 attributes #0 = { "ShaderType"="1" }
123 attributes #1 = { readnone }
11
22 ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33
4 define void @test() {
5 %r0 = call float @llvm.R600.load.input(i32 0)
4 define void @test(<4 x float> inreg %reg0) #0 {
5 %r0 = extractelement <4 x float> %reg0, i32 0
66 %r1 = fdiv float 1.0, %r0
7 call void @llvm.AMDGPU.store.output(float %r1, i32 0)
7 %vec = insertelement <4 x float> undef, float %r1, i32 0
8 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
89 ret void
910 }
1011
11 declare float @llvm.R600.load.input(i32) readnone
12 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
1213
13 declare void @llvm.AMDGPU.store.output(float, i32)
14
15 declare float @llvm.AMDGPU.rcp(float ) readnone
14 attributes #0 = { "ShaderType"="0" }
0 ; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rv710 | FileCheck %s
11
2 ; CHECK: TEX 9 @4 ; encoding: [0x04,0x00,0x00,0x00,0x00,0x04,0x88,0x80]
2 ; CHECK: TEX 9 @6 ; encoding: [0x06,0x00,0x00,0x00,0x00,0x04,0x88,0x80]
33
4 define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
5 %1 = call float @llvm.R600.load.input(i32 4)
6 %2 = call float @llvm.R600.load.input(i32 5)
7 %3 = call float @llvm.R600.load.input(i32 6)
8 %4 = call float @llvm.R600.load.input(i32 7)
4 define void @test(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
5 %1 = extractelement <4 x float> %reg1, i32 0
6 %2 = extractelement <4 x float> %reg1, i32 1
7 %3 = extractelement <4 x float> %reg1, i32 2
8 %4 = extractelement <4 x float> %reg1, i32 3
99 %5 = insertelement <4 x float> undef, float %1, i32 0
1010 %6 = insertelement <4 x float> %5, float %2, i32 1
1111 %7 = insertelement <4 x float> %6, float %3, i32 2
3535
3636 declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
3737
38 ; Function Attrs: readnone
39 declare float @llvm.R600.load.input(i32) #1
38 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
4039
41
42 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
43 attributes #1 = { readnone }
40 attributes #0 = { "ShaderType"="1" }
0 ;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
11 ;REQUIRES: asserts
22
3 define void @main() {
3 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #1 {
44 main_body:
5 %0 = call float @llvm.R600.interp.input(i32 0, i32 0)
6 %1 = call float @llvm.R600.interp.input(i32 1, i32 0)
7 %2 = call float @llvm.R600.interp.input(i32 2, i32 0)
8 %3 = call float @llvm.R600.interp.input(i32 3, i32 0)
5 %0 = extractelement <4 x float> %reg1, i32 0
6 %1 = extractelement <4 x float> %reg1, i32 1
7 %2 = extractelement <4 x float> %reg1, i32 2
8 %3 = extractelement <4 x float> %reg1, i32 3
99 %4 = fcmp ult float %1, 0.000000e+00
1010 %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
1111 %6 = fsub float -0.000000e+00, %5
7373 br label %ENDIF
7474 }
7575
76 declare float @llvm.R600.interp.input(i32, i32) #0
77
7876 declare float @llvm.AMDIL.clamp.(float, float, float) #0
7977
8078 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
8179
8280 attributes #0 = { readnone }
81 attributes #1 = { "ShaderType"="1" }
0 ;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
11 ;REQUIRES: asserts
22
3 define void @main() {
3 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
44 main_body:
5 %0 = call float @llvm.R600.load.input(i32 4)
6 %1 = call float @llvm.R600.load.input(i32 5)
7 %2 = call float @llvm.R600.load.input(i32 6)
8 %3 = call float @llvm.R600.load.input(i32 7)
5 %0 = extractelement <4 x float> %reg1, i32 0
6 %1 = extractelement <4 x float> %reg1, i32 1
7 %2 = extractelement <4 x float> %reg1, i32 2
8 %3 = extractelement <4 x float> %reg1, i32 3
99 %4 = fcmp ult float %0, 0.000000e+00
1010 %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
1111 %6 = fsub float -0.000000e+00, %5
126126 br label %LOOP
127127 }
128128
129 declare float @llvm.R600.load.input(i32) #0
130
131129 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
132130
133 attributes #0 = { readnone }
131 attributes #0 = { "ShaderType"="1" }
33 ; CHECK: MULADD_IEEE *
44 ; CHECK-NOT: MULADD_IEEE *
55
6 define void @main() {
7 %w0 = call float @llvm.R600.load.input(i32 3)
8 %w1 = call float @llvm.R600.load.input(i32 7)
9 %w2 = call float @llvm.R600.load.input(i32 11)
6 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
7 %w0 = extractelement <4 x float> %reg0, i32 3
8 %w1 = extractelement <4 x float> %reg1, i32 3
9 %w2 = extractelement <4 x float> %reg2, i32 3
1010 %sq0 = fmul float %w0, %w0
1111 %r0 = fadd float %sq0, 2.0
1212 %sq1 = fmul float %w1, %w1
2323 }
2424
2525 ; Function Attrs: readnone
26 declare float @llvm.R600.load.input(i32) #1
27
28 ; Function Attrs: readnone
2926 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
30
3127
3228 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
3329
3430 attributes #0 = { "ShaderType"="1" }
35 attributes #1 = { readnone }
36 attributes #2 = { readonly }
37 attributes #3 = { nounwind readonly }
31 attributes #1 = { readnone }
55 ;EG-CHECK: EXPORT T{{[0-9]+}}.XXWX
66 ;EG-CHECK: EXPORT T{{[0-9]+}}.XXXW
77
8 define void @main() #0 {
8 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
99 main_body:
10 %0 = call float @llvm.R600.load.input(i32 4)
11 %1 = call float @llvm.R600.load.input(i32 5)
12 %2 = call float @llvm.R600.load.input(i32 6)
13 %3 = call float @llvm.R600.load.input(i32 7)
10 %0 = extractelement <4 x float> %reg1, i32 0
11 %1 = extractelement <4 x float> %reg1, i32 1
12 %2 = extractelement <4 x float> %reg1, i32 2
13 %3 = extractelement <4 x float> %reg1, i32 3
1414 %4 = load <4 x float> addrspace(8)* null
1515 %5 = extractelement <4 x float> %4, i32 1
1616 %6 = load <4 x float> addrspace(8)* null
9595 ; EG-CHECK: T{{[0-9]+}}.XY__
9696 ; EG-CHECK: T{{[0-9]+}}.YXZ0
9797
98 define void @main2() #0 {
98 define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
9999 main_body:
100 %0 = call float @llvm.R600.load.input(i32 4)
101 %1 = call float @llvm.R600.load.input(i32 5)
102 %2 = call float @llvm.R600.load.input(i32 6)
103 %3 = call float @llvm.R600.load.input(i32 7)
100 %0 = extractelement <4 x float> %reg1, i32 0
101 %1 = extractelement <4 x float> %reg1, i32 1
102 %2 = fadd float %0, 2.5
103 %3 = fmul float %1, 3.5
104104 %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
105105 %5 = extractelement <4 x float> %4, i32 0
106106 %6 = call float @llvm.cos.f32(float %5)
108108 %8 = extractelement <4 x float> %7, i32 0
109109 %9 = load <4 x float> addrspace(8)* null
110110 %10 = extractelement <4 x float> %9, i32 1
111 %11 = insertelement <4 x float> undef, float %0, i32 0
112 %12 = insertelement <4 x float> %11, float %1, i32 1
111 %11 = insertelement <4 x float> undef, float %2, i32 0
112 %12 = insertelement <4 x float> %11, float %3, i32 1
113113 call void @llvm.R600.store.swizzle(<4 x float> %12, i32 60, i32 1)
114114 %13 = insertelement <4 x float> undef, float %6, i32 0
115115 %14 = insertelement <4 x float> %13, float %8, i32 1
119119 ret void
120120 }
121121
122 ; Function Attrs: readnone
123 declare float @llvm.R600.load.input(i32) #1
124
125122 ; Function Attrs: nounwind readonly
126 declare float @llvm.cos.f32(float) #2
123 declare float @llvm.cos.f32(float) #1
127124
128125 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
129126
130127 attributes #0 = { "ShaderType"="1" }
131 attributes #1 = { readnone }
132 attributes #2 = { nounwind readonly }
128 attributes #1 = { nounwind readonly }
22 ;CHECK: TEX
33 ;CHECK-NEXT: ALU
44
5 define void @test() {
6 %1 = call float @llvm.R600.load.input(i32 0)
7 %2 = call float @llvm.R600.load.input(i32 1)
8 %3 = call float @llvm.R600.load.input(i32 2)
9 %4 = call float @llvm.R600.load.input(i32 3)
5 define void @test(<4 x float> inreg %reg0) #0 {
6 %1 = extractelement <4 x float> %reg0, i32 0
7 %2 = extractelement <4 x float> %reg0, i32 1
8 %3 = extractelement <4 x float> %reg0, i32 2
9 %4 = extractelement <4 x float> %reg0, i32 3
1010 %5 = insertelement <4 x float> undef, float %1, i32 0
1111 %6 = insertelement <4 x float> %5, float %2, i32 1
1212 %7 = insertelement <4 x float> %6, float %3, i32 2
1818 ret void
1919 }
2020
21 declare float @llvm.R600.load.input(i32) readnone
2221 declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
2322 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
23
24 attributes #0 = { "ShaderType"="1" }
11
22 ;CHECK-NOT: MOV
33
4 define void @test() {
5 %1 = call float @llvm.R600.load.input(i32 0)
6 %2 = call float @llvm.R600.load.input(i32 1)
7 %3 = call float @llvm.R600.load.input(i32 2)
8 %4 = call float @llvm.R600.load.input(i32 3)
4 define void @test(<4 x float> inreg %reg0) #0 {
5 %1 = extractelement <4 x float> %reg0, i32 0
6 %2 = extractelement <4 x float> %reg0, i32 1
7 %3 = extractelement <4 x float> %reg0, i32 2
8 %4 = extractelement <4 x float> %reg0, i32 3
99 %5 = fmul float %1, 3.0
1010 %6 = fmul float %2, 3.0
1111 %7 = fmul float %3, 3.0
2424 ret void
2525 }
2626
27 declare float @llvm.R600.load.input(i32) readnone
2827 declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
2928 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
29
30 attributes #0 = { "ShaderType"="1" }