llvm.org GIT mirror llvm / 5fecfa2
AMDGPU: Fix TargetPrefix for remaining r600 intrinsics

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275619 91177308-0d34-0410-b5e6-96231b3b80d8

Matt Arsenault, 3 years ago
39 changed file(s) with 146 addition(s) and 144 deletion(s). Raw diff Collapse all Expand all
640640 unsigned IntrinsicID =
641641 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
642642 switch (IntrinsicID) {
643 case AMDGPUIntrinsic::R600_store_swizzle: {
643 case AMDGPUIntrinsic::r600_store_swizzle: {
644644 SDLoc DL(Op);
645645 const SDValue Args[8] = {
646646 Chain,
510510 multiclass SteamOutputExportPattern<Instruction ExportInst,
511511 bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
512512 // Stream0
513 def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
513 def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src),
514514 (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
515515 (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
516516 4095, imm:$mask, buf0inst, 0)>;
517517 // Stream1
518 def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
518 def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src),
519519 (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
520520 (ExportInst $src, 0, imm:$arraybase,
521521 4095, imm:$mask, buf1inst, 0)>;
522522 // Stream2
523 def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
523 def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src),
524524 (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
525525 (ExportInst $src, 0, imm:$arraybase,
526526 4095, imm:$mask, buf2inst, 0)>;
527527 // Stream3
528 def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
528 def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src),
529529 (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
530530 (ExportInst $src, 0, imm:$arraybase,
531531 4095, imm:$mask, buf3inst, 0)>;
1010 //
1111 //===----------------------------------------------------------------------===//
1212
13 // FIXME: Should migrate to using TargetPrefix that matches triple arch name.
14 let TargetPrefix = "R600", isTarget = 1 in {
15 def int_R600_store_swizzle :
16 Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
17 def int_R600_store_stream_output :
18 Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
19 } // End TargetPrefix = "R600", isTarget = 1
13 class TextureIntrinsicFloatInput : Intrinsic<[llvm_v4f32_ty], [
14 llvm_v4f32_ty, // Coord
15 llvm_i32_ty, // offset_x
16 llvm_i32_ty, // offset_y,
17 llvm_i32_ty, // offset_z,
18 llvm_i32_ty, // resource_id
19 llvm_i32_ty, // samplerid
20 llvm_i32_ty, // coord_type_x
21 llvm_i32_ty, // coord_type_y
22 llvm_i32_ty, // coord_type_z
23 llvm_i32_ty], // coord_type_w
24 [IntrNoMem]
25 >;
26
27 class TextureIntrinsicInt32Input : Intrinsic<[llvm_v4i32_ty], [
28 llvm_v4i32_ty, // Coord
29 llvm_i32_ty, // offset_x
30 llvm_i32_ty, // offset_y,
31 llvm_i32_ty, // offset_z,
32 llvm_i32_ty, // resource_id
33 llvm_i32_ty, // samplerid
34 llvm_i32_ty, // coord_type_x
35 llvm_i32_ty, // coord_type_y
36 llvm_i32_ty, // coord_type_z
37 llvm_i32_ty], // coord_type_w
38 [IntrNoMem]
39 >;
2040
2141 let TargetPrefix = "r600", isTarget = 1 in {
22 class TextureIntrinsicFloatInput :
23 Intrinsic<[llvm_v4f32_ty], [
24 llvm_v4f32_ty, // Coord
25 llvm_i32_ty, // offset_x
26 llvm_i32_ty, // offset_y,
27 llvm_i32_ty, // offset_z,
28 llvm_i32_ty, // resource_id
29 llvm_i32_ty, // samplerid
30 llvm_i32_ty, // coord_type_x
31 llvm_i32_ty, // coord_type_y
32 llvm_i32_ty, // coord_type_z
33 llvm_i32_ty // coord_type_w
34 ], [IntrNoMem]>;
35 class TextureIntrinsicInt32Input :
36 Intrinsic<[llvm_v4i32_ty], [
37 llvm_v4i32_ty, // Coord
38 llvm_i32_ty, // offset_x
39 llvm_i32_ty, // offset_y,
40 llvm_i32_ty, // offset_z,
41 llvm_i32_ty, // resource_id
42 llvm_i32_ty, // samplerid
43 llvm_i32_ty, // coord_type_x
44 llvm_i32_ty, // coord_type_y
45 llvm_i32_ty, // coord_type_z
46 llvm_i32_ty // coord_type_w
47 ], [IntrNoMem]>;
4842
43 def int_r600_store_swizzle :
44 Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []
45 >;
4946
50 def int_r600_tex : TextureIntrinsicFloatInput;
51 def int_r600_texc : TextureIntrinsicFloatInput;
52 def int_r600_txl : TextureIntrinsicFloatInput;
53 def int_r600_txlc : TextureIntrinsicFloatInput;
54 def int_r600_txb : TextureIntrinsicFloatInput;
55 def int_r600_txbc : TextureIntrinsicFloatInput;
56 def int_r600_txf : TextureIntrinsicInt32Input;
57 def int_r600_txq : TextureIntrinsicInt32Input;
58 def int_r600_ddx : TextureIntrinsicFloatInput;
59 def int_r600_ddy : TextureIntrinsicFloatInput;
47 def int_r600_store_stream_output : Intrinsic<
48 [], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []
49 >;
6050
61 def int_r600_dot4 : Intrinsic<[llvm_float_ty],
62 [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]
63 >;
51 def int_r600_tex : TextureIntrinsicFloatInput;
52 def int_r600_texc : TextureIntrinsicFloatInput;
53 def int_r600_txl : TextureIntrinsicFloatInput;
54 def int_r600_txlc : TextureIntrinsicFloatInput;
55 def int_r600_txb : TextureIntrinsicFloatInput;
56 def int_r600_txbc : TextureIntrinsicFloatInput;
57 def int_r600_txf : TextureIntrinsicInt32Input;
58 def int_r600_txq : TextureIntrinsicInt32Input;
59 def int_r600_ddx : TextureIntrinsicFloatInput;
60 def int_r600_ddy : TextureIntrinsicFloatInput;
61
62 def int_r600_dot4 : Intrinsic<[llvm_float_ty],
63 [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]
64 >;
65
6466 } // End TargetPrefix = "r600", isTarget = 1
12831283 %tmp954 = insertelement <4 x float> %tmp953, float %result.i8, i32 1
12841284 %tmp955 = insertelement <4 x float> %tmp954, float %result.i4, i32 2
12851285 %tmp956 = insertelement <4 x float> %tmp955, float %tmp931, i32 3
1286 call void @llvm.R600.store.swizzle(<4 x float> %tmp956, i32 0, i32 0)
1286 call void @llvm.r600.store.swizzle(<4 x float> %tmp956, i32 0, i32 0)
12871287 ret void
12881288 }
12891289
13021302 ; Function Attrs: nounwind readnone
13031303 declare float @llvm.AMDGPU.clamp.f32(float, float, float) #0
13041304
1305 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
1305 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
13061306
13071307 ; Function Attrs: nounwind readnone
13081308 declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
88 %2 = fptoui float %1 to i32
99 %3 = bitcast i32 %2 to float
1010 %4 = insertelement <4 x float> undef, float %3, i32 0
11 call void @llvm.R600.store.swizzle(<4 x float> %4, i32 0, i32 0)
11 call void @llvm.r600.store.swizzle(<4 x float> %4, i32 0, i32 0)
1212 ret void
1313 }
1414
1515 declare float @fabs(float ) readnone
16 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
16 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
4343 %bc = fadd <4 x float> %b, %c
4444 %de = fadd <4 x float> %d, %e
4545 %bcde = fadd <4 x float> %bc, %de
46 call void @llvm.R600.store.swizzle(<4 x float> %bcde, i32 0, i32 1)
46 call void @llvm.r600.store.swizzle(<4 x float> %bcde, i32 0, i32 1)
4747 ret void
4848 }
4949
50 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
50 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
5151
5252 ; Function Attrs: readnone
5353 declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
8484 %bcde = fadd <4 x float> %bc, %de
8585 %fghi = fadd <4 x float> %fg, %hi
8686 %bcdefghi = fadd <4 x float> %bcde, %fghi
87 call void @llvm.R600.store.swizzle(<4 x float> %bcdefghi, i32 0, i32 1)
87 call void @llvm.r600.store.swizzle(<4 x float> %bcdefghi, i32 0, i32 1)
8888 ret void
8989 }
9090
91 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
91 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
9292
9393 ; Function Attrs: readnone
9494 declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
44 %r0 = extractelement <4 x float> %reg0, i32 0
55 %r1 = call float @floor(float %r0)
66 %vec = insertelement <4 x float> undef, float %r1, i32 0
7 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
7 call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
88 ret void
99 }
1010
1111 declare float @floor(float) readonly
12 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
12 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
1313
88 %r3 = fmul float %r0, %r1
99 %r4 = fadd float %r3, %r2
1010 %vec = insertelement <4 x float> undef, float %r4, i32 0
11 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
11 call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
1212 ret void
1313 }
1414
1515 declare float @fabs(float ) readnone
16 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
16 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
77 %r2 = fcmp oge float %r0, %r1
88 %r3 = select i1 %r2, float %r0, float %r1
99 %vec = insertelement <4 x float> undef, float %r3, i32 0
10 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
10 call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
1111 ret void
1212 }
1313
14 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
14 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
77 %r2 = fcmp uge float %r0, %r1
88 %r3 = select i1 %r2, float %r1, float %r0
99 %vec = insertelement <4 x float> undef, float %r3, i32 0
10 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
10 call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
1111 ret void
1212 }
1313
14 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
14 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
1212 ret void
1313 }
1414
15 declare float @llvm.R600.load.input(i32) readnone
15 declare float @llvm.r600.load.input(i32) readnone
1616
1717 declare void @llvm.AMDGPU.store.output(float, i32)
1818
2222 ret void
2323 }
2424
25 declare float @llvm.R600.load.input(i32) readnone
25 declare float @llvm.r600.load.input(i32) readnone
2626
2727 declare void @llvm.AMDGPU.store.output(float, i32)
2828
1414 %r2 = fsub float -0.000000e+00, %r1
1515 %r3 = call float @llvm.exp2.f32(float %r2)
1616 %vec = insertelement <4 x float> undef, float %r3, i32 0
17 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
17 call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
1818 ret void
1919 }
2020
2121 declare float @llvm.exp2.f32(float) readnone
2222 declare float @llvm.fabs.f32(float) readnone
23 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
23 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
3535 %17 = insertelement <4 x float> %16, float %temp1.0, i32 1
3636 %18 = insertelement <4 x float> %17, float %temp2.0, i32 2
3737 %19 = insertelement <4 x float> %18, float %temp3.0, i32 3
38 call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0)
38 call void @llvm.r600.store.swizzle(<4 x float> %19, i32 0, i32 0)
3939 ret void
4040
4141 IF13: ; preds = %ELSE
4646 br label %ENDIF
4747 }
4848
49 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
49 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
4343 %37 = insertelement <4 x float> %36, float %33, i32 1
4444 %38 = insertelement <4 x float> %37, float %34, i32 2
4545 %39 = insertelement <4 x float> %38, float %35, i32 3
46 call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
46 call void @llvm.r600.store.swizzle(<4 x float> %39, i32 0, i32 0)
4747 ret void
4848 }
4949
9191 %37 = insertelement <4 x float> %36, float %33, i32 1
9292 %38 = insertelement <4 x float> %37, float %34, i32 2
9393 %39 = insertelement <4 x float> %38, float %35, i32 3
94 call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
94 call void @llvm.r600.store.swizzle(<4 x float> %39, i32 0, i32 0)
9595 ret void
9696 }
9797
9898 declare float @llvm.AMDGPU.clamp.f32(float, float, float) readnone
99 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
99 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
3838 %tmp29 = insertelement <4 x float> %tmp28, float %tmp25, i32 3
3939 %tmp30 = shufflevector <4 x float> %tmp29, <4 x float> %tmp29, <4 x i32>
4040 %tmp31 = call <4 x float> @llvm.r600.tex(<4 x float> %tmp30, i32 0, i32 0, i32 0, i32 16, i32 0, i32 1, i32 1, i32 1, i32 1)
41 call void @llvm.R600.store.swizzle(<4 x float> %tmp31, i32 0, i32 0)
41 call void @llvm.r600.store.swizzle(<4 x float> %tmp31, i32 0, i32 0)
4242 ret void
4343 }
4444
4848 ; Function Attrs: nounwind readnone
4949 declare float @llvm.fabs.f32(float) #0
5050
51 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
51 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
5252
5353 ; Function Attrs: readnone
5454 declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
99 %r1 = extractelement <4 x float> %reg0, i32 1
1010 %r2 = call float @llvm.pow.f32( float %r0, float %r1)
1111 %vec = insertelement <4 x float> undef, float %r2, i32 0
12 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
12 call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
1313 ret void
1414 }
1515
2828 ;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}},
2929 define amdgpu_ps void @test2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
3030 %vec = call <4 x float> @llvm.pow.v4f32( <4 x float> %reg0, <4 x float> %reg1)
31 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
31 call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
3232 ret void
3333 }
3434
3535 declare float @llvm.pow.f32(float ,float ) readonly
3636 declare <4 x float> @llvm.pow.v4f32(<4 x float> ,<4 x float> ) readonly
37 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
37 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
8989 %85 = insertelement <4 x float> %84, float 0.000000e+00, i32 3
9090 %86 = call float @llvm.r600.dot4(<4 x float> %81, <4 x float> %85)
9191 %87 = insertelement <4 x float> undef, float %86, i32 0
92 call void @llvm.R600.store.swizzle(<4 x float> %87, i32 2, i32 2)
92 call void @llvm.r600.store.swizzle(<4 x float> %87, i32 2, i32 2)
9393 ret void
9494 }
9595
108108 ; Function Attrs: nounwind readonly
109109 declare float @llvm.pow.f32(float, float) #3
110110
111 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
111 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
112112
113113 attributes #1 = { readnone }
114114 attributes #2 = { readonly }
2424
2525 %17 = call float @llvm.r600.dot4(<4 x float> %15,<4 x float> %16)
2626 %18 = insertelement <4 x float> undef, float %17, i32 0
27 call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2)
27 call void @llvm.r600.store.swizzle(<4 x float> %18, i32 0, i32 2)
2828 ret void
2929 }
3030
5353
5454 %17 = call float @llvm.r600.dot4(<4 x float> %15,<4 x float> %16)
5555 %18 = insertelement <4 x float> undef, float %17, i32 0
56 call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2)
56 call void @llvm.r600.store.swizzle(<4 x float> %18, i32 0, i32 2)
5757 ret void
5858 }
5959
6060 ; Function Attrs: readnone
6161 declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
6262
63 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
63 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
6464
6565 attributes #1 = { readnone }
1616 ENDIF: ; preds = %IF, %main_body
1717 %5 = phi float [%4, %IF], [0.000000e+00, %main_body]
1818 %6 = insertelement <4 x float> undef, float %5, i32 0
19 call void @llvm.R600.store.swizzle(<4 x float> %6, i32 0, i32 0)
19 call void @llvm.r600.store.swizzle(<4 x float> %6, i32 0, i32 0)
2020 ret void
2121 }
2222
2323 declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
24 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
24 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
2525 attributes #1 = { readnone }
3030 %23 = insertelement <4 x float> %22, float %19, i32 3
3131 %24 = call float @llvm.r600.dot4(<4 x float> %23, <4 x float> %10)
3232 %25 = insertelement <4 x float> undef, float %24, i32 0
33 call void @llvm.R600.store.swizzle(<4 x float> %25, i32 0, i32 2)
33 call void @llvm.r600.store.swizzle(<4 x float> %25, i32 0, i32 2)
3434 ret void
3535 }
3636
3838 declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
3939
4040
41 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
41 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
4242
4343 attributes #1 = { readnone }
208208 %201 = insertelement <4 x float> %200, float %79, i32 1
209209 %202 = insertelement <4 x float> %201, float %83, i32 2
210210 %203 = insertelement <4 x float> %202, float %87, i32 3
211 call void @llvm.R600.store.swizzle(<4 x float> %203, i32 60, i32 1)
211 call void @llvm.r600.store.swizzle(<4 x float> %203, i32 60, i32 1)
212212 %204 = insertelement <4 x float> undef, float %197, i32 0
213213 %205 = insertelement <4 x float> %204, float %198, i32 1
214214 %206 = insertelement <4 x float> %205, float %199, i32 2
215215 %207 = insertelement <4 x float> %206, float %117, i32 3
216 call void @llvm.R600.store.swizzle(<4 x float> %207, i32 0, i32 2)
216 call void @llvm.r600.store.swizzle(<4 x float> %207, i32 0, i32 2)
217217 ret void
218218 }
219219
232232 ; Function Attrs: nounwind readonly
233233 declare float @llvm.pow.f32(float, float) #2
234234
235 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) #3
235 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) #3
236236
237237 attributes #1 = { nounwind readnone }
238238 attributes #2 = { nounwind readonly }
1515 %r1 = extractelement <4 x float> %reg0, i32 1
1616 %r2 = fmul float %r0, %r1
1717 %vec = insertelement <4 x float> undef, float %r2, i32 0
18 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
18 call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
1919 ret void
2020 }
2121
22 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
22 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
9797 %83 = insertelement <4 x float> %82, float %55, i32 1
9898 %84 = insertelement <4 x float> %83, float %59, i32 2
9999 %85 = insertelement <4 x float> %84, float %63, i32 3
100 call void @llvm.R600.store.swizzle(<4 x float> %85, i32 60, i32 1)
100 call void @llvm.r600.store.swizzle(<4 x float> %85, i32 60, i32 1)
101101 %86 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
102102 %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 1
103103 %88 = insertelement <4 x float> %87, float 0.000000e+00, i32 2
104104 %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3
105 call void @llvm.R600.store.swizzle(<4 x float> %89, i32 0, i32 2)
105 call void @llvm.r600.store.swizzle(<4 x float> %89, i32 0, i32 2)
106106 %90 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
107107 %91 = insertelement <4 x float> %90, float 0.000000e+00, i32 1
108108 %92 = insertelement <4 x float> %91, float 0.000000e+00, i32 2
109109 %93 = insertelement <4 x float> %92, float 0.000000e+00, i32 3
110 call void @llvm.R600.store.swizzle(<4 x float> %93, i32 1, i32 2)
110 call void @llvm.r600.store.swizzle(<4 x float> %93, i32 1, i32 2)
111111 %94 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
112112 %95 = insertelement <4 x float> %94, float %65, i32 1
113113 %96 = insertelement <4 x float> %95, float %67, i32 2
114114 %97 = insertelement <4 x float> %96, float %69, i32 3
115 call void @llvm.R600.store.swizzle(<4 x float> %97, i32 2, i32 2)
115 call void @llvm.r600.store.swizzle(<4 x float> %97, i32 2, i32 2)
116116 %98 = insertelement <4 x float> undef, float %77, i32 0
117117 %99 = insertelement <4 x float> %98, float %79, i32 1
118118 %100 = insertelement <4 x float> %99, float %81, i32 2
119119 %101 = insertelement <4 x float> %100, float %71, i32 3
120 call void @llvm.R600.store.swizzle(<4 x float> %101, i32 3, i32 2)
120 call void @llvm.r600.store.swizzle(<4 x float> %101, i32 3, i32 2)
121121 %102 = insertelement <4 x float> undef, float %73, i32 0
122122 %103 = insertelement <4 x float> %102, float %75, i32 1
123123 %104 = insertelement <4 x float> %103, float 0.000000e+00, i32 2
124124 %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 3
125 call void @llvm.R600.store.swizzle(<4 x float> %105, i32 4, i32 2)
125 call void @llvm.r600.store.swizzle(<4 x float> %105, i32 4, i32 2)
126126 %106 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
127127 %107 = insertelement <4 x float> %106, float 0.000000e+00, i32 1
128128 %108 = insertelement <4 x float> %107, float 0.000000e+00, i32 2
129129 %109 = insertelement <4 x float> %108, float 0.000000e+00, i32 3
130 call void @llvm.R600.store.swizzle(<4 x float> %109, i32 5, i32 2)
130 call void @llvm.r600.store.swizzle(<4 x float> %109, i32 5, i32 2)
131131 %110 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
132132 %111 = insertelement <4 x float> %110, float 0.000000e+00, i32 1
133133 %112 = insertelement <4 x float> %111, float 0.000000e+00, i32 2
134134 %113 = insertelement <4 x float> %112, float 0.000000e+00, i32 3
135 call void @llvm.R600.store.swizzle(<4 x float> %113, i32 6, i32 2)
135 call void @llvm.r600.store.swizzle(<4 x float> %113, i32 6, i32 2)
136136 ret void
137137 }
138138
139 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
139 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
3939 %tmp36 = insertelement <4 x float> %tmp35, float %tmp34, i32 1
4040 %tmp37 = insertelement <4 x float> %tmp36, float %tmp34, i32 2
4141 %tmp38 = insertelement <4 x float> %tmp37, float 1.000000e+00, i32 3
42 call void @llvm.R600.store.swizzle(<4 x float> %tmp38, i32 0, i32 0)
42 call void @llvm.r600.store.swizzle(<4 x float> %tmp38, i32 0, i32 0)
4343 ret void
4444 }
4545
4949 ; Function Attrs: readnone
5050 declare float @fabs(float) #0
5151
52 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
52 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
5353
5454 ; Function Attrs: readnone
5555 declare <4 x float> @llvm.r600.texc(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
3131 %17 = insertelement <4 x float> %16, float %temp8.0, i32 1
3232 %18 = insertelement <4 x float> %17, float %temp12.0, i32 2
3333 %19 = insertelement <4 x float> %18, float 0.000000e+00, i32 3
34 call void @llvm.R600.store.stream.output(<4 x float> %19, i32 0, i32 0, i32 1)
34 call void @llvm.r600.store.stream.output(<4 x float> %19, i32 0, i32 0, i32 1)
3535 %20 = insertelement <4 x float> undef, float %0, i32 0
3636 %21 = insertelement <4 x float> %20, float %temp8.0, i32 1
3737 %22 = insertelement <4 x float> %21, float %temp12.0, i32 2
3838 %23 = insertelement <4 x float> %22, float 0.000000e+00, i32 3
39 call void @llvm.R600.store.stream.output(<4 x float> %23, i32 0, i32 0, i32 2)
39 call void @llvm.r600.store.stream.output(<4 x float> %23, i32 0, i32 0, i32 2)
4040 %24 = insertelement <4 x float> undef, float %0, i32 0
4141 %25 = insertelement <4 x float> %24, float %temp8.0, i32 1
4242 %26 = insertelement <4 x float> %25, float %temp12.0, i32 2
4343 %27 = insertelement <4 x float> %26, float 0.000000e+00, i32 3
44 call void @llvm.R600.store.stream.output(<4 x float> %27, i32 0, i32 0, i32 4)
44 call void @llvm.r600.store.stream.output(<4 x float> %27, i32 0, i32 0, i32 4)
4545 %28 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
4646 %29 = insertelement <4 x float> %28, float 0.000000e+00, i32 1
4747 %30 = insertelement <4 x float> %29, float 0.000000e+00, i32 2
4848 %31 = insertelement <4 x float> %30, float 0.000000e+00, i32 3
49 call void @llvm.R600.store.swizzle(<4 x float> %31, i32 60, i32 1)
49 call void @llvm.r600.store.swizzle(<4 x float> %31, i32 60, i32 1)
5050 %32 = insertelement <4 x float> undef, float %0, i32 0
5151 %33 = insertelement <4 x float> %32, float %temp8.0, i32 1
5252 %34 = insertelement <4 x float> %33, float %temp12.0, i32 2
5353 %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 3
54 call void @llvm.R600.store.swizzle(<4 x float> %35, i32 0, i32 2)
54 call void @llvm.r600.store.swizzle(<4 x float> %35, i32 0, i32 2)
5555 ret void
5656
5757 ENDIF40: ; preds = %LOOP
111111 br label %LOOP47
112112 }
113113
114 declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)
114 declare void @llvm.r600.store.stream.output(<4 x float>, i32, i32, i32)
115115
116 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
116 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
55 %r0 = extractelement <4 x float> %reg0, i32 0
66 %r1 = fdiv float 1.0, %r0
77 %vec = insertelement <4 x float> undef, float %r1, i32 0
8 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
8 call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
99 ret void
1010 }
1111
12 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
12 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
3939 %tmp34 = fadd <4 x float> %tmp33, %tmp23
4040 %tmp35 = fadd <4 x float> %tmp34, %tmp25
4141 %tmp36 = fadd <4 x float> %tmp35, %tmp27
42 call void @llvm.R600.store.swizzle(<4 x float> %tmp36, i32 0, i32 2)
42 call void @llvm.r600.store.swizzle(<4 x float> %tmp36, i32 0, i32 2)
4343 ret void
4444 }
4545
46 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
46 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
4747
4848 ; Function Attrs: nounwind readnone
4949 declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
5151 %32 = insertelement <4 x float> %31, float %28, i32 1
5252 %33 = insertelement <4 x float> %32, float %29, i32 2
5353 %34 = insertelement <4 x float> %33, float %30, i32 3
54 call void @llvm.R600.store.swizzle(<4 x float> %34, i32 0, i32 0)
54 call void @llvm.r600.store.swizzle(<4 x float> %34, i32 0, i32 0)
5555 ret void
5656
5757 ELSE17: ; preds = %ELSE
7575
7676 declare float @llvm.AMDGPU.clamp.f32(float, float, float) #0
7777
78 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
78 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
7979
8080 attributes #0 = { readnone }
5050 %35 = insertelement <4 x float> %34, float %31, i32 1
5151 %36 = insertelement <4 x float> %35, float %32, i32 2
5252 %37 = insertelement <4 x float> %36, float %33, i32 3
53 call void @llvm.R600.store.swizzle(<4 x float> %37, i32 0, i32 0)
53 call void @llvm.r600.store.swizzle(<4 x float> %37, i32 0, i32 0)
5454 ret void
5555
5656 LOOP29: ; preds = %LOOP, %ENDIF30
8282
8383 declare float @llvm.AMDGPU.clamp.f32(float, float, float) #0
8484
85 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
85 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
8686
8787 attributes #0 = { readnone }
3737 %22 = insertelement <4 x float> %21, float %18, i32 1
3838 %23 = insertelement <4 x float> %22, float %19, i32 2
3939 %24 = insertelement <4 x float> %23, float %20, i32 3
40 call void @llvm.R600.store.swizzle(<4 x float> %24, i32 0, i32 0)
40 call void @llvm.r600.store.swizzle(<4 x float> %24, i32 0, i32 0)
4141 ret void
4242
4343 ENDIF: ; preds = %LOOP
4949
5050 declare float @llvm.AMDGPU.clamp.f32(float, float, float) #0
5151
52 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
52 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
5353
5454 attributes #0 = { readnone }
6565 %45 = insertelement <4 x float> %44, float %temp5.0, i32 1
6666 %46 = insertelement <4 x float> %45, float %temp6.0, i32 2
6767 %47 = insertelement <4 x float> %46, float %temp7.0, i32 3
68 call void @llvm.R600.store.swizzle(<4 x float> %47, i32 0, i32 0)
68 call void @llvm.r600.store.swizzle(<4 x float> %47, i32 0, i32 0)
6969 ret void
7070
7171 IF23: ; preds = %ELSE
8888
8989 declare float @fabs(float) #0
9090
91 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
91 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
9292
9393 attributes #0 = { readonly }
3131 %17 = insertelement <4 x float> %16, float %temp1.0, i32 1
3232 %18 = insertelement <4 x float> %17, float 0.000000e+00, i32 2
3333 %19 = insertelement <4 x float> %18, float %temp3.0, i32 3
34 call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0)
34 call void @llvm.r600.store.swizzle(<4 x float> %19, i32 0, i32 0)
3535 ret void
3636
3737 IF13: ; preds = %ELSE
4242 br label %ENDIF
4343 }
4444
45 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
45 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
8484 %72 = insertelement <4 x float> %71, float %62, i32 1
8585 %73 = insertelement <4 x float> %72, float %66, i32 2
8686 %74 = insertelement <4 x float> %73, float %70, i32 3
87 call void @llvm.R600.store.swizzle(<4 x float> %74, i32 60, i32 1)
87 call void @llvm.r600.store.swizzle(<4 x float> %74, i32 60, i32 1)
8888 %75 = insertelement <4 x float> undef, float %temp.0, i32 0
8989 %76 = insertelement <4 x float> %75, float %temp1.0, i32 1
9090 %77 = insertelement <4 x float> %76, float %temp2.0, i32 2
9191 %78 = insertelement <4 x float> %77, float %temp3.0, i32 3
92 call void @llvm.R600.store.swizzle(<4 x float> %78, i32 0, i32 2)
92 call void @llvm.r600.store.swizzle(<4 x float> %78, i32 0, i32 2)
9393 ret void
9494
9595 LOOP: ; preds = %main_body, %ENDIF19
126126 br label %LOOP
127127 }
128128
129 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
129 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
1818 %v2 = insertelement <4 x float> %v1, float %r2, i32 2
1919 %res = call float @llvm.r600.dot4(<4 x float> %v2, <4 x float> %v2)
2020 %vecres = insertelement <4 x float> undef, float %res, i32 0
21 call void @llvm.R600.store.swizzle(<4 x float> %vecres, i32 0, i32 2)
21 call void @llvm.r600.store.swizzle(<4 x float> %vecres, i32 0, i32 2)
2222 ret void
2323 }
2424
2525 ; Function Attrs: readnone
2626 declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
2727
28 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
28 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
2929
3030 attributes #1 = { readnone }
6767 %57 = insertelement <4 x float> %56, float %1, i32 1
6868 %58 = insertelement <4 x float> %57, float %2, i32 2
6969 %59 = insertelement <4 x float> %58, float %3, i32 3
70 call void @llvm.R600.store.swizzle(<4 x float> %59, i32 60, i32 1)
70 call void @llvm.r600.store.swizzle(<4 x float> %59, i32 60, i32 1)
7171 %60 = insertelement <4 x float> undef, float %10, i32 0
7272 %61 = insertelement <4 x float> %60, float %13, i32 1
7373 %62 = insertelement <4 x float> %61, float %16, i32 2
7474 %63 = insertelement <4 x float> %62, float %19, i32 3
75 call void @llvm.R600.store.swizzle(<4 x float> %63, i32 0, i32 2)
75 call void @llvm.r600.store.swizzle(<4 x float> %63, i32 0, i32 2)
7676 %64 = insertelement <4 x float> undef, float %22, i32 0
7777 %65 = insertelement <4 x float> %64, float %25, i32 1
7878 %66 = insertelement <4 x float> %65, float %28, i32 2
7979 %67 = insertelement <4 x float> %66, float %31, i32 3
80 call void @llvm.R600.store.swizzle(<4 x float> %67, i32 1, i32 2)
80 call void @llvm.r600.store.swizzle(<4 x float> %67, i32 1, i32 2)
8181 %68 = insertelement <4 x float> undef, float %34, i32 0
8282 %69 = insertelement <4 x float> %68, float %37, i32 1
8383 %70 = insertelement <4 x float> %69, float %40, i32 2
8484 %71 = insertelement <4 x float> %70, float %43, i32 3
85 call void @llvm.R600.store.swizzle(<4 x float> %71, i32 2, i32 2)
85 call void @llvm.r600.store.swizzle(<4 x float> %71, i32 2, i32 2)
8686 %72 = insertelement <4 x float> undef, float %46, i32 0
8787 %73 = insertelement <4 x float> %72, float %49, i32 1
8888 %74 = insertelement <4 x float> %73, float %52, i32 2
8989 %75 = insertelement <4 x float> %74, float %55, i32 3
90 call void @llvm.R600.store.swizzle(<4 x float> %75, i32 3, i32 2)
90 call void @llvm.r600.store.swizzle(<4 x float> %75, i32 3, i32 2)
9191 ret void
9292 }
9393
110110 %10 = extractelement <4 x float> %9, i32 1
111111 %11 = insertelement <4 x float> undef, float %2, i32 0
112112 %12 = insertelement <4 x float> %11, float %3, i32 1
113 call void @llvm.R600.store.swizzle(<4 x float> %12, i32 60, i32 1)
113 call void @llvm.r600.store.swizzle(<4 x float> %12, i32 60, i32 1)
114114 %13 = insertelement <4 x float> undef, float %6, i32 0
115115 %14 = insertelement <4 x float> %13, float %8, i32 1
116116 %15 = insertelement <4 x float> %14, float %10, i32 2
117117 %16 = insertelement <4 x float> %15, float 0.000000e+00, i32 3
118 call void @llvm.R600.store.swizzle(<4 x float> %16, i32 0, i32 2)
118 call void @llvm.r600.store.swizzle(<4 x float> %16, i32 0, i32 2)
119119 ret void
120120 }
121121
122122 ; Function Attrs: nounwind readonly
123123 declare float @llvm.cos.f32(float) #1
124124
125 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
125 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
126126
127127 attributes #1 = { nounwind readonly }
1414 %9 = call <4 x float> @llvm.r600.tex(<4 x float> %8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
1515 %10 = call <4 x float> @llvm.r600.tex(<4 x float> %8, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
1616 %11 = fadd <4 x float> %9, %10
17 call void @llvm.R600.store.swizzle(<4 x float> %11, i32 0, i32 0)
17 call void @llvm.r600.store.swizzle(<4 x float> %11, i32 0, i32 0)
1818 ret void
1919 }
2020
2121 declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
22 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
22 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
2020 %16 = call <4 x float> @llvm.r600.tex(<4 x float> %13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
2121 %17 = fadd <4 x float> %14, %15
2222 %18 = fadd <4 x float> %17, %16
23 call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 0)
23 call void @llvm.r600.store.swizzle(<4 x float> %18, i32 0, i32 0)
2424 ret void
2525 }
2626
2727 declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
28 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
28 declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)