llvm / 0a92af4 (llvm.org GIT mirror)

More working CellSPU test cases:
- call.ll: Function call
- ctpop.ll: Count population
- dp_farith.ll: DP arithmetic
- eqv.ll: Equivalence primitives
- fcmp.ll: SP comparisons
- fdiv.ll: SP division
- fneg-fabs.ll: SP negation, absolute value
- int2fp.ll: Integer -> SP conversion
- rotate_ops.ll: Rotation primitives
- select_bits.ll: (a & c) | (b & ~c) bit selection
- shift_ops.ll: Shift primitives
- sp_farith.ll: SP arithmetic

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45217 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Scott Michel

12 changed files, 1164 additions, 0 deletions.

call.ll

; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep brsl %t1.s | count 1 &&
; RUN: grep brasl %t1.s | count 1

target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"

define i32 @main() {
entry:
  %a = call i32 @stub_1(i32 1, float 0x400921FA00000000)
  call void @extern_stub_1(i32 %a, i32 4)
  ret i32 %a
}

declare void @extern_stub_1(i32, i32)

define i32 @stub_1(i32 %x, float %y) {
entry:
  ret i32 0
}

ctpop.ll

; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep cntb %t1.s | count 3 &&
; RUN: grep andi %t1.s | count 3 &&
; RUN: grep rotmi %t1.s | count 2 &&
; RUN: grep rothmi %t1.s | count 1

declare i32 @llvm.ctpop.i8(i8)
declare i32 @llvm.ctpop.i16(i16)
declare i32 @llvm.ctpop.i32(i32)

define i32 @test_i8(i8 %X) {
  ; the unassigned call result is implicitly numbered %1 (the unnamed entry
  ; block takes %0), which the no-op bitcast below picks up
  call i32 @llvm.ctpop.i8(i8 %X)
  %Y = bitcast i32 %1 to i32
  ret i32 %Y
}

define i32 @test_i16(i16 %X) {
  call i32 @llvm.ctpop.i16(i16 %X)
  %Y = bitcast i32 %1 to i32
  ret i32 %Y
}

define i32 @test_i32(i32 %X) {
  call i32 @llvm.ctpop.i32(i32 %X)
  %Y = bitcast i32 %1 to i32
  ret i32 %Y
}

dp_farith.ll

; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep dfa %t1.s | count 2 &&
; RUN: grep dfs %t1.s | count 2 &&
; RUN: grep dfm %t1.s | count 6 &&
; RUN: grep dfma %t1.s | count 2 &&
; RUN: grep dfms %t1.s | count 2 &&
; RUN: grep dfnms %t1.s | count 4
;
; This file includes double precision floating point arithmetic instructions

define double @fadd(double %arg1, double %arg2) {
  %A = add double %arg1, %arg2
  ret double %A
}

define <2 x double> @fadd_vec(<2 x double> %arg1, <2 x double> %arg2) {
  %A = add <2 x double> %arg1, %arg2
  ret <2 x double> %A
}

define double @fsub(double %arg1, double %arg2) {
  %A = sub double %arg1, %arg2
  ret double %A
}

define <2 x double> @fsub_vec(<2 x double> %arg1, <2 x double> %arg2) {
  %A = sub <2 x double> %arg1, %arg2
  ret <2 x double> %A
}

define double @fmul(double %arg1, double %arg2) {
  %A = mul double %arg1, %arg2
  ret double %A
}

define <2 x double> @fmul_vec(<2 x double> %arg1, <2 x double> %arg2) {
  %A = mul <2 x double> %arg1, %arg2
  ret <2 x double> %A
}

define double @fma(double %arg1, double %arg2, double %arg3) {
  %A = mul double %arg1, %arg2
  %B = add double %A, %arg3
  ret double %B
}

define <2 x double> @fma_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
  %A = mul <2 x double> %arg1, %arg2
  %B = add <2 x double> %A, %arg3
  ret <2 x double> %B
}

define double @fms(double %arg1, double %arg2, double %arg3) {
  %A = mul double %arg1, %arg2
  %B = sub double %A, %arg3
  ret double %B
}

define <2 x double> @fms_vec(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
  %A = mul <2 x double> %arg1, %arg2
  %B = sub <2 x double> %A, %arg3
  ret <2 x double> %B
}

; - (a * b - c)
define double @d_fnms_1(double %arg1, double %arg2, double %arg3) {
  %A = mul double %arg1, %arg2
  %B = sub double %A, %arg3
  %C = sub double -0.000000e+00, %B
  ret double %C
}

; Another way of getting fnms:
; - (a * b) + c => c - (a * b)
define double @d_fnms_2(double %arg1, double %arg2, double %arg3) {
  %A = mul double %arg1, %arg2
  %B = sub double %arg3, %A
  ret double %B
}

; FNMS: - (a * b - c) => c - (a * b)
define <2 x double> @d_fnms_vec_1(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
  %A = mul <2 x double> %arg1, %arg2
  %B = sub <2 x double> %arg3, %A
  ret <2 x double> %B
}

; Another way to get fnms, using a constant vector
; - (a * b - c)
define <2 x double> @d_fnms_vec_2(<2 x double> %arg1, <2 x double> %arg2, <2 x double> %arg3) {
  %A = mul <2 x double> %arg1, %arg2
  %B = sub <2 x double> %A, %arg3
  %C = sub <2 x double> < double -0.000000e+00, double -0.000000e+00 >, %B
  ret <2 x double> %C
}

;define double @fdiv_1(double %arg1, double %arg2) {
;  %A = fdiv double %arg1, %arg2
;  ret double %A
;}
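
The d_fnms_* cases above all lean on the rewrite -(a * b - c) = c - (a * b). Since negation is exact and round-to-nearest rounding is sign-symmetric, the two spellings agree bit-for-bit even in floating point. A minimal C check of the identity (illustrative only, not part of the commit):

    #include <assert.h>

    int main(void) {
        double a = 1.5, b = -2.25, c = 0.75;
        /* both spellings of the fnms expression */
        assert(-(a * b - c) == c - a * b);
        return 0;
    }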

eqv.ll

; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep eqv %t1.s | count 18 &&
; RUN: grep xshw %t1.s | count 6 &&
; RUN: grep xsbh %t1.s | count 3 &&
; RUN: grep andi %t1.s | count 3

; Test the 'eqv' instruction, whose boolean expression is:
; (a & b) | (~a & ~b), which simplifies to
; (a & b) | ~(a | b)
; Alternatively, a ^ ~b, which the compiler will also match.
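
The identity chain in that comment can be sanity-checked exhaustively at byte width. A minimal, self-contained C check (illustrative only, not part of the commit):

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        for (int a = 0; a < 256; ++a) {
            for (int b = 0; b < 256; ++b) {
                uint8_t x = (uint8_t)a, y = (uint8_t)b;
                uint8_t f1 = (x & y) | (uint8_t)(~x & ~y); /* original form   */
                uint8_t f2 = (x & y) | (uint8_t)~(x | y);  /* after De Morgan */
                uint8_t f3 = x ^ (uint8_t)~y;              /* eqv = xor-not   */
                assert(f1 == f2 && f2 == f3);
            }
        }
        return 0;
    }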

; ModuleID = 'eqv.bc'

define <4 x i32> @equiv_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2) {
  %A = and <4 x i32> %arg1, %arg2
  %B = or <4 x i32> %arg1, %arg2
  %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 >
  %C = or <4 x i32> %A, %Bnot
  ret <4 x i32> %C
}

define <4 x i32> @equiv_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2) {
  %B = or <4 x i32> %arg1, %arg2
  %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 >
  %A = and <4 x i32> %arg1, %arg2
  %C = or <4 x i32> %A, %Bnot
  ret <4 x i32> %C
}

define <4 x i32> @equiv_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2) {
  %B = or <4 x i32> %arg1, %arg2
  %A = and <4 x i32> %arg1, %arg2
  %Bnot = xor <4 x i32> %B, < i32 -1, i32 -1, i32 -1, i32 -1 >
  %C = or <4 x i32> %A, %Bnot
  ret <4 x i32> %C
}

define <4 x i32> @equiv_v4i32_4(<4 x i32> %arg1, <4 x i32> %arg2) {
  %arg2not = xor <4 x i32> %arg2, < i32 -1, i32 -1, i32 -1, i32 -1 >
  %C = xor <4 x i32> %arg1, %arg2not
  ret <4 x i32> %C
}

define i32 @equiv_i32_1(i32 %arg1, i32 %arg2) {
  %A = and i32 %arg1, %arg2
  %B = or i32 %arg1, %arg2
  %Bnot = xor i32 %B, -1
  %C = or i32 %A, %Bnot
  ret i32 %C
}

define i32 @equiv_i32_2(i32 %arg1, i32 %arg2) {
  %B = or i32 %arg1, %arg2
  %Bnot = xor i32 %B, -1
  %A = and i32 %arg1, %arg2
  %C = or i32 %A, %Bnot
  ret i32 %C
}

define i32 @equiv_i32_3(i32 %arg1, i32 %arg2) {
  %B = or i32 %arg1, %arg2
  %A = and i32 %arg1, %arg2
  %Bnot = xor i32 %B, -1
  %C = or i32 %A, %Bnot
  ret i32 %C
}

define i32 @equiv_i32_4(i32 %arg1, i32 %arg2) {
  %arg2not = xor i32 %arg2, -1
  %C = xor i32 %arg1, %arg2not
  ret i32 %C
}

define i32 @equiv_i32_5(i32 %arg1, i32 %arg2) {
  %arg1not = xor i32 %arg1, -1
  %C = xor i32 %arg2, %arg1not
  ret i32 %C
}

define i16 @equiv_i16_1(i16 signext %arg1, i16 signext %arg2) signext {
  %A = and i16 %arg1, %arg2
  %B = or i16 %arg1, %arg2
  %Bnot = xor i16 %B, -1
  %C = or i16 %A, %Bnot
  ret i16 %C
}

define i16 @equiv_i16_2(i16 signext %arg1, i16 signext %arg2) signext {
  %B = or i16 %arg1, %arg2
  %Bnot = xor i16 %B, -1
  %A = and i16 %arg1, %arg2
  %C = or i16 %A, %Bnot
  ret i16 %C
}

define i16 @equiv_i16_3(i16 signext %arg1, i16 signext %arg2) signext {
  %B = or i16 %arg1, %arg2
  %A = and i16 %arg1, %arg2
  %Bnot = xor i16 %B, -1
  %C = or i16 %A, %Bnot
  ret i16 %C
}

define i8 @equiv_i8_1(i8 signext %arg1, i8 signext %arg2) signext {
  %A = and i8 %arg1, %arg2
  %B = or i8 %arg1, %arg2
  %Bnot = xor i8 %B, -1
  %C = or i8 %A, %Bnot
  ret i8 %C
}

define i8 @equiv_i8_2(i8 signext %arg1, i8 signext %arg2) signext {
  %B = or i8 %arg1, %arg2
  %Bnot = xor i8 %B, -1
  %A = and i8 %arg1, %arg2
  %C = or i8 %A, %Bnot
  ret i8 %C
}

define i8 @equiv_i8_3(i8 signext %arg1, i8 signext %arg2) signext {
  %B = or i8 %arg1, %arg2
  %A = and i8 %arg1, %arg2
  %Bnot = xor i8 %B, -1
  %C = or i8 %A, %Bnot
  ret i8 %C
}

define i8 @equiv_u8_1(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
  %A = and i8 %arg1, %arg2
  %B = or i8 %arg1, %arg2
  %Bnot = xor i8 %B, -1
  %C = or i8 %A, %Bnot
  ret i8 %C
}

define i8 @equiv_u8_2(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
  %B = or i8 %arg1, %arg2
  %Bnot = xor i8 %B, -1
  %A = and i8 %arg1, %arg2
  %C = or i8 %A, %Bnot
  ret i8 %C
}

define i8 @equiv_u8_3(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
  %B = or i8 %arg1, %arg2
  %A = and i8 %arg1, %arg2
  %Bnot = xor i8 %B, -1
  %C = or i8 %A, %Bnot
  ret i8 %C
}

fcmp.ll

; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep fceq %t1.s | count 1 &&
; RUN: grep fcmeq %t1.s | count 1
;
; This file tests single precision floating point comparisons: fceq for a
; plain equality compare and fcmeq for a magnitude (absolute value) compare

declare double @fabs(double)
declare float @fabsf(float)

define i1 @fcmp_eq(float %arg1, float %arg2) {
  %A = fcmp oeq float %arg1, %arg2
  ret i1 %A
}

define i1 @fcmp_mag_eq(float %arg1, float %arg2) {
  %A = call float @fabsf(float %arg1)
  %B = call float @fabsf(float %arg2)
  %C = fcmp oeq float %A, %B
  ret i1 %C
}

fdiv.ll

; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep frest %t1.s | count 2 &&
; RUN: grep fi %t1.s | count 2 &&
; RUN: grep fm %t1.s | count 4 &&
; RUN: grep fma %t1.s | count 2 &&
; RUN: grep fnms %t1.s | count 2
;
; This file tests single precision floating point division, which CellSPU
; expands into a compound reciprocal-estimate sequence

define float @fdiv32(float %arg1, float %arg2) {
  %A = fdiv float %arg1, %arg2
  ret float %A
}

define <4 x float> @fdiv_v4f32(<4 x float> %arg1, <4 x float> %arg2) {
  %A = fdiv <4 x float> %arg1, %arg2
  ret <4 x float> %A
}
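
The grep lines suggest the shape of that expansion: a low-precision reciprocal estimate (frest, refined by fi), a multiply, and one fused correction step. A hedged C sketch of the general structure (illustrative only; the stand-in estimate and the exact instruction pairing are my assumptions, not taken from this commit):

    #include <stdio.h>

    /* assumed structure only: the real sequence starts from the
       low-precision frest/fi estimate, which has no C equivalent */
    static float reciprocal_estimate_div(float a, float b) {
        float y = 1.0f / b;   /* stand-in for the frest + fi estimate */
        float q = a * y;      /* fm:   initial quotient q = a * y     */
        float e = a - q * b;  /* fnms: residual e = -(q * b - a)      */
        return q + e * y;     /* fma:  corrected quotient q + e * y   */
    }

    int main(void) {
        printf("%f\n", reciprocal_estimate_div(1.0f, 3.0f)); /* ~0.333333 */
        return 0;
    }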

fneg-fabs.ll

; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep fsmbi %t1.s | count 3 &&
; RUN: grep 32768 %t1.s | count 2 &&
; RUN: grep xor %t1.s | count 4 &&
; RUN: grep and %t1.s | count 5 &&
; RUN: grep andbi %t1.s | count 3

define double @fneg_dp(double %X) {
  %Y = sub double -0.000000e+00, %X
  ret double %Y
}

define <2 x double> @fneg_dp_vec(<2 x double> %X) {
  %Y = sub <2 x double> < double -0.000000e+00, double -0.000000e+00 >, %X
  ret <2 x double> %Y
}

define float @fneg_sp(float %X) {
  %Y = sub float -0.000000e+00, %X
  ret float %Y
}

define <4 x float> @fneg_sp_vec(<4 x float> %X) {
  %Y = sub <4 x float> < float -0.000000e+00, float -0.000000e+00,
                         float -0.000000e+00, float -0.000000e+00 >, %X
  ret <4 x float> %Y
}

declare double @fabs(double)

declare float @fabsf(float)

define double @fabs_dp(double %X) {
  %Y = call double @fabs( double %X )
  ret double %Y
}

define float @fabs_sp(float %X) {
  %Y = call float @fabsf( float %X )
  ret float %Y
}

int2fp.ll

; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep csflt %t1.s | count 5 &&
; RUN: grep cuflt %t1.s | count 1 &&
; RUN: grep xshw %t1.s | count 2 &&
; RUN: grep xsbh %t1.s | count 1 &&
; RUN: grep and %t1.s | count 2 &&
; RUN: grep andi %t1.s | count 1 &&
; RUN: grep ila %t1.s | count 1

define float @sitofp_i32(i32 %arg1) {
  %A = sitofp i32 %arg1 to float
  ret float %A
}

define float @uitofp_u32(i32 %arg1) {
  %A = uitofp i32 %arg1 to float
  ret float %A
}

define float @sitofp_i16(i16 %arg1) {
  %A = sitofp i16 %arg1 to float
  ret float %A
}

define float @uitofp_i16(i16 %arg1) {
  %A = uitofp i16 %arg1 to float
  ret float %A
}

define float @sitofp_i8(i8 %arg1) {
  %A = sitofp i8 %arg1 to float
  ret float %A
}

define float @uitofp_i8(i8 %arg1) {
  %A = uitofp i8 %arg1 to float
  ret float %A
}

rotate_ops.ll

; RUN: llvm-as -o - %s | llc -march=cellspu -f -o %t1.s
; RUN: grep rot %t1.s | count 85
; RUN: grep roth %t1.s | count 8
; RUN: grep roti.*5 %t1.s | count 1
; RUN: grep roti.*27 %t1.s | count 1
; RUN: grep rothi.*5 %t1.s | count 2
; RUN: grep rothi.*11 %t1.s | count 1
; RUN: grep rothi.*,.3 %t1.s | count 1
; RUN: grep andhi %t1.s | count 4
; RUN: grep shlhi %t1.s | count 4

; Vector rotates are not currently supported in gcc or llvm assembly. These are
; not tested.
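
Each scalar test below builds a rotate out of shl, lshr (or the reverse), and or; the greps above then check that llc folds the whole idiom into a single rot/roth/roti/rothi. The same idiom in C (a sketch for orientation, not from the commit; the shift amount is masked so n == 0 stays defined in C, which the IR versions below do not bother with):

    #include <assert.h>
    #include <stdint.h>

    /* shift-left / shift-right / or -- the pattern the tests express;
       n is assumed to be in [0, 31] */
    static uint32_t rotl32(uint32_t x, uint32_t n) {
        return (x << n) | (x >> ((32 - n) & 31));
    }

    int main(void) {
        assert(rotl32(0x80000001u, 1) == 0x00000003u);
        return 0;
    }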

; 32-bit rotates:
define i32 @rotl32_1a(i32 %arg1, i8 %arg2) {
  %tmp1 = zext i8 %arg2 to i32
  %B = shl i32 %arg1, %tmp1
  %arg22 = sub i8 32, %arg2
  %tmp2 = zext i8 %arg22 to i32
  %C = lshr i32 %arg1, %tmp2
  %D = or i32 %B, %C
  ret i32 %D
}

define i32 @rotl32_1b(i32 %arg1, i16 %arg2) {
  %tmp1 = zext i16 %arg2 to i32
  %B = shl i32 %arg1, %tmp1
  %arg22 = sub i16 32, %arg2
  %tmp2 = zext i16 %arg22 to i32
  %C = lshr i32 %arg1, %tmp2
  %D = or i32 %B, %C
  ret i32 %D
}

define i32 @rotl32_2(i32 %arg1, i32 %arg2) {
  %B = shl i32 %arg1, %arg2
  %tmp1 = sub i32 32, %arg2
  %C = lshr i32 %arg1, %tmp1
  %D = or i32 %B, %C
  ret i32 %D
}

define i32 @rotl32_3(i32 %arg1, i32 %arg2) {
  %tmp1 = sub i32 32, %arg2
  %B = shl i32 %arg1, %arg2
  %C = lshr i32 %arg1, %tmp1
  %D = or i32 %B, %C
  ret i32 %D
}

define i32 @rotl32_4(i32 %arg1, i32 %arg2) {
  %tmp1 = sub i32 32, %arg2
  %C = lshr i32 %arg1, %tmp1
  %B = shl i32 %arg1, %arg2
  %D = or i32 %B, %C
  ret i32 %D
}

define i32 @rotr32_1(i32 %A, i8 %Amt) {
  %tmp1 = zext i8 %Amt to i32
  %B = lshr i32 %A, %tmp1
  %Amt2 = sub i8 32, %Amt
  %tmp2 = zext i8 %Amt2 to i32
  %C = shl i32 %A, %tmp2
  %D = or i32 %B, %C
  ret i32 %D
}

define i32 @rotr32_2(i32 %A, i8 %Amt) {
  %Amt2 = sub i8 32, %Amt
  %tmp1 = zext i8 %Amt to i32
  %B = lshr i32 %A, %tmp1
  %tmp2 = zext i8 %Amt2 to i32
  %C = shl i32 %A, %tmp2
  %D = or i32 %B, %C
  ret i32 %D
}

; Rotate left with immediate
define i32 @rotli32(i32 %A) {
  %B = shl i32 %A, 5
  %C = lshr i32 %A, 27
  %D = or i32 %B, %C
  ret i32 %D
}

; Rotate right with immediate
define i32 @rotri32(i32 %A) {
  %B = lshr i32 %A, 5
  %C = shl i32 %A, 27
  %D = or i32 %B, %C
  ret i32 %D
}

; 16-bit rotates:
define i16 @rotr16_1(i16 %arg1, i8 %arg) {
  %tmp1 = zext i8 %arg to i16
  %B = lshr i16 %arg1, %tmp1
  %arg2 = sub i8 16, %arg
  %tmp2 = zext i8 %arg2 to i16
  %C = shl i16 %arg1, %tmp2
  %D = or i16 %B, %C
  ret i16 %D
}

define i16 @rotr16_2(i16 %arg1, i16 %arg) {
  %B = lshr i16 %arg1, %arg
  %tmp1 = sub i16 16, %arg
  %C = shl i16 %arg1, %tmp1
  %D = or i16 %B, %C
  ret i16 %D
}

define i16 @rotli16(i16 %A) {
  %B = shl i16 %A, 5
  %C = lshr i16 %A, 11
  %D = or i16 %B, %C
  ret i16 %D
}

define i16 @rotri16(i16 %A) {
  %B = lshr i16 %A, 5
  %C = shl i16 %A, 11
  %D = or i16 %B, %C
  ret i16 %D
}

define i8 @rotl8(i8 %A, i8 %Amt) {
  %B = shl i8 %A, %Amt
  %Amt2 = sub i8 8, %Amt
  %C = lshr i8 %A, %Amt2
  %D = or i8 %B, %C
  ret i8 %D
}

define i8 @rotr8(i8 %A, i8 %Amt) {
  %B = lshr i8 %A, %Amt
  %Amt2 = sub i8 8, %Amt
  %C = shl i8 %A, %Amt2
  %D = or i8 %B, %C
  ret i8 %D
}

define i8 @rotli8(i8 %A) {
  %B = shl i8 %A, 5
  %C = lshr i8 %A, 3
  %D = or i8 %B, %C
  ret i8 %D
}

define i8 @rotri8(i8 %A) {
  %B = lshr i8 %A, 5
  %C = shl i8 %A, 3
  %D = or i8 %B, %C
  ret i8 %D
}

select_bits.ll

; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep selb %t1.s | count 160
; RUN: grep and %t1.s | count 2
; RUN: grep xsbh %t1.s | count 1
; RUN: grep xshw %t1.s | count 2
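
Every function in this file is a spelled-out form of the bit-select the commit message summarizes as (a & c) | (b & ~c): the result takes each bit from one source or the other according to a mask. A small C rendering of the pattern (illustrative only; the operand naming is mine, not the backend's):

    #include <assert.h>
    #include <stdint.h>

    /* bits of b where mask c is 1, bits of a where c is 0 */
    static uint32_t bit_select(uint32_t a, uint32_t b, uint32_t c) {
        return (a & ~c) | (b & c);
    }

    int main(void) {
        assert(bit_select(0x00FF00FFu, 0xFFFFFFFFu, 0x0F0F0F0Fu) == 0x0FFF0FFFu);
        return 0;
    }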

define <16 x i8> @selb_v16i8_1(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
  %A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                              i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %B = and <16 x i8> %A, %arg1
  %C = and <16 x i8> %arg2, %arg3
  %D = or <16 x i8> %B, %C
  ret <16 x i8> %D
}

define <16 x i8> @selb_v16i8_11(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
  %A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                              i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %B = and <16 x i8> %arg1, %A
  %C = and <16 x i8> %arg3, %arg2
  %D = or <16 x i8> %B, %C
  ret <16 x i8> %D
}

define <16 x i8> @selb_v16i8_12(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
  %A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                              i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %B = and <16 x i8> %arg1, %A
  %C = and <16 x i8> %arg2, %arg3
  %D = or <16 x i8> %B, %C
  ret <16 x i8> %D
}

define <16 x i8> @selb_v16i8_13(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
  %A = xor <16 x i8> %arg3, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                              i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %B = and <16 x i8> %A, %arg1
  %C = and <16 x i8> %arg2, %arg3
  %D = or <16 x i8> %B, %C
  ret <16 x i8> %D
}

define <16 x i8> @selb_v16i8_2(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
  %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                              i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %B = and <16 x i8> %A, %arg2
  %C = and <16 x i8> %arg3, %arg1
  %D = or <16 x i8> %B, %C
  ret <16 x i8> %D
}

define <16 x i8> @selb_v16i8_21(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
  %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                              i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %B = and <16 x i8> %arg2, %A
  %C = and <16 x i8> %arg3, %arg1
  %D = or <16 x i8> %B, %C
  ret <16 x i8> %D
}

define <16 x i8> @selb_v16i8_3(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
  %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                              i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %B = and <16 x i8> %A, %arg1
  %C = and <16 x i8> %arg3, %arg2
  %D = or <16 x i8> %B, %C
  ret <16 x i8> %D
}

define <16 x i8> @selb_v16i8_4(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
  %C = and <16 x i8> %arg3, %arg2
  %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                              i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %B = and <16 x i8> %A, %arg1
  %D = or <16 x i8> %B, %C
  ret <16 x i8> %D
}

define <16 x i8> @selb_v16i8_41(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
  %C = and <16 x i8> %arg2, %arg3
  %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                              i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %B = and <16 x i8> %arg1, %A
  %D = or <16 x i8> %C, %B
  ret <16 x i8> %D
}

define <16 x i8> @selb_v16i8_42(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
  %C = and <16 x i8> %arg2, %arg3
  %A = xor <16 x i8> %arg2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                              i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %B = and <16 x i8> %A, %arg1
  %D = or <16 x i8> %C, %B
  ret <16 x i8> %D
}

define <16 x i8> @selb_v16i8_5(<16 x i8> %arg1, <16 x i8> %arg2, <16 x i8> %arg3) {
  %C = and <16 x i8> %arg2, %arg1
  %A = xor <16 x i8> %arg1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                              i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %B = and <16 x i8> %A, %arg3
  %D = or <16 x i8> %B, %C
  ret <16 x i8> %D
}

define <8 x i16> @selb_v8i16_1(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) {
  %A = xor <8 x i16> %arg3, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
  %B = and <8 x i16> %A, %arg1
  %C = and <8 x i16> %arg2, %arg3
  %D = or <8 x i16> %B, %C
  ret <8 x i16> %D
}

define <8 x i16> @selb_v8i16_11(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) {
  %A = xor <8 x i16> %arg3, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
  %B = and <8 x i16> %arg1, %A
  %C = and <8 x i16> %arg3, %arg2
  %D = or <8 x i16> %B, %C
  ret <8 x i16> %D
}

define <8 x i16> @selb_v8i16_12(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) {
  %A = xor <8 x i16> %arg3, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
  %B = and <8 x i16> %arg1, %A
  %C = and <8 x i16> %arg2, %arg3
  %D = or <8 x i16> %B, %C
  ret <8 x i16> %D
}

define <8 x i16> @selb_v8i16_13(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) {
  %A = xor <8 x i16> %arg3, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
  %B = and <8 x i16> %A, %arg1
  %C = and <8 x i16> %arg2, %arg3
  %D = or <8 x i16> %B, %C
  ret <8 x i16> %D
}

define <8 x i16> @selb_v8i16_2(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) {
  %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
  %B = and <8 x i16> %A, %arg2
  %C = and <8 x i16> %arg3, %arg1
  %D = or <8 x i16> %B, %C
  ret <8 x i16> %D
}

define <8 x i16> @selb_v8i16_21(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) {
  %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
  %B = and <8 x i16> %arg2, %A
  %C = and <8 x i16> %arg3, %arg1
  %D = or <8 x i16> %B, %C
  ret <8 x i16> %D
}

define <8 x i16> @selb_v8i16_3(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) {
  %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
  %B = and <8 x i16> %A, %arg1
  %C = and <8 x i16> %arg3, %arg2
  %D = or <8 x i16> %B, %C
  ret <8 x i16> %D
}

define <8 x i16> @selb_v8i16_4(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) {
  %C = and <8 x i16> %arg3, %arg2
  %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
  %B = and <8 x i16> %A, %arg1
  %D = or <8 x i16> %B, %C
  ret <8 x i16> %D
}

define <8 x i16> @selb_v8i16_41(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) {
  %C = and <8 x i16> %arg2, %arg3
  %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
  %B = and <8 x i16> %arg1, %A
  %D = or <8 x i16> %C, %B
  ret <8 x i16> %D
}

define <8 x i16> @selb_v8i16_42(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) {
  %C = and <8 x i16> %arg2, %arg3
  %A = xor <8 x i16> %arg2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
  %B = and <8 x i16> %A, %arg1
  %D = or <8 x i16> %C, %B
  ret <8 x i16> %D
}

define <8 x i16> @selb_v8i16_5(<8 x i16> %arg1, <8 x i16> %arg2, <8 x i16> %arg3) {
  %C = and <8 x i16> %arg2, %arg1
  %A = xor <8 x i16> %arg1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
  %B = and <8 x i16> %A, %arg3
  %D = or <8 x i16> %B, %C
  ret <8 x i16> %D
}

define <4 x i32> @selb_v4i32_1(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3) {
  %tmpnot = xor <4 x i32> %arg3, < i32 -1, i32 -1, i32 -1, i32 -1 >
  %tmp2 = and <4 x i32> %tmpnot, %arg1
  %tmp5 = and <4 x i32> %arg2, %arg3
  %tmp6 = or <4 x i32> %tmp2, %tmp5
  ret <4 x i32> %tmp6
}

define <4 x i32> @selb_v4i32_2(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3) {
  %tmpnot = xor <4 x i32> %arg3, < i32 -1, i32 -1, i32 -1, i32 -1 >
  %tmp2 = and <4 x i32> %tmpnot, %arg1
  %tmp5 = and <4 x i32> %arg2, %arg3
  %tmp6 = or <4 x i32> %tmp2, %tmp5
  ret <4 x i32> %tmp6
}

define <4 x i32> @selb_v4i32_3(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3) {
  %tmpnot = xor <4 x i32> %arg3, < i32 -1, i32 -1, i32 -1, i32 -1 >
  %tmp2 = and <4 x i32> %tmpnot, %arg1
  %tmp5 = and <4 x i32> %arg3, %arg2
  %tmp6 = or <4 x i32> %tmp2, %tmp5
  ret <4 x i32> %tmp6
}

define <4 x i32> @selb_v4i32_4(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3) {
  %tmp2 = and <4 x i32> %arg3, %arg2
  %tmp3not = xor <4 x i32> %arg3, < i32 -1, i32 -1, i32 -1, i32 -1 >
  %tmp5 = and <4 x i32> %tmp3not, %arg1
  %tmp6 = or <4 x i32> %tmp2, %tmp5
  ret <4 x i32> %tmp6
}

define <4 x i32> @selb_v4i32_5(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3) {
  %tmp2 = and <4 x i32> %arg3, %arg2
  %tmp3not = xor <4 x i32> %arg3, < i32 -1, i32 -1, i32 -1, i32 -1 >
  %tmp5 = and <4 x i32> %tmp3not, %arg1
  %tmp6 = or <4 x i32> %tmp2, %tmp5
  ret <4 x i32> %tmp6
}

define i32 @selb_i32(i32 %arg1, i32 %arg2, i32 %arg3) {
  %tmp1not = xor i32 %arg3, -1
  %tmp3 = and i32 %tmp1not, %arg1
  %tmp6 = and i32 %arg3, %arg2
  %tmp7 = or i32 %tmp3, %tmp6
  ret i32 %tmp7
}

define i16 @selb_i16(i16 signext %arg1, i16 signext %arg2, i16 signext %arg3) signext {
  %tmp3 = and i16 %arg3, %arg1
  %tmp4not = xor i16 %arg3, -1
  %tmp6 = and i16 %tmp4not, %arg2
  %retval1011 = or i16 %tmp3, %tmp6
  ret i16 %retval1011
}

define i16 @selb_i16u(i16 zeroext %arg1, i16 zeroext %arg2, i16 zeroext %arg3) zeroext {
  %tmp3 = and i16 %arg3, %arg1
  %tmp4not = xor i16 %arg3, -1
  %tmp6 = and i16 %tmp4not, %arg2
  %retval1011 = or i16 %tmp3, %tmp6
  ret i16 %retval1011
}

define i8 @selb_i8u(i8 zeroext %arg1, i8 zeroext %arg2, i8 zeroext %arg3) zeroext {
  %tmp3 = and i8 %arg3, %arg1
  %tmp4not = xor i8 %arg3, -1
  %tmp6 = and i8 %tmp4not, %arg2
  %retval1011 = or i8 %tmp3, %tmp6
  ret i8 %retval1011
}

define i8 @selb_i8(i8 signext %arg1, i8 signext %arg2, i8 signext %arg3) signext {
  %tmp3 = and i8 %arg3, %arg1
  %tmp4not = xor i8 %arg3, -1
  %tmp6 = and i8 %tmp4not, %arg2
  %retval1011 = or i8 %tmp3, %tmp6
  ret i8 %retval1011
}

shift_ops.ll

; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep shlh %t1.s | count 84
; RUN: grep shlhi %t1.s | count 51
; RUN: grep shl %t1.s | count 168
; RUN: grep shli %t1.s | count 51
; RUN: grep xshw %t1.s | count 5
; RUN: grep and %t1.s | count 5

; Vector shifts are not currently supported in gcc or llvm assembly. These are
; not tested.

; Shift left i16 via register; note that the second operand to shl is
; promoted to a 32-bit type:

define i16 @shlh_i16_1(i16 %arg1, i16 %arg2) {
  %A = shl i16 %arg1, %arg2
  ret i16 %A
}

define i16 @shlh_i16_2(i16 %arg1, i16 %arg2) {
  %A = shl i16 %arg2, %arg1
  ret i16 %A
}

define i16 @shlh_i16_3(i16 signext %arg1, i16 signext %arg2) signext {
  %A = shl i16 %arg1, %arg2
  ret i16 %A
}

define i16 @shlh_i16_4(i16 signext %arg1, i16 signext %arg2) signext {
  %A = shl i16 %arg2, %arg1
  ret i16 %A
}

define i16 @shlh_i16_5(i16 zeroext %arg1, i16 zeroext %arg2) zeroext {
  %A = shl i16 %arg1, %arg2
  ret i16 %A
}

define i16 @shlh_i16_6(i16 zeroext %arg1, i16 zeroext %arg2) zeroext {
  %A = shl i16 %arg2, %arg1
  ret i16 %A
}

; Shift left i16 with immediate:
define i16 @shlhi_i16_1(i16 %arg1) {
  %A = shl i16 %arg1, 12
  ret i16 %A
}

; Should not generate anything other than the return, arg1 << 0 = arg1
define i16 @shlhi_i16_2(i16 %arg1) {
  %A = shl i16 %arg1, 0
  ret i16 %A
}

define i16 @shlhi_i16_3(i16 %arg1) {
  %A = shl i16 16383, %arg1
  ret i16 %A
}

; Should generate 0, 0 << arg1 = 0
define i16 @shlhi_i16_4(i16 %arg1) {
  %A = shl i16 0, %arg1
  ret i16 %A
}

define i16 @shlhi_i16_5(i16 signext %arg1) signext {
  %A = shl i16 %arg1, 12
  ret i16 %A
}

; Should not generate anything other than the return, arg1 << 0 = arg1
define i16 @shlhi_i16_6(i16 signext %arg1) signext {
  %A = shl i16 %arg1, 0
  ret i16 %A
}

define i16 @shlhi_i16_7(i16 signext %arg1) signext {
  %A = shl i16 16383, %arg1
  ret i16 %A
}

; Should generate 0, 0 << arg1 = 0
define i16 @shlhi_i16_8(i16 signext %arg1) signext {
  %A = shl i16 0, %arg1
  ret i16 %A
}

define i16 @shlhi_i16_9(i16 zeroext %arg1) zeroext {
  %A = shl i16 %arg1, 12
  ret i16 %A
}

; Should not generate anything other than the return, arg1 << 0 = arg1
define i16 @shlhi_i16_10(i16 zeroext %arg1) zeroext {
  %A = shl i16 %arg1, 0
  ret i16 %A
}

define i16 @shlhi_i16_11(i16 zeroext %arg1) zeroext {
  %A = shl i16 16383, %arg1
  ret i16 %A
}

; Should generate 0, 0 << arg1 = 0
define i16 @shlhi_i16_12(i16 zeroext %arg1) zeroext {
  %A = shl i16 0, %arg1
  ret i16 %A
}

; Shift left i32 via register:

define i32 @shl_i32_1(i32 %arg1, i32 %arg2) {
  %A = shl i32 %arg1, %arg2
  ret i32 %A
}

define i32 @shl_i32_2(i32 %arg1, i32 %arg2) {
  %A = shl i32 %arg2, %arg1
  ret i32 %A
}

define i32 @shl_i32_3(i32 signext %arg1, i32 signext %arg2) signext {
  %A = shl i32 %arg1, %arg2
  ret i32 %A
}

define i32 @shl_i32_4(i32 signext %arg1, i32 signext %arg2) signext {
  %A = shl i32 %arg2, %arg1
  ret i32 %A
}

define i32 @shl_i32_5(i32 zeroext %arg1, i32 zeroext %arg2) zeroext {
  %A = shl i32 %arg1, %arg2
  ret i32 %A
}

define i32 @shl_i32_6(i32 zeroext %arg1, i32 zeroext %arg2) zeroext {
  %A = shl i32 %arg2, %arg1
  ret i32 %A
}

; Shift left i32 with immediate:
define i32 @shli_i32_1(i32 %arg1) {
  %A = shl i32 %arg1, 12
  ret i32 %A
}

; Should not generate anything other than the return, arg1 << 0 = arg1
define i32 @shli_i32_2(i32 %arg1) {
  %A = shl i32 %arg1, 0
  ret i32 %A
}

define i32 @shli_i32_3(i32 %arg1) {
  %A = shl i32 16383, %arg1
  ret i32 %A
}

; Should generate 0, 0 << arg1 = 0
define i32 @shli_i32_4(i32 %arg1) {
  %A = shl i32 0, %arg1
  ret i32 %A
}

define i32 @shli_i32_5(i32 signext %arg1) signext {
  %A = shl i32 %arg1, 12
  ret i32 %A
}

; Should not generate anything other than the return, arg1 << 0 = arg1
define i32 @shli_i32_6(i32 signext %arg1) signext {
  %A = shl i32 %arg1, 0
  ret i32 %A
}

define i32 @shli_i32_7(i32 signext %arg1) signext {
  %A = shl i32 16383, %arg1
  ret i32 %A
}

; Should generate 0, 0 << arg1 = 0
define i32 @shli_i32_8(i32 signext %arg1) signext {
  %A = shl i32 0, %arg1
  ret i32 %A
}

define i32 @shli_i32_9(i32 zeroext %arg1) zeroext {
  %A = shl i32 %arg1, 12
  ret i32 %A
}

; Should not generate anything other than the return, arg1 << 0 = arg1
define i32 @shli_i32_10(i32 zeroext %arg1) zeroext {
  %A = shl i32 %arg1, 0
  ret i32 %A
}

define i32 @shli_i32_11(i32 zeroext %arg1) zeroext {
  %A = shl i32 16383, %arg1
  ret i32 %A
}

; Should generate 0, 0 << arg1 = 0
define i32 @shli_i32_12(i32 zeroext %arg1) zeroext {
  %A = shl i32 0, %arg1
  ret i32 %A
}

sp_farith.ll

; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
; RUN: grep fa %t1.s | count 2 &&
; RUN: grep fs %t1.s | count 2 &&
; RUN: grep fm %t1.s | count 6 &&
; RUN: grep fma %t1.s | count 2 &&
; RUN: grep fms %t1.s | count 2 &&
; RUN: grep fnms %t1.s | count 3
;
; This file includes single precision floating point arithmetic instructions
; NOTE: fdiv is tested separately since it is a compound operation

define float @fp_add(float %arg1, float %arg2) {
  %A = add float %arg1, %arg2
  ret float %A
}

define <4 x float> @fp_add_vec(<4 x float> %arg1, <4 x float> %arg2) {
  %A = add <4 x float> %arg1, %arg2
  ret <4 x float> %A
}

define float @fp_sub(float %arg1, float %arg2) {
  %A = sub float %arg1, %arg2
  ret float %A
}

define <4 x float> @fp_sub_vec(<4 x float> %arg1, <4 x float> %arg2) {
  %A = sub <4 x float> %arg1, %arg2
  ret <4 x float> %A
}

define float @fp_mul(float %arg1, float %arg2) {
  %A = mul float %arg1, %arg2
  ret float %A
}

define <4 x float> @fp_mul_vec(<4 x float> %arg1, <4 x float> %arg2) {
  %A = mul <4 x float> %arg1, %arg2
  ret <4 x float> %A
}

define float @fp_mul_add(float %arg1, float %arg2, float %arg3) {
  %A = mul float %arg1, %arg2
  %B = add float %A, %arg3
  ret float %B
}

define <4 x float> @fp_mul_add_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
  %A = mul <4 x float> %arg1, %arg2
  %B = add <4 x float> %A, %arg3
  ret <4 x float> %B
}

define float @fp_mul_sub(float %arg1, float %arg2, float %arg3) {
  %A = mul float %arg1, %arg2
  %B = sub float %A, %arg3
  ret float %B
}

define <4 x float> @fp_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
  %A = mul <4 x float> %arg1, %arg2
  %B = sub <4 x float> %A, %arg3
  ret <4 x float> %B
}

; Test the straightforward way of getting fnms:
; c - a * b
define float @fp_neg_mul_sub_1(float %arg1, float %arg2, float %arg3) {
  %A = mul float %arg1, %arg2
  %B = sub float %arg3, %A
  ret float %B
}

; Test another way of getting fnms:
; - (a * b - c) = c - a * b
define float @fp_neg_mul_sub_2(float %arg1, float %arg2, float %arg3) {
  %A = mul float %arg1, %arg2
  %B = sub float %A, %arg3
  %C = sub float -0.0, %B
  ret float %C
}

define <4 x float> @fp_neg_mul_sub_vec(<4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3) {
  %A = mul <4 x float> %arg1, %arg2
  %B = sub <4 x float> %A, %arg3
  %D = sub <4 x float> < float -0.0, float -0.0, float -0.0, float -0.0 >, %B
  ret <4 x float> %D
}