llvm.org GIT mirror llvm / 8afc3ea
SHA1: unroll loop in hashBlock. This code is taken from public domain. https://github.com/jsonn/src/blob/trunk/common/lib/libc/hash/sha1/sha1.c I wrote a sha1 command and ran it on my Xeon E5-2680 v2 2.80GHz machine. Here is a result. The new hash function is 37% faster than before. Performance counter stats for './llvm-sha1-old /ssd/build/bin/lld' (10 runs): 6640.503687 task-clock (msec) # 1.001 CPUs utilized ( +- 0.03% ) 54 context-switches # 0.008 K/sec ( +- 5.03% ) 5 cpu-migrations # 0.001 K/sec ( +- 31.73% ) 183,803 page-faults # 0.028 M/sec ( +- 0.00% ) 18,527,954,113 cycles # 2.790 GHz ( +- 0.03% ) 4,993,237,485 stalled-cycles-frontend # 26.95% frontend cycles idle ( +- 0.11% ) <not supported> stalled-cycles-backend 50,217,149,423 instructions # 2.71 insns per cycle # 0.10 stalled cycles per insn ( +- 0.00% ) 6,094,322,337 branches # 917.750 M/sec ( +- 0.00% ) 11,778,239 branch-misses # 0.19% of all branches ( +- 0.01% ) 6.634017401 seconds time elapsed ( +- 0.03% ) Performance counter stats for './llvm-sha1-new /ssd/build/bin/lld' (10 runs): 4167.062720 task-clock (msec) # 1.001 CPUs utilized ( +- 0.02% ) 52 context-switches # 0.012 K/sec ( +- 16.45% ) 7 cpu-migrations # 0.002 K/sec ( +- 32.20% ) 183,804 page-faults # 0.044 M/sec ( +- 0.00% ) 11,626,611,958 cycles # 2.790 GHz ( +- 0.02% ) 4,491,897,976 stalled-cycles-frontend # 38.63% frontend cycles idle ( +- 0.05% ) <not supported> stalled-cycles-backend 24,320,180,617 instructions # 2.09 insns per cycle # 0.18 stalled cycles per insn ( +- 0.00% ) 1,574,674,576 branches # 377.886 M/sec ( +- 0.00% ) 11,769,693 branch-misses # 0.75% of all branches ( +- 0.00% ) 4.163251552 seconds time elapsed ( +- 0.02% ) Differential Revision: https://reviews.llvm.org/D26890 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287473 91177308-0d34-0410-b5e6-96231b3b80d8 Rui Ueyama 3 years ago
2 changed file(s) with 157 addition(s) and 46 deletion(s). Raw diff Collapse all Expand all
6060
6161 // Internal State
6262 struct {
63 uint32_t Buffer[BLOCK_LENGTH / 4];
63 union {
64 uint8_t C[BLOCK_LENGTH];
65 uint32_t L[BLOCK_LENGTH / 4];
66 } Buffer;
6467 uint32_t State[HASH_LENGTH / 4];
6568 uint32_t ByteCount;
6669 uint8_t BufferOffset;
55 // License. See LICENSE.TXT for details.
66 //
77 //===----------------------------------------------------------------------===//
8 //
89 // This code is taken from public domain
9 // (http://oauth.googlecode.com/svn/code/c/liboauth/src/sha1.c)
10 // (http://oauth.googlecode.com/svn/code/c/liboauth/src/sha1.c and
11 // http://cvsweb.netbsd.org/bsdweb.cgi/src/common/lib/libc/hash/sha1/sha1.c?rev=1.6)
1012 // and modified by wrapping it in a C++ interface for LLVM,
1113 // and removing unnecessary code.
1214 //
2325 #if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN
2426 #define SHA_BIG_ENDIAN
2527 #endif
28
/// Rotates the 32-bit value \p number left by \p bits bit positions.
///
/// Both shift counts are reduced to the range [0, 31], so a rotation by 0 (or
/// by any multiple of 32) returns \p number unchanged instead of performing a
/// 32-bit shift of a 32-bit value, which is undefined behaviour in C++.
///
/// \param number value to rotate.
/// \param bits   rotation distance; only its low five bits are used.
/// \returns \p number rotated left by (\p bits mod 32).
static uint32_t rol(uint32_t number, int bits) {
  const unsigned Shift = static_cast<unsigned>(bits) & 31u;
  return (number << Shift) | (number >> ((32u - Shift) & 31u));
}
32
/// Returns message word \p I (0..15) of the block held in \p Buf.
///
/// No byte swapping is performed on any host. SHA1::addUncounted() writes each
/// incoming byte at `BufferOffset ^ 3` on little-endian hosts and at
/// `BufferOffset` on big-endian hosts, so every 32-bit word of the buffer
/// already holds the big-endian value of its four message bytes by the time
/// the block is hashed. Swapping here would undo that on big-endian machines.
///
/// (The former `#if SHA_BIG_ENDIAN` guard was also ill-formed: the macro is
/// defined without a value, so the directive had no expression to evaluate.)
///
/// \param Buf the 16-word block buffer.
/// \param I   index of the word to read.
/// \returns Buf[I], unmodified.
static uint32_t blk0(uint32_t *Buf, int I) { return Buf[I]; }
41
42 static uint32_t blk(uint32_t *Buf, int I) {
43 Buf[I & 15] = rol(Buf[(I + 13) & 15] ^ Buf[(I + 8) & 15] ^ Buf[(I + 2) & 15] ^
44 Buf[I & 15],
45 1);
46 return Buf[I & 15];
47 }
48
49 static void r0(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
50 int I, uint32_t *Buf) {
51 E += ((B & (C ^ D)) ^ D) + blk0(Buf, I) + 0x5A827999 + rol(A, 5);
52 B = rol(B, 30);
53 }
54
55 static void r1(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
56 int I, uint32_t *Buf) {
57 E += ((B & (C ^ D)) ^ D) + blk(Buf, I) + 0x5A827999 + rol(A, 5);
58 B = rol(B, 30);
59 }
60
61 static void r2(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
62 int I, uint32_t *Buf) {
63 E += (B ^ C ^ D) + blk(Buf, I) + 0x6ED9EBA1 + rol(A, 5);
64 B = rol(B, 30);
65 }
66
67 static void r3(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
68 int I, uint32_t *Buf) {
69 E += (((B | C) & D) | (B & C)) + blk(Buf, I) + 0x8F1BBCDC + rol(A, 5);
70 B = rol(B, 30);
71 }
72
73 static void r4(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
74 int I, uint32_t *Buf) {
75 E += (B ^ C ^ D) + blk(Buf, I) + 0xCA62C1D6 + rol(A, 5);
76 B = rol(B, 30);
77 }
2678
2779 /* code */
2880 #define SHA1_K0 0x5a827999
4698 InternalState.BufferOffset = 0;
4799 }
48100
/// Rotates \p number left by \p bits bit positions within a 32-bit word.
///
/// The bits shifted out at the top re-enter at the bottom.
static uint32_t rol32(uint32_t number, uint8_t bits) {
  const uint32_t Upper = number << bits;
  const uint32_t Lower = number >> (32 - bits);
  return Upper | Lower;
}
52
// Hashes the block currently held in InternalState.Buffer and adds the result
// into the five running state words InternalState.State[0..4].
//
// NOTE(review): this span reads like a rendered diff of hashBlock rather than
// one compilable body. The lines numbered 54-89 appear to be the removed,
// loop-based implementation and the lines numbered 102-197 the added, unrolled
// implementation. Confirm against the real file before editing either half.
53101 void SHA1::hashBlock() {
54 uint8_t i;
55 uint32_t a, b, c, d, e, t;
56
57 a = InternalState.State[0];
58 b = InternalState.State[1];
59 c = InternalState.State[2];
60 d = InternalState.State[3];
61 e = InternalState.State[4];
// 80 steps; from step 16 onwards the schedule word is recomputed in place in
// the 16-entry buffer (indices are taken modulo 16).
62 for (i = 0; i < 80; i++) {
63 if (i >= 16) {
64 t = InternalState.Buffer[(i + 13) & 15] ^
65 InternalState.Buffer[(i + 8) & 15] ^
66 InternalState.Buffer[(i + 2) & 15] ^ InternalState.Buffer[i & 15];
67 InternalState.Buffer[i & 15] = rol32(t, 1);
68 }
// The mixing function and additive constant change every 20 steps.
69 if (i < 20) {
70 t = (d ^ (b & (c ^ d))) + SHA1_K0;
71 } else if (i < 40) {
72 t = (b ^ c ^ d) + SHA1_K20;
73 } else if (i < 60) {
74 t = ((b & c) | (d & (b | c))) + SHA1_K40;
75 } else {
76 t = (b ^ c ^ d) + SHA1_K60;
77 }
78 t += rol32(a, 5) + e + InternalState.Buffer[i & 15];
// Shift the five working values down by one position for the next step.
79 e = d;
80 d = c;
81 c = rol32(b, 30);
82 b = a;
83 a = t;
84 }
85 InternalState.State[0] += a;
86 InternalState.State[1] += b;
87 InternalState.State[2] += c;
88 InternalState.State[3] += d;
89 InternalState.State[4] += e;
// Working copies of the five state words for the unrolled steps below.
102 uint32_t A = InternalState.State[0];
103 uint32_t B = InternalState.State[1];
104 uint32_t C = InternalState.State[2];
105 uint32_t D = InternalState.State[3];
106 uint32_t E = InternalState.State[4];
107
// Each helper only updates its 2nd and 5th reference arguments, so instead of
// moving values between variables after every step, consecutive calls pass
// A..E rotated by one position. The argument order repeats every five calls.
108 // 4 rounds of 20 operations each. Loop unrolled.
109 r0(A, B, C, D, E, 0, InternalState.Buffer.L);
110 r0(E, A, B, C, D, 1, InternalState.Buffer.L);
111 r0(D, E, A, B, C, 2, InternalState.Buffer.L);
112 r0(C, D, E, A, B, 3, InternalState.Buffer.L);
113 r0(B, C, D, E, A, 4, InternalState.Buffer.L);
114 r0(A, B, C, D, E, 5, InternalState.Buffer.L);
115 r0(E, A, B, C, D, 6, InternalState.Buffer.L);
116 r0(D, E, A, B, C, 7, InternalState.Buffer.L);
117 r0(C, D, E, A, B, 8, InternalState.Buffer.L);
118 r0(B, C, D, E, A, 9, InternalState.Buffer.L);
119 r0(A, B, C, D, E, 10, InternalState.Buffer.L);
120 r0(E, A, B, C, D, 11, InternalState.Buffer.L);
121 r0(D, E, A, B, C, 12, InternalState.Buffer.L);
122 r0(C, D, E, A, B, 13, InternalState.Buffer.L);
123 r0(B, C, D, E, A, 14, InternalState.Buffer.L);
124 r0(A, B, C, D, E, 15, InternalState.Buffer.L);
// Steps 16-19 keep the same mixing function and constant as r0 but take their
// word from blk(), which expands the schedule in place.
125 r1(E, A, B, C, D, 16, InternalState.Buffer.L);
126 r1(D, E, A, B, C, 17, InternalState.Buffer.L);
127 r1(C, D, E, A, B, 18, InternalState.Buffer.L);
128 r1(B, C, D, E, A, 19, InternalState.Buffer.L);
129
130 r2(A, B, C, D, E, 20, InternalState.Buffer.L);
131 r2(E, A, B, C, D, 21, InternalState.Buffer.L);
132 r2(D, E, A, B, C, 22, InternalState.Buffer.L);
133 r2(C, D, E, A, B, 23, InternalState.Buffer.L);
134 r2(B, C, D, E, A, 24, InternalState.Buffer.L);
135 r2(A, B, C, D, E, 25, InternalState.Buffer.L);
136 r2(E, A, B, C, D, 26, InternalState.Buffer.L);
137 r2(D, E, A, B, C, 27, InternalState.Buffer.L);
138 r2(C, D, E, A, B, 28, InternalState.Buffer.L);
139 r2(B, C, D, E, A, 29, InternalState.Buffer.L);
140 r2(A, B, C, D, E, 30, InternalState.Buffer.L);
141 r2(E, A, B, C, D, 31, InternalState.Buffer.L);
142 r2(D, E, A, B, C, 32, InternalState.Buffer.L);
143 r2(C, D, E, A, B, 33, InternalState.Buffer.L);
144 r2(B, C, D, E, A, 34, InternalState.Buffer.L);
145 r2(A, B, C, D, E, 35, InternalState.Buffer.L);
146 r2(E, A, B, C, D, 36, InternalState.Buffer.L);
147 r2(D, E, A, B, C, 37, InternalState.Buffer.L);
148 r2(C, D, E, A, B, 38, InternalState.Buffer.L);
149 r2(B, C, D, E, A, 39, InternalState.Buffer.L);
150
151 r3(A, B, C, D, E, 40, InternalState.Buffer.L);
152 r3(E, A, B, C, D, 41, InternalState.Buffer.L);
153 r3(D, E, A, B, C, 42, InternalState.Buffer.L);
154 r3(C, D, E, A, B, 43, InternalState.Buffer.L);
155 r3(B, C, D, E, A, 44, InternalState.Buffer.L);
156 r3(A, B, C, D, E, 45, InternalState.Buffer.L);
157 r3(E, A, B, C, D, 46, InternalState.Buffer.L);
158 r3(D, E, A, B, C, 47, InternalState.Buffer.L);
159 r3(C, D, E, A, B, 48, InternalState.Buffer.L);
160 r3(B, C, D, E, A, 49, InternalState.Buffer.L);
161 r3(A, B, C, D, E, 50, InternalState.Buffer.L);
162 r3(E, A, B, C, D, 51, InternalState.Buffer.L);
163 r3(D, E, A, B, C, 52, InternalState.Buffer.L);
164 r3(C, D, E, A, B, 53, InternalState.Buffer.L);
165 r3(B, C, D, E, A, 54, InternalState.Buffer.L);
166 r3(A, B, C, D, E, 55, InternalState.Buffer.L);
167 r3(E, A, B, C, D, 56, InternalState.Buffer.L);
168 r3(D, E, A, B, C, 57, InternalState.Buffer.L);
169 r3(C, D, E, A, B, 58, InternalState.Buffer.L);
170 r3(B, C, D, E, A, 59, InternalState.Buffer.L);
171
172 r4(A, B, C, D, E, 60, InternalState.Buffer.L);
173 r4(E, A, B, C, D, 61, InternalState.Buffer.L);
174 r4(D, E, A, B, C, 62, InternalState.Buffer.L);
175 r4(C, D, E, A, B, 63, InternalState.Buffer.L);
176 r4(B, C, D, E, A, 64, InternalState.Buffer.L);
177 r4(A, B, C, D, E, 65, InternalState.Buffer.L);
178 r4(E, A, B, C, D, 66, InternalState.Buffer.L);
179 r4(D, E, A, B, C, 67, InternalState.Buffer.L);
180 r4(C, D, E, A, B, 68, InternalState.Buffer.L);
181 r4(B, C, D, E, A, 69, InternalState.Buffer.L);
182 r4(A, B, C, D, E, 70, InternalState.Buffer.L);
183 r4(E, A, B, C, D, 71, InternalState.Buffer.L);
184 r4(D, E, A, B, C, 72, InternalState.Buffer.L);
185 r4(C, D, E, A, B, 73, InternalState.Buffer.L);
186 r4(B, C, D, E, A, 74, InternalState.Buffer.L);
187 r4(A, B, C, D, E, 75, InternalState.Buffer.L);
188 r4(E, A, B, C, D, 76, InternalState.Buffer.L);
189 r4(D, E, A, B, C, 77, InternalState.Buffer.L);
190 r4(C, D, E, A, B, 78, InternalState.Buffer.L);
191 r4(B, C, D, E, A, 79, InternalState.Buffer.L);
192
// Fold the working values back into the running state.
193 InternalState.State[0] += A;
194 InternalState.State[1] += B;
195 InternalState.State[2] += C;
196 InternalState.State[3] += D;
197 InternalState.State[4] += E;
90198 }
91199
92200 void SHA1::addUncounted(uint8_t data) {
93 uint8_t *const b = (uint8_t *)InternalState.Buffer;
94201 #ifdef SHA_BIG_ENDIAN
95 b[InternalState.BufferOffset] = data;
202 InternalState.Buffer.C[InternalState.BufferOffset] = data;
96203 #else
97 b[InternalState.BufferOffset ^ 3] = data;
98 #endif
204 InternalState.Buffer.C[InternalState.BufferOffset ^ 3] = data;
205 #endif
206
99207 InternalState.BufferOffset++;
100208 if (InternalState.BufferOffset == BLOCK_LENGTH) {
101209 hashBlock();