]> git.ipfire.org Git - thirdparty/gcc.git/blame - libiberty/sha1.c
Update copyright years.
[thirdparty/gcc.git] / libiberty / sha1.c
CommitLineData
598848e4
ILT
1/* sha1.c - Functions to compute SHA1 message digest of files or
2 memory blocks according to the NIST specification FIPS-180-1.
3
a945c346 4 Copyright (C) 2000-2024 Free Software Foundation, Inc.
598848e4
ILT
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20/* Written by Scott G. Miller
21 Credits:
22 Robert Klep <robert@ilse.nl> -- Expansion function fix
23*/
24
25#include <config.h>
26
27#include "sha1.h"
28
29#include <stddef.h>
30#include <string.h>
31
bf4f40cc
JJ
32#ifdef HAVE_X86_SHA1_HW_SUPPORT
33# include <x86intrin.h>
34# include <cpuid.h>
35#endif
36
598848e4
ILT
37#if USE_UNLOCKED_IO
38# include "unlocked-io.h"
39#endif
40
41#ifdef WORDS_BIGENDIAN
42# define SWAP(n) (n)
43#else
44# define SWAP(n) \
45 (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
46#endif
47
48#define BLOCKSIZE 4096
49#if BLOCKSIZE % 64 != 0
50# error "invalid BLOCKSIZE"
51#endif
52
/* This array contains the bytes used to pad the buffer to the next
   64-byte boundary: a single 0x80 byte followed by zeros.  (The same
   scheme as RFC 1321, 3.1: Step 1; for SHA-1 see RFC 3174, section 4.)  */
55static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ... */ };
56
57
58/* Take a pointer to a 160 bit block of data (five 32 bit ints) and
59 initialize it to the start constants of the SHA1 algorithm. This
60 must be called before using hash in the call to sha1_hash. */
61void
62sha1_init_ctx (struct sha1_ctx *ctx)
63{
64 ctx->A = 0x67452301;
65 ctx->B = 0xefcdab89;
66 ctx->C = 0x98badcfe;
67 ctx->D = 0x10325476;
68 ctx->E = 0xc3d2e1f0;
69
70 ctx->total[0] = ctx->total[1] = 0;
71 ctx->buflen = 0;
72}
73
74/* Put result from CTX in first 20 bytes following RESBUF. The result
75 must be in little endian byte order.
76
77 IMPORTANT: On some systems it is required that RESBUF is correctly
78 aligned for a 32-bit value. */
79void *
80sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf)
81{
82 ((sha1_uint32 *) resbuf)[0] = SWAP (ctx->A);
83 ((sha1_uint32 *) resbuf)[1] = SWAP (ctx->B);
84 ((sha1_uint32 *) resbuf)[2] = SWAP (ctx->C);
85 ((sha1_uint32 *) resbuf)[3] = SWAP (ctx->D);
86 ((sha1_uint32 *) resbuf)[4] = SWAP (ctx->E);
87
88 return resbuf;
89}
90
91/* Process the remaining bytes in the internal buffer and the usual
92 prolog according to the standard and write the result to RESBUF.
93
94 IMPORTANT: On some systems it is required that RESBUF is correctly
95 aligned for a 32-bit value. */
96void *
97sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf)
98{
99 /* Take yet unprocessed bytes into account. */
100 sha1_uint32 bytes = ctx->buflen;
101 size_t size = (bytes < 56) ? 64 / 4 : 64 * 2 / 4;
102
103 /* Now count remaining bytes. */
104 ctx->total[0] += bytes;
105 if (ctx->total[0] < bytes)
106 ++ctx->total[1];
107
108 /* Put the 64-bit file length in *bits* at the end of the buffer. */
109 ctx->buffer[size - 2] = SWAP ((ctx->total[1] << 3) | (ctx->total[0] >> 29));
110 ctx->buffer[size - 1] = SWAP (ctx->total[0] << 3);
111
112 memcpy (&((char *) ctx->buffer)[bytes], fillbuf, (size - 2) * 4 - bytes);
113
114 /* Process last bytes. */
115 sha1_process_block (ctx->buffer, size * 4, ctx);
116
117 return sha1_read_ctx (ctx, resbuf);
118}
119
120/* Compute SHA1 message digest for bytes read from STREAM. The
121 resulting message digest number will be written into the 16 bytes
122 beginning at RESBLOCK. */
123int
124sha1_stream (FILE *stream, void *resblock)
125{
126 struct sha1_ctx ctx;
127 char buffer[BLOCKSIZE + 72];
128 size_t sum;
129
130 /* Initialize the computation context. */
131 sha1_init_ctx (&ctx);
132
133 /* Iterate over full file contents. */
134 while (1)
135 {
136 /* We read the file in blocks of BLOCKSIZE bytes. One call of the
137 computation function processes the whole buffer so that with the
138 next round of the loop another block can be read. */
139 size_t n;
140 sum = 0;
141
142 /* Read block. Take care for partial reads. */
143 while (1)
144 {
145 n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream);
146
147 sum += n;
148
149 if (sum == BLOCKSIZE)
150 break;
151
152 if (n == 0)
153 {
154 /* Check for the error flag IFF N == 0, so that we don't
155 exit the loop after a partial read due to e.g., EAGAIN
156 or EWOULDBLOCK. */
157 if (ferror (stream))
158 return 1;
159 goto process_partial_block;
160 }
161
162 /* We've read at least one byte, so ignore errors. But always
163 check for EOF, since feof may be true even though N > 0.
164 Otherwise, we could end up calling fread after EOF. */
165 if (feof (stream))
166 goto process_partial_block;
167 }
168
169 /* Process buffer with BLOCKSIZE bytes. Note that
170 BLOCKSIZE % 64 == 0
171 */
172 sha1_process_block (buffer, BLOCKSIZE, &ctx);
173 }
174
175 process_partial_block:;
176
177 /* Process any remaining bytes. */
178 if (sum > 0)
179 sha1_process_bytes (buffer, sum, &ctx);
180
181 /* Construct result in desired memory. */
182 sha1_finish_ctx (&ctx, resblock);
183 return 0;
184}
185
186/* Compute SHA1 message digest for LEN bytes beginning at BUFFER. The
187 result is always in little endian byte order, so that a byte-wise
188 output yields to the wanted ASCII representation of the message
189 digest. */
190void *
191sha1_buffer (const char *buffer, size_t len, void *resblock)
192{
193 struct sha1_ctx ctx;
194
195 /* Initialize the computation context. */
196 sha1_init_ctx (&ctx);
197
198 /* Process whole buffer but last len % 64 bytes. */
199 sha1_process_bytes (buffer, len, &ctx);
200
201 /* Put result in desired memory area. */
202 return sha1_finish_ctx (&ctx, resblock);
203}
204
205void
206sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
207{
208 /* When we already have some bits in our internal buffer concatenate
209 both inputs first. */
210 if (ctx->buflen != 0)
211 {
212 size_t left_over = ctx->buflen;
213 size_t add = 128 - left_over > len ? len : 128 - left_over;
214
215 memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
216 ctx->buflen += add;
217
218 if (ctx->buflen > 64)
219 {
220 sha1_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
221
222 ctx->buflen &= 63;
223 /* The regions in the following copy operation cannot overlap. */
224 memcpy (ctx->buffer,
225 &((char *) ctx->buffer)[(left_over + add) & ~63],
226 ctx->buflen);
227 }
228
229 buffer = (const char *) buffer + add;
230 len -= add;
231 }
232
233 /* Process available complete blocks. */
234 if (len >= 64)
235 {
236#if !_STRING_ARCH_unaligned
237# define alignof(type) offsetof (struct { char c; type x; }, x)
238# define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
239 if (UNALIGNED_P (buffer))
240 while (len > 64)
241 {
242 sha1_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
243 buffer = (const char *) buffer + 64;
244 len -= 64;
245 }
246 else
247#endif
248 {
249 sha1_process_block (buffer, len & ~63, ctx);
250 buffer = (const char *) buffer + (len & ~63);
251 len &= 63;
252 }
253 }
254
255 /* Move remaining bytes in internal buffer. */
256 if (len > 0)
257 {
258 size_t left_over = ctx->buflen;
259
260 memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
261 left_over += len;
262 if (left_over >= 64)
263 {
264 sha1_process_block (ctx->buffer, 64, ctx);
265 left_over -= 64;
f6e9c1c9 266 memmove (ctx->buffer, &ctx->buffer[16], left_over);
598848e4
ILT
267 }
268 ctx->buflen = left_over;
269 }
270}
271
272/* --- Code below is the primary difference between md5.c and sha1.c --- */
273
274/* SHA1 round constants */
275#define K1 0x5a827999
276#define K2 0x6ed9eba1
277#define K3 0x8f1bbcdc
278#define K4 0xca62c1d6
279
280/* Round functions. Note that F2 is the same as F4. */
281#define F1(B,C,D) ( D ^ ( B & ( C ^ D ) ) )
282#define F2(B,C,D) (B ^ C ^ D)
283#define F3(B,C,D) ( ( B & C ) | ( D & ( B | C ) ) )
284#define F4(B,C,D) (B ^ C ^ D)
285
286/* Process LEN bytes of BUFFER, accumulating context into CTX.
287 It is assumed that LEN % 64 == 0.
288 Most of this code comes from GnuPG's cipher/sha1.c. */
289
290void
291sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
292{
293 const sha1_uint32 *words = (const sha1_uint32*) buffer;
294 size_t nwords = len / sizeof (sha1_uint32);
295 const sha1_uint32 *endp = words + nwords;
296 sha1_uint32 x[16];
297 sha1_uint32 a = ctx->A;
298 sha1_uint32 b = ctx->B;
299 sha1_uint32 c = ctx->C;
300 sha1_uint32 d = ctx->D;
301 sha1_uint32 e = ctx->E;
302
303 /* First increment the byte count. RFC 1321 specifies the possible
304 length of the file up to 2^64 bits. Here we only compute the
305 number of bytes. Do a double word increment. */
306 ctx->total[0] += len;
1d77deec 307 ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len);
598848e4
ILT
308
309#define rol(x, n) (((x) << (n)) | ((sha1_uint32) (x) >> (32 - (n))))
310
311#define M(I) ( tm = x[I&0x0f] ^ x[(I-14)&0x0f] \
312 ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \
313 , (x[I&0x0f] = rol(tm, 1)) )
314
315#define R(A,B,C,D,E,F,K,M) do { E += rol( A, 5 ) \
316 + F( B, C, D ) \
317 + K \
318 + M; \
319 B = rol( B, 30 ); \
320 } while(0)
321
322 while (words < endp)
323 {
324 sha1_uint32 tm;
325 int t;
326 for (t = 0; t < 16; t++)
327 {
328 x[t] = SWAP (*words);
329 words++;
330 }
331
332 R( a, b, c, d, e, F1, K1, x[ 0] );
333 R( e, a, b, c, d, F1, K1, x[ 1] );
334 R( d, e, a, b, c, F1, K1, x[ 2] );
335 R( c, d, e, a, b, F1, K1, x[ 3] );
336 R( b, c, d, e, a, F1, K1, x[ 4] );
337 R( a, b, c, d, e, F1, K1, x[ 5] );
338 R( e, a, b, c, d, F1, K1, x[ 6] );
339 R( d, e, a, b, c, F1, K1, x[ 7] );
340 R( c, d, e, a, b, F1, K1, x[ 8] );
341 R( b, c, d, e, a, F1, K1, x[ 9] );
342 R( a, b, c, d, e, F1, K1, x[10] );
343 R( e, a, b, c, d, F1, K1, x[11] );
344 R( d, e, a, b, c, F1, K1, x[12] );
345 R( c, d, e, a, b, F1, K1, x[13] );
346 R( b, c, d, e, a, F1, K1, x[14] );
347 R( a, b, c, d, e, F1, K1, x[15] );
348 R( e, a, b, c, d, F1, K1, M(16) );
349 R( d, e, a, b, c, F1, K1, M(17) );
350 R( c, d, e, a, b, F1, K1, M(18) );
351 R( b, c, d, e, a, F1, K1, M(19) );
352 R( a, b, c, d, e, F2, K2, M(20) );
353 R( e, a, b, c, d, F2, K2, M(21) );
354 R( d, e, a, b, c, F2, K2, M(22) );
355 R( c, d, e, a, b, F2, K2, M(23) );
356 R( b, c, d, e, a, F2, K2, M(24) );
357 R( a, b, c, d, e, F2, K2, M(25) );
358 R( e, a, b, c, d, F2, K2, M(26) );
359 R( d, e, a, b, c, F2, K2, M(27) );
360 R( c, d, e, a, b, F2, K2, M(28) );
361 R( b, c, d, e, a, F2, K2, M(29) );
362 R( a, b, c, d, e, F2, K2, M(30) );
363 R( e, a, b, c, d, F2, K2, M(31) );
364 R( d, e, a, b, c, F2, K2, M(32) );
365 R( c, d, e, a, b, F2, K2, M(33) );
366 R( b, c, d, e, a, F2, K2, M(34) );
367 R( a, b, c, d, e, F2, K2, M(35) );
368 R( e, a, b, c, d, F2, K2, M(36) );
369 R( d, e, a, b, c, F2, K2, M(37) );
370 R( c, d, e, a, b, F2, K2, M(38) );
371 R( b, c, d, e, a, F2, K2, M(39) );
372 R( a, b, c, d, e, F3, K3, M(40) );
373 R( e, a, b, c, d, F3, K3, M(41) );
374 R( d, e, a, b, c, F3, K3, M(42) );
375 R( c, d, e, a, b, F3, K3, M(43) );
376 R( b, c, d, e, a, F3, K3, M(44) );
377 R( a, b, c, d, e, F3, K3, M(45) );
378 R( e, a, b, c, d, F3, K3, M(46) );
379 R( d, e, a, b, c, F3, K3, M(47) );
380 R( c, d, e, a, b, F3, K3, M(48) );
381 R( b, c, d, e, a, F3, K3, M(49) );
382 R( a, b, c, d, e, F3, K3, M(50) );
383 R( e, a, b, c, d, F3, K3, M(51) );
384 R( d, e, a, b, c, F3, K3, M(52) );
385 R( c, d, e, a, b, F3, K3, M(53) );
386 R( b, c, d, e, a, F3, K3, M(54) );
387 R( a, b, c, d, e, F3, K3, M(55) );
388 R( e, a, b, c, d, F3, K3, M(56) );
389 R( d, e, a, b, c, F3, K3, M(57) );
390 R( c, d, e, a, b, F3, K3, M(58) );
391 R( b, c, d, e, a, F3, K3, M(59) );
392 R( a, b, c, d, e, F4, K4, M(60) );
393 R( e, a, b, c, d, F4, K4, M(61) );
394 R( d, e, a, b, c, F4, K4, M(62) );
395 R( c, d, e, a, b, F4, K4, M(63) );
396 R( b, c, d, e, a, F4, K4, M(64) );
397 R( a, b, c, d, e, F4, K4, M(65) );
398 R( e, a, b, c, d, F4, K4, M(66) );
399 R( d, e, a, b, c, F4, K4, M(67) );
400 R( c, d, e, a, b, F4, K4, M(68) );
401 R( b, c, d, e, a, F4, K4, M(69) );
402 R( a, b, c, d, e, F4, K4, M(70) );
403 R( e, a, b, c, d, F4, K4, M(71) );
404 R( d, e, a, b, c, F4, K4, M(72) );
405 R( c, d, e, a, b, F4, K4, M(73) );
406 R( b, c, d, e, a, F4, K4, M(74) );
407 R( a, b, c, d, e, F4, K4, M(75) );
408 R( e, a, b, c, d, F4, K4, M(76) );
409 R( d, e, a, b, c, F4, K4, M(77) );
410 R( c, d, e, a, b, F4, K4, M(78) );
411 R( b, c, d, e, a, F4, K4, M(79) );
412
413 a = ctx->A += a;
414 b = ctx->B += b;
415 c = ctx->C += c;
416 d = ctx->D += d;
417 e = ctx->E += e;
418 }
419}
bf4f40cc
JJ
420
421#if defined(HAVE_X86_SHA1_HW_SUPPORT)
422/* HW specific version of sha1_process_bytes. */
423
424static void sha1_hw_process_block (const void *, size_t, struct sha1_ctx *);
425
426static void
427sha1_hw_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
428{
429 /* When we already have some bits in our internal buffer concatenate
430 both inputs first. */
431 if (ctx->buflen != 0)
432 {
433 size_t left_over = ctx->buflen;
434 size_t add = 128 - left_over > len ? len : 128 - left_over;
435
436 memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
437 ctx->buflen += add;
438
439 if (ctx->buflen > 64)
440 {
441 sha1_hw_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
442
443 ctx->buflen &= 63;
444 /* The regions in the following copy operation cannot overlap. */
445 memcpy (ctx->buffer,
446 &((char *) ctx->buffer)[(left_over + add) & ~63],
447 ctx->buflen);
448 }
449
450 buffer = (const char *) buffer + add;
451 len -= add;
452 }
453
454 /* Process available complete blocks. */
455 if (len >= 64)
456 {
457#if !_STRING_ARCH_unaligned
458# define alignof(type) offsetof (struct { char c; type x; }, x)
459# define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
460 if (UNALIGNED_P (buffer))
461 while (len > 64)
462 {
463 sha1_hw_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
464 buffer = (const char *) buffer + 64;
465 len -= 64;
466 }
467 else
468#endif
469 {
470 sha1_hw_process_block (buffer, len & ~63, ctx);
471 buffer = (const char *) buffer + (len & ~63);
472 len &= 63;
473 }
474 }
475
476 /* Move remaining bytes in internal buffer. */
477 if (len > 0)
478 {
479 size_t left_over = ctx->buflen;
480
481 memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
482 left_over += len;
483 if (left_over >= 64)
484 {
485 sha1_hw_process_block (ctx->buffer, 64, ctx);
486 left_over -= 64;
487 memmove (ctx->buffer, &ctx->buffer[16], left_over);
488 }
489 ctx->buflen = left_over;
490 }
491}
492
/* Process LEN bytes of BUFFER, accumulating context into CTX.
   Using CPU specific intrinsics.  LEN is consumed in 64-byte steps
   and is also added to the byte count kept in ctx->total.  The input
   is read with unaligned loads.  */

#ifdef HAVE_X86_SHA1_HW_SUPPORT
__attribute__((__target__ ("sse4.1,sha")))
#endif
static void
sha1_hw_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
{
#ifdef HAVE_X86_SHA1_HW_SUPPORT
  /* Implemented from
     https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html  */
  const __m128i *words = (const __m128i *) buffer;
  const __m128i *endp = (const __m128i *) ((const char *) buffer + len);
  __m128i abcd, abcd_save, e0, e0_save, e1, msg0, msg1, msg2, msg3;
  /* Shuffle control that reverses all 16 bytes of a vector.  */
  const __m128i shuf_mask
    = _mm_set_epi64x (0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);
  /* Compile-time check (negative array size on failure) that A, B, C
     and D are laid out back to back, because they are loaded and
     stored below as a single 128-bit vector.  */
  char check[((offsetof (struct sha1_ctx, B)
	       == offsetof (struct sha1_ctx, A) + sizeof (ctx->A))
	      && (offsetof (struct sha1_ctx, C)
		  == offsetof (struct sha1_ctx, A) + 2 * sizeof (ctx->A))
	      && (offsetof (struct sha1_ctx, D)
		  == offsetof (struct sha1_ctx, A) + 3 * sizeof (ctx->A)))
	     ? 1 : -1];
  /* Low 32 bits of LEN.  The carry test below must compare against
     this value, not against LEN itself: when LEN is 2^32 or more the
     truncated low word is always smaller than LEN, which would add a
     carry that never happened.  */
  sha1_uint32 lolen = (sha1_uint32) len;

  /* First increment the byte count, kept as two words (low, high).
     The high word receives the bits of LEN above bit 31 plus the carry
     out of the low word.  The shift is split in two so that the shift
     count stays below the width of a 32-bit size_t.  */
  ctx->total[0] += lolen;
  ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < lolen);

  (void) &check[0];
  abcd = _mm_loadu_si128 ((const __m128i *) &ctx->A);
  e0 = _mm_set_epi32 (ctx->E, 0, 0, 0);
  /* Reverse the order of the four 32-bit lanes.  */
  abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */

  while (words < endp)
    {
      /* Keep the incoming state for the final addition.  */
      abcd_save = abcd;
      e0_save = e0;

      /* Each group below performs four rounds.  The last argument of
         _mm_sha1rnds4_epu32 is 0, 1, 2, 3 for rounds 0..19, 20..39,
         40..59 and 60..79 respectively.  */

      /* 0..3 */
      msg0 = _mm_loadu_si128 (words);
      msg0 = _mm_shuffle_epi8 (msg0, shuf_mask);
      e0 = _mm_add_epi32 (e0, msg0);
      e1 = abcd;
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);

      /* 4..7 */
      msg1 = _mm_loadu_si128 (words + 1);
      msg1 = _mm_shuffle_epi8 (msg1, shuf_mask);
      e1 = _mm_sha1nexte_epu32 (e1, msg1);
      e0 = abcd;
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0);
      msg0 = _mm_sha1msg1_epu32 (msg0, msg1);

      /* 8..11 */
      msg2 = _mm_loadu_si128 (words + 2);
      msg2 = _mm_shuffle_epi8 (msg2, shuf_mask);
      e0 = _mm_sha1nexte_epu32 (e0, msg2);
      e1 = abcd;
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
      msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
      msg0 = _mm_xor_si128 (msg0, msg2);

      /* 12..15 */
      msg3 = _mm_loadu_si128 (words + 3);
      msg3 = _mm_shuffle_epi8 (msg3, shuf_mask);
      e1 = _mm_sha1nexte_epu32 (e1, msg3);
      e0 = abcd;
      msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0);
      msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
      msg1 = _mm_xor_si128 (msg1, msg3);

      /* 16..19 */
      e0 = _mm_sha1nexte_epu32 (e0, msg0);
      e1 = abcd;
      msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
      msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
      msg2 = _mm_xor_si128 (msg2, msg0);

      /* 20..23 */
      e1 = _mm_sha1nexte_epu32 (e1, msg1);
      e0 = abcd;
      msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
      msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
      msg3 = _mm_xor_si128 (msg3, msg1);

      /* 24..27 */
      e0 = _mm_sha1nexte_epu32 (e0, msg2);
      e1 = abcd;
      msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1);
      msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
      msg0 = _mm_xor_si128 (msg0, msg2);

      /* 28..31 */
      e1 = _mm_sha1nexte_epu32 (e1, msg3);
      e0 = abcd;
      msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
      msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
      msg1 = _mm_xor_si128 (msg1, msg3);

      /* 32..35 */
      e0 = _mm_sha1nexte_epu32 (e0, msg0);
      e1 = abcd;
      msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1);
      msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
      msg2 = _mm_xor_si128 (msg2, msg0);

      /* 36..39 */
      e1 = _mm_sha1nexte_epu32 (e1, msg1);
      e0 = abcd;
      msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
      msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
      msg3 = _mm_xor_si128 (msg3, msg1);

      /* 40..43 */
      e0 = _mm_sha1nexte_epu32 (e0, msg2);
      e1 = abcd;
      msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
      msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
      msg0 = _mm_xor_si128 (msg0, msg2);

      /* 44..47 */
      e1 = _mm_sha1nexte_epu32 (e1, msg3);
      e0 = abcd;
      msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2);
      msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
      msg1 = _mm_xor_si128 (msg1, msg3);

      /* 48..51 */
      e0 = _mm_sha1nexte_epu32 (e0, msg0);
      e1 = abcd;
      msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
      msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
      msg2 = _mm_xor_si128 (msg2, msg0);

      /* 52..55 */
      e1 = _mm_sha1nexte_epu32 (e1, msg1);
      e0 = abcd;
      msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2);
      msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
      msg3 = _mm_xor_si128 (msg3, msg1);

      /* 56..59 */
      e0 = _mm_sha1nexte_epu32 (e0, msg2);
      e1 = abcd;
      msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
      msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
      msg0 = _mm_xor_si128 (msg0, msg2);

      /* 60..63 */
      e1 = _mm_sha1nexte_epu32 (e1, msg3);
      e0 = abcd;
      msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
      msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
      msg1 = _mm_xor_si128 (msg1, msg3);

      /* 64..67 */
      e0 = _mm_sha1nexte_epu32 (e0, msg0);
      e1 = abcd;
      msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3);
      msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
      msg2 = _mm_xor_si128 (msg2, msg0);

      /* 68..71 */
      e1 = _mm_sha1nexte_epu32 (e1, msg1);
      e0 = abcd;
      msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
      msg3 = _mm_xor_si128 (msg3, msg1);

      /* 72..75 */
      e0 = _mm_sha1nexte_epu32 (e0, msg2);
      e1 = abcd;
      msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
      abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3);

      /* 76..79 */
      e1 = _mm_sha1nexte_epu32 (e1, msg3);
      e0 = abcd;
      abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);

      /* Finalize: combine with the state saved at the top of the
         iteration.  */
      e0 = _mm_sha1nexte_epu32 (e0, e0_save);
      abcd = _mm_add_epi32 (abcd, abcd_save);

      /* Advance by four 16-byte vectors, i.e. one 64-byte block.  */
      words = words + 4;
    }

  /* Undo the lane reversal and write the state back to CTX.  */
  abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */
  _mm_storeu_si128 ((__m128i *) &ctx->A, abcd);
  ctx->E = _mm_extract_epi32 (e0, 3);
#endif
}
702#endif
703
704/* Return sha1_process_bytes or some hardware optimized version thereof
705 depending on current CPU. */
706
707sha1_process_bytes_fn
708sha1_choose_process_bytes (void)
709{
710#ifdef HAVE_X86_SHA1_HW_SUPPORT
711 unsigned int eax, ebx, ecx, edx;
712 if (__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx)
713 && (ebx & bit_SHA) != 0
714 && __get_cpuid (1, &eax, &ebx, &ecx, &edx)
715 && (ecx & bit_SSE4_1) != 0)
716 return sha1_hw_process_bytes;
717#endif
718 return sha1_process_bytes;
719}