]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/sha/sha512.c
+20% SHA512 performance improvement on x86.
[thirdparty/openssl.git] / crypto / sha / sha512.c
1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3 * Copyright (c) 2004 The OpenSSL Project. All rights reserved
4 * according to the OpenSSL license [found in ../../LICENSE].
5 * ====================================================================
6 */
7 #include <openssl/opensslconf.h>
8 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
9 /*
10 * IMPLEMENTATION NOTES.
11 *
12 * As you might have noticed 32-bit hash algorithms:
13 *
14 * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
15 * - optimized versions implement two transform functions: one operating
16 * on [aligned] data in host byte order and one - on data in input
17 * stream byte order;
18 * - share common byte-order neutral collector and padding function
19 * implementations, ../md32_common.h;
20 *
21 * Neither of the above applies to this SHA-512 implementation. Reasons
22 * [in reverse order] are:
23 *
24 * - it's the only 64-bit hash algorithm at the time of this writing,
25 * there is no need for common collector/padding implementation [yet];
26 * - by supporting only one transform function [which operates on
27 * *aligned* data in input stream byte order, big-endian in this case]
28 * we minimize burden of maintenance in two ways: a) collector/padding
29 * function is simpler; b) only one transform function to stare at;
30 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
31 * apply a number of optimizations to mitigate potential performance
32 * penalties caused by previous design decision;
33 *
34 * Caveat lector.
35 *
36 * Implementation relies on the fact that "long long" is 64-bit on
37 * both 32- and 64-bit platforms. If some compiler vendor comes up
38 * with 128-bit long long, adjustment to sha.h would be required.
39 * As this implementation relies on 64-bit integer type, it's totally
40 * inappropriate for platforms which don't support it, most notably
41 * 16-bit platforms.
42 * <appro@fy.chalmers.se>
43 */
44 #include <stdlib.h>
45 #include <string.h>
46
47 #include <openssl/crypto.h>
48 #include <openssl/sha.h>
49 #include <openssl/opensslv.h>
50
51 #include "cryptlib.h"
52
/*
 * Identification string for this module: the literal "SHA-512" with
 * OPENSSL_VERSION_PTEXT appended by string-literal concatenation.
 * NOTE(review): OPENSSL_VERSION_PTEXT is presumably supplied by
 * <openssl/opensslv.h> -- confirm.
 */
53 const char *SHA512_version="SHA-512" OPENSSL_VERSION_PTEXT;
54
/*
 * On x86 and x86-64 builds (detected through the MSVC _M_IX86/_M_AMD64
 * or the __i386/__x86_64 predefined macros) declare that sha512_block
 * may be handed input that is not aligned to a 64-bit word.  When this
 * macro is defined, SHA512_Update skips its copy-into-the-context-buffer
 * path for misaligned input and passes the caller's pointer directly.
 */
55 #if defined(_M_IX86) || defined(_M_AMD64) || defined(__i386) || defined(__x86_64)
56 #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
57 #endif
58
59 int SHA384_Init (SHA512_CTX *c)
60 {
61 c->h[0]=U64(0xcbbb9d5dc1059ed8);
62 c->h[1]=U64(0x629a292a367cd507);
63 c->h[2]=U64(0x9159015a3070dd17);
64 c->h[3]=U64(0x152fecd8f70e5939);
65 c->h[4]=U64(0x67332667ffc00b31);
66 c->h[5]=U64(0x8eb44a8768581511);
67 c->h[6]=U64(0xdb0c2e0d64f98fa7);
68 c->h[7]=U64(0x47b5481dbefa4fa4);
69 c->Nl=0; c->Nh=0;
70 c->num=0; c->md_len=SHA384_DIGEST_LENGTH;
71 return 1;
72 }
73
74 int SHA512_Init (SHA512_CTX *c)
75 {
76 c->h[0]=U64(0x6a09e667f3bcc908);
77 c->h[1]=U64(0xbb67ae8584caa73b);
78 c->h[2]=U64(0x3c6ef372fe94f82b);
79 c->h[3]=U64(0xa54ff53a5f1d36f1);
80 c->h[4]=U64(0x510e527fade682d1);
81 c->h[5]=U64(0x9b05688c2b3e6c1f);
82 c->h[6]=U64(0x1f83d9abfb41bd6b);
83 c->h[7]=U64(0x5be0cd19137e2179);
84 c->Nl=0; c->Nh=0;
85 c->num=0; c->md_len=SHA512_DIGEST_LENGTH;
86 return 1;
87 }
88
/*
 * Forward declaration of the block transform used by the functions below.
 * It folds `num` consecutive blocks starting at `in` into ctx->h.
 * When SHA512_ASM is not defined the C implementation later in this
 * file provides it, hence the `static`.  When SHA512_ASM is defined the
 * C body is compiled out and the symbol is presumably supplied by an
 * external (assembler) module -- confirm against the build.
 */
89 #ifndef SHA512_ASM
90 static
91 #endif
92 void sha512_block (SHA512_CTX *ctx, const void *in, size_t num);
93
94 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
95 {
96 unsigned char *p=(unsigned char *)c->u.p;
97 size_t n=c->num;
98
99 p[n]=0x80; /* There always is a room for one */
100 n++;
101 if (n > (sizeof(c->u)-16))
102 memset (p+n,0,sizeof(c->u)-n), n=0,
103 sha512_block (c,p,1);
104
105 memset (p+n,0,sizeof(c->u)-16-n);
106 #ifdef B_ENDIAN
107 c->u.d[SHA_LBLOCK-2] = c->Nh;
108 c->u.d[SHA_LBLOCK-1] = c->Nl;
109 #else
110 p[sizeof(c->u)-1] = (unsigned char)(c->Nl);
111 p[sizeof(c->u)-2] = (unsigned char)(c->Nl>>8);
112 p[sizeof(c->u)-3] = (unsigned char)(c->Nl>>16);
113 p[sizeof(c->u)-4] = (unsigned char)(c->Nl>>24);
114 p[sizeof(c->u)-5] = (unsigned char)(c->Nl>>32);
115 p[sizeof(c->u)-6] = (unsigned char)(c->Nl>>40);
116 p[sizeof(c->u)-7] = (unsigned char)(c->Nl>>48);
117 p[sizeof(c->u)-8] = (unsigned char)(c->Nl>>56);
118 p[sizeof(c->u)-9] = (unsigned char)(c->Nh);
119 p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
120 p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
121 p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
122 p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
123 p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
124 p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
125 p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
126 #endif
127
128 sha512_block (c,p,1);
129
130 if (md==0) return 0;
131
132 switch (c->md_len)
133 {
134 /* Let compiler decide if it's appropriate to unroll... */
135 case SHA384_DIGEST_LENGTH:
136 for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
137 {
138 SHA_LONG64 t = c->h[n];
139
140 *(md++) = (unsigned char)(t>>56);
141 *(md++) = (unsigned char)(t>>48);
142 *(md++) = (unsigned char)(t>>40);
143 *(md++) = (unsigned char)(t>>32);
144 *(md++) = (unsigned char)(t>>24);
145 *(md++) = (unsigned char)(t>>16);
146 *(md++) = (unsigned char)(t>>8);
147 *(md++) = (unsigned char)(t);
148 }
149 break;
150 case SHA512_DIGEST_LENGTH:
151 for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
152 {
153 SHA_LONG64 t = c->h[n];
154
155 *(md++) = (unsigned char)(t>>56);
156 *(md++) = (unsigned char)(t>>48);
157 *(md++) = (unsigned char)(t>>40);
158 *(md++) = (unsigned char)(t>>32);
159 *(md++) = (unsigned char)(t>>24);
160 *(md++) = (unsigned char)(t>>16);
161 *(md++) = (unsigned char)(t>>8);
162 *(md++) = (unsigned char)(t);
163 }
164 break;
165 /* ... as well as make sure md_len is not abused. */
166 default: return 0;
167 }
168
169 return 1;
170 }
171
172 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
173 { return SHA512_Final (md,c); }
174
175 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
176 {
177 SHA_LONG64 l;
178 unsigned char *p=c->u.p;
179 const unsigned char *data=(const unsigned char *)_data;
180
181 if (len==0) return 1;
182
183 l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
184 if (l < c->Nl) c->Nh++;
185 if (sizeof(len)>=8) c->Nh+=(((SHA_LONG64)len)>>61);
186 c->Nl=l;
187
188 if (c->num != 0)
189 {
190 size_t n = sizeof(c->u) - c->num;
191
192 if (len < n)
193 {
194 memcpy (p+c->num,data,len), c->num += len;
195 return 1;
196 }
197 else {
198 memcpy (p+c->num,data,n), c->num = 0;
199 len-=n, data+=n;
200 sha512_block (c,p,1);
201 }
202 }
203
204 if (len >= sizeof(c->u))
205 {
206 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
207 if ((size_t)data%sizeof(c->u.d[0]) != 0)
208 while (len >= sizeof(c->u))
209 memcpy (p,data,sizeof(c->u)),
210 sha512_block (c,p,1),
211 len -= sizeof(c->u),
212 data += sizeof(c->u);
213 else
214 #endif
215 sha512_block (c,data,len/sizeof(c->u)),
216 data += len,
217 len %= sizeof(c->u),
218 data -= len;
219 }
220
221 if (len != 0) memcpy (p,data,len), c->num = (int)len;
222
223 return 1;
224 }
225
226 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
227 { return SHA512_Update (c,data,len); }
228
229 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
230 { sha512_block (c,data,1); }
231
232 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
233 {
234 SHA512_CTX c;
235 static unsigned char m[SHA384_DIGEST_LENGTH];
236
237 if (md == NULL) md=m;
238 SHA384_Init(&c);
239 SHA512_Update(&c,d,n);
240 SHA512_Final(md,&c);
241 OPENSSL_cleanse(&c,sizeof(c));
242 return(md);
243 }
244
245 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
246 {
247 SHA512_CTX c;
248 static unsigned char m[SHA512_DIGEST_LENGTH];
249
250 if (md == NULL) md=m;
251 SHA512_Init(&c);
252 SHA512_Update(&c,d,n);
253 SHA512_Final(md,&c);
254 OPENSSL_cleanse(&c,sizeof(c));
255 return(md);
256 }
257
258 #ifndef SHA512_ASM
259 static const SHA_LONG64 K512[80] = {
260 U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
261 U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
262 U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
263 U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
264 U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
265 U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
266 U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
267 U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
268 U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
269 U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
270 U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
271 U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
272 U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
273 U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
274 U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
275 U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
276 U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
277 U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
278 U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
279 U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
280 U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
281 U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
282 U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
283 U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
284 U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
285 U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
286 U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
287 U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
288 U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
289 U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
290 U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
291 U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
292 U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
293 U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
294 U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
295 U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
296 U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
297 U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
298 U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
299 U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
300
/*
 * Compiler/CPU specific fast paths, disabled as a whole by PEDANTIC.
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by the constant n.
 *   PULL64(x)  - load the 64-bit big-endian word stored at &x and return
 *                it in host order (only defined for little-endian hosts).
 *
 * Whatever is left undefined here is supplied by the portable fallbacks
 * that follow this block.
 *
 * The temporaries holding the rotate result are declared SHA_LONG64
 * rather than `unsigned long`: on LLP64 GCC targets (e.g. 64-bit
 * Windows) `unsigned long` is only 32 bits wide, which does not match
 * the 64-bit operand tied to it by the "0" constraint.
 */
#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if defined(__x86_64) || defined(__x86_64__)
#   define ROTR(a,n)    ({ SHA_LONG64 ret;                  \
                            asm ("rorq %1,%0"               \
                            : "=r"(ret)                     \
                            : "J"(n),"0"(a)                 \
                            : "cc"); ret;                   })
#   if !defined(B_ENDIAN)
#    define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
                            asm ("bswapq %0"                \
                            : "=r"(ret)                     \
                            : "0"(ret)); ret;               })
#   endif
#  elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#   if defined(I386_ONLY)
     /* No bswap on a plain 386: swap bytes with xchg/rol instead. */
#    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                         unsigned int hi,lo;                \
                            asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                "roll $16,%%eax; roll $16,%%edx; "\
                                "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
                            : "=a"(lo),"=d"(hi)             \
                            : "0"(p[1]),"1"(p[0]) : "cc");  \
                            ((SHA_LONG64)hi)<<32|lo;        })
#   else
#    define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                         unsigned int hi,lo;                \
                            asm ("bswapl %0; bswapl %1;"    \
                            : "=r"(lo),"=r"(hi)             \
                            : "0"(p[1]),"1"(p[0]));         \
                            ((SHA_LONG64)hi)<<32|lo;        })
#   endif
#  elif defined(_ARCH_PPC) && defined(__64BIT__)
#   define ROTR(a,n)    ({ SHA_LONG64 ret;                  \
                            asm ("rotrdi %0,%1,%2"          \
                            : "=r"(ret)                     \
                            : "r"(a),"K"(n)); ret;          })
#  endif
# elif defined(_MSC_VER)
#  if defined(_WIN64)   /* applies to both IA-64 and AMD64 */
#   define ROTR(a,n)    _rotr64((a),n)
#  endif
#  if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
    /*
     * __fastcall passes x in ecx; the swapped halves are left in
     * edx:eax, which is where a 64-bit return value is expected.
     */
#   if defined(I386_ONLY)
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {   _asm    mov     edx, [ecx + 0]
        _asm    mov     eax, [ecx + 4]
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
        _asm    rol     edx,16
        _asm    rol     eax,16
        _asm    xchg    dh,dl
        _asm    xchg    ah,al
    }
#   else
    static SHA_LONG64 __fastcall __pull64be(const void *x)
    {   _asm    mov     edx, [ecx + 0]
        _asm    mov     eax, [ecx + 4]
        _asm    bswap   edx
        _asm    bswap   eax
    }
#   endif
#   define PULL64(x) __pull64be(&(x))
#  endif
# endif
#endif
367
/*
 * Portable fallbacks and the SHA-512 round functions.
 *
 *   B(x,j)      - byte j of the object x, moved to its position in a
 *                 big-endian 64-bit word (byte 0 is most significant).
 *   PULL64(x)   - assemble the eight bytes at &x into a host-order word.
 *   ROTR(x,s)   - rotate the 64-bit value x right by s, 0 < s < 64.
 *
 * Every macro parameter is parenthesised so that an argument which is
 * itself an expression (e.g. ROTR(v, a+b)) keeps its meaning; without
 * that, `64-s` would expand to `64-a+b`.
 */
#ifndef PULL64
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif

#ifndef ROTR
#define ROTR(x,s)   (((x)>>(s)) | ((x)<<(64-(s))))
#endif

/* "Big" sigmas act on the working variables a and e in each round. */
#define Sigma0(x)   (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sigma1(x)   (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
/* "Small" sigmas are used when extending the message schedule. */
#define sigma0(x)   (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#define sigma1(x)   (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

/* Ch: bits of y where x is set, bits of z elsewhere.  Maj: bitwise majority. */
#define Ch(x,y,z)   (((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x,y,z)  (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
384
#if defined(OPENSSL_IA32_SSE2) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
/*
 * Run-time dispatch used at the top of sha512_block: when bit 26 of
 * OPENSSL_ia32cap_P is set, hand the whole request to
 * sha512_block_sse2 and return from the enclosing function; otherwise
 * fall through to the C code.
 * NOTE(review): bit 26 is presumably the SSE2 feature flag -- confirm.
 */
#define GO_FOR_SSE2(ctx,in,num)     do {                    \
        void sha512_block_sse2(void *,const void *,size_t); \
        if (OPENSSL_ia32cap_P & (1<<26))                    \
            {                                               \
            sha512_block_sse2(ctx->h,in,num);               \
            return;                                         \
            }                                               \
        } while (0)
#endif
392
393 #ifdef OPENSSL_SMALL_FOOTPRINT
394
395 static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num)
396 {
397 const SHA_LONG64 *W=in;
398 SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1,T2;
399 SHA_LONG64 X[16];
400 int i;
401
402 #ifdef GO_FOR_SSE2
403 GO_FOR_SSE2(ctx,in,num);
404 #endif
405
406 while (num--) {
407
408 a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
409 e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
410
411 for (i=0;i<16;i++)
412 {
413 #ifdef B_ENDIAN
414 T1 = X[i] = W[i];
415 #else
416 T1 = X[i] = PULL64(W[i]);
417 #endif
418 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
419 T2 = Sigma0(a) + Maj(a,b,c);
420 h = g; g = f; f = e; e = d + T1;
421 d = c; c = b; b = a; a = T1 + T2;
422 }
423
424 for (;i<80;i++)
425 {
426 s0 = X[(i+1)&0x0f]; s0 = sigma0(s0);
427 s1 = X[(i+14)&0x0f]; s1 = sigma1(s1);
428
429 T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
430 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
431 T2 = Sigma0(a) + Maj(a,b,c);
432 h = g; g = f; f = e; e = d + T1;
433 d = c; c = b; b = a; a = T1 + T2;
434 }
435
436 ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
437 ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
438
439 W+=SHA_LBLOCK;
440 }
441 }
442
443 #else
444
/*
 * One SHA-512 round for the unrolled sha512_block.
 *
 * On entry T1 holds the message-schedule word for round i.  Instead of
 * shuffling the eight working variables after each round, the caller
 * rotates the argument list by one position, so only `d` and `h` are
 * written here.  T1 comes from the enclosing scope.
 */
#define ROUND_00_15(i,a,b,c,d,e,f,g,h)      do {            \
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];          \
        d  += T1;                                           \
        h   = T1 + Sigma0(a) + Maj(a,b,c);                  \
        } while (0)

/*
 * Rounds 16..79: first extend the message schedule in the 16-word ring
 * X (slot i mod 16 is updated in place), then perform the round proper.
 * s0, s1 and T1 come from the enclosing scope.
 */
#define ROUND_16_80(i,a,b,c,d,e,f,g,h,X)    do {            \
        s0 = X[((i)+1)&0x0f];                               \
        s1 = X[((i)+14)&0x0f];                              \
        s0 = sigma0(s0);                                    \
        s1 = sigma1(s1);                                    \
        T1 = X[(i)&0x0f] += s0 + s1 + X[((i)+9)&0x0f];      \
        ROUND_00_15(i,a,b,c,d,e,f,g,h);                     \
        } while (0)
455
456 static void sha512_block (SHA512_CTX *ctx, const void *in, size_t num)
457 {
458 const SHA_LONG64 *W=in;
459 SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1;
460 SHA_LONG64 X[16];
461 int i;
462
463 #ifdef GO_FOR_SSE2
464 GO_FOR_SSE2(ctx,in,num);
465 #endif
466
467 while (num--) {
468
469 a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
470 e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
471
472 #ifdef B_ENDIAN
473 T1 = X[0] = W[0]; ROUND_00_15(0,a,b,c,d,e,f,g,h);
474 T1 = X[1] = W[1]; ROUND_00_15(1,h,a,b,c,d,e,f,g);
475 T1 = X[2] = W[2]; ROUND_00_15(2,g,h,a,b,c,d,e,f);
476 T1 = X[3] = W[3]; ROUND_00_15(3,f,g,h,a,b,c,d,e);
477 T1 = X[4] = W[4]; ROUND_00_15(4,e,f,g,h,a,b,c,d);
478 T1 = X[5] = W[5]; ROUND_00_15(5,d,e,f,g,h,a,b,c);
479 T1 = X[6] = W[6]; ROUND_00_15(6,c,d,e,f,g,h,a,b);
480 T1 = X[7] = W[7]; ROUND_00_15(7,b,c,d,e,f,g,h,a);
481 T1 = X[8] = W[8]; ROUND_00_15(8,a,b,c,d,e,f,g,h);
482 T1 = X[9] = W[9]; ROUND_00_15(9,h,a,b,c,d,e,f,g);
483 T1 = X[10] = W[10]; ROUND_00_15(10,g,h,a,b,c,d,e,f);
484 T1 = X[11] = W[11]; ROUND_00_15(11,f,g,h,a,b,c,d,e);
485 T1 = X[12] = W[12]; ROUND_00_15(12,e,f,g,h,a,b,c,d);
486 T1 = X[13] = W[13]; ROUND_00_15(13,d,e,f,g,h,a,b,c);
487 T1 = X[14] = W[14]; ROUND_00_15(14,c,d,e,f,g,h,a,b);
488 T1 = X[15] = W[15]; ROUND_00_15(15,b,c,d,e,f,g,h,a);
489 #else
490 T1 = X[0] = PULL64(W[0]); ROUND_00_15(0,a,b,c,d,e,f,g,h);
491 T1 = X[1] = PULL64(W[1]); ROUND_00_15(1,h,a,b,c,d,e,f,g);
492 T1 = X[2] = PULL64(W[2]); ROUND_00_15(2,g,h,a,b,c,d,e,f);
493 T1 = X[3] = PULL64(W[3]); ROUND_00_15(3,f,g,h,a,b,c,d,e);
494 T1 = X[4] = PULL64(W[4]); ROUND_00_15(4,e,f,g,h,a,b,c,d);
495 T1 = X[5] = PULL64(W[5]); ROUND_00_15(5,d,e,f,g,h,a,b,c);
496 T1 = X[6] = PULL64(W[6]); ROUND_00_15(6,c,d,e,f,g,h,a,b);
497 T1 = X[7] = PULL64(W[7]); ROUND_00_15(7,b,c,d,e,f,g,h,a);
498 T1 = X[8] = PULL64(W[8]); ROUND_00_15(8,a,b,c,d,e,f,g,h);
499 T1 = X[9] = PULL64(W[9]); ROUND_00_15(9,h,a,b,c,d,e,f,g);
500 T1 = X[10] = PULL64(W[10]); ROUND_00_15(10,g,h,a,b,c,d,e,f);
501 T1 = X[11] = PULL64(W[11]); ROUND_00_15(11,f,g,h,a,b,c,d,e);
502 T1 = X[12] = PULL64(W[12]); ROUND_00_15(12,e,f,g,h,a,b,c,d);
503 T1 = X[13] = PULL64(W[13]); ROUND_00_15(13,d,e,f,g,h,a,b,c);
504 T1 = X[14] = PULL64(W[14]); ROUND_00_15(14,c,d,e,f,g,h,a,b);
505 T1 = X[15] = PULL64(W[15]); ROUND_00_15(15,b,c,d,e,f,g,h,a);
506 #endif
507
508 for (i=16;i<80;i+=8)
509 {
510 ROUND_16_80(i+0,a,b,c,d,e,f,g,h,X);
511 ROUND_16_80(i+1,h,a,b,c,d,e,f,g,X);
512 ROUND_16_80(i+2,g,h,a,b,c,d,e,f,X);
513 ROUND_16_80(i+3,f,g,h,a,b,c,d,e,X);
514 ROUND_16_80(i+4,e,f,g,h,a,b,c,d,X);
515 ROUND_16_80(i+5,d,e,f,g,h,a,b,c,X);
516 ROUND_16_80(i+6,c,d,e,f,g,h,a,b,X);
517 ROUND_16_80(i+7,b,c,d,e,f,g,h,a,X);
518 }
519
520 ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
521 ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
522
523 W+=SHA_LBLOCK;
524 }
525 }
526
527 #endif
528
529 #endif /* SHA512_ASM */
530
531 #endif /* OPENSSL_NO_SHA512 */