]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/sha/sha512.c
Update copyright year
[thirdparty/openssl.git] / crypto / sha / sha512.c
CommitLineData
/*
 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */
b1322259 9
/*
 * SHA512 low level APIs are deprecated for public use, but still ok for
 * internal use.
 */
14#include "internal/deprecated.h"
15
16#include <stdio.h>
165fca51 17#include <openssl/opensslconf.h>
/*-
 * IMPLEMENTATION NOTES.
 *
 * As you might have noticed 32-bit hash algorithms:
 *
 * - permit SHA_LONG to be wider than 32-bit
 * - optimized versions implement two transform functions: one operating
 *   on [aligned] data in host byte order and one - on data in input
 *   stream byte order;
 * - share common byte-order neutral collector and padding function
 *   implementations, crypto/md32_common.h;
 *
 * Neither of the above applies to this SHA-512 implementations. Reasons
 * [in reverse order] are:
 *
 * - it's the only 64-bit hash algorithm for the moment of this writing,
 *   there is no need for common collector/padding implementation [yet];
 * - by supporting only one transform function [which operates on
 *   *aligned* data in input stream byte order, big-endian in this case]
 *   we minimize burden of maintenance in two ways: a) collector/padding
 *   function is simpler; b) only one transform function to stare at;
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
 *   apply a number of optimizations to mitigate potential performance
 *   penalties caused by previous design decision;
 *
 * Caveat lector.
 *
 * Implementation relies on the fact that "long long" is 64-bit on
 * both 32- and 64-bit platforms. If some compiler vendor comes up
 * with 128-bit long long, adjustment to sha.h would be required.
 * As this implementation relies on 64-bit integer type, it's totally
 * inappropriate for platforms which don't support it, most notably
 * 16-bit platforms.
 */
474e469b
RS
52#include <stdlib.h>
53#include <string.h>
c842261b 54
474e469b
RS
55#include <openssl/crypto.h>
56#include <openssl/sha.h>
57#include <openssl/opensslv.h>
c842261b 58
b39fc560 59#include "internal/cryptlib.h"
25f2138b 60#include "crypto/sha.h"
2b247cf8 61
/*
 * On these targets the block function may be fed data that is not 64-bit
 * aligned: either the ISA tolerates unaligned loads (x86, x86_64, s390x,
 * aarch64) or an assembly implementation that copes with it is used
 * (SHA512_ASM).  Everywhere else SHA512_Update/SHA512_Transform bounce
 * unaligned input through the aligned context buffer first.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(__aarch64__) || \
    defined(SHA512_ASM)
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif
c842261b 69
/*
 * U64(C) appends the compiler-appropriate suffix to write a 64-bit
 * unsigned integer constant: MSVC spells it UI64, 64-bit targets where
 * long is 64-bit (__arch64__; presumably the SPARC V9 ABI define --
 * NOTE(review): confirm) can use UL, everyone else uses ULL.
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
# define U64(C)     C##UI64
#elif defined(__arch64__)
# define U64(C)     C##UL
#else
# define U64(C)     C##ULL
#endif
77
4bed94f0
P
78int sha512_224_init(SHA512_CTX *c)
79{
80 c->h[0] = U64(0x8c3d37c819544da2);
81 c->h[1] = U64(0x73e1996689dcd4d6);
82 c->h[2] = U64(0x1dfab7ae32ff9c82);
83 c->h[3] = U64(0x679dd514582f9fcf);
84 c->h[4] = U64(0x0f6d2b697bd44da8);
85 c->h[5] = U64(0x77e36f7304c48942);
86 c->h[6] = U64(0x3f9d85a86a1d36c8);
87 c->h[7] = U64(0x1112e6ad91d692a1);
88
89 c->Nl = 0;
90 c->Nh = 0;
91 c->num = 0;
92 c->md_len = SHA224_DIGEST_LENGTH;
93 return 1;
94}
95
96int sha512_256_init(SHA512_CTX *c)
97{
98 c->h[0] = U64(0x22312194fc2bf72c);
99 c->h[1] = U64(0x9f555fa3c84c64c2);
100 c->h[2] = U64(0x2393b86b6f53b151);
101 c->h[3] = U64(0x963877195940eabd);
102 c->h[4] = U64(0x96283ee2a88effe3);
103 c->h[5] = U64(0xbe5e1e2553863992);
104 c->h[6] = U64(0x2b0199fc2c85b8aa);
105 c->h[7] = U64(0x0eb72ddc81c52ca2);
106
107 c->Nl = 0;
108 c->Nh = 0;
109 c->num = 0;
110 c->md_len = SHA256_DIGEST_LENGTH;
111 return 1;
112}
113
0f113f3e
MC
114int SHA384_Init(SHA512_CTX *c)
115{
116 c->h[0] = U64(0xcbbb9d5dc1059ed8);
117 c->h[1] = U64(0x629a292a367cd507);
118 c->h[2] = U64(0x9159015a3070dd17);
119 c->h[3] = U64(0x152fecd8f70e5939);
120 c->h[4] = U64(0x67332667ffc00b31);
121 c->h[5] = U64(0x8eb44a8768581511);
122 c->h[6] = U64(0xdb0c2e0d64f98fa7);
123 c->h[7] = U64(0x47b5481dbefa4fa4);
124
125 c->Nl = 0;
126 c->Nh = 0;
127 c->num = 0;
128 c->md_len = SHA384_DIGEST_LENGTH;
129 return 1;
130}
131
132int SHA512_Init(SHA512_CTX *c)
133{
134 c->h[0] = U64(0x6a09e667f3bcc908);
135 c->h[1] = U64(0xbb67ae8584caa73b);
136 c->h[2] = U64(0x3c6ef372fe94f82b);
137 c->h[3] = U64(0xa54ff53a5f1d36f1);
138 c->h[4] = U64(0x510e527fade682d1);
139 c->h[5] = U64(0x9b05688c2b3e6c1f);
140 c->h[6] = U64(0x1f83d9abfb41bd6b);
141 c->h[7] = U64(0x5be0cd19137e2179);
142
143 c->Nl = 0;
144 c->Nh = 0;
145 c->num = 0;
146 c->md_len = SHA512_DIGEST_LENGTH;
147 return 1;
148}
149
/*
 * Core compression function: processes |num| complete 128-byte blocks
 * from |in| into ctx->h.  Supplied by assembly when SHA512_ASM is
 * defined; otherwise one of the static C implementations below is used.
 */
#ifndef SHA512_ASM
static
#endif
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
154
/*
 * Finalise the hash: append the 0x80 pad byte and the 128-bit message
 * bit-count, compress the remaining block(s), and serialise c->md_len
 * bytes of digest into |md| big-endian.  Returns 1 on success, 0 if
 * |md| is NULL or c->md_len is not one of the supported lengths.
 */
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *p = (unsigned char *)c->u.p;
    size_t n = c->num;

    p[n] = 0x80;                /* There always is a room for one */
    n++;
    /*
     * If fewer than 16 bytes remain there is no room for the length
     * field: zero-fill and compress this block, then pad a fresh one.
     */
    if (n > (sizeof(c->u) - 16)) {
        memset(p + n, 0, sizeof(c->u) - n);
        n = 0;
        sha512_block_data_order(c, p, 1);
    }

    memset(p + n, 0, sizeof(c->u) - 16 - n);
    /*
     * Store the total message length in bits as a 128-bit big-endian
     * integer (Nh:Nl) in the last 16 bytes of the block.
     */
#ifdef  B_ENDIAN
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
#else
    p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
    p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
    p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
    p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
    p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
    p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
    p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
    p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
    p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
    p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
    p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
    p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
    p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
    p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
    p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
    p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
#endif

    sha512_block_data_order(c, p, 1);

    if (md == 0)
        return 0;

    /* Emit the digest: whole 64-bit state words, most significant byte
     * first, for as many bytes as the chosen variant requires. */
    switch (c->md_len) {
    /* Let compiler decide if it's appropriate to unroll... */
    case SHA224_DIGEST_LENGTH:
        for (n = 0; n < SHA224_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        /*
         * For 224 bits, there are four bytes left over that have to be
         * processed separately.
         */
        {
            SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
        }
        break;
    case SHA256_DIGEST_LENGTH:
        for (n = 0; n < SHA256_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        break;
    case SHA384_DIGEST_LENGTH:
        for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        break;
    case SHA512_DIGEST_LENGTH:
        for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        break;
    /* ... as well as make sure md_len is not abused. */
    default:
        return 0;
    }

    return 1;
}
273
274int SHA384_Final(unsigned char *md, SHA512_CTX *c)
275{
276 return SHA512_Final(md, c);
277}
278
/*
 * Absorb |len| bytes from |_data| into the hash state.  Buffers partial
 * blocks in c->u and compresses every complete 128-byte block.  Always
 * returns 1.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    SHA_LONG64 l;
    unsigned char *p = c->u.p;
    const unsigned char *data = (const unsigned char *)_data;

    if (len == 0)
        return 1;

    /* Advance the 128-bit bit counter: Nl holds the low 64 bits of the
     * bit length, Nh the carries plus the bits shifted out of len<<3. */
    l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
    if (l < c->Nl)
        c->Nh++;
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64) len) >> 61);
    c->Nl = l;

    /* First top up any partially filled block left by a previous call. */
    if (c->num != 0) {
        size_t n = sizeof(c->u) - c->num;

        if (len < n) {
            memcpy(p + c->num, data, len), c->num += (unsigned int)len;
            return 1;
        } else {
            memcpy(p + c->num, data, n), c->num = 0;
            len -= n, data += n;
            sha512_block_data_order(c, p, 1);
        }
    }

    /* Bulk-process whole blocks straight from the caller's buffer... */
    if (len >= sizeof(c->u)) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        /* ...unless the pointer is misaligned for 64-bit loads, in which
         * case each block is staged through the aligned context buffer. */
        if ((size_t)data % sizeof(c->u.d[0]) != 0)
            while (len >= sizeof(c->u))
                memcpy(p, data, sizeof(c->u)),
                sha512_block_data_order(c, p, 1),
                len -= sizeof(c->u), data += sizeof(c->u);
        else
#endif
            sha512_block_data_order(c, data, len / sizeof(c->u)),
            data += len, len %= sizeof(c->u), data -= len;
    }

    /* Stash the trailing partial block for the next call / Final. */
    if (len != 0)
        memcpy(p, data, len), c->num = (int)len;

    return 1;
}
326
327int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
328{
329 return SHA512_Update(c, data, len);
330}
331
332void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
333{
474e469b 334#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
0f113f3e
MC
335 if ((size_t)data % sizeof(c->u.d[0]) != 0)
336 memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
474e469b 337#endif
0f113f3e
MC
338 sha512_block_data_order(c, data, 1);
339}
c842261b 340
474e469b 341#ifndef SHA512_ASM
/*
 * SHA-384/SHA-512 round constants: the first 64 bits of the fractional
 * parts of the cube roots of the first eighty primes (FIPS 180-4,
 * section 4.2.3).
 */
static const SHA_LONG64 K512[80] = {
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
};
384
/*
 * Primitive selection for the C compression functions below.  Where the
 * compiler and target ISA permit, ROTR/PULL64 and (on RISC-V) the
 * Sigma/sigma/Ch/Maj round primitives are overridden with inline
 * assembly; the portable C fallbacks at the end fill in whatever the
 * target-specific sections did not define.  PEDANTIC disables all the
 * GNU statement-expression tricks.
 */
# ifndef PEDANTIC
#  if defined(__GNUC__) && __GNUC__>=2 && \
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#   if defined(__x86_64) || defined(__x86_64__)
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("rorq %1,%0"       \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
#    if !defined(B_ENDIAN)
#     define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
                                asm ("bswapq %0"        \
                                : "=r"(ret)             \
                                : "0"(ret)); ret;       })
#    endif
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#    if defined(I386_ONLY)
     /* No bswap on pre-i486: byte-swap each half with xchg/rol. */
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                          unsigned int hi=p[0],lo=p[1];         \
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                    "roll $16,%%eax; roll $16,%%edx; "\
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                : "=a"(lo),"=d"(hi)     \
                                : "0"(lo),"1"(hi) : "cc"); \
                                ((SHA_LONG64)hi)<<32|lo; })
#    else
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                          unsigned int hi=p[0],lo=p[1];         \
                                asm ("bswapl %0; bswapl %1;"    \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(lo),"1"(hi));             \
                                ((SHA_LONG64)hi)<<32|lo; })
#    endif
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("rotrdi %0,%1,%2"  \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
#   elif defined(__aarch64__)
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("ror %0,%1,%2"     \
                                : "=r"(ret)             \
                                : "r"(a),"I"(n)); ret;  })
#    if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
    __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#     define PULL64(x)   ({ SHA_LONG64 ret;                     \
                                asm ("rev %0,%1"                \
                                : "=r"(ret)                     \
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
#    endif
#   elif (__riscv_zbkb || __riscv_zbb) && __riscv_xlen == 32
     /* rv32 with Zbb/Zbkb: byte-reverse each 32-bit half with rev8. */
#    define PULL64(x) ({ SHA_LONG64 ret;                                \
                        unsigned int *r = (unsigned int *)(&(ret));     \
                        const unsigned int *p = (const unsigned int *)(&(x)); \
                        asm ("rev8 %0, %1"                              \
                        : "=r"(r[0])                                    \
                        : "r" (p[1]));                                  \
                        asm ("rev8 %0, %1"                              \
                        : "=r"(r[1])                                    \
                        : "r" (p[0])); ret;                             })
#   elif (__riscv_zbkb || __riscv_zbb) && __riscv_xlen == 64
#    define PULL64(x) ({ SHA_LONG64 ret;                                \
                        asm ("rev8 %0, %1"                              \
                        : "=r"(ret)                                     \
                        : "r"(x)); ret;                                 })
#   endif
#   if __riscv_zknh && __riscv_xlen == 32
     /* rv32 Zknh: each 64-bit primitive is built from a low/high
      * instruction pair operating on the two 32-bit halves. */
#    define Sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                        const unsigned int *p = (const unsigned int *)(&(x)); \
                        asm ("sha512sum0r %0, %1, %2"                   \
                        : "=r"(r[0])                                    \
                        : "r" (p[0]), "r" (p[1]));                      \
                        asm ("sha512sum0r %0, %2, %1"                   \
                        : "=r"(r[1])                                    \
                        : "r" (p[0]), "r" (p[1])); ret;                 })
#    define Sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                        const unsigned int *p = (const unsigned int *)(&(x)); \
                        asm ("sha512sum1r %0, %1, %2"                   \
                        : "=r"(r[0])                                    \
                        : "r" (p[0]), "r" (p[1]));                      \
                        asm ("sha512sum1r %0, %2, %1"                   \
                        : "=r"(r[1])                                    \
                        : "r" (p[0]), "r" (p[1])); ret;                 })
#    define sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                        const unsigned int *p = (const unsigned int *)(&(x)); \
                        asm ("sha512sig0l %0, %1, %2"                   \
                        : "=r"(r[0])                                    \
                        : "r" (p[0]), "r" (p[1]));                      \
                        asm ("sha512sig0h %0, %2, %1"                   \
                        : "=r"(r[1])                                    \
                        : "r" (p[0]), "r" (p[1])); ret;                 })
#    define sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                        const unsigned int *p = (const unsigned int *)(&(x)); \
                        asm ("sha512sig1l %0, %1, %2"                   \
                        : "=r"(r[0])                                    \
                        : "r" (p[0]), "r" (p[1]));                      \
                        asm ("sha512sig1h %0, %2, %1"                   \
                        : "=r"(r[1])                                    \
                        : "r" (p[0]), "r" (p[1])); ret;                 })
#   elif __riscv_zknh && __riscv_xlen == 64
#    define Sigma0(x) ({ SHA_LONG64 ret;                \
                        asm ("sha512sum0 %0, %1"        \
                        : "=r"(ret)                     \
                        : "r"(x)); ret;                 })
#    define Sigma1(x) ({ SHA_LONG64 ret;                \
                        asm ("sha512sum1 %0, %1"        \
                        : "=r"(ret)                     \
                        : "r"(x)); ret;                 })
#    define sigma0(x) ({ SHA_LONG64 ret;                \
                        asm ("sha512sig0 %0, %1"        \
                        : "=r"(ret)                     \
                        : "r"(x)); ret;                 })
#    define sigma1(x) ({ SHA_LONG64 ret;                \
                        asm ("sha512sig1 %0, %1"        \
                        : "=r"(ret)                     \
                        : "r"(x)); ret;                 })
#   endif
#   if (__riscv_zbt || __riscv_zpn) && __riscv_xlen == 32
     /* .insn r4 0x33,1,0x3 emits the ternary bit-select (cmix-style)
      * instruction; Ch and Maj are expressed through it per half. */
#    define Ch(x,y,z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                        const unsigned int *xp = (const unsigned int *)(&(x)); \
                        const unsigned int *yp = (const unsigned int *)(&(y)); \
                        const unsigned int *zp = (const unsigned int *)(&(z)); \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
                        : "=r"(r[0])                                    \
                        : "r"(xp[0]), "r"(yp[0]), "r"(zp[0]));          \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
                        : "=r"(r[1])                                    \
                        : "r"(xp[1]), "r"(yp[1]), "r"(zp[1])); ret;     })
#    define Maj(x,y,z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                        const unsigned int *xp = (const unsigned int *)(&(x)); \
                        const unsigned int *yp = (const unsigned int *)(&(y)); \
                        const unsigned int *zp = (const unsigned int *)(&(z)); \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
                        : "=r"(r[0])                                    \
                        : "r"(xp[0]^zp[0]), "r"(yp[0]), "r"(zp[0]));    \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
                        : "=r"(r[1])                                    \
                        : "r"(xp[1]^zp[1]), "r"(yp[1]), "r"(zp[1])); ret; })
#   elif (__riscv_zbt || __riscv_zpn) && __riscv_xlen == 64
#    define Ch(x,y,z) ({ SHA_LONG64 ret;                        \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
                        : "=r"(ret)                             \
                        : "r"(x), "r"(y), "r"(z)); ret;         })
#    define Maj(x,y,z) ({ SHA_LONG64 ret;                       \
                        asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3"\
                        : "=r"(ret)                             \
                        : "r"(x^z), "r"(y), "r"(x)); ret;       })
#   endif
#  elif defined(_MSC_VER)
#   if defined(_WIN64)          /* applies to both IA-64 and AMD64 */
#    pragma intrinsic(_rotr64)
#    define ROTR(a,n)    _rotr64((a),n)
#   endif
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
       !defined(OPENSSL_NO_INLINE_ASM)
#    if defined(I386_ONLY)
/* Big-endian 64-bit load for pre-i486 (no bswap); result in edx:eax. */
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol edx, 16
    _asm rol eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
}
#    else
/* Big-endian 64-bit load using bswap; result in edx:eax. */
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm bswap edx
    _asm bswap eax
}
#    endif
#    define PULL64(x) __pull64be(&(x))
#   endif
#  endif
# endif
/* Portable fallbacks for anything not provided above. */
# ifndef PULL64
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
#  define ROTR(x,s)       (((x)>>s) | (x)<<(64-s))
# endif
# ifndef Sigma0
#  define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# endif
# ifndef Sigma1
#  define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# endif
# ifndef sigma0
#  define sigma0(x)       (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
# endif
# ifndef sigma1
#  define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
# endif
# ifndef Ch
#  define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
# endif
# ifndef Maj
#  define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
# endif
ce1932f2 590
# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
/*
 * This code should give better results on 32-bit CPU with less than
 * ~24 registers, both size and performance wise...
 *
 * Sliding-window variant: instead of rotating eight named working
 * variables each round, the window pointer F slides down through X[]
 * so that F[0..8] always addresses the current a..h plus the incoming
 * message word; only A and E (the two values recomputed every round)
 * live in locals.
 */

static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 A, E, T;
    SHA_LONG64 X[9 + 80], *F;
    int i;

    while (num--) {

        F = X + 80;
        A = ctx->h[0];
        F[1] = ctx->h[1];
        F[2] = ctx->h[2];
        F[3] = ctx->h[3];
        E = ctx->h[4];
        F[5] = ctx->h[5];
        F[6] = ctx->h[6];
        F[7] = ctx->h[7];

        /* Rounds 0-15 consume the message block directly. */
        for (i = 0; i < 16; i++, F--) {
#  ifdef B_ENDIAN
            T = W[i];
#  else
            T = PULL64(W[i]);
#  endif
            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        /* Rounds 16-79 expand the message schedule in place. */
        for (; i < 80; i++, F--) {
            T = sigma0(F[8 + 16 - 1]);
            T += sigma1(F[8 + 16 - 14]);
            T += F[8 + 16] + F[8 + 16 - 9];

            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        ctx->h[0] += A;
        ctx->h[1] += F[1];
        ctx->h[2] += F[2];
        ctx->h[3] += F[3];
        ctx->h[4] += E;
        ctx->h[5] += F[5];
        ctx->h[6] += F[6];
        ctx->h[7] += F[7];

        W += SHA_LBLOCK;
    }
}
656
# elif defined(OPENSSL_SMALL_FOOTPRINT)

/*
 * Size-optimised variant: straight FIPS 180-4 round structure with a
 * 16-word circular message schedule X[] and no unrolling.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
    SHA_LONG64 X[16];
    int i;

    while (num--) {

        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        /* Rounds 0-15: message words loaded (and byte-swapped on
         * little-endian hosts) straight from the input block. */
        for (i = 0; i < 16; i++) {
#  ifdef B_ENDIAN
            T1 = X[i] = W[i];
#  else
            T1 = X[i] = PULL64(W[i]);
#  endif
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        /* Rounds 16-79: schedule expansion in the circular buffer. */
        for (; i < 80; i++) {
            s0 = X[(i + 1) & 0x0f];
            s0 = sigma0(s0);
            s1 = X[(i + 14) & 0x0f];
            s1 = sigma1(s1);

            T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;

        W += SHA_LBLOCK;
    }
}
727
# else
/*
 * Default variant: fully unrolled in groups of 16 rounds.  Register
 * rotation is done by permuting the macro arguments rather than moving
 * data, so each ROUND_* invocation names a..h in the right order for
 * its round.
 */
#  define ROUND_00_15(i,a,b,c,d,e,f,g,h)        do {    \
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
        h = Sigma0(a) + Maj(a,b,c);                     \
        d += T1;        h += T1;                        } while (0)

#  define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)    do {    \
        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)

static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
    SHA_LONG64 X[16];
    int i;

    while (num--) {

        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        /* Rounds 0-15, unrolled; big-endian hosts read the words
         * directly, little-endian hosts byte-swap with PULL64. */
#  ifdef B_ENDIAN
        T1 = X[0] = W[0];
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
        T1 = X[1] = W[1];
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
        T1 = X[2] = W[2];
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
        T1 = X[3] = W[3];
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
        T1 = X[4] = W[4];
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
        T1 = X[5] = W[5];
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
        T1 = X[6] = W[6];
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
        T1 = X[7] = W[7];
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
        T1 = X[8] = W[8];
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
        T1 = X[9] = W[9];
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
        T1 = X[10] = W[10];
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
        T1 = X[11] = W[11];
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
        T1 = X[12] = W[12];
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
        T1 = X[13] = W[13];
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
        T1 = X[14] = W[14];
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
        T1 = X[15] = W[15];
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
#  else
        T1 = X[0] = PULL64(W[0]);
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
        T1 = X[1] = PULL64(W[1]);
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
        T1 = X[2] = PULL64(W[2]);
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
        T1 = X[3] = PULL64(W[3]);
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
        T1 = X[4] = PULL64(W[4]);
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
        T1 = X[5] = PULL64(W[5]);
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
        T1 = X[6] = PULL64(W[6]);
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
        T1 = X[7] = PULL64(W[7]);
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
        T1 = X[8] = PULL64(W[8]);
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
        T1 = X[9] = PULL64(W[9]);
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
        T1 = X[10] = PULL64(W[10]);
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
        T1 = X[11] = PULL64(W[11]);
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
        T1 = X[12] = PULL64(W[12]);
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
        T1 = X[13] = PULL64(W[13]);
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
        T1 = X[14] = PULL64(W[14]);
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
        T1 = X[15] = PULL64(W[15]);
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
#  endif

        /* Rounds 16-79, sixteen at a time over the circular schedule. */
        for (i = 16; i < 80; i += 16) {
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;

        W += SHA_LBLOCK;
    }
}
ce6aa1e4 858
0f113f3e 859# endif
a5804a75 860
474e469b 861#endif /* SHA512_ASM */