/* crypto/whrlpool/wp_block.c */
1 /*
2 * Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 /**
11 * The Whirlpool hashing function.
12 *
13 * See
14 * P.S.L.M. Barreto, V. Rijmen,
15 * ``The Whirlpool hashing function,''
16 * NESSIE submission, 2000 (tweaked version, 2001),
17 * <https://www.cosic.esat.kuleuven.ac.be/nessie/workshop/submissions/whirlpool.zip>
18 *
19 * Based on "@version 3.0 (2003.03.12)" by Paulo S.L.M. Barreto and
20 * Vincent Rijmen. Lookup "reference implementations" on
21 * <http://planeta.terra.com.br/informatica/paulobarreto/>
22 *
23 * =============================================================================
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
26 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
27 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
29 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
32 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
33 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
34 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
35 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 */
38
39 #include "internal/cryptlib.h"
40 #include "wp_locl.h"
41 #include <string.h>
42
/*
 * Select a 64-bit unsigned type for this platform: MSVC spells it
 * "unsigned __int64", 64-bit SPARC gcc (which predefines __arch64__)
 * has a 64-bit "unsigned long", and everything else falls back to
 * "unsigned long long".
 */
typedef unsigned char u8;
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32)
/*
 * NOTE(review): the conventional MinGW macro is __MINGW32__ (with
 * trailing underscores); as written, this test is also true on MinGW,
 * so MinGW gets the __int64 branch too -- confirm MinGW gcc accepts it
 * before "fixing" the spelling.
 */
typedef unsigned __int64 u64;
#elif defined(__arch64__)
typedef unsigned long u64;
#else
typedef unsigned long long u64;
#endif
51
#define ROUNDS  10

/*
 * STRICT_ALIGNMENT controls whether the 64-byte message blocks may be
 * read directly through a u64 pointer.  It stays defined everywhere
 * except x86/x86_64, which tolerate unaligned 64-bit loads.
 */
#define STRICT_ALIGNMENT
#if !defined(PEDANTIC) && (defined(__i386) || defined(__i386__) || \
                           defined(__x86_64) || defined(__x86_64__) || \
                           defined(_M_IX86) || defined(_M_AMD64) || \
                           defined(_M_X64))
/*
 * Well, formally there're couple of other architectures, which permit
 * unaligned loads, specifically those not crossing cache lines, IA-64 and
 * PowerPC...
 */
# undef STRICT_ALIGNMENT
#endif

/*
 * On register-starved ia32 the round is computed word-by-word
 * (SMALL_REGISTER_BANK path in whirlpool_block below), and, when the
 * assembler module is built, the whole job can be handed off to the
 * MMX implementation.
 */
#undef SMALL_REGISTER_BANK
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
# define SMALL_REGISTER_BANK
# if defined(WHIRLPOOL_ASM)
#  ifndef OPENSSL_SMALL_FOOTPRINT
/*
 * it appears that for elder non-MMX
 * CPUs this is actually faster!
 */
#   define OPENSSL_SMALL_FOOTPRINT
#  endif
/*
 * Dispatch to the MMX assembler routine when bit 23 (MMX) of the ia32
 * capability vector is set; "break" falls through to the portable C
 * code otherwise.  Multi-statement macro, hence do { } while (0).
 */
#  define GO_FOR_MMX(ctx,inp,num)     do {                    \
        void whirlpool_block_mmx(void *,const void *,size_t); \
        if (!(OPENSSL_ia32cap_P[0] & (1<<23)))  break;        \
        whirlpool_block_mmx(ctx->H.c,inp,num);  return;       \
        } while (0)
# endif
#endif
85
/*
 * 64-bit rotation by a multiple of 8 bits.  Prefer a compiler intrinsic
 * (MSVC) or inline assembly (gcc on x86_64/IA-64); the rotate direction
 * depends on endianness because the table entries below are stored in
 * endian-neutral byte order.
 */
#undef ROTATE
#ifndef PEDANTIC
# if defined(_MSC_VER)
#  if defined(_WIN64)            /* applies to both IA-64 and AMD64 */
#   include <stdlib.h>
#   pragma intrinsic(_rotl64)
#   define ROTATE(a,n)  _rotl64((a),n)
#  endif
# elif defined(__GNUC__) && __GNUC__>=2
#  if defined(__x86_64) || defined(__x86_64__)
#   if defined(L_ENDIAN)
#    define ROTATE(a,n)       ({ u64 ret; asm ("rolq %1,%0"   \
                                   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
#   elif defined(B_ENDIAN)
/*
 * Most will argue that x86_64 is always little-endian. Well, yes, but
 * then we have stratus.com who has modified gcc to "emulate"
 * big-endian on x86. Is there evidence that they [or somebody else]
 * won't do same for x86_64? Naturally no. And this line is waiting
 * ready for that brave soul:-)
 */
#    define ROTATE(a,n)       ({ u64 ret; asm ("rorq %1,%0"   \
                                   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
#   endif
#  elif defined(__ia64) || defined(__ia64__)
#   if defined(L_ENDIAN)
#    define ROTATE(a,n)       ({ u64 ret; asm ("shrp %0=%1,%1,%2"     \
                                   : "=r"(ret) : "r"(a),"M"(64-(n))); ret; })
#   elif defined(B_ENDIAN)
#    define ROTATE(a,n)       ({ u64 ret; asm ("shrp %0=%1,%1,%2"     \
                                   : "=r"(ret) : "r"(a),"M"(n)); ret; })
#   endif
#  endif
# endif
#endif

/*
 * Small-footprint builds insist on the 2KB table; if no native rotate
 * was found above, fall back to the portable shift/xor form, and force
 * STRICT_ALIGNMENT so the smallest table layout is chosen.
 */
#if defined(OPENSSL_SMALL_FOOTPRINT)
# if !defined(ROTATE)
#  if defined(L_ENDIAN)         /* little-endians have to rotate left */
#   define ROTATE(i,n)  ((i)<<(n) ^ (i)>>(64-n))
#  elif defined(B_ENDIAN)       /* big-endians have to rotate right */
#   define ROTATE(i,n)  ((i)>>(n) ^ (i)<<(64-n))
#  endif
# endif
# if defined(ROTATE) && !defined(STRICT_ALIGNMENT)
#  define STRICT_ALIGNMENT      /* ensure smallest table size */
# endif
#endif
134
135 /*
136 * Table size depends on STRICT_ALIGNMENT and whether or not endian-
137 * specific ROTATE macro is defined. If STRICT_ALIGNMENT is not
138 * defined, which is normally the case on x86[_64] CPUs, the table is
139 * 4KB large unconditionally. Otherwise if ROTATE is defined, the
140 * table is 2KB large, and otherwise - 16KB. 2KB table requires a
141 * whole bunch of additional rotations, but I'm willing to "trade,"
142 * because 16KB table certainly trashes L1 cache. I wish all CPUs
143 * could handle unaligned load as 4KB table doesn't trash the cache,
144 * nor does it require additional rotations.
145 */
146 /*
147 * Note that every Cn macro expands as two loads: one byte load and
148 * one quadword load. One can argue that that many single-byte loads
149 * is too excessive, as one could load a quadword and "milk" it for
150 * eight 8-bit values instead. Well, yes, but in order to do so *and*
151 * avoid excessive loads you have to accommodate a handful of 64-bit
152 * values in the register bank and issue a bunch of shifts and mask.
153 * It's a tradeoff: loads vs. shift and mask in big register bank[!].
154 * On most CPUs eight single-byte loads are faster and I let other
155 * ones to depend on smart compiler to fold byte loads if beneficial.
156 * Hand-coded assembler would be another alternative:-)
157 */
/*
 * N, LL() and the C0()..C7() accessors jointly define the Cx table
 * layout (see the size discussion above) and how a byte-rotated table
 * entry is fetched.  Cn(K, i) looks up byte 8*i+n of K and yields the
 * matching 64-bit entry rotated by n byte positions:
 *
 *  - STRICT_ALIGNMENT with ROTATE (N==1, 2KB): one copy per entry,
 *    rotation performed at lookup time;
 *  - STRICT_ALIGNMENT without ROTATE (N==8, 16KB): all eight byte
 *    rotations of every entry are pre-stored, Cn picks row n;
 *  - no STRICT_ALIGNMENT (N==2, 4KB): each entry is stored twice
 *    back-to-back, so a (possibly misaligned) 64-bit load at byte
 *    offset 8-n inside the doubled entry reads the rotated value
 *    for free.
 */
#ifdef STRICT_ALIGNMENT
# if defined(ROTATE)
#  define N   1
#  define LL(c0,c1,c2,c3,c4,c5,c6,c7) c0,c1,c2,c3,c4,c5,c6,c7
#  define C0(K,i)     (Cx.q[K.c[(i)*8+0]])
#  define C1(K,i)     ROTATE(Cx.q[K.c[(i)*8+1]],8)
#  define C2(K,i)     ROTATE(Cx.q[K.c[(i)*8+2]],16)
#  define C3(K,i)     ROTATE(Cx.q[K.c[(i)*8+3]],24)
#  define C4(K,i)     ROTATE(Cx.q[K.c[(i)*8+4]],32)
#  define C5(K,i)     ROTATE(Cx.q[K.c[(i)*8+5]],40)
#  define C6(K,i)     ROTATE(Cx.q[K.c[(i)*8+6]],48)
#  define C7(K,i)     ROTATE(Cx.q[K.c[(i)*8+7]],56)
# else
#  define N   8
#  define LL(c0,c1,c2,c3,c4,c5,c6,c7) c0,c1,c2,c3,c4,c5,c6,c7, \
                                      c7,c0,c1,c2,c3,c4,c5,c6, \
                                      c6,c7,c0,c1,c2,c3,c4,c5, \
                                      c5,c6,c7,c0,c1,c2,c3,c4, \
                                      c4,c5,c6,c7,c0,c1,c2,c3, \
                                      c3,c4,c5,c6,c7,c0,c1,c2, \
                                      c2,c3,c4,c5,c6,c7,c0,c1, \
                                      c1,c2,c3,c4,c5,c6,c7,c0
#  define C0(K,i)     (Cx.q[0+8*K.c[(i)*8+0]])
#  define C1(K,i)     (Cx.q[1+8*K.c[(i)*8+1]])
#  define C2(K,i)     (Cx.q[2+8*K.c[(i)*8+2]])
#  define C3(K,i)     (Cx.q[3+8*K.c[(i)*8+3]])
#  define C4(K,i)     (Cx.q[4+8*K.c[(i)*8+4]])
#  define C5(K,i)     (Cx.q[5+8*K.c[(i)*8+5]])
#  define C6(K,i)     (Cx.q[6+8*K.c[(i)*8+6]])
#  define C7(K,i)     (Cx.q[7+8*K.c[(i)*8+7]])
# endif
#else
# define N    2
# define LL(c0,c1,c2,c3,c4,c5,c6,c7) c0,c1,c2,c3,c4,c5,c6,c7, \
                                     c0,c1,c2,c3,c4,c5,c6,c7
# define C0(K,i)      (((u64*)(Cx.c+0))[2*K.c[(i)*8+0]])
# define C1(K,i)      (((u64*)(Cx.c+7))[2*K.c[(i)*8+1]])
# define C2(K,i)      (((u64*)(Cx.c+6))[2*K.c[(i)*8+2]])
# define C3(K,i)      (((u64*)(Cx.c+5))[2*K.c[(i)*8+3]])
# define C4(K,i)      (((u64*)(Cx.c+4))[2*K.c[(i)*8+4]])
# define C5(K,i)      (((u64*)(Cx.c+3))[2*K.c[(i)*8+5]])
# define C6(K,i)      (((u64*)(Cx.c+2))[2*K.c[(i)*8+6]])
# define C7(K,i)      (((u64*)(Cx.c+1))[2*K.c[(i)*8+7]])
#endif
202
/*
 * Cx is the combined lookup table: 256*N 64-bit entries (one per S-box
 * input byte, replicated/pre-rotated according to N -- see LL() above)
 * followed by the ROUNDS round constants rc[], which are accessed
 * through the RC macro defined below.  Indexing a Cn() entry by a state
 * byte performs Whirlpool's substitution and diffusion in one lookup.
 * All values are given byte-by-byte, so the table is correct on both
 * little- and big-endian machines.
 */
static const
union {
    u8 c[(256 * N + ROUNDS) * sizeof(u64)];
    u64 q[(256 * N + ROUNDS)];
} Cx = {
        {
            /* Note endian-neutral representation:-) */
            LL(0x18, 0x18, 0x60, 0x18, 0xc0, 0x78, 0x30, 0xd8),
            LL(0x23, 0x23, 0x8c, 0x23, 0x05, 0xaf, 0x46, 0x26),
            LL(0xc6, 0xc6, 0x3f, 0xc6, 0x7e, 0xf9, 0x91, 0xb8),
            LL(0xe8, 0xe8, 0x87, 0xe8, 0x13, 0x6f, 0xcd, 0xfb),
            LL(0x87, 0x87, 0x26, 0x87, 0x4c, 0xa1, 0x13, 0xcb),
            LL(0xb8, 0xb8, 0xda, 0xb8, 0xa9, 0x62, 0x6d, 0x11),
            LL(0x01, 0x01, 0x04, 0x01, 0x08, 0x05, 0x02, 0x09),
            LL(0x4f, 0x4f, 0x21, 0x4f, 0x42, 0x6e, 0x9e, 0x0d),
            LL(0x36, 0x36, 0xd8, 0x36, 0xad, 0xee, 0x6c, 0x9b),
            LL(0xa6, 0xa6, 0xa2, 0xa6, 0x59, 0x04, 0x51, 0xff),
            LL(0xd2, 0xd2, 0x6f, 0xd2, 0xde, 0xbd, 0xb9, 0x0c),
            LL(0xf5, 0xf5, 0xf3, 0xf5, 0xfb, 0x06, 0xf7, 0x0e),
            LL(0x79, 0x79, 0xf9, 0x79, 0xef, 0x80, 0xf2, 0x96),
            LL(0x6f, 0x6f, 0xa1, 0x6f, 0x5f, 0xce, 0xde, 0x30),
            LL(0x91, 0x91, 0x7e, 0x91, 0xfc, 0xef, 0x3f, 0x6d),
            LL(0x52, 0x52, 0x55, 0x52, 0xaa, 0x07, 0xa4, 0xf8),
            LL(0x60, 0x60, 0x9d, 0x60, 0x27, 0xfd, 0xc0, 0x47),
            LL(0xbc, 0xbc, 0xca, 0xbc, 0x89, 0x76, 0x65, 0x35),
            LL(0x9b, 0x9b, 0x56, 0x9b, 0xac, 0xcd, 0x2b, 0x37),
            LL(0x8e, 0x8e, 0x02, 0x8e, 0x04, 0x8c, 0x01, 0x8a),
            LL(0xa3, 0xa3, 0xb6, 0xa3, 0x71, 0x15, 0x5b, 0xd2),
            LL(0x0c, 0x0c, 0x30, 0x0c, 0x60, 0x3c, 0x18, 0x6c),
            LL(0x7b, 0x7b, 0xf1, 0x7b, 0xff, 0x8a, 0xf6, 0x84),
            LL(0x35, 0x35, 0xd4, 0x35, 0xb5, 0xe1, 0x6a, 0x80),
            LL(0x1d, 0x1d, 0x74, 0x1d, 0xe8, 0x69, 0x3a, 0xf5),
            LL(0xe0, 0xe0, 0xa7, 0xe0, 0x53, 0x47, 0xdd, 0xb3),
            LL(0xd7, 0xd7, 0x7b, 0xd7, 0xf6, 0xac, 0xb3, 0x21),
            LL(0xc2, 0xc2, 0x2f, 0xc2, 0x5e, 0xed, 0x99, 0x9c),
            LL(0x2e, 0x2e, 0xb8, 0x2e, 0x6d, 0x96, 0x5c, 0x43),
            LL(0x4b, 0x4b, 0x31, 0x4b, 0x62, 0x7a, 0x96, 0x29),
            LL(0xfe, 0xfe, 0xdf, 0xfe, 0xa3, 0x21, 0xe1, 0x5d),
            LL(0x57, 0x57, 0x41, 0x57, 0x82, 0x16, 0xae, 0xd5),
            LL(0x15, 0x15, 0x54, 0x15, 0xa8, 0x41, 0x2a, 0xbd),
            LL(0x77, 0x77, 0xc1, 0x77, 0x9f, 0xb6, 0xee, 0xe8),
            LL(0x37, 0x37, 0xdc, 0x37, 0xa5, 0xeb, 0x6e, 0x92),
            LL(0xe5, 0xe5, 0xb3, 0xe5, 0x7b, 0x56, 0xd7, 0x9e),
            LL(0x9f, 0x9f, 0x46, 0x9f, 0x8c, 0xd9, 0x23, 0x13),
            LL(0xf0, 0xf0, 0xe7, 0xf0, 0xd3, 0x17, 0xfd, 0x23),
            LL(0x4a, 0x4a, 0x35, 0x4a, 0x6a, 0x7f, 0x94, 0x20),
            LL(0xda, 0xda, 0x4f, 0xda, 0x9e, 0x95, 0xa9, 0x44),
            LL(0x58, 0x58, 0x7d, 0x58, 0xfa, 0x25, 0xb0, 0xa2),
            LL(0xc9, 0xc9, 0x03, 0xc9, 0x06, 0xca, 0x8f, 0xcf),
            LL(0x29, 0x29, 0xa4, 0x29, 0x55, 0x8d, 0x52, 0x7c),
            LL(0x0a, 0x0a, 0x28, 0x0a, 0x50, 0x22, 0x14, 0x5a),
            LL(0xb1, 0xb1, 0xfe, 0xb1, 0xe1, 0x4f, 0x7f, 0x50),
            LL(0xa0, 0xa0, 0xba, 0xa0, 0x69, 0x1a, 0x5d, 0xc9),
            LL(0x6b, 0x6b, 0xb1, 0x6b, 0x7f, 0xda, 0xd6, 0x14),
            LL(0x85, 0x85, 0x2e, 0x85, 0x5c, 0xab, 0x17, 0xd9),
            LL(0xbd, 0xbd, 0xce, 0xbd, 0x81, 0x73, 0x67, 0x3c),
            LL(0x5d, 0x5d, 0x69, 0x5d, 0xd2, 0x34, 0xba, 0x8f),
            LL(0x10, 0x10, 0x40, 0x10, 0x80, 0x50, 0x20, 0x90),
            LL(0xf4, 0xf4, 0xf7, 0xf4, 0xf3, 0x03, 0xf5, 0x07),
            LL(0xcb, 0xcb, 0x0b, 0xcb, 0x16, 0xc0, 0x8b, 0xdd),
            LL(0x3e, 0x3e, 0xf8, 0x3e, 0xed, 0xc6, 0x7c, 0xd3),
            LL(0x05, 0x05, 0x14, 0x05, 0x28, 0x11, 0x0a, 0x2d),
            LL(0x67, 0x67, 0x81, 0x67, 0x1f, 0xe6, 0xce, 0x78),
            LL(0xe4, 0xe4, 0xb7, 0xe4, 0x73, 0x53, 0xd5, 0x97),
            LL(0x27, 0x27, 0x9c, 0x27, 0x25, 0xbb, 0x4e, 0x02),
            LL(0x41, 0x41, 0x19, 0x41, 0x32, 0x58, 0x82, 0x73),
            LL(0x8b, 0x8b, 0x16, 0x8b, 0x2c, 0x9d, 0x0b, 0xa7),
            LL(0xa7, 0xa7, 0xa6, 0xa7, 0x51, 0x01, 0x53, 0xf6),
            LL(0x7d, 0x7d, 0xe9, 0x7d, 0xcf, 0x94, 0xfa, 0xb2),
            LL(0x95, 0x95, 0x6e, 0x95, 0xdc, 0xfb, 0x37, 0x49),
            LL(0xd8, 0xd8, 0x47, 0xd8, 0x8e, 0x9f, 0xad, 0x56),
            LL(0xfb, 0xfb, 0xcb, 0xfb, 0x8b, 0x30, 0xeb, 0x70),
            LL(0xee, 0xee, 0x9f, 0xee, 0x23, 0x71, 0xc1, 0xcd),
            LL(0x7c, 0x7c, 0xed, 0x7c, 0xc7, 0x91, 0xf8, 0xbb),
            LL(0x66, 0x66, 0x85, 0x66, 0x17, 0xe3, 0xcc, 0x71),
            LL(0xdd, 0xdd, 0x53, 0xdd, 0xa6, 0x8e, 0xa7, 0x7b),
            LL(0x17, 0x17, 0x5c, 0x17, 0xb8, 0x4b, 0x2e, 0xaf),
            LL(0x47, 0x47, 0x01, 0x47, 0x02, 0x46, 0x8e, 0x45),
            LL(0x9e, 0x9e, 0x42, 0x9e, 0x84, 0xdc, 0x21, 0x1a),
            LL(0xca, 0xca, 0x0f, 0xca, 0x1e, 0xc5, 0x89, 0xd4),
            LL(0x2d, 0x2d, 0xb4, 0x2d, 0x75, 0x99, 0x5a, 0x58),
            LL(0xbf, 0xbf, 0xc6, 0xbf, 0x91, 0x79, 0x63, 0x2e),
            LL(0x07, 0x07, 0x1c, 0x07, 0x38, 0x1b, 0x0e, 0x3f),
            LL(0xad, 0xad, 0x8e, 0xad, 0x01, 0x23, 0x47, 0xac),
            LL(0x5a, 0x5a, 0x75, 0x5a, 0xea, 0x2f, 0xb4, 0xb0),
            LL(0x83, 0x83, 0x36, 0x83, 0x6c, 0xb5, 0x1b, 0xef),
            LL(0x33, 0x33, 0xcc, 0x33, 0x85, 0xff, 0x66, 0xb6),
            LL(0x63, 0x63, 0x91, 0x63, 0x3f, 0xf2, 0xc6, 0x5c),
            LL(0x02, 0x02, 0x08, 0x02, 0x10, 0x0a, 0x04, 0x12),
            LL(0xaa, 0xaa, 0x92, 0xaa, 0x39, 0x38, 0x49, 0x93),
            LL(0x71, 0x71, 0xd9, 0x71, 0xaf, 0xa8, 0xe2, 0xde),
            LL(0xc8, 0xc8, 0x07, 0xc8, 0x0e, 0xcf, 0x8d, 0xc6),
            LL(0x19, 0x19, 0x64, 0x19, 0xc8, 0x7d, 0x32, 0xd1),
            LL(0x49, 0x49, 0x39, 0x49, 0x72, 0x70, 0x92, 0x3b),
            LL(0xd9, 0xd9, 0x43, 0xd9, 0x86, 0x9a, 0xaf, 0x5f),
            LL(0xf2, 0xf2, 0xef, 0xf2, 0xc3, 0x1d, 0xf9, 0x31),
            LL(0xe3, 0xe3, 0xab, 0xe3, 0x4b, 0x48, 0xdb, 0xa8),
            LL(0x5b, 0x5b, 0x71, 0x5b, 0xe2, 0x2a, 0xb6, 0xb9),
            LL(0x88, 0x88, 0x1a, 0x88, 0x34, 0x92, 0x0d, 0xbc),
            LL(0x9a, 0x9a, 0x52, 0x9a, 0xa4, 0xc8, 0x29, 0x3e),
            LL(0x26, 0x26, 0x98, 0x26, 0x2d, 0xbe, 0x4c, 0x0b),
            LL(0x32, 0x32, 0xc8, 0x32, 0x8d, 0xfa, 0x64, 0xbf),
            LL(0xb0, 0xb0, 0xfa, 0xb0, 0xe9, 0x4a, 0x7d, 0x59),
            LL(0xe9, 0xe9, 0x83, 0xe9, 0x1b, 0x6a, 0xcf, 0xf2),
            LL(0x0f, 0x0f, 0x3c, 0x0f, 0x78, 0x33, 0x1e, 0x77),
            LL(0xd5, 0xd5, 0x73, 0xd5, 0xe6, 0xa6, 0xb7, 0x33),
            LL(0x80, 0x80, 0x3a, 0x80, 0x74, 0xba, 0x1d, 0xf4),
            LL(0xbe, 0xbe, 0xc2, 0xbe, 0x99, 0x7c, 0x61, 0x27),
            LL(0xcd, 0xcd, 0x13, 0xcd, 0x26, 0xde, 0x87, 0xeb),
            LL(0x34, 0x34, 0xd0, 0x34, 0xbd, 0xe4, 0x68, 0x89),
            LL(0x48, 0x48, 0x3d, 0x48, 0x7a, 0x75, 0x90, 0x32),
            LL(0xff, 0xff, 0xdb, 0xff, 0xab, 0x24, 0xe3, 0x54),
            LL(0x7a, 0x7a, 0xf5, 0x7a, 0xf7, 0x8f, 0xf4, 0x8d),
            LL(0x90, 0x90, 0x7a, 0x90, 0xf4, 0xea, 0x3d, 0x64),
            LL(0x5f, 0x5f, 0x61, 0x5f, 0xc2, 0x3e, 0xbe, 0x9d),
            LL(0x20, 0x20, 0x80, 0x20, 0x1d, 0xa0, 0x40, 0x3d),
            LL(0x68, 0x68, 0xbd, 0x68, 0x67, 0xd5, 0xd0, 0x0f),
            LL(0x1a, 0x1a, 0x68, 0x1a, 0xd0, 0x72, 0x34, 0xca),
            LL(0xae, 0xae, 0x82, 0xae, 0x19, 0x2c, 0x41, 0xb7),
            LL(0xb4, 0xb4, 0xea, 0xb4, 0xc9, 0x5e, 0x75, 0x7d),
            LL(0x54, 0x54, 0x4d, 0x54, 0x9a, 0x19, 0xa8, 0xce),
            LL(0x93, 0x93, 0x76, 0x93, 0xec, 0xe5, 0x3b, 0x7f),
            LL(0x22, 0x22, 0x88, 0x22, 0x0d, 0xaa, 0x44, 0x2f),
            LL(0x64, 0x64, 0x8d, 0x64, 0x07, 0xe9, 0xc8, 0x63),
            LL(0xf1, 0xf1, 0xe3, 0xf1, 0xdb, 0x12, 0xff, 0x2a),
            LL(0x73, 0x73, 0xd1, 0x73, 0xbf, 0xa2, 0xe6, 0xcc),
            LL(0x12, 0x12, 0x48, 0x12, 0x90, 0x5a, 0x24, 0x82),
            LL(0x40, 0x40, 0x1d, 0x40, 0x3a, 0x5d, 0x80, 0x7a),
            LL(0x08, 0x08, 0x20, 0x08, 0x40, 0x28, 0x10, 0x48),
            LL(0xc3, 0xc3, 0x2b, 0xc3, 0x56, 0xe8, 0x9b, 0x95),
            LL(0xec, 0xec, 0x97, 0xec, 0x33, 0x7b, 0xc5, 0xdf),
            LL(0xdb, 0xdb, 0x4b, 0xdb, 0x96, 0x90, 0xab, 0x4d),
            LL(0xa1, 0xa1, 0xbe, 0xa1, 0x61, 0x1f, 0x5f, 0xc0),
            LL(0x8d, 0x8d, 0x0e, 0x8d, 0x1c, 0x83, 0x07, 0x91),
            LL(0x3d, 0x3d, 0xf4, 0x3d, 0xf5, 0xc9, 0x7a, 0xc8),
            LL(0x97, 0x97, 0x66, 0x97, 0xcc, 0xf1, 0x33, 0x5b),
            LL(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
            LL(0xcf, 0xcf, 0x1b, 0xcf, 0x36, 0xd4, 0x83, 0xf9),
            LL(0x2b, 0x2b, 0xac, 0x2b, 0x45, 0x87, 0x56, 0x6e),
            LL(0x76, 0x76, 0xc5, 0x76, 0x97, 0xb3, 0xec, 0xe1),
            LL(0x82, 0x82, 0x32, 0x82, 0x64, 0xb0, 0x19, 0xe6),
            LL(0xd6, 0xd6, 0x7f, 0xd6, 0xfe, 0xa9, 0xb1, 0x28),
            LL(0x1b, 0x1b, 0x6c, 0x1b, 0xd8, 0x77, 0x36, 0xc3),
            LL(0xb5, 0xb5, 0xee, 0xb5, 0xc1, 0x5b, 0x77, 0x74),
            LL(0xaf, 0xaf, 0x86, 0xaf, 0x11, 0x29, 0x43, 0xbe),
            LL(0x6a, 0x6a, 0xb5, 0x6a, 0x77, 0xdf, 0xd4, 0x1d),
            LL(0x50, 0x50, 0x5d, 0x50, 0xba, 0x0d, 0xa0, 0xea),
            LL(0x45, 0x45, 0x09, 0x45, 0x12, 0x4c, 0x8a, 0x57),
            LL(0xf3, 0xf3, 0xeb, 0xf3, 0xcb, 0x18, 0xfb, 0x38),
            LL(0x30, 0x30, 0xc0, 0x30, 0x9d, 0xf0, 0x60, 0xad),
            LL(0xef, 0xef, 0x9b, 0xef, 0x2b, 0x74, 0xc3, 0xc4),
            LL(0x3f, 0x3f, 0xfc, 0x3f, 0xe5, 0xc3, 0x7e, 0xda),
            LL(0x55, 0x55, 0x49, 0x55, 0x92, 0x1c, 0xaa, 0xc7),
            LL(0xa2, 0xa2, 0xb2, 0xa2, 0x79, 0x10, 0x59, 0xdb),
            LL(0xea, 0xea, 0x8f, 0xea, 0x03, 0x65, 0xc9, 0xe9),
            LL(0x65, 0x65, 0x89, 0x65, 0x0f, 0xec, 0xca, 0x6a),
            LL(0xba, 0xba, 0xd2, 0xba, 0xb9, 0x68, 0x69, 0x03),
            LL(0x2f, 0x2f, 0xbc, 0x2f, 0x65, 0x93, 0x5e, 0x4a),
            LL(0xc0, 0xc0, 0x27, 0xc0, 0x4e, 0xe7, 0x9d, 0x8e),
            LL(0xde, 0xde, 0x5f, 0xde, 0xbe, 0x81, 0xa1, 0x60),
            LL(0x1c, 0x1c, 0x70, 0x1c, 0xe0, 0x6c, 0x38, 0xfc),
            LL(0xfd, 0xfd, 0xd3, 0xfd, 0xbb, 0x2e, 0xe7, 0x46),
            LL(0x4d, 0x4d, 0x29, 0x4d, 0x52, 0x64, 0x9a, 0x1f),
            LL(0x92, 0x92, 0x72, 0x92, 0xe4, 0xe0, 0x39, 0x76),
            LL(0x75, 0x75, 0xc9, 0x75, 0x8f, 0xbc, 0xea, 0xfa),
            LL(0x06, 0x06, 0x18, 0x06, 0x30, 0x1e, 0x0c, 0x36),
            LL(0x8a, 0x8a, 0x12, 0x8a, 0x24, 0x98, 0x09, 0xae),
            LL(0xb2, 0xb2, 0xf2, 0xb2, 0xf9, 0x40, 0x79, 0x4b),
            LL(0xe6, 0xe6, 0xbf, 0xe6, 0x63, 0x59, 0xd1, 0x85),
            LL(0x0e, 0x0e, 0x38, 0x0e, 0x70, 0x36, 0x1c, 0x7e),
            LL(0x1f, 0x1f, 0x7c, 0x1f, 0xf8, 0x63, 0x3e, 0xe7),
            LL(0x62, 0x62, 0x95, 0x62, 0x37, 0xf7, 0xc4, 0x55),
            LL(0xd4, 0xd4, 0x77, 0xd4, 0xee, 0xa3, 0xb5, 0x3a),
            LL(0xa8, 0xa8, 0x9a, 0xa8, 0x29, 0x32, 0x4d, 0x81),
            LL(0x96, 0x96, 0x62, 0x96, 0xc4, 0xf4, 0x31, 0x52),
            LL(0xf9, 0xf9, 0xc3, 0xf9, 0x9b, 0x3a, 0xef, 0x62),
            LL(0xc5, 0xc5, 0x33, 0xc5, 0x66, 0xf6, 0x97, 0xa3),
            LL(0x25, 0x25, 0x94, 0x25, 0x35, 0xb1, 0x4a, 0x10),
            LL(0x59, 0x59, 0x79, 0x59, 0xf2, 0x20, 0xb2, 0xab),
            LL(0x84, 0x84, 0x2a, 0x84, 0x54, 0xae, 0x15, 0xd0),
            LL(0x72, 0x72, 0xd5, 0x72, 0xb7, 0xa7, 0xe4, 0xc5),
            LL(0x39, 0x39, 0xe4, 0x39, 0xd5, 0xdd, 0x72, 0xec),
            LL(0x4c, 0x4c, 0x2d, 0x4c, 0x5a, 0x61, 0x98, 0x16),
            LL(0x5e, 0x5e, 0x65, 0x5e, 0xca, 0x3b, 0xbc, 0x94),
            LL(0x78, 0x78, 0xfd, 0x78, 0xe7, 0x85, 0xf0, 0x9f),
            LL(0x38, 0x38, 0xe0, 0x38, 0xdd, 0xd8, 0x70, 0xe5),
            LL(0x8c, 0x8c, 0x0a, 0x8c, 0x14, 0x86, 0x05, 0x98),
            LL(0xd1, 0xd1, 0x63, 0xd1, 0xc6, 0xb2, 0xbf, 0x17),
            LL(0xa5, 0xa5, 0xae, 0xa5, 0x41, 0x0b, 0x57, 0xe4),
            LL(0xe2, 0xe2, 0xaf, 0xe2, 0x43, 0x4d, 0xd9, 0xa1),
            LL(0x61, 0x61, 0x99, 0x61, 0x2f, 0xf8, 0xc2, 0x4e),
            LL(0xb3, 0xb3, 0xf6, 0xb3, 0xf1, 0x45, 0x7b, 0x42),
            LL(0x21, 0x21, 0x84, 0x21, 0x15, 0xa5, 0x42, 0x34),
            LL(0x9c, 0x9c, 0x4a, 0x9c, 0x94, 0xd6, 0x25, 0x08),
            LL(0x1e, 0x1e, 0x78, 0x1e, 0xf0, 0x66, 0x3c, 0xee),
            LL(0x43, 0x43, 0x11, 0x43, 0x22, 0x52, 0x86, 0x61),
            LL(0xc7, 0xc7, 0x3b, 0xc7, 0x76, 0xfc, 0x93, 0xb1),
            LL(0xfc, 0xfc, 0xd7, 0xfc, 0xb3, 0x2b, 0xe5, 0x4f),
            LL(0x04, 0x04, 0x10, 0x04, 0x20, 0x14, 0x08, 0x24),
            LL(0x51, 0x51, 0x59, 0x51, 0xb2, 0x08, 0xa2, 0xe3),
            LL(0x99, 0x99, 0x5e, 0x99, 0xbc, 0xc7, 0x2f, 0x25),
            LL(0x6d, 0x6d, 0xa9, 0x6d, 0x4f, 0xc4, 0xda, 0x22),
            LL(0x0d, 0x0d, 0x34, 0x0d, 0x68, 0x39, 0x1a, 0x65),
            LL(0xfa, 0xfa, 0xcf, 0xfa, 0x83, 0x35, 0xe9, 0x79),
            LL(0xdf, 0xdf, 0x5b, 0xdf, 0xb6, 0x84, 0xa3, 0x69),
            LL(0x7e, 0x7e, 0xe5, 0x7e, 0xd7, 0x9b, 0xfc, 0xa9),
            LL(0x24, 0x24, 0x90, 0x24, 0x3d, 0xb4, 0x48, 0x19),
            LL(0x3b, 0x3b, 0xec, 0x3b, 0xc5, 0xd7, 0x76, 0xfe),
            LL(0xab, 0xab, 0x96, 0xab, 0x31, 0x3d, 0x4b, 0x9a),
            LL(0xce, 0xce, 0x1f, 0xce, 0x3e, 0xd1, 0x81, 0xf0),
            LL(0x11, 0x11, 0x44, 0x11, 0x88, 0x55, 0x22, 0x99),
            LL(0x8f, 0x8f, 0x06, 0x8f, 0x0c, 0x89, 0x03, 0x83),
            LL(0x4e, 0x4e, 0x25, 0x4e, 0x4a, 0x6b, 0x9c, 0x04),
            LL(0xb7, 0xb7, 0xe6, 0xb7, 0xd1, 0x51, 0x73, 0x66),
            LL(0xeb, 0xeb, 0x8b, 0xeb, 0x0b, 0x60, 0xcb, 0xe0),
            LL(0x3c, 0x3c, 0xf0, 0x3c, 0xfd, 0xcc, 0x78, 0xc1),
            LL(0x81, 0x81, 0x3e, 0x81, 0x7c, 0xbf, 0x1f, 0xfd),
            LL(0x94, 0x94, 0x6a, 0x94, 0xd4, 0xfe, 0x35, 0x40),
            LL(0xf7, 0xf7, 0xfb, 0xf7, 0xeb, 0x0c, 0xf3, 0x1c),
            LL(0xb9, 0xb9, 0xde, 0xb9, 0xa1, 0x67, 0x6f, 0x18),
            LL(0x13, 0x13, 0x4c, 0x13, 0x98, 0x5f, 0x26, 0x8b),
            LL(0x2c, 0x2c, 0xb0, 0x2c, 0x7d, 0x9c, 0x58, 0x51),
            LL(0xd3, 0xd3, 0x6b, 0xd3, 0xd6, 0xb8, 0xbb, 0x05),
            LL(0xe7, 0xe7, 0xbb, 0xe7, 0x6b, 0x5c, 0xd3, 0x8c),
            LL(0x6e, 0x6e, 0xa5, 0x6e, 0x57, 0xcb, 0xdc, 0x39),
            LL(0xc4, 0xc4, 0x37, 0xc4, 0x6e, 0xf3, 0x95, 0xaa),
            LL(0x03, 0x03, 0x0c, 0x03, 0x18, 0x0f, 0x06, 0x1b),
            LL(0x56, 0x56, 0x45, 0x56, 0x8a, 0x13, 0xac, 0xdc),
            LL(0x44, 0x44, 0x0d, 0x44, 0x1a, 0x49, 0x88, 0x5e),
            LL(0x7f, 0x7f, 0xe1, 0x7f, 0xdf, 0x9e, 0xfe, 0xa0),
            LL(0xa9, 0xa9, 0x9e, 0xa9, 0x21, 0x37, 0x4f, 0x88),
            LL(0x2a, 0x2a, 0xa8, 0x2a, 0x4d, 0x82, 0x54, 0x67),
            LL(0xbb, 0xbb, 0xd6, 0xbb, 0xb1, 0x6d, 0x6b, 0x0a),
            LL(0xc1, 0xc1, 0x23, 0xc1, 0x46, 0xe2, 0x9f, 0x87),
            LL(0x53, 0x53, 0x51, 0x53, 0xa2, 0x02, 0xa6, 0xf1),
            LL(0xdc, 0xdc, 0x57, 0xdc, 0xae, 0x8b, 0xa5, 0x72),
            LL(0x0b, 0x0b, 0x2c, 0x0b, 0x58, 0x27, 0x16, 0x53),
            LL(0x9d, 0x9d, 0x4e, 0x9d, 0x9c, 0xd3, 0x27, 0x01),
            LL(0x6c, 0x6c, 0xad, 0x6c, 0x47, 0xc1, 0xd8, 0x2b),
            LL(0x31, 0x31, 0xc4, 0x31, 0x95, 0xf5, 0x62, 0xa4),
            LL(0x74, 0x74, 0xcd, 0x74, 0x87, 0xb9, 0xe8, 0xf3),
            LL(0xf6, 0xf6, 0xff, 0xf6, 0xe3, 0x09, 0xf1, 0x15),
            LL(0x46, 0x46, 0x05, 0x46, 0x0a, 0x43, 0x8c, 0x4c),
            LL(0xac, 0xac, 0x8a, 0xac, 0x09, 0x26, 0x45, 0xa5),
            LL(0x89, 0x89, 0x1e, 0x89, 0x3c, 0x97, 0x0f, 0xb5),
            LL(0x14, 0x14, 0x50, 0x14, 0xa0, 0x44, 0x28, 0xb4),
            LL(0xe1, 0xe1, 0xa3, 0xe1, 0x5b, 0x42, 0xdf, 0xba),
            LL(0x16, 0x16, 0x58, 0x16, 0xb0, 0x4e, 0x2c, 0xa6),
            LL(0x3a, 0x3a, 0xe8, 0x3a, 0xcd, 0xd2, 0x74, 0xf7),
            LL(0x69, 0x69, 0xb9, 0x69, 0x6f, 0xd0, 0xd2, 0x06),
            LL(0x09, 0x09, 0x24, 0x09, 0x48, 0x2d, 0x12, 0x41),
            LL(0x70, 0x70, 0xdd, 0x70, 0xa7, 0xad, 0xe0, 0xd7),
            LL(0xb6, 0xb6, 0xe2, 0xb6, 0xd9, 0x54, 0x71, 0x6f),
            LL(0xd0, 0xd0, 0x67, 0xd0, 0xce, 0xb7, 0xbd, 0x1e),
            LL(0xed, 0xed, 0x93, 0xed, 0x3b, 0x7e, 0xc7, 0xd6),
            LL(0xcc, 0xcc, 0x17, 0xcc, 0x2e, 0xdb, 0x85, 0xe2),
            LL(0x42, 0x42, 0x15, 0x42, 0x2a, 0x57, 0x84, 0x68),
            LL(0x98, 0x98, 0x5a, 0x98, 0xb4, 0xc2, 0x2d, 0x2c),
            LL(0xa4, 0xa4, 0xaa, 0xa4, 0x49, 0x0e, 0x55, 0xed),
            LL(0x28, 0x28, 0xa0, 0x28, 0x5d, 0x88, 0x50, 0x75),
            LL(0x5c, 0x5c, 0x6d, 0x5c, 0xda, 0x31, 0xb8, 0x86),
            LL(0xf8, 0xf8, 0xc7, 0xf8, 0x93, 0x3f, 0xed, 0x6b),
            LL(0x86, 0x86, 0x22, 0x86, 0x44, 0xa4, 0x11, 0xc2),
/* RC points just past the 256*N table entries, at the round constants. */
#define RC      (&(Cx.q[256*N]))
            0x18, 0x23, 0xc6, 0xe8, 0x87, 0xb8, 0x01, 0x4f,
            /* rc[ROUNDS] */
            0x36, 0xa6, 0xd2, 0xf5, 0x79, 0x6f, 0x91, 0x52, 0x60, 0xbc, 0x9b,
            0x8e, 0xa3, 0x0c, 0x7b, 0x35, 0x1d, 0xe0, 0xd7, 0xc2, 0x2e, 0x4b,
            0xfe, 0x57, 0x15, 0x77, 0x37, 0xe5, 0x9f, 0xf0, 0x4a, 0xda, 0x58,
            0xc9, 0x29, 0x0a, 0xb1, 0xa0, 0x6b, 0x85, 0xbd, 0x5d, 0x10, 0xf4,
            0xcb, 0x3e, 0x05, 0x67, 0xe4, 0x27, 0x41, 0x8b, 0xa7, 0x7d, 0x95,
            0xd8, 0xfb, 0xee, 0x7c, 0x66, 0xdd, 0x17, 0x47, 0x9e, 0xca, 0x2d,
            0xbf, 0x07, 0xad, 0x5a, 0x83, 0x33
        }
};
478
/*-
 * The Whirlpool compression function: process |n| consecutive 64-byte
 * message blocks at |inp|, updating the 512-bit chaining value ctx->H
 * in place.  |n| must be non-zero (do/while loop below) and |inp| must
 * hold 64*n readable bytes.
 *
 * Each block m is run through the ROUNDS-round Whirlpool cipher keyed
 * by the current chaining value (K = round key state, S = data state),
 * and the result is folded back Miyaguchi-Preneel style:
 * H ^= W_H(m) ^ m (the final XOR loops below).
 *
 * Three compile-time variants share this body:
 *  - OPENSSL_SMALL_FOOTPRINT: compact word-by-word loops;
 *  - SMALL_REGISTER_BANK (ia32): per-word expressions, each L<i>
 *    assembled in one statement to limit register pressure;
 *  - otherwise: fully unrolled per-byte accumulation into L0..L7.
 */
void whirlpool_block(WHIRLPOOL_CTX *ctx, const void *inp, size_t n)
{
    int r;
    const u8 *p = inp;
    /* u64/u8 union views let the Cn() macros index single bytes while
     * the wide paths copy whole quadwords; H aliases ctx->H. */
    union {
        u64 q[8];
        u8 c[64];
    } S, K, *H = (void *)ctx->H.q;

#ifdef GO_FOR_MMX
    /* May hand the entire job to the MMX assembler path and return. */
    GO_FOR_MMX(ctx, inp, n);
#endif
    do {
#ifdef OPENSSL_SMALL_FOOTPRINT
        u64 L[8];
        int i;

        /* Initialize: K = H (round key), S = H ^ m (state). */
        for (i = 0; i < 64; i++)
            S.c[i] = (K.c[i] = H->c[i]) ^ p[i];
        for (r = 0; r < ROUNDS; r++) {
            /* Key schedule: the round constant enters word 0 only. */
            for (i = 0; i < 8; i++) {
                L[i] = i ? 0 : RC[r];
                L[i] ^= C0(K, i) ^ C1(K, (i - 1) & 7) ^
                    C2(K, (i - 2) & 7) ^ C3(K, (i - 3) & 7) ^
                    C4(K, (i - 4) & 7) ^ C5(K, (i - 5) & 7) ^
                    C6(K, (i - 6) & 7) ^ C7(K, (i - 7) & 7);
            }
            memcpy(K.q, L, 64);
            /* State round: L still holds the new K, so the XOR below
             * yields round(S) ^ K in one pass. */
            for (i = 0; i < 8; i++) {
                L[i] ^= C0(S, i) ^ C1(S, (i - 1) & 7) ^
                    C2(S, (i - 2) & 7) ^ C3(S, (i - 3) & 7) ^
                    C4(S, (i - 4) & 7) ^ C5(S, (i - 5) & 7) ^
                    C6(S, (i - 6) & 7) ^ C7(S, (i - 7) & 7);
            }
            memcpy(S.q, L, 64);
        }
        /* Miyaguchi-Preneel feed-forward: H ^= S ^ m. */
        for (i = 0; i < 64; i++)
            H->c[i] ^= S.c[i] ^ p[i];
#else
        u64 L0, L1, L2, L3, L4, L5, L6, L7;

# ifdef STRICT_ALIGNMENT
        if ((size_t)p & 7) {
            /* Input block is not 8-byte aligned: stage it through S
             * with memcpy instead of dereferencing p as u64*. */
            memcpy(S.c, p, 64);
            S.q[0] ^= (K.q[0] = H->q[0]);
            S.q[1] ^= (K.q[1] = H->q[1]);
            S.q[2] ^= (K.q[2] = H->q[2]);
            S.q[3] ^= (K.q[3] = H->q[3]);
            S.q[4] ^= (K.q[4] = H->q[4]);
            S.q[5] ^= (K.q[5] = H->q[5]);
            S.q[6] ^= (K.q[6] = H->q[6]);
            S.q[7] ^= (K.q[7] = H->q[7]);
        } else
# endif
        {
            /* Aligned (or alignment-tolerant) path: K = H, S = H ^ m
             * with direct 64-bit loads. */
            const u64 *pa = (const u64 *)p;
            S.q[0] = (K.q[0] = H->q[0]) ^ pa[0];
            S.q[1] = (K.q[1] = H->q[1]) ^ pa[1];
            S.q[2] = (K.q[2] = H->q[2]) ^ pa[2];
            S.q[3] = (K.q[3] = H->q[3]) ^ pa[3];
            S.q[4] = (K.q[4] = H->q[4]) ^ pa[4];
            S.q[5] = (K.q[5] = H->q[5]) ^ pa[5];
            S.q[6] = (K.q[6] = H->q[6]) ^ pa[6];
            S.q[7] = (K.q[7] = H->q[7]) ^ pa[7];
        }

        for (r = 0; r < ROUNDS; r++) {
# ifdef SMALL_REGISTER_BANK
            /* Key schedule: K = round(K), round constant into word 0. */
            L0 = C0(K, 0) ^ C1(K, 7) ^ C2(K, 6) ^ C3(K, 5) ^
                C4(K, 4) ^ C5(K, 3) ^ C6(K, 2) ^ C7(K, 1) ^ RC[r];
            L1 = C0(K, 1) ^ C1(K, 0) ^ C2(K, 7) ^ C3(K, 6) ^
                C4(K, 5) ^ C5(K, 4) ^ C6(K, 3) ^ C7(K, 2);
            L2 = C0(K, 2) ^ C1(K, 1) ^ C2(K, 0) ^ C3(K, 7) ^
                C4(K, 6) ^ C5(K, 5) ^ C6(K, 4) ^ C7(K, 3);
            L3 = C0(K, 3) ^ C1(K, 2) ^ C2(K, 1) ^ C3(K, 0) ^
                C4(K, 7) ^ C5(K, 6) ^ C6(K, 5) ^ C7(K, 4);
            L4 = C0(K, 4) ^ C1(K, 3) ^ C2(K, 2) ^ C3(K, 1) ^
                C4(K, 0) ^ C5(K, 7) ^ C6(K, 6) ^ C7(K, 5);
            L5 = C0(K, 5) ^ C1(K, 4) ^ C2(K, 3) ^ C3(K, 2) ^
                C4(K, 1) ^ C5(K, 0) ^ C6(K, 7) ^ C7(K, 6);
            L6 = C0(K, 6) ^ C1(K, 5) ^ C2(K, 4) ^ C3(K, 3) ^
                C4(K, 2) ^ C5(K, 1) ^ C6(K, 0) ^ C7(K, 7);
            L7 = C0(K, 7) ^ C1(K, 6) ^ C2(K, 5) ^ C3(K, 4) ^
                C4(K, 3) ^ C5(K, 2) ^ C6(K, 1) ^ C7(K, 0);

            K.q[0] = L0;
            K.q[1] = L1;
            K.q[2] = L2;
            K.q[3] = L3;
            K.q[4] = L4;
            K.q[5] = L5;
            K.q[6] = L6;
            K.q[7] = L7;

            /* State round: S = round(S) ^ K (L<i> already hold K). */
            L0 ^= C0(S, 0) ^ C1(S, 7) ^ C2(S, 6) ^ C3(S, 5) ^
                C4(S, 4) ^ C5(S, 3) ^ C6(S, 2) ^ C7(S, 1);
            L1 ^= C0(S, 1) ^ C1(S, 0) ^ C2(S, 7) ^ C3(S, 6) ^
                C4(S, 5) ^ C5(S, 4) ^ C6(S, 3) ^ C7(S, 2);
            L2 ^= C0(S, 2) ^ C1(S, 1) ^ C2(S, 0) ^ C3(S, 7) ^
                C4(S, 6) ^ C5(S, 5) ^ C6(S, 4) ^ C7(S, 3);
            L3 ^= C0(S, 3) ^ C1(S, 2) ^ C2(S, 1) ^ C3(S, 0) ^
                C4(S, 7) ^ C5(S, 6) ^ C6(S, 5) ^ C7(S, 4);
            L4 ^= C0(S, 4) ^ C1(S, 3) ^ C2(S, 2) ^ C3(S, 1) ^
                C4(S, 0) ^ C5(S, 7) ^ C6(S, 6) ^ C7(S, 5);
            L5 ^= C0(S, 5) ^ C1(S, 4) ^ C2(S, 3) ^ C3(S, 2) ^
                C4(S, 1) ^ C5(S, 0) ^ C6(S, 7) ^ C7(S, 6);
            L6 ^= C0(S, 6) ^ C1(S, 5) ^ C2(S, 4) ^ C3(S, 3) ^
                C4(S, 2) ^ C5(S, 1) ^ C6(S, 0) ^ C7(S, 7);
            L7 ^= C0(S, 7) ^ C1(S, 6) ^ C2(S, 5) ^ C3(S, 4) ^
                C4(S, 3) ^ C5(S, 2) ^ C6(S, 1) ^ C7(S, 0);

            S.q[0] = L0;
            S.q[1] = L1;
            S.q[2] = L2;
            S.q[3] = L3;
            S.q[4] = L4;
            S.q[5] = L5;
            S.q[6] = L6;
            S.q[7] = L7;
# else
            /*
             * Wide-register variant: walk the key bytes column by
             * column, accumulating into all eight L registers at once.
             * Same key-schedule computation as above, different
             * scheduling.
             */
            L0 = C0(K, 0);
            L1 = C1(K, 0);
            L2 = C2(K, 0);
            L3 = C3(K, 0);
            L4 = C4(K, 0);
            L5 = C5(K, 0);
            L6 = C6(K, 0);
            L7 = C7(K, 0);
            L0 ^= RC[r];

            L1 ^= C0(K, 1);
            L2 ^= C1(K, 1);
            L3 ^= C2(K, 1);
            L4 ^= C3(K, 1);
            L5 ^= C4(K, 1);
            L6 ^= C5(K, 1);
            L7 ^= C6(K, 1);
            L0 ^= C7(K, 1);

            L2 ^= C0(K, 2);
            L3 ^= C1(K, 2);
            L4 ^= C2(K, 2);
            L5 ^= C3(K, 2);
            L6 ^= C4(K, 2);
            L7 ^= C5(K, 2);
            L0 ^= C6(K, 2);
            L1 ^= C7(K, 2);

            L3 ^= C0(K, 3);
            L4 ^= C1(K, 3);
            L5 ^= C2(K, 3);
            L6 ^= C3(K, 3);
            L7 ^= C4(K, 3);
            L0 ^= C5(K, 3);
            L1 ^= C6(K, 3);
            L2 ^= C7(K, 3);

            L4 ^= C0(K, 4);
            L5 ^= C1(K, 4);
            L6 ^= C2(K, 4);
            L7 ^= C3(K, 4);
            L0 ^= C4(K, 4);
            L1 ^= C5(K, 4);
            L2 ^= C6(K, 4);
            L3 ^= C7(K, 4);

            L5 ^= C0(K, 5);
            L6 ^= C1(K, 5);
            L7 ^= C2(K, 5);
            L0 ^= C3(K, 5);
            L1 ^= C4(K, 5);
            L2 ^= C5(K, 5);
            L3 ^= C6(K, 5);
            L4 ^= C7(K, 5);

            L6 ^= C0(K, 6);
            L7 ^= C1(K, 6);
            L0 ^= C2(K, 6);
            L1 ^= C3(K, 6);
            L2 ^= C4(K, 6);
            L3 ^= C5(K, 6);
            L4 ^= C6(K, 6);
            L5 ^= C7(K, 6);

            L7 ^= C0(K, 7);
            L0 ^= C1(K, 7);
            L1 ^= C2(K, 7);
            L2 ^= C3(K, 7);
            L3 ^= C4(K, 7);
            L4 ^= C5(K, 7);
            L5 ^= C6(K, 7);
            L6 ^= C7(K, 7);

            /* Commit the new round key. */
            K.q[0] = L0;
            K.q[1] = L1;
            K.q[2] = L2;
            K.q[3] = L3;
            K.q[4] = L4;
            K.q[5] = L5;
            K.q[6] = L6;
            K.q[7] = L7;

            /* State round: accumulate round(S) on top of K, column by
             * column, yielding S = round(S) ^ K. */
            L0 ^= C0(S, 0);
            L1 ^= C1(S, 0);
            L2 ^= C2(S, 0);
            L3 ^= C3(S, 0);
            L4 ^= C4(S, 0);
            L5 ^= C5(S, 0);
            L6 ^= C6(S, 0);
            L7 ^= C7(S, 0);

            L1 ^= C0(S, 1);
            L2 ^= C1(S, 1);
            L3 ^= C2(S, 1);
            L4 ^= C3(S, 1);
            L5 ^= C4(S, 1);
            L6 ^= C5(S, 1);
            L7 ^= C6(S, 1);
            L0 ^= C7(S, 1);

            L2 ^= C0(S, 2);
            L3 ^= C1(S, 2);
            L4 ^= C2(S, 2);
            L5 ^= C3(S, 2);
            L6 ^= C4(S, 2);
            L7 ^= C5(S, 2);
            L0 ^= C6(S, 2);
            L1 ^= C7(S, 2);

            L3 ^= C0(S, 3);
            L4 ^= C1(S, 3);
            L5 ^= C2(S, 3);
            L6 ^= C3(S, 3);
            L7 ^= C4(S, 3);
            L0 ^= C5(S, 3);
            L1 ^= C6(S, 3);
            L2 ^= C7(S, 3);

            L4 ^= C0(S, 4);
            L5 ^= C1(S, 4);
            L6 ^= C2(S, 4);
            L7 ^= C3(S, 4);
            L0 ^= C4(S, 4);
            L1 ^= C5(S, 4);
            L2 ^= C6(S, 4);
            L3 ^= C7(S, 4);

            L5 ^= C0(S, 5);
            L6 ^= C1(S, 5);
            L7 ^= C2(S, 5);
            L0 ^= C3(S, 5);
            L1 ^= C4(S, 5);
            L2 ^= C5(S, 5);
            L3 ^= C6(S, 5);
            L4 ^= C7(S, 5);

            L6 ^= C0(S, 6);
            L7 ^= C1(S, 6);
            L0 ^= C2(S, 6);
            L1 ^= C3(S, 6);
            L2 ^= C4(S, 6);
            L3 ^= C5(S, 6);
            L4 ^= C6(S, 6);
            L5 ^= C7(S, 6);

            L7 ^= C0(S, 7);
            L0 ^= C1(S, 7);
            L1 ^= C2(S, 7);
            L2 ^= C3(S, 7);
            L3 ^= C4(S, 7);
            L4 ^= C5(S, 7);
            L5 ^= C6(S, 7);
            L6 ^= C7(S, 7);

            S.q[0] = L0;
            S.q[1] = L1;
            S.q[2] = L2;
            S.q[3] = L3;
            S.q[4] = L4;
            S.q[5] = L5;
            S.q[6] = L6;
            S.q[7] = L7;
# endif
        }

        /* Miyaguchi-Preneel feed-forward: H ^= S ^ m, again split by
         * alignment of the input pointer. */
# ifdef STRICT_ALIGNMENT
        if ((size_t)p & 7) {
            int i;
            for (i = 0; i < 64; i++)
                H->c[i] ^= S.c[i] ^ p[i];
        } else
# endif
        {
            const u64 *pa = (const u64 *)p;
            H->q[0] ^= S.q[0] ^ pa[0];
            H->q[1] ^= S.q[1] ^ pa[1];
            H->q[2] ^= S.q[2] ^ pa[2];
            H->q[3] ^= S.q[3] ^ pa[3];
            H->q[4] ^= S.q[4] ^ pa[4];
            H->q[5] ^= S.q[5] ^ pa[5];
            H->q[6] ^= S.q[6] ^ pa[6];
            H->q[7] ^= S.q[7] ^ pa[7];
        }
#endif
        p += 64;
    } while (--n);
}