2018-01-01 Niels Möller <nisse@lysator.liu.se>
+ * skein.h (_SKEIN256_NKEYS): Reduce from 6 to 5 (repeated subkeys
+ no longer used).
+ * skein256.c (_skein256_expand): Update accordingly.
+ (skein256_process_block): Use uint64_t rather than unsigned long
+ long in cast.
+ * skein256-internal.c (_skein256_block): Micro-optimization of subkey rotation.
+
* configure.ac: Don't use skein512-internal.asm, it brings no real
speedup.
#define SKEIN256_BLOCK_SIZE 32
#define SKEIN256_DIGEST_SIZE 32
-/* Internal lengths, as 64-bit words. We use *two* redundant words for
- the key, to reduce the number of index mod operations. On the other
- hand, tweak words are expanded on the fly. */
+/* Internal lengths, as 64-bit words. Besides the key words proper, we
+ store one redundant subkey: the xor of all key words and the constant
+ _SKEIN_C240. On the other hand, tweak words are expanded on the fly. */
#define _SKEIN256_LENGTH 4
-#define _SKEIN256_NKEYS 6
+#define _SKEIN256_NKEYS 5
#define _SKEIN_NTWEAK 2
struct skein256_ctx {
uint64_t state[_SKEIN256_NKEYS];
/* Current implementation limited to message size <= 2^69 - 32 bytes,
- while the specification allows up to 2^96 -1 bytes.*/
+ while the specification allows up to 2^96 - 1 bytes.*/
uint64_t count; /* Block count */
uint8_t block[SKEIN256_BLOCK_SIZE];
unsigned index;
ROUND(w0, w1, w2, w3, 23, 40);
ROUND(w0, w3, w2, w1, 5, 37);
- w0 += k1 - t0; /* Right-hand side equal to new k4, below. */
+ tmp = k1 - t0; // New k4.
+ w0 += tmp;
w1 += k2;
t0 ^= t1;
- w2 += k3 + t0; /* Right-hand side equal to new k1, below. */
- w3 += k4 + i + 1;
-
- tmp = k1;
k1 = k3 + t0;
+ w2 += k1;
+ w3 += k4 + i + 1;
k3 = k0;
k0 = k2 - t1;
t1 ^= t0;
k2 = k4 + t1;
- k4 = tmp - t1;
+ k4 = tmp;
ROUND(w0, w1, w2, w3, 25, 33);
ROUND(w0, w3, w2, w1, 46, 12);
for (i = 0, sum = _SKEIN_C240; i < _SKEIN256_LENGTH; i++)
sum ^= keys[i];
keys[_SKEIN256_LENGTH] = sum;
- keys[_SKEIN256_LENGTH + 1] = keys[0];
}
void
tag |= ((ctx->count == 0) << 6);
tweak[0] = (ctx->count << 5) + length;
- tweak[1] = (ctx->count >> 59) | ((unsigned long long) tag << 56);
+ tweak[1] = (ctx->count >> 59) | ((uint64_t) tag << 56);
_skein256_expand(ctx->state);
_skein256_block(ctx->state, ctx->state, tweak, data);