git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-47098: Replace Keccak Code Package with tiny_sha3 (GH-32060)
author Christian Heimes <christian@python.org>
Sat, 26 Mar 2022 20:36:08 +0000 (22:36 +0200)
committer GitHub <noreply@github.com>
Sat, 26 Mar 2022 20:36:08 +0000 (21:36 +0100)
25 files changed:
Doc/whatsnew/3.11.rst
Makefile.pre.in
Misc/NEWS.d/next/Library/2022-03-23-10-07-41.bpo-47098.7AN_qp.rst [new file with mode: 0644]
Modules/_sha3/LICENSE [new file with mode: 0644]
Modules/_sha3/README.txt
Modules/_sha3/cleanup.py [deleted file]
Modules/_sha3/kcp/KeccakHash.c [deleted file]
Modules/_sha3/kcp/KeccakHash.h [deleted file]
Modules/_sha3/kcp/KeccakP-1600-64.macros [deleted file]
Modules/_sha3/kcp/KeccakP-1600-SnP-opt32.h [deleted file]
Modules/_sha3/kcp/KeccakP-1600-SnP-opt64.h [deleted file]
Modules/_sha3/kcp/KeccakP-1600-SnP.h [deleted file]
Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c [deleted file]
Modules/_sha3/kcp/KeccakP-1600-opt64-config.h [deleted file]
Modules/_sha3/kcp/KeccakP-1600-opt64.c [deleted file]
Modules/_sha3/kcp/KeccakP-1600-unrolling.macros [deleted file]
Modules/_sha3/kcp/KeccakSponge.c [deleted file]
Modules/_sha3/kcp/KeccakSponge.h [deleted file]
Modules/_sha3/kcp/KeccakSponge.inc [deleted file]
Modules/_sha3/kcp/PlSnP-Fallback.inc [deleted file]
Modules/_sha3/kcp/SnP-Relaned.h [deleted file]
Modules/_sha3/kcp/align.h [deleted file]
Modules/_sha3/sha3.c [new file with mode: 0644]
Modules/_sha3/sha3.h [new file with mode: 0644]
Modules/_sha3/sha3module.c

diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst
index 9a137f3ca99d84ad8192201bd94d7dcfd34a2fb2..41e4659b0f779f456abf625422bfac56d7a2d4fa 100644 (file)
@@ -252,6 +252,13 @@ hashlib
   over Python's vendored copy.
   (Contributed by Christian Heimes in :issue:`47095`.)
 
+* The internal ``_sha3`` module with SHA3 and SHAKE algorithms now uses
+  *tiny_sha3* instead of the *Keccak Code Package* to reduce code and binary
+  size. The :mod:`hashlib` module prefers optimized SHA3 and SHAKE
+  implementations from OpenSSL. The change affects only installations without
+  OpenSSL support.
+  (Contributed by Christian Heimes in :issue:`47098`.)
+
 IDLE and idlelib
 ----------------
 
diff --git a/Makefile.pre.in b/Makefile.pre.in
index ff7442af316e53057982447be2af22fb1d5f7dc9..fb5dd6a066c7df919246c420010fea340a5ac3d7 100644 (file)
@@ -667,7 +667,6 @@ coverage-lcov:
            '*/Modules/_blake2/impl/*' \
            '*/Modules/_ctypes/libffi*/*' \
            '*/Modules/_decimal/libmpdec/*' \
-           '*/Modules/_sha3/kcp/*' \
            '*/Modules/expat/*' \
            '*/Modules/zlib/*' \
            '*/Include/*' \
@@ -2491,7 +2490,7 @@ MODULE__IO_DEPS=$(srcdir)/Modules/_io/_iomodule.h
 MODULE__MD5_DEPS=$(srcdir)/Modules/hashlib.h
 MODULE__SHA1_DEPS=$(srcdir)/Modules/hashlib.h
 MODULE__SHA256_DEPS=$(srcdir)/Modules/hashlib.h
-MODULE__SHA3_DEPS=$(srcdir)/Modules/_sha3/kcp/KeccakHash.c $(srcdir)/Modules/_sha3/kcp/KeccakHash.h $(srcdir)/Modules/_sha3/kcp/KeccakP-1600-64.macros $(srcdir)/Modules/_sha3/kcp/KeccakP-1600-SnP-opt32.h $(srcdir)/Modules/_sha3/kcp/KeccakP-1600-SnP-opt64.h $(srcdir)/Modules/_sha3/kcp/KeccakP-1600-SnP.h $(srcdir)/Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c $(srcdir)/Modules/_sha3/kcp/KeccakP-1600-opt64-config.h $(srcdir)/Modules/_sha3/kcp/KeccakP-1600-opt64.c $(srcdir)/Modules/_sha3/kcp/KeccakP-1600-unrolling.macros $(srcdir)/Modules/_sha3/kcp/KeccakSponge.c $(srcdir)/Modules/_sha3/kcp/KeccakSponge.h $(srcdir)/Modules/_sha3/kcp/KeccakSponge.inc $(srcdir)/Modules/_sha3/kcp/PlSnP-Fallback.inc $(srcdir)/Modules/_sha3/kcp/SnP-Relaned.h $(srcdir)/Modules/_sha3/kcp/align.h $(srcdir)/Modules/hashlib.h
+MODULE__SHA3_DEPS=$(srcdir)/Modules/_sha3/sha3.c $(srcdir)/Modules/_sha3/sha3.h $(srcdir)/Modules/hashlib.h
 MODULE__SHA512_DEPS=$(srcdir)/Modules/hashlib.h
 MODULE__SOCKET_DEPS=$(srcdir)/Modules/socketmodule.h
 MODULE__SSL_DEPS=$(srcdir)/Modules/_ssl.h $(srcdir)/Modules/_ssl/cert.c $(srcdir)/Modules/_ssl/debughelpers.c $(srcdir)/Modules/_ssl/misc.c $(srcdir)/Modules/_ssl_data.h $(srcdir)/Modules/_ssl_data_111.h $(srcdir)/Modules/_ssl_data_300.h $(srcdir)/Modules/socketmodule.h
diff --git a/Misc/NEWS.d/next/Library/2022-03-23-10-07-41.bpo-47098.7AN_qp.rst b/Misc/NEWS.d/next/Library/2022-03-23-10-07-41.bpo-47098.7AN_qp.rst
new file mode 100644 (file)
index 0000000..dbb71bf
--- /dev/null
@@ -0,0 +1,3 @@
+The Keccak Code Package for :mod:`hashlib`'s internal ``_sha3`` module has
+been replaced with tiny_sha3. The module is used as fallback when Python is
+built without OpenSSL.
diff --git a/Modules/_sha3/LICENSE b/Modules/_sha3/LICENSE
new file mode 100644 (file)
index 0000000..d2d484d
--- /dev/null
@@ -0,0 +1,22 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Markku-Juhani O. Saarinen
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/Modules/_sha3/README.txt b/Modules/_sha3/README.txt
index e34b1d12f702fa45fd0a065b4dc2f1f54d95928d..b35919b01677d3d7796d4b0f808c0d62a235f0f2 100644 (file)
@@ -1,11 +1,8 @@
-Keccak Code Package
-===================
+tiny_sha3
+=========
 
-The files in kcp are taken from the Keccak Code Package. They have been
-slightly to be C89 compatible. The architecture specific header file
-KeccakP-1600-SnP.h ha been renamed to KeccakP-1600-SnP-opt32.h or
-KeccakP-1600-SnP-opt64.h.
-
-The 64bit files were generated with generic64lc/libkeccak.a.pack target, the
-32bit files with generic32lc/libkeccak.a.pack.
+https://github.com/mjosaarinen/tiny_sha3
+commit dcbb3192047c2a721f5f851db591871d428036a9
 
+- All functions have been converted to static functions.
+- sha3() function is commented out.
diff --git a/Modules/_sha3/cleanup.py b/Modules/_sha3/cleanup.py
deleted file mode 100755 (executable)
index 4f53681..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) 2012   Christian Heimes (christian@python.org)
-# Licensed to PSF under a Contributor Agreement.
-#
-# cleanup Keccak sources
-
-import os
-import re
-
-CPP1 = re.compile("^//(.*)")
-CPP2 = re.compile(r"\ //(.*)")
-
-STATICS = ("void ", "int ", "HashReturn ",
-           "const UINT64 ", "UINT16 ", "    int prefix##")
-
-HERE = os.path.dirname(os.path.abspath(__file__))
-KECCAK = os.path.join(HERE, "kcp")
-
-def getfiles():
-    for name in os.listdir(KECCAK):
-        name = os.path.join(KECCAK, name)
-        if os.path.isfile(name):
-            yield name
-
-def cleanup(f):
-    buf = []
-    for line in f:
-        # mark all functions and global data as static
-        #if line.startswith(STATICS):
-        #    buf.append("static " + line)
-        #    continue
-        # remove UINT64 typedef, we have our own
-        if line.startswith("typedef unsigned long long int"):
-            buf.append("/* %s */\n" % line.strip())
-            continue
-        ## remove #include "brg_endian.h"
-        if "brg_endian.h" in line:
-            buf.append("/* %s */\n" % line.strip())
-            continue
-        # transform C++ comments into ANSI C comments
-        line = CPP1.sub(r"/*\1 */\n", line)
-        line = CPP2.sub(r" /*\1 */\n", line)
-        buf.append(line)
-    return "".join(buf)
-
-for name in getfiles():
-    with open(name) as f:
-        res = cleanup(f)
-    with open(name, "w") as f:
-        f.write(res)
diff --git a/Modules/_sha3/kcp/KeccakHash.c b/Modules/_sha3/kcp/KeccakHash.c
deleted file mode 100644 (file)
index e09fb43..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
-Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
-Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
-denoted as "the implementer".
-
-For more information, feedback or questions, please refer to our websites:
-http://keccak.noekeon.org/
-http://keyak.noekeon.org/
-http://ketje.noekeon.org/
-
-To the extent possible under law, the implementer has waived all copyright
-and related or neighboring rights to the source code in this file.
-http://creativecommons.org/publicdomain/zero/1.0/
-*/
-
-#include <string.h>
-#include "KeccakHash.h"
-
-/* ---------------------------------------------------------------- */
-
-HashReturn Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix)
-{
-    HashReturn result;
-
-    if (delimitedSuffix == 0)
-        return FAIL;
-    result = (HashReturn)KeccakWidth1600_SpongeInitialize(&instance->sponge, rate, capacity);
-    if (result != SUCCESS)
-        return result;
-    instance->fixedOutputLength = hashbitlen;
-    instance->delimitedSuffix = delimitedSuffix;
-    return SUCCESS;
-}
-
-/* ---------------------------------------------------------------- */
-
-HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *data, DataLength databitlen)
-{
-    if ((databitlen % 8) == 0)
-        return (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8);
-    else {
-        HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8);
-        if (ret == SUCCESS) {
-            /* The last partial byte is assumed to be aligned on the least significant bits */
-
-            unsigned char lastByte = data[databitlen/8];
-            /* Concatenate the last few bits provided here with those of the suffix */
-
-            unsigned short delimitedLastBytes = (unsigned short)((unsigned short)lastByte | ((unsigned short)instance->delimitedSuffix << (databitlen % 8)));
-            if ((delimitedLastBytes & 0xFF00) == 0x0000) {
-                instance->delimitedSuffix = delimitedLastBytes & 0xFF;
-            }
-            else {
-                unsigned char oneByte[1];
-                oneByte[0] = delimitedLastBytes & 0xFF;
-                ret = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, oneByte, 1);
-                instance->delimitedSuffix = (delimitedLastBytes >> 8) & 0xFF;
-            }
-        }
-        return ret;
-    }
-}
-
-/* ---------------------------------------------------------------- */
-
-HashReturn Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval)
-{
-    HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorbLastFewBits(&instance->sponge, instance->delimitedSuffix);
-    if (ret == SUCCESS)
-        return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, hashval, instance->fixedOutputLength/8);
-    else
-        return ret;
-}
-
-/* ---------------------------------------------------------------- */
-
-HashReturn Keccak_HashSqueeze(Keccak_HashInstance *instance, BitSequence *data, DataLength databitlen)
-{
-    if ((databitlen % 8) != 0)
-        return FAIL;
-    return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, data, databitlen/8);
-}
diff --git a/Modules/_sha3/kcp/KeccakHash.h b/Modules/_sha3/kcp/KeccakHash.h
deleted file mode 100644 (file)
index bbd3dc6..0000000
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
-Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
-Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
-denoted as "the implementer".
-
-For more information, feedback or questions, please refer to our websites:
-http://keccak.noekeon.org/
-http://keyak.noekeon.org/
-http://ketje.noekeon.org/
-
-To the extent possible under law, the implementer has waived all copyright
-and related or neighboring rights to the source code in this file.
-http://creativecommons.org/publicdomain/zero/1.0/
-*/
-
-#ifndef _KeccakHashInterface_h_
-#define _KeccakHashInterface_h_
-
-#ifndef KeccakP1600_excluded
-
-#include "KeccakSponge.h"
-#include <string.h>
-
-typedef unsigned char BitSequence;
-typedef size_t DataLength;
-typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2 } HashReturn;
-
-typedef struct {
-    KeccakWidth1600_SpongeInstance sponge;
-    unsigned int fixedOutputLength;
-    unsigned char delimitedSuffix;
-} Keccak_HashInstance;
-
-/**
-  * Function to initialize the Keccak[r, c] sponge function instance used in sequential hashing mode.
-  * @param  hashInstance    Pointer to the hash instance to be initialized.
-  * @param  rate        The value of the rate r.
-  * @param  capacity    The value of the capacity c.
-  * @param  hashbitlen  The desired number of output bits,
-  *                     or 0 for an arbitrarily-long output.
-  * @param  delimitedSuffix Bits that will be automatically appended to the end
-  *                         of the input message, as in domain separation.
-  *                         This is a byte containing from 0 to 7 bits
-  *                         formatted like the @a delimitedData parameter of
-  *                         the Keccak_SpongeAbsorbLastFewBits() function.
-  * @pre    One must have r+c=1600 and the rate a multiple of 8 bits in this implementation.
-  * @return SUCCESS if successful, FAIL otherwise.
-  */
-HashReturn Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix);
-
-/** Macro to initialize a SHAKE128 instance as specified in the FIPS 202 standard.
-  */
-#define Keccak_HashInitialize_SHAKE128(hashInstance)        Keccak_HashInitialize(hashInstance, 1344,  256,   0, 0x1F)
-
-/** Macro to initialize a SHAKE256 instance as specified in the FIPS 202 standard.
-  */
-#define Keccak_HashInitialize_SHAKE256(hashInstance)        Keccak_HashInitialize(hashInstance, 1088,  512,   0, 0x1F)
-
-/** Macro to initialize a SHA3-224 instance as specified in the FIPS 202 standard.
-  */
-#define Keccak_HashInitialize_SHA3_224(hashInstance)        Keccak_HashInitialize(hashInstance, 1152,  448, 224, 0x06)
-
-/** Macro to initialize a SHA3-256 instance as specified in the FIPS 202 standard.
-  */
-#define Keccak_HashInitialize_SHA3_256(hashInstance)        Keccak_HashInitialize(hashInstance, 1088,  512, 256, 0x06)
-
-/** Macro to initialize a SHA3-384 instance as specified in the FIPS 202 standard.
-  */
-#define Keccak_HashInitialize_SHA3_384(hashInstance)        Keccak_HashInitialize(hashInstance,  832,  768, 384, 0x06)
-
-/** Macro to initialize a SHA3-512 instance as specified in the FIPS 202 standard.
-  */
-#define Keccak_HashInitialize_SHA3_512(hashInstance)        Keccak_HashInitialize(hashInstance,  576, 1024, 512, 0x06)
-
-/**
-  * Function to give input data to be absorbed.
-  * @param  hashInstance    Pointer to the hash instance initialized by Keccak_HashInitialize().
-  * @param  data        Pointer to the input data.
-  *                     When @a databitLen is not a multiple of 8, the last bits of data must be
-  *                     in the least significant bits of the last byte (little-endian convention).
-  * @param  databitLen  The number of input bits provided in the input data.
-  * @pre    In the previous call to Keccak_HashUpdate(), databitlen was a multiple of 8.
-  * @return SUCCESS if successful, FAIL otherwise.
-  */
-HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequence *data, DataLength databitlen);
-
-/**
-  * Function to call after all input blocks have been input and to get
-  * output bits if the length was specified when calling Keccak_HashInitialize().
-  * @param  hashInstance    Pointer to the hash instance initialized by Keccak_HashInitialize().
-  * If @a hashbitlen was not 0 in the call to Keccak_HashInitialize(), the number of
-  *     output bits is equal to @a hashbitlen.
-  * If @a hashbitlen was 0 in the call to Keccak_HashInitialize(), the output bits
-  *     must be extracted using the Keccak_HashSqueeze() function.
-  * @param  state       Pointer to the state of the sponge function initialized by Init().
-  * @param  hashval     Pointer to the buffer where to store the output data.
-  * @return SUCCESS if successful, FAIL otherwise.
-  */
-HashReturn Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hashval);
-
- /**
-  * Function to squeeze output data.
-  * @param  hashInstance    Pointer to the hash instance initialized by Keccak_HashInitialize().
-  * @param  data        Pointer to the buffer where to store the output data.
-  * @param  databitlen  The number of output bits desired (must be a multiple of 8).
-  * @pre    Keccak_HashFinal() must have been already called.
-  * @pre    @a databitlen is a multiple of 8.
-  * @return SUCCESS if successful, FAIL otherwise.
-  */
-HashReturn Keccak_HashSqueeze(Keccak_HashInstance *hashInstance, BitSequence *data, DataLength databitlen);
-
-#endif
-
-#endif
diff --git a/Modules/_sha3/kcp/KeccakP-1600-64.macros b/Modules/_sha3/kcp/KeccakP-1600-64.macros
deleted file mode 100644 (file)
index 1f11fe3..0000000
+++ /dev/null
@@ -1,2208 +0,0 @@
-/*
-Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
-Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
-denoted as "the implementer".
-
-For more information, feedback or questions, please refer to our websites:
-http://keccak.noekeon.org/
-http://keyak.noekeon.org/
-http://ketje.noekeon.org/
-
-To the extent possible under law, the implementer has waived all copyright
-and related or neighboring rights to the source code in this file.
-http://creativecommons.org/publicdomain/zero/1.0/
-*/
-
-#define declareABCDE \
-    UINT64 Aba, Abe, Abi, Abo, Abu; \
-    UINT64 Aga, Age, Agi, Ago, Agu; \
-    UINT64 Aka, Ake, Aki, Ako, Aku; \
-    UINT64 Ama, Ame, Ami, Amo, Amu; \
-    UINT64 Asa, Ase, Asi, Aso, Asu; \
-    UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \
-    UINT64 Bga, Bge, Bgi, Bgo, Bgu; \
-    UINT64 Bka, Bke, Bki, Bko, Bku; \
-    UINT64 Bma, Bme, Bmi, Bmo, Bmu; \
-    UINT64 Bsa, Bse, Bsi, Bso, Bsu; \
-    UINT64 Ca, Ce, Ci, Co, Cu; \
-    UINT64 Da, De, Di, Do, Du; \
-    UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \
-    UINT64 Ega, Ege, Egi, Ego, Egu; \
-    UINT64 Eka, Eke, Eki, Eko, Eku; \
-    UINT64 Ema, Eme, Emi, Emo, Emu; \
-    UINT64 Esa, Ese, Esi, Eso, Esu; \
-
-#define prepareTheta \
-    Ca = Aba^Aga^Aka^Ama^Asa; \
-    Ce = Abe^Age^Ake^Ame^Ase; \
-    Ci = Abi^Agi^Aki^Ami^Asi; \
-    Co = Abo^Ago^Ako^Amo^Aso; \
-    Cu = Abu^Agu^Aku^Amu^Asu; \
-
-#ifdef UseBebigokimisa
-/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */
-
-/* --- 64-bit lanes mapped to 64-bit words */
-
-#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
-    Da = Cu^ROL64(Ce, 1); \
-    De = Ca^ROL64(Ci, 1); \
-    Di = Ce^ROL64(Co, 1); \
-    Do = Ci^ROL64(Cu, 1); \
-    Du = Co^ROL64(Ca, 1); \
-\
-    A##ba ^= Da; \
-    Bba = A##ba; \
-    A##ge ^= De; \
-    Bbe = ROL64(A##ge, 44); \
-    A##ki ^= Di; \
-    Bbi = ROL64(A##ki, 43); \
-    A##mo ^= Do; \
-    Bbo = ROL64(A##mo, 21); \
-    A##su ^= Du; \
-    Bbu = ROL64(A##su, 14); \
-    E##ba =   Bba ^(  Bbe |  Bbi ); \
-    E##ba ^= KeccakF1600RoundConstants[i]; \
-    Ca = E##ba; \
-    E##be =   Bbe ^((~Bbi)|  Bbo ); \
-    Ce = E##be; \
-    E##bi =   Bbi ^(  Bbo &  Bbu ); \
-    Ci = E##bi; \
-    E##bo =   Bbo ^(  Bbu |  Bba ); \
-    Co = E##bo; \
-    E##bu =   Bbu ^(  Bba &  Bbe ); \
-    Cu = E##bu; \
-\
-    A##bo ^= Do; \
-    Bga = ROL64(A##bo, 28); \
-    A##gu ^= Du; \
-    Bge = ROL64(A##gu, 20); \
-    A##ka ^= Da; \
-    Bgi = ROL64(A##ka, 3); \
-    A##me ^= De; \
-    Bgo = ROL64(A##me, 45); \
-    A##si ^= Di; \
-    Bgu = ROL64(A##si, 61); \
-    E##ga =   Bga ^(  Bge |  Bgi ); \
-    Ca ^= E##ga; \
-    E##ge =   Bge ^(  Bgi &  Bgo ); \
-    Ce ^= E##ge; \
-    E##gi =   Bgi ^(  Bgo |(~Bgu)); \
-    Ci ^= E##gi; \
-    E##go =   Bgo ^(  Bgu |  Bga ); \
-    Co ^= E##go; \
-    E##gu =   Bgu ^(  Bga &  Bge ); \
-    Cu ^= E##gu; \
-\
-    A##be ^= De; \
-    Bka = ROL64(A##be, 1); \
-    A##gi ^= Di; \
-    Bke = ROL64(A##gi, 6); \
-    A##ko ^= Do; \
-    Bki = ROL64(A##ko, 25); \
-    A##mu ^= Du; \
-    Bko = ROL64(A##mu, 8); \
-    A##sa ^= Da; \
-    Bku = ROL64(A##sa, 18); \
-    E##ka =   Bka ^(  Bke |  Bki ); \
-    Ca ^= E##ka; \
-    E##ke =   Bke ^(  Bki &  Bko ); \
-    Ce ^= E##ke; \
-    E##ki =   Bki ^((~Bko)&  Bku ); \
-    Ci ^= E##ki; \
-    E##ko = (~Bko)^(  Bku |  Bka ); \
-    Co ^= E##ko; \
-    E##ku =   Bku ^(  Bka &  Bke ); \
-    Cu ^= E##ku; \
-\
-    A##bu ^= Du; \
-    Bma = ROL64(A##bu, 27); \
-    A##ga ^= Da; \
-    Bme = ROL64(A##ga, 36); \
-    A##ke ^= De; \
-    Bmi = ROL64(A##ke, 10); \
-    A##mi ^= Di; \
-    Bmo = ROL64(A##mi, 15); \
-    A##so ^= Do; \
-    Bmu = ROL64(A##so, 56); \
-    E##ma =   Bma ^(  Bme &  Bmi ); \
-    Ca ^= E##ma; \
-    E##me =   Bme ^(  Bmi |  Bmo ); \
-    Ce ^= E##me; \
-    E##mi =   Bmi ^((~Bmo)|  Bmu ); \
-    Ci ^= E##mi; \
-    E##mo = (~Bmo)^(  Bmu &  Bma ); \
-    Co ^= E##mo; \
-    E##mu =   Bmu ^(  Bma |  Bme ); \
-    Cu ^= E##mu; \
-\
-    A##bi ^= Di; \
-    Bsa = ROL64(A##bi, 62); \
-    A##go ^= Do; \
-    Bse = ROL64(A##go, 55); \
-    A##ku ^= Du; \
-    Bsi = ROL64(A##ku, 39); \
-    A##ma ^= Da; \
-    Bso = ROL64(A##ma, 41); \
-    A##se ^= De; \
-    Bsu = ROL64(A##se, 2); \
-    E##sa =   Bsa ^((~Bse)&  Bsi ); \
-    Ca ^= E##sa; \
-    E##se = (~Bse)^(  Bsi |  Bso ); \
-    Ce ^= E##se; \
-    E##si =   Bsi ^(  Bso &  Bsu ); \
-    Ci ^= E##si; \
-    E##so =   Bso ^(  Bsu |  Bsa ); \
-    Co ^= E##so; \
-    E##su =   Bsu ^(  Bsa &  Bse ); \
-    Cu ^= E##su; \
-\
-
-/* --- Code for round (lane complementing pattern 'bebigokimisa') */
-
-/* --- 64-bit lanes mapped to 64-bit words */
-
-#define thetaRhoPiChiIota(i, A, E) \
-    Da = Cu^ROL64(Ce, 1); \
-    De = Ca^ROL64(Ci, 1); \
-    Di = Ce^ROL64(Co, 1); \
-    Do = Ci^ROL64(Cu, 1); \
-    Du = Co^ROL64(Ca, 1); \
-\
-    A##ba ^= Da; \
-    Bba = A##ba; \
-    A##ge ^= De; \
-    Bbe = ROL64(A##ge, 44); \
-    A##ki ^= Di; \
-    Bbi = ROL64(A##ki, 43); \
-    A##mo ^= Do; \
-    Bbo = ROL64(A##mo, 21); \
-    A##su ^= Du; \
-    Bbu = ROL64(A##su, 14); \
-    E##ba =   Bba ^(  Bbe |  Bbi ); \
-    E##ba ^= KeccakF1600RoundConstants[i]; \
-    E##be =   Bbe ^((~Bbi)|  Bbo ); \
-    E##bi =   Bbi ^(  Bbo &  Bbu ); \
-    E##bo =   Bbo ^(  Bbu |  Bba ); \
-    E##bu =   Bbu ^(  Bba &  Bbe ); \
-\
-    A##bo ^= Do; \
-    Bga = ROL64(A##bo, 28); \
-    A##gu ^= Du; \
-    Bge = ROL64(A##gu, 20); \
-    A##ka ^= Da; \
-    Bgi = ROL64(A##ka, 3); \
-    A##me ^= De; \
-    Bgo = ROL64(A##me, 45); \
-    A##si ^= Di; \
-    Bgu = ROL64(A##si, 61); \
-    E##ga =   Bga ^(  Bge |  Bgi ); \
-    E##ge =   Bge ^(  Bgi &  Bgo ); \
-    E##gi =   Bgi ^(  Bgo |(~Bgu)); \
-    E##go =   Bgo ^(  Bgu |  Bga ); \
-    E##gu =   Bgu ^(  Bga &  Bge ); \
-\
-    A##be ^= De; \
-    Bka = ROL64(A##be, 1); \
-    A##gi ^= Di; \
-    Bke = ROL64(A##gi, 6); \
-    A##ko ^= Do; \
-    Bki = ROL64(A##ko, 25); \
-    A##mu ^= Du; \
-    Bko = ROL64(A##mu, 8); \
-    A##sa ^= Da; \
-    Bku = ROL64(A##sa, 18); \
-    E##ka =   Bka ^(  Bke |  Bki ); \
-    E##ke =   Bke ^(  Bki &  Bko ); \
-    E##ki =   Bki ^((~Bko)&  Bku ); \
-    E##ko = (~Bko)^(  Bku |  Bka ); \
-    E##ku =   Bku ^(  Bka &  Bke ); \
-\
-    A##bu ^= Du; \
-    Bma = ROL64(A##bu, 27); \
-    A##ga ^= Da; \
-    Bme = ROL64(A##ga, 36); \
-    A##ke ^= De; \
-    Bmi = ROL64(A##ke, 10); \
-    A##mi ^= Di; \
-    Bmo = ROL64(A##mi, 15); \
-    A##so ^= Do; \
-    Bmu = ROL64(A##so, 56); \
-    E##ma =   Bma ^(  Bme &  Bmi ); \
-    E##me =   Bme ^(  Bmi |  Bmo ); \
-    E##mi =   Bmi ^((~Bmo)|  Bmu ); \
-    E##mo = (~Bmo)^(  Bmu &  Bma ); \
-    E##mu =   Bmu ^(  Bma |  Bme ); \
-\
-    A##bi ^= Di; \
-    Bsa = ROL64(A##bi, 62); \
-    A##go ^= Do; \
-    Bse = ROL64(A##go, 55); \
-    A##ku ^= Du; \
-    Bsi = ROL64(A##ku, 39); \
-    A##ma ^= Da; \
-    Bso = ROL64(A##ma, 41); \
-    A##se ^= De; \
-    Bsu = ROL64(A##se, 2); \
-    E##sa =   Bsa ^((~Bse)&  Bsi ); \
-    E##se = (~Bse)^(  Bsi |  Bso ); \
-    E##si =   Bsi ^(  Bso &  Bsu ); \
-    E##so =   Bso ^(  Bsu |  Bsa ); \
-    E##su =   Bsu ^(  Bsa &  Bse ); \
-\
-
-#else /* UseBebigokimisa */
-
-/* --- Code for round, with prepare-theta */
-
-/* --- 64-bit lanes mapped to 64-bit words */
-
-#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
-    Da = Cu^ROL64(Ce, 1); \
-    De = Ca^ROL64(Ci, 1); \
-    Di = Ce^ROL64(Co, 1); \
-    Do = Ci^ROL64(Cu, 1); \
-    Du = Co^ROL64(Ca, 1); \
-\
-    A##ba ^= Da; \
-    Bba = A##ba; \
-    A##ge ^= De; \
-    Bbe = ROL64(A##ge, 44); \
-    A##ki ^= Di; \
-    Bbi = ROL64(A##ki, 43); \
-    A##mo ^= Do; \
-    Bbo = ROL64(A##mo, 21); \
-    A##su ^= Du; \
-    Bbu = ROL64(A##su, 14); \
-    E##ba =   Bba ^((~Bbe)&  Bbi ); \
-    E##ba ^= KeccakF1600RoundConstants[i]; \
-    Ca = E##ba; \
-    E##be =   Bbe ^((~Bbi)&  Bbo ); \
-    Ce = E##be; \
-    E##bi =   Bbi ^((~Bbo)&  Bbu ); \
-    Ci = E##bi; \
-    E##bo =   Bbo ^((~Bbu)&  Bba ); \
-    Co = E##bo; \
-    E##bu =   Bbu ^((~Bba)&  Bbe ); \
-    Cu = E##bu; \
-\
-    A##bo ^= Do; \
-    Bga = ROL64(A##bo, 28); \
-    A##gu ^= Du; \
-    Bge = ROL64(A##gu, 20); \
-    A##ka ^= Da; \
-    Bgi = ROL64(A##ka, 3); \
-    A##me ^= De; \
-    Bgo = ROL64(A##me, 45); \
-    A##si ^= Di; \
-    Bgu = ROL64(A##si, 61); \
-    E##ga =   Bga ^((~Bge)&  Bgi ); \
-    Ca ^= E##ga; \
-    E##ge =   Bge ^((~Bgi)&  Bgo ); \
-    Ce ^= E##ge; \
-    E##gi =   Bgi ^((~Bgo)&  Bgu ); \
-    Ci ^= E##gi; \
-    E##go =   Bgo ^((~Bgu)&  Bga ); \
-    Co ^= E##go; \
-    E##gu =   Bgu ^((~Bga)&  Bge ); \
-    Cu ^= E##gu; \
-\
-    A##be ^= De; \
-    Bka = ROL64(A##be, 1); \
-    A##gi ^= Di; \
-    Bke = ROL64(A##gi, 6); \
-    A##ko ^= Do; \
-    Bki = ROL64(A##ko, 25); \
-    A##mu ^= Du; \
-    Bko = ROL64(A##mu, 8); \
-    A##sa ^= Da; \
-    Bku = ROL64(A##sa, 18); \
-    E##ka =   Bka ^((~Bke)&  Bki ); \
-    Ca ^= E##ka; \
-    E##ke =   Bke ^((~Bki)&  Bko ); \
-    Ce ^= E##ke; \
-    E##ki =   Bki ^((~Bko)&  Bku ); \
-    Ci ^= E##ki; \
-    E##ko =   Bko ^((~Bku)&  Bka ); \
-    Co ^= E##ko; \
-    E##ku =   Bku ^((~Bka)&  Bke ); \
-    Cu ^= E##ku; \
-\
-    A##bu ^= Du; \
-    Bma = ROL64(A##bu, 27); \
-    A##ga ^= Da; \
-    Bme = ROL64(A##ga, 36); \
-    A##ke ^= De; \
-    Bmi = ROL64(A##ke, 10); \
-    A##mi ^= Di; \
-    Bmo = ROL64(A##mi, 15); \
-    A##so ^= Do; \
-    Bmu = ROL64(A##so, 56); \
-    E##ma =   Bma ^((~Bme)&  Bmi ); \
-    Ca ^= E##ma; \
-    E##me =   Bme ^((~Bmi)&  Bmo ); \
-    Ce ^= E##me; \
-    E##mi =   Bmi ^((~Bmo)&  Bmu ); \
-    Ci ^= E##mi; \
-    E##mo =   Bmo ^((~Bmu)&  Bma ); \
-    Co ^= E##mo; \
-    E##mu =   Bmu ^((~Bma)&  Bme ); \
-    Cu ^= E##mu; \
-\
-    A##bi ^= Di; \
-    Bsa = ROL64(A##bi, 62); \
-    A##go ^= Do; \
-    Bse = ROL64(A##go, 55); \
-    A##ku ^= Du; \
-    Bsi = ROL64(A##ku, 39); \
-    A##ma ^= Da; \
-    Bso = ROL64(A##ma, 41); \
-    A##se ^= De; \
-    Bsu = ROL64(A##se, 2); \
-    E##sa =   Bsa ^((~Bse)&  Bsi ); \
-    Ca ^= E##sa; \
-    E##se =   Bse ^((~Bsi)&  Bso ); \
-    Ce ^= E##se; \
-    E##si =   Bsi ^((~Bso)&  Bsu ); \
-    Ci ^= E##si; \
-    E##so =   Bso ^((~Bsu)&  Bsa ); \
-    Co ^= E##so; \
-    E##su =   Bsu ^((~Bsa)&  Bse ); \
-    Cu ^= E##su; \
-\
-
-/* --- Code for round */
-
-/* --- 64-bit lanes mapped to 64-bit words */
-
-#define thetaRhoPiChiIota(i, A, E) \
-    Da = Cu^ROL64(Ce, 1); \
-    De = Ca^ROL64(Ci, 1); \
-    Di = Ce^ROL64(Co, 1); \
-    Do = Ci^ROL64(Cu, 1); \
-    Du = Co^ROL64(Ca, 1); \
-\
-    A##ba ^= Da; \
-    Bba = A##ba; \
-    A##ge ^= De; \
-    Bbe = ROL64(A##ge, 44); \
-    A##ki ^= Di; \
-    Bbi = ROL64(A##ki, 43); \
-    A##mo ^= Do; \
-    Bbo = ROL64(A##mo, 21); \
-    A##su ^= Du; \
-    Bbu = ROL64(A##su, 14); \
-    E##ba =   Bba ^((~Bbe)&  Bbi ); \
-    E##ba ^= KeccakF1600RoundConstants[i]; \
-    E##be =   Bbe ^((~Bbi)&  Bbo ); \
-    E##bi =   Bbi ^((~Bbo)&  Bbu ); \
-    E##bo =   Bbo ^((~Bbu)&  Bba ); \
-    E##bu =   Bbu ^((~Bba)&  Bbe ); \
-\
-    A##bo ^= Do; \
-    Bga = ROL64(A##bo, 28); \
-    A##gu ^= Du; \
-    Bge = ROL64(A##gu, 20); \
-    A##ka ^= Da; \
-    Bgi = ROL64(A##ka, 3); \
-    A##me ^= De; \
-    Bgo = ROL64(A##me, 45); \
-    A##si ^= Di; \
-    Bgu = ROL64(A##si, 61); \
-    E##ga =   Bga ^((~Bge)&  Bgi ); \
-    E##ge =   Bge ^((~Bgi)&  Bgo ); \
-    E##gi =   Bgi ^((~Bgo)&  Bgu ); \
-    E##go =   Bgo ^((~Bgu)&  Bga ); \
-    E##gu =   Bgu ^((~Bga)&  Bge ); \
-\
-    A##be ^= De; \
-    Bka = ROL64(A##be, 1); \
-    A##gi ^= Di; \
-    Bke = ROL64(A##gi, 6); \
-    A##ko ^= Do; \
-    Bki = ROL64(A##ko, 25); \
-    A##mu ^= Du; \
-    Bko = ROL64(A##mu, 8); \
-    A##sa ^= Da; \
-    Bku = ROL64(A##sa, 18); \
-    E##ka =   Bka ^((~Bke)&  Bki ); \
-    E##ke =   Bke ^((~Bki)&  Bko ); \
-    E##ki =   Bki ^((~Bko)&  Bku ); \
-    E##ko =   Bko ^((~Bku)&  Bka ); \
-    E##ku =   Bku ^((~Bka)&  Bke ); \
-\
-    A##bu ^= Du; \
-    Bma = ROL64(A##bu, 27); \
-    A##ga ^= Da; \
-    Bme = ROL64(A##ga, 36); \
-    A##ke ^= De; \
-    Bmi = ROL64(A##ke, 10); \
-    A##mi ^= Di; \
-    Bmo = ROL64(A##mi, 15); \
-    A##so ^= Do; \
-    Bmu = ROL64(A##so, 56); \
-    E##ma =   Bma ^((~Bme)&  Bmi ); \
-    E##me =   Bme ^((~Bmi)&  Bmo ); \
-    E##mi =   Bmi ^((~Bmo)&  Bmu ); \
-    E##mo =   Bmo ^((~Bmu)&  Bma ); \
-    E##mu =   Bmu ^((~Bma)&  Bme ); \
-\
-    A##bi ^= Di; \
-    Bsa = ROL64(A##bi, 62); \
-    A##go ^= Do; \
-    Bse = ROL64(A##go, 55); \
-    A##ku ^= Du; \
-    Bsi = ROL64(A##ku, 39); \
-    A##ma ^= Da; \
-    Bso = ROL64(A##ma, 41); \
-    A##se ^= De; \
-    Bsu = ROL64(A##se, 2); \
-    E##sa =   Bsa ^((~Bse)&  Bsi ); \
-    E##se =   Bse ^((~Bsi)&  Bso ); \
-    E##si =   Bsi ^((~Bso)&  Bsu ); \
-    E##so =   Bso ^((~Bsu)&  Bsa ); \
-    E##su =   Bsu ^((~Bsa)&  Bse ); \
-\
-
-#endif /* UseBebigokimisa */
-
-
-#define copyFromState(X, state) \
-    X##ba = state[ 0]; \
-    X##be = state[ 1]; \
-    X##bi = state[ 2]; \
-    X##bo = state[ 3]; \
-    X##bu = state[ 4]; \
-    X##ga = state[ 5]; \
-    X##ge = state[ 6]; \
-    X##gi = state[ 7]; \
-    X##go = state[ 8]; \
-    X##gu = state[ 9]; \
-    X##ka = state[10]; \
-    X##ke = state[11]; \
-    X##ki = state[12]; \
-    X##ko = state[13]; \
-    X##ku = state[14]; \
-    X##ma = state[15]; \
-    X##me = state[16]; \
-    X##mi = state[17]; \
-    X##mo = state[18]; \
-    X##mu = state[19]; \
-    X##sa = state[20]; \
-    X##se = state[21]; \
-    X##si = state[22]; \
-    X##so = state[23]; \
-    X##su = state[24]; \
-
-#define copyToState(state, X) \
-    state[ 0] = X##ba; \
-    state[ 1] = X##be; \
-    state[ 2] = X##bi; \
-    state[ 3] = X##bo; \
-    state[ 4] = X##bu; \
-    state[ 5] = X##ga; \
-    state[ 6] = X##ge; \
-    state[ 7] = X##gi; \
-    state[ 8] = X##go; \
-    state[ 9] = X##gu; \
-    state[10] = X##ka; \
-    state[11] = X##ke; \
-    state[12] = X##ki; \
-    state[13] = X##ko; \
-    state[14] = X##ku; \
-    state[15] = X##ma; \
-    state[16] = X##me; \
-    state[17] = X##mi; \
-    state[18] = X##mo; \
-    state[19] = X##mu; \
-    state[20] = X##sa; \
-    state[21] = X##se; \
-    state[22] = X##si; \
-    state[23] = X##so; \
-    state[24] = X##su; \
-
-#define copyStateVariables(X, Y) \
-    X##ba = Y##ba; \
-    X##be = Y##be; \
-    X##bi = Y##bi; \
-    X##bo = Y##bo; \
-    X##bu = Y##bu; \
-    X##ga = Y##ga; \
-    X##ge = Y##ge; \
-    X##gi = Y##gi; \
-    X##go = Y##go; \
-    X##gu = Y##gu; \
-    X##ka = Y##ka; \
-    X##ke = Y##ke; \
-    X##ki = Y##ki; \
-    X##ko = Y##ko; \
-    X##ku = Y##ku; \
-    X##ma = Y##ma; \
-    X##me = Y##me; \
-    X##mi = Y##mi; \
-    X##mo = Y##mo; \
-    X##mu = Y##mu; \
-    X##sa = Y##sa; \
-    X##se = Y##se; \
-    X##si = Y##si; \
-    X##so = Y##so; \
-    X##su = Y##su; \
-
-#define copyFromStateAndAdd(X, state, input, laneCount) \
-    if (laneCount < 16) { \
-        if (laneCount < 8) { \
-            if (laneCount < 4) { \
-                if (laneCount < 2) { \
-                    if (laneCount < 1) { \
-                        X##ba = state[ 0]; \
-                    } \
-                    else { \
-                        X##ba = state[ 0]^input[ 0]; \
-                    } \
-                    X##be = state[ 1]; \
-                    X##bi = state[ 2]; \
-                } \
-                else { \
-                    X##ba = state[ 0]^input[ 0]; \
-                    X##be = state[ 1]^input[ 1]; \
-                    if (laneCount < 3) { \
-                        X##bi = state[ 2]; \
-                    } \
-                    else { \
-                        X##bi = state[ 2]^input[ 2]; \
-                    } \
-                } \
-                X##bo = state[ 3]; \
-                X##bu = state[ 4]; \
-                X##ga = state[ 5]; \
-                X##ge = state[ 6]; \
-            } \
-            else { \
-                X##ba = state[ 0]^input[ 0]; \
-                X##be = state[ 1]^input[ 1]; \
-                X##bi = state[ 2]^input[ 2]; \
-                X##bo = state[ 3]^input[ 3]; \
-                if (laneCount < 6) { \
-                    if (laneCount < 5) { \
-                        X##bu = state[ 4]; \
-                    } \
-                    else { \
-                        X##bu = state[ 4]^input[ 4]; \
-                    } \
-                    X##ga = state[ 5]; \
-                    X##ge = state[ 6]; \
-                } \
-                else { \
-                    X##bu = state[ 4]^input[ 4]; \
-                    X##ga = state[ 5]^input[ 5]; \
-                    if (laneCount < 7) { \
-                        X##ge = state[ 6]; \
-                    } \
-                    else { \
-                        X##ge = state[ 6]^input[ 6]; \
-                    } \
-                } \
-            } \
-            X##gi = state[ 7]; \
-            X##go = state[ 8]; \
-            X##gu = state[ 9]; \
-            X##ka = state[10]; \
-            X##ke = state[11]; \
-            X##ki = state[12]; \
-            X##ko = state[13]; \
-            X##ku = state[14]; \
-        } \
-        else { \
-            X##ba = state[ 0]^input[ 0]; \
-            X##be = state[ 1]^input[ 1]; \
-            X##bi = state[ 2]^input[ 2]; \
-            X##bo = state[ 3]^input[ 3]; \
-            X##bu = state[ 4]^input[ 4]; \
-            X##ga = state[ 5]^input[ 5]; \
-            X##ge = state[ 6]^input[ 6]; \
-            X##gi = state[ 7]^input[ 7]; \
-            if (laneCount < 12) { \
-                if (laneCount < 10) { \
-                    if (laneCount < 9) { \
-                        X##go = state[ 8]; \
-                    } \
-                    else { \
-                        X##go = state[ 8]^input[ 8]; \
-                    } \
-                    X##gu = state[ 9]; \
-                    X##ka = state[10]; \
-                } \
-                else { \
-                    X##go = state[ 8]^input[ 8]; \
-                    X##gu = state[ 9]^input[ 9]; \
-                    if (laneCount < 11) { \
-                        X##ka = state[10]; \
-                    } \
-                    else { \
-                        X##ka = state[10]^input[10]; \
-                    } \
-                } \
-                X##ke = state[11]; \
-                X##ki = state[12]; \
-                X##ko = state[13]; \
-                X##ku = state[14]; \
-            } \
-            else { \
-                X##go = state[ 8]^input[ 8]; \
-                X##gu = state[ 9]^input[ 9]; \
-                X##ka = state[10]^input[10]; \
-                X##ke = state[11]^input[11]; \
-                if (laneCount < 14) { \
-                    if (laneCount < 13) { \
-                        X##ki = state[12]; \
-                    } \
-                    else { \
-                        X##ki = state[12]^input[12]; \
-                    } \
-                    X##ko = state[13]; \
-                    X##ku = state[14]; \
-                } \
-                else { \
-                    X##ki = state[12]^input[12]; \
-                    X##ko = state[13]^input[13]; \
-                    if (laneCount < 15) { \
-                        X##ku = state[14]; \
-                    } \
-                    else { \
-                        X##ku = state[14]^input[14]; \
-                    } \
-                } \
-            } \
-        } \
-        X##ma = state[15]; \
-        X##me = state[16]; \
-        X##mi = state[17]; \
-        X##mo = state[18]; \
-        X##mu = state[19]; \
-        X##sa = state[20]; \
-        X##se = state[21]; \
-        X##si = state[22]; \
-        X##so = state[23]; \
-        X##su = state[24]; \
-    } \
-    else { \
-        X##ba = state[ 0]^input[ 0]; \
-        X##be = state[ 1]^input[ 1]; \
-        X##bi = state[ 2]^input[ 2]; \
-        X##bo = state[ 3]^input[ 3]; \
-        X##bu = state[ 4]^input[ 4]; \
-        X##ga = state[ 5]^input[ 5]; \
-        X##ge = state[ 6]^input[ 6]; \
-        X##gi = state[ 7]^input[ 7]; \
-        X##go = state[ 8]^input[ 8]; \
-        X##gu = state[ 9]^input[ 9]; \
-        X##ka = state[10]^input[10]; \
-        X##ke = state[11]^input[11]; \
-        X##ki = state[12]^input[12]; \
-        X##ko = state[13]^input[13]; \
-        X##ku = state[14]^input[14]; \
-        X##ma = state[15]^input[15]; \
-        if (laneCount < 24) { \
-            if (laneCount < 20) { \
-                if (laneCount < 18) { \
-                    if (laneCount < 17) { \
-                        X##me = state[16]; \
-                    } \
-                    else { \
-                        X##me = state[16]^input[16]; \
-                    } \
-                    X##mi = state[17]; \
-                    X##mo = state[18]; \
-                } \
-                else { \
-                    X##me = state[16]^input[16]; \
-                    X##mi = state[17]^input[17]; \
-                    if (laneCount < 19) { \
-                        X##mo = state[18]; \
-                    } \
-                    else { \
-                        X##mo = state[18]^input[18]; \
-                    } \
-                } \
-                X##mu = state[19]; \
-                X##sa = state[20]; \
-                X##se = state[21]; \
-                X##si = state[22]; \
-            } \
-            else { \
-                X##me = state[16]^input[16]; \
-                X##mi = state[17]^input[17]; \
-                X##mo = state[18]^input[18]; \
-                X##mu = state[19]^input[19]; \
-                if (laneCount < 22) { \
-                    if (laneCount < 21) { \
-                        X##sa = state[20]; \
-                    } \
-                    else { \
-                        X##sa = state[20]^input[20]; \
-                    } \
-                    X##se = state[21]; \
-                    X##si = state[22]; \
-                } \
-                else { \
-                    X##sa = state[20]^input[20]; \
-                    X##se = state[21]^input[21]; \
-                    if (laneCount < 23) { \
-                        X##si = state[22]; \
-                    } \
-                    else { \
-                        X##si = state[22]^input[22]; \
-                    } \
-                } \
-            } \
-            X##so = state[23]; \
-            X##su = state[24]; \
-        } \
-        else { \
-            X##me = state[16]^input[16]; \
-            X##mi = state[17]^input[17]; \
-            X##mo = state[18]^input[18]; \
-            X##mu = state[19]^input[19]; \
-            X##sa = state[20]^input[20]; \
-            X##se = state[21]^input[21]; \
-            X##si = state[22]^input[22]; \
-            X##so = state[23]^input[23]; \
-            if (laneCount < 25) { \
-                X##su = state[24]; \
-            } \
-            else { \
-                X##su = state[24]^input[24]; \
-            } \
-        } \
-    }
-
-#define addInput(X, input, laneCount) \
-    if (laneCount == 21) { \
-        X##ba ^= input[ 0]; \
-        X##be ^= input[ 1]; \
-        X##bi ^= input[ 2]; \
-        X##bo ^= input[ 3]; \
-        X##bu ^= input[ 4]; \
-        X##ga ^= input[ 5]; \
-        X##ge ^= input[ 6]; \
-        X##gi ^= input[ 7]; \
-        X##go ^= input[ 8]; \
-        X##gu ^= input[ 9]; \
-        X##ka ^= input[10]; \
-        X##ke ^= input[11]; \
-        X##ki ^= input[12]; \
-        X##ko ^= input[13]; \
-        X##ku ^= input[14]; \
-        X##ma ^= input[15]; \
-        X##me ^= input[16]; \
-        X##mi ^= input[17]; \
-        X##mo ^= input[18]; \
-        X##mu ^= input[19]; \
-        X##sa ^= input[20]; \
-    } \
-    else if (laneCount < 16) { \
-        if (laneCount < 8) { \
-            if (laneCount < 4) { \
-                if (laneCount < 2) { \
-                    if (laneCount < 1) { \
-                    } \
-                    else { \
-                        X##ba ^= input[ 0]; \
-                    } \
-                } \
-                else { \
-                    X##ba ^= input[ 0]; \
-                    X##be ^= input[ 1]; \
-                    if (laneCount < 3) { \
-                    } \
-                    else { \
-                        X##bi ^= input[ 2]; \
-                    } \
-                } \
-            } \
-            else { \
-                X##ba ^= input[ 0]; \
-                X##be ^= input[ 1]; \
-                X##bi ^= input[ 2]; \
-                X##bo ^= input[ 3]; \
-                if (laneCount < 6) { \
-                    if (laneCount < 5) { \
-                    } \
-                    else { \
-                        X##bu ^= input[ 4]; \
-                    } \
-                } \
-                else { \
-                    X##bu ^= input[ 4]; \
-                    X##ga ^= input[ 5]; \
-                    if (laneCount < 7) { \
-                    } \
-                    else { \
-                        X##ge ^= input[ 6]; \
-                    } \
-                } \
-            } \
-        } \
-        else { \
-            X##ba ^= input[ 0]; \
-            X##be ^= input[ 1]; \
-            X##bi ^= input[ 2]; \
-            X##bo ^= input[ 3]; \
-            X##bu ^= input[ 4]; \
-            X##ga ^= input[ 5]; \
-            X##ge ^= input[ 6]; \
-            X##gi ^= input[ 7]; \
-            if (laneCount < 12) { \
-                if (laneCount < 10) { \
-                    if (laneCount < 9) { \
-                    } \
-                    else { \
-                        X##go ^= input[ 8]; \
-                    } \
-                } \
-                else { \
-                    X##go ^= input[ 8]; \
-                    X##gu ^= input[ 9]; \
-                    if (laneCount < 11) { \
-                    } \
-                    else { \
-                        X##ka ^= input[10]; \
-                    } \
-                } \
-            } \
-            else { \
-                X##go ^= input[ 8]; \
-                X##gu ^= input[ 9]; \
-                X##ka ^= input[10]; \
-                X##ke ^= input[11]; \
-                if (laneCount < 14) { \
-                    if (laneCount < 13) { \
-                    } \
-                    else { \
-                        X##ki ^= input[12]; \
-                    } \
-                } \
-                else { \
-                    X##ki ^= input[12]; \
-                    X##ko ^= input[13]; \
-                    if (laneCount < 15) { \
-                    } \
-                    else { \
-                        X##ku ^= input[14]; \
-                    } \
-                } \
-            } \
-        } \
-    } \
-    else { \
-        X##ba ^= input[ 0]; \
-        X##be ^= input[ 1]; \
-        X##bi ^= input[ 2]; \
-        X##bo ^= input[ 3]; \
-        X##bu ^= input[ 4]; \
-        X##ga ^= input[ 5]; \
-        X##ge ^= input[ 6]; \
-        X##gi ^= input[ 7]; \
-        X##go ^= input[ 8]; \
-        X##gu ^= input[ 9]; \
-        X##ka ^= input[10]; \
-        X##ke ^= input[11]; \
-        X##ki ^= input[12]; \
-        X##ko ^= input[13]; \
-        X##ku ^= input[14]; \
-        X##ma ^= input[15]; \
-        if (laneCount < 24) { \
-            if (laneCount < 20) { \
-                if (laneCount < 18) { \
-                    if (laneCount < 17) { \
-                    } \
-                    else { \
-                        X##me ^= input[16]; \
-                    } \
-                } \
-                else { \
-                    X##me ^= input[16]; \
-                    X##mi ^= input[17]; \
-                    if (laneCount < 19) { \
-                    } \
-                    else { \
-                        X##mo ^= input[18]; \
-                    } \
-                } \
-            } \
-            else { \
-                X##me ^= input[16]; \
-                X##mi ^= input[17]; \
-                X##mo ^= input[18]; \
-                X##mu ^= input[19]; \
-                if (laneCount < 22) { \
-                    if (laneCount < 21) { \
-                    } \
-                    else { \
-                        X##sa ^= input[20]; \
-                    } \
-                } \
-                else { \
-                    X##sa ^= input[20]; \
-                    X##se ^= input[21]; \
-                    if (laneCount < 23) { \
-                    } \
-                    else { \
-                        X##si ^= input[22]; \
-                    } \
-                } \
-            } \
-        } \
-        else { \
-            X##me ^= input[16]; \
-            X##mi ^= input[17]; \
-            X##mo ^= input[18]; \
-            X##mu ^= input[19]; \
-            X##sa ^= input[20]; \
-            X##se ^= input[21]; \
-            X##si ^= input[22]; \
-            X##so ^= input[23]; \
-            if (laneCount < 25) { \
-            } \
-            else { \
-                X##su ^= input[24]; \
-            } \
-        } \
-    }
-
-#ifdef UseBebigokimisa
-
-#define copyToStateAndOutput(X, state, output, laneCount) \
-    if (laneCount < 16) { \
-        if (laneCount < 8) { \
-            if (laneCount < 4) { \
-                if (laneCount < 2) { \
-                    state[ 0] = X##ba; \
-                    if (laneCount >= 1) { \
-                        output[ 0] = X##ba; \
-                    } \
-                    state[ 1] = X##be; \
-                    state[ 2] = X##bi; \
-                } \
-                else { \
-                    state[ 0] = X##ba; \
-                    output[ 0] = X##ba; \
-                    state[ 1] = X##be; \
-                    output[ 1] = ~X##be; \
-                    state[ 2] = X##bi; \
-                    if (laneCount >= 3) { \
-                        output[ 2] = ~X##bi; \
-                    } \
-                } \
-                state[ 3] = X##bo; \
-                state[ 4] = X##bu; \
-                state[ 5] = X##ga; \
-                state[ 6] = X##ge; \
-            } \
-            else { \
-                state[ 0] = X##ba; \
-                output[ 0] = X##ba; \
-                state[ 1] = X##be; \
-                output[ 1] = ~X##be; \
-                state[ 2] = X##bi; \
-                output[ 2] = ~X##bi; \
-                state[ 3] = X##bo; \
-                output[ 3] = X##bo; \
-                if (laneCount < 6) { \
-                    state[ 4] = X##bu; \
-                    if (laneCount >= 5) { \
-                        output[ 4] = X##bu; \
-                    } \
-                    state[ 5] = X##ga; \
-                    state[ 6] = X##ge; \
-                } \
-                else { \
-                    state[ 4] = X##bu; \
-                    output[ 4] = X##bu; \
-                    state[ 5] = X##ga; \
-                    output[ 5] = X##ga; \
-                    state[ 6] = X##ge; \
-                    if (laneCount >= 7) { \
-                        output[ 6] = X##ge; \
-                    } \
-                } \
-            } \
-            state[ 7] = X##gi; \
-            state[ 8] = X##go; \
-            state[ 9] = X##gu; \
-            state[10] = X##ka; \
-            state[11] = X##ke; \
-            state[12] = X##ki; \
-            state[13] = X##ko; \
-            state[14] = X##ku; \
-        } \
-        else { \
-            state[ 0] = X##ba; \
-            output[ 0] = X##ba; \
-            state[ 1] = X##be; \
-            output[ 1] = ~X##be; \
-            state[ 2] = X##bi; \
-            output[ 2] = ~X##bi; \
-            state[ 3] = X##bo; \
-            output[ 3] = X##bo; \
-            state[ 4] = X##bu; \
-            output[ 4] = X##bu; \
-            state[ 5] = X##ga; \
-            output[ 5] = X##ga; \
-            state[ 6] = X##ge; \
-            output[ 6] = X##ge; \
-            state[ 7] = X##gi; \
-            output[ 7] = X##gi; \
-            if (laneCount < 12) { \
-                if (laneCount < 10) { \
-                    state[ 8] = X##go; \
-                    if (laneCount >= 9) { \
-                        output[ 8] = ~X##go; \
-                    } \
-                    state[ 9] = X##gu; \
-                    state[10] = X##ka; \
-                } \
-                else { \
-                    state[ 8] = X##go; \
-                    output[ 8] = ~X##go; \
-                    state[ 9] = X##gu; \
-                    output[ 9] = X##gu; \
-                    state[10] = X##ka; \
-                    if (laneCount >= 11) { \
-                        output[10] = X##ka; \
-                    } \
-                } \
-                state[11] = X##ke; \
-                state[12] = X##ki; \
-                state[13] = X##ko; \
-                state[14] = X##ku; \
-            } \
-            else { \
-                state[ 8] = X##go; \
-                output[ 8] = ~X##go; \
-                state[ 9] = X##gu; \
-                output[ 9] = X##gu; \
-                state[10] = X##ka; \
-                output[10] = X##ka; \
-                state[11] = X##ke; \
-                output[11] = X##ke; \
-                if (laneCount < 14) { \
-                    state[12] = X##ki; \
-                    if (laneCount >= 13) { \
-                        output[12] = ~X##ki; \
-                    } \
-                    state[13] = X##ko; \
-                    state[14] = X##ku; \
-                } \
-                else { \
-                    state[12] = X##ki; \
-                    output[12] = ~X##ki; \
-                    state[13] = X##ko; \
-                    output[13] = X##ko; \
-                    state[14] = X##ku; \
-                    if (laneCount >= 15) { \
-                        output[14] = X##ku; \
-                    } \
-                } \
-            } \
-        } \
-        state[15] = X##ma; \
-        state[16] = X##me; \
-        state[17] = X##mi; \
-        state[18] = X##mo; \
-        state[19] = X##mu; \
-        state[20] = X##sa; \
-        state[21] = X##se; \
-        state[22] = X##si; \
-        state[23] = X##so; \
-        state[24] = X##su; \
-    } \
-    else { \
-        state[ 0] = X##ba; \
-        output[ 0] = X##ba; \
-        state[ 1] = X##be; \
-        output[ 1] = ~X##be; \
-        state[ 2] = X##bi; \
-        output[ 2] = ~X##bi; \
-        state[ 3] = X##bo; \
-        output[ 3] = X##bo; \
-        state[ 4] = X##bu; \
-        output[ 4] = X##bu; \
-        state[ 5] = X##ga; \
-        output[ 5] = X##ga; \
-        state[ 6] = X##ge; \
-        output[ 6] = X##ge; \
-        state[ 7] = X##gi; \
-        output[ 7] = X##gi; \
-        state[ 8] = X##go; \
-        output[ 8] = ~X##go; \
-        state[ 9] = X##gu; \
-        output[ 9] = X##gu; \
-        state[10] = X##ka; \
-        output[10] = X##ka; \
-        state[11] = X##ke; \
-        output[11] = X##ke; \
-        state[12] = X##ki; \
-        output[12] = ~X##ki; \
-        state[13] = X##ko; \
-        output[13] = X##ko; \
-        state[14] = X##ku; \
-        output[14] = X##ku; \
-        state[15] = X##ma; \
-        output[15] = X##ma; \
-        if (laneCount < 24) { \
-            if (laneCount < 20) { \
-                if (laneCount < 18) { \
-                    state[16] = X##me; \
-                    if (laneCount >= 17) { \
-                        output[16] = X##me; \
-                    } \
-                    state[17] = X##mi; \
-                    state[18] = X##mo; \
-                } \
-                else { \
-                    state[16] = X##me; \
-                    output[16] = X##me; \
-                    state[17] = X##mi; \
-                    output[17] = ~X##mi; \
-                    state[18] = X##mo; \
-                    if (laneCount >= 19) { \
-                        output[18] = X##mo; \
-                    } \
-                } \
-                state[19] = X##mu; \
-                state[20] = X##sa; \
-                state[21] = X##se; \
-                state[22] = X##si; \
-            } \
-            else { \
-                state[16] = X##me; \
-                output[16] = X##me; \
-                state[17] = X##mi; \
-                output[17] = ~X##mi; \
-                state[18] = X##mo; \
-                output[18] = X##mo; \
-                state[19] = X##mu; \
-                output[19] = X##mu; \
-                if (laneCount < 22) { \
-                    state[20] = X##sa; \
-                    if (laneCount >= 21) { \
-                        output[20] = ~X##sa; \
-                    } \
-                    state[21] = X##se; \
-                    state[22] = X##si; \
-                } \
-                else { \
-                    state[20] = X##sa; \
-                    output[20] = ~X##sa; \
-                    state[21] = X##se; \
-                    output[21] = X##se; \
-                    state[22] = X##si; \
-                    if (laneCount >= 23) { \
-                        output[22] = X##si; \
-                    } \
-                } \
-            } \
-            state[23] = X##so; \
-            state[24] = X##su; \
-        } \
-        else { \
-            state[16] = X##me; \
-            output[16] = X##me; \
-            state[17] = X##mi; \
-            output[17] = ~X##mi; \
-            state[18] = X##mo; \
-            output[18] = X##mo; \
-            state[19] = X##mu; \
-            output[19] = X##mu; \
-            state[20] = X##sa; \
-            output[20] = ~X##sa; \
-            state[21] = X##se; \
-            output[21] = X##se; \
-            state[22] = X##si; \
-            output[22] = X##si; \
-            state[23] = X##so; \
-            output[23] = X##so; \
-            state[24] = X##su; \
-            if (laneCount >= 25) { \
-                output[24] = X##su; \
-            } \
-        } \
-    }
-
-#define output(X, output, laneCount) \
-    if (laneCount < 16) { \
-        if (laneCount < 8) { \
-            if (laneCount < 4) { \
-                if (laneCount < 2) { \
-                    if (laneCount >= 1) { \
-                        output[ 0] = X##ba; \
-                    } \
-                } \
-                else { \
-                    output[ 0] = X##ba; \
-                    output[ 1] = ~X##be; \
-                    if (laneCount >= 3) { \
-                        output[ 2] = ~X##bi; \
-                    } \
-                } \
-            } \
-            else { \
-                output[ 0] = X##ba; \
-                output[ 1] = ~X##be; \
-                output[ 2] = ~X##bi; \
-                output[ 3] = X##bo; \
-                if (laneCount < 6) { \
-                    if (laneCount >= 5) { \
-                        output[ 4] = X##bu; \
-                    } \
-                } \
-                else { \
-                    output[ 4] = X##bu; \
-                    output[ 5] = X##ga; \
-                    if (laneCount >= 7) { \
-                        output[ 6] = X##ge; \
-                    } \
-                } \
-            } \
-        } \
-        else { \
-            output[ 0] = X##ba; \
-            output[ 1] = ~X##be; \
-            output[ 2] = ~X##bi; \
-            output[ 3] = X##bo; \
-            output[ 4] = X##bu; \
-            output[ 5] = X##ga; \
-            output[ 6] = X##ge; \
-            output[ 7] = X##gi; \
-            if (laneCount < 12) { \
-                if (laneCount < 10) { \
-                    if (laneCount >= 9) { \
-                        output[ 8] = ~X##go; \
-                    } \
-                } \
-                else { \
-                    output[ 8] = ~X##go; \
-                    output[ 9] = X##gu; \
-                    if (laneCount >= 11) { \
-                        output[10] = X##ka; \
-                    } \
-                } \
-            } \
-            else { \
-                output[ 8] = ~X##go; \
-                output[ 9] = X##gu; \
-                output[10] = X##ka; \
-                output[11] = X##ke; \
-                if (laneCount < 14) { \
-                    if (laneCount >= 13) { \
-                        output[12] = ~X##ki; \
-                    } \
-                } \
-                else { \
-                    output[12] = ~X##ki; \
-                    output[13] = X##ko; \
-                    if (laneCount >= 15) { \
-                        output[14] = X##ku; \
-                    } \
-                } \
-            } \
-        } \
-    } \
-    else { \
-        output[ 0] = X##ba; \
-        output[ 1] = ~X##be; \
-        output[ 2] = ~X##bi; \
-        output[ 3] = X##bo; \
-        output[ 4] = X##bu; \
-        output[ 5] = X##ga; \
-        output[ 6] = X##ge; \
-        output[ 7] = X##gi; \
-        output[ 8] = ~X##go; \
-        output[ 9] = X##gu; \
-        output[10] = X##ka; \
-        output[11] = X##ke; \
-        output[12] = ~X##ki; \
-        output[13] = X##ko; \
-        output[14] = X##ku; \
-        output[15] = X##ma; \
-        if (laneCount < 24) { \
-            if (laneCount < 20) { \
-                if (laneCount < 18) { \
-                    if (laneCount >= 17) { \
-                        output[16] = X##me; \
-                    } \
-                } \
-                else { \
-                    output[16] = X##me; \
-                    output[17] = ~X##mi; \
-                    if (laneCount >= 19) { \
-                        output[18] = X##mo; \
-                    } \
-                } \
-            } \
-            else { \
-                output[16] = X##me; \
-                output[17] = ~X##mi; \
-                output[18] = X##mo; \
-                output[19] = X##mu; \
-                if (laneCount < 22) { \
-                    if (laneCount >= 21) { \
-                        output[20] = ~X##sa; \
-                    } \
-                } \
-                else { \
-                    output[20] = ~X##sa; \
-                    output[21] = X##se; \
-                    if (laneCount >= 23) { \
-                        output[22] = X##si; \
-                    } \
-                } \
-            } \
-        } \
-        else { \
-            output[16] = X##me; \
-            output[17] = ~X##mi; \
-            output[18] = X##mo; \
-            output[19] = X##mu; \
-            output[20] = ~X##sa; \
-            output[21] = X##se; \
-            output[22] = X##si; \
-            output[23] = X##so; \
-            if (laneCount >= 25) { \
-                output[24] = X##su; \
-            } \
-        } \
-    }
-
-#define wrapOne(X, input, output, index, name) \
-    X##name ^= input[index]; \
-    output[index] = X##name;
-
-#define wrapOneInvert(X, input, output, index, name) \
-    X##name ^= input[index]; \
-    output[index] = ~X##name;
-
-#define unwrapOne(X, input, output, index, name) \
-    output[index] = input[index] ^ X##name; \
-    X##name ^= output[index];
-
-#define unwrapOneInvert(X, input, output, index, name) \
-    output[index] = ~(input[index] ^ X##name); \
-    X##name ^= output[index]; \
-
-#else /* UseBebigokimisa */
-
-
-#define copyToStateAndOutput(X, state, output, laneCount) \
-    if (laneCount < 16) { \
-        if (laneCount < 8) { \
-            if (laneCount < 4) { \
-                if (laneCount < 2) { \
-                    state[ 0] = X##ba; \
-                    if (laneCount >= 1) { \
-                        output[ 0] = X##ba; \
-                    } \
-                    state[ 1] = X##be; \
-                    state[ 2] = X##bi; \
-                } \
-                else { \
-                    state[ 0] = X##ba; \
-                    output[ 0] = X##ba; \
-                    state[ 1] = X##be; \
-                    output[ 1] = X##be; \
-                    state[ 2] = X##bi; \
-                    if (laneCount >= 3) { \
-                        output[ 2] = X##bi; \
-                    } \
-                } \
-                state[ 3] = X##bo; \
-                state[ 4] = X##bu; \
-                state[ 5] = X##ga; \
-                state[ 6] = X##ge; \
-            } \
-            else { \
-                state[ 0] = X##ba; \
-                output[ 0] = X##ba; \
-                state[ 1] = X##be; \
-                output[ 1] = X##be; \
-                state[ 2] = X##bi; \
-                output[ 2] = X##bi; \
-                state[ 3] = X##bo; \
-                output[ 3] = X##bo; \
-                if (laneCount < 6) { \
-                    state[ 4] = X##bu; \
-                    if (laneCount >= 5) { \
-                        output[ 4] = X##bu; \
-                    } \
-                    state[ 5] = X##ga; \
-                    state[ 6] = X##ge; \
-                } \
-                else { \
-                    state[ 4] = X##bu; \
-                    output[ 4] = X##bu; \
-                    state[ 5] = X##ga; \
-                    output[ 5] = X##ga; \
-                    state[ 6] = X##ge; \
-                    if (laneCount >= 7) { \
-                        output[ 6] = X##ge; \
-                    } \
-                } \
-            } \
-            state[ 7] = X##gi; \
-            state[ 8] = X##go; \
-            state[ 9] = X##gu; \
-            state[10] = X##ka; \
-            state[11] = X##ke; \
-            state[12] = X##ki; \
-            state[13] = X##ko; \
-            state[14] = X##ku; \
-        } \
-        else { \
-            state[ 0] = X##ba; \
-            output[ 0] = X##ba; \
-            state[ 1] = X##be; \
-            output[ 1] = X##be; \
-            state[ 2] = X##bi; \
-            output[ 2] = X##bi; \
-            state[ 3] = X##bo; \
-            output[ 3] = X##bo; \
-            state[ 4] = X##bu; \
-            output[ 4] = X##bu; \
-            state[ 5] = X##ga; \
-            output[ 5] = X##ga; \
-            state[ 6] = X##ge; \
-            output[ 6] = X##ge; \
-            state[ 7] = X##gi; \
-            output[ 7] = X##gi; \
-            if (laneCount < 12) { \
-                if (laneCount < 10) { \
-                    state[ 8] = X##go; \
-                    if (laneCount >= 9) { \
-                        output[ 8] = X##go; \
-                    } \
-                    state[ 9] = X##gu; \
-                    state[10] = X##ka; \
-                } \
-                else { \
-                    state[ 8] = X##go; \
-                    output[ 8] = X##go; \
-                    state[ 9] = X##gu; \
-                    output[ 9] = X##gu; \
-                    state[10] = X##ka; \
-                    if (laneCount >= 11) { \
-                        output[10] = X##ka; \
-                    } \
-                } \
-                state[11] = X##ke; \
-                state[12] = X##ki; \
-                state[13] = X##ko; \
-                state[14] = X##ku; \
-            } \
-            else { \
-                state[ 8] = X##go; \
-                output[ 8] = X##go; \
-                state[ 9] = X##gu; \
-                output[ 9] = X##gu; \
-                state[10] = X##ka; \
-                output[10] = X##ka; \
-                state[11] = X##ke; \
-                output[11] = X##ke; \
-                if (laneCount < 14) { \
-                    state[12] = X##ki; \
-                    if (laneCount >= 13) { \
-                        output[12]= X##ki; \
-                    } \
-                    state[13] = X##ko; \
-                    state[14] = X##ku; \
-                } \
-                else { \
-                    state[12] = X##ki; \
-                    output[12]= X##ki; \
-                    state[13] = X##ko; \
-                    output[13] = X##ko; \
-                    state[14] = X##ku; \
-                    if (laneCount >= 15) { \
-                        output[14] = X##ku; \
-                    } \
-                } \
-            } \
-        } \
-        state[15] = X##ma; \
-        state[16] = X##me; \
-        state[17] = X##mi; \
-        state[18] = X##mo; \
-        state[19] = X##mu; \
-        state[20] = X##sa; \
-        state[21] = X##se; \
-        state[22] = X##si; \
-        state[23] = X##so; \
-        state[24] = X##su; \
-    } \
-    else { \
-        state[ 0] = X##ba; \
-        output[ 0] = X##ba; \
-        state[ 1] = X##be; \
-        output[ 1] = X##be; \
-        state[ 2] = X##bi; \
-        output[ 2] = X##bi; \
-        state[ 3] = X##bo; \
-        output[ 3] = X##bo; \
-        state[ 4] = X##bu; \
-        output[ 4] = X##bu; \
-        state[ 5] = X##ga; \
-        output[ 5] = X##ga; \
-        state[ 6] = X##ge; \
-        output[ 6] = X##ge; \
-        state[ 7] = X##gi; \
-        output[ 7] = X##gi; \
-        state[ 8] = X##go; \
-        output[ 8] = X##go; \
-        state[ 9] = X##gu; \
-        output[ 9] = X##gu; \
-        state[10] = X##ka; \
-        output[10] = X##ka; \
-        state[11] = X##ke; \
-        output[11] = X##ke; \
-        state[12] = X##ki; \
-        output[12]= X##ki; \
-        state[13] = X##ko; \
-        output[13] = X##ko; \
-        state[14] = X##ku; \
-        output[14] = X##ku; \
-        state[15] = X##ma; \
-        output[15] = X##ma; \
-        if (laneCount < 24) { \
-            if (laneCount < 20) { \
-                if (laneCount < 18) { \
-                    state[16] = X##me; \
-                    if (laneCount >= 17) { \
-                        output[16] = X##me; \
-                    } \
-                    state[17] = X##mi; \
-                    state[18] = X##mo; \
-                } \
-                else { \
-                    state[16] = X##me; \
-                    output[16] = X##me; \
-                    state[17] = X##mi; \
-                    output[17] = X##mi; \
-                    state[18] = X##mo; \
-                    if (laneCount >= 19) { \
-                        output[18] = X##mo; \
-                    } \
-                } \
-                state[19] = X##mu; \
-                state[20] = X##sa; \
-                state[21] = X##se; \
-                state[22] = X##si; \
-            } \
-            else { \
-                state[16] = X##me; \
-                output[16] = X##me; \
-                state[17] = X##mi; \
-                output[17] = X##mi; \
-                state[18] = X##mo; \
-                output[18] = X##mo; \
-                state[19] = X##mu; \
-                output[19] = X##mu; \
-                if (laneCount < 22) { \
-                    state[20] = X##sa; \
-                    if (laneCount >= 21) { \
-                        output[20] = X##sa; \
-                    } \
-                    state[21] = X##se; \
-                    state[22] = X##si; \
-                } \
-                else { \
-                    state[20] = X##sa; \
-                    output[20] = X##sa; \
-                    state[21] = X##se; \
-                    output[21] = X##se; \
-                    state[22] = X##si; \
-                    if (laneCount >= 23) { \
-                        output[22] = X##si; \
-                    } \
-                } \
-            } \
-            state[23] = X##so; \
-            state[24] = X##su; \
-        } \
-        else { \
-            state[16] = X##me; \
-            output[16] = X##me; \
-            state[17] = X##mi; \
-            output[17] = X##mi; \
-            state[18] = X##mo; \
-            output[18] = X##mo; \
-            state[19] = X##mu; \
-            output[19] = X##mu; \
-            state[20] = X##sa; \
-            output[20] = X##sa; \
-            state[21] = X##se; \
-            output[21] = X##se; \
-            state[22] = X##si; \
-            output[22] = X##si; \
-            state[23] = X##so; \
-            output[23] = X##so; \
-            state[24] = X##su; \
-            if (laneCount >= 25) { \
-                output[24] = X##su; \
-            } \
-        } \
-    }
-
-#define output(X, output, laneCount) \
-    if (laneCount < 16) { \
-        if (laneCount < 8) { \
-            if (laneCount < 4) { \
-                if (laneCount < 2) { \
-                    if (laneCount >= 1) { \
-                        output[ 0] = X##ba; \
-                    } \
-                } \
-                else { \
-                    output[ 0] = X##ba; \
-                    output[ 1] = X##be; \
-                    if (laneCount >= 3) { \
-                        output[ 2] = X##bi; \
-                    } \
-                } \
-            } \
-            else { \
-                output[ 0] = X##ba; \
-                output[ 1] = X##be; \
-                output[ 2] = X##bi; \
-                output[ 3] = X##bo; \
-                if (laneCount < 6) { \
-                    if (laneCount >= 5) { \
-                        output[ 4] = X##bu; \
-                    } \
-                } \
-                else { \
-                    output[ 4] = X##bu; \
-                    output[ 5] = X##ga; \
-                    if (laneCount >= 7) { \
-                        output[ 6] = X##ge; \
-                    } \
-                } \
-            } \
-        } \
-        else { \
-            output[ 0] = X##ba; \
-            output[ 1] = X##be; \
-            output[ 2] = X##bi; \
-            output[ 3] = X##bo; \
-            output[ 4] = X##bu; \
-            output[ 5] = X##ga; \
-            output[ 6] = X##ge; \
-            output[ 7] = X##gi; \
-            if (laneCount < 12) { \
-                if (laneCount < 10) { \
-                    if (laneCount >= 9) { \
-                        output[ 8] = X##go; \
-                    } \
-                } \
-                else { \
-                    output[ 8] = X##go; \
-                    output[ 9] = X##gu; \
-                    if (laneCount >= 11) { \
-                        output[10] = X##ka; \
-                    } \
-                } \
-            } \
-            else { \
-                output[ 8] = X##go; \
-                output[ 9] = X##gu; \
-                output[10] = X##ka; \
-                output[11] = X##ke; \
-                if (laneCount < 14) { \
-                    if (laneCount >= 13) { \
-                        output[12] = X##ki; \
-                    } \
-                } \
-                else { \
-                    output[12] = X##ki; \
-                    output[13] = X##ko; \
-                    if (laneCount >= 15) { \
-                        output[14] = X##ku; \
-                    } \
-                } \
-            } \
-        } \
-    } \
-    else { \
-        output[ 0] = X##ba; \
-        output[ 1] = X##be; \
-        output[ 2] = X##bi; \
-        output[ 3] = X##bo; \
-        output[ 4] = X##bu; \
-        output[ 5] = X##ga; \
-        output[ 6] = X##ge; \
-        output[ 7] = X##gi; \
-        output[ 8] = X##go; \
-        output[ 9] = X##gu; \
-        output[10] = X##ka; \
-        output[11] = X##ke; \
-        output[12] = X##ki; \
-        output[13] = X##ko; \
-        output[14] = X##ku; \
-        output[15] = X##ma; \
-        if (laneCount < 24) { \
-            if (laneCount < 20) { \
-                if (laneCount < 18) { \
-                    if (laneCount >= 17) { \
-                        output[16] = X##me; \
-                    } \
-                } \
-                else { \
-                    output[16] = X##me; \
-                    output[17] = X##mi; \
-                    if (laneCount >= 19) { \
-                        output[18] = X##mo; \
-                    } \
-                } \
-            } \
-            else { \
-                output[16] = X##me; \
-                output[17] = X##mi; \
-                output[18] = X##mo; \
-                output[19] = X##mu; \
-                if (laneCount < 22) { \
-                    if (laneCount >= 21) { \
-                        output[20] = X##sa; \
-                    } \
-                } \
-                else { \
-                    output[20] = X##sa; \
-                    output[21] = X##se; \
-                    if (laneCount >= 23) { \
-                        output[22] = X##si; \
-                    } \
-                } \
-            } \
-        } \
-        else { \
-            output[16] = X##me; \
-            output[17] = X##mi; \
-            output[18] = X##mo; \
-            output[19] = X##mu; \
-            output[20] = X##sa; \
-            output[21] = X##se; \
-            output[22] = X##si; \
-            output[23] = X##so; \
-            if (laneCount >= 25) { \
-                output[24] = X##su; \
-            } \
-        } \
-    }
-
-#define wrapOne(X, input, output, index, name) \
-    X##name ^= input[index]; \
-    output[index] = X##name;
-
-#define wrapOneInvert(X, input, output, index, name) \
-    X##name ^= input[index]; \
-    output[index] = X##name;
-
-#define unwrapOne(X, input, output, index, name) \
-    output[index] = input[index] ^ X##name; \
-    X##name ^= output[index];
-
-#define unwrapOneInvert(X, input, output, index, name) \
-    output[index] = input[index] ^ X##name; \
-    X##name ^= output[index];
-
-#endif
-
-#define wrap(X, input, output, laneCount, trailingBits) \
-    if (laneCount < 16) { \
-        if (laneCount < 8) { \
-            if (laneCount < 4) { \
-                if (laneCount < 2) { \
-                    if (laneCount < 1) { \
-                        X##ba ^= trailingBits; \
-                    } \
-                    else { \
-                        wrapOne(X, input, output, 0, ba) \
-                        X##be ^= trailingBits; \
-                    } \
-                } \
-                else { \
-                    wrapOne(X, input, output, 0, ba) \
-                    wrapOneInvert(X, input, output, 1, be) \
-                    if (laneCount < 3) { \
-                        X##bi ^= trailingBits; \
-                    } \
-                    else { \
-                        wrapOneInvert(X, input, output, 2, bi) \
-                        X##bo ^= trailingBits; \
-                    } \
-                } \
-            } \
-            else { \
-                wrapOne(X, input, output, 0, ba) \
-                wrapOneInvert(X, input, output, 1, be) \
-                wrapOneInvert(X, input, output, 2, bi) \
-                wrapOne(X, input, output, 3, bo) \
-                if (laneCount < 6) { \
-                    if (laneCount < 5) { \
-                        X##bu ^= trailingBits; \
-                    } \
-                    else { \
-                        wrapOne(X, input, output, 4, bu) \
-                        X##ga ^= trailingBits; \
-                    } \
-                } \
-                else { \
-                    wrapOne(X, input, output, 4, bu) \
-                    wrapOne(X, input, output, 5, ga) \
-                    if (laneCount < 7) { \
-                        X##ge ^= trailingBits; \
-                    } \
-                    else { \
-                        wrapOne(X, input, output, 6, ge) \
-                        X##gi ^= trailingBits; \
-                    } \
-                } \
-            } \
-        } \
-        else { \
-            wrapOne(X, input, output, 0, ba) \
-            wrapOneInvert(X, input, output, 1, be) \
-            wrapOneInvert(X, input, output, 2, bi) \
-            wrapOne(X, input, output, 3, bo) \
-            wrapOne(X, input, output, 4, bu) \
-            wrapOne(X, input, output, 5, ga) \
-            wrapOne(X, input, output, 6, ge) \
-            wrapOne(X, input, output, 7, gi) \
-            if (laneCount < 12) { \
-                if (laneCount < 10) { \
-                    if (laneCount < 9) { \
-                        X##go ^= trailingBits; \
-                    } \
-                    else { \
-                        wrapOneInvert(X, input, output, 8, go) \
-                        X##gu ^= trailingBits; \
-                    } \
-                } \
-                else { \
-                    wrapOneInvert(X, input, output, 8, go) \
-                    wrapOne(X, input, output, 9, gu) \
-                    if (laneCount < 11) { \
-                        X##ka ^= trailingBits; \
-                    } \
-                    else { \
-                        wrapOne(X, input, output, 10, ka) \
-                        X##ke ^= trailingBits; \
-                    } \
-                } \
-            } \
-            else { \
-                wrapOneInvert(X, input, output, 8, go) \
-                wrapOne(X, input, output, 9, gu) \
-                wrapOne(X, input, output, 10, ka) \
-                wrapOne(X, input, output, 11, ke) \
-                if (laneCount < 14) { \
-                    if (laneCount < 13) { \
-                        X##ki ^= trailingBits; \
-                    } \
-                    else { \
-                        wrapOneInvert(X, input, output, 12, ki) \
-                        X##ko ^= trailingBits; \
-                    } \
-                } \
-                else { \
-                    wrapOneInvert(X, input, output, 12, ki) \
-                    wrapOne(X, input, output, 13, ko) \
-                    if (laneCount < 15) { \
-                        X##ku ^= trailingBits; \
-                    } \
-                    else { \
-                        wrapOne(X, input, output, 14, ku) \
-                        X##ma ^= trailingBits; \
-                    } \
-                } \
-            } \
-        } \
-    } \
-    else { \
-        wrapOne(X, input, output, 0, ba) \
-        wrapOneInvert(X, input, output, 1, be) \
-        wrapOneInvert(X, input, output, 2, bi) \
-        wrapOne(X, input, output, 3, bo) \
-        wrapOne(X, input, output, 4, bu) \
-        wrapOne(X, input, output, 5, ga) \
-        wrapOne(X, input, output, 6, ge) \
-        wrapOne(X, input, output, 7, gi) \
-        wrapOneInvert(X, input, output, 8, go) \
-        wrapOne(X, input, output, 9, gu) \
-        wrapOne(X, input, output, 10, ka) \
-        wrapOne(X, input, output, 11, ke) \
-        wrapOneInvert(X, input, output, 12, ki) \
-        wrapOne(X, input, output, 13, ko) \
-        wrapOne(X, input, output, 14, ku) \
-        wrapOne(X, input, output, 15, ma) \
-        if (laneCount < 24) { \
-            if (laneCount < 20) { \
-                if (laneCount < 18) { \
-                    if (laneCount < 17) { \
-                        X##me ^= trailingBits; \
-                    } \
-                    else { \
-                        wrapOne(X, input, output, 16, me) \
-                        X##mi ^= trailingBits; \
-                    } \
-                } \
-                else { \
-                    wrapOne(X, input, output, 16, me) \
-                    wrapOneInvert(X, input, output, 17, mi) \
-                    if (laneCount < 19) { \
-                        X##mo ^= trailingBits; \
-                    } \
-                    else { \
-                        wrapOne(X, input, output, 18, mo) \
-                        X##mu ^= trailingBits; \
-                    } \
-                } \
-            } \
-            else { \
-                wrapOne(X, input, output, 16, me) \
-                wrapOneInvert(X, input, output, 17, mi) \
-                wrapOne(X, input, output, 18, mo) \
-                wrapOne(X, input, output, 19, mu) \
-                if (laneCount < 22) { \
-                    if (laneCount < 21) { \
-                        X##sa ^= trailingBits; \
-                    } \
-                    else { \
-                        wrapOneInvert(X, input, output, 20, sa) \
-                        X##se ^= trailingBits; \
-                    } \
-                } \
-                else { \
-                    wrapOneInvert(X, input, output, 20, sa) \
-                    wrapOne(X, input, output, 21, se) \
-                    if (laneCount < 23) { \
-                        X##si ^= trailingBits; \
-                    } \
-                    else { \
-                        wrapOne(X, input, output, 22, si) \
-                        X##so ^= trailingBits; \
-                    } \
-                } \
-            } \
-        } \
-        else { \
-            wrapOne(X, input, output, 16, me) \
-            wrapOneInvert(X, input, output, 17, mi) \
-            wrapOne(X, input, output, 18, mo) \
-            wrapOne(X, input, output, 19, mu) \
-            wrapOneInvert(X, input, output, 20, sa) \
-            wrapOne(X, input, output, 21, se) \
-            wrapOne(X, input, output, 22, si) \
-            wrapOne(X, input, output, 23, so) \
-            if (laneCount < 25) { \
-                X##su ^= trailingBits; \
-            } \
-            else { \
-                wrapOne(X, input, output, 24, su) \
-            } \
-        } \
-    }
-
-#define unwrap(X, input, output, laneCount, trailingBits) \
-    if (laneCount < 16) { \
-        if (laneCount < 8) { \
-            if (laneCount < 4) { \
-                if (laneCount < 2) { \
-                    if (laneCount < 1) { \
-                        X##ba ^= trailingBits; \
-                    } \
-                    else { \
-                        unwrapOne(X, input, output, 0, ba) \
-                        X##be ^= trailingBits; \
-                    } \
-                } \
-                else { \
-                    unwrapOne(X, input, output, 0, ba) \
-                    unwrapOneInvert(X, input, output, 1, be) \
-                    if (laneCount < 3) { \
-                        X##bi ^= trailingBits; \
-                    } \
-                    else { \
-                        unwrapOneInvert(X, input, output, 2, bi) \
-                        X##bo ^= trailingBits; \
-                    } \
-                } \
-            } \
-            else { \
-                unwrapOne(X, input, output, 0, ba) \
-                unwrapOneInvert(X, input, output, 1, be) \
-                unwrapOneInvert(X, input, output, 2, bi) \
-                unwrapOne(X, input, output, 3, bo) \
-                if (laneCount < 6) { \
-                    if (laneCount < 5) { \
-                        X##bu ^= trailingBits; \
-                    } \
-                    else { \
-                        unwrapOne(X, input, output, 4, bu) \
-                        X##ga ^= trailingBits; \
-                    } \
-                } \
-                else { \
-                    unwrapOne(X, input, output, 4, bu) \
-                    unwrapOne(X, input, output, 5, ga) \
-                    if (laneCount < 7) { \
-                        X##ge ^= trailingBits; \
-                    } \
-                    else { \
-                        unwrapOne(X, input, output, 6, ge) \
-                        X##gi ^= trailingBits; \
-                    } \
-                } \
-            } \
-        } \
-        else { \
-            unwrapOne(X, input, output, 0, ba) \
-            unwrapOneInvert(X, input, output, 1, be) \
-            unwrapOneInvert(X, input, output, 2, bi) \
-            unwrapOne(X, input, output, 3, bo) \
-            unwrapOne(X, input, output, 4, bu) \
-            unwrapOne(X, input, output, 5, ga) \
-            unwrapOne(X, input, output, 6, ge) \
-            unwrapOne(X, input, output, 7, gi) \
-            if (laneCount < 12) { \
-                if (laneCount < 10) { \
-                    if (laneCount < 9) { \
-                        X##go ^= trailingBits; \
-                    } \
-                    else { \
-                        unwrapOneInvert(X, input, output, 8, go) \
-                        X##gu ^= trailingBits; \
-                    } \
-                } \
-                else { \
-                    unwrapOneInvert(X, input, output, 8, go) \
-                    unwrapOne(X, input, output, 9, gu) \
-                    if (laneCount < 11) { \
-                        X##ka ^= trailingBits; \
-                    } \
-                    else { \
-                        unwrapOne(X, input, output, 10, ka) \
-                        X##ke ^= trailingBits; \
-                    } \
-                } \
-            } \
-            else { \
-                unwrapOneInvert(X, input, output, 8, go) \
-                unwrapOne(X, input, output, 9, gu) \
-                unwrapOne(X, input, output, 10, ka) \
-                unwrapOne(X, input, output, 11, ke) \
-                if (laneCount < 14) { \
-                    if (laneCount < 13) { \
-                        X##ki ^= trailingBits; \
-                    } \
-                    else { \
-                        unwrapOneInvert(X, input, output, 12, ki) \
-                        X##ko ^= trailingBits; \
-                    } \
-                } \
-                else { \
-                    unwrapOneInvert(X, input, output, 12, ki) \
-                    unwrapOne(X, input, output, 13, ko) \
-                    if (laneCount < 15) { \
-                        X##ku ^= trailingBits; \
-                    } \
-                    else { \
-                        unwrapOne(X, input, output, 14, ku) \
-                        X##ma ^= trailingBits; \
-                    } \
-                } \
-            } \
-        } \
-    } \
-    else { \
-        unwrapOne(X, input, output, 0, ba) \
-        unwrapOneInvert(X, input, output, 1, be) \
-        unwrapOneInvert(X, input, output, 2, bi) \
-        unwrapOne(X, input, output, 3, bo) \
-        unwrapOne(X, input, output, 4, bu) \
-        unwrapOne(X, input, output, 5, ga) \
-        unwrapOne(X, input, output, 6, ge) \
-        unwrapOne(X, input, output, 7, gi) \
-        unwrapOneInvert(X, input, output, 8, go) \
-        unwrapOne(X, input, output, 9, gu) \
-        unwrapOne(X, input, output, 10, ka) \
-        unwrapOne(X, input, output, 11, ke) \
-        unwrapOneInvert(X, input, output, 12, ki) \
-        unwrapOne(X, input, output, 13, ko) \
-        unwrapOne(X, input, output, 14, ku) \
-        unwrapOne(X, input, output, 15, ma) \
-        if (laneCount < 24) { \
-            if (laneCount < 20) { \
-                if (laneCount < 18) { \
-                    if (laneCount < 17) { \
-                        X##me ^= trailingBits; \
-                    } \
-                    else { \
-                        unwrapOne(X, input, output, 16, me) \
-                        X##mi ^= trailingBits; \
-                    } \
-                } \
-                else { \
-                    unwrapOne(X, input, output, 16, me) \
-                    unwrapOneInvert(X, input, output, 17, mi) \
-                    if (laneCount < 19) { \
-                        X##mo ^= trailingBits; \
-                    } \
-                    else { \
-                        unwrapOne(X, input, output, 18, mo) \
-                        X##mu ^= trailingBits; \
-                    } \
-                } \
-            } \
-            else { \
-                unwrapOne(X, input, output, 16, me) \
-                unwrapOneInvert(X, input, output, 17, mi) \
-                unwrapOne(X, input, output, 18, mo) \
-                unwrapOne(X, input, output, 19, mu) \
-                if (laneCount < 22) { \
-                    if (laneCount < 21) { \
-                        X##sa ^= trailingBits; \
-                    } \
-                    else { \
-                        unwrapOneInvert(X, input, output, 20, sa) \
-                        X##se ^= trailingBits; \
-                    } \
-                } \
-                else { \
-                    unwrapOneInvert(X, input, output, 20, sa) \
-                    unwrapOne(X, input, output, 21, se) \
-                    if (laneCount < 23) { \
-                        X##si ^= trailingBits; \
-                    } \
-                    else { \
-                        unwrapOne(X, input, output, 22, si) \
-                        X##so ^= trailingBits; \
-                    } \
-                } \
-            } \
-        } \
-        else { \
-            unwrapOne(X, input, output, 16, me) \
-            unwrapOneInvert(X, input, output, 17, mi) \
-            unwrapOne(X, input, output, 18, mo) \
-            unwrapOne(X, input, output, 19, mu) \
-            unwrapOneInvert(X, input, output, 20, sa) \
-            unwrapOne(X, input, output, 21, se) \
-            unwrapOne(X, input, output, 22, si) \
-            unwrapOne(X, input, output, 23, so) \
-            if (laneCount < 25) { \
-                X##su ^= trailingBits; \
-            } \
-            else { \
-                unwrapOne(X, input, output, 24, su) \
-            } \
-        } \
-    }
diff --git a/Modules/_sha3/kcp/KeccakP-1600-SnP-opt32.h b/Modules/_sha3/kcp/KeccakP-1600-SnP-opt32.h
deleted file mode 100644 (file)
index 6cf765e..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
-Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
-Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
-denoted as "the implementer".
-
-For more information, feedback or questions, please refer to our websites:
-http://keccak.noekeon.org/
-http://keyak.noekeon.org/
-http://ketje.noekeon.org/
-
-To the extent possible under law, the implementer has waived all copyright
-and related or neighboring rights to the source code in this file.
-http://creativecommons.org/publicdomain/zero/1.0/
-*/
-
-#ifndef _KeccakP_1600_SnP_h_
-#define _KeccakP_1600_SnP_h_
-
-/** For the documentation, see SnP-documentation.h.
- */
-
-#define KeccakP1600_implementation      "in-place 32-bit optimized implementation"
-#define KeccakP1600_stateSizeInBytes    200
-#define KeccakP1600_stateAlignment      8
-
-#define KeccakP1600_StaticInitialize()
-void KeccakP1600_Initialize(void *state);
-void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
-void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
-void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
-void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
-void KeccakP1600_Permute_12rounds(void *state);
-void KeccakP1600_Permute_24rounds(void *state);
-void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
-void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
-
-#endif
diff --git a/Modules/_sha3/kcp/KeccakP-1600-SnP-opt64.h b/Modules/_sha3/kcp/KeccakP-1600-SnP-opt64.h
deleted file mode 100644 (file)
index 889a31a..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
-Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
-Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
-denoted as "the implementer".
-
-For more information, feedback or questions, please refer to our websites:
-http://keccak.noekeon.org/
-http://keyak.noekeon.org/
-http://ketje.noekeon.org/
-
-To the extent possible under law, the implementer has waived all copyright
-and related or neighboring rights to the source code in this file.
-http://creativecommons.org/publicdomain/zero/1.0/
-*/
-
-#ifndef _KeccakP_1600_SnP_h_
-#define _KeccakP_1600_SnP_h_
-
-/** For the documentation, see SnP-documentation.h.
- */
-
-/* #include "brg_endian.h" */
-#include "KeccakP-1600-opt64-config.h"
-
-#define KeccakP1600_implementation      "generic 64-bit optimized implementation (" KeccakP1600_implementation_config ")"
-#define KeccakP1600_stateSizeInBytes    200
-#define KeccakP1600_stateAlignment      8
-#define KeccakF1600_FastLoop_supported
-
-#include <stddef.h>
-
-#define KeccakP1600_StaticInitialize()
-void KeccakP1600_Initialize(void *state);
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-#define KeccakP1600_AddByte(state, byte, offset) \
-    ((unsigned char*)(state))[(offset)] ^= (byte)
-#else
-void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
-#endif
-void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
-void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
-void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
-void KeccakP1600_Permute_12rounds(void *state);
-void KeccakP1600_Permute_24rounds(void *state);
-void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
-void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
-size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
-
-#endif
diff --git a/Modules/_sha3/kcp/KeccakP-1600-SnP.h b/Modules/_sha3/kcp/KeccakP-1600-SnP.h
deleted file mode 100644 (file)
index 0b23f09..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#if KeccakOpt == 64
-  #include "KeccakP-1600-SnP-opt64.h"
-#elif KeccakOpt == 32
-  #include "KeccakP-1600-SnP-opt32.h"
-#else
-  #error "No KeccakOpt"
-#endif
diff --git a/Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c b/Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c
deleted file mode 100644 (file)
index a2f9ffe..0000000
+++ /dev/null
@@ -1,1162 +0,0 @@
-/*
-Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
-Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
-denoted as "the implementer".
-
-For more information, feedback or questions, please refer to our websites:
-http://keccak.noekeon.org/
-http://keyak.noekeon.org/
-http://ketje.noekeon.org/
-
-To the extent possible under law, the implementer has waived all copyright
-and related or neighboring rights to the source code in this file.
-http://creativecommons.org/publicdomain/zero/1.0/
-*/
-
-#include    <string.h>
-/* #include "brg_endian.h" */
-#include "KeccakP-1600-SnP.h"
-#include "SnP-Relaned.h"
-
-typedef unsigned char UINT8;
-typedef unsigned int UINT32;
-/* WARNING: on 8-bit and 16-bit platforms, this should be replaced by: */
-
-/*typedef unsigned long       UINT32; */
-
-
-#define ROL32(a, offset) ((((UINT32)a) << (offset)) ^ (((UINT32)a) >> (32-(offset))))
-
-/* Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
-
-#define prepareToBitInterleaving(low, high, temp, temp0, temp1) \
-        temp0 = (low); \
-        temp = (temp0 ^ (temp0 >>  1)) & 0x22222222UL;  temp0 = temp0 ^ temp ^ (temp <<  1); \
-        temp = (temp0 ^ (temp0 >>  2)) & 0x0C0C0C0CUL;  temp0 = temp0 ^ temp ^ (temp <<  2); \
-        temp = (temp0 ^ (temp0 >>  4)) & 0x00F000F0UL;  temp0 = temp0 ^ temp ^ (temp <<  4); \
-        temp = (temp0 ^ (temp0 >>  8)) & 0x0000FF00UL;  temp0 = temp0 ^ temp ^ (temp <<  8); \
-        temp1 = (high); \
-        temp = (temp1 ^ (temp1 >>  1)) & 0x22222222UL;  temp1 = temp1 ^ temp ^ (temp <<  1); \
-        temp = (temp1 ^ (temp1 >>  2)) & 0x0C0C0C0CUL;  temp1 = temp1 ^ temp ^ (temp <<  2); \
-        temp = (temp1 ^ (temp1 >>  4)) & 0x00F000F0UL;  temp1 = temp1 ^ temp ^ (temp <<  4); \
-        temp = (temp1 ^ (temp1 >>  8)) & 0x0000FF00UL;  temp1 = temp1 ^ temp ^ (temp <<  8);
-
-#define toBitInterleavingAndXOR(low, high, even, odd, temp, temp0, temp1) \
-        prepareToBitInterleaving(low, high, temp, temp0, temp1) \
-        even ^= (temp0 & 0x0000FFFF) | (temp1 << 16); \
-        odd ^= (temp0 >> 16) | (temp1 & 0xFFFF0000);
-
-#define toBitInterleavingAndAND(low, high, even, odd, temp, temp0, temp1) \
-        prepareToBitInterleaving(low, high, temp, temp0, temp1) \
-        even &= (temp0 & 0x0000FFFF) | (temp1 << 16); \
-        odd &= (temp0 >> 16) | (temp1 & 0xFFFF0000);
-
-#define toBitInterleavingAndSet(low, high, even, odd, temp, temp0, temp1) \
-        prepareToBitInterleaving(low, high, temp, temp0, temp1) \
-        even = (temp0 & 0x0000FFFF) | (temp1 << 16); \
-        odd = (temp0 >> 16) | (temp1 & 0xFFFF0000);
-
-/* Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
-
-#define prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
-        temp0 = (even); \
-        temp1 = (odd); \
-        temp = (temp0 & 0x0000FFFF) | (temp1 << 16); \
-        temp1 = (temp0 >> 16) | (temp1 & 0xFFFF0000); \
-        temp0 = temp; \
-        temp = (temp0 ^ (temp0 >>  8)) & 0x0000FF00UL;  temp0 = temp0 ^ temp ^ (temp <<  8); \
-        temp = (temp0 ^ (temp0 >>  4)) & 0x00F000F0UL;  temp0 = temp0 ^ temp ^ (temp <<  4); \
-        temp = (temp0 ^ (temp0 >>  2)) & 0x0C0C0C0CUL;  temp0 = temp0 ^ temp ^ (temp <<  2); \
-        temp = (temp0 ^ (temp0 >>  1)) & 0x22222222UL;  temp0 = temp0 ^ temp ^ (temp <<  1); \
-        temp = (temp1 ^ (temp1 >>  8)) & 0x0000FF00UL;  temp1 = temp1 ^ temp ^ (temp <<  8); \
-        temp = (temp1 ^ (temp1 >>  4)) & 0x00F000F0UL;  temp1 = temp1 ^ temp ^ (temp <<  4); \
-        temp = (temp1 ^ (temp1 >>  2)) & 0x0C0C0C0CUL;  temp1 = temp1 ^ temp ^ (temp <<  2); \
-        temp = (temp1 ^ (temp1 >>  1)) & 0x22222222UL;  temp1 = temp1 ^ temp ^ (temp <<  1);
-
-#define fromBitInterleaving(even, odd, low, high, temp, temp0, temp1) \
-        prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
-        low = temp0; \
-        high = temp1;
-
-#define fromBitInterleavingAndXOR(even, odd, lowIn, highIn, lowOut, highOut, temp, temp0, temp1) \
-        prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \
-        lowOut = lowIn ^ temp0; \
-        highOut = highIn ^ temp1;
-
-void KeccakP1600_SetBytesInLaneToZero(void *state, unsigned int lanePosition, unsigned int offset, unsigned int length)
-{
-    UINT8 laneAsBytes[8];
-    UINT32 low, high;
-    UINT32 temp, temp0, temp1;
-    UINT32 *stateAsHalfLanes = (UINT32*)state;
-
-    memset(laneAsBytes, 0xFF, offset);
-    memset(laneAsBytes+offset, 0x00, length);
-    memset(laneAsBytes+offset+length, 0xFF, 8-offset-length);
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-    low = *((UINT32*)(laneAsBytes+0));
-    high = *((UINT32*)(laneAsBytes+4));
-#else
-    low = laneAsBytes[0]
-        | ((UINT32)(laneAsBytes[1]) << 8)
-        | ((UINT32)(laneAsBytes[2]) << 16)
-        | ((UINT32)(laneAsBytes[3]) << 24);
-    high = laneAsBytes[4]
-        | ((UINT32)(laneAsBytes[5]) << 8)
-        | ((UINT32)(laneAsBytes[6]) << 16)
-        | ((UINT32)(laneAsBytes[7]) << 24);
-#endif
-    toBitInterleavingAndAND(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_Initialize(void *state)
-{
-    memset(state, 0, 200);
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
-{
-    unsigned int lanePosition = offset/8;
-    unsigned int offsetInLane = offset%8;
-    UINT32 low, high;
-    UINT32 temp, temp0, temp1;
-    UINT32 *stateAsHalfLanes = (UINT32*)state;
-
-    if (offsetInLane < 4) {
-        low = (UINT32)byte << (offsetInLane*8);
-        high = 0;
-    }
-    else {
-        low = 0;
-        high = (UINT32)byte << ((offsetInLane-4)*8);
-    }
-    toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
-{
-    UINT8 laneAsBytes[8];
-    UINT32 low, high;
-    UINT32 temp, temp0, temp1;
-    UINT32 *stateAsHalfLanes = (UINT32*)state;
-
-    memset(laneAsBytes, 0, 8);
-    memcpy(laneAsBytes+offset, data, length);
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-    low = *((UINT32*)(laneAsBytes+0));
-    high = *((UINT32*)(laneAsBytes+4));
-#else
-    low = laneAsBytes[0]
-        | ((UINT32)(laneAsBytes[1]) << 8)
-        | ((UINT32)(laneAsBytes[2]) << 16)
-        | ((UINT32)(laneAsBytes[3]) << 24);
-    high = laneAsBytes[4]
-        | ((UINT32)(laneAsBytes[5]) << 8)
-        | ((UINT32)(laneAsBytes[6]) << 16)
-        | ((UINT32)(laneAsBytes[7]) << 24);
-#endif
-    toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount)
-{
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-    const UINT32 * pI = (const UINT32 *)data;
-    UINT32 * pS = (UINT32*)state;
-    UINT32 t, x0, x1;
-    int i;
-    for (i = laneCount-1; i >= 0; --i) {
-#ifdef NO_MISALIGNED_ACCESSES
-        UINT32 low;
-        UINT32 high;
-        memcpy(&low, pI++, 4);
-        memcpy(&high, pI++, 4);
-        toBitInterleavingAndXOR(low, high, *(pS++), *(pS++), t, x0, x1);
-#else
-        toBitInterleavingAndXOR(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1)
-#endif
-    }
-#else
-    unsigned int lanePosition;
-    for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
-        UINT8 laneAsBytes[8];
-        UINT32 low, high, temp, temp0, temp1;
-        UINT32 *stateAsHalfLanes;
-        memcpy(laneAsBytes, data+lanePosition*8, 8);
-        low = laneAsBytes[0]
-            | ((UINT32)(laneAsBytes[1]) << 8)
-            | ((UINT32)(laneAsBytes[2]) << 16)
-            | ((UINT32)(laneAsBytes[3]) << 24);
-        high = laneAsBytes[4]
-            | ((UINT32)(laneAsBytes[5]) << 8)
-            | ((UINT32)(laneAsBytes[6]) << 16)
-            | ((UINT32)(laneAsBytes[7]) << 24);
-        stateAsHalfLanes = (UINT32*)state;
-        toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
-    }
-#endif
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
-{
-    SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8);
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
-{
-    KeccakP1600_SetBytesInLaneToZero(state, lanePosition, offset, length);
-    KeccakP1600_AddBytesInLane(state, lanePosition, data, offset, length);
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
-{
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-    const UINT32 * pI = (const UINT32 *)data;
-    UINT32 * pS = (UINT32 *)state;
-    UINT32 t, x0, x1;
-    int i;
-    for (i = laneCount-1; i >= 0; --i) {
-#ifdef NO_MISALIGNED_ACCESSES
-        UINT32 low;
-        UINT32 high;
-        memcpy(&low, pI++, 4);
-        memcpy(&high, pI++, 4);
-        toBitInterleavingAndSet(low, high, *(pS++), *(pS++), t, x0, x1);
-#else
-        toBitInterleavingAndSet(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1)
-#endif
-    }
-#else
-    unsigned int lanePosition;
-    for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
-        UINT8 laneAsBytes[8];
-        UINT32 low, high, temp, temp0, temp1;
-        UINT32 *stateAsHalfLanes;
-        memcpy(laneAsBytes, data+lanePosition*8, 8);
-        low = laneAsBytes[0]
-            | ((UINT32)(laneAsBytes[1]) << 8)
-            | ((UINT32)(laneAsBytes[2]) << 16)
-            | ((UINT32)(laneAsBytes[3]) << 24);
-        high = laneAsBytes[4]
-            | ((UINT32)(laneAsBytes[5]) << 8)
-            | ((UINT32)(laneAsBytes[6]) << 16)
-            | ((UINT32)(laneAsBytes[7]) << 24);
-        stateAsHalfLanes = (UINT32*)state;
-        toBitInterleavingAndSet(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1);
-    }
-#endif
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
-{
-    SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8);
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
-{
-    UINT32 *stateAsHalfLanes = (UINT32*)state;
-    unsigned int i;
-
-    for(i=0; i<byteCount/8; i++) {
-        stateAsHalfLanes[i*2+0] = 0;
-        stateAsHalfLanes[i*2+1] = 0;
-    }
-    if (byteCount%8 != 0)
-        KeccakP1600_SetBytesInLaneToZero(state, byteCount/8, 0, byteCount%8);
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length)
-{
-    UINT32 *stateAsHalfLanes = (UINT32*)state;
-    UINT32 low, high, temp, temp0, temp1;
-    UINT8 laneAsBytes[8];
-
-    fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-    *((UINT32*)(laneAsBytes+0)) = low;
-    *((UINT32*)(laneAsBytes+4)) = high;
-#else
-    laneAsBytes[0] = low & 0xFF;
-    laneAsBytes[1] = (low >> 8) & 0xFF;
-    laneAsBytes[2] = (low >> 16) & 0xFF;
-    laneAsBytes[3] = (low >> 24) & 0xFF;
-    laneAsBytes[4] = high & 0xFF;
-    laneAsBytes[5] = (high >> 8) & 0xFF;
-    laneAsBytes[6] = (high >> 16) & 0xFF;
-    laneAsBytes[7] = (high >> 24) & 0xFF;
-#endif
-    memcpy(data, laneAsBytes+offset, length);
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
-{
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-    UINT32 * pI = (UINT32 *)data;
-    const UINT32 * pS = ( const UINT32 *)state;
-    UINT32 t, x0, x1;
-    int i;
-    for (i = laneCount-1; i >= 0; --i) {
-#ifdef NO_MISALIGNED_ACCESSES
-        UINT32 low;
-        UINT32 high;
-        fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1);
-        memcpy(pI++, &low, 4);
-        memcpy(pI++, &high, 4);
-#else
-        fromBitInterleaving(*(pS++), *(pS++), *(pI++), *(pI++), t, x0, x1)
-#endif
-    }
-#else
-    unsigned int lanePosition;
-    for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
-        UINT32 *stateAsHalfLanes = (UINT32*)state;
-        UINT32 low, high, temp, temp0, temp1;
-        UINT8 laneAsBytes[8];
-        fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
-        laneAsBytes[0] = low & 0xFF;
-        laneAsBytes[1] = (low >> 8) & 0xFF;
-        laneAsBytes[2] = (low >> 16) & 0xFF;
-        laneAsBytes[3] = (low >> 24) & 0xFF;
-        laneAsBytes[4] = high & 0xFF;
-        laneAsBytes[5] = (high >> 8) & 0xFF;
-        laneAsBytes[6] = (high >> 16) & 0xFF;
-        laneAsBytes[7] = (high >> 24) & 0xFF;
-        memcpy(data+lanePosition*8, laneAsBytes, 8);
-    }
-#endif
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
-{
-    SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8);
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
-{
-    UINT32 *stateAsHalfLanes = (UINT32*)state;
-    UINT32 low, high, temp, temp0, temp1;
-    UINT8 laneAsBytes[8];
-    unsigned int i;
-
-    fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-    *((UINT32*)(laneAsBytes+0)) = low;
-    *((UINT32*)(laneAsBytes+4)) = high;
-#else
-    laneAsBytes[0] = low & 0xFF;
-    laneAsBytes[1] = (low >> 8) & 0xFF;
-    laneAsBytes[2] = (low >> 16) & 0xFF;
-    laneAsBytes[3] = (low >> 24) & 0xFF;
-    laneAsBytes[4] = high & 0xFF;
-    laneAsBytes[5] = (high >> 8) & 0xFF;
-    laneAsBytes[6] = (high >> 16) & 0xFF;
-    laneAsBytes[7] = (high >> 24) & 0xFF;
-#endif
-    for(i=0; i<length; i++)
-        output[i] = input[i] ^ laneAsBytes[offset+i];
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount)
-{
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-    const UINT32 * pI = (const UINT32 *)input;
-    UINT32 * pO = (UINT32 *)output;
-    const UINT32 * pS = (const UINT32 *)state;
-    UINT32 t, x0, x1;
-    int i;
-    for (i = laneCount-1; i >= 0; --i) {
-#ifdef NO_MISALIGNED_ACCESSES
-        UINT32 low;
-        UINT32 high;
-        fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1);
-        *(pO++) = *(pI++) ^ low;
-        *(pO++) = *(pI++) ^ high;
-#else
-        fromBitInterleavingAndXOR(*(pS++), *(pS++), *(pI++), *(pI++), *(pO++), *(pO++), t, x0, x1)
-#endif
-    }
-#else
-    unsigned int lanePosition;
-    for(lanePosition=0; lanePosition<laneCount; lanePosition++) {
-        UINT32 *stateAsHalfLanes = (UINT32*)state;
-        UINT32 low, high, temp, temp0, temp1;
-        UINT8 laneAsBytes[8];
-        fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1);
-        laneAsBytes[0] = low & 0xFF;
-        laneAsBytes[1] = (low >> 8) & 0xFF;
-        laneAsBytes[2] = (low >> 16) & 0xFF;
-        laneAsBytes[3] = (low >> 24) & 0xFF;
-        laneAsBytes[4] = high & 0xFF;
-        laneAsBytes[5] = (high >> 8) & 0xFF;
-        laneAsBytes[6] = (high >> 16) & 0xFF;
-        laneAsBytes[7] = (high >> 24) & 0xFF;
-        ((UINT32*)(output+lanePosition*8))[0] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+0));
-        ((UINT32*)(output+lanePosition*8))[1] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+4));
-    }
-#endif
-}
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
-{
-    SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8);
-}
-
-/* ---------------------------------------------------------------- */
-
-static const UINT32 KeccakF1600RoundConstants_int2[2*24+1] =
-{
-    0x00000001UL,    0x00000000UL,
-    0x00000000UL,    0x00000089UL,
-    0x00000000UL,    0x8000008bUL,
-    0x00000000UL,    0x80008080UL,
-    0x00000001UL,    0x0000008bUL,
-    0x00000001UL,    0x00008000UL,
-    0x00000001UL,    0x80008088UL,
-    0x00000001UL,    0x80000082UL,
-    0x00000000UL,    0x0000000bUL,
-    0x00000000UL,    0x0000000aUL,
-    0x00000001UL,    0x00008082UL,
-    0x00000000UL,    0x00008003UL,
-    0x00000001UL,    0x0000808bUL,
-    0x00000001UL,    0x8000000bUL,
-    0x00000001UL,    0x8000008aUL,
-    0x00000001UL,    0x80000081UL,
-    0x00000000UL,    0x80000081UL,
-    0x00000000UL,    0x80000008UL,
-    0x00000000UL,    0x00000083UL,
-    0x00000000UL,    0x80008003UL,
-    0x00000001UL,    0x80008088UL,
-    0x00000000UL,    0x80000088UL,
-    0x00000001UL,    0x00008000UL,
-    0x00000000UL,    0x80008082UL,
-    0x000000FFUL
-};
-
-#define KeccakAtoD_round0() \
-        Cx = Abu0^Agu0^Aku0^Amu0^Asu0; \
-        Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \
-        Da0 = Cx^ROL32(Du1, 1); \
-        Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \
-        Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \
-        Da1 = Cz^Du0; \
-\
-        Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \
-        Do0 = Cw^ROL32(Cz, 1); \
-        Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \
-        Do1 = Cy^Cx; \
-\
-        Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \
-        De0 = Cx^ROL32(Cy, 1); \
-        Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \
-        De1 = Cz^Cw; \
-\
-        Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \
-        Di0 = Du0^ROL32(Cy, 1); \
-        Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \
-        Di1 = Du1^Cw; \
-\
-        Du0 = Cw^ROL32(Cz, 1); \
-        Du1 = Cy^Cx; \
-
-#define KeccakAtoD_round1() \
-        Cx = Asu0^Agu0^Amu0^Abu1^Aku1; \
-        Du1 = Age1^Ame0^Abe0^Ake1^Ase1; \
-        Da0 = Cx^ROL32(Du1, 1); \
-        Cz = Asu1^Agu1^Amu1^Abu0^Aku0; \
-        Du0 = Age0^Ame1^Abe1^Ake0^Ase0; \
-        Da1 = Cz^Du0; \
-\
-        Cw = Aki1^Asi1^Agi0^Ami1^Abi0; \
-        Do0 = Cw^ROL32(Cz, 1); \
-        Cy = Aki0^Asi0^Agi1^Ami0^Abi1; \
-        Do1 = Cy^Cx; \
-\
-        Cx = Aba0^Aka1^Asa0^Aga0^Ama1; \
-        De0 = Cx^ROL32(Cy, 1); \
-        Cz = Aba1^Aka0^Asa1^Aga1^Ama0; \
-        De1 = Cz^Cw; \
-\
-        Cy = Amo0^Abo1^Ako0^Aso1^Ago0; \
-        Di0 = Du0^ROL32(Cy, 1); \
-        Cw = Amo1^Abo0^Ako1^Aso0^Ago1; \
-        Di1 = Du1^Cw; \
-\
-        Du0 = Cw^ROL32(Cz, 1); \
-        Du1 = Cy^Cx; \
-
-#define KeccakAtoD_round2() \
-        Cx = Aku1^Agu0^Abu1^Asu1^Amu1; \
-        Du1 = Ame0^Ake0^Age0^Abe0^Ase1; \
-        Da0 = Cx^ROL32(Du1, 1); \
-        Cz = Aku0^Agu1^Abu0^Asu0^Amu0; \
-        Du0 = Ame1^Ake1^Age1^Abe1^Ase0; \
-        Da1 = Cz^Du0; \
-\
-        Cw = Agi1^Abi1^Asi1^Ami0^Aki1; \
-        Do0 = Cw^ROL32(Cz, 1); \
-        Cy = Agi0^Abi0^Asi0^Ami1^Aki0; \
-        Do1 = Cy^Cx; \
-\
-        Cx = Aba0^Asa1^Ama1^Aka1^Aga1; \
-        De0 = Cx^ROL32(Cy, 1); \
-        Cz = Aba1^Asa0^Ama0^Aka0^Aga0; \
-        De1 = Cz^Cw; \
-\
-        Cy = Aso0^Amo0^Ako1^Ago0^Abo0; \
-        Di0 = Du0^ROL32(Cy, 1); \
-        Cw = Aso1^Amo1^Ako0^Ago1^Abo1; \
-        Di1 = Du1^Cw; \
-\
-        Du0 = Cw^ROL32(Cz, 1); \
-        Du1 = Cy^Cx; \
-
-#define KeccakAtoD_round3() \
-        Cx = Amu1^Agu0^Asu1^Aku0^Abu0; \
-        Du1 = Ake0^Abe1^Ame1^Age0^Ase1; \
-        Da0 = Cx^ROL32(Du1, 1); \
-        Cz = Amu0^Agu1^Asu0^Aku1^Abu1; \
-        Du0 = Ake1^Abe0^Ame0^Age1^Ase0; \
-        Da1 = Cz^Du0; \
-\
-        Cw = Asi0^Aki0^Abi1^Ami1^Agi1; \
-        Do0 = Cw^ROL32(Cz, 1); \
-        Cy = Asi1^Aki1^Abi0^Ami0^Agi0; \
-        Do1 = Cy^Cx; \
-\
-        Cx = Aba0^Ama0^Aga1^Asa1^Aka0; \
-        De0 = Cx^ROL32(Cy, 1); \
-        Cz = Aba1^Ama1^Aga0^Asa0^Aka1; \
-        De1 = Cz^Cw; \
-\
-        Cy = Ago1^Aso0^Ako0^Abo0^Amo1; \
-        Di0 = Du0^ROL32(Cy, 1); \
-        Cw = Ago0^Aso1^Ako1^Abo1^Amo0; \
-        Di1 = Du1^Cw; \
-\
-        Du0 = Cw^ROL32(Cz, 1); \
-        Du1 = Cy^Cx; \
-
-void KeccakP1600_Permute_Nrounds(void *state, unsigned int nRounds)
-{
-    {
-        UINT32 Da0, De0, Di0, Do0, Du0;
-        UINT32 Da1, De1, Di1, Do1, Du1;
-        UINT32 Ca0, Ce0, Ci0, Co0, Cu0;
-        UINT32 Cx, Cy, Cz, Cw;
-        #define Ba Ca0
-        #define Be Ce0
-        #define Bi Ci0
-        #define Bo Co0
-        #define Bu Cu0
-        const UINT32 *pRoundConstants = KeccakF1600RoundConstants_int2+(24-nRounds)*2;
-        UINT32 *stateAsHalfLanes = (UINT32*)state;
-        #define Aba0 stateAsHalfLanes[ 0]
-        #define Aba1 stateAsHalfLanes[ 1]
-        #define Abe0 stateAsHalfLanes[ 2]
-        #define Abe1 stateAsHalfLanes[ 3]
-        #define Abi0 stateAsHalfLanes[ 4]
-        #define Abi1 stateAsHalfLanes[ 5]
-        #define Abo0 stateAsHalfLanes[ 6]
-        #define Abo1 stateAsHalfLanes[ 7]
-        #define Abu0 stateAsHalfLanes[ 8]
-        #define Abu1 stateAsHalfLanes[ 9]
-        #define Aga0 stateAsHalfLanes[10]
-        #define Aga1 stateAsHalfLanes[11]
-        #define Age0 stateAsHalfLanes[12]
-        #define Age1 stateAsHalfLanes[13]
-        #define Agi0 stateAsHalfLanes[14]
-        #define Agi1 stateAsHalfLanes[15]
-        #define Ago0 stateAsHalfLanes[16]
-        #define Ago1 stateAsHalfLanes[17]
-        #define Agu0 stateAsHalfLanes[18]
-        #define Agu1 stateAsHalfLanes[19]
-        #define Aka0 stateAsHalfLanes[20]
-        #define Aka1 stateAsHalfLanes[21]
-        #define Ake0 stateAsHalfLanes[22]
-        #define Ake1 stateAsHalfLanes[23]
-        #define Aki0 stateAsHalfLanes[24]
-        #define Aki1 stateAsHalfLanes[25]
-        #define Ako0 stateAsHalfLanes[26]
-        #define Ako1 stateAsHalfLanes[27]
-        #define Aku0 stateAsHalfLanes[28]
-        #define Aku1 stateAsHalfLanes[29]
-        #define Ama0 stateAsHalfLanes[30]
-        #define Ama1 stateAsHalfLanes[31]
-        #define Ame0 stateAsHalfLanes[32]
-        #define Ame1 stateAsHalfLanes[33]
-        #define Ami0 stateAsHalfLanes[34]
-        #define Ami1 stateAsHalfLanes[35]
-        #define Amo0 stateAsHalfLanes[36]
-        #define Amo1 stateAsHalfLanes[37]
-        #define Amu0 stateAsHalfLanes[38]
-        #define Amu1 stateAsHalfLanes[39]
-        #define Asa0 stateAsHalfLanes[40]
-        #define Asa1 stateAsHalfLanes[41]
-        #define Ase0 stateAsHalfLanes[42]
-        #define Ase1 stateAsHalfLanes[43]
-        #define Asi0 stateAsHalfLanes[44]
-        #define Asi1 stateAsHalfLanes[45]
-        #define Aso0 stateAsHalfLanes[46]
-        #define Aso1 stateAsHalfLanes[47]
-        #define Asu0 stateAsHalfLanes[48]
-        #define Asu1 stateAsHalfLanes[49]
-
-        do
-        {
-            /* --- Code for 4 rounds */
-
-            /* --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */
-
-            KeccakAtoD_round0();
-
-            Ba = (Aba0^Da0);
-            Be = ROL32((Age0^De0), 22);
-            Bi = ROL32((Aki1^Di1), 22);
-            Bo = ROL32((Amo1^Do1), 11);
-            Bu = ROL32((Asu0^Du0), 7);
-            Aba0 =   Ba ^((~Be)&  Bi );
-            Aba0 ^= *(pRoundConstants++);
-            Age0 =   Be ^((~Bi)&  Bo );
-            Aki1 =   Bi ^((~Bo)&  Bu );
-            Amo1 =   Bo ^((~Bu)&  Ba );
-            Asu0 =   Bu ^((~Ba)&  Be );
-
-            Ba = (Aba1^Da1);
-            Be = ROL32((Age1^De1), 22);
-            Bi = ROL32((Aki0^Di0), 21);
-            Bo = ROL32((Amo0^Do0), 10);
-            Bu = ROL32((Asu1^Du1), 7);
-            Aba1 =   Ba ^((~Be)&  Bi );
-            Aba1 ^= *(pRoundConstants++);
-            Age1 =   Be ^((~Bi)&  Bo );
-            Aki0 =   Bi ^((~Bo)&  Bu );
-            Amo0 =   Bo ^((~Bu)&  Ba );
-            Asu1 =   Bu ^((~Ba)&  Be );
-
-            Bi = ROL32((Aka1^Da1), 2);
-            Bo = ROL32((Ame1^De1), 23);
-            Bu = ROL32((Asi1^Di1), 31);
-            Ba = ROL32((Abo0^Do0), 14);
-            Be = ROL32((Agu0^Du0), 10);
-            Aka1 =   Ba ^((~Be)&  Bi );
-            Ame1 =   Be ^((~Bi)&  Bo );
-            Asi1 =   Bi ^((~Bo)&  Bu );
-            Abo0 =   Bo ^((~Bu)&  Ba );
-            Agu0 =   Bu ^((~Ba)&  Be );
-
-            Bi = ROL32((Aka0^Da0), 1);
-            Bo = ROL32((Ame0^De0), 22);
-            Bu = ROL32((Asi0^Di0), 30);
-            Ba = ROL32((Abo1^Do1), 14);
-            Be = ROL32((Agu1^Du1), 10);
-            Aka0 =   Ba ^((~Be)&  Bi );
-            Ame0 =   Be ^((~Bi)&  Bo );
-            Asi0 =   Bi ^((~Bo)&  Bu );
-            Abo1 =   Bo ^((~Bu)&  Ba );
-            Agu1 =   Bu ^((~Ba)&  Be );
-
-            Bu = ROL32((Asa0^Da0), 9);
-            Ba = ROL32((Abe1^De1), 1);
-            Be = ROL32((Agi0^Di0), 3);
-            Bi = ROL32((Ako1^Do1), 13);
-            Bo = ROL32((Amu0^Du0), 4);
-            Asa0 =   Ba ^((~Be)&  Bi );
-            Abe1 =   Be ^((~Bi)&  Bo );
-            Agi0 =   Bi ^((~Bo)&  Bu );
-            Ako1 =   Bo ^((~Bu)&  Ba );
-            Amu0 =   Bu ^((~Ba)&  Be );
-
-            Bu = ROL32((Asa1^Da1), 9);
-            Ba = (Abe0^De0);
-            Be = ROL32((Agi1^Di1), 3);
-            Bi = ROL32((Ako0^Do0), 12);
-            Bo = ROL32((Amu1^Du1), 4);
-            Asa1 =   Ba ^((~Be)&  Bi );
-            Abe0 =   Be ^((~Bi)&  Bo );
-            Agi1 =   Bi ^((~Bo)&  Bu );
-            Ako0 =   Bo ^((~Bu)&  Ba );
-            Amu1 =   Bu ^((~Ba)&  Be );
-
-            Be = ROL32((Aga0^Da0), 18);
-            Bi = ROL32((Ake0^De0), 5);
-            Bo = ROL32((Ami1^Di1), 8);
-            Bu = ROL32((Aso0^Do0), 28);
-            Ba = ROL32((Abu1^Du1), 14);
-            Aga0 =   Ba ^((~Be)&  Bi );
-            Ake0 =   Be ^((~Bi)&  Bo );
-            Ami1 =   Bi ^((~Bo)&  Bu );
-            Aso0 =   Bo ^((~Bu)&  Ba );
-            Abu1 =   Bu ^((~Ba)&  Be );
-
-            Be = ROL32((Aga1^Da1), 18);
-            Bi = ROL32((Ake1^De1), 5);
-            Bo = ROL32((Ami0^Di0), 7);
-            Bu = ROL32((Aso1^Do1), 28);
-            Ba = ROL32((Abu0^Du0), 13);
-            Aga1 =   Ba ^((~Be)&  Bi );
-            Ake1 =   Be ^((~Bi)&  Bo );
-            Ami0 =   Bi ^((~Bo)&  Bu );
-            Aso1 =   Bo ^((~Bu)&  Ba );
-            Abu0 =   Bu ^((~Ba)&  Be );
-
-            Bo = ROL32((Ama1^Da1), 21);
-            Bu = ROL32((Ase0^De0), 1);
-            Ba = ROL32((Abi0^Di0), 31);
-            Be = ROL32((Ago1^Do1), 28);
-            Bi = ROL32((Aku1^Du1), 20);
-            Ama1 =   Ba ^((~Be)&  Bi );
-            Ase0 =   Be ^((~Bi)&  Bo );
-            Abi0 =   Bi ^((~Bo)&  Bu );
-            Ago1 =   Bo ^((~Bu)&  Ba );
-            Aku1 =   Bu ^((~Ba)&  Be );
-
-            Bo = ROL32((Ama0^Da0), 20);
-            Bu = ROL32((Ase1^De1), 1);
-            Ba = ROL32((Abi1^Di1), 31);
-            Be = ROL32((Ago0^Do0), 27);
-            Bi = ROL32((Aku0^Du0), 19);
-            Ama0 =   Ba ^((~Be)&  Bi );
-            Ase1 =   Be ^((~Bi)&  Bo );
-            Abi1 =   Bi ^((~Bo)&  Bu );
-            Ago0 =   Bo ^((~Bu)&  Ba );
-            Aku0 =   Bu ^((~Ba)&  Be );
-
-            KeccakAtoD_round1();
-
-            Ba = (Aba0^Da0);
-            Be = ROL32((Ame1^De0), 22);
-            Bi = ROL32((Agi1^Di1), 22);
-            Bo = ROL32((Aso1^Do1), 11);
-            Bu = ROL32((Aku1^Du0), 7);
-            Aba0 =   Ba ^((~Be)&  Bi );
-            Aba0 ^= *(pRoundConstants++);
-            Ame1 =   Be ^((~Bi)&  Bo );
-            Agi1 =   Bi ^((~Bo)&  Bu );
-            Aso1 =   Bo ^((~Bu)&  Ba );
-            Aku1 =   Bu ^((~Ba)&  Be );
-
-            Ba = (Aba1^Da1);
-            Be = ROL32((Ame0^De1), 22);
-            Bi = ROL32((Agi0^Di0), 21);
-            Bo = ROL32((Aso0^Do0), 10);
-            Bu = ROL32((Aku0^Du1), 7);
-            Aba1 =   Ba ^((~Be)&  Bi );
-            Aba1 ^= *(pRoundConstants++);
-            Ame0 =   Be ^((~Bi)&  Bo );
-            Agi0 =   Bi ^((~Bo)&  Bu );
-            Aso0 =   Bo ^((~Bu)&  Ba );
-            Aku0 =   Bu ^((~Ba)&  Be );
-
-            Bi = ROL32((Asa1^Da1), 2);
-            Bo = ROL32((Ake1^De1), 23);
-            Bu = ROL32((Abi1^Di1), 31);
-            Ba = ROL32((Amo1^Do0), 14);
-            Be = ROL32((Agu0^Du0), 10);
-            Asa1 =   Ba ^((~Be)&  Bi );
-            Ake1 =   Be ^((~Bi)&  Bo );
-            Abi1 =   Bi ^((~Bo)&  Bu );
-            Amo1 =   Bo ^((~Bu)&  Ba );
-            Agu0 =   Bu ^((~Ba)&  Be );
-
-            Bi = ROL32((Asa0^Da0), 1);
-            Bo = ROL32((Ake0^De0), 22);
-            Bu = ROL32((Abi0^Di0), 30);
-            Ba = ROL32((Amo0^Do1), 14);
-            Be = ROL32((Agu1^Du1), 10);
-            Asa0 =   Ba ^((~Be)&  Bi );
-            Ake0 =   Be ^((~Bi)&  Bo );
-            Abi0 =   Bi ^((~Bo)&  Bu );
-            Amo0 =   Bo ^((~Bu)&  Ba );
-            Agu1 =   Bu ^((~Ba)&  Be );
-
-            Bu = ROL32((Ama1^Da0), 9);
-            Ba = ROL32((Age1^De1), 1);
-            Be = ROL32((Asi1^Di0), 3);
-            Bi = ROL32((Ako0^Do1), 13);
-            Bo = ROL32((Abu1^Du0), 4);
-            Ama1 =   Ba ^((~Be)&  Bi );
-            Age1 =   Be ^((~Bi)&  Bo );
-            Asi1 =   Bi ^((~Bo)&  Bu );
-            Ako0 =   Bo ^((~Bu)&  Ba );
-            Abu1 =   Bu ^((~Ba)&  Be );
-
-            Bu = ROL32((Ama0^Da1), 9);
-            Ba = (Age0^De0);
-            Be = ROL32((Asi0^Di1), 3);
-            Bi = ROL32((Ako1^Do0), 12);
-            Bo = ROL32((Abu0^Du1), 4);
-            Ama0 =   Ba ^((~Be)&  Bi );
-            Age0 =   Be ^((~Bi)&  Bo );
-            Asi0 =   Bi ^((~Bo)&  Bu );
-            Ako1 =   Bo ^((~Bu)&  Ba );
-            Abu0 =   Bu ^((~Ba)&  Be );
-
-            Be = ROL32((Aka1^Da0), 18);
-            Bi = ROL32((Abe1^De0), 5);
-            Bo = ROL32((Ami0^Di1), 8);
-            Bu = ROL32((Ago1^Do0), 28);
-            Ba = ROL32((Asu1^Du1), 14);
-            Aka1 =   Ba ^((~Be)&  Bi );
-            Abe1 =   Be ^((~Bi)&  Bo );
-            Ami0 =   Bi ^((~Bo)&  Bu );
-            Ago1 =   Bo ^((~Bu)&  Ba );
-            Asu1 =   Bu ^((~Ba)&  Be );
-
-            Be = ROL32((Aka0^Da1), 18);
-            Bi = ROL32((Abe0^De1), 5);
-            Bo = ROL32((Ami1^Di0), 7);
-            Bu = ROL32((Ago0^Do1), 28);
-            Ba = ROL32((Asu0^Du0), 13);
-            Aka0 =   Ba ^((~Be)&  Bi );
-            Abe0 =   Be ^((~Bi)&  Bo );
-            Ami1 =   Bi ^((~Bo)&  Bu );
-            Ago0 =   Bo ^((~Bu)&  Ba );
-            Asu0 =   Bu ^((~Ba)&  Be );
-
-            Bo = ROL32((Aga1^Da1), 21);
-            Bu = ROL32((Ase0^De0), 1);
-            Ba = ROL32((Aki1^Di0), 31);
-            Be = ROL32((Abo1^Do1), 28);
-            Bi = ROL32((Amu1^Du1), 20);
-            Aga1 =   Ba ^((~Be)&  Bi );
-            Ase0 =   Be ^((~Bi)&  Bo );
-            Aki1 =   Bi ^((~Bo)&  Bu );
-            Abo1 =   Bo ^((~Bu)&  Ba );
-            Amu1 =   Bu ^((~Ba)&  Be );
-
-            Bo = ROL32((Aga0^Da0), 20);
-            Bu = ROL32((Ase1^De1), 1);
-            Ba = ROL32((Aki0^Di1), 31);
-            Be = ROL32((Abo0^Do0), 27);
-            Bi = ROL32((Amu0^Du0), 19);
-            Aga0 =   Ba ^((~Be)&  Bi );
-            Ase1 =   Be ^((~Bi)&  Bo );
-            Aki0 =   Bi ^((~Bo)&  Bu );
-            Abo0 =   Bo ^((~Bu)&  Ba );
-            Amu0 =   Bu ^((~Ba)&  Be );
-
-            KeccakAtoD_round2();
-
-            Ba = (Aba0^Da0);
-            Be = ROL32((Ake1^De0), 22);
-            Bi = ROL32((Asi0^Di1), 22);
-            Bo = ROL32((Ago0^Do1), 11);
-            Bu = ROL32((Amu1^Du0), 7);
-            Aba0 =   Ba ^((~Be)&  Bi );
-            Aba0 ^= *(pRoundConstants++);
-            Ake1 =   Be ^((~Bi)&  Bo );
-            Asi0 =   Bi ^((~Bo)&  Bu );
-            Ago0 =   Bo ^((~Bu)&  Ba );
-            Amu1 =   Bu ^((~Ba)&  Be );
-
-            Ba = (Aba1^Da1);
-            Be = ROL32((Ake0^De1), 22);
-            Bi = ROL32((Asi1^Di0), 21);
-            Bo = ROL32((Ago1^Do0), 10);
-            Bu = ROL32((Amu0^Du1), 7);
-            Aba1 =   Ba ^((~Be)&  Bi );
-            Aba1 ^= *(pRoundConstants++);
-            Ake0 =   Be ^((~Bi)&  Bo );
-            Asi1 =   Bi ^((~Bo)&  Bu );
-            Ago1 =   Bo ^((~Bu)&  Ba );
-            Amu0 =   Bu ^((~Ba)&  Be );
-
-            Bi = ROL32((Ama0^Da1), 2);
-            Bo = ROL32((Abe0^De1), 23);
-            Bu = ROL32((Aki0^Di1), 31);
-            Ba = ROL32((Aso1^Do0), 14);
-            Be = ROL32((Agu0^Du0), 10);
-            Ama0 =   Ba ^((~Be)&  Bi );
-            Abe0 =   Be ^((~Bi)&  Bo );
-            Aki0 =   Bi ^((~Bo)&  Bu );
-            Aso1 =   Bo ^((~Bu)&  Ba );
-            Agu0 =   Bu ^((~Ba)&  Be );
-
-            Bi = ROL32((Ama1^Da0), 1);
-            Bo = ROL32((Abe1^De0), 22);
-            Bu = ROL32((Aki1^Di0), 30);
-            Ba = ROL32((Aso0^Do1), 14);
-            Be = ROL32((Agu1^Du1), 10);
-            Ama1 =   Ba ^((~Be)&  Bi );
-            Abe1 =   Be ^((~Bi)&  Bo );
-            Aki1 =   Bi ^((~Bo)&  Bu );
-            Aso0 =   Bo ^((~Bu)&  Ba );
-            Agu1 =   Bu ^((~Ba)&  Be );
-
-            Bu = ROL32((Aga1^Da0), 9);
-            Ba = ROL32((Ame0^De1), 1);
-            Be = ROL32((Abi1^Di0), 3);
-            Bi = ROL32((Ako1^Do1), 13);
-            Bo = ROL32((Asu1^Du0), 4);
-            Aga1 =   Ba ^((~Be)&  Bi );
-            Ame0 =   Be ^((~Bi)&  Bo );
-            Abi1 =   Bi ^((~Bo)&  Bu );
-            Ako1 =   Bo ^((~Bu)&  Ba );
-            Asu1 =   Bu ^((~Ba)&  Be );
-
-            Bu = ROL32((Aga0^Da1), 9);
-            Ba = (Ame1^De0);
-            Be = ROL32((Abi0^Di1), 3);
-            Bi = ROL32((Ako0^Do0), 12);
-            Bo = ROL32((Asu0^Du1), 4);
-            Aga0 =   Ba ^((~Be)&  Bi );
-            Ame1 =   Be ^((~Bi)&  Bo );
-            Abi0 =   Bi ^((~Bo)&  Bu );
-            Ako0 =   Bo ^((~Bu)&  Ba );
-            Asu0 =   Bu ^((~Ba)&  Be );
-
-            Be = ROL32((Asa1^Da0), 18);
-            Bi = ROL32((Age1^De0), 5);
-            Bo = ROL32((Ami1^Di1), 8);
-            Bu = ROL32((Abo1^Do0), 28);
-            Ba = ROL32((Aku0^Du1), 14);
-            Asa1 =   Ba ^((~Be)&  Bi );
-            Age1 =   Be ^((~Bi)&  Bo );
-            Ami1 =   Bi ^((~Bo)&  Bu );
-            Abo1 =   Bo ^((~Bu)&  Ba );
-            Aku0 =   Bu ^((~Ba)&  Be );
-
-            Be = ROL32((Asa0^Da1), 18);
-            Bi = ROL32((Age0^De1), 5);
-            Bo = ROL32((Ami0^Di0), 7);
-            Bu = ROL32((Abo0^Do1), 28);
-            Ba = ROL32((Aku1^Du0), 13);
-            Asa0 =   Ba ^((~Be)&  Bi );
-            Age0 =   Be ^((~Bi)&  Bo );
-            Ami0 =   Bi ^((~Bo)&  Bu );
-            Abo0 =   Bo ^((~Bu)&  Ba );
-            Aku1 =   Bu ^((~Ba)&  Be );
-
-            Bo = ROL32((Aka0^Da1), 21);
-            Bu = ROL32((Ase0^De0), 1);
-            Ba = ROL32((Agi1^Di0), 31);
-            Be = ROL32((Amo0^Do1), 28);
-            Bi = ROL32((Abu0^Du1), 20);
-            Aka0 =   Ba ^((~Be)&  Bi );
-            Ase0 =   Be ^((~Bi)&  Bo );
-            Agi1 =   Bi ^((~Bo)&  Bu );
-            Amo0 =   Bo ^((~Bu)&  Ba );
-            Abu0 =   Bu ^((~Ba)&  Be );
-
-            Bo = ROL32((Aka1^Da0), 20);
-            Bu = ROL32((Ase1^De1), 1);
-            Ba = ROL32((Agi0^Di1), 31);
-            Be = ROL32((Amo1^Do0), 27);
-            Bi = ROL32((Abu1^Du0), 19);
-            Aka1 =   Ba ^((~Be)&  Bi );
-            Ase1 =   Be ^((~Bi)&  Bo );
-            Agi0 =   Bi ^((~Bo)&  Bu );
-            Amo1 =   Bo ^((~Bu)&  Ba );
-            Abu1 =   Bu ^((~Ba)&  Be );
-
-            KeccakAtoD_round3();
-
-            Ba = (Aba0^Da0);
-            Be = ROL32((Abe0^De0), 22);
-            Bi = ROL32((Abi0^Di1), 22);
-            Bo = ROL32((Abo0^Do1), 11);
-            Bu = ROL32((Abu0^Du0), 7);
-            Aba0 =   Ba ^((~Be)&  Bi );
-            Aba0 ^= *(pRoundConstants++);
-            Abe0 =   Be ^((~Bi)&  Bo );
-            Abi0 =   Bi ^((~Bo)&  Bu );
-            Abo0 =   Bo ^((~Bu)&  Ba );
-            Abu0 =   Bu ^((~Ba)&  Be );
-
-            Ba = (Aba1^Da1);
-            Be = ROL32((Abe1^De1), 22);
-            Bi = ROL32((Abi1^Di0), 21);
-            Bo = ROL32((Abo1^Do0), 10);
-            Bu = ROL32((Abu1^Du1), 7);
-            Aba1 =   Ba ^((~Be)&  Bi );
-            Aba1 ^= *(pRoundConstants++);
-            Abe1 =   Be ^((~Bi)&  Bo );
-            Abi1 =   Bi ^((~Bo)&  Bu );
-            Abo1 =   Bo ^((~Bu)&  Ba );
-            Abu1 =   Bu ^((~Ba)&  Be );
-
-            Bi = ROL32((Aga0^Da1), 2);
-            Bo = ROL32((Age0^De1), 23);
-            Bu = ROL32((Agi0^Di1), 31);
-            Ba = ROL32((Ago0^Do0), 14);
-            Be = ROL32((Agu0^Du0), 10);
-            Aga0 =   Ba ^((~Be)&  Bi );
-            Age0 =   Be ^((~Bi)&  Bo );
-            Agi0 =   Bi ^((~Bo)&  Bu );
-            Ago0 =   Bo ^((~Bu)&  Ba );
-            Agu0 =   Bu ^((~Ba)&  Be );
-
-            Bi = ROL32((Aga1^Da0), 1);
-            Bo = ROL32((Age1^De0), 22);
-            Bu = ROL32((Agi1^Di0), 30);
-            Ba = ROL32((Ago1^Do1), 14);
-            Be = ROL32((Agu1^Du1), 10);
-            Aga1 =   Ba ^((~Be)&  Bi );
-            Age1 =   Be ^((~Bi)&  Bo );
-            Agi1 =   Bi ^((~Bo)&  Bu );
-            Ago1 =   Bo ^((~Bu)&  Ba );
-            Agu1 =   Bu ^((~Ba)&  Be );
-
-            Bu = ROL32((Aka0^Da0), 9);
-            Ba = ROL32((Ake0^De1), 1);
-            Be = ROL32((Aki0^Di0), 3);
-            Bi = ROL32((Ako0^Do1), 13);
-            Bo = ROL32((Aku0^Du0), 4);
-            Aka0 =   Ba ^((~Be)&  Bi );
-            Ake0 =   Be ^((~Bi)&  Bo );
-            Aki0 =   Bi ^((~Bo)&  Bu );
-            Ako0 =   Bo ^((~Bu)&  Ba );
-            Aku0 =   Bu ^((~Ba)&  Be );
-
-            Bu = ROL32((Aka1^Da1), 9);
-            Ba = (Ake1^De0);
-            Be = ROL32((Aki1^Di1), 3);
-            Bi = ROL32((Ako1^Do0), 12);
-            Bo = ROL32((Aku1^Du1), 4);
-            Aka1 =   Ba ^((~Be)&  Bi );
-            Ake1 =   Be ^((~Bi)&  Bo );
-            Aki1 =   Bi ^((~Bo)&  Bu );
-            Ako1 =   Bo ^((~Bu)&  Ba );
-            Aku1 =   Bu ^((~Ba)&  Be );
-
-            Be = ROL32((Ama0^Da0), 18);
-            Bi = ROL32((Ame0^De0), 5);
-            Bo = ROL32((Ami0^Di1), 8);
-            Bu = ROL32((Amo0^Do0), 28);
-            Ba = ROL32((Amu0^Du1), 14);
-            Ama0 =   Ba ^((~Be)&  Bi );
-            Ame0 =   Be ^((~Bi)&  Bo );
-            Ami0 =   Bi ^((~Bo)&  Bu );
-            Amo0 =   Bo ^((~Bu)&  Ba );
-            Amu0 =   Bu ^((~Ba)&  Be );
-
-            Be = ROL32((Ama1^Da1), 18);
-            Bi = ROL32((Ame1^De1), 5);
-            Bo = ROL32((Ami1^Di0), 7);
-            Bu = ROL32((Amo1^Do1), 28);
-            Ba = ROL32((Amu1^Du0), 13);
-            Ama1 =   Ba ^((~Be)&  Bi );
-            Ame1 =   Be ^((~Bi)&  Bo );
-            Ami1 =   Bi ^((~Bo)&  Bu );
-            Amo1 =   Bo ^((~Bu)&  Ba );
-            Amu1 =   Bu ^((~Ba)&  Be );
-
-            Bo = ROL32((Asa0^Da1), 21);
-            Bu = ROL32((Ase0^De0), 1);
-            Ba = ROL32((Asi0^Di0), 31);
-            Be = ROL32((Aso0^Do1), 28);
-            Bi = ROL32((Asu0^Du1), 20);
-            Asa0 =   Ba ^((~Be)&  Bi );
-            Ase0 =   Be ^((~Bi)&  Bo );
-            Asi0 =   Bi ^((~Bo)&  Bu );
-            Aso0 =   Bo ^((~Bu)&  Ba );
-            Asu0 =   Bu ^((~Ba)&  Be );
-
-            Bo = ROL32((Asa1^Da0), 20);
-            Bu = ROL32((Ase1^De1), 1);
-            Ba = ROL32((Asi1^Di1), 31);
-            Be = ROL32((Aso1^Do0), 27);
-            Bi = ROL32((Asu1^Du0), 19);
-            Asa1 =   Ba ^((~Be)&  Bi );
-            Ase1 =   Be ^((~Bi)&  Bo );
-            Asi1 =   Bi ^((~Bo)&  Bu );
-            Aso1 =   Bo ^((~Bu)&  Ba );
-            Asu1 =   Bu ^((~Ba)&  Be );
-        }
-        while ( *pRoundConstants != 0xFF );
-
-        #undef Aba0
-        #undef Aba1
-        #undef Abe0
-        #undef Abe1
-        #undef Abi0
-        #undef Abi1
-        #undef Abo0
-        #undef Abo1
-        #undef Abu0
-        #undef Abu1
-        #undef Aga0
-        #undef Aga1
-        #undef Age0
-        #undef Age1
-        #undef Agi0
-        #undef Agi1
-        #undef Ago0
-        #undef Ago1
-        #undef Agu0
-        #undef Agu1
-        #undef Aka0
-        #undef Aka1
-        #undef Ake0
-        #undef Ake1
-        #undef Aki0
-        #undef Aki1
-        #undef Ako0
-        #undef Ako1
-        #undef Aku0
-        #undef Aku1
-        #undef Ama0
-        #undef Ama1
-        #undef Ame0
-        #undef Ame1
-        #undef Ami0
-        #undef Ami1
-        #undef Amo0
-        #undef Amo1
-        #undef Amu0
-        #undef Amu1
-        #undef Asa0
-        #undef Asa1
-        #undef Ase0
-        #undef Ase1
-        #undef Asi0
-        #undef Asi1
-        #undef Aso0
-        #undef Aso1
-        #undef Asu0
-        #undef Asu1
-    }
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_Permute_12rounds(void *state)
-{
-     KeccakP1600_Permute_Nrounds(state, 12);
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_Permute_24rounds(void *state)
-{
-     KeccakP1600_Permute_Nrounds(state, 24);
-}
diff --git a/Modules/_sha3/kcp/KeccakP-1600-opt64-config.h b/Modules/_sha3/kcp/KeccakP-1600-opt64-config.h
deleted file mode 100644 (file)
index 9501c64..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-#define KeccakP1600_implementation_config "lane complementing, all rounds unrolled"
-#define KeccakP1600_fullUnrolling
-#define KeccakP1600_useLaneComplementing
diff --git a/Modules/_sha3/kcp/KeccakP-1600-opt64.c b/Modules/_sha3/kcp/KeccakP-1600-opt64.c
deleted file mode 100644 (file)
index c90010d..0000000
+++ /dev/null
@@ -1,474 +0,0 @@
-/*
-Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
-Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
-denoted as "the implementer".
-
-For more information, feedback or questions, please refer to our websites:
-http://keccak.noekeon.org/
-http://keyak.noekeon.org/
-http://ketje.noekeon.org/
-
-To the extent possible under law, the implementer has waived all copyright
-and related or neighboring rights to the source code in this file.
-http://creativecommons.org/publicdomain/zero/1.0/
-*/
-
-#include <string.h>
-#include <stdlib.h>
-/* #include "brg_endian.h" */
-#include "KeccakP-1600-opt64-config.h"
-
-#if NOT_PYTHON
-typedef unsigned char UINT8;
-/* typedef unsigned long long int UINT64; */
-#endif
-
-#if defined(KeccakP1600_useLaneComplementing)
-#define UseBebigokimisa
-#endif
-
-#if defined(_MSC_VER)
-#define ROL64(a, offset) _rotl64(a, offset)
-#elif defined(KeccakP1600_useSHLD)
-    #define ROL64(x,N) ({ \
-    register UINT64 __out; \
-    register UINT64 __in = x; \
-    __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
-    __out; \
-    })
-#else
-#define ROL64(a, offset) ((((UINT64)a) << offset) ^ (((UINT64)a) >> (64-offset)))
-#endif
-
-#include "KeccakP-1600-64.macros"
-#ifdef KeccakP1600_fullUnrolling
-#define FullUnrolling
-#else
-#define Unrolling KeccakP1600_unrolling
-#endif
-#include "KeccakP-1600-unrolling.macros"
-#include "SnP-Relaned.h"
-
-static const UINT64 KeccakF1600RoundConstants[24] = {
-    0x0000000000000001ULL,
-    0x0000000000008082ULL,
-    0x800000000000808aULL,
-    0x8000000080008000ULL,
-    0x000000000000808bULL,
-    0x0000000080000001ULL,
-    0x8000000080008081ULL,
-    0x8000000000008009ULL,
-    0x000000000000008aULL,
-    0x0000000000000088ULL,
-    0x0000000080008009ULL,
-    0x000000008000000aULL,
-    0x000000008000808bULL,
-    0x800000000000008bULL,
-    0x8000000000008089ULL,
-    0x8000000000008003ULL,
-    0x8000000000008002ULL,
-    0x8000000000000080ULL,
-    0x000000000000800aULL,
-    0x800000008000000aULL,
-    0x8000000080008081ULL,
-    0x8000000000008080ULL,
-    0x0000000080000001ULL,
-    0x8000000080008008ULL };
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_Initialize(void *state)
-{
-    memset(state, 0, 200);
-#ifdef KeccakP1600_useLaneComplementing
-    ((UINT64*)state)[ 1] = ~(UINT64)0;
-    ((UINT64*)state)[ 2] = ~(UINT64)0;
-    ((UINT64*)state)[ 8] = ~(UINT64)0;
-    ((UINT64*)state)[12] = ~(UINT64)0;
-    ((UINT64*)state)[17] = ~(UINT64)0;
-    ((UINT64*)state)[20] = ~(UINT64)0;
-#endif
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
-{
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-    UINT64 lane;
-    if (length == 0)
-        return;
-    if (length == 1)
-        lane = data[0];
-    else {
-        lane = 0;
-        memcpy(&lane, data, length);
-    }
-    lane <<= offset*8;
-#else
-    UINT64 lane = 0;
-    unsigned int i;
-    for(i=0; i<length; i++)
-        lane |= ((UINT64)data[i]) << ((i+offset)*8);
-#endif
-    ((UINT64*)state)[lanePosition] ^= lane;
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount)
-{
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-    unsigned int i = 0;
-#ifdef NO_MISALIGNED_ACCESSES
-    /* If either pointer is misaligned, fall back to byte-wise xor. */
-
-    if (((((uintptr_t)state) & 7) != 0) || ((((uintptr_t)data) & 7) != 0)) {
-      for (i = 0; i < laneCount * 8; i++) {
-        ((unsigned char*)state)[i] ^= data[i];
-      }
-    }
-    else
-#endif
-    {
-      /* Otherwise... */
-
-      for( ; (i+8)<=laneCount; i+=8) {
-          ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
-          ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
-          ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2];
-          ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3];
-          ((UINT64*)state)[i+4] ^= ((UINT64*)data)[i+4];
-          ((UINT64*)state)[i+5] ^= ((UINT64*)data)[i+5];
-          ((UINT64*)state)[i+6] ^= ((UINT64*)data)[i+6];
-          ((UINT64*)state)[i+7] ^= ((UINT64*)data)[i+7];
-      }
-      for( ; (i+4)<=laneCount; i+=4) {
-          ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
-          ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
-          ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2];
-          ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3];
-      }
-      for( ; (i+2)<=laneCount; i+=2) {
-          ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
-          ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
-      }
-      if (i<laneCount) {
-          ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
-      }
-    }
-#else
-    unsigned int i;
-    UINT8 *curData = data;
-    for(i=0; i<laneCount; i++, curData+=8) {
-        UINT64 lane = (UINT64)curData[0]
-            | ((UINT64)curData[1] << 8)
-            | ((UINT64)curData[2] << 16)
-            | ((UINT64)curData[3] << 24)
-            | ((UINT64)curData[4] <<32)
-            | ((UINT64)curData[5] << 40)
-            | ((UINT64)curData[6] << 48)
-            | ((UINT64)curData[7] << 56);
-        ((UINT64*)state)[i] ^= lane;
-    }
-#endif
-}
-
-/* ---------------------------------------------------------------- */
-
-#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
-void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
-{
-    UINT64 lane = byte;
-    lane <<= (offset%8)*8;
-    ((UINT64*)state)[offset/8] ^= lane;
-}
-#endif
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
-{
-    SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8);
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
-{
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-#ifdef KeccakP1600_useLaneComplementing
-    if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) {
-        unsigned int i;
-        for(i=0; i<length; i++)
-            ((unsigned char*)state)[lanePosition*8+offset+i] = ~data[i];
-    }
-    else
-#endif
-    {
-        memcpy((unsigned char*)state+lanePosition*8+offset, data, length);
-    }
-#else
-#error "Not yet implemented"
-#endif
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
-{
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-#ifdef KeccakP1600_useLaneComplementing
-    unsigned int lanePosition;
-
-    for(lanePosition=0; lanePosition<laneCount; lanePosition++)
-        if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
-            ((UINT64*)state)[lanePosition] = ~((const UINT64*)data)[lanePosition];
-        else
-            ((UINT64*)state)[lanePosition] = ((const UINT64*)data)[lanePosition];
-#else
-    memcpy(state, data, laneCount*8);
-#endif
-#else
-#error "Not yet implemented"
-#endif
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
-{
-    SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8);
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
-{
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-#ifdef KeccakP1600_useLaneComplementing
-    unsigned int lanePosition;
-
-    for(lanePosition=0; lanePosition<byteCount/8; lanePosition++)
-        if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
-            ((UINT64*)state)[lanePosition] = ~0;
-        else
-            ((UINT64*)state)[lanePosition] = 0;
-    if (byteCount%8 != 0) {
-        lanePosition = byteCount/8;
-        if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
-            memset((unsigned char*)state+lanePosition*8, 0xFF, byteCount%8);
-        else
-            memset((unsigned char*)state+lanePosition*8, 0, byteCount%8);
-    }
-#else
-    memset(state, 0, byteCount);
-#endif
-#else
-#error "Not yet implemented"
-#endif
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_Permute_24rounds(void *state)
-{
-    declareABCDE
-    #ifndef KeccakP1600_fullUnrolling
-    unsigned int i;
-    #endif
-    UINT64 *stateAsLanes = (UINT64*)state;
-
-    copyFromState(A, stateAsLanes)
-    rounds24
-    copyToState(stateAsLanes, A)
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_Permute_12rounds(void *state)
-{
-    declareABCDE
-    #ifndef KeccakP1600_fullUnrolling
-    unsigned int i;
-    #endif
-    UINT64 *stateAsLanes = (UINT64*)state;
-
-    copyFromState(A, stateAsLanes)
-    rounds12
-    copyToState(stateAsLanes, A)
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length)
-{
-    UINT64 lane = ((UINT64*)state)[lanePosition];
-#ifdef KeccakP1600_useLaneComplementing
-    if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
-        lane = ~lane;
-#endif
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-    {
-        UINT64 lane1[1];
-        lane1[0] = lane;
-        memcpy(data, (UINT8*)lane1+offset, length);
-    }
-#else
-    unsigned int i;
-    lane >>= offset*8;
-    for(i=0; i<length; i++) {
-        data[i] = lane & 0xFF;
-        lane >>= 8;
-    }
-#endif
-}
-
-/* ---------------------------------------------------------------- */
-
-#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
-void fromWordToBytes(UINT8 *bytes, const UINT64 word)
-{
-    unsigned int i;
-
-    for(i=0; i<(64/8); i++)
-        bytes[i] = (word >> (8*i)) & 0xFF;
-}
-#endif
-
-void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
-{
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-    memcpy(data, state, laneCount*8);
-#else
-    unsigned int i;
-
-    for(i=0; i<laneCount; i++)
-        fromWordToBytes(data+(i*8), ((const UINT64*)state)[i]);
-#endif
-#ifdef KeccakP1600_useLaneComplementing
-    if (laneCount > 1) {
-        ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1];
-        if (laneCount > 2) {
-            ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2];
-            if (laneCount > 8) {
-                ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8];
-                if (laneCount > 12) {
-                    ((UINT64*)data)[12] = ~((UINT64*)data)[12];
-                    if (laneCount > 17) {
-                        ((UINT64*)data)[17] = ~((UINT64*)data)[17];
-                        if (laneCount > 20) {
-                            ((UINT64*)data)[20] = ~((UINT64*)data)[20];
-                        }
-                    }
-                }
-            }
-        }
-    }
-#endif
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
-{
-    SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8);
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
-{
-    UINT64 lane = ((UINT64*)state)[lanePosition];
-#ifdef KeccakP1600_useLaneComplementing
-    if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
-        lane = ~lane;
-#endif
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-    {
-        unsigned int i;
-        UINT64 lane1[1];
-        lane1[0] = lane;
-        for(i=0; i<length; i++)
-            output[i] = input[i] ^ ((UINT8*)lane1)[offset+i];
-    }
-#else
-    unsigned int i;
-    lane >>= offset*8;
-    for(i=0; i<length; i++) {
-        output[i] = input[i] ^ (lane & 0xFF);
-        lane >>= 8;
-    }
-#endif
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount)
-{
-    unsigned int i;
-#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
-    unsigned char temp[8];
-    unsigned int j;
-#endif
-
-    for(i=0; i<laneCount; i++) {
-#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
-        ((UINT64*)output)[i] = ((UINT64*)input)[i] ^ ((const UINT64*)state)[i];
-#else
-        fromWordToBytes(temp, ((const UINT64*)state)[i]);
-        for(j=0; j<8; j++)
-            output[i*8+j] = input[i*8+j] ^ temp[j];
-#endif
-    }
-#ifdef KeccakP1600_useLaneComplementing
-    if (laneCount > 1) {
-        ((UINT64*)output)[ 1] = ~((UINT64*)output)[ 1];
-        if (laneCount > 2) {
-            ((UINT64*)output)[ 2] = ~((UINT64*)output)[ 2];
-            if (laneCount > 8) {
-                ((UINT64*)output)[ 8] = ~((UINT64*)output)[ 8];
-                if (laneCount > 12) {
-                    ((UINT64*)output)[12] = ~((UINT64*)output)[12];
-                    if (laneCount > 17) {
-                        ((UINT64*)output)[17] = ~((UINT64*)output)[17];
-                        if (laneCount > 20) {
-                            ((UINT64*)output)[20] = ~((UINT64*)output)[20];
-                        }
-                    }
-                }
-            }
-        }
-    }
-#endif
-}
-
-/* ---------------------------------------------------------------- */
-
-void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
-{
-    SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8);
-}
-
-/* ---------------------------------------------------------------- */
-
-size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen)
-{
-    size_t originalDataByteLen = dataByteLen;
-    declareABCDE
-    #ifndef KeccakP1600_fullUnrolling
-    unsigned int i;
-    #endif
-    UINT64 *stateAsLanes = (UINT64*)state;
-    UINT64 *inDataAsLanes = (UINT64*)data;
-
-    copyFromState(A, stateAsLanes)
-    while(dataByteLen >= laneCount*8) {
-        addInput(A, inDataAsLanes, laneCount)
-        rounds24
-        inDataAsLanes += laneCount;
-        dataByteLen -= laneCount*8;
-    }
-    copyToState(stateAsLanes, A)
-    return originalDataByteLen - dataByteLen;
-}
diff --git a/Modules/_sha3/kcp/KeccakP-1600-unrolling.macros b/Modules/_sha3/kcp/KeccakP-1600-unrolling.macros
deleted file mode 100644 (file)
index 405ce29..0000000
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
-Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
-Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
-denoted as "the implementer".
-
-For more information, feedback or questions, please refer to our websites:
-http://keccak.noekeon.org/
-http://keyak.noekeon.org/
-http://ketje.noekeon.org/
-
-To the extent possible under law, the implementer has waived all copyright
-and related or neighboring rights to the source code in this file.
-http://creativecommons.org/publicdomain/zero/1.0/
-*/
-
-#if (defined(FullUnrolling))
-#define rounds24 \
-    prepareTheta \
-    thetaRhoPiChiIotaPrepareTheta( 0, A, E) \
-    thetaRhoPiChiIotaPrepareTheta( 1, E, A) \
-    thetaRhoPiChiIotaPrepareTheta( 2, A, E) \
-    thetaRhoPiChiIotaPrepareTheta( 3, E, A) \
-    thetaRhoPiChiIotaPrepareTheta( 4, A, E) \
-    thetaRhoPiChiIotaPrepareTheta( 5, E, A) \
-    thetaRhoPiChiIotaPrepareTheta( 6, A, E) \
-    thetaRhoPiChiIotaPrepareTheta( 7, E, A) \
-    thetaRhoPiChiIotaPrepareTheta( 8, A, E) \
-    thetaRhoPiChiIotaPrepareTheta( 9, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(10, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(11, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(12, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(13, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(14, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(15, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(16, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(17, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(18, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(19, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(20, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(21, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(22, A, E) \
-    thetaRhoPiChiIota(23, E, A) \
-
-#define rounds12 \
-    prepareTheta \
-    thetaRhoPiChiIotaPrepareTheta(12, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(13, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(14, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(15, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(16, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(17, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(18, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(19, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(20, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(21, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(22, A, E) \
-    thetaRhoPiChiIota(23, E, A) \
-
-#elif (Unrolling == 12)
-#define rounds24 \
-    prepareTheta \
-    for(i=0; i<24; i+=12) { \
-        thetaRhoPiChiIotaPrepareTheta(i   , A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \
-        thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \
-        thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \
-        thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \
-        thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \
-        thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \
-    } \
-
-#define rounds12 \
-    prepareTheta \
-    thetaRhoPiChiIotaPrepareTheta(12, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(13, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(14, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(15, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(16, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(17, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(18, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(19, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(20, A, E) \
-    thetaRhoPiChiIotaPrepareTheta(21, E, A) \
-    thetaRhoPiChiIotaPrepareTheta(22, A, E) \
-    thetaRhoPiChiIota(23, E, A) \
-
-#elif (Unrolling == 6)
-#define rounds24 \
-    prepareTheta \
-    for(i=0; i<24; i+=6) { \
-        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
-        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
-        thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
-    } \
-
-#define rounds12 \
-    prepareTheta \
-    for(i=12; i<24; i+=6) { \
-        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
-        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
-        thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
-    } \
-
-#elif (Unrolling == 4)
-#define rounds24 \
-    prepareTheta \
-    for(i=0; i<24; i+=4) { \
-        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
-        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
-    } \
-
-#define rounds12 \
-    prepareTheta \
-    for(i=12; i<24; i+=4) { \
-        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
-        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
-    } \
-
-#elif (Unrolling == 3)
-#define rounds24 \
-    prepareTheta \
-    for(i=0; i<24; i+=3) { \
-        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
-        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
-        copyStateVariables(A, E) \
-    } \
-
-#define rounds12 \
-    prepareTheta \
-    for(i=12; i<24; i+=3) { \
-        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
-        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
-        copyStateVariables(A, E) \
-    } \
-
-#elif (Unrolling == 2)
-#define rounds24 \
-    prepareTheta \
-    for(i=0; i<24; i+=2) { \
-        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
-    } \
-
-#define rounds12 \
-    prepareTheta \
-    for(i=12; i<24; i+=2) { \
-        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
-        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
-    } \
-
-#elif (Unrolling == 1)
-#define rounds24 \
-    prepareTheta \
-    for(i=0; i<24; i++) { \
-        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
-        copyStateVariables(A, E) \
-    } \
-
-#define rounds12 \
-    prepareTheta \
-    for(i=12; i<24; i++) { \
-        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
-        copyStateVariables(A, E) \
-    } \
-
-#else
-#error "Unrolling is not correctly specified!"
-#endif
diff --git a/Modules/_sha3/kcp/KeccakSponge.c b/Modules/_sha3/kcp/KeccakSponge.c
deleted file mode 100644 (file)
index afdb731..0000000
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
-Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
-Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
-denoted as "the implementer".
-
-For more information, feedback or questions, please refer to our websites:
-http://keccak.noekeon.org/
-http://keyak.noekeon.org/
-http://ketje.noekeon.org/
-
-To the extent possible under law, the implementer has waived all copyright
-and related or neighboring rights to the source code in this file.
-http://creativecommons.org/publicdomain/zero/1.0/
-*/
-
-#include "KeccakSponge.h"
-
-#ifdef KeccakReference
-    #include "displayIntermediateValues.h"
-#endif
-
-#ifndef KeccakP200_excluded
-    #include "KeccakP-200-SnP.h"
-
-    #define prefix KeccakWidth200
-    #define SnP KeccakP200
-    #define SnP_width 200
-    #define SnP_Permute KeccakP200_Permute_18rounds
-    #if defined(KeccakF200_FastLoop_supported)
-        #define SnP_FastLoop_Absorb KeccakF200_FastLoop_Absorb
-    #endif
-        #include "KeccakSponge.inc"
-    #undef prefix
-    #undef SnP
-    #undef SnP_width
-    #undef SnP_Permute
-    #undef SnP_FastLoop_Absorb
-#endif
-
-#ifndef KeccakP400_excluded
-    #include "KeccakP-400-SnP.h"
-
-    #define prefix KeccakWidth400
-    #define SnP KeccakP400
-    #define SnP_width 400
-    #define SnP_Permute KeccakP400_Permute_20rounds
-    #if defined(KeccakF400_FastLoop_supported)
-        #define SnP_FastLoop_Absorb KeccakF400_FastLoop_Absorb
-    #endif
-        #include "KeccakSponge.inc"
-    #undef prefix
-    #undef SnP
-    #undef SnP_width
-    #undef SnP_Permute
-    #undef SnP_FastLoop_Absorb
-#endif
-
-#ifndef KeccakP800_excluded
-    #include "KeccakP-800-SnP.h"
-
-    #define prefix KeccakWidth800
-    #define SnP KeccakP800
-    #define SnP_width 800
-    #define SnP_Permute KeccakP800_Permute_22rounds
-    #if defined(KeccakF800_FastLoop_supported)
-        #define SnP_FastLoop_Absorb KeccakF800_FastLoop_Absorb
-    #endif
-        #include "KeccakSponge.inc"
-    #undef prefix
-    #undef SnP
-    #undef SnP_width
-    #undef SnP_Permute
-    #undef SnP_FastLoop_Absorb
-#endif
-
-#ifndef KeccakP1600_excluded
-    #include "KeccakP-1600-SnP.h"
-
-    #define prefix KeccakWidth1600
-    #define SnP KeccakP1600
-    #define SnP_width 1600
-    #define SnP_Permute KeccakP1600_Permute_24rounds
-    #if defined(KeccakF1600_FastLoop_supported)
-        #define SnP_FastLoop_Absorb KeccakF1600_FastLoop_Absorb
-    #endif
-        #include "KeccakSponge.inc"
-    #undef prefix
-    #undef SnP
-    #undef SnP_width
-    #undef SnP_Permute
-    #undef SnP_FastLoop_Absorb
-#endif
diff --git a/Modules/_sha3/kcp/KeccakSponge.h b/Modules/_sha3/kcp/KeccakSponge.h
deleted file mode 100644 (file)
index 0f4badc..0000000
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
-Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
-Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
-denoted as "the implementer".
-
-For more information, feedback or questions, please refer to our websites:
-http://keccak.noekeon.org/
-http://keyak.noekeon.org/
-http://ketje.noekeon.org/
-
-To the extent possible under law, the implementer has waived all copyright
-and related or neighboring rights to the source code in this file.
-http://creativecommons.org/publicdomain/zero/1.0/
-*/
-
-#ifndef _KeccakSponge_h_
-#define _KeccakSponge_h_
-
-/** General information
-  *
-  * The following type and functions are not actually implemented. Their
-  * documentation is generic, with the prefix Prefix replaced by
-  * - KeccakWidth200 for a sponge function based on Keccak-f[200]
-  * - KeccakWidth400 for a sponge function based on Keccak-f[400]
-  * - KeccakWidth800 for a sponge function based on Keccak-f[800]
-  * - KeccakWidth1600 for a sponge function based on Keccak-f[1600]
-  *
-  * In all these functions, the rate and capacity must sum to the width of the
-  * chosen permutation. For instance, to use the sponge function
-  * Keccak[r=1344, c=256], one must use KeccakWidth1600_Sponge() or a combination
-  * of KeccakWidth1600_SpongeInitialize(), KeccakWidth1600_SpongeAbsorb(),
-  * KeccakWidth1600_SpongeAbsorbLastFewBits() and
-  * KeccakWidth1600_SpongeSqueeze().
-  *
-  * The Prefix_SpongeInstance contains the sponge instance attributes for use
-  * with the Prefix_Sponge* functions.
-  * It gathers the state processed by the permutation as well as the rate,
-  * the position of input/output bytes in the state and the phase
-  * (absorbing or squeezing).
-  */
-
-#ifdef DontReallyInclude_DocumentationOnly
-/** Function to evaluate the sponge function Keccak[r, c] in a single call.
-  * @param  rate        The value of the rate r.
-  * @param  capacity    The value of the capacity c.
-  * @param  input           Pointer to the input message (before the suffix).
-  * @param  inputByteLen    The length of the input message in bytes.
-  * @param  suffix          Byte containing from 0 to 7 suffix bits
-  *                         that must be absorbed after @a input.
-  *                         These <i>n</i> bits must be in the least significant bit positions.
-  *                         These bits must be delimited with a bit 1 at position <i>n</i>
-  *                         (counting from 0=LSB to 7=MSB) and followed by bits 0
-  *                         from position <i>n</i>+1 to position 7.
-  *                         Some examples:
-  *                             - If no bits are to be absorbed, then @a suffix must be 0x01.
-  *                             - If the 2-bit sequence 0,0 is to be absorbed, @a suffix must be 0x04.
-  *                             - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a suffix must be 0x32.
-  *                             - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a suffix must be 0x8B.
-  *                         .
-  * @param  output          Pointer to the output buffer.
-  * @param  outputByteLen   The desired number of output bytes.
-  * @pre    One must have r+c equal to the supported width of this implementation
-  *         and the rate a multiple of 8 bits (one byte) in this implementation.
-  * @pre    @a suffix ≠ 0x00
-  * @return Zero if successful, 1 otherwise.
-  */
-int Prefix_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen);
-
-/**
-  * Function to initialize the state of the Keccak[r, c] sponge function.
-  * The phase of the sponge function is set to absorbing.
-  * @param  spongeInstance  Pointer to the sponge instance to be initialized.
-  * @param  rate        The value of the rate r.
-  * @param  capacity    The value of the capacity c.
-  * @pre    One must have r+c equal to the supported width of this implementation
-  *         and the rate a multiple of 8 bits (one byte) in this implementation.
-  * @return Zero if successful, 1 otherwise.
-  */
-int Prefix_SpongeInitialize(Prefix_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity);
-
-/**
-  * Function to give input data bytes for the sponge function to absorb.
-  * @param  spongeInstance  Pointer to the sponge instance initialized by Prefix_SpongeInitialize().
-  * @param  data        Pointer to the input data.
-  * @param  dataByteLen  The number of input bytes provided in the input data.
-  * @pre    The sponge function must be in the absorbing phase,
-  *         i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits()
-  *         must not have been called before.
-  * @return Zero if successful, 1 otherwise.
-  */
-int Prefix_SpongeAbsorb(Prefix_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen);
-
-/**
-  * Function to give input data bits for the sponge function to absorb
-  * and then to switch to the squeezing phase.
-  * @param  spongeInstance  Pointer to the sponge instance initialized by Prefix_SpongeInitialize().
-  * @param  delimitedData   Byte containing from 0 to 7 trailing bits
-  *                     that must be absorbed.
-  *                     These <i>n</i> bits must be in the least significant bit positions.
-  *                     These bits must be delimited with a bit 1 at position <i>n</i>
-  *                     (counting from 0=LSB to 7=MSB) and followed by bits 0
-  *                     from position <i>n</i>+1 to position 7.
-  *                     Some examples:
-  *                         - If no bits are to be absorbed, then @a delimitedData must be 0x01.
-  *                         - If the 2-bit sequence 0,0 is to be absorbed, @a delimitedData must be 0x04.
-  *                         - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a delimitedData must be 0x32.
-  *                         - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a delimitedData must be 0x8B.
-  *                     .
-  * @pre    The sponge function must be in the absorbing phase,
-  *         i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits()
-  *         must not have been called before.
-  * @pre    @a delimitedData ≠ 0x00
-  * @return Zero if successful, 1 otherwise.
-  */
-int Prefix_SpongeAbsorbLastFewBits(Prefix_SpongeInstance *spongeInstance, unsigned char delimitedData);
-
-/**
-  * Function to squeeze output data from the sponge function.
-  * If the sponge function was in the absorbing phase, this function
-  * switches it to the squeezing phase
-  * as if Prefix_SpongeAbsorbLastFewBits(spongeInstance, 0x01) was called.
-  * @param  spongeInstance  Pointer to the sponge instance initialized by Prefix_SpongeInitialize().
-  * @param  data        Pointer to the buffer where to store the output data.
-  * @param  dataByteLen The number of output bytes desired.
-  * @return Zero if successful, 1 otherwise.
-  */
-int Prefix_SpongeSqueeze(Prefix_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen);
-#endif
-
-#include <string.h>
-#include "align.h"
-
-#define KCP_DeclareSpongeStructure(prefix, size, alignment) \
-    ALIGN(alignment) typedef struct prefix##_SpongeInstanceStruct { \
-        unsigned char state[size]; \
-        unsigned int rate; \
-        unsigned int byteIOIndex; \
-        int squeezing; \
-    } prefix##_SpongeInstance;
-
-#define KCP_DeclareSpongeFunctions(prefix) \
-    int prefix##_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); \
-    int prefix##_SpongeInitialize(prefix##_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); \
-    int prefix##_SpongeAbsorb(prefix##_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); \
-    int prefix##_SpongeAbsorbLastFewBits(prefix##_SpongeInstance *spongeInstance, unsigned char delimitedData); \
-    int prefix##_SpongeSqueeze(prefix##_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen);
-
-#ifndef KeccakP200_excluded
-    #include "KeccakP-200-SnP.h"
-    KCP_DeclareSpongeStructure(KeccakWidth200, KeccakP200_stateSizeInBytes, KeccakP200_stateAlignment)
-    KCP_DeclareSpongeFunctions(KeccakWidth200)
-#endif
-
-#ifndef KeccakP400_excluded
-    #include "KeccakP-400-SnP.h"
-    KCP_DeclareSpongeStructure(KeccakWidth400, KeccakP400_stateSizeInBytes, KeccakP400_stateAlignment)
-    KCP_DeclareSpongeFunctions(KeccakWidth400)
-#endif
-
-#ifndef KeccakP800_excluded
-    #include "KeccakP-800-SnP.h"
-    KCP_DeclareSpongeStructure(KeccakWidth800, KeccakP800_stateSizeInBytes, KeccakP800_stateAlignment)
-    KCP_DeclareSpongeFunctions(KeccakWidth800)
-#endif
-
-#ifndef KeccakP1600_excluded
-    #include "KeccakP-1600-SnP.h"
-    KCP_DeclareSpongeStructure(KeccakWidth1600, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment)
-    KCP_DeclareSpongeFunctions(KeccakWidth1600)
-#endif
-
-#endif
diff --git a/Modules/_sha3/kcp/KeccakSponge.inc b/Modules/_sha3/kcp/KeccakSponge.inc
deleted file mode 100644 (file)
index e10739d..0000000
+++ /dev/null
@@ -1,332 +0,0 @@
-/*
-Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
-Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
-denoted as "the implementer".
-
-For more information, feedback or questions, please refer to our websites:
-http://keccak.noekeon.org/
-http://keyak.noekeon.org/
-http://ketje.noekeon.org/
-
-To the extent possible under law, the implementer has waived all copyright
-and related or neighboring rights to the source code in this file.
-http://creativecommons.org/publicdomain/zero/1.0/
-*/
-
-#define JOIN0(a, b)                     a ## b
-#define JOIN(a, b)                      JOIN0(a, b)
-
-#define Sponge                          JOIN(prefix, _Sponge)
-#define SpongeInstance                  JOIN(prefix, _SpongeInstance)
-#define SpongeInitialize                JOIN(prefix, _SpongeInitialize)
-#define SpongeAbsorb                    JOIN(prefix, _SpongeAbsorb)
-#define SpongeAbsorbLastFewBits         JOIN(prefix, _SpongeAbsorbLastFewBits)
-#define SpongeSqueeze                   JOIN(prefix, _SpongeSqueeze)
-
-#define SnP_stateSizeInBytes            JOIN(SnP, _stateSizeInBytes)
-#define SnP_stateAlignment              JOIN(SnP, _stateAlignment)
-#define SnP_StaticInitialize            JOIN(SnP, _StaticInitialize)
-#define SnP_Initialize                  JOIN(SnP, _Initialize)
-#define SnP_AddByte                     JOIN(SnP, _AddByte)
-#define SnP_AddBytes                    JOIN(SnP, _AddBytes)
-#define SnP_ExtractBytes                JOIN(SnP, _ExtractBytes)
-
-int Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen)
-{
-    ALIGN(SnP_stateAlignment) unsigned char state[SnP_stateSizeInBytes];
-    unsigned int partialBlock;
-    const unsigned char *curInput = input;
-    unsigned char *curOutput = output;
-    unsigned int rateInBytes = rate/8;
-
-    if (rate+capacity != SnP_width)
-        return 1;
-    if ((rate <= 0) || (rate > SnP_width) || ((rate % 8) != 0))
-        return 1;
-    if (suffix == 0)
-        return 1;
-
-    /* Initialize the state */
-
-    SnP_StaticInitialize();
-    SnP_Initialize(state);
-
-    /* First, absorb whole blocks */
-
-#ifdef SnP_FastLoop_Absorb
-    if (((rateInBytes % (SnP_width/200)) == 0) && (inputByteLen >= rateInBytes)) {
-        /* fast lane: whole lane rate */
-
-        size_t j;
-        j = SnP_FastLoop_Absorb(state, rateInBytes/(SnP_width/200), curInput, inputByteLen);
-        curInput += j;
-        inputByteLen -= j;
-    }
-#endif
-    while(inputByteLen >= (size_t)rateInBytes) {
-        #ifdef KeccakReference
-        displayBytes(1, "Block to be absorbed", curInput, rateInBytes);
-        #endif
-        SnP_AddBytes(state, curInput, 0, rateInBytes);
-        SnP_Permute(state);
-        curInput += rateInBytes;
-        inputByteLen -= rateInBytes;
-    }
-
-    /* Then, absorb what remains */
-
-    partialBlock = (unsigned int)inputByteLen;
-    #ifdef KeccakReference
-    displayBytes(1, "Block to be absorbed (part)", curInput, partialBlock);
-    #endif
-    SnP_AddBytes(state, curInput, 0, partialBlock);
-
-    /* Finally, absorb the suffix */
-
-    #ifdef KeccakReference
-    {
-        unsigned char delimitedData1[1];
-        delimitedData1[0] = suffix;
-        displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", delimitedData1, 1);
-    }
-    #endif
-    /* Last few bits, whose delimiter coincides with first bit of padding */
-
-    SnP_AddByte(state, suffix, partialBlock);
-    /* If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding */
-
-    if ((suffix >= 0x80) && (partialBlock == (rateInBytes-1)))
-        SnP_Permute(state);
-    /* Second bit of padding */
-
-    SnP_AddByte(state, 0x80, rateInBytes-1);
-    #ifdef KeccakReference
-    {
-        unsigned char block[SnP_width/8];
-        memset(block, 0, SnP_width/8);
-        block[rateInBytes-1] = 0x80;
-        displayBytes(1, "Second bit of padding", block, rateInBytes);
-    }
-    #endif
-    SnP_Permute(state);
-    #ifdef KeccakReference
-    displayText(1, "--- Switching to squeezing phase ---");
-    #endif
-
-    /* First, output whole blocks */
-
-    while(outputByteLen > (size_t)rateInBytes) {
-        SnP_ExtractBytes(state, curOutput, 0, rateInBytes);
-        SnP_Permute(state);
-        #ifdef KeccakReference
-        displayBytes(1, "Squeezed block", curOutput, rateInBytes);
-        #endif
-        curOutput += rateInBytes;
-        outputByteLen -= rateInBytes;
-    }
-
-    /* Finally, output what remains */
-
-    partialBlock = (unsigned int)outputByteLen;
-    SnP_ExtractBytes(state, curOutput, 0, partialBlock);
-    #ifdef KeccakReference
-    displayBytes(1, "Squeezed block (part)", curOutput, partialBlock);
-    #endif
-
-    return 0;
-}
-
-/* ---------------------------------------------------------------- */
-/* ---------------------------------------------------------------- */
-/* ---------------------------------------------------------------- */
-
-int SpongeInitialize(SpongeInstance *instance, unsigned int rate, unsigned int capacity)
-{
-    if (rate+capacity != SnP_width)
-        return 1;
-    if ((rate <= 0) || (rate > SnP_width) || ((rate % 8) != 0))
-        return 1;
-    SnP_StaticInitialize();
-    SnP_Initialize(instance->state);
-    instance->rate = rate;
-    instance->byteIOIndex = 0;
-    instance->squeezing = 0;
-
-    return 0;
-}
-
-/* ---------------------------------------------------------------- */
-
-int SpongeAbsorb(SpongeInstance *instance, const unsigned char *data, size_t dataByteLen)
-{
-    size_t i, j;
-    unsigned int partialBlock;
-    const unsigned char *curData;
-    unsigned int rateInBytes = instance->rate/8;
-
-    if (instance->squeezing)
-        return 1; /* Too late for additional input */
-
-
-    i = 0;
-    curData = data;
-    while(i < dataByteLen) {
-        if ((instance->byteIOIndex == 0) && (dataByteLen >= (i + rateInBytes))) {
-#ifdef SnP_FastLoop_Absorb
-            /* processing full blocks first */
-
-            if ((rateInBytes % (SnP_width/200)) == 0) {
-                /* fast lane: whole lane rate */
-
-                j = SnP_FastLoop_Absorb(instance->state, rateInBytes/(SnP_width/200), curData, dataByteLen - i);
-                i += j;
-                curData += j;
-            }
-            else {
-#endif
-                for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) {
-                    #ifdef KeccakReference
-                    displayBytes(1, "Block to be absorbed", curData, rateInBytes);
-                    #endif
-                    SnP_AddBytes(instance->state, curData, 0, rateInBytes);
-                    SnP_Permute(instance->state);
-                    curData+=rateInBytes;
-                }
-                i = dataByteLen - j;
-#ifdef SnP_FastLoop_Absorb
-            }
-#endif
-        }
-        else {
-            /* normal lane: using the message queue */
-
-            partialBlock = (unsigned int)(dataByteLen - i);
-            if (partialBlock+instance->byteIOIndex > rateInBytes)
-                partialBlock = rateInBytes-instance->byteIOIndex;
-            #ifdef KeccakReference
-            displayBytes(1, "Block to be absorbed (part)", curData, partialBlock);
-            #endif
-            i += partialBlock;
-
-            SnP_AddBytes(instance->state, curData, instance->byteIOIndex, partialBlock);
-            curData += partialBlock;
-            instance->byteIOIndex += partialBlock;
-            if (instance->byteIOIndex == rateInBytes) {
-                SnP_Permute(instance->state);
-                instance->byteIOIndex = 0;
-            }
-        }
-    }
-    return 0;
-}
-
-/* ---------------------------------------------------------------- */
-
-int SpongeAbsorbLastFewBits(SpongeInstance *instance, unsigned char delimitedData)
-{
-    unsigned int rateInBytes = instance->rate/8;
-
-    if (delimitedData == 0)
-        return 1;
-    if (instance->squeezing)
-        return 1; /* Too late for additional input */
-
-
-    #ifdef KeccakReference
-    {
-        unsigned char delimitedData1[1];
-        delimitedData1[0] = delimitedData;
-        displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", delimitedData1, 1);
-    }
-    #endif
-    /* Last few bits, whose delimiter coincides with first bit of padding */
-
-    SnP_AddByte(instance->state, delimitedData, instance->byteIOIndex);
-    /* If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding */
-
-    if ((delimitedData >= 0x80) && (instance->byteIOIndex == (rateInBytes-1)))
-        SnP_Permute(instance->state);
-    /* Second bit of padding */
-
-    SnP_AddByte(instance->state, 0x80, rateInBytes-1);
-    #ifdef KeccakReference
-    {
-        unsigned char block[SnP_width/8];
-        memset(block, 0, SnP_width/8);
-        block[rateInBytes-1] = 0x80;
-        displayBytes(1, "Second bit of padding", block, rateInBytes);
-    }
-    #endif
-    SnP_Permute(instance->state);
-    instance->byteIOIndex = 0;
-    instance->squeezing = 1;
-    #ifdef KeccakReference
-    displayText(1, "--- Switching to squeezing phase ---");
-    #endif
-    return 0;
-}
-
-/* ---------------------------------------------------------------- */
-
-int SpongeSqueeze(SpongeInstance *instance, unsigned char *data, size_t dataByteLen)
-{
-    size_t i, j;
-    unsigned int partialBlock;
-    unsigned int rateInBytes = instance->rate/8;
-    unsigned char *curData;
-
-    if (!instance->squeezing)
-        SpongeAbsorbLastFewBits(instance, 0x01);
-
-    i = 0;
-    curData = data;
-    while(i < dataByteLen) {
-        if ((instance->byteIOIndex == rateInBytes) && (dataByteLen >= (i + rateInBytes))) {
-            for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) {
-                SnP_Permute(instance->state);
-                SnP_ExtractBytes(instance->state, curData, 0, rateInBytes);
-                #ifdef KeccakReference
-                displayBytes(1, "Squeezed block", curData, rateInBytes);
-                #endif
-                curData+=rateInBytes;
-            }
-            i = dataByteLen - j;
-        }
-        else {
-            /* normal lane: using the message queue */
-
-            if (instance->byteIOIndex == rateInBytes) {
-                SnP_Permute(instance->state);
-                instance->byteIOIndex = 0;
-            }
-            partialBlock = (unsigned int)(dataByteLen - i);
-            if (partialBlock+instance->byteIOIndex > rateInBytes)
-                partialBlock = rateInBytes-instance->byteIOIndex;
-            i += partialBlock;
-
-            SnP_ExtractBytes(instance->state, curData, instance->byteIOIndex, partialBlock);
-            #ifdef KeccakReference
-            displayBytes(1, "Squeezed block (part)", curData, partialBlock);
-            #endif
-            curData += partialBlock;
-            instance->byteIOIndex += partialBlock;
-        }
-    }
-    return 0;
-}
-
-/* ---------------------------------------------------------------- */
-
-#undef Sponge
-#undef SpongeInstance
-#undef SpongeInitialize
-#undef SpongeAbsorb
-#undef SpongeAbsorbLastFewBits
-#undef SpongeSqueeze
-#undef SnP_stateSizeInBytes
-#undef SnP_stateAlignment
-#undef SnP_StaticInitialize
-#undef SnP_Initialize
-#undef SnP_AddByte
-#undef SnP_AddBytes
-#undef SnP_ExtractBytes
diff --git a/Modules/_sha3/kcp/PlSnP-Fallback.inc b/Modules/_sha3/kcp/PlSnP-Fallback.inc
deleted file mode 100644 (file)
index 3a9119a..0000000
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
-Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
-Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
-denoted as "the implementer".
-
-For more information, feedback or questions, please refer to our websites:
-http://keccak.noekeon.org/
-http://keyak.noekeon.org/
-http://ketje.noekeon.org/
-
-To the extent possible under law, the implementer has waived all copyright
-and related or neighboring rights to the source code in this file.
-http://creativecommons.org/publicdomain/zero/1.0/
-*/
-
-/* expect PlSnP_baseParallelism, PlSnP_targetParallelism */
-
-/* expect SnP_stateSizeInBytes, SnP_stateAlignment */
-
-/* expect prefix */
-
-/* expect SnP_* */
-
-
-#define JOIN0(a, b)                     a ## b
-#define JOIN(a, b)                      JOIN0(a, b)
-
-#define PlSnP_StaticInitialize          JOIN(prefix, _StaticInitialize)
-#define PlSnP_InitializeAll             JOIN(prefix, _InitializeAll)
-#define PlSnP_AddByte                   JOIN(prefix, _AddByte)
-#define PlSnP_AddBytes                  JOIN(prefix, _AddBytes)
-#define PlSnP_AddLanesAll               JOIN(prefix, _AddLanesAll)
-#define PlSnP_OverwriteBytes            JOIN(prefix, _OverwriteBytes)
-#define PlSnP_OverwriteLanesAll         JOIN(prefix, _OverwriteLanesAll)
-#define PlSnP_OverwriteWithZeroes       JOIN(prefix, _OverwriteWithZeroes)
-#define PlSnP_ExtractBytes              JOIN(prefix, _ExtractBytes)
-#define PlSnP_ExtractLanesAll           JOIN(prefix, _ExtractLanesAll)
-#define PlSnP_ExtractAndAddBytes        JOIN(prefix, _ExtractAndAddBytes)
-#define PlSnP_ExtractAndAddLanesAll     JOIN(prefix, _ExtractAndAddLanesAll)
-
-#if (PlSnP_baseParallelism == 1)
-    #define SnP_stateSizeInBytes            JOIN(SnP, _stateSizeInBytes)
-    #define SnP_stateAlignment              JOIN(SnP, _stateAlignment)
-#else
-    #define SnP_stateSizeInBytes            JOIN(SnP, _statesSizeInBytes)
-    #define SnP_stateAlignment              JOIN(SnP, _statesAlignment)
-#endif
-#define PlSnP_factor ((PlSnP_targetParallelism)/(PlSnP_baseParallelism))
-#define SnP_stateOffset (((SnP_stateSizeInBytes+(SnP_stateAlignment-1))/SnP_stateAlignment)*SnP_stateAlignment)
-#define stateWithIndex(i) ((unsigned char *)states+((i)*SnP_stateOffset))
-
-#define SnP_StaticInitialize            JOIN(SnP, _StaticInitialize)
-#define SnP_Initialize                  JOIN(SnP, _Initialize)
-#define SnP_InitializeAll               JOIN(SnP, _InitializeAll)
-#define SnP_AddByte                     JOIN(SnP, _AddByte)
-#define SnP_AddBytes                    JOIN(SnP, _AddBytes)
-#define SnP_AddLanesAll                 JOIN(SnP, _AddLanesAll)
-#define SnP_OverwriteBytes              JOIN(SnP, _OverwriteBytes)
-#define SnP_OverwriteLanesAll           JOIN(SnP, _OverwriteLanesAll)
-#define SnP_OverwriteWithZeroes         JOIN(SnP, _OverwriteWithZeroes)
-#define SnP_ExtractBytes                JOIN(SnP, _ExtractBytes)
-#define SnP_ExtractLanesAll             JOIN(SnP, _ExtractLanesAll)
-#define SnP_ExtractAndAddBytes          JOIN(SnP, _ExtractAndAddBytes)
-#define SnP_ExtractAndAddLanesAll       JOIN(SnP, _ExtractAndAddLanesAll)
-
-void PlSnP_StaticInitialize( void )
-{
-    SnP_StaticInitialize();
-}
-
-void PlSnP_InitializeAll(void *states)
-{
-    unsigned int i;
-
-    for(i=0; i<PlSnP_factor; i++)
-    #if (PlSnP_baseParallelism == 1)
-        SnP_Initialize(stateWithIndex(i));
-    #else
-        SnP_InitializeAll(stateWithIndex(i));
-    #endif
-}
-
-void PlSnP_AddByte(void *states, unsigned int instanceIndex, unsigned char byte, unsigned int offset)
-{
-    #if (PlSnP_baseParallelism == 1)
-        SnP_AddByte(stateWithIndex(instanceIndex), byte, offset);
-    #else
-        SnP_AddByte(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, byte, offset);
-    #endif
-}
-
-void PlSnP_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
-{
-    #if (PlSnP_baseParallelism == 1)
-        SnP_AddBytes(stateWithIndex(instanceIndex), data, offset, length);
-    #else
-        SnP_AddBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length);
-    #endif
-}
-
-void PlSnP_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
-{
-    unsigned int i;
-
-    for(i=0; i<PlSnP_factor; i++) {
-        #if (PlSnP_baseParallelism == 1)
-            SnP_AddBytes(stateWithIndex(i), data, 0, laneCount*SnP_laneLengthInBytes);
-        #else
-            SnP_AddLanesAll(stateWithIndex(i), data, laneCount, laneOffset);
-        #endif
-        data += PlSnP_baseParallelism*laneOffset*SnP_laneLengthInBytes;
-    }
-}
-
-void PlSnP_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
-{
-    #if (PlSnP_baseParallelism == 1)
-        SnP_OverwriteBytes(stateWithIndex(instanceIndex), data, offset, length);
-    #else
-        SnP_OverwriteBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length);
-    #endif
-}
-
-void PlSnP_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
-{
-    unsigned int i;
-
-    for(i=0; i<PlSnP_factor; i++) {
-        #if (PlSnP_baseParallelism == 1)
-            SnP_OverwriteBytes(stateWithIndex(i), data, 0, laneCount*SnP_laneLengthInBytes);
-        #else
-            SnP_OverwriteLanesAll(stateWithIndex(i), data, laneCount, laneOffset);
-        #endif
-        data += PlSnP_baseParallelism*laneOffset*SnP_laneLengthInBytes;
-    }
-}
-
-void PlSnP_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
-{
-    #if (PlSnP_baseParallelism == 1)
-        SnP_OverwriteWithZeroes(stateWithIndex(instanceIndex), byteCount);
-    #else
-        SnP_OverwriteWithZeroes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, byteCount);
-    #endif
-}
-
-void PlSnP_PermuteAll(void *states)
-{
-    unsigned int i;
-
-    for(i=0; i<PlSnP_factor; i++) {
-        #if (PlSnP_baseParallelism == 1)
-            SnP_Permute(stateWithIndex(i));
-        #else
-            SnP_PermuteAll(stateWithIndex(i));
-        #endif
-    }
-}
-
-#if (defined(SnP_Permute_12rounds) || defined(SnP_PermuteAll_12rounds))
-void PlSnP_PermuteAll_12rounds(void *states)
-{
-    unsigned int i;
-
-    for(i=0; i<PlSnP_factor; i++) {
-        #if (PlSnP_baseParallelism == 1)
-            SnP_Permute_12rounds(stateWithIndex(i));
-        #else
-            SnP_PermuteAll_12rounds(stateWithIndex(i));
-        #endif
-    }
-}
-#endif
-
-void PlSnP_ExtractBytes(void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
-{
-    #if (PlSnP_baseParallelism == 1)
-        SnP_ExtractBytes(stateWithIndex(instanceIndex), data, offset, length);
-    #else
-        SnP_ExtractBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length);
-    #endif
-}
-
-void PlSnP_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
-{
-    unsigned int i;
-
-    for(i=0; i<PlSnP_factor; i++) {
-        #if (PlSnP_baseParallelism == 1)
-            SnP_ExtractBytes(stateWithIndex(i), data, 0, laneCount*SnP_laneLengthInBytes);
-        #else
-            SnP_ExtractLanesAll(stateWithIndex(i), data, laneCount, laneOffset);
-        #endif
-        data += laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism;
-    }
-}
-
-void PlSnP_ExtractAndAddBytes(void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
-{
-    #if (PlSnP_baseParallelism == 1)
-        SnP_ExtractAndAddBytes(stateWithIndex(instanceIndex), input, output, offset, length);
-    #else
-        SnP_ExtractAndAddBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, input, output, offset, length);
-    #endif
-}
-
-void PlSnP_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
-{
-    unsigned int i;
-
-    for(i=0; i<PlSnP_factor; i++) {
-        #if (PlSnP_baseParallelism == 1)
-            SnP_ExtractAndAddBytes(stateWithIndex(i), input, output, 0, laneCount*SnP_laneLengthInBytes);
-        #else
-            SnP_ExtractAndAddLanesAll(stateWithIndex(i), input, output, laneCount, laneOffset);
-        #endif
-        input += laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism;
-        output += laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism;
-    }
-}
-
-#undef PlSnP_factor
-#undef SnP_stateOffset
-#undef stateWithIndex
-#undef JOIN0
-#undef JOIN
-#undef PlSnP_StaticInitialize
-#undef PlSnP_InitializeAll
-#undef PlSnP_AddByte
-#undef PlSnP_AddBytes
-#undef PlSnP_AddLanesAll
-#undef PlSnP_OverwriteBytes
-#undef PlSnP_OverwriteLanesAll
-#undef PlSnP_OverwriteWithZeroes
-#undef PlSnP_PermuteAll
-#undef PlSnP_ExtractBytes
-#undef PlSnP_ExtractLanesAll
-#undef PlSnP_ExtractAndAddBytes
-#undef PlSnP_ExtractAndAddLanesAll
-#undef SnP_stateAlignment
-#undef SnP_stateSizeInBytes
-#undef PlSnP_factor
-#undef SnP_stateOffset
-#undef stateWithIndex
-#undef SnP_StaticInitialize
-#undef SnP_Initialize
-#undef SnP_InitializeAll
-#undef SnP_AddByte
-#undef SnP_AddBytes
-#undef SnP_AddLanesAll
-#undef SnP_OverwriteBytes
-#undef SnP_OverwriteWithZeroes
-#undef SnP_OverwriteLanesAll
-#undef SnP_ExtractBytes
-#undef SnP_ExtractLanesAll
-#undef SnP_ExtractAndAddBytes
-#undef SnP_ExtractAndAddLanesAll
diff --git a/Modules/_sha3/kcp/SnP-Relaned.h b/Modules/_sha3/kcp/SnP-Relaned.h
deleted file mode 100644 (file)
index 086e635..0000000
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
-Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
-Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
-denoted as "the implementer".
-
-For more information, feedback or questions, please refer to our websites:
-http://keccak.noekeon.org/
-http://keyak.noekeon.org/
-http://ketje.noekeon.org/
-
-To the extent possible under law, the implementer has waived all copyright
-and related or neighboring rights to the source code in this file.
-http://creativecommons.org/publicdomain/zero/1.0/
-*/
-
-#ifndef _SnP_Relaned_h_
-#define _SnP_Relaned_h_
-
-#define SnP_AddBytes(state, data, offset, length, SnP_AddLanes, SnP_AddBytesInLane, SnP_laneLengthInBytes) \
-    { \
-        if ((offset) == 0) { \
-            SnP_AddLanes(state, data, (length)/SnP_laneLengthInBytes); \
-            SnP_AddBytesInLane(state, \
-                (length)/SnP_laneLengthInBytes, \
-                (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
-                0, \
-                (length)%SnP_laneLengthInBytes); \
-        } \
-        else { \
-            unsigned int _sizeLeft = (length); \
-            unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \
-            unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \
-            const unsigned char *_curData = (data); \
-            while(_sizeLeft > 0) { \
-                unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \
-                if (_bytesInLane > _sizeLeft) \
-                    _bytesInLane = _sizeLeft; \
-                SnP_AddBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \
-                _sizeLeft -= _bytesInLane; \
-                _lanePosition++; \
-                _offsetInLane = 0; \
-                _curData += _bytesInLane; \
-            } \
-        } \
-    }
-
-#define SnP_OverwriteBytes(state, data, offset, length, SnP_OverwriteLanes, SnP_OverwriteBytesInLane, SnP_laneLengthInBytes) \
-    { \
-        if ((offset) == 0) { \
-            SnP_OverwriteLanes(state, data, (length)/SnP_laneLengthInBytes); \
-            SnP_OverwriteBytesInLane(state, \
-                (length)/SnP_laneLengthInBytes, \
-                (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
-                0, \
-                (length)%SnP_laneLengthInBytes); \
-        } \
-        else { \
-            unsigned int _sizeLeft = (length); \
-            unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \
-            unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \
-            const unsigned char *_curData = (data); \
-            while(_sizeLeft > 0) { \
-                unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \
-                if (_bytesInLane > _sizeLeft) \
-                    _bytesInLane = _sizeLeft; \
-                SnP_OverwriteBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \
-                _sizeLeft -= _bytesInLane; \
-                _lanePosition++; \
-                _offsetInLane = 0; \
-                _curData += _bytesInLane; \
-            } \
-        } \
-    }
-
-#define SnP_ExtractBytes(state, data, offset, length, SnP_ExtractLanes, SnP_ExtractBytesInLane, SnP_laneLengthInBytes) \
-    { \
-        if ((offset) == 0) { \
-            SnP_ExtractLanes(state, data, (length)/SnP_laneLengthInBytes); \
-            SnP_ExtractBytesInLane(state, \
-                (length)/SnP_laneLengthInBytes, \
-                (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
-                0, \
-                (length)%SnP_laneLengthInBytes); \
-        } \
-        else { \
-            unsigned int _sizeLeft = (length); \
-            unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \
-            unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \
-            unsigned char *_curData = (data); \
-            while(_sizeLeft > 0) { \
-                unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \
-                if (_bytesInLane > _sizeLeft) \
-                    _bytesInLane = _sizeLeft; \
-                SnP_ExtractBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \
-                _sizeLeft -= _bytesInLane; \
-                _lanePosition++; \
-                _offsetInLane = 0; \
-                _curData += _bytesInLane; \
-            } \
-        } \
-    }
-
-#define SnP_ExtractAndAddBytes(state, input, output, offset, length, SnP_ExtractAndAddLanes, SnP_ExtractAndAddBytesInLane, SnP_laneLengthInBytes) \
-    { \
-        if ((offset) == 0) { \
-            SnP_ExtractAndAddLanes(state, input, output, (length)/SnP_laneLengthInBytes); \
-            SnP_ExtractAndAddBytesInLane(state, \
-                (length)/SnP_laneLengthInBytes, \
-                (input)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
-                (output)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \
-                0, \
-                (length)%SnP_laneLengthInBytes); \
-        } \
-        else { \
-            unsigned int _sizeLeft = (length); \
-            unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \
-            unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \
-            const unsigned char *_curInput = (input); \
-            unsigned char *_curOutput = (output); \
-            while(_sizeLeft > 0) { \
-                unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \
-                if (_bytesInLane > _sizeLeft) \
-                    _bytesInLane = _sizeLeft; \
-                SnP_ExtractAndAddBytesInLane(state, _lanePosition, _curInput, _curOutput, _offsetInLane, _bytesInLane); \
-                _sizeLeft -= _bytesInLane; \
-                _lanePosition++; \
-                _offsetInLane = 0; \
-                _curInput += _bytesInLane; \
-                _curOutput += _bytesInLane; \
-            } \
-        } \
-    }
-
-#endif
diff --git a/Modules/_sha3/kcp/align.h b/Modules/_sha3/kcp/align.h
deleted file mode 100644 (file)
index 6650fe8..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
-Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
-Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
-denoted as "the implementer".
-
-For more information, feedback or questions, please refer to our websites:
-http://keccak.noekeon.org/
-http://keyak.noekeon.org/
-http://ketje.noekeon.org/
-
-To the extent possible under law, the implementer has waived all copyright
-and related or neighboring rights to the source code in this file.
-http://creativecommons.org/publicdomain/zero/1.0/
-*/
-
-#ifndef _align_h_
-#define _align_h_
-
-/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */
-
-#ifdef ALIGN
-#undef ALIGN
-#endif
-
-#if defined(__GNUC__)
-#define ALIGN(x) __attribute__ ((aligned(x)))
-#elif defined(_MSC_VER)
-#define ALIGN(x) __declspec(align(x))
-#elif defined(__ARMCC_VERSION)
-#define ALIGN(x) __align(x)
-#else
-#define ALIGN(x)
-#endif
-
-#endif
diff --git a/Modules/_sha3/sha3.c b/Modules/_sha3/sha3.c
new file mode 100644 (file)
index 0000000..e2d3fd7
--- /dev/null
@@ -0,0 +1,193 @@
+// sha3.c
+// 19-Nov-11  Markku-Juhani O. Saarinen <mjos@iki.fi>
+
+// Revised 07-Aug-15 to match with official release of FIPS PUB 202 "SHA3"
+// Revised 03-Sep-15 for portability + OpenSSL - style API
+
+#include "sha3.h"
+
+// apply KECCAKF_ROUNDS rounds of the Keccak-f permutation to the state, in place
+
+static void sha3_keccakf(uint64_t st[25])
+{
+    // constants
+    const uint64_t keccakf_rndc[24] = {
+        0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
+        0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
+        0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
+        0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
+        0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
+        0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
+        0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
+        0x8000000000008080, 0x0000000080000001, 0x8000000080008008
+    };
+    const int keccakf_rotc[24] = {
+        1,  3,  6,  10, 15, 21, 28, 36, 45, 55, 2,  14,
+        27, 41, 56, 8,  25, 43, 62, 18, 39, 61, 20, 44
+    };
+    const int keccakf_piln[24] = {
+        10, 7,  11, 17, 18, 3, 5,  16, 8,  21, 24, 4,
+        15, 23, 19, 13, 12, 2, 20, 14, 22, 9,  6,  1
+    };
+
+    // variables
+    int i, j, r;
+    uint64_t t, bc[5];
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+    uint8_t *v;
+
+    // endianness conversion; this is redundant on little-endian targets
+    for (i = 0; i < 25; i++) {
+        v = (uint8_t *) &st[i];
+        st[i] = ((uint64_t) v[0])     | (((uint64_t) v[1]) << 8) |
+            (((uint64_t) v[2]) << 16) | (((uint64_t) v[3]) << 24) |
+            (((uint64_t) v[4]) << 32) | (((uint64_t) v[5]) << 40) |
+            (((uint64_t) v[6]) << 48) | (((uint64_t) v[7]) << 56);
+    }
+#endif
+
+    // actual iteration
+    for (r = 0; r < KECCAKF_ROUNDS; r++) {
+
+        // Theta
+        for (i = 0; i < 5; i++)
+            bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20];
+
+        for (i = 0; i < 5; i++) {
+            t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
+            for (j = 0; j < 25; j += 5)
+                st[j + i] ^= t;
+        }
+
+        // Rho Pi
+        t = st[1];
+        for (i = 0; i < 24; i++) {
+            j = keccakf_piln[i];
+            bc[0] = st[j];
+            st[j] = ROTL64(t, keccakf_rotc[i]);
+            t = bc[0];
+        }
+
+        //  Chi
+        for (j = 0; j < 25; j += 5) {
+            for (i = 0; i < 5; i++)
+                bc[i] = st[j + i];
+            for (i = 0; i < 5; i++)
+                st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5];
+        }
+
+        //  Iota
+        st[0] ^= keccakf_rndc[r];
+    }
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+    // endianness conversion; this is redundant on little-endian targets
+    for (i = 0; i < 25; i++) {
+        v = (uint8_t *) &st[i];
+        t = st[i];
+        v[0] = t & 0xFF;
+        v[1] = (t >> 8) & 0xFF;
+        v[2] = (t >> 16) & 0xFF;
+        v[3] = (t >> 24) & 0xFF;
+        v[4] = (t >> 32) & 0xFF;
+        v[5] = (t >> 40) & 0xFF;
+        v[6] = (t >> 48) & 0xFF;
+        v[7] = (t >> 56) & 0xFF;
+    }
+#endif
+}
+
+// Initialize the context for SHA3
+
+static int sha3_init(sha3_ctx_t *c, int mdlen)
+{
+    int i;
+
+    for (i = 0; i < 25; i++)
+        c->st.q[i] = 0;
+    c->mdlen = mdlen;
+    c->rsiz = 200 - 2 * mdlen;
+    c->pt = 0;
+
+    return 1;
+}
+
+// update state with more data
+
+static int sha3_update(sha3_ctx_t *c, const void *data, size_t len)
+{
+    size_t i;
+    int j;
+
+    j = c->pt;
+    for (i = 0; i < len; i++) {
+        c->st.b[j++] ^= ((const uint8_t *) data)[i];
+        if (j >= c->rsiz) {
+            sha3_keccakf(c->st.q);
+            j = 0;
+        }
+    }
+    c->pt = j;
+
+    return 1;
+}
+
+// finalize and output a hash
+
+static int sha3_final(void *md, sha3_ctx_t *c)
+{
+    int i;
+
+    c->st.b[c->pt] ^= 0x06;
+    c->st.b[c->rsiz - 1] ^= 0x80;
+    sha3_keccakf(c->st.q);
+
+    for (i = 0; i < c->mdlen; i++) {
+        ((uint8_t *) md)[i] = c->st.b[i];
+    }
+
+    return 1;
+}
+
+#if 0
+// compute a SHA-3 hash (md) of given byte length from "in"
+
+void *sha3(const void *in, size_t inlen, void *md, int mdlen)
+{
+    sha3_ctx_t sha3;
+
+    sha3_init(&sha3, mdlen);
+    sha3_update(&sha3, in, inlen);
+    sha3_final(md, &sha3);
+
+    return md;
+}
+#endif
+
+// SHAKE128 and SHAKE256 extensible-output functionality
+
+static void shake_xof(sha3_ctx_t *c)
+{
+    c->st.b[c->pt] ^= 0x1F;
+    c->st.b[c->rsiz - 1] ^= 0x80;
+    sha3_keccakf(c->st.q);
+    c->pt = 0;
+}
+
+static void shake_out(sha3_ctx_t *c, void *out, size_t len)
+{
+    size_t i;
+    int j;
+
+    j = c->pt;
+    for (i = 0; i < len; i++) {
+        if (j >= c->rsiz) {
+            sha3_keccakf(c->st.q);
+            j = 0;
+        }
+        ((uint8_t *) out)[i] = c->st.b[j++];
+    }
+    c->pt = j;
+}
+
diff --git a/Modules/_sha3/sha3.h b/Modules/_sha3/sha3.h
new file mode 100644 (file)
index 0000000..f973d67
--- /dev/null
@@ -0,0 +1,49 @@
+// sha3.h
+// 19-Nov-11  Markku-Juhani O. Saarinen <mjos@iki.fi>
+
+#ifndef SHA3_H
+#define SHA3_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifndef KECCAKF_ROUNDS
+#define KECCAKF_ROUNDS 24
+#endif
+
+#ifndef ROTL64
+#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
+#endif
+
+// state context
+typedef struct {
+    union {                                 // state:
+        uint8_t b[200];                     // 8-bit bytes
+        uint64_t q[25];                     // 64-bit words
+    } st;
+    int pt, rsiz, mdlen;                    // these don't overflow
+} sha3_ctx_t;
+
+// Compression function.
+static void sha3_keccakf(uint64_t st[25]);
+
+// OpenSSL-like interface
+static int sha3_init(sha3_ctx_t *c, int mdlen);    // mdlen = hash output in bytes
+static int sha3_update(sha3_ctx_t *c, const void *data, size_t len);
+static int sha3_final(void *md, sha3_ctx_t *c);    // digest goes to md
+
+// compute a sha3 hash (md) of given byte length from "in"
+#if 0
+static void *sha3(const void *in, size_t inlen, void *md, int mdlen);
+#endif
+
+// SHAKE128 and SHAKE256 extensible-output functions
+#define shake128_init(c) sha3_init(c, 16)
+#define shake256_init(c) sha3_init(c, 32)
+#define shake_update sha3_update
+
+static void shake_xof(sha3_ctx_t *c);
+static void shake_out(sha3_ctx_t *c, void *out, size_t len);
+
+#endif
+
index bffd177c0e753225c11ed057c5e7e500d20cce41..bd1dd596bdda68ba72bba749ee344c8229f740ee 100644 (file)
@@ -10,7 +10,7 @@
  *  Trevor Perrin (trevp@trevp.net)
  *  Gregory P. Smith (greg@krypto.org)
  *
- * Copyright (C) 2012-2016  Christian Heimes (christian@python.org)
+ * Copyright (C) 2012-2022  Christian Heimes (christian@python.org)
  * Licensed to PSF under a Contributor Agreement.
  *
  */
 #include "pycore_strhex.h"        // _Py_strhex()
 #include "../hashlib.h"
 
-/* **************************************************************************
- *                          SHA-3 (Keccak) and SHAKE
- *
- * The code is based on KeccakCodePackage from 2016-04-23
- * commit 647f93079afc4ada3d23737477a6e52511ca41fd
- *
- * The reference implementation is altered in this points:
- *  - C++ comments are converted to ANSI C comments.
- *  - all function names are mangled
- *  - typedef for UINT64 is commented out.
- *  - brg_endian.h is removed
- *
- * *************************************************************************/
-
-#ifdef __sparc
-  /* opt64 uses un-aligned memory access that causes a BUS error with msg
-   * 'invalid address alignment' on SPARC. */
-  #define KeccakOpt 32
-#elif PY_BIG_ENDIAN
-  /* opt64 is not yet supported on big endian platforms */
-  #define KeccakOpt 32
-#elif SIZEOF_VOID_P == 8
-  /* opt64 works only on little-endian 64bit platforms with unsigned int64 */
-  #define KeccakOpt 64
-#else
-  /* opt32 is used for the remaining 32 and 64bit platforms */
-  #define KeccakOpt 32
-#endif
-
-#if KeccakOpt == 64
-  /* 64bit platforms with unsigned int64 */
-  typedef uint64_t UINT64;
-  typedef unsigned char UINT8;
-#endif
-// kcp/KeccakP-1600-opt64.c doesn't need to define UINT8
-#define NOT_PYTHON 0
-
-/* replacement for brg_endian.h */
-#define IS_LITTLE_ENDIAN 1234
-#define IS_BIG_ENDIAN 4321
-#if PY_LITTLE_ENDIAN
-#define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
-#endif
-#if PY_BIG_ENDIAN
-#define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
-#endif
-
-/* Prevent bus errors on platforms requiring aligned accesses such ARM. */
-#if defined(HAVE_ALIGNED_REQUIRED) && !defined(NO_MISALIGNED_ACCESSES)
-#define NO_MISALIGNED_ACCESSES
-#endif
-
-/* mangle names */
-#define KeccakF1600_FastLoop_Absorb _PySHA3_KeccakF1600_FastLoop_Absorb
-#define Keccak_HashFinal _PySHA3_Keccak_HashFinal
-#define Keccak_HashInitialize _PySHA3_Keccak_HashInitialize
-#define Keccak_HashSqueeze _PySHA3_Keccak_HashSqueeze
-#define Keccak_HashUpdate _PySHA3_Keccak_HashUpdate
-#define KeccakP1600_AddBytes _PySHA3_KeccakP1600_AddBytes
-#define KeccakP1600_AddBytesInLane _PySHA3_KeccakP1600_AddBytesInLane
-#define KeccakP1600_AddLanes _PySHA3_KeccakP1600_AddLanes
-#define KeccakP1600_ExtractAndAddBytes _PySHA3_KeccakP1600_ExtractAndAddBytes
-#define KeccakP1600_ExtractAndAddBytesInLane _PySHA3_KeccakP1600_ExtractAndAddBytesInLane
-#define KeccakP1600_ExtractAndAddLanes _PySHA3_KeccakP1600_ExtractAndAddLanes
-#define KeccakP1600_ExtractBytes _PySHA3_KeccakP1600_ExtractBytes
-#define KeccakP1600_ExtractBytesInLane _PySHA3_KeccakP1600_ExtractBytesInLane
-#define KeccakP1600_ExtractLanes _PySHA3_KeccakP1600_ExtractLanes
-#define KeccakP1600_Initialize _PySHA3_KeccakP1600_Initialize
-#define KeccakP1600_OverwriteBytes _PySHA3_KeccakP1600_OverwriteBytes
-#define KeccakP1600_OverwriteBytesInLane _PySHA3_KeccakP1600_OverwriteBytesInLane
-#define KeccakP1600_OverwriteLanes _PySHA3_KeccakP1600_OverwriteLanes
-#define KeccakP1600_OverwriteWithZeroes _PySHA3_KeccakP1600_OverwriteWithZeroes
-#define KeccakP1600_Permute_12rounds _PySHA3_KeccakP1600_Permute_12rounds
-#define KeccakP1600_Permute_24rounds _PySHA3_KeccakP1600_Permute_24rounds
-#define KeccakWidth1600_Sponge _PySHA3_KeccakWidth1600_Sponge
-#define KeccakWidth1600_SpongeAbsorb _PySHA3_KeccakWidth1600_SpongeAbsorb
-#define KeccakWidth1600_SpongeAbsorbLastFewBits _PySHA3_KeccakWidth1600_SpongeAbsorbLastFewBits
-#define KeccakWidth1600_SpongeInitialize _PySHA3_KeccakWidth1600_SpongeInitialize
-#define KeccakWidth1600_SpongeSqueeze _PySHA3_KeccakWidth1600_SpongeSqueeze
-#if KeccakOpt == 32
-#define KeccakP1600_AddByte _PySHA3_KeccakP1600_AddByte
-#define KeccakP1600_Permute_Nrounds _PySHA3_KeccakP1600_Permute_Nrounds
-#define KeccakP1600_SetBytesInLaneToZero _PySHA3_KeccakP1600_SetBytesInLaneToZero
-#endif
-
-/* we are only interested in KeccakP1600 */
-#define KeccakP200_excluded 1
-#define KeccakP400_excluded 1
-#define KeccakP800_excluded 1
-
-/* inline all Keccak dependencies */
-#include "kcp/KeccakHash.h"
-#include "kcp/KeccakSponge.h"
-#include "kcp/KeccakHash.c"
-#include "kcp/KeccakSponge.c"
-#if KeccakOpt == 64
-  #include "kcp/KeccakP-1600-opt64.c"
-#elif KeccakOpt == 32
-  #include "kcp/KeccakP-1600-inplace32BI.c"
-#endif
+#include "sha3.c"
 
 #define SHA3_MAX_DIGESTSIZE 64 /* 64 Bytes (512 Bits) for 224 to 512 */
-#define SHA3_LANESIZE (20 * 8) /* ExtractLane needs max uint64_t[20] extra. */
-#define SHA3_state Keccak_HashInstance
-#define SHA3_init Keccak_HashInitialize
-#define SHA3_process Keccak_HashUpdate
-#define SHA3_done Keccak_HashFinal
-#define SHA3_squeeze Keccak_HashSqueeze
+#define SHA3_LANESIZE 0
+#define SHA3_state sha3_ctx_t
+#define SHA3_init sha3_init
+#define SHA3_process sha3_update
+#define SHA3_done(state, digest) sha3_final(digest, state)
+#define SHA3_squeeze(state, out, len) shake_xof(state), shake_out(state, out, len)
 #define SHA3_copystate(dest, src) memcpy(&(dest), &(src), sizeof(SHA3_state))
 
+// no optimization
+#define KeccakOpt 0
+
+typedef enum { SUCCESS = 1, FAIL = 0, BAD_HASHLEN = 2 } HashReturn;
+
 typedef struct {
     PyTypeObject *sha3_224_type;
     PyTypeObject *sha3_256_type;
     PyTypeObject *sha3_384_type;
     PyTypeObject *sha3_512_type;
-#ifdef PY_WITH_KECCAK
-    PyTypeObject *keccak_224_type;
-    PyTypeObject *keccak_256_type;
-    PyTypeObject *keccak_384_type;
-    PyTypeObject *keccak_512_type;
-#endif
     PyTypeObject *shake_128_type;
     PyTypeObject *shake_256_type;
 } SHA3State;
@@ -215,27 +115,17 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data, int usedforsecurity)
     assert(state != NULL);
 
     if (type == state->sha3_224_type) {
-        res = Keccak_HashInitialize_SHA3_224(&self->hash_state);
+        res = sha3_init(&self->hash_state, 28);
     } else if (type == state->sha3_256_type) {
-        res = Keccak_HashInitialize_SHA3_256(&self->hash_state);
+        res = sha3_init(&self->hash_state, 32);
     } else if (type == state->sha3_384_type) {
-        res = Keccak_HashInitialize_SHA3_384(&self->hash_state);
+        res = sha3_init(&self->hash_state, 48);
     } else if (type == state->sha3_512_type) {
-        res = Keccak_HashInitialize_SHA3_512(&self->hash_state);
-#ifdef PY_WITH_KECCAK
-    } else if (type == state->keccak_224_type) {
-        res = Keccak_HashInitialize(&self->hash_state, 1152, 448, 224, 0x01);
-    } else if (type == state->keccak_256_type) {
-        res = Keccak_HashInitialize(&self->hash_state, 1088, 512, 256, 0x01);
-    } else if (type == state->keccak_384_type) {
-        res = Keccak_HashInitialize(&self->hash_state, 832, 768, 384, 0x01);
-    } else if (type == state->keccak_512_type) {
-        res = Keccak_HashInitialize(&self->hash_state, 576, 1024, 512, 0x01);
-#endif
+        res = sha3_init(&self->hash_state, 64);
     } else if (type == state->shake_128_type) {
-        res = Keccak_HashInitialize_SHAKE128(&self->hash_state);
+        res = sha3_init(&self->hash_state, 16);
     } else if (type == state->shake_256_type) {
-        res = Keccak_HashInitialize_SHAKE256(&self->hash_state);
+        res = sha3_init(&self->hash_state, 32);
     } else {
         PyErr_BadInternalCall();
         goto error;
@@ -254,11 +144,11 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data, int usedforsecurity)
              * thus it's safe to release the GIL without locking the object.
              */
             Py_BEGIN_ALLOW_THREADS
-            res = SHA3_process(&self->hash_state, buf.buf, buf.len * 8);
+            res = SHA3_process(&self->hash_state, buf.buf, buf.len);
             Py_END_ALLOW_THREADS
         }
         else {
-            res = SHA3_process(&self->hash_state, buf.buf, buf.len * 8);
+            res = SHA3_process(&self->hash_state, buf.buf, buf.len);
         }
         if (res != SUCCESS) {
             PyErr_SetString(PyExc_RuntimeError,
@@ -344,7 +234,7 @@ _sha3_sha3_224_digest_impl(SHA3object *self)
         return NULL;
     }
     return PyBytes_FromStringAndSize((const char *)digest,
-                                      self->hash_state.fixedOutputLength / 8);
+                                      self->hash_state.mdlen);
 }
 
 
@@ -372,7 +262,7 @@ _sha3_sha3_224_hexdigest_impl(SHA3object *self)
         return NULL;
     }
     return _Py_strhex((const char *)digest,
-                      self->hash_state.fixedOutputLength / 8);
+                      self->hash_state.mdlen);
 }
 
 
@@ -405,12 +295,12 @@ _sha3_sha3_224_update(SHA3object *self, PyObject *data)
     if (self->lock) {
         Py_BEGIN_ALLOW_THREADS
         PyThread_acquire_lock(self->lock, 1);
-        res = SHA3_process(&self->hash_state, buf.buf, buf.len * 8);
+        res = SHA3_process(&self->hash_state, buf.buf, buf.len);
         PyThread_release_lock(self->lock);
         Py_END_ALLOW_THREADS
     }
     else {
-        res = SHA3_process(&self->hash_state, buf.buf, buf.len * 8);
+        res = SHA3_process(&self->hash_state, buf.buf, buf.len);
     }
 
     if (res != SUCCESS) {
@@ -437,8 +327,8 @@ static PyMethodDef SHA3_methods[] = {
 static PyObject *
 SHA3_get_block_size(SHA3object *self, void *closure)
 {
-    int rate = self->hash_state.sponge.rate;
-    return PyLong_FromLong(rate / 8);
+    int rate = self->hash_state.rsiz;
+    return PyLong_FromLong(rate);
 }
 
 
@@ -458,16 +348,6 @@ SHA3_get_name(SHA3object *self, void *closure)
         return PyUnicode_FromString("sha3_384");
     } else if (type == state->sha3_512_type) {
         return PyUnicode_FromString("sha3_512");
-#ifdef PY_WITH_KECCAK
-    } else if (type == state->keccak_224_type) {
-        return PyUnicode_FromString("keccak_224");
-    } else if (type == state->keccak_256_type) {
-        return PyUnicode_FromString("keccak_256");
-    } else if (type == state->keccak_384_type) {
-        return PyUnicode_FromString("keccak_384");
-    } else if (type == state->keccak_512_type) {
-        return PyUnicode_FromString("keccak_512");
-#endif
     } else if (type == state->shake_128_type) {
         return PyUnicode_FromString("shake_128");
     } else if (type == state->shake_256_type) {
@@ -482,14 +362,14 @@ SHA3_get_name(SHA3object *self, void *closure)
 static PyObject *
 SHA3_get_digest_size(SHA3object *self, void *closure)
 {
-    return PyLong_FromLong(self->hash_state.fixedOutputLength / 8);
+    return PyLong_FromLong(self->hash_state.mdlen);
 }
 
 
 static PyObject *
 SHA3_get_capacity_bits(SHA3object *self, void *closure)
 {
-    int capacity = 1600 - self->hash_state.sponge.rate;
+    int capacity = 1600 - self->hash_state.rsiz * 8;
     return PyLong_FromLong(capacity);
 }
 
@@ -497,16 +377,14 @@ SHA3_get_capacity_bits(SHA3object *self, void *closure)
 static PyObject *
 SHA3_get_rate_bits(SHA3object *self, void *closure)
 {
-    unsigned int rate = self->hash_state.sponge.rate;
+    unsigned int rate = self->hash_state.rsiz * 8;
     return PyLong_FromLong(rate);
 }
 
 static PyObject *
 SHA3_get_suffix(SHA3object *self, void *closure)
 {
-    unsigned char suffix[2];
-    suffix[0] = self->hash_state.delimitedSuffix;
-    suffix[1] = 0;
+    unsigned char suffix[2] = {0x06, 0};
     return PyBytes_FromStringAndSize((const char *)suffix, 1);
 }
 
@@ -520,12 +398,12 @@ static PyGetSetDef SHA3_getseters[] = {
     {NULL}  /* Sentinel */
 };
 
-#define SHA3_TYPE_SLOTS(type_slots_obj, type_doc, type_methods) \
+#define SHA3_TYPE_SLOTS(type_slots_obj, type_doc, type_methods, type_getseters) \
     static PyType_Slot type_slots_obj[] = { \
         {Py_tp_dealloc, SHA3_dealloc}, \
         {Py_tp_doc, (char*)type_doc}, \
         {Py_tp_methods, type_methods}, \
-        {Py_tp_getset, SHA3_getseters}, \
+        {Py_tp_getset, type_getseters}, \
         {Py_tp_new, py_sha3_new}, \
         {0,0} \
     }
@@ -560,62 +438,23 @@ PyDoc_STRVAR(sha3_512__doc__,
 \n\
 Return a new SHA3 hash object with a hashbit length of 64 bytes.");
 
-#ifdef PY_WITH_KECCAK
-PyDoc_STRVAR(keccak_224__doc__,
-"keccak_224([data], *, usedforsecurity=True) -> Keccak object\n\
-\n\
-Return a new Keccak hash object with a hashbit length of 28 bytes.");
-
-PyDoc_STRVAR(keccak_256__doc__,
-"keccak_256([data], *, usedforsecurity=True) -> Keccak object\n\
-\n\
-Return a new Keccak hash object with a hashbit length of 32 bytes.");
-
-PyDoc_STRVAR(keccak_384__doc__,
-"keccak_384([data], *, usedforsecurity=True) -> Keccak object\n\
-\n\
-Return a new Keccak hash object with a hashbit length of 48 bytes.");
-
-PyDoc_STRVAR(keccak_512__doc__,
-"keccak_512([data], *, usedforsecurity=True) -> Keccak object\n\
-\n\
-Return a new Keccak hash object with a hashbit length of 64 bytes.");
-
-#endif
-
-SHA3_TYPE_SLOTS(sha3_224_slots, sha3_224__doc__, SHA3_methods);
+SHA3_TYPE_SLOTS(sha3_224_slots, sha3_224__doc__, SHA3_methods, SHA3_getseters);
 SHA3_TYPE_SPEC(sha3_224_spec, "sha3_224", sha3_224_slots);
 
-SHA3_TYPE_SLOTS(sha3_256_slots, sha3_256__doc__, SHA3_methods);
+SHA3_TYPE_SLOTS(sha3_256_slots, sha3_256__doc__, SHA3_methods, SHA3_getseters);
 SHA3_TYPE_SPEC(sha3_256_spec, "sha3_256", sha3_256_slots);
 
-SHA3_TYPE_SLOTS(sha3_384_slots, sha3_384__doc__, SHA3_methods);
+SHA3_TYPE_SLOTS(sha3_384_slots, sha3_384__doc__, SHA3_methods, SHA3_getseters);
 SHA3_TYPE_SPEC(sha3_384_spec, "sha3_384", sha3_384_slots);
 
-SHA3_TYPE_SLOTS(sha3_512_slots, sha3_512__doc__, SHA3_methods);
+SHA3_TYPE_SLOTS(sha3_512_slots, sha3_512__doc__, SHA3_methods, SHA3_getseters);
 SHA3_TYPE_SPEC(sha3_512_spec, "sha3_512", sha3_512_slots);
 
-#ifdef PY_WITH_KECCAK
-SHA3_TYPE_SLOTS(Keccak_224_slots, keccak_224__doc__, SHA3_methods);
-SHA3_TYPE_SPEC(Keccak_224_spec, "keccak_224", Keccak_224_slots);
-
-SHA3_TYPE_SLOTS(Keccak_256_slots, keccak_256__doc__, SHA3_methods);
-SHA3_TYPE_SPEC(Keccak_256_spec, "keccak_256", Keccak_256_slots);
-
-SHA3_TYPE_SLOTS(Keccak_384_slots, keccak_384__doc__, SHA3_methods);
-SHA3_TYPE_SPEC(Keccak_384_spec, "keccak_384", Keccak_384_slots);
-
-SHA3_TYPE_SLOTS(Keccak_512_slots, keccak_512__doc__, SHA3_methods);
-SHA3_TYPE_SPEC(Keccak_512_spec, "keccak_512", Keccak_512_slots);
-#endif
-
-
 static PyObject *
 _SHAKE_digest(SHA3object *self, unsigned long digestlen, int hex)
 {
     unsigned char *digest = NULL;
     SHA3_state temp;
-    int res;
     PyObject *result = NULL;
 
     if (digestlen >= (1 << 29)) {
@@ -634,23 +473,13 @@ _SHAKE_digest(SHA3object *self, unsigned long digestlen, int hex)
     ENTER_HASHLIB(self);
     SHA3_copystate(temp, self->hash_state);
     LEAVE_HASHLIB(self);
-    res = SHA3_done(&temp, NULL);
-    if (res != SUCCESS) {
-        PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 done()");
-        goto error;
-    }
-    res = SHA3_squeeze(&temp, digest, digestlen * 8);
-    if (res != SUCCESS) {
-        PyErr_SetString(PyExc_RuntimeError, "internal error in SHA3 Squeeze()");
-        return NULL;
-    }
+    SHA3_squeeze(&temp, digest, digestlen);
     if (hex) {
          result = _Py_strhex((const char *)digest, digestlen);
     } else {
         result = PyBytes_FromStringAndSize((const char *)digest,
                                            digestlen);
     }
-  error:
     if (digest != NULL) {
         PyMem_Free(digest);
     }
@@ -691,6 +520,30 @@ _sha3_shake_128_hexdigest_impl(SHA3object *self, unsigned long length)
     return _SHAKE_digest(self, length, 1);
 }
 
+static PyObject *
+SHAKE_get_digest_size(SHA3object *self, void *closure)
+{
+    return PyLong_FromLong(0);
+}
+
+static PyObject *
+SHAKE_get_suffix(SHA3object *self, void *closure)
+{
+    unsigned char suffix[2] = {0x1f, 0};
+    return PyBytes_FromStringAndSize((const char *)suffix, 1);
+}
+
+
+static PyGetSetDef SHAKE_getseters[] = {
+    {"block_size", (getter)SHA3_get_block_size, NULL, NULL, NULL},
+    {"name", (getter)SHA3_get_name, NULL, NULL, NULL},
+    {"digest_size", (getter)SHAKE_get_digest_size, NULL, NULL, NULL},
+    {"_capacity_bits", (getter)SHA3_get_capacity_bits, NULL, NULL, NULL},
+    {"_rate_bits", (getter)SHA3_get_rate_bits, NULL, NULL, NULL},
+    {"_suffix", (getter)SHAKE_get_suffix, NULL, NULL, NULL},
+    {NULL}  /* Sentinel */
+};
+
 
 static PyMethodDef SHAKE_methods[] = {
     _SHA3_SHA3_224_COPY_METHODDEF
@@ -710,10 +563,10 @@ PyDoc_STRVAR(shake_256__doc__,
 \n\
 Return a new SHAKE hash object.");
 
-SHA3_TYPE_SLOTS(SHAKE128slots, shake_128__doc__, SHAKE_methods);
+SHA3_TYPE_SLOTS(SHAKE128slots, shake_128__doc__, SHAKE_methods, SHAKE_getseters);
 SHA3_TYPE_SPEC(SHAKE128_spec, "shake_128", SHAKE128slots);
 
-SHA3_TYPE_SLOTS(SHAKE256slots, shake_256__doc__, SHAKE_methods);
+SHA3_TYPE_SLOTS(SHAKE256slots, shake_256__doc__, SHAKE_methods, SHAKE_getseters);
 SHA3_TYPE_SPEC(SHAKE256_spec, "shake_256", SHAKE256slots);
 
 
@@ -725,12 +578,6 @@ _sha3_traverse(PyObject *module, visitproc visit, void *arg)
     Py_VISIT(state->sha3_256_type);
     Py_VISIT(state->sha3_384_type);
     Py_VISIT(state->sha3_512_type);
-#ifdef PY_WITH_KECCAK
-    Py_VISIT(state->keccak_224_type);
-    Py_VISIT(state->keccak_256_type);
-    Py_VISIT(state->keccak_384_type);
-    Py_VISIT(state->keccak_512_type);
-#endif
     Py_VISIT(state->shake_128_type);
     Py_VISIT(state->shake_256_type);
     return 0;
@@ -744,12 +591,6 @@ _sha3_clear(PyObject *module)
     Py_CLEAR(state->sha3_256_type);
     Py_CLEAR(state->sha3_384_type);
     Py_CLEAR(state->sha3_512_type);
-#ifdef PY_WITH_KECCAK
-    Py_CLEAR(state->keccak_224_type);
-    Py_CLEAR(state->keccak_256_type);
-    Py_CLEAR(state->keccak_384_type);
-    Py_CLEAR(state->keccak_512_type);
-#endif
     Py_CLEAR(state->shake_128_type);
     Py_CLEAR(state->shake_256_type);
     return 0;
@@ -782,12 +623,6 @@ _sha3_exec(PyObject *m)
     init_sha3type(sha3_256_type, sha3_256_spec);
     init_sha3type(sha3_384_type, sha3_384_spec);
     init_sha3type(sha3_512_type, sha3_512_spec);
-#ifdef PY_WITH_KECCAK
-    init_sha3type(keccak_224_type, Keccak_224_spec);
-    init_sha3type(keccak_256_type, Keccak_256_spec);
-    init_sha3type(keccak_384_type, Keccak_384_spec);
-    init_sha3type(keccak_512_type, Keccak_512_spec);
-#endif
     init_sha3type(shake_128_type, SHAKE128_spec);
     init_sha3type(shake_256_type, SHAKE256_spec);
 #undef init_sha3type
@@ -796,7 +631,7 @@ _sha3_exec(PyObject *m)
         return -1;
     }
     if (PyModule_AddStringConstant(m, "implementation",
-                                   KeccakP1600_implementation) < 0) {
+                                   "tiny_sha3") < 0) {
         return -1;
     }