From: Nick Mathewson Date: Sat, 19 Apr 2025 00:43:10 +0000 (-0400) Subject: adapt 32-bit ctmul.c to work with polyval.c X-Git-Tag: tor-0.4.9.3-alpha~49^2~13 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7ce4df844ebc0f589505b4417bfb017b0908b765;p=thirdparty%2Ftor.git adapt 32-bit ctmul.c to work with polyval.c --- diff --git a/src/ext/polyval/ctmul.c b/src/ext/polyval/ctmul.c index 362320252f..b4bfa25bce 100644 --- a/src/ext/polyval/ctmul.c +++ b/src/ext/polyval/ctmul.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2016 Thomas Pornin * - * Permission is hereby granted, free of charge, to any person obtaining + * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, @@ -9,12 +9,12 @@ * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * - * The above copyright notice and this permission notice shall be + * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN @@ -22,8 +22,6 @@ * SOFTWARE. */ -#include "inner.h" - /* * We compute "carryless multiplications" through normal integer * multiplications, masking out enough bits to create "holes" in which @@ -50,7 +48,10 @@ * multiplications, we use the "fast mul" code by default. */ -#if BR_SLOW_MUL +// A 32x32 -> 64 multiply. +#define MUL(x, y) (((uint64_t)(x)) * ((uint64_t)(y))) + +#ifdef BR_SLOW_MUL /* * This implementation uses Karatsuba-like reduction to make fewer @@ -192,61 +193,21 @@ bmul(uint32_t *hi, uint32_t *lo, uint32_t x, uint32_t y) #endif -/* see bearssl_hash.h */ -void -br_ghash_ctmul(void *y, const void *h, const void *data, size_t len) +static void +pv_mul_y_h(polyval_t *pv) { - const unsigned char *buf, *hb; - unsigned char *yb; - uint32_t yw[4]; - uint32_t hw[4]; + uint32_t *yw = pv->y.v; + const uint32_t *hw = pv->h.v; /* * Throughout the loop we handle the y and h values as arrays * of 32-bit words. */ - buf = data; - yb = y; - hb = h; - yw[3] = br_dec32be(yb); - yw[2] = br_dec32be(yb + 4); - yw[1] = br_dec32be(yb + 8); - yw[0] = br_dec32be(yb + 12); - hw[3] = br_dec32be(hb); - hw[2] = br_dec32be(hb + 4); - hw[1] = br_dec32be(hb + 8); - hw[0] = br_dec32be(hb + 12); - while (len > 0) { - const unsigned char *src; - unsigned char tmp[16]; + { int i; uint32_t a[9], b[9], zw[8]; uint32_t c0, c1, c2, c3, d0, d1, d2, d3, e0, e1, e2, e3; - /* - * Get the next 16-byte block (using zero-padding if - * necessary). - */ - if (len >= 16) { - src = buf; - buf += 16; - len -= 16; - } else { - memcpy(tmp, buf, len); - memset(tmp + len, 0, (sizeof tmp) - len); - src = tmp; - len = 0; - } - - /* - * Decode the block. The GHASH standard mandates - * big-endian encoding. - */ - yw[3] ^= br_dec32be(src); - yw[2] ^= br_dec32be(src + 4); - yw[1] ^= br_dec32be(src + 8); - yw[0] ^= br_dec32be(src + 12); - /* * We multiply two 128-bit field elements. We use * Karatsuba to turn that into three 64-bit @@ -306,6 +267,8 @@ br_ghash_ctmul(void *y, const void *h, const void *data, size_t len) d0 ^= e2; d1 ^= e3; +#if 0 + // This rotation is GHASH-only. /* * GHASH specification has the bits "reversed" (most * significant is in fact least significant), which does @@ -320,6 +283,16 @@ br_ghash_ctmul(void *y, const void *h, const void *data, size_t len) zw[5] = (d1 << 1) | (d0 >> 31); zw[6] = (d2 << 1) | (d1 >> 31); zw[7] = (d3 << 1) | (d2 >> 31); +#else + zw[0] = c0; + zw[1] = c1; + zw[2] = c2; + zw[3] = c3; + zw[4] = d0; + zw[5] = d1; + zw[6] = d2; + zw[7] = d3; +#endif /* * We now do the reduction modulo the field polynomial @@ -332,14 +305,7 @@ br_ghash_ctmul(void *y, const void *h, const void *data, size_t len) zw[i + 4] ^= lw ^ (lw >> 1) ^ (lw >> 2) ^ (lw >> 7); zw[i + 3] ^= (lw << 31) ^ (lw << 30) ^ (lw << 25); } - memcpy(yw, zw + 4, sizeof yw); + memcpy(yw, zw + 4, 16); } - - /* - * Encode back the result. - */ - br_enc32be(yb, yw[3]); - br_enc32be(yb + 4, yw[2]); - br_enc32be(yb + 8, yw[1]); - br_enc32be(yb + 12, yw[0]); } +#undef MUL diff --git a/src/ext/polyval/polyval.c b/src/ext/polyval/polyval.c index 163d10d8c9..9140b9457e 100644 --- a/src/ext/polyval/polyval.c +++ b/src/ext/polyval/polyval.c @@ -29,8 +29,10 @@ void pv_mul_y_h(polyval_t *); #ifdef WORDS_BIG_ENDIAN #ifdef __GNUC__ #define bswap64(x) __builtin_bswap64(x) +#define bswap32(x) __builtin_bswap32(x) #else -static inline uint64_t bswap64(uint64_t v) +static inline uint64_t +bswap64(uint64_t v) { return ((value & 0xFF00000000000000) >> 56) | @@ -42,13 +44,24 @@ static inline uint64_t bswap64(uint64_t v) ((value & 0x000000000000FF00) >> 8) | ((value & 0x00000000000000FF)); } +static inline uint64_t +bswap32(uint64_t v) +{ + return + ((value & 0xFF000000) >> 24) | + ((value & 0x00FF0000) >> 16) | + ((value & 0x0000FF00) >> 8) | + ((value & 0x000000FF)); +} #endif #endif #ifdef WORDS_BIG_ENDIAN -#define convert_byte_order(x) bswap64(x) +#define convert_byte_order64(x) bswap64(x) +#define convert_byte_order32(x) bswap32(x) #else -#define convert_byte_order(x) (x) +#define convert_byte_order64(x) (x) +#define convert_byte_order32(x) (x) #endif #ifdef PV_USE_CTMUL64 @@ -61,15 +74,15 @@ u128_from_bytes(const uint8_t *bytes) u128 r; memcpy(&r.lo, bytes, 8); memcpy(&r.hi, bytes + 8, 8); - r.lo = convert_byte_order(r.lo); - r.hi = convert_byte_order(r.hi); + r.lo = convert_byte_order64(r.lo); + r.hi = convert_byte_order64(r.hi); return r; } static inline void u128_to_bytes(u128 val, uint8_t *bytes_out) { - uint64_t lo = convert_byte_order(val.lo); - uint64_t hi = convert_byte_order(val.hi); + uint64_t lo = convert_byte_order64(val.lo); + uint64_t hi = convert_byte_order64(val.hi); memcpy(bytes_out, &lo, 8); memcpy(bytes_out + 8, &hi, 8); } @@ -85,6 +98,40 @@ pv_init_extra(polyval_t *pv) pv->hr.lo = rev64(pv->h.lo); pv->hr.hi = rev64(pv->h.hi); } +#elif defined(PV_USE_CTMUL) +#include "ext/polyval/ctmul.c" + +static inline u128 +u128_from_bytes(const uint8_t *bytes) +{ + u128 r; + memcpy(&r.v, bytes, 16); + for (int i = 0; i < 4; ++i) { + r.v[i] = convert_byte_order32(r.v[i]); + } + return r; +} +static inline void +u128_to_bytes(u128 val, uint8_t *bytes_out) +{ + uint32_t v[4]; + for (int i = 0; i < 4; ++i) { + v[i] = convert_byte_order32(val.v[i]); + } + memcpy(bytes_out, v, 16); +} +static inline void +pv_xor(polyval_t *pv, u128 val) +{ + for (int i = 0; i < 4; ++i) { + pv->y.v[i] ^= val.v[i]; + } +} +static inline void +pv_init_extra(polyval_t *pv) +{ + (void)pv; +} #endif void diff --git a/src/ext/polyval/polyval.h b/src/ext/polyval/polyval.h index 9a83e7225b..4e21fcbb85 100644 --- a/src/ext/polyval/polyval.h +++ b/src/ext/polyval/polyval.h @@ -12,13 +12,17 @@ #include "orconfig.h" #include "lib/cc/torint.h" -#define PV_USE_CTMUL64 +#define PV_USE_CTMUL #ifdef PV_USE_CTMUL64 /** A 128-bit integer represented as its low and high portion. */ struct pv_u128_ { uint64_t lo; uint64_t hi; +} pv_u128_; +#elif defined(PV_USE_CTMUL) +struct pv_u128_ { + uint32_t v[4]; }; #endif @@ -28,9 +32,11 @@ struct pv_u128_ { typedef struct polyval_t { /** The key itself. */ struct pv_u128_ h; +#ifdef PV_USE_CTMUL64 /** The elements of the key in bit-reversed form. * (Used as an optimization.) */ struct pv_u128_ hr; +#endif /** The accumulator */ struct pv_u128_ y; } polyval_t;