/*
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
*
- * Permission is hereby granted, free of charge, to any person obtaining
+ * Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
- * The above copyright notice and this permission notice shall be
+ * The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "inner.h"
-
/*
* We compute "carryless multiplications" through normal integer
* multiplications, masking out enough bits to create "holes" in which
* carries may expand without altering our data. Unless the platform
* is known to have slow or non-constant-time multiplications, we use
* the "fast mul" code by default.
*/
-#if BR_SLOW_MUL
+// A 32x32 -> 64 multiply.
+#define MUL(x, y) (((uint64_t)(x)) * ((uint64_t)(y)))
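+/*
+ * Illustrative sketch, not part of this patch: with MUL() and the
+ * "holes" technique described above, a constant-time 32x32->64
+ * carryless multiply can be built from masked integer multiplies.
+ * Each operand is split into four interleaved quarters, so every
+ * 4-bit "digit" of a partial product counts at most 8 one-bit terms;
+ * carries never reach the next useful bit, and the low bit of each
+ * digit is the parity we want. (bmul32 is a hypothetical name.)
+ *
+ *     static uint64_t bmul32(uint32_t x, uint32_t y) {
+ *         uint32_t x0 = x & 0x11111111, x1 = x & 0x22222222,
+ *                  x2 = x & 0x44444444, x3 = x & 0x88888888;
+ *         uint32_t y0 = y & 0x11111111, y1 = y & 0x22222222,
+ *                  y2 = y & 0x44444444, y3 = y & 0x88888888;
+ *         uint64_t z0 = MUL(x0, y0) ^ MUL(x1, y3) ^ MUL(x2, y2) ^ MUL(x3, y1);
+ *         uint64_t z1 = MUL(x0, y1) ^ MUL(x1, y0) ^ MUL(x2, y3) ^ MUL(x3, y2);
+ *         uint64_t z2 = MUL(x0, y2) ^ MUL(x1, y1) ^ MUL(x2, y0) ^ MUL(x3, y3);
+ *         uint64_t z3 = MUL(x0, y3) ^ MUL(x1, y2) ^ MUL(x2, y1) ^ MUL(x3, y0);
+ *         return (z0 & 0x1111111111111111) | (z1 & 0x2222222222222222)
+ *              | (z2 & 0x4444444444444444) | (z3 & 0x8888888888888888);
+ *     }
+ */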
+
+#ifdef BR_SLOW_MUL
/*
* This implementation uses Karatsuba-like reduction to make fewer
* multiplications.
#endif
-/* see bearssl_hash.h */
-void
-br_ghash_ctmul(void *y, const void *h, const void *data, size_t len)
+static void
+pv_mul_y_h(polyval_t *pv)
{
- const unsigned char *buf, *hb;
- unsigned char *yb;
- uint32_t yw[4];
- uint32_t hw[4];
+ uint32_t *yw = pv->y.v;
+ const uint32_t *hw = pv->h.v;
/*
- * Throughout the loop we handle the y and h values as arrays
- * of 32-bit words.
+ * We handle the y and h values as arrays of 32-bit words.
*/
- buf = data;
- yb = y;
- hb = h;
- yw[3] = br_dec32be(yb);
- yw[2] = br_dec32be(yb + 4);
- yw[1] = br_dec32be(yb + 8);
- yw[0] = br_dec32be(yb + 12);
- hw[3] = br_dec32be(hb);
- hw[2] = br_dec32be(hb + 4);
- hw[1] = br_dec32be(hb + 8);
- hw[0] = br_dec32be(hb + 12);
- while (len > 0) {
- const unsigned char *src;
- unsigned char tmp[16];
+ {
int i;
uint32_t a[9], b[9], zw[8];
uint32_t c0, c1, c2, c3, d0, d1, d2, d3, e0, e1, e2, e3;
- /*
- * Get the next 16-byte block (using zero-padding if
- * necessary).
- */
- if (len >= 16) {
- src = buf;
- buf += 16;
- len -= 16;
- } else {
- memcpy(tmp, buf, len);
- memset(tmp + len, 0, (sizeof tmp) - len);
- src = tmp;
- len = 0;
- }
-
- /*
- * Decode the block. The GHASH standard mandates
- * big-endian encoding.
- */
- yw[3] ^= br_dec32be(src);
- yw[2] ^= br_dec32be(src + 4);
- yw[1] ^= br_dec32be(src + 8);
- yw[0] ^= br_dec32be(src + 12);
-
/*
* We multiply two 128-bit field elements. We use
* Karatsuba to turn that into three 64-bit
* multiplications, and a second Karatsuba level to get
* down to nine 32x32 multiplications (the a[9] and b[9]
* arrays).
d0 ^= e2;
d1 ^= e3;
+#if 0
+ // This one-bit left shift is GHASH-only; POLYVAL does not need it.
/*
* GHASH specification has the bits "reversed" (most
* significant is in fact least significant), which does
* not matter for the carryless multiplication itself, but
* leaves the 255-bit product misaligned by one bit; the
* one-bit left shift below corrects that.
zw[5] = (d1 << 1) | (d0 >> 31);
zw[6] = (d2 << 1) | (d1 >> 31);
zw[7] = (d3 << 1) | (d2 >> 31);
+#else
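+ /*
+ * POLYVAL uses the natural bit order, so the carryless product is
+ * already in place: copy it out with no shift.
+ */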
+ zw[0] = c0;
+ zw[1] = c1;
+ zw[2] = c2;
+ zw[3] = c3;
+ zw[4] = d0;
+ zw[5] = d1;
+ zw[6] = d2;
+ zw[7] = d3;
+#endif
/*
* We now do the reduction modulo the field polynomial
* x^128 + x^7 + x^2 + x + 1.
zw[i + 4] ^= lw ^ (lw >> 1) ^ (lw >> 2) ^ (lw >> 7);
zw[i + 3] ^= (lw << 31) ^ (lw << 30) ^ (lw << 25);
}
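+ /*
+ * Each folded word lw contributes lw ^ (lw >> 1) ^ (lw >> 2) ^
+ * (lw >> 7) to one word and its shifted-out bits, (lw << 31) ^
+ * (lw << 30) ^ (lw << 25), to the neighbouring word: in this bit
+ * order that is multiplication by the low reduction terms
+ * 1 + x + x^2 + x^7.
+ */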
- memcpy(yw, zw + 4, sizeof yw);
+ memcpy(yw, zw + 4, 16);
}
-
- /*
- * Encode back the result.
- */
- br_enc32be(yb, yw[3]);
- br_enc32be(yb + 4, yw[2]);
- br_enc32be(yb + 8, yw[1]);
- br_enc32be(yb + 12, yw[0]);
}
+#undef MUL
#ifdef WORDS_BIG_ENDIAN
#ifdef __GNUC__
#define bswap64(x) __builtin_bswap64(x)
+#define bswap32(x) __builtin_bswap32(x)
#else
-static inline uint64_t bswap64(uint64_t v)
+static inline uint64_t
+bswap64(uint64_t v)
{
return
((v & 0xFF00000000000000) >> 56) |
((v & 0x00FF000000000000) >> 40) |
((v & 0x0000FF0000000000) >> 24) |
((v & 0x000000FF00000000) >> 8) |
((v & 0x00000000FF000000) << 8) |
((v & 0x0000000000FF0000) << 24) |
((v & 0x000000000000FF00) << 40) |
((v & 0x00000000000000FF) << 56);
}
+static inline uint32_t
+bswap32(uint32_t v)
+{
+ return
+ ((v & 0xFF000000) >> 24) |
+ ((v & 0x00FF0000) >> 8) |
+ ((v & 0x0000FF00) << 8) |
+ ((v & 0x000000FF) << 24);
+}
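+/* For example, bswap32(0x01020304) is 0x04030201. */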
#endif
#endif
#ifdef WORDS_BIG_ENDIAN
-#define convert_byte_order(x) bswap64(x)
+#define convert_byte_order64(x) bswap64(x)
+#define convert_byte_order32(x) bswap32(x)
#else
-#define convert_byte_order(x) (x)
+#define convert_byte_order64(x) (x)
+#define convert_byte_order32(x) (x)
#endif
#ifdef PV_USE_CTMUL64
u128 r;
memcpy(&r.lo, bytes, 8);
memcpy(&r.hi, bytes + 8, 8);
- r.lo = convert_byte_order(r.lo);
- r.hi = convert_byte_order(r.hi);
+ r.lo = convert_byte_order64(r.lo);
+ r.hi = convert_byte_order64(r.hi);
return r;
}
static inline void
u128_to_bytes(u128 val, uint8_t *bytes_out)
{
- uint64_t lo = convert_byte_order(val.lo);
- uint64_t hi = convert_byte_order(val.hi);
+ uint64_t lo = convert_byte_order64(val.lo);
+ uint64_t hi = convert_byte_order64(val.hi);
memcpy(bytes_out, &lo, 8);
memcpy(bytes_out + 8, &hi, 8);
}
pv->hr.lo = rev64(pv->h.lo);
pv->hr.hi = rev64(pv->h.hi);
}
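+/*
+ * Note on hr (an explanatory aside, not new behavior): it caches the
+ * bit-reversed words of h. A 64x64 carryless multiply built from
+ * integer multiplications yields only the low 64 bits of the product
+ * directly; the usual trick is to also multiply the bit-reversed
+ * operands, since bit-reversing that result recovers the high half
+ * (off by one bit). Precomputing rev64(h) avoids redoing the
+ * reversal on every block.
+ */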
+#elif defined(PV_USE_CTMUL)
+#include "ext/polyval/ctmul.c"
+
+static inline u128
+u128_from_bytes(const uint8_t *bytes)
+{
+ u128 r;
+ memcpy(&r.v, bytes, 16);
+ for (int i = 0; i < 4; ++i) {
+ r.v[i] = convert_byte_order32(r.v[i]);
+ }
+ return r;
+}
+static inline void
+u128_to_bytes(u128 val, uint8_t *bytes_out)
+{
+ uint32_t v[4];
+ for (int i = 0; i < 4; ++i) {
+ v[i] = convert_byte_order32(val.v[i]);
+ }
+ memcpy(bytes_out, v, 16);
+}
+static inline void
+pv_xor(polyval_t *pv, u128 val)
+{
+ for (int i = 0; i < 4; ++i) {
+ pv->y.v[i] ^= val.v[i];
+ }
+}
+static inline void
+pv_init_extra(polyval_t *pv)
+{
+ (void)pv;
+}
#endif
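+/*
+ * Illustrative usage (an aside, not something this patch adds):
+ * absorbing one 16-byte block "b" into the accumulator is
+ *
+ *     pv_xor(pv, u128_from_bytes(b));   // y ^= b
+ *     pv_mul_y_h(pv);                   // y = y * h in GF(2^128)
+ *
+ * i.e. the POLYVAL update step y = (y ^ b) * h.
+ */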
void