From: Nick Mathewson <nickm@torproject.org>
Date: Sat, 19 Apr 2025 00:43:10 +0000 (-0400)
Subject: adapt 32-bit ctmul.c to work with polyval.c
X-Git-Tag: tor-0.4.9.3-alpha~49^2~13
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7ce4df844ebc0f589505b4417bfb017b0908b765;p=thirdparty%2Ftor.git

adapt 32-bit ctmul.c to work with polyval.c
---

diff --git a/src/ext/polyval/ctmul.c b/src/ext/polyval/ctmul.c
index 362320252f..b4bfa25bce 100644
--- a/src/ext/polyval/ctmul.c
+++ b/src/ext/polyval/ctmul.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
  *
- * Permission is hereby granted, free of charge, to any person obtaining 
+ * Permission is hereby granted, free of charge, to any person obtaining
  * a copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * without limitation the rights to use, copy, modify, merge, publish,
@@ -9,12 +9,12 @@
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
  *
- * The above copyright notice and this permission notice shall be 
+ * The above copyright notice and this permission notice shall be
  * included in all copies or substantial portions of the Software.
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
@@ -22,8 +22,6 @@
  * SOFTWARE.
  */
 
-#include "inner.h"
-
 /*
  * We compute "carryless multiplications" through normal integer
  * multiplications, masking out enough bits to create "holes" in which
@@ -50,7 +48,10 @@
  * multiplications, we use the "fast mul" code by default.
  */
 
-#if BR_SLOW_MUL
+// 32x32 -> 64 multiply: both operands are widened to uint64_t first.
+#define MUL(x, y) ((uint64_t)(x) * (uint64_t)(y))
+
+#ifdef BR_SLOW_MUL
 
 /*
  * This implementation uses Karatsuba-like reduction to make fewer
@@ -192,61 +193,21 @@ bmul(uint32_t *hi, uint32_t *lo, uint32_t x, uint32_t y)
 
 #endif
 
-/* see bearssl_hash.h */
-void
-br_ghash_ctmul(void *y, const void *h, const void *data, size_t len)
+static void
+pv_mul_y_h(polyval_t *pv)
 {
-	const unsigned char *buf, *hb;
-	unsigned char *yb;
-	uint32_t yw[4];
-	uint32_t hw[4];
+	uint32_t *yw = pv->y.v;
+	const uint32_t *hw = pv->h.v;
 
 	/*
 	 * Throughout the loop we handle the y and h values as arrays
 	 * of 32-bit words.
 	 */
-	buf = data;
-	yb = y;
-	hb = h;
-	yw[3] = br_dec32be(yb);
-	yw[2] = br_dec32be(yb + 4);
-	yw[1] = br_dec32be(yb + 8);
-	yw[0] = br_dec32be(yb + 12);
-	hw[3] = br_dec32be(hb);
-	hw[2] = br_dec32be(hb + 4);
-	hw[1] = br_dec32be(hb + 8);
-	hw[0] = br_dec32be(hb + 12);
-	while (len > 0) {
-		const unsigned char *src;
-		unsigned char tmp[16];
+	{
 		int i;
 		uint32_t a[9], b[9], zw[8];
 		uint32_t c0, c1, c2, c3, d0, d1, d2, d3, e0, e1, e2, e3;
 
-		/*
-		 * Get the next 16-byte block (using zero-padding if
-		 * necessary).
-		 */
-		if (len >= 16) {
-			src = buf;
-			buf += 16;
-			len -= 16;
-		} else {
-			memcpy(tmp, buf, len);
-			memset(tmp + len, 0, (sizeof tmp) - len);
-			src = tmp;
-			len = 0;
-		}
-
-		/*
-		 * Decode the block. The GHASH standard mandates
-		 * big-endian encoding.
-		 */
-		yw[3] ^= br_dec32be(src);
-		yw[2] ^= br_dec32be(src + 4);
-		yw[1] ^= br_dec32be(src + 8);
-		yw[0] ^= br_dec32be(src + 12);
-
 		/*
 		 * We multiply two 128-bit field elements. We use
 		 * Karatsuba to turn that into three 64-bit
@@ -306,6 +267,8 @@ br_ghash_ctmul(void *y, const void *h, const void *data, size_t len)
 		d0 ^= e2;
 		d1 ^= e3;
 
+#if 0
+		// This rotation is GHASH-only.
 		/*
 		 * GHASH specification has the bits "reversed" (most
 		 * significant is in fact least significant), which does
@@ -320,6 +283,16 @@ br_ghash_ctmul(void *y, const void *h, const void *data, size_t len)
 		zw[5] = (d1 << 1) | (d0 >> 31);
 		zw[6] = (d2 << 1) | (d1 >> 31);
 		zw[7] = (d3 << 1) | (d2 >> 31);
+#else
+		zw[0] = c0;
+		zw[1] = c1;
+		zw[2] = c2;
+		zw[3] = c3;
+		zw[4] = d0;
+		zw[5] = d1;
+		zw[6] = d2;
+		zw[7] = d3;
+#endif
 
 		/*
 		 * We now do the reduction modulo the field polynomial
@@ -332,14 +305,7 @@ br_ghash_ctmul(void *y, const void *h, const void *data, size_t len)
 			zw[i + 4] ^= lw ^ (lw >> 1) ^ (lw >> 2) ^ (lw >> 7);
 			zw[i + 3] ^= (lw << 31) ^ (lw << 30) ^ (lw << 25);
 		}
-		memcpy(yw, zw + 4, sizeof yw);
+		memcpy(yw, zw + 4, 16);
 	}
-
-	/*
-	 * Encode back the result.
-	 */
-	br_enc32be(yb, yw[3]);
-	br_enc32be(yb + 4, yw[2]);
-	br_enc32be(yb + 8, yw[1]);
-	br_enc32be(yb + 12, yw[0]);
 }
+#undef MUL
diff --git a/src/ext/polyval/polyval.c b/src/ext/polyval/polyval.c
index 163d10d8c9..9140b9457e 100644
--- a/src/ext/polyval/polyval.c
+++ b/src/ext/polyval/polyval.c
@@ -29,8 +29,10 @@ void pv_mul_y_h(polyval_t *);
 #ifdef WORDS_BIG_ENDIAN
 #ifdef __GNUC__
 #define bswap64(x) __builtin_bswap64(x)
+#define bswap32(x) __builtin_bswap32(x)
 #else
-static inline uint64_t bswap64(uint64_t v)
+static inline uint64_t
+bswap64(uint64_t v)
 {
   return
     ((value & 0xFF00000000000000) >> 56) |
@@ -42,13 +44,24 @@ static inline uint64_t bswap64(uint64_t v)
     ((value & 0x000000000000FF00) >> 8) |
     ((value & 0x00000000000000FF));
 }
+/** Portable fallback: reverse the byte order of the 32-bit value v. */
+static inline uint32_t
+bswap32(uint32_t v)
+{
+  return ((v & 0xFF000000) >> 24) |
+         ((v & 0x00FF0000) >> 8) |
+         ((v & 0x0000FF00) << 8) |
+         ((v & 0x000000FF) << 24);
+}
 #endif
 #endif
 
 #ifdef WORDS_BIG_ENDIAN
-#define convert_byte_order(x) bswap64(x)
+#define convert_byte_order64(x) bswap64(x)
+#define convert_byte_order32(x) bswap32(x)
 #else
-#define convert_byte_order(x) (x)
+#define convert_byte_order64(x) (x)
+#define convert_byte_order32(x) (x)
 #endif
 
 #ifdef PV_USE_CTMUL64
@@ -61,15 +74,15 @@ u128_from_bytes(const uint8_t *bytes)
   u128 r;
   memcpy(&r.lo, bytes, 8);
   memcpy(&r.hi, bytes + 8, 8);
-  r.lo = convert_byte_order(r.lo);
-  r.hi = convert_byte_order(r.hi);
+  r.lo = convert_byte_order64(r.lo);
+  r.hi = convert_byte_order64(r.hi);
   return r;
 }
 static inline void
 u128_to_bytes(u128 val, uint8_t *bytes_out)
 {
-  uint64_t lo = convert_byte_order(val.lo);
-  uint64_t hi = convert_byte_order(val.hi);
+  uint64_t lo = convert_byte_order64(val.lo);
+  uint64_t hi = convert_byte_order64(val.hi);
   memcpy(bytes_out, &lo, 8);
   memcpy(bytes_out + 8, &hi, 8);
 }
@@ -85,6 +98,40 @@ pv_init_extra(polyval_t *pv)
   pv->hr.lo = rev64(pv->h.lo);
   pv->hr.hi = rev64(pv->h.hi);
 }
+#elif defined(PV_USE_CTMUL)
+#include "ext/polyval/ctmul.c"
+
+static inline u128
+u128_from_bytes(const uint8_t *bytes)
+{
+  u128 out; /* four 32-bit words, filled from the 16 input bytes */
+  memcpy(out.v, bytes, sizeof(out.v));
+  for (int w = 0; w < 4; ++w) { /* apply the 32-bit byte-order conversion */
+    out.v[w] = convert_byte_order32(out.v[w]);
+  }
+  return out;
+}
+/** Write val to bytes_out as 16 bytes, converting each word's byte order. */
+static inline void
+u128_to_bytes(u128 val, uint8_t *bytes_out)
+{
+  for (int w = 0; w < 4; ++w) { /* val is our own copy: convert in place */
+    val.v[w] = convert_byte_order32(val.v[w]);
+  }
+  memcpy(bytes_out, val.v, sizeof(val.v));
+}
+static inline void
+pv_xor(polyval_t *pv, u128 val)
+{
+  for (int w = 0; w < 4; ++w) { /* y ^= val, one 32-bit word at a time */
+    pv->y.v[w] = pv->y.v[w] ^ val.v[w];
+  }
+}
+static inline void
+pv_init_extra(polyval_t *pv)
+{
+  (void)pv; /* nothing to precompute here; pv is intentionally unused */
+}
 #endif
 
 void
diff --git a/src/ext/polyval/polyval.h b/src/ext/polyval/polyval.h
index 9a83e7225b..4e21fcbb85 100644
--- a/src/ext/polyval/polyval.h
+++ b/src/ext/polyval/polyval.h
@@ -12,13 +12,17 @@
 #include "orconfig.h"
 #include "lib/cc/torint.h"
 
-#define PV_USE_CTMUL64
+#define PV_USE_CTMUL
 
 #ifdef PV_USE_CTMUL64
 /** A 128-bit integer represented as its low and high portion. */
 struct pv_u128_ {
   uint64_t lo;
   uint64_t hi;
+};
+#elif defined(PV_USE_CTMUL)
+struct pv_u128_ {
+  uint32_t v[4];
 };
 #endif
 
@@ -28,9 +32,11 @@ struct pv_u128_ {
 typedef struct polyval_t {
   /** The key itself. */
   struct pv_u128_ h;
+#ifdef PV_USE_CTMUL64
   /** The elements of the key in bit-reversed form.
    * (Used as an optimization.) */
   struct pv_u128_ hr;
+#endif /* defined(PV_USE_CTMUL64) */
   /** The accumulator */
   struct pv_u128_ y;
 } polyval_t;