Improve side-channel silence when comparing values to zero.

author Niels Möller <nisse@lysator.liu.se>

Mon, 6 Nov 2023 18:15:22 +0000 (19:15 +0100)

committer Niels Möller <nisse@lysator.liu.se>

Mon, 6 Nov 2023 18:15:22 +0000 (19:15 +0100)
author Niels Möller <nisse@lysator.liu.se>
Mon, 6 Nov 2023 18:15:22 +0000 (19:15 +0100)
committer Niels Möller <nisse@lysator.liu.se>
Mon, 6 Nov 2023 18:15:22 +0000 (19:15 +0100)
diff --git a/ChangeLog b/ChangeLog

index 278de08ba7cdc1193a3fb68e145a0bc818dcae0c..4d53b8969fb386a4fe6b962d95c602ad935882cd 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+2023-11-06  Niels Möller  <nisse@lysator.liu.se>
+
+       Avoid comparison like cnd = (x == 0) in code intended to be
+       side-channel silent, since such comparisons may be compiled to
+       branches by some compilers/architectures, in particular 32-bit
+       x86 and the msvc compiler.
+       * nettle-internal.h (IS_ZERO_SMALL): New macro.
+       * memeql-sec.c (memeql_sec): Use IS_ZERO_SMALL.
+       * pkcs1-sec-decrypt.c (EQUAL): Likewise.
+
+       * cnd-copy.c (cnd_copy): Require that cnd argument is 1 or 0.
+       * ecc-mul-a.c (ecc_mul_a) [ECC_MUL_A_WBITS == 0]:
+       Rearrange loop to pass 0 or 1 to cnd_copy.
+       * ecc-mul-a-eh.c (ecc_mul_a_eh) [ECC_MUL_A_EH_WBITS == 0]:
+       Likewise.
+       * ecc-mul-a.c (ecc_mul_a) [ECC_MUL_A_WBITS > 0]: Use
+       IS_ZERO_SMALL, and pass 0 or 1 to cnd_copy.
+       * ecc-mul-g.c (ecc_mul_g): Likewise.
+
+       * ecc-internal.h (is_zero_limb): New inline function.
+       * eddsa-decompress.c (_eddsa_decompress): Use is_zero_limb.
+       * ecc-gostdsa-sign.c (ecc_gostdsa_sign): Likewise.
+       * ecc-mod-arith.c (ecc_mod_zero_p): Likewise.
+       (ecc_mod_equal_p): Avoid comparison cy == 0.
+       * ecc-j-to-a.c (ecc_j_to_a): Avoid comparison cy == 0.
+
  2023-10-06  Niels Möller  <nisse@lysator.liu.se>
  
         * testsuite/rsa-sec-decrypt-test.c (test_main): Skip side-channel
diff --git a/cnd-copy.c b/cnd-copy.c

index d24da3d06366bcbb1c0d5b7f4de43c377d332ef0..878cd3206446515d291334eedfa554a796e18f67 100644 (file)
--- a/cnd-copy.c
+++ b/cnd-copy.c
@@ -43,7 +43,7 @@ cnd_copy (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n)
    mp_limb_t mask, keep;
    mp_size_t i;
  
-  mask = -(mp_limb_t) (cnd !=0);
+  mask = -(mp_limb_t) cnd;
    keep = ~mask;
  
    for (i = 0; i < n; i++)
diff --git a/ecc-gostdsa-sign.c b/ecc-gostdsa-sign.c

index 491a228157783304096375285d7a68ad0d97c561..c811c87ef43234382cae86f4d9b2c725ef5b316e 100644 (file)
--- a/ecc-gostdsa-sign.c
+++ b/ecc-gostdsa-sign.c
@@ -91,7 +91,7 @@ ecc_gostdsa_sign (const struct ecc_curve *ecc,
     * so one subtraction should suffice. */
  
    *scratch = mpn_sub_n (tp, sp, ecc->q.m, ecc->p.size);
-  cnd_copy (*scratch == 0, sp, tp, ecc->p.size);
+  cnd_copy (is_zero_limb (*scratch), sp, tp, ecc->p.size);
  
  #undef P
  #undef hp
diff --git a/ecc-internal.h b/ecc-internal.h

index be02de5f534d4e4f7660e721f30245e6aaa6a51e..2a5e3ae10f0049586af4f3e42c288bf57c59b6d8 100644 (file)
--- a/ecc-internal.h
+++ b/ecc-internal.h
@@ -85,6 +85,13 @@
  #define curve25519_eh_to_x _nettle_curve25519_eh_to_x
  #define curve448_eh_to_x _nettle_curve448_eh_to_x
  
+/* Returns 1 if x is zero, and 0 otherwise, using only shifts, OR and
+   subtraction instead of a comparison, so that the compiler has no
+   reason to emit a branch.
+
+   Declared static inline: a plain "inline" definition in a header
+   provides no external definition in C99 and later, so a call that
+   the compiler chooses not to inline would fail to link. */
+static inline int
+is_zero_limb (mp_limb_t x)
+{
+  /* Copy every bit one position up, so that the low bit survives the
+     right shift below. After this, (x >> 1) is non-zero exactly when
+     the input was, and its top bit is always clear. */
+  x |= (x << 1);
+  /* Subtracting 1 wraps around to all ones only for a zero input;
+     the top bit of the difference is the result. */
+  return ((x >> 1) - 1) >> (GMP_LIMB_BITS - 1);
+}
+
  extern const struct ecc_curve _nettle_secp_192r1;
  extern const struct ecc_curve _nettle_secp_224r1;
  extern const struct ecc_curve _nettle_secp_256r1;
@@ -464,6 +471,7 @@ ecc_mul_m (const struct ecc_modulo *m,
            mp_limb_t *qx, const uint8_t *n, const mp_limb_t *px,
            mp_limb_t *scratch);
  
+/* The cnd argument must be 1 or 0. */
  void
  cnd_copy (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n);
  
diff --git a/ecc-j-to-a.c b/ecc-j-to-a.c

index ac134b5166d5018a76e675ecea0c4d0123452d64..3196cd6fa38289f69889fa10fc5a9a33ebbe1379 100644 (file)
--- a/ecc-j-to-a.c
+++ b/ecc-j-to-a.c
@@ -72,7 +72,7 @@ ecc_j_to_a (const struct ecc_curve *ecc,
              already be < 2*ecc->q, so one subtraction should
              suffice. */
           cy = mpn_sub_n (scratch, r, ecc->q.m, ecc->p.size);
-         cnd_copy (cy == 0, r, scratch, ecc->p.size);
+         cnd_copy (1 - cy, r, scratch, ecc->p.size);
         }
        return;
      }
diff --git a/ecc-mod-arith.c b/ecc-mod-arith.c

index d01378643c3b412697ccdc3661ba4148271b67ec..3b9bcb47cee360f5a534755021c854490d814604 100644 (file)
--- a/ecc-mod-arith.c
+++ b/ecc-mod-arith.c
@@ -55,7 +55,7 @@ ecc_mod_zero_p (const struct ecc_modulo *m, const mp_limb_t *xp_in)
        is_not_p |= (xp[i] ^ m->m[i]);
      }
  
-  return (is_non_zero == 0) | (is_not_p == 0);
+  return is_zero_limb (is_non_zero) | is_zero_limb (is_not_p);
  }
  
  int
@@ -65,7 +65,7 @@ ecc_mod_equal_p (const struct ecc_modulo *m, const mp_limb_t *a,
    mp_limb_t cy;
    cy = mpn_sub_n (scratch, a, ref, m->size);
    /* If cy > 0, i.e., a < ref, then they can't be equal mod m. */
-  return (cy == 0) & ecc_mod_zero_p (m, scratch);
+  return (1 - cy) & ecc_mod_zero_p (m, scratch);
  }
  
  void
diff --git a/ecc-mul-a-eh.c b/ecc-mul-a-eh.c

index 980fec3f715e7eb5abd09de6ae5b218c45e5e2b1..30130aeab5a15338c4a0b0af97020d580f60ad68 100644 (file)
--- a/ecc-mul-a-eh.c
+++ b/ecc-mul-a-eh.c
@@ -66,21 +66,18 @@ ecc_mul_a_eh (const struct ecc_curve *ecc,
    
    for (i = ecc->p.size; i-- > 0; )
      {
-      mp_limb_t w = np[i];
-      mp_limb_t bit;
+      mp_limb_t w = np[i] << (GMP_LIMB_BITS - GMP_NUMB_BITS);
+      unsigned j;
  
-      for (bit = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);
-          bit > 0;
-          bit >>= 1)
+      for (j = 0; j < GMP_NUMB_BITS; j++, w <<= 1)
         {
-         int digit;
-
+         int bit;
           ecc->dup (ecc, r, r, scratch_out);
           ecc->add_hh (ecc, tp, r, pe, scratch_out);
  
-         digit = (w & bit) > 0;
+         bit = w >> (GMP_LIMB_BITS - 1);
           /* If we had a one-bit, use the sum. */
-         cnd_copy (digit, r, tp, 3*ecc->p.size);
+         cnd_copy (bit, r, tp, 3*ecc->p.size);
         }
      }
  }
diff --git a/ecc-mul-a.c b/ecc-mul-a.c

index 8e1355ebc933457cfa9b0da797477c9c2f8b1155..cc2a7960a1395abd8919d04717999efffb3d9af4 100644 (file)
--- a/ecc-mul-a.c
+++ b/ecc-mul-a.c
@@ -39,6 +39,7 @@
  
  #include "ecc.h"
  #include "ecc-internal.h"
+#include "nettle-internal.h"
  
  /* Binary algorithm needs 6*ecc->p.size + scratch for ecc_add_jja.
     Current total is 12 ecc->p.size, at most 864 bytes.
@@ -67,25 +68,23 @@ ecc_mul_a (const struct ecc_curve *ecc,
    
    for (i = ecc->p.size, is_zero = 1; i-- > 0; )
      {
-      mp_limb_t w = np[i];
-      mp_limb_t bit;
+      mp_limb_t w = np[i] << (GMP_LIMB_BITS - GMP_NUMB_BITS);
+      unsigned j;
  
-      for (bit = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);
-          bit > 0;
-          bit >>= 1)
+      for (j = 0; j < GMP_NUMB_BITS; j++, w <<= 1)
         {
-         int digit;
+         int bit;
  
           ecc_dup_jj (ecc, r, r, scratch_out);
           ecc_add_jja (ecc, tp, r, pj, scratch_out);
  
-         digit = (w & bit) > 0;
+         bit = w >> (GMP_LIMB_BITS - 1);
           /* If is_zero is set, r is the zero point,
              and ecc_add_jja produced garbage. */
           cnd_copy (is_zero, tp, pj, 3*ecc->p.size);
-         is_zero &= ~digit;
+         is_zero &= 1 - bit;
           /* If we had a one-bit, use the sum. */
-         cnd_copy (digit, r, tp, 3*ecc->p.size);
+         cnd_copy (bit, r, tp, 3*ecc->p.size);
         }
      }
  }
@@ -145,10 +144,11 @@ ecc_mul_a (const struct ecc_curve *ecc,
    assert (bits < TABLE_SIZE);
  
    mpn_sec_tabselect (r, table, 3*ecc->p.size, TABLE_SIZE, bits);
-  is_zero = (bits == 0);
+  is_zero = IS_ZERO_SMALL (bits);
  
    for (;;)
      {
+      int bits_is_zero;
        unsigned j;
        if (shift >= ECC_MUL_A_WBITS)
         {
@@ -174,11 +174,12 @@ ecc_mul_a (const struct ecc_curve *ecc,
        mpn_sec_tabselect (tp, table, 3*ecc->p.size, TABLE_SIZE, bits);
        cnd_copy (is_zero, r, tp, 3*ecc->p.size);
        ecc_add_jjj (ecc, tp, tp, r, scratch_out);
+      bits_is_zero = IS_ZERO_SMALL (bits);
  
        /* Use the sum when valid. ecc_add_jja produced garbage if
-        is_zero != 0 or bits == 0, . */          
-      cnd_copy (bits & (is_zero - 1), r, tp, 3*ecc->p.size);
-      is_zero &= (bits == 0);
+        is_zero or bits_is_zero. */
+      cnd_copy (1 - (bits_is_zero | is_zero), r, tp, 3*ecc->p.size);
+      is_zero &= bits_is_zero;
      }
  #undef table
  #undef tp
diff --git a/ecc-mul-g.c b/ecc-mul-g.c

index 677a37e7a01979b516a5ce2007e5fae5e84d02d1..97bbabad6dd0a4393312d7d671ee108032337a14 100644 (file)
--- a/ecc-mul-g.c
+++ b/ecc-mul-g.c
@@ -39,6 +39,7 @@
  
  #include "ecc.h"
  #include "ecc-internal.h"
+#include "nettle-internal.h"
  
  void
  ecc_mul_g (const struct ecc_curve *ecc, mp_limb_t *r,
@@ -71,7 +72,8 @@ ecc_mul_g (const struct ecc_curve *ecc, mp_limb_t *r,
           /* Avoid the mp_bitcnt_t type for compatibility with older GMP
              versions. */
           unsigned bit_index;
-         
+         int bits_is_zero;
+
           /* Extract c bits from n, stride k, starting at i + kcj,
              ending at i + k (cj + c - 1)*/
           for (bits = 0, bit_index = i + k*(c*j+c); bit_index > i + k*c*j; )
@@ -96,10 +98,12 @@ ecc_mul_g (const struct ecc_curve *ecc, mp_limb_t *r,
           cnd_copy (is_zero, r + 2*ecc->p.size, ecc->unit, ecc->p.size);
           
           ecc_add_jja (ecc, tp, r, tp, scratch_out);
+         bits_is_zero = IS_ZERO_SMALL (bits);
+
           /* Use the sum when valid. ecc_add_jja produced garbage if
-            is_zero != 0 or bits == 0, . */      
-         cnd_copy (bits & (is_zero - 1), r, tp, 3*ecc->p.size);
-         is_zero &= (bits == 0);
+            is_zero or bits_is_zero. */
+         cnd_copy (1 - (bits_is_zero | is_zero), r, tp, 3*ecc->p.size);
+         is_zero &= bits_is_zero;
         }
      }
  #undef tp
diff --git a/eddsa-decompress.c b/eddsa-decompress.c

index 8517fb7bbd3490dce3272614efee42c823d775a2..0718c1a0c44998f545fc1bce1937c8b372202067 100644 (file)
--- a/eddsa-decompress.c
+++ b/eddsa-decompress.c
@@ -83,7 +83,7 @@ _eddsa_decompress (const struct ecc_curve *ecc, mp_limb_t *p,
  
    /* Check range. */
    if (nlimbs > ecc->p.size)
-    res = (scratch[nlimbs - 1] == 0);
+    res = is_zero_limb (scratch[nlimbs - 1]);
    else
      res = 1;
  
diff --git a/memeql-sec.c b/memeql-sec.c

index b19052eda4a13312662ff77439e6f5fbaf34cbc2..ba8c5dd6950a4933a9cba0cf97f98547fda99650 100644 (file)
--- a/memeql-sec.c
+++ b/memeql-sec.c
@@ -34,6 +34,7 @@
  #endif
  
  #include "memops.h"
+#include "nettle-internal.h"
  
  int
  memeql_sec (const void *a, const void *b, size_t n)
@@ -47,5 +48,5 @@ memeql_sec (const void *a, const void *b, size_t n)
    for (i = diff = 0; i < n; i++)
      diff |= (ap[i] ^ bp[i]);
  
-  return diff == 0;
+  return IS_ZERO_SMALL (diff);
  }
diff --git a/nettle-internal.h b/nettle-internal.h

index c41f3ee08516cc9ca081250acf512b9a8f90407b..2b7dc8164e00c1da8b37a161d9acc143fe4b8cac 100644 (file)
--- a/nettle-internal.h
+++ b/nettle-internal.h
@@ -84,6 +84,10 @@
  #define NETTLE_MAX_CIPHER_BLOCK_SIZE 32
  #define NETTLE_MAX_CIPHER_KEY_SIZE 32
  
+/* Equivalent to x == 0, but with an expression that should compile to
+   branch free code on all compilers. Evaluates to 1 when x is zero and
+   to 0 otherwise. Requires that x is at most 31 bits, i.e., 0 <= x <
+   2^31: x is converted to uint32_t, and for a larger value the top bit
+   of x - 1 can be set even though x is non-zero. */
+#define IS_ZERO_SMALL(x) (((uint32_t) (x) - 1U) >> 31)
+
  /* Doesn't quite fit with the other algorithms, because of the weak
   * keys. Weak keys are not reported, the functions will simply crash
   * if you try to use a weak key. */
@@ -103,7 +107,7 @@ extern const struct nettle_aead nettle_chacha;
  extern const struct nettle_aead nettle_salsa20;
  extern const struct nettle_aead nettle_salsa20r12;
  
-/* All-in-one CBC encrypt fucntinos treated as AEAD with no
+/* All-in-one CBC encrypt functions treated as AEAD with no
     authentication and no decrypt method. */
  extern const struct nettle_aead nettle_cbc_aes128;
  extern const struct nettle_aead nettle_cbc_aes192;
diff --git a/pkcs1-sec-decrypt.c b/pkcs1-sec-decrypt.c

index 942a2bd39633b92296ee192e8db19bac8ce76ff4..7cf56fe4b776bc6a6b870d999cdcdb4a91200ff5 100644 (file)
--- a/pkcs1-sec-decrypt.c
+++ b/pkcs1-sec-decrypt.c
@@ -44,14 +44,14 @@
  
  #include "gmp-glue.h"
  #include "pkcs1-internal.h"
+#include "nettle-internal.h"
  
  /* Inputs are always cast to uint32_t values. But all values used in this
   * function should never exceed the maximum value of a uint32_t anyway.
   * these macros returns 1 on success, 0 on failure */
  #define NOT_EQUAL(a, b) \
      ((0U - ((uint32_t)(a) ^ (uint32_t)(b))) >> 31)
-#define EQUAL(a, b) \
-    ((((uint32_t)(a) ^ (uint32_t)(b)) - 1U) >> 31)
+#define EQUAL(a, b) (IS_ZERO_SMALL ((a) ^ (b)))
  #define GREATER_OR_EQUAL(a, b) \
      (1U - (((uint32_t)(a) - (uint32_t)(b)) >> 31))
author	Niels Möller <nisse@lysator.liu.se>
	Mon, 6 Nov 2023 18:15:22 +0000 (19:15 +0100)
committer	Niels Möller <nisse@lysator.liu.se>
	Mon, 6 Nov 2023 18:15:22 +0000 (19:15 +0100)
ChangeLog		patch \| blob \| blame \| history
cnd-copy.c		patch \| blob \| blame \| history
ecc-gostdsa-sign.c		patch \| blob \| blame \| history
ecc-internal.h		patch \| blob \| blame \| history
ecc-j-to-a.c		patch \| blob \| blame \| history
ecc-mod-arith.c		patch \| blob \| blame \| history
ecc-mul-a-eh.c		patch \| blob \| blame \| history
ecc-mul-a.c		patch \| blob \| blame \| history
ecc-mul-g.c		patch \| blob \| blame \| history
eddsa-decompress.c		patch \| blob \| blame \| history
memeql-sec.c		patch \| blob \| blame \| history
nettle-internal.h		patch \| blob \| blame \| history
pkcs1-sec-decrypt.c		patch \| blob \| blame \| history