git.ipfire.org Git - thirdparty/nettle.git/commitdiff
CTR mode optimizations for 16-byte block size. (branch: ctr-opt)
author: Niels Möller <nisse@lysator.liu.se>
Mon, 8 Jan 2018 21:28:52 +0000 (22:28 +0100)
committer: Niels Möller <nisse@lysator.liu.se>
Tue, 9 Jan 2018 06:31:54 +0000 (07:31 +0100)
ChangeLog
ctr.c

index 5975cab875c606766110f22a23bce98d581085aa..f31a301760bbb000798c090c16dfd152583e8eeb 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -9,6 +9,13 @@
 
 2018-01-08  Niels Möller  <nisse@lysator.liu.se>
 
+       * ctr.c (ctr_crypt16): New function, with optimizations specific
+       to 16-byte block size.
+       (ctr_fill16): New helper function, definition depending on
+       WORDS_BIGENDIAN, and little endian version requiring
+       HAVE_BUILTIN_BSWAP64.
+       (ctr_crypt): Use ctr_crypt16, when appropriate.
+
        * nettle-types.h (union nettle_block16): Add uint64_t field.
 
        * configure.ac: Check for __builtin_bswap64, define
diff --git a/ctr.c b/ctr.c
index f41644662b42707ecd6d6d4f181f9c2b7ffcacc6..8295e1af87e306bb674b5c1ac7aaf1951ce69efd 100644 (file)
--- a/ctr.c
+++ b/ctr.c
@@ -62,36 +62,128 @@ ctr_fill (size_t block_size, uint8_t *ctr, size_t length, uint8_t *buffer)
   return i;
 }
 
+#if WORDS_BIGENDIAN
+# define USE_CTR_CRYPT16 1
+static void
+ctr_fill16(uint8_t *ctr, size_t blocks, uint64_t *buffer)
+{
+  uint64_t hi, lo;
+  hi = READ_UINT64(ctr);
+  lo = READ_UINT64(ctr + 8);
+
+  while (blocks-- > 0)
+    {
+      *buffer++ = hi;
+      *buffer++ = lo;
+      hi += !(++lo);
+    }
+  WRITE_UINT64(ctr, hi);
+  WRITE_UINT64(ctr + 8, lo);
+}
+#else /* !WORDS_BIGENDIAN */
+# if HAVE_BUILTIN_BSWAP64
+#  define USE_CTR_CRYPT16 1
+static void
+ctr_fill16(uint8_t *ctr, size_t blocks, uint64_t *buffer)
+{
+  uint64_t hi, lo;
+  /* Read hi in native endianness */
+  hi = LE_READ_UINT64(ctr);
+  lo = READ_UINT64(ctr + 8);
+
+  while (blocks-- > 0)
+    {
+      *buffer++ = hi;
+      *buffer++ = __builtin_bswap64(lo);
+      if (!++lo)
+       hi = __builtin_bswap64(__builtin_bswap64(hi) + 1);
+    }
+  LE_WRITE_UINT64(ctr, hi);
+  WRITE_UINT64(ctr + 8, lo);
+}
+# else /* ! HAVE_BUILTIN_BSWAP64 */
+#  define USE_CTR_CRYPT16 0
+# endif
+#endif /* !WORDS_BIGENDIAN */
+
+#if USE_CTR_CRYPT16
+static size_t
+ctr_crypt16(const void *ctx, nettle_cipher_func *f,
+           uint8_t *ctr,
+           size_t length, uint8_t *dst,
+           const uint8_t *src)
+{
+  if (dst != src && !((uintptr_t) dst % sizeof(uint64_t)))
+    {
+      size_t blocks = length / 16u;
+      ctr_fill16 (ctr, blocks, (uint64_t *) dst);
+      f(ctx, blocks * 16, dst, dst);
+      memxor (dst, src, blocks * 16);
+      return blocks * 16;
+    }
+  else
+    {
+      /* Construct an aligned buffer of consecutive counter values, of
+        size at most CTR_BUFFER_LIMIT. */
+      TMP_DECL(buffer, union nettle_block16, CTR_BUFFER_LIMIT / 16);
+      size_t blocks = (length + 15) / 16u;
+      size_t i;
+      TMP_ALLOC(buffer, MIN(blocks, CTR_BUFFER_LIMIT / 16));
+
+      for (i = 0; blocks >= CTR_BUFFER_LIMIT / 16;
+          i += CTR_BUFFER_LIMIT, blocks -= CTR_BUFFER_LIMIT / 16)
+       {
+         ctr_fill16 (ctr, CTR_BUFFER_LIMIT / 16, buffer->u64);
+         f(ctx, CTR_BUFFER_LIMIT, buffer->b, buffer->b);
+         if (length - i < CTR_BUFFER_LIMIT)
+           goto done;
+         memxor3 (dst, src, buffer->b, CTR_BUFFER_LIMIT);
+       }
+
+      if (blocks > 0)
+       {
+         assert (length - i < CTR_BUFFER_LIMIT);
+         ctr_fill16 (ctr, blocks, buffer->u64);
+         f(ctx, blocks * 16, buffer->b, buffer->b);
+       done:
+         memxor3 (dst + i, src + i, buffer->b, length - i);
+      }
+      return length;
+    }
+}
+#endif /* USE_CTR_CRYPT16 */
+
 void
 ctr_crypt(const void *ctx, nettle_cipher_func *f,
          size_t block_size, uint8_t *ctr,
          size_t length, uint8_t *dst,
          const uint8_t *src)
 {
-  if (src != dst)
+#if USE_CTR_CRYPT16
+  if (block_size == 16)
     {
-      if (length == block_size)
-       {
-         f(ctx, block_size, dst, ctr);
-         INCREMENT(block_size, ctr);
-         memxor(dst, src, block_size);
-       }
-      else
-       {
-         size_t filled = ctr_fill (block_size, ctr, length, dst);
+      size_t done = ctr_crypt16(ctx, f, ctr, length, dst, src);
+      length -= done;
+      src += done;
+      dst += done;
+    }
+#endif
 
-         f(ctx, filled, dst, dst);
-         memxor(dst, src, filled);
+  if(src != dst)
+    {
+      size_t filled = ctr_fill (block_size, ctr, length, dst);
+
+      f(ctx, filled, dst, dst);
+      memxor(dst, src, filled);
 
-         if (filled < length)
-           {
-             TMP_DECL(buffer, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE);
-             TMP_ALLOC(buffer, block_size);
+      if (filled < length)
+       {
+         TMP_DECL(block, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE);
+         TMP_ALLOC(block, block_size);
 
-             f(ctx, block_size, buffer, ctr);
-             INCREMENT(block_size, ctr);
-             memxor3(dst + filled, src + filled, buffer, length - filled);
-           }
+         f(ctx, block_size, block, ctr);
+         INCREMENT(block_size, ctr);
+         memxor3(dst + filled, src + filled, block, length - filled);
        }
     }
   else