git.ipfire.org Git - thirdparty/nettle.git/commitdiff
Increase buffer size for in-place CTR.
author Niels Möller <nisse@lysator.liu.se>
Mon, 8 Jan 2018 07:45:17 +0000 (08:45 +0100)
committer Niels Möller <nisse@lysator.liu.se>
Tue, 9 Jan 2018 06:31:54 +0000 (07:31 +0100)
ChangeLog
ctr.c

diff --git a/ChangeLog b/ChangeLog
index c927848cde5989a7d9f79d7aa4ccee500e7e8fc7..aa9608d76911424cf3dc0da9b2a027520a33dc04 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -9,6 +9,10 @@
 
 2018-01-08  Niels Möller  <nisse@lysator.liu.se>
 
+       * ctr.c (ctr_crypt): For in-place operation, increase max buffer
+       size from 4 blocks to 512 bytes, similarly to CBC and CFB.
+       Improves in-place aes128 CTR performance by 25% on x86_64.
+
        * examples/nettle-benchmark.c (time_cipher): Benchmark in-place
        operation separately, for cbc_decrypt and ctr_crypt.
 
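
In-place operation here means calling ctr_crypt with dst == src: the counter blocks cannot be written directly into dst without clobbering the plaintext, so a temporary keystream buffer is needed, and this commit enlarges that buffer. A minimal usage sketch of the in-place case, assuming nettle's standard aes128 interface (the helper name and its key/counter parameters are illustrative, not part of this commit):

#include <nettle/aes.h>
#include <nettle/ctr.h>

/* Hypothetical helper: encrypt `length` bytes of `data` in place with
   AES-128 in CTR mode.  Passing `data` as both dst and src takes the
   in-place path changed by this commit. */
static void
aes128_ctr_in_place(const uint8_t key[AES128_KEY_SIZE],
                    uint8_t ctr[AES_BLOCK_SIZE],
                    uint8_t *data, size_t length)
{
  struct aes128_ctx ctx;
  aes128_set_encrypt_key(&ctx, key);
  ctr_crypt(&ctx, (nettle_cipher_func *) aes128_encrypt,
            AES_BLOCK_SIZE, ctr, length, data, data);
}
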
diff --git a/ctr.c b/ctr.c
index f81f74adf0c49dcdf7670a8f7f063a3df2413425..42048833e7507925819d5ff4850fd71f503d5011 100644
--- a/ctr.c
+++ b/ctr.c
@@ -45,7 +45,8 @@
 #include "memxor.h"
 #include "nettle-internal.h"
 
-#define NBLOCKS 4
+/* Don't allocate any more space than this on the stack */
+#define CTR_BUFFER_LIMIT 512
 
 void
 ctr_crypt(const void *ctx, nettle_cipher_func *f,
@@ -90,47 +91,43 @@ ctr_crypt(const void *ctx, nettle_cipher_func *f,
     }
   else
     {
-      if (length > block_size)
-       {
-         TMP_DECL(buffer, uint8_t, NBLOCKS * NETTLE_MAX_CIPHER_BLOCK_SIZE);
-         size_t chunk = NBLOCKS * block_size;
-
-         TMP_ALLOC(buffer, chunk);
+      /* For in-place CTR, construct a buffer of consecutive counter
+        values, of size at most CTR_BUFFER_LIMIT. */
+      TMP_DECL(buffer, uint8_t, CTR_BUFFER_LIMIT);
+
+      size_t buffer_size;
+      if (length < block_size)
+       buffer_size = block_size;
+      else if (length <= CTR_BUFFER_LIMIT)
+       buffer_size = length;
+      else
+       buffer_size = CTR_BUFFER_LIMIT;
 
-         for (; length >= chunk;
-              length -= chunk, src += chunk, dst += chunk)
-           {
-             unsigned n;
-             uint8_t *p;         
-             for (n = 0, p = buffer; n < NBLOCKS; n++, p += block_size)
-               {
-                 memcpy (p, ctr, block_size);
-                 INCREMENT(block_size, ctr);
-               }
-             f(ctx, chunk, buffer, buffer);
-             memxor(dst, buffer, chunk);
-           }
+      TMP_ALLOC(buffer, buffer_size);
 
-         if (length > 0)
+      while (length >= block_size)
+       {
+         size_t i;
+         for (i = 0;
+              i + block_size <= buffer_size && i + block_size <= length;
+              i += block_size)
            {
-             /* Final, possibly partial, blocks */
-             for (chunk = 0; chunk < length; chunk += block_size)
-               {
-                 memcpy (buffer + chunk, ctr, block_size);
-                 INCREMENT(block_size, ctr);
-               }
-             f(ctx, chunk, buffer, buffer);
-             memxor3(dst, src, buffer, length);
+             memcpy (buffer + i, ctr, block_size);
+             INCREMENT(block_size, ctr);
            }
+         assert (i > 0);
+         f(ctx, i, buffer, buffer);
+         memxor(dst, buffer, i);
+         length -= i;
+         dst += i;
        }
-      else if (length > 0)
-       {
-         TMP_DECL(buffer, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE);
-         TMP_ALLOC(buffer, block_size);
 
+      /* Final, possibly partial, block. */
+      if (length > 0)
+       {
          f(ctx, block_size, buffer, ctr);
          INCREMENT(block_size, ctr);
-         memxor3(dst, src, buffer, length);
+         memxor(dst, buffer, length);
        }
     }
 }
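
The strategy of the new loop, in isolation: fill a stack buffer with consecutive counter values, encrypt the whole buffer with a single cipher call, and XOR the resulting keystream onto the data, repeating until at most a partial block remains. A simplified standalone sketch of that idea, not the library code: cipher_func, increment_be, BUFFER_LIMIT and the plain XOR loops below stand in for nettle's nettle_cipher_func, INCREMENT, CTR_BUFFER_LIMIT and memxor.

#include <stdint.h>
#include <string.h>

#define BUFFER_LIMIT 512   /* same stack bound as CTR_BUFFER_LIMIT above */

/* Stand-in for nettle_cipher_func: encrypts `length` bytes from src to dst. */
typedef void cipher_func(void *ctx, size_t length, uint8_t *dst, const uint8_t *src);

/* Big-endian counter increment, standing in for nettle's INCREMENT macro. */
static void
increment_be(size_t size, uint8_t *ctr)
{
  size_t i = size;
  while (i-- > 0 && ++ctr[i] == 0)
    ;
}

static void
ctr_in_place_sketch(void *ctx, cipher_func *cipher, size_t block_size,
                    uint8_t *ctr, size_t length, uint8_t *dst)
{
  uint8_t buffer[BUFFER_LIMIT];

  while (length >= block_size)
    {
      size_t i;
      /* Fill the buffer with consecutive counter values. */
      for (i = 0;
           i + block_size <= sizeof buffer && i + block_size <= length;
           i += block_size)
        {
          memcpy(buffer + i, ctr, block_size);
          increment_be(block_size, ctr);
        }
      /* One cipher call for the whole batch, then XOR the keystream in. */
      cipher(ctx, i, buffer, buffer);
      for (size_t j = 0; j < i; j++)
        dst[j] ^= buffer[j];
      length -= i;
      dst += i;
    }

  if (length > 0)
    {
      /* Final, possibly partial, block: one more counter block, use only
         `length` bytes of its keystream. */
      cipher(ctx, block_size, buffer, ctr);
      increment_be(block_size, ctr);
      for (size_t j = 0; j < length; j++)
        dst[j] ^= buffer[j];
    }
}

Handing the cipher up to 512 bytes per call instead of 4 blocks lets the underlying block-cipher code process more blocks at once, matching the buffering already used for CBC and CFB as noted in the ChangeLog entry above.
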