+2025-06-22 Niels Möller <nisse@lysator.liu.se>
+
+ * nettle-types.h (_NETTLE_ALIGN16): New macro, to require 16-byte
+ alignment. Enabled only on platforms where uint64_t needs 8-byte
+ alignment. Intended to improve performance for SIMD load and
+ store, which on some platforms may be faster with proper
+ alignment. Depends on stdalign.h, defined in C11.
+ (union nettle_block16): Use _NETTLE_ALIGN16.
+
+ * ctr16.c (_nettle_ctr_crypt16): Update alignedness check to use
+ alignof(union nettle_block16).
+
+ * aes.h (struct aes128_ctx): Use _NETTLE_ALIGN16 for the array of subkeys.
+ (struct aes192_ctx): Likewise.
+ (struct aes256_ctx): Likewise.
+
+ * umac.h (_UMAC_STATE): Use _NETTLE_ALIGN16 for the l1_key array.
+
2025-05-13 Niels Möller <nisse@lysator.liu.se>
Reduce size of sha3 context to 216 bytes.
struct aes128_ctx
{
- uint32_t keys[4 * (_AES128_ROUNDS + 1)];
+ _NETTLE_ALIGN16 uint32_t keys[4 * (_AES128_ROUNDS + 1)]; /* Subkeys, four 32-bit words for each of the _AES128_ROUNDS + 1 entries; alignment requested via _NETTLE_ALIGN16. */
};
void
struct aes192_ctx
{
- uint32_t keys[4 * (_AES192_ROUNDS + 1)];
+ _NETTLE_ALIGN16 uint32_t keys[4 * (_AES192_ROUNDS + 1)]; /* Subkeys, four 32-bit words for each of the _AES192_ROUNDS + 1 entries; alignment requested via _NETTLE_ALIGN16. */
};
void
struct aes256_ctx
{
- uint32_t keys[4 * (_AES256_ROUNDS + 1)];
+ _NETTLE_ALIGN16 uint32_t keys[4 * (_AES256_ROUNDS + 1)]; /* Subkeys, four 32-bit words for each of the _AES256_ROUNDS + 1 entries; alignment requested via _NETTLE_ALIGN16. */
};
void
size_t length, uint8_t *dst,
const uint8_t *src)
{
- if (dst != src && !((uintptr_t) dst % sizeof(uint64_t)))
+ if (dst != src && !((uintptr_t) dst % alignof(union nettle_block16)))
{
size_t blocks = length / 16u;
size_t done;
/* For size_t */
#include <stddef.h>
+#include <stdalign.h>
#include <stdint.h>
/* Attributes we want to use in installed header files, and hence
extern "C" {
#endif
+/* Alignment specifier that requests 16-byte alignment on platforms
+   where uint64_t requires 8-byte alignment (typically 64-bit ones),
+   i.e., twice the alignment of uint64_t. On all other platforms it
+   expands to alignas(0), which has no effect. To work for both C and
+   C++, it needs to be placed before the type; see the use in union
+   nettle_block16 below. */
+#define _NETTLE_ALIGN16 alignas(alignof(uint64_t) == 8 ? 16 : 0)
+
/* An aligned 16-byte block. */
union nettle_block16
{
 uint8_t b[16];
- uint64_t u64[2];
+ _NETTLE_ALIGN16 uint64_t u64[2]; /* The specifier on this member raises the alignment of the whole union to 16 bytes where alignof(uint64_t) == 8. */
};
union nettle_block8
/* Subkeys and state for UMAC with tag size 32*n bits. */
#define _UMAC_STATE(n) \
- uint32_t l1_key[UMAC_BLOCK_SIZE/4 + 4*((n)-1)]; \
+ _NETTLE_ALIGN16 uint32_t l1_key[UMAC_BLOCK_SIZE/4 + 4*((n)-1)]; \
/* Keys in 32-bit pieces, high first */ \
uint32_t l2_key[6*(n)]; \
uint64_t l3_key1[8*(n)]; \