From 3e89e082e8f716fec82f21ca888b36b70177712f Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 4 Mar 2014 15:04:40 -0800 Subject: [PATCH] rdrand: Fix the RDRAND data reduction The RDRAND data reduction function was not correct. Specifically: 1. When using AESni, in order to guarantee at least one reseed event per reduction stripe we need to process the data in a different order. This means writing out all the data to a buffer before processing it, and then processing it in much larger stripes. 2. When using gcrypt, we are only performing one reduction at a time, so only process enough input for one reduction and only generate that amount of output. Signed-off-by: H. Peter Anvin --- rdrand_asm.S | 60 +++++++++++++++++++++++++++---------------- rngd_rdrand.c | 62 +++++++++++++++++++++++++++++++-------------------- 2 files changed, 71 insertions(+), 51 deletions(-) diff --git a/rdrand_asm.S b/rdrand_asm.S index 913b923..c92996c 100644 --- a/rdrand_asm.S +++ b/rdrand_asm.S @@ -58,6 +58,7 @@ ENDPROC(x86_rdrand_bytes) #define PTR0 %rdi #define PTR1 %rsi #define PTR2 %rcx +#define CTR3 %eax #define NPTR2 1 /* %rcx = %r1, only 0-7 valid here */ #elif defined(__i386__) @@ -97,6 +98,7 @@ ENDPROC(x86_rdrand_bytes) #define PTR0 %eax #define PTR1 %edx #define PTR2 %ecx +#define CTR3 %esi #define NPTR2 1 /* %rcx = %r1 */ #endif @@ -107,27 +109,37 @@ ENTRY(x86_aes_mangle) mov %esp, %ebp movl 8(%ebp), %eax movl 12(%ebp), %edx + push %esi #endif + movl $512, CTR3 /* Number of rounds */ + + movdqa (0*16)(PTR1), %xmm0 + movdqa (1*16)(PTR1), %xmm1 + movdqa (2*16)(PTR1), %xmm2 + movdqa (3*16)(PTR1), %xmm3 + movdqa (4*16)(PTR1), %xmm4 + movdqa (5*16)(PTR1), %xmm5 + movdqa (6*16)(PTR1), %xmm6 + movdqa (7*16)(PTR1), %xmm7 +#ifdef __x86_64__ SETPTR(aes_round_keys, PTR2) +1: +#else +1: + SETPTR(aes_round_keys, PTR2) +#endif - movdqa (0*16)(PTR0), %xmm0 - movdqa (1*16)(PTR0), %xmm1 - movdqa (2*16)(PTR0), %xmm2 - movdqa (3*16)(PTR0), %xmm3 - movdqa (4*16)(PTR0), %xmm4 - movdqa 
(5*16)(PTR0), %xmm5 - movdqa (6*16)(PTR0), %xmm6 - movdqa (7*16)(PTR0), %xmm7 - - pxor (0*16)(PTR1), %xmm0 - pxor (1*16)(PTR1), %xmm1 - pxor (2*16)(PTR1), %xmm2 - pxor (3*16)(PTR1), %xmm3 - pxor (4*16)(PTR1), %xmm4 - pxor (5*16)(PTR1), %xmm5 - pxor (6*16)(PTR1), %xmm6 - pxor (7*16)(PTR1), %xmm7 + /* 8192 = 512 (rounds) * 16 (bytes) */ + pxor (0*8192)(PTR0), %xmm0 + pxor (1*8192)(PTR0), %xmm1 + pxor (2*8192)(PTR0), %xmm2 + pxor (3*8192)(PTR0), %xmm3 + pxor (4*8192)(PTR0), %xmm4 + pxor (5*8192)(PTR0), %xmm5 + pxor (6*8192)(PTR0), %xmm6 + pxor (7*8192)(PTR0), %xmm7 + add $16, PTR0 offset = 0 .rept 10 @@ -175,16 +187,9 @@ offset = offset + 16 .byte 0x66,0x0f,0x38,0xdd,0x30+NPTR2 /* aesenclast (PTR2), %xmm6 */ .byte 0x66,0x0f,0x38,0xdd,0x38+NPTR2 /* aesenclast (PTR2), %xmm7 */ #endif - - movdqa %xmm0, (0*16)(PTR0) - movdqa %xmm1, (1*16)(PTR0) - movdqa %xmm2, (2*16)(PTR0) - movdqa %xmm3, (3*16)(PTR0) - movdqa %xmm4, (4*16)(PTR0) - movdqa %xmm5, (5*16)(PTR0) - movdqa %xmm6, (6*16)(PTR0) - movdqa %xmm7, (7*16)(PTR0) - + sub $1, CTR3 + jnz 1b + movdqa %xmm0, (0*16)(PTR1) movdqa %xmm1, (1*16)(PTR1) movdqa %xmm2, (2*16)(PTR1) @@ -195,6 +200,7 @@ offset = offset + 16 movdqa %xmm7, (7*16)(PTR1) #ifdef __i386__ + pop %esi pop %ebp #endif ret diff --git a/rngd_rdrand.c b/rngd_rdrand.c index cb67369..3c28282 100644 --- a/rngd_rdrand.c +++ b/rngd_rdrand.c @@ -124,7 +124,9 @@ static void cpuid(unsigned int leaf, unsigned int subleaf, struct cpuid *out) } /* Read data from the drng in chunks of 128 bytes for AES scrambling */ -#define CHUNK_SIZE (16*8) +#define AES_BLOCK 16 +#define CHUNK_SIZE (AES_BLOCK*8) /* 8 parallel streams */ +#define RDRAND_ROUNDS 512 /* 512:1 data reduction */ static unsigned char iv_buf[CHUNK_SIZE] __attribute__((aligned(128))); static int have_aesni; @@ -146,7 +148,8 @@ static inline int gcrypt_mangle(unsigned char *tmp) /* Encrypt tmp in-place. 
*/ - gcry_error = gcry_cipher_encrypt(gcry_cipher_hd, tmp, CHUNK_SIZE, + gcry_error = gcry_cipher_encrypt(gcry_cipher_hd, tmp, + AES_BLOCK * RDRAND_ROUNDS, NULL, 0); if (gcry_error) { @@ -166,30 +169,41 @@ int xread_drng(void *buf, size_t size, struct rng *ent_src) { char *p = buf; size_t chunk; - const int rdrand_round_count = 512; - unsigned char tmp[CHUNK_SIZE] __attribute__((aligned(128))); + void *data; + unsigned char rdrand_buf[CHUNK_SIZE * RDRAND_ROUNDS] + __attribute__((aligned(128))); + unsigned int rand_bytes; int i; (void)ent_src; + rand_bytes = have_aesni + ? CHUNK_SIZE * RDRAND_ROUNDS + : AES_BLOCK * RDRAND_ROUNDS; + while (size) { - for (i = 0; i < rdrand_round_count; i++) { - if (x86_rdrand_bytes(tmp, CHUNK_SIZE) != CHUNK_SIZE) { - message(LOG_DAEMON|LOG_ERR, "read error\n"); - return -1; - } - - /* - * Use 128-bit AES in CBC mode to reduce the - * data by a factor of rdrand_round_count - */ - if (have_aesni) - x86_aes_mangle(tmp, iv_buf); - else if (gcrypt_mangle(tmp)) - return -1; + if (x86_rdrand_bytes(rdrand_buf, rand_bytes) != rand_bytes) { + message(LOG_DAEMON|LOG_ERR, "read error\n"); + return -1; + } + + /* + * Use 128-bit AES in CBC mode to reduce the + * data by a factor of RDRAND_ROUNDS + */ + if (have_aesni) { + x86_aes_mangle(rdrand_buf, iv_buf); + data = iv_buf; + chunk = CHUNK_SIZE; + } else if (!gcrypt_mangle(rdrand_buf)) { + data = rdrand_buf + AES_BLOCK * (RDRAND_ROUNDS - 1); + chunk = AES_BLOCK; + } else { + return -1; } - chunk = (sizeof(tmp) > size) ? size : sizeof(tmp); - memcpy(p, tmp, chunk); + + chunk = (chunk > size) ? size : chunk; + memcpy(p, data, chunk); p += chunk; size -= chunk; } @@ -222,14 +236,14 @@ static int init_gcrypt(const void *key) GCRY_CIPHER_MODE_CBC, 0); if (!gcry_error) - gcry_error = gcry_cipher_setkey(gcry_cipher_hd, key, 16); + gcry_error = gcry_cipher_setkey(gcry_cipher_hd, key, AES_BLOCK); if (!gcry_error) { /* * Only need the first 16 bytes of iv_buf. 
AES-NI can * encrypt multiple blocks in parallel but we can't. */ - gcry_error = gcry_cipher_setiv(gcry_cipher_hd, iv_buf, 16); + gcry_error = gcry_cipher_setiv(gcry_cipher_hd, iv_buf, AES_BLOCK); } if (gcry_error) { @@ -255,11 +269,11 @@ int init_drng_entropy_source(struct rng *ent_src) /* We need RDRAND, but AESni is optional */ const uint32_t features_ecx1_rdrand = 1 << 30; const uint32_t features_ecx1_aesni = 1 << 25; - static unsigned char key[16] = { + static unsigned char key[AES_BLOCK] = { 0x00,0x10,0x20,0x30,0x40,0x50,0x60,0x70, 0x80,0x90,0xa0,0xb0,0xc0,0xd0,0xe0,0xf0 }; /* AES data reduction key */ - unsigned char xkey[16]; /* Material to XOR into the key */ + unsigned char xkey[AES_BLOCK]; /* Material to XOR into the key */ int fd; int i; -- 2.47.2