git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
x86: vdso: Wire up getrandom() vDSO implementation
author Jason A. Donenfeld <Jason@zx2c4.com>
Fri, 18 Nov 2022 16:38:23 +0000 (17:38 +0100)
committer Jason A. Donenfeld <Jason@zx2c4.com>
Fri, 19 Jul 2024 18:22:12 +0000 (20:22 +0200)
Hook up the generic vDSO implementation to the x86 vDSO data page. Since
the existing vDSO infrastructure is heavily based on the timekeeping
functionality, which works over arrays of bases, a new macro is
introduced for vvars that are not arrays.

The vDSO function requires a ChaCha20 implementation that does not write
to the stack, yet can still do an entire ChaCha20 permutation, so
provide this using SSE2, since this is userland code that must work on
all x86-64 processors.

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Samuel Neves <sneves@dei.uc.pt> # for vgetrandom-chacha.S
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
MAINTAINERS
arch/x86/Kconfig
arch/x86/entry/vdso/Makefile
arch/x86/entry/vdso/vdso.lds.S
arch/x86/entry/vdso/vgetrandom-chacha.S [new file with mode: 0644]
arch/x86/entry/vdso/vgetrandom.c [new file with mode: 0644]
arch/x86/include/asm/vdso/getrandom.h [new file with mode: 0644]
arch/x86/include/asm/vdso/vsyscall.h
arch/x86/include/asm/vvar.h

index 7edb30b4abf04df49d4ec68c2a401c52e73b3304..4dc7bfcaf118f4728123f8876378659b530a30ac 100644 (file)
@@ -18747,6 +18747,8 @@ F:      drivers/char/random.c
 F:     drivers/virt/vmgenid.c
 F:     include/vdso/getrandom.h
 F:     lib/vdso/getrandom.c
+F:     arch/x86/entry/vdso/vgetrandom*
+F:     arch/x86/include/asm/vdso/getrandom*
 
 RAPIDIO SUBSYSTEM
 M:     Matt Porter <mporter@kernel.crashing.org>
index 1d7122a1883e8265fb9be256bf8efa680d02191b..9c98b7a88cc2f961da554756466d42605c645614 100644 (file)
@@ -287,6 +287,7 @@ config X86
        select HAVE_UNSTABLE_SCHED_CLOCK
        select HAVE_USER_RETURN_NOTIFIER
        select HAVE_GENERIC_VDSO
+       select VDSO_GETRANDOM                   if X86_64
        select HOTPLUG_PARALLEL                 if SMP && X86_64
        select HOTPLUG_SMT                      if SMP
        select HOTPLUG_SPLIT_STARTUP            if SMP && X86_32
index 215a1b202a9180785a4d8d8c1d8d8e469145e5e5..c9216ac4fb1eb8c1e5bc5e33b41e9e40c7924cbf 100644 (file)
@@ -7,7 +7,7 @@
 include $(srctree)/lib/vdso/Makefile
 
 # Files to link into the vDSO:
-vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
+vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vgetrandom.o vgetrandom-chacha.o
 vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
 vobjs32-y += vdso32/vclock_gettime.o vdso32/vgetcpu.o
 vobjs-$(CONFIG_X86_SGX)        += vsgx.o
@@ -73,6 +73,7 @@ CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
 CFLAGS_REMOVE_vgetcpu.o = -pg
 CFLAGS_REMOVE_vdso32/vgetcpu.o = -pg
 CFLAGS_REMOVE_vsgx.o = -pg
+CFLAGS_REMOVE_vgetrandom.o = -pg
 
 #
 # X32 processes use x32 vDSO to access 64bit kernel data.
index e8c60ae7a7c838313de4024f89b9a6db84d14c0c..0bab5f4af6d1a7c477b594cf0baf4dfaa390e835 100644 (file)
@@ -30,6 +30,8 @@ VERSION {
 #ifdef CONFIG_X86_SGX
                __vdso_sgx_enter_enclave;
 #endif
+               getrandom;
+               __vdso_getrandom;
        local: *;
        };
 }
diff --git a/arch/x86/entry/vdso/vgetrandom-chacha.S b/arch/x86/entry/vdso/vgetrandom-chacha.S
new file mode 100644 (file)
index 0000000..bcba563
--- /dev/null
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+.section       .rodata, "a"
+.align 16
+CONSTANTS:     .octa 0x6b20657479622d323320646e61707865
+.text
+
+/*
+ * Very basic SSE2 implementation of ChaCha20. Produces a given positive number
+ * of blocks of output with a nonce of 0, taking an input key and 8-byte
+ * counter. Importantly does not spill to the stack. Its arguments are:
+ *
+ *     rdi: output bytes
+ *     rsi: 32-byte key input
+ *     rdx: 8-byte counter input/output
+ *     rcx: number of 64-byte blocks to write to output
+ *
+ * The block loop tests nblocks only after decrementing it, so rcx must be
+ * nonzero on entry. The key and the output are accessed with unaligned moves
+ * (movups), so neither needs 16-byte alignment. The counter is advanced by one
+ * per block and written back through rdx once all blocks have been produced.
+ *
+ * Besides the argument registers rdi and rcx, which are consumed, this
+ * clobbers rax and xmm0-xmm9. The registers that held key material or
+ * keystream (xmm0-xmm4, xmm6, xmm7) are zeroed before returning.
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+
+.set   output,         %rdi
+.set   key,            %rsi
+.set   counter,        %rdx
+.set   nblocks,        %rcx
+.set   i,              %al
+/* xmm registers are *not* callee-save. */
+.set   temp,           %xmm0
+.set   state0,         %xmm1
+.set   state1,         %xmm2
+.set   state2,         %xmm3
+.set   state3,         %xmm4
+.set   copy0,          %xmm5
+.set   copy1,          %xmm6
+.set   copy2,          %xmm7
+.set   copy3,          %xmm8
+.set   one,            %xmm9
+
+       /* copy0 = "expand 32-byte k" */
+       movaps          CONSTANTS(%rip),copy0
+       /* copy1,copy2 = key */
+       movups          0x00(key),copy1
+       movups          0x10(key),copy2
+       /* copy3 = counter || zero nonce */
+       movq            0x00(counter),copy3
+       /* one = 1 || 0 */
+       movq            $1,%rax
+       movq            %rax,one
+
+.Lblock:
+       /* state0,state1,state2,state3 = copy0,copy1,copy2,copy3 */
+       movdqa          copy0,state0
+       movdqa          copy1,state1
+       movdqa          copy2,state2
+       movdqa          copy3,state3
+
+       movb            $10,i   /* 10 passes of the two-half loop body below = 20 rounds */
+.Lpermute:
+       /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
+       paddd           state1,state0
+       pxor            state0,state3
+       movdqa          state3,temp
+       pslld           $16,temp
+       psrld           $16,state3
+       por             temp,state3
+
+       /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
+       paddd           state3,state2
+       pxor            state2,state1
+       movdqa          state1,temp
+       pslld           $12,temp
+       psrld           $20,state1
+       por             temp,state1
+
+       /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
+       paddd           state1,state0
+       pxor            state0,state3
+       movdqa          state3,temp
+       pslld           $8,temp
+       psrld           $24,state3
+       por             temp,state3
+
+       /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
+       paddd           state3,state2
+       pxor            state2,state1
+       movdqa          state1,temp
+       pslld           $7,temp
+       psrld           $25,state1
+       por             temp,state1
+
+       /* state1[0,1,2,3] = state1[1,2,3,0] */
+       pshufd          $0x39,state1,state1
+       /* state2[0,1,2,3] = state2[2,3,0,1] */
+       pshufd          $0x4e,state2,state2
+       /* state3[0,1,2,3] = state3[3,0,1,2] */
+       pshufd          $0x93,state3,state3
+
+       /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
+       paddd           state1,state0
+       pxor            state0,state3
+       movdqa          state3,temp
+       pslld           $16,temp
+       psrld           $16,state3
+       por             temp,state3
+
+       /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
+       paddd           state3,state2
+       pxor            state2,state1
+       movdqa          state1,temp
+       pslld           $12,temp
+       psrld           $20,state1
+       por             temp,state1
+
+       /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
+       paddd           state1,state0
+       pxor            state0,state3
+       movdqa          state3,temp
+       pslld           $8,temp
+       psrld           $24,state3
+       por             temp,state3
+
+       /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
+       paddd           state3,state2
+       pxor            state2,state1
+       movdqa          state1,temp
+       pslld           $7,temp
+       psrld           $25,state1
+       por             temp,state1
+
+       /* state1[0,1,2,3] = state1[3,0,1,2] */
+       pshufd          $0x93,state1,state1
+       /* state2[0,1,2,3] = state2[2,3,0,1] */
+       pshufd          $0x4e,state2,state2
+       /* state3[0,1,2,3] = state3[1,2,3,0] */
+       pshufd          $0x39,state3,state3
+
+       decb            i
+       jnz             .Lpermute
+
+       /* output0 = state0 + copy0 */
+       paddd           copy0,state0
+       movups          state0,0x00(output)
+       /* output1 = state1 + copy1 */
+       paddd           copy1,state1
+       movups          state1,0x10(output)
+       /* output2 = state2 + copy2 */
+       paddd           copy2,state2
+       movups          state2,0x20(output)
+       /* output3 = state3 + copy3 */
+       paddd           copy3,state3
+       movups          state3,0x30(output)
+
+       /* ++copy3.counter */
+       paddq           one,copy3
+
+       /* output += 64, --nblocks */
+       addq            $64,output
+       decq            nblocks
+       jnz             .Lblock
+
+       /* counter = copy3.counter */
+       movq            copy3,0x00(counter)
+
+       /* Zero out the potentially sensitive regs, in case nothing uses these again. */
+       pxor            state0,state0
+       pxor            state1,state1
+       pxor            state2,state2
+       pxor            state3,state3
+       pxor            copy1,copy1
+       pxor            copy2,copy2
+       pxor            temp,temp
+
+       ret
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
diff --git a/arch/x86/entry/vdso/vgetrandom.c b/arch/x86/entry/vdso/vgetrandom.c
new file mode 100644 (file)
index 0000000..52d3c7f
--- /dev/null
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+#include <linux/types.h>
+
+#include "../../../../lib/vdso/getrandom.c"
+
+ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
+/*
+ * vDSO entry point for getrandom(): passes all five arguments through
+ * unchanged to __cvdso_getrandom() and returns its result.
+ * __cvdso_getrandom() is expected to come from the lib/vdso/getrandom.c
+ * include at the top of this file. The stand-alone prototype just above
+ * presumably exists so this non-static function has a prior declaration.
+ */
+ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+       return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+}
+/* Weak alias: the same function is also reachable under the plain name getrandom. */
+ssize_t getrandom(void *, size_t, unsigned int, void *, size_t)
+       __attribute__((weak, alias("__vdso_getrandom")));
diff --git a/arch/x86/include/asm/vdso/getrandom.h b/arch/x86/include/asm/vdso/getrandom.h
new file mode 100644 (file)
index 0000000..b96e674
--- /dev/null
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+#ifndef __ASM_VDSO_GETRANDOM_H
+#define __ASM_VDSO_GETRANDOM_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+#include <asm/vvar.h>
+
+/**
+ * getrandom_syscall - Invoke the getrandom() syscall.
+ * @buffer:    Destination buffer to fill with random bytes.
+ * @len:       Size of @buffer in bytes.
+ * @flags:     Zero or more GRND_* flags.
+ * Returns:    The number of random bytes written to @buffer, or a negative value indicating an error.
+ *
+ * The syscall number is passed in rax and the three arguments in rdi, rsi and
+ * rdx; the result comes back in rax. rcx and r11 are listed as clobbers
+ * because the syscall instruction overwrites them, and the "memory" clobber
+ * accounts for the kernel writing to @buffer.
+ */
+static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
+{
+       long ret;
+
+       /*
+        * volatile: the syscall has effects beyond its output operand, so the
+        * compiler must not discard it should a caller ever ignore the result.
+        */
+       asm volatile ("syscall" : "=a" (ret) :
+                     "0" (__NR_getrandom), "D" (buffer), "S" (len), "d" (flags) :
+                     "rcx", "r11", "memory");
+
+       return ret;
+}
+
+#define __vdso_rng_data (VVAR(_vdso_rng_data))
+
+/*
+ * Return the vdso_rng_data instance to read from. When CONFIG_TIME_NS is
+ * enabled and the vDSO data page reports VDSO_CLOCKMODE_TIMENS, the address of
+ * __vdso_rng_data is displaced by the distance between __timens_vdso_data and
+ * __vdso_data; otherwise &__vdso_rng_data is returned as is.
+ */
+static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
+{
+       const void *rng = &__vdso_rng_data;
+
+       if (IS_ENABLED(CONFIG_TIME_NS) && __vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS)
+               rng += (void *)&__timens_vdso_data - (void *)&__vdso_data;
+
+       return rng;
+}
+
+/**
+ * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack.
+ * @dst_bytes: Destination buffer to hold @nblocks * 64 bytes of output.
+ * @key:       32-byte input key.
+ * @counter:   8-byte counter, read on input and updated on return.
+ * @nblocks:   Number of blocks to generate.
+ *
+ * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write
+ * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data
+ * leaking into forked child processes.
+ *
+ * Although @counter is typed as a u32 pointer, the x86 implementation in vgetrandom-chacha.S loads
+ * and stores it as a single 64-bit quantity, so it must point at 8 bytes. @nblocks must be
+ * nonzero: that implementation decrements the count before testing it.
+ */
+extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETRANDOM_H */
index be199a9b267627311bf4d3efbc7f1915f1a3f47d..71c56586a22f982ca628ac6eb496bb6698787c1f 100644 (file)
@@ -11,6 +11,8 @@
 #include <asm/vvar.h>
 
 DEFINE_VVAR(struct vdso_data, _vdso_data);
+DEFINE_VVAR_SINGLE(struct vdso_rng_data, _vdso_rng_data);
+
 /*
  * Update the vDSO data page to keep in sync with kernel timekeeping.
  */
index 183e98e49ab943aeaf9e7ecc183c2f383545887d..9d9af37f7cab9e3e6159c0b758e500ddcfc6b3e8 100644 (file)
@@ -26,6 +26,8 @@
  */
 #define DECLARE_VVAR(offset, type, name) \
        EMIT_VVAR(name, offset)
+#define DECLARE_VVAR_SINGLE(offset, type, name) /* same expansion as DECLARE_VVAR in this branch */ \
+       EMIT_VVAR(name, offset)
 
 #else
 
@@ -37,6 +39,10 @@ extern char __vvar_page;
        extern type timens_ ## name[CS_BASES]                           \
        __attribute__((visibility("hidden")));                          \
 
+#define DECLARE_VVAR_SINGLE(offset, type, name) /* scalar: no [CS_BASES], no timens_ twin */ \
+       extern type vvar_ ## name                                       \
+       __attribute__((visibility("hidden")));                          \
+
 #define VVAR(name) (vvar_ ## name)
 #define TIMENS(name) (timens_ ## name)
 
@@ -44,12 +50,22 @@ extern char __vvar_page;
        type name[CS_BASES]                                             \
        __attribute__((section(".vvar_" #name), aligned(16))) __visible
 
+#define DEFINE_VVAR_SINGLE(type, name) /* scalar counterpart of DEFINE_VVAR */ \
+       type name                                                       \
+       __attribute__((section(".vvar_" #name), aligned(16))) __visible
+
 #endif
 
 /* DECLARE_VVAR(offset, type, name) */
 
 DECLARE_VVAR(128, struct vdso_data, _vdso_data)
 
+#if !defined(_SINGLE_DATA)
+#define _SINGLE_DATA /* records that the declaration below has already been emitted */
+DECLARE_VVAR_SINGLE(640, struct vdso_rng_data, _vdso_rng_data)
+#endif /* !_SINGLE_DATA */
+
 #undef DECLARE_VVAR
+#undef DECLARE_VVAR_SINGLE
 
 #endif