From: Ben Hutchings Date: Fri, 9 Mar 2018 00:11:14 +0000 (+0000) Subject: x86/syscall: Sanitize syscall table de-references under speculation X-Git-Tag: v3.2.101~18 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=429edb965dd15507f143cdd08637b0dbfa282f0e;p=thirdparty%2Fkernel%2Fstable.git x86/syscall: Sanitize syscall table de-references under speculation commit 2fbd7af5af8665d18bcefae3e9700be07e22b681 upstream. The upstream version of this, touching C code, was written by Dan Williams, with the following description: > The syscall table base is a user controlled function pointer in kernel > space. Use array_index_nospec() to prevent any out of bounds speculation. > > While retpoline prevents speculating into a userspace directed target it > does not stop the pointer de-reference, the concern is leaking memory > relative to the syscall table base, by observing instruction cache > behavior. The x86_64 assembly version for 4.4 was written by Jiri Slaby, with the following description: > In 4.4.118, we have commit c8961332d6da (x86/syscall: Sanitize syscall > table de-references under speculation), which is a backport of upstream > commit 2fbd7af5af86. But it fixed only the C part of the upstream patch > -- the IA32 sysentry. So it omitted completely the assembly part -- the > 64bit sysentry. > > Fix that in this patch by explicit array_index_mask_nospec written in > assembly. The same was used in lib/getuser.S. > > However, to have "sbb" working properly, we have to switch from "cmp" > against (NR_syscalls-1) to (NR_syscalls), otherwise the last syscall > number would be "and"ed by 0. It is because the original "ja" relies on > "CF" or "ZF", but we rely only on "CF" in "sbb". That means: switch to > "jae" conditional jump too. > > Final note: use rcx for mask as this is exactly what is overwritten by > the 4th syscall argument (r10) right after. In 3.2 the x86_32 syscall table lookup is also written in assembly. 
So I've taken Jiri's version and added similar masking in entry_32.S, using edx as the temporary. edx is clobbered by SAVE_REGS and seems to be free at this point. The ia32 compat syscall table lookup on x86_64 is also written in assembly, so I've added the same masking in ia32entry.S, using r8 as the temporary since it is always clobbered by the following instructions. The x86_64 entry code also lacks syscall masking for x32. Cc: Dan Williams Cc: Jiri Slaby Cc: Jan Beulich Cc: Linus Torvalds Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: kernel-hardening@lists.openwall.com Cc: gregkh@linuxfoundation.org Cc: Andy Lutomirski Cc: alan@linux.intel.com Cc: Jinpu Wang Signed-off-by: Ben Hutchings --- diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index ad5ea3e24e3f8..750c6f7e961d5 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -161,9 +161,11 @@ ENTRY(ia32_sysenter_target) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) CFI_REMEMBER_STATE jnz sysenter_tracesys - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys + cmpq $(IA32_NR_syscalls),%rax + jae ia32_badsys sysenter_do_call: + sbb %r8,%r8 /* array_index_mask_nospec() */ + and %r8,%rax IA32_ARG_FIXUP sysenter_dispatch: #ifdef CONFIG_RETPOLINE @@ -207,8 +209,10 @@ sysexit_from_sys_call: movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ call audit_syscall_entry movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys + cmpq $(IA32_NR_syscalls),%rax + jae ia32_badsys + sbb %r8,%r8 /* array_index_mask_nospec() */ + and %r8,%rax movl %ebx,%edi /* reload 1st syscall arg */ movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */ @@ -260,8 +264,8 @@ sysenter_tracesys: call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* sysenter_tracesys 
has set RAX(%rsp) */ + cmpq $(IA32_NR_syscalls),%rax + jae int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ jmp sysenter_do_call CFI_ENDPROC ENDPROC(ia32_sysenter_target) @@ -327,9 +331,11 @@ ENTRY(ia32_cstar_target) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) CFI_REMEMBER_STATE jnz cstar_tracesys - cmpq $IA32_NR_syscalls-1,%rax - ja ia32_badsys + cmpq $IA32_NR_syscalls,%rax + jae ia32_badsys cstar_do_call: + sbb %r8,%r8 /* array_index_mask_nospec() */ + and %r8,%rax IA32_ARG_FIXUP 1 cstar_dispatch: #ifdef CONFIG_RETPOLINE @@ -386,8 +392,8 @@ cstar_tracesys: LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ RESTORE_REST xchgl %ebp,%r9d - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ + cmpq $(IA32_NR_syscalls),%rax + jae int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ jmp cstar_do_call END(ia32_cstar_target) @@ -444,9 +450,11 @@ ENTRY(ia32_syscall) orl $TS_COMPAT,TI_status(%r10) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) jnz ia32_tracesys - cmpq $(IA32_NR_syscalls-1),%rax - ja ia32_badsys + cmpq $(IA32_NR_syscalls),%rax + jae ia32_badsys ia32_do_call: + sbb %r8,%r8 /* array_index_mask_nospec() */ + and %r8,%rax IA32_ARG_FIXUP #ifdef CONFIG_RETPOLINE movq ia32_sys_call_table(,%rax,8),%rax @@ -468,8 +476,8 @@ ia32_tracesys: call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST - cmpq $(IA32_NR_syscalls-1),%rax - ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ + cmpq $(IA32_NR_syscalls),%rax + jae int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ jmp ia32_do_call END(ia32_syscall) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f3801c756c801..ede58eeeb11f0 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -429,6 +429,8 @@ sysenter_past_esp: sysenter_do_call: cmpl $(nr_syscalls), %eax jae sysenter_badsys + sbb %edx, %edx 
/* array_index_mask_nospec() */ + and %edx, %eax #ifdef CONFIG_RETPOLINE movl sys_call_table(,%eax,4),%eax call __x86_indirect_thunk_eax @@ -517,6 +519,8 @@ ENTRY(system_call) cmpl $(nr_syscalls), %eax jae syscall_badsys syscall_call: + sbb %edx, %edx /* array_index_mask_nospec() */ + and %edx, %eax #ifdef CONFIG_RETPOLINE movl sys_call_table(,%eax,4),%eax call __x86_indirect_thunk_eax diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index a2b14db26b8a5..e992680d0eab0 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -517,8 +517,10 @@ ENTRY(system_call_after_swapgs) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) jnz tracesys system_call_fastpath: - cmpq $__NR_syscall_max,%rax - ja badsys + cmpq $NR_syscalls, %rax + jae badsys + sbb %rcx, %rcx /* array_index_mask_nospec() */ + and %rcx, %rax movq %r10,%rcx #ifdef CONFIG_RETPOLINE movq sys_call_table(, %rax, 8), %rax @@ -646,8 +648,10 @@ tracesys: */ LOAD_ARGS ARGOFFSET, 1 RESTORE_REST - cmpq $__NR_syscall_max,%rax - ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ + cmpq $NR_syscalls, %rax + jae int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ + sbb %rcx, %rcx /* array_index_mask_nospec() */ + and %rcx, %rax movq %r10,%rcx /* fixup for C */ #ifdef CONFIG_RETPOLINE movq sys_call_table(, %rax, 8), %rax