]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blame - releases/2.6.32.12/x86-64-support-native-xadd-rwsem-implementation.patch
Fixes for 5.10
[thirdparty/kernel/stable-queue.git] / releases / 2.6.32.12 / x86-64-support-native-xadd-rwsem-implementation.patch
CommitLineData
2011d8fd
GKH
1From bafaecd11df15ad5b1e598adc7736afcd38ee13d Mon Sep 17 00:00:00 2001
2From: Linus Torvalds <torvalds@linux-foundation.org>
3Date: Tue, 12 Jan 2010 18:16:42 -0800
4Subject: x86-64: support native xadd rwsem implementation
5
6From: Linus Torvalds <torvalds@linux-foundation.org>
7
8commit bafaecd11df15ad5b1e598adc7736afcd38ee13d upstream.
9
10This one is much faster than the spinlock based fallback rwsem code,
11with certain artificial benchmarks having shown 300%+ improvement on
12threaded page faults etc.
13
14Again, note the 32767-thread limit here. So this really does need that
15whole "make rwsem_count_t be 64-bit and fix the BIAS values to match"
16extension on top of it, but that is conceptually a totally independent
17issue.
18
19NOT TESTED! The original patch that this all was based on was tested by
20KAMEZAWA Hiroyuki, but maybe I screwed up something when I created the
21cleaned-up series, so caveat emptor..
22
23Also note that it _may_ be a good idea to mark some more registers
24clobbered on x86-64 in the inline asms instead of saving/restoring them.
25They are inline functions, but they are only used in places where there
26are not a lot of live registers _anyway_, so doing for example the
27clobbers of %r8-%r11 in the asm wouldn't make the fast-path code any
28worse, and would make the slow-path code smaller.
29
30(Not that the slow-path really matters to that degree. Saving a few
31unnecessary registers is the _least_ of our problems when we hit the slow
32path. The instruction/cycle counting really only matters in the fast
33path).
34
35Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
36LKML-Reference: <alpine.LFD.2.00.1001121810410.17145@localhost.localdomain>
37Signed-off-by: H. Peter Anvin <hpa@zytor.com>
38Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
39
40---
41 arch/x86/Kconfig.cpu | 2 -
42 arch/x86/lib/Makefile | 1
43 arch/x86/lib/rwsem_64.S | 81 ++++++++++++++++++++++++++++++++++++++++++++++++
44 3 files changed, 83 insertions(+), 1 deletion(-)
45
46--- a/arch/x86/Kconfig.cpu
47+++ b/arch/x86/Kconfig.cpu
48@@ -323,7 +323,7 @@ config X86_L1_CACHE_SHIFT
49
50 config X86_XADD
51 def_bool y
52- depends on X86_32 && !M386
53+ depends on X86_64 || !M386
54
55 config X86_PPRO_FENCE
56 bool "PentiumPro memory ordering errata workaround"
57--- a/arch/x86/lib/Makefile
58+++ b/arch/x86/lib/Makefile
59@@ -26,4 +26,5 @@ else
60 lib-y += thunk_64.o clear_page_64.o copy_page_64.o
61 lib-y += memmove_64.o memset_64.o
62 lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
63+ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
64 endif
65--- /dev/null
66+++ b/arch/x86/lib/rwsem_64.S
67@@ -0,0 +1,81 @@
68+/*
69+ * x86-64 rwsem wrappers
70+ *
71+ * This interfaces the inline asm code to the slow-path
72+ * C routines. We need to save the call-clobbered regs
73+ * that the asm does not mark as clobbered, and move the
74+ * argument from %rax to %rdi.
75+ *
76+ * NOTE! We don't need to save %rax, because the functions
77+ * will always return the semaphore pointer in %rax (which
78+ * is also the input argument to these helpers)
79+ *
80+ * The following can clobber %rdx because the asm clobbers it:
81+ * call_rwsem_down_write_failed
82+ * call_rwsem_wake
83+ * but %rdi, %rsi, %rcx, %r8-r11 always need saving.
84+ */
85+
86+#include <linux/linkage.h>
87+#include <asm/rwlock.h>
88+#include <asm/alternative-asm.h>
89+#include <asm/frame.h>
90+#include <asm/dwarf2.h>
91+
92+#define save_common_regs /* push %rdi, %rsi, %rcx, %r8-%r11 ahead of the call into the C slow path; undone by restore_common_regs */ \
93+ pushq %rdi; \
94+ pushq %rsi; \
95+ pushq %rcx; \
96+ pushq %r8; \
97+ pushq %r9; \
98+ pushq %r10; \
99+ pushq %r11
100+
101+#define restore_common_regs /* pop the registers pushed by save_common_regs, in the reverse of the push order */ \
102+ popq %r11; \
103+ popq %r10; \
104+ popq %r9; \
105+ popq %r8; \
106+ popq %rcx; \
107+ popq %rsi; \
108+ popq %rdi
109+
110+/* Fix up special calling conventions: the argument arrives in %rax and is handed to the C routine in %rdi */
111+ENTRY(call_rwsem_down_read_failed)
112+ save_common_regs
113+ pushq %rdx /* this helper is not one of those allowed to clobber %rdx, so preserve it */
114+ movq %rax,%rdi /* move the argument from %rax to %rdi for the C routine */
115+ call rwsem_down_read_failed
116+ popq %rdx /* restore in the reverse order of the saves above */
117+ restore_common_regs
118+ ret
119+ ENDPROC(call_rwsem_down_read_failed)
120+
121+ENTRY(call_rwsem_down_write_failed)
122+ save_common_regs /* %rdx is not saved here: the inline asm already marks it clobbered */
123+ movq %rax,%rdi /* move the argument from %rax to %rdi for the C routine */
124+ call rwsem_down_write_failed
125+ restore_common_regs
126+ ret
127+ ENDPROC(call_rwsem_down_write_failed)
128+
129+ENTRY(call_rwsem_wake)
130+ decw %dx /* do nothing if still outstanding active readers */
131+ jnz 1f /* result non-zero: return without saving anything or calling rwsem_wake */
132+ save_common_regs /* %rdx is not saved here: the inline asm already marks it clobbered */
133+ movq %rax,%rdi /* move the argument from %rax to %rdi for the C routine */
134+ call rwsem_wake
135+ restore_common_regs
136+1: ret
137+ ENDPROC(call_rwsem_wake)
138+
139+/* Fix up special calling conventions: the argument arrives in %rax and is handed to the C routine in %rdi */
140+ENTRY(call_rwsem_downgrade_wake)
141+ save_common_regs
142+ pushq %rdx /* this helper is not one of those allowed to clobber %rdx, so preserve it */
143+ movq %rax,%rdi /* move the argument from %rax to %rdi for the C routine */
144+ call rwsem_downgrade_wake
145+ popq %rdx /* restore in the reverse order of the saves above */
146+ restore_common_regs
147+ ret
148+ ENDPROC(call_rwsem_downgrade_wake)