]>
Commit | Line | Data |
---|---|---|
2011d8fd GKH |
1 | From bafaecd11df15ad5b1e598adc7736afcd38ee13d Mon Sep 17 00:00:00 2001 |
2 | From: Linus Torvalds <torvalds@linux-foundation.org> | |
3 | Date: Tue, 12 Jan 2010 18:16:42 -0800 | |
4 | Subject: x86-64: support native xadd rwsem implementation | |
5 | ||
6 | From: Linus Torvalds <torvalds@linux-foundation.org> | |
7 | ||
8 | commit bafaecd11df15ad5b1e598adc7736afcd38ee13d upstream. | |
9 | ||
10 | This one is much faster than the spinlock based fallback rwsem code, | |
11 | with certain artificial benchmarks having shown 300%+ improvement on | |
12 | threaded page faults etc. | |
13 | ||
14 | Again, note the 32767-thread limit here. So this really does need that | |
15 | whole "make rwsem_count_t be 64-bit and fix the BIAS values to match" | |
16 | extension on top of it, but that is conceptually a totally independent | |
17 | issue. | |
18 | ||
19 | NOT TESTED! The original patch that this all was based on was tested by | |
20 | KAMEZAWA Hiroyuki, but maybe I screwed up something when I created the | |
21 | cleaned-up series, so caveat emptor.. | |
22 | ||
23 | Also note that it _may_ be a good idea to mark some more registers | |
24 | clobbered on x86-64 in the inline asms instead of saving/restoring them. | |
25 | They are inline functions, but they are only used in places where there | |
26 | are not a lot of live registers _anyway_, so doing for example the | |
27 | clobbers of %r8-%r11 in the asm wouldn't make the fast-path code any | |
28 | worse, and would make the slow-path code smaller. | |
29 | ||
30 | (Not that the slow-path really matters to that degree. Saving a few | |
31 | unnecessary registers is the _least_ of our problems when we hit the slow | |
32 | path. The instruction/cycle counting really only matters in the fast | |
33 | path). | |
34 | ||
35 | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | |
36 | LKML-Reference: <alpine.LFD.2.00.1001121810410.17145@localhost.localdomain> | |
37 | Signed-off-by: H. Peter Anvin <hpa@zytor.com> | |
38 | Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> | |
39 | ||
40 | --- | |
41 | arch/x86/Kconfig.cpu | 2 - | |
42 | arch/x86/lib/Makefile | 1 | |
43 | arch/x86/lib/rwsem_64.S | 81 ++++++++++++++++++++++++++++++++++++++++++++++++ | |
44 | 3 files changed, 83 insertions(+), 1 deletion(-) | |
45 | ||
46 | --- a/arch/x86/Kconfig.cpu | |
47 | +++ b/arch/x86/Kconfig.cpu | |
48 | @@ -323,7 +323,7 @@ config X86_L1_CACHE_SHIFT | |
49 | ||
50 | config X86_XADD | |
51 | def_bool y | |
52 | - depends on X86_32 && !M386 | |
53 | + depends on X86_64 || !M386 | |
54 | ||
55 | config X86_PPRO_FENCE | |
56 | bool "PentiumPro memory ordering errata workaround" | |
57 | --- a/arch/x86/lib/Makefile | |
58 | +++ b/arch/x86/lib/Makefile | |
59 | @@ -26,4 +26,5 @@ else | |
60 | lib-y += thunk_64.o clear_page_64.o copy_page_64.o | |
61 | lib-y += memmove_64.o memset_64.o | |
62 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o | |
63 | + lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o | |
64 | endif | |
65 | --- /dev/null | |
66 | +++ b/arch/x86/lib/rwsem_64.S | |
67 | @@ -0,0 +1,81 @@ | |
68 | +/* | |
69 | + * x86-64 rwsem wrappers | |
70 | + * | |
71 | + * This interfaces the inline asm code to the slow-path | |
72 | + * C routines. We need to save the call-clobbered regs | |
73 | + * that the asm does not mark as clobbered, and move the | |
74 | + * argument from %rax to %rdi. | |
75 | + * | |
76 | + * NOTE! We don't need to save %rax, because the functions | |
77 | + * will always return the semaphore pointer in %rax (which | |
78 | + * is also the input argument to these helpers) | |
79 | + * | |
80 | + * The following can clobber %rdx because the asm clobbers it: | |
81 | + * call_rwsem_down_write_failed | |
82 | + * call_rwsem_wake | |
83 | + * but %rdi, %rsi, %rcx, %r8-r11 always need saving. | |
84 | + */ | |
85 | + | |
86 | +#include <linux/linkage.h> | |
87 | +#include <asm/rwlock.h> | |
88 | +#include <asm/alternative-asm.h> | |
89 | +#include <asm/frame.h> | |
90 | +#include <asm/dwarf2.h> | |
91 | + | |
92 | +#define save_common_regs \ | |
93 | + pushq %rdi; \ | |
94 | + pushq %rsi; \ | |
95 | + pushq %rcx; \ | |
96 | + pushq %r8; \ | |
97 | + pushq %r9; \ | |
98 | + pushq %r10; \ | |
99 | + pushq %r11 | |
100 | + | |
101 | +#define restore_common_regs \ | |
102 | + popq %r11; \ | |
103 | + popq %r10; \ | |
104 | + popq %r9; \ | |
105 | + popq %r8; \ | |
106 | + popq %rcx; \ | |
107 | + popq %rsi; \ | |
108 | + popq %rdi | |
109 | + | |
110 | +/* Fix up special calling conventions */ | |
111 | +ENTRY(call_rwsem_down_read_failed) | |
112 | + save_common_regs | |
113 | + pushq %rdx | |
114 | + movq %rax,%rdi | |
115 | + call rwsem_down_read_failed | |
116 | + popq %rdx | |
117 | + restore_common_regs | |
118 | + ret | |
119 | + ENDPROC(call_rwsem_down_read_failed) | |
120 | + | |
121 | +ENTRY(call_rwsem_down_write_failed) | |
122 | + save_common_regs | |
123 | + movq %rax,%rdi | |
124 | + call rwsem_down_write_failed | |
125 | + restore_common_regs | |
126 | + ret | |
127 | + ENDPROC(call_rwsem_down_write_failed) | |
128 | + | |
129 | +ENTRY(call_rwsem_wake) | |
130 | + decw %dx /* do nothing if still outstanding active readers */ | |
131 | + jnz 1f | |
132 | + save_common_regs | |
133 | + movq %rax,%rdi | |
134 | + call rwsem_wake | |
135 | + restore_common_regs | |
136 | +1: ret | |
137 | + ENDPROC(call_rwsem_wake) | |
138 | + | |
139 | +/* Fix up special calling conventions */ | |
140 | +ENTRY(call_rwsem_downgrade_wake) | |
141 | + save_common_regs | |
142 | + pushq %rdx | |
143 | + movq %rax,%rdi | |
144 | + call rwsem_downgrade_wake | |
145 | + popq %rdx | |
146 | + restore_common_regs | |
147 | + ret | |
148 | + ENDPROC(call_rwsem_downgrade_wake) |