From 5b77e95dd7790ff6c8fbf1cd8d0104ebed818a03 Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Tue, 2 Apr 2019 13:28:13 +0200
Subject: x86/asm: Use stricter assembly constraints in bitops

From: Alexander Potapenko <glider@google.com>

commit 5b77e95dd7790ff6c8fbf1cd8d0104ebed818a03 upstream.

There are a number of problems with how arch/x86/include/asm/bitops.h
currently uses assembly constraints for the memory regions that the
bitops are modifying:

1) Use memory clobber in bitops that touch arbitrary memory

Certain bit operations that read/write bits take a base pointer and an
arbitrarily large offset to address the bit relative to that base.
Inline assembly constraints aren't expressive enough to tell the
compiler that the assembly directive is going to touch a specific memory
location of unknown size, so we have to use the "memory" clobber to
indicate that the assembly is going to access memory locations other
than those listed in the inputs/outputs.

To indicate that BTR/BTS instructions don't necessarily touch the first
sizeof(long) bytes of the argument, we also move the address to assembly
inputs.

This particular change leads to a size increase of 124 kernel functions
in a defconfig build. For some of them the diff is in NOP operations;
others end up re-reading values from memory and may potentially slow
down execution. But without these clobbers the compiler is free to cache
the contents of the bitmaps and use them as if they weren't changed by
the inline assembly.
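
To make the hazard concrete, here is a minimal sketch (hypothetical
code, not part of this patch; it assumes the usual <linux/bitops.h>
helpers). With only a "+m" (*(volatile long *)addr) operand and no
"memory" clobber, the compiler is only told that bitmap[0] may change,
so it may keep bitmap[1] cached in a register across the asm:

    static unsigned long bitmap[2];

    static int stale_read(void)
    {
            unsigned long cached = bitmap[1];  /* may live in a register */

            /* Sets bit 0 of bitmap[1], i.e. memory beyond bitmap[0]. */
            __set_bit(BITS_PER_LONG, bitmap);

            /* Without the "memory" clobber this could wrongly fold to 1. */
            return cached == bitmap[1];
    }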

2) Use byte-sized arguments for operations touching single bytes.

Passing a long value to ANDB/ORB/XORB instructions makes the compiler
treat sizeof(long) bytes as being clobbered, which isn't the case. This
may theoretically lead to worse code in the case of heavy optimization.
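
For illustration, a sketch of the two operand shapes for the
constant-mask path, reconstructed from the CONST_MASK_ADDR/WBYTE_ADDR
macros in the diff below (addr and nr are assumed to be in scope):

    /* Old shape: the compiler assumes sizeof(long) bytes may change. */
    asm volatile(LOCK_PREFIX "orb %1,%0"
                 : "+m" (*(volatile long *)((void *)addr + (nr >> 3)))
                 : "iq" ((u8)(1 << (nr & 7)))
                 : "memory");

    /* New shape: only the single byte actually written by ORB. */
    asm volatile(LOCK_PREFIX "orb %1,%0"
                 : "+m" (*(volatile char *)((void *)addr + (nr >> 3)))
                 : "iq" ((u8)(1 << (nr & 7)))
                 : "memory");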

Practical impact:

I've built a defconfig kernel and looked through some of the functions
generated by GCC 7.3.0 with and without this clobber, and didn't spot
any miscompilations.

However, there is a (trivial) theoretical case where this code leads to
miscompilation:

  https://lkml.org/lkml/2019/3/28/393

using just GCC 8.3.0 with -O2. It isn't hard to imagine someone writing
such a function in the kernel someday.

So the primary motivation is to fix an existing misuse of the asm
directive, which happens to work in certain configurations now, but
isn't guaranteed to work under different circumstances.

[ --mingo: Added -stable tag because defconfig only builds a fraction
  of the kernel and the trivial testcase looks normal enough to
  be used in existing or in-development code. ]

Signed-off-by: Alexander Potapenko <glider@google.com>
Cc: <stable@vger.kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: James Y Knight <jyknight@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20190402112813.193378-1-glider@google.com
[ Edited the changelog, tidied up one of the defines. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/include/asm/bitops.h | 41 ++++++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -36,16 +36,17 @@
  * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
  */

-#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
+#define RLONG_ADDR(x) "m" (*(volatile long *) (x))
+#define WBYTE_ADDR(x) "+m" (*(volatile char *) (x))

-#define ADDR BITOP_ADDR(addr)
+#define ADDR RLONG_ADDR(addr)

 /*
  * We do the locked ops that don't return the old value as
  * a mask operation on a byte.
  */
 #define IS_IMMEDIATE(nr) (__builtin_constant_p(nr))
-#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3))
+#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3))
 #define CONST_MASK(nr) (1 << ((nr) & 7))

 /**
@@ -73,7 +74,7 @@ set_bit(long nr, volatile unsigned long
 : "memory");
 } else {
 asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
- : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
+ : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
 }
 }

@@ -88,7 +89,7 @@ set_bit(long nr, volatile unsigned long
  */
 static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
 {
- asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
+ asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
 }

 /**
@@ -110,8 +111,7 @@ clear_bit(long nr, volatile unsigned lon
 : "iq" ((u8)~CONST_MASK(nr)));
 } else {
 asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
- : BITOP_ADDR(addr)
- : "Ir" (nr));
+ : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
 }
 }

@@ -131,7 +131,7 @@ static __always_inline void clear_bit_un

 static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
 {
- asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
 }

 static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
@@ -139,7 +139,7 @@ static __always_inline bool clear_bit_un
 bool negative;
 asm volatile(LOCK_PREFIX "andb %2,%1"
 CC_SET(s)
- : CC_OUT(s) (negative), ADDR
+ : CC_OUT(s) (negative), WBYTE_ADDR(addr)
 : "ir" ((char) ~(1 << nr)) : "memory");
 return negative;
 }
@@ -155,13 +155,9 @@ static __always_inline bool clear_bit_un
  * __clear_bit() is non-atomic and implies release semantics before the memory
  * operation. It can be used for an unlock if no other CPUs can concurrently
  * modify other bits in the word.
- *
- * No memory barrier is required here, because x86 cannot reorder stores past
- * older loads. Same principle as spin_unlock.
  */
 static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
 {
- barrier();
 __clear_bit(nr, addr);
 }

@@ -176,7 +172,7 @@ static __always_inline void __clear_bit_
  */
 static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
 {
- asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
 }

 /**
@@ -196,8 +192,7 @@ static __always_inline void change_bit(l
 : "iq" ((u8)CONST_MASK(nr)));
 } else {
 asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
- : BITOP_ADDR(addr)
- : "Ir" (nr));
+ : : RLONG_ADDR(addr), "Ir" (nr) : "memory");
 }
 }

@@ -243,8 +238,8 @@ static __always_inline bool __test_and_s

 asm(__ASM_SIZE(bts) " %2,%1"
 CC_SET(c)
- : CC_OUT(c) (oldbit), ADDR
- : "Ir" (nr));
+ : CC_OUT(c) (oldbit)
+ : ADDR, "Ir" (nr) : "memory");
 return oldbit;
 }

@@ -284,8 +279,8 @@ static __always_inline bool __test_and_c

 asm volatile(__ASM_SIZE(btr) " %2,%1"
 CC_SET(c)
- : CC_OUT(c) (oldbit), ADDR
- : "Ir" (nr));
+ : CC_OUT(c) (oldbit)
+ : ADDR, "Ir" (nr) : "memory");
 return oldbit;
 }

@@ -296,8 +291,8 @@ static __always_inline bool __test_and_c

 asm volatile(__ASM_SIZE(btc) " %2,%1"
 CC_SET(c)
- : CC_OUT(c) (oldbit), ADDR
- : "Ir" (nr) : "memory");
+ : CC_OUT(c) (oldbit)
+ : ADDR, "Ir" (nr) : "memory");

 return oldbit;
 }
@@ -329,7 +324,7 @@ static __always_inline bool variable_tes
 asm volatile(__ASM_SIZE(bt) " %2,%1"
 CC_SET(c)
 : CC_OUT(c) (oldbit)
- : "m" (*(unsigned long *)addr), "Ir" (nr));
+ : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");

 return oldbit;
 }