.\" Copyright 2015-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
.\"
.\" %%%LICENSE_START(VERBATIM)
.\" Permission is granted to make and distribute verbatim copies of this
.\" manual provided the copyright notice and this permission notice are
.\" preserved on all copies.
.\"
.\" Permission is granted to copy and distribute modified versions of this
.\" manual under the conditions for verbatim copying, provided that the
.\" entire resulting derived work is distributed under the terms of a
.\" permission notice identical to this one.
.\"
.\" Since the Linux kernel and libraries are constantly changing, this
.\" manual page may be incorrect or out-of-date.  The author(s) assume no
.\" responsibility for errors or omissions, or for damages resulting from
.\" the use of the information contained herein.  The author(s) may not
.\" have taken the same level of care in the production of this manual,
.\" which is licensed free of charge, as they might when working
.\" professionally.
.\"
.\" Formatted or processed versions of this manual, if unaccompanied by
.\" the source, must acknowledge the copyright and authors of this work.
.\" %%%LICENSE_END
.\"
.TH MEMBARRIER 2 2018-04-30 "Linux" "Linux Programmer's Manual"
.SH NAME
membarrier \- issue memory barriers on a set of threads
.SH SYNOPSIS
.B #include <linux/membarrier.h>
.PP
.BI "int membarrier(int " cmd ", int " flags ");
.SH DESCRIPTION
The
.BR membarrier ()
system call helps reducing the overhead of the memory barrier
instructions required to order memory accesses on multi-core systems.
However, this system call is heavier than a memory barrier, so using it
effectively is
.I not
as simple as replacing memory barriers with this
system call, but requires understanding of the details below.
.PP
Use of memory barriers needs to be done taking into account that a
memory barrier always needs to be either matched with its memory barrier
counterparts, or that the architecture's memory model doesn't require the
matching barriers.
.PP
There are cases where one side of the matching barriers (which we will
refer to as "fast side") is executed much more often than the other
(which we will refer to as "slow side").
This is a prime target for the use of
.BR membarrier ().
The key idea is to replace, for these matching
barriers, the fast-side memory barriers by simple compiler barriers,
for example:
.PP
.in +4n
.EX
asm volatile ("" : : : "memory")
.EE
.in
.PP
and replace the slow-side memory barriers by calls to
.BR membarrier ().
.PP
This will add overhead to the slow side, and remove overhead from the
fast side, thus resulting in an overall performance increase as long as
the slow side is infrequent enough that the overhead of the
.BR membarrier ()
calls does not outweigh the performance gain on the fast side.
.PP
The
.I cmd
argument is one of the following:
.TP
.BR MEMBARRIER_CMD_QUERY " (since Linux 4.3)"
Query the set of supported commands.
The return value of the call is a bit mask of supported
commands.
.BR MEMBARRIER_CMD_QUERY ,
which has the value 0,
is not itself included in this bit mask.
This command is always supported (on kernels where
.BR membarrier ()
is provided).
.TP
.BR MEMBARRIER_CMD_GLOBAL " (since Linux 4.16)"
Ensure that all threads from all processes on the system pass through a
state where all memory accesses to user-space addresses match program
order between entry to and return from the
.BR membarrier ()
system call.
All threads on the system are targeted by this command.
.TP
.BR MEMBARRIER_CMD_GLOBAL_EXPEDITED " (since Linux 4.16)"
Execute a memory barrier on all running threads of all processes that
previously registered with
.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED .
.IP
Upon return from the system call, the calling thread has a guarantee that all
running threads have passed through a state where all memory accesses to
user-space addresses match program order between entry to and return
from the system call (non-running threads are de facto in such a state).
This guarantee is provided only for the threads of processes that
previously registered with
.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED .
.IP
Given that registration is about the intent to receive the barriers, it
is valid to invoke
.BR MEMBARRIER_CMD_GLOBAL_EXPEDITED
from a process that has not employed
.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED .
.IP
The "expedited" commands complete faster than the non-expedited ones;
they never block, but have the downside of causing extra overhead.
.TP
.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED " (since Linux 4.16)"
Register the process's intent to receive
.BR MEMBARRIER_CMD_GLOBAL_EXPEDITED
memory barriers.
.TP
.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED " (since Linux 4.14)"
Execute a memory barrier on each running thread belonging to the same
process as the calling thread.
.IP
Upon return from the system call, the calling
thread has a guarantee that all its running thread siblings have passed
through a state where all memory accesses to user-space addresses match
program order between entry to and return from the system call
(non-running threads are de facto in such a state).
This guarantee is provided only for threads in
the same process as the calling thread.
.IP
The "expedited" commands complete faster than the non-expedited ones;
they never block, but have the downside of causing extra overhead.
.IP
A process must register its intent to use the private
expedited command prior to using it.
.TP
.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED " (since Linux 4.14)"
Register the process's intent to use
.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED .
.TP
.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE " (since Linux 4.16)"
In addition to providing the memory ordering guarantees described in
.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED ,
upon return from system call the calling thread has a guarantee that all its
running thread siblings have executed a core serializing instruction.
This guarantee is provided only for threads in
the same process as the calling thread.
.IP
The "expedited" commands complete faster than the non-expedited ones,
they never block, but have the downside of causing extra overhead.
.IP
A process must register its intent to use the private expedited sync
core command prior to using it.
.TP
.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE " (since Linux 4.16)"
Register the process's intent to use
.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE .
.TP
.BR MEMBARRIER_CMD_SHARED " (since Linux 4.3)"
This is an alias for
.BR MEMBARRIER_CMD_GLOBAL
that exists for header backward compatibility.
.PP
The
.I flags
argument is currently unused and must be specified as 0.
.PP
All memory accesses performed in program order from each targeted thread
are guaranteed to be ordered with respect to
.BR membarrier ().
.PP
If we use the semantic
.I barrier()
to represent a compiler barrier forcing memory
accesses to be performed in program order across the barrier, and
.I smp_mb()
to represent explicit memory barriers forcing full memory
ordering across the barrier, we have the following ordering table for
each pairing of
.IR barrier() ,
.BR membarrier ()
and
.IR smp_mb() .
The pair ordering is detailed as (O: ordered, X: not ordered):
.PP
                       barrier()   smp_mb()   membarrier()
barrier()              X           X          O
smp_mb()               X           O          O
membarrier()           O           O          O
.SH RETURN VALUE
On success, the
.B MEMBARRIER_CMD_QUERY
operation returns a bit mask of supported commands, and the
.BR MEMBARRIER_CMD_GLOBAL ,
.BR MEMBARRIER_CMD_GLOBAL_EXPEDITED ,
.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED ,
.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED ,
.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED ,
.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE ,
and
.B MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE
operations return zero.
On error, \-1 is returned,
and
.I errno
is set appropriately.
.PP
For a given command, with
.I flags
set to 0, this system call is
guaranteed to always return the same value until reboot.
Further calls with the same arguments will lead to the same result.
Therefore, with
.I flags
set to 0, error handling is required only for the first call to
.BR membarrier ().
.SH ERRORS
.TP
.B EINVAL
.I cmd
is invalid, or
.I flags
is nonzero, or the
.BR MEMBARRIER_CMD_GLOBAL
command is disabled because the
.I nohz_full
CPU parameter has been set, or the
.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
and
.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE
commands are not implemented by the architecture.
.TP
.B ENOSYS
The
.BR membarrier ()
system call is not implemented by this kernel.
.TP
.B EPERM
The current process was not registered prior to using private expedited
commands.
.SH VERSIONS
The
.BR membarrier ()
system call was added in Linux 4.3.
.\"
.SH CONFORMING TO
.BR membarrier ()
is Linux-specific.
.\" .SH SEE ALSO
.\" FIXME See if the following syscalls make it into Linux 4.15 or later
.\" .BR cpu_opv (2),
.\" .BR rseq (2)
.SH NOTES
A memory barrier instruction is part of the instruction set of
architectures with weakly-ordered memory models.
It orders memory
accesses prior to the barrier and after the barrier with respect to
matching barriers on other cores.
For instance, a load fence can order
loads prior to and following that fence with respect to stores ordered
by store fences.
.PP
Program order is the order in which instructions are ordered in the
program assembly code.
.PP
Examples where
.BR membarrier ()
can be useful include implementations
of Read-Copy-Update libraries and garbage collectors.
.SH EXAMPLE
Assuming a multithreaded application where "fast_path()" is executed
very frequently, and where "slow_path()" is executed infrequently, the
following code (x86) can be transformed using
.BR membarrier ():
.PP
.in +4n
.EX
#include <stdlib.h>

static volatile int a, b;

static void
fast_path(int *read_b)
{
    a = 1;
    asm volatile ("mfence" : : : "memory");
    *read_b = b;
}

static void
slow_path(int *read_a)
{
    b = 1;
    asm volatile ("mfence" : : : "memory");
    *read_a = a;
}

int
main(int argc, char **argv)
{
    int read_a, read_b;

    /*
     * Real applications would call fast_path() and slow_path()
     * from different threads. Call those from main() to keep
     * this example short.
     */

    slow_path(&read_a);
    fast_path(&read_b);

    /*
     * read_b == 0 implies read_a == 1 and
     * read_a == 0 implies read_b == 1.
     */

    if (read_b == 0 && read_a == 0)
        abort();

    exit(EXIT_SUCCESS);
}
.EE
.in
.PP
The code above transformed to use
.BR membarrier ()
becomes:
.PP
.in +4n
.EX
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/membarrier.h>

static volatile int a, b;

static int
membarrier(int cmd, int flags)
{
    return syscall(__NR_membarrier, cmd, flags);
}

static int
init_membarrier(void)
{
    int ret;

    /* Check that membarrier() is supported. */

    ret = membarrier(MEMBARRIER_CMD_QUERY, 0);
    if (ret < 0) {
        perror("membarrier");
        return \-1;
    }

    if (!(ret & MEMBARRIER_CMD_GLOBAL)) {
        fprintf(stderr,
            "membarrier does not support MEMBARRIER_CMD_GLOBAL\en");
        return \-1;
    }

    return 0;
}

static void
fast_path(int *read_b)
{
    a = 1;
    asm volatile ("" : : : "memory");
    *read_b = b;
}

static void
slow_path(int *read_a)
{
    b = 1;
    membarrier(MEMBARRIER_CMD_GLOBAL, 0);
    *read_a = a;
}

int
main(int argc, char **argv)
{
    int read_a, read_b;

    if (init_membarrier())
        exit(EXIT_FAILURE);

    /*
     * Real applications would call fast_path() and slow_path()
     * from different threads. Call those from main() to keep
     * this example short.
     */

    slow_path(&read_a);
    fast_path(&read_b);

    /*
     * read_b == 0 implies read_a == 1 and
     * read_a == 0 implies read_b == 1.
     */

    if (read_b == 0 && read_a == 0)
        abort();

    exit(EXIT_SUCCESS);
}
.EE
.in