]>
Commit | Line | Data |
---|---|---|
8176b81a MK |
1 | .\" Copyright (C) Michael Kerrisk, 2004 |
2 | .\" using some material drawn from earlier man pages | |
3 | .\" written by Thomas Kuhn, Copyright 1996 | |
fea681da | 4 | .\" |
1dd72f9c | 5 | .\" %%%LICENSE_START(GPLv2+_DOC_FULL) |
fea681da MK |
6 | .\" This is free documentation; you can redistribute it and/or |
7 | .\" modify it under the terms of the GNU General Public License as | |
8 | .\" published by the Free Software Foundation; either version 2 of | |
9 | .\" the License, or (at your option) any later version. | |
10 | .\" | |
11 | .\" The GNU General Public License's references to "object code" | |
12 | .\" and "executables" are to be interpreted as the output of any | |
13 | .\" document formatting or typesetting system, including | |
14 | .\" intermediate and printed output. | |
15 | .\" | |
16 | .\" This manual is distributed in the hope that it will be useful, | |
17 | .\" but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | .\" GNU General Public License for more details. | |
20 | .\" | |
21 | .\" You should have received a copy of the GNU General Public | |
c715f741 MK |
22 | .\" License along with this manual; if not, see |
23 | .\" <http://www.gnu.org/licenses/>. | |
6a8d8745 | 24 | .\" %%%LICENSE_END |
c1832fd1 | 25 | .\" |
8538a62b | 26 | .TH MLOCK 2 2018-02-02 "Linux" "Linux Programmer's Manual" |
fea681da | 27 | .SH NAME |
7a3e8caa | 28 | mlock, mlock2, munlock, mlockall, munlockall \- lock and unlock memory |
fea681da MK |
29 | .SH SYNOPSIS |
30 | .nf | |
31 | .B #include <sys/mman.h> | |
68e4db0a | 32 | .PP |
da8cb51e | 33 | .BI "int mlock(const void *" addr ", size_t " len ); |
7a3e8caa | 34 | .BI "int mlock2(const void *" addr ", size_t " len ", int " flags ); |
da8cb51e | 35 | .BI "int munlock(const void *" addr ", size_t " len ); |
68e4db0a | 36 | .PP |
da8cb51e | 37 | .BI "int mlockall(int " flags ); |
495846d9 | 38 | .B int munlockall(void); |
fea681da MK |
39 | .fi |
40 | .SH DESCRIPTION | |
7a3e8caa EM |
41 | .BR mlock (), |
42 | .BR mlock2 (), | |
8176b81a MK |
43 | and |
44 | .BR mlockall () | |
9f1eb9e0 | 45 | lock part or all of the calling process's virtual address |
c13182ef | 46 | space into RAM, preventing that memory from being paged to the |
8176b81a | 47 | swap area. |
efeece04 | 48 | .PP |
8176b81a MK |
49 | .BR munlock () |
50 | and | |
51 | .BR munlockall () | |
52 | perform the converse operation, | |
9f1eb9e0 | 53 | unlocking part or all of the calling process's virtual |
30ca8940 MK |
54 | address space, so that pages in the specified virtual address range may |
55 | once more be swapped out if required by the kernel memory manager. | |
efeece04 | 56 | .PP |
30ca8940 | 57 | Memory locking and unlocking are performed in units of whole pages. |
7a3e8caa | 58 | .SS mlock(), mlock2(), and munlock() |
8176b81a MK |
59 | .BR mlock () |
60 | locks pages in the address range starting at | |
fea681da | 61 | .I addr |
8176b81a | 62 | and continuing for |
fea681da | 63 | .I len |
8176b81a | 64 | bytes. |
c13182ef | 65 | All pages that contain a part of the specified address range are |
8176b81a MK |
66 | guaranteed to be resident in RAM when the call returns successfully; |
67 | the pages are guaranteed to stay in RAM until later unlocked. | |
efeece04 | 68 | .PP |
7a3e8caa | 69 | .BR mlock2 () |
d357d179 MK |
70 | .\" commit a8ca5d0ecbdde5cc3d7accacbd69968b0c98764e |
71 | .\" commit de60f5f10c58d4f34b68622442c0e04180367f3f | |
72 | .\" commit b0f205c2a3082dd9081f9a94e50658c5fa906ff1 | |
7a3e8caa EM |
73 | also locks pages in the specified range starting at |
74 | .I addr | |
75 | and continuing for | |
76 | .I len | |
77 | bytes. | |
78 | However, the state of the pages contained in that range after the call | |
79 | returns successfully will depend on the value in the | |
80 | .I flags | |
81 | argument. | |
efeece04 | 82 | .PP |
7a3e8caa EM |
83 | The |
84 | .I flags | |
85 | argument can be either 0 or the following constant: | |
d357d179 | 86 | .TP |
7a3e8caa | 87 | .B MLOCK_ONFAULT |
251c33e3 | 88 | Lock pages that are currently resident and mark the entire range so |
78e8660d | 89 | that the remaining nonresident pages are locked when they are populated |
251c33e3 | 90 | by a page fault. |
7a3e8caa | 91 | .PP |
efeece04 | 92 | .PP |
7a3e8caa EM |
93 | If |
94 | .I flags | |
95 | is 0, | |
96 | .BR mlock2 () | |
d357d179 MK |
97 | behaves exactly the same as |
98 | .BR mlock (). | |
efeece04 | 99 | .PP |
8176b81a MK |
100 | .BR munlock () |
101 | unlocks pages in the address range starting at | |
102 | .I addr | |
103 | and continuing for | |
104 | .I len | |
105 | bytes. | |
106 | After this call, all pages that contain a part of the specified | |
107 | memory range can be moved to external swap space again by the kernel. | |
73d8cece | 108 | .SS mlockall() and munlockall() |
8176b81a MK |
109 | .BR mlockall () |
110 | locks all pages mapped into the address space of the | |
c13182ef MK |
111 | calling process. |
112 | This includes the pages of the code, data and stack | |
8176b81a | 113 | segment, as well as shared libraries, user space kernel data, shared |
2706f299 | 114 | memory, and memory-mapped files. |
c13182ef | 115 | All mapped pages are guaranteed |
8176b81a MK |
116 | to be resident in RAM when the call returns successfully; |
117 | the pages are guaranteed to stay in RAM until later unlocked. | |
efeece04 | 118 | .PP |
8176b81a MK |
119 | The |
120 | .I flags | |
121 | argument is constructed as the bitwise OR of one or more of the | |
122 | following constants: | |
123 | .TP 1.2i | |
124 | .B MCL_CURRENT | |
125 | Lock all pages which are currently mapped into the address space of | |
126 | the process. | |
127 | .TP | |
128 | .B MCL_FUTURE | |
129 | Lock all pages which will become mapped into the address space of the | |
c13182ef | 130 | process in the future. |
7a3e8caa | 131 | These could be, for instance, new pages required |
9a141bfb | 132 | by a growing heap and stack as well as new memory-mapped files or |
8176b81a | 133 | shared memory regions. |
7a3e8caa EM |
134 | .TP |
135 | .BR MCL_ONFAULT " (since Linux 4.4)" | |
136 | Used together with | |
137 | .BR MCL_CURRENT , | |
138 | .BR MCL_FUTURE , | |
a0c1a1ef MK |
139 | or both. |
140 | Mark all current (with | |
7a3e8caa EM |
141 | .BR MCL_CURRENT ) |
142 | or future (with | |
143 | .BR MCL_FUTURE ) | |
a0c1a1ef MK |
144 | mappings to lock pages when they are faulted in. |
145 | When used with | |
7a3e8caa EM |
146 | .BR MCL_CURRENT , |
147 | all present pages are locked, but | |
148 | .BR mlockall () | |
a0c1a1ef MK |
149 | will not fault in non-present pages. |
150 | When used with | |
7a3e8caa EM |
151 | .BR MCL_FUTURE , |
152 | all future mappings will be marked to lock pages when they are faulted | |
153 | in, but they will not be populated by the lock when the mapping is | |
154 | created. | |
155 | .B MCL_ONFAULT | |
156 | must be used with either | |
157 | .B MCL_CURRENT | |
158 | or | |
159 | .B MCL_FUTURE | |
160 | or both. | |
8176b81a MK |
161 | .PP |
162 | If | |
163 | .B MCL_FUTURE | |
3aadaa65 MK |
164 | has been specified, then a later system call (e.g., |
165 | .BR mmap (2), | |
166 | .BR sbrk (2), | |
167 | .BR malloc (3)) | |
168 | may fail if it would cause the number of locked bytes to exceed | |
169 | the permitted maximum (see below). | |
170 | In the same circumstances, stack growth may likewise fail: | |
171 | the kernel will deny stack expansion and deliver a | |
0daa9e92 | 172 | .B SIGSEGV |
3aadaa65 | 173 | signal to the process. |
efeece04 | 174 | .PP |
8176b81a MK |
175 | .BR munlockall () |
176 | unlocks all pages mapped into the address space of the | |
177 | calling process. | |
47297adb | 178 | .SH RETURN VALUE |
723560ba | 179 | On success, these system calls return 0. |
a1d5f77c MK |
180 | On error, \-1 is returned, |
181 | .I errno | |
182 | is set appropriately, and no changes are made to any locks in the | |
183 | address space of the process. | |
184 | .SH ERRORS | |
185 | .TP | |
186 | .B ENOMEM | |
c7094399 | 187 | (Linux 2.6.9 and later) the caller had a nonzero |
a1d5f77c MK |
188 | .B RLIMIT_MEMLOCK |
189 | soft resource limit, but tried to lock more memory than the limit | |
190 | permitted. | |
191 | This limit is not enforced if the process is privileged | |
192 | .RB ( CAP_IPC_LOCK ). | |
193 | .TP | |
194 | .B ENOMEM | |
195 | (Linux 2.4 and earlier) the calling process tried to lock more than | |
196 | half of RAM. | |
197 | .\" In the case of mlock(), this check is somewhat buggy: it doesn't | |
198 | .\" take into account whether the to-be-locked range overlaps with | |
199 | .\" already locked pages. Thus, suppose we allocate | |
200 | .\" (num_physpages / 4 + 1) of memory, and lock those pages once using | |
201 | .\" mlock(), and then lock the *same* page range a second time. | |
202 | .\" In this case, the second mlock() call will fail, since the check | |
203 | .\" calculates that the process is trying to lock (num_physpages / 2 + 2) | |
204 | .\" pages, which of course is not true. (MTK, Nov 04, kernel 2.4.28) | |
205 | .TP | |
206 | .B EPERM | |
ec3cf7a4 | 207 | The caller is not privileged, but needs privilege |
a1d5f77c | 208 | .RB ( CAP_IPC_LOCK ) |
ec3cf7a4 | 209 | to perform the requested operation. |
a1d5f77c | 210 | .\"SVr4 documents an additional EAGAIN error code. |
dd3568a1 | 211 | .PP |
a1d5f77c | 212 | For |
7a3e8caa EM |
213 | .BR mlock (), |
214 | .BR mlock2 (), | |
a1d5f77c MK |
215 | and |
216 | .BR munlock (): | |
217 | .TP | |
901d59b5 MK |
218 | .B EAGAIN |
219 | Some or all of the specified address range could not be locked. | |
220 | .TP | |
a1d5f77c | 221 | .B EINVAL |
963da6c5 | 222 | The result of the addition |
fce0a699 | 223 | .IR addr + len |
963da6c5 | 224 | was less than |
fce0a699 | 225 | .IR addr |
4a7300fe | 226 | (e.g., the addition may have resulted in an overflow). |
a1d5f77c MK |
227 | .TP |
228 | .B EINVAL | |
229 | (Not on Linux) | |
230 | .I addr | |
231 | was not a multiple of the page size. | |
232 | .TP | |
233 | .B ENOMEM | |
234 | Some of the specified address range does not correspond to mapped | |
235 | pages in the address space of the process. | |
555c4452 MK |
236 | .TP |
237 | .B ENOMEM | |
238 | Locking or unlocking a region would result in the total number of | |
239 | mappings with distinct attributes (e.g., locked versus unlocked) | |
240 | exceeding the allowed maximum. | |
241 | .\" I.e., the number of VMAs would exceed the 64kB maximum | |
242 | (For example, unlocking a range in the middle of a currently locked | |
243 | mapping would result in three mappings: | |
244 | two locked mappings at each end and an unlocked mapping in the middle.) | |
dd3568a1 | 245 | .PP |
a1d5f77c | 246 | For |
7a3e8caa | 247 | .BR mlock2 (): |
a1d5f77c MK |
248 | .TP |
249 | .B EINVAL | |
250 | Unknown \fIflags\fP were specified. | |
dd3568a1 | 251 | .PP |
a1d5f77c | 252 | For |
7a3e8caa EM |
253 | .BR mlockall (): |
254 | .TP | |
255 | .B EINVAL | |
256 | Unknown \fIflags\fP were specified or | |
257 | .B MCL_ONFAULT | |
258 | was specified without either | |
259 | .B MCL_FUTURE | |
260 | or | |
261 | .BR MCL_CURRENT . | |
dd3568a1 | 262 | .PP |
7a3e8caa | 263 | For |
a1d5f77c MK |
264 | .BR munlockall (): |
265 | .TP | |
266 | .B EPERM | |
267 | (Linux 2.6.8 and earlier) The caller was not privileged | |
268 | .RB ( CAP_IPC_LOCK ). | |
d357d179 | 269 | .SH VERSIONS |
9187f1a3 | 270 | .BR mlock2 () |
39874d76 MK |
271 | is available since Linux 4.4; |
272 | glibc support was added in version 2.27. | |
47297adb | 273 | .SH CONFORMING TO |
ace101f9 | 274 | POSIX.1-2001, POSIX.1-2008, SVr4. |
efeece04 | 275 | .PP |
9e5de5da MK |
276 | .BR mlock2 () |
277 | is Linux specific. | |
a1d5f77c MK |
278 | .SH AVAILABILITY |
279 | On POSIX systems on which | |
280 | .BR mlock () | |
281 | and | |
282 | .BR munlock () | |
283 | are available, | |
284 | .B _POSIX_MEMLOCK_RANGE | |
c84371c6 | 285 | is defined in \fI<unistd.h>\fP and the number of bytes in a page |
a1d5f77c MK |
286 | can be determined from the constant |
287 | .B PAGESIZE | |
c84371c6 | 288 | (if defined) in \fI<limits.h>\fP or by calling |
a1d5f77c | 289 | .IR sysconf(_SC_PAGESIZE) . |
efeece04 | 290 | .PP |
a1d5f77c MK |
291 | On POSIX systems on which |
292 | .BR mlockall () | |
293 | and | |
294 | .BR munlockall () | |
295 | are available, | |
296 | .B _POSIX_MEMLOCK | |
6387216b MK |
297 | is defined in \fI<unistd.h>\fP to a value greater than 0. |
298 | (See also | |
a1d5f77c MK |
299 | .BR sysconf (3).) |
300 | .\" POSIX.1-2001: It shall be defined to -1 or 0 or 200112L. | |
301 | .\" -1: unavailable, 0: ask using sysconf(). | |
302 | .\" glibc defines it to 1. | |
47297adb | 303 | .SH NOTES |
fea681da | 304 | Memory locking has two main applications: real-time algorithms and |
c13182ef MK |
305 | high-security data processing. |
306 | Real-time applications require | |
fea681da | 307 | deterministic timing, and, like scheduling, paging is one major cause |
c13182ef MK |
308 | of unexpected program execution delays. |
309 | Real-time applications will | |
8176b81a MK |
310 | usually also switch to a real-time scheduler with |
311 | .BR sched_setscheduler (2). | |
fea681da | 312 | Cryptographic security software often handles critical bytes like |
c13182ef MK |
313 | passwords or secret keys as data structures. |
314 | As a result of paging, | |
fea681da MK |
315 | these secrets could be transferred onto a persistent swap store medium, |
316 | where they might be accessible to the enemy long after the security | |
317 | software has erased the secrets in RAM and terminated. | |
318 | (But be aware that the suspend mode on laptops and some desktop | |
319 | computers will save a copy of the system's RAM to disk, regardless | |
320 | of memory locks.) | |
efeece04 | 321 | .PP |
8176b81a MK |
322 | Real-time processes that are using |
323 | .BR mlockall () | |
324 | to prevent delays on page faults should reserve enough | |
325 | locked stack pages before entering the time-critical section, | |
326 | so that no page fault can be caused by function calls. | |
c13182ef MK |
327 | This can be achieved by calling a function that allocates a |
328 | sufficiently large automatic variable (an array) and writes to the | |
329 | memory occupied by this array in order to touch these stack pages. | |
8176b81a | 330 | This way, enough pages will be mapped for the stack and can be |
c13182ef MK |
331 | locked into RAM. |
332 | The dummy writes ensure that not even copy-on-write | |
8176b81a | 333 | page faults can occur in the critical section. |
efeece04 | 334 | .PP |
8176b81a MK |
335 | Memory locks are not inherited by a child created via |
336 | .BR fork (2) | |
337 | and are automatically removed (unlocked) during an | |
338 | .BR execve (2) | |
339 | or when the process terminates. | |
ec3b7804 MK |
340 | The |
341 | .BR mlockall () | |
342 | .B MCL_FUTURE | |
7a3e8caa EM |
343 | and |
344 | .B MCL_FUTURE | MCL_ONFAULT | |
345 | settings are not inherited by a child created via | |
ec3b7804 | 346 | .BR fork (2) |
7a3e8caa | 347 | and are cleared during an |
ec3b7804 | 348 | .BR execve (2). |
efeece04 | 349 | .PP |
fb08a095 SAS |
350 | Note that |
351 | .BR fork (2) | |
cbac502b MK |
352 | will prepare the address space for a copy-on-write operation. |
353 | The consequence is that any write access that follows will cause | |
354 | a page fault that in turn may cause high latencies for a real-time process. | |
355 | Therefore, it is crucial not to invoke | |
fb08a095 | 356 | .BR fork (2) |
cbac502b | 357 | after an |
fb08a095 SAS |
358 | .BR mlockall () |
359 | or | |
360 | .BR mlock () | |
cbac502b MK |
361 | operation\(emnot even from a thread which runs at a low priority within |
362 | a process which also has a thread running at elevated priority. | |
efeece04 | 363 | .PP |
8176b81a MK |
364 | The memory lock on an address range is automatically removed |
365 | if the address range is unmapped via | |
366 | .BR munmap (2). | |
efeece04 | 367 | .PP |
75b94dc3 | 368 | Memory locks do not stack, that is, pages which have been locked several times |
fea681da | 369 | by calls to |
7a3e8caa EM |
370 | .BR mlock (), |
371 | .BR mlock2 (), | |
fea681da | 372 | or |
8176b81a | 373 | .BR mlockall () |
fea681da | 374 | will be unlocked by a single call to |
8176b81a | 375 | .BR munlock () |
fea681da | 376 | for the corresponding range or by |
e511ffb6 | 377 | .BR munlockall (). |
fea681da MK |
378 | Pages which are mapped to several locations or by several processes stay |
379 | locked into RAM as long as they are locked at least at one location or by | |
380 | at least one process. | |
efeece04 | 381 | .PP |
7a3e8caa EM |
382 | If a call to |
383 | .BR mlockall () | |
384 | which uses the | |
385 | .B MCL_FUTURE | |
386 | flag is followed by another call that does not specify this flag, the | |
387 | changes made by the | |
388 | .B MCL_FUTURE | |
389 | call will be lost. | |
efeece04 | 390 | .PP |
f97c12d1 MK |
391 | The |
392 | .BR mlock2 () | |
393 | .B MLOCK_ONFAULT | |
394 | flag and the | |
395 | .BR mlockall () | |
396 | .B MCL_ONFAULT | |
397 | flag allow efficient memory locking for applications that deal with | |
398 | large mappings where only a (small) portion of pages in the mapping are touched. | |
399 | In such cases, locking all of the pages in a mapping would incur | |
400 | a significant penalty for memory locking. | |
c634028a | 401 | .SS Linux notes |
8176b81a | 402 | Under Linux, |
7a3e8caa EM |
403 | .BR mlock (), |
404 | .BR mlock2 (), | |
fea681da | 405 | and |
8176b81a MK |
406 | .BR munlock () |
407 | automatically round | |
fea681da | 408 | .I addr |
8176b81a | 409 | down to the nearest page boundary. |
d357d179 MK |
410 | However, the POSIX.1 specification of |
411 | .BR mlock () | |
412 | and | |
413 | .BR munlock () | |
414 | allows an implementation to require that | |
fea681da MK |
415 | .I addr |
416 | is page aligned, so portable applications should ensure this. | |
efeece04 | 417 | .PP |
ec8bcce2 MK |
418 | The |
419 | .I VmLck | |
420 | field of the Linux-specific | |
750653a8 | 421 | .I /proc/[pid]/status |
e5eb406c MK |
422 | file shows how many kilobytes of memory the process with ID |
423 | .I PID | |
424 | has locked using | |
ec8bcce2 | 425 | .BR mlock (), |
7a3e8caa | 426 | .BR mlock2 (), |
ec8bcce2 | 427 | .BR mlockall (), |
ec8bcce2 MK |
428 | and |
429 | .BR mmap (2) | |
430 | .BR MAP_LOCKED . | |
73d8cece | 431 | .SS Limits and permissions |
8176b81a MK |
432 | In Linux 2.6.8 and earlier, |
433 | a process must be privileged | |
434 | .RB ( CAP_IPC_LOCK ) | |
435 | in order to lock memory and the | |
436 | .B RLIMIT_MEMLOCK | |
437 | soft resource limit defines a limit on how much memory the process may lock. | |
efeece04 | 438 | .PP |
8176b81a MK |
439 | Since Linux 2.6.9, no limits are placed on the amount of memory |
440 | that a privileged process can lock and the | |
441 | .B RLIMIT_MEMLOCK | |
442 | soft resource limit instead defines a limit on how much memory an | |
443 | unprivileged process may lock. | |
47297adb | 444 | .SH BUGS |
4a8449e1 MK |
445 | In Linux 4.8 and earlier, |
446 | a bug in the kernel's accounting of locked memory for unprivileged processes | |
447 | (i.e., without | |
448 | .BR CAP_IPC_LOCK ) | |
449 | meant that if the region specified by | |
450 | .I addr | |
451 | and | |
452 | .I len | |
453 | overlapped an existing lock, | |
454 | then the already locked bytes in the overlapping region were counted twice | |
455 | when checking against the limit. | |
456 | Such double accounting could incorrectly calculate a "total locked memory" | |
457 | value for the process that exceeded the | |
458 | .BR RLIMIT_MEMLOCK | |
459 | limit, with the result that | |
460 | .BR mlock () | |
461 | and | |
6614e292 | 462 | .BR mlock2 () |
4a8449e1 MK |
463 | would fail on requests that should have succeeded. |
464 | This bug was fixed | |
465 | .\" commit 0cf2f6f6dc605e587d2c1120f295934c77e810e8 | |
466 | in Linux 4.9. | |
efeece04 | 467 | .PP |
8176b81a MK |
468 | In the 2.4 series Linux kernels up to and including 2.4.17, |
469 | a bug caused the | |
470 | .BR mlockall () | |
471 | .B MCL_FUTURE | |
472 | flag to be inherited across a | |
473 | .BR fork (2). | |
474 | This was rectified in kernel 2.4.18. | |
efeece04 | 475 | .PP |
c13182ef | 476 | Since kernel 2.6.9, if a privileged process calls |
083ddbc2 | 477 | .I mlockall(MCL_FUTURE) |
c13182ef MK |
478 | and later drops privileges (loses the |
479 | .B CAP_IPC_LOCK | |
480 | capability by, for example, | |
c7094399 | 481 | setting its effective UID to a nonzero value), |
c13182ef | 482 | then subsequent memory allocations (e.g., |
083ddbc2 MK |
483 | .BR mmap (2), |
484 | .BR brk (2)) | |
c13182ef MK |
485 | will fail if the |
486 | .B RLIMIT_MEMLOCK | |
083ddbc2 MK |
487 | resource limit is encountered. |
488 | .\" See the following LKML thread: | |
489 | .\" http://marc.theaimsgroup.com/?l=linux-kernel&m=113801392825023&w=2 | |
490 | .\" "Rationale for RLIMIT_MEMLOCK" | |
491 | .\" 23 Jan 2006 | |
47297adb | 492 | .SH SEE ALSO |
bb1639a2 | 493 | .BR mincore (2), |
8176b81a | 494 | .BR mmap (2), |
fea681da | 495 | .BR setrlimit (2), |
f0c34053 | 496 | .BR shmctl (2), |
8176b81a | 497 | .BR sysconf (3), |
ec8bcce2 | 498 | .BR proc (5), |
fea681da | 499 | .BR capabilities (7) |