]>
Commit | Line | Data |
---|---|---|
a2dd6388 MK |
1 | .\" Copyright (C) 2019 Michael Kerrisk <mtk.manpages@gmail.com> |
2 | .\" A very few fragments remain from an earlier page written by | |
3 | .\" Werner Almesberger in 2000 | |
2297bf0e | 4 | .\" |
5fbde956 | 5 | .\" SPDX-License-Identifier: Linux-man-pages-copyleft |
fea681da | 6 | .\" |
4c1c5274 | 7 | .TH pivot_root 2 (date) "Linux man-pages (unreleased)" |
fea681da | 8 | .SH NAME |
0843016c | 9 | pivot_root \- change the root mount |
e69cfee8 AC |
10 | .SH LIBRARY |
11 | Standard C library | |
8fc3b2cf | 12 | .RI ( libc ", " \-lc ) |
fea681da | 13 | .SH SYNOPSIS |
c7db92b9 | 14 | .nf |
3e67d1a7 AC |
15 | .BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */" |
16 | .B #include <unistd.h> | |
17 | .PP | |
18 | .BI "int syscall(SYS_pivot_root, const char *" new_root \ | |
19 | ", const char *" put_old ); | |
c7db92b9 | 20 | .fi |
dbfe9c70 | 21 | .PP |
45c99e3e | 22 | .IR Note : |
3e67d1a7 AC |
23 | glibc provides no wrapper for |
24 | .BR pivot_root (), | |
25 | necessitating the use of | |
26 | .BR syscall (2). | |
fea681da | 27 | .SH DESCRIPTION |
60a90ecd | 28 | .BR pivot_root () |
0843016c MK |
29 | changes the root mount in the mount namespace of the calling process. |
30 | More precisely, it moves the root mount to the | |
31 | directory \fIput_old\fP and makes \fInew_root\fP the new root mount. | |
fdc558bd MK |
32 | The calling process must have the |
33 | .B CAP_SYS_ADMIN | |
34 | capability in the user namespace that owns the caller's mount namespace. | |
efeece04 | 35 | .PP |
60a90ecd | 36 | .BR pivot_root () |
81b24320 MK |
37 | changes the root directory and the current working directory |
38 | of each process or thread in the same mount namespace to | |
39 | .I new_root | |
40 | if they point to the old root directory. | |
682e1329 MK |
41 | (See also NOTES.) |
42 | On the other hand, | |
43 | .BR pivot_root () | |
44 | does not change the caller's current working directory | |
45 | (unless it is on the old root directory), | |
46 | and thus it should be followed by a | |
47 | \fBchdir("/")\fP call. | |
efeece04 | 48 | .PP |
41d4557c | 49 | The following restrictions apply: |
22356d97 | 50 | .IP \(bu 3 |
1ae6b2c7 | 51 | .I new_root |
41d4557c | 52 | and |
1ae6b2c7 | 53 | .I put_old |
41d4557c | 54 | must be directories. |
22356d97 | 55 | .IP \(bu |
33313a26 MK |
56 | .I new_root |
57 | and | |
58 | .I put_old | |
59 | must not be on the same mount as the current root. | |
22356d97 | 60 | .IP \(bu |
57bab66a | 61 | \fIput_old\fP must be at or underneath \fInew_root\fP; |
87529800 | 62 | that is, adding some nonnegative |
d5b48568 | 63 | number of "\fI/..\fP" suffixes to the pathname pointed to by |
87529800 MK |
64 | .I put_old |
65 | must yield the same directory as \fInew_root\fP. | |
22356d97 | 66 | .IP \(bu |
37704bfc | 67 | .I new_root |
666373fc | 68 | must be a path to a mount point, but can't be |
9f3af6b8 | 69 | .IR """/""" . |
666373fc MK |
70 | A path that is not already a mount point can be converted into one by |
71 | bind mounting the path onto itself. | |
22356d97 | 72 | .IP \(bu |
d4b2104a | 73 | The propagation type of the parent mount of |
1ae6b2c7 | 74 | .I new_root |
d4b2104a | 75 | and the parent mount of the current root directory must not be |
a39e880f MK |
76 | .BR MS_SHARED ; |
77 | similarly, if | |
78 | .I put_old | |
79 | is an existing mount point, its propagation type must not be | |
1a0b1fd7 | 80 | .BR MS_SHARED . |
9d33e03b MK |
81 | These restrictions ensure that |
82 | .BR pivot_root () | |
83 | never propagates any changes to another mount namespace. | |
22356d97 | 84 | .IP \(bu |
eb9078a7 | 85 | The current root directory must be a mount point. |
47297adb | 86 | .SH RETURN VALUE |
c13182ef MK |
87 | On success, zero is returned. |
88 | On error, \-1 is returned, and | |
f6a4078b | 89 | \fIerrno\fP is set to indicate the error. |
fea681da | 90 | .SH ERRORS |
60a90ecd | 91 | .BR pivot_root () |
5f5751d3 | 92 | may fail with any of the same errors as |
60a90ecd | 93 | .BR stat (2). |
5f5751d3 | 94 | Additionally, it may fail with the following errors: |
fea681da MK |
95 | .TP |
96 | .B EBUSY | |
b647c4c9 MK |
97 | .\" Reconfirmed that the following error occurs on Linux 5.0 by |
98 | .\" specifying 'new_root' as "/rootfs" and 'put_old' as | |
99 | .\" "/rootfs/oldrootfs", and *not* bind mounting "/rootfs" on top of | |
100 | .\" itself. Of course, this is an odd situation, since a later check | |
101 | .\" in the kernel code will in any case yield EINVAL if 'new_root' is | |
102 | .\" not a mount point. However, when the system call was first added, | |
bf421740 | 103 | .\" 'new_root' was not required to be a mount point. So, this |
b647c4c9 MK |
104 | .\" error is nowadays probably just the result of crufty accumulation. |
105 | .\" This error can also occur if we bind mount "/" on top of itself | |
106 | .\" and try to specify "/" as the 'new_root' (again, an odd situation). So, | |
107 | .\" the EBUSY check in the kernel does still seem necessary to prevent | |
108 | .\" that case. Furthermore, the "or put_old" piece is probably | |
109 | .\" redundant text (although the check is in the kernel), since, | |
110 | .\" in another check, 'put_old' is required to be under 'new_root'. | |
111 | .I new_root | |
112 | or | |
113 | .I put_old | |
ba4b07c3 | 114 | is on the current root mount. |
b647c4c9 MK |
115 | (This error covers the pathological case where |
116 | .I new_root | |
117 | is | |
118 | .IR """/""" .) | |
fea681da MK |
119 | .TP |
120 | .B EINVAL | |
37704bfc MK |
121 | .I new_root |
122 | is not a mount point. | |
123 | .TP | |
124 | .B EINVAL | |
542175d8 | 125 | \fIput_old\fP is not at or underneath \fInew_root\fP. |
fea681da | 126 | .TP |
dc9b6c92 | 127 | .B EINVAL |
eb9078a7 MK |
128 | The current root directory is not a mount point |
129 | (because of an earlier | |
130 | .BR chroot (2)). | |
131 | .TP | |
132 | .B EINVAL | |
ba4b07c3 | 133 | The current root is on the rootfs (initial ramfs) mount; see NOTES. |
dc9b6c92 | 134 | .TP |
1a0b1fd7 MK |
135 | .B EINVAL |
136 | Either the mount point at | |
137 | .IR new_root , | |
138 | or the parent mount of that mount point, | |
139 | has propagation type | |
140 | .BR MS_SHARED . | |
141 | .TP | |
a39e880f MK |
142 | .B EINVAL |
143 | .I put_old | |
144 | is a mount point and has the propagation type | |
145 | .BR MS_SHARED . | |
146 | .TP | |
fea681da MK |
147 | .B ENOTDIR |
148 | \fInew_root\fP or \fIput_old\fP is not a directory. | |
149 | .TP | |
150 | .B EPERM | |
edd1fa35 | 151 | The calling process does not have the |
fea681da MK |
152 | .B CAP_SYS_ADMIN |
153 | capability. | |
a1d5f77c MK |
154 | .SH VERSIONS |
155 | .BR pivot_root () | |
156 | was introduced in Linux 2.3.41. | |
3113c7f3 | 157 | .SH STANDARDS |
a1d5f77c | 158 | .BR pivot_root () |
8382f16d | 159 | is Linux-specific and hence is not portable. |
f5b03186 | 160 | .SH NOTES |
14caaed2 MK |
161 | A command-line interface for this system call is provided by |
162 | .BR pivot_root (8). | |
163 | .PP | |
422e36b7 MK |
164 | .BR pivot_root () |
165 | allows the caller to switch to a new root filesystem while at the same time | |
166 | placing the old root mount at a location under | |
167 | .I new_root | |
168 | from where it can subsequently be unmounted. | |
169 | (The fact that it moves all processes that have a root directory | |
b27d444f MK |
170 | or current working directory on the old root directory to the |
171 | new root frees the old root directory of users, | |
33313a26 | 172 | allowing the old root mount to be unmounted more easily.) |
c4bf3333 | 173 | .PP |
87529800 | 174 | One use of |
422e36b7 MK |
175 | .BR pivot_root () |
176 | is during system startup, when the | |
87529800 MK |
177 | system mounts a temporary root filesystem (e.g., an |
178 | .BR initrd (4)), | |
179 | then mounts the real root filesystem, and eventually turns the latter into | |
180 | the root directory of all relevant processes and threads. | |
422e36b7 MK |
181 | A modern use is to set up a root filesystem during |
182 | the creation of a container. | |
183 | .PP | |
fc2f474d MK |
184 | The fact that |
185 | .BR pivot_root () | |
186 | modifies process root and current working directories in the | |
187 | manner noted in DESCRIPTION | |
188 | is necessary in order to prevent kernel threads from keeping the old | |
87529800 | 189 | root mount busy with their root and current working directories, |
fc2f474d MK |
190 | even if they never access |
191 | the filesystem in any way. | |
fc2f474d | 192 | .PP |
97076c5a MK |
193 | The rootfs (initial ramfs) cannot be |
194 | .BR pivot_root ()ed. | |
195 | The recommended method of changing the root filesystem in this case is | |
196 | to delete everything in rootfs, overmount rootfs with the new root, attach | |
197 | .IR stdin / stdout / stderr | |
198 | to the new | |
199 | .IR /dev/console , | |
200 | and exec the new | |
201 | .BR init (1). | |
202 | Helper programs for this process exist; see | |
203 | .BR switch_root (8). | |
3db820fe MK |
204 | .\" |
205 | .SS pivot_root(\(dq.\(dq, \(dq.\(dq) | |
57bab66a MK |
206 | .I new_root |
207 | and | |
208 | .I put_old | |
209 | may be the same directory. | |
210 | In particular, the following sequence allows a pivot-root operation | |
211 | without needing to create and remove a temporary directory: | |
212 | .PP | |
213 | .in +4n | |
214 | .EX | |
215 | chdir(new_root); | |
216 | pivot_root(".", "."); | |
217 | umount2(".", MNT_DETACH); | |
218 | .EE | |
219 | .in | |
220 | .PP | |
221 | This sequence succeeds because the | |
222 | .BR pivot_root () | |
223 | call stacks the old root mount point | |
57bab66a MK |
224 | on top of the new root mount point at |
225 | .IR / . | |
226 | At that point, the calling process's root directory and current | |
227 | working directory refer to the new root mount point | |
228 | .RI ( new_root ). | |
229 | During the subsequent | |
230 | .BR umount2 (2) | |
231 | call, resolution of | |
1ae6b2c7 | 232 | .I """.""" |
57bab66a MK |
233 | starts with |
234 | .I new_root | |
235 | and then moves up the list of mounts stacked at | |
236 | .IR / , | |
8f2a9129 | 237 | with the result that the old root mount point is unmounted. |
01c64c3b MK |
238 | .\" |
239 | .SS Historical notes | |
240 | For many years, this manual page carried the following text: | |
241 | .RS | |
242 | .PP | |
243 | .BR pivot_root () | |
244 | may or may not change the current root and the current | |
245 | working directory of any processes or threads which use the old | |
246 | root directory. | |
247 | The caller of | |
248 | .BR pivot_root () | |
249 | must ensure that processes with root or current working directory | |
250 | at the old root operate correctly in either case. | |
251 | An easy way to ensure this is to change their | |
252 | root and current working directory to \fInew_root\fP before invoking | |
253 | .BR pivot_root (). | |
254 | .RE | |
255 | .PP | |
256 | This text, written before the system call implementation was | |
257 | even finalized in the kernel, was probably intended to warn users | |
258 | at that time that the implementation might change before final release. | |
259 | However, the behavior stated in DESCRIPTION | |
260 | has remained consistent since this system call | |
261 | was first implemented and will not change now. | |
a14af333 | 262 | .SH EXAMPLES |
47b69a37 MK |
263 | .\" FIXME |
264 | .\" Would it be better, because simpler, to use unshare(2) | |
265 | .\" rather than clone(2) in the example below? | |
2f2e1a22 MK |
266 | The program below demonstrates the use of |
267 | .BR pivot_root () | |
268 | inside a mount namespace that is created using | |
269 | .BR clone (2). | |
270 | After pivoting to the root directory named in the program's | |
271 | first command-line argument, the child created by | |
272 | .BR clone (2) | |
273 | then executes the program named in the remaining command-line arguments. | |
274 | .PP | |
275 | We demonstrate the program by creating a directory that will serve as | |
276 | the new root filesystem and placing a copy of the (statically linked) | |
277 | .BR busybox (1) | |
278 | executable in that directory. | |
279 | .PP | |
280 | .in +4n | |
281 | .EX | |
282 | $ \fBmkdir /tmp/rootfs\fP | |
283 | $ \fBls \-id /tmp/rootfs\fP # Show inode number of new root directory | |
284 | 319459 /tmp/rootfs | |
285 | $ \fBcp $(which busybox) /tmp/rootfs\fP | |
861d36ba | 286 | $ \fBPS1=\(aqbbsh$ \(aq sudo ./pivot_root_demo /tmp/rootfs /busybox sh\fP |
2f2e1a22 MK |
287 | bbsh$ \fBPATH=/\fP |
288 | bbsh$ \fBbusybox ln busybox ln\fP | |
289 | bbsh$ \fBln busybox echo\fP | |
290 | bbsh$ \fBln busybox ls\fP | |
291 | bbsh$ \fBls\fP | |
292 | busybox echo ln ls | |
293 | bbsh$ \fBls \-id /\fP # Compare with inode number above | |
294 | 319459 / | |
295 | bbsh$ \fBecho \(aqhello world\(aq\fP | |
296 | hello world | |
297 | .EE | |
298 | .in | |
299 | .SS Program source | |
300 | \& | |
301 | .PP | |
33857069 | 302 | .\" SRC BEGIN (pivot_root.c) |
2f2e1a22 MK |
303 | .EX |
304 | /* pivot_root_demo.c */ | |
305 | ||
306 | #define _GNU_SOURCE | |
5a5208c1 | 307 | #include <err.h> |
80ae7514 | 308 | #include <limits.h> |
2f2e1a22 | 309 | #include <sched.h> |
80ae7514 | 310 | #include <signal.h> |
2f2e1a22 MK |
311 | #include <stdio.h> |
312 | #include <stdlib.h> | |
80ae7514 | 313 | #include <sys/mman.h> |
2f2e1a22 MK |
314 | #include <sys/mount.h> |
315 | #include <sys/stat.h> | |
80ae7514 AC |
316 | #include <sys/syscall.h> |
317 | #include <sys/wait.h> | |
318 | #include <unistd.h> | |
2f2e1a22 | 319 | |
2f2e1a22 MK |
320 | static int |
321 | pivot_root(const char *new_root, const char *put_old) | |
322 | { | |
323 | return syscall(SYS_pivot_root, new_root, put_old); | |
324 | } | |
325 | ||
326 | #define STACK_SIZE (1024 * 1024) | |
327 | ||
328 | static int /* Startup function for cloned child */ | |
329 | child(void *arg) | |
330 | { | |
0b94bd78 AC |
331 | char path[PATH_MAX]; |
332 | char **args = arg; | |
333 | char *new_root = args[0]; | |
334 | const char *put_old = "/oldrootfs"; | |
2f2e1a22 MK |
335 | |
336 | /* Ensure that \(aqnew_root\(aq and its parent mount don\(aqt have | |
337 | shared propagation (which would cause pivot_root() to | |
338 | return an error), and prevent propagation of mount | |
c6beb8a1 | 339 | events to the initial mount namespace. */ |
2f2e1a22 | 340 | |
32a72b3e | 341 | if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) == \-1) |
5a5208c1 | 342 | err(EXIT_FAILURE, "mount\-MS_PRIVATE"); |
2f2e1a22 | 343 | |
c6beb8a1 | 344 | /* Ensure that \(aqnew_root\(aq is a mount point. */ |
2f2e1a22 MK |
345 | |
346 | if (mount(new_root, new_root, NULL, MS_BIND, NULL) == \-1) | |
5a5208c1 | 347 | err(EXIT_FAILURE, "mount\-MS_BIND"); |
2f2e1a22 | 348 | |
c6beb8a1 | 349 | /* Create directory to which old root will be pivoted. */ |
2f2e1a22 MK |
350 | |
351 | snprintf(path, sizeof(path), "%s/%s", new_root, put_old); | |
352 | if (mkdir(path, 0777) == \-1) | |
5a5208c1 | 353 | err(EXIT_FAILURE, "mkdir"); |
2f2e1a22 | 354 | |
c6beb8a1 | 355 | /* And pivot the root filesystem. */ |
2f2e1a22 MK |
356 | |
357 | if (pivot_root(new_root, path) == \-1) | |
5a5208c1 | 358 | err(EXIT_FAILURE, "pivot_root"); |
2f2e1a22 | 359 | |
c6beb8a1 | 360 | /* Switch the current working directory to "/". */ |
2f2e1a22 MK |
361 | |
362 | if (chdir("/") == \-1) | |
5a5208c1 | 363 | err(EXIT_FAILURE, "chdir"); |
2f2e1a22 | 364 | |
c6beb8a1 | 365 | /* Unmount old root and remove mount point. */ |
2f2e1a22 MK |
366 | |
367 | if (umount2(put_old, MNT_DETACH) == \-1) | |
368 | perror("umount2"); | |
369 | if (rmdir(put_old) == \-1) | |
370 | perror("rmdir"); | |
371 | ||
372 | /* Execute the command specified in argv[2]... */ | |
373 | ||
374 | execv(args[1], &args[1]); | |
5a5208c1 | 375 | err(EXIT_FAILURE, "execv"); |
2f2e1a22 MK |
376 | } |
377 | ||
378 | int | |
379 | main(int argc, char *argv[]) | |
380 | { | |
0b94bd78 AC |
381 | char *stack; |
382 | ||
c6beb8a1 | 383 | /* Create a child process in a new mount namespace. */ |
2f2e1a22 | 384 | |
0b94bd78 AC |
385 | stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, |
386 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, \-1, 0); | |
1b547316 | 387 | if (stack == MAP_FAILED) |
5a5208c1 | 388 | err(EXIT_FAILURE, "mmap"); |
2f2e1a22 MK |
389 | |
390 | if (clone(child, stack + STACK_SIZE, | |
4687ab0e | 391 | CLONE_NEWNS | SIGCHLD, &argv[1]) == \-1) |
5a5208c1 | 392 | err(EXIT_FAILURE, "clone"); |
2f2e1a22 | 393 | |
c6beb8a1 | 394 | /* Parent falls through to here; wait for child. */ |
2f2e1a22 MK |
395 | |
396 | if (wait(NULL) == \-1) | |
5a5208c1 | 397 | err(EXIT_FAILURE, "wait"); |
2f2e1a22 MK |
398 | |
399 | exit(EXIT_SUCCESS); | |
400 | } | |
401 | .EE | |
33857069 | 402 | .\" SRC END |
47297adb | 403 | .SH SEE ALSO |
fea681da MK |
404 | .BR chdir (2), |
405 | .BR chroot (2), | |
34a0f19c | 406 | .BR mount (2), |
fea681da MK |
407 | .BR stat (2), |
408 | .BR initrd (4), | |
f42778c4 | 409 | .BR mount_namespaces (7), |
b2bced6d MK |
410 | .BR pivot_root (8), |
411 | .BR switch_root (8) |