]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/pivot_root.2
Many pages: Use correct letter case in page titles (TH)
[thirdparty/man-pages.git] / man2 / pivot_root.2
CommitLineData
a2dd6388
MK
1.\" Copyright (C) 2019 Michael Kerrisk <mtk.manpages@gmail.com>
2.\" A very few fragments remain from an earlier page written by
3.\" Werner Almesberger in 2000
2297bf0e 4.\"
5fbde956 5.\" SPDX-License-Identifier: Linux-man-pages-copyleft
fea681da 6.\"
4c1c5274 7.TH pivot_root 2 (date) "Linux man-pages (unreleased)"
fea681da 8.SH NAME
0843016c 9pivot_root \- change the root mount
e69cfee8
AC
10.SH LIBRARY
11Standard C library
8fc3b2cf 12.RI ( libc ", " \-lc )
fea681da 13.SH SYNOPSIS
c7db92b9 14.nf
3e67d1a7
AC
15.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
16.B #include <unistd.h>
17.PP
18.BI "int syscall(SYS_pivot_root, const char *" new_root \
19", const char *" put_old );
c7db92b9 20.fi
dbfe9c70 21.PP
45c99e3e 22.IR Note :
3e67d1a7
AC
23glibc provides no wrapper for
24.BR pivot_root (),
25necessitating the use of
26.BR syscall (2).
fea681da 27.SH DESCRIPTION
60a90ecd 28.BR pivot_root ()
0843016c
MK
29changes the root mount in the mount namespace of the calling process.
30More precisely, it moves the root mount to the
31directory \fIput_old\fP and makes \fInew_root\fP the new root mount.
fdc558bd
MK
32The calling process must have the
33.B CAP_SYS_ADMIN
34capability in the user namespace that owns the caller's mount namespace.
efeece04 35.PP
60a90ecd 36.BR pivot_root ()
81b24320
MK
37changes the root directory and the current working directory
38of each process or thread in the same mount namespace to
39.I new_root
40if they point to the old root directory.
682e1329
MK
41(See also NOTES.)
42On the other hand,
43.BR pivot_root ()
44does not change the caller's current working directory
45(unless it is on the old root directory),
46and thus it should be followed by a
47\fBchdir("/")\fP call.
efeece04 48.PP
41d4557c 49The following restrictions apply:
22356d97 50.IP \(bu 3
1ae6b2c7 51.I new_root
41d4557c 52and
1ae6b2c7 53.I put_old
41d4557c 54must be directories.
22356d97 55.IP \(bu
33313a26
MK
56.I new_root
57and
58.I put_old
59must not be on the same mount as the current root.
22356d97 60.IP \(bu
57bab66a 61\fIput_old\fP must be at or underneath \fInew_root\fP;
87529800 62that is, adding some nonnegative
d5b48568 63number of "\fI/..\fP" suffixes to the pathname pointed to by
87529800
MK
64.I put_old
65must yield the same directory as \fInew_root\fP.
22356d97 66.IP \(bu
37704bfc 67.I new_root
666373fc 68must be a path to a mount point, but can't be
9f3af6b8 69.IR """/""" .
666373fc
MK
70A path that is not already a mount point can be converted into one by
71bind mounting the path onto itself.
22356d97 72.IP \(bu
d4b2104a 73The propagation type of the parent mount of
1ae6b2c7 74.I new_root
d4b2104a 75and the parent mount of the current root directory must not be
a39e880f
MK
76.BR MS_SHARED ;
77similarly, if
78.I put_old
79is an existing mount point, its propagation type must not be
1a0b1fd7 80.BR MS_SHARED .
9d33e03b
MK
81These restrictions ensure that
82.BR pivot_root ()
83never propagates any changes to another mount namespace.
22356d97 84.IP \(bu
eb9078a7 85The current root directory must be a mount point.
47297adb 86.SH RETURN VALUE
c13182ef
MK
87On success, zero is returned.
88On error, \-1 is returned, and
f6a4078b 89\fIerrno\fP is set to indicate the error.
fea681da 90.SH ERRORS
60a90ecd 91.BR pivot_root ()
5f5751d3 92may fail with any of the same errors as
60a90ecd 93.BR stat (2).
5f5751d3 94Additionally, it may fail with the following errors:
fea681da
MK
95.TP
96.B EBUSY
b647c4c9
MK
97.\" Reconfirmed that the following error occurs on Linux 5.0 by
98.\" specifying 'new_root' as "/rootfs" and 'put_old' as
99.\" "/rootfs/oldrootfs", and *not* bind mounting "/rootfs" on top of
100.\" itself. Of course, this is an odd situation, since a later check
101.\" in the kernel code will in any case yield EINVAL if 'new_root' is
102.\" not a mount point. However, when the system call was first added,
bf421740 103.\" 'new_root' was not required to be a mount point. So, this
b647c4c9
MK
104.\" error is nowadays probably just the result of crufty accumulation.
105.\" This error can also occur if we bind mount "/" on top of itself
106.\" and try to specify "/" as 'new_root' (again, an odd situation). So,
107.\" the EBUSY check in the kernel does still seem necessary to prevent
108.\" that case. Furthermore, the "or put_old" piece is probably
109.\" redundant text (although the check is in the kernel), since,
110.\" in another check, 'put_old' is required to be under 'new_root'.
111.I new_root
112or
113.I put_old
ba4b07c3 114is on the current root mount.
b647c4c9
MK
115(This error covers the pathological case where
116.I new_root
117is
118.IR """/""" .)
fea681da
MK
119.TP
120.B EINVAL
37704bfc
MK
121.I new_root
122is not a mount point.
123.TP
124.B EINVAL
542175d8 125\fIput_old\fP is not at or underneath \fInew_root\fP.
fea681da 126.TP
dc9b6c92 127.B EINVAL
eb9078a7
MK
128The current root directory is not a mount point
129(because of an earlier
130.BR chroot (2)).
131.TP
132.B EINVAL
ba4b07c3 133The current root is on the rootfs (initial ramfs) mount; see NOTES.
dc9b6c92 134.TP
1a0b1fd7
MK
135.B EINVAL
136Either the mount point at
137.IR new_root ,
138or the parent mount of that mount point,
139has propagation type
140.BR MS_SHARED .
141.TP
a39e880f
MK
142.B EINVAL
143.I put_old
144is a mount point and has the propagation type
145.BR MS_SHARED .
146.TP
fea681da
MK
147.B ENOTDIR
148\fInew_root\fP or \fIput_old\fP is not a directory.
149.TP
150.B EPERM
edd1fa35 151The calling process does not have the
fea681da
MK
152.B CAP_SYS_ADMIN
153capability in the user namespace that owns the caller's mount namespace.
a1d5f77c
MK
154.SH VERSIONS
155.BR pivot_root ()
156was introduced in Linux 2.3.41.
3113c7f3 157.SH STANDARDS
a1d5f77c 158.BR pivot_root ()
8382f16d 159is Linux-specific and hence is not portable.
f5b03186 160.SH NOTES
14caaed2
MK
161A command-line interface for this system call is provided by
162.BR pivot_root (8).
163.PP
422e36b7
MK
164.BR pivot_root ()
165allows the caller to switch to a new root filesystem while at the same time
166placing the old root mount at a location under
167.I new_root
168from where it can subsequently be unmounted.
169(The fact that it moves all processes that have a root directory
b27d444f
MK
170or current working directory on the old root directory to the
171new root frees the old root directory of users,
33313a26 172allowing the old root mount to be unmounted more easily.)
c4bf3333 173.PP
87529800 174One use of
422e36b7
MK
175.BR pivot_root ()
176is during system startup, when the
87529800
MK
177system mounts a temporary root filesystem (e.g., an
178.BR initrd (4)),
179then mounts the real root filesystem, and eventually turns the latter into
180the root directory of all relevant processes and threads.
422e36b7
MK
181A modern use is to set up a root filesystem during
182the creation of a container.
183.PP
fc2f474d
MK
184The fact that
185.BR pivot_root ()
186modifies process root and current working directories in the
187manner noted in DESCRIPTION
188is necessary in order to prevent kernel threads from keeping the old
87529800 189root mount busy with their root and current working directories,
fc2f474d
MK
190even if they never access
191the filesystem in any way.
fc2f474d 192.PP
97076c5a
MK
193The rootfs (initial ramfs) cannot be
194.BR pivot_root ()ed.
195The recommended method of changing the root filesystem in this case is
196to delete everything in rootfs, overmount rootfs with the new root, attach
197.IR stdin / stdout / stderr
198to the new
199.IR /dev/console ,
200and exec the new
201.BR init (1).
202Helper programs for this process exist; see
203.BR switch_root (8).
3db820fe
MK
204.\"
205.SS pivot_root(\(dq.\(dq, \(dq.\(dq)
57bab66a
MK
206.I new_root
207and
208.I put_old
209may be the same directory.
210In particular, the following sequence allows a pivot-root operation
211without needing to create and remove a temporary directory:
212.PP
213.in +4n
214.EX
215chdir(new_root);
216pivot_root(".", ".");
217umount2(".", MNT_DETACH);
218.EE
219.in
220.PP
221This sequence succeeds because the
222.BR pivot_root ()
223call stacks the old root mount point
57bab66a
MK
224on top of the new root mount point at
225.IR / .
226At that point, the calling process's root directory and current
227working directory refer to the new root mount point
228.RI ( new_root ).
229During the subsequent
230.BR umount ()
231call, resolution of
1ae6b2c7 232.I """."""
57bab66a
MK
233starts with
234.I new_root
235and then moves up the list of mounts stacked at
236.IR / ,
8f2a9129 237with the result that the old root mount point is unmounted.
01c64c3b
MK
238.\"
239.SS Historical notes
240For many years, this manual page carried the following text:
241.RS
242.PP
243.BR pivot_root ()
244may or may not change the current root and the current
245working directory of any processes or threads which use the old
246root directory.
247The caller of
248.BR pivot_root ()
249must ensure that processes with root or current working directory
250at the old root operate correctly in either case.
251An easy way to ensure this is to change their
252root and current working directory to \fInew_root\fP before invoking
253.BR pivot_root ().
254.RE
255.PP
256This text, written before the system call implementation was
257even finalized in the kernel, was probably intended to warn users
258at that time that the implementation might change before final release.
259However, the behavior stated in DESCRIPTION
260has remained consistent since this system call
261was first implemented and will not change now.
a14af333 262.SH EXAMPLES
47b69a37
MK
263.\" FIXME
264.\" Would it be better, because simpler, to use unshare(2)
265.\" rather than clone(2) in the example below?
2f2e1a22
MK
266The program below demonstrates the use of
267.BR pivot_root ()
268inside a mount namespace that is created using
269.BR clone (2).
270After pivoting to the root directory named in the program's
271first command-line argument, the child created by
272.BR clone (2)
273then executes the program named in the remaining command-line arguments.
274.PP
275We demonstrate the program by creating a directory that will serve as
276the new root filesystem and placing a copy of the (statically linked)
277.BR busybox (1)
278executable in that directory.
279.PP
280.in +4n
281.EX
282$ \fBmkdir /tmp/rootfs\fP
283$ \fBls \-id /tmp/rootfs\fP # Show inode number of new root directory
284319459 /tmp/rootfs
285$ \fBcp $(which busybox) /tmp/rootfs\fP
861d36ba 286$ \fBPS1=\(aqbbsh$ \(aq sudo ./pivot_root_demo /tmp/rootfs /busybox sh\fP
2f2e1a22
MK
287bbsh$ \fBPATH=/\fP
288bbsh$ \fBbusybox ln busybox ln\fP
289bbsh$ \fBln busybox echo\fP
290bbsh$ \fBln busybox ls\fP
291bbsh$ \fBls\fP
292busybox echo ln ls
293bbsh$ \fBls \-id /\fP # Compare with inode number above
294319459 /
295bbsh$ \fBecho \(aqhello world\(aq\fP
296hello world
297.EE
298.in
299.SS Program source
300\&
301.PP
33857069 302.\" SRC BEGIN (pivot_root.c)
2f2e1a22
MK
303.EX
304/* pivot_root_demo.c */
305
306#define _GNU_SOURCE
5a5208c1 307#include <err.h>
80ae7514 308#include <limits.h>
2f2e1a22 309#include <sched.h>
80ae7514 310#include <signal.h>
2f2e1a22
MK
311#include <stdio.h>
312#include <stdlib.h>
80ae7514 313#include <sys/mman.h>
2f2e1a22
MK
314#include <sys/mount.h>
315#include <sys/stat.h>
80ae7514
AC
316#include <sys/syscall.h>
317#include <sys/wait.h>
318#include <unistd.h>
2f2e1a22 319
2f2e1a22
MK
320static int
321pivot_root(const char *new_root, const char *put_old)
322{
323 return syscall(SYS_pivot_root, new_root, put_old);
324}
325
326#define STACK_SIZE (1024 * 1024)
327
328static int /* Startup function for cloned child */
329child(void *arg)
330{
0b94bd78
AC
331 char path[PATH_MAX];
332 char **args = arg;
333 char *new_root = args[0];
334 const char *put_old = "/oldrootfs";
2f2e1a22
MK
335
336 /* Ensure that \(aqnew_root\(aq and its parent mount don\(aqt have
337 shared propagation (which would cause pivot_root() to
338 return an error), and prevent propagation of mount
c6beb8a1 339 events to the initial mount namespace. */
2f2e1a22 340
32a72b3e 341 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) == \-1)
5a5208c1 342 err(EXIT_FAILURE, "mount\-MS_PRIVATE");
2f2e1a22 343
c6beb8a1 344 /* Ensure that \(aqnew_root\(aq is a mount point. */
2f2e1a22
MK
345
346 if (mount(new_root, new_root, NULL, MS_BIND, NULL) == \-1)
5a5208c1 347 err(EXIT_FAILURE, "mount\-MS_BIND");
2f2e1a22 348
c6beb8a1 349 /* Create directory to which old root will be pivoted. */
2f2e1a22
MK
350
351 snprintf(path, sizeof(path), "%s/%s", new_root, put_old);
352 if (mkdir(path, 0777) == \-1)
5a5208c1 353 err(EXIT_FAILURE, "mkdir");
2f2e1a22 354
c6beb8a1 355 /* And pivot the root filesystem. */
2f2e1a22
MK
356
357 if (pivot_root(new_root, path) == \-1)
5a5208c1 358 err(EXIT_FAILURE, "pivot_root");
2f2e1a22 359
c6beb8a1 360 /* Switch the current working directory to "/". */
2f2e1a22
MK
361
362 if (chdir("/") == \-1)
5a5208c1 363 err(EXIT_FAILURE, "chdir");
2f2e1a22 364
c6beb8a1 365 /* Unmount old root and remove mount point. */
2f2e1a22
MK
366
367 if (umount2(put_old, MNT_DETACH) == \-1)
368 perror("umount2");
369 if (rmdir(put_old) == \-1)
370 perror("rmdir");
371
372 /* Execute the command specified in args[1]... */
373
374 execv(args[1], &args[1]);
5a5208c1 375 err(EXIT_FAILURE, "execv");
2f2e1a22
MK
376}
377
378int
379main(int argc, char *argv[])
380{
0b94bd78
AC
381 char *stack;
382
c6beb8a1 383 /* Create a child process in a new mount namespace. */
2f2e1a22 384
0b94bd78
AC
385 stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
386 MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, \-1, 0);
1b547316 387 if (stack == MAP_FAILED)
5a5208c1 388 err(EXIT_FAILURE, "mmap");
2f2e1a22
MK
389
390 if (clone(child, stack + STACK_SIZE,
4687ab0e 391 CLONE_NEWNS | SIGCHLD, &argv[1]) == \-1)
5a5208c1 392 err(EXIT_FAILURE, "clone");
2f2e1a22 393
c6beb8a1 394 /* Parent falls through to here; wait for child. */
2f2e1a22
MK
395
396 if (wait(NULL) == \-1)
5a5208c1 397 err(EXIT_FAILURE, "wait");
2f2e1a22
MK
398
399 exit(EXIT_SUCCESS);
400}
401.EE
33857069 402.\" SRC END
47297adb 403.SH SEE ALSO
fea681da
MK
404.BR chdir (2),
405.BR chroot (2),
34a0f19c 406.BR mount (2),
fea681da
MK
407.BR stat (2),
408.BR initrd (4),
f42778c4 409.BR mount_namespaces (7),
b2bced6d
MK
410.BR pivot_root (8),
411.BR switch_root (8)