]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/pivot_root.2
pivot_root.2: Reword one of the restrictions on 'new_root'
[thirdparty/man-pages.git] / man2 / pivot_root.2
CommitLineData
fea681da 1.\" Copyright (C) 2000 by Werner Almesberger
83cc245d 2.\" and Copyright (C) 2019 Michael Kerrisk <mtk.manpages@gmail.com>
2297bf0e 3.\"
b55e2bb3 4.\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
fea681da 5.\" May be distributed under GPL
b55e2bb3 6.\" %%%LICENSE_END
fea681da
MK
7.\"
8.\" Written 2000-02-23 by Werner Almesberger
c11b1abf 9.\" Modified 2004-06-17 Michael Kerrisk <mtk.manpages@gmail.com>
fea681da 10.\"
63121bd4 11.TH PIVOT_ROOT 2 2019-08-02 "Linux" "Linux Programmer's Manual"
fea681da 12.SH NAME
9ee4a2b6 13pivot_root \- change the root filesystem
fea681da 14.SH SYNOPSIS
fea681da 15.BI "int pivot_root(const char *" new_root ", const char *" put_old );
dbfe9c70 16.PP
45c99e3e
MK
17.IR Note :
18There is no glibc wrapper for this system call; see NOTES.
fea681da 19.SH DESCRIPTION
60a90ecd 20.BR pivot_root ()
7cc1a16d
MK
21changes the root filesystem in the mount namespace of the calling process.
22More precisely, it moves the root filesystem to the
23directory \fIput_old\fP and makes \fInew_root\fP the new root filesystem.
fdc558bd
MK
24The calling process must have the
25.B CAP_SYS_ADMIN
26capability in the user namespace that owns the caller's mount namespace.
efeece04 27.PP
60a90ecd
MK
28.BR pivot_root ()
29may or may not change the current root and the current
ac2eb791
MK
30working directory of any processes or threads that
31use the old root directory and which are in
32the same mount namespace as the caller of
33.BR pivot_root ().
60a90ecd
MK
34The caller of
35.BR pivot_root ()
682e1329 36should ensure that processes with root or current working directory
edd1fa35 37at the old root operate correctly in either case.
c13182ef 38An easy way to ensure this is to change their
edd1fa35 39root and current working directory to \fInew_root\fP before invoking
60a90ecd 40.BR pivot_root ().
4a8b7d7b
MK
41Note also that
42.BR pivot_root ()
43may or may not affect the calling process's current working directory.
44It is therefore recommended to call
45\fBchdir("/")\fP immediately after
46.BR pivot_root ().
efeece04 47.PP
682e1329 48The paragraph above is intentionally vague because at the time when
a94f69d6 49.BR pivot_root ()
682e1329
MK
50was first implemented, it was unclear whether its effect
51on other processes' root and current working directories\(emand
52the caller's current working directory\(emmight change in the future.
53However, the behavior has remained consistent since this system call
a94f69d6 54was first implemented:
60a90ecd 55.BR pivot_root ()
81b24320
MK
56changes the root directory and the current working directory
57of each process or thread in the same mount namespace to
58.I new_root
59if they point to the old root directory.
682e1329
MK
60(See also NOTES.)
61On the other hand,
62.BR pivot_root ()
63does not change the caller's current working directory
64(unless it is on the old root directory),
65and thus it should be followed by a
66\fBchdir("/")\fP call.
efeece04 67.PP
41d4557c 68The following restrictions apply:
fea681da 69.IP \- 3
41d4557c
MK
70.IR new_root
71and
72.IR put_old
73must be directories.
0ac6f900 74.IP \-
33313a26
MK
75.I new_root
76and
77.I put_old
78must not be on the same mount as the current root.
0ac6f900 79.IP \-
57bab66a
MK
80\fIput_old\fP must be at or underneath \fInew_root\fP;
81that is, adding a nonnegative
8478ee02 82number of \fI/..\fP to the string pointed to by \fIput_old\fP must yield
fea681da 83the same directory as \fInew_root\fP.
0ac6f900 84.IP \-
37704bfc 85.I new_root
666373fc 86must be a path to a mount point, but can't be
9f3af6b8 87.IR """/""" .
666373fc
MK
88A path that is not already a mount point can be converted into one by
89bind mounting the path onto itself.
0ac6f900 90.IP \-
d4b2104a
MK
91The propagation type of the parent mount of
92.IR new_root
93and the parent mount of the current root directory must not be
a39e880f
MK
94.BR MS_SHARED ;
95similarly, if
96.I put_old
97is an existing mount point, its propagation type must not be
1a0b1fd7 98.BR MS_SHARED .
9d33e03b
MK
99These restrictions ensure that
100.BR pivot_root ()
101never propagates any changes to another mount namespace.
eb9078a7
MK
102.IP \-
103The current root directory must be a mount point.
47297adb 104.SH RETURN VALUE
c13182ef
MK
105On success, zero is returned.
106On error, \-1 is returned, and
fea681da
MK
107\fIerrno\fP is set appropriately.
108.SH ERRORS
60a90ecd 109.BR pivot_root ()
5f5751d3 110may fail with any of the same errors as
60a90ecd 111.BR stat (2).
5f5751d3 112Additionally, it may fail with the following errors:
fea681da
MK
113.TP
114.B EBUSY
b647c4c9
MK
115.\" Reconfirmed that the following error occurs on Linux 5.0 by
116.\" specifying 'new_root' as "/rootfs" and 'put_old' as
117.\" "/rootfs/oldrootfs", and *not* bind mounting "/rootfs" on top of
118.\" itself. Of course, this is an odd situation, since a later check
119.\" in the kernel code will in any case yield EINVAL if 'new_root' is
120.\" not a mount point. However, when the system call was first added,
bf421740 121.\" 'new_root' was not required to be a mount point. So, this
b647c4c9
MK
122.\" error is nowadays probably just the result of crufty accumulation.
123.\" This error can also occur if we bind mount "/" on top of itself
124.\" and try to specify "/" as 'new_root' (again, an odd situation). So,
125.\" the EBUSY check in the kernel does still seem necessary to prevent
126.\" that case. Furthermore, the "or put_old" piece is probably
127.\" redundant text (although the check is in the kernel), since,
128.\" in another check, 'put_old' is required to be under 'new_root'.
129.I new_root
130or
131.I put_old
132is on the current root filesystem.
133(This error covers the pathological case where
134.I new_root
135is
136.IR """/""" .)
fea681da
MK
137.TP
138.B EINVAL
37704bfc
MK
139.I new_root
140is not a mount point.
141.TP
142.B EINVAL
fea681da
MK
143\fIput_old\fP is not underneath \fInew_root\fP.
144.TP
dc9b6c92 145.B EINVAL
eb9078a7
MK
146The current root directory is not a mount point
147(because of an earlier
148.BR chroot (2)).
149.TP
150.B EINVAL
0c2329cd 151The current root is on the rootfs (initial ramfs) filesystem; see NOTES.
dc9b6c92 152.TP
1a0b1fd7
MK
153.B EINVAL
154Either the mount point at
155.IR new_root ,
156or the parent mount of that mount point,
157has propagation type
158.BR MS_SHARED .
159.TP
a39e880f
MK
160.B EINVAL
161.I put_old
162is a mount point and has the propagation type
163.BR MS_SHARED .
164.TP
fea681da
MK
165.B ENOTDIR
166\fInew_root\fP or \fIput_old\fP is not a directory.
167.TP
168.B EPERM
edd1fa35 169The calling process does not have the
fea681da
MK
170.B CAP_SYS_ADMIN
171capability.
a1d5f77c
MK
172.SH VERSIONS
173.BR pivot_root ()
174was introduced in Linux 2.3.41.
47297adb 175.SH CONFORMING TO
a1d5f77c 176.BR pivot_root ()
8382f16d 177is Linux-specific and hence is not portable.
f5b03186
MK
178.SH NOTES
179Glibc does not provide a wrapper for this system call; call it using
180.BR syscall (2).
82320f42 181.PP
14caaed2
MK
182A command-line interface for this system call is provided by
183.BR pivot_root (8).
184.PP
422e36b7
MK
185.BR pivot_root ()
186allows the caller to switch to a new root filesystem while at the same time
187placing the old root mount at a location under
188.I new_root
189from where it can subsequently be unmounted.
190(The fact that it moves all processes that have a root directory
b27d444f
MK
191or current working directory on the old root directory to the
192new root frees the old root directory of users,
33313a26 193allowing the old root mount to be unmounted more easily.)
c4bf3333 194.PP
422e36b7
MK
195A typical use of
196.BR pivot_root ()
197is during system startup, when the
198system mounts a temporary root filesystem (e.g., an \fBinitrd\fP), then
199mounts the real root filesystem, and eventually turns the latter into
200the current root of all relevant processes or threads.
201A modern use is to set up a root filesystem during
202the creation of a container.
203.PP
fc2f474d
MK
204The fact that
205.BR pivot_root ()
206modifies process root and current working directories in the
207manner noted in DESCRIPTION
208is necessary in order to prevent kernel threads from keeping the old
209root directory busy with their root and current working directory,
210even if they never access
211the filesystem in any way.
fc2f474d 212.PP
57bab66a
MK
213.I new_root
214and
215.I put_old
216may be the same directory.
217In particular, the following sequence allows a pivot-root operation
218without needing to create and remove a temporary directory:
219.PP
220.in +4n
221.EX
222chdir(new_root);
223pivot_root(".", ".");
224umount2(".", MNT_DETACH);
225.EE
226.in
227.PP
228This sequence succeeds because the
229.BR pivot_root ()
230call stacks the old root mount point
57bab66a
MK
231on top of the new root mount point at
232.IR / .
233At that point, the calling process's root directory and current
234working directory refer to the new root mount point
235.RI ( new_root ).
236During the subsequent
237.BR umount ()
238call, resolution of
239.IR """."""
240starts with
241.I new_root
242and then moves up the list of mounts stacked at
243.IR / ,
8f2a9129 244with the result that the old root mount point is unmounted.
57bab66a 245.PP
82320f42
EK
246The rootfs (initial ramfs) cannot be
247.BR pivot_root ()ed.
52fc743c
MK
248The recommended method of changing the root filesystem in this case is
249to delete everything in rootfs, overmount rootfs with the new root, attach
82320f42
EK
250.IR stdin / stdout / stderr
251to the new
252.IR /dev/console ,
52fc743c
MK
253and exec the new
254.BR init (1).
255Helper programs for this process exist; see
82320f42 256.BR switch_root (8).
2f2e1a22 257.SH EXAMPLE
47b69a37
MK
258.\" FIXME
259.\" Would it be better, because simpler, to use unshare(2)
260.\" rather than clone(2) in the example below?
2f2e1a22
MK
261.PP
262The program below demonstrates the use of
263.BR pivot_root ()
264inside a mount namespace that is created using
265.BR clone (2).
266After pivoting to the root directory named in the program's
267first command-line argument, the child created by
268.BR clone (2)
269then executes the program named in the remaining command-line arguments.
270.PP
271We demonstrate the program by creating a directory that will serve as
272the new root filesystem and placing a copy of the (statically linked)
273.BR busybox (1)
274executable in that directory.
275.PP
276.in +4n
277.EX
278$ \fBmkdir /tmp/rootfs\fP
279$ \fBls \-id /tmp/rootfs\fP # Show inode number of new root directory
280319459 /tmp/rootfs
281$ \fBcp $(which busybox) /tmp/rootfs\fP
282$ \fBPS1='bbsh$ ' sudo ./pivot_root_demo /tmp/rootfs /busybox sh\fP
283bbsh$ \fBPATH=/\fP
284bbsh$ \fBbusybox ln busybox ln\fP
285bbsh$ \fBln busybox echo\fP
286bbsh$ \fBln busybox ls\fP
287bbsh$ \fBls\fP
288busybox echo ln ls
289bbsh$ \fBls \-id /\fP # Compare with inode number above
290319459 /
291bbsh$ \fBecho \(aqhello world\(aq\fP
292hello world
293.EE
294.in
295.SS Program source
296\&
297.PP
298.EX
299/* pivot_root_demo.c */
300
301#define _GNU_SOURCE
302#include <sched.h>
303#include <stdio.h>
304#include <stdlib.h>
305#include <unistd.h>
306#include <sys/wait.h>
307#include <sys/syscall.h>
308#include <sys/mount.h>
309#include <sys/stat.h>
310#include <limits.h>
311
312#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \e
313 } while (0)
314
315static int
316pivot_root(const char *new_root, const char *put_old)
317{
318 return syscall(SYS_pivot_root, new_root, put_old);
319}
320
321#define STACK_SIZE (1024 * 1024)
322
323static int /* Startup function for cloned child */
324child(void *arg)
325{
326 char **args = arg;
327 char *new_root = args[0];
328 const char *put_old = "/oldrootfs";
329 char path[PATH_MAX];
330
331 /* Ensure that \(aqnew_root\(aq and its parent mount don\(aqt have
332 shared propagation (which would cause pivot_root() to
333 return an error), and prevent propagation of mount
334 events to the initial mount namespace */
335
336 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) == \-1)
337 errExit("mount\-MS_PRIVATE");
338
339 /* Ensure that \(aqnew_root\(aq is a mount point */
340
341 if (mount(new_root, new_root, NULL, MS_BIND, NULL) == \-1)
342 errExit("mount\-MS_BIND");
343
344 /* Create directory to which old root will be pivoted */
345
346 snprintf(path, sizeof(path), "%s/%s", new_root, put_old);
347 if (mkdir(path, 0777) == \-1)
348 errExit("mkdir");
349
350 /* And pivot the root filesystem */
351
352 if (pivot_root(new_root, path) == \-1)
353 errExit("pivot_root");
354
bf421740 355 /* Switch the current working directory to "/" */
2f2e1a22
MK
356
357 if (chdir("/") == \-1)
358 errExit("chdir");
359
360 /* Unmount old root and remove mount point */
361
362 if (umount2(put_old, MNT_DETACH) == \-1)
363 perror("umount2");
364 if (rmdir(put_old) == \-1)
365 perror("rmdir");
366
367 /* Execute the command specified in argv[1]... */
368
369 execv(args[1], &args[1]);
370 errExit("execv");
371}
372
373int
374main(int argc, char *argv[])
375{
376 /* Create a child process in a new mount namespace */
377
378 char *stack = malloc(STACK_SIZE);
379 if (stack == NULL)
380 errExit("malloc");
381
382 if (clone(child, stack + STACK_SIZE,
383 CLONE_NEWNS | SIGCHLD, &argv[1]) == \-1)
384 errExit("clone");
385
386 /* Parent falls through to here; wait for child */
387
388 if (wait(NULL) == \-1)
389 errExit("wait");
390
391 exit(EXIT_SUCCESS);
392}
393.EE
47297adb 394.SH SEE ALSO
fea681da
MK
395.BR chdir (2),
396.BR chroot (2),
34a0f19c 397.BR mount (2),
fea681da
MK
398.BR stat (2),
399.BR initrd (4),
f42778c4 400.BR mount_namespaces (7),
b2bced6d
MK
401.BR pivot_root (8),
402.BR switch_root (8)