]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/pivot_root.2
mknod.2: tfix
[thirdparty/man-pages.git] / man2 / pivot_root.2
CommitLineData
a2dd6388
MK
1.\" Copyright (C) 2019 Michael Kerrisk <mtk.manpages@gmail.com>
2.\" A very few fragments remain from an earlier page written by
3.\" Werner Almesberger in 2000
2297bf0e 4.\"
a2dd6388
MK
5.\" %%%LICENSE_START(VERBATIM)
6.\" Permission is granted to make and distribute verbatim copies of this
7.\" manual provided the copyright notice and this permission notice are
8.\" preserved on all copies.
9.\"
10.\" Permission is granted to copy and distribute modified versions of this
11.\" manual under the conditions for verbatim copying, provided that the
12.\" entire resulting derived work is distributed under the terms of a
13.\" permission notice identical to this one.
fea681da 14.\"
a2dd6388
MK
15.\" Since the Linux kernel and libraries are constantly changing, this
16.\" manual page may be incorrect or out-of-date. The author(s) assume no
17.\" responsibility for errors or omissions, or for damages resulting from
18.\" the use of the information contained herein. The author(s) may not
19.\" have taken the same level of care in the production of this manual,
20.\" which is licensed free of charge, as they might when working
21.\" professionally.
22.\"
23.\" Formatted or processed versions of this manual, if unaccompanied by
24.\" the source, must acknowledge the copyright and authors of this work.
25.\" %%%LICENSE_END
fea681da 26.\"
a5409de9 27.TH PIVOT_ROOT 2 2019-11-19 "Linux" "Linux Programmer's Manual"
fea681da 28.SH NAME
0843016c 29pivot_root \- change the root mount
fea681da 30.SH SYNOPSIS
fea681da 31.BI "int pivot_root(const char *" new_root ", const char *" put_old );
dbfe9c70 32.PP
45c99e3e
MK
33.IR Note :
34There is no glibc wrapper for this system call; see NOTES.
fea681da 35.SH DESCRIPTION
60a90ecd 36.BR pivot_root ()
0843016c
MK
37changes the root mount in the mount namespace of the calling process.
38More precisely, it moves the root mount to the
39directory \fIput_old\fP and makes \fInew_root\fP the new root mount.
fdc558bd
MK
40The calling process must have the
41.B CAP_SYS_ADMIN
42capability in the user namespace that owns the caller's mount namespace.
efeece04 43.PP
60a90ecd 44.BR pivot_root ()
81b24320
MK
45changes the root directory and the current working directory
46of each process or thread in the same mount namespace to
47.I new_root
48if they point to the old root directory.
682e1329
MK
49(See also NOTES.)
50On the other hand,
51.BR pivot_root ()
52does not change the caller's current working directory
53(unless it is on the old root directory),
54and thus it should be followed by a
55\fBchdir("/")\fP call.
efeece04 56.PP
41d4557c 57The following restrictions apply:
fea681da 58.IP \- 3
41d4557c
MK
59.IR new_root
60and
61.IR put_old
62must be directories.
0ac6f900 63.IP \-
33313a26
MK
64.I new_root
65and
66.I put_old
67must not be on the same mount as the current root.
0ac6f900 68.IP \-
57bab66a 69\fIput_old\fP must be at or underneath \fInew_root\fP;
87529800
MK
70that is, adding some nonnegative
71number of "\fI/..\fP" suffixes to the pathname pointed to by
72.I put_old
73must yield the same directory as \fInew_root\fP.
0ac6f900 74.IP \-
37704bfc 75.I new_root
666373fc 76must be a path to a mount point, but can't be
9f3af6b8 77.IR """/""" .
666373fc
MK
78A path that is not already a mount point can be converted into one by
79bind mounting the path onto itself.
0ac6f900 80.IP \-
d4b2104a
MK
81The propagation type of the parent mount of
82.IR new_root
83and the parent mount of the current root directory must not be
a39e880f
MK
84.BR MS_SHARED ;
85similarly, if
86.I put_old
87is an existing mount point, its propagation type must not be
1a0b1fd7 88.BR MS_SHARED .
9d33e03b
MK
89These restrictions ensure that
90.BR pivot_root ()
91never propagates any changes to another mount namespace.
eb9078a7
MK
92.IP \-
93The current root directory must be a mount point.
47297adb 94.SH RETURN VALUE
c13182ef
MK
95On success, zero is returned.
96On error, \-1 is returned, and
fea681da
MK
97\fIerrno\fP is set appropriately.
98.SH ERRORS
60a90ecd 99.BR pivot_root ()
5f5751d3 100may fail with any of the same errors as
60a90ecd 101.BR stat (2).
5f5751d3 102Additionally, it may fail with the following errors:
fea681da
MK
103.TP
104.B EBUSY
b647c4c9
MK
105.\" Reconfirmed that the following error occurs on Linux 5.0 by
106.\" specifying 'new_root' as "/rootfs" and 'put_old' as
107.\" "/rootfs/oldrootfs", and *not* bind mounting "/rootfs" on top of
108.\" itself. Of course, this is an odd situation, since a later check
109.\" in the kernel code will in any case yield EINVAL if 'new_root' is
110.\" not a mount point. However, when the system call was first added,
bf421740 111.\" 'new_root' was not required to be a mount point. So, this
b647c4c9
MK
112.\" error is nowadays probably just the result of crufty accumulation.
113.\" This error can also occur if we bind mount "/" on top of itself
114.\" and try to specify "/" as the 'new' (again, an odd situation). So,
115.\" the EBUSY check in the kernel does still seem necessary to prevent
116.\" that case. Furthermore, the "or put_old" piece is probably
117.\" redundant text (although the check is in the kernel), since,
118.\" in another check, 'put_old' is required to be under 'new_root'.
119.I new_root
120or
121.I put_old
ba4b07c3 122is on the current root mount.
b647c4c9
MK
123(This error covers the pathological case where
124.I new_root
125is
126.IR """/""" .)
fea681da
MK
127.TP
128.B EINVAL
37704bfc
MK
129.I new_root
130is not a mount point.
131.TP
132.B EINVAL
542175d8 133\fIput_old\fP is not at or underneath \fInew_root\fP.
fea681da 134.TP
dc9b6c92 135.B EINVAL
eb9078a7
MK
136The current root directory is not a mount point
137(because of an earlier
138.BR chroot (2)).
139.TP
140.B EINVAL
ba4b07c3 141The current root is on the rootfs (initial ramfs) mount; see NOTES.
dc9b6c92 142.TP
1a0b1fd7
MK
143.B EINVAL
144Either the mount point at
145.IR new_root ,
146or the parent mount of that mount point,
147has propagation type
148.BR MS_SHARED .
149.TP
a39e880f
MK
150.B EINVAL
151.I put_old
152is a mount point and has the propagation type
153.BR MS_SHARED .
154.TP
fea681da
MK
155.B ENOTDIR
156\fInew_root\fP or \fIput_old\fP is not a directory.
157.TP
158.B EPERM
edd1fa35 159The calling process does not have the
fea681da
MK
160.B CAP_SYS_ADMIN
161capability.
a1d5f77c
MK
162.SH VERSIONS
163.BR pivot_root ()
164was introduced in Linux 2.3.41.
47297adb 165.SH CONFORMING TO
a1d5f77c 166.BR pivot_root ()
8382f16d 167is Linux-specific and hence is not portable.
f5b03186
MK
168.SH NOTES
169Glibc does not provide a wrapper for this system call; call it using
170.BR syscall (2).
82320f42 171.PP
14caaed2
MK
172A command-line interface for this system call is provided by
173.BR pivot_root (8).
174.PP
422e36b7
MK
175.BR pivot_root ()
176allows the caller to switch to a new root filesystem while at the same time
177placing the old root mount at a location under
178.I new_root
179from where it can subsequently be unmounted.
180(The fact that it moves all processes that have a root directory
b27d444f
MK
181or current working directory on the old root directory to the
182new root frees the old root directory of users,
33313a26 183allowing the old root mount to be unmounted more easily.)
c4bf3333 184.PP
87529800 185One use of
422e36b7
MK
186.BR pivot_root ()
187is during system startup, when the
87529800
MK
188system mounts a temporary root filesystem (e.g., an
189.BR initrd (4)),
190then mounts the real root filesystem, and eventually turns the latter into
191the root directory of all relevant processes and threads.
422e36b7
MK
192A modern use is to set up a root filesystem during
193the creation of a container.
194.PP
fc2f474d
MK
195The fact that
196.BR pivot_root ()
197modifies process root and current working directories in the
198manner noted in DESCRIPTION
199is necessary in order to prevent kernel threads from keeping the old
87529800 200root mount busy with their root and current working directories,
fc2f474d
MK
201even if they never access
202the filesystem in any way.
fc2f474d 203.PP
97076c5a
MK
204The rootfs (initial ramfs) cannot be
205.BR pivot_root ()ed.
206The recommended method of changing the root filesystem in this case is
207to delete everything in rootfs, overmount rootfs with the new root, attach
208.IR stdin / stdout / stderr
209to the new
210.IR /dev/console ,
211and exec the new
212.BR init (1).
213Helper programs for this process exist; see
214.BR switch_root (8).
3db820fe
MK
215.\"
216.SS pivot_root(\(dq.\(dq, \(dq.\(dq)
97076c5a 217.PP
57bab66a
MK
218.I new_root
219and
220.I put_old
221may be the same directory.
222In particular, the following sequence allows a pivot-root operation
223without needing to create and remove a temporary directory:
224.PP
225.in +4n
226.EX
227chdir(new_root);
228pivot_root(".", ".");
229umount2(".", MNT_DETACH);
230.EE
231.in
232.PP
233This sequence succeeds because the
234.BR pivot_root ()
235call stacks the old root mount point
57bab66a
MK
236on top of the new root mount point at
237.IR / .
238At that point, the calling process's root directory and current
239working directory refer to the new root mount point
240.RI ( new_root ).
241During the subsequent
242.BR umount ()
243call, resolution of
244.IR """."""
245starts with
246.I new_root
247and then moves up the list of mounts stacked at
248.IR / ,
8f2a9129 249with the result that the old root mount point is unmounted.
01c64c3b
MK
250.\"
251.SS Historical notes
252For many years, this manual page carried the following text:
253.RS
254.PP
255.BR pivot_root ()
256may or may not change the current root and the current
257working directory of any processes or threads which use the old
258root directory.
259The caller of
260.BR pivot_root ()
261must ensure that processes with root or current working directory
262at the old root operate correctly in either case.
263An easy way to ensure this is to change their
264root and current working directory to \fInew_root\fP before invoking
265.BR pivot_root ().
266.RE
267.PP
268This text, written before the system call implementation was
269even finalized in the kernel, was probably intended to warn users
270at that time that the implementation might change before final release.
271However, the behavior stated in DESCRIPTION
272has remained consistent since this system call
273was first implemented and will not change now.
2f2e1a22 274.SH EXAMPLE
47b69a37
MK
275.\" FIXME
276.\" Would it be better, because simpler, to use unshare(2)
277.\" rather than clone(2) in the example below?
2f2e1a22
MK
278.PP
279The program below demonstrates the use of
280.BR pivot_root ()
281inside a mount namespace that is created using
282.BR clone (2).
283After pivoting to the root directory named in the program's
284first command-line argument, the child created by
285.BR clone (2)
286then executes the program named in the remaining command-line arguments.
287.PP
288We demonstrate the program by creating a directory that will serve as
289the new root filesystem and placing a copy of the (statically linked)
290.BR busybox (1)
291executable in that directory.
292.PP
293.in +4n
294.EX
295$ \fBmkdir /tmp/rootfs\fP
296$ \fBls \-id /tmp/rootfs\fP # Show inode number of new root directory
297319459 /tmp/rootfs
298$ \fBcp $(which busybox) /tmp/rootfs\fP
299$ \fBPS1='bbsh$ ' sudo ./pivot_root_demo /tmp/rootfs /busybox sh\fP
300bbsh$ \fBPATH=/\fP
301bbsh$ \fBbusybox ln busybox ln\fP
302bbsh$ \fBln busybox echo\fP
303bbsh$ \fBln busybox ls\fP
304bbsh$ \fBls\fP
305busybox echo ln ls
306bbsh$ \fBls \-id /\fP # Compare with inode number above
307319459 /
308bbsh$ \fBecho \(aqhello world\(aq\fP
309hello world
310.EE
311.in
312.SS Program source
313\&
314.PP
315.EX
316/* pivot_root_demo.c */
317
318#define _GNU_SOURCE
319#include <sched.h>
320#include <stdio.h>
321#include <stdlib.h>
322#include <unistd.h>
323#include <sys/wait.h>
324#include <sys/syscall.h>
325#include <sys/mount.h>
326#include <sys/stat.h>
327#include <limits.h>
1b547316 328#include <sys/mman.h>
2f2e1a22
MK
329
330#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \e
331 } while (0)
332
333static int
334pivot_root(const char *new_root, const char *put_old)
335{
336 return syscall(SYS_pivot_root, new_root, put_old);
337}
338
339#define STACK_SIZE (1024 * 1024)
340
341static int /* Startup function for cloned child */
342child(void *arg)
343{
344 char **args = arg;
345 char *new_root = args[0];
346 const char *put_old = "/oldrootfs";
347 char path[PATH_MAX];
348
349 /* Ensure that \(aqnew_root\(aq and its parent mount don\(aqt have
350 shared propagation (which would cause pivot_root() to
351 return an error), and prevent propagation of mount
352 events to the initial mount namespace */
353
354 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) == \-1)
355 errExit("mount\-MS_PRIVATE");
356
357 /* Ensure that \(aqnew_root\(aq is a mount point */
358
359 if (mount(new_root, new_root, NULL, MS_BIND, NULL) == \-1)
360 errExit("mount\-MS_BIND");
361
362 /* Create directory to which old root will be pivoted */
363
364 snprintf(path, sizeof(path), "%s/%s", new_root, put_old);
365 if (mkdir(path, 0777) == \-1)
366 errExit("mkdir");
367
368 /* And pivot the root filesystem */
369
370 if (pivot_root(new_root, path) == \-1)
371 errExit("pivot_root");
372
bf421740 373 /* Switch the current working directory to "/" */
2f2e1a22
MK
374
375 if (chdir("/") == \-1)
376 errExit("chdir");
377
378 /* Unmount old root and remove mount point */
379
380 if (umount2(put_old, MNT_DETACH) == \-1)
381 perror("umount2");
382 if (rmdir(put_old) == \-1)
383 perror("rmdir");
384
385 /* Execute the command specified in args[1]... */
386
387 execv(args[1], &args[1]);
388 errExit("execv");
389}
390
391int
392main(int argc, char *argv[])
393{
394 /* Create a child process in a new mount namespace */
395
1b547316
MK
396 char *stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
397 MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, \-1, 0);
398 if (stack == MAP_FAILED)
399 errExit("mmap");
2f2e1a22
MK
400
401 if (clone(child, stack + STACK_SIZE,
402 CLONE_NEWNS | SIGCHLD, &argv[1]) == \-1)
403 errExit("clone");
404
405 /* Parent falls through to here; wait for child */
406
407 if (wait(NULL) == \-1)
408 errExit("wait");
409
410 exit(EXIT_SUCCESS);
411}
412.EE
47297adb 413.SH SEE ALSO
fea681da
MK
414.BR chdir (2),
415.BR chroot (2),
34a0f19c 416.BR mount (2),
fea681da
MK
417.BR stat (2),
418.BR initrd (4),
f42778c4 419.BR mount_namespaces (7),
b2bced6d
MK
420.BR pivot_root (8),
421.BR switch_root (8)