]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/pivot_root.2
Many pages: Use correct letter case in page titles (TH)
[thirdparty/man-pages.git] / man2 / pivot_root.2
1 .\" Copyright (C) 2019 Michael Kerrisk <mtk.manpages@gmail.com>
2 .\" A very few fragments remain from an earlier page written by
3 .\" Werner Almesberger in 2000
4 .\"
5 .\" SPDX-License-Identifier: Linux-man-pages-copyleft
6 .\"
7 .TH pivot_root 2 (date) "Linux man-pages (unreleased)"
8 .SH NAME
9 pivot_root \- change the root mount
10 .SH LIBRARY
11 Standard C library
12 .RI ( libc ", " \-lc )
13 .SH SYNOPSIS
14 .nf
15 .BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
16 .B #include <unistd.h>
17 .PP
18 .BI "int syscall(SYS_pivot_root, const char *" new_root \
19 ", const char *" put_old );
20 .fi
21 .PP
22 .IR Note :
23 glibc provides no wrapper for
24 .BR pivot_root (),
25 necessitating the use of
26 .BR syscall (2).
27 .SH DESCRIPTION
28 .BR pivot_root ()
29 changes the root mount in the mount namespace of the calling process.
30 More precisely, it moves the root mount to the
31 directory \fIput_old\fP and makes \fInew_root\fP the new root mount.
32 The calling process must have the
33 .B CAP_SYS_ADMIN
34 capability in the user namespace that owns the caller's mount namespace.
35 .PP
36 .BR pivot_root ()
37 changes the root directory and the current working directory
38 of each process or thread in the same mount namespace to
39 .I new_root
40 if they point to the old root directory.
41 (See also NOTES.)
42 On the other hand,
43 .BR pivot_root ()
44 does not change the caller's current working directory
45 (unless it is on the old root directory),
46 and thus it should be followed by a
47 \fBchdir("/")\fP call.
48 .PP
49 The following restrictions apply:
50 .IP \(bu 3
51 .I new_root
52 and
53 .I put_old
54 must be directories.
55 .IP \(bu
56 .I new_root
57 and
58 .I put_old
59 must not be on the same mount as the current root.
60 .IP \(bu
61 \fIput_old\fP must be at or underneath \fInew_root\fP;
62 that is, adding some nonnegative
63 number of "\fI/..\fP" suffixes to the pathname pointed to by
64 .I put_old
65 must yield the same directory as \fInew_root\fP.
66 .IP \(bu
67 .I new_root
68 must be a path to a mount point, but can't be
69 .IR """/""" .
70 A path that is not already a mount point can be converted into one by
71 bind mounting the path onto itself.
72 .IP \(bu
73 The propagation type of the parent mount of
74 .I new_root
75 and the parent mount of the current root directory must not be
76 .BR MS_SHARED ;
77 similarly, if
78 .I put_old
79 is an existing mount point, its propagation type must not be
80 .BR MS_SHARED .
81 These restrictions ensure that
82 .BR pivot_root ()
83 never propagates any changes to another mount namespace.
84 .IP \(bu
85 The current root directory must be a mount point.
86 .SH RETURN VALUE
87 On success, zero is returned.
88 On error, \-1 is returned, and
89 \fIerrno\fP is set to indicate the error.
90 .SH ERRORS
91 .BR pivot_root ()
92 may fail with any of the same errors as
93 .BR stat (2).
94 Additionally, it may fail with the following errors:
95 .TP
96 .B EBUSY
97 .\" Reconfirmed that the following error occurs on Linux 5.0 by
98 .\" specifying 'new_root' as "/rootfs" and 'put_old' as
99 .\" "/rootfs/oldrootfs", and *not* bind mounting "/rootfs" on top of
100 .\" itself. Of course, this is an odd situation, since a later check
101 .\" in the kernel code will in any case yield EINVAL if 'new_root' is
102 .\" not a mount point. However, when the system call was first added,
103 .\" 'new_root' was not required to be a mount point. So, this
104 .\" error is nowadays probably just the result of crufty accumulation.
105 .\" This error can also occur if we bind mount "/" on top of itself
106 .\" and try to specify "/" as the 'new_root' (again, an odd situation). So,
107 .\" the EBUSY check in the kernel does still seem necessary to prevent
108 .\" that case. Furthermore, the "or put_old" piece is probably
109 .\" redundant text (although the check is in the kernel), since,
110 .\" in another check, 'put_old' is required to be under 'new_root'.
111 .I new_root
112 or
113 .I put_old
114 is on the current root mount.
115 (This error covers the pathological case where
116 .I new_root
117 is
118 .IR """/""" .)
119 .TP
120 .B EINVAL
121 .I new_root
122 is not a mount point.
123 .TP
124 .B EINVAL
125 \fIput_old\fP is not at or underneath \fInew_root\fP.
126 .TP
127 .B EINVAL
128 The current root directory is not a mount point
129 (because of an earlier
130 .BR chroot (2)).
131 .TP
132 .B EINVAL
133 The current root is on the rootfs (initial ramfs) mount; see NOTES.
134 .TP
135 .B EINVAL
136 Either the mount point at
137 .IR new_root ,
138 or the parent mount of that mount point,
139 has propagation type
140 .BR MS_SHARED .
141 .TP
142 .B EINVAL
143 .I put_old
144 is a mount point and has the propagation type
145 .BR MS_SHARED .
146 .TP
147 .B ENOTDIR
148 \fInew_root\fP or \fIput_old\fP is not a directory.
149 .TP
150 .B EPERM
151 The calling process does not have the
152 .B CAP_SYS_ADMIN
153 capability.
154 .SH VERSIONS
155 .BR pivot_root ()
156 was introduced in Linux 2.3.41.
157 .SH STANDARDS
158 .BR pivot_root ()
159 is Linux-specific and hence is not portable.
160 .SH NOTES
161 A command-line interface for this system call is provided by
162 .BR pivot_root (8).
163 .PP
164 .BR pivot_root ()
165 allows the caller to switch to a new root filesystem while at the same time
166 placing the old root mount at a location under
167 .I new_root
168 from where it can subsequently be unmounted.
169 (The fact that it moves all processes that have a root directory
170 or current working directory on the old root directory to the
171 new root frees the old root directory of users,
172 allowing the old root mount to be unmounted more easily.)
173 .PP
174 One use of
175 .BR pivot_root ()
176 is during system startup, when the
177 system mounts a temporary root filesystem (e.g., an
178 .BR initrd (4)),
179 then mounts the real root filesystem, and eventually turns the latter into
180 the root directory of all relevant processes and threads.
181 A modern use is to set up a root filesystem during
182 the creation of a container.
183 .PP
184 The fact that
185 .BR pivot_root ()
186 modifies process root and current working directories in the
187 manner noted in DESCRIPTION
188 is necessary in order to prevent kernel threads from keeping the old
189 root mount busy with their root and current working directories,
190 even if they never access
191 the filesystem in any way.
192 .PP
193 The rootfs (initial ramfs) cannot be
194 .BR pivot_root ()ed.
195 The recommended method of changing the root filesystem in this case is
196 to delete everything in rootfs, overmount rootfs with the new root, attach
197 .IR stdin / stdout / stderr
198 to the new
199 .IR /dev/console ,
200 and exec the new
201 .BR init (1).
202 Helper programs for this process exist; see
203 .BR switch_root (8).
204 .\"
205 .SS pivot_root(\(dq.\(dq, \(dq.\(dq)
206 .I new_root
207 and
208 .I put_old
209 may be the same directory.
210 In particular, the following sequence allows a pivot-root operation
211 without needing to create and remove a temporary directory:
212 .PP
213 .in +4n
214 .EX
215 chdir(new_root);
216 pivot_root(".", ".");
217 umount2(".", MNT_DETACH);
218 .EE
219 .in
220 .PP
221 This sequence succeeds because the
222 .BR pivot_root ()
223 call stacks the old root mount point
224 on top of the new root mount point at
225 .IR / .
226 At that point, the calling process's root directory and current
227 working directory refer to the new root mount point
228 .RI ( new_root ).
229 During the subsequent
230 .BR umount2 ()
231 call, resolution of
232 .I """."""
233 starts with
234 .I new_root
235 and then moves up the list of mounts stacked at
236 .IR / ,
237 with the result that the old root mount point is unmounted.
238 .\"
239 .SS Historical notes
240 For many years, this manual page carried the following text:
241 .RS
242 .PP
243 .BR pivot_root ()
244 may or may not change the current root and the current
245 working directory of any processes or threads which use the old
246 root directory.
247 The caller of
248 .BR pivot_root ()
249 must ensure that processes with root or current working directory
250 at the old root operate correctly in either case.
251 An easy way to ensure this is to change their
252 root and current working directory to \fInew_root\fP before invoking
253 .BR pivot_root ().
254 .RE
255 .PP
256 This text, written before the system call implementation was
257 even finalized in the kernel, was probably intended to warn users
258 at that time that the implementation might change before final release.
259 However, the behavior stated in DESCRIPTION
260 has remained consistent since this system call
261 was first implemented and will not change now.
262 .SH EXAMPLES
263 .\" FIXME
264 .\" Would it be better, because simpler, to use unshare(2)
265 .\" rather than clone(2) in the example below?
266 The program below demonstrates the use of
267 .BR pivot_root ()
268 inside a mount namespace that is created using
269 .BR clone (2).
270 After pivoting to the root directory named in the program's
271 first command-line argument, the child created by
272 .BR clone (2)
273 then executes the program named in the remaining command-line arguments.
274 .PP
275 We demonstrate the program by creating a directory that will serve as
276 the new root filesystem and placing a copy of the (statically linked)
277 .BR busybox (1)
278 executable in that directory.
279 .PP
280 .in +4n
281 .EX
282 $ \fBmkdir /tmp/rootfs\fP
283 $ \fBls \-id /tmp/rootfs\fP # Show inode number of new root directory
284 319459 /tmp/rootfs
285 $ \fBcp $(which busybox) /tmp/rootfs\fP
286 $ \fBPS1=\(aqbbsh$ \(aq sudo ./pivot_root_demo /tmp/rootfs /busybox sh\fP
287 bbsh$ \fBPATH=/\fP
288 bbsh$ \fBbusybox ln busybox ln\fP
289 bbsh$ \fBln busybox echo\fP
290 bbsh$ \fBln busybox ls\fP
291 bbsh$ \fBls\fP
292 busybox echo ln ls
293 bbsh$ \fBls \-id /\fP # Compare with inode number above
294 319459 /
295 bbsh$ \fBecho \(aqhello world\(aq\fP
296 hello world
297 .EE
298 .in
299 .SS Program source
300 \&
301 .PP
302 .\" SRC BEGIN (pivot_root.c)
303 .EX
304 /* pivot_root_demo.c */
305
306 #define _GNU_SOURCE
307 #include <err.h>
308 #include <limits.h>
309 #include <sched.h>
310 #include <signal.h>
311 #include <stdio.h>
312 #include <stdlib.h>
313 #include <sys/mman.h>
314 #include <sys/mount.h>
315 #include <sys/stat.h>
316 #include <sys/syscall.h>
317 #include <sys/wait.h>
318 #include <unistd.h>
319
/* Invoke the pivot_root system call directly through syscall(2).
   The value produced by syscall() is handed back to the caller,
   converted to this function\(aqs int return type. */

static int
pivot_root(const char *new_root, const char *put_old)
{
    long  status;

    status = syscall(SYS_pivot_root, new_root, put_old);
    return status;
}
325
326 #define STACK_SIZE (1024 * 1024)
327
/* Startup function for the cloned child.

   \(aqarg\(aq is the argument vector handed over by clone():
   args[0] names the directory that becomes the new root and
   args[1] onward is the command to execute after the pivot.
   This function does not return normally: it either replaces the
   process image with execv() or terminates via err()/errx(). */

static int
child(void *arg)
{
    char        path[PATH_MAX];
    char        **args = arg;
    char        *new_root = args[0];
    const char  *put_old = "/oldrootfs";
    int         len;

    /* Refuse to continue without both a new root and a command;
       the calls below would otherwise be given NULL pointers. */

    if (new_root == NULL || args[1] == NULL)
        errx(EXIT_FAILURE, "missing new_root or command argument");

    /* Ensure that \(aqnew_root\(aq and its parent mount don\(aqt have
       shared propagation (which would cause pivot_root() to
       return an error), and prevent propagation of mount
       events to the initial mount namespace. */

    if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) == \-1)
        err(EXIT_FAILURE, "mount\-MS_PRIVATE");

    /* Ensure that \(aqnew_root\(aq is a mount point. */

    if (mount(new_root, new_root, NULL, MS_BIND, NULL) == \-1)
        err(EXIT_FAILURE, "mount\-MS_BIND");

    /* Create directory to which old root will be pivoted.
       A truncated pathname would name the wrong directory,
       so treat truncation as a fatal error. */

    len = snprintf(path, sizeof(path), "%s/%s", new_root, put_old);
    if (len < 0 || (size_t) len >= sizeof(path))
        errx(EXIT_FAILURE, "pathname too long: %s/%s",
             new_root, put_old);
    if (mkdir(path, 0777) == \-1)
        err(EXIT_FAILURE, "mkdir");

    /* And pivot the root filesystem. */

    if (pivot_root(new_root, path) == \-1)
        err(EXIT_FAILURE, "pivot_root");

    /* Switch the current working directory to "/". */

    if (chdir("/") == \-1)
        err(EXIT_FAILURE, "chdir");

    /* Unmount old root and remove mount point.
       Failures here are only reported; the command is still run. */

    if (umount2(put_old, MNT_DETACH) == \-1)
        perror("umount2");
    if (rmdir(put_old) == \-1)
        perror("rmdir");

    /* Execute the command given after the new root, that is,
       args[1] (argv[2] of this program) with its arguments. */

    execv(args[1], &args[1]);
    err(EXIT_FAILURE, "execv");
}
377
378 int
379 main(int argc, char *argv[])
380 {
381 char *stack;
382
383 /* Create a child process in a new mount namespace. */
384
385 stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
386 MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, \-1, 0);
387 if (stack == MAP_FAILED)
388 err(EXIT_FAILURE, "mmap");
389
390 if (clone(child, stack + STACK_SIZE,
391 CLONE_NEWNS | SIGCHLD, &argv[1]) == \-1)
392 err(EXIT_FAILURE, "clone");
393
394 /* Parent falls through to here; wait for child. */
395
396 if (wait(NULL) == \-1)
397 err(EXIT_FAILURE, "wait");
398
399 exit(EXIT_SUCCESS);
400 }
401 .EE
402 .\" SRC END
403 .SH SEE ALSO
404 .BR chdir (2),
405 .BR chroot (2),
406 .BR mount (2),
407 .BR stat (2),
408 .BR initrd (4),
409 .BR mount_namespaces (7),
410 .BR pivot_root (8),
411 .BR switch_root (8)