]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/pivot_root.2
pivot_root.2: Update the copyright and license
[thirdparty/man-pages.git] / man2 / pivot_root.2
1 .\" Copyright (C) 2019 Michael Kerrisk <mtk.manpages@gmail.com>
2 .\" A very few fragments remain from an earlier page written by
3 .\" Werner Almesberger in 2000
4 .\"
5 .\" %%%LICENSE_START(VERBATIM)
6 .\" Permission is granted to make and distribute verbatim copies of this
7 .\" manual provided the copyright notice and this permission notice are
8 .\" preserved on all copies.
9 .\"
10 .\" Permission is granted to copy and distribute modified versions of this
11 .\" manual under the conditions for verbatim copying, provided that the
12 .\" entire resulting derived work is distributed under the terms of a
13 .\" permission notice identical to this one.
14 .\"
15 .\" Since the Linux kernel and libraries are constantly changing, this
16 .\" manual page may be incorrect or out-of-date. The author(s) assume no
17 .\" responsibility for errors or omissions, or for damages resulting from
18 .\" the use of the information contained herein. The author(s) may not
19 .\" have taken the same level of care in the production of this manual,
20 .\" which is licensed free of charge, as they might when working
21 .\" professionally.
22 .\"
23 .\" Formatted or processed versions of this manual, if unaccompanied by
24 .\" the source, must acknowledge the copyright and authors of this work.
25 .\" %%%LICENSE_END
26 .\"
27 .TH PIVOT_ROOT 2 2019-08-02 "Linux" "Linux Programmer's Manual"
28 .SH NAME
29 pivot_root \- change the root mount
30 .SH SYNOPSIS
31 .BI "int pivot_root(const char *" new_root ", const char *" put_old );
32 .PP
33 .IR Note :
34 There is no glibc wrapper for this system call; see NOTES.
35 .SH DESCRIPTION
36 .BR pivot_root ()
37 changes the root mount in the mount namespace of the calling process.
38 More precisely, it moves the root mount to the
39 directory \fIput_old\fP and makes \fInew_root\fP the new root mount.
40 The calling process must have the
41 .B CAP_SYS_ADMIN
42 capability in the user namespace that owns the caller's mount namespace.
43 .PP
44 .BR pivot_root ()
45 changes the root directory and the current working directory
46 of each process or thread in the same mount namespace to
47 .I new_root
48 if they point to the old root directory.
49 (See also NOTES.)
50 On the other hand,
51 .BR pivot_root ()
52 does not change the caller's current working directory
53 (unless it is on the old root directory),
54 and thus it should be followed by a
55 \fBchdir("/")\fP call.
56 .PP
57 The following restrictions apply:
58 .IP \- 3
59 .IR new_root
60 and
61 .IR put_old
62 must be directories.
63 .IP \-
64 .I new_root
65 and
66 .I put_old
67 must not be on the same mount as the current root.
68 .IP \-
69 \fIput_old\fP must be at or underneath \fInew_root\fP;
70 that is, adding some nonnegative
71 number of "\fI/..\fP" prefixes to the pathname pointed to by
72 .I put_old
73 must yield the same directory as \fInew_root\fP.
74 .IP \-
75 .I new_root
76 must be a path to a mount point, but can't be
77 .IR """/""" .
78 A path that is not already a mount point can be converted into one by
79 bind mounting the path onto itself.
80 .IP \-
81 The propagation type of the parent mount of
82 .IR new_root
83 and the parent mount of the current root directory must not be
84 .BR MS_SHARED ;
85 similarly, if
86 .I put_old
87 is an existing mount point, its propagation type must not be
88 .BR MS_SHARED .
89 These restrictions ensure that
90 .BR pivot_root ()
91 never propagates any changes to another mount namespace.
92 .IP \-
93 The current root directory must be a mount point.
94 .SH RETURN VALUE
95 On success, zero is returned.
96 On error, \-1 is returned, and
97 \fIerrno\fP is set appropriately.
98 .SH ERRORS
99 .BR pivot_root ()
100 may fail with any of the same errors as
101 .BR stat (2).
102 Additionally, it may fail with the following errors:
103 .TP
104 .B EBUSY
105 .\" Reconfirmed that the following error occurs on Linux 5.0 by
106 .\" specifying 'new_root' as "/rootfs" and 'put_old' as
107 .\" "/rootfs/oldrootfs", and *not* bind mounting "/rootfs" on top of
108 .\" itself. Of course, this is an odd situation, since a later check
109 .\" in the kernel code will in any case yield EINVAL if 'new_root' is
110 .\" not a mount point. However, when the system call was first added,
111 .\" 'new_root' was not required to be a mount point. So, this
112 .\" error is nowadays probably just the result of crufty accumulation.
113 .\" This error can also occur if we bind mount "/" on top of itself
114 .\" and try to specify "/" as 'new_root' (again, an odd situation). So,
115 .\" the EBUSY check in the kernel does still seem necessary to prevent
116 .\" that case. Furthermore, the "or put_old" piece is probably
117 .\" redundant text (although the check is in the kernel), since,
118 .\" in another check, 'put_old' is required to be under 'new_root'.
119 .I new_root
120 or
121 .I put_old
122 is on the current root mount.
123 (This error covers the pathological case where
124 .I new_root
125 is
126 .IR """/""" .)
127 .TP
128 .B EINVAL
129 .I new_root
130 is not a mount point.
131 .TP
132 .B EINVAL
133 \fIput_old\fP is not at or underneath \fInew_root\fP.
134 .TP
135 .B EINVAL
136 The current root directory is not a mount point
137 (because of an earlier
138 .BR chroot (2)).
139 .TP
140 .B EINVAL
141 The current root is on the rootfs (initial ramfs) mount; see NOTES.
142 .TP
143 .B EINVAL
144 Either the mount point at
145 .IR new_root ,
146 or the parent mount of that mount point,
147 has propagation type
148 .BR MS_SHARED .
149 .TP
150 .B EINVAL
151 .I put_old
152 is a mount point and has the propagation type
153 .BR MS_SHARED .
154 .TP
155 .B ENOTDIR
156 \fInew_root\fP or \fIput_old\fP is not a directory.
157 .TP
158 .B EPERM
159 The calling process does not have the
160 .B CAP_SYS_ADMIN
161 capability.
162 .SH VERSIONS
163 .BR pivot_root ()
164 was introduced in Linux 2.3.41.
165 .SH CONFORMING TO
166 .BR pivot_root ()
167 is Linux-specific and hence is not portable.
168 .SH NOTES
169 Glibc does not provide a wrapper for this system call; call it using
170 .BR syscall (2).
171 .PP
172 A command-line interface for this system call is provided by
173 .BR pivot_root (8).
174 .PP
175 .BR pivot_root ()
176 allows the caller to switch to a new root filesystem while at the same time
177 placing the old root mount at a location under
178 .I new_root
179 from where it can subsequently be unmounted.
180 (The fact that it moves all processes that have a root directory
181 or current working directory on the old root directory to the
182 new root frees the old root directory of users,
183 allowing the old root mount to be unmounted more easily.)
184 .PP
185 One use of
186 .BR pivot_root ()
187 is during system startup, when the
188 system mounts a temporary root filesystem (e.g., an
189 .BR initrd (4)),
190 then mounts the real root filesystem, and eventually turns the latter into
191 the root directory of all relevant processes and threads.
192 A modern use is to set up a root filesystem during
193 the creation of a container.
194 .PP
195 The fact that
196 .BR pivot_root ()
197 modifies process root and current working directories in the
198 manner noted in DESCRIPTION
199 is necessary in order to prevent kernel threads from keeping the old
200 root mount busy with their root and current working directories,
201 even if they never access
202 the filesystem in any way.
203 .PP
204 The rootfs (initial ramfs) cannot be
205 .BR pivot_root ()ed.
206 The recommended method of changing the root filesystem in this case is
207 to delete everything in rootfs, overmount rootfs with the new root, attach
208 .IR stdin / stdout / stderr
209 to the new
210 .IR /dev/console ,
211 and exec the new
212 .BR init (1).
213 Helper programs for this process exist; see
214 .BR switch_root (8).
215 .\"
216 .SS pivot_root(\(dq.\(dq, \(dq.\(dq)
217 .PP
218 .I new_root
219 and
220 .I put_old
221 may be the same directory.
222 In particular, the following sequence allows a pivot-root operation
223 without needing to create and remove a temporary directory:
224 .PP
225 .in +4n
226 .EX
227 chdir(new_root);
228 pivot_root(".", ".");
229 umount2(".", MNT_DETACH);
230 .EE
231 .in
232 .PP
233 This sequence succeeds because the
234 .BR pivot_root ()
235 call stacks the old root mount point
236 on top of the new root mount point at
237 .IR / .
238 At that point, the calling process's root directory and current
239 working directory refer to the new root mount point
240 .RI ( new_root ).
241 During the subsequent
242 .BR umount2 (2)
243 call, resolution of
244 .IR """."""
245 starts with
246 .I new_root
247 and then moves up the list of mounts stacked at
248 .IR / ,
249 with the result that the old root mount point is unmounted.
250 .\"
251 .SS Historical notes
252 For many years, this manual page carried the following text:
253 .RS
254 .PP
255 .BR pivot_root ()
256 may or may not change the current root and the current
257 working directory of any processes or threads which use the old
258 root directory.
259 The caller of
260 .BR pivot_root ()
261 must ensure that processes with root or current working directory
262 at the old root operate correctly in either case.
263 An easy way to ensure this is to change their
264 root and current working directory to \fInew_root\fP before invoking
265 .BR pivot_root ().
266 .RE
267 .PP
268 This text, written before the system call implementation was
269 even finalized in the kernel, was probably intended to warn users
270 at that time that the implementation might change before final release.
271 However, the behavior stated in DESCRIPTION
272 has remained consistent since this system call
273 was first implemented and will not change now.
274 .SH EXAMPLE
275 .\" FIXME
276 .\" Would it be better, because simpler, to use unshare(2)
277 .\" rather than clone(2) in the example below?
278 .PP
279 The program below demonstrates the use of
280 .BR pivot_root ()
281 inside a mount namespace that is created using
282 .BR clone (2).
283 After pivoting to the root directory named in the program's
284 first command-line argument, the child created by
285 .BR clone (2)
286 then executes the program named in the remaining command-line arguments.
287 .PP
288 We demonstrate the program by creating a directory that will serve as
289 the new root filesystem and placing a copy of the (statically linked)
290 .BR busybox (1)
291 executable in that directory.
292 .PP
293 .in +4n
294 .EX
295 $ \fBmkdir /tmp/rootfs\fP
296 $ \fBls \-id /tmp/rootfs\fP # Show inode number of new root directory
297 319459 /tmp/rootfs
298 $ \fBcp $(which busybox) /tmp/rootfs\fP
299 $ \fBPS1='bbsh$ ' sudo ./pivot_root_demo /tmp/rootfs /busybox sh\fP
300 bbsh$ \fBPATH=/\fP
301 bbsh$ \fBbusybox ln busybox ln\fP
302 bbsh$ \fBln busybox echo\fP
303 bbsh$ \fBln busybox ls\fP
304 bbsh$ \fBls\fP
305 busybox echo ln ls
306 bbsh$ \fBls \-id /\fP # Compare with inode number above
307 319459 /
308 bbsh$ \fBecho \(aqhello world\(aq\fP
309 hello world
310 .EE
311 .in
312 .SS Program source
313 \&
314 .PP
315 .EX
316 /* pivot_root_demo.c */
317
318 #define _GNU_SOURCE
319 #include <sched.h>
320 #include <stdio.h>
321 #include <stdlib.h>
322 #include <unistd.h>
323 #include <sys/wait.h>
324 #include <sys/syscall.h>
325 #include <sys/mount.h>
326 #include <sys/stat.h>
327 #include <limits.h>
328
329 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \e
330 } while (0)
331
332 static int
333 pivot_root(const char *new_root, const char *put_old)
334 {
335 return syscall(SYS_pivot_root, new_root, put_old);
336 }
337
338 #define STACK_SIZE (1024 * 1024)
339
340 static int /* Startup function for cloned child */
341 child(void *arg)
342 {
343 char **args = arg;
344 char *new_root = args[0];
345 const char *put_old = "/oldrootfs";
346 char path[PATH_MAX];
347
348 /* Ensure that \(aqnew_root\(aq and its parent mount don\(aqt have
349 shared propagation (which would cause pivot_root() to
350 return an error), and prevent propagation of mount
351 events to the initial mount namespace */
352
353 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) == \-1)
354 errExit("mount\-MS_PRIVATE");
355
356 /* Ensure that \(aqnew_root\(aq is a mount point */
357
358 if (mount(new_root, new_root, NULL, MS_BIND, NULL) == \-1)
359 errExit("mount\-MS_BIND");
360
361 /* Create directory to which old root will be pivoted */
362
363 snprintf(path, sizeof(path), "%s/%s", new_root, put_old);
364 if (mkdir(path, 0777) == \-1)
365 errExit("mkdir");
366
367 /* And pivot the root filesystem */
368
369 if (pivot_root(new_root, path) == \-1)
370 errExit("pivot_root");
371
372 /* Switch the current working directory to "/" */
373
374 if (chdir("/") == \-1)
375 errExit("chdir");
376
377 /* Unmount old root and remove mount point */
378
379 if (umount2(put_old, MNT_DETACH) == \-1)
380 perror("umount2");
381 if (rmdir(put_old) == \-1)
382 perror("rmdir");
383
384 /* Execute the command specified in args[1]... */
385
386 execv(args[1], &args[1]);
387 errExit("execv");
388 }
389
390 int
391 main(int argc, char *argv[])
392 {
393 /* Create a child process in a new mount namespace */
394
395 char *stack = malloc(STACK_SIZE);
396 if (stack == NULL)
397 errExit("malloc");
398
399 if (clone(child, stack + STACK_SIZE,
400 CLONE_NEWNS | SIGCHLD, &argv[1]) == \-1)
401 errExit("clone");
402
403 /* Parent falls through to here; wait for child */
404
405 if (wait(NULL) == \-1)
406 errExit("wait");
407
408 exit(EXIT_SUCCESS);
409 }
410 .EE
411 .SH SEE ALSO
412 .BR chdir (2),
413 .BR chroot (2),
414 .BR mount (2),
415 .BR stat (2),
416 .BR initrd (4),
417 .BR mount_namespaces (7),
418 .BR pivot_root (8),
419 .BR switch_root (8)