]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/pivot_root.2
pivot_root.2: Add a subsection header for the pivot_root(".", ".") discussion
[thirdparty/man-pages.git] / man2 / pivot_root.2
1 .\" Copyright (C) 2000 by Werner Almesberger
2 .\" and Copyright (C) 2019 Michael Kerrisk <mtk.manpages@gmail.com>
3 .\"
4 .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
5 .\" May be distributed under GPL
6 .\" %%%LICENSE_END
7 .\"
8 .\" Written 2000-02-23 by Werner Almesberger
9 .\" Modified 2004-06-17 Michael Kerrisk <mtk.manpages@gmail.com>
10 .\"
11 .TH PIVOT_ROOT 2 2019-08-02 "Linux" "Linux Programmer's Manual"
12 .SH NAME
13 pivot_root \- change the root mount
14 .SH SYNOPSIS
15 .BI "int pivot_root(const char *" new_root ", const char *" put_old );
16 .PP
17 .IR Note :
18 There is no glibc wrapper for this system call; see NOTES.
19 .SH DESCRIPTION
20 .BR pivot_root ()
21 changes the root mount in the mount namespace of the calling process.
22 More precisely, it moves the root mount to the
23 directory \fIput_old\fP and makes \fInew_root\fP the new root mount.
24 The calling process must have the
25 .B CAP_SYS_ADMIN
26 capability in the user namespace that owns the caller's mount namespace.
27 .PP
28 .BR pivot_root ()
29 may or may not change the current root and the current
30 working directory of any processes or threads that
31 use the old root directory and which are in
32 the same mount namespace as the caller of
33 .BR pivot_root ().
34 The caller of
35 .BR pivot_root ()
36 should ensure that processes with root or current working directory
37 at the old root operate correctly in either case.
38 An easy way to ensure this is to change their
39 root and current working directory to \fInew_root\fP before invoking
40 .BR pivot_root ().
41 Note also that
42 .BR pivot_root ()
43 may or may not affect the calling process's current working directory.
44 It is therefore recommended to call
45 \fBchdir("/")\fP immediately after
46 .BR pivot_root ().
47 .PP
48 The paragraph above is intentionally vague because at the time when
49 .BR pivot_root ()
50 was first implemented, it was unclear whether its effect
51 on other processes' root and current working directories\(emand
52 the caller's current working directory\(emmight change in the future.
53 However, the behavior has remained consistent since this system call
54 was first implemented:
55 .BR pivot_root ()
56 changes the root directory and the current working directory
57 of each process or thread in the same mount namespace to
58 .I new_root
59 if they point to the old root directory.
60 (See also NOTES.)
61 On the other hand,
62 .BR pivot_root ()
63 does not change the caller's current working directory
64 (unless it is on the old root directory),
65 and thus it should be followed by a
66 \fBchdir("/")\fP call.
67 .PP
68 The following restrictions apply:
69 .IP \- 3
70 .IR new_root
71 and
72 .IR put_old
73 must be directories.
74 .IP \-
75 .I new_root
76 and
77 .I put_old
78 must not be on the same mount as the current root.
79 .IP \-
80 \fIput_old\fP must be at or underneath \fInew_root\fP;
81 that is, adding a nonnegative
82 number of \fI/..\fP to the string pointed to by \fIput_old\fP must yield
83 the same directory as \fInew_root\fP.
84 .IP \-
85 .I new_root
86 must be a path to a mount point, but can't be
87 .IR """/""" .
88 A path that is not already a mount point can be converted into one by
89 bind mounting the path onto itself.
90 .IP \-
91 The propagation type of the parent mount of
92 .IR new_root
93 and the parent mount of the current root directory must not be
94 .BR MS_SHARED ;
95 similarly, if
96 .I put_old
97 is an existing mount point, its propagation type must not be
98 .BR MS_SHARED .
99 These restrictions ensure that
100 .BR pivot_root ()
101 never propagates any changes to another mount namespace.
102 .IP \-
103 The current root directory must be a mount point.
104 .SH RETURN VALUE
105 On success, zero is returned.
106 On error, \-1 is returned, and
107 \fIerrno\fP is set appropriately.
108 .SH ERRORS
109 .BR pivot_root ()
110 may fail with any of the same errors as
111 .BR stat (2).
112 Additionally, it may fail with the following errors:
113 .TP
114 .B EBUSY
115 .\" Reconfirmed that the following error occurs on Linux 5.0 by
116 .\" specifying 'new_root' as "/rootfs" and 'put_old' as
117 .\" "/rootfs/oldrootfs", and *not* bind mounting "/rootfs" on top of
118 .\" itself. Of course, this is an odd situation, since a later check
119 .\" in the kernel code will in any case yield EINVAL if 'new_root' is
120 .\" not a mount point. However, when the system call was first added,
121 .\" 'new_root' was not required to be a mount point. So, this
122 .\" error is nowadays probably just the result of crufty accumulation.
123 .\" This error can also occur if we bind mount "/" on top of itself
124 .\" and try to specify "/" as the 'new' (again, an odd situation). So,
125 .\" the EBUSY check in the kernel does still seem necessary to prevent
126 .\" that case. Furthermore, the "or put_old" piece is probably
127 .\" redundant text (although the check is in the kernel), since,
128 .\" in another check, 'put_old' is required to be under 'new_root'.
129 .I new_root
130 or
131 .I put_old
132 is on the current root filesystem.
133 (This error covers the pathological case where
134 .I new_root
135 is
136 .IR """/""" .)
137 .TP
138 .B EINVAL
139 .I new_root
140 is not a mount point.
141 .TP
142 .B EINVAL
143 \fIput_old\fP is not at or underneath \fInew_root\fP.
144 .TP
145 .B EINVAL
146 The current root directory is not a mount point
147 (because of an earlier
148 .BR chroot (2)).
149 .TP
150 .B EINVAL
151 The current root is on the rootfs (initial ramfs) filesystem; see NOTES.
152 .TP
153 .B EINVAL
154 Either the mount point at
155 .IR new_root ,
156 or the parent mount of that mount point,
157 has propagation type
158 .BR MS_SHARED .
159 .TP
160 .B EINVAL
161 .I put_old
162 is a mount point and has the propagation type
163 .BR MS_SHARED .
164 .TP
165 .B ENOTDIR
166 \fInew_root\fP or \fIput_old\fP is not a directory.
167 .TP
168 .B EPERM
169 The calling process does not have the
170 .B CAP_SYS_ADMIN
171 capability.
172 .SH VERSIONS
173 .BR pivot_root ()
174 was introduced in Linux 2.3.41.
175 .SH CONFORMING TO
176 .BR pivot_root ()
177 is Linux-specific and hence is not portable.
178 .SH NOTES
179 Glibc does not provide a wrapper for this system call; call it using
180 .BR syscall (2).
181 .PP
182 A command-line interface for this system call is provided by
183 .BR pivot_root (8).
184 .PP
185 .BR pivot_root ()
186 allows the caller to switch to a new root filesystem while at the same time
187 placing the old root mount at a location under
188 .I new_root
189 from where it can subsequently be unmounted.
190 (The fact that it moves all processes that have a root directory
191 or current working directory on the old root directory to the
192 new root frees the old root directory of users,
193 allowing the old root mount to be unmounted more easily.)
194 .PP
195 A typical use of
196 .BR pivot_root ()
197 is during system startup, when the
198 system mounts a temporary root filesystem (e.g., an \fBinitrd\fP), then
199 mounts the real root filesystem, and eventually turns the latter into
200 the current root of all relevant processes or threads.
201 A modern use is to set up a root filesystem during
202 the creation of a container.
203 .PP
204 The fact that
205 .BR pivot_root ()
206 modifies process root and current working directories in the
207 manner noted in DESCRIPTION
208 is necessary in order to prevent kernel threads from keeping the old
209 root directory busy with their root and current working directory,
210 even if they never access
211 the filesystem in any way.
212 .PP
213 The rootfs (initial ramfs) cannot be
214 .BR pivot_root ()ed.
215 The recommended method of changing the root filesystem in this case is
216 to delete everything in rootfs, overmount rootfs with the new root, attach
217 .IR stdin / stdout / stderr
218 to the new
219 .IR /dev/console ,
220 and exec the new
221 .BR init (1).
222 Helper programs for this process exist; see
223 .BR switch_root (8).
224 .\"
225 .SS pivot_root(\(dq.\(dq, \(dq.\(dq)
226 .PP
227 .I new_root
228 and
229 .I put_old
230 may be the same directory.
231 In particular, the following sequence allows a pivot-root operation
232 without needing to create and remove a temporary directory:
233 .PP
234 .in +4n
235 .EX
236 chdir(new_root);
237 pivot_root(".", ".");
238 umount2(".", MNT_DETACH);
239 .EE
240 .in
241 .PP
242 This sequence succeeds because the
243 .BR pivot_root ()
244 call stacks the old root mount point
245 on top of the new root mount point at
246 .IR / .
247 At that point, the calling process's root directory and current
248 working directory refer to the new root mount point
249 .RI ( new_root ).
250 During the subsequent
251 .BR umount2 ()
252 call, resolution of
253 .IR """."""
254 starts with
255 .I new_root
256 and then moves up the list of mounts stacked at
257 .IR / ,
258 with the result that the old root mount point is unmounted.
259 .SH EXAMPLE
260 .\" FIXME
261 .\" Would it be better, because simpler, to use unshare(2)
262 .\" rather than clone(2) in the example below?
263 .PP
264 The program below demonstrates the use of
265 .BR pivot_root ()
266 inside a mount namespace that is created using
267 .BR clone (2).
268 After pivoting to the root directory named in the program's
269 first command-line argument, the child created by
270 .BR clone (2)
271 then executes the program named in the remaining command-line arguments.
272 .PP
273 We demonstrate the program by creating a directory that will serve as
274 the new root filesystem and placing a copy of the (statically linked)
275 .BR busybox (1)
276 executable in that directory.
277 .PP
278 .in +4n
279 .EX
280 $ \fBmkdir /tmp/rootfs\fP
281 $ \fBls \-id /tmp/rootfs\fP # Show inode number of new root directory
282 319459 /tmp/rootfs
283 $ \fBcp $(which busybox) /tmp/rootfs\fP
284 $ \fBPS1='bbsh$ ' sudo ./pivot_root_demo /tmp/rootfs /busybox sh\fP
285 bbsh$ \fBPATH=/\fP
286 bbsh$ \fBbusybox ln busybox ln\fP
287 bbsh$ \fBln busybox echo\fP
288 bbsh$ \fBln busybox ls\fP
289 bbsh$ \fBls\fP
290 busybox echo ln ls
291 bbsh$ \fBls \-id /\fP # Compare with inode number above
292 319459 /
293 bbsh$ \fBecho \(aqhello world\(aq\fP
294 hello world
295 .EE
296 .in
297 .SS Program source
298 \&
299 .PP
300 .EX
301 /* pivot_root_demo.c */
302
303 #define _GNU_SOURCE
304 #include <sched.h>
305 #include <stdio.h>
306 #include <stdlib.h>
307 #include <unistd.h>
308 #include <sys/wait.h>
309 #include <sys/syscall.h>
310 #include <sys/mount.h>
311 #include <sys/stat.h>
312 #include <limits.h>
313
314 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \e
315 } while (0)
316
317 static int
318 pivot_root(const char *new_root, const char *put_old)
319 {
320 return syscall(SYS_pivot_root, new_root, put_old);
321 }
322
323 #define STACK_SIZE (1024 * 1024)
324
325 static int /* Startup function for cloned child */
326 child(void *arg)
327 {
328 char **args = arg;
329 char *new_root = args[0];
330 const char *put_old = "/oldrootfs";
331 char path[PATH_MAX];
332
333 /* Ensure that \(aqnew_root\(aq and its parent mount don\(aqt have
334 shared propagation (which would cause pivot_root() to
335 return an error), and prevent propagation of mount
336 events to the initial mount namespace */
337
338 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) == \-1)
339 errExit("mount\-MS_PRIVATE");
340
341 /* Ensure that \(aqnew_root\(aq is a mount point */
342
343 if (mount(new_root, new_root, NULL, MS_BIND, NULL) == \-1)
344 errExit("mount\-MS_BIND");
345
346 /* Create directory to which old root will be pivoted */
347
348 snprintf(path, sizeof(path), "%s/%s", new_root, put_old);
349 if (mkdir(path, 0777) == \-1)
350 errExit("mkdir");
351
352 /* And pivot the root filesystem */
353
354 if (pivot_root(new_root, path) == \-1)
355 errExit("pivot_root");
356
357 /* Switch the current working directory to "/" */
358
359 if (chdir("/") == \-1)
360 errExit("chdir");
361
362 /* Unmount old root and remove mount point */
363
364 if (umount2(put_old, MNT_DETACH) == \-1)
365 perror("umount2");
366 if (rmdir(put_old) == \-1)
367 perror("rmdir");
368
369 /* Execute the command specified in args[1]... */
370
371 execv(args[1], &args[1]);
372 errExit("execv");
373 }
374
375 int
376 main(int argc, char *argv[])
377 {
378 /* Create a child process in a new mount namespace */
379
380 char *stack = malloc(STACK_SIZE);
381 if (stack == NULL)
382 errExit("malloc");
383
384 if (clone(child, stack + STACK_SIZE,
385 CLONE_NEWNS | SIGCHLD, &argv[1]) == \-1)
386 errExit("clone");
387
388 /* Parent falls through to here; wait for child */
389
390 if (wait(NULL) == \-1)
391 errExit("wait");
392
393 exit(EXIT_SUCCESS);
394 }
395 .EE
396 .SH SEE ALSO
397 .BR chdir (2),
398 .BR chroot (2),
399 .BR mount (2),
400 .BR stat (2),
401 .BR initrd (4),
402 .BR mount_namespaces (7),
403 .BR pivot_root (8),
404 .BR switch_root (8)