]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/pivot_root.2
pivot_root.2: Simplify discussion of restrictions for 'new_root'
[thirdparty/man-pages.git] / man2 / pivot_root.2
1 .\" Copyright (C) 2000 by Werner Almesberger
2 .\" and Copyright (C) 2019 Michael Kerrisk <mtk.manpages@gmail.com>
3 .\"
4 .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
5 .\" May be distributed under GPL
6 .\" %%%LICENSE_END
7 .\"
8 .\" Written 2000-02-23 by Werner Almesberger
9 .\" Modified 2004-06-17 Michael Kerrisk <mtk.manpages@gmail.com>
10 .\"
11 .TH PIVOT_ROOT 2 2019-08-02 "Linux" "Linux Programmer's Manual"
12 .SH NAME
13 pivot_root \- change the root filesystem
14 .SH SYNOPSIS
15 .BI "int pivot_root(const char *" new_root ", const char *" put_old );
16 .PP
17 .IR Note :
18 There is no glibc wrapper for this system call; see NOTES.
19 .SH DESCRIPTION
20 .BR pivot_root ()
21 changes the root filesystem in the mount namespace of the calling process.
22 More precisely, it moves the root filesystem to the
23 directory \fIput_old\fP and makes \fInew_root\fP the new root filesystem.
24 The calling process must have the
25 .B CAP_SYS_ADMIN
26 capability in the user namespace that owns the caller's mount namespace.
27 .PP
28 .BR pivot_root ()
29 may or may not change the current root and the current
30 working directory of any processes or threads that
31 use the old root directory and which are in
32 the same mount namespace as the caller of
33 .BR pivot_root ().
34 The caller of
35 .BR pivot_root ()
36 should ensure that processes with root or current working directory
37 at the old root operate correctly in either case.
38 An easy way to ensure this is to change their
39 root and current working directory to \fInew_root\fP before invoking
40 .BR pivot_root ().
41 Note also that
42 .BR pivot_root ()
43 may or may not affect the calling process's current working directory.
44 It is therefore recommended to call
45 \fBchdir("/")\fP immediately after
46 .BR pivot_root ().
47 .PP
48 The paragraph above is intentionally vague because at the time when
49 .BR pivot_root ()
50 was first implemented, it was unclear whether its effect
51 on other processes' root and current working directories\(emand
52 the caller's current working directory\(emmight change in the future.
53 However, the behavior has remained consistent since this system call
54 was first implemented:
55 .BR pivot_root ()
56 changes the root directory and the current working directory
57 of each process or thread in the same mount namespace to
58 .I new_root
59 if they point to the old root directory.
60 (See also NOTES.)
61 On the other hand,
62 .BR pivot_root ()
63 does not change the caller's current working directory
64 (unless it is on the old root directory),
65 and thus it should be followed by a
66 \fBchdir("/")\fP call.
67 .PP
68 The following restrictions apply:
69 .IP \- 3
70 .IR new_root
71 and
72 .IR put_old
73 must be directories.
74 .IP \-
75 \fIput_old\fP must not be on the same filesystem as
76 the current root.
77 .IP \-
78 \fIput_old\fP must be at or underneath \fInew_root\fP;
79 that is, adding a nonnegative
80 number of \fI/..\fP to the string pointed to by \fIput_old\fP must yield
81 the same directory as \fInew_root\fP.
82 .IP \-
83 .I new_root
84 must be a mount point, but can't be
85 .IR """/""" .
86 If it is not otherwise a mount point, it suffices to bind mount
87 .I new_root
88 on top of itself.
89 .RI ( new_root
90 can be a bind mounted directory on the current root filesystem.)
91 .IP \-
92 The propagation type of the parent mount of
93 .IR new_root
94 and the parent mount of the current root directory must not be
95 .BR MS_SHARED ;
96 similarly, if
97 .I put_old
98 is an existing mount point, its propagation type must not be
99 .BR MS_SHARED .
100 These restrictions ensure that
101 .BR pivot_root ()
102 never propagates any changes to another mount namespace.
103 .IP \-
104 The current root directory must be a mount point.
105 .SH RETURN VALUE
106 On success, zero is returned.
107 On error, \-1 is returned, and
108 \fIerrno\fP is set appropriately.
109 .SH ERRORS
110 .BR pivot_root ()
111 may fail with any of the same errors as
112 .BR stat (2).
113 Additionally, it may fail with the following errors:
114 .TP
115 .B EBUSY
116 .\" Reconfirmed that the following error occurs on Linux 5.0 by
117 .\" specifying 'new_root' as "/rootfs" and 'put_old' as
118 .\" "/rootfs/oldrootfs", and *not* bind mounting "/rootfs" on top of
119 .\" itself. Of course, this is an odd situation, since a later check
120 .\" in the kernel code will in any case yield EINVAL if 'new_root' is
121 .\" not a mount point. However, when the system call was first added,
122 .\" 'new_root' was not required to be a mount point. So, this
123 .\" error is nowadays probably just the result of crufty accumulation.
124 .\" This error can also occur if we bind mount "/" on top of itself
125 .\" and try to specify "/" as the 'new' (again, an odd situation). So,
126 .\" the EBUSY check in the kernel does still seem necessary to prevent
127 .\" that case. Furthermore, the "or put_old" piece is probably
128 .\" redundant text (although the check is in the kernel), since,
129 .\" in another check, 'put_old' is required to be under 'new_root'.
130 .I new_root
131 or
132 .I put_old
133 is on the current root filesystem.
134 (This error covers the pathological case where
135 .I new_root
136 is
137 .IR """/""" .)
138 .TP
139 .B EINVAL
140 .I new_root
141 is not a mount point.
142 .TP
143 .B EINVAL
144 \fIput_old\fP is not underneath \fInew_root\fP.
145 .TP
146 .B EINVAL
147 The current root directory is not a mount point
148 (because of an earlier
149 .BR chroot (2)).
150 .TP
151 .B EINVAL
152 The current root is on the rootfs (initial ramfs) filesystem; see NOTES.
153 .TP
154 .B EINVAL
155 Either the mount point at
156 .IR new_root ,
157 or the parent mount of that mount point,
158 has propagation type
159 .BR MS_SHARED .
160 .TP
161 .B EINVAL
162 .I put_old
163 is a mount point and has the propagation type
164 .BR MS_SHARED .
165 .TP
166 .B ENOTDIR
167 \fInew_root\fP or \fIput_old\fP is not a directory.
168 .TP
169 .B EPERM
170 The calling process does not have the
171 .B CAP_SYS_ADMIN
172 capability.
173 .SH VERSIONS
174 .BR pivot_root ()
175 was introduced in Linux 2.3.41.
176 .SH CONFORMING TO
177 .BR pivot_root ()
178 is Linux-specific and hence is not portable.
179 .SH NOTES
180 Glibc does not provide a wrapper for this system call; call it using
181 .BR syscall (2).
182 .PP
183 A command-line interface for this system call is provided by
184 .BR pivot_root (8).
185 .PP
186 .BR pivot_root ()
187 allows the caller to switch to a new root filesystem while at the same time
188 placing the old root mount at a location under
189 .I new_root
190 from where it can subsequently be unmounted.
191 (The fact that it moves all processes that have a root directory
192 or current working directory on the old root directory to the
193 new root frees the old root directory of users,
194 allowing the old root filesystem to be unmounted more easily.)
195 .PP
196 A typical use of
197 .BR pivot_root ()
198 is during system startup, when the
199 system mounts a temporary root filesystem (e.g., an \fBinitrd\fP), then
200 mounts the real root filesystem, and eventually turns the latter into
201 the current root of all relevant processes or threads.
202 A modern use is to set up a root filesystem during
203 the creation of a container.
204 .PP
205 The fact that
206 .BR pivot_root ()
207 modifies process root and current working directories in the
208 manner noted in DESCRIPTION
209 is necessary in order to prevent kernel threads from keeping the old
210 root directory busy with their root and current working directory,
211 even if they never access
212 the filesystem in any way.
213 .PP
214 .I new_root
215 and
216 .I put_old
217 may be the same directory.
218 In particular, the following sequence allows a pivot-root operation
219 without needing to create and remove a temporary directory:
220 .PP
221 .in +4n
222 .EX
223 chdir(new_root);
224 pivot_root(".", ".");
225 umount2(".", MNT_DETACH);
226 .EE
227 .in
228 .PP
229 This sequence succeeds because the
230 .BR pivot_root ()
231 call stacks the old root mount point
232 on top of the new root mount point at
233 .IR / .
234 At that point, the calling process's root directory and current
235 working directory refer to the new root mount point
236 .RI ( new_root ).
237 During the subsequent
238 .BR umount2 ()
239 call, resolution of
240 .IR """."""
241 starts with
242 .I new_root
243 and then moves up the list of mounts stacked at
244 .IR / ,
245 with the result that the old root mount point is unmounted.
246 .PP
247 The rootfs (initial ramfs) cannot be
248 .BR pivot_root ()ed.
249 The recommended method of changing the root filesystem in this case is
250 to delete everything in rootfs, overmount rootfs with the new root, attach
251 .IR stdin / stdout / stderr
252 to the new
253 .IR /dev/console ,
254 and exec the new
255 .BR init (1).
256 Helper programs for this process exist; see
257 .BR switch_root (8).
258 .SH EXAMPLE
259 .\" FIXME
260 .\" Would it be better, because simpler, to use unshare(2)
261 .\" rather than clone(2) in the example below?
262 .PP
263 The program below demonstrates the use of
264 .BR pivot_root ()
265 inside a mount namespace that is created using
266 .BR clone (2).
267 After pivoting to the root directory named in the program's
268 first command-line argument, the child created by
269 .BR clone (2)
270 then executes the program named in the remaining command-line arguments.
271 .PP
272 We demonstrate the program by creating a directory that will serve as
273 the new root filesystem and placing a copy of the (statically linked)
274 .BR busybox (1)
275 executable in that directory.
276 .PP
277 .in +4n
278 .EX
279 $ \fBmkdir /tmp/rootfs\fP
280 $ \fBls \-id /tmp/rootfs\fP # Show inode number of new root directory
281 319459 /tmp/rootfs
282 $ \fBcp $(which busybox) /tmp/rootfs\fP
283 $ \fBPS1='bbsh$ ' sudo ./pivot_root_demo /tmp/rootfs /busybox sh\fP
284 bbsh$ \fBPATH=/\fP
285 bbsh$ \fBbusybox ln busybox ln\fP
286 bbsh$ \fBln busybox echo\fP
287 bbsh$ \fBln busybox ls\fP
288 bbsh$ \fBls\fP
289 busybox echo ln ls
290 bbsh$ \fBls \-id /\fP # Compare with inode number above
291 319459 /
292 bbsh$ \fBecho \(aqhello world\(aq\fP
293 hello world
294 .EE
295 .in
296 .SS Program source
297 \&
298 .PP
299 .EX
300 /* pivot_root_demo.c */
301
302 #define _GNU_SOURCE
303 #include <sched.h>
304 #include <stdio.h>
305 #include <stdlib.h>
306 #include <unistd.h>
307 #include <sys/wait.h>
308 #include <sys/syscall.h>
309 #include <sys/mount.h>
310 #include <sys/stat.h>
311 #include <limits.h>
312
313 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \e
314 } while (0)
315
316 static int /* Wrapper that invokes the raw SYS_pivot_root system call */
317 pivot_root(const char *new_root, const char *put_old)
318 {
319 return syscall(SYS_pivot_root, new_root, put_old);
320 }
321
322 #define STACK_SIZE (1024 * 1024)
323
324 static int /* Startup function for cloned child; arg is the &argv[1] passed by main() */
325 child(void *arg)
326 {
327 char **args = arg;
328 char *new_root = args[0];
329 const char *put_old = "/oldrootfs";
330 char path[PATH_MAX];
331
332 /* Ensure that \(aqnew_root\(aq and its parent mount don\(aqt have
333 shared propagation (which would cause pivot_root() to
334 return an error), and prevent propagation of mount
335 events to the initial mount namespace */
336
337 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) == \-1)
338 errExit("mount\-MS_PRIVATE");
339
340 /* Ensure that \(aqnew_root\(aq is a mount point */
341
342 if (mount(new_root, new_root, NULL, MS_BIND, NULL) == \-1)
343 errExit("mount\-MS_BIND");
344
345 /* Create directory to which old root will be pivoted */
346
347 snprintf(path, sizeof(path), "%s/%s", new_root, put_old);
348 if (mkdir(path, 0777) == \-1)
349 errExit("mkdir");
350
351 /* And pivot the root filesystem */
352
353 if (pivot_root(new_root, path) == \-1)
354 errExit("pivot_root");
355
356 /* Switch the current working directory to "/" */
357
358 if (chdir("/") == \-1)
359 errExit("chdir");
360
361 /* Unmount old root and remove mount point (errors are not fatal) */
362
363 if (umount2(put_old, MNT_DETACH) == \-1)
364 perror("umount2");
365 if (rmdir(put_old) == \-1)
366 perror("rmdir");
367
368 /* Execute the command named in args[1], that is, argv[2] of main() */
369
370 execv(args[1], &args[1]);
371 errExit("execv");
372 }
373
374 int /* argv[1] is the new root directory; argv[2]... is the command to run */
375 main(int argc, char *argv[])
376 {
377 /* Create a child process in a new mount namespace */
378 /* NOTE(review): argc is not checked before &argv[1] is handed to child() */
379 char *stack = malloc(STACK_SIZE);
380 if (stack == NULL)
381 errExit("malloc");
382
383 if (clone(child, stack + STACK_SIZE, /* Pass the end of the stack buffer */
384 CLONE_NEWNS | SIGCHLD, &argv[1]) == \-1)
385 errExit("clone");
386
387 /* Parent falls through to here; wait for child */
388
389 if (wait(NULL) == \-1)
390 errExit("wait");
391
392 exit(EXIT_SUCCESS);
393 }
394 .EE
395 .SH SEE ALSO
396 .BR chdir (2),
397 .BR chroot (2),
398 .BR mount (2),
399 .BR stat (2),
400 .BR initrd (4),
401 .BR mount_namespaces (7),
402 .BR pivot_root (8),
403 .BR switch_root (8)