]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/pivot_root.2
pivot_root.2: srcfix: FIXME
[thirdparty/man-pages.git] / man2 / pivot_root.2
1 .\" Copyright (C) 2000 by Werner Almesberger
2 .\" and Copyright (C) 2019 Michael Kerrisk <mtk.manpages@gmail.com>
3 .\"
4 .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
5 .\" May be distributed under GPL
6 .\" %%%LICENSE_END
7 .\"
8 .\" Written 2000-02-23 by Werner Almesberger
9 .\" Modified 2004-06-17 Michael Kerrisk <mtk.manpages@gmail.com>
10 .\"
11 .TH PIVOT_ROOT 2 2019-08-02 "Linux" "Linux Programmer's Manual"
12 .SH NAME
13 pivot_root \- change the root filesystem
14 .SH SYNOPSIS
15 .BI "int pivot_root(const char *" new_root ", const char *" put_old );
16 .PP
17 .IR Note :
18 There is no glibc wrapper for this system call; see NOTES.
19 .SH DESCRIPTION
20 .BR pivot_root ()
21 changes the root filesystem in the mount namespace of the calling process.
22 More precisely, it moves the root filesystem to the
23 directory \fIput_old\fP and makes \fInew_root\fP the new root filesystem.
24 The calling process must have the
25 .B CAP_SYS_ADMIN
26 capability in the user namespace that owns the caller's mount namespace.
27 .PP
28 .BR pivot_root ()
29 may or may not change the current root and the current
30 working directory of any processes or threads that
31 use the old root directory and which are in
32 the same mount namespace as the caller of
33 .BR pivot_root ().
34 The caller of
35 .BR pivot_root ()
36 should ensure that processes with root or current working directory
37 at the old root operate correctly in either case.
38 An easy way to ensure this is to change their
39 root and current working directory to \fInew_root\fP before invoking
40 .BR pivot_root ().
41 Note also that
42 .BR pivot_root ()
43 may or may not affect the calling process's current working directory.
44 It is therefore recommended to call
45 \fBchdir("/")\fP immediately after
46 .BR pivot_root ().
47 .PP
48 The paragraph above is intentionally vague because at the time when
49 .BR pivot_root ()
50 was first implemented, it was unclear whether its effect
51 on other processes' root and current working directories\(emand
52 the caller's current working directory\(emmight change in the future.
53 However, the behavior has remained consistent since this system call
54 was first implemented:
55 .BR pivot_root ()
56 changes the root directory and the current working directory
57 of each process or thread in the same mount namespace to
58 .I new_root
59 if they point to the old root directory.
60 (See also NOTES.)
61 On the other hand,
62 .BR pivot_root ()
63 does not change the caller's current working directory
64 (unless it is on the old root directory),
65 and thus it should be followed by a
66 \fBchdir("/")\fP call.
67 .PP
68 The following restrictions apply:
69 .IP \- 3
70 .IR new_root
71 and
72 .IR put_old
73 must be directories.
74 .IP \-
75 \fInew_root\fP and \fIput_old\fP must not be on the same filesystem as
76 the current root.
77 In particular,
78 .IR new_root
79 can't be
80 .IR """/"""
81 (but can be a bind mounted directory on the current root filesystem).
82 .IP \-
83 \fIput_old\fP must be at or underneath \fInew_root\fP;
84 that is, adding a nonnegative
85 number of \fI/..\fP to the string pointed to by \fIput_old\fP must yield
86 the same directory as \fInew_root\fP.
87 .IP \-
88 .I new_root
89 must be a mount point.
90 (If it is not otherwise a mount point, it suffices to bind mount
91 .I new_root
92 on top of itself.)
93 .IP \-
94 The propagation type of the parent mount of
95 .IR new_root
96 and the parent mount of the current root directory must not be
97 .BR MS_SHARED ;
98 similarly, if
99 .I put_old
100 is an existing mount point, its propagation type must not be
101 .BR MS_SHARED .
102 These restrictions ensure that
103 .BR pivot_root ()
104 never propagates any changes to another mount namespace.
105 .IP \-
106 The current root directory must be a mount point.
107 .SH RETURN VALUE
108 On success, zero is returned.
109 On error, \-1 is returned, and
110 \fIerrno\fP is set appropriately.
111 .SH ERRORS
112 .BR pivot_root ()
113 may fail with any of the same errors as
114 .BR stat (2).
115 Additionally, it may fail with the following errors:
116 .TP
117 .B EBUSY
118 .\" Reconfirmed that the following error occurs on Linux 5.0 by
119 .\" specifying 'new_root' as "/rootfs" and 'put_old' as
120 .\" "/rootfs/oldrootfs", and *not* bind mounting "/rootfs" on top of
121 .\" itself. Of course, this is an odd situation, since a later check
122 .\" in the kernel code will in any case yield EINVAL if 'new_root' is
123 .\" not a mount point. However, when the system call was first added,
124 .\" 'new_root' was not required to be a mount point. So, this
125 .\" error is nowadays probably just the result of crufty accumulation.
126 .\" This error can also occur if we bind mount "/" on top of itself
127 .\" and try to specify "/" as the 'new' (again, an odd situation). So,
128 .\" the EBUSY check in the kernel does still seem necessary to prevent
129 .\" that case. Furthermore, the "or put_old" piece is probably
130 .\" redundant text (although the check is in the kernel), since,
131 .\" in another check, 'put_old' is required to be under 'new_root'.
132 .I new_root
133 or
134 .I put_old
135 is on the current root filesystem.
136 (This error covers the pathological case where
137 .I new_root
138 is
139 .IR """/""" .)
140 .TP
141 .B EINVAL
142 .I new_root
143 is not a mount point.
144 .TP
145 .B EINVAL
146 \fIput_old\fP is not underneath \fInew_root\fP.
147 .TP
148 .B EINVAL
149 The current root directory is not a mount point
150 (because of an earlier
151 .BR chroot (2)).
152 .TP
153 .B EINVAL
154 The current root is on the rootfs (initial ramfs) filesystem; see NOTES.
155 .TP
156 .B EINVAL
157 Either the mount point at
158 .IR new_root ,
159 or the parent mount of that mount point,
160 has propagation type
161 .BR MS_SHARED .
162 .TP
163 .B EINVAL
164 .I put_old
165 is a mount point and has the propagation type
166 .BR MS_SHARED .
167 .TP
168 .B ENOTDIR
169 \fInew_root\fP or \fIput_old\fP is not a directory.
170 .TP
171 .B EPERM
172 The calling process does not have the
173 .B CAP_SYS_ADMIN
174 capability.
175 .SH VERSIONS
176 .BR pivot_root ()
177 was introduced in Linux 2.3.41.
178 .SH CONFORMING TO
179 .BR pivot_root ()
180 is Linux-specific and hence is not portable.
181 .SH NOTES
182 Glibc does not provide a wrapper for this system call; call it using
183 .BR syscall (2).
184 .PP
185 A command-line interface for this system call is provided by
186 .BR pivot_root (8).
187 .PP
188 .BR pivot_root ()
189 allows the caller to switch to a new root filesystem while at the same time
190 placing the old root mount at a location under
191 .I new_root
192 from where it can subsequently be unmounted.
193 (The fact that it moves all processes that have a root directory
194 or current working directory on the old root filesystem to the
195 new root filesystem frees the old root filesystem of users,
196 allowing it to be unmounted more easily.)
197 .PP
198 A typical use of
199 .BR pivot_root ()
200 is during system startup, when the
201 system mounts a temporary root filesystem (e.g., an \fBinitrd\fP), then
202 mounts the real root filesystem, and eventually turns the latter into
203 the current root of all relevant processes or threads.
204 A modern use is to set up a root filesystem during
205 the creation of a container.
206 .PP
207 The fact that
208 .BR pivot_root ()
209 modifies process root and current working directories in the
210 manner noted in DESCRIPTION
211 is necessary in order to prevent kernel threads from keeping the old
212 root directory busy with their root and current working directory,
213 even if they never access
214 the filesystem in any way.
215 .PP
216 .I new_root
217 and
218 .I put_old
219 may be the same directory.
220 In particular, the following sequence allows a pivot-root operation
221 without needing to create and remove a temporary directory:
222 .PP
223 .in +4n
224 .EX
225 chdir(new_root);
226 pivot_root(".", ".");
227 umount2(".", MNT_DETACH);
228 .EE
229 .in
230 .PP
231 This sequence succeeds because the
232 .BR pivot_root ()
233 call stacks the old root mount point
234 on top of the new root mount point at
235 .IR / .
236 At that point, the calling process's root directory and current
237 working directory refer to the new root mount point
238 .RI ( new_root ).
239 During the subsequent
240 .BR umount2 ()
241 call, resolution of
242 .IR """."""
243 starts with
244 .I new_root
245 and then moves up the list of mounts stacked at
246 .IR / ,
247 with the result that the old root mount point is unmounted.
248 .PP
249 The rootfs (initial ramfs) cannot be
250 .BR pivot_root ()ed.
251 The recommended method of changing the root filesystem in this case is
252 to delete everything in rootfs, overmount rootfs with the new root, attach
253 .IR stdin / stdout / stderr
254 to the new
255 .IR /dev/console ,
256 and exec the new
257 .BR init (1).
258 Helper programs for this process exist; see
259 .BR switch_root (8).
260 .SH EXAMPLE
261 .\" FIXME
262 .\" Would it be better, because simpler, to use unshare(2)
263 .\" rather than clone(2) in the example below?
264 .PP
265 The program below demonstrates the use of
266 .BR pivot_root ()
267 inside a mount namespace that is created using
268 .BR clone (2).
269 After pivoting to the root directory named in the program's
270 first command-line argument, the child created by
271 .BR clone (2)
272 then executes the program named in the remaining command-line arguments.
273 .PP
274 We demonstrate the program by creating a directory that will serve as
275 the new root filesystem and placing a copy of the (statically linked)
276 .BR busybox (1)
277 executable in that directory.
278 .PP
279 .in +4n
280 .EX
281 $ \fBmkdir /tmp/rootfs\fP
282 $ \fBls \-id /tmp/rootfs\fP # Show inode number of new root directory
283 319459 /tmp/rootfs
284 $ \fBcp $(which busybox) /tmp/rootfs\fP
285 $ \fBPS1='bbsh$ ' sudo ./pivot_root_demo /tmp/rootfs /busybox sh\fP
286 bbsh$ \fBPATH=/\fP
287 bbsh$ \fBbusybox ln busybox ln\fP
288 bbsh$ \fBln busybox echo\fP
289 bbsh$ \fBln busybox ls\fP
290 bbsh$ \fBls\fP
291 busybox echo ln ls
292 bbsh$ \fBls \-id /\fP # Compare with inode number above
293 319459 /
294 bbsh$ \fBecho \(aqhello world\(aq\fP
295 hello world
296 .EE
297 .in
298 .SS Program source
299 \&
300 .PP
301 .EX
302 /* pivot_root_demo.c */
303
304 #define _GNU_SOURCE
305 #include <sched.h>
306 #include <stdio.h>
307 #include <stdlib.h>
308 #include <unistd.h>
309 #include <sys/wait.h>
310 #include <sys/syscall.h>
311 #include <sys/mount.h>
312 #include <sys/stat.h>
313 #include <limits.h>
314
315 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \e
316 } while (0) /* Report msg via perror(), then exit with EXIT_FAILURE */
317
318 static int /* Wrapper: glibc provides none, so go through syscall(2) */
319 pivot_root(const char *new_root, const char *put_old)
320 {
321 return syscall(SYS_pivot_root, new_root, put_old);
322 }
323
324 #define STACK_SIZE (1024 * 1024) /* Size in bytes of the stack handed to clone() */
325
326 static int /* Startup function for cloned child */
327 child(void *arg)
328 {
329 char **args = arg; /* args[0]: new root; args[1]...: command */
330 char *new_root = args[0];
331 const char *put_old = "/oldrootfs";
332 char path[PATH_MAX];
333
334 /* Ensure that \(aqnew_root\(aq and its parent mount don\(aqt have
335 shared propagation (which would cause pivot_root() to
336 return an error), and prevent propagation of mount
337 events to the initial mount namespace */
338
339 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) == \-1)
340 errExit("mount\-MS_PRIVATE");
341
342 /* Ensure that \(aqnew_root\(aq is a mount point */
343
344 if (mount(new_root, new_root, NULL, MS_BIND, NULL) == \-1)
345 errExit("mount\-MS_BIND");
346
347 /* Create directory to which old root will be pivoted */
348
349 snprintf(path, sizeof(path), "%s/%s", new_root, put_old);
350 if (mkdir(path, 0777) == \-1)
351 errExit("mkdir");
352
353 /* And pivot the root filesystem */
354
355 if (pivot_root(new_root, path) == \-1)
356 errExit("pivot_root");
357
358 /* Switch the current working directory to "/" */
359
360 if (chdir("/") == \-1)
361 errExit("chdir");
362
363 /* Unmount old root and remove mount point (failures are reported, not fatal) */
364
365 if (umount2(put_old, MNT_DETACH) == \-1)
366 perror("umount2");
367 if (rmdir(put_old) == \-1)
368 perror("rmdir");
369
370 /* Execute the command specified in args[1]... */
371
372 execv(args[1], &args[1]);
373 errExit("execv");
374 }
375
376 int
377 main(int argc, char *argv[]) /* argv[1]: new root; argv[2]...: command to run */
378 { /* NOTE(review): argc is never checked; confirm both arguments are always given */
379 /* Create a child process in a new mount namespace */
380
381 char *stack = malloc(STACK_SIZE);
382 if (stack == NULL)
383 errExit("malloc");
384
385 if (clone(child, stack + STACK_SIZE, /* Pass the end of the buffer as the stack */
386 CLONE_NEWNS | SIGCHLD, &argv[1]) == \-1)
387 errExit("clone");
388
389 /* Parent falls through to here; wait for child */
390
391 if (wait(NULL) == \-1)
392 errExit("wait");
393
394 exit(EXIT_SUCCESS);
395 }
396 .EE
397 .SH SEE ALSO
398 .BR chdir (2),
399 .BR chroot (2),
400 .BR mount (2),
401 .BR stat (2),
402 .BR initrd (4),
403 .BR mount_namespaces (7),
404 .BR pivot_root (8),
405 .BR switch_root (8)