]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/pivot_root.2
pivot_root.2: Remove BUGS section
[thirdparty/man-pages.git] / man2 / pivot_root.2
1 .\" Copyright (C) 2000 by Werner Almesberger
2 .\" and Copyright (C) 2019 Michael Kerrisk <mtk.manpages@gmail.com>
3 .\"
4 .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
5 .\" May be distributed under GPL
6 .\" %%%LICENSE_END
7 .\"
8 .\" Written 2000-02-23 by Werner Almesberger
9 .\" Modified 2004-06-17 Michael Kerrisk <mtk.manpages@gmail.com>
10 .\"
11 .TH PIVOT_ROOT 2 2019-08-02 "Linux" "Linux Programmer's Manual"
12 .SH NAME
13 pivot_root \- change the root filesystem
14 .SH SYNOPSIS
15 .BI "int pivot_root(const char *" new_root ", const char *" put_old );
16 .PP
17 .IR Note :
18 There is no glibc wrapper for this system call; see NOTES.
19 .SH DESCRIPTION
20 .BR pivot_root ()
21 changes the root filesystem in the mount namespace of the calling process.
22 More precisely, it moves the root filesystem to the
23 directory \fIput_old\fP and makes \fInew_root\fP the new root filesystem.
24 The calling process must have the
25 .B CAP_SYS_ADMIN
26 capability in the user namespace that owns the caller's mount namespace.
27 .PP
28 .BR pivot_root ()
29 may or may not change the current root and the current
30 working directory of any processes or threads that
31 use the old root directory and which are in
32 the same mount namespace as the caller of
33 .BR pivot_root ().
34 The caller of
35 .BR pivot_root ()
36 must ensure that processes with root or current working directory
37 at the old root operate correctly in either case.
38 An easy way to ensure this is to change their
39 root and current working directory to \fInew_root\fP before invoking
40 .BR pivot_root ().
41 .PP
42 The paragraph above is intentionally vague because the implementation of
43 .BR pivot_root ()
44 may change in the future
45 (or so it was thought when this system call was first added).
46 However,
47 the behavior on this point has remained consistent since
48 .BR pivot_root ()
49 was first implemented:
50 .BR pivot_root ()
51 changes the root directory and the current working directory
52 of each process or thread in the same mount namespace to
53 .I new_root
54 if they point to the old root directory.
55 See also NOTES.
56 .PP
57 Note that this also applies to the calling process:
58 .BR pivot_root ()
59 may or may not affect its current working directory.
60 It is therefore recommended to call
61 \fBchdir("/")\fP immediately after
62 .BR pivot_root ().
63 .PP
64 The following restrictions apply:
65 .IP \- 3
66 .IR new_root
67 and
68 .IR put_old
69 must be directories.
70 .IP \-
71 \fInew_root\fP and \fIput_old\fP must not be on the same filesystem as
72 the current root.
73 In particular,
74 .IR new_root
75 can't be
76 .IR """/"""
77 (but can be a bind mounted directory on the current root filesystem).
78 .IP \-
79 \fIput_old\fP must be underneath \fInew_root\fP, that is, adding a nonzero
80 number of \fI/..\fP to the string pointed to by \fIput_old\fP must yield
81 the same directory as \fInew_root\fP.
82 .IP \-
83 .I new_root
84 must be a mount point.
85 (If it is not otherwise a mount point, it suffices to bind mount
86 .I new_root
87 on top of itself.)
88 .IP \-
89 The propagation type of
90 .I new_root
91 and its parent mount must not be
92 .BR MS_SHARED ;
93 similarly, if
94 .I put_old
95 is an existing mount point, its propagation type must not be
96 .BR MS_SHARED .
97 .PP
98 See also
99 .BR pivot_root (8)
100 for additional usage examples.
101 .PP
102 If the current root is not a mount point (e.g., after an earlier
103 .BR chroot (2)
104 or
105 .BR pivot_root ()),
106 then the mount point of the filesystem containing the current root directory
107 (i.e., not the directory itself) is mounted on \fIput_old\fP.
108 .SH RETURN VALUE
109 On success, zero is returned.
110 On error, \-1 is returned, and
111 \fIerrno\fP is set appropriately.
112 .SH ERRORS
113 .BR pivot_root ()
114 may fail with any of the same errors as
115 .BR stat (2).
116 Additionally, it may fail with the following errors:
117 .TP
118 .B EBUSY
119 .\" Reconfirmed that the following error occurs on Linux 5.0 by
120 .\" specifying 'new_root' as "/rootfs" and 'put_old' as
121 .\" "/rootfs/oldrootfs", and *not* bind mounting "/rootfs" on top of
122 .\" itself. Of course, this is an odd situation, since a later check
123 .\" in the kernel code will in any case yield EINVAL if 'new_root' is
124 .\" not a mount point. However, when the system call was first added,
125 .\" 'new_root' was not required to be a mount point. So, this
126 .\" error is nowadays probably just the result of crufty accumulation.
127 .\" This error can also occur if we bind mount "/" on top of itself
128 .\" and try to specify "/" as 'new_root' (again, an odd situation). So,
129 .\" the EBUSY check in the kernel does still seem necessary to prevent
130 .\" that case. Furthermore, the "or put_old" piece is probably
131 .\" redundant text (although the check is in the kernel), since,
132 .\" in another check, 'put_old' is required to be under 'new_root'.
133 .I new_root
134 or
135 .I put_old
136 is on the current root filesystem.
137 (This error covers the pathological case where
138 .I new_root
139 is
140 .IR """/""" .)
141 .TP
142 .B EINVAL
143 .I new_root
144 is not a mount point.
145 .TP
146 .B EINVAL
147 \fIput_old\fP is not underneath \fInew_root\fP.
148 .TP
149 .B EINVAL
150 The current root is on the rootfs (initial ramfs) filesystem; see NOTES.
151 .TP
152 .B EINVAL
153 Either the mount point at
154 .IR new_root ,
155 or the parent mount of that mount point,
156 has propagation type
157 .BR MS_SHARED .
158 .TP
159 .B EINVAL
160 .I put_old
161 is a mount point and has the propagation type
162 .BR MS_SHARED .
163 .TP
164 .B ENOTDIR
165 \fInew_root\fP or \fIput_old\fP is not a directory.
166 .TP
167 .B EPERM
168 The calling process does not have the
169 .B CAP_SYS_ADMIN
170 capability.
171 .SH VERSIONS
172 .BR pivot_root ()
173 was introduced in Linux 2.3.41.
174 .SH CONFORMING TO
175 .BR pivot_root ()
176 is Linux-specific and hence is not portable.
177 .SH NOTES
178 Glibc does not provide a wrapper for this system call; call it using
179 .BR syscall (2).
180 .PP
181 .BR pivot_root ()
182 allows the caller to switch to a new root filesystem while at the same time
183 placing the old root mount at a location under
184 .I new_root
185 from where it can subsequently be unmounted.
186 (The fact that it moves all processes that have a root directory
187 or current working directory on the old root filesystem to the
188 new root filesystem frees the old root filesystem of users,
189 allowing it to be unmounted more easily.)
190 A typical use of
191 .BR pivot_root ()
192 is during system startup, when the
193 system mounts a temporary root filesystem (e.g., an \fBinitrd\fP), then
194 mounts the real root filesystem, and eventually turns the latter into
195 the current root of all relevant processes or threads.
196 A modern use is to set up a root filesystem during
197 the creation of a container.
198 .PP
199 The fact that
200 .BR pivot_root ()
201 modifies process root and current working directories in the
202 manner noted in DESCRIPTION
203 is necessary in order to prevent kernel threads from keeping the old
204 root directory busy with their root and current working directory,
205 even if they never access
206 the filesystem in any way.
207 Perhaps one day there may be a mechanism for
208 kernel threads to explicitly relinquish any access to the filesystem,
209 such that this fairly intrusive mechanism can be removed from
210 .BR pivot_root ().
211 .PP
212 The rootfs (initial ramfs) cannot be
213 .BR pivot_root ()ed.
214 The recommended method of changing the root filesystem in this case is
215 to delete everything in rootfs, overmount rootfs with the new root, attach
216 .IR stdin / stdout / stderr
217 to the new
218 .IR /dev/console ,
219 and exec the new
220 .BR init (1).
221 Helper programs for this process exist; see
222 .BR switch_root (8).
223 .SH EXAMPLE
224 .PP
225 The program below demonstrates the use of
226 .BR pivot_root ()
227 inside a mount namespace that is created using
228 .BR clone (2).
229 After pivoting to the root directory named in the program's
230 first command-line argument, the child created by
231 .BR clone (2)
232 then executes the program named in the remaining command-line arguments.
233 .PP
234 We demonstrate the program by creating a directory that will serve as
235 the new root filesystem and placing a copy of the (statically linked)
236 .BR busybox (1)
237 executable in that directory.
238 .PP
239 .in +4n
240 .EX
241 $ \fBmkdir /tmp/rootfs\fP
242 $ \fBls \-id /tmp/rootfs\fP # Show inode number of new root directory
243 319459 /tmp/rootfs
244 $ \fBcp $(which busybox) /tmp/rootfs\fP
245 $ \fBPS1='bbsh$ ' sudo ./pivot_root_demo /tmp/rootfs /busybox sh\fP
246 bbsh$ \fBPATH=/\fP
247 bbsh$ \fBbusybox ln busybox ln\fP
248 bbsh$ \fBln busybox echo\fP
249 bbsh$ \fBln busybox ls\fP
250 bbsh$ \fBls\fP
251 busybox echo ln ls
252 bbsh$ \fBls \-id /\fP # Compare with inode number above
253 319459 /
254 bbsh$ \fBecho \(aqhello world\(aq\fP
255 hello world
256 .EE
257 .in
258 .SS Program source
259 \&
260 .PP
261 .EX
262 /* pivot_root_demo.c */
263
264 #define _GNU_SOURCE
265 #include <sched.h>
266 #include <stdio.h>
267 #include <stdlib.h>
268 #include <unistd.h>
269 #include <sys/wait.h>
270 #include <sys/syscall.h>
271 #include <sys/mount.h>
272 #include <sys/stat.h>
273 #include <limits.h>
274
275 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \e
276 } while (0) /* Report msg via perror(), then terminate */
277
278 static int /* No glibc wrapper exists; invoke via syscall(2) */
279 pivot_root(const char *new_root, const char *put_old)
280 {
281 return syscall(SYS_pivot_root, new_root, put_old);
282 }
283
284 #define STACK_SIZE (1024 * 1024) /* Size of stack allocated for child */
285
286 static int /* Startup function for cloned child */
287 child(void *arg)
288 {
289 char **args = arg;
290 char *new_root = args[0];
291 const char *put_old = "/oldrootfs";
292 char path[PATH_MAX];
293
294 /* Ensure that \(aqnew_root\(aq and its parent mount don\(aqt have
295 shared propagation (which would cause pivot_root() to
296 return an error), and prevent propagation of mount
297 events to the initial mount namespace */
298
299 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) == \-1)
300 errExit("mount\-MS_PRIVATE");
301
302 /* Ensure that \(aqnew_root\(aq is a mount point */
303
304 if (mount(new_root, new_root, NULL, MS_BIND, NULL) == \-1)
305 errExit("mount\-MS_BIND");
306
307 /* Create directory to which old root will be pivoted */
308
309 snprintf(path, sizeof(path), "%s/%s", new_root, put_old);
310 if (mkdir(path, 0777) == \-1)
311 errExit("mkdir");
312
313 /* And pivot the root filesystem */
314
315 if (pivot_root(new_root, path) == \-1)
316 errExit("pivot_root");
317
318 /* Switch the current working directory to "/" */
319
320 if (chdir("/") == \-1)
321 errExit("chdir");
322
323 /* Unmount old root and remove mount point (errors not fatal) */
324
325 if (umount2(put_old, MNT_DETACH) == \-1)
326 perror("umount2");
327 if (rmdir(put_old) == \-1)
328 perror("rmdir");
329
330 /* Execute the command specified in args[1]... */
331
332 execv(args[1], &args[1]);
333 errExit("execv");
334 }
335
336 int /* Usage: pivot_root_demo new-root command [arg...] */
337 main(int argc, char *argv[]) /* NOTE(review): argc is never validated */
338 {
339 /* Create a child process in a new mount namespace */
340
341 char *stack = malloc(STACK_SIZE); /* Stack for the cloned child */
342 if (stack == NULL)
343 errExit("malloc");
344
345 if (clone(child, stack + STACK_SIZE, /* End of stack region */
346 CLONE_NEWNS | SIGCHLD, &argv[1]) == \-1) /* Skip argv[0] */
347 errExit("clone");
348
349 /* Parent falls through to here; wait for child */
350
351 if (wait(NULL) == \-1)
352 errExit("wait");
353
354 exit(EXIT_SUCCESS);
355 }
356 .EE
357 .SH SEE ALSO
358 .BR chdir (2),
359 .BR chroot (2),
360 .BR mount (2),
361 .BR stat (2),
362 .BR initrd (4),
363 .BR mount_namespaces (7),
364 .BR pivot_root (8),
365 .BR switch_root (8)