]>
Commit | Line | Data |
---|---|---|
fea681da | 1 | .\" Copyright (C) 2000 by Werner Almesberger |
83cc245d | 2 | .\" and Copyright (C) 2019 Michael Kerrisk <mtk.manpages@gmail.com> |
2297bf0e | 3 | .\" |
b55e2bb3 | 4 | .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE) |
fea681da | 5 | .\" May be distributed under GPL |
b55e2bb3 | 6 | .\" %%%LICENSE_END |
fea681da MK |
7 | .\" |
8 | .\" Written 2000-02-23 by Werner Almesberger | |
c11b1abf | 9 | .\" Modified 2004-06-17 Michael Kerrisk <mtk.manpages@gmail.com> |
fea681da | 10 | .\" |
63121bd4 | 11 | .TH PIVOT_ROOT 2 2019-08-02 "Linux" "Linux Programmer's Manual" |
fea681da | 12 | .SH NAME |
9ee4a2b6 | 13 | pivot_root \- change the root filesystem |
fea681da | 14 | .SH SYNOPSIS |
fea681da | 15 | .BI "int pivot_root(const char *" new_root ", const char *" put_old ); |
dbfe9c70 | 16 | .PP |
45c99e3e MK |
17 | .IR Note : |
18 | There is no glibc wrapper for this system call; see NOTES. | |
fea681da | 19 | .SH DESCRIPTION |
60a90ecd | 20 | .BR pivot_root () |
7cc1a16d MK |
21 | changes the root filesystem in the mount namespace of the calling process. |
22 | More precisely, it moves the root filesystem to the | |
23 | directory \fIput_old\fP and makes \fInew_root\fP the new root filesystem. | |
fdc558bd MK |
24 | The calling process must have the |
25 | .B CAP_SYS_ADMIN | |
26 | capability in the user namespace that owns the caller's mount namespace. | |
efeece04 | 27 | .PP |
60a90ecd MK |
28 | .BR pivot_root () |
29 | may or may not change the current root and the current | |
ac2eb791 MK |
30 | working directory of any processes or threads that |
31 | use the old root directory and which are in | |
32 | the same mount namespace as the caller of | |
33 | .BR pivot_root (). | |
60a90ecd MK |
34 | The caller of |
35 | .BR pivot_root () | |
682e1329 | 36 | should ensure that processes with root or current working directory |
edd1fa35 | 37 | at the old root operate correctly in either case. |
c13182ef | 38 | An easy way to ensure this is to change their |
edd1fa35 | 39 | root and current working directory to \fInew_root\fP before invoking |
60a90ecd | 40 | .BR pivot_root (). |
4a8b7d7b MK |
41 | Note also that |
42 | .BR pivot_root () | |
43 | may or may not affect the calling process's current working directory. | |
44 | It is therefore recommended to call | |
45 | \fBchdir("/")\fP immediately after | |
46 | .BR pivot_root (). | |
efeece04 | 47 | .PP |
682e1329 | 48 | The paragraph above is intentionally vague because at the time when |
a94f69d6 | 49 | .BR pivot_root () |
682e1329 MK |
50 | was first implemented, it was unclear whether its effect |
51 | on other processes' root and current working directories\(emand | |
52 | the caller's current working directory\(emmight change in the future. | |
53 | However, the behavior has remained consistent since this system call | |
a94f69d6 | 54 | was first implemented: |
60a90ecd | 55 | .BR pivot_root () |
81b24320 MK |
56 | changes the root directory and the current working directory |
57 | of each process or thread in the same mount namespace to | |
58 | .I new_root | |
59 | if they point to the old root directory. | |
682e1329 MK |
60 | (See also NOTES.) |
61 | On the other hand, | |
62 | .BR pivot_root () | |
63 | does not change the caller's current working directory | |
64 | (unless it is on the old root directory), | |
65 | and thus it should be followed by a | |
66 | \fBchdir("/")\fP call. | |
efeece04 | 67 | .PP |
41d4557c | 68 | The following restrictions apply: |
fea681da | 69 | .IP \- 3 |
41d4557c MK |
70 | .IR new_root |
71 | and | |
72 | .IR put_old | |
73 | must be directories. | |
0ac6f900 | 74 | .IP \- |
33313a26 MK |
75 | .I new_root |
76 | and | |
77 | .I put_old | |
78 | must not be on the same mount as the current root. | |
0ac6f900 | 79 | .IP \- |
57bab66a MK |
80 | \fIput_old\fP must be at or underneath \fInew_root\fP; |
81 | that is, adding a nonnegative | |
8478ee02 | 82 | number of \fI/..\fP to the string pointed to by \fIput_old\fP must yield |
fea681da | 83 | the same directory as \fInew_root\fP. |
0ac6f900 | 84 | .IP \- |
37704bfc | 85 | .I new_root |
666373fc | 86 | must be a path to a mount point, but can't be |
9f3af6b8 | 87 | .IR """/""" . |
666373fc MK |
88 | A path that is not already a mount point can be converted into one by |
89 | bind mounting the path onto itself. | |
0ac6f900 | 90 | .IP \- |
d4b2104a MK |
91 | The propagation type of the parent mount of |
92 | .IR new_root | |
93 | and the parent mount of the current root directory must not be | |
a39e880f MK |
94 | .BR MS_SHARED ; |
95 | similarly, if | |
96 | .I put_old | |
97 | is an existing mount point, its propagation type must not be | |
1a0b1fd7 | 98 | .BR MS_SHARED . |
9d33e03b MK |
99 | These restrictions ensure that |
100 | .BR pivot_root () | |
101 | never propagates any changes to another mount namespace. | |
eb9078a7 MK |
102 | .IP \- |
103 | The current root directory must be a mount point. | |
47297adb | 104 | .SH RETURN VALUE |
c13182ef MK |
105 | On success, zero is returned. |
106 | On error, \-1 is returned, and | |
fea681da MK |
107 | \fIerrno\fP is set appropriately. |
108 | .SH ERRORS | |
60a90ecd | 109 | .BR pivot_root () |
5f5751d3 | 110 | may fail with any of the same errors as |
60a90ecd | 111 | .BR stat (2). |
5f5751d3 | 112 | Additionally, it may fail with the following errors: |
fea681da MK |
113 | .TP |
114 | .B EBUSY | |
b647c4c9 MK |
115 | .\" Reconfirmed that the following error occurs on Linux 5.0 by |
116 | .\" specifying 'new_root' as "/rootfs" and 'put_old' as | |
117 | .\" "/rootfs/oldrootfs", and *not* bind mounting "/rootfs" on top of | |
118 | .\" itself. Of course, this is an odd situation, since a later check | |
119 | .\" in the kernel code will in any case yield EINVAL if 'new_root' is | |
120 | .\" not a mount point. However, when the system call was first added, | |
bf421740 | 121 | .\" 'new_root' was not required to be a mount point. So, this |
b647c4c9 MK |
122 | .\" error is nowadays probably just the result of crufty accumulation. |
123 | .\" This error can also occur if we bind mount "/" on top of itself | |
124 | .\" and try to specify "/" as the 'new_root' (again, an odd situation). So, | |
125 | .\" the EBUSY check in the kernel does still seem necessary to prevent | |
126 | .\" that case. Furthermore, the "or put_old" piece is probably | |
127 | .\" redundant text (although the check is in the kernel), since, | |
128 | .\" in another check, 'put_old' is required to be under 'new_root'. | |
129 | .I new_root | |
130 | or | |
131 | .I put_old | |
132 | is on the current root filesystem. | |
133 | (This error covers the pathological case where | |
134 | .I new_root | |
135 | is | |
136 | .IR """/""" .) | |
fea681da MK |
137 | .TP |
138 | .B EINVAL | |
37704bfc MK |
139 | .I new_root |
140 | is not a mount point. | |
141 | .TP | |
142 | .B EINVAL | |
fea681da MK |
143 | \fIput_old\fP is not underneath \fInew_root\fP. |
144 | .TP | |
dc9b6c92 | 145 | .B EINVAL |
eb9078a7 MK |
146 | The current root directory is not a mount point |
147 | (because of an earlier | |
148 | .BR chroot (2)). | |
149 | .TP | |
150 | .B EINVAL | |
0c2329cd | 151 | The current root is on the rootfs (initial ramfs) filesystem; see NOTES. |
dc9b6c92 | 152 | .TP |
1a0b1fd7 MK |
153 | .B EINVAL |
154 | Either the mount point at | |
155 | .IR new_root , | |
156 | or the parent mount of that mount point, | |
157 | has propagation type | |
158 | .BR MS_SHARED . | |
159 | .TP | |
a39e880f MK |
160 | .B EINVAL |
161 | .I put_old | |
162 | is a mount point and has the propagation type | |
163 | .BR MS_SHARED . | |
164 | .TP | |
fea681da MK |
165 | .B ENOTDIR |
166 | \fInew_root\fP or \fIput_old\fP is not a directory. | |
167 | .TP | |
168 | .B EPERM | |
edd1fa35 | 169 | The calling process does not have the |
fea681da MK |
170 | .B CAP_SYS_ADMIN |
171 | capability. | |
a1d5f77c MK |
172 | .SH VERSIONS |
173 | .BR pivot_root () | |
174 | was introduced in Linux 2.3.41. | |
47297adb | 175 | .SH CONFORMING TO |
a1d5f77c | 176 | .BR pivot_root () |
8382f16d | 177 | is Linux-specific and hence is not portable. |
f5b03186 MK |
178 | .SH NOTES |
179 | Glibc does not provide a wrapper for this system call; call it using | |
180 | .BR syscall (2). | |
82320f42 | 181 | .PP |
14caaed2 MK |
182 | A command-line interface for this system call is provided by |
183 | .BR pivot_root (8). | |
184 | .PP | |
422e36b7 MK |
185 | .BR pivot_root () |
186 | allows the caller to switch to a new root filesystem while at the same time | |
187 | placing the old root mount at a location under | |
188 | .I new_root | |
189 | from where it can subsequently be unmounted. | |
190 | (The fact that it moves all processes that have a root directory | |
b27d444f MK |
191 | or current working directory on the old root directory to the |
192 | new root frees the old root directory of users, | |
33313a26 | 193 | allowing the old root mount to be unmounted more easily.) |
c4bf3333 | 194 | .PP |
422e36b7 MK |
195 | A typical use of |
196 | .BR pivot_root () | |
197 | is during system startup, when the | |
198 | system mounts a temporary root filesystem (e.g., an \fBinitrd\fP), then | |
199 | mounts the real root filesystem, and eventually turns the latter into | |
200 | the current root of all relevant processes or threads. | |
201 | A modern use is to set up a root filesystem during | |
202 | the creation of a container. | |
203 | .PP | |
fc2f474d MK |
204 | The fact that |
205 | .BR pivot_root () | |
206 | modifies process root and current working directories in the | |
207 | manner noted in DESCRIPTION | |
208 | is necessary in order to prevent kernel threads from keeping the old | |
209 | root directory busy with their root and current working directory, | |
210 | even if they never access | |
211 | the filesystem in any way. | |
fc2f474d | 212 | .PP |
57bab66a MK |
213 | .I new_root |
214 | and | |
215 | .I put_old | |
216 | may be the same directory. | |
217 | In particular, the following sequence allows a pivot-root operation | |
218 | without needing to create and remove a temporary directory: | |
219 | .PP | |
220 | .in +4n | |
221 | .EX | |
222 | chdir(new_root); | |
223 | pivot_root(".", "."); | |
224 | umount2(".", MNT_DETACH); | |
225 | .EE | |
226 | .in | |
227 | .PP | |
228 | This sequence succeeds because the | |
229 | .BR pivot_root () | |
230 | call stacks the old root mount point | |
57bab66a MK |
231 | on top of the new root mount point at |
232 | .IR / . | |
233 | At that point, the calling process's root directory and current | |
234 | working directory refer to the new root mount point | |
235 | .RI ( new_root ). | |
236 | During the subsequent | |
237 | .BR umount2 () | |
238 | call, resolution of | |
239 | .IR """.""" | |
240 | starts with | |
241 | .I new_root | |
242 | and then moves up the list of mounts stacked at | |
243 | .IR / , | |
8f2a9129 | 244 | with the result that the old root mount point is unmounted. |
57bab66a | 245 | .PP |
82320f42 EK |
246 | The rootfs (initial ramfs) cannot be |
247 | .BR pivot_root ()ed. | |
52fc743c MK |
248 | The recommended method of changing the root filesystem in this case is |
249 | to delete everything in rootfs, overmount rootfs with the new root, attach | |
82320f42 EK |
250 | .IR stdin / stdout / stderr |
251 | to the new | |
252 | .IR /dev/console , | |
52fc743c MK |
253 | and exec the new |
254 | .BR init (1). | |
255 | Helper programs for this process exist; see | |
82320f42 | 256 | .BR switch_root (8). |
2f2e1a22 | 257 | .SH EXAMPLE |
47b69a37 MK |
258 | .\" FIXME |
259 | .\" Would it be better, because simpler, to use unshare(2) | |
260 | .\" rather than clone(2) in the example below? | |
2f2e1a22 MK |
261 | .PP |
262 | The program below demonstrates the use of | |
263 | .BR pivot_root () | |
264 | inside a mount namespace that is created using | |
265 | .BR clone (2). | |
266 | After pivoting to the root directory named in the program's | |
267 | first command-line argument, the child created by | |
268 | .BR clone (2) | |
269 | then executes the program named in the remaining command-line arguments. | |
270 | .PP | |
271 | We demonstrate the program by creating a directory that will serve as | |
272 | the new root filesystem and placing a copy of the (statically linked) | |
273 | .BR busybox (1) | |
274 | executable in that directory. | |
275 | .PP | |
276 | .in +4n | |
277 | .EX | |
278 | $ \fBmkdir /tmp/rootfs\fP | |
279 | $ \fBls \-id /tmp/rootfs\fP # Show inode number of new root directory | |
280 | 319459 /tmp/rootfs | |
281 | $ \fBcp $(which busybox) /tmp/rootfs\fP | |
282 | $ \fBPS1='bbsh$ ' sudo ./pivot_root_demo /tmp/rootfs /busybox sh\fP | |
283 | bbsh$ \fBPATH=/\fP | |
284 | bbsh$ \fBbusybox ln busybox ln\fP | |
285 | bbsh$ \fBln busybox echo\fP | |
286 | bbsh$ \fBln busybox ls\fP | |
287 | bbsh$ \fBls\fP | |
288 | busybox echo ln ls | |
289 | bbsh$ \fBls \-id /\fP # Compare with inode number above | |
290 | 319459 / | |
291 | bbsh$ \fBecho \(aqhello world\(aq\fP | |
292 | hello world | |
293 | .EE | |
294 | .in | |
295 | .SS Program source | |
296 | \& | |
297 | .PP | |
298 | .EX | |
299 | /* pivot_root_demo.c */ | |
300 | ||
301 | #define _GNU_SOURCE | |
302 | #include <sched.h> | |
303 | #include <stdio.h> | |
304 | #include <stdlib.h> | |
305 | #include <unistd.h> | |
306 | #include <sys/wait.h> | |
307 | #include <sys/syscall.h> | |
308 | #include <sys/mount.h> | |
309 | #include <sys/stat.h> | |
310 | #include <limits.h> | |
311 | ||
312 | #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \e | |
313 | } while (0) | |
314 | ||
315 | static int /* Wrapper: invokes the pivot_root system call via syscall(2) */ | |
316 | pivot_root(const char *new_root, const char *put_old) | |
317 | { | |
318 | return syscall(SYS_pivot_root, new_root, put_old); | |
319 | } | |
320 | ||
321 | #define STACK_SIZE (1024 * 1024) | |
322 | ||
323 | static int /* Startup function for cloned child */ | |
324 | child(void *arg) | |
325 | { | |
326 | char **args = arg; /* args[0]: new root; args[1..]: command */ | |
327 | char *new_root = args[0]; | |
328 | const char *put_old = "/oldrootfs"; | |
329 | char path[PATH_MAX]; | |
330 | ||
331 | /* Ensure that \(aqnew_root\(aq and its parent mount don\(aqt have | |
332 | shared propagation (which would cause pivot_root() to | |
333 | return an error), and prevent propagation of mount | |
334 | events to the initial mount namespace */ | |
335 | ||
336 | if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL) == \-1) | |
337 | errExit("mount\-MS_PRIVATE"); | |
338 | ||
339 | /* Ensure that \(aqnew_root\(aq is a mount point */ | |
340 | ||
341 | if (mount(new_root, new_root, NULL, MS_BIND, NULL) == \-1) | |
342 | errExit("mount\-MS_BIND"); | |
343 | ||
344 | /* Create directory to which old root will be pivoted */ | |
345 | ||
346 | snprintf(path, sizeof(path), "%s/%s", new_root, put_old); | |
347 | if (mkdir(path, 0777) == \-1) | |
348 | errExit("mkdir"); | |
349 | ||
350 | /* And pivot the root filesystem */ | |
351 | ||
352 | if (pivot_root(new_root, path) == \-1) | |
353 | errExit("pivot_root"); | |
354 | ||
bf421740 | 355 | /* Switch the current working directory to "/" */ |
2f2e1a22 MK |
356 | |
357 | if (chdir("/") == \-1) | |
358 | errExit("chdir"); | |
359 | ||
360 | /* Unmount old root and remove mount point */ | |
361 | ||
362 | if (umount2(put_old, MNT_DETACH) == \-1) | |
363 | perror("umount2"); | |
364 | if (rmdir(put_old) == \-1) | |
365 | perror("rmdir"); | |
366 | ||
367 | /* Execute the command specified in args[1]... */ | |
368 | ||
369 | execv(args[1], &args[1]); | |
370 | errExit("execv"); | |
371 | } | |
372 | ||
373 | int /* Runs child() in a new mount namespace and waits for it */ | |
374 | main(int argc, char *argv[]) | |
375 | { | |
376 | /* Create a child process in a new mount namespace */ | |
377 | ||
378 | char *stack = malloc(STACK_SIZE); | |
379 | if (stack == NULL) | |
380 | errExit("malloc"); | |
381 | ||
382 | if (clone(child, stack + STACK_SIZE, /* pass end of the buffer */ | |
383 | CLONE_NEWNS | SIGCHLD, &argv[1]) == \-1) /* child sees argv[1] onward */ | |
384 | errExit("clone"); | |
385 | ||
386 | /* Parent falls through to here; wait for child */ | |
387 | ||
388 | if (wait(NULL) == \-1) | |
389 | errExit("wait"); | |
390 | ||
391 | exit(EXIT_SUCCESS); | |
392 | } | |
393 | .EE | |
47297adb | 394 | .SH SEE ALSO |
fea681da MK |
395 | .BR chdir (2), |
396 | .BR chroot (2), | |
34a0f19c | 397 | .BR mount (2), |
fea681da MK |
398 | .BR stat (2), |
399 | .BR initrd (4), | |
f42778c4 | 400 | .BR mount_namespaces (7), |
b2bced6d MK |
401 | .BR pivot_root (8), |
402 | .BR switch_root (8) |