]>
Commit | Line | Data |
---|---|---|
fea681da | 1 | .\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992 |
8c7b566c | 2 | .\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013 |
2297bf0e | 3 | .\" |
fd0fc519 | 4 | .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE) |
fea681da | 5 | .\" May be distributed under the GNU General Public License. |
fd0fc519 | 6 | .\" %%%LICENSE_END |
dccaff1e | 7 | .\" |
fea681da MK |
8 | .\" Modified by Michael Haardt <michael@moria.de> |
9 | .\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu> | |
10 | .\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>: | |
11 | .\" New man page (copied from 'fork.2'). | |
12 | .\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl> | |
13 | .\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr> | |
14 | .\" Modified 26 Jun 2001 by Michael Kerrisk | |
15 | .\" Mostly upgraded to 2.4.x | |
16 | .\" Added prototype for sys_clone() plus description | |
17 | .\" Added CLONE_THREAD with a brief description of thread groups | |
c13182ef | 18 | .\" Added CLONE_PARENT and revised entire page to remove ambiguity |
fea681da MK |
19 | .\" between "calling process" and "parent process" |
20 | .\" Added CLONE_PTRACE and CLONE_VFORK | |
21 | .\" Added EPERM and EINVAL error codes | |
fd8a5be4 | 22 | .\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>) |
fea681da | 23 | .\" various other minor tidy ups and clarifications. |
c11b1abf | 24 | .\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com> |
d9bfdb9c | 25 | .\" Updated notes for 2.4.7+ behavior of CLONE_THREAD |
c11b1abf | 26 | .\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com> |
fea681da MK |
27 | .\" Added description for CLONE_NEWNS, which was added in 2.4.19 |
28 | .\" Slightly rephrased, aeb. | |
29 | .\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb. | |
30 | .\" Modified 1 Jan 2004 - various updates, aeb | |
0967c11f | 31 | .\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb. |
d9bfdb9c | 32 | .\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid() |
31830ef0 | 33 | .\" wrapper under BUGS. |
fd8a5be4 MK |
34 | .\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED. |
35 | .\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD. | |
4e836144 | 36 | .\" 2008-11-18, mtk, order CLONE_* flags alphabetically |
82ee147a | 37 | .\" 2008-11-18, mtk, document CLONE_NEWPID |
43ce9dda | 38 | .\" 2008-11-19, mtk, document CLONE_NEWUTS |
667417b3 | 39 | .\" 2008-11-19, mtk, document CLONE_NEWIPC |
cfdc761b | 40 | .\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO |
fea681da | 41 | .\" |
185341d4 MK |
42 | .\" FIXME Document CLONE_NEWUSER, which is new in 2.6.23 |
43 | .\" (also supported for unshare()?) | |
360ed6b3 | 44 | .\" |
e585064b | 45 | .TH CLONE 2 2013-04-16 "Linux" "Linux Programmer's Manual" |
fea681da | 46 | .SH NAME |
9b0e0996 | 47 | clone, __clone2 \- create a child process |
fea681da | 48 | .SH SYNOPSIS |
c10859eb | 49 | .nf |
81f10dad MK |
50 | /* Prototype for the glibc wrapper function */ |
51 | ||
fea681da | 52 | .B #include <sched.h> |
c10859eb | 53 | |
ff929e3b MK |
54 | .BI "int clone(int (*" "fn" ")(void *), void *" child_stack , |
55 | .BI " int " flags ", void *" "arg" ", ... " | |
d3dbc9b1 | 56 | .BI " /* pid_t *" ptid ", struct user_desc *" tls \ |
ff929e3b | 57 | ", pid_t *" ctid " */ );" |
81f10dad | 58 | |
e585064b | 59 | /* Prototype for the raw system call */ |
81f10dad MK |
60 | |
61 | .BI "long clone(unsigned long " flags ", void *" child_stack , | |
62 | .BI " void *" ptid ", void *" ctid , | |
63 | .BI " struct pt_regs *" regs ); | |
c10859eb | 64 | .fi |
e73b3103 MK |
65 | .sp |
66 | .in -4n | |
81f10dad | 67 | Feature Test Macro Requirements for glibc wrapper function (see |
e73b3103 MK |
68 | .BR feature_test_macros (7)): |
69 | .in | |
70 | .sp | |
71 | .BR clone (): | |
72 | .ad l | |
73 | .RS 4 | |
74 | .PD 0 | |
75 | .TP 4 | |
76 | Since glibc 2.14: | |
77 | _GNU_SOURCE | |
78 | .TP 4 | |
79 | .\" FIXME See http://sources.redhat.com/bugzilla/show_bug.cgi?id=4749 | |
80 | Before glibc 2.14: | |
81 | _BSD_SOURCE || _SVID_SOURCE | |
82 | /* _GNU_SOURCE also suffices */ | |
83 | .PD | |
84 | .RE | |
85 | .ad b | |
fea681da | 86 | .SH DESCRIPTION |
edcc65ff MK |
87 | .BR clone () |
88 | creates a new process, in a manner similar to | |
fea681da | 89 | .BR fork (2). |
81f10dad MK |
90 | |
91 | This page describes both the glibc | |
e511ffb6 | 92 | .BR clone () |
e585064b | 93 | wrapper function and the underlying system call on which it is based. |
81f10dad | 94 | The main text describes the wrapper function; |
e585064b | 95 | the differences for the raw system call |
81f10dad | 96 | are described toward the end of this page. |
fea681da MK |
97 | |
98 | Unlike | |
99 | .BR fork (2), | |
81f10dad MK |
100 | .BR clone () |
101 | allows the child process to share parts of its execution context with | |
fea681da | 102 | the calling process, such as the memory space, the table of file |
c13182ef MK |
103 | descriptors, and the table of signal handlers. |
104 | (Note that on this manual | |
105 | page, "calling process" normally corresponds to "parent process". | |
106 | But see the description of | |
107 | .B CLONE_PARENT | |
fea681da MK |
108 | below.) |
109 | ||
110 | The main use of | |
edcc65ff | 111 | .BR clone () |
fea681da MK |
112 | is to implement threads: multiple threads of control in a program that |
113 | run concurrently in a shared memory space. | |
114 | ||
115 | When the child process is created with | |
c13182ef | 116 | .BR clone (), |
fea681da | 117 | it executes the function |
c13182ef | 118 | .IR fn ( arg ). |
fea681da | 119 | (This differs from |
c13182ef | 120 | .BR fork (2), |
fea681da | 121 | where execution continues in the child from the point |
c13182ef MK |
122 | of the |
123 | .BR fork (2) | |
fea681da MK |
124 | call.) |
125 | The | |
126 | .I fn | |
127 | argument is a pointer to a function that is called by the child | |
128 | process at the beginning of its execution. | |
129 | The | |
130 | .I arg | |
131 | argument is passed to the | |
132 | .I fn | |
133 | function. | |
134 | ||
c13182ef | 135 | When the |
fea681da | 136 | .IR fn ( arg ) |
c13182ef MK |
137 | function application returns, the child process terminates. |
138 | The integer returned by | |
fea681da | 139 | .I fn |
c13182ef MK |
140 | is the exit code for the child process. |
141 | The child process may also terminate explicitly by calling | |
fea681da MK |
142 | .BR exit (2) |
143 | or after receiving a fatal signal. | |
144 | ||
145 | The | |
146 | .I child_stack | |
c13182ef MK |
147 | argument specifies the location of the stack used by the child process. |
148 | Since the child and calling process may share memory, | |
fea681da | 149 | it is not possible for the child process to execute in the |
c13182ef MK |
150 | same stack as the calling process. |
151 | The calling process must therefore | |
fea681da MK |
152 | set up memory space for the child stack and pass a pointer to this |
153 | space to | |
edcc65ff | 154 | .BR clone (). |
5fab2e7c | 155 | Stacks grow downward on all processors that run Linux |
fea681da MK |
156 | (except the HP PA processors), so |
157 | .I child_stack | |
158 | usually points to the topmost address of the memory space set up for | |
159 | the child stack. | |
160 | ||
161 | The low byte of | |
162 | .I flags | |
fd8a5be4 MK |
163 | contains the number of the |
164 | .I "termination signal" | |
165 | sent to the parent when the child dies. | |
166 | If this signal is specified as anything other than | |
fea681da MK |
167 | .BR SIGCHLD , |
168 | then the parent process must specify the | |
c13182ef MK |
169 | .B __WALL |
170 | or | |
fea681da | 171 | .B __WCLONE |
c13182ef MK |
172 | options when waiting for the child with |
173 | .BR wait (2). | |
fea681da MK |
174 | If no signal is specified, then the parent process is not signaled |
175 | when the child terminates. | |
176 | ||
177 | .I flags | |
fd8a5be4 MK |
178 | may also be bitwise-or'ed with zero or more of the following constants, |
179 | in order to specify what is shared between the calling process | |
fea681da | 180 | and the child process: |
fea681da | 181 | .TP |
f5dbc7c8 MK |
182 | .BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)" |
183 | Erase child thread ID at location | |
d3dbc9b1 | 184 | .I ctid |
f5dbc7c8 MK |
185 | in child memory when the child exits, and do a wakeup on the futex |
186 | at that address. | |
187 | The address involved may be changed by the | |
188 | .BR set_tid_address (2) | |
189 | system call. | |
190 | This is used by threading libraries. | |
191 | .TP | |
192 | .BR CLONE_CHILD_SETTID " (since Linux 2.5.49)" | |
193 | Store child thread ID at location | |
d3dbc9b1 | 194 | .I ctid |
f5dbc7c8 MK |
195 | in child memory. |
196 | .TP | |
1603d6a1 | 197 | .BR CLONE_FILES " (since Linux 2.0)" |
fea681da | 198 | If |
f5dbc7c8 MK |
199 | .B CLONE_FILES |
200 | is set, the calling process and the child process share the same file | |
201 | descriptor table. | |
202 | Any file descriptor created by the calling process or by the child | |
203 | process is also valid in the other process. | |
204 | Similarly, if one of the processes closes a file descriptor, | |
205 | or changes its associated flags (using the | |
206 | .BR fcntl (2) | |
207 | .B F_SETFD | |
208 | operation), the other process is also affected. | |
fea681da MK |
209 | |
210 | If | |
f5dbc7c8 MK |
211 | .B CLONE_FILES |
212 | is not set, the child process inherits a copy of all file descriptors | |
213 | opened in the calling process at the time of | |
214 | .BR clone (). | |
215 | (The duplicated file descriptors in the child refer to the | |
216 | same open file descriptions (see | |
217 | .BR open (2)) | |
218 | as the corresponding file descriptors in the calling process.) | |
219 | Subsequent operations that open or close file descriptors, | |
220 | or change file descriptor flags, | |
221 | performed by either the calling | |
222 | process or the child process do not affect the other process. | |
fea681da | 223 | .TP |
1603d6a1 | 224 | .BR CLONE_FS " (since Linux 2.0)" |
fea681da MK |
225 | If |
226 | .B CLONE_FS | |
314c8ff4 | 227 | is set, the caller and the child process share the same file system |
c13182ef MK |
228 | information. |
229 | This includes the root of the file system, the current | |
230 | working directory, and the umask. | |
231 | Any call to | |
fea681da MK |
232 | .BR chroot (2), |
233 | .BR chdir (2), | |
234 | or | |
235 | .BR umask (2) | |
edcc65ff | 236 | performed by the calling process or the child process also affects the |
fea681da MK |
237 | other process. |
238 | ||
c13182ef | 239 | If |
fea681da MK |
240 | .B CLONE_FS |
241 | is not set, the child process works on a copy of the file system | |
242 | information of the calling process at the time of the | |
edcc65ff | 243 | .BR clone () |
fea681da MK |
244 | call. |
245 | Calls to | |
246 | .BR chroot (2), | |
247 | .BR chdir (2), | |
248 | or \fBumask\fP(2) | |
249 | performed later by one of the processes do not affect the other process. | |
fea681da | 250 | .TP |
a4cc375e | 251 | .BR CLONE_IO " (since Linux 2.6.25)" |
11f27a1c JA |
252 | If |
253 | .B CLONE_IO | |
254 | is set, then the new process shares an I/O context with | |
255 | the calling process. | |
256 | If this flag is not set, then (as with | |
257 | .BR fork (2)) | |
258 | the new process has its own I/O context. | |
259 | ||
260 | .\" The following based on text from Jens Axboe | |
a113945f | 261 | The I/O context is the I/O scope of the disk scheduler (i.e., |
11f27a1c JA |
262 | what the I/O scheduler uses to model scheduling of a process's I/O). |
263 | If processes share the same I/O context, | |
264 | they are treated as one by the I/O scheduler. | |
265 | As a consequence, they get to share disk time. | |
266 | For some I/O schedulers, | |
267 | .\" the anticipatory and CFQ scheduler | |
268 | if two processes share an I/O context, | |
269 | they will be allowed to interleave their disk access. | |
270 | If several threads are doing I/O on behalf of the same process | |
271 | .RB ( aio_read (3), | |
272 | for instance), they should employ | |
273 | .BR CLONE_IO | |
274 | to get better I/O performance. | |
275 | .\" with CFQ and AS. | |
276 | ||
277 | If the kernel is not configured with the | |
278 | .B CONFIG_BLOCK | |
279 | option, this flag is a no-op. | |
280 | .TP | |
8722311b | 281 | .BR CLONE_NEWIPC " (since Linux 2.6.19)" |
667417b3 MK |
282 | If |
283 | .B CLONE_NEWIPC | |
284 | is set, then create the process in a new IPC namespace. | |
285 | If this flag is not set, then (as with | |
286 | .BR fork (2)), | |
287 | the process is created in the same IPC namespace as | |
288 | the calling process. | |
0236bea9 | 289 | This flag is intended for the implementation of containers. |
667417b3 | 290 | |
009a049e MK |
291 | An IPC namespace provides an isolated view of System V IPC objects (see |
292 | .BR svipc (7)) | |
293 | and (since Linux 2.6.30) | |
294 | .\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f | |
295 | .\" https://lwn.net/Articles/312232/ | |
296 | POSIX message queues | |
297 | (see | |
298 | .BR mq_overview (7)). | |
19911fa5 MK |
299 | The common characteristic of these IPC mechanisms is that IPC |
300 | objects are identified by mechanisms other than filesystem | |
301 | pathnames. | |
009a049e | 302 | |
c440fe01 | 303 | Objects created in an IPC namespace are visible to all other processes |
667417b3 MK |
304 | that are members of that namespace, |
305 | but are not visible to processes in other IPC namespaces. | |
306 | ||
83c1f4b5 | 307 | When an IPC namespace is destroyed |
009a049e | 308 | (i.e., when the last process that is a member of the namespace terminates), |
83c1f4b5 MK |
309 | all IPC objects in the namespace are automatically destroyed. |
310 | ||
667417b3 MK |
311 | Use of this flag requires: a kernel configured with the |
312 | .B CONFIG_SYSVIPC | |
313 | and | |
314 | .B CONFIG_IPC_NS | |
c8e18bd1 | 315 | options and that the process be privileged |
667417b3 MK |
316 | .RB ( CAP_SYS_ADMIN ). |
317 | This flag can't be specified in conjunction with | |
318 | .BR CLONE_SYSVSEM . | |
319 | .TP | |
163bf178 | 320 | .BR CLONE_NEWNET " (since Linux 2.6.24)" |
b9145b2c | 321 | .\" FIXME Check when the implementation was completed |
33a0ccb2 | 322 | (The implementation of this flag was completed only |
9108d867 | 323 | by about kernel version 2.6.29.) |
163bf178 MK |
324 | |
325 | If | |
326 | .B CLONE_NEWNET | |
327 | is set, then create the process in a new network namespace. | |
328 | If this flag is not set, then (as with | |
329 | .BR fork (2)), | |
330 | the process is created in the same network namespace as | |
331 | the calling process. | |
332 | This flag is intended for the implementation of containers. | |
333 | ||
334 | A network namespace provides an isolated view of the networking stack | |
335 | (network device interfaces, IPv4 and IPv6 protocol stacks, | |
336 | IP routing tables, firewall rules, the | |
337 | .I /proc/net | |
338 | and | |
339 | .I /sys/class/net | |
340 | directory trees, sockets, etc.). | |
341 | A physical network device can live in exactly one | |
342 | network namespace. | |
343 | A virtual network device ("veth") pair provides a pipe-like abstraction | |
1a95a1be | 344 | .\" FIXME Add pointer to veth(4) page when it is eventually completed |
163bf178 MK |
345 | that can be used to create tunnels between network namespaces, |
346 | and can be used to create a bridge to a physical network device | |
347 | in another namespace. | |
348 | ||
bf032425 SH |
349 | When a network namespace is freed |
350 | (i.e., when the last process in the namespace terminates), | |
351 | its physical network devices are moved back to the | |
352 | initial network namespace (not to the parent of the process). | |
353 | ||
163bf178 MK |
354 | Use of this flag requires: a kernel configured with the |
355 | .B CONFIG_NET_NS | |
356 | option and that the process be privileged | |
cae2ec15 | 357 | .RB ( CAP_SYS_ADMIN ). |
163bf178 | 358 | .TP |
c10859eb | 359 | .BR CLONE_NEWNS " (since Linux 2.4.19)" |
732e54dd | 360 | Start the child in a new mount namespace. |
fea681da | 361 | |
732e54dd | 362 | Every process lives in a mount namespace. |
c13182ef | 363 | The |
fea681da MK |
364 | .I namespace |
365 | of a process is the data (the set of mounts) describing the file hierarchy | |
c13182ef MK |
366 | as seen by that process. |
367 | After a | |
fea681da MK |
368 | .BR fork (2) |
369 | or | |
2777b1ca | 370 | .BR clone () |
fea681da MK |
371 | where the |
372 | .B CLONE_NEWNS | |
732e54dd | 373 | flag is not set, the child lives in the same mount |
4df2eb09 | 374 | namespace as the parent. |
fea681da MK |
375 | The system calls |
376 | .BR mount (2) | |
377 | and | |
378 | .BR umount (2) | |
732e54dd | 379 | change the mount namespace of the calling process, and hence affect |
fea681da | 380 | all processes that live in the same namespace, but do not affect |
732e54dd | 381 | processes in a different mount namespace. |
fea681da MK |
382 | |
383 | After a | |
2777b1ca | 384 | .BR clone () |
fea681da MK |
385 | where the |
386 | .B CLONE_NEWNS | |
732e54dd | 387 | flag is set, the cloned child is started in a new mount namespace, |
fea681da MK |
388 | initialized with a copy of the namespace of the parent. |
389 | ||
0b9bdf82 | 390 | Only a privileged process (one having the \fBCAP_SYS_ADMIN\fP capability) |
fea681da MK |
391 | may specify the |
392 | .B CLONE_NEWNS | |
393 | flag. | |
394 | It is not permitted to specify both | |
395 | .B CLONE_NEWNS | |
396 | and | |
397 | .B CLONE_FS | |
398 | in the same | |
e511ffb6 | 399 | .BR clone () |
fea681da | 400 | call. |
fea681da | 401 | .TP |
82ee147a MK |
402 | .BR CLONE_NEWPID " (since Linux 2.6.24)" |
403 | .\" This explanation draws a lot of details from | |
404 | .\" http://lwn.net/Articles/259217/ | |
405 | .\" Authors: Pavel Emelyanov <xemul@openvz.org> | |
406 | .\" and Kir Kolyshkin <kir@openvz.org> | |
407 | .\" | |
408 | .\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264 | |
409 | .\" Author: Pavel Emelyanov <xemul@openvz.org> | |
410 | If | |
5c95e5e8 | 411 | .B CLONE_NEWPID |
82ee147a MK |
412 | is set, then create the process in a new PID namespace. |
413 | If this flag is not set, then (as with | |
414 | .BR fork (2)), | |
415 | the process is created in the same PID namespace as | |
416 | the calling process. | |
0236bea9 | 417 | This flag is intended for the implementation of containers. |
82ee147a MK |
418 | |
419 | A PID namespace provides an isolated environment for PIDs: | |
420 | PIDs in a new namespace start at 1, | |
421 | somewhat like a standalone system, and calls to | |
422 | .BR fork (2), | |
423 | .BR vfork (2), | |
424 | or | |
27d47e71 | 425 | .BR clone () |
5584229c | 426 | will produce processes with PIDs that are unique within the namespace. |
82ee147a MK |
427 | |
428 | The first process created in a new namespace | |
429 | (i.e., the process created using the | |
430 | .BR CLONE_NEWPID | |
431 | flag) has the PID 1, and is the "init" process for the namespace. | |
432 | Children that are orphaned within the namespace will be reparented | |
433 | to this process rather than | |
434 | .BR init (8). | |
435 | Unlike the traditional | |
436 | .B init | |
437 | process, the "init" process of a PID namespace can terminate, | |
438 | and if it does, all of the processes in the namespace are terminated. | |
439 | ||
440 | PID namespaces form a hierarchy. | |