]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/clone.2
clone.2: Change types for 'ptid' and 'ctid' in syscall prototypes
[thirdparty/man-pages.git] / man2 / clone.2
CommitLineData
fea681da 1.\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
8c7b566c 2.\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013
2297bf0e 3.\"
fd0fc519 4.\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
fea681da 5.\" May be distributed under the GNU General Public License.
fd0fc519 6.\" %%%LICENSE_END
dccaff1e 7.\"
fea681da
MK
8.\" Modified by Michael Haardt <michael@moria.de>
9.\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
10.\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
11.\" New man page (copied from 'fork.2').
12.\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
13.\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
14.\" Modified 26 Jun 2001 by Michael Kerrisk
15.\" Mostly upgraded to 2.4.x
16.\" Added prototype for sys_clone() plus description
17.\" Added CLONE_THREAD with a brief description of thread groups
c13182ef 18.\" Added CLONE_PARENT and revised entire page remove ambiguity
fea681da
MK
19.\" between "calling process" and "parent process"
20.\" Added CLONE_PTRACE and CLONE_VFORK
21.\" Added EPERM and EINVAL error codes
fd8a5be4 22.\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
fea681da 23.\" various other minor tidy ups and clarifications.
c11b1abf 24.\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
d9bfdb9c 25.\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
c11b1abf 26.\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
fea681da
MK
27.\" Added description for CLONE_NEWNS, which was added in 2.4.19
28.\" Slightly rephrased, aeb.
29.\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
30.\" Modified 1 Jan 2004 - various updates, aeb
0967c11f 31.\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
d9bfdb9c 32.\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
31830ef0 33.\" wrapper under BUGS.
fd8a5be4
MK
34.\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
35.\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
4e836144 36.\" 2008-11-18, mtk, order CLONE_* flags alphabetically
82ee147a 37.\" 2008-11-18, mtk, document CLONE_NEWPID
43ce9dda 38.\" 2008-11-19, mtk, document CLONE_NEWUTS
667417b3 39.\" 2008-11-19, mtk, document CLONE_NEWIPC
cfdc761b 40.\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
fea681da 41.\"
3df541c0 42.TH CLONE 2 2016-07-17 "Linux" "Linux Programmer's Manual"
fea681da 43.SH NAME
9b0e0996 44clone, __clone2 \- create a child process
fea681da 45.SH SYNOPSIS
c10859eb 46.nf
81f10dad
MK
47/* Prototype for the glibc wrapper function */
48
4f71ba5d 49.B #define _GNU_SOURCE
fea681da 50.B #include <sched.h>
c10859eb 51
ff929e3b
MK
52.BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
53.BI " int " flags ", void *" "arg" ", ... "
dd6d3d2e 54.BI " /* pid_t *" ptid ", void *" newtls \
ff929e3b 55", pid_t *" ctid " */ );"
81f10dad 56
e585064b 57/* Prototype for the raw system call */
81f10dad
MK
58
59.BI "long clone(unsigned long " flags ", void *" child_stack ,
fda55470 60.BI " int *" ptid ", int *" ctid ,
dd6d3d2e 61.BI " unsigned long " newtls );
c10859eb 62.fi
fea681da 63.SH DESCRIPTION
edcc65ff
MK
64.BR clone ()
65creates a new process, in a manner similar to
fea681da 66.BR fork (2).
81f10dad
MK
67
68This page describes both the glibc
e511ffb6 69.BR clone ()
e585064b 70wrapper function and the underlying system call on which it is based.
81f10dad 71The main text describes the wrapper function;
e585064b 72the differences for the raw system call
81f10dad 73are described toward the end of this page.
fea681da
MK
74
75Unlike
76.BR fork (2),
81f10dad
MK
77.BR clone ()
78allows the child process to share parts of its execution context with
fea681da 79the calling process, such as the memory space, the table of file
c13182ef
MK
80descriptors, and the table of signal handlers.
81(Note that on this manual
82page, "calling process" normally corresponds to "parent process".
83But see the description of
84.B CLONE_PARENT
fea681da
MK
85below.)
86
1533d242 87One use of
edcc65ff 88.BR clone ()
fea681da
MK
89is to implement threads: multiple threads of control in a program that
90run concurrently in a shared memory space.
91
92When the child process is created with
c13182ef 93.BR clone (),
fea681da 94it executes the function
c13182ef 95.IR fn ( arg ).
fea681da 96(This differs from
c13182ef 97.BR fork (2),
fea681da 98where execution continues in the child from the point
c13182ef
MK
99of the
100.BR fork (2)
fea681da
MK
101call.)
102The
103.I fn
104argument is a pointer to a function that is called by the child
105process at the beginning of its execution.
106The
107.I arg
108argument is passed to the
109.I fn
110function.
111
c13182ef 112When the
fea681da 113.IR fn ( arg )
c13182ef
MK
114function application returns, the child process terminates.
115The integer returned by
fea681da 116.I fn
c13182ef
MK
117is the exit code for the child process.
118The child process may also terminate explicitly by calling
fea681da
MK
119.BR exit (2)
120or after receiving a fatal signal.
121
122The
123.I child_stack
c13182ef
MK
124argument specifies the location of the stack used by the child process.
125Since the child and calling process may share memory,
fea681da 126it is not possible for the child process to execute in the
c13182ef
MK
127same stack as the calling process.
128The calling process must therefore
fea681da
MK
129set up memory space for the child stack and pass a pointer to this
130space to
edcc65ff 131.BR clone ().
5fab2e7c 132Stacks grow downward on all processors that run Linux
fea681da
MK
133(except the HP PA processors), so
134.I child_stack
135usually points to the topmost address of the memory space set up for
136the child stack.
137
138The low byte of
139.I flags
fd8a5be4
MK
140contains the number of the
141.I "termination signal"
142sent to the parent when the child dies.
143If this signal is specified as anything other than
fea681da
MK
144.BR SIGCHLD ,
145then the parent process must specify the
c13182ef
MK
146.B __WALL
147or
fea681da 148.B __WCLONE
c13182ef
MK
149options when waiting for the child with
150.BR wait (2).
fea681da
MK
151If no signal is specified, then the parent process is not signaled
152when the child terminates.
153
154.I flags
fd8a5be4
MK
155may also be bitwise-or'ed with zero or more of the following constants,
156in order to specify what is shared between the calling process
fea681da 157and the child process:
fea681da 158.TP
f5dbc7c8 159.BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
8ef021ea 160Erase the child thread ID at the location
d3dbc9b1 161.I ctid
f5dbc7c8
MK
162in child memory when the child exits, and do a wakeup on the futex
163at that address.
164The address involved may be changed by the
165.BR set_tid_address (2)
166system call.
167This is used by threading libraries.
168.TP
169.BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
8ef021ea 170Store the child thread ID at the location
d3dbc9b1 171.I ctid
8ef021ea 172in the child's memory.
f5dbc7c8 173.TP
1603d6a1 174.BR CLONE_FILES " (since Linux 2.0)"
fea681da 175If
f5dbc7c8
MK
176.B CLONE_FILES
177is set, the calling process and the child process share the same file
178descriptor table.
179Any file descriptor created by the calling process or by the child
180process is also valid in the other process.
181Similarly, if one of the processes closes a file descriptor,
182or changes its associated flags (using the
183.BR fcntl (2)
184.B F_SETFD
185operation), the other process is also affected.
8a76b19e
KE
186If a process sharing a file descriptor table calls
187.BR execve (2),
188its file descriptor table is duplicated (unshared).
fea681da
MK
189
190If
f5dbc7c8
MK
191.B CLONE_FILES
192is not set, the child process inherits a copy of all file descriptors
193opened in the calling process at the time of
194.BR clone ().
f5dbc7c8
MK
195Subsequent operations that open or close file descriptors,
196or change file descriptor flags,
197performed by either the calling
198process or the child process do not affect the other process.
db8ba2b4
MK
199Note, however,
200that the duplicated file descriptors in the child refer to the same open file
201descriptions as the corresponding file descriptors in the calling process,
202and thus share file offsets and file status flags (see
203.BR open (2)).
fea681da 204.TP
1603d6a1 205.BR CLONE_FS " (since Linux 2.0)"
fea681da
MK
206If
207.B CLONE_FS
9ee4a2b6 208is set, the caller and the child process share the same filesystem
c13182ef 209information.
9ee4a2b6 210This includes the root of the filesystem, the current
c13182ef
MK
211working directory, and the umask.
212Any call to
fea681da
MK
213.BR chroot (2),
214.BR chdir (2),
215or
216.BR umask (2)
edcc65ff 217performed by the calling process or the child process also affects the
fea681da
MK
218other process.
219
c13182ef 220If
fea681da 221.B CLONE_FS
9ee4a2b6 222is not set, the child process works on a copy of the filesystem
fea681da 223information of the calling process at the time of the
edcc65ff 224.BR clone ()
fea681da
MK
225call.
226Calls to
227.BR chroot (2),
228.BR chdir (2),
229.BR umask (2)
230performed later by one of the processes do not affect the other process.
fea681da 231.TP
a4cc375e 232.BR CLONE_IO " (since Linux 2.6.25)"
11f27a1c
JA
233If
234.B CLONE_IO
235is set, then the new process shares an I/O context with
236the calling process.
237If this flag is not set, then (as with
238.BR fork (2))
239the new process has its own I/O context.
240
241.\" The following based on text from Jens Axboe
d1f84ed7 242The I/O context is the I/O scope of the disk scheduler (i.e.,
11f27a1c
JA
243what the I/O scheduler uses to model scheduling of a process's I/O).
244If processes share the same I/O context,
245they are treated as one by the I/O scheduler.
246As a consequence, they get to share disk time.
247For some I/O schedulers,
248.\" the anticipatory and CFQ scheduler
249if two processes share an I/O context,
250they will be allowed to interleave their disk access.
251If several threads are doing I/O on behalf of the same process
252.RB ( aio_read (3),
253for instance), they should employ
254.BR CLONE_IO
255to get better I/O performance.
256.\" with CFQ and AS.
257
258If the kernel is not configured with the
259.B CONFIG_BLOCK
260option, this flag is a no-op.
261.TP
c5af0674
MK
262.BR CLONE_NEWCGROUP " (since Linux 4.6)"
263Create the process in a new cgroup namespace.
264If this flag is not set, then (as with
265.BR fork (2))
266the process is created in the same cgroup namespace as the calling process.
267This flag is intended for the implementation of containers.
268
269For further information on cgroup namespaces, see
b9fe4bc3 270.BR cgroup_namespaces (7).
c5af0674
MK
271
272Only a privileged process
273.RB ( CAP_SYS_ADMIN )
274can employ
275.BR CLONE_NEWCGROUP .
276.\"
277.TP
8722311b 278.BR CLONE_NEWIPC " (since Linux 2.6.19)"
667417b3
MK
279If
280.B CLONE_NEWIPC
281is set, then create the process in a new IPC namespace.
282If this flag is not set, then (as with
06b30458 283.BR fork (2)),
667417b3
MK
284the process is created in the same IPC namespace as
285the calling process.
0236bea9 286This flag is intended for the implementation of containers.
667417b3 287
efbfd7ec 288An IPC namespace provides an isolated view of System\ V IPC objects (see
009a049e
MK
289.BR svipc (7))
290and (since Linux 2.6.30)
291.\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
292.\" https://lwn.net/Articles/312232/
293POSIX message queues
294(see
295.BR mq_overview (7)).
19911fa5
MK
296The common characteristic of these IPC mechanisms is that IPC
297objects are identified by mechanisms other than filesystem
298pathnames.
009a049e 299
c440fe01 300Objects created in an IPC namespace are visible to all other processes
667417b3
MK
301that are members of that namespace,
302but are not visible to processes in other IPC namespaces.
303
83c1f4b5 304When an IPC namespace is destroyed
009a049e 305(i.e., when the last process that is a member of the namespace terminates),
83c1f4b5
MK
306all IPC objects in the namespace are automatically destroyed.
307
ab5dd83f
MK
308Only a privileged process
309.RB ( CAP_SYS_ADMIN )
310can employ
311.BR CLONE_NEWIPC .
667417b3
MK
312This flag can't be specified in conjunction with
313.BR CLONE_SYSVSEM .
9343f8e7
MK
314
315For further information on IPC namespaces, see
316.BR namespaces (7).
667417b3 317.TP
163bf178 318.BR CLONE_NEWNET " (since Linux 2.6.24)"
33a0ccb2 319(The implementation of this flag was completed only
9108d867 320by about kernel version 2.6.29.)
163bf178
MK
321
322If
323.B CLONE_NEWNET
324is set, then create the process in a new network namespace.
325If this flag is not set, then (as with
57ef8c39 326.BR fork (2))
163bf178
MK
327the process is created in the same network namespace as
328the calling process.
329This flag is intended for the implementation of containers.
330
331A network namespace provides an isolated view of the networking stack
332(network device interfaces, IPv4 and IPv6 protocol stacks,
333IP routing tables, firewall rules, the
334.I /proc/net
335and
336.I /sys/class/net
337directory trees, sockets, etc.).
338A physical network device can live in exactly one
339network namespace.
340A virtual network device ("veth") pair provides a pipe-like abstraction
bea08fec 341.\" FIXME . Add pointer to veth(4) page when it is eventually completed
163bf178
MK
342that can be used to create tunnels between network namespaces,
343and can be used to create a bridge to a physical network device
344in another namespace.
345
bf032425
SH
346When a network namespace is freed
347(i.e., when the last process in the namespace terminates),
348its physical network devices are moved back to the
349initial network namespace (not to the parent of the process).
73680728
MK
350For further information on network namespaces, see
351.BR namespaces (7).
bf032425 352
ab5dd83f
MK
353Only a privileged process
354.RB ( CAP_SYS_ADMIN )
355can employ
356.BR CLONE_NEWNET .
163bf178 357.TP
c10859eb 358.BR CLONE_NEWNS " (since Linux 2.4.19)"
3dd2331c
MK
359If
360.B CLONE_NEWNS
361is set, the cloned child is started in a new mount namespace,
362initialized with a copy of the namespace of the parent.
363If
fea681da 364.B CLONE_NEWNS
3dd2331c 365is not set, the child lives in the same mount
4df2eb09 366namespace as the parent.
fea681da 367
ab5dd83f
MK
368Only a privileged process
369.RB ( CAP_SYS_ADMIN )
370can employ
371.BR CLONE_NEWNS .
fea681da
MK
372It is not permitted to specify both
373.B CLONE_NEWNS
374and
375.B CLONE_FS
9219d208 376.\" See https://lwn.net/Articles/543273/
fea681da 377in the same
e511ffb6 378.BR clone ()
fea681da 379call.
c212248c
MK
380
381For further information on mount namespaces, see
382.BR namespaces (7)
383and
384.BR mount_namespaces (7).
9d005472
MK
385.TP
386.BR CLONE_NEWPID " (since Linux 2.6.24)"
387.\" This explanation draws a lot of details from
388.\" http://lwn.net/Articles/259217/
389.\" Authors: Pavel Emelyanov <xemul@openvz.org>
390.\" and Kir Kolyshkin <kir@openvz.org>
391.\"
392.\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
393.\" Author: Pavel Emelyanov <xemul@openvz.org>
394If
395.B CLONE_NEWPID
396is set, then create the process in a new PID namespace.
397If this flag is not set, then (as with
398.BR fork (2))
399the process is created in the same PID namespace as
400the calling process.
401This flag is intended for the implementation of containers.
402
403For further information on PID namespaces, see
7e0e902b
MK
404.BR namespaces (7)
405and
39b3f005 406.BR pid_namespaces (7).
9d005472 407
ab5dd83f
MK
408Only a privileged process
409.RB ( CAP_SYS_ADMIN )
410can employ
411.BR CLONE_NEWPID .
9d005472 412This flag can't be specified in conjunction with
f0007192
MK
413.BR CLONE_THREAD
414or
415.BR CLONE_PARENT .
70d21f17 416.TP
06b30458
MK
417.BR CLONE_NEWUSER
418(This flag first became meaningful for
419.BR clone ()
4d2b3ed7
MK
420in Linux 2.6.23,
421the current
11a38815 422.BR clone ()
4d2b3ed7
MK
423semantics were merged in Linux 3.5,
424and the final pieces to make the user namespaces completely usable were
425merged in Linux 3.8.)
426
70d21f17
EB
427If
428.B CLONE_NEWUSER
06b30458
MK
429is set, then create the process in a new user namespace.
430If this flag is not set, then (as with
57ef8c39 431.BR fork (2))
70d21f17
EB
432the process is created in the same user namespace as the calling process.
433
9d005472 434For further information on user namespaces, see
f58fb24f
MK
435.BR namespaces (7)
436and
437.BR user_namespaces (7).
06b30458 438
fefbcba8
MK
439Before Linux 3.8, use of
440.BR CLONE_NEWUSER
441required that the caller have three capabilities:
442.BR CAP_SYS_ADMIN ,
443.BR CAP_SETUID ,
444and
445.BR CAP_SETGID .
446.\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
06b30458 447Starting with Linux 3.8,
9d005472 448no privileges are needed to create a user namespace.
f0007192 449
5e72cf7d
MK
450This flag can't be specified in conjunction with
451.BR CLONE_THREAD
452or
453.BR CLONE_PARENT .
454For security reasons,
455.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
456.\" https://lwn.net/Articles/543273/
457.\" The fix actually went into 3.9 and into 3.8.3. However, user namespaces
458.\" were, for practical purposes, unusable in earlier 3.8.x because of the
ab3311aa 459.\" various filesystems that didn't support userns.
f0007192
MK
460.BR CLONE_NEWUSER
461cannot be specified in conjunction with
5e72cf7d
MK
462.BR CLONE_FS .
463
464For further information on user namespaces, see
465.BR user_namespaces (7).
82ee147a 466.TP
43ce9dda
MK
467.BR CLONE_NEWUTS " (since Linux 2.6.19)"
468If
469.B CLONE_NEWUTS
e1b11906
MK
470is set, then create the process in a new UTS namespace,
471whose identifiers are initialized by duplicating the identifiers
472from the UTS namespace of the calling process.
43ce9dda 473If this flag is not set, then (as with
57ef8c39 474.BR fork (2))
43ce9dda
MK
475the process is created in the same UTS namespace as
476the calling process.
0236bea9 477This flag is intended for the implementation of containers.
43ce9dda
MK
478
479A UTS namespace is the set of identifiers returned by
480.BR uname (2);
850905cf 481among these, the domain name and the hostname can be modified by
43ce9dda
MK
482.BR setdomainname (2)
483and
43ce9dda
MK
484.BR sethostname (2),
485respectively.
c440fe01
MK
486Changes made to the identifiers in a UTS namespace
487are visible to all other processes in the same namespace,
43ce9dda
MK
488but are not visible to processes in other UTS namespaces.
489
ab5dd83f
MK
490Only a privileged process
491.RB ( CAP_SYS_ADMIN )
492can employ
493.BR CLONE_NEWUTS .
9cc7ad66 494
83d9e9b2 495For further information on UTS namespaces, see
9cc7ad66 496.BR namespaces (7).
43ce9dda 497.TP
f5dbc7c8
MK
498.BR CLONE_PARENT " (since Linux 2.3.12)"
499If
500.B CLONE_PARENT
501is set, then the parent of the new child (as returned by
502.BR getppid (2))
503will be the same as that of the calling process.
504
505If
506.B CLONE_PARENT
507is not set, then (as with
508.BR fork (2))
509the child's parent is the calling process.
510
511Note that it is the parent process, as returned by
512.BR getppid (2),
513which is signaled when the child terminates, so that
514if
515.B CLONE_PARENT
516is set, then the parent of the calling process, rather than the
517calling process itself, will be signaled.
518.TP
519.BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
8ef021ea 520Store the child thread ID at the location
d3dbc9b1 521.I ptid
8ef021ea 522in the parent's memory.
f5dbc7c8
MK
523(In Linux 2.5.32-2.5.48 there was a flag
524.B CLONE_SETTID
525that did this.)
526.TP
527.BR CLONE_PID " (obsolete)"
528If
529.B CLONE_PID
530is set, the child process is created with the same process ID as
531the calling process.
532This is good for hacking the system, but otherwise
533of not much use.
534Since 2.3.21 this flag can be
535specified only by the system boot process (PID 0).
28b44abc
MK
536It disappeared in Linux 2.5.16.
537Since then, the kernel silently ignores it without error.
f5dbc7c8 538.TP
1603d6a1 539.BR CLONE_PTRACE " (since Linux 2.2)"
f5dbc7c8
MK
540If
541.B CLONE_PTRACE
542is specified, and the calling process is being traced,
543then trace the child also (see
544.BR ptrace (2)).
545.TP
546.BR CLONE_SETTLS " (since Linux 2.5.32)"
dd6d3d2e
KF
547The TLS (Thread Local Storage) descriptor is set to
548.IR newtls .
549
550The interpretation of
551.I newtls
552and the resulting effect is architecture dependent.
553On x86,
f5dbc7c8 554.I newtls
dd6d3d2e
KF
555is interpreted as a
556.IR "struct user_desc *"
f5dbc7c8 557(see
dd6d3d2e
KF
558.BR set_thread_area (2)).
559On x86_64 it is the new value to be set for the %fs base register
560(see the
561.I ARCH_SET_FS
562argument to
563.BR arch_prctl (2)).
564On architectures with a dedicated TLS register, it is the new value
565of that register.
f5dbc7c8 566.TP
1603d6a1 567.BR CLONE_SIGHAND " (since Linux 2.0)"
fea681da
MK
568If
569.B CLONE_SIGHAND
314c8ff4 570is set, the calling process and the child process share the same table of
c13182ef
MK
571signal handlers.
572If the calling process or child process calls
fea681da 573.BR sigaction (2)
c13182ef
MK
574to change the behavior associated with a signal, the behavior is
575changed in the other process as well.
576However, the calling process and child
fea681da 577processes still have distinct signal masks and sets of pending
c13182ef
MK
578signals.
579So, one of them may block or unblock some signals using
fea681da
MK
580.BR sigprocmask (2)
581without affecting the other process.
582
583If
584.B CLONE_SIGHAND
585is not set, the child process inherits a copy of the signal handlers
586of the calling process at the time
edcc65ff 587.BR clone ()
c13182ef
MK
588is called.
589Calls to
fea681da
MK
590.BR sigaction (2)
591performed later by one of the processes have no effect on the other
592process.
29546c24
MK
593
594Since Linux 2.6.0-test6,
595.I flags
596must also include
597.B CLONE_VM
598if
599.B CLONE_SIGHAND
600is specified.
fea681da 601.TP
a69b6bda
MK
602.BR CLONE_STOPPED " (since Linux 2.6.0-test2)"
603If
604.B CLONE_STOPPED
605is set, then the child is initially stopped (as though it was sent a
606.B SIGSTOP
607signal), and must be resumed by sending it a
608.B SIGCONT
609signal.
ef37eaf2 610
a60450a9
MK
611This flag was
612.I deprecated
613from Linux 2.6.25 onward,
614and was
615.I removed
28b44abc
MK
616altogether in Linux 2.6.38.
617Since then, the kernel silently ignores it without error.
a5a061ee 618.\" glibc 2.8 removed this defn from bits/sched.h
c5af0674
MK
619Starting with Linux 4.6, the same bit was reused for the
620.BR CLONE_NEWCGROUP
621flag.
a69b6bda 622.TP
f5dbc7c8 623.BR CLONE_SYSVSEM " (since Linux 2.5.10)"
fea681da 624If
f5dbc7c8
MK
625.B CLONE_SYSVSEM
626is set, then the child and the calling process share
5ada4b94
MK
627a single list of System V semaphore adjustment
628.RI ( semadj )
629values (see
f5dbc7c8 630.BR semop (2)).
5ada4b94
MK
631In this case, the shared list accumulates
632.I semadj
633values across all processes sharing the list,
634and semaphore adjustments are performed only when the last process
635that is sharing the list terminates (or ceases sharing the list using
636.BR unshare (2)).
f5d401dd 637If this flag is not set, then the child has a separate
5ada4b94
MK
638.I semadj
639list that is initially empty.
fea681da
MK
640.TP
641.BR CLONE_THREAD " (since Linux 2.4.0-test8)"
642If
643.B CLONE_THREAD
644is set, the child is placed in the same thread group as the calling process.
fd8a5be4
MK
645To make the remainder of the discussion of
646.B CLONE_THREAD
647more readable, the term "thread" is used to refer to the
648processes within a thread group.
fea681da 649
fd8a5be4
MK
650Thread groups were a feature added in Linux 2.4 to support the
651POSIX threads notion of a set of threads that share a single PID.
652Internally, this shared PID is the so-called
653thread group identifier (TGID) for the thread group.
c13182ef 654Since Linux 2.4, calls to
fea681da 655.BR getpid (2)
fd8a5be4
MK
656return the TGID of the caller.
657
658The threads within a group can be distinguished by their (system-wide)
659unique thread IDs (TID).
660A new thread's TID is available as the function result
661returned to the caller of
662.BR clone (),
663and a thread can obtain
664its own TID using
665.BR gettid (2).
666
c13182ef 667When a call is made to
fd8a5be4
MK
668.BR clone ()
669without specifying
670.BR CLONE_THREAD ,
671then the resulting thread is placed in a new thread group
672whose TGID is the same as the thread's TID.
673This thread is the
674.I leader
675of the new thread group.
676
677A new thread created with
678.B CLONE_THREAD
679has the same parent process as the caller of
680.BR clone ()
c13182ef 681(i.e., like
fd8a5be4
MK
682.BR CLONE_PARENT ),
683so that calls to
684.BR getppid (2)
685return the same value for all of the threads in a thread group.
686When a
c13182ef 687.B CLONE_THREAD
fd8a5be4
MK
688thread terminates, the thread that created it using
689.BR clone ()
690is not sent a
691.B SIGCHLD
692(or other termination) signal;
693nor can the status of such a thread be obtained
694using
695.BR wait (2).
696(The thread is said to be
697.IR detached .)
698
e2fbf61d
MK
699After all of the threads in a thread group terminate
700the parent process of the thread group is sent a
fd8a5be4
MK
701.B SIGCHLD
702(or other termination) signal.
703
704If any of the threads in a thread group performs an
705.BR execve (2),
706then all threads other than the thread group leader are terminated,
707and the new program is executed in the thread group leader.
708
f7110f60
MK
709If one of the threads in a thread group creates a child using
710.BR fork (2),
711then any thread in the group can
712.BR wait (2)
713for that child.
714
edcc65ff 715Since Linux 2.5.35,
fd8a5be4
MK
716.I flags
717must also include
718.B CLONE_SIGHAND
719if
720.B CLONE_THREAD
6fd69f33
MK
721is specified
722(and note that, since Linux 2.6.0-test6,
723.BR CLONE_SIGHAND
724also requires
725.BR CLONE_VM
726to be included).
e2fbf61d
MK
727
728Signals may be sent to a thread group as a whole (i.e., a TGID) using
729.BR kill (2),
730or to a specific thread (i.e., TID) using
731.BR tgkill (2).
732
733Signal dispositions and actions are process-wide:
734if an unhandled signal is delivered to a thread, then
735it will affect (terminate, stop, continue, be ignored in)
736all members of the thread group.
737
99408a60 738Each thread has its own signal mask, as set by
e2fbf61d 739.BR sigprocmask (2),
82a06020 740but signals can be pending either: for the whole process
e2fbf61d
MK
741(i.e., deliverable to any member of the thread group),
742when sent with
82a06020 743.BR kill (2);
e2fbf61d
MK
744or for an individual thread, when sent with
745.BR tgkill (2).
99408a60
MK
746A call to
747.BR sigpending (2)
748returns a signal set that is the union of the signals pending for the
749whole process and the signals that are pending for the calling thread.
e2fbf61d 750
c13182ef 751If
e2fbf61d
MK
752.BR kill (2)
753is used to send a signal to a thread group,
754and the thread group has installed a handler for the signal, then
755the handler will be invoked in exactly one, arbitrarily selected
756member of the thread group that has not blocked the signal.
c13182ef 757If multiple threads in a group are waiting to accept the same signal using
e2fbf61d
MK
758.BR sigwaitinfo (2),
759the kernel will arbitrarily select one of these threads
c13182ef 760to receive a signal sent using
e2fbf61d 761.BR kill (2).
a69b6bda 762.TP
f5dbc7c8 763.BR CLONE_UNTRACED " (since Linux 2.5.46)"
a69b6bda 764If
f5dbc7c8
MK
765.B CLONE_UNTRACED
766is specified, then a tracing process cannot force
767.B CLONE_PTRACE
768on this child process.
fea681da 769.TP
1603d6a1 770.BR CLONE_VFORK " (since Linux 2.2)"
f5dbc7c8
MK
771If
772.B CLONE_VFORK
773is set, the execution of the calling process is suspended
774until the child releases its virtual memory
775resources via a call to
776.BR execve (2)
777or
778.BR _exit (2)
779(as with
780.BR vfork (2)).
781
782If
783.B CLONE_VFORK
4b4a853a 784is not set, then both the calling process and the child are schedulable
f5dbc7c8
MK
785after the call, and an application should not rely on execution occurring
786in any particular order.
fea681da 787.TP
1603d6a1 788.BR CLONE_VM " (since Linux 2.0)"
f5dbc7c8
MK
789If
790.B CLONE_VM
791is set, the calling process and the child process run in the same memory
792space.
793In particular, memory writes performed by the calling process
794or by the child process are also visible in the other process.
795Moreover, any memory mapping or unmapping performed with
796.BR mmap (2)
797or
798.BR munmap (2)
799by the child or calling process also affects the other process.
800
801If
802.B CLONE_VM
803is not set, the child process runs in a separate copy of the memory
804space of the calling process at the time of
805.BR clone ().
806Memory writes or file mappings/unmappings performed by one of the
807processes do not affect the other, as with
808.BR fork (2).
0722a578 809.SS C library/kernel differences
e585064b
MK
810The raw
811.BR clone ()
fea681da
MK
812system call corresponds more closely to
813.BR fork (2)
814in that execution in the child continues from the point of the
c13182ef 815call.
5add3af3
MK
816As such, the
817.I fn
c13182ef 818and
5add3af3
MK
819.I arg
820arguments of the
821.BR clone ()
822wrapper function are omitted.
823Furthermore, the argument order changes.
c787510f 824The raw system call interface on x86 and many other architectures is roughly:
5add3af3
MK
825.in +4
826.nf
827
828.BI "long clone(unsigned long " flags ", void *" child_stack ,
fda55470 829.BI " int *" ptid ", int *" ctid ,
dd6d3d2e 830.BI " unsigned long " newtls );
fea681da 831
5add3af3
MK
832.fi
833.in
e585064b 834Another difference for the raw system call is that the
fea681da 835.I child_stack
c13182ef 836argument may be zero, in which case copy-on-write semantics ensure that the
fea681da 837child gets separate copies of stack pages when either process modifies
c13182ef
MK
838the stack.
839In this case, for correct operation, the
fea681da
MK
840.B CLONE_VM
841option should not be specified.
c787510f 842
e585064b 843For some architectures, the order of the arguments for the system call
c787510f 844differs from that shown above.
7d2e6d74 845On the score, microblaze, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
c787510f
MK
846and MIPS architectures,
847the order of the fourth and fifth arguments is reversed.
848On the cris and s390 architectures,
849the order of the first and second arguments is reversed.
251113d0
MK
850.SS blackfin, m68k, and sparc
851The argument-passing conventions on
04346be5 852blackfin, m68k, and sparc are different from the descriptions above.
251113d0 853For details, see the kernel (and glibc) source.
574c92b6 854.SS ia64
097a1f3b
MK
855On ia64, a different interface is used:
856.nf
857
858.BI "int __clone2(int (*" "fn" ")(void *), "
859.BI " void *" child_stack_base ", size_t " stack_size ,
860.BI " int " flags ", void *" "arg" ", ... "
861.BI " /* pid_t *" ptid ", struct user_desc *" tls \
862", pid_t *" ctid " */ );"
863.fi
864.PP
865The prototype shown above is for the glibc wrapper function;
866the raw system call interface has no
867.I fn
868or
869.I arg
870argument, and changes the order of the arguments so that
871.I flags
872is the first argument, and
873.I tls
874is the last argument.
875.PP
876.BR __clone2 ()
877operates in the same way as
878.BR clone (),
879except that
880.I child_stack_base
881points to the lowest address of the child's stack area,
882and
883.I stack_size
884specifies the size of the stack pointed to by
885.IR child_stack_base .
5add3af3 886.SS Linux 2.4 and earlier
577f9b62
MK
887In Linux 2.4 and earlier,
888.BR clone ()
889does not take arguments
890.IR ptid ,
891.IR tls ,
892and
130b2e49 893.IR ctid .
47297adb 894.SH RETURN VALUE
0bfa087b
MK
895.\" gettid(2) returns current->pid;
896.\" getpid(2) returns current->tgid;
fea681da 897On success, the thread ID of the child process is returned
c13182ef 898in the caller's thread of execution.
84811e86 899On failure, \-1 is returned
fea681da
MK
900in the caller's context, no child process will be created, and
901.I errno
902will be set appropriately.
fea681da
MK
903.SH ERRORS
904.TP
905.B EAGAIN
e1b6e186
MK
906Too many processes are already running; see
907.BR fork (2).
fea681da
MK
908.TP
909.B EINVAL
910.B CLONE_SIGHAND
911was specified, but
912.B CLONE_VM
2e8a7fb3
MK
913was not.
914(Since Linux 2.6.0-test6.)
fea681da
MK
915.TP
916.B EINVAL
917.B CLONE_THREAD
918was specified, but
919.B CLONE_SIGHAND
6387216b
MK
920was not.
921(Since Linux 2.5.35.)
29546c24
MK
922.\" .TP
923.\" .B EINVAL
924.\" Precisely one of
925.\" .B CLONE_DETACHED
926.\" and
927.\" .B CLONE_THREAD
6387216b
MK
928.\" was specified.
929.\" (Since Linux 2.6.0-test6.)
fea681da
MK
930.TP
931.B EINVAL
d34e5645 932.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
fea681da
MK
933Both
934.B CLONE_FS
935and
936.B CLONE_NEWNS
937were specified in
938.IR flags .
939.TP
d34e5645
MK
940.BR EINVAL " (since Linux 3.9)"
941Both
942.B CLONE_NEWUSER
943and
944.B CLONE_FS
945were specified in
946.IR flags .
947.TP
fea681da 948.B EINVAL
82ee147a 949Both
667417b3
MK
950.B CLONE_NEWIPC
951and
952.B CLONE_SYSVSEM
953were specified in
954.IR flags .
955.TP
956.B EINVAL
f0007192 957One (or both) of
82ee147a 958.BR CLONE_NEWPID
f0007192
MK
959or
960.BR CLONE_NEWUSER
961and one (or both) of
82ee147a 962.BR CLONE_THREAD
f0007192
MK
963or
964.BR CLONE_PARENT
82ee147a
MK
965were specified in
966.IR flags .
967.TP
968.B EINVAL
d4748fad 969Returned by the glibc
edcc65ff 970.BR clone ()
d4748fad
MK
971wrapper function when
972.IR fn
973or
974.IR child_stack
975is specified as NULL.
fea681da 976.TP
28cad2c1 977.B EINVAL
667417b3
MK
978.BR CLONE_NEWIPC
979was specified in
980.IR flags ,
981but the kernel was not configured with the
982.B CONFIG_SYSVIPC
983and
984.BR CONFIG_IPC_NS
985options.
986.TP
987.B EINVAL
163bf178
MK
988.BR CLONE_NEWNET
989was specified in
990.IR flags ,
991but the kernel was not configured with the
992.B CONFIG_NET_NS
993option.
994.TP
995.B EINVAL
28cad2c1
MK
996.BR CLONE_NEWPID
997was specified in
998.IR flags ,
999but the kernel was not configured with the
1000.B CONFIG_PID_NS
1001option.
1002.TP
43ce9dda
MK
1003.B EINVAL
1004.BR CLONE_NEWUTS
1005was specified in
1006.IR flags ,
1007but the kernel was not configured with the
1008.B CONFIG_UTS_NS
1009option.
1010.TP
c550a897
MK
1011.B EINVAL
1012.I child_stack
1013is not aligned to a suitable boundary for this architecture.
1014For example, on aarch64,
1015.I child_stack
1016must be a multiple of 16.
1017.TP
fea681da
MK
1018.B ENOMEM
1019There is insufficient memory to allocate a task structure for the
1020child, or to copy those parts of the caller's context that need to be
1021copied.
1022.TP
1023.B EPERM
667417b3 1024.BR CLONE_NEWIPC ,
163bf178 1025.BR CLONE_NEWNET ,
43ce9dda
MK
1026.BR CLONE_NEWNS ,
1027.BR CLONE_NEWPID ,
82ee147a 1028or
43ce9dda 1029.BR CLONE_NEWUTS
00b08db3 1030was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
fea681da
MK
1031.TP
1032.B EPERM
1033.B CLONE_PID
1034was specified by a process other than process 0.
365d292a
MK
1035.TP
1036.B EPERM
1037.BR CLONE_NEWUSER
1038was specified in
1039.IR flags ,
1040but either the effective user ID or the effective group ID of the caller
1041does not have a mapping in the parent namespace (see
f58fb24f 1042.BR user_namespaces (7)).
6fd119e7 1043.TP
ac007938
MK
1044.BR EPERM " (since Linux 3.9)"
1045.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
11a38815
AM
1046.B CLONE_NEWUSER
1047was specified in
ac007938
MK
1048.I flags
1049and the caller is in a chroot environment
1050.\" FIXME What is the rationale for this restriction?
1051(i.e., the caller's root directory does not match the root directory
1052of the mount namespace in which it resides).
1053.TP
6717ee86
MK
1054.BR ERESTARTNOINTR " (since Linux 2.6.17)"
1055.\" commit 4a2c7a7837da1b91468e50426066d988050e4d56
1056System call was interrupted by a signal and will be restarted.
1057(This can be seen only during a trace.)
1058.TP
ac007938 1059.BR EUSERS " (since Linux 3.11)"
6fd119e7
MK
1060.B CLONE_NEWUSER
1061was specified in
1062.IR flags ,
1063and the call would cause the limit on the number of
1064nested user namespaces to be exceeded.
1065See
1066.BR user_namespaces (7).
365d292a
MK
1067.SH VERSIONS
1068There is no entry for
1069.BR clone ()
1070in libc5.
1071glibc2 provides
1072.BR clone ()
1073as described in this manual page.
47297adb 1074.SH CONFORMING TO
a1d5f77c 1075.BR clone ()
e585064b 1076is Linux-specific and should not be used in programs
a1d5f77c 1077intended to be portable.
fea681da 1078.SH NOTES
79bdcc4a
MK
1079The
1080.BR kcmp (2)
1081system call can be used to test whether two processes share various
49dba87f 1082resources such as a file descriptor table,
79bdcc4a
MK
1083System V semaphore undo operations, or a virtual address space.
1084
fd8a5be4
MK
1085In the kernel 2.4.x series,
1086.B CLONE_THREAD
1087generally does not make the parent of the new thread the same
1088as the parent of the calling process.
1089However, for kernel versions 2.4.7 to 2.4.18 the
1090.B CLONE_THREAD
1091flag implied the
c13182ef 1092.B CLONE_PARENT
fd8a5be4 1093flag (as in kernel 2.6).
fea681da 1094
c13182ef
MK
1095For a while there was
1096.B CLONE_DETACHED
a5053dcb 1097(introduced in 2.5.32),
c13182ef 1098which indicated that the parent wants no child-exit signal.
4d543007 1099In Linux 2.6.2, the need to give this flag together with
c13182ef 1100.B CLONE_THREAD
a5053dcb
MK
1101disappeared.
1102This flag is still defined, but has no effect.
1103
34ccb744 1104On i386,
a5a997ca
MK
1105.BR clone ()
1106should not be called through vsyscall, but directly through
1107.IR "int $0x80" .
31830ef0
MK
1108.SH BUGS
1109Versions of the GNU C library that include the NPTL threading library
c13182ef 1110contain a wrapper function for
0bfa087b 1111.BR getpid (2)
31830ef0 1112that performs caching of PIDs.
c60237c9
MK
1113This caching relies on support in the glibc wrapper for
1114.BR clone (),
1115but as currently implemented,
1116the cache may not be up to date in some circumstances.
1117In particular,
1118if a signal is delivered to the child immediately after the
1119.BR clone ()
1120call, then a call to
0b80cf56 1121.BR getpid (2)
c60237c9
MK
1122in a handler for the signal may return the PID
1123of the calling process ("the parent"),
88619baf 1124if the clone wrapper has not yet had a chance to update the PID
c60237c9
MK
1125cache in the child.
1126(This discussion ignores the case where the child was created using
9291ce36 1127.BR CLONE_THREAD ,
c60237c9 1128when
0b80cf56 1129.BR getpid (2)
c60237c9
MK
1130.I should
1131return the same value in the child and in the process that called
1132.BR clone (),
a1d48abb 1133since the caller and the child are in the same thread group.
e7d807b7 1134The stale-cache problem also does not occur if the
a1d48abb
JR
1135.I flags
1136argument includes
1137.BR CLONE_VM .)
c60237c9 1138To obtain the correct PID, it may be necessary to use code such as the following:
31830ef0
MK
1139.nf
1140
1141 #include <syscall.h>
1142
1143 pid_t mypid;
1144
1145 mypid = syscall(SYS_getpid);
1146.fi
c60237c9
MK
1147.\" See also the following bug reports
1148.\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1149.\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
8c7b566c 1150.SH EXAMPLE
8c7b566c 1151The following program demonstrates the use of
9c13072a 1152.BR clone ()
8c7b566c
MK
1153to create a child process that executes in a separate UTS namespace.
1154The child changes the hostname in its UTS namespace.
1155Both parent and child then display the system hostname,
1156making it possible to see that the hostname
1157differs in the UTS namespaces of the parent and child.
1158For an example of the use of this program, see
1159.BR setns (2).
f30b7415 1160.SS Program source
8c7b566c
MK
1161.nf
1162#define _GNU_SOURCE
1163#include <sys/wait.h>
1164#include <sys/utsname.h>
1165#include <sched.h>
1166#include <string.h>
1167#include <stdio.h>
1168#include <stdlib.h>
1169#include <unistd.h>
1170
1171#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
1172 } while (0)
1173
1174static int /* Start function for cloned child */
1175childFunc(void *arg)
1176{
1177 struct utsname uts;
1178
1179 /* Change hostname in UTS namespace of child */
1180
1181 if (sethostname(arg, strlen(arg)) == \-1)
1182 errExit("sethostname");
1183
07d4e6ea 1184 /* Retrieve and display hostname */
8c7b566c
MK
1185
1186 if (uname(&uts) == \-1)
1187 errExit("uname");
1188 printf("uts.nodename in child: %s\\n", uts.nodename);
1189
1190 /* Keep the namespace open for a while, by sleeping.
1191 This allows some experimentation\-\-for example, another
1192 process might join the namespace. */
9f1b9726 1193
8c7b566c
MK
1194 sleep(200);
1195
1196 return 0; /* Child terminates now */
1197}
1198
1199#define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
1200
1201int
1202main(int argc, char *argv[])
1203{
1204 char *stack; /* Start of stack buffer */
1205 char *stackTop; /* End of stack buffer */
1206 pid_t pid;
1207 struct utsname uts;
1208
1209 if (argc < 2) {
1210 fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]);
1211 exit(EXIT_SUCCESS);
1212 }
1213
1214 /* Allocate stack for child */
1215
1216 stack = malloc(STACK_SIZE);
1217 if (stack == NULL)
1218 errExit("malloc");
1219 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
1220
1221 /* Create child that has its own UTS namespace;
1222 child commences execution in childFunc() */
1223
1224 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1225 if (pid == \-1)
1226 errExit("clone");
1227 printf("clone() returned %ld\\n", (long) pid);
1228
1229 /* Parent falls through to here */
1230
1231 sleep(1); /* Give child time to change its hostname */
1232
9f1b9726 1233 /* Display hostname in parent\(aqs UTS namespace. This will be
8c7b566c
MK
1234 different from hostname in child\(aqs UTS namespace. */
1235
1236 if (uname(&uts) == \-1)
1237 errExit("uname");
1238 printf("uts.nodename in parent: %s\\n", uts.nodename);
1239
1240 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
1241 errExit("waitpid");
1242 printf("child has terminated\\n");
1243
1244 exit(EXIT_SUCCESS);
1245}
1246.fi
47297adb 1247.SH SEE ALSO
fea681da 1248.BR fork (2),
2b44301c 1249.BR futex (2),
fea681da
MK
1250.BR getpid (2),
1251.BR gettid (2),
6f8746e4 1252.BR kcmp (2),
f2d0bbf1 1253.BR set_thread_area (2),
2b44301c 1254.BR set_tid_address (2),
8403481f 1255.BR setns (2),
f2d0bbf1 1256.BR tkill (2),
5cc01e9c 1257.BR unshare (2),
fea681da 1258.BR wait (2),
3616b7c0 1259.BR capabilities (7),
41096af1 1260.BR namespaces (7),
3616b7c0 1261.BR pthreads (7)