]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/clone.2
ip.7: tfix
[thirdparty/man-pages.git] / man2 / clone.2
CommitLineData
fea681da 1.\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
8c7b566c 2.\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013
2297bf0e 3.\"
fd0fc519 4.\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
fea681da 5.\" May be distributed under the GNU General Public License.
fd0fc519 6.\" %%%LICENSE_END
dccaff1e 7.\"
fea681da
MK
8.\" Modified by Michael Haardt <michael@moria.de>
9.\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
10.\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
11.\" New man page (copied from 'fork.2').
12.\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
13.\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
14.\" Modified 26 Jun 2001 by Michael Kerrisk
15.\" Mostly upgraded to 2.4.x
16.\" Added prototype for sys_clone() plus description
17.\" Added CLONE_THREAD with a brief description of thread groups
c13182ef 18.\" Added CLONE_PARENT and revised entire page to remove ambiguity
fea681da
MK
19.\" between "calling process" and "parent process"
20.\" Added CLONE_PTRACE and CLONE_VFORK
21.\" Added EPERM and EINVAL error codes
fd8a5be4 22.\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
fea681da 23.\" various other minor tidy ups and clarifications.
c11b1abf 24.\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
d9bfdb9c 25.\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
c11b1abf 26.\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
fea681da
MK
27.\" Added description for CLONE_NEWNS, which was added in 2.4.19
28.\" Slightly rephrased, aeb.
29.\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
30.\" Modified 1 Jan 2004 - various updates, aeb
0967c11f 31.\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
d9bfdb9c 32.\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
31830ef0 33.\" wrapper under BUGS.
fd8a5be4
MK
34.\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
35.\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
4e836144 36.\" 2008-11-18, mtk, order CLONE_* flags alphabetically
82ee147a 37.\" 2008-11-18, mtk, document CLONE_NEWPID
43ce9dda 38.\" 2008-11-19, mtk, document CLONE_NEWUTS
667417b3 39.\" 2008-11-19, mtk, document CLONE_NEWIPC
cfdc761b 40.\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
fea681da 41.\"
97986708 42.TH CLONE 2 2016-03-15 "Linux" "Linux Programmer's Manual"
fea681da 43.SH NAME
9b0e0996 44clone, __clone2 \- create a child process
fea681da 45.SH SYNOPSIS
c10859eb 46.nf
81f10dad
MK
47/* Prototype for the glibc wrapper function */
48
4f71ba5d 49.B #define _GNU_SOURCE
fea681da 50.B #include <sched.h>
c10859eb 51
ff929e3b
MK
52.BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
53.BI " int " flags ", void *" "arg" ", ... "
d3dbc9b1 54.BI " /* pid_t *" ptid ", struct user_desc *" tls \
ff929e3b 55", pid_t *" ctid " */ );"
81f10dad 56
e585064b 57/* Prototype for the raw system call */
81f10dad
MK
58
59.BI "long clone(unsigned long " flags ", void *" child_stack ,
60.BI " void *" ptid ", void *" ctid ,
61.BI " struct pt_regs *" regs );
c10859eb 62.fi
fea681da 63.SH DESCRIPTION
edcc65ff
MK
64.BR clone ()
65creates a new process, in a manner similar to
fea681da 66.BR fork (2).
81f10dad
MK
67
68This page describes both the glibc
e511ffb6 69.BR clone ()
e585064b 70wrapper function and the underlying system call on which it is based.
81f10dad 71The main text describes the wrapper function;
e585064b 72the differences for the raw system call
81f10dad 73are described toward the end of this page.
fea681da
MK
74
75Unlike
76.BR fork (2),
81f10dad
MK
77.BR clone ()
78allows the child process to share parts of its execution context with
fea681da 79the calling process, such as the memory space, the table of file
c13182ef
MK
80descriptors, and the table of signal handlers.
81(Note that on this manual
82page, "calling process" normally corresponds to "parent process".
83But see the description of
84.B CLONE_PARENT
fea681da
MK
85below.)
86
1533d242 87One use of
edcc65ff 88.BR clone ()
fea681da
MK
89is to implement threads: multiple threads of control in a program that
90run concurrently in a shared memory space.
91
92When the child process is created with
c13182ef 93.BR clone (),
fea681da 94it executes the function
c13182ef 95.IR fn ( arg ).
fea681da 96(This differs from
c13182ef 97.BR fork (2),
fea681da 98where execution continues in the child from the point
c13182ef
MK
99of the
100.BR fork (2)
fea681da
MK
101call.)
102The
103.I fn
104argument is a pointer to a function that is called by the child
105process at the beginning of its execution.
106The
107.I arg
108argument is passed to the
109.I fn
110function.
111
c13182ef 112When the
fea681da 113.IR fn ( arg )
c13182ef
MK
114function application returns, the child process terminates.
115The integer returned by
fea681da 116.I fn
c13182ef
MK
117is the exit code for the child process.
118The child process may also terminate explicitly by calling
fea681da
MK
119.BR exit (2)
120or after receiving a fatal signal.
121
122The
123.I child_stack
c13182ef
MK
124argument specifies the location of the stack used by the child process.
125Since the child and calling process may share memory,
fea681da 126it is not possible for the child process to execute in the
c13182ef
MK
127same stack as the calling process.
128The calling process must therefore
fea681da
MK
129set up memory space for the child stack and pass a pointer to this
130space to
edcc65ff 131.BR clone ().
5fab2e7c 132Stacks grow downward on all processors that run Linux
fea681da
MK
133(except the HP PA processors), so
134.I child_stack
135usually points to the topmost address of the memory space set up for
136the child stack.
137
138The low byte of
139.I flags
fd8a5be4
MK
140contains the number of the
141.I "termination signal"
142sent to the parent when the child dies.
143If this signal is specified as anything other than
fea681da
MK
144.BR SIGCHLD ,
145then the parent process must specify the
c13182ef
MK
146.B __WALL
147or
fea681da 148.B __WCLONE
c13182ef
MK
149options when waiting for the child with
150.BR wait (2).
fea681da
MK
151If no signal is specified, then the parent process is not signaled
152when the child terminates.
153
154.I flags
fd8a5be4
MK
155may also be bitwise-or'ed with zero or more of the following constants,
156in order to specify what is shared between the calling process
fea681da 157and the child process:
fea681da 158.TP
f5dbc7c8 159.BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
8ef021ea 160Erase the child thread ID at the location
d3dbc9b1 161.I ctid
f5dbc7c8
MK
162in child memory when the child exits, and do a wakeup on the futex
163at that address.
164The address involved may be changed by the
165.BR set_tid_address (2)
166system call.
167This is used by threading libraries.
168.TP
169.BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
8ef021ea 170Store the child thread ID at the location
d3dbc9b1 171.I ctid
8ef021ea 172in the child's memory.
f5dbc7c8 173.TP
1603d6a1 174.BR CLONE_FILES " (since Linux 2.0)"
fea681da 175If
f5dbc7c8
MK
176.B CLONE_FILES
177is set, the calling process and the child process share the same file
178descriptor table.
179Any file descriptor created by the calling process or by the child
180process is also valid in the other process.
181Similarly, if one of the processes closes a file descriptor,
182or changes its associated flags (using the
183.BR fcntl (2)
184.B F_SETFD
185operation), the other process is also affected.
8a76b19e
KE
186If a process sharing a file descriptor table calls
187.BR execve (2),
188its file descriptor table is duplicated (unshared).
fea681da
MK
189
190If
f5dbc7c8
MK
191.B CLONE_FILES
192is not set, the child process inherits a copy of all file descriptors
193opened in the calling process at the time of
194.BR clone ().
195(The duplicated file descriptors in the child refer to the
196same open file descriptions (see
197.BR open (2))
198as the corresponding file descriptors in the calling process.)
199Subsequent operations that open or close file descriptors,
200or change file descriptor flags,
201performed by either the calling
202process or the child process do not affect the other process.
fea681da 203.TP
1603d6a1 204.BR CLONE_FS " (since Linux 2.0)"
fea681da
MK
205If
206.B CLONE_FS
9ee4a2b6 207is set, the caller and the child process share the same filesystem
c13182ef 208information.
9ee4a2b6 209This includes the root of the filesystem, the current
c13182ef
MK
210working directory, and the umask.
211Any call to
fea681da
MK
212.BR chroot (2),
213.BR chdir (2),
214or
215.BR umask (2)
edcc65ff 216performed by the calling process or the child process also affects the
fea681da
MK
217other process.
218
c13182ef 219If
fea681da 220.B CLONE_FS
9ee4a2b6 221is not set, the child process works on a copy of the filesystem
fea681da 222information of the calling process at the time of the
edcc65ff 223.BR clone ()
fea681da
MK
224call.
225Calls to
226.BR chroot (2),
227.BR chdir (2),
228.BR umask (2)
229performed later by one of the processes do not affect the other process.
fea681da 230.TP
a4cc375e 231.BR CLONE_IO " (since Linux 2.6.25)"
11f27a1c
JA
232If
233.B CLONE_IO
234is set, then the new process shares an I/O context with
235the calling process.
236If this flag is not set, then (as with
237.BR fork (2))
238the new process has its own I/O context.
239
240.\" The following based on text from Jens Axboe
d1f84ed7 241The I/O context is the I/O scope of the disk scheduler (i.e.,
11f27a1c
JA
242what the I/O scheduler uses to model scheduling of a process's I/O).
243If processes share the same I/O context,
244they are treated as one by the I/O scheduler.
245As a consequence, they get to share disk time.
246For some I/O schedulers,
247.\" the anticipatory and CFQ scheduler
248if two processes share an I/O context,
249they will be allowed to interleave their disk access.
250If several threads are doing I/O on behalf of the same process
251.RB ( aio_read (3),
252for instance), they should employ
253.BR CLONE_IO
254to get better I/O performance.
255.\" with CFQ and AS.
256
257If the kernel is not configured with the
258.B CONFIG_BLOCK
259option, this flag is a no-op.
260.TP
c5af0674
MK
261.BR CLONE_NEWCGROUP " (since Linux 4.6)"
262Create the process in a new cgroup namespace.
263If this flag is not set, then (as with
264.BR fork (2))
265the process is created in the same cgroup namespace as the calling process.
266This flag is intended for the implementation of containers.
267
268For further information on cgroup namespaces, see
b9fe4bc3 269.BR cgroup_namespaces (7).
c5af0674
MK
270
271Only a privileged process
272.RB ( CAP_SYS_ADMIN )
273can employ
274.BR CLONE_NEWCGROUP .
275.\"
276.TP
8722311b 277.BR CLONE_NEWIPC " (since Linux 2.6.19)"
667417b3
MK
278If
279.B CLONE_NEWIPC
280is set, then create the process in a new IPC namespace.
281If this flag is not set, then (as with
06b30458 282.BR fork (2)),
667417b3
MK
283the process is created in the same IPC namespace as
284the calling process.
0236bea9 285This flag is intended for the implementation of containers.
667417b3 286
efbfd7ec 287An IPC namespace provides an isolated view of System\ V IPC objects (see
009a049e
MK
288.BR svipc (7))
289and (since Linux 2.6.30)
290.\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
291.\" https://lwn.net/Articles/312232/
292POSIX message queues
293(see
294.BR mq_overview (7)).
19911fa5
MK
295The common characteristic of these IPC mechanisms is that IPC
296objects are identified by mechanisms other than filesystem
297pathnames.
009a049e 298
c440fe01 299Objects created in an IPC namespace are visible to all other processes
667417b3
MK
300that are members of that namespace,
301but are not visible to processes in other IPC namespaces.
302
83c1f4b5 303When an IPC namespace is destroyed
009a049e 304(i.e., when the last process that is a member of the namespace terminates),
83c1f4b5
MK
305all IPC objects in the namespace are automatically destroyed.
306
ab5dd83f
MK
307Only a privileged process
308.RB ( CAP_SYS_ADMIN )
309can employ
310.BR CLONE_NEWIPC .
667417b3
MK
311This flag can't be specified in conjunction with
312.BR CLONE_SYSVSEM .
9343f8e7
MK
313
314For further information on IPC namespaces, see
315.BR namespaces (7).
667417b3 316.TP
163bf178 317.BR CLONE_NEWNET " (since Linux 2.6.24)"
33a0ccb2 318(The implementation of this flag was completed only
9108d867 319by about kernel version 2.6.29.)
163bf178
MK
320
321If
322.B CLONE_NEWNET
323is set, then create the process in a new network namespace.
324If this flag is not set, then (as with
57ef8c39 325.BR fork (2))
163bf178
MK
326the process is created in the same network namespace as
327the calling process.
328This flag is intended for the implementation of containers.
329
330A network namespace provides an isolated view of the networking stack
331(network device interfaces, IPv4 and IPv6 protocol stacks,
332IP routing tables, firewall rules, the
333.I /proc/net
334and
335.I /sys/class/net
336directory trees, sockets, etc.).
337A physical network device can live in exactly one
338network namespace.
339A virtual network device ("veth") pair provides a pipe-like abstraction
bea08fec 340.\" FIXME . Add pointer to veth(4) page when it is eventually completed
163bf178
MK
341that can be used to create tunnels between network namespaces,
342and can be used to create a bridge to a physical network device
343in another namespace.
344
bf032425
SH
345When a network namespace is freed
346(i.e., when the last process in the namespace terminates),
347its physical network devices are moved back to the
348initial network namespace (not to the parent of the process).
73680728
MK
349For further information on network namespaces, see
350.BR namespaces (7).
bf032425 351
ab5dd83f
MK
352Only a privileged process
353.RB ( CAP_SYS_ADMIN )
354can employ
355.BR CLONE_NEWNET .
163bf178 356.TP
c10859eb 357.BR CLONE_NEWNS " (since Linux 2.4.19)"
3dd2331c
MK
358If
359.B CLONE_NEWNS
360is set, the cloned child is started in a new mount namespace,
361initialized with a copy of the namespace of the parent.
362If
fea681da 363.B CLONE_NEWNS
3dd2331c 364is not set, the child lives in the same mount
4df2eb09 365namespace as the parent.
fea681da 366
ab5dd83f
MK
367Only a privileged process
368.RB ( CAP_SYS_ADMIN )
369can employ
370.BR CLONE_NEWNS .
fea681da
MK
371It is not permitted to specify both
372.B CLONE_NEWNS
373and
374.B CLONE_FS
9219d208 375.\" See https://lwn.net/Articles/543273/
fea681da 376in the same
e511ffb6 377.BR clone ()
fea681da 378call.
c212248c
MK
379
380For further information on mount namespaces, see
381.BR namespaces (7)
382and
383.BR mount_namespaces (7).
9d005472
MK
384.TP
385.BR CLONE_NEWPID " (since Linux 2.6.24)"
386.\" This explanation draws a lot of details from
387.\" http://lwn.net/Articles/259217/
388.\" Authors: Pavel Emelyanov <xemul@openvz.org>
389.\" and Kir Kolyshkin <kir@openvz.org>
390.\"
391.\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
392.\" Author: Pavel Emelyanov <xemul@openvz.org>
393If
394.B CLONE_NEWPID
395is set, then create the process in a new PID namespace.
396If this flag is not set, then (as with
397.BR fork (2))
398the process is created in the same PID namespace as
399the calling process.
400This flag is intended for the implementation of containers.
401
402For further information on PID namespaces, see
7e0e902b
MK
403.BR namespaces (7)
404and
39b3f005 405.BR pid_namespaces (7).
9d005472 406
ab5dd83f
MK
407Only a privileged process
408.RB ( CAP_SYS_ADMIN )
409can employ
410.BR CLONE_NEWPID .
9d005472 411This flag can't be specified in conjunction with
f0007192
MK
412.BR CLONE_THREAD
413or
414.BR CLONE_PARENT .
70d21f17 415.TP
06b30458
MK
416.BR CLONE_NEWUSER
417(This flag first became meaningful for
418.BR clone ()
4d2b3ed7
MK
419in Linux 2.6.23,
420the current
11a38815 421.BR clone ()
4d2b3ed7
MK
422semantics were merged in Linux 3.5,
423and the final pieces to make the user namespaces completely usable were
424merged in Linux 3.8.)
425
70d21f17
EB
426If
427.B CLONE_NEWUSER
06b30458
MK
428is set, then create the process in a new user namespace.
429If this flag is not set, then (as with
57ef8c39 430.BR fork (2))
70d21f17
EB
431the process is created in the same user namespace as the calling process.
432
9d005472 433For further information on user namespaces, see
f58fb24f
MK
434.BR namespaces (7)
435and
436.BR user_namespaces (7).
06b30458 437
fefbcba8
MK
438Before Linux 3.8, use of
439.BR CLONE_NEWUSER
440required that the caller have three capabilities:
441.BR CAP_SYS_ADMIN ,
442.BR CAP_SETUID ,
443and
444.BR CAP_SETGID .
445.\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
06b30458 446Starting with Linux 3.8,
9d005472 447no privileges are needed to create a user namespace.
f0007192 448
5e72cf7d
MK
449This flag can't be specified in conjunction with
450.BR CLONE_THREAD
451or
452.BR CLONE_PARENT .
453For security reasons,
454.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
455.\" https://lwn.net/Articles/543273/
456.\" The fix actually went into 3.9 and into 3.8.3. However, user namespaces
457.\" were, for practical purposes, unusable in earlier 3.8.x because of the
ab3311aa 458.\" various filesystems that didn't support userns.
f0007192
MK
459.BR CLONE_NEWUSER
460cannot be specified in conjunction with
5e72cf7d
MK
461.BR CLONE_FS .
462
463For further information on user namespaces, see
464.BR user_namespaces (7).
82ee147a 465.TP
43ce9dda
MK
466.BR CLONE_NEWUTS " (since Linux 2.6.19)"
467If
468.B CLONE_NEWUTS
e1b11906
MK
469is set, then create the process in a new UTS namespace,
470whose identifiers are initialized by duplicating the identifiers
471from the UTS namespace of the calling process.
43ce9dda 472If this flag is not set, then (as with
57ef8c39 473.BR fork (2))
43ce9dda
MK
474the process is created in the same UTS namespace as
475the calling process.
0236bea9 476This flag is intended for the implementation of containers.
43ce9dda
MK
477
478A UTS namespace is the set of identifiers returned by
479.BR uname (2);
850905cf 480among these, the domain name and the hostname can be modified by
43ce9dda
MK
481.BR setdomainname (2)
482and
43ce9dda
MK
483.BR sethostname (2),
484respectively.
c440fe01
MK
485Changes made to the identifiers in a UTS namespace
486are visible to all other processes in the same namespace,
43ce9dda
MK
487but are not visible to processes in other UTS namespaces.
488
ab5dd83f
MK
489Only a privileged process
490.RB ( CAP_SYS_ADMIN )
491can employ
492.BR CLONE_NEWUTS .
9cc7ad66 493
83d9e9b2 494For further information on UTS namespaces, see
9cc7ad66 495.BR namespaces (7).
43ce9dda 496.TP
f5dbc7c8
MK
497.BR CLONE_PARENT " (since Linux 2.3.12)"
498If
499.B CLONE_PARENT
500is set, then the parent of the new child (as returned by
501.BR getppid (2))
502will be the same as that of the calling process.
503
504If
505.B CLONE_PARENT
506is not set, then (as with
507.BR fork (2))
508the child's parent is the calling process.
509
510Note that it is the parent process, as returned by
511.BR getppid (2),
512which is signaled when the child terminates, so that
513if
514.B CLONE_PARENT
515is set, then the parent of the calling process, rather than the
516calling process itself, will be signaled.
517.TP
518.BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
8ef021ea 519Store the child thread ID at the location
d3dbc9b1 520.I ptid
8ef021ea 521in the parent's memory.
f5dbc7c8
MK
522(In Linux 2.5.32-2.5.48 there was a flag
523.B CLONE_SETTID
524that did this.)
525.TP
526.BR CLONE_PID " (obsolete)"
527If
528.B CLONE_PID
529is set, the child process is created with the same process ID as
530the calling process.
531This is good for hacking the system, but otherwise
532of not much use.
533Since 2.3.21 this flag can be
534specified only by the system boot process (PID 0).
28b44abc
MK
535It disappeared in Linux 2.5.16.
536Since then, the kernel silently ignores it without error.
f5dbc7c8 537.TP
1603d6a1 538.BR CLONE_PTRACE " (since Linux 2.2)"
f5dbc7c8
MK
539If
540.B CLONE_PTRACE
541is specified, and the calling process is being traced,
542then trace the child also (see
543.BR ptrace (2)).
544.TP
545.BR CLONE_SETTLS " (since Linux 2.5.32)"
546The
547.I tls
548argument is the new TLS (Thread Local Storage) descriptor.
549(See
550.BR set_thread_area (2).)
551.TP
1603d6a1 552.BR CLONE_SIGHAND " (since Linux 2.0)"
fea681da
MK
553If
554.B CLONE_SIGHAND
314c8ff4 555is set, the calling process and the child process share the same table of
c13182ef
MK
556signal handlers.
557If the calling process or child process calls
fea681da 558.BR sigaction (2)
c13182ef
MK
559to change the behavior associated with a signal, the behavior is
560changed in the other process as well.
561However, the calling process and child
fea681da 562processes still have distinct signal masks and sets of pending
c13182ef
MK
563signals.
564So, one of them may block or unblock some signals using
fea681da
MK
565.BR sigprocmask (2)
566without affecting the other process.
567
568If
569.B CLONE_SIGHAND
570is not set, the child process inherits a copy of the signal handlers
571of the calling process at the time
edcc65ff 572.BR clone ()
c13182ef
MK
573is called.
574Calls to
fea681da
MK
575.BR sigaction (2)
576performed later by one of the processes have no effect on the other
577process.
29546c24
MK
578
579Since Linux 2.6.0-test6,
580.I flags
581must also include
582.B CLONE_VM
583if
584.B CLONE_SIGHAND
585is specified.
fea681da 586.TP
a69b6bda
MK
587.BR CLONE_STOPPED " (since Linux 2.6.0-test2)"
588If
589.B CLONE_STOPPED
590is set, then the child is initially stopped (as though it was sent a
591.B SIGSTOP
592signal), and must be resumed by sending it a
593.B SIGCONT
594signal.
ef37eaf2 595
a60450a9
MK
596This flag was
597.I deprecated
598from Linux 2.6.25 onward,
599and was
600.I removed
28b44abc
MK
601altogether in Linux 2.6.38.
602Since then, the kernel silently ignores it without error.
a5a061ee 603.\" glibc 2.8 removed this defn from bits/sched.h
c5af0674
MK
604Starting with Linux 4.6, the same bit was reused for the
605.BR CLONE_NEWCGROUP
606flag.
a69b6bda 607.TP
f5dbc7c8 608.BR CLONE_SYSVSEM " (since Linux 2.5.10)"
fea681da 609If
f5dbc7c8
MK
610.B CLONE_SYSVSEM
611is set, then the child and the calling process share
5ada4b94
MK
612a single list of System V semaphore adjustment
613.RI ( semadj )
614values (see
f5dbc7c8 615.BR semop (2)).
5ada4b94
MK
616In this case, the shared list accumulates
617.I semadj
618values across all processes sharing the list,
619and semaphore adjustments are performed only when the last process
620that is sharing the list terminates (or ceases sharing the list using
621.BR unshare (2)).
f5d401dd 622If this flag is not set, then the child has a separate
5ada4b94
MK
623.I semadj
624list that is initially empty.
fea681da
MK
625.TP
626.BR CLONE_THREAD " (since Linux 2.4.0-test8)"
627If
628.B CLONE_THREAD
629is set, the child is placed in the same thread group as the calling process.
fd8a5be4
MK
630To make the remainder of the discussion of
631.B CLONE_THREAD
632more readable, the term "thread" is used to refer to the
633processes within a thread group.
fea681da 634
fd8a5be4
MK
635Thread groups were a feature added in Linux 2.4 to support the
636POSIX threads notion of a set of threads that share a single PID.
637Internally, this shared PID is the so-called
638thread group identifier (TGID) for the thread group.
c13182ef 639Since Linux 2.4, calls to
fea681da 640.BR getpid (2)
fd8a5be4
MK
641return the TGID of the caller.
642
643The threads within a group can be distinguished by their (system-wide)
644unique thread IDs (TID).
645A new thread's TID is available as the function result
646returned to the caller of
647.BR clone (),
648and a thread can obtain
649its own TID using
650.BR gettid (2).
651
c13182ef 652When a call is made to
fd8a5be4
MK
653.BR clone ()
654without specifying
655.BR CLONE_THREAD ,
656then the resulting thread is placed in a new thread group
657whose TGID is the same as the thread's TID.
658This thread is the
659.I leader
660of the new thread group.
661
662A new thread created with
663.B CLONE_THREAD
664has the same parent process as the caller of
665.BR clone ()
c13182ef 666(i.e., like
fd8a5be4
MK
667.BR CLONE_PARENT ),
668so that calls to
669.BR getppid (2)
670return the same value for all of the threads in a thread group.
671When a
c13182ef 672.B CLONE_THREAD
fd8a5be4
MK
673thread terminates, the thread that created it using
674.BR clone ()
675is not sent a
676.B SIGCHLD
677(or other termination) signal;
678nor can the status of such a thread be obtained
679using
680.BR wait (2).
681(The thread is said to be
682.IR detached .)
683
e2fbf61d
MK
684After all of the threads in a thread group terminate,
685the parent process of the thread group is sent a
fd8a5be4
MK
686.B SIGCHLD
687(or other termination) signal.
688
689If any of the threads in a thread group performs an
690.BR execve (2),
691then all threads other than the thread group leader are terminated,
692and the new program is executed in the thread group leader.
693
f7110f60
MK
694If one of the threads in a thread group creates a child using
695.BR fork (2),
696then any thread in the group can
697.BR wait (2)
698for that child.
699
edcc65ff 700Since Linux 2.5.35,
fd8a5be4
MK
701.I flags
702must also include
703.B CLONE_SIGHAND
704if
705.B CLONE_THREAD
6fd69f33
MK
706is specified
707(and note that, since Linux 2.6.0-test6,
708.BR CLONE_SIGHAND
709also requires
710.BR CLONE_VM
711to be included).
e2fbf61d
MK
712
713Signals may be sent to a thread group as a whole (i.e., a TGID) using
714.BR kill (2),
715or to a specific thread (i.e., TID) using
716.BR tgkill (2).
717
718Signal dispositions and actions are process-wide:
719if an unhandled signal is delivered to a thread, then
720it will affect (terminate, stop, continue, be ignored in)
721all members of the thread group.
722
99408a60 723Each thread has its own signal mask, as set by
e2fbf61d 724.BR sigprocmask (2),
82a06020 725but signals can be pending either: for the whole process
e2fbf61d
MK
726(i.e., deliverable to any member of the thread group),
727when sent with
82a06020 728.BR kill (2);
e2fbf61d
MK
729or for an individual thread, when sent with
730.BR tgkill (2).
99408a60
MK
731A call to
732.BR sigpending (2)
733returns a signal set that is the union of the signals pending for the
734whole process and the signals that are pending for the calling thread.
e2fbf61d 735
c13182ef 736If
e2fbf61d
MK
737.BR kill (2)
738is used to send a signal to a thread group,
739and the thread group has installed a handler for the signal, then
740the handler will be invoked in exactly one, arbitrarily selected
741member of the thread group that has not blocked the signal.
c13182ef 742If multiple threads in a group are waiting to accept the same signal using
e2fbf61d
MK
743.BR sigwaitinfo (2),
744the kernel will arbitrarily select one of these threads
c13182ef 745to receive a signal sent using
e2fbf61d 746.BR kill (2).
a69b6bda 747.TP
f5dbc7c8 748.BR CLONE_UNTRACED " (since Linux 2.5.46)"
a69b6bda 749If
f5dbc7c8
MK
750.B CLONE_UNTRACED
751is specified, then a tracing process cannot force
752.B CLONE_PTRACE
753on this child process.
fea681da 754.TP
1603d6a1 755.BR CLONE_VFORK " (since Linux 2.2)"
f5dbc7c8
MK
756If
757.B CLONE_VFORK
758is set, the execution of the calling process is suspended
759until the child releases its virtual memory
760resources via a call to
761.BR execve (2)
762or
763.BR _exit (2)
764(as with
765.BR vfork (2)).
766
767If
768.B CLONE_VFORK
4b4a853a 769is not set, then both the calling process and the child are schedulable
f5dbc7c8
MK
770after the call, and an application should not rely on execution occurring
771in any particular order.
fea681da 772.TP
1603d6a1 773.BR CLONE_VM " (since Linux 2.0)"
f5dbc7c8
MK
774If
775.B CLONE_VM
776is set, the calling process and the child process run in the same memory
777space.
778In particular, memory writes performed by the calling process
779or by the child process are also visible in the other process.
780Moreover, any memory mapping or unmapping performed with
781.BR mmap (2)
782or
783.BR munmap (2)
784by the child or calling process also affects the other process.
785
786If
787.B CLONE_VM
788is not set, the child process runs in a separate copy of the memory
789space of the calling process at the time of
790.BR clone ().
791Memory writes or file mappings/unmappings performed by one of the
792processes do not affect the other, as with
793.BR fork (2).
0722a578 794.SS C library/kernel differences
e585064b
MK
795The raw
796.BR clone ()
fea681da
MK
797system call corresponds more closely to
798.BR fork (2)
799in that execution in the child continues from the point of the
c13182ef 800call.
5add3af3
MK
801As such, the
802.I fn
c13182ef 803and
5add3af3
MK
804.I arg
805arguments of the
806.BR clone ()
807wrapper function are omitted.
808Furthermore, the argument order changes.
c787510f 809The raw system call interface on x86 and many other architectures is roughly:
5add3af3
MK
810.in +4
811.nf
812
813.BI "long clone(unsigned long " flags ", void *" child_stack ,
814.BI " void *" ptid ", void *" ctid ,
815.BI " struct pt_regs *" regs );
fea681da 816
5add3af3
MK
817.fi
818.in
e585064b 819Another difference for the raw system call is that the
fea681da 820.I child_stack
c13182ef 821argument may be zero, in which case copy-on-write semantics ensure that the
fea681da 822child gets separate copies of stack pages when either process modifies
c13182ef
MK
823the stack.
824In this case, for correct operation, the
fea681da
MK
825.B CLONE_VM
826option should not be specified.
c787510f 827
e585064b 828For some architectures, the order of the arguments for the system call
c787510f 829differs from that shown above.
7d2e6d74 830On the score, microblaze, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
c787510f
MK
831and MIPS architectures,
832the order of the fourth and fifth arguments is reversed.
833On the cris and s390 architectures,
834the order of the first and second arguments is reversed.
251113d0
MK
835.SS blackfin, m68k, and sparc
836The argument-passing conventions on
04346be5 837blackfin, m68k, and sparc are different from the descriptions above.
251113d0 838For details, see the kernel (and glibc) source.
574c92b6 839.SS ia64
097a1f3b
MK
840On ia64, a different interface is used:
841.nf
842
843.BI "int __clone2(int (*" "fn" ")(void *), "
844.BI " void *" child_stack_base ", size_t " stack_size ,
845.BI " int " flags ", void *" "arg" ", ... "
846.BI " /* pid_t *" ptid ", struct user_desc *" tls \
847", pid_t *" ctid " */ );"
848.fi
849.PP
850The prototype shown above is for the glibc wrapper function;
851the raw system call interface has no
852.I fn
853or
854.I arg
855argument, and changes the order of the arguments so that
856.I flags
857is the first argument, and
858.I tls
859is the last argument.
860.PP
861.BR __clone2 ()
862operates in the same way as
863.BR clone (),
864except that
865.I child_stack_base
866points to the lowest address of the child's stack area,
867and
868.I stack_size
869specifies the size of the stack pointed to by
870.IR child_stack_base .
5add3af3 871.SS Linux 2.4 and earlier
577f9b62
MK
872In Linux 2.4 and earlier,
873.BR clone ()
874does not take arguments
875.IR ptid ,
876.IR tls ,
877and
130b2e49 878.IR ctid .
47297adb 879.SH RETURN VALUE
0bfa087b
MK
880.\" gettid(2) returns current->pid;
881.\" getpid(2) returns current->tgid;
fea681da 882On success, the thread ID of the child process is returned
c13182ef 883in the caller's thread of execution.
84811e86 884On failure, \-1 is returned
fea681da
MK
885in the caller's context, no child process will be created, and
886.I errno
887will be set appropriately.
fea681da
MK
888.SH ERRORS
889.TP
890.B EAGAIN
e1b6e186
MK
891Too many processes are already running; see
892.BR fork (2).
fea681da
MK
893.TP
894.B EINVAL
895.B CLONE_SIGHAND
896was specified, but
897.B CLONE_VM
2e8a7fb3
MK
898was not.
899(Since Linux 2.6.0-test6.)
fea681da
MK
900.TP
901.B EINVAL
902.B CLONE_THREAD
903was specified, but
904.B CLONE_SIGHAND
6387216b
MK
905was not.
906(Since Linux 2.5.35.)
29546c24
MK
907.\" .TP
908.\" .B EINVAL
909.\" Precisely one of
910.\" .B CLONE_DETACHED
911.\" and
912.\" .B CLONE_THREAD
6387216b
MK
913.\" was specified.
914.\" (Since Linux 2.6.0-test6.)
fea681da
MK
915.TP
916.B EINVAL
d34e5645 917.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
fea681da
MK
918Both
919.B CLONE_FS
920and
921.B CLONE_NEWNS
922were specified in
923.IR flags .
924.TP
d34e5645
MK
925.BR EINVAL " (since Linux 3.9)"
926Both
927.B CLONE_NEWUSER
928and
929.B CLONE_FS
930were specified in
931.IR flags .
932.TP
fea681da 933.B EINVAL
82ee147a 934Both
667417b3
MK
935.B CLONE_NEWIPC
936and
937.B CLONE_SYSVSEM
938were specified in
939.IR flags .
940.TP
941.B EINVAL
f0007192 942One (or both) of
82ee147a 943.BR CLONE_NEWPID
f0007192
MK
944or
945.BR CLONE_NEWUSER
946and one (or both) of
82ee147a 947.BR CLONE_THREAD
f0007192
MK
948or
949.BR CLONE_PARENT
82ee147a
MK
950were specified in
951.IR flags .
952.TP
953.B EINVAL
c13182ef 954Returned by
edcc65ff 955.BR clone ()
c13182ef 956when a zero value is specified for
fea681da
MK
957.IR child_stack .
958.TP
28cad2c1 959.B EINVAL
667417b3
MK
960.BR CLONE_NEWIPC
961was specified in
962.IR flags ,
963but the kernel was not configured with the
964.B CONFIG_SYSVIPC
965and
966.BR CONFIG_IPC_NS
967options.
968.TP
969.B EINVAL
163bf178
MK
970.BR CLONE_NEWNET
971was specified in
972.IR flags ,
973but the kernel was not configured with the
974.B CONFIG_NET_NS
975option.
976.TP
977.B EINVAL
28cad2c1
MK
978.BR CLONE_NEWPID
979was specified in
980.IR flags ,
981but the kernel was not configured with the
982.B CONFIG_PID_NS
983option.
984.TP
43ce9dda
MK
985.B EINVAL
986.BR CLONE_NEWUTS
987was specified in
988.IR flags ,
989but the kernel was not configured with the
990.B CONFIG_UTS_NS
991option.
992.TP
c550a897
MK
993.B EINVAL
994.I child_stack
995is not aligned to a suitable boundary for this architecture.
996For example, on aarch64,
997.I child_stack
998must be a multiple of 16.
999.TP
fea681da
MK
1000.B ENOMEM
1001Insufficient memory is available to allocate a task structure for the
1002child, or to copy those parts of the caller's context that need to be
1003copied.
1004.TP
1005.B EPERM
667417b3 1006.BR CLONE_NEWIPC ,
163bf178 1007.BR CLONE_NEWNET ,
43ce9dda
MK
1008.BR CLONE_NEWNS ,
1009.BR CLONE_NEWPID ,
82ee147a 1010or
43ce9dda 1011.BR CLONE_NEWUTS
00b08db3 1012was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
fea681da
MK
1013.TP
1014.B EPERM
1015.B CLONE_PID
1016was specified by a process other than process 0.
365d292a
MK
1017.TP
1018.B EPERM
1019.BR CLONE_NEWUSER
1020was specified in
1021.IR flags ,
1022but either the effective user ID or the effective group ID of the caller
1023does not have a mapping in the parent namespace (see
f58fb24f 1024.BR user_namespaces (7)).
6fd119e7 1025.TP
ac007938
MK
1026.BR EPERM " (since Linux 3.9)"
1027.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
11a38815
AM
1028.B CLONE_NEWUSER
1029was specified in
ac007938
MK
1030.I flags
1031and the caller is in a chroot environment
1032.\" FIXME What is the rationale for this restriction?
1033(i.e., the caller's root directory does not match the root directory
1034of the mount namespace in which it resides).
1035.TP
1036.BR EUSERS " (since Linux 3.11)"
6fd119e7
MK
1037.B CLONE_NEWUSER
1038was specified in
1039.IR flags ,
1040and the call would cause the limit on the number of
1041nested user namespaces to be exceeded.
1042See
1043.BR user_namespaces (7).
10e46057
NF
1044.TP
1045.BR ERESTARTNOINTR " (since Linux 2.6.17)"
11a6d050 1046.\" commit 4a2c7a7837da1b91468e50426066d988050e4d56
10e46057
NF
1047System call was interrupted by a signal and will be restarted.
1048(This can be seen only during a trace.)
365d292a
MK
1049.SH VERSIONS
1050There is no entry for
1051.BR clone ()
1052in libc5.
1053glibc2 provides
1054.BR clone ()
1055as described in this manual page.
47297adb 1056.SH CONFORMING TO
a1d5f77c 1057.BR clone ()
e585064b 1058is Linux-specific and should not be used in programs
a1d5f77c 1059intended to be portable.
fea681da 1060.SH NOTES
fd8a5be4
MK
1061In the kernel 2.4.x series,
1062.B CLONE_THREAD
1063generally does not make the parent of the new thread the same
1064as the parent of the calling process.
1065However, for kernel versions 2.4.7 to 2.4.18 the
1066.B CLONE_THREAD
1067flag implied the
c13182ef 1068.B CLONE_PARENT
fd8a5be4 1069flag (as in kernel 2.6).
fea681da 1070
c13182ef
MK
1071For a while there was
1072.B CLONE_DETACHED
a5053dcb 1073(introduced in 2.5.32):
c13182ef 1074this flag specified that the parent wants no child-exit signal.
a5053dcb 1075In 2.6.2 the need to give this
c13182ef
MK
1076together with
1077.B CLONE_THREAD
a5053dcb
MK
1078disappeared.
1079This flag is still defined, but has no effect.
1080
34ccb744 1081On i386,
a5a997ca
MK
1082.BR clone ()
1083should not be called through vsyscall, but directly through
1084.IR "int $0x80" .
31830ef0
MK
1085.SH BUGS
1086Versions of the GNU C library that include the NPTL threading library
c13182ef 1087contain a wrapper function for
0bfa087b 1088.BR getpid (2)
31830ef0 1089that performs caching of PIDs.
c60237c9
MK
1090This caching relies on support in the glibc wrapper for
1091.BR clone (),
1092but as currently implemented,
1093the cache may not be up to date in some circumstances.
1094In particular,
1095if a signal is delivered to the child immediately after the
1096.BR clone ()
1097call, then a call to
0b80cf56 1098.BR getpid (2)
c60237c9
MK
1099in a handler for the signal may return the PID
1100of the calling process ("the parent"),
88619baf 1101if the clone wrapper has not yet had a chance to update the PID
c60237c9
MK
1102cache in the child.
1103(This discussion ignores the case where the child was created using
9291ce36 1104.BR CLONE_THREAD ,
c60237c9 1105when
0b80cf56 1106.BR getpid (2)
c60237c9
MK
1107.I should
1108return the same value in the child and in the process that called
1109.BR clone (),
a1d48abb 1110since the caller and the child are in the same thread group.
e7d807b7 1111The stale-cache problem also does not occur if the
a1d48abb
JR
1112.I flags
1113argument includes
1114.BR CLONE_VM .)
c60237c9 1115To obtain the correct PID, it may be necessary to use code such as the following:
31830ef0
MK
1116.nf
1117
1118 #include <syscall.h>
1119
1120 pid_t mypid;
1121
1122 mypid = syscall(SYS_getpid);
1123.fi
c60237c9
MK
1124.\" See also the following bug reports
1125.\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1126.\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
8c7b566c 1127.SH EXAMPLE
8c7b566c 1128The following program demonstrates the use of
9c13072a 1129.BR clone ()
8c7b566c
MK
1130to create a child process that executes in a separate UTS namespace.
1131The child changes the hostname in its UTS namespace.
1132Both parent and child then display the system hostname,
1133making it possible to see that the hostname
1134differs in the UTS namespaces of the parent and child.
1135For an example of the use of this program, see
1136.BR setns (2).
f30b7415 1137.SS Program source
8c7b566c
MK
1138.nf
1139#define _GNU_SOURCE
1140#include <sys/wait.h>
1141#include <sys/utsname.h>
1142#include <sched.h>
1143#include <string.h>
1144#include <stdio.h>
1145#include <stdlib.h>
1146#include <unistd.h>
1147
1148#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
1149 } while (0)
1150
1151static int /* Start function for cloned child */
1152childFunc(void *arg)
1153{
1154 struct utsname uts;
1155
1156 /* Change hostname in UTS namespace of child */
1157
1158 if (sethostname(arg, strlen(arg)) == \-1)
1159 errExit("sethostname");
1160
07d4e6ea 1161 /* Retrieve and display hostname */
8c7b566c
MK
1162
1163 if (uname(&uts) == \-1)
1164 errExit("uname");
1165 printf("uts.nodename in child: %s\\n", uts.nodename);
1166
1167 /* Keep the namespace open for a while, by sleeping.
1168 This allows some experimentation\-\-for example, another
1169 process might join the namespace. */
9f1b9726 1170
8c7b566c
MK
1171 sleep(200);
1172
1173 return 0; /* Child terminates now */
1174}
1175
1176#define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
1177
1178int
1179main(int argc, char *argv[])
1180{
1181 char *stack; /* Start of stack buffer */
1182 char *stackTop; /* End of stack buffer */
1183 pid_t pid;
1184 struct utsname uts;
1185
1186 if (argc < 2) {
1187 fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]);
1188 exit(EXIT_SUCCESS);
1189 }
1190
1191 /* Allocate stack for child */
1192
1193 stack = malloc(STACK_SIZE);
1194 if (stack == NULL)
1195 errExit("malloc");
1196 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
1197
1198 /* Create child that has its own UTS namespace;
1199 child commences execution in childFunc() */
1200
1201 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1202 if (pid == \-1)
1203 errExit("clone");
1204 printf("clone() returned %ld\\n", (long) pid);
1205
1206 /* Parent falls through to here */
1207
1208 sleep(1); /* Give child time to change its hostname */
1209
9f1b9726 1210 /* Display hostname in parent\(aqs UTS namespace. This will be
8c7b566c
MK
1211 different from hostname in child\(aqs UTS namespace. */
1212
1213 if (uname(&uts) == \-1)
1214 errExit("uname");
1215 printf("uts.nodename in parent: %s\\n", uts.nodename);
1216
1217 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
1218 errExit("waitpid");
1219 printf("child has terminated\\n");
1220
1221 exit(EXIT_SUCCESS);
1222}
1223.fi
47297adb 1224.SH SEE ALSO
fea681da 1225.BR fork (2),
2b44301c 1226.BR futex (2),
fea681da
MK
1227.BR getpid (2),
1228.BR gettid (2),
6f8746e4 1229.BR kcmp (2),
f2d0bbf1 1230.BR set_thread_area (2),
2b44301c 1231.BR set_tid_address (2),
8403481f 1232.BR setns (2),
f2d0bbf1 1233.BR tkill (2),
5cc01e9c 1234.BR unshare (2),
fea681da 1235.BR wait (2),
3616b7c0 1236.BR capabilities (7),
41096af1 1237.BR namespaces (7),
3616b7c0 1238.BR pthreads (7)