]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/clone.2
getpwent_r.3: wfix
[thirdparty/man-pages.git] / man2 / clone.2
CommitLineData
fea681da 1.\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
8c7b566c 2.\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013
2297bf0e 3.\"
fd0fc519 4.\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
fea681da 5.\" May be distributed under the GNU General Public License.
fd0fc519 6.\" %%%LICENSE_END
dccaff1e 7.\"
fea681da
MK
8.\" Modified by Michael Haardt <michael@moria.de>
9.\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
10.\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
11.\" New man page (copied from 'fork.2').
12.\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
13.\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
14.\" Modified 26 Jun 2001 by Michael Kerrisk
15.\" Mostly upgraded to 2.4.x
16.\" Added prototype for sys_clone() plus description
17.\" Added CLONE_THREAD with a brief description of thread groups
c13182ef 18.\" Added CLONE_PARENT and revised entire page to remove ambiguity
fea681da
MK
19.\" between "calling process" and "parent process"
20.\" Added CLONE_PTRACE and CLONE_VFORK
21.\" Added EPERM and EINVAL error codes
fd8a5be4 22.\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
fea681da 23.\" various other minor tidy ups and clarifications.
c11b1abf 24.\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
d9bfdb9c 25.\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
c11b1abf 26.\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
fea681da
MK
27.\" Added description for CLONE_NEWNS, which was added in 2.4.19
28.\" Slightly rephrased, aeb.
29.\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
30.\" Modified 1 Jan 2004 - various updates, aeb
0967c11f 31.\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
d9bfdb9c 32.\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
31830ef0 33.\" wrapper under BUGS.
fd8a5be4
MK
34.\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
35.\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
4e836144 36.\" 2008-11-18, mtk, order CLONE_* flags alphabetically
82ee147a 37.\" 2008-11-18, mtk, document CLONE_NEWPID
43ce9dda 38.\" 2008-11-19, mtk, document CLONE_NEWUTS
667417b3 39.\" 2008-11-19, mtk, document CLONE_NEWIPC
cfdc761b 40.\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
fea681da 41.\"
daf084cc 42.TH CLONE 2 2014-09-21 "Linux" "Linux Programmer's Manual"
fea681da 43.SH NAME
9b0e0996 44clone, __clone2 \- create a child process
fea681da 45.SH SYNOPSIS
c10859eb 46.nf
81f10dad
MK
47/* Prototype for the glibc wrapper function */
48
fea681da 49.B #include <sched.h>
c10859eb 50
ff929e3b
MK
51.BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
52.BI " int " flags ", void *" "arg" ", ... "
d3dbc9b1 53.BI " /* pid_t *" ptid ", struct user_desc *" tls \
ff929e3b 54", pid_t *" ctid " */ );"
81f10dad 55
e585064b 56/* Prototype for the raw system call */
81f10dad
MK
57
58.BI "long clone(unsigned long " flags ", void *" child_stack ,
59.BI " void *" ptid ", void *" ctid ,
60.BI " struct pt_regs *" regs );
c10859eb 61.fi
e73b3103
MK
62.sp
63.in -4n
81f10dad 64Feature Test Macro Requirements for glibc wrapper function (see
e73b3103
MK
65.BR feature_test_macros (7)):
66.in
67.sp
68.BR clone ():
69.ad l
70.RS 4
71.PD 0
72.TP 4
73Since glibc 2.14:
74_GNU_SOURCE
75.TP 4
bd297db0 76.\" See http://sources.redhat.com/bugzilla/show_bug.cgi?id=4749
e73b3103
MK
77Before glibc 2.14:
78_BSD_SOURCE || _SVID_SOURCE
79 /* _GNU_SOURCE also suffices */
80.PD
81.RE
82.ad b
fea681da 83.SH DESCRIPTION
edcc65ff
MK
84.BR clone ()
85creates a new process, in a manner similar to
fea681da 86.BR fork (2).
81f10dad
MK
87
88This page describes both the glibc
e511ffb6 89.BR clone ()
e585064b 90wrapper function and the underlying system call on which it is based.
81f10dad 91The main text describes the wrapper function;
e585064b 92the differences for the raw system call
81f10dad 93are described toward the end of this page.
fea681da
MK
94
95Unlike
96.BR fork (2),
81f10dad
MK
97.BR clone ()
98allows the child process to share parts of its execution context with
fea681da 99the calling process, such as the memory space, the table of file
c13182ef
MK
100descriptors, and the table of signal handlers.
101(Note that on this manual
102page, "calling process" normally corresponds to "parent process".
103But see the description of
104.B CLONE_PARENT
fea681da
MK
105below.)
106
107The main use of
edcc65ff 108.BR clone ()
fea681da
MK
109is to implement threads: multiple threads of control in a program that
110run concurrently in a shared memory space.
111
112When the child process is created with
c13182ef 113.BR clone (),
fea681da 114it executes the function
c13182ef 115.IR fn ( arg ).
fea681da 116(This differs from
c13182ef 117.BR fork (2),
fea681da 118where execution continues in the child from the point
c13182ef
MK
119of the
120.BR fork (2)
fea681da
MK
121call.)
122The
123.I fn
124argument is a pointer to a function that is called by the child
125process at the beginning of its execution.
126The
127.I arg
128argument is passed to the
129.I fn
130function.
131
c13182ef 132When the
fea681da 133.IR fn ( arg )
c13182ef
MK
134function application returns, the child process terminates.
135The integer returned by
fea681da 136.I fn
c13182ef
MK
137is the exit code for the child process.
138The child process may also terminate explicitly by calling
fea681da
MK
139.BR exit (2)
140or after receiving a fatal signal.
141
142The
143.I child_stack
c13182ef
MK
144argument specifies the location of the stack used by the child process.
145Since the child and calling process may share memory,
fea681da 146it is not possible for the child process to execute in the
c13182ef
MK
147same stack as the calling process.
148The calling process must therefore
fea681da
MK
149set up memory space for the child stack and pass a pointer to this
150space to
edcc65ff 151.BR clone ().
5fab2e7c 152Stacks grow downward on all processors that run Linux
fea681da
MK
153(except the HP PA processors), so
154.I child_stack
155usually points to the topmost address of the memory space set up for
156the child stack.
157
158The low byte of
159.I flags
fd8a5be4
MK
160contains the number of the
161.I "termination signal"
162sent to the parent when the child dies.
163If this signal is specified as anything other than
fea681da
MK
164.BR SIGCHLD ,
165then the parent process must specify the
c13182ef
MK
166.B __WALL
167or
fea681da 168.B __WCLONE
c13182ef
MK
169options when waiting for the child with
170.BR wait (2).
fea681da
MK
171If no signal is specified, then the parent process is not signaled
172when the child terminates.
173
174.I flags
fd8a5be4
MK
175may also be bitwise-or'ed with zero or more of the following constants,
176in order to specify what is shared between the calling process
fea681da 177and the child process:
fea681da 178.TP
f5dbc7c8
MK
179.BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
180Erase child thread ID at location
d3dbc9b1 181.I ctid
f5dbc7c8
MK
182in child memory when the child exits, and do a wakeup on the futex
183at that address.
184The address involved may be changed by the
185.BR set_tid_address (2)
186system call.
187This is used by threading libraries.
188.TP
189.BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
190Store child thread ID at location
d3dbc9b1 191.I ctid
f5dbc7c8
MK
192in child memory.
193.TP
1603d6a1 194.BR CLONE_FILES " (since Linux 2.0)"
fea681da 195If
f5dbc7c8
MK
196.B CLONE_FILES
197is set, the calling process and the child process share the same file
198descriptor table.
199Any file descriptor created by the calling process or by the child
200process is also valid in the other process.
201Similarly, if one of the processes closes a file descriptor,
202or changes its associated flags (using the
203.BR fcntl (2)
204.B F_SETFD
205operation), the other process is also affected.
fea681da
MK
206
207If
f5dbc7c8
MK
208.B CLONE_FILES
209is not set, the child process inherits a copy of all file descriptors
210opened in the calling process at the time of
211.BR clone ().
212(The duplicated file descriptors in the child refer to the
213same open file descriptions (see
214.BR open (2))
215as the corresponding file descriptors in the calling process.)
216Subsequent operations that open or close file descriptors,
217or change file descriptor flags,
218performed by either the calling
219process or the child process do not affect the other process.
fea681da 220.TP
1603d6a1 221.BR CLONE_FS " (since Linux 2.0)"
fea681da
MK
222If
223.B CLONE_FS
9ee4a2b6 224is set, the caller and the child process share the same filesystem
c13182ef 225information.
9ee4a2b6 226This includes the root of the filesystem, the current
c13182ef
MK
227working directory, and the umask.
228Any call to
fea681da
MK
229.BR chroot (2),
230.BR chdir (2),
231or
232.BR umask (2)
edcc65ff 233performed by the calling process or the child process also affects the
fea681da
MK
234other process.
235
c13182ef 236If
fea681da 237.B CLONE_FS
9ee4a2b6 238is not set, the child process works on a copy of the filesystem
fea681da 239information of the calling process at the time of the
edcc65ff 240.BR clone ()
fea681da
MK
241call.
242Calls to
243.BR chroot (2),
244.BR chdir (2),
245.BR umask (2)
246performed later by one of the processes do not affect the other process.
fea681da 247.TP
a4cc375e 248.BR CLONE_IO " (since Linux 2.6.25)"
11f27a1c
JA
249If
250.B CLONE_IO
251is set, then the new process shares an I/O context with
252the calling process.
253If this flag is not set, then (as with
254.BR fork (2))
255the new process has its own I/O context.
256
257.\" The following based on text from Jens Axboe
a113945f 258The I/O context is the I/O scope of the disk scheduler (i.e.,
11f27a1c
JA
259what the I/O scheduler uses to model scheduling of a process's I/O).
260If processes share the same I/O context,
261they are treated as one by the I/O scheduler.
262As a consequence, they get to share disk time.
263For some I/O schedulers,
264.\" the anticipatory and CFQ scheduler
265if two processes share an I/O context,
266they will be allowed to interleave their disk access.
267If several threads are doing I/O on behalf of the same process
268.RB ( aio_read (3),
269for instance), they should employ
270.BR CLONE_IO
271to get better I/O performance.
272.\" with CFQ and AS.
273
274If the kernel is not configured with the
275.B CONFIG_BLOCK
276option, this flag is a no-op.
277.TP
8722311b 278.BR CLONE_NEWIPC " (since Linux 2.6.19)"
667417b3
MK
279If
280.B CLONE_NEWIPC
281is set, then create the process in a new IPC namespace.
282If this flag is not set, then (as with
06b30458 283.BR fork (2)),
667417b3
MK
284the process is created in the same IPC namespace as
285the calling process.
0236bea9 286This flag is intended for the implementation of containers.
667417b3 287
efbfd7ec 288An IPC namespace provides an isolated view of System\ V IPC objects (see
009a049e
MK
289.BR svipc (7))
290and (since Linux 2.6.30)
291.\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
292.\" https://lwn.net/Articles/312232/
293POSIX message queues
294(see
295.BR mq_overview (7)).
19911fa5
MK
296The common characteristic of these IPC mechanisms is that IPC
297objects are identified by mechanisms other than filesystem
298pathnames.
009a049e 299
c440fe01 300Objects created in an IPC namespace are visible to all other processes
667417b3
MK
301that are members of that namespace,
302but are not visible to processes in other IPC namespaces.
303
83c1f4b5 304When an IPC namespace is destroyed
009a049e 305(i.e., when the last process that is a member of the namespace terminates),
83c1f4b5
MK
306all IPC objects in the namespace are automatically destroyed.
307
ab5dd83f
MK
308Only a privileged process
309.RB ( CAP_SYS_ADMIN )
310can employ
311.BR CLONE_NEWIPC .
667417b3
MK
312This flag can't be specified in conjunction with
313.BR CLONE_SYSVSEM .
9343f8e7
MK
314
315For further information on IPC namespaces, see
316.BR namespaces (7).
667417b3 317.TP
163bf178 318.BR CLONE_NEWNET " (since Linux 2.6.24)"
33a0ccb2 319(The implementation of this flag was completed only
9108d867 320by about kernel version 2.6.29.)
163bf178
MK
321
322If
323.B CLONE_NEWNET
324is set, then create the process in a new network namespace.
325If this flag is not set, then (as with
57ef8c39 326.BR fork (2))
163bf178
MK
327the process is created in the same network namespace as
328the calling process.
329This flag is intended for the implementation of containers.
330
331A network namespace provides an isolated view of the networking stack
332(network device interfaces, IPv4 and IPv6 protocol stacks,
333IP routing tables, firewall rules, the
334.I /proc/net
335and
336.I /sys/class/net
337directory trees, sockets, etc.).
338A physical network device can live in exactly one
339network namespace.
340A virtual network device ("veth") pair provides a pipe-like abstraction
bea08fec 341.\" FIXME . Add pointer to veth(4) page when it is eventually completed
163bf178
MK
342that can be used to create tunnels between network namespaces,
343and can be used to create a bridge to a physical network device
344in another namespace.
345
bf032425
SH
346When a network namespace is freed
347(i.e., when the last process in the namespace terminates),
348its physical network devices are moved back to the
349initial network namespace (not to the parent of the process).
73680728
MK
350For further information on network namespaces, see
351.BR namespaces (7).
bf032425 352
ab5dd83f
MK
353Only a privileged process
354.RB ( CAP_SYS_ADMIN )
355can employ
356.BR CLONE_NEWNET .
163bf178 357.TP
c10859eb 358.BR CLONE_NEWNS " (since Linux 2.4.19)"
3dd2331c
MK
359If
360.B CLONE_NEWNS
361is set, the cloned child is started in a new mount namespace,
362initialized with a copy of the namespace of the parent.
363If
fea681da 364.B CLONE_NEWNS
3dd2331c 365is not set, the child lives in the same mount
4df2eb09 366namespace as the parent.
fea681da 367
3dd2331c
MK
368For further information on mount namespaces, see
369.BR namespaces (7).
fea681da 370
ab5dd83f
MK
371Only a privileged process
372.RB ( CAP_SYS_ADMIN )
373can employ
374.BR CLONE_NEWNS .
fea681da
MK
375It is not permitted to specify both
376.B CLONE_NEWNS
377and
378.B CLONE_FS
9219d208 379.\" See https://lwn.net/Articles/543273/
fea681da 380in the same
e511ffb6 381.BR clone ()
fea681da 382call.
9d005472
MK
383.TP
384.BR CLONE_NEWPID " (since Linux 2.6.24)"
385.\" This explanation draws a lot of details from
386.\" http://lwn.net/Articles/259217/
387.\" Authors: Pavel Emelyanov <xemul@openvz.org>
388.\" and Kir Kolyshkin <kir@openvz.org>
389.\"
390.\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
391.\" Author: Pavel Emelyanov <xemul@openvz.org>
392If
393.B CLONE_NEWPID
394is set, then create the process in a new PID namespace.
395If this flag is not set, then (as with
396.BR fork (2))
397the process is created in the same PID namespace as
398the calling process.
399This flag is intended for the implementation of containers.
400
401For further information on PID namespaces, see
7e0e902b
MK
402.BR namespaces (7)
403and
404.BR pid_namespaces (7).
9d005472 405
ab5dd83f
MK
406Only a privileged process
407.RB ( CAP_SYS_ADMIN )
408can employ
409.BR CLONE_NEWPID .
9d005472 410This flag can't be specified in conjunction with
f0007192
MK
411.BR CLONE_THREAD
412or
413.BR CLONE_PARENT .
70d21f17 414.TP
06b30458
MK
415.BR CLONE_NEWUSER
416(This flag first became meaningful for
417.BR clone ()
4d2b3ed7
MK
418in Linux 2.6.23,
419the current
420.BR clone ()
421semantics were merged in Linux 3.5,
422and the final pieces to make the user namespaces completely usable were
423merged in Linux 3.8.)
424
70d21f17
EB
425If
426.B CLONE_NEWUSER
06b30458
MK
427is set, then create the process in a new user namespace.
428If this flag is not set, then (as with
57ef8c39 429.BR fork (2))
70d21f17
EB
430the process is created in the same user namespace as the calling process.
431
9d005472 432For further information on user namespaces, see
f58fb24f
MK
433.BR namespaces (7)
434and
435.BR user_namespaces (7).
06b30458 436
fefbcba8
MK
437Before Linux 3.8, use of
438.BR CLONE_NEWUSER
439required that the caller have three capabilities:
440.BR CAP_SYS_ADMIN ,
441.BR CAP_SETUID ,
442and
443.BR CAP_SETGID .
444.\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
06b30458 445Starting with Linux 3.8,
9d005472 446no privileges are needed to create a user namespace.
f0007192 447
5e72cf7d
MK
448This flag can't be specified in conjunction with
449.BR CLONE_THREAD
450or
451.BR CLONE_PARENT .
452For security reasons,
453.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
454.\" https://lwn.net/Articles/543273/
455.\" The fix actually went into 3.9 and into 3.8.3. However, user namespaces
456.\" were, for practical purposes, unusable in earlier 3.8.x because of the
ab3311aa 457.\" various filesystems that didn't support userns.
f0007192
MK
458.BR CLONE_NEWUSER
459cannot be specified in conjunction with
5e72cf7d
MK
460.BR CLONE_FS .
461
462For further information on user namespaces, see
463.BR user_namespaces (7).
82ee147a 464.TP
43ce9dda
MK
465.BR CLONE_NEWUTS " (since Linux 2.6.19)"
466If
467.B CLONE_NEWUTS
e1b11906
MK
468is set, then create the process in a new UTS namespace,
469whose identifiers are initialized by duplicating the identifiers
470from the UTS namespace of the calling process.
43ce9dda 471If this flag is not set, then (as with
57ef8c39 472.BR fork (2))
43ce9dda
MK
473the process is created in the same UTS namespace as
474the calling process.
0236bea9 475This flag is intended for the implementation of containers.
43ce9dda
MK
476
477A UTS namespace is the set of identifiers returned by
478.BR uname (2);
850905cf 479among these, the domain name and the hostname can be modified by
43ce9dda
MK
480.BR setdomainname (2)
481and
43ce9dda
MK
482.BR sethostname (2),
483respectively.
c440fe01
MK
484Changes made to the identifiers in a UTS namespace
485are visible to all other processes in the same namespace,
43ce9dda
MK
486but are not visible to processes in other UTS namespaces.
487
ab5dd83f
MK
488Only a privileged process
489.RB ( CAP_SYS_ADMIN )
490can employ
491.BR CLONE_NEWUTS .
9cc7ad66 492
83d9e9b2 493For further information on UTS namespaces, see
9cc7ad66 494.BR namespaces (7).
43ce9dda 495.TP
f5dbc7c8
MK
496.BR CLONE_PARENT " (since Linux 2.3.12)"
497If
498.B CLONE_PARENT
499is set, then the parent of the new child (as returned by
500.BR getppid (2))
501will be the same as that of the calling process.
502
503If
504.B CLONE_PARENT
505is not set, then (as with
506.BR fork (2))
507the child's parent is the calling process.
508
509Note that it is the parent process, as returned by
510.BR getppid (2),
511which is signaled when the child terminates, so that
512if
513.B CLONE_PARENT
514is set, then the parent of the calling process, rather than the
515calling process itself, will be signaled.
516.TP
517.BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
518Store child thread ID at location
d3dbc9b1 519.I ptid
f5dbc7c8
MK
520in parent and child memory.
521(In Linux 2.5.32-2.5.48 there was a flag
522.B CLONE_SETTID
523that did this.)
524.TP
525.BR CLONE_PID " (obsolete)"
526If
527.B CLONE_PID
528is set, the child process is created with the same process ID as
529the calling process.
530This is good for hacking the system, but otherwise
531of not much use.
532Since 2.3.21 this flag can be
533specified only by the system boot process (PID 0).
534It disappeared in Linux 2.5.16.
535.TP
1603d6a1 536.BR CLONE_PTRACE " (since Linux 2.2)"
f5dbc7c8
MK
537If
538.B CLONE_PTRACE
539is specified, and the calling process is being traced,
540then trace the child also (see
541.BR ptrace (2)).
542.TP
543.BR CLONE_SETTLS " (since Linux 2.5.32)"
544The
545.I tls
546argument is the new TLS (Thread Local Storage) descriptor.
547(See
548.BR set_thread_area (2).)
549.TP
1603d6a1 550.BR CLONE_SIGHAND " (since Linux 2.0)"
fea681da
MK
551If
552.B CLONE_SIGHAND
314c8ff4 553is set, the calling process and the child process share the same table of
c13182ef
MK
554signal handlers.
555If the calling process or child process calls
fea681da 556.BR sigaction (2)
c13182ef
MK
557to change the behavior associated with a signal, the behavior is
558changed in the other process as well.
559However, the calling process and child
fea681da 560processes still have distinct signal masks and sets of pending
c13182ef
MK
561signals.
562So, one of them may block or unblock some signals using
fea681da
MK
563.BR sigprocmask (2)
564without affecting the other process.
565
566If
567.B CLONE_SIGHAND
568is not set, the child process inherits a copy of the signal handlers
569of the calling process at the time
edcc65ff 570.BR clone ()
c13182ef
MK
571is called.
572Calls to
fea681da
MK
573.BR sigaction (2)
574performed later by one of the processes have no effect on the other
575process.
29546c24
MK
576
577Since Linux 2.6.0-test6,
578.I flags
579must also include
580.B CLONE_VM
581if
582.B CLONE_SIGHAND
583is specified.
fea681da 584.TP
a69b6bda
MK
585.BR CLONE_STOPPED " (since Linux 2.6.0-test2)"
586If
587.B CLONE_STOPPED
588is set, then the child is initially stopped (as though it was sent a
589.B SIGSTOP
590signal), and must be resumed by sending it a
591.B SIGCONT
592signal.
ef37eaf2 593
a60450a9
MK
594This flag was
595.I deprecated
596from Linux 2.6.25 onward,
597and was
598.I removed
599altogether in Linux 2.6.38.
a5a061ee 600.\" glibc 2.8 removed this defn from bits/sched.h
a69b6bda 601.TP
f5dbc7c8 602.BR CLONE_SYSVSEM " (since Linux 2.5.10)"
fea681da 603If
f5dbc7c8
MK
604.B CLONE_SYSVSEM
605is set, then the child and the calling process share
5ada4b94
MK
606a single list of System V semaphore adjustment
607.RI ( semadj )
608values (see
f5dbc7c8 609.BR semop (2)).
5ada4b94
MK
610In this case, the shared list accumulates
611.I semadj
612values across all processes sharing the list,
613and semaphore adjustments are performed only when the last process
614that is sharing the list terminates (or ceases sharing the list using
615.BR unshare (2)).
f5d401dd 616If this flag is not set, then the child has a separate
5ada4b94
MK
617.I semadj
618list that is initially empty.
fea681da
MK
619.TP
620.BR CLONE_THREAD " (since Linux 2.4.0-test8)"
621If
622.B CLONE_THREAD
623is set, the child is placed in the same thread group as the calling process.
fd8a5be4
MK
624To make the remainder of the discussion of
625.B CLONE_THREAD
626more readable, the term "thread" is used to refer to the
627processes within a thread group.
fea681da 628
fd8a5be4
MK
629Thread groups were a feature added in Linux 2.4 to support the
630POSIX threads notion of a set of threads that share a single PID.
631Internally, this shared PID is the so-called
632thread group identifier (TGID) for the thread group.
c13182ef 633Since Linux 2.4, calls to
fea681da 634.BR getpid (2)
fd8a5be4
MK
635return the TGID of the caller.
636
637The threads within a group can be distinguished by their (system-wide)
638unique thread IDs (TID).
639A new thread's TID is available as the function result
640returned to the caller of
641.BR clone (),
642and a thread can obtain
643its own TID using
644.BR gettid (2).
645
c13182ef 646When a call is made to
fd8a5be4
MK
647.BR clone ()
648without specifying
649.BR CLONE_THREAD ,
650then the resulting thread is placed in a new thread group
651whose TGID is the same as the thread's TID.
652This thread is the
653.I leader
654of the new thread group.
655
656A new thread created with
657.B CLONE_THREAD
658has the same parent process as the caller of
659.BR clone ()
c13182ef 660(i.e., like
fd8a5be4
MK
661.BR CLONE_PARENT ),
662so that calls to
663.BR getppid (2)
664return the same value for all of the threads in a thread group.
665When a
c13182ef 666.B CLONE_THREAD
fd8a5be4
MK
667thread terminates, the thread that created it using
668.BR clone ()
669is not sent a
670.B SIGCHLD
671(or other termination) signal;
672nor can the status of such a thread be obtained
673using
674.BR wait (2).
675(The thread is said to be
676.IR detached .)
677
e2fbf61d
MK
678After all of the threads in a thread group terminate,
679the parent process of the thread group is sent a
fd8a5be4
MK
680.B SIGCHLD
681(or other termination) signal.
682
683If any of the threads in a thread group performs an
684.BR execve (2),
685then all threads other than the thread group leader are terminated,
686and the new program is executed in the thread group leader.
687
f7110f60
MK
688If one of the threads in a thread group creates a child using
689.BR fork (2),
690then any thread in the group can
691.BR wait (2)
692for that child.
693
edcc65ff 694Since Linux 2.5.35,
fd8a5be4
MK
695.I flags
696must also include
697.B CLONE_SIGHAND
698if
699.B CLONE_THREAD
6fd69f33
MK
700is specified
701(and note that, since Linux 2.6.0-test6,
702.BR CLONE_SIGHAND
703also requires
704.BR CLONE_VM
705to be included).
e2fbf61d
MK
706
707Signals may be sent to a thread group as a whole (i.e., a TGID) using
708.BR kill (2),
709or to a specific thread (i.e., TID) using
710.BR tgkill (2).
711
712Signal dispositions and actions are process-wide:
713if an unhandled signal is delivered to a thread, then
714it will affect (terminate, stop, continue, be ignored in)
715all members of the thread group.
716
99408a60 717Each thread has its own signal mask, as set by
e2fbf61d 718.BR sigprocmask (2),
82a06020 719but signals can be pending either: for the whole process
e2fbf61d
MK
720(i.e., deliverable to any member of the thread group),
721when sent with
82a06020 722.BR kill (2);
e2fbf61d
MK
723or for an individual thread, when sent with
724.BR tgkill (2).
99408a60
MK
725A call to
726.BR sigpending (2)
727returns a signal set that is the union of the signals pending for the
728whole process and the signals that are pending for the calling thread.
e2fbf61d 729
c13182ef 730If
e2fbf61d
MK
731.BR kill (2)
732is used to send a signal to a thread group,
733and the thread group has installed a handler for the signal, then
734the handler will be invoked in exactly one, arbitrarily selected
735member of the thread group that has not blocked the signal.
c13182ef 736If multiple threads in a group are waiting to accept the same signal using
e2fbf61d
MK
737.BR sigwaitinfo (2),
738the kernel will arbitrarily select one of these threads
c13182ef 739to receive a signal sent using
e2fbf61d 740.BR kill (2).
a69b6bda 741.TP
f5dbc7c8 742.BR CLONE_UNTRACED " (since Linux 2.5.46)"
a69b6bda 743If
f5dbc7c8
MK
744.B CLONE_UNTRACED
745is specified, then a tracing process cannot force
746.B CLONE_PTRACE
747on this child process.
fea681da 748.TP
1603d6a1 749.BR CLONE_VFORK " (since Linux 2.2)"
f5dbc7c8
MK
750If
751.B CLONE_VFORK
752is set, the execution of the calling process is suspended
753until the child releases its virtual memory
754resources via a call to
755.BR execve (2)
756or
757.BR _exit (2)
758(as with
759.BR vfork (2)).
760
761If
762.B CLONE_VFORK
4b4a853a 763is not set, then both the calling process and the child are schedulable
f5dbc7c8
MK
764after the call, and an application should not rely on execution occurring
765in any particular order.
fea681da 766.TP
1603d6a1 767.BR CLONE_VM " (since Linux 2.0)"
f5dbc7c8
MK
768If
769.B CLONE_VM
770is set, the calling process and the child process run in the same memory
771space.
772In particular, memory writes performed by the calling process
773or by the child process are also visible in the other process.
774Moreover, any memory mapping or unmapping performed with
775.BR mmap (2)
776or
777.BR munmap (2)
778by the child or calling process also affects the other process.
779
780If
781.B CLONE_VM
782is not set, the child process runs in a separate copy of the memory
783space of the calling process at the time of
784.BR clone ().
785Memory writes or file mappings/unmappings performed by one of the
786processes do not affect the other, as with
787.BR fork (2).
e8796f63 788.SS C library/kernel ABI differences
e585064b
MK
789The raw
790.BR clone ()
fea681da
MK
791system call corresponds more closely to
792.BR fork (2)
793in that execution in the child continues from the point of the
c13182ef 794call.
5add3af3
MK
795As such, the
796.I fn
c13182ef 797and
5add3af3
MK
798.I arg
799arguments of the
800.BR clone ()
801wrapper function are omitted.
802Furthermore, the argument order changes.
c787510f 803The raw system call interface on x86 and many other architectures is roughly:
5add3af3
MK
804.in +4
805.nf
806
807.BI "long clone(unsigned long " flags ", void *" child_stack ,
808.BI " void *" ptid ", void *" ctid ,
809.BI " struct pt_regs *" regs );
fea681da 810
5add3af3
MK
811.fi
812.in
e585064b 813Another difference for the raw system call is that the
fea681da 814.I child_stack
c13182ef 815argument may be zero, in which case copy-on-write semantics ensure that the
fea681da 816child gets separate copies of stack pages when either process modifies
c13182ef
MK
817the stack.
818In this case, for correct operation, the
fea681da
MK
819.B CLONE_VM
820option should not be specified.
c787510f 821
e585064b 822For some architectures, the order of the arguments for the system call
c787510f 823differs from that shown above.
7d2e6d74 824On the score, microblaze, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
c787510f
MK
825and MIPS architectures,
826the order of the fourth and fifth arguments is reversed.
827On the cris and s390 architectures,
828the order of the first and second arguments is reversed.
251113d0
MK
829.SS blackfin, m68k, and sparc
830The argument-passing conventions on
04346be5 831blackfin, m68k, and sparc are different from the descriptions above.
251113d0 832For details, see the kernel (and glibc) source.
574c92b6 833.SS ia64
097a1f3b
MK
834On ia64, a different interface is used:
835.nf
836
837.BI "int __clone2(int (*" "fn" ")(void *), "
838.BI " void *" child_stack_base ", size_t " stack_size ,
839.BI " int " flags ", void *" "arg" ", ... "
840.BI " /* pid_t *" ptid ", struct user_desc *" tls \
841", pid_t *" ctid " */ );"
842.fi
843.PP
844The prototype shown above is for the glibc wrapper function;
845the raw system call interface has no
846.I fn
847or
848.I arg
849argument, and changes the order of the arguments so that
850.I flags
851is the first argument, and
852.I tls
853is the last argument.
854.PP
855.BR __clone2 ()
856operates in the same way as
857.BR clone (),
858except that
859.I child_stack_base
860points to the lowest address of the child's stack area,
861and
862.I stack_size
863specifies the size of the stack pointed to by
864.IR child_stack_base .
5add3af3 865.SS Linux 2.4 and earlier
577f9b62
MK
866In Linux 2.4 and earlier,
867.BR clone ()
868does not take arguments
869.IR ptid ,
870.IR tls ,
871and
130b2e49 872.IR ctid .
47297adb 873.SH RETURN VALUE
0bfa087b
MK
874.\" gettid(2) returns current->pid;
875.\" getpid(2) returns current->tgid;
fea681da 876On success, the thread ID of the child process is returned
c13182ef 877in the caller's thread of execution.
84811e86 878On failure, \-1 is returned
fea681da
MK
879in the caller's context, no child process will be created, and
880.I errno
881will be set appropriately.
fea681da
MK
882.SH ERRORS
883.TP
884.B EAGAIN
e1b6e186
MK
885Too many processes are already running; see
886.BR fork (2).
fea681da
MK
887.TP
888.B EINVAL
889.B CLONE_SIGHAND
890was specified, but
891.B CLONE_VM
2e8a7fb3
MK
892was not.
893(Since Linux 2.6.0-test6.)
fea681da
MK
894.TP
895.B EINVAL
896.B CLONE_THREAD
897was specified, but
898.B CLONE_SIGHAND
6387216b
MK
899was not.
900(Since Linux 2.5.35.)
29546c24
MK
901.\" .TP
902.\" .B EINVAL
903.\" Precisely one of
904.\" .B CLONE_DETACHED
905.\" and
906.\" .B CLONE_THREAD
6387216b
MK
907.\" was specified.
908.\" (Since Linux 2.6.0-test6.)
fea681da
MK
909.TP
910.B EINVAL
d34e5645 911.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
fea681da
MK
912Both
913.B CLONE_FS
914and
915.B CLONE_NEWNS
916were specified in
917.IR flags .
918.TP
d34e5645
MK
919.BR EINVAL " (since Linux 3.9)"
920Both
921.B CLONE_NEWUSER
922and
923.B CLONE_FS
924were specified in
925.IR flags .
926.TP
fea681da 927.B EINVAL
82ee147a 928Both
667417b3
MK
929.B CLONE_NEWIPC
930and
931.B CLONE_SYSVSEM
932were specified in
933.IR flags .
934.TP
935.B EINVAL
f0007192 936One (or both) of
82ee147a 937.BR CLONE_NEWPID
f0007192
MK
938or
939.BR CLONE_NEWUSER
940and one (or both) of
82ee147a 941.BR CLONE_THREAD
f0007192
MK
942or
943.BR CLONE_PARENT
82ee147a
MK
944were specified in
945.IR flags .
946.TP
947.B EINVAL
c13182ef 948Returned by
edcc65ff 949.BR clone ()
c13182ef 950when a zero value is specified for
fea681da
MK
951.IR child_stack .
952.TP
28cad2c1 953.B EINVAL
667417b3
MK
954.BR CLONE_NEWIPC
955was specified in
956.IR flags ,
957but the kernel was not configured with the
958.B CONFIG_SYSVIPC
959and
960.BR CONFIG_IPC_NS
961options.
962.TP
963.B EINVAL
163bf178
MK
964.BR CLONE_NEWNET
965was specified in
966.IR flags ,
967but the kernel was not configured with the
968.B CONFIG_NET_NS
969option.
970.TP
971.B EINVAL
28cad2c1
MK
972.BR CLONE_NEWPID
973was specified in
974.IR flags ,
975but the kernel was not configured with the
976.B CONFIG_PID_NS
977option.
978.TP
43ce9dda
MK
979.B EINVAL
980.BR CLONE_NEWUTS
981was specified in
982.IR flags ,
983but the kernel was not configured with the
984.B CONFIG_UTS_NS
985option.
986.TP
fea681da
MK
987.B ENOMEM
988Insufficient memory is available to allocate a task structure for the
989child, or to copy those parts of the caller's context that need to be
990copied.
991.TP
992.B EPERM
667417b3 993.BR CLONE_NEWIPC ,
163bf178 994.BR CLONE_NEWNET ,
43ce9dda
MK
995.BR CLONE_NEWNS ,
996.BR CLONE_NEWPID ,
82ee147a 997or
43ce9dda 998.BR CLONE_NEWUTS
00b08db3 999was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
fea681da
MK
1000.TP
1001.B EPERM
1002.B CLONE_PID
1003was specified by a process other than process 0.
365d292a
MK
1004.TP
1005.B EPERM
1006.BR CLONE_NEWUSER
1007was specified in
1008.IR flags ,
1009but either the effective user ID or the effective group ID of the caller
1010does not have a mapping in the parent namespace (see
f58fb24f 1011.BR user_namespaces (7)).
6fd119e7 1012.TP
ac007938
MK
1013.BR EPERM " (since Linux 3.9)"
1014.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
1015\fBCLONE_NEWUSER\fP was specified in
1016.I flags
1017and the caller is in a chroot environment
1018.\" FIXME What is the rationale for this restriction?
1019(i.e., the caller's root directory does not match the root directory
1020of the mount namespace in which it resides).
1021.TP
1022.BR EUSERS " (since Linux 3.11)"
6fd119e7
MK
1023.B CLONE_NEWUSER
1024was specified in
1025.IR flags ,
1026and the call would cause the limit on the number of
1027nested user namespaces to be exceeded.
1028See
1029.BR user_namespaces (7).
365d292a
MK
1030.SH VERSIONS
1031There is no entry for
1032.BR clone ()
1033in libc5.
1034glibc2 provides
1035.BR clone ()
1036as described in this manual page.
47297adb 1037.SH CONFORMING TO
a1d5f77c 1038.BR clone ()
e585064b 1039is Linux-specific and should not be used in programs
a1d5f77c 1040intended to be portable.
fea681da 1041.SH NOTES
fd8a5be4
MK
1042In the kernel 2.4.x series,
1043.B CLONE_THREAD
1044generally does not make the parent of the new thread the same
1045as the parent of the calling process.
1046However, for kernel versions 2.4.7 to 2.4.18 the
1047.B CLONE_THREAD
1048flag implied the
c13182ef 1049.B CLONE_PARENT
fd8a5be4 1050flag (as in kernel 2.6).
fea681da 1051
c13182ef
MK
1052For a while there was
1053.B CLONE_DETACHED
a5053dcb 1054(introduced in 2.5.32):
c13182ef 1055it indicated that the parent wants no child-exit signal.
a5053dcb 1056In 2.6.2 the need to give this
c13182ef
MK
1057together with
1058.B CLONE_THREAD
a5053dcb
MK
1059disappeared.
1060This flag is still defined, but has no effect.
1061
34ccb744 1062On i386,
a5a997ca
MK
1063.BR clone ()
1064should not be called through vsyscall, but directly through
1065.IR "int $0x80" .
31830ef0
MK
1066.SH BUGS
1067Versions of the GNU C library that include the NPTL threading library
c13182ef 1068contain a wrapper function for
0bfa087b 1069.BR getpid (2)
31830ef0 1070that performs caching of PIDs.
c60237c9
MK
1071This caching relies on support in the glibc wrapper for
1072.BR clone (),
1073but as currently implemented,
1074the cache may not be up to date in some circumstances.
1075In particular,
1076if a signal is delivered to the child immediately after the
1077.BR clone ()
1078call, then a call to
0b80cf56 1079.BR getpid (2)
c60237c9
MK
1080in a handler for the signal may return the PID
1081of the calling process ("the parent"),
88619baf 1082if the clone wrapper has not yet had a chance to update the PID
c60237c9
MK
1083cache in the child.
1084(This discussion ignores the case where the child was created using
9291ce36 1085.BR CLONE_THREAD ,
c60237c9 1086when
0b80cf56 1087.BR getpid (2)
c60237c9
MK
1088.I should
1089return the same value in the child and in the process that called
1090.BR clone (),
a1d48abb 1091since the caller and the child are in the same thread group.
e7d807b7 1092The stale-cache problem also does not occur if the
a1d48abb
JR
1093.I flags
1094argument includes
1095.BR CLONE_VM .)
c60237c9 1096To get the truth, it may be necessary to use code such as the following:
31830ef0
MK
1097.nf
1098
1099 #include <syscall.h>
1100
1101 pid_t mypid;
1102
1103 mypid = syscall(SYS_getpid);
1104.fi
c60237c9
MK
1105.\" See also the following bug reports
1106.\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1107.\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
8c7b566c 1108.SH EXAMPLE
8c7b566c 1109The following program demonstrates the use of
9c13072a 1110.BR clone ()
8c7b566c
MK
1111to create a child process that executes in a separate UTS namespace.
1112The child changes the hostname in its UTS namespace.
1113Both parent and child then display the system hostname,
1114making it possible to see that the hostname
1115differs in the UTS namespaces of the parent and child.
1116For an example of the use of this program, see
1117.BR setns (2).
f30b7415 1118.SS Program source
8c7b566c
MK
1119.nf
1120#define _GNU_SOURCE
1121#include <sys/wait.h>
1122#include <sys/utsname.h>
1123#include <sched.h>
1124#include <string.h>
1125#include <stdio.h>
1126#include <stdlib.h>
1127#include <unistd.h>
1128
1129#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
1130 } while (0)
1131
1132static int /* Start function for cloned child */
1133childFunc(void *arg)
1134{
1135 struct utsname uts;
1136
1137 /* Change hostname in UTS namespace of child */
1138
1139 if (sethostname(arg, strlen(arg)) == \-1)
1140 errExit("sethostname");
1141
07d4e6ea 1142 /* Retrieve and display hostname */
8c7b566c
MK
1143
1144 if (uname(&uts) == \-1)
1145 errExit("uname");
1146 printf("uts.nodename in child: %s\\n", uts.nodename);
1147
1148 /* Keep the namespace open for a while, by sleeping.
1149 This allows some experimentation\-\-for example, another
1150 process might join the namespace. */
9f1b9726 1151
8c7b566c
MK
1152 sleep(200);
1153
1154 return 0; /* Child terminates now */
1155}
1156
1157#define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
1158
1159int
1160main(int argc, char *argv[])
1161{
1162 char *stack; /* Start of stack buffer */
1163 char *stackTop; /* End of stack buffer */
1164 pid_t pid;
1165 struct utsname uts;
1166
1167 if (argc < 2) {
1168 fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]);
1169 exit(EXIT_SUCCESS);
1170 }
1171
1172 /* Allocate stack for child */
1173
1174 stack = malloc(STACK_SIZE);
1175 if (stack == NULL)
1176 errExit("malloc");
1177 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
1178
1179 /* Create child that has its own UTS namespace;
1180 child commences execution in childFunc() */
1181
1182 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1183 if (pid == \-1)
1184 errExit("clone");
1185 printf("clone() returned %ld\\n", (long) pid);
1186
1187 /* Parent falls through to here */
1188
1189 sleep(1); /* Give child time to change its hostname */
1190
9f1b9726 1191 /* Display hostname in parent\(aqs UTS namespace. This will be
8c7b566c
MK
1192 different from hostname in child\(aqs UTS namespace. */
1193
1194 if (uname(&uts) == \-1)
1195 errExit("uname");
1196 printf("uts.nodename in parent: %s\\n", uts.nodename);
1197
1198 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
1199 errExit("waitpid");
1200 printf("child has terminated\\n");
1201
1202 exit(EXIT_SUCCESS);
1203}
1204.fi
47297adb 1205.SH SEE ALSO
fea681da 1206.BR fork (2),
2b44301c 1207.BR futex (2),
fea681da
MK
1208.BR getpid (2),
1209.BR gettid (2),
6f8746e4 1210.BR kcmp (2),
f2d0bbf1 1211.BR set_thread_area (2),
2b44301c 1212.BR set_tid_address (2),
8403481f 1213.BR setns (2),
f2d0bbf1 1214.BR tkill (2),
5cc01e9c 1215.BR unshare (2),
fea681da 1216.BR wait (2),
3616b7c0 1217.BR capabilities (7),
41096af1 1218.BR namespaces (7),
3616b7c0 1219.BR pthreads (7)