]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/clone.2
clone.2: Document raw syscall interfaces on various other architectures
[thirdparty/man-pages.git] / man2 / clone.2
CommitLineData
fea681da 1.\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
8c7b566c 2.\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013
2297bf0e 3.\"
fd0fc519 4.\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
fea681da 5.\" May be distributed under the GNU General Public License.
fd0fc519 6.\" %%%LICENSE_END
dccaff1e 7.\"
fea681da
MK
8.\" Modified by Michael Haardt <michael@moria.de>
9.\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
10.\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
11.\" New man page (copied from 'fork.2').
12.\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
13.\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
14.\" Modified 26 Jun 2001 by Michael Kerrisk
15.\" Mostly upgraded to 2.4.x
16.\" Added prototype for sys_clone() plus description
17.\" Added CLONE_THREAD with a brief description of thread groups
c13182ef 18.\" Added CLONE_PARENT and revised entire page to remove ambiguity
fea681da
MK
19.\" between "calling process" and "parent process"
20.\" Added CLONE_PTRACE and CLONE_VFORK
21.\" Added EPERM and EINVAL error codes
fd8a5be4 22.\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
fea681da 23.\" various other minor tidy ups and clarifications.
c11b1abf 24.\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
d9bfdb9c 25.\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
c11b1abf 26.\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
fea681da
MK
27.\" Added description for CLONE_NEWNS, which was added in 2.4.19
28.\" Slightly rephrased, aeb.
29.\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
30.\" Modified 1 Jan 2004 - various updates, aeb
0967c11f 31.\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
d9bfdb9c 32.\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
31830ef0 33.\" wrapper under BUGS.
fd8a5be4
MK
34.\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
35.\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
4e836144 36.\" 2008-11-18, mtk, order CLONE_* flags alphabetically
82ee147a 37.\" 2008-11-18, mtk, document CLONE_NEWPID
43ce9dda 38.\" 2008-11-19, mtk, document CLONE_NEWUTS
667417b3 39.\" 2008-11-19, mtk, document CLONE_NEWIPC
cfdc761b 40.\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
fea681da 41.\"
3df541c0 42.TH CLONE 2 2016-07-17 "Linux" "Linux Programmer's Manual"
fea681da 43.SH NAME
9b0e0996 44clone, __clone2 \- create a child process
fea681da 45.SH SYNOPSIS
c10859eb 46.nf
81f10dad
MK
47/* Prototype for the glibc wrapper function */
48
4f71ba5d 49.B #define _GNU_SOURCE
fea681da 50.B #include <sched.h>
c10859eb 51
ff929e3b
MK
52.BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
53.BI " int " flags ", void *" "arg" ", ... "
dd6d3d2e 54.BI " /* pid_t *" ptid ", void *" newtls \
ff929e3b 55", pid_t *" ctid " */ );"
81f10dad 56
2a15a76b 57/* For the prototype of the raw system call, see NOTES */
c10859eb 58.fi
fea681da 59.SH DESCRIPTION
edcc65ff
MK
60.BR clone ()
61creates a new process, in a manner similar to
fea681da 62.BR fork (2).
81f10dad
MK
63
64This page describes both the glibc
e511ffb6 65.BR clone ()
e585064b 66wrapper function and the underlying system call on which it is based.
81f10dad 67The main text describes the wrapper function;
e585064b 68the differences for the raw system call
81f10dad 69are described toward the end of this page.
fea681da
MK
70
71Unlike
72.BR fork (2),
81f10dad
MK
73.BR clone ()
74allows the child process to share parts of its execution context with
fea681da 75the calling process, such as the memory space, the table of file
c13182ef
MK
76descriptors, and the table of signal handlers.
77(Note that on this manual
78page, "calling process" normally corresponds to "parent process".
79But see the description of
80.B CLONE_PARENT
fea681da
MK
81below.)
82
1533d242 83One use of
edcc65ff 84.BR clone ()
fea681da
MK
85is to implement threads: multiple threads of control in a program that
86run concurrently in a shared memory space.
87
88When the child process is created with
c13182ef 89.BR clone (),
fea681da 90it executes the function
c13182ef 91.IR fn ( arg ).
fea681da 92(This differs from
c13182ef 93.BR fork (2),
fea681da 94where execution continues in the child from the point
c13182ef
MK
95of the
96.BR fork (2)
fea681da
MK
97call.)
98The
99.I fn
100argument is a pointer to a function that is called by the child
101process at the beginning of its execution.
102The
103.I arg
104argument is passed to the
105.I fn
106function.
107
c13182ef 108When the
fea681da 109.IR fn ( arg )
c13182ef
MK
110function application returns, the child process terminates.
111The integer returned by
fea681da 112.I fn
c13182ef
MK
113is the exit code for the child process.
114The child process may also terminate explicitly by calling
fea681da
MK
115.BR exit (2)
116or after receiving a fatal signal.
117
118The
119.I child_stack
c13182ef
MK
120argument specifies the location of the stack used by the child process.
121Since the child and calling process may share memory,
fea681da 122it is not possible for the child process to execute in the
c13182ef
MK
123same stack as the calling process.
124The calling process must therefore
fea681da
MK
125set up memory space for the child stack and pass a pointer to this
126space to
edcc65ff 127.BR clone ().
5fab2e7c 128Stacks grow downward on all processors that run Linux
fea681da
MK
129(except the HP PA processors), so
130.I child_stack
131usually points to the topmost address of the memory space set up for
132the child stack.
133
134The low byte of
135.I flags
fd8a5be4
MK
136contains the number of the
137.I "termination signal"
138sent to the parent when the child dies.
139If this signal is specified as anything other than
fea681da
MK
140.BR SIGCHLD ,
141then the parent process must specify the
c13182ef
MK
142.B __WALL
143or
fea681da 144.B __WCLONE
c13182ef
MK
145options when waiting for the child with
146.BR wait (2).
fea681da
MK
147If no signal is specified, then the parent process is not signaled
148when the child terminates.
149
150.I flags
fd8a5be4
MK
151may also be bitwise-or'ed with zero or more of the following constants,
152in order to specify what is shared between the calling process
fea681da 153and the child process:
fea681da 154.TP
f5dbc7c8 155.BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
8ef021ea 156Erase the child thread ID at the location
d3dbc9b1 157.I ctid
f5dbc7c8
MK
158in child memory when the child exits, and do a wakeup on the futex
159at that address.
160The address involved may be changed by the
161.BR set_tid_address (2)
162system call.
163This is used by threading libraries.
164.TP
165.BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
8ef021ea 166Store the child thread ID at the location
d3dbc9b1 167.I ctid
8ef021ea 168in the child's memory.
f5dbc7c8 169.TP
1603d6a1 170.BR CLONE_FILES " (since Linux 2.0)"
fea681da 171If
f5dbc7c8
MK
172.B CLONE_FILES
173is set, the calling process and the child process share the same file
174descriptor table.
175Any file descriptor created by the calling process or by the child
176process is also valid in the other process.
177Similarly, if one of the processes closes a file descriptor,
178or changes its associated flags (using the
179.BR fcntl (2)
180.B F_SETFD
181operation), the other process is also affected.
8a76b19e
KE
182If a process sharing a file descriptor table calls
183.BR execve (2),
184its file descriptor table is duplicated (unshared).
fea681da
MK
185
186If
f5dbc7c8
MK
187.B CLONE_FILES
188is not set, the child process inherits a copy of all file descriptors
189opened in the calling process at the time of
190.BR clone ().
f5dbc7c8
MK
191Subsequent operations that open or close file descriptors,
192or change file descriptor flags,
193performed by either the calling
194process or the child process do not affect the other process.
db8ba2b4
MK
195Note, however,
196that the duplicated file descriptors in the child refer to the same open file
197descriptions as the corresponding file descriptors in the calling process,
198and thus share file offsets and file status flags (see
199.BR open (2)).
fea681da 200.TP
1603d6a1 201.BR CLONE_FS " (since Linux 2.0)"
fea681da
MK
202If
203.B CLONE_FS
9ee4a2b6 204is set, the caller and the child process share the same filesystem
c13182ef 205information.
9ee4a2b6 206This includes the root of the filesystem, the current
c13182ef
MK
207working directory, and the umask.
208Any call to
fea681da
MK
209.BR chroot (2),
210.BR chdir (2),
211or
212.BR umask (2)
edcc65ff 213performed by the calling process or the child process also affects the
fea681da
MK
214other process.
215
c13182ef 216If
fea681da 217.B CLONE_FS
9ee4a2b6 218is not set, the child process works on a copy of the filesystem
fea681da 219information of the calling process at the time of the
edcc65ff 220.BR clone ()
fea681da
MK
221call.
222Calls to
223.BR chroot (2),
224.BR chdir (2),
225.BR umask (2)
226performed later by one of the processes do not affect the other process.
fea681da 227.TP
a4cc375e 228.BR CLONE_IO " (since Linux 2.6.25)"
11f27a1c
JA
229If
230.B CLONE_IO
231is set, then the new process shares an I/O context with
232the calling process.
233If this flag is not set, then (as with
234.BR fork (2))
235the new process has its own I/O context.
236
237.\" The following based on text from Jens Axboe
d1f84ed7 238The I/O context is the I/O scope of the disk scheduler (i.e.,
11f27a1c
JA
239what the I/O scheduler uses to model scheduling of a process's I/O).
240If processes share the same I/O context,
241they are treated as one by the I/O scheduler.
242As a consequence, they get to share disk time.
243For some I/O schedulers,
244.\" the anticipatory and CFQ scheduler
245if two processes share an I/O context,
246they will be allowed to interleave their disk access.
247If several threads are doing I/O on behalf of the same process
248.RB ( aio_read (3),
249for instance), they should employ
250.BR CLONE_IO
251to get better I/O performance.
252.\" with CFQ and AS.
253
254If the kernel is not configured with the
255.B CONFIG_BLOCK
256option, this flag is a no-op.
257.TP
c5af0674
MK
258.BR CLONE_NEWCGROUP " (since Linux 4.6)"
259Create the process in a new cgroup namespace.
260If this flag is not set, then (as with
261.BR fork (2))
262the process is created in the same cgroup namespace as the calling process.
263This flag is intended for the implementation of containers.
264
265For further information on cgroup namespaces, see
b9fe4bc3 266.BR cgroup_namespaces (7).
c5af0674
MK
267
268Only a privileged process
269.RB ( CAP_SYS_ADMIN )
270can employ
271.BR CLONE_NEWCGROUP .
272.\"
273.TP
8722311b 274.BR CLONE_NEWIPC " (since Linux 2.6.19)"
667417b3
MK
275If
276.B CLONE_NEWIPC
277is set, then create the process in a new IPC namespace.
278If this flag is not set, then (as with
06b30458 279.BR fork (2)),
667417b3
MK
280the process is created in the same IPC namespace as
281the calling process.
0236bea9 282This flag is intended for the implementation of containers.
667417b3 283
efbfd7ec 284An IPC namespace provides an isolated view of System\ V IPC objects (see
009a049e
MK
285.BR svipc (7))
286and (since Linux 2.6.30)
287.\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
288.\" https://lwn.net/Articles/312232/
289POSIX message queues
290(see
291.BR mq_overview (7)).
19911fa5
MK
292The common characteristic of these IPC mechanisms is that IPC
293objects are identified by mechanisms other than filesystem
294pathnames.
009a049e 295
c440fe01 296Objects created in an IPC namespace are visible to all other processes
667417b3
MK
297that are members of that namespace,
298but are not visible to processes in other IPC namespaces.
299
83c1f4b5 300When an IPC namespace is destroyed
009a049e 301(i.e., when the last process that is a member of the namespace terminates),
83c1f4b5
MK
302all IPC objects in the namespace are automatically destroyed.
303
ab5dd83f
MK
304Only a privileged process
305.RB ( CAP_SYS_ADMIN )
306can employ
307.BR CLONE_NEWIPC .
667417b3
MK
308This flag can't be specified in conjunction with
309.BR CLONE_SYSVSEM .
9343f8e7
MK
310
311For further information on IPC namespaces, see
312.BR namespaces (7).
667417b3 313.TP
163bf178 314.BR CLONE_NEWNET " (since Linux 2.6.24)"
33a0ccb2 315(The implementation of this flag was completed only
9108d867 316by about kernel version 2.6.29.)
163bf178
MK
317
318If
319.B CLONE_NEWNET
320is set, then create the process in a new network namespace.
321If this flag is not set, then (as with
57ef8c39 322.BR fork (2))
163bf178
MK
323the process is created in the same network namespace as
324the calling process.
325This flag is intended for the implementation of containers.
326
327A network namespace provides an isolated view of the networking stack
328(network device interfaces, IPv4 and IPv6 protocol stacks,
329IP routing tables, firewall rules, the
330.I /proc/net
331and
332.I /sys/class/net
333directory trees, sockets, etc.).
334A physical network device can live in exactly one
335network namespace.
336A virtual network device ("veth") pair provides a pipe-like abstraction
bea08fec 337.\" FIXME . Add pointer to veth(4) page when it is eventually completed
163bf178
MK
338that can be used to create tunnels between network namespaces,
339and can be used to create a bridge to a physical network device
340in another namespace.
341
bf032425
SH
342When a network namespace is freed
343(i.e., when the last process in the namespace terminates),
344its physical network devices are moved back to the
345initial network namespace (not to the parent of the process).
73680728
MK
346For further information on network namespaces, see
347.BR namespaces (7).
bf032425 348
ab5dd83f
MK
349Only a privileged process
350.RB ( CAP_SYS_ADMIN )
351can employ
352.BR CLONE_NEWNET .
163bf178 353.TP
c10859eb 354.BR CLONE_NEWNS " (since Linux 2.4.19)"
3dd2331c
MK
355If
356.B CLONE_NEWNS
357is set, the cloned child is started in a new mount namespace,
358initialized with a copy of the namespace of the parent.
359If
fea681da 360.B CLONE_NEWNS
3dd2331c 361is not set, the child lives in the same mount
4df2eb09 362namespace as the parent.
fea681da 363
ab5dd83f
MK
364Only a privileged process
365.RB ( CAP_SYS_ADMIN )
366can employ
367.BR CLONE_NEWNS .
fea681da
MK
368It is not permitted to specify both
369.B CLONE_NEWNS
370and
371.B CLONE_FS
9219d208 372.\" See https://lwn.net/Articles/543273/
fea681da 373in the same
e511ffb6 374.BR clone ()
fea681da 375call.
c212248c
MK
376
377For further information on mount namespaces, see
378.BR namespaces (7)
379and
380.BR mount_namespaces (7).
9d005472
MK
381.TP
382.BR CLONE_NEWPID " (since Linux 2.6.24)"
383.\" This explanation draws a lot of details from
384.\" http://lwn.net/Articles/259217/
385.\" Authors: Pavel Emelyanov <xemul@openvz.org>
386.\" and Kir Kolyshkin <kir@openvz.org>
387.\"
388.\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
389.\" Author: Pavel Emelyanov <xemul@openvz.org>
390If
391.B CLONE_NEWPID
392is set, then create the process in a new PID namespace.
393If this flag is not set, then (as with
394.BR fork (2))
395the process is created in the same PID namespace as
396the calling process.
397This flag is intended for the implementation of containers.
398
399For further information on PID namespaces, see
7e0e902b
MK
400.BR namespaces (7)
401and
39b3f005 402.BR pid_namespaces (7).
9d005472 403
ab5dd83f
MK
404Only a privileged process
405.RB ( CAP_SYS_ADMIN )
406can employ
407.BR CLONE_NEWPID .
9d005472 408This flag can't be specified in conjunction with
f0007192
MK
409.BR CLONE_THREAD
410or
411.BR CLONE_PARENT .
70d21f17 412.TP
06b30458
MK
413.BR CLONE_NEWUSER
414(This flag first became meaningful for
415.BR clone ()
4d2b3ed7
MK
416in Linux 2.6.23,
417the current
11a38815 418.BR clone ()
4d2b3ed7
MK
419semantics were merged in Linux 3.5,
420and the final pieces to make the user namespaces completely usable were
421merged in Linux 3.8.)
422
70d21f17
EB
423If
424.B CLONE_NEWUSER
06b30458
MK
425is set, then create the process in a new user namespace.
426If this flag is not set, then (as with
57ef8c39 427.BR fork (2))
70d21f17
EB
428the process is created in the same user namespace as the calling process.
429
9d005472 430For further information on user namespaces, see
f58fb24f
MK
431.BR namespaces (7)
432and
433.BR user_namespaces (7).
06b30458 434
fefbcba8
MK
435Before Linux 3.8, use of
436.BR CLONE_NEWUSER
437required that the caller have three capabilities:
438.BR CAP_SYS_ADMIN ,
439.BR CAP_SETUID ,
440and
441.BR CAP_SETGID .
442.\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
06b30458 443Starting with Linux 3.8,
9d005472 444no privileges are needed to create a user namespace.
f0007192 445
5e72cf7d
MK
446This flag can't be specified in conjunction with
447.BR CLONE_THREAD
448or
449.BR CLONE_PARENT .
450For security reasons,
451.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
452.\" https://lwn.net/Articles/543273/
453.\" The fix actually went into 3.9 and into 3.8.3. However, user namespaces
454.\" were, for practical purposes, unusable in earlier 3.8.x because of the
ab3311aa 455.\" various filesystems that didn't support userns.
f0007192
MK
456.BR CLONE_NEWUSER
457cannot be specified in conjunction with
5e72cf7d
MK
458.BR CLONE_FS .
459
460For further information on user namespaces, see
461.BR user_namespaces (7).
82ee147a 462.TP
43ce9dda
MK
463.BR CLONE_NEWUTS " (since Linux 2.6.19)"
464If
465.B CLONE_NEWUTS
e1b11906
MK
466is set, then create the process in a new UTS namespace,
467whose identifiers are initialized by duplicating the identifiers
468from the UTS namespace of the calling process.
43ce9dda 469If this flag is not set, then (as with
57ef8c39 470.BR fork (2))
43ce9dda
MK
471the process is created in the same UTS namespace as
472the calling process.
0236bea9 473This flag is intended for the implementation of containers.
43ce9dda
MK
474
475A UTS namespace is the set of identifiers returned by
476.BR uname (2);
850905cf 477among these, the domain name and the hostname can be modified by
43ce9dda
MK
478.BR setdomainname (2)
479and
43ce9dda
MK
480.BR sethostname (2),
481respectively.
c440fe01
MK
482Changes made to the identifiers in a UTS namespace
483are visible to all other processes in the same namespace,
43ce9dda
MK
484but are not visible to processes in other UTS namespaces.
485
ab5dd83f
MK
486Only a privileged process
487.RB ( CAP_SYS_ADMIN )
488can employ
489.BR CLONE_NEWUTS .
9cc7ad66 490
83d9e9b2 491For further information on UTS namespaces, see
9cc7ad66 492.BR namespaces (7).
43ce9dda 493.TP
f5dbc7c8
MK
494.BR CLONE_PARENT " (since Linux 2.3.12)"
495If
496.B CLONE_PARENT
497is set, then the parent of the new child (as returned by
498.BR getppid (2))
499will be the same as that of the calling process.
500
501If
502.B CLONE_PARENT
503is not set, then (as with
504.BR fork (2))
505the child's parent is the calling process.
506
507Note that it is the parent process, as returned by
508.BR getppid (2),
509which is signaled when the child terminates, so that
510if
511.B CLONE_PARENT
512is set, then the parent of the calling process, rather than the
513calling process itself, will be signaled.
514.TP
515.BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
8ef021ea 516Store the child thread ID at the location
d3dbc9b1 517.I ptid
8ef021ea 518in the parent's memory.
f5dbc7c8
MK
519(In Linux 2.5.32-2.5.48 there was a flag
520.B CLONE_SETTID
521that did this.)
522.TP
523.BR CLONE_PID " (obsolete)"
524If
525.B CLONE_PID
526is set, the child process is created with the same process ID as
527the calling process.
528This is good for hacking the system, but otherwise
529of not much use.
530Since 2.3.21 this flag can be
531specified only by the system boot process (PID 0).
28b44abc
MK
532It disappeared in Linux 2.5.16.
533Since then, the kernel silently ignores it without error.
f5dbc7c8 534.TP
1603d6a1 535.BR CLONE_PTRACE " (since Linux 2.2)"
f5dbc7c8
MK
536If
537.B CLONE_PTRACE
538is specified, and the calling process is being traced,
539then trace the child also (see
540.BR ptrace (2)).
541.TP
542.BR CLONE_SETTLS " (since Linux 2.5.32)"
dd6d3d2e
KF
543The TLS (Thread Local Storage) descriptor is set to
544.IR newtls .
545
546The interpretation of
547.I newtls
548and the resulting effect is architecture dependent.
549On x86,
f5dbc7c8 550.I newtls
dd6d3d2e
KF
551is interpreted as a
552.IR "struct user_desc *"
f5dbc7c8 553(see
dd6d3d2e
KF
554.BR set_thread_area (2)).
555On x86_64 it is the new value to be set for the %fs base register
556(see the
557.I ARCH_SET_FS
558argument to
559.BR arch_prctl (2)).
560On architectures with a dedicated TLS register, it is the new value
561of that register.
f5dbc7c8 562.TP
1603d6a1 563.BR CLONE_SIGHAND " (since Linux 2.0)"
fea681da
MK
564If
565.B CLONE_SIGHAND
314c8ff4 566is set, the calling process and the child process share the same table of
c13182ef
MK
567signal handlers.
568If the calling process or child process calls
fea681da 569.BR sigaction (2)
c13182ef
MK
570to change the behavior associated with a signal, the behavior is
571changed in the other process as well.
572However, the calling process and child
fea681da 573processes still have distinct signal masks and sets of pending
c13182ef
MK
574signals.
575So, one of them may block or unblock some signals using
fea681da
MK
576.BR sigprocmask (2)
577without affecting the other process.
578
579If
580.B CLONE_SIGHAND
581is not set, the child process inherits a copy of the signal handlers
582of the calling process at the time
edcc65ff 583.BR clone ()
c13182ef
MK
584is called.
585Calls to
fea681da
MK
586.BR sigaction (2)
587performed later by one of the processes have no effect on the other
588process.
29546c24
MK
589
590Since Linux 2.6.0-test6,
591.I flags
592must also include
593.B CLONE_VM
594if
595.B CLONE_SIGHAND
596is specified.
fea681da 597.TP
a69b6bda
MK
598.BR CLONE_STOPPED " (since Linux 2.6.0-test2)"
599If
600.B CLONE_STOPPED
601is set, then the child is initially stopped (as though it was sent a
602.B SIGSTOP
603signal), and must be resumed by sending it a
604.B SIGCONT
605signal.
ef37eaf2 606
a60450a9
MK
607This flag was
608.I deprecated
609from Linux 2.6.25 onward,
610and was
611.I removed
28b44abc
MK
612altogether in Linux 2.6.38.
613Since then, the kernel silently ignores it without error.
a5a061ee 614.\" glibc 2.8 removed this defn from bits/sched.h
c5af0674
MK
615Starting with Linux 4.6, the same bit was reused for the
616.BR CLONE_NEWCGROUP
617flag.
a69b6bda 618.TP
f5dbc7c8 619.BR CLONE_SYSVSEM " (since Linux 2.5.10)"
fea681da 620If
f5dbc7c8
MK
621.B CLONE_SYSVSEM
622is set, then the child and the calling process share
5ada4b94
MK
623a single list of System V semaphore adjustment
624.RI ( semadj )
625values (see
f5dbc7c8 626.BR semop (2)).
5ada4b94
MK
627In this case, the shared list accumulates
628.I semadj
629values across all processes sharing the list,
630and semaphore adjustments are performed only when the last process
631that is sharing the list terminates (or ceases sharing the list using
632.BR unshare (2)).
f5d401dd 633If this flag is not set, then the child has a separate
5ada4b94
MK
634.I semadj
635list that is initially empty.
fea681da
MK
636.TP
637.BR CLONE_THREAD " (since Linux 2.4.0-test8)"
638If
639.B CLONE_THREAD
640is set, the child is placed in the same thread group as the calling process.
fd8a5be4
MK
641To make the remainder of the discussion of
642.B CLONE_THREAD
643more readable, the term "thread" is used to refer to the
644processes within a thread group.
fea681da 645
fd8a5be4
MK
646Thread groups were a feature added in Linux 2.4 to support the
647POSIX threads notion of a set of threads that share a single PID.
648Internally, this shared PID is the so-called
649thread group identifier (TGID) for the thread group.
c13182ef 650Since Linux 2.4, calls to
fea681da 651.BR getpid (2)
fd8a5be4
MK
652return the TGID of the caller.
653
654The threads within a group can be distinguished by their (system-wide)
655unique thread IDs (TID).
656A new thread's TID is available as the function result
657returned to the caller of
658.BR clone (),
659and a thread can obtain
660its own TID using
661.BR gettid (2).
662
c13182ef 663When a call is made to
fd8a5be4
MK
664.BR clone ()
665without specifying
666.BR CLONE_THREAD ,
667then the resulting thread is placed in a new thread group
668whose TGID is the same as the thread's TID.
669This thread is the
670.I leader
671of the new thread group.
672
673A new thread created with
674.B CLONE_THREAD
675has the same parent process as the caller of
676.BR clone ()
c13182ef 677(i.e., like
fd8a5be4
MK
678.BR CLONE_PARENT ),
679so that calls to
680.BR getppid (2)
681return the same value for all of the threads in a thread group.
682When a
c13182ef 683.B CLONE_THREAD
fd8a5be4
MK
684thread terminates, the thread that created it using
685.BR clone ()
686is not sent a
687.B SIGCHLD
688(or other termination) signal;
689nor can the status of such a thread be obtained
690using
691.BR wait (2).
692(The thread is said to be
693.IR detached .)
694
e2fbf61d
MK
695After all of the threads in a thread group terminate,
696the parent process of the thread group is sent a
fd8a5be4
MK
697.B SIGCHLD
698(or other termination) signal.
699
700If any of the threads in a thread group performs an
701.BR execve (2),
702then all threads other than the thread group leader are terminated,
703and the new program is executed in the thread group leader.
704
f7110f60
MK
705If one of the threads in a thread group creates a child using
706.BR fork (2),
707then any thread in the group can
708.BR wait (2)
709for that child.
710
edcc65ff 711Since Linux 2.5.35,
fd8a5be4
MK
712.I flags
713must also include
714.B CLONE_SIGHAND
715if
716.B CLONE_THREAD
6fd69f33
MK
717is specified
718(and note that, since Linux 2.6.0-test6,
719.BR CLONE_SIGHAND
720also requires
721.BR CLONE_VM
722to be included).
e2fbf61d
MK
723
724Signals may be sent to a thread group as a whole (i.e., a TGID) using
725.BR kill (2),
726or to a specific thread (i.e., TID) using
727.BR tgkill (2).
728
729Signal dispositions and actions are process-wide:
730if an unhandled signal is delivered to a thread, then
731it will affect (terminate, stop, continue, be ignored in)
732all members of the thread group.
733
99408a60 734Each thread has its own signal mask, as set by
e2fbf61d 735.BR sigprocmask (2),
82a06020 736but signals can be pending either: for the whole process
e2fbf61d
MK
737(i.e., deliverable to any member of the thread group),
738when sent with
82a06020 739.BR kill (2);
e2fbf61d
MK
740or for an individual thread, when sent with
741.BR tgkill (2).
99408a60
MK
742A call to
743.BR sigpending (2)
744returns a signal set that is the union of the signals pending for the
745whole process and the signals that are pending for the calling thread.
e2fbf61d 746
c13182ef 747If
e2fbf61d
MK
748.BR kill (2)
749is used to send a signal to a thread group,
750and the thread group has installed a handler for the signal, then
751the handler will be invoked in exactly one, arbitrarily selected
752member of the thread group that has not blocked the signal.
c13182ef 753If multiple threads in a group are waiting to accept the same signal using
e2fbf61d
MK
754.BR sigwaitinfo (2),
755the kernel will arbitrarily select one of these threads
c13182ef 756to receive a signal sent using
e2fbf61d 757.BR kill (2).
a69b6bda 758.TP
f5dbc7c8 759.BR CLONE_UNTRACED " (since Linux 2.5.46)"
a69b6bda 760If
f5dbc7c8
MK
761.B CLONE_UNTRACED
762is specified, then a tracing process cannot force
763.B CLONE_PTRACE
764on this child process.
fea681da 765.TP
1603d6a1 766.BR CLONE_VFORK " (since Linux 2.2)"
f5dbc7c8
MK
767If
768.B CLONE_VFORK
769is set, the execution of the calling process is suspended
770until the child releases its virtual memory
771resources via a call to
772.BR execve (2)
773or
774.BR _exit (2)
775(as with
776.BR vfork (2)).
777
778If
779.B CLONE_VFORK
4b4a853a 780is not set, then both the calling process and the child are schedulable
f5dbc7c8
MK
781after the call, and an application should not rely on execution occurring
782in any particular order.
fea681da 783.TP
1603d6a1 784.BR CLONE_VM " (since Linux 2.0)"
f5dbc7c8
MK
785If
786.B CLONE_VM
787is set, the calling process and the child process run in the same memory
788space.
789In particular, memory writes performed by the calling process
790or by the child process are also visible in the other process.
791Moreover, any memory mapping or unmapping performed with
792.BR mmap (2)
793or
794.BR munmap (2)
795by the child or calling process also affects the other process.
796
797If
798.B CLONE_VM
799is not set, the child process runs in a separate copy of the memory
800space of the calling process at the time of
801.BR clone ().
802Memory writes or file mappings/unmappings performed by one of the
803processes do not affect the other, as with
804.BR fork (2).
0722a578 805.SS C library/kernel differences
e585064b
MK
806The raw
807.BR clone ()
fea681da
MK
808system call corresponds more closely to
809.BR fork (2)
810in that execution in the child continues from the point of the
c13182ef 811call.
5add3af3
MK
812As such, the
813.I fn
c13182ef 814and
5add3af3
MK
815.I arg
816arguments of the
817.BR clone ()
818wrapper function are omitted.
819Furthermore, the argument order changes.
2a15a76b
MK
820In addition, there are variations across architectures.
821
822The raw system call interface on x86-64 and some other architectures
823(including sh, tile, and alpha) is roughly:
824
5add3af3
MK
825.in +4
826.nf
2a15a76b
MK
827.BI "long clone(unsigned long " flags ", void *" child_stack ,
828.BI " int *" ptid ", int *" ctid ,
829.BI " unsigned long " newtls );
830.fi
831.in
832
833On x86-32, and several other common architectures
834(including score, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
835and MIPS),
836.\" CONFIG_CLONE_BACKWARDS
837the order of the last two arguments is reversed:
5add3af3 838
2a15a76b
MK
839.in +4
840.nf
5add3af3 841.BI "long clone(unsigned long " flags ", void *" child_stack ,
2a15a76b
MK
842.BI " int *" ptid ", unsigned long " newtls ,
843.BI " int *" ctid );
844.fi
845.in
846
847On the cris and s390 architectures,
848.\" CONFIG_CLONE_BACKWARDS2
849the order of the first two arguments is reversed:
850
851.in +4
852.nf
853.BI "long clone(void *" child_stack ", unsigned long " flags ,
fda55470 854.BI " int *" ptid ", int *" ctid ,
dd6d3d2e 855.BI " unsigned long " newtls );
2a15a76b
MK
856.fi
857.in
858
859On the microblaze architecture,
860.\" CONFIG_CLONE_BACKWARDS3
861an additional argument is supplied:
fea681da 862
2a15a76b
MK
863.in +4
864.nf
865.BI "long clone(unsigned long " flags ", void *" child_stack ,
866.BI " int " stack_size , "\fR /* Size of stack */"
867.BI " int *" ptid ", int *" ctid ,
868.BI " unsigned long " newtls );
5add3af3
MK
869.fi
870.in
2a15a76b 871
e585064b 872Another difference for the raw system call is that the
fea681da 873.I child_stack
c13182ef 874argument may be zero, in which case copy-on-write semantics ensure that the
fea681da 875child gets separate copies of stack pages when either process modifies
c13182ef
MK
876the stack.
877In this case, for correct operation, the
fea681da
MK
878.B CLONE_VM
879option should not be specified.
2a15a76b 880.\"
251113d0 881.SS blackfin, m68k, and sparc
2a15a76b
MK
882.\" Mike Frysinger noted in a 2013 mail:
883.\" these arches don't define __ARCH_WANT_SYS_CLONE:
884.\" blackfin ia64 m68k sparc
251113d0 885The argument-passing conventions on
04346be5 886blackfin, m68k, and sparc are different from the descriptions above.
251113d0 887For details, see the kernel (and glibc) source.
574c92b6 888.SS ia64
097a1f3b
MK
889On ia64, a different interface is used:
890.nf
891
892.BI "int __clone2(int (*" "fn" ")(void *), "
893.BI " void *" child_stack_base ", size_t " stack_size ,
894.BI " int " flags ", void *" "arg" ", ... "
895.BI " /* pid_t *" ptid ", struct user_desc *" tls \
896", pid_t *" ctid " */ );"
897.fi
898.PP
899The prototype shown above is for the glibc wrapper function;
900the raw system call interface has no
901.I fn
902or
903.I arg
904argument, and changes the order of the arguments so that
905.I flags
906is the first argument, and
907.I tls
908is the last argument.
909.PP
910.BR __clone2 ()
911operates in the same way as
912.BR clone (),
913except that
914.I child_stack_base
915points to the lowest address of the child's stack area,
916and
917.I stack_size
918specifies the size of the stack pointed to by
919.IR child_stack_base .
5add3af3 920.SS Linux 2.4 and earlier
577f9b62
MK
921In Linux 2.4 and earlier,
922.BR clone ()
923does not take arguments
924.IR ptid ,
925.IR tls ,
926and
130b2e49 927.IR ctid .
47297adb 928.SH RETURN VALUE
0bfa087b
MK
929.\" gettid(2) returns current->pid;
930.\" getpid(2) returns current->tgid;
fea681da 931On success, the thread ID of the child process is returned
c13182ef 932in the caller's thread of execution.
84811e86 933On failure, \-1 is returned
fea681da
MK
934in the caller's context, no child process will be created, and
935.I errno
936will be set appropriately.
fea681da
MK
937.SH ERRORS
938.TP
939.B EAGAIN
e1b6e186
MK
940Too many processes are already running; see
941.BR fork (2).
fea681da
MK
942.TP
943.B EINVAL
944.B CLONE_SIGHAND
945was specified, but
946.B CLONE_VM
2e8a7fb3
MK
947was not.
948(Since Linux 2.6.0-test6.)
fea681da
MK
949.TP
950.B EINVAL
951.B CLONE_THREAD
952was specified, but
953.B CLONE_SIGHAND
6387216b
MK
954was not.
955(Since Linux 2.5.35.)
29546c24
MK
956.\" .TP
957.\" .B EINVAL
958.\" Precisely one of
959.\" .B CLONE_DETACHED
960.\" and
961.\" .B CLONE_THREAD
6387216b
MK
962.\" was specified.
963.\" (Since Linux 2.6.0-test6.)
fea681da
MK
964.TP
965.B EINVAL
d34e5645 966.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
fea681da
MK
967Both
968.B CLONE_FS
969and
970.B CLONE_NEWNS
971were specified in
972.IR flags .
973.TP
d34e5645
MK
974.BR EINVAL " (since Linux 3.9)"
975Both
976.B CLONE_NEWUSER
977and
978.B CLONE_FS
979were specified in
980.IR flags .
981.TP
fea681da 982.B EINVAL
82ee147a 983Both
667417b3
MK
984.B CLONE_NEWIPC
985and
986.B CLONE_SYSVSEM
987were specified in
988.IR flags .
989.TP
990.B EINVAL
f0007192 991One (or both) of
82ee147a 992.BR CLONE_NEWPID
f0007192
MK
993or
994.BR CLONE_NEWUSER
995and one (or both) of
82ee147a 996.BR CLONE_THREAD
f0007192
MK
997or
998.BR CLONE_PARENT
82ee147a
MK
999were specified in
1000.IR flags .
1001.TP
1002.B EINVAL
d4748fad 1003Returned by the glibc
edcc65ff 1004.BR clone ()
d4748fad
MK
1005wrapper function when
1006.IR fn
1007or
1008.IR child_stack
1009is specified as NULL.
fea681da 1010.TP
28cad2c1 1011.B EINVAL
667417b3
MK
1012.BR CLONE_NEWIPC
1013was specified in
1014.IR flags ,
1015but the kernel was not configured with the
1016.B CONFIG_SYSVIPC
1017and
1018.BR CONFIG_IPC_NS
1019options.
1020.TP
1021.B EINVAL
163bf178
MK
1022.BR CLONE_NEWNET
1023was specified in
1024.IR flags ,
1025but the kernel was not configured with the
1026.B CONFIG_NET_NS
1027option.
1028.TP
1029.B EINVAL
28cad2c1
MK
1030.BR CLONE_NEWPID
1031was specified in
1032.IR flags ,
1033but the kernel was not configured with the
1034.B CONFIG_PID_NS
1035option.
1036.TP
43ce9dda
MK
1037.B EINVAL
1038.BR CLONE_NEWUTS
1039was specified in
1040.IR flags ,
1041but the kernel was not configured with the
1042.B CONFIG_UTS_NS
1043option.
1044.TP
c550a897
MK
1045.B EINVAL
1046.I child_stack
1047is not aligned to a suitable boundary for this architecture.
1048For example, on aarch64,
1049.I child_stack
1050must be a multiple of 16.
1051.TP
fea681da
MK
1052.B ENOMEM
1053Insufficient memory is available to allocate a task structure for the
1054child, or to copy those parts of the caller's context that need to be
1055copied.
1056.TP
1057.B EPERM
667417b3 1058.BR CLONE_NEWIPC ,
163bf178 1059.BR CLONE_NEWNET ,
43ce9dda
MK
1060.BR CLONE_NEWNS ,
1061.BR CLONE_NEWPID ,
82ee147a 1062or
43ce9dda 1063.BR CLONE_NEWUTS
00b08db3 1064was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
fea681da
MK
1065.TP
1066.B EPERM
1067.B CLONE_PID
1068was specified by a process other than process 0.
365d292a
MK
1069.TP
1070.B EPERM
1071.BR CLONE_NEWUSER
1072was specified in
1073.IR flags ,
1074but either the effective user ID or the effective group ID of the caller
1075does not have a mapping in the parent namespace (see
f58fb24f 1076.BR user_namespaces (7)).
6fd119e7 1077.TP
ac007938
MK
1078.BR EPERM " (since Linux 3.9)"
1079.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
11a38815
AM
1080.B CLONE_NEWUSER
1081was specified in
ac007938
MK
1082.I flags
1083and the caller is in a chroot environment
1084.\" FIXME What is the rationale for this restriction?
1085(i.e., the caller's root directory does not match the root directory
1086of the mount namespace in which it resides).
1087.TP
6717ee86
MK
1088.BR ERESTARTNOINTR " (since Linux 2.6.17)"
1089.\" commit 4a2c7a7837da1b91468e50426066d988050e4d56
1090System call was interrupted by a signal and will be restarted.
1091(This can be seen only during a trace.)
1092.TP
ac007938 1093.BR EUSERS " (since Linux 3.11)"
6fd119e7
MK
1094.B CLONE_NEWUSER
1095was specified in
1096.IR flags ,
1097and the call would cause the limit on the number of
1098nested user namespaces to be exceeded.
1099See
1100.BR user_namespaces (7).
365d292a
MK
1101.SH VERSIONS
1102There is no entry for
1103.BR clone ()
1104in libc5.
1105glibc2 provides
1106.BR clone ()
1107as described in this manual page.
47297adb 1108.SH CONFORMING TO
a1d5f77c 1109.BR clone ()
e585064b 1110is Linux-specific and should not be used in programs
a1d5f77c 1111intended to be portable.
fea681da 1112.SH NOTES
79bdcc4a
MK
1113The
1114.BR kcmp (2)
1115system call can be used to test whether two processes share various
49dba87f 1116resources such as a file descriptor table,
79bdcc4a
MK
1117System V semaphore undo operations, or a virtual address space.
1118
fd8a5be4
MK
1119In the kernel 2.4.x series,
1120.B CLONE_THREAD
1121generally does not make the parent of the new thread the same
1122as the parent of the calling process.
1123However, for kernel versions 2.4.7 to 2.4.18 the
1124.B CLONE_THREAD
1125flag implied the
c13182ef 1126.B CLONE_PARENT
fd8a5be4 1127flag (as in kernel 2.6).
fea681da 1128
c13182ef
MK
1129For a while there was
1130.B CLONE_DETACHED
a5053dcb 1131(introduced in 2.5.32):
c13182ef 1132this flag meant that the parent wanted no child-exit signal.
4d543007 1133In Linux 2.6.2, the need to give this flag together with
c13182ef 1134.B CLONE_THREAD
a5053dcb
MK
1135disappeared.
1136This flag is still defined, but has no effect.
1137
34ccb744 1138On i386,
a5a997ca
MK
1139.BR clone ()
1140should not be called through vsyscall, but directly through
1141.IR "int $0x80" .
31830ef0
MK
1142.SH BUGS
1143Versions of the GNU C library that include the NPTL threading library
c13182ef 1144contain a wrapper function for
0bfa087b 1145.BR getpid (2)
31830ef0 1146that performs caching of PIDs.
c60237c9
MK
1147This caching relies on support in the glibc wrapper for
1148.BR clone (),
1149but as currently implemented,
1150the cache may not be up to date in some circumstances.
1151In particular,
1152if a signal is delivered to the child immediately after the
1153.BR clone ()
1154call, then a call to
0b80cf56 1155.BR getpid (2)
c60237c9
MK
1156in a handler for the signal may return the PID
1157of the calling process ("the parent"),
88619baf 1158if the clone wrapper has not yet had a chance to update the PID
c60237c9
MK
1159cache in the child.
1160(This discussion ignores the case where the child was created using
9291ce36 1161.BR CLONE_THREAD ,
c60237c9 1162when
0b80cf56 1163.BR getpid (2)
c60237c9
MK
1164.I should
1165return the same value in the child and in the process that called
1166.BR clone (),
a1d48abb 1167since the caller and the child are in the same thread group.
e7d807b7 1168The stale-cache problem also does not occur if the
a1d48abb
JR
1169.I flags
1170argument includes
1171.BR CLONE_VM .)
c60237c9 1172To get the truth, it may be necessary to use code such as the following:
31830ef0
MK
1173.nf
1174
1175 #include <syscall.h>
1176
1177 pid_t mypid;
1178
1179 mypid = syscall(SYS_getpid);
1180.fi
c60237c9
MK
1181.\" See also the following bug reports
1182.\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1183.\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
8c7b566c 1184.SH EXAMPLE
8c7b566c 1185The following program demonstrates the use of
9c13072a 1186.BR clone ()
8c7b566c
MK
1187to create a child process that executes in a separate UTS namespace.
1188The child changes the hostname in its UTS namespace.
1189Both parent and child then display the system hostname,
1190making it possible to see that the hostname
1191differs in the UTS namespaces of the parent and child.
1192For an example of the use of this program, see
1193.BR setns (2).
f30b7415 1194.SS Program source
8c7b566c
MK
1195.nf
1196#define _GNU_SOURCE
1197#include <sys/wait.h>
1198#include <sys/utsname.h>
1199#include <sched.h>
1200#include <string.h>
1201#include <stdio.h>
1202#include <stdlib.h>
1203#include <unistd.h>
1204
1205#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
1206 } while (0)
1207
1208static int /* Start function for cloned child */
1209childFunc(void *arg)
1210{
1211 struct utsname uts;
1212
1213 /* Change hostname in UTS namespace of child */
1214
1215 if (sethostname(arg, strlen(arg)) == \-1)
1216 errExit("sethostname");
1217
07d4e6ea 1218 /* Retrieve and display hostname */
8c7b566c
MK
1219
1220 if (uname(&uts) == \-1)
1221 errExit("uname");
1222 printf("uts.nodename in child: %s\\n", uts.nodename);
1223
1224 /* Keep the namespace open for a while, by sleeping.
1225 This allows some experimentation\-\-for example, another
1226 process might join the namespace. */
9f1b9726 1227
8c7b566c
MK
1228 sleep(200);
1229
1230 return 0; /* Child terminates now */
1231}
1232
1233#define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
1234
1235int
1236main(int argc, char *argv[])
1237{
1238 char *stack; /* Start of stack buffer */
1239 char *stackTop; /* End of stack buffer */
1240 pid_t pid;
1241 struct utsname uts;
1242
1243 if (argc < 2) {
1244 fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]);
1245 exit(EXIT_SUCCESS);
1246 }
1247
1248 /* Allocate stack for child */
1249
1250 stack = malloc(STACK_SIZE);
1251 if (stack == NULL)
1252 errExit("malloc");
1253 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
1254
1255 /* Create child that has its own UTS namespace;
1256 child commences execution in childFunc() */
1257
1258 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1259 if (pid == \-1)
1260 errExit("clone");
1261 printf("clone() returned %ld\\n", (long) pid);
1262
1263 /* Parent falls through to here */
1264
1265 sleep(1); /* Give child time to change its hostname */
1266
9f1b9726 1267 /* Display hostname in parent\(aqs UTS namespace. This will be
8c7b566c
MK
1268 different from hostname in child\(aqs UTS namespace. */
1269
1270 if (uname(&uts) == \-1)
1271 errExit("uname");
1272 printf("uts.nodename in parent: %s\\n", uts.nodename);
1273
1274 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
1275 errExit("waitpid");
1276 printf("child has terminated\\n");
1277
1278 exit(EXIT_SUCCESS);
1279}
1280.fi
47297adb 1281.SH SEE ALSO
fea681da 1282.BR fork (2),
2b44301c 1283.BR futex (2),
fea681da
MK
1284.BR getpid (2),
1285.BR gettid (2),
6f8746e4 1286.BR kcmp (2),
f2d0bbf1 1287.BR set_thread_area (2),
2b44301c 1288.BR set_tid_address (2),
8403481f 1289.BR setns (2),
f2d0bbf1 1290.BR tkill (2),
5cc01e9c 1291.BR unshare (2),
fea681da 1292.BR wait (2),
3616b7c0 1293.BR capabilities (7),
41096af1 1294.BR namespaces (7),
3616b7c0 1295.BR pthreads (7)