]> git.ipfire.org Git - thirdparty/man-pages.git/blame_incremental - man2/clone.2
clone.2, namespaces.7: Move some CLONE_NEWIPC text from clone.2 to namespaces.7
[thirdparty/man-pages.git] / man2 / clone.2
... / ...
CommitLineData
1.\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
2.\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013
3.\"
4.\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
5.\" May be distributed under the GNU General Public License.
6.\" %%%LICENSE_END
7.\"
8.\" Modified by Michael Haardt <michael@moria.de>
9.\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
10.\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
11.\" New man page (copied from 'fork.2').
12.\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
13.\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
14.\" Modified 26 Jun 2001 by Michael Kerrisk
15.\" Mostly upgraded to 2.4.x
16.\" Added prototype for sys_clone() plus description
17.\" Added CLONE_THREAD with a brief description of thread groups
18.\" Added CLONE_PARENT and revised entire page to remove ambiguity
19.\" between "calling process" and "parent process"
20.\" Added CLONE_PTRACE and CLONE_VFORK
21.\" Added EPERM and EINVAL error codes
22.\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
23.\" various other minor tidy ups and clarifications.
24.\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
25.\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
26.\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
27.\" Added description for CLONE_NEWNS, which was added in 2.4.19
28.\" Slightly rephrased, aeb.
29.\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
30.\" Modified 1 Jan 2004 - various updates, aeb
31.\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
32.\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
33.\" wrapper under BUGS.
34.\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
35.\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
36.\" 2008-11-18, mtk, order CLONE_* flags alphabetically
37.\" 2008-11-18, mtk, document CLONE_NEWPID
38.\" 2008-11-19, mtk, document CLONE_NEWUTS
39.\" 2008-11-19, mtk, document CLONE_NEWIPC
40.\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
41.\"
42.TH CLONE 2 2014-08-19 "Linux" "Linux Programmer's Manual"
43.SH NAME
44clone, __clone2 \- create a child process
45.SH SYNOPSIS
46.nf
47/* Prototype for the glibc wrapper function */
48
49.B #include <sched.h>
50
51.BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
52.BI " int " flags ", void *" "arg" ", ... "
53.BI " /* pid_t *" ptid ", struct user_desc *" tls \
54", pid_t *" ctid " */ );"
55
56/* Prototype for the raw system call */
57
58.BI "long clone(unsigned long " flags ", void *" child_stack ,
59.BI " void *" ptid ", void *" ctid ,
60.BI " struct pt_regs *" regs );
61.fi
62.sp
63.in -4n
64Feature Test Macro Requirements for glibc wrapper function (see
65.BR feature_test_macros (7)):
66.in
67.sp
68.BR clone ():
69.ad l
70.RS 4
71.PD 0
72.TP 4
73Since glibc 2.14:
74_GNU_SOURCE
75.TP 4
76.\" See http://sources.redhat.com/bugzilla/show_bug.cgi?id=4749
77Before glibc 2.14:
78_BSD_SOURCE || _SVID_SOURCE
79 /* _GNU_SOURCE also suffices */
80.PD
81.RE
82.ad b
83.SH DESCRIPTION
84.BR clone ()
85creates a new process, in a manner similar to
86.BR fork (2).
87
88This page describes both the glibc
89.BR clone ()
90wrapper function and the underlying system call on which it is based.
91The main text describes the wrapper function;
92the differences for the raw system call
93are described toward the end of this page.
94
95Unlike
96.BR fork (2),
97.BR clone ()
98allows the child process to share parts of its execution context with
99the calling process, such as the memory space, the table of file
100descriptors, and the table of signal handlers.
101(Note that on this manual
102page, "calling process" normally corresponds to "parent process".
103But see the description of
104.B CLONE_PARENT
105below.)
106
107The main use of
108.BR clone ()
109is to implement threads: multiple threads of control in a program that
110run concurrently in a shared memory space.
111
112When the child process is created with
113.BR clone (),
114it executes the function
115.IR fn ( arg ).
116(This differs from
117.BR fork (2),
118where execution continues in the child from the point
119of the
120.BR fork (2)
121call.)
122The
123.I fn
124argument is a pointer to a function that is called by the child
125process at the beginning of its execution.
126The
127.I arg
128argument is passed to the
129.I fn
130function.
131
132When the
133.IR fn ( arg )
134function application returns, the child process terminates.
135The integer returned by
136.I fn
137is the exit code for the child process.
138The child process may also terminate explicitly by calling
139.BR exit (2)
140or after receiving a fatal signal.
141
142The
143.I child_stack
144argument specifies the location of the stack used by the child process.
145Since the child and calling process may share memory,
146it is not possible for the child process to execute in the
147same stack as the calling process.
148The calling process must therefore
149set up memory space for the child stack and pass a pointer to this
150space to
151.BR clone ().
152Stacks grow downward on all processors that run Linux
153(except the HP PA processors), so
154.I child_stack
155usually points to the topmost address of the memory space set up for
156the child stack.
157
158The low byte of
159.I flags
160contains the number of the
161.I "termination signal"
162sent to the parent when the child dies.
163If this signal is specified as anything other than
164.BR SIGCHLD ,
165then the parent process must specify the
166.B __WALL
167or
168.B __WCLONE
169options when waiting for the child with
170.BR wait (2).
171If no signal is specified, then the parent process is not signaled
172when the child terminates.
173
174.I flags
175may also be bitwise-or'ed with zero or more of the following constants,
176in order to specify what is shared between the calling process
177and the child process:
178.TP
179.BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
180Erase child thread ID at location
181.I ctid
182in child memory when the child exits, and do a wakeup on the futex
183at that address.
184The address involved may be changed by the
185.BR set_tid_address (2)
186system call.
187This is used by threading libraries.
188.TP
189.BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
190Store child thread ID at location
191.I ctid
192in child memory.
193.TP
194.BR CLONE_FILES " (since Linux 2.0)"
195If
196.B CLONE_FILES
197is set, the calling process and the child process share the same file
198descriptor table.
199Any file descriptor created by the calling process or by the child
200process is also valid in the other process.
201Similarly, if one of the processes closes a file descriptor,
202or changes its associated flags (using the
203.BR fcntl (2)
204.B F_SETFD
205operation), the other process is also affected.
206
207If
208.B CLONE_FILES
209is not set, the child process inherits a copy of all file descriptors
210opened in the calling process at the time of
211.BR clone ().
212(The duplicated file descriptors in the child refer to the
213same open file descriptions (see
214.BR open (2))
215as the corresponding file descriptors in the calling process.)
216Subsequent operations that open or close file descriptors,
217or change file descriptor flags,
218performed by either the calling
219process or the child process do not affect the other process.
220.TP
221.BR CLONE_FS " (since Linux 2.0)"
222If
223.B CLONE_FS
224is set, the caller and the child process share the same filesystem
225information.
226This includes the root of the filesystem, the current
227working directory, and the umask.
228Any call to
229.BR chroot (2),
230.BR chdir (2),
231or
232.BR umask (2)
233performed by the calling process or the child process also affects the
234other process.
235
236If
237.B CLONE_FS
238is not set, the child process works on a copy of the filesystem
239information of the calling process at the time of the
240.BR clone ()
241call.
242Calls to
243.BR chroot (2),
244.BR chdir (2),
245.BR umask (2)
246performed later by one of the processes do not affect the other process.
247.TP
248.BR CLONE_IO " (since Linux 2.6.25)"
249If
250.B CLONE_IO
251is set, then the new process shares an I/O context with
252the calling process.
253If this flag is not set, then (as with
254.BR fork (2))
255the new process has its own I/O context.
256
257.\" The following based on text from Jens Axboe
258The I/O context is the I/O scope of the disk scheduler (i.e.,
259what the I/O scheduler uses to model scheduling of a process's I/O).
260If processes share the same I/O context,
261they are treated as one by the I/O scheduler.
262As a consequence, they get to share disk time.
263For some I/O schedulers,
264.\" the anticipatory and CFQ scheduler
265if two processes share an I/O context,
266they will be allowed to interleave their disk access.
267If several threads are doing I/O on behalf of the same process
268.RB ( aio_read (3),
269for instance), they should employ
270.BR CLONE_IO
271to get better I/O performance.
272.\" with CFQ and AS.
273
274If the kernel is not configured with the
275.B CONFIG_BLOCK
276option, this flag is a no-op.
277.TP
278.BR CLONE_NEWIPC " (since Linux 2.6.19)"
279If
280.B CLONE_NEWIPC
281is set, then create the process in a new IPC namespace.
282If this flag is not set, then (as with
283.BR fork (2)),
284the process is created in the same IPC namespace as
285the calling process.
286This flag is intended for the implementation of containers.
287
288An IPC namespace provides an isolated view of System\ V IPC objects (see
289.BR svipc (7))
290and (since Linux 2.6.30)
291.\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
292.\" https://lwn.net/Articles/312232/
293POSIX message queues
294(see
295.BR mq_overview (7)).
296The common characteristic of these IPC mechanisms is that IPC
297objects are identified by mechanisms other than filesystem
298pathnames.
299
300Objects created in an IPC namespace are visible to all other processes
301that are members of that namespace,
302but are not visible to processes in other IPC namespaces.
303
304When an IPC namespace is destroyed
305(i.e., when the last process that is a member of the namespace terminates),
306all IPC objects in the namespace are automatically destroyed.
307
308Use of this flag requires
309that the process be privileged
310.RB ( CAP_SYS_ADMIN ).
311This flag can't be specified in conjunction with
312.BR CLONE_SYSVSEM .
313
314For further information on IPC namespaces, see
315.BR namespaces (7).
316.TP
317.BR CLONE_NEWNET " (since Linux 2.6.24)"
318(The implementation of this flag was completed only
319by about kernel version 2.6.29.)
320
321If
322.B CLONE_NEWNET
323is set, then create the process in a new network namespace.
324If this flag is not set, then (as with
325.BR fork (2))
326the process is created in the same network namespace as
327the calling process.
328This flag is intended for the implementation of containers.
329
330A network namespace provides an isolated view of the networking stack
331(network device interfaces, IPv4 and IPv6 protocol stacks,
332IP routing tables, firewall rules, the
333.I /proc/net
334and
335.I /sys/class/net
336directory trees, sockets, etc.).
337A physical network device can live in exactly one
338network namespace.
339A virtual network device ("veth") pair provides a pipe-like abstraction
340.\" FIXME . Add pointer to veth(4) page when it is eventually completed
341that can be used to create tunnels between network namespaces,
342and can be used to create a bridge to a physical network device
343in another namespace.
344
345When a network namespace is freed
346(i.e., when the last process in the namespace terminates),
347its physical network devices are moved back to the
348initial network namespace (not to the parent of the process).
349
350Use of this flag requires: a kernel configured with the
351.B CONFIG_NET_NS
352option and that the process be privileged
353.RB ( CAP_SYS_ADMIN ).
354.TP
355.BR CLONE_NEWNS " (since Linux 2.4.19)"
356Start the child in a new mount namespace.
357
358Every process lives in a mount namespace.
359The
360.I namespace
361of a process is the data (the set of mounts) describing the file hierarchy
362as seen by that process.
363After a
364.BR fork (2)
365or
366.BR clone ()
367where the
368.B CLONE_NEWNS
369flag is not set, the child lives in the same mount
370namespace as the parent.
371The system calls
372.BR mount (2)
373and
374.BR umount (2)
375change the mount namespace of the calling process, and hence affect
376all processes that live in the same namespace, but do not affect
377processes in a different mount namespace.
378
379After a
380.BR clone ()
381where the
382.B CLONE_NEWNS
383flag is set, the cloned child is started in a new mount namespace,
384initialized with a copy of the namespace of the parent.
385
386Only a privileged process (one having the \fBCAP_SYS_ADMIN\fP capability)
387may specify the
388.B CLONE_NEWNS
389flag.
390It is not permitted to specify both
391.B CLONE_NEWNS
392and
393.B CLONE_FS
394in the same
395.BR clone ()
396call.
397.TP
398.BR CLONE_NEWUSER
399(This flag first became meaningful for
400.BR clone ()
401in Linux 2.6.23,
402the current
403.BR clone ()
404semantics were merged in Linux 3.5,
405and the final pieces to make the user namespaces completely usable were
406merged in Linux 3.8.)
407
408If
409.B CLONE_NEWUSER
410is set, then create the process in a new user namespace.
411If this flag is not set, then (as with
412.BR fork (2))
413the process is created in the same user namespace as the calling process.
414
415A user namespace provides an isolated environment for
416security-related identifiers, in particular,
417user IDs, group IDs, keys (see
418.BR keyctl (2)),
419and capabilities.
420
421When a user namespace is created,
422it starts out without a mapping of user IDs (group IDs)
423to the parent user namespace.
424The desired mapping of user IDs (group IDs) to the parent user namespace
425may be set by writing into
426.IR /proc/[pid]/uid_map
427.RI ( /proc/[pid]/gid_map );
428see
429.BR proc (5).
430
431The first process in a user namespace starts out with a complete set
432of capabilities with respect to the new user namespace.
433
434System calls that return user IDs (group IDs) will return
435either the user ID (group ID) mapped into the current
436user namespace if there is a mapping, or the overflow user ID (group ID);
437the default value for the overflow user ID (group ID) is 65534.
438See the descriptions of
439.IR /proc/sys/kernel/overflowuid
440and
441.IR /proc/sys/kernel/overflowgid
442in
443.BR proc (5).
444
445Use of this flag requires a kernel configured with the
446.BR CONFIG_USER_NS
447option.
448Before Linux 3.8, use of
449.BR CLONE_NEWUSER
450required that the caller have three capabilities:
451.BR CAP_SYS_ADMIN ,
452.BR CAP_SETUID ,
453and
454.BR CAP_SETGID .
455.\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
456Starting with Linux 3.8,
457no privileges are needed to create a user namespace,
458and mount, PID, IPC, network, and UTS namespaces can be created with just the
459.B CAP_SYS_ADMIN
460capability in the caller's user namespace.
461
462If
463.BR CLONE_NEWUSER
464is specified along with other
465.B CLONE_NEW*
466flags in a single
467.BR clone ()
468call, the user namespace is guaranteed to be created first,
469giving the caller privileges over the remaining
470namespaces created by the call.
471Thus, it is possible for an unprivileged caller to specify this combination
472of flags.
473
474Over the years, there have been a lot of features that have been added
475to the Linux kernel that are only available to privileged users
476because of their potential to confuse set-user-ID-root applications.
477In general, it becomes safe to allow the root user in a user namespace to
478use those features because it is impossible, while in a user namespace,
479to gain more privilege than the root user of a user namespace has.
480
481.TP
482.BR CLONE_NEWPID " (since Linux 2.6.24)"
483.\" This explanation draws a lot of details from
484.\" http://lwn.net/Articles/259217/
485.\" Authors: Pavel Emelyanov <xemul@openvz.org>
486.\" and Kir Kolyshkin <kir@openvz.org>
487.\"
488.\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
489.\" Author: Pavel Emelyanov <xemul@openvz.org>
490If
491.B CLONE_NEWPID
492is set, then create the process in a new PID namespace.
493If this flag is not set, then (as with
494.BR fork (2))
495the process is created in the same PID namespace as
496the calling process.
497This flag is intended for the implementation of containers.
498
499A PID namespace provides an isolated environment for PIDs:
500PIDs in a new namespace start at 1,
501somewhat like a standalone system, and calls to
502.BR fork (2),
503.BR vfork (2),
504or
505.BR clone ()
506will produce processes with PIDs that are unique within the namespace.
507
508The first process created in a new namespace
509(i.e., the process created using the
510.BR CLONE_NEWPID
511flag) has the PID 1, and is the "init" process for the namespace.
512Children that are orphaned within the namespace will be reparented
513to this process rather than
514.BR init (8).
515Unlike the traditional
516.B init
517process, the "init" process of a PID namespace can terminate,
518and if it does, all of the processes in the namespace are terminated.
519
520PID namespaces form a hierarchy.
521When a new PID namespace is created,
522the processes in that namespace are visible
523in the PID namespace of the process that created the new namespace;
524analogously, if the parent PID namespace is itself
525the child of another PID namespace,
526then processes in the child and parent PID namespaces will both be
527visible in the grandparent PID namespace.
528Conversely, the processes in the "child" PID namespace do not see
529the processes in the parent namespace.
530The existence of a namespace hierarchy means that each process
531may now have multiple PIDs:
532one for each namespace in which it is visible;
533each of these PIDs is unique within the corresponding namespace.
534(A call to
535.BR getpid (2)
536always returns the PID associated with the namespace in which
537the process lives.)
538
539After creating the new namespace,
540it is useful for the child to change its root directory
541and mount a new procfs instance at
542.I /proc
543so that tools such as
544.BR ps (1)
545work correctly.
546.\" mount -t proc proc /proc
547(If
548.BR CLONE_NEWNS
549is also included in
550.IR flags ,
551then it isn't necessary to change the root directory:
552a new procfs instance can be mounted directly over
553.IR /proc .)
554
555Use of this flag requires: a kernel configured with the
556.B CONFIG_PID_NS
557option and that the process be privileged
558.RB ( CAP_SYS_ADMIN ).
559This flag can't be specified in conjunction with
560.BR CLONE_THREAD .
561.TP
562.BR CLONE_NEWUTS " (since Linux 2.6.19)"
563If
564.B CLONE_NEWUTS
565is set, then create the process in a new UTS namespace,
566whose identifiers are initialized by duplicating the identifiers
567from the UTS namespace of the calling process.
568If this flag is not set, then (as with
569.BR fork (2))
570the process is created in the same UTS namespace as
571the calling process.
572This flag is intended for the implementation of containers.
573
574A UTS namespace is the set of identifiers returned by
575.BR uname (2);
576among these, the domain name and the hostname can be modified by
577.BR setdomainname (2)
578and
579.BR sethostname (2),
580respectively.
581Changes made to the identifiers in a UTS namespace
582are visible to all other processes in the same namespace,
583but are not visible to processes in other UTS namespaces.
584
585Use of this flag requires: a kernel configured with the
586.B CONFIG_UTS_NS
587option and that the process be privileged
588.RB ( CAP_SYS_ADMIN ).
589.TP
590.BR CLONE_PARENT " (since Linux 2.3.12)"
591If
592.B CLONE_PARENT
593is set, then the parent of the new child (as returned by
594.BR getppid (2))
595will be the same as that of the calling process.
596
597If
598.B CLONE_PARENT
599is not set, then (as with
600.BR fork (2))
601the child's parent is the calling process.
602
603Note that it is the parent process, as returned by
604.BR getppid (2),
605which is signaled when the child terminates, so that
606if
607.B CLONE_PARENT
608is set, then the parent of the calling process, rather than the
609calling process itself, will be signaled.
610.TP
611.BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
612Store child thread ID at location
613.I ptid
614in parent and child memory.
615(In Linux 2.5.32-2.5.48 there was a flag
616.B CLONE_SETTID
617that did this.)
618.TP
619.BR CLONE_PID " (obsolete)"
620If
621.B CLONE_PID
622is set, the child process is created with the same process ID as
623the calling process.
624This is good for hacking the system, but otherwise
625of not much use.
626Since 2.3.21 this flag can be
627specified only by the system boot process (PID 0).
628It disappeared in Linux 2.5.16.
629.TP
630.BR CLONE_PTRACE " (since Linux 2.2)"
631If
632.B CLONE_PTRACE
633is specified, and the calling process is being traced,
634then trace the child also (see
635.BR ptrace (2)).
636.TP
637.BR CLONE_SETTLS " (since Linux 2.5.32)"
638The
639.I tls
640argument is the new TLS (Thread Local Storage) descriptor.
641(See
642.BR set_thread_area (2).)
643.TP
644.BR CLONE_SIGHAND " (since Linux 2.0)"
645If
646.B CLONE_SIGHAND
647is set, the calling process and the child process share the same table of
648signal handlers.
649If the calling process or child process calls
650.BR sigaction (2)
651to change the behavior associated with a signal, the behavior is
652changed in the other process as well.
653However, the calling process and child
654processes still have distinct signal masks and sets of pending
655signals.
656So, one of them may block or unblock some signals using
657.BR sigprocmask (2)
658without affecting the other process.
659
660If
661.B CLONE_SIGHAND
662is not set, the child process inherits a copy of the signal handlers
663of the calling process at the time
664.BR clone ()
665is called.
666Calls to
667.BR sigaction (2)
668performed later by one of the processes have no effect on the other
669process.
670
671Since Linux 2.6.0-test6,
672.I flags
673must also include
674.B CLONE_VM
675if
676.B CLONE_SIGHAND
677is specified.
678.TP
679.BR CLONE_STOPPED " (since Linux 2.6.0-test2)"
680If
681.B CLONE_STOPPED
682is set, then the child is initially stopped (as though it was sent a
683.B SIGSTOP
684signal), and must be resumed by sending it a
685.B SIGCONT
686signal.
687
688This flag was
689.I deprecated
690from Linux 2.6.25 onward,
691and was
692.I removed
693altogether in Linux 2.6.38.
694.\" glibc 2.8 removed this defn from bits/sched.h
695.TP
696.BR CLONE_SYSVSEM " (since Linux 2.5.10)"
697If
698.B CLONE_SYSVSEM
699is set, then the child and the calling process share
700a single list of System\ V semaphore undo values (see
701.BR semop (2)).
702If this flag is not set, then the child has a separate undo list,
703which is initially empty.
704.TP
705.BR CLONE_THREAD " (since Linux 2.4.0-test8)"
706If
707.B CLONE_THREAD
708is set, the child is placed in the same thread group as the calling process.
709To make the remainder of the discussion of
710.B CLONE_THREAD
711more readable, the term "thread" is used to refer to the
712processes within a thread group.
713
714Thread groups were a feature added in Linux 2.4 to support the
715POSIX threads notion of a set of threads that share a single PID.
716Internally, this shared PID is the so-called
717thread group identifier (TGID) for the thread group.
718Since Linux 2.4, calls to
719.BR getpid (2)
720return the TGID of the caller.
721
722The threads within a group can be distinguished by their (system-wide)
723unique thread IDs (TID).
724A new thread's TID is available as the function result
725returned to the caller of
726.BR clone (),
727and a thread can obtain
728its own TID using
729.BR gettid (2).
730
731When a call is made to
732.BR clone ()
733without specifying
734.BR CLONE_THREAD ,
735then the resulting thread is placed in a new thread group
736whose TGID is the same as the thread's TID.
737This thread is the
738.I leader
739of the new thread group.
740
741A new thread created with
742.B CLONE_THREAD
743has the same parent process as the caller of
744.BR clone ()
745(i.e., like
746.BR CLONE_PARENT ),
747so that calls to
748.BR getppid (2)
749return the same value for all of the threads in a thread group.
750When a
751.B CLONE_THREAD
752thread terminates, the thread that created it using
753.BR clone ()
754is not sent a
755.B SIGCHLD
756(or other termination) signal;
757nor can the status of such a thread be obtained
758using
759.BR wait (2).
760(The thread is said to be
761.IR detached .)
762
763After all of the threads in a thread group terminate,
764the parent process of the thread group is sent a
765.B SIGCHLD
766(or other termination) signal.
767
768If any of the threads in a thread group performs an
769.BR execve (2),
770then all threads other than the thread group leader are terminated,
771and the new program is executed in the thread group leader.
772
773If one of the threads in a thread group creates a child using
774.BR fork (2),
775then any thread in the group can
776.BR wait (2)
777for that child.
778
779Since Linux 2.5.35,
780.I flags
781must also include
782.B CLONE_SIGHAND
783if
784.B CLONE_THREAD
785is specified
786(and note that, since Linux 2.6.0-test6,
787.BR CLONE_SIGHAND
788also requires
789.BR CLONE_VM
790to be included).
791
792Signals may be sent to a thread group as a whole (i.e., a TGID) using
793.BR kill (2),
794or to a specific thread (i.e., TID) using
795.BR tgkill (2).
796
797Signal dispositions and actions are process-wide:
798if an unhandled signal is delivered to a thread, then
799it will affect (terminate, stop, continue, be ignored in)
800all members of the thread group.
801
802Each thread has its own signal mask, as set by
803.BR sigprocmask (2),
804but signals can be pending either: for the whole process
805(i.e., deliverable to any member of the thread group),
806when sent with
807.BR kill (2);
808or for an individual thread, when sent with
809.BR tgkill (2).
810A call to
811.BR sigpending (2)
812returns a signal set that is the union of the signals pending for the
813whole process and the signals that are pending for the calling thread.
814
815If
816.BR kill (2)
817is used to send a signal to a thread group,
818and the thread group has installed a handler for the signal, then
819the handler will be invoked in exactly one, arbitrarily selected
820member of the thread group that has not blocked the signal.
821If multiple threads in a group are waiting to accept the same signal using
822.BR sigwaitinfo (2),
823the kernel will arbitrarily select one of these threads
824to receive a signal sent using
825.BR kill (2).
826.TP
827.BR CLONE_UNTRACED " (since Linux 2.5.46)"
828If
829.B CLONE_UNTRACED
830is specified, then a tracing process cannot force
831.B CLONE_PTRACE
832on this child process.
833.TP
834.BR CLONE_VFORK " (since Linux 2.2)"
835If
836.B CLONE_VFORK
837is set, the execution of the calling process is suspended
838until the child releases its virtual memory
839resources via a call to
840.BR execve (2)
841or
842.BR _exit (2)
843(as with
844.BR vfork (2)).
845
846If
847.B CLONE_VFORK
848is not set, then both the calling process and the child are schedulable
849after the call, and an application should not rely on execution occurring
850in any particular order.
851.TP
852.BR CLONE_VM " (since Linux 2.0)"
853If
854.B CLONE_VM
855is set, the calling process and the child process run in the same memory
856space.
857In particular, memory writes performed by the calling process
858or by the child process are also visible in the other process.
859Moreover, any memory mapping or unmapping performed with
860.BR mmap (2)
861or
862.BR munmap (2)
863by the child or calling process also affects the other process.
864
865If
866.B CLONE_VM
867is not set, the child process runs in a separate copy of the memory
868space of the calling process at the time of
869.BR clone ().
870Memory writes or file mappings/unmappings performed by one of the
871processes do not affect the other, as with
872.BR fork (2).
873.SS C library/kernel ABI differences
874The raw
875.BR clone ()
876system call corresponds more closely to
877.BR fork (2)
878in that execution in the child continues from the point of the
879call.
880As such, the
881.I fn
882and
883.I arg
884arguments of the
885.BR clone ()
886wrapper function are omitted.
887Furthermore, the argument order changes.
888The raw system call interface on x86 and many other architectures is roughly:
889.in +4
890.nf
891
892.BI "long clone(unsigned long " flags ", void *" child_stack ,
893.BI " void *" ptid ", void *" ctid ,
894.BI " struct pt_regs *" regs );
895
896.fi
897.in
898Another difference for the raw system call is that the
899.I child_stack
900argument may be zero, in which case copy-on-write semantics ensure that the
901child gets separate copies of stack pages when either process modifies
902the stack.
903In this case, for correct operation, the
904.B CLONE_VM
905option should not be specified.
906
907For some architectures, the order of the arguments for the system call
908differs from that shown above.
909On the score, microblaze, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
910and MIPS architectures,
911the order of the fourth and fifth arguments is reversed.
912On the cris and s390 architectures,
913the order of the first and second arguments is reversed.
914.SS blackfin, m68k, and sparc
915The argument-passing conventions on
916blackfin, m68k, and sparc are different from the descriptions above.
917For details, see the kernel (and glibc) source.
918.SS ia64
919On ia64, a different interface is used:
920.nf
921
922.BI "int __clone2(int (*" "fn" ")(void *), "
923.BI " void *" child_stack_base ", size_t " stack_size ,
924.BI " int " flags ", void *" "arg" ", ... "
925.BI " /* pid_t *" ptid ", struct user_desc *" tls \
926", pid_t *" ctid " */ );"
927.fi
928.PP
929The prototype shown above is for the glibc wrapper function;
930the raw system call interface has no
931.I fn
932or
933.I arg
934argument, and changes the order of the arguments so that
935.I flags
936is the first argument, and
937.I tls
938is the last argument.
939.PP
940.BR __clone2 ()
941operates in the same way as
942.BR clone (),
943except that
944.I child_stack_base
945points to the lowest address of the child's stack area,
946and
947.I stack_size
948specifies the size of the stack pointed to by
949.IR child_stack_base .
950.SS Linux 2.4 and earlier
951In Linux 2.4 and earlier,
952.BR clone ()
953does not take arguments
954.IR ptid ,
955.IR tls ,
956and
957.IR ctid .
958.SH RETURN VALUE
959.\" gettid(2) returns current->pid;
960.\" getpid(2) returns current->tgid;
961On success, the thread ID of the child process is returned
962in the caller's thread of execution.
963On failure, \-1 is returned
964in the caller's context, no child process will be created, and
965.I errno
966will be set appropriately.
967.SH ERRORS
968.TP
969.B EAGAIN
970Too many processes are already running; see
971.BR fork (2).
972.TP
973.B EINVAL
974.B CLONE_SIGHAND
975was specified, but
976.B CLONE_VM
977was not.
978(Since Linux 2.6.0-test6.)
979.TP
980.B EINVAL
981.B CLONE_THREAD
982was specified, but
983.B CLONE_SIGHAND
984was not.
985(Since Linux 2.5.35.)
986.\" .TP
987.\" .B EINVAL
988.\" Precisely one of
989.\" .B CLONE_DETACHED
990.\" and
991.\" .B CLONE_THREAD
992.\" was specified.
993.\" (Since Linux 2.6.0-test6.)
994.TP
995.B EINVAL
996Both
997.B CLONE_FS
998and
999.B CLONE_NEWNS
1000were specified in
1001.IR flags .
1002.TP
1003.B EINVAL
1004Both
1005.B CLONE_NEWIPC
1006and
1007.B CLONE_SYSVSEM
1008were specified in
1009.IR flags .
1010.TP
1011.B EINVAL
1012Both
1013.BR CLONE_NEWPID
1014and
1015.BR CLONE_THREAD
1016were specified in
1017.IR flags .
1018.TP
1019.B EINVAL
1020Returned by
1021.BR clone ()
1022when a zero value is specified for
1023.IR child_stack .
1024.TP
1025.B EINVAL
1026.BR CLONE_NEWIPC
1027was specified in
1028.IR flags ,
1029but the kernel was not configured with the
1030.B CONFIG_SYSVIPC
1031and
1032.BR CONFIG_IPC_NS
1033options.
1034.TP
1035.B EINVAL
1036.BR CLONE_NEWNET
1037was specified in
1038.IR flags ,
1039but the kernel was not configured with the
1040.B CONFIG_NET_NS
1041option.
1042.TP
1043.B EINVAL
1044.BR CLONE_NEWPID
1045was specified in
1046.IR flags ,
1047but the kernel was not configured with the
1048.B CONFIG_PID_NS
1049option.
1050.TP
1051.B EINVAL
1052.BR CLONE_NEWUTS
1053was specified in
1054.IR flags ,
1055but the kernel was not configured with the
1056.B CONFIG_UTS_NS
1057option.
1058.TP
1059.B ENOMEM
1060Cannot allocate sufficient memory for a task structure for the
1061child, or to copy those parts of the caller's context that need to be
1062copied.
1063.TP
1064.B EPERM
1065.BR CLONE_NEWIPC ,
1066.BR CLONE_NEWNET ,
1067.BR CLONE_NEWNS ,
1068.BR CLONE_NEWPID ,
1069or
1070.BR CLONE_NEWUTS
1071was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
1072.TP
1073.B EPERM
1074.B CLONE_PID
1075was specified by a process other than process 0.
1076.SH CONFORMING TO
1077.BR clone ()
1078is Linux-specific and should not be used in programs
1079intended to be portable.
1080.SH NOTES
1081In the kernel 2.4.x series,
1082.B CLONE_THREAD
1083generally does not make the parent of the new thread the same
1084as the parent of the calling process.
1085However, for kernel versions 2.4.7 to 2.4.18 the
1086.B CLONE_THREAD
1087flag implied the
1088.B CLONE_PARENT
1089flag (as in kernel 2.6).
1090
1091For a while there was
1092.B CLONE_DETACHED
1093(introduced in 2.5.32):
1094it indicated that the parent wanted no child-exit signal.
1095In 2.6.2 the need to give this
1096together with
1097.B CLONE_THREAD
1098disappeared.
1099This flag is still defined, but has no effect.
1100
1101On i386,
1102.BR clone ()
1103should not be called through vsyscall, but directly through
1104.IR "int $0x80" .
1105.SH BUGS
1106Versions of the GNU C library that include the NPTL threading library
1107contain a wrapper function for
1108.BR getpid (2)
1109that performs caching of PIDs.
1110This caching relies on support in the glibc wrapper for
1111.BR clone (),
1112but as currently implemented,
1113the cache may not be up to date in some circumstances.
1114In particular,
1115if a signal is delivered to the child immediately after the
1116.BR clone ()
1117call, then a call to
1118.BR getpid (2)
1119in a handler for the signal may return the PID
1120of the calling process ("the parent"),
1121if the clone wrapper has not yet had a chance to update the PID
1122cache in the child.
1123(This discussion ignores the case where the child was created using
1124.BR CLONE_THREAD ,
1125when
1126.BR getpid (2)
1127.I should
1128return the same value in the child and in the process that called
1129.BR clone (),
1130since the caller and the child are in the same thread group.
1131The stale-cache problem also does not occur if the
1132.I flags
1133argument includes
1134.BR CLONE_VM .)
1135To obtain the true PID, it may be necessary to use code such as the following:
1136.nf
1137
1138 #include <syscall.h>
1139
1140 pid_t mypid;
1141
1142 mypid = syscall(SYS_getpid);
1143.fi
1144.\" See also the following bug reports
1145.\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1146.\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
1147.SH EXAMPLE
1148The following program demonstrates the use of
1149.BR clone ()
1150to create a child process that executes in a separate UTS namespace.
1151The child changes the hostname in its UTS namespace.
1152Both parent and child then display the system hostname,
1153making it possible to see that the hostname
1154differs in the UTS namespaces of the parent and child.
1155For an example of the use of this program, see
1156.BR setns (2).
1157.SS Program source
1158.nf
1159#define _GNU_SOURCE
1160#include <sys/wait.h>
1161#include <sys/utsname.h>
1162#include <sched.h>
1163#include <string.h>
1164#include <stdio.h>
1165#include <stdlib.h>
1166#include <unistd.h>
1167
1168#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
1169 } while (0)
1170
1171static int /* Start function for cloned child */
1172childFunc(void *arg)
1173{
1174 struct utsname uts;
1175
1176 /* Change hostname in UTS namespace of child */
1177
1178 if (sethostname(arg, strlen(arg)) == \-1)
1179 errExit("sethostname");
1180
1181 /* Retrieve and display hostname */
1182
1183 if (uname(&uts) == \-1)
1184 errExit("uname");
1185 printf("uts.nodename in child: %s\\n", uts.nodename);
1186
1187 /* Keep the namespace open for a while, by sleeping.
1188 This allows some experimentation\-\-for example, another
1189 process might join the namespace. */
1190
1191 sleep(200);
1192
1193 return 0; /* Child terminates now */
1194}
1195
1196#define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
1197
1198int
1199main(int argc, char *argv[])
1200{
1201 char *stack; /* Start of stack buffer */
1202 char *stackTop; /* End of stack buffer */
1203 pid_t pid;
1204 struct utsname uts;
1205
1206 if (argc < 2) {
1207 fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]);
1208 exit(EXIT_SUCCESS);
1209 }
1210
1211 /* Allocate stack for child */
1212
1213 stack = malloc(STACK_SIZE);
1214 if (stack == NULL)
1215 errExit("malloc");
1216 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
1217
1218 /* Create child that has its own UTS namespace;
1219 child commences execution in childFunc() */
1220
1221 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1222 if (pid == \-1)
1223 errExit("clone");
1224 printf("clone() returned %ld\\n", (long) pid);
1225
1226 /* Parent falls through to here */
1227
1228 sleep(1); /* Give child time to change its hostname */
1229
1230 /* Display hostname in parent\(aqs UTS namespace. This will be
1231 different from hostname in child\(aqs UTS namespace. */
1232
1233 if (uname(&uts) == \-1)
1234 errExit("uname");
1235 printf("uts.nodename in parent: %s\\n", uts.nodename);
1236
1237 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
1238 errExit("waitpid");
1239 printf("child has terminated\\n");
1240
1241 exit(EXIT_SUCCESS);
1242}
1243.fi
1244.SH SEE ALSO
1245.BR fork (2),
1246.BR futex (2),
1247.BR getpid (2),
1248.BR gettid (2),
1249.BR kcmp (2),
1250.BR set_thread_area (2),
1251.BR set_tid_address (2),
1252.BR setns (2),
1253.BR tkill (2),
1254.BR unshare (2),
1255.BR wait (2),
1256.BR proc (5),
1257.BR capabilities (7),
1258.BR pthreads (7)