]> git.ipfire.org Git - thirdparty/man-pages.git/blame_incremental - man2/clone.2
setns.2: Minor changes to example program discussion
[thirdparty/man-pages.git] / man2 / clone.2
... / ...
CommitLineData
1.\" Hey Emacs! This file is -*- nroff -*- source.
2.\"
3.\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
4.\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013
5.\" May be distributed under the GNU General Public License.
6.\" Modified by Michael Haardt <michael@moria.de>
7.\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
8.\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
9.\" New man page (copied from 'fork.2').
10.\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
11.\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
12.\" Modified 26 Jun 2001 by Michael Kerrisk
13.\" Mostly upgraded to 2.4.x
14.\" Added prototype for sys_clone() plus description
15.\" Added CLONE_THREAD with a brief description of thread groups
16.\" Added CLONE_PARENT and revised entire page to remove ambiguity
17.\" between "calling process" and "parent process"
18.\" Added CLONE_PTRACE and CLONE_VFORK
19.\" Added EPERM and EINVAL error codes
20.\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
21.\" various other minor tidy ups and clarifications.
22.\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
23.\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
24.\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
25.\" Added description for CLONE_NEWNS, which was added in 2.4.19
26.\" Slightly rephrased, aeb.
27.\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
28.\" Modified 1 Jan 2004 - various updates, aeb
29.\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
30.\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
31.\" wrapper under BUGS.
32.\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
33.\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
34.\" 2008-11-18, mtk, order CLONE_* flags alphabetically
35.\" 2008-11-18, mtk, document CLONE_NEWPID
36.\" 2008-11-19, mtk, document CLONE_NEWUTS
37.\" 2008-11-19, mtk, document CLONE_NEWIPC
38.\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
39.\"
40.\" FIXME Document CLONE_NEWUSER, which is new in 2.6.23
41.\" (also supported for unshare()?)
42.\"
43.TH CLONE 2 2013-01-01 "Linux" "Linux Programmer's Manual"
44.SH NAME
45clone, __clone2 \- create a child process
46.SH SYNOPSIS
47.nf
48.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */"
49.\" Actually _BSD_SOURCE || _SVID_SOURCE
50.\" FIXME See http://sources.redhat.com/bugzilla/show_bug.cgi?id=4749
51.B #include <sched.h>
52
53.BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
54.BI " int " flags ", void *" "arg" ", ... "
55.BI " /* pid_t *" ptid ", struct user_desc *" tls \
56", pid_t *" ctid " */ );"
57.fi
58.SH DESCRIPTION
59.BR clone ()
60creates a new process, in a manner similar to
61.BR fork (2).
62It is actually a library function layered on top of the underlying
63.BR clone ()
64system call, hereinafter referred to as
65.BR sys_clone .
66A description of
67.B sys_clone
68is given toward the end of this page.
69
70Unlike
71.BR fork (2),
72these calls
73allow the child process to share parts of its execution context with
74the calling process, such as the memory space, the table of file
75descriptors, and the table of signal handlers.
76(Note that on this manual
77page, "calling process" normally corresponds to "parent process".
78But see the description of
79.B CLONE_PARENT
80below.)
81
82The main use of
83.BR clone ()
84is to implement threads: multiple threads of control in a program that
85run concurrently in a shared memory space.
86
87When the child process is created with
88.BR clone (),
89it executes the function
90.IR fn ( arg ).
91(This differs from
92.BR fork (2),
93where execution continues in the child from the point
94of the
95.BR fork (2)
96call.)
97The
98.I fn
99argument is a pointer to a function that is called by the child
100process at the beginning of its execution.
101The
102.I arg
103argument is passed to the
104.I fn
105function.
106
107When the
108.IR fn ( arg )
109function application returns, the child process terminates.
110The integer returned by
111.I fn
112is the exit code for the child process.
113The child process may also terminate explicitly by calling
114.BR exit (2)
115or after receiving a fatal signal.
116
117The
118.I child_stack
119argument specifies the location of the stack used by the child process.
120Since the child and calling process may share memory,
121it is not possible for the child process to execute in the
122same stack as the calling process.
123The calling process must therefore
124set up memory space for the child stack and pass a pointer to this
125space to
126.BR clone ().
127Stacks grow downward on all processors that run Linux
128(except the HP PA processors), so
129.I child_stack
130usually points to the topmost address of the memory space set up for
131the child stack.
132
133The low byte of
134.I flags
135contains the number of the
136.I "termination signal"
137sent to the parent when the child dies.
138If this signal is specified as anything other than
139.BR SIGCHLD ,
140then the parent process must specify the
141.B __WALL
142or
143.B __WCLONE
144options when waiting for the child with
145.BR wait (2).
146If no signal is specified, then the parent process is not signaled
147when the child terminates.
148
149.I flags
150may also be bitwise-or'ed with zero or more of the following constants,
151in order to specify what is shared between the calling process
152and the child process:
153.TP
154.BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
155Erase child thread ID at location
156.I ctid
157in child memory when the child exits, and do a wakeup on the futex
158at that address.
159The address involved may be changed by the
160.BR set_tid_address (2)
161system call.
162This is used by threading libraries.
163.TP
164.BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
165Store child thread ID at location
166.I ctid
167in child memory.
168.TP
169.BR CLONE_FILES " (since Linux 2.0)"
170If
171.B CLONE_FILES
172is set, the calling process and the child process share the same file
173descriptor table.
174Any file descriptor created by the calling process or by the child
175process is also valid in the other process.
176Similarly, if one of the processes closes a file descriptor,
177or changes its associated flags (using the
178.BR fcntl (2)
179.B F_SETFD
180operation), the other process is also affected.
181
182If
183.B CLONE_FILES
184is not set, the child process inherits a copy of all file descriptors
185opened in the calling process at the time of
186.BR clone ().
187(The duplicated file descriptors in the child refer to the
188same open file descriptions (see
189.BR open (2))
190as the corresponding file descriptors in the calling process.)
191Subsequent operations that open or close file descriptors,
192or change file descriptor flags,
193performed by either the calling
194process or the child process do not affect the other process.
195.TP
196.BR CLONE_FS " (since Linux 2.0)"
197If
198.B CLONE_FS
199is set, the caller and the child process share the same file system
200information.
201This includes the root of the file system, the current
202working directory, and the umask.
203Any call to
204.BR chroot (2),
205.BR chdir (2),
206or
207.BR umask (2)
208performed by the calling process or the child process also affects the
209other process.
210
211If
212.B CLONE_FS
213is not set, the child process works on a copy of the file system
214information of the calling process at the time of the
215.BR clone ()
216call.
217Calls to
218.BR chroot (2),
219.BR chdir (2),
220.BR umask (2)
221performed later by one of the processes do not affect the other process.
222.TP
223.BR CLONE_IO " (since Linux 2.6.25)"
224If
225.B CLONE_IO
226is set, then the new process shares an I/O context with
227the calling process.
228If this flag is not set, then (as with
229.BR fork (2))
230the new process has its own I/O context.
231
232.\" The following based on text from Jens Axboe
233The I/O context is the I/O scope of the disk scheduler (i.e.,
234what the I/O scheduler uses to model scheduling of a process's I/O).
235If processes share the same I/O context,
236they are treated as one by the I/O scheduler.
237As a consequence, they get to share disk time.
238For some I/O schedulers,
239.\" the anticipatory and CFQ scheduler
240if two processes share an I/O context,
241they will be allowed to interleave their disk access.
242If several threads are doing I/O on behalf of the same process
243.RB ( aio_read (3),
244for instance), they should employ
245.BR CLONE_IO
246to get better I/O performance.
247.\" with CFQ and AS.
248
249If the kernel is not configured with the
250.B CONFIG_BLOCK
251option, this flag is a no-op.
252.TP
253.BR CLONE_NEWIPC " (since Linux 2.6.19)"
254If
255.B CLONE_NEWIPC
256is set, then create the process in a new IPC namespace.
257If this flag is not set, then (as with
258.BR fork (2)),
259the process is created in the same IPC namespace as
260the calling process.
261This flag is intended for the implementation of containers.
262
263An IPC namespace consists of the set of identifiers for
264System V IPC objects.
265(These objects are created using
266.BR msgget (2),
267.BR semget (2),
268and
269.BR shmget (2).)
270Objects created in an IPC namespace are visible to all other processes
271that are members of that namespace,
272but are not visible to processes in other IPC namespaces.
273
274Since Linux 2.6.30,
275.\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
276.\" https://lwn.net/Articles/312232/
277.B CLONE_NEWIPC
278also supports POSIX message queues, meaning that
279.B CLONE_NEWIPC
280causes a new instance of a POSIX message queue file system (see
281.BR mq_overview (7))
282to be created.
283
284When an IPC namespace is destroyed
285(i.e., when the last process that is a member of the namespace terminates),
286all IPC objects in the namespace are automatically destroyed.
287
288Use of this flag requires: a kernel configured with the
289.B CONFIG_SYSVIPC
290and
291.B CONFIG_IPC_NS
292options and that the process be privileged
293.RB ( CAP_SYS_ADMIN ).
294This flag can't be specified in conjunction with
295.BR CLONE_SYSVSEM .
296.TP
297.BR CLONE_NEWNET " (since Linux 2.6.24)"
298.\" FIXME Check when the implementation was completed
299(The implementation of this flag was only completed
300by about kernel version 2.6.29.)
301
302If
303.B CLONE_NEWNET
304is set, then create the process in a new network namespace.
305If this flag is not set, then (as with
306.BR fork (2)),
307the process is created in the same network namespace as
308the calling process.
309This flag is intended for the implementation of containers.
310
311A network namespace provides an isolated view of the networking stack
312(network device interfaces, IPv4 and IPv6 protocol stacks,
313IP routing tables, firewall rules, the
314.I /proc/net
315and
316.I /sys/class/net
317directory trees, sockets, etc.).
318A physical network device can live in exactly one
319network namespace.
320A virtual network device ("veth") pair provides a pipe-like abstraction
321.\" FIXME Add pointer to veth(4) page when it is eventually completed
322that can be used to create tunnels between network namespaces,
323and can be used to create a bridge to a physical network device
324in another namespace.
325
326When a network namespace is freed
327(i.e., when the last process in the namespace terminates),
328its physical network devices are moved back to the
329initial network namespace (not to the parent of the process).
330
331Use of this flag requires: a kernel configured with the
332.B CONFIG_NET_NS
333option and that the process be privileged
334.RB ( CAP_SYS_ADMIN ).
335.TP
336.BR CLONE_NEWNS " (since Linux 2.4.19)"
337Start the child in a new mount namespace.
338
339Every process lives in a mount namespace.
340The
341.I namespace
342of a process is the data (the set of mounts) describing the file hierarchy
343as seen by that process.
344After a
345.BR fork (2)
346or
347.BR clone ()
348where the
349.B CLONE_NEWNS
350flag is not set, the child lives in the same mount
351namespace as the parent.
352The system calls
353.BR mount (2)
354and
355.BR umount (2)
356change the mount namespace of the calling process, and hence affect
357all processes that live in the same namespace, but do not affect
358processes in a different mount namespace.
359
360After a
361.BR clone ()
362where the
363.B CLONE_NEWNS
364flag is set, the cloned child is started in a new mount namespace,
365initialized with a copy of the namespace of the parent.
366
367Only a privileged process (one having the \fBCAP_SYS_ADMIN\fP capability)
368may specify the
369.B CLONE_NEWNS
370flag.
371It is not permitted to specify both
372.B CLONE_NEWNS
373and
374.B CLONE_FS
375in the same
376.BR clone ()
377call.
378.TP
379.BR CLONE_NEWPID " (since Linux 2.6.24)"
380.\" This explanation draws a lot of details from
381.\" http://lwn.net/Articles/259217/
382.\" Authors: Pavel Emelyanov <xemul@openvz.org>
383.\" and Kir Kolyshkin <kir@openvz.org>
384.\"
385.\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
386.\" Author: Pavel Emelyanov <xemul@openvz.org>
387If
388.B CLONE_NEWPID
389is set, then create the process in a new PID namespace.
390If this flag is not set, then (as with
391.BR fork (2)),
392the process is created in the same PID namespace as
393the calling process.
394This flag is intended for the implementation of containers.
395
396A PID namespace provides an isolated environment for PIDs:
397PIDs in a new namespace start at 1,
398somewhat like a standalone system, and calls to
399.BR fork (2),
400.BR vfork (2),
401or
402.BR clone ()
403will produce processes with PIDs that are unique within the namespace.
404
405The first process created in a new namespace
406(i.e., the process created using the
407.BR CLONE_NEWPID
408flag) has the PID 1, and is the "init" process for the namespace.
409Children that are orphaned within the namespace will be reparented
410to this process rather than
411.BR init (8).
412Unlike the traditional
413.B init
414process, the "init" process of a PID namespace can terminate,
415and if it does, all of the processes in the namespace are terminated.
416
417PID namespaces form a hierarchy.
418When a new PID namespace is created,
419the processes in that namespace are visible
420in the PID namespace of the process that created the new namespace;
421analogously, if the parent PID namespace is itself
422the child of another PID namespace,
423then processes in the child and parent PID namespaces will both be
424visible in the grandparent PID namespace.
425Conversely, the processes in the "child" PID namespace do not see
426the processes in the parent namespace.
427The existence of a namespace hierarchy means that each process
428may now have multiple PIDs:
429one for each namespace in which it is visible;
430each of these PIDs is unique within the corresponding namespace.
431(A call to
432.BR getpid (2)
433always returns the PID associated with the namespace in which
434the process lives.)
435
436After creating the new namespace,
437it is useful for the child to change its root directory
438and mount a new procfs instance at
439.I /proc
440so that tools such as
441.BR ps (1)
442work correctly.
443.\" mount -t proc proc /proc
444(If
445.BR CLONE_NEWNS
446is also included in
447.IR flags ,
448then it isn't necessary to change the root directory:
449a new procfs instance can be mounted directly over
450.IR /proc .)
451
452Use of this flag requires: a kernel configured with the
453.B CONFIG_PID_NS
454option and that the process be privileged
455.RB ( CAP_SYS_ADMIN ).
456This flag can't be specified in conjunction with
457.BR CLONE_THREAD .
458.TP
459.BR CLONE_NEWUTS " (since Linux 2.6.19)"
460If
461.B CLONE_NEWUTS
462is set, then create the process in a new UTS namespace,
463whose identifiers are initialized by duplicating the identifiers
464from the UTS namespace of the calling process.
465If this flag is not set, then (as with
466.BR fork (2)),
467the process is created in the same UTS namespace as
468the calling process.
469This flag is intended for the implementation of containers.
470
471A UTS namespace is the set of identifiers returned by
472.BR uname (2);
473among these, the domain name and the host name can be modified by
474.BR setdomainname (2)
475and
477.BR sethostname (2),
478respectively.
479Changes made to the identifiers in a UTS namespace
480are visible to all other processes in the same namespace,
481but are not visible to processes in other UTS namespaces.
482
483Use of this flag requires: a kernel configured with the
484.B CONFIG_UTS_NS
485option and that the process be privileged
486.RB ( CAP_SYS_ADMIN ).
487.TP
488.BR CLONE_PARENT " (since Linux 2.3.12)"
489If
490.B CLONE_PARENT
491is set, then the parent of the new child (as returned by
492.BR getppid (2))
493will be the same as that of the calling process.
494
495If
496.B CLONE_PARENT
497is not set, then (as with
498.BR fork (2))
499the child's parent is the calling process.
500
501Note that it is the parent process, as returned by
502.BR getppid (2),
503which is signaled when the child terminates, so that
504if
505.B CLONE_PARENT
506is set, then the parent of the calling process, rather than the
507calling process itself, will be signaled.
508.TP
509.BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
510Store child thread ID at location
511.I ptid
512in parent and child memory.
513(In Linux 2.5.32-2.5.48 there was a flag
514.B CLONE_SETTID
515that did this.)
516.TP
517.BR CLONE_PID " (obsolete)"
518If
519.B CLONE_PID
520is set, the child process is created with the same process ID as
521the calling process.
522This is good for hacking the system, but otherwise
523of not much use.
524Since 2.3.21 this flag can be
525specified only by the system boot process (PID 0).
526It disappeared in Linux 2.5.16.
527.TP
528.BR CLONE_PTRACE " (since Linux 2.2)"
529If
530.B CLONE_PTRACE
531is specified, and the calling process is being traced,
532then trace the child also (see
533.BR ptrace (2)).
534.TP
535.BR CLONE_SETTLS " (since Linux 2.5.32)"
536The
537.I tls
538argument is the new TLS (Thread Local Storage) descriptor.
539(See
540.BR set_thread_area (2).)
541.TP
542.BR CLONE_SIGHAND " (since Linux 2.0)"
543If
544.B CLONE_SIGHAND
545is set, the calling process and the child process share the same table of
546signal handlers.
547If the calling process or child process calls
548.BR sigaction (2)
549to change the behavior associated with a signal, the behavior is
550changed in the other process as well.
551However, the calling process and child
552processes still have distinct signal masks and sets of pending
553signals.
554So, one of them may block or unblock some signals using
555.BR sigprocmask (2)
556without affecting the other process.
557
558If
559.B CLONE_SIGHAND
560is not set, the child process inherits a copy of the signal handlers
561of the calling process at the time
562.BR clone ()
563is called.
564Calls to
565.BR sigaction (2)
566performed later by one of the processes have no effect on the other
567process.
568
569Since Linux 2.6.0-test6,
570.I flags
571must also include
572.B CLONE_VM
573if
574.B CLONE_SIGHAND
575is specified.
576.TP
577.BR CLONE_STOPPED " (since Linux 2.6.0-test2)"
578If
579.B CLONE_STOPPED
580is set, then the child is initially stopped (as though it was sent a
581.B SIGSTOP
582signal), and must be resumed by sending it a
583.B SIGCONT
584signal.
585
586This flag was
587.I deprecated
588from Linux 2.6.25 onward,
589and was
590.I removed
591altogether in Linux 2.6.38.
592.\" glibc 2.8 removed this defn from bits/sched.h
593.TP
594.BR CLONE_SYSVSEM " (since Linux 2.5.10)"
595If
596.B CLONE_SYSVSEM
597is set, then the child and the calling process share
598a single list of System V semaphore undo values (see
599.BR semop (2)).
600If this flag is not set, then the child has a separate undo list,
601which is initially empty.
602.TP
603.BR CLONE_THREAD " (since Linux 2.4.0-test8)"
604If
605.B CLONE_THREAD
606is set, the child is placed in the same thread group as the calling process.
607To make the remainder of the discussion of
608.B CLONE_THREAD
609more readable, the term "thread" is used to refer to the
610processes within a thread group.
611
612Thread groups were a feature added in Linux 2.4 to support the
613POSIX threads notion of a set of threads that share a single PID.
614Internally, this shared PID is the so-called
615thread group identifier (TGID) for the thread group.
616Since Linux 2.4, calls to
617.BR getpid (2)
618return the TGID of the caller.
619
620The threads within a group can be distinguished by their (system-wide)
621unique thread IDs (TID).
622A new thread's TID is available as the function result
623returned to the caller of
624.BR clone (),
625and a thread can obtain
626its own TID using
627.BR gettid (2).
628
629When a call is made to
630.BR clone ()
631without specifying
632.BR CLONE_THREAD ,
633then the resulting thread is placed in a new thread group
634whose TGID is the same as the thread's TID.
635This thread is the
636.I leader
637of the new thread group.
638
639A new thread created with
640.B CLONE_THREAD
641has the same parent process as the caller of
642.BR clone ()
643(i.e., like
644.BR CLONE_PARENT ),
645so that calls to
646.BR getppid (2)
647return the same value for all of the threads in a thread group.
648When a
649.B CLONE_THREAD
650thread terminates, the thread that created it using
651.BR clone ()
652is not sent a
653.B SIGCHLD
654(or other termination) signal;
655nor can the status of such a thread be obtained
656using
657.BR wait (2).
658(The thread is said to be
659.IR detached .)
660
661After all of the threads in a thread group terminate,
662the parent process of the thread group is sent a
663.B SIGCHLD
664(or other termination) signal.
665
666If any of the threads in a thread group performs an
667.BR execve (2),
668then all threads other than the thread group leader are terminated,
669and the new program is executed in the thread group leader.
670
671If one of the threads in a thread group creates a child using
672.BR fork (2),
673then any thread in the group can
674.BR wait (2)
675for that child.
676
677Since Linux 2.5.35,
678.I flags
679must also include
680.B CLONE_SIGHAND
681if
682.B CLONE_THREAD
683is specified.
684
685Signals may be sent to a thread group as a whole (i.e., a TGID) using
686.BR kill (2),
687or to a specific thread (i.e., TID) using
688.BR tgkill (2).
689
690Signal dispositions and actions are process-wide:
691if an unhandled signal is delivered to a thread, then
692it will affect (terminate, stop, continue, be ignored in)
693all members of the thread group.
694
695Each thread has its own signal mask, as set by
696.BR sigprocmask (2),
697but signals can be pending either: for the whole process
698(i.e., deliverable to any member of the thread group),
699when sent with
700.BR kill (2);
701or for an individual thread, when sent with
702.BR tgkill (2).
703A call to
704.BR sigpending (2)
705returns a signal set that is the union of the signals pending for the
706whole process and the signals that are pending for the calling thread.
707
708If
709.BR kill (2)
710is used to send a signal to a thread group,
711and the thread group has installed a handler for the signal, then
712the handler will be invoked in exactly one, arbitrarily selected
713member of the thread group that has not blocked the signal.
714If multiple threads in a group are waiting to accept the same signal using
715.BR sigwaitinfo (2),
716the kernel will arbitrarily select one of these threads
717to receive a signal sent using
718.BR kill (2).
719.TP
720.BR CLONE_UNTRACED " (since Linux 2.5.46)"
721If
722.B CLONE_UNTRACED
723is specified, then a tracing process cannot force
724.B CLONE_PTRACE
725on this child process.
726.TP
727.BR CLONE_VFORK " (since Linux 2.2)"
728If
729.B CLONE_VFORK
730is set, the execution of the calling process is suspended
731until the child releases its virtual memory
732resources via a call to
733.BR execve (2)
734or
735.BR _exit (2)
736(as with
737.BR vfork (2)).
738
739If
740.B CLONE_VFORK
741is not set then both the calling process and the child are schedulable
742after the call, and an application should not rely on execution occurring
743in any particular order.
744.TP
745.BR CLONE_VM " (since Linux 2.0)"
746If
747.B CLONE_VM
748is set, the calling process and the child process run in the same memory
749space.
750In particular, memory writes performed by the calling process
751or by the child process are also visible in the other process.
752Moreover, any memory mapping or unmapping performed with
753.BR mmap (2)
754or
755.BR munmap (2)
756by the child or calling process also affects the other process.
757
758If
759.B CLONE_VM
760is not set, the child process runs in a separate copy of the memory
761space of the calling process at the time of
762.BR clone ().
763Memory writes or file mappings/unmappings performed by one of the
764processes do not affect the other, as with
765.BR fork (2).
766.SS "sys_clone"
767The
768.B sys_clone
769system call corresponds more closely to
770.BR fork (2)
771in that execution in the child continues from the point of the
772call.
773As such, the
774.I fn
775and
776.I arg
777arguments of the
778.BR clone ()
779wrapper function are omitted.
780Furthermore, the argument order changes.
781The raw system call interface is roughly:
782.in +4
783.nf
784
785.BI "long clone(unsigned long " flags ", void *" child_stack ,
786.BI " void *" ptid ", void *" ctid ,
787.BI " struct pt_regs *" regs );
788
789.fi
790.in
791Another difference for
792.B sys_clone
793is that the
794.I child_stack
795argument may be zero, in which case copy-on-write semantics ensure that the
796child gets separate copies of stack pages when either process modifies
797the stack.
798In this case, for correct operation, the
799.B CLONE_VM
800option should not be specified.
801.SS Linux 2.4 and earlier
802In Linux 2.4 and earlier,
803.BR clone ()
804does not take arguments
805.IR ptid ,
806.IR tls ,
807and
808.IR ctid .
809.SH "RETURN VALUE"
810.\" gettid(2) returns current->pid;
811.\" getpid(2) returns current->tgid;
812On success, the thread ID of the child process is returned
813in the caller's thread of execution.
814On failure, \-1 is returned
815in the caller's context, no child process will be created, and
816.I errno
817will be set appropriately.
818.SH ERRORS
819.TP
820.B EAGAIN
821Too many processes are already running.
822.TP
823.B EINVAL
824.B CLONE_SIGHAND
825was specified, but
826.B CLONE_VM
827was not.
828(Since Linux 2.6.0-test6.)
829.TP
830.B EINVAL
831.B CLONE_THREAD
832was specified, but
833.B CLONE_SIGHAND
834was not.
835(Since Linux 2.5.35.)
836.\" .TP
837.\" .B EINVAL
838.\" Precisely one of
839.\" .B CLONE_DETACHED
840.\" and
841.\" .B CLONE_THREAD
842.\" was specified.
843.\" (Since Linux 2.6.0-test6.)
844.TP
845.B EINVAL
846Both
847.B CLONE_FS
848and
849.B CLONE_NEWNS
850were specified in
851.IR flags .
852.TP
853.B EINVAL
854Both
855.B CLONE_NEWIPC
856and
857.B CLONE_SYSVSEM
858were specified in
859.IR flags .
860.TP
861.B EINVAL
862Both
863.BR CLONE_NEWPID
864and
865.BR CLONE_THREAD
866were specified in
867.IR flags .
868.TP
869.B EINVAL
870Returned by
871.BR clone ()
872when a zero value is specified for
873.IR child_stack .
874.TP
875.B EINVAL
876.BR CLONE_NEWIPC
877was specified in
878.IR flags ,
879but the kernel was not configured with the
880.B CONFIG_SYSVIPC
881and
882.BR CONFIG_IPC_NS
883options.
884.TP
885.B EINVAL
886.BR CLONE_NEWNET
887was specified in
888.IR flags ,
889but the kernel was not configured with the
890.B CONFIG_NET_NS
891option.
892.TP
893.B EINVAL
894.BR CLONE_NEWPID
895was specified in
896.IR flags ,
897but the kernel was not configured with the
898.B CONFIG_PID_NS
899option.
900.TP
901.B EINVAL
902.BR CLONE_NEWUTS
903was specified in
904.IR flags ,
905but the kernel was not configured with the
906.B CONFIG_UTS_NS
907option.
908.TP
909.B ENOMEM
910Cannot allocate sufficient memory to allocate a task structure for the
911child, or to copy those parts of the caller's context that need to be
912copied.
913.TP
914.B EPERM
915.BR CLONE_NEWIPC ,
916.BR CLONE_NEWNET ,
917.BR CLONE_NEWNS ,
918.BR CLONE_NEWPID ,
919or
920.BR CLONE_NEWUTS
921was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
922.TP
923.B EPERM
924.B CLONE_PID
925was specified by a process other than process 0.
926.SH VERSIONS
927There is no entry for
928.BR clone ()
929in libc5.
930glibc2 provides
931.BR clone ()
932as described in this manual page.
933.SH "CONFORMING TO"
934The
935.BR clone ()
936and
937.B sys_clone
938calls are Linux-specific and should not be used in programs
939intended to be portable.
940.SH NOTES
941In the kernel 2.4.x series,
942.B CLONE_THREAD
943generally does not make the parent of the new thread the same
944as the parent of the calling process.
945However, for kernel versions 2.4.7 to 2.4.18 the
946.B CLONE_THREAD
947flag implied the
948.B CLONE_PARENT
949flag (as in kernel 2.6).
950
951For a while there was
952.B CLONE_DETACHED
953(introduced in 2.5.32):
954parent wants no child-exit signal.
955In 2.6.2 the need to give this
956together with
957.B CLONE_THREAD
958disappeared.
959This flag is still defined, but has no effect.
960
961On i386,
962.BR clone ()
963should not be called through vsyscall, but directly through
964.IR "int $0x80" .
965
966On ia64, a different system call is used:
967.nf
968
969.BI "int __clone2(int (*" "fn" ")(void *), "
970.BI " void *" child_stack_base ", size_t " stack_size ,
971.BI " int " flags ", void *" "arg" ", ... "
972.BI " /* pid_t *" ptid ", struct user_desc *" tls \
973", pid_t *" ctid " */ );"
974.fi
975.PP
976The
977.BR __clone2 ()
978system call operates in the same way as
979.BR clone (),
980except that
981.I child_stack_base
982points to the lowest address of the child's stack area,
983and
984.I stack_size
985specifies the size of the stack pointed to by
986.IR child_stack_base .
987.SH BUGS
988Versions of the GNU C library that include the NPTL threading library
989contain a wrapper function for
990.BR getpid (2)
991that performs caching of PIDs.
992This caching relies on support in the glibc wrapper for
993.BR clone (),
994but as currently implemented,
995the cache may not be up to date in some circumstances.
996In particular,
997if a signal is delivered to the child immediately after the
998.BR clone ()
999call, then a call to
1000.BR getpid (2)
1001in a handler for the signal may return the PID
1002of the calling process ("the parent"),
1003if the clone wrapper has not yet had a chance to update the PID
1004cache in the child.
1005(This discussion ignores the case where the child was created using
1006.BR CLONE_THREAD ,
1007when
1008.BR getpid (2)
1009.I should
1010return the same value in the child and in the process that called
1011.BR clone (),
1012since the caller and the child are in the same thread group.
1013The stale-cache problem also does not occur if the
1014.I flags
1015argument includes
1016.BR CLONE_VM .)
1017To get the truth, it may be necessary to use code such as the following:
1018.nf
1019
1020 #include <syscall.h>
1021
1022 pid_t mypid;
1023
1024 mypid = syscall(SYS_getpid);
1025.fi
1026.\" See also the following bug reports
1027.\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1028.\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
1029.SH EXAMPLE
1030.SS Create a child that executes in a separate UTS namespace
1031The following program demonstrates the use of
1032.BR clone (2)
1033to create a child process that executes in a separate UTS namespace.
1034The child changes the hostname in its UTS namespace.
1035Both parent and child then display the system hostname,
1036making it possible to see that the hostname
1037differs in the UTS namespaces of the parent and child.
1038For an example of the use of this program, see
1039.BR setns (2).
1040
1041.nf
1042#define _GNU_SOURCE
1043#include <sys/wait.h>
1044#include <sys/utsname.h>
1045#include <sched.h>
1046#include <string.h>
1047#include <stdio.h>
1048#include <stdlib.h>
1049#include <unistd.h>
1050
1051#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
1052 } while (0)
1053
1054static int /* Start function for cloned child */
1055childFunc(void *arg)
1056{
1057 struct utsname uts;
1058
1059 /* Change hostname in UTS namespace of child */
1060
1061 if (sethostname(arg, strlen(arg)) == \-1)
1062 errExit("sethostname");
1063
1064 /* Retrieve and display hostname */
1065
1066 if (uname(&uts) == \-1)
1067 errExit("uname");
1068 printf("uts.nodename in child: %s\\n", uts.nodename);
1069
1070 /* Keep the namespace open for a while, by sleeping.
1071 This allows some experimentation\-\-for example, another
1072 process might join the namespace. */
1073
1074 sleep(200);
1075
1076 return 0; /* Child terminates now */
1077}
1078
1079#define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
1080
1081int
1082main(int argc, char *argv[])
1083{
1084 char *stack; /* Start of stack buffer */
1085 char *stackTop; /* End of stack buffer */
1086 pid_t pid;
1087 struct utsname uts;
1088
1089 if (argc < 2) {
1090 fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]);
1091 exit(EXIT_SUCCESS);
1092 }
1093
1094 /* Allocate stack for child */
1095
1096 stack = malloc(STACK_SIZE);
1097 if (stack == NULL)
1098 errExit("malloc");
1099 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
1100
1101 /* Create child that has its own UTS namespace;
1102 child commences execution in childFunc() */
1103
1104 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1105 if (pid == \-1)
1106 errExit("clone");
1107 printf("clone() returned %ld\\n", (long) pid);
1108
1109 /* Parent falls through to here */
1110
1111 sleep(1); /* Give child time to change its hostname */
1112
1113 /* Display hostname in parent\(aqs UTS namespace. This will be
1114 different from hostname in child\(aqs UTS namespace. */
1115
1116 if (uname(&uts) == \-1)
1117 errExit("uname");
1118 printf("uts.nodename in parent: %s\\n", uts.nodename);
1119
1120 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
1121 errExit("waitpid");
1122 printf("child has terminated\\n");
1123
1124 exit(EXIT_SUCCESS);
1125}
1126.fi
1127.SH "SEE ALSO"
1128.BR fork (2),
1129.BR futex (2),
1130.BR getpid (2),
1131.BR gettid (2),
1132.BR kcmp (2),
1133.BR set_thread_area (2),
1134.BR set_tid_address (2),
1135.BR setns (2),
1136.BR tkill (2),
1137.BR unshare (2),
1138.BR wait (2),
1139.BR capabilities (7),
1140.BR pthreads (7)