]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/clone.2
clone.2: Reword discussion of CLONE_NEWNS, removing text also in namespaces(7)
[thirdparty/man-pages.git] / man2 / clone.2
1 .\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
2 .\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013
3 .\"
4 .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
5 .\" May be distributed under the GNU General Public License.
6 .\" %%%LICENSE_END
7 .\"
8 .\" Modified by Michael Haardt <michael@moria.de>
9 .\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
10 .\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
11 .\" New man page (copied from 'fork.2').
12 .\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
13 .\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
14 .\" Modified 26 Jun 2001 by Michael Kerrisk
15 .\" Mostly upgraded to 2.4.x
16 .\" Added prototype for sys_clone() plus description
17 .\" Added CLONE_THREAD with a brief description of thread groups
18 .\" Added CLONE_PARENT and revised entire page to remove ambiguity
19 .\" between "calling process" and "parent process"
20 .\" Added CLONE_PTRACE and CLONE_VFORK
21 .\" Added EPERM and EINVAL error codes
22 .\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
23 .\" various other minor tidy ups and clarifications.
24 .\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
25 .\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
26 .\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
27 .\" Added description for CLONE_NEWNS, which was added in 2.4.19
28 .\" Slightly rephrased, aeb.
29 .\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
30 .\" Modified 1 Jan 2004 - various updates, aeb
31 .\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
32 .\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
33 .\" wrapper under BUGS.
34 .\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
35 .\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
36 .\" 2008-11-18, mtk, order CLONE_* flags alphabetically
37 .\" 2008-11-18, mtk, document CLONE_NEWPID
38 .\" 2008-11-19, mtk, document CLONE_NEWUTS
39 .\" 2008-11-19, mtk, document CLONE_NEWIPC
40 .\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
41 .\"
42 .TH CLONE 2 2014-08-19 "Linux" "Linux Programmer's Manual"
43 .SH NAME
44 clone, __clone2 \- create a child process
45 .SH SYNOPSIS
46 .nf
47 /* Prototype for the glibc wrapper function */
48
49 .B #include <sched.h>
50
51 .BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
52 .BI " int " flags ", void *" "arg" ", ... "
53 .BI " /* pid_t *" ptid ", struct user_desc *" tls \
54 ", pid_t *" ctid " */ );"
55
56 /* Prototype for the raw system call */
57
58 .BI "long clone(unsigned long " flags ", void *" child_stack ,
59 .BI " void *" ptid ", void *" ctid ,
60 .BI " struct pt_regs *" regs );
61 .fi
62 .sp
63 .in -4n
64 Feature Test Macro Requirements for glibc wrapper function (see
65 .BR feature_test_macros (7)):
66 .in
67 .sp
68 .BR clone ():
69 .ad l
70 .RS 4
71 .PD 0
72 .TP 4
73 Since glibc 2.14:
74 _GNU_SOURCE
75 .TP 4
76 .\" See http://sources.redhat.com/bugzilla/show_bug.cgi?id=4749
77 Before glibc 2.14:
78 _BSD_SOURCE || _SVID_SOURCE
79 /* _GNU_SOURCE also suffices */
80 .PD
81 .RE
82 .ad b
83 .SH DESCRIPTION
84 .BR clone ()
85 creates a new process, in a manner similar to
86 .BR fork (2).
87
88 This page describes both the glibc
89 .BR clone ()
90 wrapper function and the underlying system call on which it is based.
91 The main text describes the wrapper function;
92 the differences for the raw system call
93 are described toward the end of this page.
94
95 Unlike
96 .BR fork (2),
97 .BR clone ()
98 allows the child process to share parts of its execution context with
99 the calling process, such as the memory space, the table of file
100 descriptors, and the table of signal handlers.
101 (Note that on this manual
102 page, "calling process" normally corresponds to "parent process".
103 But see the description of
104 .B CLONE_PARENT
105 below.)
106
107 The main use of
108 .BR clone ()
109 is to implement threads: multiple threads of control in a program that
110 run concurrently in a shared memory space.
111
112 When the child process is created with
113 .BR clone (),
114 it executes the function
115 .IR fn ( arg ).
116 (This differs from
117 .BR fork (2),
118 where execution continues in the child from the point
119 of the
120 .BR fork (2)
121 call.)
122 The
123 .I fn
124 argument is a pointer to a function that is called by the child
125 process at the beginning of its execution.
126 The
127 .I arg
128 argument is passed to the
129 .I fn
130 function.
131
132 When the
133 .IR fn ( arg )
134 function application returns, the child process terminates.
135 The integer returned by
136 .I fn
137 is the exit code for the child process.
138 The child process may also terminate explicitly by calling
139 .BR exit (2)
140 or after receiving a fatal signal.
141
142 The
143 .I child_stack
144 argument specifies the location of the stack used by the child process.
145 Since the child and calling process may share memory,
146 it is not possible for the child process to execute in the
147 same stack as the calling process.
148 The calling process must therefore
149 set up memory space for the child stack and pass a pointer to this
150 space to
151 .BR clone ().
152 Stacks grow downward on all processors that run Linux
153 (except the HP PA processors), so
154 .I child_stack
155 usually points to the topmost address of the memory space set up for
156 the child stack.
157
158 The low byte of
159 .I flags
160 contains the number of the
161 .I "termination signal"
162 sent to the parent when the child dies.
163 If this signal is specified as anything other than
164 .BR SIGCHLD ,
165 then the parent process must specify the
166 .B __WALL
167 or
168 .B __WCLONE
169 options when waiting for the child with
170 .BR wait (2).
171 If no signal is specified, then the parent process is not signaled
172 when the child terminates.
173
174 .I flags
175 may also be bitwise-or'ed with zero or more of the following constants,
176 in order to specify what is shared between the calling process
177 and the child process:
178 .TP
179 .BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
180 Erase child thread ID at location
181 .I ctid
182 in child memory when the child exits, and do a wakeup on the futex
183 at that address.
184 The address involved may be changed by the
185 .BR set_tid_address (2)
186 system call.
187 This is used by threading libraries.
188 .TP
189 .BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
190 Store child thread ID at location
191 .I ctid
192 in child memory.
193 .TP
194 .BR CLONE_FILES " (since Linux 2.0)"
195 If
196 .B CLONE_FILES
197 is set, the calling process and the child process share the same file
198 descriptor table.
199 Any file descriptor created by the calling process or by the child
200 process is also valid in the other process.
201 Similarly, if one of the processes closes a file descriptor,
202 or changes its associated flags (using the
203 .BR fcntl (2)
204 .B F_SETFD
205 operation), the other process is also affected.
206
207 If
208 .B CLONE_FILES
209 is not set, the child process inherits a copy of all file descriptors
210 opened in the calling process at the time of
211 .BR clone ().
212 (The duplicated file descriptors in the child refer to the
213 same open file descriptions (see
214 .BR open (2))
215 as the corresponding file descriptors in the calling process.)
216 Subsequent operations that open or close file descriptors,
217 or change file descriptor flags,
218 performed by either the calling
219 process or the child process do not affect the other process.
220 .TP
221 .BR CLONE_FS " (since Linux 2.0)"
222 If
223 .B CLONE_FS
224 is set, the caller and the child process share the same filesystem
225 information.
226 This includes the root of the filesystem, the current
227 working directory, and the umask.
228 Any call to
229 .BR chroot (2),
230 .BR chdir (2),
231 or
232 .BR umask (2)
233 performed by the calling process or the child process also affects the
234 other process.
235
236 If
237 .B CLONE_FS
238 is not set, the child process works on a copy of the filesystem
239 information of the calling process at the time of the
240 .BR clone ()
241 call.
242 Calls to
243 .BR chroot (2),
244 .BR chdir (2),
245 .BR umask (2)
246 performed later by one of the processes do not affect the other process.
247 .TP
248 .BR CLONE_IO " (since Linux 2.6.25)"
249 If
250 .B CLONE_IO
251 is set, then the new process shares an I/O context with
252 the calling process.
253 If this flag is not set, then (as with
254 .BR fork (2))
255 the new process has its own I/O context.
256
257 .\" The following based on text from Jens Axboe
258 The I/O context is the I/O scope of the disk scheduler (i.e.,
259 what the I/O scheduler uses to model scheduling of a process's I/O).
260 If processes share the same I/O context,
261 they are treated as one by the I/O scheduler.
262 As a consequence, they get to share disk time.
263 For some I/O schedulers,
264 .\" the anticipatory and CFQ scheduler
265 if two processes share an I/O context,
266 they will be allowed to interleave their disk access.
267 If several threads are doing I/O on behalf of the same process
268 .RB ( aio_read (3),
269 for instance), they should employ
270 .BR CLONE_IO
271 to get better I/O performance.
272 .\" with CFQ and AS.
273
274 If the kernel is not configured with the
275 .B CONFIG_BLOCK
276 option, this flag is a no-op.
277 .TP
278 .BR CLONE_NEWIPC " (since Linux 2.6.19)"
279 If
280 .B CLONE_NEWIPC
281 is set, then create the process in a new IPC namespace.
282 If this flag is not set, then (as with
283 .BR fork (2)),
284 the process is created in the same IPC namespace as
285 the calling process.
286 This flag is intended for the implementation of containers.
287
288 An IPC namespace provides an isolated view of System\ V IPC objects (see
289 .BR svipc (7))
290 and (since Linux 2.6.30)
291 .\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
292 .\" https://lwn.net/Articles/312232/
293 POSIX message queues
294 (see
295 .BR mq_overview (7)).
296 The common characteristic of these IPC mechanisms is that IPC
297 objects are identified by mechanisms other than filesystem
298 pathnames.
299
300 Objects created in an IPC namespace are visible to all other processes
301 that are members of that namespace,
302 but are not visible to processes in other IPC namespaces.
303
304 When an IPC namespace is destroyed
305 (i.e., when the last process that is a member of the namespace terminates),
306 all IPC objects in the namespace are automatically destroyed.
307
308 Use of this flag requires
309 that the process be privileged
310 .RB ( CAP_SYS_ADMIN ).
311 This flag can't be specified in conjunction with
312 .BR CLONE_SYSVSEM .
313
314 For further information on IPC namespaces, see
315 .BR namespaces (7).
316 .TP
317 .BR CLONE_NEWNET " (since Linux 2.6.24)"
318 (The implementation of this flag was completed only
319 by about kernel version 2.6.29.)
320
321 If
322 .B CLONE_NEWNET
323 is set, then create the process in a new network namespace.
324 If this flag is not set, then (as with
325 .BR fork (2))
326 the process is created in the same network namespace as
327 the calling process.
328 This flag is intended for the implementation of containers.
329
330 A network namespace provides an isolated view of the networking stack
331 (network device interfaces, IPv4 and IPv6 protocol stacks,
332 IP routing tables, firewall rules, the
333 .I /proc/net
334 and
335 .I /sys/class/net
336 directory trees, sockets, etc.).
337 A physical network device can live in exactly one
338 network namespace.
339 A virtual network device ("veth") pair provides a pipe-like abstraction
340 .\" FIXME . Add pointer to veth(4) page when it is eventually completed
341 that can be used to create tunnels between network namespaces,
342 and can be used to create a bridge to a physical network device
343 in another namespace.
344
345 When a network namespace is freed
346 (i.e., when the last process in the namespace terminates),
347 its physical network devices are moved back to the
348 initial network namespace (not to the parent of the process).
349 For further information on network namespaces, see
350 .BR namespaces (7).
351
352 Use of this flag requires
353 that the process be privileged
354 .RB ( CAP_SYS_ADMIN ).
355
356 .TP
357 .BR CLONE_NEWNS " (since Linux 2.4.19)"
358 If
359 .B CLONE_NEWNS
360 is set, the cloned child is started in a new mount namespace,
361 initialized with a copy of the namespace of the parent.
362 If
363 .B CLONE_NEWNS
364 is not set, the child lives in the same mount
365 namespace as the parent.
366
367 For further information on mount namespaces, see
368 .BR namespaces (7).
369
370 Only a privileged process (one having the \fBCAP_SYS_ADMIN\fP capability)
371 may specify the
372 .B CLONE_NEWNS
373 flag.
374 It is not permitted to specify both
375 .B CLONE_NEWNS
376 and
377 .B CLONE_FS
378 in the same
379 .BR clone ()
380 call.
381
382 .TP
383 .BR CLONE_NEWUSER
384 (This flag first became meaningful for
385 .BR clone ()
386 in Linux 2.6.23,
387 the current
388 .BR clone ()
389 semantics were merged in Linux 3.5,
390 and the final pieces to make the user namespaces completely usable were
391 merged in Linux 3.8.)
392
393 If
394 .B CLONE_NEWUSER
395 is set, then create the process in a new user namespace.
396 If this flag is not set, then (as with
397 .BR fork (2))
398 the process is created in the same user namespace as the calling process.
399
400 A user namespace provides an isolated environment for
401 security-related identifiers, in particular,
402 user IDs, group IDs, keys (see
403 .BR keyctl (2)),
404 and capabilities.
405
406 When a user namespace is created,
407 it starts out without a mapping of user IDs (group IDs)
408 to the parent user namespace.
409 The desired mapping of user IDs (group IDs) to the parent user namespace
410 may be set by writing into
411 .IR /proc/[pid]/uid_map
412 .RI ( /proc/[pid]/gid_map );
413 see
414 .BR proc (5).
415
416 The first process in a user namespace starts out with a complete set
417 of capabilities with respect to the new user namespace.
418
419 System calls that return user IDs (group IDs) will return
420 either the user ID (group ID) mapped into the current
421 user namespace if there is a mapping, or the overflow user ID (group ID);
422 the default value for the overflow user ID (group ID) is 65534.
423 See the descriptions of
424 .IR /proc/sys/kernel/overflowuid
425 and
426 .IR /proc/sys/kernel/overflowgid
427 in
428 .BR proc (5).
429
430 Use of this flag requires a kernel configured with the
431 .BR CONFIG_USER_NS
432 option.
433 Before Linux 3.8, use of
434 .BR CLONE_NEWUSER
435 required that the caller have three capabilities:
436 .BR CAP_SYS_ADMIN ,
437 .BR CAP_SETUID ,
438 and
439 .BR CAP_SETGID .
440 .\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
441 Starting with Linux 3.8,
442 no privileges are needed to create a user namespace,
443 and mount, PID, IPC, network, and UTS namespaces can be created with just the
444 .B CAP_SYS_ADMIN
445 capability in the caller's user namespace.
446
447 If
448 .BR CLONE_NEWUSER
449 is specified along with other
450 .B CLONE_NEW*
451 flags in a single
452 .BR clone ()
453 call, the user namespace is guaranteed to be created first,
454 giving the caller privileges over the remaining
455 namespaces created by the call.
456 Thus, it is possible for an unprivileged caller to specify this combination
457 of flags.
458
459 Over the years, there have been a lot of features that have been added
460 to the Linux kernel that are only available to privileged users
461 because of their potential to confuse set-user-ID-root applications.
462 In general, it becomes safe to allow the root user in a user namespace to
463 use those features because it is impossible, while in a user namespace,
464 to gain more privilege than the root user of a user namespace has.
465
466 .TP
467 .BR CLONE_NEWPID " (since Linux 2.6.24)"
468 .\" This explanation draws a lot of details from
469 .\" http://lwn.net/Articles/259217/
470 .\" Authors: Pavel Emelyanov <xemul@openvz.org>
471 .\" and Kir Kolyshkin <kir@openvz.org>
472 .\"
473 .\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
474 .\" Author: Pavel Emelyanov <xemul@openvz.org>
475 If
476 .B CLONE_NEWPID
477 is set, then create the process in a new PID namespace.
478 If this flag is not set, then (as with
479 .BR fork (2))
480 the process is created in the same PID namespace as
481 the calling process.
482 This flag is intended for the implementation of containers.
483
484 A PID namespace provides an isolated environment for PIDs:
485 PIDs in a new namespace start at 1,
486 somewhat like a standalone system, and calls to
487 .BR fork (2),
488 .BR vfork (2),
489 or
490 .BR clone ()
491 will produce processes with PIDs that are unique within the namespace.
492
493 The first process created in a new namespace
494 (i.e., the process created using the
495 .BR CLONE_NEWPID
496 flag) has the PID 1, and is the "init" process for the namespace.
497 Children that are orphaned within the namespace will be reparented
498 to this process rather than
499 .BR init (8).
500 Unlike the traditional
501 .B init
502 process, the "init" process of a PID namespace can terminate,
503 and if it does, all of the processes in the namespace are terminated.
504
505 PID namespaces form a hierarchy.
506 When a new PID namespace is created,
507 the processes in that namespace are visible
508 in the PID namespace of the process that created the new namespace;
509 analogously, if the parent PID namespace is itself
510 the child of another PID namespace,
511 then processes in the child and parent PID namespaces will both be
512 visible in the grandparent PID namespace.
513 Conversely, the processes in the "child" PID namespace do not see
514 the processes in the parent namespace.
515 The existence of a namespace hierarchy means that each process
516 may now have multiple PIDs:
517 one for each namespace in which it is visible;
518 each of these PIDs is unique within the corresponding namespace.
519 (A call to
520 .BR getpid (2)
521 always returns the PID associated with the namespace in which
522 the process lives.)
523
524 After creating the new namespace,
525 it is useful for the child to change its root directory
526 and mount a new procfs instance at
527 .I /proc
528 so that tools such as
529 .BR ps (1)
530 work correctly.
531 .\" mount -t proc proc /proc
532 (If
533 .BR CLONE_NEWNS
534 is also included in
535 .IR flags ,
536 then it isn't necessary to change the root directory:
537 a new procfs instance can be mounted directly over
538 .IR /proc .)
539
540 Use of this flag requires: a kernel configured with the
541 .B CONFIG_PID_NS
542 option and that the process be privileged
543 .RB ( CAP_SYS_ADMIN ).
544 This flag can't be specified in conjunction with
545 .BR CLONE_THREAD .
546 .TP
547 .BR CLONE_NEWUTS " (since Linux 2.6.19)"
548 If
549 .B CLONE_NEWUTS
550 is set, then create the process in a new UTS namespace,
551 whose identifiers are initialized by duplicating the identifiers
552 from the UTS namespace of the calling process.
553 If this flag is not set, then (as with
554 .BR fork (2))
555 the process is created in the same UTS namespace as
556 the calling process.
557 This flag is intended for the implementation of containers.
558
559 A UTS namespace is the set of identifiers returned by
560 .BR uname (2);
561 among these, the domain name and the hostname can be modified by
562 .BR setdomainname (2)
563 and
564 .BR sethostname (2),
565 respectively.
566 Changes made to the identifiers in a UTS namespace
567 are visible to all other processes in the same namespace,
568 but are not visible to processes in other UTS namespaces.
569
570 Use of this flag requires: a kernel configured with the
571 .B CONFIG_UTS_NS
572 option and that the process be privileged
573 .RB ( CAP_SYS_ADMIN ).
574 .TP
575 .BR CLONE_PARENT " (since Linux 2.3.12)"
576 If
577 .B CLONE_PARENT
578 is set, then the parent of the new child (as returned by
579 .BR getppid (2))
580 will be the same as that of the calling process.
581
582 If
583 .B CLONE_PARENT
584 is not set, then (as with
585 .BR fork (2))
586 the child's parent is the calling process.
587
588 Note that it is the parent process, as returned by
589 .BR getppid (2),
590 which is signaled when the child terminates, so that
591 if
592 .B CLONE_PARENT
593 is set, then the parent of the calling process, rather than the
594 calling process itself, will be signaled.
595 .TP
596 .BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
597 Store child thread ID at location
598 .I ptid
599 in parent and child memory.
600 (In Linux 2.5.32-2.5.48 there was a flag
601 .B CLONE_SETTID
602 that did this.)
603 .TP
604 .BR CLONE_PID " (obsolete)"
605 If
606 .B CLONE_PID
607 is set, the child process is created with the same process ID as
608 the calling process.
609 This is good for hacking the system, but otherwise
610 of not much use.
611 Since 2.3.21 this flag can be
612 specified only by the system boot process (PID 0).
613 It disappeared in Linux 2.5.16.
614 .TP
615 .BR CLONE_PTRACE " (since Linux 2.2)"
616 If
617 .B CLONE_PTRACE
618 is specified, and the calling process is being traced,
619 then trace the child also (see
620 .BR ptrace (2)).
621 .TP
622 .BR CLONE_SETTLS " (since Linux 2.5.32)"
623 The
624 .I tls
625 argument is the new TLS (Thread Local Storage) descriptor.
626 (See
627 .BR set_thread_area (2).)
628 .TP
629 .BR CLONE_SIGHAND " (since Linux 2.0)"
630 If
631 .B CLONE_SIGHAND
632 is set, the calling process and the child process share the same table of
633 signal handlers.
634 If the calling process or child process calls
635 .BR sigaction (2)
636 to change the behavior associated with a signal, the behavior is
637 changed in the other process as well.
638 However, the calling process and child
639 processes still have distinct signal masks and sets of pending
640 signals.
641 So, one of them may block or unblock some signals using
642 .BR sigprocmask (2)
643 without affecting the other process.
644
645 If
646 .B CLONE_SIGHAND
647 is not set, the child process inherits a copy of the signal handlers
648 of the calling process at the time
649 .BR clone ()
650 is called.
651 Calls to
652 .BR sigaction (2)
653 performed later by one of the processes have no effect on the other
654 process.
655
656 Since Linux 2.6.0-test6,
657 .I flags
658 must also include
659 .B CLONE_VM
660 if
661 .B CLONE_SIGHAND
662 is specified.
663 .TP
664 .BR CLONE_STOPPED " (since Linux 2.6.0-test2)"
665 If
666 .B CLONE_STOPPED
667 is set, then the child is initially stopped (as though it was sent a
668 .B SIGSTOP
669 signal), and must be resumed by sending it a
670 .B SIGCONT
671 signal.
672
673 This flag was
674 .I deprecated
675 from Linux 2.6.25 onward,
676 and was
677 .I removed
678 altogether in Linux 2.6.38.
679 .\" glibc 2.8 removed this defn from bits/sched.h
680 .TP
681 .BR CLONE_SYSVSEM " (since Linux 2.5.10)"
682 If
683 .B CLONE_SYSVSEM
684 is set, then the child and the calling process share
685 a single list of System\ V semaphore undo values (see
686 .BR semop (2)).
687 If this flag is not set, then the child has a separate undo list,
688 which is initially empty.
689 .TP
690 .BR CLONE_THREAD " (since Linux 2.4.0-test8)"
691 If
692 .B CLONE_THREAD
693 is set, the child is placed in the same thread group as the calling process.
694 To make the remainder of the discussion of
695 .B CLONE_THREAD
696 more readable, the term "thread" is used to refer to the
697 processes within a thread group.
698
699 Thread groups were a feature added in Linux 2.4 to support the
700 POSIX threads notion of a set of threads that share a single PID.
701 Internally, this shared PID is the so-called
702 thread group identifier (TGID) for the thread group.
703 Since Linux 2.4, calls to
704 .BR getpid (2)
705 return the TGID of the caller.
706
707 The threads within a group can be distinguished by their (system-wide)
708 unique thread IDs (TID).
709 A new thread's TID is available as the function result
710 returned to the caller of
711 .BR clone (),
712 and a thread can obtain
713 its own TID using
714 .BR gettid (2).
715
716 When a call is made to
717 .BR clone ()
718 without specifying
719 .BR CLONE_THREAD ,
720 then the resulting thread is placed in a new thread group
721 whose TGID is the same as the thread's TID.
722 This thread is the
723 .I leader
724 of the new thread group.
725
726 A new thread created with
727 .B CLONE_THREAD
728 has the same parent process as the caller of
729 .BR clone ()
730 (i.e., like
731 .BR CLONE_PARENT ),
732 so that calls to
733 .BR getppid (2)
734 return the same value for all of the threads in a thread group.
735 When a
736 .B CLONE_THREAD
737 thread terminates, the thread that created it using
738 .BR clone ()
739 is not sent a
740 .B SIGCHLD
741 (or other termination) signal;
742 nor can the status of such a thread be obtained
743 using
744 .BR wait (2).
745 (The thread is said to be
746 .IR detached .)
747
748 After all of the threads in a thread group terminate
749 the parent process of the thread group is sent a
750 .B SIGCHLD
751 (or other termination) signal.
752
753 If any of the threads in a thread group performs an
754 .BR execve (2),
755 then all threads other than the thread group leader are terminated,
756 and the new program is executed in the thread group leader.
757
758 If one of the threads in a thread group creates a child using
759 .BR fork (2),
760 then any thread in the group can
761 .BR wait (2)
762 for that child.
763
764 Since Linux 2.5.35,
765 .I flags
766 must also include
767 .B CLONE_SIGHAND
768 if
769 .B CLONE_THREAD
770 is specified
771 (and note that, since Linux 2.6.0-test6,
772 .BR CLONE_SIGHAND
773 also requires
774 .BR CLONE_VM
775 to be included).
776
777 Signals may be sent to a thread group as a whole (i.e., a TGID) using
778 .BR kill (2),
779 or to a specific thread (i.e., TID) using
780 .BR tgkill (2).
781
782 Signal dispositions and actions are process-wide:
783 if an unhandled signal is delivered to a thread, then
784 it will affect (terminate, stop, continue, be ignored in)
785 all members of the thread group.
786
787 Each thread has its own signal mask, as set by
788 .BR sigprocmask (2),
789 but signals can be pending either: for the whole process
790 (i.e., deliverable to any member of the thread group),
791 when sent with
792 .BR kill (2);
793 or for an individual thread, when sent with
794 .BR tgkill (2).
795 A call to
796 .BR sigpending (2)
797 returns a signal set that is the union of the signals pending for the
798 whole process and the signals that are pending for the calling thread.
799
800 If
801 .BR kill (2)
802 is used to send a signal to a thread group,
803 and the thread group has installed a handler for the signal, then
804 the handler will be invoked in exactly one, arbitrarily selected
805 member of the thread group that has not blocked the signal.
806 If multiple threads in a group are waiting to accept the same signal using
807 .BR sigwaitinfo (2),
808 the kernel will arbitrarily select one of these threads
809 to receive a signal sent using
810 .BR kill (2).
811 .TP
812 .BR CLONE_UNTRACED " (since Linux 2.5.46)"
813 If
814 .B CLONE_UNTRACED
815 is specified, then a tracing process cannot force
816 .B CLONE_PTRACE
817 on this child process.
818 .TP
819 .BR CLONE_VFORK " (since Linux 2.2)"
820 If
821 .B CLONE_VFORK
822 is set, the execution of the calling process is suspended
823 until the child releases its virtual memory
824 resources via a call to
825 .BR execve (2)
826 or
827 .BR _exit (2)
828 (as with
829 .BR vfork (2)).
830
831 If
832 .B CLONE_VFORK
833 is not set, then both the calling process and the child are schedulable
834 after the call, and an application should not rely on execution occurring
835 in any particular order.
836 .TP
837 .BR CLONE_VM " (since Linux 2.0)"
838 If
839 .B CLONE_VM
840 is set, the calling process and the child process run in the same memory
841 space.
842 In particular, memory writes performed by the calling process
843 or by the child process are also visible in the other process.
844 Moreover, any memory mapping or unmapping performed with
845 .BR mmap (2)
846 or
847 .BR munmap (2)
848 by the child or calling process also affects the other process.
849
850 If
851 .B CLONE_VM
852 is not set, the child process runs in a separate copy of the memory
853 space of the calling process at the time of
854 .BR clone ().
855 Memory writes or file mappings/unmappings performed by one of the
856 processes do not affect the other, as with
857 .BR fork (2).
858 .SS C library/kernel ABI differences
859 The raw
860 .BR clone ()
861 system call corresponds more closely to
862 .BR fork (2)
863 in that execution in the child continues from the point of the
864 call.
865 As such, the
866 .I fn
867 and
868 .I arg
869 arguments of the
870 .BR clone ()
871 wrapper function are omitted.
872 Furthermore, the argument order changes.
873 The raw system call interface on x86 and many other architectures is roughly:
874 .in +4
875 .nf
876
877 .BI "long clone(unsigned long " flags ", void *" child_stack ,
878 .BI " void *" ptid ", void *" ctid ,
879 .BI " struct pt_regs *" regs );
880
881 .fi
882 .in
883 Another difference for the raw system call is that the
884 .I child_stack
885 argument may be zero, in which case copy-on-write semantics ensure that the
886 child gets separate copies of stack pages when either process modifies
887 the stack.
888 In this case, for correct operation, the
889 .B CLONE_VM
890 option should not be specified.
891
892 For some architectures, the order of the arguments for the system call
893 differs from that shown above.
894 On the score, microblaze, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
895 and MIPS architectures,
896 the order of the fourth and fifth arguments is reversed.
897 On the cris and s390 architectures,
898 the order of the first and second arguments is reversed.
899 .SS blackfin, m68k, and sparc
900 The argument-passing conventions on
901 blackfin, m68k, and sparc are different from the descriptions above.
902 For details, see the kernel (and glibc) source.
903 .SS ia64
904 On ia64, a different interface is used:
905 .nf
906
907 .BI "int __clone2(int (*" "fn" ")(void *), "
908 .BI " void *" child_stack_base ", size_t " stack_size ,
909 .BI " int " flags ", void *" "arg" ", ... "
910 .BI " /* pid_t *" ptid ", struct user_desc *" tls \
911 ", pid_t *" ctid " */ );"
912 .fi
913 .PP
914 The prototype shown above is for the glibc wrapper function;
915 the raw system call interface has no
916 .I fn
917 or
918 .I arg
919 argument, and changes the order of the arguments so that
920 .I flags
921 is the first argument, and
922 .I tls
923 is the last argument.
924 .PP
925 .BR __clone2 ()
926 operates in the same way as
927 .BR clone (),
928 except that
929 .I child_stack_base
930 points to the lowest address of the child's stack area,
931 and
932 .I stack_size
933 specifies the size of the stack pointed to by
934 .IR child_stack_base .
935 .SS Linux 2.4 and earlier
936 In Linux 2.4 and earlier,
937 .BR clone ()
938 does not take arguments
939 .IR ptid ,
940 .IR tls ,
941 and
942 .IR ctid .
943 .SH RETURN VALUE
944 .\" gettid(2) returns current->pid;
945 .\" getpid(2) returns current->tgid;
946 On success, the thread ID of the child process is returned
947 in the caller's thread of execution.
948 On failure, \-1 is returned
949 in the caller's context, no child process will be created, and
950 .I errno
951 will be set appropriately.
952 .SH ERRORS
953 .TP
954 .B EAGAIN
955 Too many processes are already running; see
956 .BR fork (2).
957 .TP
958 .B EINVAL
959 .B CLONE_SIGHAND
960 was specified, but
961 .B CLONE_VM
962 was not.
963 (Since Linux 2.6.0-test6.)
964 .TP
965 .B EINVAL
966 .B CLONE_THREAD
967 was specified, but
968 .B CLONE_SIGHAND
969 was not.
970 (Since Linux 2.5.35.)
971 .\" .TP
972 .\" .B EINVAL
973 .\" Precisely one of
974 .\" .B CLONE_DETACHED
975 .\" and
976 .\" .B CLONE_THREAD
977 .\" was specified.
978 .\" (Since Linux 2.6.0-test6.)
979 .TP
980 .B EINVAL
981 Both
982 .B CLONE_FS
983 and
984 .B CLONE_NEWNS
985 were specified in
986 .IR flags .
987 .TP
988 .B EINVAL
989 Both
990 .B CLONE_NEWIPC
991 and
992 .B CLONE_SYSVSEM
993 were specified in
994 .IR flags .
995 .TP
996 .B EINVAL
997 Both
998 .BR CLONE_NEWPID
999 and
1000 .BR CLONE_THREAD
1001 were specified in
1002 .IR flags .
1003 .TP
1004 .B EINVAL
1005 Returned by
1006 .BR clone ()
1007 when a zero value is specified for
1008 .IR child_stack .
1009 .TP
1010 .B EINVAL
1011 .BR CLONE_NEWIPC
1012 was specified in
1013 .IR flags ,
1014 but the kernel was not configured with the
1015 .B CONFIG_SYSVIPC
1016 and
1017 .BR CONFIG_IPC_NS
1018 options.
1019 .TP
1020 .B EINVAL
1021 .BR CLONE_NEWNET
1022 was specified in
1023 .IR flags ,
1024 but the kernel was not configured with the
1025 .B CONFIG_NET_NS
1026 option.
1027 .TP
1028 .B EINVAL
1029 .BR CLONE_NEWPID
1030 was specified in
1031 .IR flags ,
1032 but the kernel was not configured with the
1033 .B CONFIG_PID_NS
1034 option.
1035 .TP
1036 .B EINVAL
1037 .BR CLONE_NEWUTS
1038 was specified in
1039 .IR flags ,
1040 but the kernel was not configured with the
1041 .B CONFIG_UTS_NS
1042 option.
1043 .TP
1044 .B ENOMEM
1045 There is insufficient memory to allocate a task structure for the
1046 child, or to copy those parts of the caller's context that need to be
1047 copied.
1048 .TP
1049 .B EPERM
1050 .BR CLONE_NEWIPC ,
1051 .BR CLONE_NEWNET ,
1052 .BR CLONE_NEWNS ,
1053 .BR CLONE_NEWPID ,
1054 or
1055 .BR CLONE_NEWUTS
1056 was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
1057 .TP
1058 .B EPERM
1059 .B CLONE_PID
1060 was specified by a process other than process 0.
1061 .SH CONFORMING TO
1062 .BR clone ()
1063 is Linux-specific and should not be used in programs
1064 intended to be portable.
1065 .SH NOTES
1066 In the kernel 2.4.x series,
1067 .B CLONE_THREAD
1068 generally does not make the parent of the new thread the same
1069 as the parent of the calling process.
1070 However, for kernel versions 2.4.7 to 2.4.18 the
1071 .B CLONE_THREAD
1072 flag implied the
1073 .B CLONE_PARENT
1074 flag (as in kernel 2.6).
1075
1076 For a while there was a
1077 .B CLONE_DETACHED
1078 flag (introduced in 2.5.32),
1079 which indicated that the parent wants no child-exit signal.
1080 In 2.6.2, the need to specify this flag
1081 together with
1082 .B CLONE_THREAD
1083 disappeared.
1084 This flag is still defined, but has no effect.
1085
1086 On i386,
1087 .BR clone ()
1088 should not be called through vsyscall, but directly through
1089 .IR "int $0x80" .
1090 .SH BUGS
1091 Versions of the GNU C library that include the NPTL threading library
1092 contain a wrapper function for
1093 .BR getpid (2)
1094 that performs caching of PIDs.
1095 This caching relies on support in the glibc wrapper for
1096 .BR clone (),
1097 but as currently implemented,
1098 the cache may not be up to date in some circumstances.
1099 In particular,
1100 if a signal is delivered to the child immediately after the
1101 .BR clone ()
1102 call, then a call to
1103 .BR getpid (2)
1104 in a handler for the signal may return the PID
1105 of the calling process ("the parent"),
1106 if the clone wrapper has not yet had a chance to update the PID
1107 cache in the child.
1108 (This discussion ignores the case where the child was created using
1109 .BR CLONE_THREAD ,
1110 when
1111 .BR getpid (2)
1112 .I should
1113 return the same value in the child and in the process that called
1114 .BR clone (),
1115 since the caller and the child are in the same thread group.
1116 The stale-cache problem also does not occur if the
1117 .I flags
1118 argument includes
1119 .BR CLONE_VM .)
1120 To get the truth, it may be necessary to use code such as the following:
1121 .nf
1122
1123 #include <syscall.h>
1124
1125 pid_t mypid;
1126
1127 mypid = syscall(SYS_getpid); /* Raw system call; bypasses the caching wrapper */
1128 .fi
1129 .\" See also the following bug reports
1130 .\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1131 .\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
1132 .SH EXAMPLE
1133 The following program demonstrates the use of
1134 .BR clone ()
1135 to create a child process that executes in a separate UTS namespace.
1136 The child changes the hostname in its UTS namespace.
1137 Both parent and child then display the system hostname,
1138 making it possible to see that the hostname
1139 differs in the UTS namespaces of the parent and child.
1140 For an example of the use of this program, see
1141 .BR setns (2).
1142 .SS Program source
1143 .nf
1144 #define _GNU_SOURCE
1145 #include <sys/wait.h>
1146 #include <sys/utsname.h>
1147 #include <sched.h>
1148 #include <string.h>
1149 #include <stdio.h>
1150 #include <stdlib.h>
1151 #include <unistd.h>
1152
1153 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
1154 } while (0) /* Report msg via perror(), then exit with failure status */
1155
1156 static int /* Start function for cloned child; arg is the hostname to set */
1157 childFunc(void *arg)
1158 {
1159 struct utsname uts;
1160
1161 /* Change hostname in UTS namespace of child to the string in arg
1162 (arg must be null\-terminated, since strlen() is applied to it) */
1163 if (sethostname(arg, strlen(arg)) == \-1)
1164 errExit("sethostname");
1165
1166 /* Retrieve and display hostname as now seen by the child */
1167
1168 if (uname(&uts) == \-1)
1169 errExit("uname");
1170 printf("uts.nodename in child: %s\\n", uts.nodename);
1171
1172 /* Keep the namespace open for a while, by sleeping.
1173 This allows some experimentation\-\-for example, another
1174 process might join the namespace. */
1175
1176 sleep(200);
1177
1178 return 0; /* Child terminates now */
1179 }
1180
1181 #define STACK_SIZE (1024 * 1024) /* Stack size for cloned child, in bytes */
1182
1183 int /* Clone a child into a new UTS namespace, then wait for it */
1184 main(int argc, char *argv[])
1185 {
1186 char *stack; /* Start of stack buffer */
1187 char *stackTop; /* End of stack buffer */
1188 pid_t pid; /* Value returned by clone() */
1189 struct utsname uts;
1190
1191 if (argc < 2) {
1192 fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]);
1193 exit(EXIT_FAILURE); /* Missing argument is a usage error */
1194 }
1195
1196 /* Allocate stack for child */
1197
1198 stack = malloc(STACK_SIZE);
1199 if (stack == NULL)
1200 errExit("malloc");
1201 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
1202
1203 /* Create child that has its own UTS namespace;
1204 child commences execution in childFunc(), with argv[1] as its argument */
1205
1206 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1207 if (pid == \-1)
1208 errExit("clone");
1209 printf("clone() returned %ld\\n", (long) pid);
1210
1211 /* Parent falls through to here */
1212
1213 sleep(1); /* Give child time to change its hostname */
1214
1215 /* Display hostname in parent\(aqs UTS namespace. This will be
1216 different from hostname in child\(aqs UTS namespace. */
1217
1218 if (uname(&uts) == \-1)
1219 errExit("uname");
1220 printf("uts.nodename in parent: %s\\n", uts.nodename);
1221
1222 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
1223 errExit("waitpid");
1224 printf("child has terminated\\n");
1225
1226 exit(EXIT_SUCCESS);
1227 }
1228 .fi
1229 .SH SEE ALSO
1230 .BR fork (2),
1231 .BR futex (2),
1232 .BR getpid (2),
1233 .BR gettid (2),
1234 .BR kcmp (2),
1235 .BR set_thread_area (2),
1236 .BR set_tid_address (2),
1237 .BR setns (2),
1238 .BR tkill (2),
1239 .BR unshare (2),
1240 .BR wait (2),
1241 .BR proc (5),
1242 .BR capabilities (7),
1243 .BR pthreads (7)