]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/clone.2
clone.2: Note that child_stack can be NULL when using the raw system call
[thirdparty/man-pages.git] / man2 / clone.2
1 .\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
2 .\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013
3 .\"
4 .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
5 .\" May be distributed under the GNU General Public License.
6 .\" %%%LICENSE_END
7 .\"
8 .\" Modified by Michael Haardt <michael@moria.de>
9 .\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
10 .\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
11 .\" New man page (copied from 'fork.2').
12 .\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
13 .\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
14 .\" Modified 26 Jun 2001 by Michael Kerrisk
15 .\" Mostly upgraded to 2.4.x
16 .\" Added prototype for sys_clone() plus description
17 .\" Added CLONE_THREAD with a brief description of thread groups
18 .\" Added CLONE_PARENT and revised entire page remove ambiguity
19 .\" between "calling process" and "parent process"
20 .\" Added CLONE_PTRACE and CLONE_VFORK
21 .\" Added EPERM and EINVAL error codes
22 .\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
23 .\" various other minor tidy ups and clarifications.
24 .\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
25 .\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
26 .\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
27 .\" Added description for CLONE_NEWNS, which was added in 2.4.19
28 .\" Slightly rephrased, aeb.
29 .\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
30 .\" Modified 1 Jan 2004 - various updates, aeb
31 .\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
32 .\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
33 .\" wrapper under BUGS.
34 .\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
35 .\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
36 .\" 2008-11-18, mtk, order CLONE_* flags alphabetically
37 .\" 2008-11-18, mtk, document CLONE_NEWPID
38 .\" 2008-11-19, mtk, document CLONE_NEWUTS
39 .\" 2008-11-19, mtk, document CLONE_NEWIPC
40 .\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
41 .\"
42 .TH CLONE 2 2017-09-15 "Linux" "Linux Programmer's Manual"
43 .SH NAME
44 clone, __clone2 \- create a child process
45 .SH SYNOPSIS
46 .nf
47 /* Prototype for the glibc wrapper function */
48 .PP
49 .B #define _GNU_SOURCE
50 .B #include <sched.h>
51 .PP
52 .BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
53 .BI " int " flags ", void *" "arg" ", ... "
54 .BI " /* pid_t *" ptid ", void *" newtls \
55 ", pid_t *" ctid " */ );"
56 .PP
57 /* For the prototype of the raw system call, see NOTES */
58 .fi
59 .SH DESCRIPTION
60 .BR clone ()
61 creates a new process, in a manner similar to
62 .BR fork (2).
63 .PP
64 This page describes both the glibc
65 .BR clone ()
66 wrapper function and the underlying system call on which it is based.
67 The main text describes the wrapper function;
68 the differences for the raw system call
69 are described toward the end of this page.
70 .PP
71 Unlike
72 .BR fork (2),
73 .BR clone ()
74 allows the child process to share parts of its execution context with
75 the calling process, such as the memory space, the table of file
76 descriptors, and the table of signal handlers.
77 (Note that on this manual
78 page, "calling process" normally corresponds to "parent process".
79 But see the description of
80 .B CLONE_PARENT
81 below.)
82 .PP
83 One use of
84 .BR clone ()
85 is to implement threads: multiple threads of control in a program that
86 run concurrently in a shared memory space.
87 .PP
88 When the child process is created with
89 .BR clone (),
90 it executes the function
91 .IR fn ( arg ).
92 (This differs from
93 .BR fork (2),
94 where execution continues in the child from the point
95 of the
96 .BR fork (2)
97 call.)
98 The
99 .I fn
100 argument is a pointer to a function that is called by the child
101 process at the beginning of its execution.
102 The
103 .I arg
104 argument is passed to the
105 .I fn
106 function.
107 .PP
108 When the
109 .IR fn ( arg )
110 function application returns, the child process terminates.
111 The integer returned by
112 .I fn
113 is the exit code for the child process.
114 The child process may also terminate explicitly by calling
115 .BR exit (2)
116 or after receiving a fatal signal.
117 .PP
118 The
119 .I child_stack
120 argument specifies the location of the stack used by the child process.
121 Since the child and calling process may share memory,
122 it is not possible for the child process to execute in the
123 same stack as the calling process.
124 The calling process must therefore
125 set up memory space for the child stack and pass a pointer to this
126 space to
127 .BR clone ().
128 Stacks grow downward on all processors that run Linux
129 (except the HP PA processors), so
130 .I child_stack
131 usually points to the topmost address of the memory space set up for
132 the child stack.
133 .PP
134 The low byte of
135 .I flags
136 contains the number of the
137 .I "termination signal"
138 sent to the parent when the child dies.
139 If this signal is specified as anything other than
140 .BR SIGCHLD ,
141 then the parent process must specify the
142 .B __WALL
143 or
144 .B __WCLONE
145 options when waiting for the child with
146 .BR wait (2).
147 If no signal is specified, then the parent process is not signaled
148 when the child terminates.
149 .PP
150 .I flags
151 may also be bitwise-or'ed with zero or more of the following constants,
152 in order to specify what is shared between the calling process
153 and the child process:
154 .TP
155 .BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
156 Clear (zero) the child thread ID at the location
157 .I ctid
158 in child memory when the child exits, and do a wakeup on the futex
159 at that address.
160 The address involved may be changed by the
161 .BR set_tid_address (2)
162 system call.
163 This is used by threading libraries.
164 .TP
165 .BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
166 Store the child thread ID at the location
167 .I ctid
168 in the child's memory.
169 The store operation completes before
170 .BR clone ()
171 returns control to user space.
172 .TP
173 .BR CLONE_FILES " (since Linux 2.0)"
174 If
175 .B CLONE_FILES
176 is set, the calling process and the child process share the same file
177 descriptor table.
178 Any file descriptor created by the calling process or by the child
179 process is also valid in the other process.
180 Similarly, if one of the processes closes a file descriptor,
181 or changes its associated flags (using the
182 .BR fcntl (2)
183 .B F_SETFD
184 operation), the other process is also affected.
185 If a process sharing a file descriptor table calls
186 .BR execve (2),
187 its file descriptor table is duplicated (unshared).
188 .IP
189 If
190 .B CLONE_FILES
191 is not set, the child process inherits a copy of all file descriptors
192 opened in the calling process at the time of
193 .BR clone ().
194 Subsequent operations that open or close file descriptors,
195 or change file descriptor flags,
196 performed by either the calling
197 process or the child process do not affect the other process.
198 Note, however,
199 that the duplicated file descriptors in the child refer to the same open file
200 descriptions as the corresponding file descriptors in the calling process,
201 and thus share file offsets and file status flags (see
202 .BR open (2)).
203 .TP
204 .BR CLONE_FS " (since Linux 2.0)"
205 If
206 .B CLONE_FS
207 is set, the caller and the child process share the same filesystem
208 information.
209 This includes the root of the filesystem, the current
210 working directory, and the umask.
211 Any call to
212 .BR chroot (2),
213 .BR chdir (2),
214 or
215 .BR umask (2)
216 performed by the calling process or the child process also affects the
217 other process.
218 .IP
219 If
220 .B CLONE_FS
221 is not set, the child process works on a copy of the filesystem
222 information of the calling process at the time of the
223 .BR clone ()
224 call.
225 Calls to
226 .BR chroot (2),
227 .BR chdir (2),
228 .BR umask (2)
229 performed later by one of the processes do not affect the other process.
230 .TP
231 .BR CLONE_IO " (since Linux 2.6.25)"
232 If
233 .B CLONE_IO
234 is set, then the new process shares an I/O context with
235 the calling process.
236 If this flag is not set, then (as with
237 .BR fork (2))
238 the new process has its own I/O context.
239 .IP
240 .\" The following based on text from Jens Axboe
241 The I/O context is the I/O scope of the disk scheduler (i.e.,
242 what the I/O scheduler uses to model scheduling of a process's I/O).
243 If processes share the same I/O context,
244 they are treated as one by the I/O scheduler.
245 As a consequence, they get to share disk time.
246 For some I/O schedulers,
247 .\" the anticipatory and CFQ scheduler
248 if two processes share an I/O context,
249 they will be allowed to interleave their disk access.
250 If several threads are doing I/O on behalf of the same process
251 .RB ( aio_read (3),
252 for instance), they should employ
253 .BR CLONE_IO
254 to get better I/O performance.
255 .\" with CFQ and AS.
256 .IP
257 If the kernel is not configured with the
258 .B CONFIG_BLOCK
259 option, this flag is a no-op.
260 .TP
261 .BR CLONE_NEWCGROUP " (since Linux 4.6)"
262 Create the process in a new cgroup namespace.
263 If this flag is not set, then (as with
264 .BR fork (2))
265 the process is created in the same cgroup namespace as the calling process.
266 This flag is intended for the implementation of containers.
267 .IP
268 For further information on cgroup namespaces, see
269 .BR cgroup_namespaces (7).
270 .IP
271 Only a privileged process
272 .RB ( CAP_SYS_ADMIN )
273 can employ
274 .BR CLONE_NEWCGROUP .
275 .\"
276 .TP
277 .BR CLONE_NEWIPC " (since Linux 2.6.19)"
278 If
279 .B CLONE_NEWIPC
280 is set, then create the process in a new IPC namespace.
281 If this flag is not set, then (as with
282 .BR fork (2))
283 the process is created in the same IPC namespace as
284 the calling process.
285 This flag is intended for the implementation of containers.
286 .IP
287 An IPC namespace provides an isolated view of System\ V IPC objects (see
288 .BR svipc (7))
289 and (since Linux 2.6.30)
290 .\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
291 .\" https://lwn.net/Articles/312232/
292 POSIX message queues
293 (see
294 .BR mq_overview (7)).
295 The common characteristic of these IPC mechanisms is that IPC
296 objects are identified by mechanisms other than filesystem
297 pathnames.
298 .IP
299 Objects created in an IPC namespace are visible to all other processes
300 that are members of that namespace,
301 but are not visible to processes in other IPC namespaces.
302 .IP
303 When an IPC namespace is destroyed
304 (i.e., when the last process that is a member of the namespace terminates),
305 all IPC objects in the namespace are automatically destroyed.
306 .IP
307 Only a privileged process
308 .RB ( CAP_SYS_ADMIN )
309 can employ
310 .BR CLONE_NEWIPC .
311 This flag can't be specified in conjunction with
312 .BR CLONE_SYSVSEM .
313 .IP
314 For further information on IPC namespaces, see
315 .BR namespaces (7).
316 .TP
317 .BR CLONE_NEWNET " (since Linux 2.6.24)"
318 (The implementation of this flag was completed only
319 by about kernel version 2.6.29.)
320 .IP
321 If
322 .B CLONE_NEWNET
323 is set, then create the process in a new network namespace.
324 If this flag is not set, then (as with
325 .BR fork (2))
326 the process is created in the same network namespace as
327 the calling process.
328 This flag is intended for the implementation of containers.
329 .IP
330 A network namespace provides an isolated view of the networking stack
331 (network device interfaces, IPv4 and IPv6 protocol stacks,
332 IP routing tables, firewall rules, the
333 .I /proc/net
334 and
335 .I /sys/class/net
336 directory trees, sockets, etc.).
337 A physical network device can live in exactly one
338 network namespace.
339 A virtual network device ("veth") pair provides a pipe-like abstraction
340 .\" FIXME . Add pointer to veth(4) page when it is eventually completed
341 that can be used to create tunnels between network namespaces,
342 and can be used to create a bridge to a physical network device
343 in another namespace.
344 .IP
345 When a network namespace is freed
346 (i.e., when the last process in the namespace terminates),
347 its physical network devices are moved back to the
348 initial network namespace (not to the parent of the process).
349 For further information on network namespaces, see
350 .BR namespaces (7).
351 .IP
352 Only a privileged process
353 .RB ( CAP_SYS_ADMIN )
354 can employ
355 .BR CLONE_NEWNET .
356 .TP
357 .BR CLONE_NEWNS " (since Linux 2.4.19)"
358 If
359 .B CLONE_NEWNS
360 is set, the cloned child is started in a new mount namespace,
361 initialized with a copy of the namespace of the parent.
362 If
363 .B CLONE_NEWNS
364 is not set, the child lives in the same mount
365 namespace as the parent.
366 .IP
367 Only a privileged process
368 .RB ( CAP_SYS_ADMIN )
369 can employ
370 .BR CLONE_NEWNS .
371 It is not permitted to specify both
372 .B CLONE_NEWNS
373 and
374 .B CLONE_FS
375 .\" See https://lwn.net/Articles/543273/
376 in the same
377 .BR clone ()
378 call.
379 .IP
380 For further information on mount namespaces, see
381 .BR namespaces (7)
382 and
383 .BR mount_namespaces (7).
384 .TP
385 .BR CLONE_NEWPID " (since Linux 2.6.24)"
386 .\" This explanation draws a lot of details from
387 .\" http://lwn.net/Articles/259217/
388 .\" Authors: Pavel Emelyanov <xemul@openvz.org>
389 .\" and Kir Kolyshkin <kir@openvz.org>
390 .\"
391 .\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
392 .\" Author: Pavel Emelyanov <xemul@openvz.org>
393 If
394 .B CLONE_NEWPID
395 is set, then create the process in a new PID namespace.
396 If this flag is not set, then (as with
397 .BR fork (2))
398 the process is created in the same PID namespace as
399 the calling process.
400 This flag is intended for the implementation of containers.
401 .IP
402 For further information on PID namespaces, see
403 .BR namespaces (7)
404 and
405 .BR pid_namespaces (7).
406 .IP
407 Only a privileged process
408 .RB ( CAP_SYS_ADMIN )
409 can employ
410 .BR CLONE_NEWPID .
411 This flag can't be specified in conjunction with
412 .BR CLONE_THREAD
413 or
414 .BR CLONE_PARENT .
415 .TP
416 .BR CLONE_NEWUSER
417 (This flag first became meaningful for
418 .BR clone ()
419 in Linux 2.6.23,
420 the current
421 .BR clone ()
422 semantics were merged in Linux 3.5,
423 and the final pieces to make the user namespaces completely usable were
424 merged in Linux 3.8.)
425 .IP
426 If
427 .B CLONE_NEWUSER
428 is set, then create the process in a new user namespace.
429 If this flag is not set, then (as with
430 .BR fork (2))
431 the process is created in the same user namespace as the calling process.
432 .IP
433 For further information on user namespaces, see
434 .BR namespaces (7)
435 and
436 .BR user_namespaces (7).
437 .IP
438 Before Linux 3.8, use of
439 .BR CLONE_NEWUSER
440 required that the caller have three capabilities:
441 .BR CAP_SYS_ADMIN ,
442 .BR CAP_SETUID ,
443 and
444 .BR CAP_SETGID .
445 .\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
446 Starting with Linux 3.8,
447 no privileges are needed to create a user namespace.
448 .IP
449 This flag can't be specified in conjunction with
450 .BR CLONE_THREAD
451 or
452 .BR CLONE_PARENT .
453 For security reasons,
454 .\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
455 .\" https://lwn.net/Articles/543273/
456 .\" The fix actually went into 3.9 and into 3.8.3. However, user namespaces
457 .\" were, for practical purposes, unusable in earlier 3.8.x because of the
458 .\" various filesystems that didn't support userns.
459 .BR CLONE_NEWUSER
460 cannot be specified in conjunction with
461 .BR CLONE_FS .
462 .IP
463 For further information on user namespaces, see
464 .BR user_namespaces (7).
465 .TP
466 .BR CLONE_NEWUTS " (since Linux 2.6.19)"
467 If
468 .B CLONE_NEWUTS
469 is set, then create the process in a new UTS namespace,
470 whose identifiers are initialized by duplicating the identifiers
471 from the UTS namespace of the calling process.
472 If this flag is not set, then (as with
473 .BR fork (2))
474 the process is created in the same UTS namespace as
475 the calling process.
476 This flag is intended for the implementation of containers.
477 .IP
478 A UTS namespace is the set of identifiers returned by
479 .BR uname (2);
480 among these, the domain name and the hostname can be modified by
481 .BR setdomainname (2)
482 and
483 .BR sethostname (2),
484 respectively.
485 Changes made to the identifiers in a UTS namespace
486 are visible to all other processes in the same namespace,
487 but are not visible to processes in other UTS namespaces.
488 .IP
489 Only a privileged process
490 .RB ( CAP_SYS_ADMIN )
491 can employ
492 .BR CLONE_NEWUTS .
493 .IP
494 For further information on UTS namespaces, see
495 .BR namespaces (7).
496 .TP
497 .BR CLONE_PARENT " (since Linux 2.3.12)"
498 If
499 .B CLONE_PARENT
500 is set, then the parent of the new child (as returned by
501 .BR getppid (2))
502 will be the same as that of the calling process.
503 .IP
504 If
505 .B CLONE_PARENT
506 is not set, then (as with
507 .BR fork (2))
508 the child's parent is the calling process.
509 .IP
510 Note that it is the parent process, as returned by
511 .BR getppid (2),
512 which is signaled when the child terminates, so that
513 if
514 .B CLONE_PARENT
515 is set, then the parent of the calling process, rather than the
516 calling process itself, will be signaled.
517 .TP
518 .BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
519 Store the child thread ID at the location
520 .I ptid
521 in the parent's memory.
522 (In Linux 2.5.32-2.5.48 there was a flag
523 .B CLONE_SETTID
524 that did this.)
525 The store operation completes before
526 .BR clone ()
527 returns control to user space.
528 .TP
529 .BR CLONE_PID " (obsolete)"
530 If
531 .B CLONE_PID
532 is set, the child process is created with the same process ID as
533 the calling process.
534 This is good for hacking the system, but otherwise
535 of not much use.
536 Since 2.3.21 this flag can be
537 specified only by the system boot process (PID 0).
538 It disappeared in Linux 2.5.16.
539 Since then, the kernel silently ignores it without error.
540 .TP
541 .BR CLONE_PTRACE " (since Linux 2.2)"
542 If
543 .B CLONE_PTRACE
544 is specified, and the calling process is being traced,
545 then trace the child also (see
546 .BR ptrace (2)).
547 .TP
548 .BR CLONE_SETTLS " (since Linux 2.5.32)"
549 The TLS (Thread Local Storage) descriptor is set to
550 .IR newtls .
551 .IP
552 The interpretation of
553 .I newtls
554 and the resulting effect is architecture dependent.
555 On x86,
556 .I newtls
557 is interpreted as a
558 .IR "struct user_desc *"
559 (see
560 .BR set_thread_area (2)).
561 On x86_64 it is the new value to be set for the %fs base register
562 (see the
563 .I ARCH_SET_FS
564 argument to
565 .BR arch_prctl (2)).
566 On architectures with a dedicated TLS register, it is the new value
567 of that register.
568 .TP
569 .BR CLONE_SIGHAND " (since Linux 2.0)"
570 If
571 .B CLONE_SIGHAND
572 is set, the calling process and the child process share the same table of
573 signal handlers.
574 If the calling process or child process calls
575 .BR sigaction (2)
576 to change the behavior associated with a signal, the behavior is
577 changed in the other process as well.
578 However, the calling process and child
579 processes still have distinct signal masks and sets of pending
580 signals.
581 So, one of them may block or unblock some signals using
582 .BR sigprocmask (2)
583 without affecting the other process.
584 .IP
585 If
586 .B CLONE_SIGHAND
587 is not set, the child process inherits a copy of the signal handlers
588 of the calling process at the time
589 .BR clone ()
590 is called.
591 Calls to
592 .BR sigaction (2)
593 performed later by one of the processes have no effect on the other
594 process.
595 .IP
596 Since Linux 2.6.0-test6,
597 .I flags
598 must also include
599 .B CLONE_VM
600 if
601 .B CLONE_SIGHAND
602 is specified.
603 .TP
604 .BR CLONE_STOPPED " (since Linux 2.6.0-test2)"
605 If
606 .B CLONE_STOPPED
607 is set, then the child is initially stopped (as though it was sent a
608 .B SIGSTOP
609 signal), and must be resumed by sending it a
610 .B SIGCONT
611 signal.
612 .IP
613 This flag was
614 .I deprecated
615 from Linux 2.6.25 onward,
616 and was
617 .I removed
618 altogether in Linux 2.6.38.
619 Since then, the kernel silently ignores it without error.
620 .\" glibc 2.8 removed this defn from bits/sched.h
621 Starting with Linux 4.6, the same bit was reused for the
622 .BR CLONE_NEWCGROUP
623 flag.
624 .TP
625 .BR CLONE_SYSVSEM " (since Linux 2.5.10)"
626 If
627 .B CLONE_SYSVSEM
628 is set, then the child and the calling process share
629 a single list of System V semaphore adjustment
630 .RI ( semadj )
631 values (see
632 .BR semop (2)).
633 In this case, the shared list accumulates
634 .I semadj
635 values across all processes sharing the list,
636 and semaphore adjustments are performed only when the last process
637 that is sharing the list terminates (or ceases sharing the list using
638 .BR unshare (2)).
639 If this flag is not set, then the child has a separate
640 .I semadj
641 list that is initially empty.
642 .TP
643 .BR CLONE_THREAD " (since Linux 2.4.0-test8)"
644 If
645 .B CLONE_THREAD
646 is set, the child is placed in the same thread group as the calling process.
647 To make the remainder of the discussion of
648 .B CLONE_THREAD
649 more readable, the term "thread" is used to refer to the
650 processes within a thread group.
651 .IP
652 Thread groups were a feature added in Linux 2.4 to support the
653 POSIX threads notion of a set of threads that share a single PID.
654 Internally, this shared PID is the so-called
655 thread group identifier (TGID) for the thread group.
656 Since Linux 2.4, calls to
657 .BR getpid (2)
658 return the TGID of the caller.
659 .IP
660 The threads within a group can be distinguished by their (system-wide)
661 unique thread IDs (TID).
662 A new thread's TID is available as the function result
663 returned to the caller of
664 .BR clone (),
665 and a thread can obtain
666 its own TID using
667 .BR gettid (2).
668 .IP
669 When a call is made to
670 .BR clone ()
671 without specifying
672 .BR CLONE_THREAD ,
673 then the resulting thread is placed in a new thread group
674 whose TGID is the same as the thread's TID.
675 This thread is the
676 .I leader
677 of the new thread group.
678 .IP
679 A new thread created with
680 .B CLONE_THREAD
681 has the same parent process as the caller of
682 .BR clone ()
683 (i.e., like
684 .BR CLONE_PARENT ),
685 so that calls to
686 .BR getppid (2)
687 return the same value for all of the threads in a thread group.
688 When a
689 .B CLONE_THREAD
690 thread terminates, the thread that created it using
691 .BR clone ()
692 is not sent a
693 .B SIGCHLD
694 (or other termination) signal;
695 nor can the status of such a thread be obtained
696 using
697 .BR wait (2).
698 (The thread is said to be
699 .IR detached .)
700 .IP
701 After all of the threads in a thread group terminate,
702 the parent process of the thread group is sent a
703 .B SIGCHLD
704 (or other termination) signal.
705 .IP
706 If any of the threads in a thread group performs an
707 .BR execve (2),
708 then all threads other than the thread group leader are terminated,
709 and the new program is executed in the thread group leader.
710 .IP
711 If one of the threads in a thread group creates a child using
712 .BR fork (2),
713 then any thread in the group can
714 .BR wait (2)
715 for that child.
716 .IP
717 Since Linux 2.5.35,
718 .I flags
719 must also include
720 .B CLONE_SIGHAND
721 if
722 .B CLONE_THREAD
723 is specified
724 (and note that, since Linux 2.6.0-test6,
725 .BR CLONE_SIGHAND
726 also requires
727 .BR CLONE_VM
728 to be included).
729 .IP
730 Signals may be sent to a thread group as a whole (i.e., a TGID) using
731 .BR kill (2),
732 or to a specific thread (i.e., TID) using
733 .BR tgkill (2).
734 .IP
735 Signal dispositions and actions are process-wide:
736 if an unhandled signal is delivered to a thread, then
737 it will affect (terminate, stop, continue, be ignored in)
738 all members of the thread group.
739 .IP
740 Each thread has its own signal mask, as set by
741 .BR sigprocmask (2),
742 but signals can be pending either: for the whole process
743 (i.e., deliverable to any member of the thread group),
744 when sent with
745 .BR kill (2);
746 or for an individual thread, when sent with
747 .BR tgkill (2).
748 A call to
749 .BR sigpending (2)
750 returns a signal set that is the union of the signals pending for the
751 whole process and the signals that are pending for the calling thread.
752 .IP
753 If
754 .BR kill (2)
755 is used to send a signal to a thread group,
756 and the thread group has installed a handler for the signal, then
757 the handler will be invoked in exactly one, arbitrarily selected
758 member of the thread group that has not blocked the signal.
759 If multiple threads in a group are waiting to accept the same signal using
760 .BR sigwaitinfo (2),
761 the kernel will arbitrarily select one of these threads
762 to receive a signal sent using
763 .BR kill (2).
764 .TP
765 .BR CLONE_UNTRACED " (since Linux 2.5.46)"
766 If
767 .B CLONE_UNTRACED
768 is specified, then a tracing process cannot force
769 .B CLONE_PTRACE
770 on this child process.
771 .TP
772 .BR CLONE_VFORK " (since Linux 2.2)"
773 If
774 .B CLONE_VFORK
775 is set, the execution of the calling process is suspended
776 until the child releases its virtual memory
777 resources via a call to
778 .BR execve (2)
779 or
780 .BR _exit (2)
781 (as with
782 .BR vfork (2)).
783 .IP
784 If
785 .B CLONE_VFORK
786 is not set, then both the calling process and the child are schedulable
787 after the call, and an application should not rely on execution occurring
788 in any particular order.
789 .TP
790 .BR CLONE_VM " (since Linux 2.0)"
791 If
792 .B CLONE_VM
793 is set, the calling process and the child process run in the same memory
794 space.
795 In particular, memory writes performed by the calling process
796 or by the child process are also visible in the other process.
797 Moreover, any memory mapping or unmapping performed with
798 .BR mmap (2)
799 or
800 .BR munmap (2)
801 by the child or calling process also affects the other process.
802 .IP
803 If
804 .B CLONE_VM
805 is not set, the child process runs in a separate copy of the memory
806 space of the calling process at the time of
807 .BR clone ().
808 Memory writes or file mappings/unmappings performed by one of the
809 processes do not affect the other, as with
810 .BR fork (2).
811 .SS C library/kernel differences
812 The raw
813 .BR clone ()
814 system call corresponds more closely to
815 .BR fork (2)
816 in that execution in the child continues from the point of the
817 call.
818 As such, the
819 .I fn
820 and
821 .I arg
822 arguments of the
823 .BR clone ()
824 wrapper function are omitted.
825 .PP
826 Unlike the glibc wrapper function, the raw
827 .BR clone ()
828 system call permits
829 .IR child_stack
830 to be specified as NULL,
831 with the meaning that the child uses the stack that was
832 duplicated from the parent.
833 (If the child
834 .I shares
835 the parent's memory because of the use of the
836 .BR CLONE_VM
837 flag, then chaos is likely to result if
838 .I child_stack
839 is specified as NULL.)
840 .PP
841 The order of the arguments also differs in the raw system call,
842 and there are variations in the arguments across architectures,
843 as detailed in the following paragraphs.
844 .PP
845 The raw system call interface on x86-64 and some other architectures
846 (including sh, tile, and alpha) is roughly:
847 .PP
848 .in +4
849 .EX
850 .BI "long clone(unsigned long " flags ", void *" child_stack ,
851 .BI " int *" ptid ", int *" ctid ,
852 .BI " unsigned long " newtls );
853 .EE
854 .in
855 .PP
856 On x86-32, and several other common architectures
857 (including score, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
858 and MIPS),
859 .\" CONFIG_CLONE_BACKWARDS
860 the order of the last two arguments is reversed:
861 .PP
862 .in +4
863 .EX
864 .BI "long clone(unsigned long " flags ", void *" child_stack ,
865 .BI " int *" ptid ", unsigned long " newtls ,
866 .BI " int *" ctid );
867 .EE
868 .in
869 .PP
870 On the cris and s390 architectures,
871 .\" CONFIG_CLONE_BACKWARDS2
872 the order of the first two arguments is reversed:
873 .PP
874 .in +4
875 .EX
876 .BI "long clone(void *" child_stack ", unsigned long " flags ,
877 .BI " int *" ptid ", int *" ctid ,
878 .BI " unsigned long " newtls );
879 .EE
880 .in
881 .PP
882 On the microblaze architecture,
883 .\" CONFIG_CLONE_BACKWARDS3
884 an additional argument is supplied:
885 .PP
886 .in +4
887 .EX
888 .BI "long clone(unsigned long " flags ", void *" child_stack ,
889 .BI " int " stack_size , "\fR /* Size of stack */"
890 .BI " int *" ptid ", int *" ctid ,
891 .BI " unsigned long " newtls );
892 .EE
893 .in
894 .PP
895 Another difference for the raw system call is that the
896 .I child_stack
897 argument may be zero, in which case copy-on-write semantics ensure that the
898 child gets separate copies of stack pages when either process modifies
899 the stack.
900 In this case, for correct operation, the
901 .B CLONE_VM
902 option should not be specified.
903 .\"
904 .SS blackfin, m68k, and sparc
905 .\" Mike Frysinger noted in a 2013 mail:
906 .\" these arches don't define __ARCH_WANT_SYS_CLONE:
907 .\" blackfin ia64 m68k sparc
908 The argument-passing conventions on
909 blackfin, m68k, and sparc are different from the descriptions above.
910 For details, see the kernel (and glibc) source.
911 .SS ia64
912 On ia64, a different interface is used:
913 .PP
914 .nf
915 .BI "int __clone2(int (*" "fn" ")(void *), "
916 .BI " void *" child_stack_base ", size_t " stack_size ,
917 .BI " int " flags ", void *" "arg" ", ... "
918 .BI " /* pid_t *" ptid ", struct user_desc *" tls \
919 ", pid_t *" ctid " */ );"
920 .fi
921 .PP
922 The prototype shown above is for the glibc wrapper function;
923 the raw system call interface has no
924 .I fn
925 or
926 .I arg
927 argument, and changes the order of the arguments so that
928 .I flags
929 is the first argument, and
930 .I tls
931 is the last argument.
932 .PP
933 .BR __clone2 ()
934 operates in the same way as
935 .BR clone (),
936 except that
937 .I child_stack_base
938 points to the lowest address of the child's stack area,
939 and
940 .I stack_size
941 specifies the size of the stack pointed to by
942 .IR child_stack_base .
943 .SS Linux 2.4 and earlier
944 In Linux 2.4 and earlier,
945 .BR clone ()
946 does not take arguments
947 .IR ptid ,
948 .IR tls ,
949 and
950 .IR ctid .
951 .SH RETURN VALUE
952 .\" gettid(2) returns current->pid;
953 .\" getpid(2) returns current->tgid;
954 On success, the thread ID of the child process is returned
955 in the caller's thread of execution.
956 On failure, \-1 is returned
957 in the caller's context, no child process will be created, and
958 .I errno
959 will be set appropriately.
960 .SH ERRORS
961 .TP
962 .B EAGAIN
963 Too many processes are already running; see
964 .BR fork (2).
965 .TP
966 .B EINVAL
967 .B CLONE_SIGHAND
968 was specified, but
969 .B CLONE_VM
970 was not.
971 (Since Linux 2.6.0-test6.)
972 .TP
973 .B EINVAL
974 .B CLONE_THREAD
975 was specified, but
976 .B CLONE_SIGHAND
977 was not.
978 (Since Linux 2.5.35.)
979 .\" .TP
980 .\" .B EINVAL
981 .\" Precisely one of
982 .\" .B CLONE_DETACHED
983 .\" and
984 .\" .B CLONE_THREAD
985 .\" was specified.
986 .\" (Since Linux 2.6.0-test6.)
987 .TP
988 .B EINVAL
989 .\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
990 Both
991 .B CLONE_FS
992 and
993 .B CLONE_NEWNS
994 were specified in
995 .IR flags .
996 .TP
997 .BR EINVAL " (since Linux 3.9)"
998 Both
999 .B CLONE_NEWUSER
1000 and
1001 .B CLONE_FS
1002 were specified in
1003 .IR flags .
1004 .TP
1005 .B EINVAL
1006 Both
1007 .B CLONE_NEWIPC
1008 and
1009 .B CLONE_SYSVSEM
1010 were specified in
1011 .IR flags .
1012 .TP
1013 .B EINVAL
1014 One (or both) of
1015 .BR CLONE_NEWPID
1016 or
1017 .BR CLONE_NEWUSER
1018 and one (or both) of
1019 .BR CLONE_THREAD
1020 or
1021 .BR CLONE_PARENT
1022 were specified in
1023 .IR flags .
1024 .TP
1025 .B EINVAL
1026 Returned by the glibc
1027 .BR clone ()
1028 wrapper function when
1029 .IR fn
1030 or
1031 .IR child_stack
1032 is specified as NULL.
1033 .TP
1034 .B EINVAL
1035 .BR CLONE_NEWIPC
1036 was specified in
1037 .IR flags ,
1038 but the kernel was not configured with the
1039 .B CONFIG_SYSVIPC
1040 and
1041 .BR CONFIG_IPC_NS
1042 options.
1043 .TP
1044 .B EINVAL
1045 .BR CLONE_NEWNET
1046 was specified in
1047 .IR flags ,
1048 but the kernel was not configured with the
1049 .B CONFIG_NET_NS
1050 option.
1051 .TP
1052 .B EINVAL
1053 .BR CLONE_NEWPID
1054 was specified in
1055 .IR flags ,
1056 but the kernel was not configured with the
1057 .B CONFIG_PID_NS
1058 option.
1059 .TP
1060 .B EINVAL
1061 .BR CLONE_NEWUTS
1062 was specified in
1063 .IR flags ,
1064 but the kernel was not configured with the
1065 .B CONFIG_UTS_NS
1066 option.
1067 .TP
1068 .B EINVAL
1069 .I child_stack
1070 is not aligned to a suitable boundary for this architecture.
1071 For example, on aarch64,
1072 .I child_stack
1073 must be a multiple of 16.
1074 .TP
1075 .B ENOMEM
1076 There is insufficient memory to allocate a task structure for the
1077 child, or to copy those parts of the caller's context that need to be
1078 copied.
1079 .TP
1080 .BR ENOSPC " (since Linux 3.7)"
1081 .\" commit f2302505775fd13ba93f034206f1e2a587017929
1082 .B CLONE_NEWPID
1083 was specified in \fIflags\fP,
1084 but the limit on the nesting depth of PID namespaces
1085 would have been exceeded; see
1086 .BR pid_namespaces (7).
1087 .TP
1088 .BR ENOSPC " (since Linux 4.9; beforehand " EUSERS )
1089 .B CLONE_NEWUSER
1090 was specified in
1091 .IR flags ,
1092 and the call would cause the limit on the number of
1093 nested user namespaces to be exceeded.
1094 See
1095 .BR user_namespaces (7).
1096 .IP
1097 From Linux 3.11 to Linux 4.8, the error diagnosed in this case was
1098 .BR EUSERS .
1099 .TP
1100 .BR ENOSPC " (since Linux 4.9)"
1101 One of the values in
1102 .I flags
1103 specified the creation of a new namespace,
1104 but doing so would have caused the limit defined by the corresponding file in
1105 .IR /proc/sys/user
1106 to be exceeded.
1107 For further details, see
1108 .BR namespaces (7).
1109 .TP
1110 .B EPERM
1111 .BR CLONE_NEWCGROUP ,
1112 .BR CLONE_NEWIPC ,
1113 .BR CLONE_NEWNET ,
1114 .BR CLONE_NEWNS ,
1115 .BR CLONE_NEWPID ,
1116 or
1117 .BR CLONE_NEWUTS
1118 was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
1119 .TP
1120 .B EPERM
1121 .B CLONE_PID
1122 was specified by a process other than process 0.
1123 .TP
1124 .B EPERM
1125 .BR CLONE_NEWUSER
1126 was specified in
1127 .IR flags ,
1128 but either the effective user ID or the effective group ID of the caller
1129 does not have a mapping in the parent namespace (see
1130 .BR user_namespaces (7)).
1131 .TP
1132 .BR EPERM " (since Linux 3.9)"
1133 .\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
1134 .B CLONE_NEWUSER
1135 was specified in
1136 .I flags
1137 and the caller is in a chroot environment
1138 .\" FIXME What is the rationale for this restriction?
1139 (i.e., the caller's root directory does not match the root directory
1140 of the mount namespace in which it resides).
1141 .TP
1142 .BR ERESTARTNOINTR " (since Linux 2.6.17)"
1143 .\" commit 4a2c7a7837da1b91468e50426066d988050e4d56
1144 System call was interrupted by a signal and will be restarted.
1145 (This can be seen only during a trace.)
1146 .TP
1147 .BR EUSERS " (Linux 3.11 to Linux 4.8)"
1148 .B CLONE_NEWUSER
1149 was specified in
1150 .IR flags ,
1151 and the limit on the number of nested user namespaces would be exceeded.
1152 See the discussion of the
1153 .BR ENOSPC
1154 error above.
1155 .\" .SH VERSIONS
1156 .\" There is no entry for
1157 .\" .BR clone ()
1158 .\" in libc5.
1159 .\" glibc2 provides
1160 .\" .BR clone ()
1161 .\" as described in this manual page.
1162 .SH CONFORMING TO
1163 .BR clone ()
1164 is Linux-specific and should not be used in programs
1165 intended to be portable.
1166 .SH NOTES
1167 The
1168 .BR kcmp (2)
1169 system call can be used to test whether two processes share various
1170 resources such as a file descriptor table,
1171 System V semaphore undo operations, or a virtual address space.
1172 .PP
1174 Handlers registered using
1175 .BR pthread_atfork (3)
1176 are not executed during a call to
1177 .BR clone ().
1178 .PP
1179 In the Linux 2.4.x series,
1180 .B CLONE_THREAD
1181 generally does not make the parent of the new thread the same
1182 as the parent of the calling process.
1183 However, for kernel versions 2.4.7 to 2.4.18 the
1184 .B CLONE_THREAD
1185 flag implied the
1186 .B CLONE_PARENT
1187 flag (as in Linux 2.6.0 and later).
1188 .PP
1189 For a while there was
1190 .B CLONE_DETACHED
1191 (introduced in 2.5.32),
1192 which indicated that the parent wanted no child-exit signal.
1193 In Linux 2.6.2, the need to give this flag together with
1194 .B CLONE_THREAD
1195 disappeared.
1196 This flag is still defined, but has no effect.
1197 .PP
1198 On i386,
1199 .BR clone ()
1200 should not be called through vsyscall, but directly through
1201 .IR "int $0x80" .
1202 .SH BUGS
1203 GNU C library versions 2.3.4 up to and including 2.24
1204 contained a wrapper function for
1205 .BR getpid (2)
1206 that performed caching of PIDs.
1207 This caching relied on support in the glibc wrapper for
1208 .BR clone (),
1209 but limitations in the implementation
1210 meant that the cache was not up to date in some circumstances.
1211 In particular,
1212 if a signal was delivered to the child immediately after the
1213 .BR clone ()
1214 call, then a call to
1215 .BR getpid (2)
1216 in a handler for the signal could return the PID
1217 of the calling process ("the parent"),
1218 if the clone wrapper had not yet had a chance to update the PID
1219 cache in the child.
1220 (This discussion ignores the case where the child was created using
1221 .BR CLONE_THREAD ,
1222 when
1223 .BR getpid (2)
1224 .I should
1225 return the same value in the child and in the process that called
1226 .BR clone (),
1227 since the caller and the child are in the same thread group.
1228 The stale-cache problem also does not occur if the
1229 .I flags
1230 argument includes
1231 .BR CLONE_VM .)
1232 To get the truth, it was sometimes necessary to use code such as the following:
1233 .PP
1234 .in +4n
1235 .EX
1236 #include <syscall.h>
1237
1238 pid_t mypid;
1239
1240 mypid = syscall(SYS_getpid);
1241 .EE
1242 .in
1243 .\" See also the following bug reports
1244 .\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1245 .\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
1246 .PP
1247 Because of the stale-cache problem, as well as other problems noted in
1248 .BR getpid (2),
1249 the PID caching feature was removed in glibc 2.25.
1250 .SH EXAMPLE
1251 The following program demonstrates the use of
1252 .BR clone ()
1253 to create a child process that executes in a separate UTS namespace.
1254 The child changes the hostname in its UTS namespace.
1255 Both parent and child then display the system hostname,
1256 making it possible to see that the hostname
1257 differs in the UTS namespaces of the parent and child.
1258 For an example of the use of this program, see
1259 .BR setns (2).
1260 .SS Program source
1261 .EX
1262 #define _GNU_SOURCE
1263 #include <sys/wait.h>
1264 #include <sys/utsname.h>
1265 #include <sched.h>
1266 #include <string.h>
1267 #include <stdio.h>
1268 #include <stdlib.h>
1269 #include <unistd.h>
1270
1271 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
1272 } while (0)
1273
1274 static int /* Start function for cloned child */
1275 childFunc(void *arg)
1276 {
1277 struct utsname uts;
1278
1279 /* Change hostname in UTS namespace of child */
1280
1281 if (sethostname(arg, strlen(arg)) == \-1)
1282 errExit("sethostname");
1283
1284 /* Retrieve and display hostname */
1285
1286 if (uname(&uts) == \-1)
1287 errExit("uname");
1288 printf("uts.nodename in child: %s\\n", uts.nodename);
1289
1290 /* Keep the namespace open for a while, by sleeping.
1291 This allows some experimentation\-\-for example, another
1292 process might join the namespace. */
1293
1294 sleep(200);
1295
1296 return 0; /* Child terminates now */
1297 }
1298
1299 #define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
1300
1301 int
1302 main(int argc, char *argv[])
1303 {
1304 char *stack; /* Start of stack buffer */
1305 char *stackTop; /* End of stack buffer */
1306 pid_t pid;
1307 struct utsname uts;
1308
1309 if (argc < 2) {
1310 fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]);
1311 exit(EXIT_SUCCESS);
1312 }
1313
1314 /* Allocate stack for child */
1315
1316 stack = malloc(STACK_SIZE);
1317 if (stack == NULL)
1318 errExit("malloc");
1319 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
1320
1321 /* Create child that has its own UTS namespace;
1322 child commences execution in childFunc() */
1323
1324 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1325 if (pid == \-1)
1326 errExit("clone");
1327 printf("clone() returned %ld\\n", (long) pid);
1328
1329 /* Parent falls through to here */
1330
1331 sleep(1); /* Give child time to change its hostname */
1332
1333 /* Display hostname in parent\(aqs UTS namespace. This will be
1334 different from hostname in child\(aqs UTS namespace. */
1335
1336 if (uname(&uts) == \-1)
1337 errExit("uname");
1338 printf("uts.nodename in parent: %s\\n", uts.nodename);
1339
1340 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
1341 errExit("waitpid");
1342 printf("child has terminated\\n");
1343
1344 exit(EXIT_SUCCESS);
1345 }
1346 .EE
1347 .SH SEE ALSO
1348 .BR fork (2),
1349 .BR futex (2),
1350 .BR getpid (2),
1351 .BR gettid (2),
1352 .BR kcmp (2),
1353 .BR set_thread_area (2),
1354 .BR set_tid_address (2),
1355 .BR setns (2),
1356 .BR tkill (2),
1357 .BR unshare (2),
1358 .BR wait (2),
1359 .BR capabilities (7),
1360 .BR namespaces (7),
1361 .BR pthreads (7)