]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/clone.2
clone.2: tfix
[thirdparty/man-pages.git] / man2 / clone.2
1 .\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
2 .\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013
3 .\"
4 .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
5 .\" May be distributed under the GNU General Public License.
6 .\" %%%LICENSE_END
7 .\"
8 .\" Modified by Michael Haardt <michael@moria.de>
9 .\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
10 .\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
11 .\" New man page (copied from 'fork.2').
12 .\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
13 .\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
14 .\" Modified 26 Jun 2001 by Michael Kerrisk
15 .\" Mostly upgraded to 2.4.x
16 .\" Added prototype for sys_clone() plus description
17 .\" Added CLONE_THREAD with a brief description of thread groups
18 .\" Added CLONE_PARENT and revised entire page to remove ambiguity
19 .\" between "calling process" and "parent process"
20 .\" Added CLONE_PTRACE and CLONE_VFORK
21 .\" Added EPERM and EINVAL error codes
22 .\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
23 .\" various other minor tidy ups and clarifications.
24 .\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
25 .\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
26 .\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
27 .\" Added description for CLONE_NEWNS, which was added in 2.4.19
28 .\" Slightly rephrased, aeb.
29 .\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
30 .\" Modified 1 Jan 2004 - various updates, aeb
31 .\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
32 .\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
33 .\" wrapper under BUGS.
34 .\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
35 .\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
36 .\" 2008-11-18, mtk, order CLONE_* flags alphabetically
37 .\" 2008-11-18, mtk, document CLONE_NEWPID
38 .\" 2008-11-19, mtk, document CLONE_NEWUTS
39 .\" 2008-11-19, mtk, document CLONE_NEWIPC
40 .\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
41 .\"
42 .TH CLONE 2 2017-09-15 "Linux" "Linux Programmer's Manual"
43 .SH NAME
44 clone, __clone2 \- create a child process
45 .SH SYNOPSIS
46 .nf
47 /* Prototype for the glibc wrapper function */
48 .PP
49 .B #define _GNU_SOURCE
50 .B #include <sched.h>
51 .PP
52 .BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
53 .BI " int " flags ", void *" "arg" ", ... "
54 .BI " /* pid_t *" ptid ", void *" newtls \
55 ", pid_t *" ctid " */ );"
56 .PP
57 /* For the prototype of the raw system call, see NOTES */
58 .fi
59 .SH DESCRIPTION
60 .BR clone ()
61 creates a new process, in a manner similar to
62 .BR fork (2).
63 .PP
64 This page describes both the glibc
65 .BR clone ()
66 wrapper function and the underlying system call on which it is based.
67 The main text describes the wrapper function;
68 the differences for the raw system call
69 are described toward the end of this page.
70 .PP
71 Unlike
72 .BR fork (2),
73 .BR clone ()
74 allows the child process to share parts of its execution context with
75 the calling process, such as the memory space, the table of file
76 descriptors, and the table of signal handlers.
77 (Note that on this manual
78 page, "calling process" normally corresponds to "parent process".
79 But see the description of
80 .B CLONE_PARENT
81 below.)
82 .PP
83 One use of
84 .BR clone ()
85 is to implement threads: multiple threads of control in a program that
86 run concurrently in a shared memory space.
87 .PP
88 When the child process is created with
89 .BR clone (),
90 it executes the function
91 .IR fn ( arg ).
92 (This differs from
93 .BR fork (2),
94 where execution continues in the child from the point
95 of the
96 .BR fork (2)
97 call.)
98 The
99 .I fn
100 argument is a pointer to a function that is called by the child
101 process at the beginning of its execution.
102 The
103 .I arg
104 argument is passed to the
105 .I fn
106 function.
107 .PP
108 When the
109 .IR fn ( arg )
110 function application returns, the child process terminates.
111 The integer returned by
112 .I fn
113 is the exit code for the child process.
114 The child process may also terminate explicitly by calling
115 .BR exit (2)
116 or after receiving a fatal signal.
117 .PP
118 The
119 .I child_stack
120 argument specifies the location of the stack used by the child process.
121 Since the child and calling process may share memory,
122 it is not possible for the child process to execute in the
123 same stack as the calling process.
124 The calling process must therefore
125 set up memory space for the child stack and pass a pointer to this
126 space to
127 .BR clone ().
128 Stacks grow downward on all processors that run Linux
129 (except the HP PA processors), so
130 .I child_stack
131 usually points to the topmost address of the memory space set up for
132 the child stack.
133 .PP
134 The low byte of
135 .I flags
136 contains the number of the
137 .I "termination signal"
138 sent to the parent when the child dies.
139 If this signal is specified as anything other than
140 .BR SIGCHLD ,
141 then the parent process must specify the
142 .B __WALL
143 or
144 .B __WCLONE
145 options when waiting for the child with
146 .BR wait (2).
147 If no signal is specified, then the parent process is not signaled
148 when the child terminates.
149 .PP
150 .I flags
151 may also be bitwise-or'ed with zero or more of the following constants,
152 in order to specify what is shared between the calling process
153 and the child process:
154 .TP
155 .BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
156 Clear (zero) the child thread ID at the location
157 .I ctid
158 in child memory when the child exits, and do a wakeup on the futex
159 at that address.
160 The address involved may be changed by the
161 .BR set_tid_address (2)
162 system call.
163 This is used by threading libraries.
164 .TP
165 .BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
166 Store the child thread ID at the location
167 .I ctid
168 in the child's memory.
169 The store operation completes before
170 .BR clone ()
171 returns control to user space.
172 .TP
173 .BR CLONE_FILES " (since Linux 2.0)"
174 If
175 .B CLONE_FILES
176 is set, the calling process and the child process share the same file
177 descriptor table.
178 Any file descriptor created by the calling process or by the child
179 process is also valid in the other process.
180 Similarly, if one of the processes closes a file descriptor,
181 or changes its associated flags (using the
182 .BR fcntl (2)
183 .B F_SETFD
184 operation), the other process is also affected.
185 If a process sharing a file descriptor table calls
186 .BR execve (2),
187 its file descriptor table is duplicated (unshared).
188 .IP
189 If
190 .B CLONE_FILES
191 is not set, the child process inherits a copy of all file descriptors
192 opened in the calling process at the time of
193 .BR clone ().
194 Subsequent operations that open or close file descriptors,
195 or change file descriptor flags,
196 performed by either the calling
197 process or the child process do not affect the other process.
198 Note, however,
199 that the duplicated file descriptors in the child refer to the same open file
200 descriptions as the corresponding file descriptors in the calling process,
201 and thus share file offsets and file status flags (see
202 .BR open (2)).
203 .TP
204 .BR CLONE_FS " (since Linux 2.0)"
205 If
206 .B CLONE_FS
207 is set, the caller and the child process share the same filesystem
208 information.
209 This includes the root of the filesystem, the current
210 working directory, and the umask.
211 Any call to
212 .BR chroot (2),
213 .BR chdir (2),
214 or
215 .BR umask (2)
216 performed by the calling process or the child process also affects the
217 other process.
218 .IP
219 If
220 .B CLONE_FS
221 is not set, the child process works on a copy of the filesystem
222 information of the calling process at the time of the
223 .BR clone ()
224 call.
225 Calls to
226 .BR chroot (2),
227 .BR chdir (2),
228 or \fBumask\fP(2)
229 performed later by one of the processes do not affect the other process.
230 .TP
231 .BR CLONE_IO " (since Linux 2.6.25)"
232 If
233 .B CLONE_IO
234 is set, then the new process shares an I/O context with
235 the calling process.
236 If this flag is not set, then (as with
237 .BR fork (2))
238 the new process has its own I/O context.
239 .IP
240 .\" The following based on text from Jens Axboe
241 The I/O context is the I/O scope of the disk scheduler (i.e.,
242 what the I/O scheduler uses to model scheduling of a process's I/O).
243 If processes share the same I/O context,
244 they are treated as one by the I/O scheduler.
245 As a consequence, they get to share disk time.
246 For some I/O schedulers,
247 .\" the anticipatory and CFQ scheduler
248 if two processes share an I/O context,
249 they will be allowed to interleave their disk access.
250 If several threads are doing I/O on behalf of the same process
251 .RB ( aio_read (3),
252 for instance), they should employ
253 .BR CLONE_IO
254 to get better I/O performance.
255 .\" with CFQ and AS.
256 .IP
257 If the kernel is not configured with the
258 .B CONFIG_BLOCK
259 option, this flag is a no-op.
260 .TP
261 .BR CLONE_NEWCGROUP " (since Linux 4.6)"
262 Create the process in a new cgroup namespace.
263 If this flag is not set, then (as with
264 .BR fork (2))
265 the process is created in the same cgroup namespace as the calling process.
266 This flag is intended for the implementation of containers.
267 .IP
268 For further information on cgroup namespaces, see
269 .BR cgroup_namespaces (7).
270 .IP
271 Only a privileged process
272 .RB ( CAP_SYS_ADMIN )
273 can employ
274 .BR CLONE_NEWCGROUP .
275 .\"
276 .TP
277 .BR CLONE_NEWIPC " (since Linux 2.6.19)"
278 If
279 .B CLONE_NEWIPC
280 is set, then create the process in a new IPC namespace.
281 If this flag is not set, then (as with
282 .BR fork (2))
283 the process is created in the same IPC namespace as
284 the calling process.
285 This flag is intended for the implementation of containers.
286 .IP
287 An IPC namespace provides an isolated view of System\ V IPC objects (see
288 .BR svipc (7))
289 and (since Linux 2.6.30)
290 .\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
291 .\" https://lwn.net/Articles/312232/
292 POSIX message queues
293 (see
294 .BR mq_overview (7)).
295 The common characteristic of these IPC mechanisms is that IPC
296 objects are identified by mechanisms other than filesystem
297 pathnames.
298 .IP
299 Objects created in an IPC namespace are visible to all other processes
300 that are members of that namespace,
301 but are not visible to processes in other IPC namespaces.
302 .IP
303 When an IPC namespace is destroyed
304 (i.e., when the last process that is a member of the namespace terminates),
305 all IPC objects in the namespace are automatically destroyed.
306 .IP
307 Only a privileged process
308 .RB ( CAP_SYS_ADMIN )
309 can employ
310 .BR CLONE_NEWIPC .
311 This flag can't be specified in conjunction with
312 .BR CLONE_SYSVSEM .
313 .IP
314 For further information on IPC namespaces, see
315 .BR namespaces (7).
316 .TP
317 .BR CLONE_NEWNET " (since Linux 2.6.24)"
318 (The implementation of this flag was completed only
319 by about kernel version 2.6.29.)
320 .IP
321 If
322 .B CLONE_NEWNET
323 is set, then create the process in a new network namespace.
324 If this flag is not set, then (as with
325 .BR fork (2))
326 the process is created in the same network namespace as
327 the calling process.
328 This flag is intended for the implementation of containers.
329 .IP
330 A network namespace provides an isolated view of the networking stack
331 (network device interfaces, IPv4 and IPv6 protocol stacks,
332 IP routing tables, firewall rules, the
333 .I /proc/net
334 and
335 .I /sys/class/net
336 directory trees, sockets, etc.).
337 A physical network device can live in exactly one
338 network namespace.
339 A virtual network device ("veth") pair provides a pipe-like abstraction
340 .\" FIXME . Add pointer to veth(4) page when it is eventually completed
341 that can be used to create tunnels between network namespaces,
342 and can be used to create a bridge to a physical network device
343 in another namespace.
344 .IP
345 When a network namespace is freed
346 (i.e., when the last process in the namespace terminates),
347 its physical network devices are moved back to the
348 initial network namespace (not to the parent of the process).
349 For further information on network namespaces, see
350 .BR namespaces (7).
351 .IP
352 Only a privileged process
353 .RB ( CAP_SYS_ADMIN )
354 can employ
355 .BR CLONE_NEWNET .
356 .TP
357 .BR CLONE_NEWNS " (since Linux 2.4.19)"
358 If
359 .B CLONE_NEWNS
360 is set, the cloned child is started in a new mount namespace,
361 initialized with a copy of the namespace of the parent.
362 If
363 .B CLONE_NEWNS
364 is not set, the child lives in the same mount
365 namespace as the parent.
366 .IP
367 Only a privileged process
368 .RB ( CAP_SYS_ADMIN )
369 can employ
370 .BR CLONE_NEWNS .
371 It is not permitted to specify both
372 .B CLONE_NEWNS
373 and
374 .B CLONE_FS
375 .\" See https://lwn.net/Articles/543273/
376 in the same
377 .BR clone ()
378 call.
379 .IP
380 For further information on mount namespaces, see
381 .BR namespaces (7)
382 and
383 .BR mount_namespaces (7).
384 .TP
385 .BR CLONE_NEWPID " (since Linux 2.6.24)"
386 .\" This explanation draws a lot of details from
387 .\" http://lwn.net/Articles/259217/
388 .\" Authors: Pavel Emelyanov <xemul@openvz.org>
389 .\" and Kir Kolyshkin <kir@openvz.org>
390 .\"
391 .\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
392 .\" Author: Pavel Emelyanov <xemul@openvz.org>
393 If
394 .B CLONE_NEWPID
395 is set, then create the process in a new PID namespace.
396 If this flag is not set, then (as with
397 .BR fork (2))
398 the process is created in the same PID namespace as
399 the calling process.
400 This flag is intended for the implementation of containers.
401 .IP
402 For further information on PID namespaces, see
403 .BR namespaces (7)
404 and
405 .BR pid_namespaces (7).
406 .IP
407 Only a privileged process
408 .RB ( CAP_SYS_ADMIN )
409 can employ
410 .BR CLONE_NEWPID .
411 This flag can't be specified in conjunction with
412 .BR CLONE_THREAD
413 or
414 .BR CLONE_PARENT .
415 .TP
416 .BR CLONE_NEWUSER
417 (This flag first became meaningful for
418 .BR clone ()
419 in Linux 2.6.23,
420 the current
421 .BR clone ()
422 semantics were merged in Linux 3.5,
423 and the final pieces to make the user namespaces completely usable were
424 merged in Linux 3.8.)
425 .IP
426 If
427 .B CLONE_NEWUSER
428 is set, then create the process in a new user namespace.
429 If this flag is not set, then (as with
430 .BR fork (2))
431 the process is created in the same user namespace as the calling process.
432 .IP
433 For further information on user namespaces, see
434 .BR namespaces (7)
435 and
436 .BR user_namespaces (7).
437 .IP
438 Before Linux 3.8, use of
439 .BR CLONE_NEWUSER
440 required that the caller have three capabilities:
441 .BR CAP_SYS_ADMIN ,
442 .BR CAP_SETUID ,
443 and
444 .BR CAP_SETGID .
445 .\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
446 Starting with Linux 3.8,
447 no privileges are needed to create a user namespace.
448 .IP
449 This flag can't be specified in conjunction with
450 .BR CLONE_THREAD
451 or
452 .BR CLONE_PARENT .
453 For security reasons,
454 .\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
455 .\" https://lwn.net/Articles/543273/
456 .\" The fix actually went into 3.9 and into 3.8.3. However, user namespaces
457 .\" were, for practical purposes, unusable in earlier 3.8.x because of the
458 .\" various filesystems that didn't support userns.
459 .BR CLONE_NEWUSER
460 cannot be specified in conjunction with
461 .BR CLONE_FS .
462 .IP
463 For further information on user namespaces, see
464 .BR user_namespaces (7).
465 .TP
466 .BR CLONE_NEWUTS " (since Linux 2.6.19)"
467 If
468 .B CLONE_NEWUTS
469 is set, then create the process in a new UTS namespace,
470 whose identifiers are initialized by duplicating the identifiers
471 from the UTS namespace of the calling process.
472 If this flag is not set, then (as with
473 .BR fork (2))
474 the process is created in the same UTS namespace as
475 the calling process.
476 This flag is intended for the implementation of containers.
477 .IP
478 A UTS namespace is the set of identifiers returned by
479 .BR uname (2);
480 among these, the domain name and the hostname can be modified by
481 .BR setdomainname (2)
482 and
483 .BR sethostname (2),
484 respectively.
485 Changes made to the identifiers in a UTS namespace
486 are visible to all other processes in the same namespace,
487 but are not visible to processes in other UTS namespaces.
488 .IP
489 Only a privileged process
490 .RB ( CAP_SYS_ADMIN )
491 can employ
492 .BR CLONE_NEWUTS .
493 .IP
494 For further information on UTS namespaces, see
495 .BR namespaces (7).
496 .TP
497 .BR CLONE_PARENT " (since Linux 2.3.12)"
498 If
499 .B CLONE_PARENT
500 is set, then the parent of the new child (as returned by
501 .BR getppid (2))
502 will be the same as that of the calling process.
503 .IP
504 If
505 .B CLONE_PARENT
506 is not set, then (as with
507 .BR fork (2))
508 the child's parent is the calling process.
509 .IP
510 Note that it is the parent process, as returned by
511 .BR getppid (2),
512 which is signaled when the child terminates, so that
513 if
514 .B CLONE_PARENT
515 is set, then the parent of the calling process, rather than the
516 calling process itself, will be signaled.
517 .TP
518 .BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
519 Store the child thread ID at the location
520 .I ptid
521 in the parent's memory.
522 (In Linux 2.5.32-2.5.48 there was a flag
523 .B CLONE_SETTID
524 that did this.)
525 The store operation completes before
526 .BR clone ()
527 returns control to user space.
528 .TP
529 .BR CLONE_PID " (obsolete)"
530 If
531 .B CLONE_PID
532 is set, the child process is created with the same process ID as
533 the calling process.
534 This is good for hacking the system, but otherwise
535 of not much use.
536 Since 2.3.21 this flag can be
537 specified only by the system boot process (PID 0).
538 It disappeared in Linux 2.5.16.
539 Since then, the kernel silently ignores it without error.
540 .TP
541 .BR CLONE_PTRACE " (since Linux 2.2)"
542 If
543 .B CLONE_PTRACE
544 is specified, and the calling process is being traced,
545 then trace the child also (see
546 .BR ptrace (2)).
547 .TP
548 .BR CLONE_SETTLS " (since Linux 2.5.32)"
549 The TLS (Thread Local Storage) descriptor is set to
550 .IR newtls .
551 .IP
552 The interpretation of
553 .I newtls
554 and the resulting effect is architecture dependent.
555 On x86,
556 .I newtls
557 is interpreted as a
558 .IR "struct user_desc *"
559 (see
560 .BR set_thread_area (2)).
561 On x86-64 it is the new value to be set for the %fs base register
562 (see the
563 .I ARCH_SET_FS
564 argument to
565 .BR arch_prctl (2)).
566 On architectures with a dedicated TLS register, it is the new value
567 of that register.
568 .TP
569 .BR CLONE_SIGHAND " (since Linux 2.0)"
570 If
571 .B CLONE_SIGHAND
572 is set, the calling process and the child process share the same table of
573 signal handlers.
574 If the calling process or child process calls
575 .BR sigaction (2)
576 to change the behavior associated with a signal, the behavior is
577 changed in the other process as well.
578 However, the calling process and child
579 processes still have distinct signal masks and sets of pending
580 signals.
581 So, one of them may block or unblock some signals using
582 .BR sigprocmask (2)
583 without affecting the other process.
584 .IP
585 If
586 .B CLONE_SIGHAND
587 is not set, the child process inherits a copy of the signal handlers
588 of the calling process at the time
589 .BR clone ()
590 is called.
591 Calls to
592 .BR sigaction (2)
593 performed later by one of the processes have no effect on the other
594 process.
595 .IP
596 Since Linux 2.6.0-test6,
597 .I flags
598 must also include
599 .B CLONE_VM
600 if
601 .B CLONE_SIGHAND
602 is specified.
603 .TP
604 .BR CLONE_STOPPED " (since Linux 2.6.0-test2)"
605 If
606 .B CLONE_STOPPED
607 is set, then the child is initially stopped (as though it was sent a
608 .B SIGSTOP
609 signal), and must be resumed by sending it a
610 .B SIGCONT
611 signal.
612 .IP
613 This flag was
614 .I deprecated
615 from Linux 2.6.25 onward,
616 and was
617 .I removed
618 altogether in Linux 2.6.38.
619 Since then, the kernel silently ignores it without error.
620 .\" glibc 2.8 removed this defn from bits/sched.h
621 Starting with Linux 4.6, the same bit was reused for the
622 .BR CLONE_NEWCGROUP
623 flag.
624 .TP
625 .BR CLONE_SYSVSEM " (since Linux 2.5.10)"
626 If
627 .B CLONE_SYSVSEM
628 is set, then the child and the calling process share
629 a single list of System V semaphore adjustment
630 .RI ( semadj )
631 values (see
632 .BR semop (2)).
633 In this case, the shared list accumulates
634 .I semadj
635 values across all processes sharing the list,
636 and semaphore adjustments are performed only when the last process
637 that is sharing the list terminates (or ceases sharing the list using
638 .BR unshare (2)).
639 If this flag is not set, then the child has a separate
640 .I semadj
641 list that is initially empty.
642 .TP
643 .BR CLONE_THREAD " (since Linux 2.4.0-test8)"
644 If
645 .B CLONE_THREAD
646 is set, the child is placed in the same thread group as the calling process.
647 To make the remainder of the discussion of
648 .B CLONE_THREAD
649 more readable, the term "thread" is used to refer to the
650 processes within a thread group.
651 .IP
652 Thread groups were a feature added in Linux 2.4 to support the
653 POSIX threads notion of a set of threads that share a single PID.
654 Internally, this shared PID is the so-called
655 thread group identifier (TGID) for the thread group.
656 Since Linux 2.4, calls to
657 .BR getpid (2)
658 return the TGID of the caller.
659 .IP
660 The threads within a group can be distinguished by their (system-wide)
661 unique thread IDs (TID).
662 A new thread's TID is available as the function result
663 returned to the caller of
664 .BR clone (),
665 and a thread can obtain
666 its own TID using
667 .BR gettid (2).
668 .IP
669 When a call is made to
670 .BR clone ()
671 without specifying
672 .BR CLONE_THREAD ,
673 then the resulting thread is placed in a new thread group
674 whose TGID is the same as the thread's TID.
675 This thread is the
676 .I leader
677 of the new thread group.
678 .IP
679 A new thread created with
680 .B CLONE_THREAD
681 has the same parent process as the caller of
682 .BR clone ()
683 (i.e., like
684 .BR CLONE_PARENT ),
685 so that calls to
686 .BR getppid (2)
687 return the same value for all of the threads in a thread group.
688 When a
689 .B CLONE_THREAD
690 thread terminates, the thread that created it using
691 .BR clone ()
692 is not sent a
693 .B SIGCHLD
694 (or other termination) signal;
695 nor can the status of such a thread be obtained
696 using
697 .BR wait (2).
698 (The thread is said to be
699 .IR detached .)
700 .IP
701 After all of the threads in a thread group terminate,
702 the parent process of the thread group is sent a
703 .B SIGCHLD
704 (or other termination) signal.
705 .IP
706 If any of the threads in a thread group performs an
707 .BR execve (2),
708 then all threads other than the thread group leader are terminated,
709 and the new program is executed in the thread group leader.
710 .IP
711 If one of the threads in a thread group creates a child using
712 .BR fork (2),
713 then any thread in the group can
714 .BR wait (2)
715 for that child.
716 .IP
717 Since Linux 2.5.35,
718 .I flags
719 must also include
720 .B CLONE_SIGHAND
721 if
722 .B CLONE_THREAD
723 is specified
724 (and note that, since Linux 2.6.0-test6,
725 .BR CLONE_SIGHAND
726 also requires
727 .BR CLONE_VM
728 to be included).
729 .IP
730 Signals may be sent to a thread group as a whole (i.e., a TGID) using
731 .BR kill (2),
732 or to a specific thread (i.e., TID) using
733 .BR tgkill (2).
734 .IP
735 Signal dispositions and actions are process-wide:
736 if an unhandled signal is delivered to a thread, then
737 it will affect (terminate, stop, continue, be ignored in)
738 all members of the thread group.
739 .IP
740 Each thread has its own signal mask, as set by
741 .BR sigprocmask (2),
742 but signals can be pending either: for the whole process
743 (i.e., deliverable to any member of the thread group),
744 when sent with
745 .BR kill (2);
746 or for an individual thread, when sent with
747 .BR tgkill (2).
748 A call to
749 .BR sigpending (2)
750 returns a signal set that is the union of the signals pending for the
751 whole process and the signals that are pending for the calling thread.
752 .IP
753 If
754 .BR kill (2)
755 is used to send a signal to a thread group,
756 and the thread group has installed a handler for the signal, then
757 the handler will be invoked in exactly one, arbitrarily selected
758 member of the thread group that has not blocked the signal.
759 If multiple threads in a group are waiting to accept the same signal using
760 .BR sigwaitinfo (2),
761 the kernel will arbitrarily select one of these threads
762 to receive a signal sent using
763 .BR kill (2).
764 .TP
765 .BR CLONE_UNTRACED " (since Linux 2.5.46)"
766 If
767 .B CLONE_UNTRACED
768 is specified, then a tracing process cannot force
769 .B CLONE_PTRACE
770 on this child process.
771 .TP
772 .BR CLONE_VFORK " (since Linux 2.2)"
773 If
774 .B CLONE_VFORK
775 is set, the execution of the calling process is suspended
776 until the child releases its virtual memory
777 resources via a call to
778 .BR execve (2)
779 or
780 .BR _exit (2)
781 (as with
782 .BR vfork (2)).
783 .IP
784 If
785 .B CLONE_VFORK
786 is not set, then both the calling process and the child are schedulable
787 after the call, and an application should not rely on execution occurring
788 in any particular order.
789 .TP
790 .BR CLONE_VM " (since Linux 2.0)"
791 If
792 .B CLONE_VM
793 is set, the calling process and the child process run in the same memory
794 space.
795 In particular, memory writes performed by the calling process
796 or by the child process are also visible in the other process.
797 Moreover, any memory mapping or unmapping performed with
798 .BR mmap (2)
799 or
800 .BR munmap (2)
801 by the child or calling process also affects the other process.
802 .IP
803 If
804 .B CLONE_VM
805 is not set, the child process runs in a separate copy of the memory
806 space of the calling process at the time of
807 .BR clone ().
808 Memory writes or file mappings/unmappings performed by one of the
809 processes do not affect the other, as with
810 .BR fork (2).
811 .SS C library/kernel differences
812 The raw
813 .BR clone ()
814 system call corresponds more closely to
815 .BR fork (2)
816 in that execution in the child continues from the point of the
817 call.
818 As such, the
819 .I fn
820 and
821 .I arg
822 arguments of the
823 .BR clone ()
824 wrapper function are omitted.
825 Furthermore, the argument order changes.
826 In addition, there are variations across architectures.
827 .PP
828 The raw system call interface on x86-64 and some other architectures
829 (including sh, tile, and alpha) is roughly:
830 .PP
831 .in +4
832 .EX
833 .BI "long clone(unsigned long " flags ", void *" child_stack ,
834 .BI " int *" ptid ", int *" ctid ,
835 .BI " unsigned long " newtls );
836 .EE
837 .in
838 .PP
839 On x86-32, and several other common architectures
840 (including score, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
841 and MIPS),
842 .\" CONFIG_CLONE_BACKWARDS
843 the order of the last two arguments is reversed:
844 .PP
845 .in +4
846 .EX
847 .BI "long clone(unsigned long " flags ", void *" child_stack ,
848 .BI " int *" ptid ", unsigned long " newtls ,
849 .BI " int *" ctid );
850 .EE
851 .in
852 .PP
853 On the cris and s390 architectures,
854 .\" CONFIG_CLONE_BACKWARDS2
855 the order of the first two arguments is reversed:
856 .PP
857 .in +4
858 .EX
859 .BI "long clone(void *" child_stack ", unsigned long " flags ,
860 .BI " int *" ptid ", int *" ctid ,
861 .BI " unsigned long " newtls );
862 .EE
863 .in
864 .PP
865 On the microblaze architecture,
866 .\" CONFIG_CLONE_BACKWARDS3
867 an additional argument is supplied:
868 .PP
869 .in +4
870 .EX
871 .BI "long clone(unsigned long " flags ", void *" child_stack ,
872 .BI " int " stack_size , "\fR /* Size of stack */"
873 .BI " int *" ptid ", int *" ctid ,
874 .BI " unsigned long " newtls );
875 .EE
876 .in
877 .PP
878 Another difference for the raw system call is that the
879 .I child_stack
880 argument may be zero, in which case copy-on-write semantics ensure that the
881 child gets separate copies of stack pages when either process modifies
882 the stack.
883 In this case, for correct operation, the
884 .B CLONE_VM
885 option should not be specified.
886 .\"
887 .SS blackfin, m68k, and sparc
888 .\" Mike Frysinger noted in a 2013 mail:
889 .\" these arches don't define __ARCH_WANT_SYS_CLONE:
890 .\" blackfin ia64 m68k sparc
891 The argument-passing conventions on
892 blackfin, m68k, and sparc are different from the descriptions above.
893 For details, see the kernel (and glibc) source.
894 .SS ia64
895 On ia64, a different interface is used:
896 .PP
897 .nf
898 .BI "int __clone2(int (*" "fn" ")(void *), "
899 .BI " void *" child_stack_base ", size_t " stack_size ,
900 .BI " int " flags ", void *" "arg" ", ... "
901 .BI " /* pid_t *" ptid ", struct user_desc *" tls \
902 ", pid_t *" ctid " */ );"
903 .fi
904 .PP
905 The prototype shown above is for the glibc wrapper function;
906 the raw system call interface has no
907 .I fn
908 or
909 .I arg
910 argument, and changes the order of the arguments so that
911 .I flags
912 is the first argument, and
913 .I tls
914 is the last argument.
915 .PP
916 .BR __clone2 ()
917 operates in the same way as
918 .BR clone (),
919 except that
920 .I child_stack_base
921 points to the lowest address of the child's stack area,
922 and
923 .I stack_size
924 specifies the size of the stack pointed to by
925 .IR child_stack_base .
926 .SS Linux 2.4 and earlier
927 In Linux 2.4 and earlier,
928 .BR clone ()
929 does not take arguments
930 .IR ptid ,
931 .IR tls ,
932 and
933 .IR ctid .
934 .SH RETURN VALUE
935 .\" gettid(2) returns current->pid;
936 .\" getpid(2) returns current->tgid;
937 On success, the thread ID of the child process is returned
938 in the caller's thread of execution.
939 On failure, \-1 is returned
940 in the caller's context, no child process will be created, and
941 .I errno
942 will be set appropriately.
943 .SH ERRORS
944 .TP
945 .B EAGAIN
946 Too many processes are already running; see
947 .BR fork (2).
948 .TP
949 .B EINVAL
950 .B CLONE_SIGHAND
951 was specified, but
952 .B CLONE_VM
953 was not.
954 (Since Linux 2.6.0-test6.)
955 .TP
956 .B EINVAL
957 .B CLONE_THREAD
958 was specified, but
959 .B CLONE_SIGHAND
960 was not.
961 (Since Linux 2.5.35.)
962 .\" .TP
963 .\" .B EINVAL
964 .\" Precisely one of
965 .\" .B CLONE_DETACHED
966 .\" and
967 .\" .B CLONE_THREAD
968 .\" was specified.
969 .\" (Since Linux 2.6.0-test6.)
970 .TP
971 .B EINVAL
972 .\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
973 Both
974 .B CLONE_FS
975 and
976 .B CLONE_NEWNS
977 were specified in
978 .IR flags .
979 .TP
980 .BR EINVAL " (since Linux 3.9)"
981 Both
982 .B CLONE_NEWUSER
983 and
984 .B CLONE_FS
985 were specified in
986 .IR flags .
987 .TP
988 .B EINVAL
989 Both
990 .B CLONE_NEWIPC
991 and
992 .B CLONE_SYSVSEM
993 were specified in
994 .IR flags .
995 .TP
996 .B EINVAL
997 One (or both) of
998 .BR CLONE_NEWPID
999 or
1000 .BR CLONE_NEWUSER
1001 and one (or both) of
1002 .BR CLONE_THREAD
1003 or
1004 .BR CLONE_PARENT
1005 were specified in
1006 .IR flags .
1007 .TP
1008 .B EINVAL
1009 Returned by the glibc
1010 .BR clone ()
1011 wrapper function when
1012 .IR fn
1013 or
1014 .IR child_stack
1015 is specified as NULL.
1016 .TP
1017 .B EINVAL
1018 .BR CLONE_NEWIPC
1019 was specified in
1020 .IR flags ,
1021 but the kernel was not configured with the
1022 .B CONFIG_SYSVIPC
1023 and
1024 .BR CONFIG_IPC_NS
1025 options.
1026 .TP
1027 .B EINVAL
1028 .BR CLONE_NEWNET
1029 was specified in
1030 .IR flags ,
1031 but the kernel was not configured with the
1032 .B CONFIG_NET_NS
1033 option.
1034 .TP
1035 .B EINVAL
1036 .BR CLONE_NEWPID
1037 was specified in
1038 .IR flags ,
1039 but the kernel was not configured with the
1040 .B CONFIG_PID_NS
1041 option.
1042 .TP
1043 .B EINVAL
1044 .BR CLONE_NEWUTS
1045 was specified in
1046 .IR flags ,
1047 but the kernel was not configured with the
1048 .B CONFIG_UTS_NS
1049 option.
1050 .TP
1051 .B EINVAL
1052 .I child_stack
1053 is not aligned to a suitable boundary for this architecture.
1054 For example, on aarch64,
1055 .I child_stack
1056 must be a multiple of 16.
1057 .TP
1058 .B ENOMEM
1059 Cannot allocate sufficient memory to allocate a task structure for the
1060 child, or to copy those parts of the caller's context that need to be
1061 copied.
1062 .TP
1063 .BR ENOSPC " (since Linux 3.7)"
1064 .\" commit f2302505775fd13ba93f034206f1e2a587017929
1065 .B CLONE_NEWPID
1066 was specified in \fIflags\fP,
1067 but the limit on the nesting depth of PID namespaces
1068 would have been exceeded; see
1069 .BR pid_namespaces (7).
1070 .TP
1071 .BR ENOSPC " (since Linux 4.9; beforehand " EUSERS )
1072 .B CLONE_NEWUSER
1073 was specified in
1074 .IR flags ,
1075 and the call would cause the limit on the number of
1076 nested user namespaces to be exceeded.
1077 See
1078 .BR user_namespaces (7).
1079 .IP
1080 From Linux 3.11 to Linux 4.8, the error diagnosed in this case was
1081 .BR EUSERS .
1082 .TP
1083 .BR ENOSPC " (since Linux 4.9)"
1084 One of the values in
1085 .I flags
1086 specified the creation of a new user namespace,
1087 but doing so would have caused the limit defined by the corresponding file in
1088 .IR /proc/sys/user
1089 to be exceeded.
1090 For further details, see
1091 .BR namespaces (7).
1092 .TP
1093 .B EPERM
1094 .BR CLONE_NEWCGROUP ,
1095 .BR CLONE_NEWIPC ,
1096 .BR CLONE_NEWNET ,
1097 .BR CLONE_NEWNS ,
1098 .BR CLONE_NEWPID ,
1099 or
1100 .BR CLONE_NEWUTS
1101 was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
1102 .TP
1103 .B EPERM
1104 .B CLONE_PID
1105 was specified by a process other than process 0.
1106 .TP
1107 .B EPERM
1108 .BR CLONE_NEWUSER
1109 was specified in
1110 .IR flags ,
1111 but either the effective user ID or the effective group ID of the caller
1112 does not have a mapping in the parent namespace (see
1113 .BR user_namespaces (7)).
1114 .TP
1115 .BR EPERM " (since Linux 3.9)"
1116 .\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
1117 .B CLONE_NEWUSER
1118 was specified in
1119 .I flags
1120 and the caller is in a chroot environment
1121 .\" FIXME What is the rationale for this restriction?
1122 (i.e., the caller's root directory does not match the root directory
1123 of the mount namespace in which it resides).
1124 .TP
1125 .BR ERESTARTNOINTR " (since Linux 2.6.17)"
1126 .\" commit 4a2c7a7837da1b91468e50426066d988050e4d56
1127 System call was interrupted by a signal and will be restarted.
1128 (This can be seen only during a trace.)
1129 .TP
1130 .BR EUSERS " (Linux 3.11 to Linux 4.8)"
1131 .B CLONE_NEWUSER
1132 was specified in
1133 .IR flags ,
1134 and the limit on the number of nested user namespaces would be exceeded.
1135 See the discussion of the
1136 .BR ENOSPC
1137 error above.
1138 .\" .SH VERSIONS
1139 .\" There is no entry for
1140 .\" .BR clone ()
1141 .\" in libc5.
1142 .\" glibc2 provides
1143 .\" .BR clone ()
1144 .\" as described in this manual page.
1145 .SH CONFORMING TO
1146 .BR clone ()
1147 is Linux-specific and should not be used in programs
1148 intended to be portable.
1149 .SH NOTES
1150 The
1151 .BR kcmp (2)
1152 system call can be used to test whether two processes share various
1153 resources such as a file descriptor table,
1154 System V semaphore undo operations, or a virtual address space.
1155 .PP
1157 Handlers registered using
1158 .BR pthread_atfork (3)
1159 are not executed during a call to
1160 .BR clone ().
1161 .PP
1162 In the Linux 2.4.x series,
1163 .B CLONE_THREAD
1164 generally does not make the parent of the new thread the same
1165 as the parent of the calling process.
1166 However, for kernel versions 2.4.7 to 2.4.18 the
1167 .B CLONE_THREAD
1168 flag implied the
1169 .B CLONE_PARENT
1170 flag (as in Linux 2.6.0 and later).
1171 .PP
1172 For a while there was
1173 .B CLONE_DETACHED
1174 (introduced in 2.5.32),
1175 which indicated that the parent wants no child-exit signal.
1176 In Linux 2.6.2, the need to give this flag together with
1177 .B CLONE_THREAD
1178 disappeared.
1179 This flag is still defined, but has no effect.
1180 .PP
1181 On i386,
1182 .BR clone ()
1183 should not be called through vsyscall, but directly through
1184 .IR "int $0x80" .
1185 .SH BUGS
1186 GNU C library versions 2.3.4 up to and including 2.24
1187 contained a wrapper function for
1188 .BR getpid (2)
1189 that performed caching of PIDs.
1190 This caching relied on support in the glibc wrapper for
1191 .BR clone (),
1192 but limitations in the implementation
1193 meant that the cache was not up to date in some circumstances.
1194 In particular,
1195 if a signal was delivered to the child immediately after the
1196 .BR clone ()
1197 call, then a call to
1198 .BR getpid (2)
1199 in a handler for the signal could return the PID
1200 of the calling process ("the parent"),
1201 if the clone wrapper had not yet had a chance to update the PID
1202 cache in the child.
1203 (This discussion ignores the case where the child was created using
1204 .BR CLONE_THREAD ,
1205 when
1206 .BR getpid (2)
1207 .I should
1208 return the same value in the child and in the process that called
1209 .BR clone (),
1210 since the caller and the child are in the same thread group.
1211 The stale-cache problem also does not occur if the
1212 .I flags
1213 argument includes
1214 .BR CLONE_VM .)
1215 To get the truth, it was sometimes necessary to use code such as the following:
1216 .PP
1217 .in +4n
1218 .EX
1219 #include <syscall.h>
1220
1221 pid_t mypid;
1222
1223 mypid = syscall(SYS_getpid);
1224 .EE
1225 .in
1226 .\" See also the following bug reports
1227 .\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1228 .\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
1229 .PP
1230 Because of the stale-cache problem, as well as other problems noted in
1231 .BR getpid (2),
1232 the PID caching feature was removed in glibc 2.25.
1233 .SH EXAMPLE
1234 The following program demonstrates the use of
1235 .BR clone ()
1236 to create a child process that executes in a separate UTS namespace.
1237 The child changes the hostname in its UTS namespace.
1238 Both parent and child then display the system hostname,
1239 making it possible to see that the hostname
1240 differs in the UTS namespaces of the parent and child.
1241 For an example of the use of this program, see
1242 .BR setns (2).
1243 .SS Program source
1244 .EX
1245 #define _GNU_SOURCE
1246 #include <sys/wait.h>
1247 #include <sys/utsname.h>
1248 #include <sched.h>
1249 #include <string.h>
1250 #include <stdio.h>
1251 #include <stdlib.h>
1252 #include <unistd.h>
1253
1254 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
1255 } while (0)
1256
1257 static int /* Start function for cloned child */
1258 childFunc(void *arg)
1259 {
1260 struct utsname uts;
1261
1262 /* Change hostname in UTS namespace of child */
1263
1264 if (sethostname(arg, strlen(arg)) == \-1)
1265 errExit("sethostname");
1266
1267 /* Retrieve and display hostname */
1268
1269 if (uname(&uts) == \-1)
1270 errExit("uname");
1271 printf("uts.nodename in child: %s\\n", uts.nodename);
1272
1273 /* Keep the namespace open for a while, by sleeping.
1274 This allows some experimentation\-\-for example, another
1275 process might join the namespace. */
1276
1277 sleep(200);
1278
1279 return 0; /* Child terminates now */
1280 }
1281
1282 #define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
1283
1284 int
1285 main(int argc, char *argv[])
1286 {
1287 char *stack; /* Start of stack buffer */
1288 char *stackTop; /* End of stack buffer */
1289 pid_t pid;
1290 struct utsname uts;
1291
1292 if (argc < 2) {
1293 fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]);
1294 exit(EXIT_SUCCESS);
1295 }
1296
1297 /* Allocate stack for child */
1298
1299 stack = malloc(STACK_SIZE);
1300 if (stack == NULL)
1301 errExit("malloc");
1302 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
1303
1304 /* Create child that has its own UTS namespace;
1305 child commences execution in childFunc() */
1306
1307 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1308 if (pid == \-1)
1309 errExit("clone");
1310 printf("clone() returned %ld\\n", (long) pid);
1311
1312 /* Parent falls through to here */
1313
1314 sleep(1); /* Give child time to change its hostname */
1315
1316 /* Display hostname in parent\(aqs UTS namespace. This will be
1317 different from hostname in child\(aqs UTS namespace. */
1318
1319 if (uname(&uts) == \-1)
1320 errExit("uname");
1321 printf("uts.nodename in parent: %s\\n", uts.nodename);
1322
1323 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
1324 errExit("waitpid");
1325 printf("child has terminated\\n");
1326
1327 exit(EXIT_SUCCESS);
1328 }
1329 .EE
1330 .SH SEE ALSO
1331 .BR fork (2),
1332 .BR futex (2),
1333 .BR getpid (2),
1334 .BR gettid (2),
1335 .BR kcmp (2),
1336 .BR set_thread_area (2),
1337 .BR set_tid_address (2),
1338 .BR setns (2),
1339 .BR tkill (2),
1340 .BR unshare (2),
1341 .BR wait (2),
1342 .BR capabilities (7),
1343 .BR namespaces (7),
1344 .BR pthreads (7)