]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/clone.2
clone.2: Minor wording fix (in preparation for subsequent patch)
[thirdparty/man-pages.git] / man2 / clone.2
1 .\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
2 .\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013
3 .\"
4 .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
5 .\" May be distributed under the GNU General Public License.
6 .\" %%%LICENSE_END
7 .\"
8 .\" Modified by Michael Haardt <michael@moria.de>
9 .\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
10 .\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
11 .\" New man page (copied from 'fork.2').
12 .\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
13 .\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
14 .\" Modified 26 Jun 2001 by Michael Kerrisk
15 .\" Mostly upgraded to 2.4.x
16 .\" Added prototype for sys_clone() plus description
17 .\" Added CLONE_THREAD with a brief description of thread groups
18 .\" Added CLONE_PARENT and revised entire page to remove ambiguity
19 .\" between "calling process" and "parent process"
20 .\" Added CLONE_PTRACE and CLONE_VFORK
21 .\" Added EPERM and EINVAL error codes
22 .\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
23 .\" various other minor tidy ups and clarifications.
24 .\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
25 .\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
26 .\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
27 .\" Added description for CLONE_NEWNS, which was added in 2.4.19
28 .\" Slightly rephrased, aeb.
29 .\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
30 .\" Modified 1 Jan 2004 - various updates, aeb
31 .\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
32 .\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
33 .\" wrapper under BUGS.
34 .\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
35 .\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
36 .\" 2008-11-18, mtk, order CLONE_* flags alphabetically
37 .\" 2008-11-18, mtk, document CLONE_NEWPID
38 .\" 2008-11-19, mtk, document CLONE_NEWUTS
39 .\" 2008-11-19, mtk, document CLONE_NEWIPC
40 .\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
41 .\"
42 .TH CLONE 2 2017-09-15 "Linux" "Linux Programmer's Manual"
43 .SH NAME
44 clone, __clone2 \- create a child process
45 .SH SYNOPSIS
46 .nf
47 /* Prototype for the glibc wrapper function */
48 .PP
49 .B #define _GNU_SOURCE
50 .B #include <sched.h>
51 .PP
52 .BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
53 .BI " int " flags ", void *" "arg" ", ... "
54 .BI " /* pid_t *" ptid ", void *" newtls \
55 ", pid_t *" ctid " */ );"
56 .PP
57 /* For the prototype of the raw system call, see NOTES */
58 .fi
59 .SH DESCRIPTION
60 .BR clone ()
61 creates a new process, in a manner similar to
62 .BR fork (2).
63 .PP
64 This page describes both the glibc
65 .BR clone ()
66 wrapper function and the underlying system call on which it is based.
67 The main text describes the wrapper function;
68 the differences for the raw system call
69 are described toward the end of this page.
70 .PP
71 Unlike
72 .BR fork (2),
73 .BR clone ()
74 allows the child process to share parts of its execution context with
75 the calling process, such as the memory space, the table of file
76 descriptors, and the table of signal handlers.
77 (Note that on this manual
78 page, "calling process" normally corresponds to "parent process".
79 But see the description of
80 .B CLONE_PARENT
81 below.)
82 .PP
83 One use of
84 .BR clone ()
85 is to implement threads: multiple threads of control in a program that
86 run concurrently in a shared memory space.
87 .PP
88 When the child process is created with
89 .BR clone (),
90 it executes the function
91 .IR fn ( arg ).
92 (This differs from
93 .BR fork (2),
94 where execution continues in the child from the point
95 of the
96 .BR fork (2)
97 call.)
98 The
99 .I fn
100 argument is a pointer to a function that is called by the child
101 process at the beginning of its execution.
102 The
103 .I arg
104 argument is passed to the
105 .I fn
106 function.
107 .PP
108 When the
109 .IR fn ( arg )
110 function application returns, the child process terminates.
111 The integer returned by
112 .I fn
113 is the exit code for the child process.
114 The child process may also terminate explicitly by calling
115 .BR exit (2)
116 or after receiving a fatal signal.
117 .PP
118 The
119 .I child_stack
120 argument specifies the location of the stack used by the child process.
121 Since the child and calling process may share memory,
122 it is not possible for the child process to execute in the
123 same stack as the calling process.
124 The calling process must therefore
125 set up memory space for the child stack and pass a pointer to this
126 space to
127 .BR clone ().
128 Stacks grow downward on all processors that run Linux
129 (except the HP PA processors), so
130 .I child_stack
131 usually points to the topmost address of the memory space set up for
132 the child stack.
133 .PP
134 The low byte of
135 .I flags
136 contains the number of the
137 .I "termination signal"
138 sent to the parent when the child dies.
139 If this signal is specified as anything other than
140 .BR SIGCHLD ,
141 then the parent process must specify the
142 .B __WALL
143 or
144 .B __WCLONE
145 options when waiting for the child with
146 .BR wait (2).
147 If no signal is specified, then the parent process is not signaled
148 when the child terminates.
149 .PP
150 .I flags
151 may also be bitwise-or'ed with zero or more of the following constants,
152 in order to specify what is shared between the calling process
153 and the child process:
154 .TP
155 .BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
156 Clear (zero) the child thread ID at the location
157 .I ctid
158 in child memory when the child exits, and do a wakeup on the futex
159 at that address.
160 The address involved may be changed by the
161 .BR set_tid_address (2)
162 system call.
163 This is used by threading libraries.
164 .TP
165 .BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
166 Store the child thread ID at the location
167 .I ctid
168 in the child's memory.
169 The store operation completes before
170 .BR clone ()
171 returns control to user space.
172 .TP
173 .BR CLONE_FILES " (since Linux 2.0)"
174 If
175 .B CLONE_FILES
176 is set, the calling process and the child process share the same file
177 descriptor table.
178 Any file descriptor created by the calling process or by the child
179 process is also valid in the other process.
180 Similarly, if one of the processes closes a file descriptor,
181 or changes its associated flags (using the
182 .BR fcntl (2)
183 .B F_SETFD
184 operation), the other process is also affected.
185 If a process sharing a file descriptor table calls
186 .BR execve (2),
187 its file descriptor table is duplicated (unshared).
188 .IP
189 If
190 .B CLONE_FILES
191 is not set, the child process inherits a copy of all file descriptors
192 opened in the calling process at the time of
193 .BR clone ().
194 Subsequent operations that open or close file descriptors,
195 or change file descriptor flags,
196 performed by either the calling
197 process or the child process do not affect the other process.
198 Note, however,
199 that the duplicated file descriptors in the child refer to the same open file
200 descriptions as the corresponding file descriptors in the calling process,
201 and thus share file offsets and file status flags (see
202 .BR open (2)).
203 .TP
204 .BR CLONE_FS " (since Linux 2.0)"
205 If
206 .B CLONE_FS
207 is set, the caller and the child process share the same filesystem
208 information.
209 This includes the root of the filesystem, the current
210 working directory, and the umask.
211 Any call to
212 .BR chroot (2),
213 .BR chdir (2),
214 or
215 .BR umask (2)
216 performed by the calling process or the child process also affects the
217 other process.
218 .IP
219 If
220 .B CLONE_FS
221 is not set, the child process works on a copy of the filesystem
222 information of the calling process at the time of the
223 .BR clone ()
224 call.
225 Calls to
226 .BR chroot (2),
227 .BR chdir (2),
228 or \fBumask\fP(2)
229 performed later by one of the processes do not affect the other process.
230 .TP
231 .BR CLONE_IO " (since Linux 2.6.25)"
232 If
233 .B CLONE_IO
234 is set, then the new process shares an I/O context with
235 the calling process.
236 If this flag is not set, then (as with
237 .BR fork (2))
238 the new process has its own I/O context.
239 .IP
240 .\" The following based on text from Jens Axboe
241 The I/O context is the I/O scope of the disk scheduler (i.e.,
242 what the I/O scheduler uses to model scheduling of a process's I/O).
243 If processes share the same I/O context,
244 they are treated as one by the I/O scheduler.
245 As a consequence, they get to share disk time.
246 For some I/O schedulers,
247 .\" the anticipatory and CFQ scheduler
248 if two processes share an I/O context,
249 they will be allowed to interleave their disk access.
250 If several threads are doing I/O on behalf of the same process
251 .RB ( aio_read (3),
252 for instance), they should employ
253 .BR CLONE_IO
254 to get better I/O performance.
255 .\" with CFQ and AS.
256 .IP
257 If the kernel is not configured with the
258 .B CONFIG_BLOCK
259 option, this flag is a no-op.
260 .TP
261 .BR CLONE_NEWCGROUP " (since Linux 4.6)"
262 Create the process in a new cgroup namespace.
263 If this flag is not set, then (as with
264 .BR fork (2))
265 the process is created in the same cgroup namespace as the calling process.
266 This flag is intended for the implementation of containers.
267 .IP
268 For further information on cgroup namespaces, see
269 .BR cgroup_namespaces (7).
270 .IP
271 Only a privileged process
272 .RB ( CAP_SYS_ADMIN )
273 can employ
274 .BR CLONE_NEWCGROUP .
275 .\"
276 .TP
277 .BR CLONE_NEWIPC " (since Linux 2.6.19)"
278 If
279 .B CLONE_NEWIPC
280 is set, then create the process in a new IPC namespace.
281 If this flag is not set, then (as with
282 .BR fork (2)),
283 the process is created in the same IPC namespace as
284 the calling process.
285 This flag is intended for the implementation of containers.
286 .IP
287 An IPC namespace provides an isolated view of System\ V IPC objects (see
288 .BR svipc (7))
289 and (since Linux 2.6.30)
290 .\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
291 .\" https://lwn.net/Articles/312232/
292 POSIX message queues
293 (see
294 .BR mq_overview (7)).
295 The common characteristic of these IPC mechanisms is that IPC
296 objects are identified by mechanisms other than filesystem
297 pathnames.
298 .IP
299 Objects created in an IPC namespace are visible to all other processes
300 that are members of that namespace,
301 but are not visible to processes in other IPC namespaces.
302 .IP
303 When an IPC namespace is destroyed
304 (i.e., when the last process that is a member of the namespace terminates),
305 all IPC objects in the namespace are automatically destroyed.
306 .IP
307 Only a privileged process
308 .RB ( CAP_SYS_ADMIN )
309 can employ
310 .BR CLONE_NEWIPC .
311 This flag can't be specified in conjunction with
312 .BR CLONE_SYSVSEM .
313 .IP
314 For further information on IPC namespaces, see
315 .BR namespaces (7).
316 .TP
317 .BR CLONE_NEWNET " (since Linux 2.6.24)"
318 (The implementation of this flag was completed only
319 by about kernel version 2.6.29.)
320 .IP
321 If
322 .B CLONE_NEWNET
323 is set, then create the process in a new network namespace.
324 If this flag is not set, then (as with
325 .BR fork (2))
326 the process is created in the same network namespace as
327 the calling process.
328 This flag is intended for the implementation of containers.
329 .IP
330 A network namespace provides an isolated view of the networking stack
331 (network device interfaces, IPv4 and IPv6 protocol stacks,
332 IP routing tables, firewall rules, the
333 .I /proc/net
334 and
335 .I /sys/class/net
336 directory trees, sockets, etc.).
337 A physical network device can live in exactly one
338 network namespace.
339 A virtual network device ("veth") pair provides a pipe-like abstraction
340 .\" FIXME . Add pointer to veth(4) page when it is eventually completed
341 that can be used to create tunnels between network namespaces,
342 and can be used to create a bridge to a physical network device
343 in another namespace.
344 .IP
345 When a network namespace is freed
346 (i.e., when the last process in the namespace terminates),
347 its physical network devices are moved back to the
348 initial network namespace (not to the parent of the process).
349 For further information on network namespaces, see
350 .BR namespaces (7).
351 .IP
352 Only a privileged process
353 .RB ( CAP_SYS_ADMIN )
354 can employ
355 .BR CLONE_NEWNET .
356 .TP
357 .BR CLONE_NEWNS " (since Linux 2.4.19)"
358 If
359 .B CLONE_NEWNS
360 is set, the cloned child is started in a new mount namespace,
361 initialized with a copy of the namespace of the parent.
362 If
363 .B CLONE_NEWNS
364 is not set, the child lives in the same mount
365 namespace as the parent.
366 .IP
367 Only a privileged process
368 .RB ( CAP_SYS_ADMIN )
369 can employ
370 .BR CLONE_NEWNS .
371 It is not permitted to specify both
372 .B CLONE_NEWNS
373 and
374 .B CLONE_FS
375 .\" See https://lwn.net/Articles/543273/
376 in the same
377 .BR clone ()
378 call.
379 .IP
380 For further information on mount namespaces, see
381 .BR namespaces (7)
382 and
383 .BR mount_namespaces (7).
384 .TP
385 .BR CLONE_NEWPID " (since Linux 2.6.24)"
386 .\" This explanation draws a lot of details from
387 .\" http://lwn.net/Articles/259217/
388 .\" Authors: Pavel Emelyanov <xemul@openvz.org>
389 .\" and Kir Kolyshkin <kir@openvz.org>
390 .\"
391 .\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
392 .\" Author: Pavel Emelyanov <xemul@openvz.org>
393 If
394 .B CLONE_NEWPID
395 is set, then create the process in a new PID namespace.
396 If this flag is not set, then (as with
397 .BR fork (2))
398 the process is created in the same PID namespace as
399 the calling process.
400 This flag is intended for the implementation of containers.
401 .IP
402 For further information on PID namespaces, see
403 .BR namespaces (7)
404 and
405 .BR pid_namespaces (7).
406 .IP
407 Only a privileged process
408 .RB ( CAP_SYS_ADMIN )
409 can employ
410 .BR CLONE_NEWPID .
411 This flag can't be specified in conjunction with
412 .BR CLONE_THREAD
413 or
414 .BR CLONE_PARENT .
415 .TP
416 .BR CLONE_NEWUSER
417 (This flag first became meaningful for
418 .BR clone ()
419 in Linux 2.6.23,
420 the current
421 .BR clone ()
422 semantics were merged in Linux 3.5,
423 and the final pieces to make the user namespaces completely usable were
424 merged in Linux 3.8.)
425 .IP
426 If
427 .B CLONE_NEWUSER
428 is set, then create the process in a new user namespace.
429 If this flag is not set, then (as with
430 .BR fork (2))
431 the process is created in the same user namespace as the calling process.
432 .IP
433 For further information on user namespaces, see
434 .BR namespaces (7)
435 and
436 .BR user_namespaces (7).
437 .IP
438 Before Linux 3.8, use of
439 .BR CLONE_NEWUSER
440 required that the caller have three capabilities:
441 .BR CAP_SYS_ADMIN ,
442 .BR CAP_SETUID ,
443 and
444 .BR CAP_SETGID .
445 .\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
446 Starting with Linux 3.8,
447 no privileges are needed to create a user namespace.
448 .IP
449 This flag can't be specified in conjunction with
450 .BR CLONE_THREAD
451 or
452 .BR CLONE_PARENT .
453 For security reasons,
454 .\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
455 .\" https://lwn.net/Articles/543273/
456 .\" The fix actually went into 3.9 and into 3.8.3. However, user namespaces
457 .\" were, for practical purposes, unusable in earlier 3.8.x because of the
458 .\" various filesystems that didn't support userns.
459 .BR CLONE_NEWUSER
460 cannot be specified in conjunction with
461 .BR CLONE_FS .
462 .IP
463 For further information on user namespaces, see
464 .BR user_namespaces (7).
465 .TP
466 .BR CLONE_NEWUTS " (since Linux 2.6.19)"
467 If
468 .B CLONE_NEWUTS
469 is set, then create the process in a new UTS namespace,
470 whose identifiers are initialized by duplicating the identifiers
471 from the UTS namespace of the calling process.
472 If this flag is not set, then (as with
473 .BR fork (2))
474 the process is created in the same UTS namespace as
475 the calling process.
476 This flag is intended for the implementation of containers.
477 .IP
478 A UTS namespace is the set of identifiers returned by
479 .BR uname (2);
480 among these, the domain name and the hostname can be modified by
481 .BR setdomainname (2)
482 and
483 .BR sethostname (2),
484 respectively.
485 Changes made to the identifiers in a UTS namespace
486 are visible to all other processes in the same namespace,
487 but are not visible to processes in other UTS namespaces.
488 .IP
489 Only a privileged process
490 .RB ( CAP_SYS_ADMIN )
491 can employ
492 .BR CLONE_NEWUTS .
493 .IP
494 For further information on UTS namespaces, see
495 .BR namespaces (7).
496 .TP
497 .BR CLONE_PARENT " (since Linux 2.3.12)"
498 If
499 .B CLONE_PARENT
500 is set, then the parent of the new child (as returned by
501 .BR getppid (2))
502 will be the same as that of the calling process.
503 .IP
504 If
505 .B CLONE_PARENT
506 is not set, then (as with
507 .BR fork (2))
508 the child's parent is the calling process.
509 .IP
510 Note that it is the parent process, as returned by
511 .BR getppid (2),
512 which is signaled when the child terminates, so that
513 if
514 .B CLONE_PARENT
515 is set, then the parent of the calling process, rather than the
516 calling process itself, will be signaled.
517 .TP
518 .BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
519 Store the child thread ID at the location
520 .I ptid
521 in the parent's memory.
522 (In Linux 2.5.32-2.5.48 there was a flag
523 .B CLONE_SETTID
524 that did this.)
525 The store operation completes before
526 .BR clone ()
527 returns control to user space.
528 .TP
529 .BR CLONE_PID " (obsolete)"
530 If
531 .B CLONE_PID
532 is set, the child process is created with the same process ID as
533 the calling process.
534 This is good for hacking the system, but otherwise
535 of not much use.
536 Since Linux 2.3.21 this flag can be
537 specified only by the system boot process (PID 0).
538 It disappeared in Linux 2.5.16.
539 Since then, the kernel silently ignores it without error.
540 .TP
541 .BR CLONE_PTRACE " (since Linux 2.2)"
542 If
543 .B CLONE_PTRACE
544 is specified, and the calling process is being traced,
545 then trace the child also (see
546 .BR ptrace (2)).
547 .TP
548 .BR CLONE_SETTLS " (since Linux 2.5.32)"
549 The TLS (Thread Local Storage) descriptor is set to
550 .IR newtls .
551 .IP
552 The interpretation of
553 .I newtls
554 and the resulting effect is architecture dependent.
555 On x86,
556 .I newtls
557 is interpreted as a
558 .IR "struct user_desc *"
559 (see
560 .BR set_thread_area (2)).
561 On x86_64 it is the new value to be set for the %fs base register
562 (see the
563 .I ARCH_SET_FS
564 argument to
565 .BR arch_prctl (2)).
566 On architectures with a dedicated TLS register, it is the new value
567 of that register.
568 .TP
569 .BR CLONE_SIGHAND " (since Linux 2.0)"
570 If
571 .B CLONE_SIGHAND
572 is set, the calling process and the child process share the same table of
573 signal handlers.
574 If the calling process or child process calls
575 .BR sigaction (2)
576 to change the behavior associated with a signal, the behavior is
577 changed in the other process as well.
578 However, the calling process and child
579 processes still have distinct signal masks and sets of pending
580 signals.
581 So, one of them may block or unblock some signals using
582 .BR sigprocmask (2)
583 without affecting the other process.
584 .IP
585 If
586 .B CLONE_SIGHAND
587 is not set, the child process inherits a copy of the signal handlers
588 of the calling process at the time
589 .BR clone ()
590 is called.
591 Calls to
592 .BR sigaction (2)
593 performed later by one of the processes have no effect on the other
594 process.
595 .IP
596 Since Linux 2.6.0-test6,
597 .I flags
598 must also include
599 .B CLONE_VM
600 if
601 .B CLONE_SIGHAND
602 is specified.
603 .TP
604 .BR CLONE_STOPPED " (since Linux 2.6.0-test2)"
605 If
606 .B CLONE_STOPPED
607 is set, then the child is initially stopped (as though it was sent a
608 .B SIGSTOP
609 signal), and must be resumed by sending it a
610 .B SIGCONT
611 signal.
612 .IP
613 This flag was
614 .I deprecated
615 from Linux 2.6.25 onward,
616 and was
617 .I removed
618 altogether in Linux 2.6.38.
619 Since then, the kernel silently ignores it without error.
620 .\" glibc 2.8 removed this defn from bits/sched.h
621 Starting with Linux 4.6, the same bit was reused for the
622 .BR CLONE_NEWCGROUP
623 flag.
624 .TP
625 .BR CLONE_SYSVSEM " (since Linux 2.5.10)"
626 If
627 .B CLONE_SYSVSEM
628 is set, then the child and the calling process share
629 a single list of System V semaphore adjustment
630 .RI ( semadj )
631 values (see
632 .BR semop (2)).
633 In this case, the shared list accumulates
634 .I semadj
635 values across all processes sharing the list,
636 and semaphore adjustments are performed only when the last process
637 that is sharing the list terminates (or ceases sharing the list using
638 .BR unshare (2)).
639 If this flag is not set, then the child has a separate
640 .I semadj
641 list that is initially empty.
642 .TP
643 .BR CLONE_THREAD " (since Linux 2.4.0-test8)"
644 If
645 .B CLONE_THREAD
646 is set, the child is placed in the same thread group as the calling process.
647 To make the remainder of the discussion of
648 .B CLONE_THREAD
649 more readable, the term "thread" is used to refer to the
650 processes within a thread group.
651 .IP
652 Thread groups were a feature added in Linux 2.4 to support the
653 POSIX threads notion of a set of threads that share a single PID.
654 Internally, this shared PID is the so-called
655 thread group identifier (TGID) for the thread group.
656 Since Linux 2.4, calls to
657 .BR getpid (2)
658 return the TGID of the caller.
659 .IP
660 The threads within a group can be distinguished by their (system-wide)
661 unique thread IDs (TID).
662 A new thread's TID is available as the function result
663 returned to the caller of
664 .BR clone (),
665 and a thread can obtain
666 its own TID using
667 .BR gettid (2).
668 .IP
669 When a call is made to
670 .BR clone ()
671 without specifying
672 .BR CLONE_THREAD ,
673 then the resulting thread is placed in a new thread group
674 whose TGID is the same as the thread's TID.
675 This thread is the
676 .I leader
677 of the new thread group.
678 .IP
679 A new thread created with
680 .B CLONE_THREAD
681 has the same parent process as the caller of
682 .BR clone ()
683 (i.e., like
684 .BR CLONE_PARENT ),
685 so that calls to
686 .BR getppid (2)
687 return the same value for all of the threads in a thread group.
688 When a
689 .B CLONE_THREAD
690 thread terminates, the thread that created it using
691 .BR clone ()
692 is not sent a
693 .B SIGCHLD
694 (or other termination) signal;
695 nor can the status of such a thread be obtained
696 using
697 .BR wait (2).
698 (The thread is said to be
699 .IR detached .)
700 .IP
701 After all of the threads in a thread group terminate,
702 the parent process of the thread group is sent a
703 .B SIGCHLD
704 (or other termination) signal.
705 .IP
706 If any of the threads in a thread group performs an
707 .BR execve (2),
708 then all threads other than the thread group leader are terminated,
709 and the new program is executed in the thread group leader.
710 .IP
711 If one of the threads in a thread group creates a child using
712 .BR fork (2),
713 then any thread in the group can
714 .BR wait (2)
715 for that child.
716 .IP
717 Since Linux 2.5.35,
718 .I flags
719 must also include
720 .B CLONE_SIGHAND
721 if
722 .B CLONE_THREAD
723 is specified
724 (and note that, since Linux 2.6.0-test6,
725 .BR CLONE_SIGHAND
726 also requires
727 .BR CLONE_VM
728 to be included).
729 .IP
730 Signals may be sent to a thread group as a whole (i.e., a TGID) using
731 .BR kill (2),
732 or to a specific thread (i.e., TID) using
733 .BR tgkill (2).
734 .IP
735 Signal dispositions and actions are process-wide:
736 if an unhandled signal is delivered to a thread, then
737 it will affect (terminate, stop, continue, be ignored in)
738 all members of the thread group.
739 .IP
740 Each thread has its own signal mask, as set by
741 .BR sigprocmask (2),
742 but signals can be pending either: for the whole process
743 (i.e., deliverable to any member of the thread group),
744 when sent with
745 .BR kill (2);
746 or for an individual thread, when sent with
747 .BR tgkill (2).
748 A call to
749 .BR sigpending (2)
750 returns a signal set that is the union of the signals pending for the
751 whole process and the signals that are pending for the calling thread.
752 .IP
753 If
754 .BR kill (2)
755 is used to send a signal to a thread group,
756 and the thread group has installed a handler for the signal, then
757 the handler will be invoked in exactly one, arbitrarily selected
758 member of the thread group that has not blocked the signal.
759 If multiple threads in a group are waiting to accept the same signal using
760 .BR sigwaitinfo (2),
761 the kernel will arbitrarily select one of these threads
762 to receive a signal sent using
763 .BR kill (2).
764 .TP
765 .BR CLONE_UNTRACED " (since Linux 2.5.46)"
766 If
767 .B CLONE_UNTRACED
768 is specified, then a tracing process cannot force
769 .B CLONE_PTRACE
770 on this child process.
771 .TP
772 .BR CLONE_VFORK " (since Linux 2.2)"
773 If
774 .B CLONE_VFORK
775 is set, the execution of the calling process is suspended
776 until the child releases its virtual memory
777 resources via a call to
778 .BR execve (2)
779 or
780 .BR _exit (2)
781 (as with
782 .BR vfork (2)).
783 .IP
784 If
785 .B CLONE_VFORK
786 is not set, then both the calling process and the child are schedulable
787 after the call, and an application should not rely on execution occurring
788 in any particular order.
789 .TP
790 .BR CLONE_VM " (since Linux 2.0)"
791 If
792 .B CLONE_VM
793 is set, the calling process and the child process run in the same memory
794 space.
795 In particular, memory writes performed by the calling process
796 or by the child process are also visible in the other process.
797 Moreover, any memory mapping or unmapping performed with
798 .BR mmap (2)
799 or
800 .BR munmap (2)
801 by the child or calling process also affects the other process.
802 .IP
803 If
804 .B CLONE_VM
805 is not set, the child process runs in a separate copy of the memory
806 space of the calling process at the time of
807 .BR clone ().
808 Memory writes or file mappings/unmappings performed by one of the
809 processes do not affect the other, as with
810 .BR fork (2).
811 .SS C library/kernel differences
812 The raw
813 .BR clone ()
814 system call corresponds more closely to
815 .BR fork (2)
816 in that execution in the child continues from the point of the
817 call.
818 As such, the
819 .I fn
820 and
821 .I arg
822 arguments of the
823 .BR clone ()
824 wrapper function are omitted.
825 .PP
826 The order of the arguments also differs in the raw system call,
827 and there are variations in the arguments across architectures,
828 as detailed in the following paragraphs.
829 .PP
830 The raw system call interface on x86-64 and some other architectures
831 (including sh, tile, and alpha) is roughly:
832 .PP
833 .in +4
834 .EX
835 .BI "long clone(unsigned long " flags ", void *" child_stack ,
836 .BI " int *" ptid ", int *" ctid ,
837 .BI " unsigned long " newtls );
838 .EE
839 .in
840 .PP
841 On x86-32, and several other common architectures
842 (including score, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
843 and MIPS),
844 .\" CONFIG_CLONE_BACKWARDS
845 the order of the last two arguments is reversed:
846 .PP
847 .in +4
848 .EX
849 .BI "long clone(unsigned long " flags ", void *" child_stack ,
850 .BI " int *" ptid ", unsigned long " newtls ,
851 .BI " int *" ctid );
852 .EE
853 .in
854 .PP
855 On the cris and s390 architectures,
856 .\" CONFIG_CLONE_BACKWARDS2
857 the order of the first two arguments is reversed:
858 .PP
859 .in +4
860 .EX
861 .BI "long clone(void *" child_stack ", unsigned long " flags ,
862 .BI " int *" ptid ", int *" ctid ,
863 .BI " unsigned long " newtls );
864 .EE
865 .in
866 .PP
867 On the microblaze architecture,
868 .\" CONFIG_CLONE_BACKWARDS3
869 an additional argument is supplied:
870 .PP
871 .in +4
872 .EX
873 .BI "long clone(unsigned long " flags ", void *" child_stack ,
874 .BI " int " stack_size , "\fR /* Size of stack */"
875 .BI " int *" ptid ", int *" ctid ,
876 .BI " unsigned long " newtls );
877 .EE
878 .in
879 .PP
880 Another difference for the raw system call is that the
881 .I child_stack
882 argument may be zero, in which case copy-on-write semantics ensure that the
883 child gets separate copies of stack pages when either process modifies
884 the stack.
885 In this case, for correct operation, the
886 .B CLONE_VM
887 option should not be specified.
888 .\"
889 .SS blackfin, m68k, and sparc
890 .\" Mike Frysinger noted in a 2013 mail:
891 .\" these arches don't define __ARCH_WANT_SYS_CLONE:
892 .\" blackfin ia64 m68k sparc
893 The argument-passing conventions on
894 blackfin, m68k, and sparc are different from the descriptions above.
895 For details, see the kernel (and glibc) source.
896 .SS ia64
897 On ia64, a different interface is used:
898 .PP
899 .nf
900 .BI "int __clone2(int (*" "fn" ")(void *), "
901 .BI " void *" child_stack_base ", size_t " stack_size ,
902 .BI " int " flags ", void *" "arg" ", ... "
903 .BI " /* pid_t *" ptid ", struct user_desc *" tls \
904 ", pid_t *" ctid " */ );"
905 .fi
906 .PP
907 The prototype shown above is for the glibc wrapper function;
908 the raw system call interface has no
909 .I fn
910 or
911 .I arg
912 argument, and changes the order of the arguments so that
913 .I flags
914 is the first argument, and
915 .I tls
916 is the last argument.
917 .PP
918 .BR __clone2 ()
919 operates in the same way as
920 .BR clone (),
921 except that
922 .I child_stack_base
923 points to the lowest address of the child's stack area,
924 and
925 .I stack_size
926 specifies the size of the stack pointed to by
927 .IR child_stack_base .
928 .SS Linux 2.4 and earlier
929 In Linux 2.4 and earlier,
930 .BR clone ()
931 does not take arguments
932 .IR ptid ,
933 .IR tls ,
934 and
935 .IR ctid .
936 .SH RETURN VALUE
937 .\" gettid(2) returns current->pid;
938 .\" getpid(2) returns current->tgid;
939 On success, the thread ID of the child process is returned
940 in the caller's thread of execution.
941 On failure, \-1 is returned
942 in the caller's context, no child process will be created, and
943 .I errno
944 will be set appropriately.
945 .SH ERRORS
946 .TP
947 .B EAGAIN
948 Too many processes are already running; see
949 .BR fork (2).
950 .TP
951 .B EINVAL
952 .B CLONE_SIGHAND
953 was specified, but
954 .B CLONE_VM
955 was not.
956 (Since Linux 2.6.0-test6.)
957 .TP
958 .B EINVAL
959 .B CLONE_THREAD
960 was specified, but
961 .B CLONE_SIGHAND
962 was not.
963 (Since Linux 2.5.35.)
964 .\" .TP
965 .\" .B EINVAL
966 .\" Precisely one of
967 .\" .B CLONE_DETACHED
968 .\" and
969 .\" .B CLONE_THREAD
970 .\" was specified.
971 .\" (Since Linux 2.6.0-test6.)
972 .TP
973 .B EINVAL
974 .\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
975 Both
976 .B CLONE_FS
977 and
978 .B CLONE_NEWNS
979 were specified in
980 .IR flags .
981 .TP
982 .BR EINVAL " (since Linux 3.9)"
983 Both
984 .B CLONE_NEWUSER
985 and
986 .B CLONE_FS
987 were specified in
988 .IR flags .
989 .TP
990 .B EINVAL
991 Both
992 .B CLONE_NEWIPC
993 and
994 .B CLONE_SYSVSEM
995 were specified in
996 .IR flags .
997 .TP
998 .B EINVAL
999 One (or both) of
1000 .BR CLONE_NEWPID
1001 or
1002 .BR CLONE_NEWUSER
1003 and one (or both) of
1004 .BR CLONE_THREAD
1005 or
1006 .BR CLONE_PARENT
1007 were specified in
1008 .IR flags .
1009 .TP
1010 .B EINVAL
1011 Returned by the glibc
1012 .BR clone ()
1013 wrapper function when
1014 .IR fn
1015 or
1016 .IR child_stack
1017 is specified as NULL.
1018 .TP
1019 .B EINVAL
1020 .BR CLONE_NEWIPC
1021 was specified in
1022 .IR flags ,
1023 but the kernel was not configured with the
1024 .B CONFIG_SYSVIPC
1025 and
1026 .BR CONFIG_IPC_NS
1027 options.
1028 .TP
1029 .B EINVAL
1030 .BR CLONE_NEWNET
1031 was specified in
1032 .IR flags ,
1033 but the kernel was not configured with the
1034 .B CONFIG_NET_NS
1035 option.
1036 .TP
1037 .B EINVAL
1038 .BR CLONE_NEWPID
1039 was specified in
1040 .IR flags ,
1041 but the kernel was not configured with the
1042 .B CONFIG_PID_NS
1043 option.
1044 .TP
1045 .B EINVAL
1046 .BR CLONE_NEWUTS
1047 was specified in
1048 .IR flags ,
1049 but the kernel was not configured with the
1050 .B CONFIG_UTS_NS
1051 option.
1052 .TP
1053 .B EINVAL
1054 .I child_stack
1055 is not aligned to a suitable boundary for this architecture.
1056 For example, on aarch64,
1057 .I child_stack
1058 must be a multiple of 16.
1059 .TP
1060 .B ENOMEM
1061 Insufficient memory is available to allocate a task structure for the
1062 child, or to copy those parts of the caller's context that need to be
1063 copied.
1064 .TP
1065 .BR ENOSPC " (since Linux 3.7)"
1066 .\" commit f2302505775fd13ba93f034206f1e2a587017929
1067 .B CLONE_NEWPID
1068 was specified in \fIflags\fP,
1069 but the limit on the nesting depth of PID namespaces
1070 would have been exceeded; see
1071 .BR pid_namespaces (7).
1072 .TP
1073 .BR ENOSPC " (since Linux 4.9; beforehand " EUSERS )
1074 .B CLONE_NEWUSER
1075 was specified in
1076 .IR flags ,
1077 and the call would cause the limit on the number of
1078 nested user namespaces to be exceeded.
1079 See
1080 .BR user_namespaces (7).
1081 .IP
1082 From Linux 3.11 to Linux 4.8, the error diagnosed in this case was
1083 .BR EUSERS .
1084 .TP
1085 .BR ENOSPC " (since Linux 4.9)"
1086 One of the values in
1087 .I flags
1088 specified the creation of a new user namespace,
1089 but doing so would have caused the limit defined by the corresponding file in
1090 .IR /proc/sys/user
1091 to be exceeded.
1092 For further details, see
1093 .BR namespaces (7).
1094 .TP
1095 .B EPERM
1096 .BR CLONE_NEWCGROUP ,
1097 .BR CLONE_NEWIPC ,
1098 .BR CLONE_NEWNET ,
1099 .BR CLONE_NEWNS ,
1100 .BR CLONE_NEWPID ,
1101 or
1102 .BR CLONE_NEWUTS
1103 was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
1104 .TP
1105 .B EPERM
1106 .B CLONE_PID
1107 was specified by a process other than process 0.
1108 .TP
1109 .B EPERM
1110 .BR CLONE_NEWUSER
1111 was specified in
1112 .IR flags ,
1113 but either the effective user ID or the effective group ID of the caller
1114 does not have a mapping in the parent namespace (see
1115 .BR user_namespaces (7)).
1116 .TP
1117 .BR EPERM " (since Linux 3.9)"
1118 .\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
1119 .B CLONE_NEWUSER
1120 was specified in
1121 .I flags
1122 and the caller is in a chroot environment
1123 .\" FIXME What is the rationale for this restriction?
1124 (i.e., the caller's root directory does not match the root directory
1125 of the mount namespace in which it resides).
1126 .TP
1127 .BR ERESTARTNOINTR " (since Linux 2.6.17)"
1128 .\" commit 4a2c7a7837da1b91468e50426066d988050e4d56
1129 System call was interrupted by a signal and will be restarted.
1130 (This can be seen only during a trace.)
1131 .TP
1132 .BR EUSERS " (Linux 3.11 to Linux 4.8)"
1133 .B CLONE_NEWUSER
1134 was specified in
1135 .IR flags ,
1136 and the limit on the number of nested user namespaces would be exceeded.
1137 See the discussion of the
1138 .BR ENOSPC
1139 error above.
1140 .\" .SH VERSIONS
1141 .\" There is no entry for
1142 .\" .BR clone ()
1143 .\" in libc5.
1144 .\" glibc2 provides
1145 .\" .BR clone ()
1146 .\" as described in this manual page.
1147 .SH CONFORMING TO
1148 .BR clone ()
1149 is Linux-specific and should not be used in programs
1150 intended to be portable.
1151 .SH NOTES
1152 The
1153 .BR kcmp (2)
1154 system call can be used to test whether two processes share various
1155 resources such as a file descriptor table,
1156 System V semaphore undo operations, or a virtual address space.
1157 .PP
1159 Handlers registered using
1160 .BR pthread_atfork (3)
1161 are not executed during a call to
1162 .BR clone ().
1163 .PP
1164 In the Linux 2.4.x series,
1165 .B CLONE_THREAD
1166 generally does not make the parent of the new thread the same
1167 as the parent of the calling process.
1168 However, for kernel versions 2.4.7 to 2.4.18 the
1169 .B CLONE_THREAD
1170 flag implied the
1171 .B CLONE_PARENT
1172 flag (as in Linux 2.6.0 and later).
1173 .PP
1174 For a while there was
1175 .B CLONE_DETACHED
1176 (introduced in Linux 2.5.32),
1177 which indicated that the parent wants no child-exit signal.
1178 In Linux 2.6.2, the need to give this flag together with
1179 .B CLONE_THREAD
1180 disappeared.
1181 This flag is still defined, but has no effect.
1182 .PP
1183 On i386,
1184 .BR clone ()
1185 should not be called through vsyscall, but directly through
1186 .IR "int $0x80" .
1187 .SH BUGS
1188 GNU C library versions 2.3.4 up to and including 2.24
1189 contained a wrapper function for
1190 .BR getpid (2)
1191 that performed caching of PIDs.
1192 This caching relied on support in the glibc wrapper for
1193 .BR clone (),
1194 but limitations in the implementation
1195 meant that the cache was not up to date in some circumstances.
1196 In particular,
1197 if a signal was delivered to the child immediately after the
1198 .BR clone ()
1199 call, then a call to
1200 .BR getpid (2)
1201 in a handler for the signal could return the PID
1202 of the calling process ("the parent"),
1203 if the clone wrapper had not yet had a chance to update the PID
1204 cache in the child.
1205 (This discussion ignores the case where the child was created using
1206 .BR CLONE_THREAD ,
1207 when
1208 .BR getpid (2)
1209 .I should
1210 return the same value in the child and in the process that called
1211 .BR clone (),
1212 since the caller and the child are in the same thread group.
1213 The stale-cache problem also does not occur if the
1214 .I flags
1215 argument includes
1216 .BR CLONE_VM .)
1217 To get the truth, it was sometimes necessary to use code such as the following:
1218 .PP
1219 .in +4n
1220 .EX
1221 #include <syscall.h>
1222
1223 pid_t mypid;
1224
1225 mypid = syscall(SYS_getpid);
1226 .EE
1227 .in
1228 .\" See also the following bug reports
1229 .\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1230 .\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
1231 .PP
1232 Because of the stale-cache problem, as well as other problems noted in
1233 .BR getpid (2),
1234 the PID caching feature was removed in glibc 2.25.
1235 .SH EXAMPLE
1236 The following program demonstrates the use of
1237 .BR clone ()
1238 to create a child process that executes in a separate UTS namespace.
1239 The child changes the hostname in its UTS namespace.
1240 Both parent and child then display the system hostname,
1241 making it possible to see that the hostname
1242 differs in the UTS namespaces of the parent and child.
1243 For an example of the use of this program, see
1244 .BR setns (2).
1245 .SS Program source
1246 .EX
1247 #define _GNU_SOURCE
1248 #include <sys/wait.h>
1249 #include <sys/utsname.h>
1250 #include <sched.h>
1251 #include <string.h>
1252 #include <stdio.h>
1253 #include <stdlib.h>
1254 #include <unistd.h>
1255
1256 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
1257 } while (0)
1258
1259 static int /* Start function for cloned child */
1260 childFunc(void *arg)
1261 {
1262 struct utsname uts;
1263
1264 /* Change hostname in UTS namespace of child */
1265
1266 if (sethostname(arg, strlen(arg)) == \-1)
1267 errExit("sethostname");
1268
1269 /* Retrieve and display hostname */
1270
1271 if (uname(&uts) == \-1)
1272 errExit("uname");
1273 printf("uts.nodename in child: %s\\n", uts.nodename);
1274
1275 /* Keep the namespace open for a while, by sleeping.
1276 This allows some experimentation\-\-for example, another
1277 process might join the namespace. */
1278
1279 sleep(200);
1280
1281 return 0; /* Child terminates now */
1282 }
1283
1284 #define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
1285
1286 int
1287 main(int argc, char *argv[])
1288 {
1289 char *stack; /* Start of stack buffer */
1290 char *stackTop; /* End of stack buffer */
1291 pid_t pid;
1292 struct utsname uts;
1293
1294 if (argc < 2) {
1295 fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]);
1296 exit(EXIT_SUCCESS);
1297 }
1298
1299 /* Allocate stack for child */
1300
1301 stack = malloc(STACK_SIZE);
1302 if (stack == NULL)
1303 errExit("malloc");
1304 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
1305
1306 /* Create child that has its own UTS namespace;
1307 child commences execution in childFunc() */
1308
1309 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1310 if (pid == \-1)
1311 errExit("clone");
1312 printf("clone() returned %ld\\n", (long) pid);
1313
1314 /* Parent falls through to here */
1315
1316 sleep(1); /* Give child time to change its hostname */
1317
1318 /* Display hostname in parent\(aqs UTS namespace. This will be
1319 different from hostname in child\(aqs UTS namespace. */
1320
1321 if (uname(&uts) == \-1)
1322 errExit("uname");
1323 printf("uts.nodename in parent: %s\\n", uts.nodename);
1324
1325 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
1326 errExit("waitpid");
1327 printf("child has terminated\\n");
1328
1329 exit(EXIT_SUCCESS);
1330 }
1331 .EE
1332 .SH SEE ALSO
1333 .BR fork (2),
1334 .BR futex (2),
1335 .BR getpid (2),
1336 .BR gettid (2),
1337 .BR kcmp (2),
1338 .BR set_thread_area (2),
1339 .BR set_tid_address (2),
1340 .BR setns (2),
1341 .BR tkill (2),
1342 .BR unshare (2),
1343 .BR wait (2),
1344 .BR capabilities (7),
1345 .BR namespaces (7),
1346 .BR pthreads (7)