]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/clone.2
_syscall.2, clock_getres.2, clone.2, copy_file_range.2, create_module.2, delete_modul...
[thirdparty/man-pages.git] / man2 / clone.2
1 .\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
2 .\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013
3 .\"
4 .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
5 .\" May be distributed under the GNU General Public License.
6 .\" %%%LICENSE_END
7 .\"
8 .\" Modified by Michael Haardt <michael@moria.de>
9 .\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
10 .\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
11 .\" New man page (copied from 'fork.2').
12 .\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
13 .\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
14 .\" Modified 26 Jun 2001 by Michael Kerrisk
15 .\" Mostly upgraded to 2.4.x
16 .\" Added prototype for sys_clone() plus description
17 .\" Added CLONE_THREAD with a brief description of thread groups
18 .\" Added CLONE_PARENT and revised entire page to remove ambiguity
19 .\" between "calling process" and "parent process"
20 .\" Added CLONE_PTRACE and CLONE_VFORK
21 .\" Added EPERM and EINVAL error codes
22 .\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
23 .\" various other minor tidy ups and clarifications.
24 .\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
25 .\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
26 .\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
27 .\" Added description for CLONE_NEWNS, which was added in 2.4.19
28 .\" Slightly rephrased, aeb.
29 .\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
30 .\" Modified 1 Jan 2004 - various updates, aeb
31 .\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
32 .\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
33 .\" wrapper under BUGS.
34 .\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
35 .\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
36 .\" 2008-11-18, mtk, order CLONE_* flags alphabetically
37 .\" 2008-11-18, mtk, document CLONE_NEWPID
38 .\" 2008-11-19, mtk, document CLONE_NEWUTS
39 .\" 2008-11-19, mtk, document CLONE_NEWIPC
40 .\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
41 .\"
42 .TH CLONE 2 2017-05-03 "Linux" "Linux Programmer's Manual"
43 .SH NAME
44 clone, __clone2 \- create a child process
45 .SH SYNOPSIS
46 .nf
47 /* Prototype for the glibc wrapper function */
48 .PP
49 .B #define _GNU_SOURCE
50 .B #include <sched.h>
51 .PP
52 .BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
53 .BI " int " flags ", void *" "arg" ", ... "
54 .BI " /* pid_t *" ptid ", void *" newtls \
55 ", pid_t *" ctid " */ );"
56 .PP
57 /* For the prototype of the raw system call, see NOTES */
58 .fi
59 .SH DESCRIPTION
60 .BR clone ()
61 creates a new process, in a manner similar to
62 .BR fork (2).
63
64 This page describes both the glibc
65 .BR clone ()
66 wrapper function and the underlying system call on which it is based.
67 The main text describes the wrapper function;
68 the differences for the raw system call
69 are described toward the end of this page.
70
71 Unlike
72 .BR fork (2),
73 .BR clone ()
74 allows the child process to share parts of its execution context with
75 the calling process, such as the memory space, the table of file
76 descriptors, and the table of signal handlers.
77 (Note that on this manual
78 page, "calling process" normally corresponds to "parent process".
79 But see the description of
80 .B CLONE_PARENT
81 below.)
82
83 One use of
84 .BR clone ()
85 is to implement threads: multiple threads of control in a program that
86 run concurrently in a shared memory space.
87
88 When the child process is created with
89 .BR clone (),
90 it executes the function
91 .IR fn ( arg ).
92 (This differs from
93 .BR fork (2),
94 where execution continues in the child from the point
95 of the
96 .BR fork (2)
97 call.)
98 The
99 .I fn
100 argument is a pointer to a function that is called by the child
101 process at the beginning of its execution.
102 The
103 .I arg
104 argument is passed to the
105 .I fn
106 function.
107
108 When the
109 .IR fn ( arg )
110 function application returns, the child process terminates.
111 The integer returned by
112 .I fn
113 is the exit code for the child process.
114 The child process may also terminate explicitly by calling
115 .BR exit (2)
116 or after receiving a fatal signal.
117
118 The
119 .I child_stack
120 argument specifies the location of the stack used by the child process.
121 Since the child and calling process may share memory,
122 it is not possible for the child process to execute in the
123 same stack as the calling process.
124 The calling process must therefore
125 set up memory space for the child stack and pass a pointer to this
126 space to
127 .BR clone ().
128 Stacks grow downward on all processors that run Linux
129 (except the HP PA processors), so
130 .I child_stack
131 usually points to the topmost address of the memory space set up for
132 the child stack.
133
134 The low byte of
135 .I flags
136 contains the number of the
137 .I "termination signal"
138 sent to the parent when the child dies.
139 If this signal is specified as anything other than
140 .BR SIGCHLD ,
141 then the parent process must specify the
142 .B __WALL
143 or
144 .B __WCLONE
145 options when waiting for the child with
146 .BR wait (2).
147 If no signal is specified, then the parent process is not signaled
148 when the child terminates.
149
150 .I flags
151 may also be bitwise-or'ed with zero or more of the following constants,
152 in order to specify what is shared between the calling process
153 and the child process:
154 .TP
155 .BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
156 Clear (zero) the child thread ID at the location
157 .I ctid
158 in child memory when the child exits, and do a wakeup on the futex
159 at that address.
160 The address involved may be changed by the
161 .BR set_tid_address (2)
162 system call.
163 This is used by threading libraries.
164 .TP
165 .BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
166 Store the child thread ID at the location
167 .I ctid
168 in the child's memory.
169 The store operation completes before
170 .BR clone ()
171 returns control to user space.
172 .TP
173 .BR CLONE_FILES " (since Linux 2.0)"
174 If
175 .B CLONE_FILES
176 is set, the calling process and the child process share the same file
177 descriptor table.
178 Any file descriptor created by the calling process or by the child
179 process is also valid in the other process.
180 Similarly, if one of the processes closes a file descriptor,
181 or changes its associated flags (using the
182 .BR fcntl (2)
183 .B F_SETFD
184 operation), the other process is also affected.
185 If a process sharing a file descriptor table calls
186 .BR execve (2),
187 its file descriptor table is duplicated (unshared).
188
189 If
190 .B CLONE_FILES
191 is not set, the child process inherits a copy of all file descriptors
192 opened in the calling process at the time of
193 .BR clone ().
194 Subsequent operations that open or close file descriptors,
195 or change file descriptor flags,
196 performed by either the calling
197 process or the child process do not affect the other process.
198 Note, however,
199 that the duplicated file descriptors in the child refer to the same open file
200 descriptions as the corresponding file descriptors in the calling process,
201 and thus share file offsets and file status flags (see
202 .BR open (2)).
203 .TP
204 .BR CLONE_FS " (since Linux 2.0)"
205 If
206 .B CLONE_FS
207 is set, the caller and the child process share the same filesystem
208 information.
209 This includes the root of the filesystem, the current
210 working directory, and the umask.
211 Any call to
212 .BR chroot (2),
213 .BR chdir (2),
214 or
215 .BR umask (2)
216 performed by the calling process or the child process also affects the
217 other process.
218
219 If
220 .B CLONE_FS
221 is not set, the child process works on a copy of the filesystem
222 information of the calling process at the time of the
223 .BR clone ()
224 call.
225 Calls to
226 .BR chroot (2),
227 .BR chdir (2),
228 .BR umask (2)
229 performed later by one of the processes do not affect the other process.
230 .TP
231 .BR CLONE_IO " (since Linux 2.6.25)"
232 If
233 .B CLONE_IO
234 is set, then the new process shares an I/O context with
235 the calling process.
236 If this flag is not set, then (as with
237 .BR fork (2))
238 the new process has its own I/O context.
239
240 .\" The following based on text from Jens Axboe
241 The I/O context is the I/O scope of the disk scheduler (i.e.,
242 what the I/O scheduler uses to model scheduling of a process's I/O).
243 If processes share the same I/O context,
244 they are treated as one by the I/O scheduler.
245 As a consequence, they get to share disk time.
246 For some I/O schedulers,
247 .\" the anticipatory and CFQ scheduler
248 if two processes share an I/O context,
249 they will be allowed to interleave their disk access.
250 If several threads are doing I/O on behalf of the same process
251 .RB ( aio_read (3),
252 for instance), they should employ
253 .BR CLONE_IO
254 to get better I/O performance.
255 .\" with CFQ and AS.
256
257 If the kernel is not configured with the
258 .B CONFIG_BLOCK
259 option, this flag is a no-op.
260 .TP
261 .BR CLONE_NEWCGROUP " (since Linux 4.6)"
262 Create the process in a new cgroup namespace.
263 If this flag is not set, then (as with
264 .BR fork (2))
265 the process is created in the same cgroup namespace as the calling process.
266 This flag is intended for the implementation of containers.
267
268 For further information on cgroup namespaces, see
269 .BR cgroup_namespaces (7).
270
271 Only a privileged process
272 .RB ( CAP_SYS_ADMIN )
273 can employ
274 .BR CLONE_NEWCGROUP .
275 .\"
276 .TP
277 .BR CLONE_NEWIPC " (since Linux 2.6.19)"
278 If
279 .B CLONE_NEWIPC
280 is set, then create the process in a new IPC namespace.
281 If this flag is not set, then (as with
282 .BR fork (2)),
283 the process is created in the same IPC namespace as
284 the calling process.
285 This flag is intended for the implementation of containers.
286
287 An IPC namespace provides an isolated view of System\ V IPC objects (see
288 .BR svipc (7))
289 and (since Linux 2.6.30)
290 .\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
291 .\" https://lwn.net/Articles/312232/
292 POSIX message queues
293 (see
294 .BR mq_overview (7)).
295 The common characteristic of these IPC mechanisms is that IPC
296 objects are identified by mechanisms other than filesystem
297 pathnames.
298
299 Objects created in an IPC namespace are visible to all other processes
300 that are members of that namespace,
301 but are not visible to processes in other IPC namespaces.
302
303 When an IPC namespace is destroyed
304 (i.e., when the last process that is a member of the namespace terminates),
305 all IPC objects in the namespace are automatically destroyed.
306
307 Only a privileged process
308 .RB ( CAP_SYS_ADMIN )
309 can employ
310 .BR CLONE_NEWIPC .
311 This flag can't be specified in conjunction with
312 .BR CLONE_SYSVSEM .
313
314 For further information on IPC namespaces, see
315 .BR namespaces (7).
316 .TP
317 .BR CLONE_NEWNET " (since Linux 2.6.24)"
318 (The implementation of this flag was completed only
319 by about kernel version 2.6.29.)
320
321 If
322 .B CLONE_NEWNET
323 is set, then create the process in a new network namespace.
324 If this flag is not set, then (as with
325 .BR fork (2))
326 the process is created in the same network namespace as
327 the calling process.
328 This flag is intended for the implementation of containers.
329
330 A network namespace provides an isolated view of the networking stack
331 (network device interfaces, IPv4 and IPv6 protocol stacks,
332 IP routing tables, firewall rules, the
333 .I /proc/net
334 and
335 .I /sys/class/net
336 directory trees, sockets, etc.).
337 A physical network device can live in exactly one
338 network namespace.
339 A virtual network device ("veth") pair provides a pipe-like abstraction
340 .\" FIXME . Add pointer to veth(4) page when it is eventually completed
341 that can be used to create tunnels between network namespaces,
342 and can be used to create a bridge to a physical network device
343 in another namespace.
344
345 When a network namespace is freed
346 (i.e., when the last process in the namespace terminates),
347 its physical network devices are moved back to the
348 initial network namespace (not to the parent of the process).
349 For further information on network namespaces, see
350 .BR namespaces (7).
351
352 Only a privileged process
353 .RB ( CAP_SYS_ADMIN )
354 can employ
355 .BR CLONE_NEWNET .
356 .TP
357 .BR CLONE_NEWNS " (since Linux 2.4.19)"
358 If
359 .B CLONE_NEWNS
360 is set, the cloned child is started in a new mount namespace,
361 initialized with a copy of the namespace of the parent.
362 If
363 .B CLONE_NEWNS
364 is not set, the child lives in the same mount
365 namespace as the parent.
366
367 Only a privileged process
368 .RB ( CAP_SYS_ADMIN )
369 can employ
370 .BR CLONE_NEWNS .
371 It is not permitted to specify both
372 .B CLONE_NEWNS
373 and
374 .B CLONE_FS
375 .\" See https://lwn.net/Articles/543273/
376 in the same
377 .BR clone ()
378 call.
379
380 For further information on mount namespaces, see
381 .BR namespaces (7)
382 and
383 .BR mount_namespaces (7).
384 .TP
385 .BR CLONE_NEWPID " (since Linux 2.6.24)"
386 .\" This explanation draws a lot of details from
387 .\" http://lwn.net/Articles/259217/
388 .\" Authors: Pavel Emelyanov <xemul@openvz.org>
389 .\" and Kir Kolyshkin <kir@openvz.org>
390 .\"
391 .\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
392 .\" Author: Pavel Emelyanov <xemul@openvz.org>
393 If
394 .B CLONE_NEWPID
395 is set, then create the process in a new PID namespace.
396 If this flag is not set, then (as with
397 .BR fork (2))
398 the process is created in the same PID namespace as
399 the calling process.
400 This flag is intended for the implementation of containers.
401
402 For further information on PID namespaces, see
403 .BR namespaces (7)
404 and
405 .BR pid_namespaces (7).
406
407 Only a privileged process
408 .RB ( CAP_SYS_ADMIN )
409 can employ
410 .BR CLONE_NEWPID .
411 This flag can't be specified in conjunction with
412 .BR CLONE_THREAD
413 or
414 .BR CLONE_PARENT .
415 .TP
416 .BR CLONE_NEWUSER
417 (This flag first became meaningful for
418 .BR clone ()
419 in Linux 2.6.23,
420 the current
421 .BR clone ()
422 semantics were merged in Linux 3.5,
423 and the final pieces to make the user namespaces completely usable were
424 merged in Linux 3.8.)
425
426 If
427 .B CLONE_NEWUSER
428 is set, then create the process in a new user namespace.
429 If this flag is not set, then (as with
430 .BR fork (2))
431 the process is created in the same user namespace as the calling process.
432
433 For further information on user namespaces, see
434 .BR namespaces (7)
435 and
436 .BR user_namespaces (7).
437
438 Before Linux 3.8, use of
439 .BR CLONE_NEWUSER
440 required that the caller have three capabilities:
441 .BR CAP_SYS_ADMIN ,
442 .BR CAP_SETUID ,
443 and
444 .BR CAP_SETGID .
445 .\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
446 Starting with Linux 3.8,
447 no privileges are needed to create a user namespace.
448
449 This flag can't be specified in conjunction with
450 .BR CLONE_THREAD
451 or
452 .BR CLONE_PARENT .
453 For security reasons,
454 .\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
455 .\" https://lwn.net/Articles/543273/
456 .\" The fix actually went into 3.9 and into 3.8.3. However, user namespaces
457 .\" were, for practical purposes, unusable in earlier 3.8.x because of the
458 .\" various filesystems that didn't support userns.
459 .BR CLONE_NEWUSER
460 cannot be specified in conjunction with
461 .BR CLONE_FS .
462
463 For further information on user namespaces, see
464 .BR user_namespaces (7).
465 .TP
466 .BR CLONE_NEWUTS " (since Linux 2.6.19)"
467 If
468 .B CLONE_NEWUTS
469 is set, then create the process in a new UTS namespace,
470 whose identifiers are initialized by duplicating the identifiers
471 from the UTS namespace of the calling process.
472 If this flag is not set, then (as with
473 .BR fork (2))
474 the process is created in the same UTS namespace as
475 the calling process.
476 This flag is intended for the implementation of containers.
477
478 A UTS namespace is the set of identifiers returned by
479 .BR uname (2);
480 among these, the domain name and the hostname can be modified by
481 .BR setdomainname (2)
482 and
483 .BR sethostname (2),
484 respectively.
485 Changes made to the identifiers in a UTS namespace
486 are visible to all other processes in the same namespace,
487 but are not visible to processes in other UTS namespaces.
488
489 Only a privileged process
490 .RB ( CAP_SYS_ADMIN )
491 can employ
492 .BR CLONE_NEWUTS .
493
494 For further information on UTS namespaces, see
495 .BR namespaces (7).
496 .TP
497 .BR CLONE_PARENT " (since Linux 2.3.12)"
498 If
499 .B CLONE_PARENT
500 is set, then the parent of the new child (as returned by
501 .BR getppid (2))
502 will be the same as that of the calling process.
503
504 If
505 .B CLONE_PARENT
506 is not set, then (as with
507 .BR fork (2))
508 the child's parent is the calling process.
509
510 Note that it is the parent process, as returned by
511 .BR getppid (2),
512 which is signaled when the child terminates, so that
513 if
514 .B CLONE_PARENT
515 is set, then the parent of the calling process, rather than the
516 calling process itself, will be signaled.
517 .TP
518 .BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
519 Store the child thread ID at the location
520 .I ptid
521 in the parent's memory.
522 (In Linux 2.5.32-2.5.48 there was a flag
523 .B CLONE_SETTID
524 that did this.)
525 The store operation completes before
526 .BR clone ()
527 returns control to user space.
528 .TP
529 .BR CLONE_PID " (obsolete)"
530 If
531 .B CLONE_PID
532 is set, the child process is created with the same process ID as
533 the calling process.
534 This is good for hacking the system, but otherwise
535 of not much use.
536 Since 2.3.21 this flag can be
537 specified only by the system boot process (PID 0).
538 It disappeared in Linux 2.5.16.
539 Since then, the kernel silently ignores it without error.
540 .TP
541 .BR CLONE_PTRACE " (since Linux 2.2)"
542 If
543 .B CLONE_PTRACE
544 is specified, and the calling process is being traced,
545 then trace the child also (see
546 .BR ptrace (2)).
547 .TP
548 .BR CLONE_SETTLS " (since Linux 2.5.32)"
549 The TLS (Thread Local Storage) descriptor is set to
550 .IR newtls .
551
552 The interpretation of
553 .I newtls
554 and the resulting effect is architecture dependent.
555 On x86,
556 .I newtls
557 is interpreted as a
558 .IR "struct user_desc *"
559 (see
560 .BR set_thread_area (2)).
561 On x86-64 it is the new value to be set for the %fs base register
562 (see the
563 .I ARCH_SET_FS
564 argument to
565 .BR arch_prctl (2)).
566 On architectures with a dedicated TLS register, it is the new value
567 of that register.
568 .TP
569 .BR CLONE_SIGHAND " (since Linux 2.0)"
570 If
571 .B CLONE_SIGHAND
572 is set, the calling process and the child process share the same table of
573 signal handlers.
574 If the calling process or child process calls
575 .BR sigaction (2)
576 to change the behavior associated with a signal, the behavior is
577 changed in the other process as well.
578 However, the calling process and child
579 processes still have distinct signal masks and sets of pending
580 signals.
581 So, one of them may block or unblock some signals using
582 .BR sigprocmask (2)
583 without affecting the other process.
584
585 If
586 .B CLONE_SIGHAND
587 is not set, the child process inherits a copy of the signal handlers
588 of the calling process at the time
589 .BR clone ()
590 is called.
591 Calls to
592 .BR sigaction (2)
593 performed later by one of the processes have no effect on the other
594 process.
595
596 Since Linux 2.6.0-test6,
597 .I flags
598 must also include
599 .B CLONE_VM
600 if
601 .B CLONE_SIGHAND
602 is specified.
603 .TP
604 .BR CLONE_STOPPED " (since Linux 2.6.0-test2)"
605 If
606 .B CLONE_STOPPED
607 is set, then the child is initially stopped (as though it was sent a
608 .B SIGSTOP
609 signal), and must be resumed by sending it a
610 .B SIGCONT
611 signal.
612
613 This flag was
614 .I deprecated
615 from Linux 2.6.25 onward,
616 and was
617 .I removed
618 altogether in Linux 2.6.38.
619 Since then, the kernel silently ignores it without error.
620 .\" glibc 2.8 removed this defn from bits/sched.h
621 Starting with Linux 4.6, the same bit was reused for the
622 .BR CLONE_NEWCGROUP
623 flag.
624 .TP
625 .BR CLONE_SYSVSEM " (since Linux 2.5.10)"
626 If
627 .B CLONE_SYSVSEM
628 is set, then the child and the calling process share
629 a single list of System V semaphore adjustment
630 .RI ( semadj )
631 values (see
632 .BR semop (2)).
633 In this case, the shared list accumulates
634 .I semadj
635 values across all processes sharing the list,
636 and semaphore adjustments are performed only when the last process
637 that is sharing the list terminates (or ceases sharing the list using
638 .BR unshare (2)).
639 If this flag is not set, then the child has a separate
640 .I semadj
641 list that is initially empty.
642 .TP
643 .BR CLONE_THREAD " (since Linux 2.4.0-test8)"
644 If
645 .B CLONE_THREAD
646 is set, the child is placed in the same thread group as the calling process.
647 To make the remainder of the discussion of
648 .B CLONE_THREAD
649 more readable, the term "thread" is used to refer to the
650 processes within a thread group.
651
652 Thread groups were a feature added in Linux 2.4 to support the
653 POSIX threads notion of a set of threads that share a single PID.
654 Internally, this shared PID is the so-called
655 thread group identifier (TGID) for the thread group.
656 Since Linux 2.4, calls to
657 .BR getpid (2)
658 return the TGID of the caller.
659
660 The threads within a group can be distinguished by their (system-wide)
661 unique thread IDs (TID).
662 A new thread's TID is available as the function result
663 returned to the caller of
664 .BR clone (),
665 and a thread can obtain
666 its own TID using
667 .BR gettid (2).
668
669 When a call is made to
670 .BR clone ()
671 without specifying
672 .BR CLONE_THREAD ,
673 then the resulting thread is placed in a new thread group
674 whose TGID is the same as the thread's TID.
675 This thread is the
676 .I leader
677 of the new thread group.
678
679 A new thread created with
680 .B CLONE_THREAD
681 has the same parent process as the caller of
682 .BR clone ()
683 (i.e., like
684 .BR CLONE_PARENT ),
685 so that calls to
686 .BR getppid (2)
687 return the same value for all of the threads in a thread group.
688 When a
689 .B CLONE_THREAD
690 thread terminates, the thread that created it using
691 .BR clone ()
692 is not sent a
693 .B SIGCHLD
694 (or other termination) signal;
695 nor can the status of such a thread be obtained
696 using
697 .BR wait (2).
698 (The thread is said to be
699 .IR detached .)
700
701 After all of the threads in a thread group terminate,
702 the parent process of the thread group is sent a
703 .B SIGCHLD
704 (or other termination) signal.
705
706 If any of the threads in a thread group performs an
707 .BR execve (2),
708 then all threads other than the thread group leader are terminated,
709 and the new program is executed in the thread group leader.
710
711 If one of the threads in a thread group creates a child using
712 .BR fork (2),
713 then any thread in the group can
714 .BR wait (2)
715 for that child.
716
717 Since Linux 2.5.35,
718 .I flags
719 must also include
720 .B CLONE_SIGHAND
721 if
722 .B CLONE_THREAD
723 is specified
724 (and note that, since Linux 2.6.0-test6,
725 .BR CLONE_SIGHAND
726 also requires
727 .BR CLONE_VM
728 to be included).
729
730 Signals may be sent to a thread group as a whole (i.e., a TGID) using
731 .BR kill (2),
732 or to a specific thread (i.e., TID) using
733 .BR tgkill (2).
734
735 Signal dispositions and actions are process-wide:
736 if an unhandled signal is delivered to a thread, then
737 it will affect (terminate, stop, continue, be ignored in)
738 all members of the thread group.
739
740 Each thread has its own signal mask, as set by
741 .BR sigprocmask (2),
742 but signals can be pending either: for the whole process
743 (i.e., deliverable to any member of the thread group),
744 when sent with
745 .BR kill (2);
746 or for an individual thread, when sent with
747 .BR tgkill (2).
748 A call to
749 .BR sigpending (2)
750 returns a signal set that is the union of the signals pending for the
751 whole process and the signals that are pending for the calling thread.
752
753 If
754 .BR kill (2)
755 is used to send a signal to a thread group,
756 and the thread group has installed a handler for the signal, then
757 the handler will be invoked in exactly one, arbitrarily selected
758 member of the thread group that has not blocked the signal.
759 If multiple threads in a group are waiting to accept the same signal using
760 .BR sigwaitinfo (2),
761 the kernel will arbitrarily select one of these threads
762 to receive a signal sent using
763 .BR kill (2).
764 .TP
765 .BR CLONE_UNTRACED " (since Linux 2.5.46)"
766 If
767 .B CLONE_UNTRACED
768 is specified, then a tracing process cannot force
769 .B CLONE_PTRACE
770 on this child process.
771 .TP
772 .BR CLONE_VFORK " (since Linux 2.2)"
773 If
774 .B CLONE_VFORK
775 is set, the execution of the calling process is suspended
776 until the child releases its virtual memory
777 resources via a call to
778 .BR execve (2)
779 or
780 .BR _exit (2)
781 (as with
782 .BR vfork (2)).
783
784 If
785 .B CLONE_VFORK
786 is not set, then both the calling process and the child are schedulable
787 after the call, and an application should not rely on execution occurring
788 in any particular order.
789 .TP
790 .BR CLONE_VM " (since Linux 2.0)"
791 If
792 .B CLONE_VM
793 is set, the calling process and the child process run in the same memory
794 space.
795 In particular, memory writes performed by the calling process
796 or by the child process are also visible in the other process.
797 Moreover, any memory mapping or unmapping performed with
798 .BR mmap (2)
799 or
800 .BR munmap (2)
801 by the child or calling process also affects the other process.
802
803 If
804 .B CLONE_VM
805 is not set, the child process runs in a separate copy of the memory
806 space of the calling process at the time of
807 .BR clone ().
808 Memory writes or file mappings/unmappings performed by one of the
809 processes do not affect the other, as with
810 .BR fork (2).
811 .SS C library/kernel differences
812 The raw
813 .BR clone ()
814 system call corresponds more closely to
815 .BR fork (2)
816 in that execution in the child continues from the point of the
817 call.
818 As such, the
819 .I fn
820 and
821 .I arg
822 arguments of the
823 .BR clone ()
824 wrapper function are omitted.
825 Furthermore, the argument order changes.
826 In addition, there are variations across architectures.
827
828 The raw system call interface on x86-64 and some other architectures
829 (including sh, tile, and alpha) is roughly:
830
831 .in +4
832 .nf
833 .BI "long clone(unsigned long " flags ", void *" child_stack ,
834 .BI " int *" ptid ", int *" ctid ,
835 .BI " unsigned long " newtls );
836 .fi
837 .in
838
839 On x86-32, and several other common architectures
840 (including score, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
841 and MIPS),
842 .\" CONFIG_CLONE_BACKWARDS
843 the order of the last two arguments is reversed:
844
845 .in +4
846 .nf
847 .BI "long clone(unsigned long " flags ", void *" child_stack ,
848 .BI " int *" ptid ", unsigned long " newtls ,
849 .BI " int *" ctid );
850 .fi
851 .in
852
853 On the cris and s390 architectures,
854 .\" CONFIG_CLONE_BACKWARDS2
855 the order of the first two arguments is reversed:
856
857 .in +4
858 .nf
859 .BI "long clone(void *" child_stack ", unsigned long " flags ,
860 .BI " int *" ptid ", int *" ctid ,
861 .BI " unsigned long " newtls );
862 .fi
863 .in
864
865 On the microblaze architecture,
866 .\" CONFIG_CLONE_BACKWARDS3
867 an additional argument is supplied:
868
869 .in +4
870 .nf
871 .BI "long clone(unsigned long " flags ", void *" child_stack ,
872 .BI " int " stack_size , "\fR /* Size of stack */"
873 .BI " int *" ptid ", int *" ctid ,
874 .BI " unsigned long " newtls );
875 .fi
876 .in
877
878 Another difference for the raw system call is that the
879 .I child_stack
880 argument may be zero, in which case copy-on-write semantics ensure that the
881 child gets separate copies of stack pages when either process modifies
882 the stack.
883 In this case, for correct operation, the
884 .B CLONE_VM
885 option should not be specified.
886 .\"
887 .SS blackfin, m68k, and sparc
888 .\" Mike Frysinger noted in a 2013 mail:
889 .\" these arches don't define __ARCH_WANT_SYS_CLONE:
890 .\" blackfin ia64 m68k sparc
891 The argument-passing conventions on
892 blackfin, m68k, and sparc are different from the descriptions above.
893 For details, see the kernel (and glibc) source.
894 .SS ia64
895 On ia64, a different interface is used:
896 .nf
897
898 .BI "int __clone2(int (*" "fn" ")(void *), "
899 .BI " void *" child_stack_base ", size_t " stack_size ,
900 .BI " int " flags ", void *" "arg" ", ... "
901 .BI " /* pid_t *" ptid ", struct user_desc *" tls \
902 ", pid_t *" ctid " */ );"
903 .fi
904 .PP
905 The prototype shown above is for the glibc wrapper function;
906 the raw system call interface has no
907 .I fn
908 or
909 .I arg
910 argument, and changes the order of the arguments so that
911 .I flags
912 is the first argument, and
913 .I tls
914 is the last argument.
915 .PP
916 .BR __clone2 ()
917 operates in the same way as
918 .BR clone (),
919 except that
920 .I child_stack_base
921 points to the lowest address of the child's stack area,
922 and
923 .I stack_size
924 specifies the size of the stack pointed to by
925 .IR child_stack_base .
926 .SS Linux 2.4 and earlier
927 In Linux 2.4 and earlier,
928 .BR clone ()
929 does not take arguments
930 .IR ptid ,
931 .IR newtls ,
932 and
933 .IR ctid .
934 .SH RETURN VALUE
935 .\" gettid(2) returns current->pid;
936 .\" getpid(2) returns current->tgid;
937 On success, the thread ID of the child process is returned
938 in the caller's thread of execution.
939 On failure, \-1 is returned
940 in the caller's context, no child process will be created, and
941 .I errno
942 will be set appropriately.
943 .SH ERRORS
944 .TP
945 .B EAGAIN
946 Too many processes are already running; see
947 .BR fork (2).
948 .TP
949 .B EINVAL
950 .B CLONE_SIGHAND
951 was specified, but
952 .B CLONE_VM
953 was not.
954 (Since Linux 2.6.0-test6.)
955 .TP
956 .B EINVAL
957 .B CLONE_THREAD
958 was specified, but
959 .B CLONE_SIGHAND
960 was not.
961 (Since Linux 2.5.35.)
962 .\" .TP
963 .\" .B EINVAL
964 .\" Precisely one of
965 .\" .B CLONE_DETACHED
966 .\" and
967 .\" .B CLONE_THREAD
968 .\" was specified.
969 .\" (Since Linux 2.6.0-test6.)
970 .TP
971 .B EINVAL
972 .\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
973 Both
974 .B CLONE_FS
975 and
976 .B CLONE_NEWNS
977 were specified in
978 .IR flags .
979 .TP
980 .BR EINVAL " (since Linux 3.9)"
981 Both
982 .B CLONE_NEWUSER
983 and
984 .B CLONE_FS
985 were specified in
986 .IR flags .
987 .TP
988 .B EINVAL
989 Both
990 .B CLONE_NEWIPC
991 and
992 .B CLONE_SYSVSEM
993 were specified in
994 .IR flags .
995 .TP
996 .B EINVAL
997 One (or both) of
998 .BR CLONE_NEWPID
999 or
1000 .BR CLONE_NEWUSER
1001 and one (or both) of
1002 .BR CLONE_THREAD
1003 or
1004 .BR CLONE_PARENT
1005 were specified in
1006 .IR flags .
1007 .TP
1008 .B EINVAL
1009 Returned by the glibc
1010 .BR clone ()
1011 wrapper function when
1012 .IR fn
1013 or
1014 .IR child_stack
1015 is specified as NULL.
1016 .TP
1017 .B EINVAL
1018 .BR CLONE_NEWIPC
1019 was specified in
1020 .IR flags ,
1021 but the kernel was not configured with the
1022 .B CONFIG_SYSVIPC
1023 and
1024 .BR CONFIG_IPC_NS
1025 options.
1026 .TP
1027 .B EINVAL
1028 .BR CLONE_NEWNET
1029 was specified in
1030 .IR flags ,
1031 but the kernel was not configured with the
1032 .B CONFIG_NET_NS
1033 option.
1034 .TP
1035 .B EINVAL
1036 .BR CLONE_NEWPID
1037 was specified in
1038 .IR flags ,
1039 but the kernel was not configured with the
1040 .B CONFIG_PID_NS
1041 option.
1042 .TP
1043 .B EINVAL
1044 .BR CLONE_NEWUTS
1045 was specified in
1046 .IR flags ,
1047 but the kernel was not configured with the
1048 .B CONFIG_UTS_NS
1049 option.
1050 .TP
1051 .B EINVAL
1052 .I child_stack
1053 is not aligned to a suitable boundary for this architecture.
1054 For example, on aarch64,
1055 .I child_stack
1056 must be a multiple of 16.
1057 .TP
1058 .B ENOMEM
1059 Insufficient memory is available to allocate a task structure for the
1060 child, or to copy those parts of the caller's context that need to be
1061 copied.
1062 .TP
1063 .BR ENOSPC " (since Linux 3.7)"
1064 .\" commit f2302505775fd13ba93f034206f1e2a587017929
1065 .B CLONE_NEWPID
1066 was specified in \fIflags\fP,
1067 but the limit on the nesting depth of PID namespaces
1068 would have been exceeded; see
1069 .BR pid_namespaces (7).
1070 .TP
1071 .BR ENOSPC " (since Linux 4.9; beforehand " EUSERS )
1072 .B CLONE_NEWUSER
1073 was specified in
1074 .IR flags ,
1075 and the call would cause the limit on the number of
1076 nested user namespaces to be exceeded.
1077 See
1078 .BR user_namespaces (7).
1079 .IP
1080 From Linux 3.11 to Linux 4.8, the error diagnosed in this case was
1081 .BR EUSERS .
1082 .TP
1083 .BR ENOSPC " (since Linux 4.9)"
1084 One of the values in
1085 .I flags
1086 specified the creation of a new user namespace,
1087 but doing so would have caused the limit defined by the corresponding file in
1088 .IR /proc/sys/user
1089 to be exceeded.
1090 For further details, see
1091 .BR namespaces (7).
1092 .TP
1093 .B EPERM
1094 .BR CLONE_NEWCGROUP ,
1095 .BR CLONE_NEWIPC ,
1096 .BR CLONE_NEWNET ,
1097 .BR CLONE_NEWNS ,
1098 .BR CLONE_NEWPID ,
1099 or
1100 .BR CLONE_NEWUTS
1101 was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
1102 .TP
1103 .B EPERM
1104 .B CLONE_PID
1105 was specified by a process other than process 0.
1106 .TP
1107 .B EPERM
1108 .BR CLONE_NEWUSER
1109 was specified in
1110 .IR flags ,
1111 but either the effective user ID or the effective group ID of the caller
1112 does not have a mapping in the parent namespace (see
1113 .BR user_namespaces (7)).
1114 .TP
1115 .BR EPERM " (since Linux 3.9)"
1116 .\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
1117 .B CLONE_NEWUSER
1118 was specified in
1119 .I flags
1120 and the caller is in a chroot environment
1121 .\" FIXME What is the rationale for this restriction?
1122 (i.e., the caller's root directory does not match the root directory
1123 of the mount namespace in which it resides).
1124 .TP
1125 .BR ERESTARTNOINTR " (since Linux 2.6.17)"
1126 .\" commit 4a2c7a7837da1b91468e50426066d988050e4d56
1127 System call was interrupted by a signal and will be restarted.
1128 (This can be seen only during a trace.)
1129 .TP
1130 .BR EUSERS " (Linux 3.11 to Linux 4.8)"
1131 .B CLONE_NEWUSER
1132 was specified in
1133 .IR flags ,
1134 and the limit on the number of nested user namespaces would be exceeded.
1135 See the discussion of the
1136 .BR ENOSPC
1137 error above.
1138 .\" .SH VERSIONS
1139 .\" There is no entry for
1140 .\" .BR clone ()
1141 .\" in libc5.
1142 .\" glibc2 provides
1143 .\" .BR clone ()
1144 .\" as described in this manual page.
1145 .SH CONFORMING TO
1146 .BR clone ()
1147 is Linux-specific and should not be used in programs
1148 intended to be portable.
1149 .SH NOTES
1150 The
1151 .BR kcmp (2)
1152 system call can be used to test whether two processes share various
1153 resources such as a file descriptor table,
1154 System V semaphore undo operations, or a virtual address space.
1155
1156
1157 Handlers registered using
1158 .BR pthread_atfork (3)
1159 are not executed during a call to
1160 .BR clone ().
1161
1162 In the Linux 2.4.x series,
1163 .B CLONE_THREAD
1164 generally does not make the parent of the new thread the same
1165 as the parent of the calling process.
1166 However, for kernel versions 2.4.7 to 2.4.18 the
1167 .B CLONE_THREAD
1168 flag implied the
1169 .B CLONE_PARENT
1170 flag (as in Linux 2.6.0 and later).
1171
1172 For a while there was
1173 .B CLONE_DETACHED
1174 (introduced in 2.5.32),
1175 which indicated that the parent wanted no child-exit signal.
1176 In Linux 2.6.2, the need to give this flag together with
1177 .B CLONE_THREAD
1178 disappeared.
1179 This flag is still defined, but has no effect.
1180
1181 On i386,
1182 .BR clone ()
1183 should not be called through vsyscall, but directly through
1184 .IR "int $0x80" .
1185 .SH BUGS
1186 GNU C library versions 2.3.4 up to and including 2.24
1187 contained a wrapper function for
1188 .BR getpid (2)
1189 that performed caching of PIDs.
1190 This caching relied on support in the glibc wrapper for
1191 .BR clone (),
1192 but limitations in the implementation
1193 meant that the cache was not up to date in some circumstances.
1194 In particular,
1195 if a signal was delivered to the child immediately after the
1196 .BR clone ()
1197 call, then a call to
1198 .BR getpid (2)
1199 in a handler for the signal could return the PID
1200 of the calling process ("the parent"),
1201 if the clone wrapper had not yet had a chance to update the PID
1202 cache in the child.
1203 (This discussion ignores the case where the child was created using
1204 .BR CLONE_THREAD ,
1205 when
1206 .BR getpid (2)
1207 .I should
1208 return the same value in the child and in the process that called
1209 .BR clone (),
1210 since the caller and the child are in the same thread group.
1211 The stale-cache problem also does not occur if the
1212 .I flags
1213 argument includes
1214 .BR CLONE_VM .)
1215 To get the truth, it was sometimes necessary to use code such as the following:
1216 .PP
1217 .nf
1218 #include <syscall.h>
1219
1220 pid_t mypid; /* PID as reported by the kernel */
1221
1222 mypid = syscall(SYS_getpid); /* Bypass the caching wrapper */
1223 .fi
1224 .\" See also the following bug reports
1225 .\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1226 .\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
1227 .PP
1228 Because of the stale-cache problem, as well as other problems noted in
1229 .BR getpid (2),
1230 the PID caching feature was removed in glibc 2.25.
1231 .SH EXAMPLE
1232 The following program demonstrates the use of
1233 .BR clone ()
1234 to create a child process that executes in a separate UTS namespace.
1235 The child changes the hostname in its UTS namespace.
1236 Both parent and child then display the system hostname,
1237 making it possible to see that the hostname
1238 differs in the UTS namespaces of the parent and child.
1239 For an example of the use of this program, see
1240 .BR setns (2).
1241 .SS Program source
1242 .nf
1243 #define _GNU_SOURCE
1244 #include <sys/wait.h>
1245 #include <sys/utsname.h>
1246 #include <sched.h>
1247 #include <string.h>
1248 #include <stdio.h>
1249 #include <stdlib.h>
1250 #include <unistd.h>
1251
1252 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
1253 } while (0) /* Print msg with the errno text, then exit with failure */
1254
1255 static int /* Start function for cloned child; arg is the new hostname */
1256 childFunc(void *arg)
1257 {
1258 struct utsname uts; /* Filled in by uname() below */
1259
1260 /* Change hostname in UTS namespace of child to the string in arg */
1261
1262 if (sethostname(arg, strlen(arg)) == \-1)
1263 errExit("sethostname");
1264
1265 /* Retrieve and display hostname as seen by the child */
1266
1267 if (uname(&uts) == \-1)
1268 errExit("uname");
1269 printf("uts.nodename in child: %s\\n", uts.nodename);
1270
1271 /* Keep the namespace open for a while, by sleeping.
1272 This allows some experimentation\-\-for example, another
1273 process might join the namespace. */
1274
1275 sleep(200);
1276
1277 return 0; /* Child terminates now */
1278 }
1279
1280 #define STACK_SIZE (1024 * 1024) /* Size in bytes of stack for cloned child */
1281
1282 int /* Run a child in a new UTS namespace; show both hostnames */
1283 main(int argc, char *argv[])
1284 {
1285 char *stack; /* Start of stack buffer */
1286 char *stackTop; /* End of stack buffer */
1287 pid_t pid; /* Value returned by clone() */
1288 struct utsname uts; /* Filled in by uname() in the parent */
1289
1290 if (argc < 2) {
1291 fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]);
1292 exit(EXIT_SUCCESS);
1293 }
1294
1295 /* Allocate stack for child */
1296
1297 stack = malloc(STACK_SIZE);
1298 if (stack == NULL)
1299 errExit("malloc");
1300 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
1301
1302 /* Create child that has its own UTS namespace;
1303 child commences execution in childFunc(), with argv[1] as its argument */
1304
1305 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1306 if (pid == \-1)
1307 errExit("clone");
1308 printf("clone() returned %ld\\n", (long) pid);
1309
1310 /* Parent falls through to here */
1311
1312 sleep(1); /* Give child time to change its hostname */
1313
1314 /* Display hostname in parent\(aqs UTS namespace. This will be
1315 different from hostname in child\(aqs UTS namespace. */
1316
1317 if (uname(&uts) == \-1)
1318 errExit("uname");
1319 printf("uts.nodename in parent: %s\\n", uts.nodename);
1320
1321 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
1322 errExit("waitpid");
1323 printf("child has terminated\\n");
1324
1325 exit(EXIT_SUCCESS);
1326 }
1327 .fi
1328 .SH SEE ALSO
1329 .BR fork (2),
1330 .BR futex (2),
1331 .BR getpid (2),
1332 .BR gettid (2),
1333 .BR kcmp (2),
1334 .BR set_thread_area (2),
1335 .BR set_tid_address (2),
1336 .BR setns (2),
1337 .BR tkill (2),
1338 .BR unshare (2),
1339 .BR wait (2),
1340 .BR capabilities (7),
1341 .BR namespaces (7),
1342 .BR pthreads (7)