]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/clone.2
clone.2, unshare.2: Remove mention of _BSD_SOURCE and _SVID_SOURCE
[thirdparty/man-pages.git] / man2 / clone.2
1 .\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
2 .\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013
3 .\"
4 .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
5 .\" May be distributed under the GNU General Public License.
6 .\" %%%LICENSE_END
7 .\"
8 .\" Modified by Michael Haardt <michael@moria.de>
9 .\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
10 .\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
11 .\" New man page (copied from 'fork.2').
12 .\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
13 .\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
14 .\" Modified 26 Jun 2001 by Michael Kerrisk
15 .\" Mostly upgraded to 2.4.x
16 .\" Added prototype for sys_clone() plus description
17 .\" Added CLONE_THREAD with a brief description of thread groups
18 .\" Added CLONE_PARENT and revised entire page remove ambiguity
19 .\" between "calling process" and "parent process"
20 .\" Added CLONE_PTRACE and CLONE_VFORK
21 .\" Added EPERM and EINVAL error codes
22 .\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
23 .\" various other minor tidy ups and clarifications.
24 .\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
25 .\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
26 .\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
27 .\" Added description for CLONE_NEWNS, which was added in 2.4.19
28 .\" Slightly rephrased, aeb.
29 .\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
30 .\" Modified 1 Jan 2004 - various updates, aeb
31 .\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
32 .\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
33 .\" wrapper under BUGS.
34 .\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
35 .\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
36 .\" 2008-11-18, mtk, order CLONE_* flags alphabetically
37 .\" 2008-11-18, mtk, document CLONE_NEWPID
38 .\" 2008-11-19, mtk, document CLONE_NEWUTS
39 .\" 2008-11-19, mtk, document CLONE_NEWIPC
40 .\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
41 .\"
42 .TH CLONE 2 2015-07-23 "Linux" "Linux Programmer's Manual"
43 .SH NAME
44 clone, __clone2 \- create a child process
45 .SH SYNOPSIS
46 .nf
47 /* Prototype for the glibc wrapper function */
48
49 .B #define _GNU_SOURCE
50 .B #include <sched.h>
51
52 .BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
53 .BI " int " flags ", void *" "arg" ", ... "
54 .BI " /* pid_t *" ptid ", struct user_desc *" tls \
55 ", pid_t *" ctid " */ );"
56
57 /* Prototype for the raw system call */
58
59 .BI "long clone(unsigned long " flags ", void *" child_stack ,
60 .BI " void *" ptid ", void *" ctid ,
61 .BI " struct pt_regs *" regs );
62 .fi
63 .SH DESCRIPTION
64 .BR clone ()
65 creates a new process, in a manner similar to
66 .BR fork (2).
67
68 This page describes both the glibc
69 .BR clone ()
70 wrapper function and the underlying system call on which it is based.
71 The main text describes the wrapper function;
72 the differences for the raw system call
73 are described toward the end of this page.
74
75 Unlike
76 .BR fork (2),
77 .BR clone ()
78 allows the child process to share parts of its execution context with
79 the calling process, such as the memory space, the table of file
80 descriptors, and the table of signal handlers.
81 (Note that on this manual
82 page, "calling process" normally corresponds to "parent process".
83 But see the description of
84 .B CLONE_PARENT
85 below.)
86
87 One use of
88 .BR clone ()
89 is to implement threads: multiple threads of control in a program that
90 run concurrently in a shared memory space.
91
92 When the child process is created with
93 .BR clone (),
94 it executes the function
95 .IR fn ( arg ).
96 (This differs from
97 .BR fork (2),
98 where execution continues in the child from the point
99 of the
100 .BR fork (2)
101 call.)
102 The
103 .I fn
104 argument is a pointer to a function that is called by the child
105 process at the beginning of its execution.
106 The
107 .I arg
108 argument is passed to the
109 .I fn
110 function.
111
112 When the
113 .IR fn ( arg )
114 function application returns, the child process terminates.
115 The integer returned by
116 .I fn
117 is the exit code for the child process.
118 The child process may also terminate explicitly by calling
119 .BR exit (2)
120 or after receiving a fatal signal.
121
122 The
123 .I child_stack
124 argument specifies the location of the stack used by the child process.
125 Since the child and calling process may share memory,
126 it is not possible for the child process to execute in the
127 same stack as the calling process.
128 The calling process must therefore
129 set up memory space for the child stack and pass a pointer to this
130 space to
131 .BR clone ().
132 Stacks grow downward on all processors that run Linux
133 (except the HP PA processors), so
134 .I child_stack
135 usually points to the topmost address of the memory space set up for
136 the child stack.
137
138 The low byte of
139 .I flags
140 contains the number of the
141 .I "termination signal"
142 sent to the parent when the child dies.
143 If this signal is specified as anything other than
144 .BR SIGCHLD ,
145 then the parent process must specify the
146 .B __WALL
147 or
148 .B __WCLONE
149 options when waiting for the child with
150 .BR wait (2).
151 If no signal is specified, then the parent process is not signaled
152 when the child terminates.
153
154 .I flags
155 may also be bitwise-or'ed with zero or more of the following constants,
156 in order to specify what is shared between the calling process
157 and the child process:
158 .TP
159 .BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
160 Erase the child thread ID at the location
161 .I ctid
162 in child memory when the child exits, and do a wakeup on the futex
163 at that address.
164 The address involved may be changed by the
165 .BR set_tid_address (2)
166 system call.
167 This is used by threading libraries.
168 .TP
169 .BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
170 Store the child thread ID at the location
171 .I ctid
172 in the child's memory.
173 .TP
174 .BR CLONE_FILES " (since Linux 2.0)"
175 If
176 .B CLONE_FILES
177 is set, the calling process and the child process share the same file
178 descriptor table.
179 Any file descriptor created by the calling process or by the child
180 process is also valid in the other process.
181 Similarly, if one of the processes closes a file descriptor,
182 or changes its associated flags (using the
183 .BR fcntl (2)
184 .B F_SETFD
185 operation), the other process is also affected.
186 If a process sharing a file descriptor table calls
187 .BR execve (2),
188 its file descriptor table is duplicated (unshared).
189
190 If
191 .B CLONE_FILES
192 is not set, the child process inherits a copy of all file descriptors
193 opened in the calling process at the time of
194 .BR clone ().
195 (The duplicated file descriptors in the child refer to the
196 same open file descriptions (see
197 .BR open (2))
198 as the corresponding file descriptors in the calling process.)
199 Subsequent operations that open or close file descriptors,
200 or change file descriptor flags,
201 performed by either the calling
202 process or the child process do not affect the other process.
203 .TP
204 .BR CLONE_FS " (since Linux 2.0)"
205 If
206 .B CLONE_FS
207 is set, the caller and the child process share the same filesystem
208 information.
209 This includes the root of the filesystem, the current
210 working directory, and the umask.
211 Any call to
212 .BR chroot (2),
213 .BR chdir (2),
214 or
215 .BR umask (2)
216 performed by the calling process or the child process also affects the
217 other process.
218
219 If
220 .B CLONE_FS
221 is not set, the child process works on a copy of the filesystem
222 information of the calling process at the time of the
223 .BR clone ()
224 call.
225 Calls to
226 .BR chroot (2),
227 .BR chdir (2),
228 .BR umask (2)
229 performed later by one of the processes do not affect the other process.
230 .TP
231 .BR CLONE_IO " (since Linux 2.6.25)"
232 If
233 .B CLONE_IO
234 is set, then the new process shares an I/O context with
235 the calling process.
236 If this flag is not set, then (as with
237 .BR fork (2))
238 the new process has its own I/O context.
239
240 .\" The following based on text from Jens Axboe
241 The I/O context is the I/O scope of the disk scheduler (i.e.,
242 what the I/O scheduler uses to model scheduling of a process's I/O).
243 If processes share the same I/O context,
244 they are treated as one by the I/O scheduler.
245 As a consequence, they get to share disk time.
246 For some I/O schedulers,
247 .\" the anticipatory and CFQ scheduler
248 if two processes share an I/O context,
249 they will be allowed to interleave their disk access.
250 If several threads are doing I/O on behalf of the same process
251 .RB ( aio_read (3),
252 for instance), they should employ
253 .BR CLONE_IO
254 to get better I/O performance.
255 .\" with CFQ and AS.
256
257 If the kernel is not configured with the
258 .B CONFIG_BLOCK
259 option, this flag is a no-op.
260 .TP
261 .BR CLONE_NEWIPC " (since Linux 2.6.19)"
262 If
263 .B CLONE_NEWIPC
264 is set, then create the process in a new IPC namespace.
265 If this flag is not set, then (as with
266 .BR fork (2)),
267 the process is created in the same IPC namespace as
268 the calling process.
269 This flag is intended for the implementation of containers.
270
271 An IPC namespace provides an isolated view of System\ V IPC objects (see
272 .BR svipc (7))
273 and (since Linux 2.6.30)
274 .\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
275 .\" https://lwn.net/Articles/312232/
276 POSIX message queues
277 (see
278 .BR mq_overview (7)).
279 The common characteristic of these IPC mechanisms is that IPC
280 objects are identified by mechanisms other than filesystem
281 pathnames.
282
283 Objects created in an IPC namespace are visible to all other processes
284 that are members of that namespace,
285 but are not visible to processes in other IPC namespaces.
286
287 When an IPC namespace is destroyed
288 (i.e., when the last process that is a member of the namespace terminates),
289 all IPC objects in the namespace are automatically destroyed.
290
291 Only a privileged process
292 .RB ( CAP_SYS_ADMIN )
293 can employ
294 .BR CLONE_NEWIPC .
295 This flag can't be specified in conjunction with
296 .BR CLONE_SYSVSEM .
297
298 For further information on IPC namespaces, see
299 .BR namespaces (7).
300 .TP
301 .BR CLONE_NEWNET " (since Linux 2.6.24)"
302 (The implementation of this flag was completed only
303 by about kernel version 2.6.29.)
304
305 If
306 .B CLONE_NEWNET
307 is set, then create the process in a new network namespace.
308 If this flag is not set, then (as with
309 .BR fork (2))
310 the process is created in the same network namespace as
311 the calling process.
312 This flag is intended for the implementation of containers.
313
314 A network namespace provides an isolated view of the networking stack
315 (network device interfaces, IPv4 and IPv6 protocol stacks,
316 IP routing tables, firewall rules, the
317 .I /proc/net
318 and
319 .I /sys/class/net
320 directory trees, sockets, etc.).
321 A physical network device can live in exactly one
322 network namespace.
323 A virtual network device ("veth") pair provides a pipe-like abstraction
324 .\" FIXME . Add pointer to veth(4) page when it is eventually completed
325 that can be used to create tunnels between network namespaces,
326 and can be used to create a bridge to a physical network device
327 in another namespace.
328
329 When a network namespace is freed
330 (i.e., when the last process in the namespace terminates),
331 its physical network devices are moved back to the
332 initial network namespace (not to the parent of the process).
333 For further information on network namespaces, see
334 .BR namespaces (7).
335
336 Only a privileged process
337 .RB ( CAP_SYS_ADMIN )
338 can employ
339 .BR CLONE_NEWNET .
340 .TP
341 .BR CLONE_NEWNS " (since Linux 2.4.19)"
342 If
343 .B CLONE_NEWNS
344 is set, the cloned child is started in a new mount namespace,
345 initialized with a copy of the namespace of the parent.
346 If
347 .B CLONE_NEWNS
348 is not set, the child lives in the same mount
349 namespace as the parent.
350
351 For further information on mount namespaces, see
352 .BR namespaces (7).
353
354 Only a privileged process
355 .RB ( CAP_SYS_ADMIN )
356 can employ
357 .BR CLONE_NEWNS .
358 It is not permitted to specify both
359 .B CLONE_NEWNS
360 and
361 .B CLONE_FS
362 .\" See https://lwn.net/Articles/543273/
363 in the same
364 .BR clone ()
365 call.
366 .TP
367 .BR CLONE_NEWPID " (since Linux 2.6.24)"
368 .\" This explanation draws a lot of details from
369 .\" http://lwn.net/Articles/259217/
370 .\" Authors: Pavel Emelyanov <xemul@openvz.org>
371 .\" and Kir Kolyshkin <kir@openvz.org>
372 .\"
373 .\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
374 .\" Author: Pavel Emelyanov <xemul@openvz.org>
375 If
376 .B CLONE_NEWPID
377 is set, then create the process in a new PID namespace.
378 If this flag is not set, then (as with
379 .BR fork (2))
380 the process is created in the same PID namespace as
381 the calling process.
382 This flag is intended for the implementation of containers.
383
384 For further information on PID namespaces, see
385 .BR namespaces (7)
386 and
387 .BR pid_namespaces (7).
388
389 Only a privileged process
390 .RB ( CAP_SYS_ADMIN )
391 can employ
392 .BR CLONE_NEWPID .
393 This flag can't be specified in conjunction with
394 .BR CLONE_THREAD
395 or
396 .BR CLONE_PARENT .
397 .TP
398 .BR CLONE_NEWUSER
399 (This flag first became meaningful for
400 .BR clone ()
401 in Linux 2.6.23,
402 the current
403 .BR clone ()
404 semantics were merged in Linux 3.5,
405 and the final pieces to make the user namespaces completely usable were
406 merged in Linux 3.8.)
407
408 If
409 .B CLONE_NEWUSER
410 is set, then create the process in a new user namespace.
411 If this flag is not set, then (as with
412 .BR fork (2))
413 the process is created in the same user namespace as the calling process.
414
415 For further information on user namespaces, see
416 .BR namespaces (7)
417 and
418 .BR user_namespaces (7).
419
420 Before Linux 3.8, use of
421 .BR CLONE_NEWUSER
422 required that the caller have three capabilities:
423 .BR CAP_SYS_ADMIN ,
424 .BR CAP_SETUID ,
425 and
426 .BR CAP_SETGID .
427 .\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
428 Starting with Linux 3.8,
429 no privileges are needed to create a user namespace.
430
431 This flag can't be specified in conjunction with
432 .BR CLONE_THREAD
433 or
434 .BR CLONE_PARENT .
435 For security reasons,
436 .\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
437 .\" https://lwn.net/Articles/543273/
438 .\" The fix actually went into 3.9 and into 3.8.3. However, user namespaces
439 .\" were, for practical purposes, unusable in earlier 3.8.x because of the
440 .\" various filesystems that didn't support userns.
441 .BR CLONE_NEWUSER
442 cannot be specified in conjunction with
443 .BR CLONE_FS .
444
445 For further information on user namespaces, see
446 .BR user_namespaces (7).
447 .TP
448 .BR CLONE_NEWUTS " (since Linux 2.6.19)"
449 If
450 .B CLONE_NEWUTS
451 is set, then create the process in a new UTS namespace,
452 whose identifiers are initialized by duplicating the identifiers
453 from the UTS namespace of the calling process.
454 If this flag is not set, then (as with
455 .BR fork (2))
456 the process is created in the same UTS namespace as
457 the calling process.
458 This flag is intended for the implementation of containers.
459
460 A UTS namespace is the set of identifiers returned by
461 .BR uname (2);
462 among these, the domain name and the hostname can be modified by
463 .BR setdomainname (2)
464 and
465 .BR sethostname (2),
466 respectively.
467 Changes made to the identifiers in a UTS namespace
468 are visible to all other processes in the same namespace,
469 but are not visible to processes in other UTS namespaces.
470
471 Only a privileged process
472 .RB ( CAP_SYS_ADMIN )
473 can employ
474 .BR CLONE_NEWUTS .
475
476 For further information on UTS namespaces, see
477 .BR namespaces (7).
478 .TP
479 .BR CLONE_PARENT " (since Linux 2.3.12)"
480 If
481 .B CLONE_PARENT
482 is set, then the parent of the new child (as returned by
483 .BR getppid (2))
484 will be the same as that of the calling process.
485
486 If
487 .B CLONE_PARENT
488 is not set, then (as with
489 .BR fork (2))
490 the child's parent is the calling process.
491
492 Note that it is the parent process, as returned by
493 .BR getppid (2),
494 which is signaled when the child terminates, so that
495 if
496 .B CLONE_PARENT
497 is set, then the parent of the calling process, rather than the
498 calling process itself, will be signaled.
499 .TP
500 .BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
501 Store the child thread ID at the location
502 .I ptid
503 in the parent's memory.
504 (In Linux 2.5.32-2.5.48 there was a flag
505 .B CLONE_SETTID
506 that did this.)
507 .TP
508 .BR CLONE_PID " (obsolete)"
509 If
510 .B CLONE_PID
511 is set, the child process is created with the same process ID as
512 the calling process.
513 This is good for hacking the system, but otherwise
514 of not much use.
515 Since 2.3.21 this flag can be
516 specified only by the system boot process (PID 0).
517 It disappeared in Linux 2.5.16.
518 Since then, the kernel silently ignores it without error.
519 .TP
520 .BR CLONE_PTRACE " (since Linux 2.2)"
521 If
522 .B CLONE_PTRACE
523 is specified, and the calling process is being traced,
524 then trace the child also (see
525 .BR ptrace (2)).
526 .TP
527 .BR CLONE_SETTLS " (since Linux 2.5.32)"
528 The
529 .I tls
530 argument is the new TLS (Thread Local Storage) descriptor.
531 (See
532 .BR set_thread_area (2).)
533 .TP
534 .BR CLONE_SIGHAND " (since Linux 2.0)"
535 If
536 .B CLONE_SIGHAND
537 is set, the calling process and the child process share the same table of
538 signal handlers.
539 If the calling process or child process calls
540 .BR sigaction (2)
541 to change the behavior associated with a signal, the behavior is
542 changed in the other process as well.
543 However, the calling process and child
544 processes still have distinct signal masks and sets of pending
545 signals.
546 So, one of them may block or unblock some signals using
547 .BR sigprocmask (2)
548 without affecting the other process.
549
550 If
551 .B CLONE_SIGHAND
552 is not set, the child process inherits a copy of the signal handlers
553 of the calling process at the time
554 .BR clone ()
555 is called.
556 Calls to
557 .BR sigaction (2)
558 performed later by one of the processes have no effect on the other
559 process.
560
561 Since Linux 2.6.0-test6,
562 .I flags
563 must also include
564 .B CLONE_VM
565 if
566 .B CLONE_SIGHAND
567 is specified.
568 .TP
569 .BR CLONE_STOPPED " (since Linux 2.6.0-test2)"
570 If
571 .B CLONE_STOPPED
572 is set, then the child is initially stopped (as though it was sent a
573 .B SIGSTOP
574 signal), and must be resumed by sending it a
575 .B SIGCONT
576 signal.
577
578 This flag was
579 .I deprecated
580 from Linux 2.6.25 onward,
581 and was
582 .I removed
583 altogether in Linux 2.6.38.
584 Since then, the kernel silently ignores it without error.
585 .\" glibc 2.8 removed this defn from bits/sched.h
586 .TP
587 .BR CLONE_SYSVSEM " (since Linux 2.5.10)"
588 If
589 .B CLONE_SYSVSEM
590 is set, then the child and the calling process share
591 a single list of System V semaphore adjustment
592 .RI ( semadj )
593 values (see
594 .BR semop (2)).
595 In this case, the shared list accumulates
596 .I semadj
597 values across all processes sharing the list,
598 and semaphore adjustments are performed only when the last process
599 that is sharing the list terminates (or ceases sharing the list using
600 .BR unshare (2)).
601 If this flag is not set, then the child has a separate
602 .I semadj
603 list that is initially empty.
604 .TP
605 .BR CLONE_THREAD " (since Linux 2.4.0-test8)"
606 If
607 .B CLONE_THREAD
608 is set, the child is placed in the same thread group as the calling process.
609 To make the remainder of the discussion of
610 .B CLONE_THREAD
611 more readable, the term "thread" is used to refer to the
612 processes within a thread group.
613
614 Thread groups were a feature added in Linux 2.4 to support the
615 POSIX threads notion of a set of threads that share a single PID.
616 Internally, this shared PID is the so-called
617 thread group identifier (TGID) for the thread group.
618 Since Linux 2.4, calls to
619 .BR getpid (2)
620 return the TGID of the caller.
621
622 The threads within a group can be distinguished by their (system-wide)
623 unique thread IDs (TID).
624 A new thread's TID is available as the function result
625 returned to the caller of
626 .BR clone (),
627 and a thread can obtain
628 its own TID using
629 .BR gettid (2).
630
631 When a call is made to
632 .BR clone ()
633 without specifying
634 .BR CLONE_THREAD ,
635 then the resulting thread is placed in a new thread group
636 whose TGID is the same as the thread's TID.
637 This thread is the
638 .I leader
639 of the new thread group.
640
641 A new thread created with
642 .B CLONE_THREAD
643 has the same parent process as the caller of
644 .BR clone ()
645 (i.e., like
646 .BR CLONE_PARENT ),
647 so that calls to
648 .BR getppid (2)
649 return the same value for all of the threads in a thread group.
650 When a
651 .B CLONE_THREAD
652 thread terminates, the thread that created it using
653 .BR clone ()
654 is not sent a
655 .B SIGCHLD
656 (or other termination) signal;
657 nor can the status of such a thread be obtained
658 using
659 .BR wait (2).
660 (The thread is said to be
661 .IR detached .)
662
663 After all of the threads in a thread group terminate,
664 the parent process of the thread group is sent a
665 .B SIGCHLD
666 (or other termination) signal.
667
668 If any of the threads in a thread group performs an
669 .BR execve (2),
670 then all threads other than the thread group leader are terminated,
671 and the new program is executed in the thread group leader.
672
673 If one of the threads in a thread group creates a child using
674 .BR fork (2),
675 then any thread in the group can
676 .BR wait (2)
677 for that child.
678
679 Since Linux 2.5.35,
680 .I flags
681 must also include
682 .B CLONE_SIGHAND
683 if
684 .B CLONE_THREAD
685 is specified
686 (and note that, since Linux 2.6.0-test6,
687 .BR CLONE_SIGHAND
688 also requires
689 .BR CLONE_VM
690 to be included).
691
692 Signals may be sent to a thread group as a whole (i.e., a TGID) using
693 .BR kill (2),
694 or to a specific thread (i.e., TID) using
695 .BR tgkill (2).
696
697 Signal dispositions and actions are process-wide:
698 if an unhandled signal is delivered to a thread, then
699 it will affect (terminate, stop, continue, be ignored in)
700 all members of the thread group.
701
702 Each thread has its own signal mask, as set by
703 .BR sigprocmask (2),
704 but signals can be pending either: for the whole process
705 (i.e., deliverable to any member of the thread group),
706 when sent with
707 .BR kill (2);
708 or for an individual thread, when sent with
709 .BR tgkill (2).
710 A call to
711 .BR sigpending (2)
712 returns a signal set that is the union of the signals pending for the
713 whole process and the signals that are pending for the calling thread.
714
715 If
716 .BR kill (2)
717 is used to send a signal to a thread group,
718 and the thread group has installed a handler for the signal, then
719 the handler will be invoked in exactly one, arbitrarily selected
720 member of the thread group that has not blocked the signal.
721 If multiple threads in a group are waiting to accept the same signal using
722 .BR sigwaitinfo (2),
723 the kernel will arbitrarily select one of these threads
724 to receive a signal sent using
725 .BR kill (2).
726 .TP
727 .BR CLONE_UNTRACED " (since Linux 2.5.46)"
728 If
729 .B CLONE_UNTRACED
730 is specified, then a tracing process cannot force
731 .B CLONE_PTRACE
732 on this child process.
733 .TP
734 .BR CLONE_VFORK " (since Linux 2.2)"
735 If
736 .B CLONE_VFORK
737 is set, the execution of the calling process is suspended
738 until the child releases its virtual memory
739 resources via a call to
740 .BR execve (2)
741 or
742 .BR _exit (2)
743 (as with
744 .BR vfork (2)).
745
746 If
747 .B CLONE_VFORK
748 is not set, then both the calling process and the child are schedulable
749 after the call, and an application should not rely on execution occurring
750 in any particular order.
751 .TP
752 .BR CLONE_VM " (since Linux 2.0)"
753 If
754 .B CLONE_VM
755 is set, the calling process and the child process run in the same memory
756 space.
757 In particular, memory writes performed by the calling process
758 or by the child process are also visible in the other process.
759 Moreover, any memory mapping or unmapping performed with
760 .BR mmap (2)
761 or
762 .BR munmap (2)
763 by the child or calling process also affects the other process.
764
765 If
766 .B CLONE_VM
767 is not set, the child process runs in a separate copy of the memory
768 space of the calling process at the time of
769 .BR clone ().
770 Memory writes or file mappings/unmappings performed by one of the
771 processes do not affect the other, as with
772 .BR fork (2).
773 .SS C library/kernel differences
774 The raw
775 .BR clone ()
776 system call corresponds more closely to
777 .BR fork (2)
778 in that execution in the child continues from the point of the
779 call.
780 As such, the
781 .I fn
782 and
783 .I arg
784 arguments of the
785 .BR clone ()
786 wrapper function are omitted.
787 Furthermore, the argument order changes.
788 The raw system call interface on x86 and many other architectures is roughly:
789 .in +4
790 .nf
791
792 .BI "long clone(unsigned long " flags ", void *" child_stack ,
793 .BI " void *" ptid ", void *" ctid ,
794 .BI " struct pt_regs *" regs );
795
796 .fi
797 .in
798 Another difference for the raw system call is that the
799 .I child_stack
800 argument may be zero, in which case copy-on-write semantics ensure that the
801 child gets separate copies of stack pages when either process modifies
802 the stack.
803 In this case, for correct operation, the
804 .B CLONE_VM
805 option should not be specified.
806
807 For some architectures, the order of the arguments for the system call
808 differs from that shown above.
809 On the score, microblaze, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
810 and MIPS architectures,
811 the order of the fourth and fifth arguments is reversed.
812 On the cris and s390 architectures,
813 the order of the first and second arguments is reversed.
814 .SS blackfin, m68k, and sparc
815 The argument-passing conventions on
816 blackfin, m68k, and sparc are different from the descriptions above.
817 For details, see the kernel (and glibc) source.
818 .SS ia64
819 On ia64, a different interface is used:
820 .nf
821
822 .BI "int __clone2(int (*" "fn" ")(void *), "
823 .BI " void *" child_stack_base ", size_t " stack_size ,
824 .BI " int " flags ", void *" "arg" ", ... "
825 .BI " /* pid_t *" ptid ", struct user_desc *" tls \
826 ", pid_t *" ctid " */ );"
827 .fi
828 .PP
829 The prototype shown above is for the glibc wrapper function;
830 the raw system call interface has no
831 .I fn
832 or
833 .I arg
834 argument, and changes the order of the arguments so that
835 .I flags
836 is the first argument, and
837 .I tls
838 is the last argument.
839 .PP
840 .BR __clone2 ()
841 operates in the same way as
842 .BR clone (),
843 except that
844 .I child_stack_base
845 points to the lowest address of the child's stack area,
846 and
847 .I stack_size
848 specifies the size of the stack pointed to by
849 .IR child_stack_base .
850 .SS Linux 2.4 and earlier
851 In Linux 2.4 and earlier,
852 .BR clone ()
853 does not take arguments
854 .IR ptid ,
855 .IR tls ,
856 and
857 .IR ctid .
858 .SH RETURN VALUE
859 .\" gettid(2) returns current->pid;
860 .\" getpid(2) returns current->tgid;
861 On success, the thread ID of the child process is returned
862 in the caller's thread of execution.
863 On failure, \-1 is returned
864 in the caller's context, no child process will be created, and
865 .I errno
866 will be set appropriately.
867 .SH ERRORS
868 .TP
869 .B EAGAIN
870 Too many processes are already running; see
871 .BR fork (2).
872 .TP
873 .B EINVAL
874 .B CLONE_SIGHAND
875 was specified, but
876 .B CLONE_VM
877 was not.
878 (Since Linux 2.6.0-test6.)
879 .TP
880 .B EINVAL
881 .B CLONE_THREAD
882 was specified, but
883 .B CLONE_SIGHAND
884 was not.
885 (Since Linux 2.5.35.)
886 .\" .TP
887 .\" .B EINVAL
888 .\" Precisely one of
889 .\" .B CLONE_DETACHED
890 .\" and
891 .\" .B CLONE_THREAD
892 .\" was specified.
893 .\" (Since Linux 2.6.0-test6.)
894 .TP
895 .B EINVAL
896 .\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
897 Both
898 .B CLONE_FS
899 and
900 .B CLONE_NEWNS
901 were specified in
902 .IR flags .
903 .TP
904 .BR EINVAL " (since Linux 3.9)"
905 Both
906 .B CLONE_NEWUSER
907 and
908 .B CLONE_FS
909 were specified in
910 .IR flags .
911 .TP
912 .B EINVAL
913 Both
914 .B CLONE_NEWIPC
915 and
916 .B CLONE_SYSVSEM
917 were specified in
918 .IR flags .
919 .TP
920 .B EINVAL
921 One (or both) of
922 .BR CLONE_NEWPID
923 or
924 .BR CLONE_NEWUSER
925 and one (or both) of
926 .BR CLONE_THREAD
927 or
928 .BR CLONE_PARENT
929 were specified in
930 .IR flags .
931 .TP
932 .B EINVAL
933 Returned by
934 .BR clone ()
935 when a zero value is specified for
936 .IR child_stack .
937 .TP
938 .B EINVAL
939 .BR CLONE_NEWIPC
940 was specified in
941 .IR flags ,
942 but the kernel was not configured with the
943 .B CONFIG_SYSVIPC
944 and
945 .BR CONFIG_IPC_NS
946 options.
947 .TP
948 .B EINVAL
949 .BR CLONE_NEWNET
950 was specified in
951 .IR flags ,
952 but the kernel was not configured with the
953 .B CONFIG_NET_NS
954 option.
955 .TP
956 .B EINVAL
957 .BR CLONE_NEWPID
958 was specified in
959 .IR flags ,
960 but the kernel was not configured with the
961 .B CONFIG_PID_NS
962 option.
963 .TP
964 .B EINVAL
965 .BR CLONE_NEWUTS
966 was specified in
967 .IR flags ,
968 but the kernel was not configured with the
969 .B CONFIG_UTS_NS
970 option.
971 .TP
972 .B ENOMEM
973 Cannot allocate sufficient memory to allocate a task structure for the
974 child, or to copy those parts of the caller's context that need to be
975 copied.
976 .TP
977 .B EPERM
978 .BR CLONE_NEWIPC ,
979 .BR CLONE_NEWNET ,
980 .BR CLONE_NEWNS ,
981 .BR CLONE_NEWPID ,
982 or
983 .BR CLONE_NEWUTS
984 was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
985 .TP
986 .B EPERM
987 .B CLONE_PID
988 was specified by a process other than process 0.
989 .TP
990 .B EPERM
991 .BR CLONE_NEWUSER
992 was specified in
993 .IR flags ,
994 but either the effective user ID or the effective group ID of the caller
995 does not have a mapping in the parent namespace (see
996 .BR user_namespaces (7)).
997 .TP
998 .BR EPERM " (since Linux 3.9)"
999 .\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
1000 .B CLONE_NEWUSER
1001 was specified in
1002 .I flags
1003 and the caller is in a chroot environment
1004 .\" FIXME What is the rationale for this restriction?
1005 (i.e., the caller's root directory does not match the root directory
1006 of the mount namespace in which it resides).
1007 .TP
1008 .BR EUSERS " (since Linux 3.11)"
1009 .B CLONE_NEWUSER
1010 was specified in
1011 .IR flags ,
1012 and the call would cause the limit on the number of
1013 nested user namespaces to be exceeded.
1014 See
1015 .BR user_namespaces (7).
1016 .SH VERSIONS
1017 There is no entry for
1018 .BR clone ()
1019 in libc5.
1020 glibc2 provides
1021 .BR clone ()
1022 as described in this manual page.
1023 .SH CONFORMING TO
1024 .BR clone ()
1025 is Linux-specific and should not be used in programs
1026 intended to be portable.
1027 .SH NOTES
1028 In the kernel 2.4.x series,
1029 .B CLONE_THREAD
1030 generally does not make the parent of the new thread the same
1031 as the parent of the calling process.
1032 However, for kernel versions 2.4.7 to 2.4.18 the
1033 .B CLONE_THREAD
1034 flag implied the
1035 .B CLONE_PARENT
1036 flag (as in kernel 2.6).
1037
1038 For a while there was
1039 .B CLONE_DETACHED
1040 (introduced in 2.5.32),
1041 which indicated that the parent wanted no child-exit signal.
1042 In 2.6.2, the need to specify this flag
1043 together with
1044 .B CLONE_THREAD
1045 disappeared.
1046 This flag is still defined, but has no effect.
1047
1048 On i386,
1049 .BR clone ()
1050 should not be called through vsyscall, but directly through
1051 .IR "int $0x80" .
1052 .SH BUGS
1053 Versions of the GNU C library that include the NPTL threading library
1054 contain a wrapper function for
1055 .BR getpid (2)
1056 that performs caching of PIDs.
1057 This caching relies on support in the glibc wrapper for
1058 .BR clone (),
1059 but as currently implemented,
1060 the cache may not be up to date in some circumstances.
1061 In particular,
1062 if a signal is delivered to the child immediately after the
1063 .BR clone ()
1064 call, then a call to
1065 .BR getpid (2)
1066 in a handler for the signal may return the PID
1067 of the calling process ("the parent"),
1068 if the clone wrapper has not yet had a chance to update the PID
1069 cache in the child.
1070 (This discussion ignores the case where the child was created using
1071 .BR CLONE_THREAD ,
1072 when
1073 .BR getpid (2)
1074 .I should
1075 return the same value in the child and in the process that called
1076 .BR clone (),
1077 since the caller and the child are in the same thread group.
1078 The stale-cache problem also does not occur if the
1079 .I flags
1080 argument includes
1081 .BR CLONE_VM .)
1082 To obtain the true PID, bypassing the cache, it may be necessary to use code such as the following:
1083 .nf
1084
1085 #include <syscall.h>
1086
1087 pid_t mypid;
1088
1089 mypid = syscall(SYS_getpid);
1090 .fi
1091 .\" See also the following bug reports
1092 .\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1093 .\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
1094 .SH EXAMPLE
1095 The following program demonstrates the use of
1096 .BR clone ()
1097 to create a child process that executes in a separate UTS namespace.
1098 The child changes the hostname in its UTS namespace.
1099 Both parent and child then display the system hostname,
1100 making it possible to see that the hostname
1101 differs in the UTS namespaces of the parent and child.
1102 For an example of the use of this program, see
1103 .BR setns (2).
1104 .SS Program source
1105 .nf
1106 #define _GNU_SOURCE
1107 #include <sys/wait.h>
1108 #include <sys/utsname.h>
1109 #include <sched.h>
1110 #include <string.h>
1111 #include <stdio.h>
1112 #include <stdlib.h>
1113 #include <unistd.h>
1114
1115 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
1116 } while (0)
1117
1118 static int /* Start function for cloned child */
1119 childFunc(void *arg)
1120 {
1121 struct utsname uts;
1122
1123 /* Change hostname in UTS namespace of child */
1124
1125 if (sethostname(arg, strlen(arg)) == \-1)
1126 errExit("sethostname");
1127
1128 /* Retrieve and display hostname */
1129
1130 if (uname(&uts) == \-1)
1131 errExit("uname");
1132 printf("uts.nodename in child: %s\\n", uts.nodename);
1133
1134 /* Keep the namespace open for a while, by sleeping.
1135 This allows some experimentation\-\-for example, another
1136 process might join the namespace. */
1137
1138 sleep(200);
1139
1140 return 0; /* Child terminates now */
1141 }
1142
1143 #define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
1144
1145 int
1146 main(int argc, char *argv[])
1147 {
1148 char *stack; /* Start of stack buffer */
1149 char *stackTop; /* End of stack buffer */
1150 pid_t pid;
1151 struct utsname uts;
1152
1153 if (argc < 2) {
1154 fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]);
1155 exit(EXIT_SUCCESS);
1156 }
1157
1158 /* Allocate stack for child */
1159
1160 stack = malloc(STACK_SIZE);
1161 if (stack == NULL)
1162 errExit("malloc");
1163 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
1164
1165 /* Create child that has its own UTS namespace;
1166 child commences execution in childFunc() */
1167
1168 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1169 if (pid == \-1)
1170 errExit("clone");
1171 printf("clone() returned %ld\\n", (long) pid);
1172
1173 /* Parent falls through to here */
1174
1175 sleep(1); /* Give child time to change its hostname */
1176
1177 /* Display hostname in parent\(aqs UTS namespace. This will be
1178 different from hostname in child\(aqs UTS namespace. */
1179
1180 if (uname(&uts) == \-1)
1181 errExit("uname");
1182 printf("uts.nodename in parent: %s\\n", uts.nodename);
1183
1184 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
1185 errExit("waitpid");
1186 printf("child has terminated\\n");
1187
1188 exit(EXIT_SUCCESS);
1189 }
1190 .fi
1191 .SH SEE ALSO
1192 .BR fork (2),
1193 .BR futex (2),
1194 .BR getpid (2),
1195 .BR gettid (2),
1196 .BR kcmp (2),
1197 .BR set_thread_area (2),
1198 .BR set_tid_address (2),
1199 .BR setns (2),
1200 .BR tkill (2),
1201 .BR unshare (2),
1202 .BR wait (2),
1203 .BR capabilities (7),
1204 .BR namespaces (7),
1205 .BR pthreads (7)