]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/clone.2
clone.2: Add more detail on the meaning of CLONE_SYVSEM
[thirdparty/man-pages.git] / man2 / clone.2
1 .\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
2 .\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013
3 .\"
4 .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
5 .\" May be distributed under the GNU General Public License.
6 .\" %%%LICENSE_END
7 .\"
8 .\" Modified by Michael Haardt <michael@moria.de>
9 .\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
10 .\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
11 .\" New man page (copied from 'fork.2').
12 .\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
13 .\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
14 .\" Modified 26 Jun 2001 by Michael Kerrisk
15 .\" Mostly upgraded to 2.4.x
16 .\" Added prototype for sys_clone() plus description
17 .\" Added CLONE_THREAD with a brief description of thread groups
18 .\" Added CLONE_PARENT and revised entire page to remove ambiguity
19 .\" between "calling process" and "parent process"
20 .\" Added CLONE_PTRACE and CLONE_VFORK
21 .\" Added EPERM and EINVAL error codes
22 .\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
23 .\" various other minor tidy ups and clarifications.
24 .\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
25 .\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
26 .\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
27 .\" Added description for CLONE_NEWNS, which was added in 2.4.19
28 .\" Slightly rephrased, aeb.
29 .\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
30 .\" Modified 1 Jan 2004 - various updates, aeb
31 .\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
32 .\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
33 .\" wrapper under BUGS.
34 .\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
35 .\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
36 .\" 2008-11-18, mtk, order CLONE_* flags alphabetically
37 .\" 2008-11-18, mtk, document CLONE_NEWPID
38 .\" 2008-11-19, mtk, document CLONE_NEWUTS
39 .\" 2008-11-19, mtk, document CLONE_NEWIPC
40 .\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
41 .\"
42 .TH CLONE 2 2014-08-19 "Linux" "Linux Programmer's Manual"
43 .SH NAME
44 clone, __clone2 \- create a child process
45 .SH SYNOPSIS
46 .nf
47 /* Prototype for the glibc wrapper function */
48
49 .B #include <sched.h>
50
51 .BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
52 .BI " int " flags ", void *" "arg" ", ... "
53 .BI " /* pid_t *" ptid ", struct user_desc *" tls \
54 ", pid_t *" ctid " */ );"
55
56 /* Prototype for the raw system call */
57
58 .BI "long clone(unsigned long " flags ", void *" child_stack ,
59 .BI " void *" ptid ", void *" ctid ,
60 .BI " struct pt_regs *" regs );
61 .fi
62 .sp
63 .in -4n
64 Feature Test Macro Requirements for glibc wrapper function (see
65 .BR feature_test_macros (7)):
66 .in
67 .sp
68 .BR clone ():
69 .ad l
70 .RS 4
71 .PD 0
72 .TP 4
73 Since glibc 2.14:
74 _GNU_SOURCE
75 .TP 4
76 .\" See http://sources.redhat.com/bugzilla/show_bug.cgi?id=4749
77 Before glibc 2.14:
78 _BSD_SOURCE || _SVID_SOURCE
79 /* _GNU_SOURCE also suffices */
80 .PD
81 .RE
82 .ad b
83 .SH DESCRIPTION
84 .BR clone ()
85 creates a new process, in a manner similar to
86 .BR fork (2).
87
88 This page describes both the glibc
89 .BR clone ()
90 wrapper function and the underlying system call on which it is based.
91 The main text describes the wrapper function;
92 the differences for the raw system call
93 are described toward the end of this page.
94
95 Unlike
96 .BR fork (2),
97 .BR clone ()
98 allows the child process to share parts of its execution context with
99 the calling process, such as the memory space, the table of file
100 descriptors, and the table of signal handlers.
101 (Note that on this manual
102 page, "calling process" normally corresponds to "parent process".
103 But see the description of
104 .B CLONE_PARENT
105 below.)
106
107 The main use of
108 .BR clone ()
109 is to implement threads: multiple threads of control in a program that
110 run concurrently in a shared memory space.
111
112 When the child process is created with
113 .BR clone (),
114 it executes the function
115 .IR fn ( arg ).
116 (This differs from
117 .BR fork (2),
118 where execution continues in the child from the point
119 of the
120 .BR fork (2)
121 call.)
122 The
123 .I fn
124 argument is a pointer to a function that is called by the child
125 process at the beginning of its execution.
126 The
127 .I arg
128 argument is passed to the
129 .I fn
130 function.
131
132 When the
133 .IR fn ( arg )
134 function application returns, the child process terminates.
135 The integer returned by
136 .I fn
137 is the exit code for the child process.
138 The child process may also terminate explicitly by calling
139 .BR exit (2)
140 or after receiving a fatal signal.
141
142 The
143 .I child_stack
144 argument specifies the location of the stack used by the child process.
145 Since the child and calling process may share memory,
146 it is not possible for the child process to execute in the
147 same stack as the calling process.
148 The calling process must therefore
149 set up memory space for the child stack and pass a pointer to this
150 space to
151 .BR clone ().
152 Stacks grow downward on all processors that run Linux
153 (except the HP PA processors), so
154 .I child_stack
155 usually points to the topmost address of the memory space set up for
156 the child stack.
157
158 The low byte of
159 .I flags
160 contains the number of the
161 .I "termination signal"
162 sent to the parent when the child dies.
163 If this signal is specified as anything other than
164 .BR SIGCHLD ,
165 then the parent process must specify the
166 .B __WALL
167 or
168 .B __WCLONE
169 options when waiting for the child with
170 .BR wait (2).
171 If no signal is specified, then the parent process is not signaled
172 when the child terminates.
173
174 .I flags
175 may also be bitwise-or'ed with zero or more of the following constants,
176 in order to specify what is shared between the calling process
177 and the child process:
178 .TP
179 .BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
180 Erase child thread ID at location
181 .I ctid
182 in child memory when the child exits, and do a wakeup on the futex
183 at that address.
184 The address involved may be changed by the
185 .BR set_tid_address (2)
186 system call.
187 This is used by threading libraries.
188 .TP
189 .BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
190 Store child thread ID at location
191 .I ctid
192 in child memory.
193 .TP
194 .BR CLONE_FILES " (since Linux 2.0)"
195 If
196 .B CLONE_FILES
197 is set, the calling process and the child process share the same file
198 descriptor table.
199 Any file descriptor created by the calling process or by the child
200 process is also valid in the other process.
201 Similarly, if one of the processes closes a file descriptor,
202 or changes its associated flags (using the
203 .BR fcntl (2)
204 .B F_SETFD
205 operation), the other process is also affected.
206
207 If
208 .B CLONE_FILES
209 is not set, the child process inherits a copy of all file descriptors
210 opened in the calling process at the time of
211 .BR clone ().
212 (The duplicated file descriptors in the child refer to the
213 same open file descriptions (see
214 .BR open (2))
215 as the corresponding file descriptors in the calling process.)
216 Subsequent operations that open or close file descriptors,
217 or change file descriptor flags,
218 performed by either the calling
219 process or the child process do not affect the other process.
220 .TP
221 .BR CLONE_FS " (since Linux 2.0)"
222 If
223 .B CLONE_FS
224 is set, the caller and the child process share the same filesystem
225 information.
226 This includes the root of the filesystem, the current
227 working directory, and the umask.
228 Any call to
229 .BR chroot (2),
230 .BR chdir (2),
231 or
232 .BR umask (2)
233 performed by the calling process or the child process also affects the
234 other process.
235
236 If
237 .B CLONE_FS
238 is not set, the child process works on a copy of the filesystem
239 information of the calling process at the time of the
240 .BR clone ()
241 call.
242 Calls to
243 .BR chroot (2),
244 .BR chdir (2),
245 .BR umask (2)
246 performed later by one of the processes do not affect the other process.
247 .TP
248 .BR CLONE_IO " (since Linux 2.6.25)"
249 If
250 .B CLONE_IO
251 is set, then the new process shares an I/O context with
252 the calling process.
253 If this flag is not set, then (as with
254 .BR fork (2))
255 the new process has its own I/O context.
256
257 .\" The following based on text from Jens Axboe
258 The I/O context is the I/O scope of the disk scheduler (i.e.,
259 what the I/O scheduler uses to model scheduling of a process's I/O).
260 If processes share the same I/O context,
261 they are treated as one by the I/O scheduler.
262 As a consequence, they get to share disk time.
263 For some I/O schedulers,
264 .\" the anticipatory and CFQ scheduler
265 if two processes share an I/O context,
266 they will be allowed to interleave their disk access.
267 If several threads are doing I/O on behalf of the same process
268 .RB ( aio_read (3),
269 for instance), they should employ
270 .BR CLONE_IO
271 to get better I/O performance.
272 .\" with CFQ and AS.
273
274 If the kernel is not configured with the
275 .B CONFIG_BLOCK
276 option, this flag is a no-op.
277 .TP
278 .BR CLONE_NEWIPC " (since Linux 2.6.19)"
279 If
280 .B CLONE_NEWIPC
281 is set, then create the process in a new IPC namespace.
282 If this flag is not set, then (as with
283 .BR fork (2)),
284 the process is created in the same IPC namespace as
285 the calling process.
286 This flag is intended for the implementation of containers.
287
288 An IPC namespace provides an isolated view of System\ V IPC objects (see
289 .BR svipc (7))
290 and (since Linux 2.6.30)
291 .\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
292 .\" https://lwn.net/Articles/312232/
293 POSIX message queues
294 (see
295 .BR mq_overview (7)).
296 The common characteristic of these IPC mechanisms is that IPC
297 objects are identified by mechanisms other than filesystem
298 pathnames.
299
300 Objects created in an IPC namespace are visible to all other processes
301 that are members of that namespace,
302 but are not visible to processes in other IPC namespaces.
303
304 When an IPC namespace is destroyed
305 (i.e., when the last process that is a member of the namespace terminates),
306 all IPC objects in the namespace are automatically destroyed.
307
308 Only a privileged process
309 .RB ( CAP_SYS_ADMIN )
310 can employ
311 .BR CLONE_NEWIPC .
312 This flag can't be specified in conjunction with
313 .BR CLONE_SYSVSEM .
314
315 For further information on IPC namespaces, see
316 .BR namespaces (7).
317 .TP
318 .BR CLONE_NEWNET " (since Linux 2.6.24)"
319 (The implementation of this flag was completed only
320 by about kernel version 2.6.29.)
321
322 If
323 .B CLONE_NEWNET
324 is set, then create the process in a new network namespace.
325 If this flag is not set, then (as with
326 .BR fork (2))
327 the process is created in the same network namespace as
328 the calling process.
329 This flag is intended for the implementation of containers.
330
331 A network namespace provides an isolated view of the networking stack
332 (network device interfaces, IPv4 and IPv6 protocol stacks,
333 IP routing tables, firewall rules, the
334 .I /proc/net
335 and
336 .I /sys/class/net
337 directory trees, sockets, etc.).
338 A physical network device can live in exactly one
339 network namespace.
340 A virtual network device ("veth") pair provides a pipe-like abstraction
341 .\" FIXME . Add pointer to veth(4) page when it is eventually completed
342 that can be used to create tunnels between network namespaces,
343 and can be used to create a bridge to a physical network device
344 in another namespace.
345
346 When a network namespace is freed
347 (i.e., when the last process in the namespace terminates),
348 its physical network devices are moved back to the
349 initial network namespace (not to the parent of the process).
350 For further information on network namespaces, see
351 .BR namespaces (7).
352
353 Only a privileged process
354 .RB ( CAP_SYS_ADMIN )
355 can employ
356 .BR CLONE_NEWNET .
357 .TP
358 .BR CLONE_NEWNS " (since Linux 2.4.19)"
359 If
360 .B CLONE_NEWNS
361 is set, the cloned child is started in a new mount namespace,
362 initialized with a copy of the namespace of the parent.
363 If
364 .B CLONE_NEWNS
365 is not set, the child lives in the same mount
366 namespace as the parent.
367
368 For further information on mount namespaces, see
369 .BR namespaces (7).
370
371 Only a privileged process
372 .RB ( CAP_SYS_ADMIN )
373 can employ
374 .BR CLONE_NEWNS .
375 It is not permitted to specify both
376 .B CLONE_NEWNS
377 and
378 .B CLONE_FS
379 in the same
380 .BR clone ()
381 call.
382 .TP
383 .BR CLONE_NEWPID " (since Linux 2.6.24)"
384 .\" This explanation draws a lot of details from
385 .\" http://lwn.net/Articles/259217/
386 .\" Authors: Pavel Emelyanov <xemul@openvz.org>
387 .\" and Kir Kolyshkin <kir@openvz.org>
388 .\"
389 .\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
390 .\" Author: Pavel Emelyanov <xemul@openvz.org>
391 If
392 .B CLONE_NEWPID
393 is set, then create the process in a new PID namespace.
394 If this flag is not set, then (as with
395 .BR fork (2))
396 the process is created in the same PID namespace as
397 the calling process.
398 This flag is intended for the implementation of containers.
399
400 For further information on PID namespaces, see
401 .BR namespaces (7)
402 and
403 .BR pid_namespaces (7).
404
405 Only a privileged process
406 .RB ( CAP_SYS_ADMIN )
407 can employ
408 .BR CLONE_NEWPID .
409 This flag can't be specified in conjunction with
410 .BR CLONE_THREAD
411 or
412 .BR CLONE_PARENT .
413 .TP
414 .BR CLONE_NEWUSER
415 (This flag first became meaningful for
416 .BR clone ()
417 in Linux 2.6.23,
418 the current
419 .BR clone ()
420 semantics were merged in Linux 3.5,
421 and the final pieces to make the user namespaces completely usable were
422 merged in Linux 3.8.)
423
424 If
425 .B CLONE_NEWUSER
426 is set, then create the process in a new user namespace.
427 If this flag is not set, then (as with
428 .BR fork (2))
429 the process is created in the same user namespace as the calling process.
430
431 For further information on user namespaces, see
432 .BR namespaces (7)
433 and
434 .BR user_namespaces (7).
435
436 Before Linux 3.8, use of
437 .BR CLONE_NEWUSER
438 required that the caller have three capabilities:
439 .BR CAP_SYS_ADMIN ,
440 .BR CAP_SETUID ,
441 and
442 .BR CAP_SETGID .
443 .\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
444 Starting with Linux 3.8,
445 no privileges are needed to create a user namespace.
446
447 .BR CLONE_NEWUSER
448 cannot be specified in conjunction with
449 various other
450 .BR CLONE_*
451 flags.
452 For further details, see
453 .BR user_namespaces (7),
454 and ERRORS below.
455 .TP
456 .BR CLONE_NEWUTS " (since Linux 2.6.19)"
457 If
458 .B CLONE_NEWUTS
459 is set, then create the process in a new UTS namespace,
460 whose identifiers are initialized by duplicating the identifiers
461 from the UTS namespace of the calling process.
462 If this flag is not set, then (as with
463 .BR fork (2))
464 the process is created in the same UTS namespace as
465 the calling process.
466 This flag is intended for the implementation of containers.
467
468 A UTS namespace is the set of identifiers returned by
469 .BR uname (2);
470 among these, the domain name and the hostname can be modified by
471 .BR setdomainname (2)
472 and
473 .BR sethostname (2),
474 respectively.
475 Changes made to the identifiers in a UTS namespace
476 are visible to all other processes in the same namespace,
477 but are not visible to processes in other UTS namespaces.
478
479 Only a privileged process
480 .RB ( CAP_SYS_ADMIN )
481 can employ
482 .BR CLONE_NEWUTS .
483
484 For further information on UTS namespaces, see
485 .BR namespaces (7).
486 .TP
487 .BR CLONE_PARENT " (since Linux 2.3.12)"
488 If
489 .B CLONE_PARENT
490 is set, then the parent of the new child (as returned by
491 .BR getppid (2))
492 will be the same as that of the calling process.
493
494 If
495 .B CLONE_PARENT
496 is not set, then (as with
497 .BR fork (2))
498 the child's parent is the calling process.
499
500 Note that it is the parent process, as returned by
501 .BR getppid (2),
502 which is signaled when the child terminates, so that
503 if
504 .B CLONE_PARENT
505 is set, then the parent of the calling process, rather than the
506 calling process itself, will be signaled.
507 .TP
508 .BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
509 Store child thread ID at location
510 .I ptid
511 in parent and child memory.
512 (In Linux 2.5.32-2.5.48 there was a flag
513 .B CLONE_SETTID
514 that did this.)
515 .TP
516 .BR CLONE_PID " (obsolete)"
517 If
518 .B CLONE_PID
519 is set, the child process is created with the same process ID as
520 the calling process.
521 This is good for hacking the system, but otherwise
522 of not much use.
523 Since 2.3.21 this flag can be
524 specified only by the system boot process (PID 0).
525 It disappeared in Linux 2.5.16.
526 .TP
527 .BR CLONE_PTRACE " (since Linux 2.2)"
528 If
529 .B CLONE_PTRACE
530 is specified, and the calling process is being traced,
531 then trace the child also (see
532 .BR ptrace (2)).
533 .TP
534 .BR CLONE_SETTLS " (since Linux 2.5.32)"
535 The
536 .I tls
537 argument is the new TLS (Thread Local Storage) descriptor.
538 (See
539 .BR set_thread_area (2).)
540 .TP
541 .BR CLONE_SIGHAND " (since Linux 2.0)"
542 If
543 .B CLONE_SIGHAND
544 is set, the calling process and the child process share the same table of
545 signal handlers.
546 If the calling process or child process calls
547 .BR sigaction (2)
548 to change the behavior associated with a signal, the behavior is
549 changed in the other process as well.
550 However, the calling process and child
551 processes still have distinct signal masks and sets of pending
552 signals.
553 So, one of them may block or unblock some signals using
554 .BR sigprocmask (2)
555 without affecting the other process.
556
557 If
558 .B CLONE_SIGHAND
559 is not set, the child process inherits a copy of the signal handlers
560 of the calling process at the time
561 .BR clone ()
562 is called.
563 Calls to
564 .BR sigaction (2)
565 performed later by one of the processes have no effect on the other
566 process.
567
568 Since Linux 2.6.0-test6,
569 .I flags
570 must also include
571 .B CLONE_VM
572 if
573 .B CLONE_SIGHAND
574 is specified.
575 .TP
576 .BR CLONE_STOPPED " (since Linux 2.6.0-test2)"
577 If
578 .B CLONE_STOPPED
579 is set, then the child is initially stopped (as though it was sent a
580 .B SIGSTOP
581 signal), and must be resumed by sending it a
582 .B SIGCONT
583 signal.
584
585 This flag was
586 .I deprecated
587 from Linux 2.6.25 onward,
588 and was
589 .I removed
590 altogether in Linux 2.6.38.
591 .\" glibc 2.8 removed this defn from bits/sched.h
592 .TP
593 .BR CLONE_SYSVSEM " (since Linux 2.5.10)"
594 If
595 .B CLONE_SYSVSEM
596 is set, then the child and the calling process share
597 a single list of System V semaphore adjustment
598 .RI ( semadj )
599 values (see
600 .BR semop (2)).
601 In this case, the shared list accumulates
602 .I semadj
603 values across all processes sharing the list,
604 and semaphore adjustments are performed only when the last process
605 that is sharing the list terminates (or ceases sharing the list using
606 .BR unshare (2)).
607 If this flag is not set, then the child has a separate
608 .I semadj
609 list that is initially empty.
610 .TP
611 .BR CLONE_THREAD " (since Linux 2.4.0-test8)"
612 If
613 .B CLONE_THREAD
614 is set, the child is placed in the same thread group as the calling process.
615 To make the remainder of the discussion of
616 .B CLONE_THREAD
617 more readable, the term "thread" is used to refer to the
618 processes within a thread group.
619
620 Thread groups were a feature added in Linux 2.4 to support the
621 POSIX threads notion of a set of threads that share a single PID.
622 Internally, this shared PID is the so-called
623 thread group identifier (TGID) for the thread group.
624 Since Linux 2.4, calls to
625 .BR getpid (2)
626 return the TGID of the caller.
627
628 The threads within a group can be distinguished by their (system-wide)
629 unique thread IDs (TID).
630 A new thread's TID is available as the function result
631 returned to the caller of
632 .BR clone (),
633 and a thread can obtain
634 its own TID using
635 .BR gettid (2).
636
637 When a call is made to
638 .BR clone ()
639 without specifying
640 .BR CLONE_THREAD ,
641 then the resulting thread is placed in a new thread group
642 whose TGID is the same as the thread's TID.
643 This thread is the
644 .I leader
645 of the new thread group.
646
647 A new thread created with
648 .B CLONE_THREAD
649 has the same parent process as the caller of
650 .BR clone ()
651 (i.e., like
652 .BR CLONE_PARENT ),
653 so that calls to
654 .BR getppid (2)
655 return the same value for all of the threads in a thread group.
656 When a
657 .B CLONE_THREAD
658 thread terminates, the thread that created it using
659 .BR clone ()
660 is not sent a
661 .B SIGCHLD
662 (or other termination) signal;
663 nor can the status of such a thread be obtained
664 using
665 .BR wait (2).
666 (The thread is said to be
667 .IR detached .)
668
669 After all of the threads in a thread group terminate,
670 the parent process of the thread group is sent a
671 .B SIGCHLD
672 (or other termination) signal.
673
674 If any of the threads in a thread group performs an
675 .BR execve (2),
676 then all threads other than the thread group leader are terminated,
677 and the new program is executed in the thread group leader.
678
679 If one of the threads in a thread group creates a child using
680 .BR fork (2),
681 then any thread in the group can
682 .BR wait (2)
683 for that child.
684
685 Since Linux 2.5.35,
686 .I flags
687 must also include
688 .B CLONE_SIGHAND
689 if
690 .B CLONE_THREAD
691 is specified
692 (and note that, since Linux 2.6.0-test6,
693 .BR CLONE_SIGHAND
694 also requires
695 .BR CLONE_VM
696 to be included).
697
698 Signals may be sent to a thread group as a whole (i.e., a TGID) using
699 .BR kill (2),
700 or to a specific thread (i.e., TID) using
701 .BR tgkill (2).
702
703 Signal dispositions and actions are process-wide:
704 if an unhandled signal is delivered to a thread, then
705 it will affect (terminate, stop, continue, be ignored in)
706 all members of the thread group.
707
708 Each thread has its own signal mask, as set by
709 .BR sigprocmask (2),
710 but signals can be pending either: for the whole process
711 (i.e., deliverable to any member of the thread group),
712 when sent with
713 .BR kill (2);
714 or for an individual thread, when sent with
715 .BR tgkill (2).
716 A call to
717 .BR sigpending (2)
718 returns a signal set that is the union of the signals pending for the
719 whole process and the signals that are pending for the calling thread.
720
721 If
722 .BR kill (2)
723 is used to send a signal to a thread group,
724 and the thread group has installed a handler for the signal, then
725 the handler will be invoked in exactly one, arbitrarily selected
726 member of the thread group that has not blocked the signal.
727 If multiple threads in a group are waiting to accept the same signal using
728 .BR sigwaitinfo (2),
729 the kernel will arbitrarily select one of these threads
730 to receive a signal sent using
731 .BR kill (2).
732 .TP
733 .BR CLONE_UNTRACED " (since Linux 2.5.46)"
734 If
735 .B CLONE_UNTRACED
736 is specified, then a tracing process cannot force
737 .B CLONE_PTRACE
738 on this child process.
739 .TP
740 .BR CLONE_VFORK " (since Linux 2.2)"
741 If
742 .B CLONE_VFORK
743 is set, the execution of the calling process is suspended
744 until the child releases its virtual memory
745 resources via a call to
746 .BR execve (2)
747 or
748 .BR _exit (2)
749 (as with
750 .BR vfork (2)).
751
752 If
753 .B CLONE_VFORK
754 is not set, then both the calling process and the child are schedulable
755 after the call, and an application should not rely on execution occurring
756 in any particular order.
757 .TP
758 .BR CLONE_VM " (since Linux 2.0)"
759 If
760 .B CLONE_VM
761 is set, the calling process and the child process run in the same memory
762 space.
763 In particular, memory writes performed by the calling process
764 or by the child process are also visible in the other process.
765 Moreover, any memory mapping or unmapping performed with
766 .BR mmap (2)
767 or
768 .BR munmap (2)
769 by the child or calling process also affects the other process.
770
771 If
772 .B CLONE_VM
773 is not set, the child process runs in a separate copy of the memory
774 space of the calling process at the time of
775 .BR clone ().
776 Memory writes or file mappings/unmappings performed by one of the
777 processes do not affect the other, as with
778 .BR fork (2).
779 .SS C library/kernel ABI differences
780 The raw
781 .BR clone ()
782 system call corresponds more closely to
783 .BR fork (2)
784 in that execution in the child continues from the point of the
785 call.
786 As such, the
787 .I fn
788 and
789 .I arg
790 arguments of the
791 .BR clone ()
792 wrapper function are omitted.
793 Furthermore, the argument order changes.
794 The raw system call interface on x86 and many other architectures is roughly:
795 .in +4
796 .nf
797
798 .BI "long clone(unsigned long " flags ", void *" child_stack ,
799 .BI " void *" ptid ", void *" ctid ,
800 .BI " struct pt_regs *" regs );
801
802 .fi
803 .in
804 Another difference for the raw system call is that the
805 .I child_stack
806 argument may be zero, in which case copy-on-write semantics ensure that the
807 child gets separate copies of stack pages when either process modifies
808 the stack.
809 In this case, for correct operation, the
810 .B CLONE_VM
811 option should not be specified.
812
813 For some architectures, the order of the arguments for the system call
814 differs from that shown above.
815 On the score, microblaze, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
816 and MIPS architectures,
817 the order of the fourth and fifth arguments is reversed.
818 On the cris and s390 architectures,
819 the order of the first and second arguments is reversed.
820 .SS blackfin, m68k, and sparc
821 The argument-passing conventions on
822 blackfin, m68k, and sparc are different from the descriptions above.
823 For details, see the kernel (and glibc) source.
824 .SS ia64
825 On ia64, a different interface is used:
826 .nf
827
828 .BI "int __clone2(int (*" "fn" ")(void *), "
829 .BI " void *" child_stack_base ", size_t " stack_size ,
830 .BI " int " flags ", void *" "arg" ", ... "
831 .BI " /* pid_t *" ptid ", struct user_desc *" tls \
832 ", pid_t *" ctid " */ );"
833 .fi
834 .PP
835 The prototype shown above is for the glibc wrapper function;
836 the raw system call interface has no
837 .I fn
838 or
839 .I arg
840 argument, and changes the order of the arguments so that
841 .I flags
842 is the first argument, and
843 .I tls
844 is the last argument.
845 .PP
846 .BR __clone2 ()
847 operates in the same way as
848 .BR clone (),
849 except that
850 .I child_stack_base
851 points to the lowest address of the child's stack area,
852 and
853 .I stack_size
854 specifies the size of the stack pointed to by
855 .IR child_stack_base .
856 .SS Linux 2.4 and earlier
857 In Linux 2.4 and earlier,
858 .BR clone ()
859 does not take arguments
860 .IR ptid ,
861 .IR tls ,
862 and
863 .IR ctid .
864 .SH RETURN VALUE
865 .\" gettid(2) returns current->pid;
866 .\" getpid(2) returns current->tgid;
867 On success, the thread ID of the child process is returned
868 in the caller's thread of execution.
869 On failure, \-1 is returned
870 in the caller's context, no child process will be created, and
871 .I errno
872 will be set appropriately.
873 .SH ERRORS
874 .TP
875 .B EAGAIN
876 Too many processes are already running; see
877 .BR fork (2).
878 .TP
879 .B EINVAL
880 .B CLONE_SIGHAND
881 was specified, but
882 .B CLONE_VM
883 was not.
884 (Since Linux 2.6.0-test6.)
885 .TP
886 .B EINVAL
887 .B CLONE_THREAD
888 was specified, but
889 .B CLONE_SIGHAND
890 was not.
891 (Since Linux 2.5.35.)
892 .\" .TP
893 .\" .B EINVAL
894 .\" Precisely one of
895 .\" .B CLONE_DETACHED
896 .\" and
897 .\" .B CLONE_THREAD
898 .\" was specified.
899 .\" (Since Linux 2.6.0-test6.)
900 .TP
901 .B EINVAL
902 .\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
903 Both
904 .B CLONE_FS
905 and
906 .B CLONE_NEWNS
907 were specified in
908 .IR flags .
909 .TP
910 .BR EINVAL " (since Linux 3.9)"
911 Both
912 .B CLONE_NEWUSER
913 and
914 .B CLONE_FS
915 were specified in
916 .IR flags .
917 .TP
918 .B EINVAL
919 Both
920 .B CLONE_NEWIPC
921 and
922 .B CLONE_SYSVSEM
923 were specified in
924 .IR flags .
925 .TP
926 .B EINVAL
927 One (or both) of
928 .BR CLONE_NEWPID
929 or
930 .BR CLONE_NEWUSER
931 and one (or both) of
932 .BR CLONE_THREAD
933 or
934 .BR CLONE_PARENT
935 were specified in
936 .IR flags .
937 .TP
938 .B EINVAL
939 Returned by
940 .BR clone ()
941 when a zero value is specified for
942 .IR child_stack .
943 .TP
944 .B EINVAL
945 .BR CLONE_NEWIPC
946 was specified in
947 .IR flags ,
948 but the kernel was not configured with the
949 .B CONFIG_SYSVIPC
950 and
951 .BR CONFIG_IPC_NS
952 options.
953 .TP
954 .B EINVAL
955 .BR CLONE_NEWNET
956 was specified in
957 .IR flags ,
958 but the kernel was not configured with the
959 .B CONFIG_NET_NS
960 option.
961 .TP
962 .B EINVAL
963 .BR CLONE_NEWPID
964 was specified in
965 .IR flags ,
966 but the kernel was not configured with the
967 .B CONFIG_PID_NS
968 option.
969 .TP
970 .B EINVAL
971 .BR CLONE_NEWUTS
972 was specified in
973 .IR flags ,
974 but the kernel was not configured with the
975 .B CONFIG_UTS_NS
976 option.
977 .TP
978 .B ENOMEM
979 Cannot allocate sufficient memory to allocate a task structure for the
980 child, or to copy those parts of the caller's context that need to be
981 copied.
982 .TP
983 .B EPERM
984 .BR CLONE_NEWIPC ,
985 .BR CLONE_NEWNET ,
986 .BR CLONE_NEWNS ,
987 .BR CLONE_NEWPID ,
988 or
989 .BR CLONE_NEWUTS
990 was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
991 .TP
992 .B EPERM
993 .B CLONE_PID
994 was specified by a process other than process 0.
995 .TP
996 .B EPERM
997 .BR CLONE_NEWUSER
998 was specified in
999 .IR flags ,
1000 but either the effective user ID or the effective group ID of the caller
1001 does not have a mapping in the parent namespace (see
1002 .BR user_namespaces (7)).
1003 .SH VERSIONS
1004 There is no entry for
1005 .BR clone ()
1006 in libc5.
1007 glibc2 provides
1008 .BR clone ()
1009 as described in this manual page.
1010 .SH CONFORMING TO
1011 .BR clone ()
1012 is Linux-specific and should not be used in programs
1013 intended to be portable.
1014 .SH NOTES
1015 In the kernel 2.4.x series,
1016 .B CLONE_THREAD
1017 generally does not make the parent of the new thread the same
1018 as the parent of the calling process.
1019 However, for kernel versions 2.4.7 to 2.4.18 the
1020 .B CLONE_THREAD
1021 flag implied the
1022 .B CLONE_PARENT
1023 flag (as in kernel 2.6).
1024
1025 For a while there was
1026 .B CLONE_DETACHED
1027 (introduced in 2.5.32):
1028 parent wants no child-exit signal.
1029 In 2.6.2 the need to give this
1030 together with
1031 .B CLONE_THREAD
1032 disappeared.
1033 This flag is still defined, but has no effect.
1034
1035 On i386,
1036 .BR clone ()
1037 should not be called through vsyscall, but directly through
1038 .IR "int $0x80" .
1039 .SH BUGS
1040 Versions of the GNU C library that include the NPTL threading library
1041 contain a wrapper function for
1042 .BR getpid (2)
1043 that performs caching of PIDs.
1044 This caching relies on support in the glibc wrapper for
1045 .BR clone (),
1046 but as currently implemented,
1047 the cache may not be up to date in some circumstances.
1048 In particular,
1049 if a signal is delivered to the child immediately after the
1050 .BR clone ()
1051 call, then a call to
1052 .BR getpid (2)
1053 in a handler for the signal may return the PID
1054 of the calling process ("the parent"),
1055 if the clone wrapper has not yet had a chance to update the PID
1056 cache in the child.
1057 (This discussion ignores the case where the child was created using
1058 .BR CLONE_THREAD ,
1059 when
1060 .BR getpid (2)
1061 .I should
1062 return the same value in the child and in the process that called
1063 .BR clone (),
1064 since the caller and the child are in the same thread group.
1065 The stale-cache problem also does not occur if the
1066 .I flags
1067 argument includes
1068 .BR CLONE_VM .)
1069 To get the truth, it may be necessary to use code such as the following:
1070 .nf
1071
1072 #include <syscall.h>
1073
1074 pid_t mypid;
1075
1076 mypid = syscall(SYS_getpid);
1077 .fi
1078 .\" See also the following bug reports
1079 .\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1080 .\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
1081 .SH EXAMPLE
1082 The following program demonstrates the use of
1083 .BR clone ()
1084 to create a child process that executes in a separate UTS namespace.
1085 The child changes the hostname in its UTS namespace.
1086 Both parent and child then display the system hostname,
1087 making it possible to see that the hostname
1088 differs in the UTS namespaces of the parent and child.
1089 For an example of the use of this program, see
1090 .BR setns (2).
1091 .SS Program source
1092 .nf
1093 #define _GNU_SOURCE
1094 #include <sys/wait.h>
1095 #include <sys/utsname.h>
1096 #include <sched.h>
1097 #include <string.h>
1098 #include <stdio.h>
1099 #include <stdlib.h>
1100 #include <unistd.h>
1101
1102 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
1103 } while (0)
1104
1105 static int /* Start function for cloned child */
1106 childFunc(void *arg)
1107 {
1108 struct utsname uts;
1109
1110 /* Change hostname in UTS namespace of child */
1111
1112 if (sethostname(arg, strlen(arg)) == \-1)
1113 errExit("sethostname");
1114
1115 /* Retrieve and display hostname */
1116
1117 if (uname(&uts) == \-1)
1118 errExit("uname");
1119 printf("uts.nodename in child: %s\\n", uts.nodename);
1120
1121 /* Keep the namespace open for a while, by sleeping.
1122 This allows some experimentation\-\-for example, another
1123 process might join the namespace. */
1124
1125 sleep(200);
1126
1127 return 0; /* Child terminates now */
1128 }
1129
1130 #define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
1131
1132 int
1133 main(int argc, char *argv[])
1134 {
1135 char *stack; /* Start of stack buffer */
1136 char *stackTop; /* End of stack buffer */
1137 pid_t pid;
1138 struct utsname uts;
1139
1140 if (argc < 2) {
1141 fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]);
1142 exit(EXIT_SUCCESS);
1143 }
1144
1145 /* Allocate stack for child */
1146
1147 stack = malloc(STACK_SIZE);
1148 if (stack == NULL)
1149 errExit("malloc");
1150 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
1151
1152 /* Create child that has its own UTS namespace;
1153 child commences execution in childFunc() */
1154
1155 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1156 if (pid == \-1)
1157 errExit("clone");
1158 printf("clone() returned %ld\\n", (long) pid);
1159
1160 /* Parent falls through to here */
1161
1162 sleep(1); /* Give child time to change its hostname */
1163
1164 /* Display hostname in parent\(aqs UTS namespace. This will be
1165 different from hostname in child\(aqs UTS namespace. */
1166
1167 if (uname(&uts) == \-1)
1168 errExit("uname");
1169 printf("uts.nodename in parent: %s\\n", uts.nodename);
1170
1171 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
1172 errExit("waitpid");
1173 printf("child has terminated\\n");
1174
1175 exit(EXIT_SUCCESS);
1176 }
1177 .fi
1178 .SH SEE ALSO
1179 .BR fork (2),
1180 .BR futex (2),
1181 .BR getpid (2),
1182 .BR gettid (2),
1183 .BR kcmp (2),
1184 .BR set_thread_area (2),
1185 .BR set_tid_address (2),
1186 .BR setns (2),
1187 .BR tkill (2),
1188 .BR unshare (2),
1189 .BR wait (2),
1190 .BR capabilities (7),
1191 .BR namespaces (7),
1192 .BR pthreads (7)