]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/clone.2
clone.2: ffix
[thirdparty/man-pages.git] / man2 / clone.2
1 .\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
2 .\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013
3 .\"
4 .\" %%%LICENSE_START(GPL_NOVERSION_ONELINE)
5 .\" May be distributed under the GNU General Public License.
6 .\" %%%LICENSE_END
7 .\"
8 .\" Modified by Michael Haardt <michael@moria.de>
9 .\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
10 .\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
11 .\" New man page (copied from 'fork.2').
12 .\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
13 .\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
14 .\" Modified 26 Jun 2001 by Michael Kerrisk
15 .\" Mostly upgraded to 2.4.x
16 .\" Added prototype for sys_clone() plus description
17 .\" Added CLONE_THREAD with a brief description of thread groups
18 .\" Added CLONE_PARENT and revised entire page to remove ambiguity
19 .\" between "calling process" and "parent process"
20 .\" Added CLONE_PTRACE and CLONE_VFORK
21 .\" Added EPERM and EINVAL error codes
22 .\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
23 .\" various other minor tidy ups and clarifications.
24 .\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
25 .\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
26 .\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
27 .\" Added description for CLONE_NEWNS, which was added in 2.4.19
28 .\" Slightly rephrased, aeb.
29 .\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
30 .\" Modified 1 Jan 2004 - various updates, aeb
31 .\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
32 .\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
33 .\" wrapper under BUGS.
34 .\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
35 .\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
36 .\" 2008-11-18, mtk, order CLONE_* flags alphabetically
37 .\" 2008-11-18, mtk, document CLONE_NEWPID
38 .\" 2008-11-19, mtk, document CLONE_NEWUTS
39 .\" 2008-11-19, mtk, document CLONE_NEWIPC
40 .\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
41 .\"
42 .TH CLONE 2 2014-08-19 "Linux" "Linux Programmer's Manual"
43 .SH NAME
44 clone, __clone2 \- create a child process
45 .SH SYNOPSIS
46 .nf
47 /* Prototype for the glibc wrapper function */
48
49 .B #include <sched.h>
50
51 .BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
52 .BI " int " flags ", void *" "arg" ", ... "
53 .BI " /* pid_t *" ptid ", struct user_desc *" tls \
54 ", pid_t *" ctid " */ );"
55
56 /* Prototype for the raw system call */
57
58 .BI "long clone(unsigned long " flags ", void *" child_stack ,
59 .BI " void *" ptid ", void *" ctid ,
60 .BI " struct pt_regs *" regs );
61 .fi
62 .sp
63 .in -4n
64 Feature Test Macro Requirements for glibc wrapper function (see
65 .BR feature_test_macros (7)):
66 .in
67 .sp
68 .BR clone ():
69 .ad l
70 .RS 4
71 .PD 0
72 .TP 4
73 Since glibc 2.14:
74 _GNU_SOURCE
75 .TP 4
76 .\" See http://sources.redhat.com/bugzilla/show_bug.cgi?id=4749
77 Before glibc 2.14:
78 _BSD_SOURCE || _SVID_SOURCE
79 /* _GNU_SOURCE also suffices */
80 .PD
81 .RE
82 .ad b
83 .SH DESCRIPTION
84 .BR clone ()
85 creates a new process, in a manner similar to
86 .BR fork (2).
87
88 This page describes both the glibc
89 .BR clone ()
90 wrapper function and the underlying system call on which it is based.
91 The main text describes the wrapper function;
92 the differences for the raw system call
93 are described toward the end of this page.
94
95 Unlike
96 .BR fork (2),
97 .BR clone ()
98 allows the child process to share parts of its execution context with
99 the calling process, such as the memory space, the table of file
100 descriptors, and the table of signal handlers.
101 (Note that on this manual
102 page, "calling process" normally corresponds to "parent process".
103 But see the description of
104 .B CLONE_PARENT
105 below.)
106
107 The main use of
108 .BR clone ()
109 is to implement threads: multiple threads of control in a program that
110 run concurrently in a shared memory space.
111
112 When the child process is created with
113 .BR clone (),
114 it executes the function
115 .IR fn ( arg ).
116 (This differs from
117 .BR fork (2),
118 where execution continues in the child from the point
119 of the
120 .BR fork (2)
121 call.)
122 The
123 .I fn
124 argument is a pointer to a function that is called by the child
125 process at the beginning of its execution.
126 The
127 .I arg
128 argument is passed to the
129 .I fn
130 function.
131
132 When the
133 .IR fn ( arg )
134 function application returns, the child process terminates.
135 The integer returned by
136 .I fn
137 is the exit code for the child process.
138 The child process may also terminate explicitly by calling
139 .BR exit (2)
140 or after receiving a fatal signal.
141
142 The
143 .I child_stack
144 argument specifies the location of the stack used by the child process.
145 Since the child and calling process may share memory,
146 it is not possible for the child process to execute in the
147 same stack as the calling process.
148 The calling process must therefore
149 set up memory space for the child stack and pass a pointer to this
150 space to
151 .BR clone ().
152 Stacks grow downward on all processors that run Linux
153 (except the HP PA processors), so
154 .I child_stack
155 usually points to the topmost address of the memory space set up for
156 the child stack.
157
158 The low byte of
159 .I flags
160 contains the number of the
161 .I "termination signal"
162 sent to the parent when the child dies.
163 If this signal is specified as anything other than
164 .BR SIGCHLD ,
165 then the parent process must specify the
166 .B __WALL
167 or
168 .B __WCLONE
169 options when waiting for the child with
170 .BR wait (2).
171 If no signal is specified, then the parent process is not signaled
172 when the child terminates.
173
174 .I flags
175 may also be bitwise-or'ed with zero or more of the following constants,
176 in order to specify what is shared between the calling process
177 and the child process:
178 .TP
179 .BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
180 Erase child thread ID at location
181 .I ctid
182 in child memory when the child exits, and do a wakeup on the futex
183 at that address.
184 The address involved may be changed by the
185 .BR set_tid_address (2)
186 system call.
187 This is used by threading libraries.
188 .TP
189 .BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
190 Store child thread ID at location
191 .I ctid
192 in child memory.
193 .TP
194 .BR CLONE_FILES " (since Linux 2.0)"
195 If
196 .B CLONE_FILES
197 is set, the calling process and the child process share the same file
198 descriptor table.
199 Any file descriptor created by the calling process or by the child
200 process is also valid in the other process.
201 Similarly, if one of the processes closes a file descriptor,
202 or changes its associated flags (using the
203 .BR fcntl (2)
204 .B F_SETFD
205 operation), the other process is also affected.
206
207 If
208 .B CLONE_FILES
209 is not set, the child process inherits a copy of all file descriptors
210 opened in the calling process at the time of
211 .BR clone ().
212 (The duplicated file descriptors in the child refer to the
213 same open file descriptions (see
214 .BR open (2))
215 as the corresponding file descriptors in the calling process.)
216 Subsequent operations that open or close file descriptors,
217 or change file descriptor flags,
218 performed by either the calling
219 process or the child process do not affect the other process.
220 .TP
221 .BR CLONE_FS " (since Linux 2.0)"
222 If
223 .B CLONE_FS
224 is set, the caller and the child process share the same filesystem
225 information.
226 This includes the root of the filesystem, the current
227 working directory, and the umask.
228 Any call to
229 .BR chroot (2),
230 .BR chdir (2),
231 or
232 .BR umask (2)
233 performed by the calling process or the child process also affects the
234 other process.
235
236 If
237 .B CLONE_FS
238 is not set, the child process works on a copy of the filesystem
239 information of the calling process at the time of the
240 .BR clone ()
241 call.
242 Calls to
243 .BR chroot (2),
244 .BR chdir (2),
245 .BR umask (2)
246 performed later by one of the processes do not affect the other process.
247 .TP
248 .BR CLONE_IO " (since Linux 2.6.25)"
249 If
250 .B CLONE_IO
251 is set, then the new process shares an I/O context with
252 the calling process.
253 If this flag is not set, then (as with
254 .BR fork (2))
255 the new process has its own I/O context.
256
257 .\" The following based on text from Jens Axboe
258 The I/O context is the I/O scope of the disk scheduler (i.e.,
259 what the I/O scheduler uses to model scheduling of a process's I/O).
260 If processes share the same I/O context,
261 they are treated as one by the I/O scheduler.
262 As a consequence, they get to share disk time.
263 For some I/O schedulers,
264 .\" the anticipatory and CFQ scheduler
265 if two processes share an I/O context,
266 they will be allowed to interleave their disk access.
267 If several threads are doing I/O on behalf of the same process
268 .RB ( aio_read (3),
269 for instance), they should employ
270 .BR CLONE_IO
271 to get better I/O performance.
272 .\" with CFQ and AS.
273
274 If the kernel is not configured with the
275 .B CONFIG_BLOCK
276 option, this flag is a no-op.
277 .TP
278 .BR CLONE_NEWIPC " (since Linux 2.6.19)"
279 If
280 .B CLONE_NEWIPC
281 is set, then create the process in a new IPC namespace.
282 If this flag is not set, then (as with
283 .BR fork (2)),
284 the process is created in the same IPC namespace as
285 the calling process.
286 This flag is intended for the implementation of containers.
287
288 An IPC namespace provides an isolated view of System\ V IPC objects (see
289 .BR svipc (7))
290 and (since Linux 2.6.30)
291 .\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
292 .\" https://lwn.net/Articles/312232/
293 POSIX message queues
294 (see
295 .BR mq_overview (7)).
296 The common characteristic of these IPC mechanisms is that IPC
297 objects are identified by mechanisms other than filesystem
298 pathnames.
299
300 Objects created in an IPC namespace are visible to all other processes
301 that are members of that namespace,
302 but are not visible to processes in other IPC namespaces.
303
304 When an IPC namespace is destroyed
305 (i.e., when the last process that is a member of the namespace terminates),
306 all IPC objects in the namespace are automatically destroyed.
307
308 Only a privileged process
309 .RB ( CAP_SYS_ADMIN )
310 can employ
311 .BR CLONE_NEWIPC .
312 This flag can't be specified in conjunction with
313 .BR CLONE_SYSVSEM .
314
315 For further information on IPC namespaces, see
316 .BR namespaces (7).
317 .TP
318 .BR CLONE_NEWNET " (since Linux 2.6.24)"
319 (The implementation of this flag was completed only
320 by about kernel version 2.6.29.)
321
322 If
323 .B CLONE_NEWNET
324 is set, then create the process in a new network namespace.
325 If this flag is not set, then (as with
326 .BR fork (2))
327 the process is created in the same network namespace as
328 the calling process.
329 This flag is intended for the implementation of containers.
330
331 A network namespace provides an isolated view of the networking stack
332 (network device interfaces, IPv4 and IPv6 protocol stacks,
333 IP routing tables, firewall rules, the
334 .I /proc/net
335 and
336 .I /sys/class/net
337 directory trees, sockets, etc.).
338 A physical network device can live in exactly one
339 network namespace.
340 A virtual network device ("veth") pair provides a pipe-like abstraction
341 .\" FIXME . Add pointer to veth(4) page when it is eventually completed
342 that can be used to create tunnels between network namespaces,
343 and can be used to create a bridge to a physical network device
344 in another namespace.
345
346 When a network namespace is freed
347 (i.e., when the last process in the namespace terminates),
348 its physical network devices are moved back to the
349 initial network namespace (not to the parent of the process).
350 For further information on network namespaces, see
351 .BR namespaces (7).
352
353 Only a privileged process
354 .RB ( CAP_SYS_ADMIN )
355 can employ
356 .BR CLONE_NEWNET .
357 .TP
358 .BR CLONE_NEWNS " (since Linux 2.4.19)"
359 If
360 .B CLONE_NEWNS
361 is set, the cloned child is started in a new mount namespace,
362 initialized with a copy of the namespace of the parent.
363 If
364 .B CLONE_NEWNS
365 is not set, the child lives in the same mount
366 namespace as the parent.
367
368 For further information on mount namespaces, see
369 .BR namespaces (7).
370
371 Only a privileged process
372 .RB ( CAP_SYS_ADMIN )
373 can employ
374 .BR CLONE_NEWNS .
375 It is not permitted to specify both
376 .B CLONE_NEWNS
377 and
378 .B CLONE_FS
379 in the same
380 .BR clone ()
381 call.
382 .TP
383 .BR CLONE_NEWPID " (since Linux 2.6.24)"
384 .\" This explanation draws a lot of details from
385 .\" http://lwn.net/Articles/259217/
386 .\" Authors: Pavel Emelyanov <xemul@openvz.org>
387 .\" and Kir Kolyshkin <kir@openvz.org>
388 .\"
389 .\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
390 .\" Author: Pavel Emelyanov <xemul@openvz.org>
391 If
392 .B CLONE_NEWPID
393 is set, then create the process in a new PID namespace.
394 If this flag is not set, then (as with
395 .BR fork (2))
396 the process is created in the same PID namespace as
397 the calling process.
398 This flag is intended for the implementation of containers.
399
400 For further information on PID namespaces, see
401 .BR namespaces (7).
402
403 Only a privileged process
404 .RB ( CAP_SYS_ADMIN )
405 can employ
406 .BR CLONE_NEWPID .
407 This flag can't be specified in conjunction with
408 .BR CLONE_THREAD .
409 .TP
410 .BR CLONE_NEWUSER
411 (This flag first became meaningful for
412 .BR clone ()
413 in Linux 2.6.23,
414 the current
415 .BR clone ()
416 semantics were merged in Linux 3.5,
417 and the final pieces to make the user namespaces completely usable were
418 merged in Linux 3.8.)
419
420 If
421 .B CLONE_NEWUSER
422 is set, then create the process in a new user namespace.
423 If this flag is not set, then (as with
424 .BR fork (2))
425 the process is created in the same user namespace as the calling process.
426
427 For further information on user namespaces, see
428 .BR namespaces (7).
429
430 Before Linux 3.8, use of
431 .BR CLONE_NEWUSER
432 required that the caller have three capabilities:
433 .BR CAP_SYS_ADMIN ,
434 .BR CAP_SETUID ,
435 and
436 .BR CAP_SETGID .
437 .\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
438 Starting with Linux 3.8,
439 no privileges are needed to create a user namespace.
440 .TP
441 .BR CLONE_NEWUTS " (since Linux 2.6.19)"
442 If
443 .B CLONE_NEWUTS
444 is set, then create the process in a new UTS namespace,
445 whose identifiers are initialized by duplicating the identifiers
446 from the UTS namespace of the calling process.
447 If this flag is not set, then (as with
448 .BR fork (2))
449 the process is created in the same UTS namespace as
450 the calling process.
451 This flag is intended for the implementation of containers.
452
453 A UTS namespace is the set of identifiers returned by
454 .BR uname (2);
455 among these, the domain name and the hostname can be modified by
456 .BR setdomainname (2)
457 and
458 .BR sethostname (2),
459 respectively.
460 Changes made to the identifiers in a UTS namespace
461 are visible to all other processes in the same namespace,
462 but are not visible to processes in other UTS namespaces.
463
464 Only a privileged process
465 .RB ( CAP_SYS_ADMIN )
466 can employ
467 .BR CLONE_NEWUTS .
468
469 For further information on UTS namespaces, see
470 .BR namespaces (7).
471 .TP
472 .BR CLONE_PARENT " (since Linux 2.3.12)"
473 If
474 .B CLONE_PARENT
475 is set, then the parent of the new child (as returned by
476 .BR getppid (2))
477 will be the same as that of the calling process.
478
479 If
480 .B CLONE_PARENT
481 is not set, then (as with
482 .BR fork (2))
483 the child's parent is the calling process.
484
485 Note that it is the parent process, as returned by
486 .BR getppid (2),
487 which is signaled when the child terminates, so that
488 if
489 .B CLONE_PARENT
490 is set, then the parent of the calling process, rather than the
491 calling process itself, will be signaled.
492 .TP
493 .BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
494 Store child thread ID at location
495 .I ptid
496 in parent and child memory.
497 (In Linux 2.5.32-2.5.48 there was a flag
498 .B CLONE_SETTID
499 that did this.)
500 .TP
501 .BR CLONE_PID " (obsolete)"
502 If
503 .B CLONE_PID
504 is set, the child process is created with the same process ID as
505 the calling process.
506 This is good for hacking the system, but otherwise
507 of not much use.
508 Since 2.3.21 this flag can be
509 specified only by the system boot process (PID 0).
510 It disappeared in Linux 2.5.16.
511 .TP
512 .BR CLONE_PTRACE " (since Linux 2.2)"
513 If
514 .B CLONE_PTRACE
515 is specified, and the calling process is being traced,
516 then trace the child also (see
517 .BR ptrace (2)).
518 .TP
519 .BR CLONE_SETTLS " (since Linux 2.5.32)"
520 The
521 .I tls
522 argument is the new TLS (Thread Local Storage) descriptor.
523 (See
524 .BR set_thread_area (2).)
525 .TP
526 .BR CLONE_SIGHAND " (since Linux 2.0)"
527 If
528 .B CLONE_SIGHAND
529 is set, the calling process and the child process share the same table of
530 signal handlers.
531 If the calling process or child process calls
532 .BR sigaction (2)
533 to change the behavior associated with a signal, the behavior is
534 changed in the other process as well.
535 However, the calling process and child
536 processes still have distinct signal masks and sets of pending
537 signals.
538 So, one of them may block or unblock some signals using
539 .BR sigprocmask (2)
540 without affecting the other process.
541
542 If
543 .B CLONE_SIGHAND
544 is not set, the child process inherits a copy of the signal handlers
545 of the calling process at the time
546 .BR clone ()
547 is called.
548 Calls to
549 .BR sigaction (2)
550 performed later by one of the processes have no effect on the other
551 process.
552
553 Since Linux 2.6.0-test6,
554 .I flags
555 must also include
556 .B CLONE_VM
557 if
558 .B CLONE_SIGHAND
559 is specified.
560 .TP
561 .BR CLONE_STOPPED " (since Linux 2.6.0-test2)"
562 If
563 .B CLONE_STOPPED
564 is set, then the child is initially stopped (as though it was sent a
565 .B SIGSTOP
566 signal), and must be resumed by sending it a
567 .B SIGCONT
568 signal.
569
570 This flag was
571 .I deprecated
572 from Linux 2.6.25 onward,
573 and was
574 .I removed
575 altogether in Linux 2.6.38.
576 .\" glibc 2.8 removed this defn from bits/sched.h
577 .TP
578 .BR CLONE_SYSVSEM " (since Linux 2.5.10)"
579 If
580 .B CLONE_SYSVSEM
581 is set, then the child and the calling process share
582 a single list of System\ V semaphore undo values (see
583 .BR semop (2)).
584 If this flag is not set, then the child has a separate undo list,
585 which is initially empty.
586 .TP
587 .BR CLONE_THREAD " (since Linux 2.4.0-test8)"
588 If
589 .B CLONE_THREAD
590 is set, the child is placed in the same thread group as the calling process.
591 To make the remainder of the discussion of
592 .B CLONE_THREAD
593 more readable, the term "thread" is used to refer to the
594 processes within a thread group.
595
596 Thread groups were a feature added in Linux 2.4 to support the
597 POSIX threads notion of a set of threads that share a single PID.
598 Internally, this shared PID is the so-called
599 thread group identifier (TGID) for the thread group.
600 Since Linux 2.4, calls to
601 .BR getpid (2)
602 return the TGID of the caller.
603
604 The threads within a group can be distinguished by their (system-wide)
605 unique thread IDs (TID).
606 A new thread's TID is available as the function result
607 returned to the caller of
608 .BR clone (),
609 and a thread can obtain
610 its own TID using
611 .BR gettid (2).
612
613 When a call is made to
614 .BR clone ()
615 without specifying
616 .BR CLONE_THREAD ,
617 then the resulting thread is placed in a new thread group
618 whose TGID is the same as the thread's TID.
619 This thread is the
620 .I leader
621 of the new thread group.
622
623 A new thread created with
624 .B CLONE_THREAD
625 has the same parent process as the caller of
626 .BR clone ()
627 (i.e., like
628 .BR CLONE_PARENT ),
629 so that calls to
630 .BR getppid (2)
631 return the same value for all of the threads in a thread group.
632 When a
633 .B CLONE_THREAD
634 thread terminates, the thread that created it using
635 .BR clone ()
636 is not sent a
637 .B SIGCHLD
638 (or other termination) signal;
639 nor can the status of such a thread be obtained
640 using
641 .BR wait (2).
642 (The thread is said to be
643 .IR detached .)
644
645 After all of the threads in a thread group terminate,
646 the parent process of the thread group is sent a
647 .B SIGCHLD
648 (or other termination) signal.
649
650 If any of the threads in a thread group performs an
651 .BR execve (2),
652 then all threads other than the thread group leader are terminated,
653 and the new program is executed in the thread group leader.
654
655 If one of the threads in a thread group creates a child using
656 .BR fork (2),
657 then any thread in the group can
658 .BR wait (2)
659 for that child.
660
661 Since Linux 2.5.35,
662 .I flags
663 must also include
664 .B CLONE_SIGHAND
665 if
666 .B CLONE_THREAD
667 is specified
668 (and note that, since Linux 2.6.0-test6,
669 .BR CLONE_SIGHAND
670 also requires
671 .BR CLONE_VM
672 to be included).
673
674 Signals may be sent to a thread group as a whole (i.e., a TGID) using
675 .BR kill (2),
676 or to a specific thread (i.e., TID) using
677 .BR tgkill (2).
678
679 Signal dispositions and actions are process-wide:
680 if an unhandled signal is delivered to a thread, then
681 it will affect (terminate, stop, continue, be ignored in)
682 all members of the thread group.
683
684 Each thread has its own signal mask, as set by
685 .BR sigprocmask (2),
686 but signals can be pending either: for the whole process
687 (i.e., deliverable to any member of the thread group),
688 when sent with
689 .BR kill (2);
690 or for an individual thread, when sent with
691 .BR tgkill (2).
692 A call to
693 .BR sigpending (2)
694 returns a signal set that is the union of the signals pending for the
695 whole process and the signals that are pending for the calling thread.
696
697 If
698 .BR kill (2)
699 is used to send a signal to a thread group,
700 and the thread group has installed a handler for the signal, then
701 the handler will be invoked in exactly one, arbitrarily selected
702 member of the thread group that has not blocked the signal.
703 If multiple threads in a group are waiting to accept the same signal using
704 .BR sigwaitinfo (2),
705 the kernel will arbitrarily select one of these threads
706 to receive a signal sent using
707 .BR kill (2).
708 .TP
709 .BR CLONE_UNTRACED " (since Linux 2.5.46)"
710 If
711 .B CLONE_UNTRACED
712 is specified, then a tracing process cannot force
713 .B CLONE_PTRACE
714 on this child process.
715 .TP
716 .BR CLONE_VFORK " (since Linux 2.2)"
717 If
718 .B CLONE_VFORK
719 is set, the execution of the calling process is suspended
720 until the child releases its virtual memory
721 resources via a call to
722 .BR execve (2)
723 or
724 .BR _exit (2)
725 (as with
726 .BR vfork (2)).
727
728 If
729 .B CLONE_VFORK
730 is not set, then both the calling process and the child are schedulable
731 after the call, and an application should not rely on execution occurring
732 in any particular order.
733 .TP
734 .BR CLONE_VM " (since Linux 2.0)"
735 If
736 .B CLONE_VM
737 is set, the calling process and the child process run in the same memory
738 space.
739 In particular, memory writes performed by the calling process
740 or by the child process are also visible in the other process.
741 Moreover, any memory mapping or unmapping performed with
742 .BR mmap (2)
743 or
744 .BR munmap (2)
745 by the child or calling process also affects the other process.
746
747 If
748 .B CLONE_VM
749 is not set, the child process runs in a separate copy of the memory
750 space of the calling process at the time of
751 .BR clone ().
752 Memory writes or file mappings/unmappings performed by one of the
753 processes do not affect the other, as with
754 .BR fork (2).
755 .SS C library/kernel ABI differences
756 The raw
757 .BR clone ()
758 system call corresponds more closely to
759 .BR fork (2)
760 in that execution in the child continues from the point of the
761 call.
762 As such, the
763 .I fn
764 and
765 .I arg
766 arguments of the
767 .BR clone ()
768 wrapper function are omitted.
769 Furthermore, the argument order changes.
770 The raw system call interface on x86 and many other architectures is roughly:
771 .in +4
772 .nf
773
774 .BI "long clone(unsigned long " flags ", void *" child_stack ,
775 .BI " void *" ptid ", void *" ctid ,
776 .BI " struct pt_regs *" regs );
777
778 .fi
779 .in
780 Another difference for the raw system call is that the
781 .I child_stack
782 argument may be zero, in which case copy-on-write semantics ensure that the
783 child gets separate copies of stack pages when either process modifies
784 the stack.
785 In this case, for correct operation, the
786 .B CLONE_VM
787 option should not be specified.
788
789 For some architectures, the order of the arguments for the system call
790 differs from that shown above.
791 On the score, microblaze, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
792 and MIPS architectures,
793 the order of the fourth and fifth arguments is reversed.
794 On the cris and s390 architectures,
795 the order of the first and second arguments is reversed.
796 .SS blackfin, m68k, and sparc
797 The argument-passing conventions on
798 blackfin, m68k, and sparc are different from the descriptions above.
799 For details, see the kernel (and glibc) source.
800 .SS ia64
801 On ia64, a different interface is used:
802 .nf
803
804 .BI "int __clone2(int (*" "fn" ")(void *), "
805 .BI " void *" child_stack_base ", size_t " stack_size ,
806 .BI " int " flags ", void *" "arg" ", ... "
807 .BI " /* pid_t *" ptid ", struct user_desc *" tls \
808 ", pid_t *" ctid " */ );"
809 .fi
810 .PP
811 The prototype shown above is for the glibc wrapper function;
812 the raw system call interface has no
813 .I fn
814 or
815 .I arg
816 argument, and changes the order of the arguments so that
817 .I flags
818 is the first argument, and
819 .I tls
820 is the last argument.
821 .PP
822 .BR __clone2 ()
823 operates in the same way as
824 .BR clone (),
825 except that
826 .I child_stack_base
827 points to the lowest address of the child's stack area,
828 and
829 .I stack_size
830 specifies the size of the stack pointed to by
831 .IR child_stack_base .
832 .SS Linux 2.4 and earlier
833 In Linux 2.4 and earlier,
834 .BR clone ()
835 does not take arguments
836 .IR ptid ,
837 .IR tls ,
838 and
839 .IR ctid .
840 .SH RETURN VALUE
841 .\" gettid(2) returns current->pid;
842 .\" getpid(2) returns current->tgid;
843 On success, the thread ID of the child process is returned
844 in the caller's thread of execution.
845 On failure, \-1 is returned
846 in the caller's context, no child process will be created, and
847 .I errno
848 will be set appropriately.
849 .SH ERRORS
850 .TP
851 .B EAGAIN
852 Too many processes are already running; see
853 .BR fork (2).
854 .TP
855 .B EINVAL
856 .B CLONE_SIGHAND
857 was specified, but
858 .B CLONE_VM
859 was not.
860 (Since Linux 2.6.0-test6.)
861 .TP
862 .B EINVAL
863 .B CLONE_THREAD
864 was specified, but
865 .B CLONE_SIGHAND
866 was not.
867 (Since Linux 2.5.35.)
868 .\" .TP
869 .\" .B EINVAL
870 .\" Precisely one of
871 .\" .B CLONE_DETACHED
872 .\" and
873 .\" .B CLONE_THREAD
874 .\" was specified.
875 .\" (Since Linux 2.6.0-test6.)
876 .TP
877 .B EINVAL
878 Both
879 .B CLONE_FS
880 and
881 .B CLONE_NEWNS
882 were specified in
883 .IR flags .
884 .TP
885 .B EINVAL
886 Both
887 .B CLONE_NEWIPC
888 and
889 .B CLONE_SYSVSEM
890 were specified in
891 .IR flags .
892 .TP
893 .B EINVAL
894 Both
895 .BR CLONE_NEWPID
896 and
897 .BR CLONE_THREAD
898 were specified in
899 .IR flags .
900 .TP
901 .B EINVAL
902 Returned by
903 .BR clone ()
904 when a zero value is specified for
905 .IR child_stack .
906 .TP
907 .B EINVAL
908 .BR CLONE_NEWIPC
909 was specified in
910 .IR flags ,
911 but the kernel was not configured with the
912 .B CONFIG_SYSVIPC
913 and
914 .BR CONFIG_IPC_NS
915 options.
916 .TP
917 .B EINVAL
918 .BR CLONE_NEWNET
919 was specified in
920 .IR flags ,
921 but the kernel was not configured with the
922 .B CONFIG_NET_NS
923 option.
924 .TP
925 .B EINVAL
926 .BR CLONE_NEWPID
927 was specified in
928 .IR flags ,
929 but the kernel was not configured with the
930 .B CONFIG_PID_NS
931 option.
932 .TP
933 .B EINVAL
934 .BR CLONE_NEWUTS
935 was specified in
936 .IR flags ,
937 but the kernel was not configured with the
938 .B CONFIG_UTS_NS
939 option.
940 .TP
941 .B ENOMEM
942 Cannot allocate sufficient memory to allocate a task structure for the
943 child, or to copy those parts of the caller's context that need to be
944 copied.
945 .TP
946 .B EPERM
947 .BR CLONE_NEWIPC ,
948 .BR CLONE_NEWNET ,
949 .BR CLONE_NEWNS ,
950 .BR CLONE_NEWPID ,
951 or
952 .BR CLONE_NEWUTS
953 was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
954 .TP
955 .B EPERM
956 .B CLONE_PID
957 was specified by a process other than process 0.
958 .SH CONFORMING TO
959 .BR clone ()
960 is Linux-specific and should not be used in programs
961 intended to be portable.
962 .SH NOTES
963 In the kernel 2.4.x series,
964 .B CLONE_THREAD
965 generally does not make the parent of the new thread the same
966 as the parent of the calling process.
967 However, for kernel versions 2.4.7 to 2.4.18 the
968 .B CLONE_THREAD
969 flag implied the
970 .B CLONE_PARENT
971 flag (as in kernel 2.6).
972
973 For a while there was
974 .B CLONE_DETACHED
975 (introduced in 2.5.32):
976 parent wants no child-exit signal.
977 In 2.6.2 the need to give this
978 together with
979 .B CLONE_THREAD
980 disappeared.
981 This flag is still defined, but has no effect.
982
983 On i386,
984 .BR clone ()
985 should not be called through vsyscall, but directly through
986 .IR "int $0x80" .
987 .SH BUGS
988 Versions of the GNU C library that include the NPTL threading library
989 contain a wrapper function for
990 .BR getpid (2)
991 that performs caching of PIDs.
992 This caching relies on support in the glibc wrapper for
993 .BR clone (),
994 but as currently implemented,
995 the cache may not be up to date in some circumstances.
996 In particular,
997 if a signal is delivered to the child immediately after the
998 .BR clone ()
999 call, then a call to
1000 .BR getpid (2)
1001 in a handler for the signal may return the PID
1002 of the calling process ("the parent"),
1003 if the clone wrapper has not yet had a chance to update the PID
1004 cache in the child.
1005 (This discussion ignores the case where the child was created using
1006 .BR CLONE_THREAD ,
1007 when
1008 .BR getpid (2)
1009 .I should
1010 return the same value in the child and in the process that called
1011 .BR clone (),
1012 since the caller and the child are in the same thread group.
1013 The stale-cache problem also does not occur if the
1014 .I flags
1015 argument includes
1016 .BR CLONE_VM .)
1017 To get the truth, it may be necessary to use code such as the following:
1018 .nf
1019
1020 #include <syscall.h>
1021
1022 pid_t mypid;
1023
1024 mypid = syscall(SYS_getpid);
1025 .fi
1026 .\" See also the following bug reports
1027 .\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1028 .\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
1029 .SH EXAMPLE
1030 The following program demonstrates the use of
1031 .BR clone ()
1032 to create a child process that executes in a separate UTS namespace.
1033 The child changes the hostname in its UTS namespace.
1034 Both parent and child then display the system hostname,
1035 making it possible to see that the hostname
1036 differs in the UTS namespaces of the parent and child.
1037 For an example of the use of this program, see
1038 .BR setns (2).
1039 .SS Program source
1040 .nf
1041 #define _GNU_SOURCE
1042 #include <sys/wait.h>
1043 #include <sys/utsname.h>
1044 #include <sched.h>
1045 #include <string.h>
1046 #include <stdio.h>
1047 #include <stdlib.h>
1048 #include <unistd.h>
1049
1050 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\
1051 } while (0)
1052
1053 static int /* Start function for cloned child */
1054 childFunc(void *arg)
1055 {
1056 struct utsname uts;
1057
1058 /* Change hostname in UTS namespace of child */
1059
1060 if (sethostname(arg, strlen(arg)) == \-1)
1061 errExit("sethostname");
1062
1063 /* Retrieve and display hostname */
1064
1065 if (uname(&uts) == \-1)
1066 errExit("uname");
1067 printf("uts.nodename in child: %s\\n", uts.nodename);
1068
1069 /* Keep the namespace open for a while, by sleeping.
1070 This allows some experimentation\-\-for example, another
1071 process might join the namespace. */
1072
1073 sleep(200);
1074
1075 return 0; /* Child terminates now */
1076 }
1077
1078 #define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
1079
1080 int
1081 main(int argc, char *argv[])
1082 {
1083 char *stack; /* Start of stack buffer */
1084 char *stackTop; /* End of stack buffer */
1085 pid_t pid;
1086 struct utsname uts;
1087
1088 if (argc < 2) {
1089 fprintf(stderr, "Usage: %s <child\-hostname>\\n", argv[0]);
1090 exit(EXIT_SUCCESS);
1091 }
1092
1093 /* Allocate stack for child */
1094
1095 stack = malloc(STACK_SIZE);
1096 if (stack == NULL)
1097 errExit("malloc");
1098 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
1099
1100 /* Create child that has its own UTS namespace;
1101 child commences execution in childFunc() */
1102
1103 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1104 if (pid == \-1)
1105 errExit("clone");
1106 printf("clone() returned %ld\\n", (long) pid);
1107
1108 /* Parent falls through to here */
1109
1110 sleep(1); /* Give child time to change its hostname */
1111
1112 /* Display hostname in parent\(aqs UTS namespace. This will be
1113 different from hostname in child\(aqs UTS namespace. */
1114
1115 if (uname(&uts) == \-1)
1116 errExit("uname");
1117 printf("uts.nodename in parent: %s\\n", uts.nodename);
1118
1119 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
1120 errExit("waitpid");
1121 printf("child has terminated\\n");
1122
1123 exit(EXIT_SUCCESS);
1124 }
1125 .fi
1126 .SH SEE ALSO
1127 .BR fork (2),
1128 .BR futex (2),
1129 .BR getpid (2),
1130 .BR gettid (2),
1131 .BR kcmp (2),
1132 .BR set_thread_area (2),
1133 .BR set_tid_address (2),
1134 .BR setns (2),
1135 .BR tkill (2),
1136 .BR unshare (2),
1137 .BR wait (2),
1138 .BR proc (5),
1139 .BR capabilities (7),
1140 .BR pthreads (7)