]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/clone.2
man*/: srcfix (Use .P instead of .PP or .LP)
[thirdparty/man-pages.git] / man2 / clone.2
CommitLineData
a1eaacb1 1'\" t
fea681da 2.\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
fb1fa92b 3.\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013, 2019
2297bf0e 4.\"
95fb8859 5.\" SPDX-License-Identifier: GPL-1.0-or-later
dccaff1e 6.\"
fea681da
MK
7.\" Modified by Michael Haardt <michael@moria.de>
8.\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
9.\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
10.\" New man page (copied from 'fork.2').
11.\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
12.\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
13.\" Modified 26 Jun 2001 by Michael Kerrisk
b324e17d 14.\" Mostly upgraded to Linux 2.4.x
fea681da
MK
15.\" Added prototype for sys_clone() plus description
16.\" Added CLONE_THREAD with a brief description of thread groups
c13182ef 17.\" Added CLONE_PARENT and revised entire page to remove ambiguity
fea681da
MK
18.\" between "calling process" and "parent process"
19.\" Added CLONE_PTRACE and CLONE_VFORK
20.\" Added EPERM and EINVAL error codes
fd8a5be4 21.\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
fea681da 22.\" various other minor tidy ups and clarifications.
c11b1abf 23.\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
d9bfdb9c 24.\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
c11b1abf 25.\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
b324e17d 26.\" Added description for CLONE_NEWNS, which was added in Linux 2.4.19
fea681da
MK
27.\" Slightly rephrased, aeb.
28.\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
29.\" Modified 1 Jan 2004 - various updates, aeb
0967c11f 30.\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
d9bfdb9c 31.\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
31830ef0 32.\" wrapper under BUGS.
fd8a5be4
MK
33.\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
34.\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
4e836144 35.\" 2008-11-18, mtk, order CLONE_* flags alphabetically
82ee147a 36.\" 2008-11-18, mtk, document CLONE_NEWPID
43ce9dda 37.\" 2008-11-19, mtk, document CLONE_NEWUTS
667417b3 38.\" 2008-11-19, mtk, document CLONE_NEWIPC
cfdc761b 39.\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
fea681da 40.\"
4c1c5274 41.TH clone 2 (date) "Linux man-pages (unreleased)"
fea681da 42.SH NAME
a9e52b43 43clone, __clone2, clone3 \- create a child process
48113faf
AC
44.SH LIBRARY
45Standard C library
8fc3b2cf 46.RI ( libc ", " \-lc )
fea681da 47.SH SYNOPSIS
c10859eb 48.nf
81f10dad 49/* Prototype for the glibc wrapper function */
c6d039a3 50.P
4f71ba5d 51.B #define _GNU_SOURCE
fea681da 52.B #include <sched.h>
c6d039a3 53.P
f65432f1
AC
54.BI "int clone(int (*" "fn" ")(void *_Nullable), void *" stack \
55", int " flags ,
56.BI " void *_Nullable " "arg" ", ..." \
57" \fR/*\fP" " pid_t *_Nullable " parent_tid ,
58.BI " void *_Nullable " tls ,
59.BI " pid_t *_Nullable " child_tid " \fR*/\fP );"
c6d039a3 60.P
faa0e55a 61/* For the prototype of the raw clone() system call, see NOTES */
c6d039a3 62.P
a3805383
AC
63.BR "#include <linux/sched.h>" " /* Definition of " "struct clone_args" " */"
64.BR "#include <sched.h>" " /* Definition of " CLONE_* " constants */"
65.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
66.B #include <unistd.h>
c6d039a3 67.P
a3805383
AC
68.BI "long syscall(SYS_clone3, struct clone_args *" cl_args ", size_t " size );
69.fi
c6d039a3 70.P
bc2813df
AC
71.IR Note :
72glibc provides no wrapper for
73.BR clone3 (),
74necessitating the use of
75.BR syscall (2).
fea681da 76.SH DESCRIPTION
faa0e55a 77These system calls
8dd6b0bc 78create a new ("child") process, in a manner similar to
fea681da 79.BR fork (2).
c6d039a3 80.P
8dd6b0bc 81By contrast with
fea681da 82.BR fork (2),
225f5da8 83these system calls provide more precise control over what pieces of execution
10337567
MK
84context are shared between the calling process and the child process.
85For example, using these system calls, the caller can control whether
86or not the two processes share the virtual address space,
87the table of file descriptors, and the table of signal handlers.
225f5da8 88These system calls also allow the new child process to be placed
10337567
MK
89in separate
90.BR namespaces (7).
c6d039a3 91.P
10337567 92Note that in this manual
c13182ef 93page, "calling process" normally corresponds to "parent process".
a10c5a33 94But see the descriptions of
c13182ef 95.B CLONE_PARENT
a10c5a33
MK
96and
97.B CLONE_THREAD
10337567 98below.
c6d039a3 99.P
faa0e55a 100This page describes the following interfaces:
cdede5cd 101.IP \[bu] 3
faa0e55a
MK
102The glibc
103.BR clone ()
104wrapper function and the underlying system call on which it is based.
105The main text describes the wrapper function;
106the differences for the raw system call
107are described toward the end of this page.
cdede5cd 108.IP \[bu]
faa0e55a
MK
109The newer
110.BR clone3 ()
111system call.
c6d039a3 112.P
5261b0fe 113In the remainder of this page, the terminology "the clone call" is used
a30b6c9f 114when noting details that apply to all of these interfaces.
faa0e55a
MK
115.\"
116.SS The clone() wrapper function
faa0e55a
MK
117When the child process is created with the
118.BR clone ()
119wrapper function,
7495cbc7
MK
120it commences execution by calling the function pointed to by the argument
121.IR fn .
fea681da 122(This differs from
c13182ef 123.BR fork (2),
fea681da 124where execution continues in the child from the point
c13182ef
MK
125of the
126.BR fork (2)
fea681da
MK
127call.)
128The
fea681da 129.I arg
7495cbc7
MK
130argument is passed as the argument of the function
131.IR fn .
c6d039a3 132.P
c13182ef 133When the
fea681da 134.IR fn ( arg )
4ba17a6d 135function returns, the child process terminates.
c13182ef 136The integer returned by
fea681da 137.I fn
4ba17a6d 138is the exit status for the child process.
c13182ef 139The child process may also terminate explicitly by calling
fea681da
MK
140.BR exit (2)
141or after receiving a fatal signal.
c6d039a3 142.P
fea681da 143The
81c2368f 144.I stack
c13182ef
MK
145argument specifies the location of the stack used by the child process.
146Since the child and calling process may share memory,
fea681da 147it is not possible for the child process to execute in the
c13182ef
MK
148same stack as the calling process.
149The calling process must therefore
fea681da
MK
150set up memory space for the child stack and pass a pointer to this
151space to
edcc65ff 152.BR clone ().
5fab2e7c 153Stacks grow downward on all processors that run Linux
fea681da 154(except the HP PA processors), so
81c2368f 155.I stack
fea681da
MK
156usually points to the topmost address of the memory space set up for
157the child stack.
faa0e55a
MK
158Note that
159.BR clone ()
160does not provide a means whereby the caller can inform the kernel of the
161size of the stack area.
c6d039a3 162.P
faa0e55a
MK
163The remaining arguments to
164.BR clone ()
165are discussed below.
166.\"
167.SS clone3()
faa0e55a
MK
168The
169.BR clone3 ()
170system call provides a superset of the functionality of the older
171.BR clone ()
172interface.
173It also provides a number of API improvements, including:
174space for additional flags bits;
175cleaner separation in the use of various arguments;
176and the ability to specify the size of the child's stack area.
c6d039a3 177.P
faa0e55a
MK
178As with
179.BR fork (2),
180.BR clone3 ()
181returns in both the parent and the child.
182It returns 0 in the child process and returns the PID of the child
183in the parent.
c6d039a3 184.P
faa0e55a
MK
185The
186.I cl_args
187argument of
188.BR clone3 ()
189is a structure of the following form:
c6d039a3 190.P
faa0e55a
MK
191.in +4n
192.EX
193struct clone_args {
115b4e0e
AC
194 u64 flags; /* Flags bit mask */
195 u64 pidfd; /* Where to store PID file descriptor
196 (\fIint *\fP) */
197 u64 child_tid; /* Where to store child TID,
b957f81f 198 in child\[aq]s memory (\fIpid_t *\fP) */
115b4e0e 199 u64 parent_tid; /* Where to store child TID,
b957f81f 200 in parent\[aq]s memory (\fIpid_t *\fP) */
115b4e0e
AC
201 u64 exit_signal; /* Signal to deliver to parent on
202 child termination */
203 u64 stack; /* Pointer to lowest byte of stack */
204 u64 stack_size; /* Size of stack */
205 u64 tls; /* Location of new TLS */
206 u64 set_tid; /* Pointer to a \fIpid_t\fP array
207 (since Linux 5.5) */
208 u64 set_tid_size; /* Number of elements in \fIset_tid\fP
209 (since Linux 5.5) */
210 u64 cgroup; /* File descriptor for target cgroup
211 of child (since Linux 5.7) */
faa0e55a
MK
212};
213.EE
214.in
c6d039a3 215.P
faa0e55a
MK
216The
217.I size
218argument that is supplied to
219.BR clone3 ()
220should be initialized to the size of this structure.
221(The existence of the
222.I size
223argument permits future extensions to the
1ae6b2c7 224.I clone_args
faa0e55a 225structure.)
c6d039a3 226.P
faa0e55a
MK
227The stack for the child process is specified via
228.IR cl_args.stack ,
229which points to the lowest byte of the stack area,
230and
231.IR cl_args.stack_size ,
232which specifies the size of the stack in bytes.
233In the case where the
1ae6b2c7 234.B CLONE_VM
faa0e55a
MK
235flag (see below) is specified, a stack must be explicitly allocated
236and specified.
237Otherwise, these two fields can be specified as NULL and 0,
238which causes the child to use the same stack area as the parent
239(in the child's own virtual address space).
c6d039a3 240.P
faa0e55a
MK
241The remaining fields in the
242.I cl_args
243argument are discussed below.
244.\"
245.SS Equivalence between clone() and clone3() arguments
faa0e55a
MK
246Unlike the older
247.BR clone ()
248interface, where arguments are passed individually, in the newer
249.BR clone3 ()
250interface the arguments are packaged into the
251.I clone_args
252structure shown above.
253This structure allows for a superset of the information passed via the
254.BR clone ()
255arguments.
c6d039a3 256.P
faa0e55a
MK
257The following table shows the equivalence between the arguments of
258.BR clone ()
259and the fields in the
260.I clone_args
261argument supplied to
262.BR clone3 ():
0b174fe0 263.RS 4
faa0e55a
MK
264.TS
265lb lb lb
266l l l
267li li l.
97883fae 268clone() clone3() Notes
faa0e55a 269 \fIcl_args\fP field
3f029bc9 270flags & \[ti]0xff flags T{
0b174fe0
MK
271For most flags; details below
272T}
faa0e55a
MK
273parent_tid pidfd See CLONE_PIDFD
274child_tid child_tid See CLONE_CHILD_SETTID
275parent_tid parent_tid See CLONE_PARENT_SETTID
276flags & 0xff exit_signal
277stack stack
278\fP---\fP stack_size
279tls tls See CLONE_SETTLS
bf031aaa
AR
280\fP---\fP set_tid See below for details
281\fP---\fP set_tid_size
ed7c1377 282\fP---\fP cgroup See CLONE_INTO_CGROUP
faa0e55a
MK
283.TE
284.RE
5fbce8f2
MK
285.\"
286.SS The child termination signal
faa0e55a
MK
287When the child process terminates, a signal may be sent to the parent.
288The termination signal is specified in the low byte of
fea681da 289.I flags
faa0e55a
MK
290.RB ( clone ())
291or in
292.I cl_args.exit_signal
293.RB ( clone3 ()).
fd8a5be4 294If this signal is specified as anything other than
fea681da
MK
295.BR SIGCHLD ,
296then the parent process must specify the
c13182ef
MK
297.B __WALL
298or
fea681da 299.B __WCLONE
c13182ef
MK
300options when waiting for the child with
301.BR wait (2).
faa0e55a 302If no signal (i.e., zero) is specified, then the parent process is not signaled
fea681da 303when the child terminates.
5fbce8f2 304.\"
bf031aaa 305.SS The set_tid array
bf031aaa
AR
306By default, the kernel chooses the next sequential PID for the new
307process in each of the PID namespaces where it is present.
308When creating a process with
309.BR clone3 (),
310the
311.I set_tid
b386cee3
MK
312array (available since Linux 5.5)
313can be used to select specific PIDs for the process in some
bf031aaa 314or all of the PID namespaces where it is present.
ee8bb310 315If the PID of the newly created process should be set only for the current
bf031aaa
AR
316PID namespace or in the newly created PID namespace (if
317.I flags
318contains
319.BR CLONE_NEWPID )
320then the first element in the
321.I set_tid
322array has to be the desired PID and
323.I set_tid_size
324needs to be 1.
c6d039a3 325.P
bf031aaa 326If the PID of the newly created process should have a certain value in
ee8bb310 327multiple PID namespaces, then the
bf031aaa 328.I set_tid
09007c4b
MK
329array can have multiple entries.
330The first entry defines the PID in the most
ee8bb310
MK
331deeply nested PID namespace and each of the following entries contains
332the PID in the
333corresponding ancestor PID namespace.
09007c4b 334The number of PID namespaces in which a PID
bf031aaa
AR
335should be set is defined by
336.I set_tid_size
337which cannot be larger than the number of currently nested PID namespaces.
c6d039a3 338.P
bf031aaa 339To create a process with the following PIDs in a PID namespace hierarchy:
0b174fe0 340.RS 4
bf031aaa 341.TS
ee8bb310
MK
342lb lb lb
343l l l.
344PID NS level Requested PID Notes
3450 31496 Outermost PID namespace
bf031aaa 3461 42
ee8bb310 3472 7 Innermost PID namespace
bf031aaa
AR
348.TE
349.RE
c6d039a3 350.P
bf031aaa 351Set the array to:
c6d039a3 352.P
ee8bb310 353.in +4n
bf031aaa 354.EX
ee8bb310
MK
355set_tid[0] = 7;
356set_tid[1] = 42;
357set_tid[2] = 31496;
358set_tid_size = 3;
bf031aaa 359.EE
ee8bb310 360.in
c6d039a3 361.P
bf031aaa
AR
362If only the PIDs in the two innermost PID namespaces
363need to be specified, set the array to:
c6d039a3 364.P
ee8bb310 365.in +4n
bf031aaa 366.EX
ee8bb310
MK
367set_tid[0] = 7;
368set_tid[1] = 42;
369set_tid_size = 2;
bf031aaa 370.EE
ee8bb310 371.in
c6d039a3 372.P
bf031aaa 373The PID in the PID namespaces outside the two innermost PID namespaces
95887a00 374is selected the same way as any other PID is selected.
c6d039a3 375.P
bf031aaa
AR
376The
377.I set_tid
378feature requires
1ae6b2c7 379.B CAP_SYS_ADMIN
1e4d6750
MK
380or
381(since Linux 5.9)
382.\" commit 124ea650d3072b005457faed69909221c2905a1f
383.\" commit 1caef81da05a84a40dbf02110e967ce6d1135ff6
1ae6b2c7 384.B CAP_CHECKPOINT_RESTORE
bf031aaa 385in all owning user namespaces of the target PID namespaces.
c6d039a3 386.P
ee8bb310
MK
387Callers may only choose a PID greater than 1 in a given PID namespace
388if an
1ae6b2c7 389.B init
ee8bb310 390process (i.e., a process with PID 1) already exists in that namespace.
09007c4b 391Otherwise the PID
bf031aaa
AR
392entry for this PID namespace must be 1.
393.\"
16853a31 394.SS The flags mask
faa0e55a
MK
395Both
396.BR clone ()
397and
398.BR clone3 ()
399allow a flags bit mask that modifies their behavior
400and allows the caller to specify what is shared between the calling process
401and the child process.
36546c38 402This bit mask\[em]the
5261b0fe
MK
403.I flags
404argument of
405.BR clone ()
406or the
407.I cl_args.flags
408field passed to
36546c38 409.BR clone3 ()\[em]is
16853a31
MK
410referred to as the
411.I flags
412mask in the remainder of this page.
c6d039a3 413.P
16853a31
MK
414The
415.I flags
32b517c9 416mask is specified as a bitwise OR of zero or more of
16853a31 417the constants listed below.
5261b0fe 418Except as noted below, these flags are available
faa0e55a
MK
419(and have the same effect) in both
420.BR clone ()
421and
422.BR clone3 ().
fea681da 423.TP
f5dbc7c8 424.BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
e2bf1234 425Clear (zero) the child thread ID at the location pointed to by
81c2368f 426.I child_tid
faa0e55a
MK
427.RB ( clone ())
428or
429.I cl_args.child_tid
430.RB ( clone3 ())
f5dbc7c8
MK
431in child memory when the child exits, and do a wakeup on the futex
432at that address.
433The address involved may be changed by the
434.BR set_tid_address (2)
435system call.
436This is used by threading libraries.
437.TP
438.BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
e2bf1234 439Store the child thread ID at the location pointed to by
81c2368f 440.I child_tid
faa0e55a
MK
441.RB ( clone ())
442or
443.I cl_args.child_tid
444.RB ( clone3 ())
8ef021ea 445in the child's memory.
5261b0fe 446The store operation completes before the clone call
6ab62ed8 447returns control to user space in the child process.
5261b0fe 448(Note that the store operation may not have completed before the clone call
95887a00 449returns in the parent process, which is relevant if the
1ae6b2c7 450.B CLONE_VM
6ab62ed8 451flag is also employed.)
f5dbc7c8 452.TP
27f14b44
MK
453.BR CLONE_CLEAR_SIGHAND " (since Linux 5.5)"
454.\" commit b612e5df4587c934bd056bf05f4a1deca4de4f75
455By default, signal dispositions in the child thread are the same as
456in the parent.
457If this flag is specified,
458then all signals that are handled in the parent
459are reset to their default dispositions
460.RB ( SIG_DFL )
461in the child.
462.IP
463Specifying this flag together with
464.B CLONE_SIGHAND
465is nonsensical and disallowed.
466.TP
baa435c6
MK
467.BR CLONE_DETACHED " (historical)"
468For a while (during the Linux 2.5 development series)
b324e17d 469.\" added in Linux 2.5.32; removed in Linux 2.6.0-test4
baa435c6
MK
470there was a
471.B CLONE_DETACHED
472flag,
473which caused the parent not to receive a signal when the child terminated.
474Ultimately, the effect of this flag was subsumed under the
1ae6b2c7 475.B CLONE_THREAD
baa435c6
MK
476flag and by the time Linux 2.6.0 was released, this flag had no effect.
477Starting in Linux 2.6.2, the need to give this flag together with
478.B CLONE_THREAD
479disappeared.
480.IP
481This flag is still defined, but it is usually ignored when calling
482.BR clone ().
483However, see the description of
1ae6b2c7 484.B CLONE_PIDFD
baa435c6
MK
485for some exceptions.
486.TP
1603d6a1 487.BR CLONE_FILES " (since Linux 2.0)"
fea681da 488If
f5dbc7c8
MK
489.B CLONE_FILES
490is set, the calling process and the child process share the same file
491descriptor table.
492Any file descriptor created by the calling process or by the child
493process is also valid in the other process.
494Similarly, if one of the processes closes a file descriptor,
495or changes its associated flags (using the
496.BR fcntl (2)
497.B F_SETFD
498operation), the other process is also affected.
8a76b19e
KE
499If a process sharing a file descriptor table calls
500.BR execve (2),
501its file descriptor table is duplicated (unshared).
efeece04 502.IP
fea681da 503If
f5dbc7c8
MK
504.B CLONE_FILES
505is not set, the child process inherits a copy of all file descriptors
5261b0fe 506opened in the calling process at the time of the clone call.
f5dbc7c8
MK
507Subsequent operations that open or close file descriptors,
508or change file descriptor flags,
509performed by either the calling
510process or the child process do not affect the other process.
db8ba2b4 511Note, however,
839d161f
MK
512that the duplicated file descriptors in the child refer to the same
513open file descriptions as the corresponding file descriptors
514in the calling process,
2433365b 515and thus share file offsets and file status flags (see
db8ba2b4 516.BR open (2)).
fea681da 517.TP
1603d6a1 518.BR CLONE_FS " (since Linux 2.0)"
fea681da
MK
519If
520.B CLONE_FS
9ee4a2b6 521is set, the caller and the child process share the same filesystem
c13182ef 522information.
9ee4a2b6 523This includes the root of the filesystem, the current
c13182ef
MK
524working directory, and the umask.
525Any call to
fea681da
MK
526.BR chroot (2),
527.BR chdir (2),
528or
529.BR umask (2)
edcc65ff 530performed by the calling process or the child process also affects the
fea681da 531other process.
efeece04 532.IP
c13182ef 533If
fea681da 534.B CLONE_FS
9ee4a2b6 535is not set, the child process works on a copy of the filesystem
5261b0fe 536information of the calling process at the time of the clone call.
fea681da
MK
537Calls to
538.BR chroot (2),
539.BR chdir (2),
4ba17a6d 540or
fea681da
MK
541.BR umask (2)
542performed later by one of the processes do not affect the other process.
fea681da 543.TP
edc1b9fc
MK
544.BR CLONE_INTO_CGROUP " (since Linux 5.7)"
545.\" commit ef2c41cf38a7559bbf91af42d5b6a4429db8fc68
546By default, a child process is placed in the same version 2
547cgroup as its parent.
548The
549.B CLONE_INTO_CGROUP
17d86030 550flag allows the child process to be created in a different version 2 cgroup.
edc1b9fc 551(Note that
1ae6b2c7 552.B CLONE_INTO_CGROUP
edc1b9fc
MK
553has effect only for version 2 cgroups.)
554.IP
555In order to place the child process in a different cgroup,
556the caller specifies
1ae6b2c7 557.B CLONE_INTO_CGROUP
edc1b9fc
MK
558in
559.I cl_args.flags
560and passes a file descriptor that refers to a version 2 cgroup in the
561.I cl_args.cgroup
562field.
17d86030 563(This file descriptor can be obtained by opening a cgroup v2 directory
edc1b9fc
MK
564using either the
565.B O_RDONLY
566or the
567.B O_PATH
568flag.)
569Note that all of the usual restrictions (described in
570.BR cgroups (7))
571on placing a process into a version 2 cgroup apply.
572.IP
b3041511 573Among the possible use cases for
1ae6b2c7 574.B CLONE_INTO_CGROUP
b3041511
MK
575are the following:
576.RS
cdede5cd 577.IP \[bu] 3
edc1b9fc
MK
578Spawning a process into a cgroup different from the parent's cgroup
579makes it possible for a service manager to directly spawn new
580services into dedicated cgroups.
581This eliminates the accounting
582jitter that would be caused if the child process was first created in the
583same cgroup as the parent and then
584moved into the target cgroup.
4fe3acd9
MK
585Furthermore, spawning the child process directly into a target cgroup
586is significantly cheaper than moving the child process into
587the target cgroup after it has been created.
cdede5cd 588.IP \[bu]
edc1b9fc 589The
1ae6b2c7 590.B CLONE_INTO_CGROUP
edc1b9fc
MK
591flag also allows the creation of
592frozen child processes by spawning them into a frozen cgroup.
593(See
594.BR cgroups (7)
595for a description of the freezer controller.)
cdede5cd 596.IP \[bu]
edc1b9fc
MK
597For threaded applications (or even thread implementations which
598make use of cgroups to limit individual threads), it is possible to
599establish a fixed cgroup layout before spawning each thread
600directly into its target cgroup.
b3041511 601.RE
edc1b9fc 602.TP
a4cc375e 603.BR CLONE_IO " (since Linux 2.6.25)"
11f27a1c
JA
604If
605.B CLONE_IO
606is set, then the new process shares an I/O context with
607the calling process.
608If this flag is not set, then (as with
609.BR fork (2))
610the new process has its own I/O context.
efeece04 611.IP
11f27a1c 612.\" The following based on text from Jens Axboe
d1f84ed7 613The I/O context is the I/O scope of the disk scheduler (i.e.,
11f27a1c
JA
614what the I/O scheduler uses to model scheduling of a process's I/O).
615If processes share the same I/O context,
616they are treated as one by the I/O scheduler.
617As a consequence, they get to share disk time.
618For some I/O schedulers,
619.\" the anticipatory and CFQ scheduler
620if two processes share an I/O context,
621they will be allowed to interleave their disk access.
622If several threads are doing I/O on behalf of the same process
623.RB ( aio_read (3),
624for instance), they should employ
1ae6b2c7 625.B CLONE_IO
11f27a1c
JA
626to get better I/O performance.
627.\" with CFQ and AS.
efeece04 628.IP
11f27a1c
JA
629If the kernel is not configured with the
630.B CONFIG_BLOCK
631option, this flag is a no-op.
632.TP
c5af0674
MK
633.BR CLONE_NEWCGROUP " (since Linux 4.6)"
634Create the process in a new cgroup namespace.
635If this flag is not set, then (as with
636.BR fork (2))
637the process is created in the same cgroup namespaces as the calling process.
efeece04 638.IP
c5af0674 639For further information on cgroup namespaces, see
b9fe4bc3 640.BR cgroup_namespaces (7).
efeece04 641.IP
c5af0674
MK
642Only a privileged process
643.RB ( CAP_SYS_ADMIN )
644can employ
645.BR CLONE_NEWCGROUP .
646.\"
647.TP
8722311b 648.BR CLONE_NEWIPC " (since Linux 2.6.19)"
667417b3
MK
649If
650.B CLONE_NEWIPC
651is set, then create the process in a new IPC namespace.
652If this flag is not set, then (as with
06b30458 653.BR fork (2)),
667417b3
MK
654the process is created in the same IPC namespace as
655the calling process.
efeece04 656.IP
981eda4a
MK
657For further information on IPC namespaces, see
658.BR ipc_namespaces (7).
659.IP
ab5dd83f
MK
660Only a privileged process
661.RB ( CAP_SYS_ADMIN )
662can employ
663.BR CLONE_NEWIPC .
667417b3
MK
664This flag can't be specified in conjunction with
665.BR CLONE_SYSVSEM .
666.TP
163bf178 667.BR CLONE_NEWNET " (since Linux 2.6.24)"
33a0ccb2 668(The implementation of this flag was completed only
b324e17d 669by about Linux 2.6.29.)
efeece04 670.IP
163bf178
MK
671If
672.B CLONE_NEWNET
673is set, then create the process in a new network namespace.
674If this flag is not set, then (as with
57ef8c39 675.BR fork (2))
163bf178
MK
676the process is created in the same network namespace as
677the calling process.
efeece04 678.IP
73680728 679For further information on network namespaces, see
40002795 680.BR network_namespaces (7).
efeece04 681.IP
ab5dd83f
MK
682Only a privileged process
683.RB ( CAP_SYS_ADMIN )
684can employ
685.BR CLONE_NEWNET .
163bf178 686.TP
c10859eb 687.BR CLONE_NEWNS " (since Linux 2.4.19)"
3dd2331c
MK
688If
689.B CLONE_NEWNS
690is set, the cloned child is started in a new mount namespace,
691initialized with a copy of the namespace of the parent.
692If
fea681da 693.B CLONE_NEWNS
3dd2331c 694is not set, the child lives in the same mount
4df2eb09 695namespace as the parent.
efeece04 696.IP
981eda4a
MK
697For further information on mount namespaces, see
698.BR namespaces (7)
699and
700.BR mount_namespaces (7).
701.IP
ab5dd83f
MK
702Only a privileged process
703.RB ( CAP_SYS_ADMIN )
704can employ
705.BR CLONE_NEWNS .
fea681da
MK
706It is not permitted to specify both
707.B CLONE_NEWNS
708and
709.B CLONE_FS
9219d208 710.\" See https://lwn.net/Articles/543273/
5261b0fe 711in the same clone call.
9d005472
MK
712.TP
713.BR CLONE_NEWPID " (since Linux 2.6.24)"
714.\" This explanation draws a lot of details from
715.\" http://lwn.net/Articles/259217/
716.\" Authors: Pavel Emelyanov <xemul@openvz.org>
717.\" and Kir Kolyshkin <kir@openvz.org>
718.\"
719.\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
720.\" Author: Pavel Emelyanov <xemul@openvz.org>
721If
722.B CLONE_NEWPID
723is set, then create the process in a new PID namespace.
724If this flag is not set, then (as with
725.BR fork (2))
726the process is created in the same PID namespace as
727the calling process.
efeece04 728.IP
9d005472 729For further information on PID namespaces, see
7e0e902b
MK
730.BR namespaces (7)
731and
39b3f005 732.BR pid_namespaces (7).
efeece04 733.IP
ab5dd83f
MK
734Only a privileged process
735.RB ( CAP_SYS_ADMIN )
736can employ
737.BR CLONE_NEWPID .
9d005472 738This flag can't be specified in conjunction with
6d95f767 739.BR CLONE_THREAD .
70d21f17 740.TP
1ae6b2c7 741.B CLONE_NEWUSER
06b30458
MK
742(This flag first became meaningful for
743.BR clone ()
4d2b3ed7
MK
744in Linux 2.6.23,
745the current
11a38815 746.BR clone ()
4d2b3ed7
MK
747semantics were merged in Linux 3.5,
748and the final pieces to make the user namespaces completely usable were
749merged in Linux 3.8.)
efeece04 750.IP
70d21f17
EB
751If
752.B CLONE_NEWUSER
06b30458
MK
753is set, then create the process in a new user namespace.
754If this flag is not set, then (as with
57ef8c39 755.BR fork (2))
70d21f17 756the process is created in the same user namespace as the calling process.
efeece04 757.IP
981eda4a
MK
758For further information on user namespaces, see
759.BR namespaces (7)
760and
761.BR user_namespaces (7).
762.IP
fefbcba8 763Before Linux 3.8, use of
1ae6b2c7 764.B CLONE_NEWUSER
fefbcba8
MK
765required that the caller have three capabilities:
766.BR CAP_SYS_ADMIN ,
767.BR CAP_SETUID ,
768and
769.BR CAP_SETGID .
770.\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
06b30458 771Starting with Linux 3.8,
9d005472 772no privileges are needed to create a user namespace.
efeece04 773.IP
5e72cf7d 774This flag can't be specified in conjunction with
1ae6b2c7 775.B CLONE_THREAD
5e72cf7d
MK
776or
777.BR CLONE_PARENT .
778For security reasons,
779.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
780.\" https://lwn.net/Articles/543273/
b324e17d
AC
781.\" The fix actually went into Linux 3.9 and into Linux 3.8.3. However, user namespaces
782.\" were, for practical purposes, unusable in earlier Linux 3.8.x because of the
ab3311aa 783.\" various filesystems that didn't support userns.
1ae6b2c7 784.B CLONE_NEWUSER
f0007192 785cannot be specified in conjunction with
5e72cf7d 786.BR CLONE_FS .
82ee147a 787.TP
43ce9dda
MK
788.BR CLONE_NEWUTS " (since Linux 2.6.19)"
789If
790.B CLONE_NEWUTS
e1b11906
MK
791is set, then create the process in a new UTS namespace,
792whose identifiers are initialized by duplicating the identifiers
793from the UTS namespace of the calling process.
43ce9dda 794If this flag is not set, then (as with
57ef8c39 795.BR fork (2))
43ce9dda
MK
796the process is created in the same UTS namespace as
797the calling process.
efeece04 798.IP
981eda4a
MK
799For further information on UTS namespaces, see
800.BR uts_namespaces (7).
801.IP
ab5dd83f
MK
802Only a privileged process
803.RB ( CAP_SYS_ADMIN )
804can employ
805.BR CLONE_NEWUTS .
43ce9dda 806.TP
f5dbc7c8
MK
807.BR CLONE_PARENT " (since Linux 2.3.12)"
808If
809.B CLONE_PARENT
810is set, then the parent of the new child (as returned by
811.BR getppid (2))
812will be the same as that of the calling process.
efeece04 813.IP
f5dbc7c8
MK
814If
815.B CLONE_PARENT
816is not set, then (as with
817.BR fork (2))
818the child's parent is the calling process.
efeece04 819.IP
f5dbc7c8
MK
820Note that it is the parent process, as returned by
821.BR getppid (2),
822which is signaled when the child terminates, so that
823if
824.B CLONE_PARENT
825is set, then the parent of the calling process, rather than the
95887a00 826calling process itself, is signaled.
a17b9d28 827.IP
4269a6ab 828The
a17b9d28 829.B CLONE_PARENT
4269a6ab
MK
830flag can't be used in clone calls by the
831global init process (PID 1 in the initial PID namespace)
832and init processes in other PID namespaces.
833This restriction prevents the creation of multi-rooted process trees
834as well as the creation of unreapable zombies in the initial PID namespace.
f5dbc7c8
MK
835.TP
836.BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
e2bf1234 837Store the child thread ID at the location pointed to by
81c2368f 838.I parent_tid
faa0e55a
MK
839.RB ( clone ())
840or
d5d482ec 841.I cl_args.parent_tid
faa0e55a 842.RB ( clone3 ())
8ef021ea 843in the parent's memory.
f5dbc7c8
MK
844(In Linux 2.5.32-2.5.48 there was a flag
845.B CLONE_SETTID
846that did this.)
5261b0fe 847The store operation completes before the clone call
b5da2f91 848returns control to user space.
f5dbc7c8 849.TP
b324e17d 850.BR CLONE_PID " (Linux 2.0 to Linux 2.5.15)"
f5dbc7c8
MK
851If
852.B CLONE_PID
853is set, the child process is created with the same process ID as
854the calling process.
855This is good for hacking the system, but otherwise
856of not much use.
1c173eb3 857From Linux 2.3.21 onward, this flag could be
f5dbc7c8 858specified only by the system boot process (PID 0).
1c173eb3 859The flag disappeared completely from the kernel sources in Linux 2.5.16.
f5d5180f 860Subsequently, the kernel silently ignored this bit if it was specified in the
1ae6b2c7 861.I flags
16853a31 862mask.
f5d5180f
MK
863Much later, the same bit was recycled for use as the
864.B CLONE_PIDFD
865flag.
f5dbc7c8 866.TP
9f938981 867.BR CLONE_PIDFD " (since Linux 5.2)"
4e98b074 868.\" commit b3e5838252665ee4cfa76b82bdf1198dca81e5be
faa0e55a
MK
869If this flag is specified,
870a PID file descriptor referring to the child process is allocated
871and placed at a specified location in the parent's memory.
b4ebffb2 872The close-on-exec flag is set on this new file descriptor.
34a975f8
MK
873PID file descriptors can be used for the purposes described in
874.BR pidfd_open (2).
faa0e55a 875.RS
cdede5cd 876.IP \[bu] 3
faa0e55a
MK
877When using
878.BR clone3 (),
879the PID file descriptor is placed at the location pointed to by
880.IR cl_args.pidfd .
cdede5cd 881.IP \[bu]
faa0e55a
MK
882When using
883.BR clone (),
884the PID file descriptor is placed at the location pointed to by
885.IR parent_tid .
9f938981 886Since the
81c2368f 887.I parent_tid
b97cc7ae 888argument is used to return the PID file descriptor,
9f938981
CB
889.B CLONE_PIDFD
890cannot be used with
faa0e55a
MK
891.B CLONE_PARENT_SETTID
892when calling
893.BR clone ().
894.RE
9f938981
CB
895.IP
896It is currently not possible to use this flag together with
897.BR CLONE_THREAD .
b97cc7ae 898This means that the process identified by the PID file descriptor
f6183e5b 899will always be a thread group leader.
9f938981 900.IP
baa435c6 901If the obsolete
9f938981 902.B CLONE_DETACHED
baa435c6 903flag is specified alongside
1ae6b2c7 904.B CLONE_PIDFD
baa435c6
MK
905when calling
906.BR clone (),
4e98b074 907an error is returned.
baa435c6
MK
908An error also results if
909.B CLONE_DETACHED
910is specified when calling
911.BR clone3 ().
912This error behavior ensures that the bit corresponding to
1ae6b2c7 913.B CLONE_DETACHED
baa435c6 914can be reused for further PID file descriptor features in the future.
9f938981 915.TP
1603d6a1 916.BR CLONE_PTRACE " (since Linux 2.2)"
f5dbc7c8
MK
917If
918.B CLONE_PTRACE
919is specified, and the calling process is being traced,
920then trace the child also (see
921.BR ptrace (2)).
922.TP
923.BR CLONE_SETTLS " (since Linux 2.5.32)"
dd6d3d2e 924The TLS (Thread Local Storage) descriptor is set to
81c2368f 925.IR tls .
efeece04 926.IP
dd6d3d2e 927The interpretation of
81c2368f 928.I tls
dd6d3d2e
KF
929and the resulting effect is architecture dependent.
930On x86,
81c2368f 931.I tls
dd6d3d2e 932is interpreted as a
1ae6b2c7 933.I struct user_desc\~*
35bf8cb4 934(see
dd6d3d2e 935.BR set_thread_area (2)).
9ea5bc66 936On x86-64 it is the new value to be set for the %fs base register
35bf8cb4 937(see the
2551f801 938.B ARCH_SET_FS
dd6d3d2e
KF
939argument to
940.BR arch_prctl (2)).
941On architectures with a dedicated TLS register, it is the new value
942of that register.
f5d5180f
MK
943.IP
944Use of this flag requires detailed knowledge and generally it
945should not be used except in libraries implementing threading.
f5dbc7c8 946.TP
1603d6a1 947.BR CLONE_SIGHAND " (since Linux 2.0)"
fea681da
MK
948If
949.B CLONE_SIGHAND
314c8ff4 950is set, the calling process and the child process share the same table of
c13182ef
MK
951signal handlers.
952If the calling process or child process calls
fea681da 953.BR sigaction (2)
c13182ef
MK
954to change the behavior associated with a signal, the behavior is
955changed in the other process as well.
956However, the calling process and child
fea681da 957processes still have distinct signal masks and sets of pending
c13182ef 958signals.
4ba17a6d 959So, one of them may block or unblock signals using
fea681da
MK
960.BR sigprocmask (2)
961without affecting the other process.
efeece04 962.IP
fea681da
MK
963If
964.B CLONE_SIGHAND
965is not set, the child process inherits a copy of the signal handlers
5261b0fe 966of the calling process at the time of the clone call.
c13182ef 967Calls to
fea681da
MK
968.BR sigaction (2)
969performed later by one of the processes have no effect on the other
970process.
efeece04 971.IP
d6bec36e
MK
972Since Linux 2.6.0,
973.\" Precisely: Linux 2.6.0-test6
16853a31 974the
29546c24 975.I flags
16853a31 976mask must also include
29546c24
MK
977.B CLONE_VM
978if
979.B CLONE_SIGHAND
fe10d82f 980is specified.
fea681da 981.TP
d6bec36e
MK
982.BR CLONE_STOPPED " (since Linux 2.6.0)"
983.\" Precisely: Linux 2.6.0-test2
a69b6bda
MK
984If
985.B CLONE_STOPPED
986is set, then the child is initially stopped (as though it was sent a
987.B SIGSTOP
988signal), and must be resumed by sending it a
989.B SIGCONT
990signal.
efeece04 991.IP
a60450a9
MK
992This flag was
993.I deprecated
994from Linux 2.6.25 onward,
995and was
996.I removed
28b44abc
MK
997altogether in Linux 2.6.38.
998Since then, the kernel silently ignores it without error.
a5a061ee 999.\" glibc 2.8 removed this defn from bits/sched.h
c5af0674 1000Starting with Linux 4.6, the same bit was reused for the
1ae6b2c7 1001.B CLONE_NEWCGROUP
c5af0674 1002flag.
a69b6bda 1003.TP
f5dbc7c8 1004.BR CLONE_SYSVSEM " (since Linux 2.5.10)"
fea681da 1005If
f5dbc7c8
MK
1006.B CLONE_SYSVSEM
1007is set, then the child and the calling process share
5ada4b94
MK
1008a single list of System V semaphore adjustment
1009.RI ( semadj )
1010values (see
f5dbc7c8 1011.BR semop (2)).
5ada4b94
MK
1012In this case, the shared list accumulates
1013.I semadj
1014values across all processes sharing the list,
1015and semaphore adjustments are performed only when the last process
1016that is sharing the list terminates (or ceases sharing the list using
1017.BR unshare (2)).
f5d401dd 1018If this flag is not set, then the child has a separate
5ada4b94
MK
1019.I semadj
1020list that is initially empty.
fea681da 1021.TP
d6bec36e
MK
1022.BR CLONE_THREAD " (since Linux 2.4.0)"
1023.\" Precisely: Linux 2.4.0-test8
fea681da
MK
1024If
1025.B CLONE_THREAD
1026is set, the child is placed in the same thread group as the calling process.
fd8a5be4
MK
1027To make the remainder of the discussion of
1028.B CLONE_THREAD
1029more readable, the term "thread" is used to refer to the
1030processes within a thread group.
efeece04 1031.IP
fd8a5be4
MK
1032Thread groups were a feature added in Linux 2.4 to support the
1033POSIX threads notion of a set of threads that share a single PID.
1034Internally, this shared PID is the so-called
1035thread group identifier (TGID) for the thread group.
c13182ef 1036Since Linux 2.4, calls to
fea681da 1037.BR getpid (2)
fd8a5be4 1038return the TGID of the caller.
efeece04 1039.IP
fd8a5be4
MK
1040The threads within a group can be distinguished by their (system-wide)
1041unique thread IDs (TID).
1042A new thread's TID is available as the function result
5261b0fe 1043returned to the caller,
fd8a5be4
MK
1044and a thread can obtain
1045its own TID using
1046.BR gettid (2).
efeece04 1047.IP
5261b0fe 1048When a clone call is made without specifying
fd8a5be4
MK
1049.BR CLONE_THREAD ,
1050then the resulting thread is placed in a new thread group
1051whose TGID is the same as the thread's TID.
1052This thread is the
1053.I leader
1054of the new thread group.
efeece04 1055.IP
fd8a5be4
MK
1056A new thread created with
1057.B CLONE_THREAD
5261b0fe 1058has the same parent process as the process that made the clone call
c13182ef 1059(i.e., like
fd8a5be4
MK
1060.BR CLONE_PARENT ),
1061so that calls to
1062.BR getppid (2)
1063return the same value for all of the threads in a thread group.
1064When a
c13182ef 1065.B CLONE_THREAD
5261b0fe 1066thread terminates, the thread that created it is not sent a
fd8a5be4
MK
1067.B SIGCHLD
1068(or other termination) signal;
1069nor can the status of such a thread be obtained
1070using
1071.BR wait (2).
1072(The thread is said to be
1073.IR detached .)
efeece04 1074.IP
e2fbf61d
MK
1075After all of the threads in a thread group terminate,
1076the parent process of the thread group is sent a
fd8a5be4
MK
1077.B SIGCHLD
1078(or other termination) signal.
efeece04 1079.IP
fd8a5be4
MK
1080If any of the threads in a thread group performs an
1081.BR execve (2),
1082then all threads other than the thread group leader are terminated,
1083and the new program is executed in the thread group leader.
efeece04 1084.IP
f7110f60
MK
1085If one of the threads in a thread group creates a child using
1086.BR fork (2),
1087then any thread in the group can
1088.BR wait (2)
1089for that child.
efeece04 1090.IP
16853a31 1091Since Linux 2.5.35, the
fd8a5be4 1092.I flags
16853a31 1093mask must also include
fd8a5be4
MK
1094.B CLONE_SIGHAND
1095if
1096.B CLONE_THREAD
6fd69f33 1097is specified
d6bec36e
MK
1098(and note that, since Linux 2.6.0,
1099.\" Precisely: Linux 2.6.0-test6
1ae6b2c7 1100.B CLONE_SIGHAND
6fd69f33 1101also requires
1ae6b2c7 1102.B CLONE_VM
6fd69f33 1103to be included).
efeece04 1104.IP
e2fbf61d
MK
1105Signal dispositions and actions are process-wide:
1106if an unhandled signal is delivered to a thread, then
1107it will affect (terminate, stop, continue, be ignored in)
1108all members of the thread group.
efeece04 1109.IP
99408a60 1110Each thread has its own signal mask, as set by
f957eebd
MK
1111.BR sigprocmask (2).
1112.IP
1113A signal may be process-directed or thread-directed.
1114A process-directed signal is targeted at a thread group (i.e., a TGID),
1115and is delivered to an arbitrarily selected thread from among those
1116that are not blocking the signal.
ed4f87f0 1117A signal may be process-directed because it was generated by the kernel
f957eebd
MK
1118for reasons other than a hardware exception, or because it was sent using
1119.BR kill (2)
1120or
1121.BR sigqueue (3).
1122A thread-directed signal is targeted at (i.e., delivered to)
1123a specific thread.
1124A signal may be thread-directed because it was sent using
1125.BR tgkill (2)
1126or
1127.BR pthread_sigqueue (3),
1128or because the thread executed a machine language instruction that triggered
1129a hardware exception
1130(e.g., invalid memory access triggering
1ae6b2c7 1131.B SIGSEGV
f957eebd
MK
1132or a floating-point exception triggering
1133.BR SIGFPE ).
1134.IP
99408a60
MK
1135A call to
1136.BR sigpending (2)
f957eebd
MK
1137returns a signal set that is the union of the pending process-directed
1138signals and the signals that are pending for the calling thread.
efeece04 1139.IP
475c2753 1140If a process-directed signal is delivered to a thread group,
e2fbf61d 1141and the thread group has installed a handler for the signal, then
95887a00 1142the handler is invoked in exactly one, arbitrarily selected
e2fbf61d 1143member of the thread group that has not blocked the signal.
c13182ef 1144If multiple threads in a group are waiting to accept the same signal using
e2fbf61d
MK
1145.BR sigwaitinfo (2),
1146the kernel will arbitrarily select one of these threads
475c2753 1147to receive the signal.
a69b6bda 1148.TP
f5dbc7c8 1149.BR CLONE_UNTRACED " (since Linux 2.5.46)"
a69b6bda 1150If
f5dbc7c8
MK
1151.B CLONE_UNTRACED
1152is specified, then a tracing process cannot force
1153.B CLONE_PTRACE
1154on this child process.
fea681da 1155.TP
1603d6a1 1156.BR CLONE_VFORK " (since Linux 2.2)"
f5dbc7c8
MK
1157If
1158.B CLONE_VFORK
1159is set, the execution of the calling process is suspended
1160until the child releases its virtual memory
1161resources via a call to
1162.BR execve (2)
1163or
1164.BR _exit (2)
1165(as with
1166.BR vfork (2)).
efeece04 1167.IP
f5dbc7c8
MK
1168If
1169.B CLONE_VFORK
4b4a853a 1170is not set, then both the calling process and the child are schedulable
f5dbc7c8
MK
1171after the call, and an application should not rely on execution occurring
1172in any particular order.
fea681da 1173.TP
1603d6a1 1174.BR CLONE_VM " (since Linux 2.0)"
f5dbc7c8
MK
1175If
1176.B CLONE_VM
1177is set, the calling process and the child process run in the same memory
1178space.
1179In particular, memory writes performed by the calling process
1180or by the child process are also visible in the other process.
1181Moreover, any memory mapping or unmapping performed with
1182.BR mmap (2)
1183or
1184.BR munmap (2)
1185by the child or calling process also affects the other process.
efeece04 1186.IP
f5dbc7c8
MK
1187If
1188.B CLONE_VM
1189is not set, the child process runs in a separate copy of the memory
5261b0fe 1190space of the calling process at the time of the clone call.
f5dbc7c8
MK
1191Memory writes or file mappings/unmappings performed by one of the
1192processes do not affect the other, as with
1193.BR fork (2).
52e5819c
MK
1194.IP
1195If the
1ae6b2c7 1196.B CLONE_VM
52e5819c 1197flag is specified and the
1ae6b2c7 1198.B CLONE_VFORK
52e5819c
MK
1199flag is not specified,
1200then any alternate signal stack that was established by
1201.BR sigaltstack (2)
1202is cleared in the child process.
47297adb 1203.SH RETURN VALUE
0bfa087b
MK
1204.\" gettid(2) returns current->pid;
1205.\" getpid(2) returns current->tgid;
fea681da 1206On success, the thread ID of the child process is returned
c13182ef 1207in the caller's thread of execution.
84811e86 1208On failure, \-1 is returned
95887a00 1209in the caller's context, no child process is created, and
fea681da 1210.I errno
f6a4078b 1211is set to indicate the error.
fea681da
MK
1212.SH ERRORS
1213.TP
3396ec7b
AC
1214.BR EACCES " (" clone3 "() only)"
1215.B CLONE_INTO_CGROUP
1216was specified in
1217.IR cl_args.flags ,
1218but the restrictions (described in
1219.BR cgroups (7))
1220on placing the child process into the version 2 cgroup referred to by
1221.I cl_args.cgroup
1222are not met.
1223.TP
fea681da 1224.B EAGAIN
e1b6e186
MK
1225Too many processes are already running; see
1226.BR fork (2).
fea681da 1227.TP
6ba79da9
MK
1228.BR EBUSY " (" clone3 "() only)"
1229.B CLONE_INTO_CGROUP
1230was specified in
1231.IR cl_args.flags ,
1232but the file descriptor specified in
1ae6b2c7 1233.I cl_args.cgroup
6ba79da9
MK
1234refers to a version 2 cgroup in which a domain controller is enabled.
1235.TP
bf031aaa 1236.BR EEXIST " (" clone3 "() only)"
ee8bb310 1237One (or more) of the PIDs specified in
bf031aaa
AR
1238.I set_tid
1239already exists in the corresponding PID namespace.
1240.TP
fea681da 1241.B EINVAL
27f14b44
MK
1242Both
1243.B CLONE_SIGHAND
1244and
1245.B CLONE_CLEAR_SIGHAND
1246were specified in the
1247.I flags
1248mask.
1249.TP
1250.B EINVAL
fea681da 1251.B CLONE_SIGHAND
16853a31
MK
1252was specified in the
1253.I flags
1254mask, but
fea681da 1255.B CLONE_VM
2e8a7fb3 1256was not.
d6bec36e
MK
1257(Since Linux 2.6.0.)
1258.\" Precisely: Linux 2.6.0-test6
fea681da
MK
1259.TP
1260.B EINVAL
1261.B CLONE_THREAD
16853a31
MK
1262was specified in the
1263.I flags
1264mask, but
fea681da 1265.B CLONE_SIGHAND
6387216b
MK
1266was not.
1267(Since Linux 2.5.35.)
29546c24
MK
1268.\" .TP
1269.\" .B EINVAL
1270.\" Precisely one of
1271.\" .B CLONE_DETACHED
1272.\" and
1273.\" .B CLONE_THREAD
6387216b
MK
1274.\" was specified.
1275.\" (Since Linux 2.6.0-test6.)
fea681da
MK
1276.TP
1277.B EINVAL
d6868c69 1278.B CLONE_THREAD
16853a31
MK
1279was specified in the
1280.I flags
1281mask, but the current process previously called
d6868c69
JH
1282.BR unshare (2)
1283with the
1284.B CLONE_NEWPID
1285flag or used
1286.BR setns (2)
1287to reassociate itself with a PID namespace.
1288.TP
1289.B EINVAL
d34e5645 1290.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
fea681da
MK
1291Both
1292.B CLONE_FS
1293and
1294.B CLONE_NEWNS
16853a31 1295were specified in the
1ae6b2c7 1296.I flags
16853a31 1297mask.
fea681da 1298.TP
d34e5645
MK
1299.BR EINVAL " (since Linux 3.9)"
1300Both
1301.B CLONE_NEWUSER
1302and
1303.B CLONE_FS
16853a31 1304were specified in the
1ae6b2c7 1305.I flags
16853a31 1306mask.
d34e5645 1307.TP
fea681da 1308.B EINVAL
82ee147a 1309Both
667417b3
MK
1310.B CLONE_NEWIPC
1311and
1312.B CLONE_SYSVSEM
16853a31 1313were specified in the
1ae6b2c7 1314.I flags
16853a31 1315mask.
667417b3
MK
1316.TP
1317.B EINVAL
1ae6b2c7 1318.B CLONE_NEWPID
f0007192 1319and one (or both) of
1ae6b2c7 1320.B CLONE_THREAD
f0007192 1321or
1ae6b2c7 1322.B CLONE_PARENT
16853a31 1323were specified in the
1ae6b2c7 1324.I flags
16853a31 1325mask.
82ee147a 1326.TP
6d95f767
SD
1327.B EINVAL
1328.B CLONE_NEWUSER
1329and
1330.B CLONE_THREAD
1331were specified in the
1332.I flags
1333mask.
1334.TP
be479fdf
MK
1335.BR EINVAL " (since Linux 2.6.32)"
1336.\" commit 123be07b0b399670a7cc3d82fef0cb4f93ef885c
1ae6b2c7 1337.B CLONE_PARENT
be479fdf
MK
1338was specified, and the caller is an init process.
1339.TP
82ee147a 1340.B EINVAL
d4748fad 1341Returned by the glibc
edcc65ff 1342.BR clone ()
d4748fad 1343wrapper function when
1ae6b2c7 1344.I fn
d4748fad 1345or
1ae6b2c7 1346.I stack
d4748fad 1347is specified as NULL.
fea681da 1348.TP
28cad2c1 1349.B EINVAL
1ae6b2c7 1350.B CLONE_NEWIPC
16853a31 1351was specified in the
1ae6b2c7 1352.I flags
16853a31 1353mask,
667417b3
MK
1354but the kernel was not configured with the
1355.B CONFIG_SYSVIPC
1356and
1ae6b2c7 1357.B CONFIG_IPC_NS
667417b3
MK
1358options.
1359.TP
1360.B EINVAL
1ae6b2c7 1361.B CLONE_NEWNET
16853a31 1362was specified in the
1ae6b2c7 1363.I flags
16853a31 1364mask,
163bf178
MK
1365but the kernel was not configured with the
1366.B CONFIG_NET_NS
1367option.
1368.TP
1369.B EINVAL
1ae6b2c7 1370.B CLONE_NEWPID
16853a31 1371was specified in the
1ae6b2c7 1372.I flags
16853a31 1373mask,
28cad2c1
MK
1374but the kernel was not configured with the
1375.B CONFIG_PID_NS
1376option.
1377.TP
43ce9dda 1378.B EINVAL
1ae6b2c7 1379.B CLONE_NEWUSER
16853a31 1380was specified in the
1ae6b2c7 1381.I flags
16853a31 1382mask,
231d0bbe
MK
1383but the kernel was not configured with the
1384.B CONFIG_USER_NS
1385option.
1386.TP
1387.B EINVAL
1ae6b2c7 1388.B CLONE_NEWUTS
16853a31 1389was specified in the
1ae6b2c7 1390.I flags
16853a31 1391mask,
43ce9dda 1392but the kernel was not configured with the
832fe8ea 1393.B CONFIG_UTS_NS
43ce9dda
MK
1394option.
1395.TP
c550a897 1396.B EINVAL
81c2368f 1397.I stack
c550a897
MK
1398is not aligned to a suitable boundary for this architecture.
1399For example, on aarch64,
81c2368f 1400.I stack
c550a897
MK
1401must be a multiple of 16.
1402.TP
bc03b116 1403.BR EINVAL " (" clone3 "() only)"
baa435c6
MK
1404.B CLONE_DETACHED
1405was specified in the
1406.I flags
1407mask.
1408.TP
bc03b116 1409.BR EINVAL " (" clone "() only)"
9f938981
CB
1410.B CLONE_PIDFD
1411was specified together with
16853a31
MK
1412.B CLONE_DETACHED
1413in the
1414.I flags
1415mask.
9f938981
CB
1416.TP
1417.B EINVAL
1418.B CLONE_PIDFD
1419was specified together with
16853a31
MK
1420.B CLONE_THREAD
1421in the
1422.I flags
1423mask.
9f938981 1424.TP
faa0e55a 1425.BR "EINVAL " "(" clone "() only)"
9f938981
CB
1426.B CLONE_PIDFD
1427was specified together with
16853a31
MK
1428.B CLONE_PARENT_SETTID
1429in the
1430.I flags
1431mask.
9f938981 1432.TP
bf031aaa
AR
1433.BR EINVAL " (" clone3 "() only)"
1434.I set_tid_size
ee8bb310 1435is greater than the number of nested PID namespaces.
bf031aaa
AR
1436.TP
1437.BR EINVAL " (" clone3 "() only)"
2a2b2a5d 1438One of the PIDs specified in
bf031aaa 1439.I set_tid
2a2b2a5d 1440was invalid.
bf031aaa 1441.TP
184ecd22
JP
1442.BR EINVAL " (" clone3 "() only)"
1443.\" commit 7f192e3cd316ba58c88dfa26796cf77789dd9872
1444.B CLONE_THREAD
1445or
1446.B CLONE_PARENT
1447was specified in the
1448.I flags
1449mask, but a signal was specified in
1450.IR exit_signal .
1451.TP
ba9ae75d
MK
1452.BR EINVAL " (AArch64 only, Linux 4.6 and earlier)"
1453.I stack
1874ca39 1454was not aligned to a 128-bit boundary.
ba9ae75d 1455.TP
fea681da
MK
1456.B ENOMEM
1457Cannot allocate sufficient memory to allocate a task structure for the
1458child, or to copy those parts of the caller's context that need to be
1459copied.
1460.TP
b20e22ae
MK
1461.BR ENOSPC " (since Linux 3.7)"
1462.\" commit f2302505775fd13ba93f034206f1e2a587017929
1463.B CLONE_NEWPID
16853a31
MK
1464was specified in the
1465.I flags
1466mask,
b20e22ae
MK
1467but the limit on the nesting depth of PID namespaces
1468would have been exceeded; see
1469.BR pid_namespaces (7).
1470.TP
b5742ecc
MK
1471.BR ENOSPC " (since Linux 4.9; beforehand " EUSERS )
1472.B CLONE_NEWUSER
16853a31 1473was specified in the
1ae6b2c7 1474.I flags
16853a31 1475mask, and the call would cause the limit on the number of
b5742ecc
MK
1476nested user namespaces to be exceeded.
1477See
1478.BR user_namespaces (7).
efeece04 1479.IP
b5742ecc
MK
1480From Linux 3.11 to Linux 4.8, the error diagnosed in this case was
1481.BR EUSERS .
1482.TP
2f7a331e 1483.BR ENOSPC " (since Linux 4.9)"
16853a31 1484One of the values in the
2f7a331e 1485.I flags
16853a31 1486mask specified the creation of a new user namespace,
2f7a331e 1487but doing so would have caused the limit defined by the corresponding file in
1ae6b2c7 1488.I /proc/sys/user
2f7a331e
MK
1489to be exceeded.
1490For further details, see
1491.BR namespaces (7).
1492.TP
0b065634 1493.BR EOPNOTSUPP " (" clone3 "() only)"
6ba79da9
MK
1494.B CLONE_INTO_CGROUP
1495was specified in
1496.IR cl_args.flags ,
1497but the file descriptor specified in
1ae6b2c7 1498.I cl_args.cgroup
6ba79da9 1499refers to a version 2 cgroup that is in the
1ae6b2c7 1500.I domain invalid
6ba79da9
MK
1501state.
1502.TP
fea681da 1503.B EPERM
aa825b59 1504.BR CLONE_NEWCGROUP ,
667417b3 1505.BR CLONE_NEWIPC ,
163bf178 1506.BR CLONE_NEWNET ,
43ce9dda
MK
1507.BR CLONE_NEWNS ,
1508.BR CLONE_NEWPID ,
82ee147a 1509or
1ae6b2c7 1510.B CLONE_NEWUTS
00b08db3 1511was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
fea681da
MK
1512.TP
1513.B EPERM
1514.B CLONE_PID
1515was specified by a process other than process 0.
1c173eb3 1516(This error occurs only on Linux 2.5.15 and earlier.)
365d292a
MK
1517.TP
1518.B EPERM
1ae6b2c7 1519.B CLONE_NEWUSER
16853a31 1520was specified in the
1ae6b2c7 1521.I flags
16853a31 1522mask,
365d292a
MK
1523but either the effective user ID or the effective group ID of the caller
1524does not have a mapping in the parent namespace (see
f58fb24f 1525.BR user_namespaces (7)).
6fd119e7 1526.TP
ac007938
MK
1527.BR EPERM " (since Linux 3.9)"
1528.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
11a38815 1529.B CLONE_NEWUSER
16853a31 1530was specified in the
ac007938 1531.I flags
16853a31 1532mask and the caller is in a chroot environment
ac007938
MK
1533.\" FIXME What is the rationale for this restriction?
1534(i.e., the caller's root directory does not match the root directory
1535of the mount namespace in which it resides).
1536.TP
bf031aaa
AR
1537.BR EPERM " (" clone3 "() only)"
1538.I set_tid_size
1539was greater than zero, and the caller lacks the
1540.B CAP_SYS_ADMIN
1541capability in one or more of the user namespaces that own the
1542corresponding PID namespaces.
1543.TP
6717ee86
MK
1544.BR ERESTARTNOINTR " (since Linux 2.6.17)"
1545.\" commit 4a2c7a7837da1b91468e50426066d988050e4d56
1546System call was interrupted by a signal and will be restarted.
1547(This can be seen only during a trace.)
1548.TP
b5742ecc 1549.BR EUSERS " (Linux 3.11 to Linux 4.8)"
6fd119e7 1550.B CLONE_NEWUSER
16853a31 1551was specified in the
1ae6b2c7 1552.I flags
16853a31 1553mask,
b5742ecc
MK
1554and the limit on the number of nested user namespaces would be exceeded.
1555See the discussion of the
1ae6b2c7 1556.B ENOSPC
b5742ecc 1557error above.
faa0e55a 1558.SH VERSIONS
4131356c 1559The glibc
673d16da
MK
1560.BR clone ()
1561wrapper function makes some changes
1562in the memory pointed to by
1563.I stack
1564(changes required to set the stack up correctly for the child)
1565.I before
1566invoking the
1567.BR clone ()
1568system call.
1569So, in cases where
1570.BR clone ()
1571is used to recursively create children,
1572do not use the buffer employed for the parent's stack
1573as the stack of the child.
c6d039a3 1574.P
34ccb744 1575On i386,
a5a997ca
MK
1576.BR clone ()
1577should not be called through vsyscall, but directly through
1578.IR "int $0x80" .
673d16da
MK
1579.SS C library/kernel differences
1580The raw
1581.BR clone ()
1582system call corresponds more closely to
1583.BR fork (2)
1584in that execution in the child continues from the point of the
1585call.
1586As such, the
1587.I fn
1588and
1589.I arg
1590arguments of the
1591.BR clone ()
1592wrapper function are omitted.
c6d039a3 1593.P
673d16da
MK
1594In contrast to the glibc wrapper, the raw
1595.BR clone ()
1596system call accepts NULL as a
1597.I stack
1598argument (and
1599.BR clone3 ()
1600likewise allows
1601.I cl_args.stack
1602to be NULL).
1603In this case, the child uses a duplicate of the parent's stack.
1604(Copy-on-write semantics ensure that the child gets separate copies
1605of stack pages when either process modifies the stack.)
1606In this case, for correct operation, the
1607.B CLONE_VM
1608option should not be specified.
1609(If the child
1610.I shares
1611the parent's memory because of the use of the
1ae6b2c7 1612.B CLONE_VM
673d16da
MK
1613flag,
1614then no copy-on-write duplication occurs and chaos is likely to result.)
c6d039a3 1615.P
673d16da
MK
1616The order of the arguments also differs in the raw system call,
1617and there are variations in the arguments across architectures,
1618as detailed in the following paragraphs.
c6d039a3 1619.P
673d16da
MK
1620The raw system call interface on x86-64 and some other architectures
1621(including sh, tile, and alpha) is:
c6d039a3 1622.P
161b8eda 1623.in +4n
673d16da
MK
1624.EX
1625.BI "long clone(unsigned long " flags ", void *" stack ,
1626.BI " int *" parent_tid ", int *" child_tid ,
1627.BI " unsigned long " tls );
1628.EE
1629.in
c6d039a3 1630.P
673d16da
MK
1631On x86-32, and several other common architectures
1632(including score, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
1633and MIPS),
1634.\" CONFIG_CLONE_BACKWARDS
1635the order of the last two arguments is reversed:
c6d039a3 1636.P
161b8eda 1637.in +4n
673d16da
MK
1638.EX
1639.BI "long clone(unsigned long " flags ", void *" stack ,
1640.BI " int *" parent_tid ", unsigned long " tls ,
1641.BI " int *" child_tid );
1642.EE
1643.in
c6d039a3 1644.P
673d16da
MK
1645On the cris and s390 architectures,
1646.\" CONFIG_CLONE_BACKWARDS2
1647the order of the first two arguments is reversed:
c6d039a3 1648.P
161b8eda 1649.in +4n
673d16da
MK
1650.EX
1651.BI "long clone(void *" stack ", unsigned long " flags ,
1652.BI " int *" parent_tid ", int *" child_tid ,
1653.BI " unsigned long " tls );
1654.EE
1655.in
c6d039a3 1656.P
673d16da
MK
1657On the microblaze architecture,
1658.\" CONFIG_CLONE_BACKWARDS3
1659an additional argument is supplied:
c6d039a3 1660.P
161b8eda 1661.in +4n
673d16da
MK
1662.EX
1663.BI "long clone(unsigned long " flags ", void *" stack ,
1664.BI " int " stack_size , "\fR /* Size of stack */"
1665.BI " int *" parent_tid ", int *" child_tid ,
1666.BI " unsigned long " tls );
1667.EE
1668.in
1669.\"
1670.SS blackfin, m68k, and sparc
1671.\" Mike Frysinger noted in a 2013 mail:
1672.\" these arches don't define __ARCH_WANT_SYS_CLONE:
1673.\" blackfin ia64 m68k sparc
1674The argument-passing conventions on
1675blackfin, m68k, and sparc are different from the descriptions above.
1676For details, see the kernel (and glibc) source.
1677.SS ia64
1678On ia64, a different interface is used:
c6d039a3 1679.P
161b8eda 1680.in +4n
673d16da 1681.EX
77ca5b1d 1682.BI "int __clone2(int (*" "fn" ")(void *),"
673d16da 1683.BI " void *" stack_base ", size_t " stack_size ,
77ca5b1d 1684.BI " int " flags ", void *" "arg" ", ..."
673d16da
MK
1685.BI " /* pid_t *" parent_tid ", struct user_desc *" tls ,
1686.BI " pid_t *" child_tid " */ );"
1687.EE
1688.in
c6d039a3 1689.P
673d16da
MK
1690The prototype shown above is for the glibc wrapper function;
1691for the system call itself,
1692the prototype can be described as follows (it is identical to the
1693.BR clone ()
1694prototype on microblaze):
c6d039a3 1695.P
161b8eda 1696.in +4n
673d16da
MK
1697.EX
1698.BI "long clone2(unsigned long " flags ", void *" stack_base ,
1699.BI " int " stack_size , "\fR /* Size of stack */"
1700.BI " int *" parent_tid ", int *" child_tid ,
1701.BI " unsigned long " tls );
1702.EE
1703.in
c6d039a3 1704.P
673d16da
MK
1705.BR __clone2 ()
1706operates in the same way as
1707.BR clone (),
1708except that
1709.I stack_base
1710points to the lowest address of the child's stack area,
1711and
1712.I stack_size
1713specifies the size of the stack pointed to by
1714.IR stack_base .
4131356c
AC
1715.SH STANDARDS
1716Linux.
1717.SH HISTORY
1718.TP
1719.BR clone3 ()
1720Linux 5.3.
1721.\" There is no entry for
1722.\" .BR clone ()
1723.\" in libc5.
1724.\" glibc2 provides
1725.\" .BR clone ()
1726.\" as described in this manual page.
673d16da 1727.SS Linux 2.4 and earlier
4131356c
AC
1728In the Linux 2.4.x series,
1729.B CLONE_THREAD
1730generally does not make the parent of the new thread the same
1731as the parent of the calling process.
1732However, from Linux 2.4.7 to Linux 2.4.18 the
1733.B CLONE_THREAD
1734flag implied the
1735.B CLONE_PARENT
1736flag (as in Linux 2.6.0 and later).
c6d039a3 1737.P
673d16da
MK
1738In Linux 2.4 and earlier,
1739.BR clone ()
1740does not take arguments
1741.IR parent_tid ,
1742.IR tls ,
1743and
1744.IR child_tid .
4131356c
AC
1745.SH NOTES
1746One use of these system calls
1747is to implement threads: multiple flows of control in a program that
1748run concurrently in a shared address space.
c6d039a3 1749.P
4131356c
AC
1750The
1751.BR kcmp (2)
1752system call can be used to test whether two processes share various
1753resources such as a file descriptor table,
1754System V semaphore undo operations, or a virtual address space.
c6d039a3 1755.P
4131356c
AC
1756Handlers registered using
1757.BR pthread_atfork (3)
1758are not executed during a clone call.
31830ef0 1759.SH BUGS
abcf3b1d
MK
1760GNU C library versions 2.3.4 up to and including 2.24
1761contained a wrapper function for
0bfa087b 1762.BR getpid (2)
abcf3b1d
MK
1763that performed caching of PIDs.
1764This caching relied on support in the glibc wrapper for
c60237c9 1765.BR clone (),
abcf3b1d
MK
1766but limitations in the implementation
1767meant that the cache was not up to date in some circumstances.
c60237c9 1768In particular,
abcf3b1d 1769if a signal was delivered to the child immediately after the
c60237c9
MK
1770.BR clone ()
1771call, then a call to
0b80cf56 1772.BR getpid (2)
abcf3b1d 1773in a handler for the signal could return the PID
c60237c9 1774of the calling process ("the parent"),
abcf3b1d 1775if the clone wrapper had not yet had a chance to update the PID
c60237c9
MK
1776cache in the child.
1777(This discussion ignores the case where the child was created using
9291ce36 1778.BR CLONE_THREAD ,
c60237c9 1779when
0b80cf56 1780.BR getpid (2)
c60237c9
MK
1781.I should
1782return the same value in the child and in the process that called
1783.BR clone (),
a1d48abb 1784since the caller and the child are in the same thread group.
e7d807b7 1785The stale-cache problem also does not occur if the
a1d48abb
JR
1786.I flags
1787argument includes
1788.BR CLONE_VM .)
abcf3b1d 1789To get the truth, it was sometimes necessary to use code such as the following:
c6d039a3 1790.P
47f743f1
MK
1791.in +4n
1792.EX
1793#include <syscall.h>
fe5dba13 1794\&
47f743f1 1795pid_t mypid;
fe5dba13 1796\&
47f743f1
MK
1797mypid = syscall(SYS_getpid);
1798.EE
1799.in
c60237c9
MK
1800.\" See also the following bug reports
1801.\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1802.\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
c6d039a3 1803.P
abcf3b1d
MK
1804Because of the stale-cache problem, as well as other problems noted in
1805.BR getpid (2),
1806the PID caching feature was removed in glibc 2.25.
a14af333 1807.SH EXAMPLES
8c7b566c 1808The following program demonstrates the use of
9c13072a 1809.BR clone ()
8c7b566c
MK
1810to create a child process that executes in a separate UTS namespace.
1811The child changes the hostname in its UTS namespace.
1812Both parent and child then display the system hostname,
1813making it possible to see that the hostname
1814differs in the UTS namespaces of the parent and child.
1815For an example of the use of this program, see
1816.BR setns (2).
c6d039a3 1817.P
99c3a000
MK
1818Within the sample program, we allocate the memory that is to
1819be used for the child's stack using
1820.BR mmap (2)
1821rather than
1822.BR malloc (3)
1823for the following reasons:
cdede5cd 1824.IP \[bu] 3
99c3a000
MK
1825.BR mmap (2)
1826allocates a block of memory that starts on a page
1827boundary and is a multiple of the page size.
1828This is useful if we want to establish a guard page (a page with protection
1829.BR PROT_NONE )
1830at the end of the stack using
1831.BR mprotect (2).
cdede5cd 1832.IP \[bu]
99c3a000 1833We can specify the
1ae6b2c7 1834.B MAP_STACK
99c3a000
MK
1835flag to request a mapping that is suitable for a stack.
1836For the moment, this flag is a no-op on Linux,
1837but it exists and has an effect on some other systems,
1838so we should include it for portability.
f30b7415 1839.SS Program source
33857069 1840.\" SRC BEGIN (clone.c)
e7d0bb47 1841.EX
8c7b566c 1842#define _GNU_SOURCE
5a5208c1 1843#include <err.h>
8c7b566c 1844#include <sched.h>
80ae7514 1845#include <signal.h>
8eb90116 1846#include <stdint.h>
8c7b566c
MK
1847#include <stdio.h>
1848#include <stdlib.h>
80ae7514 1849#include <string.h>
99c3a000 1850#include <sys/mman.h>
80ae7514
AC
1851#include <sys/utsname.h>
1852#include <sys/wait.h>
1853#include <unistd.h>
fe5dba13 1854\&
8c7b566c
MK
1855static int /* Start function for cloned child */
1856childFunc(void *arg)
1857{
1858 struct utsname uts;
fe5dba13 1859\&
c6beb8a1 1860 /* Change hostname in UTS namespace of child. */
fe5dba13 1861\&
8c7b566c 1862 if (sethostname(arg, strlen(arg)) == \-1)
5a5208c1 1863 err(EXIT_FAILURE, "sethostname");
fe5dba13 1864\&
c6beb8a1 1865 /* Retrieve and display hostname. */
fe5dba13 1866\&
8c7b566c 1867 if (uname(&uts) == \-1)
5a5208c1 1868 err(EXIT_FAILURE, "uname");
d1a71985 1869 printf("uts.nodename in child: %s\en", uts.nodename);
fe5dba13 1870\&
8c7b566c
MK
1871 /* Keep the namespace open for a while, by sleeping.
1872 This allows some experimentation\-\-for example, another
1873 process might join the namespace. */
fe5dba13 1874\&
8c7b566c 1875 sleep(200);
fe5dba13 1876\&
8c7b566c
MK
1877 return 0; /* Child terminates now */
1878}
fe5dba13 1879\&
8c7b566c 1880#define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
fe5dba13 1881\&
8c7b566c
MK
1882int
1883main(int argc, char *argv[])
1884{
0b94bd78
AC
1885 char *stack; /* Start of stack buffer */
1886 char *stackTop; /* End of stack buffer */
1887 pid_t pid;
1888 struct utsname uts;
fe5dba13 1889\&
8c7b566c 1890 if (argc < 2) {
d1a71985 1891 fprintf(stderr, "Usage: %s <child\-hostname>\en", argv[0]);
8c7b566c
MK
1892 exit(EXIT_SUCCESS);
1893 }
fe5dba13 1894\&
c6beb8a1 1895 /* Allocate memory to be used for the stack of the child. */
fe5dba13 1896\&
99c3a000
MK
1897 stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
1898 MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, \-1, 0);
8eea66b8 1899 if (stack == MAP_FAILED)
5a5208c1 1900 err(EXIT_FAILURE, "mmap");
fe5dba13 1901\&
8c7b566c 1902 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
fe5dba13 1903\&
8c7b566c 1904 /* Create child that has its own UTS namespace;
c6beb8a1 1905 child commences execution in childFunc(). */
fe5dba13 1906\&
8c7b566c
MK
1907 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1908 if (pid == \-1)
5a5208c1 1909 err(EXIT_FAILURE, "clone");
8eb90116 1910 printf("clone() returned %jd\en", (intmax_t) pid);
fe5dba13 1911\&
8c7b566c 1912 /* Parent falls through to here */
fe5dba13 1913\&
8c7b566c 1914 sleep(1); /* Give child time to change its hostname */
fe5dba13 1915\&
b957f81f
AC
1916 /* Display hostname in parent\[aq]s UTS namespace. This will be
1917 different from hostname in child\[aq]s UTS namespace. */
fe5dba13 1918\&
8c7b566c 1919 if (uname(&uts) == \-1)
5a5208c1 1920 err(EXIT_FAILURE, "uname");
d1a71985 1921 printf("uts.nodename in parent: %s\en", uts.nodename);
fe5dba13 1922\&
8c7b566c 1923 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
5a5208c1 1924 err(EXIT_FAILURE, "waitpid");
d1a71985 1925 printf("child has terminated\en");
fe5dba13 1926\&
8c7b566c
MK
1927 exit(EXIT_SUCCESS);
1928}
e7d0bb47 1929.EE
33857069 1930.\" SRC END
47297adb 1931.SH SEE ALSO
fea681da 1932.BR fork (2),
2b44301c 1933.BR futex (2),
fea681da
MK
1934.BR getpid (2),
1935.BR gettid (2),
6f8746e4 1936.BR kcmp (2),
99c3a000 1937.BR mmap (2),
d8837668 1938.BR pidfd_open (2),
f2d0bbf1 1939.BR set_thread_area (2),
2b44301c 1940.BR set_tid_address (2),
8403481f 1941.BR setns (2),
f2d0bbf1 1942.BR tkill (2),
5cc01e9c 1943.BR unshare (2),
fea681da 1944.BR wait (2),
3616b7c0 1945.BR capabilities (7),
41096af1 1946.BR namespaces (7),
3616b7c0 1947.BR pthreads (7)