]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/clone.2
All pages: Remove the 5th argument to .TH
[thirdparty/man-pages.git] / man2 / clone.2
CommitLineData
fea681da 1.\" Copyright (c) 1992 Drew Eckhardt <drew@cs.colorado.edu>, March 28, 1992
fb1fa92b 2.\" and Copyright (c) Michael Kerrisk, 2001, 2002, 2005, 2013, 2019
2297bf0e 3.\"
95fb8859 4.\" SPDX-License-Identifier: GPL-1.0-or-later
dccaff1e 5.\"
fea681da
MK
6.\" Modified by Michael Haardt <michael@moria.de>
7.\" Modified 24 Jul 1993 by Rik Faith <faith@cs.unc.edu>
8.\" Modified 21 Aug 1994 by Michael Chastain <mec@shell.portal.com>:
9.\" New man page (copied from 'fork.2').
10.\" Modified 10 June 1995 by Andries Brouwer <aeb@cwi.nl>
11.\" Modified 25 April 1998 by Xavier Leroy <Xavier.Leroy@inria.fr>
12.\" Modified 26 Jun 2001 by Michael Kerrisk
13.\" Mostly upgraded to 2.4.x
14.\" Added prototype for sys_clone() plus description
15.\" Added CLONE_THREAD with a brief description of thread groups
c13182ef 16.\" Added CLONE_PARENT and revised entire page to remove ambiguity
fea681da
MK
17.\" between "calling process" and "parent process"
18.\" Added CLONE_PTRACE and CLONE_VFORK
19.\" Added EPERM and EINVAL error codes
fd8a5be4 20.\" Renamed "__clone" to "clone" (which is the prototype in <sched.h>)
fea681da 21.\" various other minor tidy ups and clarifications.
c11b1abf 22.\" Modified 26 Jun 2001 by Michael Kerrisk <mtk.manpages@gmail.com>
d9bfdb9c 23.\" Updated notes for 2.4.7+ behavior of CLONE_THREAD
c11b1abf 24.\" Modified 15 Oct 2002 by Michael Kerrisk <mtk.manpages@gmail.com>
fea681da
MK
25.\" Added description for CLONE_NEWNS, which was added in 2.4.19
26.\" Slightly rephrased, aeb.
27.\" Modified 1 Feb 2003 - added CLONE_SIGHAND restriction, aeb.
28.\" Modified 1 Jan 2004 - various updates, aeb
0967c11f 29.\" Modified 2004-09-10 - added CLONE_PARENT_SETTID etc. - aeb.
d9bfdb9c 30.\" 2005-04-12, mtk, noted the PID caching behavior of NPTL's getpid()
31830ef0 31.\" wrapper under BUGS.
fd8a5be4
MK
32.\" 2005-05-10, mtk, added CLONE_SYSVSEM, CLONE_UNTRACED, CLONE_STOPPED.
33.\" 2005-05-17, mtk, Substantially enhanced discussion of CLONE_THREAD.
4e836144 34.\" 2008-11-18, mtk, order CLONE_* flags alphabetically
82ee147a 35.\" 2008-11-18, mtk, document CLONE_NEWPID
43ce9dda 36.\" 2008-11-19, mtk, document CLONE_NEWUTS
667417b3 37.\" 2008-11-19, mtk, document CLONE_NEWIPC
cfdc761b 38.\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
fea681da 39.\"
45186a5d 40.TH CLONE 2 2021-03-22 "Linux man-pages (unreleased)"
fea681da 41.SH NAME
a9e52b43 42clone, __clone2, clone3 \- create a child process
48113faf
AC
43.SH LIBRARY
44Standard C library
8fc3b2cf 45.RI ( libc ", " \-lc )
fea681da 46.SH SYNOPSIS
c10859eb 47.nf
81f10dad 48/* Prototype for the glibc wrapper function */
dbfe9c70 49.PP
4f71ba5d 50.B #define _GNU_SOURCE
fea681da 51.B #include <sched.h>
dbfe9c70 52.PP
81c2368f 53.BI "int clone(int (*" "fn" ")(void *), void *" stack \
77ca5b1d 54", int " flags ", void *" "arg" ", ..."
81c2368f
MK
55.BI " /* pid_t *" parent_tid ", void *" tls \
56", pid_t *" child_tid " */ );"
dbfe9c70 57.PP
faa0e55a
MK
58/* For the prototype of the raw clone() system call, see NOTES */
59.PP
a3805383
AC
60.BR "#include <linux/sched.h>" " /* Definition of " "struct clone_args" " */"
61.BR "#include <sched.h>" " /* Definition of " CLONE_* " constants */"
62.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
63.B #include <unistd.h>
faa0e55a 64.PP
a3805383
AC
65.BI "long syscall(SYS_clone3, struct clone_args *" cl_args ", size_t " size );
66.fi
bc2813df
AC
67.PP
68.IR Note :
69glibc provides no wrapper for
70.BR clone3 (),
71necessitating the use of
72.BR syscall (2).
fea681da 73.SH DESCRIPTION
faa0e55a 74These system calls
8dd6b0bc 75create a new ("child") process, in a manner similar to
fea681da 76.BR fork (2).
efeece04 77.PP
8dd6b0bc 78By contrast with
fea681da 79.BR fork (2),
225f5da8 80these system calls provide more precise control over what pieces of execution
10337567
MK
81context are shared between the calling process and the child process.
82For example, using these system calls, the caller can control whether
83or not the two processes share the virtual address space,
84the table of file descriptors, and the table of signal handlers.
225f5da8 85These system calls also allow the new child process to be placed
10337567
MK
86in separate
87.BR namespaces (7).
88.PP
89Note that in this manual
c13182ef 90page, "calling process" normally corresponds to "parent process".
a10c5a33 91But see the descriptions of
c13182ef 92.B CLONE_PARENT
a10c5a33
MK
93and
94.B CLONE_THREAD
10337567 95below.
efeece04 96.PP
faa0e55a
MK
97This page describes the following interfaces:
98.IP * 3
99The glibc
100.BR clone ()
101wrapper function and the underlying system call on which it is based.
102The main text describes the wrapper function;
103the differences for the raw system call
104are described toward the end of this page.
105.IP *
106The newer
107.BR clone3 ()
108system call.
5261b0fe
MK
109.PP
110In the remainder of this page, the terminology "the clone call" is used
324f6154 111when noting details that apply to all of these interfaces.
faa0e55a
MK
112.\"
113.SS The clone() wrapper function
faa0e55a
MK
114When the child process is created with the
115.BR clone ()
116wrapper function,
7495cbc7
MK
117it commences execution by calling the function pointed to by the argument
118.IR fn .
fea681da 119(This differs from
c13182ef 120.BR fork (2),
fea681da 121where execution continues in the child from the point
c13182ef
MK
122of the
123.BR fork (2)
fea681da
MK
124call.)
125The
fea681da 126.I arg
7495cbc7
MK
127argument is passed as the argument of the function
128.IR fn .
efeece04 129.PP
c13182ef 130When the
fea681da 131.IR fn ( arg )
4ba17a6d 132function returns, the child process terminates.
c13182ef 133The integer returned by
fea681da 134.I fn
4ba17a6d 135is the exit status for the child process.
c13182ef 136The child process may also terminate explicitly by calling
fea681da
MK
137.BR exit (2)
138or after receiving a fatal signal.
efeece04 139.PP
fea681da 140The
81c2368f 141.I stack
c13182ef
MK
142argument specifies the location of the stack used by the child process.
143Since the child and calling process may share memory,
fea681da 144it is not possible for the child process to execute in the
c13182ef
MK
145same stack as the calling process.
146The calling process must therefore
fea681da
MK
147set up memory space for the child stack and pass a pointer to this
148space to
edcc65ff 149.BR clone ().
5fab2e7c 150Stacks grow downward on all processors that run Linux
fea681da 151(except the HP PA processors), so
81c2368f 152.I stack
fea681da
MK
153usually points to the topmost address of the memory space set up for
154the child stack.
faa0e55a
MK
155Note that
156.BR clone ()
157does not provide a means whereby the caller can inform the kernel of the
158size of the stack area.
159.PP
160The remaining arguments to
161.BR clone ()
162are discussed below.
163.\"
164.SS clone3()
faa0e55a
MK
165The
166.BR clone3 ()
167system call provides a superset of the functionality of the older
168.BR clone ()
169interface.
170It also provides a number of API improvements, including:
171space for additional flags bits;
172cleaner separation in the use of various arguments;
173and the ability to specify the size of the child's stack area.
174.PP
175As with
176.BR fork (2),
177.BR clone3 ()
178returns in both the parent and the child.
179It returns 0 in the child process and returns the PID of the child
180in the parent.
181.PP
182The
183.I cl_args
184argument of
185.BR clone3 ()
186is a structure of the following form:
187.PP
188.in +4n
189.EX
190struct clone_args {
191 u64 flags; /* Flags bit mask */
192 u64 pidfd; /* Where to store PID file descriptor
e71d103b 193 (\fIint *\fP) */
faa0e55a 194 u64 child_tid; /* Where to store child TID,
861d36ba 195 in child\(aqs memory (\fIpid_t *\fP) */
faa0e55a 196 u64 parent_tid; /* Where to store child TID,
e71d103b 197 in parent\(aqs memory (\fIpid_t *\fP) */
faa0e55a
MK
198 u64 exit_signal; /* Signal to deliver to parent on
199 child termination */
200 u64 stack; /* Pointer to lowest byte of stack */
201 u64 stack_size; /* Size of stack */
202 u64 tls; /* Location of new TLS */
f7d5e082
MK
203 u64 set_tid; /* Pointer to a \fIpid_t\fP array
204 (since Linux 5.5) */
205 u64 set_tid_size; /* Number of elements in \fIset_tid\fP
206 (since Linux 5.5) */
ed7c1377
MK
207 u64 cgroup; /* File descriptor for target cgroup
208 of child (since Linux 5.7) */
faa0e55a
MK
209};
210.EE
211.in
212.PP
213The
214.I size
215argument that is supplied to
216.BR clone3 ()
217should be initialized to the size of this structure.
218(The existence of the
219.I size
220argument permits future extensions to the
1ae6b2c7 221.I clone_args
faa0e55a
MK
222structure.)
223.PP
224The stack for the child process is specified via
225.IR cl_args.stack ,
226which points to the lowest byte of the stack area,
227and
228.IR cl_args.stack_size ,
229which specifies the size of the stack in bytes.
230In the case where the
1ae6b2c7 231.B CLONE_VM
faa0e55a
MK
232flag (see below) is specified, a stack must be explicitly allocated
233and specified.
234Otherwise, these two fields can be specified as NULL and 0,
235which causes the child to use the same stack area as the parent
236(in the child's own virtual address space).
237.PP
238The remaining fields in the
239.I cl_args
240argument are discussed below.
241.\"
242.SS Equivalence between clone() and clone3() arguments
faa0e55a
MK
243Unlike the older
244.BR clone ()
245interface, where arguments are passed individually, in the newer
246.BR clone3 ()
247interface the arguments are packaged into the
248.I clone_args
249structure shown above.
250This structure allows for a superset of the information passed via the
251.BR clone ()
252arguments.
253.PP
254The following table shows the equivalence between the arguments of
255.BR clone ()
256and the fields in the
257.I clone_args
258argument supplied to
259.BR clone3 ():
0b174fe0 260.RS 4
faa0e55a
MK
261.TS
262lb lb lb
263l l l
264li li l.
97883fae 265clone() clone3() Notes
faa0e55a 266 \fIcl_args\fP field
0b174fe0
MK
267flags & \(ti0xff flags T{
268For most flags; details below
269T}
faa0e55a
MK
270parent_tid pidfd See CLONE_PIDFD
271child_tid child_tid See CLONE_CHILD_SETTID
272parent_tid parent_tid See CLONE_PARENT_SETTID
273flags & 0xff exit_signal
274stack stack
275\fP---\fP stack_size
276tls tls See CLONE_SETTLS
bf031aaa
AR
277\fP---\fP set_tid See below for details
278\fP---\fP set_tid_size
ed7c1377 279\fP---\fP cgroup See CLONE_INTO_CGROUP
faa0e55a
MK
280.TE
281.RE
5fbce8f2
MK
282.\"
283.SS The child termination signal
faa0e55a
MK
284When the child process terminates, a signal may be sent to the parent.
285The termination signal is specified in the low byte of
fea681da 286.I flags
faa0e55a
MK
287.RB ( clone ())
288or in
289.I cl_args.exit_signal
290.RB ( clone3 ()).
fd8a5be4 291If this signal is specified as anything other than
fea681da
MK
292.BR SIGCHLD ,
293then the parent process must specify the
c13182ef
MK
294.B __WALL
295or
fea681da 296.B __WCLONE
c13182ef
MK
297options when waiting for the child with
298.BR wait (2).
faa0e55a 299If no signal (i.e., zero) is specified, then the parent process is not signaled
fea681da 300when the child terminates.
5fbce8f2 301.\"
bf031aaa 302.SS The set_tid array
bf031aaa
AR
303By default, the kernel chooses the next sequential PID for the new
304process in each of the PID namespaces where it is present.
305When creating a process with
306.BR clone3 (),
307the
308.I set_tid
b386cee3
MK
309array (available since Linux 5.5)
310can be used to select specific PIDs for the process in some
bf031aaa 311or all of the PID namespaces where it is present.
ee8bb310 312If the PID of the newly created process should be set only for the current
bf031aaa
AR
313PID namespace or in the newly created PID namespace (if
314.I flags
315contains
316.BR CLONE_NEWPID ),
317then the first element in the
318.I set_tid
319array has to be the desired PID and
320.I set_tid_size
321needs to be 1.
322.PP
323If the PID of the newly created process should have a certain value in
ee8bb310 324multiple PID namespaces, then the
bf031aaa 325.I set_tid
09007c4b
MK
326array can have multiple entries.
327The first entry defines the PID in the most
ee8bb310
MK
328deeply nested PID namespace and each of the following entries contains
329the PID in the
330corresponding ancestor PID namespace.
09007c4b 331The number of PID namespaces in which a PID
bf031aaa
AR
332should be set is defined by
333.IR set_tid_size ,
334which cannot be larger than the number of currently nested PID namespaces.
335.PP
336To create a process with the following PIDs in a PID namespace hierarchy:
0b174fe0 337.RS 4
bf031aaa 338.TS
ee8bb310
MK
339lb lb lb
340l l l.
341PID NS level Requested PID Notes
3420 31496 Outermost PID namespace
bf031aaa 3431 42
ee8bb310 3442 7 Innermost PID namespace
bf031aaa
AR
345.TE
346.RE
347.PP
348Set the array to:
349.PP
ee8bb310 350.in +4n
bf031aaa 351.EX
ee8bb310
MK
352set_tid[0] = 7;
353set_tid[1] = 42;
354set_tid[2] = 31496;
355set_tid_size = 3;
bf031aaa 356.EE
ee8bb310 357.in
bf031aaa
AR
358.PP
359If only the PIDs in the two innermost PID namespaces
360need to be specified, set the array to:
361.PP
ee8bb310 362.in +4n
bf031aaa 363.EX
ee8bb310
MK
364set_tid[0] = 7;
365set_tid[1] = 42;
366set_tid_size = 2;
bf031aaa 367.EE
ee8bb310 368.in
bf031aaa
AR
369.PP
370The PID in the PID namespaces outside the two innermost PID namespaces
95887a00 371is selected the same way as any other PID is selected.
bf031aaa
AR
372.PP
373The
374.I set_tid
375feature requires
1ae6b2c7 376.B CAP_SYS_ADMIN
1e4d6750
MK
377or
378(since Linux 5.9)
379.\" commit 124ea650d3072b005457faed69909221c2905a1f
380.\" commit 1caef81da05a84a40dbf02110e967ce6d1135ff6
1ae6b2c7 381.B CAP_CHECKPOINT_RESTORE
bf031aaa
AR
382in all owning user namespaces of the target PID namespaces.
383.PP
ee8bb310
MK
384Callers may only choose a PID greater than 1 in a given PID namespace
385if an
1ae6b2c7 386.B init
ee8bb310 387process (i.e., a process with PID 1) already exists in that namespace.
09007c4b 388Otherwise the PID
bf031aaa
AR
389entry for this PID namespace must be 1.
390.\"
16853a31 391.SS The flags mask
faa0e55a
MK
392Both
393.BR clone ()
394and
395.BR clone3 ()
396allow a flags bit mask that modifies their behavior
397and allows the caller to specify what is shared between the calling process
398and the child process.
5261b0fe
MK
399This bit mask\(emthe
400.I flags
401argument of
402.BR clone ()
403or the
404.I cl_args.flags
405field passed to
406.BR clone3 ()\(emis
16853a31
MK
407referred to as the
408.I flags
409mask in the remainder of this page.
410.PP
411The
412.I flags
413mask is specified as a bitwise-OR of zero or more of
414the constants listed below.
5261b0fe 415Except as noted below, these flags are available
faa0e55a
MK
416(and have the same effect) in both
417.BR clone ()
418and
419.BR clone3 ().
fea681da 420.TP
f5dbc7c8 421.BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
e2bf1234 422Clear (zero) the child thread ID at the location pointed to by
81c2368f 423.I child_tid
faa0e55a
MK
424.RB ( clone ())
425or
426.I cl_args.child_tid
427.RB ( clone3 ())
f5dbc7c8
MK
428in child memory when the child exits, and do a wakeup on the futex
429at that address.
430The address involved may be changed by the
431.BR set_tid_address (2)
432system call.
433This is used by threading libraries.
434.TP
435.BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
e2bf1234 436Store the child thread ID at the location pointed to by
81c2368f 437.I child_tid
faa0e55a
MK
438.RB ( clone ())
439or
440.I cl_args.child_tid
441.RB ( clone3 ())
8ef021ea 442in the child's memory.
5261b0fe 443The store operation completes before the clone call
6ab62ed8 444returns control to user space in the child process.
5261b0fe 445(Note that the store operation may not have completed before the clone call
95887a00 446returns in the parent process, which is relevant if the
1ae6b2c7 447.B CLONE_VM
6ab62ed8 448flag is also employed.)
f5dbc7c8 449.TP
27f14b44
MK
450.BR CLONE_CLEAR_SIGHAND " (since Linux 5.5)"
451.\" commit b612e5df4587c934bd056bf05f4a1deca4de4f75
452By default, signal dispositions in the child thread are the same as
453in the parent.
454If this flag is specified,
455then all signals that are handled in the parent
456are reset to their default dispositions
457.RB ( SIG_DFL )
458in the child.
459.IP
460Specifying this flag together with
461.B CLONE_SIGHAND
462is nonsensical and disallowed.
463.TP
baa435c6
MK
464.BR CLONE_DETACHED " (historical)"
465For a while (during the Linux 2.5 development series)
466.\" added in 2.5.32; removed in 2.6.0-test4
467there was a
468.B CLONE_DETACHED
469flag,
470which caused the parent not to receive a signal when the child terminated.
471Ultimately, the effect of this flag was subsumed under the
1ae6b2c7 472.B CLONE_THREAD
baa435c6
MK
473flag and by the time Linux 2.6.0 was released, this flag had no effect.
474Starting in Linux 2.6.2, the need to give this flag together with
475.B CLONE_THREAD
476disappeared.
477.IP
478This flag is still defined, but it is usually ignored when calling
479.BR clone ().
480However, see the description of
1ae6b2c7 481.B CLONE_PIDFD
baa435c6
MK
482for some exceptions.
483.TP
1603d6a1 484.BR CLONE_FILES " (since Linux 2.0)"
fea681da 485If
f5dbc7c8
MK
486.B CLONE_FILES
487is set, the calling process and the child process share the same file
488descriptor table.
489Any file descriptor created by the calling process or by the child
490process is also valid in the other process.
491Similarly, if one of the processes closes a file descriptor,
492or changes its associated flags (using the
493.BR fcntl (2)
494.B F_SETFD
495operation), the other process is also affected.
8a76b19e
KE
496If a process sharing a file descriptor table calls
497.BR execve (2),
498its file descriptor table is duplicated (unshared).
efeece04 499.IP
fea681da 500If
f5dbc7c8
MK
501.B CLONE_FILES
502is not set, the child process inherits a copy of all file descriptors
5261b0fe 503opened in the calling process at the time of the clone call.
f5dbc7c8
MK
504Subsequent operations that open or close file descriptors,
505or change file descriptor flags,
506performed by either the calling
507process or the child process do not affect the other process.
db8ba2b4 508Note, however,
839d161f
MK
509that the duplicated file descriptors in the child refer to the same
510open file descriptions as the corresponding file descriptors
511in the calling process,
2433365b 512and thus share file offsets and file status flags (see
db8ba2b4 513.BR open (2)).
fea681da 514.TP
1603d6a1 515.BR CLONE_FS " (since Linux 2.0)"
fea681da
MK
516If
517.B CLONE_FS
9ee4a2b6 518is set, the caller and the child process share the same filesystem
c13182ef 519information.
9ee4a2b6 520This includes the root of the filesystem, the current
c13182ef
MK
521working directory, and the umask.
522Any call to
fea681da
MK
523.BR chroot (2),
524.BR chdir (2),
525or
526.BR umask (2)
edcc65ff 527performed by the calling process or the child process also affects the
fea681da 528other process.
efeece04 529.IP
c13182ef 530If
fea681da 531.B CLONE_FS
9ee4a2b6 532is not set, the child process works on a copy of the filesystem
5261b0fe 533information of the calling process at the time of the clone call.
fea681da
MK
534Calls to
535.BR chroot (2),
536.BR chdir (2),
4ba17a6d 537or
fea681da
MK
538.BR umask (2)
539performed later by one of the processes do not affect the other process.
fea681da 540.TP
edc1b9fc
MK
541.BR CLONE_INTO_CGROUP " (since Linux 5.7)"
542.\" commit ef2c41cf38a7559bbf91af42d5b6a4429db8fc68
543By default, a child process is placed in the same version 2
544cgroup as its parent.
545The
546.B CLONE_INTO_CGROUP
17d86030 547flag allows the child process to be created in a different version 2 cgroup.
edc1b9fc 548(Note that
1ae6b2c7 549.B CLONE_INTO_CGROUP
edc1b9fc
MK
550has effect only for version 2 cgroups.)
551.IP
552In order to place the child process in a different cgroup,
553the caller specifies
1ae6b2c7 554.B CLONE_INTO_CGROUP
edc1b9fc
MK
555in
556.I cl_args.flags
557and passes a file descriptor that refers to a version 2 cgroup in the
558.I cl_args.cgroup
559field.
17d86030 560(This file descriptor can be obtained by opening a cgroup v2 directory
edc1b9fc
MK
561using either the
562.B O_RDONLY
563or the
564.B O_PATH
565flag.)
566Note that all of the usual restrictions (described in
567.BR cgroups (7))
568on placing a process into a version 2 cgroup apply.
569.IP
b3041511 570Among the possible use cases for
1ae6b2c7 571.B CLONE_INTO_CGROUP
b3041511
MK
572are the following:
573.RS
574.IP * 3
edc1b9fc
MK
575Spawning a process into a cgroup different from the parent's cgroup
576makes it possible for a service manager to directly spawn new
577services into dedicated cgroups.
578This eliminates the accounting
579jitter that would be caused if the child process was first created in the
580same cgroup as the parent and then
581moved into the target cgroup.
4fe3acd9
MK
582Furthermore, spawning the child process directly into a target cgroup
583is significantly cheaper than moving the child process into
584the target cgroup after it has been created.
b3041511 585.IP *
edc1b9fc 586The
1ae6b2c7 587.B CLONE_INTO_CGROUP
edc1b9fc
MK
588flag also allows the creation of
589frozen child processes by spawning them into a frozen cgroup.
590(See
591.BR cgroups (7)
592for a description of the freezer controller.)
b3041511 593.IP *
edc1b9fc
MK
594For threaded applications (or even thread implementations which
595make use of cgroups to limit individual threads), it is possible to
596establish a fixed cgroup layout before spawning each thread
597directly into its target cgroup.
b3041511 598.RE
edc1b9fc 599.TP
a4cc375e 600.BR CLONE_IO " (since Linux 2.6.25)"
11f27a1c
JA
601If
602.B CLONE_IO
603is set, then the new process shares an I/O context with
604the calling process.
605If this flag is not set, then (as with
606.BR fork (2))
607the new process has its own I/O context.
efeece04 608.IP
11f27a1c 609.\" The following based on text from Jens Axboe
d1f84ed7 610The I/O context is the I/O scope of the disk scheduler (i.e.,
11f27a1c
JA
611what the I/O scheduler uses to model scheduling of a process's I/O).
612If processes share the same I/O context,
613they are treated as one by the I/O scheduler.
614As a consequence, they get to share disk time.
615For some I/O schedulers,
616.\" the anticipatory and CFQ scheduler
617if two processes share an I/O context,
618they will be allowed to interleave their disk access.
619If several threads are doing I/O on behalf of the same process
620.RB ( aio_read (3),
621for instance), they should employ
1ae6b2c7 622.B CLONE_IO
11f27a1c
JA
623to get better I/O performance.
624.\" with CFQ and AS.
efeece04 625.IP
11f27a1c
JA
626If the kernel is not configured with the
627.B CONFIG_BLOCK
628option, this flag is a no-op.
629.TP
c5af0674
MK
630.BR CLONE_NEWCGROUP " (since Linux 4.6)"
631Create the process in a new cgroup namespace.
632If this flag is not set, then (as with
633.BR fork (2))
634the process is created in the same cgroup namespace as the calling process.
efeece04 635.IP
c5af0674 636For further information on cgroup namespaces, see
b9fe4bc3 637.BR cgroup_namespaces (7).
efeece04 638.IP
c5af0674
MK
639Only a privileged process
640.RB ( CAP_SYS_ADMIN )
641can employ
642.BR CLONE_NEWCGROUP .
643.\"
644.TP
8722311b 645.BR CLONE_NEWIPC " (since Linux 2.6.19)"
667417b3
MK
646If
647.B CLONE_NEWIPC
648is set, then create the process in a new IPC namespace.
649If this flag is not set, then (as with
06b30458 650.BR fork (2))
667417b3
MK
651the process is created in the same IPC namespace as
652the calling process.
efeece04 653.IP
981eda4a
MK
654For further information on IPC namespaces, see
655.BR ipc_namespaces (7).
656.IP
ab5dd83f
MK
657Only a privileged process
658.RB ( CAP_SYS_ADMIN )
659can employ
660.BR CLONE_NEWIPC .
667417b3
MK
661This flag can't be specified in conjunction with
662.BR CLONE_SYSVSEM .
663.TP
163bf178 664.BR CLONE_NEWNET " (since Linux 2.6.24)"
33a0ccb2 665(The implementation of this flag was completed only
9108d867 666by about kernel version 2.6.29.)
efeece04 667.IP
163bf178
MK
668If
669.B CLONE_NEWNET
670is set, then create the process in a new network namespace.
671If this flag is not set, then (as with
57ef8c39 672.BR fork (2))
163bf178
MK
673the process is created in the same network namespace as
674the calling process.
efeece04 675.IP
73680728 676For further information on network namespaces, see
40002795 677.BR network_namespaces (7).
efeece04 678.IP
ab5dd83f
MK
679Only a privileged process
680.RB ( CAP_SYS_ADMIN )
681can employ
682.BR CLONE_NEWNET .
163bf178 683.TP
c10859eb 684.BR CLONE_NEWNS " (since Linux 2.4.19)"
3dd2331c
MK
685If
686.B CLONE_NEWNS
687is set, the cloned child is started in a new mount namespace,
688initialized with a copy of the namespace of the parent.
689If
fea681da 690.B CLONE_NEWNS
3dd2331c 691is not set, the child lives in the same mount
4df2eb09 692namespace as the parent.
efeece04 693.IP
981eda4a
MK
694For further information on mount namespaces, see
695.BR namespaces (7)
696and
697.BR mount_namespaces (7).
698.IP
ab5dd83f
MK
699Only a privileged process
700.RB ( CAP_SYS_ADMIN )
701can employ
702.BR CLONE_NEWNS .
fea681da
MK
703It is not permitted to specify both
704.B CLONE_NEWNS
705and
706.B CLONE_FS
9219d208 707.\" See https://lwn.net/Articles/543273/
5261b0fe 708in the same clone call.
9d005472
MK
709.TP
710.BR CLONE_NEWPID " (since Linux 2.6.24)"
711.\" This explanation draws a lot of details from
712.\" http://lwn.net/Articles/259217/
713.\" Authors: Pavel Emelyanov <xemul@openvz.org>
714.\" and Kir Kolyshkin <kir@openvz.org>
715.\"
716.\" The primary kernel commit is 30e49c263e36341b60b735cbef5ca37912549264
717.\" Author: Pavel Emelyanov <xemul@openvz.org>
718If
719.B CLONE_NEWPID
720is set, then create the process in a new PID namespace.
721If this flag is not set, then (as with
722.BR fork (2))
723the process is created in the same PID namespace as
724the calling process.
efeece04 725.IP
9d005472 726For further information on PID namespaces, see
7e0e902b
MK
727.BR namespaces (7)
728and
39b3f005 729.BR pid_namespaces (7).
efeece04 730.IP
ab5dd83f
MK
731Only a privileged process
732.RB ( CAP_SYS_ADMIN )
733can employ
734.BR CLONE_NEWPID .
9d005472 735This flag can't be specified in conjunction with
1ae6b2c7 736.B CLONE_THREAD
f0007192
MK
737or
738.BR CLONE_PARENT .
70d21f17 739.TP
1ae6b2c7 740.B CLONE_NEWUSER
06b30458
MK
741(This flag first became meaningful for
742.BR clone ()
4d2b3ed7
MK
743in Linux 2.6.23,
744the current
11a38815 745.BR clone ()
4d2b3ed7
MK
746semantics were merged in Linux 3.5,
747and the final pieces to make the user namespaces completely usable were
748merged in Linux 3.8.)
efeece04 749.IP
70d21f17
EB
750If
751.B CLONE_NEWUSER
06b30458
MK
752is set, then create the process in a new user namespace.
753If this flag is not set, then (as with
57ef8c39 754.BR fork (2))
70d21f17 755the process is created in the same user namespace as the calling process.
efeece04 756.IP
981eda4a
MK
757For further information on user namespaces, see
758.BR namespaces (7)
759and
760.BR user_namespaces (7).
761.IP
fefbcba8 762Before Linux 3.8, use of
1ae6b2c7 763.B CLONE_NEWUSER
fefbcba8
MK
764required that the caller have three capabilities:
765.BR CAP_SYS_ADMIN ,
766.BR CAP_SETUID ,
767and
768.BR CAP_SETGID .
769.\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
06b30458 770Starting with Linux 3.8,
9d005472 771no privileges are needed to create a user namespace.
efeece04 772.IP
5e72cf7d 773This flag can't be specified in conjunction with
1ae6b2c7 774.B CLONE_THREAD
5e72cf7d
MK
775or
776.BR CLONE_PARENT .
777For security reasons,
778.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
779.\" https://lwn.net/Articles/543273/
780.\" The fix actually went into 3.9 and into 3.8.3. However, user namespaces
781.\" were, for practical purposes, unusable in earlier 3.8.x because of the
ab3311aa 782.\" various filesystems that didn't support userns.
1ae6b2c7 783.B CLONE_NEWUSER
f0007192 784cannot be specified in conjunction with
5e72cf7d 785.BR CLONE_FS .
82ee147a 786.TP
43ce9dda
MK
787.BR CLONE_NEWUTS " (since Linux 2.6.19)"
788If
789.B CLONE_NEWUTS
e1b11906
MK
790is set, then create the process in a new UTS namespace,
791whose identifiers are initialized by duplicating the identifiers
792from the UTS namespace of the calling process.
43ce9dda 793If this flag is not set, then (as with
57ef8c39 794.BR fork (2))
43ce9dda
MK
795the process is created in the same UTS namespace as
796the calling process.
efeece04 797.IP
981eda4a
MK
798For further information on UTS namespaces, see
799.BR uts_namespaces (7).
800.IP
ab5dd83f
MK
801Only a privileged process
802.RB ( CAP_SYS_ADMIN )
803can employ
804.BR CLONE_NEWUTS .
43ce9dda 805.TP
f5dbc7c8
MK
806.BR CLONE_PARENT " (since Linux 2.3.12)"
807If
808.B CLONE_PARENT
809is set, then the parent of the new child (as returned by
810.BR getppid (2))
811will be the same as that of the calling process.
efeece04 812.IP
f5dbc7c8
MK
813If
814.B CLONE_PARENT
815is not set, then (as with
816.BR fork (2))
817the child's parent is the calling process.
efeece04 818.IP
f5dbc7c8
MK
819Note that it is the parent process, as returned by
820.BR getppid (2),
821which is signaled when the child terminates, so that
822if
823.B CLONE_PARENT
824is set, then the parent of the calling process, rather than the
95887a00 825calling process itself, is signaled.
a17b9d28 826.IP
4269a6ab 827The
a17b9d28 828.B CLONE_PARENT
4269a6ab
MK
829flag can't be used in clone calls by the
830global init process (PID 1 in the initial PID namespace)
831and init processes in other PID namespaces.
832This restriction prevents the creation of multi-rooted process trees
833as well as the creation of unreapable zombies in the initial PID namespace.
f5dbc7c8
MK
834.TP
835.BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
e2bf1234 836Store the child thread ID at the location pointed to by
81c2368f 837.I parent_tid
faa0e55a
MK
838.RB ( clone ())
839or
d5d482ec 840.I cl_args.parent_tid
faa0e55a 841.RB ( clone3 ())
8ef021ea 842in the parent's memory.
f5dbc7c8
MK
843(In Linux 2.5.32-2.5.48 there was a flag
844.B CLONE_SETTID
845that did this.)
5261b0fe 846The store operation completes before the clone call
b5da2f91 847returns control to user space.
f5dbc7c8 848.TP
1c173eb3 849.BR CLONE_PID " (Linux 2.0 to 2.5.15)"
f5dbc7c8
MK
850If
851.B CLONE_PID
852is set, the child process is created with the same process ID as
853the calling process.
854This is good for hacking the system, but otherwise
855of not much use.
1c173eb3 856From Linux 2.3.21 onward, this flag could be
f5dbc7c8 857specified only by the system boot process (PID 0).
1c173eb3 858The flag disappeared completely from the kernel sources in Linux 2.5.16.
f5d5180f 859Subsequently, the kernel silently ignored this bit if it was specified in the
1ae6b2c7 860.I flags
16853a31 861mask.
f5d5180f
MK
862Much later, the same bit was recycled for use as the
863.B CLONE_PIDFD
864flag.
f5dbc7c8 865.TP
9f938981 866.BR CLONE_PIDFD " (since Linux 5.2)"
4e98b074 867.\" commit b3e5838252665ee4cfa76b82bdf1198dca81e5be
faa0e55a
MK
868If this flag is specified,
869a PID file descriptor referring to the child process is allocated
870and placed at a specified location in the parent's memory.
b4ebffb2 871The close-on-exec flag is set on this new file descriptor.
34a975f8
MK
872PID file descriptors can be used for the purposes described in
873.BR pidfd_open (2).
faa0e55a
MK
874.RS
875.IP * 3
876When using
877.BR clone3 (),
878the PID file descriptor is placed at the location pointed to by
879.IR cl_args.pidfd .
880.IP *
881When using
882.BR clone (),
883the PID file descriptor is placed at the location pointed to by
884.IR parent_tid .
9f938981 885Since the
81c2368f 886.I parent_tid
b97cc7ae 887argument is used to return the PID file descriptor,
9f938981
CB
888.B CLONE_PIDFD
889cannot be used with
faa0e55a
MK
890.B CLONE_PARENT_SETTID
891when calling
892.BR clone ().
893.RE
9f938981
CB
894.IP
895It is currently not possible to use this flag together with
896.BR CLONE_THREAD .
b97cc7ae 897This means that the process identified by the PID file descriptor
f6183e5b 898will always be a thread group leader.
9f938981 899.IP
baa435c6 900If the obsolete
9f938981 901.B CLONE_DETACHED
baa435c6 902flag is specified alongside
1ae6b2c7 903.B CLONE_PIDFD
baa435c6
MK
904when calling
905.BR clone (),
4e98b074 906an error is returned.
baa435c6
MK
907An error also results if
908.B CLONE_DETACHED
909is specified when calling
910.BR clone3 ().
911This error behavior ensures that the bit corresponding to
1ae6b2c7 912.B CLONE_DETACHED
baa435c6 913can be reused for further PID file descriptor features in the future.
9f938981 914.TP
1603d6a1 915.BR CLONE_PTRACE " (since Linux 2.2)"
f5dbc7c8
MK
916If
917.B CLONE_PTRACE
918is specified, and the calling process is being traced,
919then trace the child also (see
920.BR ptrace (2)).
921.TP
922.BR CLONE_SETTLS " (since Linux 2.5.32)"
dd6d3d2e 923The TLS (Thread Local Storage) descriptor is set to
81c2368f 924.IR tls .
efeece04 925.IP
dd6d3d2e 926The interpretation of
81c2368f 927.I tls
dd6d3d2e
KF
928and the resulting effect are architecture dependent.
929On x86,
81c2368f 930.I tls
dd6d3d2e 931is interpreted as a
1ae6b2c7 932.I struct user_desc\~*
35bf8cb4 933(see
dd6d3d2e 934.BR set_thread_area (2)).
9ea5bc66 935On x86-64 it is the new value to be set for the %fs base register
35bf8cb4 936(see the
2551f801 937.B ARCH_SET_FS
dd6d3d2e
KF
938argument to
939.BR arch_prctl (2)).
940On architectures with a dedicated TLS register, it is the new value
941of that register.
f5d5180f
MK
942.IP
943Use of this flag requires detailed knowledge and generally it
944should not be used except in libraries implementing threading.
f5dbc7c8 945.TP
1603d6a1 946.BR CLONE_SIGHAND " (since Linux 2.0)"
fea681da
MK
947If
948.B CLONE_SIGHAND
314c8ff4 949is set, the calling process and the child process share the same table of
c13182ef
MK
950signal handlers.
951If the calling process or child process calls
fea681da 952.BR sigaction (2)
c13182ef
MK
953to change the behavior associated with a signal, the behavior is
954changed in the other process as well.
955However, the calling process and child
fea681da 956processes still have distinct signal masks and sets of pending
c13182ef 957signals.
4ba17a6d 958So, one of them may block or unblock signals using
fea681da
MK
959.BR sigprocmask (2)
960without affecting the other process.
efeece04 961.IP
fea681da
MK
962If
963.B CLONE_SIGHAND
964is not set, the child process inherits a copy of the signal handlers
5261b0fe 965of the calling process at the time of the clone call.
c13182ef 966Calls to
fea681da
MK
967.BR sigaction (2)
968performed later by one of the processes have no effect on the other
969process.
efeece04 970.IP
d6bec36e
MK
971Since Linux 2.6.0,
972.\" Precisely: Linux 2.6.0-test6
16853a31 973the
29546c24 974.I flags
16853a31 975mask must also include
29546c24
MK
976.B CLONE_VM
977if
978.B CLONE_SIGHAND
fe10d82f 979is specified.
fea681da 980.TP
d6bec36e
MK
981.BR CLONE_STOPPED " (since Linux 2.6.0)"
982.\" Precisely: Linux 2.6.0-test2
a69b6bda
MK
983If
984.B CLONE_STOPPED
985is set, then the child is initially stopped (as though it was sent a
986.B SIGSTOP
987signal), and must be resumed by sending it a
988.B SIGCONT
989signal.
efeece04 990.IP
a60450a9
MK
991This flag was
992.I deprecated
993from Linux 2.6.25 onward,
994and was
995.I removed
28b44abc
MK
996altogether in Linux 2.6.38.
997Since then, the kernel silently ignores it without error.
a5a061ee 998.\" glibc 2.8 removed this defn from bits/sched.h
c5af0674 999Starting with Linux 4.6, the same bit was reused for the
1ae6b2c7 1000.B CLONE_NEWCGROUP
c5af0674 1001flag.
a69b6bda 1002.TP
f5dbc7c8 1003.BR CLONE_SYSVSEM " (since Linux 2.5.10)"
fea681da 1004If
f5dbc7c8
MK
1005.B CLONE_SYSVSEM
1006is set, then the child and the calling process share
5ada4b94
MK
1007a single list of System V semaphore adjustment
1008.RI ( semadj )
1009values (see
f5dbc7c8 1010.BR semop (2)).
5ada4b94
MK
1011In this case, the shared list accumulates
1012.I semadj
1013values across all processes sharing the list,
1014and semaphore adjustments are performed only when the last process
1015that is sharing the list terminates (or ceases sharing the list using
1016.BR unshare (2)).
f5d401dd 1017If this flag is not set, then the child has a separate
5ada4b94
MK
1018.I semadj
1019list that is initially empty.
fea681da 1020.TP
d6bec36e
MK
1021.BR CLONE_THREAD " (since Linux 2.4.0)"
1022.\" Precisely: Linux 2.4.0-test8
fea681da
MK
1023If
1024.B CLONE_THREAD
1025is set, the child is placed in the same thread group as the calling process.
fd8a5be4
MK
1026To make the remainder of the discussion of
1027.B CLONE_THREAD
1028more readable, the term "thread" is used to refer to the
1029processes within a thread group.
efeece04 1030.IP
fd8a5be4
MK
1031Thread groups were a feature added in Linux 2.4 to support the
1032POSIX threads notion of a set of threads that share a single PID.
1033Internally, this shared PID is the so-called
1034thread group identifier (TGID) for the thread group.
c13182ef 1035Since Linux 2.4, calls to
fea681da 1036.BR getpid (2)
fd8a5be4 1037return the TGID of the caller.
efeece04 1038.IP
fd8a5be4
MK
1039The threads within a group can be distinguished by their (system-wide)
1040unique thread IDs (TID).
1041A new thread's TID is available as the function result
5261b0fe 1042returned to the caller,
fd8a5be4
MK
1043and a thread can obtain
1044its own TID using
1045.BR gettid (2).
efeece04 1046.IP
5261b0fe 1047When a clone call is made without specifying
fd8a5be4
MK
1048.BR CLONE_THREAD ,
1049then the resulting thread is placed in a new thread group
1050whose TGID is the same as the thread's TID.
1051This thread is the
1052.I leader
1053of the new thread group.
efeece04 1054.IP
fd8a5be4
MK
1055A new thread created with
1056.B CLONE_THREAD
5261b0fe 1057has the same parent process as the process that made the clone call
c13182ef 1058(i.e., like
fd8a5be4
MK
1059.BR CLONE_PARENT ),
1060so that calls to
1061.BR getppid (2)
1062return the same value for all of the threads in a thread group.
1063When a
c13182ef 1064.B CLONE_THREAD
5261b0fe 1065thread terminates, the thread that created it is not sent a
fd8a5be4
MK
1066.B SIGCHLD
1067(or other termination) signal;
1068nor can the status of such a thread be obtained
1069using
1070.BR wait (2).
1071(The thread is said to be
1072.IR detached .)
efeece04 1073.IP
e2fbf61d
MK
1074After all of the threads in a thread group terminate
1075the parent process of the thread group is sent a
fd8a5be4
MK
1076.B SIGCHLD
1077(or other termination) signal.
efeece04 1078.IP
fd8a5be4
MK
1079If any of the threads in a thread group performs an
1080.BR execve (2),
1081then all threads other than the thread group leader are terminated,
1082and the new program is executed in the thread group leader.
efeece04 1083.IP
f7110f60
MK
1084If one of the threads in a thread group creates a child using
1085.BR fork (2),
1086then any thread in the group can
1087.BR wait (2)
1088for that child.
efeece04 1089.IP
16853a31 1090Since Linux 2.5.35, the
fd8a5be4 1091.I flags
16853a31 1092mask must also include
fd8a5be4
MK
1093.B CLONE_SIGHAND
1094if
1095.B CLONE_THREAD
6fd69f33 1096is specified
d6bec36e
MK
1097(and note that, since Linux 2.6.0,
1098.\" Precisely: Linux 2.6.0-test6
1ae6b2c7 1099.B CLONE_SIGHAND
6fd69f33 1100also requires
1ae6b2c7 1101.B CLONE_VM
6fd69f33 1102to be included).
efeece04 1103.IP
e2fbf61d
MK
1104Signal dispositions and actions are process-wide:
1105if an unhandled signal is delivered to a thread, then
1106it will affect (terminate, stop, continue, be ignored in)
1107all members of the thread group.
efeece04 1108.IP
99408a60 1109Each thread has its own signal mask, as set by
f957eebd
MK
1110.BR sigprocmask (2).
1111.IP
1112A signal may be process-directed or thread-directed.
1113A process-directed signal is targeted at a thread group (i.e., a TGID),
1114and is delivered to an arbitrarily selected thread from among those
1115that are not blocking the signal.
ed4f87f0 1116A signal may be process-directed because it was generated by the kernel
f957eebd
MK
1117for reasons other than a hardware exception, or because it was sent using
1118.BR kill (2)
1119or
1120.BR sigqueue (3).
1121A thread-directed signal is targeted at (i.e., delivered to)
1122a specific thread.
1123A signal may be thread-directed because it was sent using
1124.BR tgkill (2)
1125or
1126.BR pthread_sigqueue (3),
1127or because the thread executed a machine language instruction that triggered
1128a hardware exception
1129(e.g., invalid memory access triggering
1ae6b2c7 1130.B SIGSEGV
f957eebd
MK
1131or a floating-point exception triggering
1132.BR SIGFPE ).
1133.IP
99408a60
MK
1134A call to
1135.BR sigpending (2)
f957eebd
MK
1136returns a signal set that is the union of the pending process-directed
1137signals and the signals that are pending for the calling thread.
efeece04 1138.IP
475c2753 1139If a process-directed signal is delivered to a thread group,
e2fbf61d 1140and the thread group has installed a handler for the signal, then
95887a00 1141the handler is invoked in exactly one, arbitrarily selected
e2fbf61d 1142member of the thread group that has not blocked the signal.
c13182ef 1143If multiple threads in a group are waiting to accept the same signal using
e2fbf61d
MK
1144.BR sigwaitinfo (2),
1145the kernel will arbitrarily select one of these threads
475c2753 1146to receive the signal.
a69b6bda 1147.TP
f5dbc7c8 1148.BR CLONE_UNTRACED " (since Linux 2.5.46)"
a69b6bda 1149If
f5dbc7c8
MK
1150.B CLONE_UNTRACED
1151is specified, then a tracing process cannot force
1152.B CLONE_PTRACE
1153on this child process.
fea681da 1154.TP
1603d6a1 1155.BR CLONE_VFORK " (since Linux 2.2)"
f5dbc7c8
MK
1156If
1157.B CLONE_VFORK
1158is set, the execution of the calling process is suspended
1159until the child releases its virtual memory
1160resources via a call to
1161.BR execve (2)
1162or
1163.BR _exit (2)
1164(as with
1165.BR vfork (2)).
efeece04 1166.IP
f5dbc7c8
MK
1167If
1168.B CLONE_VFORK
4b4a853a 1169is not set, then both the calling process and the child are schedulable
f5dbc7c8
MK
1170after the call, and an application should not rely on execution occurring
1171in any particular order.
fea681da 1172.TP
1603d6a1 1173.BR CLONE_VM " (since Linux 2.0)"
f5dbc7c8
MK
1174If
1175.B CLONE_VM
1176is set, the calling process and the child process run in the same memory
1177space.
1178In particular, memory writes performed by the calling process
1179or by the child process are also visible in the other process.
1180Moreover, any memory mapping or unmapping performed with
1181.BR mmap (2)
1182or
1183.BR munmap (2)
1184by the child or calling process also affects the other process.
efeece04 1185.IP
f5dbc7c8
MK
1186If
1187.B CLONE_VM
1188is not set, the child process runs in a separate copy of the memory
5261b0fe 1189space of the calling process at the time of the clone call.
f5dbc7c8
MK
1190Memory writes or file mappings/unmappings performed by one of the
1191processes do not affect the other, as with
1192.BR fork (2).
52e5819c
MK
1193.IP
1194If the
1ae6b2c7 1195.B CLONE_VM
52e5819c 1196flag is specified and the
1ae6b2c7 1197.B CLONE_VFORK
52e5819c
MK
1198flag is not specified,
1199then any alternate signal stack that was established by
1200.BR sigaltstack (2)
1201is cleared in the child process.
47297adb 1202.SH RETURN VALUE
0bfa087b
MK
1203.\" gettid(2) returns current->pid;
1204.\" getpid(2) returns current->tgid;
fea681da 1205On success, the thread ID of the child process is returned
c13182ef 1206in the caller's thread of execution.
84811e86 1207On failure, \-1 is returned
95887a00 1208in the caller's context, no child process is created, and
fea681da 1209.I errno
f6a4078b 1210is set to indicate the error.
fea681da
MK
1211.SH ERRORS
1212.TP
3396ec7b
AC
1213.BR EACCES " (" clone3 "() only)"
1214.B CLONE_INTO_CGROUP
1215was specified in
1216.IR cl_args.flags ,
1217but the restrictions (described in
1218.BR cgroups (7))
1219on placing the child process into the version 2 cgroup referred to by
1220.I cl_args.cgroup
1221are not met.
1222.TP
fea681da 1223.B EAGAIN
e1b6e186
MK
1224Too many processes are already running; see
1225.BR fork (2).
fea681da 1226.TP
6ba79da9
MK
1227.BR EBUSY " (" clone3 "() only)"
1228.B CLONE_INTO_CGROUP
1229was specified in
1230.IR cl_args.flags ,
1231but the file descriptor specified in
1ae6b2c7 1232.I cl_args.cgroup
6ba79da9
MK
1233refers to a version 2 cgroup in which a domain controller is enabled.
1234.TP
bf031aaa 1235.BR EEXIST " (" clone3 "() only)"
ee8bb310 1236One (or more) of the PIDs specified in
bf031aaa
AR
1237.I set_tid
1238already exists in the corresponding PID namespace.
1239.TP
fea681da 1240.B EINVAL
27f14b44
MK
1241Both
1242.B CLONE_SIGHAND
1243and
1244.B CLONE_CLEAR_SIGHAND
1245were specified in the
1246.I flags
1247mask.
1248.TP
1249.B EINVAL
fea681da 1250.B CLONE_SIGHAND
16853a31
MK
1251was specified in the
1252.I flags
1253mask, but
fea681da 1254.B CLONE_VM
2e8a7fb3 1255was not.
d6bec36e
MK
1256(Since Linux 2.6.0.)
1257.\" Precisely: Linux 2.6.0-test6
fea681da
MK
1258.TP
1259.B EINVAL
1260.B CLONE_THREAD
16853a31
MK
1261was specified in the
1262.I flags
1263mask, but
fea681da 1264.B CLONE_SIGHAND
6387216b
MK
1265was not.
1266(Since Linux 2.5.35.)
29546c24
MK
1267.\" .TP
1268.\" .B EINVAL
1269.\" Precisely one of
1270.\" .B CLONE_DETACHED
1271.\" and
1272.\" .B CLONE_THREAD
6387216b
MK
1273.\" was specified.
1274.\" (Since Linux 2.6.0-test6.)
fea681da
MK
1275.TP
1276.B EINVAL
d6868c69 1277.B CLONE_THREAD
16853a31
MK
1278was specified in the
1279.I flags
1280mask, but the current process previously called
d6868c69
JH
1281.BR unshare (2)
1282with the
1283.B CLONE_NEWPID
1284flag or used
1285.BR setns (2)
1286to reassociate itself with a PID namespace.
1287.TP
1288.B EINVAL
d34e5645 1289.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
fea681da
MK
1290Both
1291.B CLONE_FS
1292and
1293.B CLONE_NEWNS
16853a31 1294were specified in the
1ae6b2c7 1295.I flags
16853a31 1296mask.
fea681da 1297.TP
d34e5645
MK
1298.BR EINVAL " (since Linux 3.9)"
1299Both
1300.B CLONE_NEWUSER
1301and
1302.B CLONE_FS
16853a31 1303were specified in the
1ae6b2c7 1304.I flags
16853a31 1305mask.
d34e5645 1306.TP
fea681da 1307.B EINVAL
82ee147a 1308Both
667417b3
MK
1309.B CLONE_NEWIPC
1310and
1311.B CLONE_SYSVSEM
16853a31 1312were specified in the
1ae6b2c7 1313.I flags
16853a31 1314mask.
667417b3
MK
1315.TP
1316.B EINVAL
f0007192 1317One (or both) of
1ae6b2c7 1318.B CLONE_NEWPID
f0007192 1319or
1ae6b2c7 1320.B CLONE_NEWUSER
f0007192 1321and one (or both) of
1ae6b2c7 1322.B CLONE_THREAD
f0007192 1323or
1ae6b2c7 1324.B CLONE_PARENT
16853a31 1325were specified in the
1ae6b2c7 1326.I flags
16853a31 1327mask.
82ee147a 1328.TP
be479fdf
MK
1329.BR EINVAL " (since Linux 2.6.32)"
1330.\" commit 123be07b0b399670a7cc3d82fef0cb4f93ef885c
1ae6b2c7 1331.B CLONE_PARENT
be479fdf
MK
1332was specified, and the caller is an init process.
1333.TP
82ee147a 1334.B EINVAL
d4748fad 1335Returned by the glibc
edcc65ff 1336.BR clone ()
d4748fad 1337wrapper function when
1ae6b2c7 1338.I fn
d4748fad 1339or
1ae6b2c7 1340.I stack
d4748fad 1341is specified as NULL.
fea681da 1342.TP
28cad2c1 1343.B EINVAL
1ae6b2c7 1344.B CLONE_NEWIPC
16853a31 1345was specified in the
1ae6b2c7 1346.I flags
16853a31 1347mask,
667417b3
MK
1348but the kernel was not configured with the
1349.B CONFIG_SYSVIPC
1350and
1ae6b2c7 1351.B CONFIG_IPC_NS
667417b3
MK
1352options.
1353.TP
1354.B EINVAL
1ae6b2c7 1355.B CLONE_NEWNET
16853a31 1356was specified in the
1ae6b2c7 1357.I flags
16853a31 1358mask,
163bf178
MK
1359but the kernel was not configured with the
1360.B CONFIG_NET_NS
1361option.
1362.TP
1363.B EINVAL
1ae6b2c7 1364.B CLONE_NEWPID
16853a31 1365was specified in the
1ae6b2c7 1366.I flags
16853a31 1367mask,
28cad2c1
MK
1368but the kernel was not configured with the
1369.B CONFIG_PID_NS
1370option.
1371.TP
43ce9dda 1372.B EINVAL
1ae6b2c7 1373.B CLONE_NEWUSER
16853a31 1374was specified in the
1ae6b2c7 1375.I flags
16853a31 1376mask,
231d0bbe
MK
1377but the kernel was not configured with the
1378.B CONFIG_USER_NS
1379option.
1380.TP
1381.B EINVAL
1ae6b2c7 1382.B CLONE_NEWUTS
16853a31 1383was specified in the
1ae6b2c7 1384.I flags
16853a31 1385mask,
43ce9dda 1386but the kernel was not configured with the
832fe8ea 1387.B CONFIG_UTS_NS
43ce9dda
MK
1388option.
1389.TP
c550a897 1390.B EINVAL
81c2368f 1391.I stack
c550a897
MK
1392is not aligned to a suitable boundary for this architecture.
1393For example, on aarch64,
81c2368f 1394.I stack
c550a897
MK
1395must be a multiple of 16.
1396.TP
bc03b116 1397.BR EINVAL " (" clone3 "() only)"
baa435c6
MK
1398.B CLONE_DETACHED
1399was specified in the
1400.I flags
1401mask.
1402.TP
bc03b116 1403.BR EINVAL " (" clone "() only)"
9f938981
CB
1404.B CLONE_PIDFD
1405was specified together with
16853a31
MK
1406.B CLONE_DETACHED
1407in the
1408.I flags
1409mask.
9f938981
CB
1410.TP
1411.B EINVAL
1412.B CLONE_PIDFD
1413was specified together with
16853a31
MK
1414.B CLONE_THREAD
1415in the
1416.I flags
1417mask.
9f938981 1418.TP
faa0e55a 1419.BR EINVAL " (" clone "() only)"
9f938981
CB
1420.B CLONE_PIDFD
1421was specified together with
16853a31
MK
1422.B CLONE_PARENT_SETTID
1423in the
1424.I flags
1425mask.
9f938981 1426.TP
bf031aaa
AR
1427.BR EINVAL " (" clone3 "() only)"
1428.I set_tid_size
ee8bb310 1429is greater than the number of nested PID namespaces.
bf031aaa
AR
1430.TP
1431.BR EINVAL " (" clone3 "() only)"
2a2b2a5d 1432One of the PIDs specified in
bf031aaa 1433.I set_tid
2a2b2a5d 1434was invalid.
bf031aaa 1435.TP
ba9ae75d
MK
1436.BR EINVAL " (AArch64 only, Linux 4.6 and earlier)"
1437.I stack
1874ca39 1438was not aligned to a 128-bit boundary.
ba9ae75d 1439.TP
fea681da
MK
1440.B ENOMEM
1441Cannot allocate sufficient memory to allocate a task structure for the
1442child, or to copy those parts of the caller's context that need to be
1443copied.
1444.TP
b20e22ae
MK
1445.BR ENOSPC " (since Linux 3.7)"
1446.\" commit f2302505775fd13ba93f034206f1e2a587017929
1447.B CLONE_NEWPID
16853a31
MK
1448was specified in the
1449.I flags
1450mask,
b20e22ae
MK
1451but the limit on the nesting depth of PID namespaces
1452would have been exceeded; see
1453.BR pid_namespaces (7).
1454.TP
b5742ecc
MK
1455.BR ENOSPC " (since Linux 4.9; beforehand " EUSERS )
1456.B CLONE_NEWUSER
16853a31 1457was specified in the
1ae6b2c7 1458.I flags
16853a31 1459mask, and the call would cause the limit on the number of
b5742ecc
MK
1460nested user namespaces to be exceeded.
1461See
1462.BR user_namespaces (7).
efeece04 1463.IP
b5742ecc
MK
1464From Linux 3.11 to Linux 4.8, the error diagnosed in this case was
1465.BR EUSERS .
1466.TP
2f7a331e 1467.BR ENOSPC " (since Linux 4.9)"
16853a31 1468One of the values in the
2f7a331e 1469.I flags
16853a31 1470mask specified the creation of a new user namespace,
2f7a331e 1471but doing so would have caused the limit defined by the corresponding file in
1ae6b2c7 1472.I /proc/sys/user
2f7a331e
MK
1473to be exceeded.
1474For further details, see
1475.BR namespaces (7).
1476.TP
0b065634 1477.BR EOPNOTSUPP " (" clone3 "() only)"
6ba79da9
MK
1478.B CLONE_INTO_CGROUP
1479was specified in
1480.IR cl_args.flags ,
1481but the file descriptor specified in
1ae6b2c7 1482.I cl_args.cgroup
6ba79da9 1483refers to a version 2 cgroup that is in the
1ae6b2c7 1484.I domain invalid
6ba79da9
MK
1485state.
1486.TP
fea681da 1487.B EPERM
aa825b59 1488.BR CLONE_NEWCGROUP ,
667417b3 1489.BR CLONE_NEWIPC ,
163bf178 1490.BR CLONE_NEWNET ,
43ce9dda
MK
1491.BR CLONE_NEWNS ,
1492.BR CLONE_NEWPID ,
82ee147a 1493or
1ae6b2c7 1494.B CLONE_NEWUTS
00b08db3 1495was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
fea681da
MK
1496.TP
1497.B EPERM
1498.B CLONE_PID
1499was specified by a process other than process 0.
1c173eb3 1500(This error occurs only on Linux 2.5.15 and earlier.)
365d292a
MK
1501.TP
1502.B EPERM
1ae6b2c7 1503.B CLONE_NEWUSER
16853a31 1504was specified in the
1ae6b2c7 1505.I flags
16853a31 1506mask,
365d292a
MK
1507but either the effective user ID or the effective group ID of the caller
1508does not have a mapping in the parent namespace (see
f58fb24f 1509.BR user_namespaces (7)).
6fd119e7 1510.TP
ac007938
MK
1511.BR EPERM " (since Linux 3.9)"
1512.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
11a38815 1513.B CLONE_NEWUSER
16853a31 1514was specified in the
ac007938 1515.I flags
16853a31 1516mask and the caller is in a chroot environment
ac007938
MK
1517.\" FIXME What is the rationale for this restriction?
1518(i.e., the caller's root directory does not match the root directory
1519of the mount namespace in which it resides).
1520.TP
bf031aaa
AR
1521.BR EPERM " (" clone3 "() only)"
1522.I set_tid_size
1523was greater than zero, and the caller lacks the
1524.B CAP_SYS_ADMIN
1525capability in one or more of the user namespaces that own the
1526corresponding PID namespaces.
1527.TP
6717ee86
MK
1528.BR ERESTARTNOINTR " (since Linux 2.6.17)"
1529.\" commit 4a2c7a7837da1b91468e50426066d988050e4d56
1530System call was interrupted by a signal and will be restarted.
1531(This can be seen only during a trace.)
1532.TP
b5742ecc 1533.BR EUSERS " (Linux 3.11 to Linux 4.8)"
6fd119e7 1534.B CLONE_NEWUSER
16853a31 1535was specified in the
1ae6b2c7 1536.I flags
16853a31 1537mask,
b5742ecc
MK
1538and the limit on the number of nested user namespaces would be exceeded.
1539See the discussion of the
1ae6b2c7 1540.B ENOSPC
b5742ecc 1541error above.
faa0e55a
MK
1542.SH VERSIONS
1543The
1544.BR clone3 ()
1545system call first appeared in Linux 5.3.
92b72224
MK
1546.\" There is no entry for
1547.\" .BR clone ()
1548.\" in libc5.
1549.\" glibc2 provides
1550.\" .BR clone ()
1551.\" as described in this manual page.
3113c7f3 1552.SH STANDARDS
faa0e55a
MK
1553These system calls
1554are Linux-specific and should not be used in programs
a1d5f77c 1555intended to be portable.
fea681da 1556.SH NOTES
673d16da
MK
1557One use of these system calls
1558is to implement threads: multiple flows of control in a program that
1559run concurrently in a shared address space.
1560.PP
673d16da
MK
1561Note that the glibc
1562.BR clone ()
1563wrapper function makes some changes
1564in the memory pointed to by
1565.I stack
1566(changes required to set the stack up correctly for the child)
1567.I before
1568invoking the
1569.BR clone ()
1570system call.
1571So, in cases where
1572.BR clone ()
1573is used to recursively create children,
1574do not use the buffer employed for the parent's stack
1575as the stack of the child.
1576.PP
79bdcc4a
MK
1577The
1578.BR kcmp (2)
1579system call can be used to test whether two processes share various
49dba87f 1580resources such as a file descriptor table,
79bdcc4a 1581System V semaphore undo operations, or a virtual address space.
efeece04 1582.PP
c471c363
MK
1583Handlers registered using
1584.BR pthread_atfork (3)
5261b0fe 1585are not executed during a clone call.
efeece04 1586.PP
ca8b1e32 1587In the Linux 2.4.x series,
fd8a5be4
MK
1588.B CLONE_THREAD
1589generally does not make the parent of the new thread the same
1590as the parent of the calling process.
1591However, for kernel versions 2.4.7 to 2.4.18 the
1592.B CLONE_THREAD
1593flag implied the
c13182ef 1594.B CLONE_PARENT
ca8b1e32 1595flag (as in Linux 2.6.0 and later).
efeece04 1596.PP
34ccb744 1597On i386,
a5a997ca
MK
1598.BR clone ()
1599should not be called through vsyscall, but directly through
1600.IR "int $0x80" .
673d16da
MK
1601.\"
1602.SS C library/kernel differences
1603The raw
1604.BR clone ()
1605system call corresponds more closely to
1606.BR fork (2)
1607in that execution in the child continues from the point of the
1608call.
1609As such, the
1610.I fn
1611and
1612.I arg
1613arguments of the
1614.BR clone ()
1615wrapper function are omitted.
1616.PP
1617In contrast to the glibc wrapper, the raw
1618.BR clone ()
1619system call accepts NULL as a
1620.I stack
1621argument (and
1622.BR clone3 ()
1623likewise allows
1624.I cl_args.stack
1625to be NULL).
1626In this case, the child uses a duplicate of the parent's stack.
1627(Copy-on-write semantics ensure that the child gets separate copies
1628of stack pages when either process modifies the stack.)
1629In this case, for correct operation, the
1630.B CLONE_VM
1631option should not be specified.
1632(If the child
1633.I shares
1634the parent's memory because of the use of the
1ae6b2c7 1635.B CLONE_VM
673d16da
MK
1636flag,
1637then no copy-on-write duplication occurs and chaos is likely to result.)
1638.PP
1639The order of the arguments also differs in the raw system call,
1640and there are variations in the arguments across architectures,
1641as detailed in the following paragraphs.
1642.PP
1643The raw system call interface on x86-64 and some other architectures
1644(including sh, tile, and alpha) is:
1645.PP
161b8eda 1646.in +4n
673d16da
MK
1647.EX
1648.BI "long clone(unsigned long " flags ", void *" stack ,
1649.BI " int *" parent_tid ", int *" child_tid ,
1650.BI " unsigned long " tls );
1651.EE
1652.in
1653.PP
1654On x86-32, and several other common architectures
1655(including score, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
1656and MIPS),
1657.\" CONFIG_CLONE_BACKWARDS
1658the order of the last two arguments is reversed:
1659.PP
161b8eda 1660.in +4n
673d16da
MK
1661.EX
1662.BI "long clone(unsigned long " flags ", void *" stack ,
1663.BI " int *" parent_tid ", unsigned long " tls ,
1664.BI " int *" child_tid );
1665.EE
1666.in
1667.PP
1668On the cris and s390 architectures,
1669.\" CONFIG_CLONE_BACKWARDS2
1670the order of the first two arguments is reversed:
1671.PP
161b8eda 1672.in +4n
673d16da
MK
1673.EX
1674.BI "long clone(void *" stack ", unsigned long " flags ,
1675.BI " int *" parent_tid ", int *" child_tid ,
1676.BI " unsigned long " tls );
1677.EE
1678.in
1679.PP
1680On the microblaze architecture,
1681.\" CONFIG_CLONE_BACKWARDS3
1682an additional argument is supplied:
1683.PP
161b8eda 1684.in +4n
673d16da
MK
1685.EX
1686.BI "long clone(unsigned long " flags ", void *" stack ,
1687.BI " int " stack_size , "\fR /* Size of stack */"
1688.BI " int *" parent_tid ", int *" child_tid ,
1689.BI " unsigned long " tls );
1690.EE
1691.in
1692.\"
1693.SS blackfin, m68k, and sparc
1694.\" Mike Frysinger noted in a 2013 mail:
1695.\" these arches don't define __ARCH_WANT_SYS_CLONE:
1696.\" blackfin ia64 m68k sparc
1697The argument-passing conventions on
1698blackfin, m68k, and sparc are different from the descriptions above.
1699For details, see the kernel (and glibc) source.
1700.SS ia64
1701On ia64, a different interface is used:
1702.PP
161b8eda 1703.in +4n
673d16da 1704.EX
77ca5b1d 1705.BI "int __clone2(int (*" "fn" ")(void *),"
673d16da 1706.BI " void *" stack_base ", size_t " stack_size ,
77ca5b1d 1707.BI " int " flags ", void *" "arg" ", ..."
673d16da
MK
1708.BI " /* pid_t *" parent_tid ", struct user_desc *" tls ,
1709.BI " pid_t *" child_tid " */ );"
1710.EE
1711.in
1712.PP
1713The prototype shown above is for the glibc wrapper function;
1714for the system call itself,
1715the prototype can be described as follows (it is identical to the
1716.BR clone ()
1717prototype on microblaze):
1718.PP
161b8eda 1719.in +4n
673d16da
MK
1720.EX
1721.BI "long clone2(unsigned long " flags ", void *" stack_base ,
1722.BI " int " stack_size , "\fR /* Size of stack */"
1723.BI " int *" parent_tid ", int *" child_tid ,
1724.BI " unsigned long " tls );
1725.EE
1726.in
1727.PP
1728.BR __clone2 ()
1729operates in the same way as
1730.BR clone (),
1731except that
1732.I stack_base
1733points to the lowest address of the child's stack area,
1734and
1735.I stack_size
1736specifies the size of the stack pointed to by
1737.IR stack_base .
1738.SS Linux 2.4 and earlier
1739In Linux 2.4 and earlier,
1740.BR clone ()
1741does not take arguments
1742.IR parent_tid ,
1743.IR tls ,
1744and
1745.IR child_tid .
31830ef0 1746.SH BUGS
abcf3b1d
MK
1747GNU C library versions 2.3.4 up to and including 2.24
1748contained a wrapper function for
0bfa087b 1749.BR getpid (2)
abcf3b1d
MK
1750that performed caching of PIDs.
1751This caching relied on support in the glibc wrapper for
c60237c9 1752.BR clone (),
abcf3b1d
MK
1753but limitations in the implementation
1754meant that the cache was not up to date in some circumstances.
c60237c9 1755In particular,
abcf3b1d 1756if a signal was delivered to the child immediately after the
c60237c9
MK
1757.BR clone ()
1758call, then a call to
0b80cf56 1759.BR getpid (2)
abcf3b1d 1760in a handler for the signal could return the PID
c60237c9 1761of the calling process ("the parent"),
abcf3b1d 1762if the clone wrapper had not yet had a chance to update the PID
c60237c9
MK
1763cache in the child.
1764(This discussion ignores the case where the child was created using
9291ce36 1765.BR CLONE_THREAD ,
c60237c9 1766when
0b80cf56 1767.BR getpid (2)
c60237c9
MK
1768.I should
1769return the same value in the child and in the process that called
1770.BR clone (),
a1d48abb 1771since the caller and the child are in the same thread group.
e7d807b7 1772The stale-cache problem also does not occur if the
a1d48abb
JR
1773.I flags
1774argument includes
1775.BR CLONE_VM .)
abcf3b1d
MK
1776To get the truth, it was sometimes necessary to use code such as the following:
1777.PP
47f743f1
MK
1778.in +4n
1779.EX
1780#include <syscall.h>
31830ef0 1781
47f743f1 1782pid_t mypid;
31830ef0 1783
47f743f1
MK
1784mypid = syscall(SYS_getpid);
1785.EE
1786.in
c60237c9
MK
1787.\" See also the following bug reports
1788.\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
1789.\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
abcf3b1d
MK
1790.PP
1791Because of the stale-cache problem, as well as other problems noted in
1792.BR getpid (2),
1793the PID caching feature was removed in glibc 2.25.
a14af333 1794.SH EXAMPLES
8c7b566c 1795The following program demonstrates the use of
9c13072a 1796.BR clone ()
8c7b566c
MK
1797to create a child process that executes in a separate UTS namespace.
1798The child changes the hostname in its UTS namespace.
1799Both parent and child then display the system hostname,
1800making it possible to see that the hostname
1801differs in the UTS namespaces of the parent and child.
1802For an example of the use of this program, see
1803.BR setns (2).
99c3a000
MK
1804.PP
1805Within the sample program, we allocate the memory that is to
1806be used for the child's stack using
1807.BR mmap (2)
1808rather than
1809.BR malloc (3)
1810for the following reasons:
1811.IP * 3
1812.BR mmap (2)
1813allocates a block of memory that starts on a page
1814boundary and is a multiple of the page size.
1815This is useful if we want to establish a guard page (a page with protection
1816.BR PROT_NONE )
1817at the end of the stack using
1818.BR mprotect (2).
1819.IP *
1820We can specify the
1ae6b2c7 1821.B MAP_STACK
99c3a000
MK
1822flag to request a mapping that is suitable for a stack.
1823For the moment, this flag is a no-op on Linux,
1824but it exists and has effect on some other systems,
1825so we should include it for portability.
f30b7415 1826.SS Program source
33857069 1827.\" SRC BEGIN (clone.c)
e7d0bb47 1828.EX
8c7b566c 1829#define _GNU_SOURCE
8c7b566c 1830#include <sched.h>
80ae7514 1831#include <signal.h>
8eb90116 1832#include <stdint.h>
8c7b566c
MK
1833#include <stdio.h>
1834#include <stdlib.h>
80ae7514 1835#include <string.h>
99c3a000 1836#include <sys/mman.h>
80ae7514
AC
1837#include <sys/utsname.h>
1838#include <sys/wait.h>
1839#include <unistd.h>
8c7b566c 1840
d1a71985 1841#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \e
8c7b566c
MK
1842 } while (0)
1843
1844static int /* Start function for cloned child */
1845childFunc(void *arg)
1846{
1847 struct utsname uts;
1848
c6beb8a1 1849 /* Change hostname in UTS namespace of child. */
8c7b566c
MK
1850
1851 if (sethostname(arg, strlen(arg)) == \-1)
1852 errExit("sethostname");
1853
c6beb8a1 1854 /* Retrieve and display hostname. */
8c7b566c
MK
1855
1856 if (uname(&uts) == \-1)
1857 errExit("uname");
d1a71985 1858 printf("uts.nodename in child: %s\en", uts.nodename);
8c7b566c
MK
1859
1860 /* Keep the namespace open for a while, by sleeping.
1861 This allows some experimentation\-\-for example, another
1862 process might join the namespace. */
9f1b9726 1863
8c7b566c
MK
1864 sleep(200);
1865
1866 return 0; /* Child terminates now */
1867}
1868
1869#define STACK_SIZE (1024 * 1024) /* Stack size for cloned child */
1870
1871int
1872main(int argc, char *argv[])
1873{
1874 char *stack; /* Start of stack buffer */
1875 char *stackTop; /* End of stack buffer */
1876 pid_t pid;
1877 struct utsname uts;
1878
1879 if (argc < 2) {
d1a71985 1880 fprintf(stderr, "Usage: %s <child\-hostname>\en", argv[0]);
8c7b566c
MK
1881 exit(EXIT_SUCCESS);
1882 }
1883
c6beb8a1 1884 /* Allocate memory to be used for the stack of the child. */
8c7b566c 1885
99c3a000
MK
1886 stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
1887 MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, \-1, 0);
8eea66b8 1888 if (stack == MAP_FAILED)
99c3a000
MK
1889 errExit("mmap");
1890
8c7b566c
MK
1891 stackTop = stack + STACK_SIZE; /* Assume stack grows downward */
1892
1893 /* Create child that has its own UTS namespace;
c6beb8a1 1894 child commences execution in childFunc(). */
8c7b566c
MK
1895
1896 pid = clone(childFunc, stackTop, CLONE_NEWUTS | SIGCHLD, argv[1]);
1897 if (pid == \-1)
1898 errExit("clone");
8eb90116 1899 printf("clone() returned %jd\en", (intmax_t) pid);
8c7b566c
MK
1900
1901 /* Parent falls through to here */
1902
1903 sleep(1); /* Give child time to change its hostname */
1904
9f1b9726 1905 /* Display hostname in parent\(aqs UTS namespace. This will be
8c7b566c
MK
1906 different from hostname in child\(aqs UTS namespace. */
1907
1908 if (uname(&uts) == \-1)
1909 errExit("uname");
d1a71985 1910 printf("uts.nodename in parent: %s\en", uts.nodename);
8c7b566c
MK
1911
1912 if (waitpid(pid, NULL, 0) == \-1) /* Wait for child */
1913 errExit("waitpid");
d1a71985 1914 printf("child has terminated\en");
8c7b566c
MK
1915
1916 exit(EXIT_SUCCESS);
1917}
e7d0bb47 1918.EE
33857069 1919.\" SRC END
47297adb 1920.SH SEE ALSO
fea681da 1921.BR fork (2),
2b44301c 1922.BR futex (2),
fea681da
MK
1923.BR getpid (2),
1924.BR gettid (2),
6f8746e4 1925.BR kcmp (2),
99c3a000 1926.BR mmap (2),
d8837668 1927.BR pidfd_open (2),
f2d0bbf1 1928.BR set_thread_area (2),
2b44301c 1929.BR set_tid_address (2),
8403481f 1930.BR setns (2),
f2d0bbf1 1931.BR tkill (2),
5cc01e9c 1932.BR unshare (2),
fea681da 1933.BR wait (2),
3616b7c0 1934.BR capabilities (7),
41096af1 1935.BR namespaces (7),
3616b7c0 1936.BR pthreads (7)