]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/mount_setattr.2
Many pages: Use correct letter case in page titles (TH)
[thirdparty/man-pages.git] / man2 / mount_setattr.2
CommitLineData
f3a5ba3f
CB
1.\" Copyright (c) 2021 by Christian Brauner <christian.brauner@ubuntu.com>
2.\"
5fbde956 3.\" SPDX-License-Identifier: Linux-man-pages-copyleft
f3a5ba3f 4.\"
4c1c5274 5.TH mount_setattr 2 (date) "Linux man-pages (unreleased)"
f3a5ba3f 6.SH NAME
70a9d0fe 7mount_setattr \- change properties of a mount or mount tree
c084848f
AC
8.SH LIBRARY
9Standard C library
8fc3b2cf 10.RI ( libc ", " \-lc )
f3a5ba3f
CB
11.SH SYNOPSIS
12.nf
13
14.PP
15.BR "#include <linux/fcntl.h>" " /* Definition of " AT_* " constants */"
63097cb7 16.BR "#include <linux/mount.h>" " /* Definition of " MOUNT_ATTR_* " constants */"
f3a5ba3f
CB
17.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
18.B #include <unistd.h>
19.PP
5a9ebeba 20.BI "int syscall(SYS_mount_setattr, int " dirfd ", const char *" pathname ,
63097cb7
AC
21.BI " unsigned int " flags ", struct mount_attr *" attr \
22", size_t " size );
f3a5ba3f
CB
23.fi
24.PP
25.IR Note :
26glibc provides no wrapper for
27.BR mount_setattr (),
28necessitating the use of
29.BR syscall (2).
30.SH DESCRIPTION
31The
63097cb7 32.BR mount_setattr ()
133e6b16 33system call changes the mount properties of a mount or an entire mount tree.
f3a5ba3f 34If
5a9ebeba 35.I pathname
f3a5ba3f 36is a relative pathname,
63097cb7
AC
37then it is interpreted relative to
38the directory referred to by the file descriptor
30397d7d 39.IR dirfd .
f3a5ba3f 40If
30397d7d 41.I dirfd
f3a5ba3f 42is the special value
133e6b16 43.BR AT_FDCWD ,
f3a5ba3f 44then
5a9ebeba 45.I pathname
63097cb7
AC
46is interpreted relative to
47the current working directory of the calling process.
f3a5ba3f 48If
5a9ebeba 49.I pathname
f3a5ba3f 50is the empty string and
63097cb7 51.B AT_EMPTY_PATH
f3a5ba3f 52is specified in
63097cb7 53.IR flags ,
f3a5ba3f 54then the mount properties of the mount identified by
30397d7d 55.I dirfd
f3a5ba3f 56are changed.
717c3a7d
MK
57(See
58.BR openat (2)
59for an explanation of why the
60.I dirfd
61argument is useful.)
f3a5ba3f
CB
62.PP
63The
63097cb7 64.BR mount_setattr ()
f3a5ba3f 65system call uses an extensible structure
63097cb7 66.RI ( "struct mount_attr" )
f3a5ba3f
CB
67to allow for future extensions.
68Any non-flag extensions to
63097cb7 69.BR mount_setattr ()
133e6b16 70will be implemented as new fields appended to this structure,
f3a5ba3f
CB
71with a zero value in a new field resulting in the kernel behaving
72as though that extension field was not present.
73Therefore,
74the caller
75.I must
76zero-fill this structure on initialization.
133e6b16 77See the "Extensibility" subsection under
f3a5ba3f
CB
78.B NOTES
79for more details.
80.PP
81The
82.I size
83argument should usually be specified as
84.IR "sizeof(struct mount_attr)" .
70a9d0fe 85However, if the caller is using a kernel that supports an extended
133e6b16 86.IR "struct mount_attr" ,
70a9d0fe 87but the caller does not intend to make use of these features,
5303eb87 88it is possible to pass the size of an earlier
70a9d0fe 89version of the structure together with the extended structure.
5303eb87 90This allows the kernel to not copy later parts of the structure
63097cb7 91that aren't used anyway.
f3a5ba3f 92With each extension that changes the size of
133e6b16
MK
93.IR "struct mount_attr" ,
94the kernel will expose a definition of the form
63097cb7
AC
95.BI MOUNT_ATTR_SIZE_VER number\c
96\&.
133e6b16 97For example, the macro for the size of the initial version of
f3a5ba3f
CB
98.I struct mount_attr
99is
100.BR MOUNT_ATTR_SIZE_VER0 .
101.PP
102The
103.I flags
5a9ebeba 104argument can be used to alter the pathname resolution behavior.
f3a5ba3f
CB
105The supported values are:
106.TP
107.B AT_EMPTY_PATH
108If
5a9ebeba 109.I pathname
63097cb7
AC
110is the empty string,
111change the mount properties on
30397d7d 112.I dirfd
f3a5ba3f
CB
113itself.
114.TP
115.B AT_RECURSIVE
116Change the mount properties of the entire mount tree.
117.TP
118.B AT_SYMLINK_NOFOLLOW
133e6b16 119Don't follow trailing symbolic links.
f3a5ba3f
CB
120.TP
121.B AT_NO_AUTOMOUNT
122Don't trigger automounts.
123.PP
124The
125.I attr
126argument of
63097cb7 127.BR mount_setattr ()
f3a5ba3f
CB
128is a structure of the following form:
129.PP
130.in +4n
131.EX
132struct mount_attr {
115b4e0e
AC
133 __u64 attr_set; /* Mount properties to set */
134 __u64 attr_clr; /* Mount properties to clear */
135 __u64 propagation; /* Mount propagation type */
136 __u64 userns_fd; /* User namespace file descriptor */
f3a5ba3f
CB
137};
138.EE
139.in
140.PP
141The
142.I attr_set
143and
144.I attr_clr
63097cb7
AC
145members are used to specify the mount properties that
146are supposed to be set or cleared for a mount or mount tree.
f3a5ba3f
CB
147Flags set in
148.I attr_set
63097cb7
AC
149enable a property on a mount or mount tree,
150and flags set in
f3a5ba3f
CB
151.I attr_clr
152remove a property from a mount or mount tree.
153.PP
63097cb7
AC
154When changing mount properties,
155the kernel will first clear the flags specified
f3a5ba3f
CB
156in the
157.I attr_clr
63097cb7
AC
158field,
159and then set the flags specified in the
f3a5ba3f 160.I attr_set
70a9d0fe
MK
161field.
162For example, these settings:
f3a5ba3f
CB
163.PP
164.in +4n
165.EX
166struct mount_attr attr = {
167 .attr_clr = MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NODEV,
168 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
169};
70a9d0fe
MK
170.EE
171.in
172.PP
173are equivalent to the following steps:
174.PP
175.in +4n
176.EX
f3a5ba3f
CB
177unsigned int current_mnt_flags = mnt->mnt_flags;
178
179/*
180 * Clear all flags set in .attr_clr,
181 * clearing MOUNT_ATTR_NOEXEC and MOUNT_ATTR_NODEV.
182 */
183current_mnt_flags &= ~attr->attr_clr;
184
185/*
186 * Now set all flags set in .attr_set,
187 * applying MOUNT_ATTR_RDONLY and MOUNT_ATTR_NOSUID.
188 */
189current_mnt_flags |= attr->attr_set;
190
191mnt->mnt_flags = current_mnt_flags;
192.EE
193.in
194.PP
70a9d0fe 195As a result of this change, the mount or mount tree (a) is read-only;
133e6b16
MK
196(b) blocks the execution of set-user-ID and set-group-ID programs;
197(c) allows execution of programs; and (d) allows access to devices.
198.PP
f3a5ba3f
CB
199Multiple changes with the same set of flags requested
200in
201.I attr_clr
202and
203.I attr_set
204are guaranteed to be idempotent after the changes have been applied.
205.PP
206The following mount attributes can be specified in the
207.I attr_set
208or
209.I attr_clr
210fields:
211.TP
212.B MOUNT_ATTR_RDONLY
213If set in
133e6b16
MK
214.IR attr_set ,
215makes the mount read-only.
216If set in
217.IR attr_clr ,
f3a5ba3f
CB
218removes the read-only setting if set on the mount.
219.TP
220.B MOUNT_ATTR_NOSUID
221If set in
133e6b16
MK
222.IR attr_set ,
223causes the mount not to honor the set-user-ID and set-group-ID mode bits and
224file capabilities when executing programs.
f3a5ba3f 225If set in
133e6b16 226.IR attr_clr ,
f3a5ba3f
CB
227clears the set-user-ID, set-group-ID,
228and file capability restriction if set on this mount.
229.TP
230.B MOUNT_ATTR_NODEV
231If set in
133e6b16
MK
232.IR attr_set ,
233prevents access to devices on this mount.
234If set in
235.IR attr_clr ,
236removes the restriction that prevented accessing devices on this mount.
f3a5ba3f 237.TP
63097cb7 238.B MOUNT_ATTR_NOEXEC
f3a5ba3f 239If set in
133e6b16
MK
240.IR attr_set ,
241prevents executing programs on this mount.
242If set in
243.IR attr_clr ,
63097cb7 244removes the restriction that prevented executing programs on this mount.
f3a5ba3f 245.TP
63097cb7 246.B MOUNT_ATTR_NOSYMFOLLOW
f3a5ba3f 247If set in
133e6b16
MK
248.IR attr_set ,
249prevents following symbolic links on this mount.
250If set in
251.IR attr_clr ,
252removes the restriction that prevented following symbolic links on this mount.
f3a5ba3f
CB
253.TP
254.B MOUNT_ATTR_NODIRATIME
255If set in
133e6b16
MK
256.IR attr_set ,
257prevents updating access time for directories on this mount.
258If set in
259.IR attr_clr ,
63097cb7 260removes the restriction that prevented updating access time for directories.
f3a5ba3f 261Note that
63097cb7 262.B MOUNT_ATTR_NODIRATIME
133e6b16 263can be combined with other access-time settings
63097cb7 264and is implied by the noatime setting.
133e6b16 265All other access-time settings are mutually exclusive.
f3a5ba3f 266.TP
133e6b16 267.BR MOUNT_ATTR__ATIME " - changing access-time settings"
d27bcddc
MK
268The access-time values listed below are an enumeration that
269includes the value zero, expressed in the bits defined by the mask
270.BR MOUNT_ATTR__ATIME .
271Even though these bits are an enumeration
272(in contrast to the other mount flags such as
133e6b16 273.BR MOUNT_ATTR_NOEXEC ),
f3a5ba3f
CB
274they are nonetheless passed in
275.I attr_set
276and
277.I attr_clr
278for consistency with
63097cb7 279.BR fsmount (2),
f3a5ba3f
CB
280which introduced this behavior.
281.IP
133e6b16 282Note that,
d27bcddc
MK
283since the access-time values are an enumeration rather than bit values,
284a caller wanting to transition to a different access-time setting
285cannot simply specify the access-time setting in
286.IR attr_set ,
287but must also include
f3a5ba3f
CB
288.B MOUNT_ATTR__ATIME
289in the
290.I attr_clr
291field.
292The kernel will verify that
63097cb7 293.B MOUNT_ATTR__ATIME
f3a5ba3f 294isn't partially set in
1ae6b2c7 295.I attr_clr
d27bcddc
MK
296(i.e., all bits in the
297.B MOUNT_ATTR__ATIME
298bit field are either set or clear), and that
f3a5ba3f 299.I attr_set
133e6b16 300doesn't have any access-time bits set if
63097cb7 301.B MOUNT_ATTR__ATIME
f3a5ba3f
CB
302isn't set in
303.IR attr_clr .
304.RS
305.TP
306.B MOUNT_ATTR_RELATIME
307When a file is accessed via this mount,
63097cb7
AC
308update the file's last access time (atime)
309only if the current value of atime is less than or equal to
310the file's last modification time (mtime) or last status change time (ctime).
f3a5ba3f 311.IP
133e6b16 312To enable this access-time setting on a mount or mount tree,
63097cb7 313.B MOUNT_ATTR_RELATIME
f3a5ba3f
CB
314must be set in
315.I attr_set
316and
63097cb7 317.B MOUNT_ATTR__ATIME
f3a5ba3f
CB
318must be set in the
319.I attr_clr
320field.
321.TP
63097cb7 322.B MOUNT_ATTR_NOATIME
f3a5ba3f
CB
323Do not update access times for (all types of) files on this mount.
324.IP
133e6b16 325To enable this access-time setting on a mount or mount tree,
63097cb7 326.B MOUNT_ATTR_NOATIME
f3a5ba3f
CB
327must be set in
328.I attr_set
329and
63097cb7 330.B MOUNT_ATTR__ATIME
f3a5ba3f
CB
331must be set in the
332.I attr_clr
333field.
334.TP
63097cb7
AC
335.B MOUNT_ATTR_STRICTATIME
336Always update the last access time (atime)
337when files are accessed on this mount.
f3a5ba3f 338.IP
133e6b16 339To enable this access-time setting on a mount or mount tree,
63097cb7 340.B MOUNT_ATTR_STRICTATIME
f3a5ba3f
CB
341must be set in
342.I attr_set
343and
63097cb7 344.B MOUNT_ATTR__ATIME
f3a5ba3f
CB
345must be set in the
346.I attr_clr
347field.
348.RE
349.TP
63097cb7 350.B MOUNT_ATTR_IDMAP
f3a5ba3f 351If set in
133e6b16 352.IR attr_set ,
3643106e 353creates an ID-mapped mount.
133e6b16
MK
354The ID mapping is taken from the user namespace specified in
355.I userns_fd
356and attached to the mount.
357.IP
63097cb7 358Since it is not supported to
3643106e 359change the ID mapping of a mount after it has been ID mapped,
f3a5ba3f
CB
360it is invalid to specify
361.B MOUNT_ATTR_IDMAP
362in
363.IR attr_clr .
f3a5ba3f 364.IP
538a491e 365For further details, see the subsection "ID-mapped mounts" under NOTES.
f3a5ba3f
CB
366.PP
367The
368.I propagation
369field is used to specify the propagation type of the mount or mount tree.
29fdc88d
MK
370This field either has the value zero,
371meaning leave the propagation type unchanged, or it has one of
372the following values:
f3a5ba3f
CB
373.TP
374.B MS_PRIVATE
375Turn all mounts into private mounts.
f3a5ba3f
CB
376.TP
377.B MS_SHARED
378Turn all mounts into shared mounts.
f3a5ba3f
CB
379.TP
380.B MS_SLAVE
381Turn all mounts into dependent mounts.
f3a5ba3f
CB
382.TP
383.B MS_UNBINDABLE
705bf534 384Turn all mounts into unbindable mounts.
38635f0b 385.PP
705bf534 386For further details on the above propagation types, see
38635f0b 387.BR mount_namespaces (7).
f3a5ba3f
CB
388.SH RETURN VALUE
389On success,
63097cb7 390.BR mount_setattr ()
f3a5ba3f
CB
391returns zero.
392On error,
393\-1 is returned and
394.I errno
395is set to indicate the cause of the error.
396.SH ERRORS
397.TP
398.B EBADF
9f4e736a
MK
399.I pathname
400is relative but
30397d7d 401.I dirfd
9f4e736a
MK
402is neither
403.B AT_FDCWD
404nor a valid file descriptor.
f3a5ba3f
CB
405.TP
406.B EBADF
407.I userns_fd
408is not a valid file descriptor.
409.TP
410.B EBUSY
411The caller tried to change the mount to
133e6b16 412.BR MOUNT_ATTR_RDONLY ,
63097cb7 413but the mount still holds files open for writing.
f3a5ba3f 414.TP
11f44554
CB
415.B EBUSY
416The caller tried to create an ID-mapped mount raising
d12916a0 417.B MOUNT_ATTR_IDMAP
11f44554
CB
418and specifying
419.I userns_fd
420but the mount still holds files open for writing.
421.TP
f3a5ba3f 422.B EINVAL
5a9ebeba 423The pathname specified via the
30397d7d 424.I dirfd
f3a5ba3f 425and
5a9ebeba 426.I pathname
f3a5ba3f 427arguments to
63097cb7 428.BR mount_setattr ()
133e6b16 429isn't a mount point.
f3a5ba3f
CB
430.TP
431.B EINVAL
432An unsupported value was set in
5303eb87 433.IR flags .
f3a5ba3f
CB
434.TP
435.B EINVAL
436An unsupported value was specified in the
437.I attr_set
438field of
439.IR mount_attr .
440.TP
441.B EINVAL
442An unsupported value was specified in the
443.I attr_clr
444field of
445.IR mount_attr .
446.TP
447.B EINVAL
448An unsupported value was specified in the
449.I propagation
450field of
451.IR mount_attr .
452.TP
453.B EINVAL
454More than one of
63097cb7
AC
455.BR MS_SHARED ,
456.BR MS_SLAVE ,
457.BR MS_PRIVATE ,
f3a5ba3f 458or
63097cb7 459.B MS_UNBINDABLE
f606879a 460was set in the
f3a5ba3f
CB
461.I propagation
462field of
463.IR mount_attr .
464.TP
465.B EINVAL
133e6b16 466An access-time setting was specified in the
f3a5ba3f
CB
467.I attr_set
468field without
63097cb7 469.B MOUNT_ATTR__ATIME
f3a5ba3f
CB
470being set in the
471.I attr_clr
472field.
473.TP
474.B EINVAL
63097cb7 475.B MOUNT_ATTR_IDMAP
f3a5ba3f
CB
476was specified in
477.IR attr_clr .
478.TP
479.B EINVAL
480A file descriptor value was specified in
481.I userns_fd
482which exceeds
483.BR INT_MAX .
484.TP
485.B EINVAL
486A valid file descriptor value was specified in
133e6b16 487.IR userns_fd ,
70a9d0fe 488but the file descriptor did not refer to a user namespace.
f3a5ba3f
CB
489.TP
490.B EINVAL
3643106e 491The underlying filesystem does not support ID-mapped mounts.
f3a5ba3f
CB
492.TP
493.B EINVAL
70a9d0fe 494The mount that is to be ID mapped is not a detached mount;
20e6e6ed 495that is, the mount has not previously been visible in a mount namespace.
f3a5ba3f
CB
496.TP
497.B EINVAL
133e6b16 498A partial access-time setting was specified in
f3a5ba3f
CB
499.I attr_clr
500instead of
63097cb7 501.B MOUNT_ATTR__ATIME
f3a5ba3f
CB
502being set.
503.TP
504.B EINVAL
505The mount is located outside the caller's mount namespace.
506.TP
507.B EINVAL
9e11604c
MK
508The underlying filesystem has been mounted in a mount namespace that is
509owned by a noninitial user namespace.
f3a5ba3f
CB
510.TP
511.B ENOENT
512A pathname was empty or had a nonexistent component.
513.TP
514.B ENOMEM
515When changing mount propagation to
133e6b16
MK
516.BR MS_SHARED ,
517a new peer group ID needs to be allocated for all mounts without a peer group
518ID set.
9e11604c
MK
519This allocation failed because there was not
520enough memory to allocate the relevant internal structures.
f3a5ba3f
CB
521.TP
522.B ENOSPC
523When changing mount propagation to
133e6b16
MK
524.BR MS_SHARED ,
525a new peer group ID needs to be allocated for all mounts without a peer group
526ID set.
9e11604c
MK
527This allocation failed because
528the kernel has run out of IDs.
61dc7df0 529.\" Christian Brauner: i.e. someone has somehow managed to
9e11604c
MK
530.\" allocate so many peer groups and managed to keep the kernel running
531.\" (???) that the ida has run out of ids
532.\" Note that technically further error codes are possible that are
533.\" specific to the ID allocation implementation used.
f3a5ba3f
CB
534.TP
535.B EPERM
536One of the mounts had at least one of
63097cb7
AC
537.BR MOUNT_ATTR_NOATIME ,
538.BR MOUNT_ATTR_NODEV ,
539.BR MOUNT_ATTR_NODIRATIME ,
540.BR MOUNT_ATTR_NOEXEC ,
541.BR MOUNT_ATTR_NOSUID ,
f3a5ba3f 542or
63097cb7 543.B MOUNT_ATTR_RDONLY
f3a5ba3f
CB
544set and the flag is locked.
545Mount attributes become locked on a mount if:
546.RS
03cd41e9 547.IP \(bu 3
63097cb7 548A new mount or mount tree is created causing mount propagation across user
9e11604c
MK
549namespaces
550(i.e., propagation to a mount namespace owned by a different user namespace).
551The kernel will lock the aforementioned flags to prevent these sensitive
f3a5ba3f
CB
552properties from being altered.
553.IP \(bu
63097cb7 554A new mount and user namespace pair is created.
f3a5ba3f 555This happens for example when specifying
63097cb7 556.B CLONE_NEWUSER | CLONE_NEWNS
f3a5ba3f
CB
557in
558.BR unshare (2),
559.BR clone (2),
560or
03cd41e9 561.BR clone3 (2).
9e11604c
MK
562The aforementioned flags become locked in the new mount namespace
563to prevent sensitive mount properties from being altered.
564Since the newly created mount namespace will be owned by the
565newly created user namespace,
566a calling process that is privileged in the new
567user namespace would\(emin the absence of such locking\(embe
568able to alter sensitive mount properties (e.g., to remount a mount
569that was marked read-only as read-write in the new mount namespace).
f3a5ba3f
CB
570.RE
571.TP
572.B EPERM
573A valid file descriptor value was specified in
133e6b16 574.IR userns_fd ,
f3a5ba3f
CB
575but the file descriptor refers to the initial user namespace.
576.TP
577.B EPERM
70a9d0fe 578An attempt was made to add an ID mapping to a mount that is already ID mapped.
f3a5ba3f
CB
579.TP
580.B EPERM
581The caller does not have
133e6b16 582.B CAP_SYS_ADMIN
f3a5ba3f
CB
583in the initial user namespace.
584.SH VERSIONS
63097cb7 585.BR mount_setattr ()
f3a5ba3f
CB
586first appeared in Linux 5.12.
587.\" commit 7d6beb71da3cc033649d641e1e608713b8220290
588.\" commit 2a1867219c7b27f928e2545782b86daaf9ad50bd
589.\" commit 9caccd41541a6f7d6279928d9f971f6642c361af
3113c7f3 590.SH STANDARDS
63097cb7
AC
591.BR mount_setattr ()
592is Linux-specific.
f3a5ba3f 593.SH NOTES
538a491e
MK
594.SS ID-mapped mounts
595Creating an ID-mapped mount makes it possible to
596change the ownership of all files located under a mount.
597Thus, ID-mapped mounts make it possible to
598change ownership in a temporary and localized way.
70a9d0fe
MK
599It is a localized change because the ownership changes are
600visible only via a specific mount.
538a491e 601All other users and locations where the filesystem is exposed are unaffected.
5303eb87
MK
602It is a temporary change because
603the ownership changes are tied to the lifetime of the mount.
538a491e
MK
604.PP
605Whenever callers interact with the filesystem through an ID-mapped mount,
606the ID mapping of the mount will be applied to
607user and group IDs associated with filesystem objects.
608This encompasses the user and group IDs associated with inodes
609and also the following
610.BR xattr (7)
611keys:
612.IP \(bu 3
613.IR security.capability ,
614whenever filesystem capabilities
615are stored or returned in the
616.B VFS_CAP_REVISION_3
617format,
618which stores a root user ID alongside the capabilities
619(see
620.BR capabilities (7)).
621.IP \(bu
622.I system.posix_acl_access
623and
624.IR system.posix_acl_default ,
625whenever user IDs or group IDs are stored in
626.B ACL_USER
627or
628.B ACL_GROUP
629entries.
630.PP
631The following conditions must be met in order to create an ID-mapped mount:
632.IP \(bu 3
633The caller must have the
634.B CAP_SYS_ADMIN
9292cbb1
CB
635capability in the user namespace the filesystem was mounted in.
636.\" commit bd303368b776eead1c29e6cdda82bde7128b82a7
637.\" Christian Brauner
638.\" Note, currently no filesystems mountable in non-initial user namespaces
639.\" support ID-mapped mounts.
538a491e
MK
640.IP \(bu
641The underlying filesystem must support ID-mapped mounts.
be974ba2
CB
642Currently, the following filesystems support ID-mapped mounts:
643.\" fs_flags = FS_ALLOW_IDMAP in kernel sources
22356d97
AC
644.IP
645.RS
be974ba2
CB
646.PD 0
647.IP \(bu 3
648.BR xfs (5)
649(since Linux 5.12)
650.IP \(bu
651.BR ext4 (5)
652(since Linux 5.12)
653.IP \(bu
d12916a0 654.B FAT
be974ba2
CB
655(since Linux 5.12)
656.IP \(bu
657.BR btrfs (5)
658(since Linux 5.15)
659.\" commit 5b9b26f5d0b88b74001dcfe4ab8a8f2f4e744112
538a491e 660.IP \(bu
d12916a0 661.B ntfs3
be974ba2
CB
662(since Linux 5.15)
663.\" commit 82cae269cfa953032fbb8980a7d554d60fb00b17
664.IP \(bu
d12916a0 665.B f2fs
be974ba2
CB
666(since Linux 5.18)
667.\" commit 984fc4e76d63345499f01c0c198a4b44860cf027
668.IP \(bu
d12916a0 669.B erofs
be974ba2
CB
670(since Linux 5.19)
671.\" commit 6c459b78d4793afbba6d864c466cc5cd2932459d
672.IP \(bu
d12916a0 673.B overlayfs
be974ba2
CB
674(ID-mapped lower and upper layers supported since Linux 5.19)
675.PD
676.RE
be974ba2 677.IP \(bu 3
538a491e
MK
678The mount must not already be ID-mapped.
679This also implies that the ID mapping of a mount cannot be altered.
680.IP \(bu
70a9d0fe 681The mount must be a detached mount;
538a491e
MK
682that is,
683it must have been created by calling
684.BR open_tree (2)
685with the
686.B OPEN_TREE_CLONE
20e6e6ed
MK
687flag and it must not already have been visible in a mount namespace.
688(To put things another way:
689the mount must not have been attached to the filesystem hierarchy
690with a system call such as
691.BR move_mount (2).)
11f44554
CB
692.IP \(bu
693The mount must not have any writers.
694.\" commit 1bbcd277a53e08d619ffeec56c5c9287f2bf42f
538a491e
MK
695.PP
696ID mappings can be created for user IDs, group IDs, and project IDs.
697An ID mapping is essentially a mapping of a range of user or group IDs into
698another or the same range of user or group IDs.
70a9d0fe
MK
699ID mappings are written to map files as three numbers
700separated by white space.
538a491e
MK
701The first two numbers specify the starting user or group ID
702in each of the two user namespaces.
703The third number specifies the range of the ID mapping.
70a9d0fe
MK
704For example,
705a mapping for user IDs such as "1000\ 1001\ 1" would indicate that
538a491e
MK
706user ID 1000 in the caller's user namespace is mapped to
707user ID 1001 in its ancestor user namespace.
708Since the map range is 1,
709only user ID 1000 is mapped.
710.PP
711It is possible to specify up to 340 ID mappings for each ID mapping type.
712If any user IDs or group IDs are not mapped,
713all files owned by that unmapped user or group ID will appear as
714being owned by the overflow user ID or overflow group ID respectively.
715.PP
5303eb87
MK
716Further details on setting up ID mappings can be found in
717.BR user_namespaces (7).
538a491e
MK
718.PP
719In the common case, the user namespace passed in
720.I userns_fd
5303eb87 721(together with
538a491e
MK
722.B MOUNT_ATTR_IDMAP
723in
5303eb87 724.IR attr_set )
538a491e
MK
725to create an ID-mapped mount will be the user namespace of a container.
726In other scenarios it will be a dedicated user namespace associated with
727a user's login session as is the case for portable home directories in
728.BR systemd-homed.service (8).
729It is also perfectly fine to create a dedicated user namespace
730for the sake of ID mapping a mount.
731.PP
732ID-mapped mounts can be useful in the following
733and a variety of other scenarios:
734.IP \(bu 3
70a9d0fe
MK
735Sharing files or filesystems
736between multiple users or multiple machines,
538a491e
MK
737especially in complex scenarios.
738For example,
739ID-mapped mounts are used to implement portable home directories in
740.BR systemd-homed.service (8),
741where they allow users to move their home directory
742to an external storage device
743and use it on multiple computers
744where they are assigned different user IDs and group IDs.
745This effectively makes it possible to
746assign random user IDs and group IDs at login time.
747.IP \(bu
70a9d0fe
MK
748Sharing files or filesystems
749from the host with unprivileged containers.
538a491e
MK
750This allows a user to avoid having to change ownership permanently through
751.BR chown (2).
752.IP \(bu
753ID mapping a container's root filesystem.
754Users don't need to change ownership permanently through
755.BR chown (2).
756Especially for large root filesystems, using
757.BR chown (2)
758can be prohibitively expensive.
759.IP \(bu
70a9d0fe
MK
760Sharing files or filesystems
761between containers with non-overlapping ID mappings.
538a491e
MK
762.IP \(bu
763Implementing discretionary access control (DAC) permission checking
764for filesystems lacking a concept of ownership.
765.IP \(bu
766Efficiently changing ownership on a per-mount basis.
767In contrast to
768.BR chown (2),
769changing ownership of large sets of files is instantaneous with
770ID-mapped mounts.
771This is especially useful when ownership of
772an entire root filesystem of a virtual machine or container
773is to be changed as mentioned above.
774With ID-mapped mounts,
775a single
776.BR mount_setattr ()
777system call will be sufficient to change the ownership of all files.
778.IP \(bu
779Taking the current ownership into account.
780ID mappings specify precisely
781what a user or group ID is supposed to be mapped to.
782This contrasts with the
783.BR chown (2)
784system call which cannot by itself
785take the current ownership of the files it changes into account.
786It simply changes the ownership to the specified user ID and group ID.
787.IP \(bu
788Locally and temporarily restricted ownership changes.
789ID-mapped mounts make it possible to change ownership locally,
70a9d0fe 790restricting the ownership changes to specific mounts,
538a491e
MK
791and temporarily as the ownership changes only apply as long as the mount exists.
792By contrast,
793changing ownership via the
794.BR chown (2)
795system call changes the ownership globally and permanently.
796.\"
f3a5ba3f
CB
797.SS Extensibility
798In order to allow for future extensibility,
63097cb7 799.BR mount_setattr ()
f3a5ba3f
CB
800requires the user-space application to specify the size of the
801.I mount_attr
802structure that it is passing.
803By providing this information, it is possible for
63097cb7 804.BR mount_setattr ()
f3a5ba3f
CB
805to provide both forwards- and backwards-compatibility, with
806.I size
807acting as an implicit version number.
808(Because new extension fields will always
809be appended, the structure size will always increase.)
810This extensibility design is very similar to other system calls such as
811.BR sched_setattr (2),
812.BR perf_event_open (2),
813.BR clone3 (2)
814and
03cd41e9 815.BR openat2 (2).
f3a5ba3f
CB
816.PP
817Let
818.I usize
819be the size of the structure as specified by the user-space application,
820and let
821.I ksize
822be the size of the structure which the kernel supports,
823then there are three cases to consider:
03cd41e9 824.IP \(bu 3
f3a5ba3f 825If
63097cb7 826.I ksize
f3a5ba3f
CB
827equals
828.IR usize ,
829then there is no version mismatch and
830.I attr
831can be used verbatim.
832.IP \(bu
833If
63097cb7 834.I ksize
f3a5ba3f
CB
835is larger than
836.IR usize ,
63097cb7
AC
837then there are some extension fields that the kernel supports
838which the user-space application is unaware of.
f3a5ba3f 839Because a zero value in any added extension field signifies a no-op,
63097cb7
AC
840the kernel treats all of the extension fields
841not provided by the user-space application
842as having zero values.
f3a5ba3f
CB
843This provides backwards-compatibility.
844.IP \(bu
845If
63097cb7 846.I ksize
f3a5ba3f
CB
847is smaller than
848.IR usize ,
849then there are some extension fields which the user-space application is aware
850of but which the kernel does not support.
851Because any extension field must have its zero values signify a no-op,
63097cb7
AC
852the kernel can safely ignore the unsupported extension fields
853if they are all zero.
854If any unsupported extension fields are nonzero,
855then \-1 is returned and
f3a5ba3f
CB
856.I errno
857is set to
858.BR E2BIG .
859This provides forwards-compatibility.
f3a5ba3f
CB
860.PP
861Because the definition of
862.I struct mount_attr
863may change in the future
864(with new fields being added when system headers are updated),
865user-space applications should zero-fill
866.I struct mount_attr
867to ensure that recompiling the program with new headers will not result in
868spurious errors at runtime.
869The simplest way is to use a designated initializer:
870.PP
871.in +4n
872.EX
873struct mount_attr attr = {
874 .attr_set = MOUNT_ATTR_RDONLY,
875 .attr_clr = MOUNT_ATTR_NODEV
876};
877.EE
878.in
879.PP
133e6b16 880Alternatively, the structure can be zero-filled using
f3a5ba3f
CB
881.BR memset (3)
882or similar functions:
883.PP
884.in +4n
885.EX
886struct mount_attr attr;
887memset(&attr, 0, sizeof(attr));
888attr.attr_set = MOUNT_ATTR_RDONLY;
889attr.attr_clr = MOUNT_ATTR_NODEV;
890.EE
891.in
892.PP
893A user-space application that wishes to determine which extensions the running
894kernel supports can do so by conducting a binary search on
63097cb7 895.I size
f3a5ba3f
CB
896with a structure which has every byte nonzero
897(to find the largest value which doesn't produce an error of
03cd41e9 898.BR E2BIG ).
f3a5ba3f 899.SH EXAMPLES
33857069 900.\" SRC BEGIN (mount_setattr.c)
f3a5ba3f
CB
901.EX
902/*
133e6b16
MK
903 * This program allows the caller to create a new detached mount
904 * and set various properties on it.
f3a5ba3f
CB
905 */
906#define _GNU_SOURCE
5a5208c1 907#include <err.h>
f3a5ba3f
CB
908#include <fcntl.h>
909#include <getopt.h>
910#include <linux/mount.h>
911#include <linux/types.h>
912#include <stdbool.h>
913#include <stdio.h>
914#include <stdlib.h>
915#include <string.h>
916#include <sys/syscall.h>
917#include <unistd.h>
918
63097cb7 919static inline int
5a9ebeba 920mount_setattr(int dirfd, const char *pathname, unsigned int flags,
63097cb7 921 struct mount_attr *attr, size_t size)
f3a5ba3f 922{
af35474e
MK
923 return syscall(SYS_mount_setattr, dirfd, pathname, flags,
924 attr, size);
f3a5ba3f
CB
925}
926
63097cb7 927static inline int
30397d7d 928open_tree(int dirfd, const char *filename, unsigned int flags)
f3a5ba3f 929{
30397d7d 930 return syscall(SYS_open_tree, dirfd, filename, flags);
f3a5ba3f
CB
931}
932
63097cb7 933static inline int
30397d7d
MK
934move_mount(int from_dirfd, const char *from_pathname,
935 int to_dirfd, const char *to_pathname, unsigned int flags)
f3a5ba3f 936{
30397d7d
MK
937 return syscall(SYS_move_mount, from_dirfd, from_pathname,
938 to_dirfd, to_pathname, flags);
f3a5ba3f
CB
939}
940
941static const struct option longopts[] = {
529027f0
AC
942 {"map\-mount", required_argument, NULL, \(aqa\(aq},
943 {"recursive", no_argument, NULL, \(aqb\(aq},
944 {"read\-only", no_argument, NULL, \(aqc\(aq},
945 {"block\-setid", no_argument, NULL, \(aqd\(aq},
946 {"block\-devices", no_argument, NULL, \(aqe\(aq},
947 {"block\-exec", no_argument, NULL, \(aqf\(aq},
948 {"no\-access\-time", no_argument, NULL, \(aqg\(aq},
f3a5ba3f
CB
949 { NULL, 0, NULL, 0 },
950};
951
63097cb7
AC
952int
953main(int argc, char *argv[])
f3a5ba3f 954{
0b94bd78
AC
955 int fd_userns = \-1;
956 int index = 0;
957 int ret;
958 bool recursive = false;
959 struct mount_attr *attr = &(struct mount_attr){};
f3a5ba3f
CB
960
961 while ((ret = getopt_long_only(argc, argv, "",
63097cb7 962 longopts, &index)) != \-1) {
f3a5ba3f 963 switch (ret) {
529027f0 964 case \(aqa\(aq:
f3a5ba3f
CB
965 fd_userns = open(optarg, O_RDONLY | O_CLOEXEC);
966 if (fd_userns == \-1)
5a5208c1 967 err(EXIT_FAILURE, "open(%s)", optarg);
f3a5ba3f 968 break;
529027f0 969 case \(aqb\(aq:
f3a5ba3f
CB
970 recursive = true;
971 break;
529027f0 972 case \(aqc\(aq:
f606879a 973 attr\->attr_set |= MOUNT_ATTR_RDONLY;
f3a5ba3f 974 break;
529027f0 975 case \(aqd\(aq:
f606879a 976 attr\->attr_set |= MOUNT_ATTR_NOSUID;
f3a5ba3f 977 break;
529027f0 978 case \(aqe\(aq:
f606879a 979 attr\->attr_set |= MOUNT_ATTR_NODEV;
f3a5ba3f 980 break;
529027f0 981 case \(aqf\(aq:
f606879a 982 attr\->attr_set |= MOUNT_ATTR_NOEXEC;
f3a5ba3f 983 break;
529027f0 984 case \(aqg\(aq:
f606879a
MK
985 attr\->attr_set |= MOUNT_ATTR_NOATIME;
986 attr\->attr_clr |= MOUNT_ATTR__ATIME;
f3a5ba3f
CB
987 break;
988 default:
5a5208c1 989 errx(EXIT_FAILURE, "Invalid argument specified");
f3a5ba3f
CB
990 }
991 }
992
91ce7d5f 993 if ((argc \- optind) < 2)
5a5208c1 994 errx(EXIT_FAILURE, "Missing source or target mount point");
f3a5ba3f 995
91ce7d5f
MK
996 const char *source = argv[optind];
997 const char *target = argv[optind + 1];
f606879a 998
45ea537c
MK
999 /* In the following, \-1 as the \(aqdirfd\(aq argument ensures that
1000 open_tree() fails if \(aqsource\(aq is not an absolute pathname. */
4c313d97
MK
1001.\" Christian Brauner
1002.\" When writing programs I like to never use relative paths with AT_FDCWD
1003.\" because making assumptions about the current working directory
1004.\" of the calling process is just too easy to get wrong; especially when
1005.\" pivot_root() or chroot() are in play.
1006.\" My absolut preference (joke intended) is to open a well-known starting
1007.\" point with an absolute path to get a dirfd and then scope all future
1008.\" operations beneath that dirfd. This already works with old-style
1009.\" openat() and _very_ cautious programming but openat2() and its
1010.\" resolve-flag space have made this **chef's kiss**.
1011.\" If I can't operate based on a well-known dirfd I use absolute paths
1012.\" with a -EBADF dirfd passed to *at() functions.
45ea537c
MK
1013
1014 int fd_tree = open_tree(\-1, source,
f606879a
MK
1015 OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC |
1016 AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0));
f3a5ba3f 1017 if (fd_tree == \-1)
5a5208c1 1018 err(EXIT_FAILURE, "open(%s)", source);
f3a5ba3f
CB
1019
1020 if (fd_userns >= 0) {
f606879a
MK
1021 attr\->attr_set |= MOUNT_ATTR_IDMAP;
1022 attr\->userns_fd = fd_userns;
f3a5ba3f 1023 }
f606879a 1024
f3a5ba3f 1025 ret = mount_setattr(fd_tree, "",
f606879a 1026 AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0),
f3a5ba3f
CB
1027 attr, sizeof(struct mount_attr));
1028 if (ret == \-1)
5a5208c1 1029 err(EXIT_FAILURE, "mount_setattr");
f606879a 1030
f3a5ba3f
CB
1031 close(fd_userns);
1032
45ea537c
MK
1033 /* In the following, \-1 as the \(aqto_dirfd\(aq argument ensures that
1034 move_mount() fails if \(aqtarget\(aq is not an absolute pathname. */
1035
1036 ret = move_mount(fd_tree, "", \-1, target,
f3a5ba3f
CB
1037 MOVE_MOUNT_F_EMPTY_PATH);
1038 if (ret == \-1)
5a5208c1 1039 err(EXIT_FAILURE, "move_mount() to %s", target);
f606879a 1040
f3a5ba3f
CB
1041 close(fd_tree);
1042
1043 exit(EXIT_SUCCESS);
1044}
1045.EE
33857069 1046.\" SRC END
f3a5ba3f 1047.SH SEE ALSO
f3a5ba3f 1048.BR newgidmap (1),
85a7ae73 1049.BR newuidmap (1),
8c674810
MK
1050.BR clone (2),
1051.BR mount (2),
f3a5ba3f 1052.BR unshare (2),
8c674810 1053.BR proc (5),
8c674810 1054.BR capabilities (7),
85a7ae73 1055.BR mount_namespaces (7),
f3a5ba3f 1056.BR user_namespaces (7),
5c3a06ed 1057.BR xattr (7)