1 .\" Copyright (c) 2021 by Christian Brauner <christian.brauner@ubuntu.com>
3 .\" SPDX-License-Identifier: Linux-man-pages-copyleft
5 .TH MOUNT_SETATTR 2 2021-08-27 "Linux" "Linux Programmer's Manual"
7 mount_setattr \- change properties of a mount or mount tree
10 .RI ( libc ", " \-lc )
15 .BR "#include <linux/fcntl.h>" " /* Definition of " AT_* " constants */"
16 .BR "#include <linux/mount.h>" " /* Definition of " MOUNT_ATTR_* " constants */"
17 .BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
18 .B #include <unistd.h>
20 .BI "int syscall(SYS_mount_setattr, int " dirfd ", const char *" pathname ,
21 .BI " unsigned int " flags ", struct mount_attr *" attr \
26 glibc provides no wrapper for
28 necessitating the use of
33 system call changes the mount properties of a mount or an entire mount tree.
36 is a relative pathname,
37 then it is interpreted relative to
38 the directory referred to by the file descriptor
46 is interpreted relative to
47 the current working directory of the calling process.
50 is the empty string and
54 then the mount properties of the mount identified by
59 for an explanation of why the
65 system call uses an extensible structure
66 .RI ( "struct mount_attr" )
67 to allow for future extensions.
68 Any non-flag extensions to
70 will be implemented as new fields appended to this structure,
71 with a zero value in a new field resulting in the kernel behaving
72 as though that extension field was not present.
76 zero-fill this structure on initialization.
77 See the "Extensibility" subsection under
83 argument should usually be specified as
84 .IR "sizeof(struct mount_attr)" .
85 However, if the caller is using a kernel that supports an extended
86 .IR "struct mount_attr" ,
87 but the caller does not intend to make use of these features,
88 it is possible to pass the size of an earlier
89 version of the structure together with the extended structure.
90 This allows the kernel to not copy later parts of the structure
91 that aren't used anyway.
92 With each extension that changes the size of
93 .IR "struct mount_attr" ,
94 the kernel will expose a definition of the form
95 .BI MOUNT_ATTR_SIZE_VER number\c
97 For example, the macro for the size of the initial version of
100 .BR MOUNT_ATTR_SIZE_VER0 .
104 argument can be used to alter the pathname resolution behavior.
105 The supported values are:
111 change the mount properties on
116 Change the mount properties of the entire mount tree.
118 .B AT_SYMLINK_NOFOLLOW
119 Don't follow trailing symbolic links.
122 Don't trigger automounts.
128 is a structure of the following form:
133 __u64 attr_set; /* Mount properties to set */
134 __u64 attr_clr; /* Mount properties to clear */
135 __u64 propagation; /* Mount propagation type */
136 __u64 userns_fd; /* User namespace file descriptor */
145 members are used to specify the mount properties that
146 are supposed to be set or cleared for a mount or mount tree.
149 enable a property on a mount or mount tree,
152 remove a property from a mount or mount tree.
154 When changing mount properties,
155 the kernel will first clear the flags specified
159 and then set the flags specified in the
162 For example, these settings:
166 struct mount_attr attr = {
167 .attr_clr = MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NODEV,
168 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
173 are equivalent to the following steps:
177 unsigned int current_mnt_flags = mnt->mnt_flags;
180 * Clear all flags set in .attr_clr,
181 * clearing MOUNT_ATTR_NOEXEC and MOUNT_ATTR_NODEV.
183 current_mnt_flags &= ~attr->attr_clr;
186 * Now set all flags set in .attr_set,
187 * applying MOUNT_ATTR_RDONLY and MOUNT_ATTR_NOSUID.
189 current_mnt_flags |= attr->attr_set;
191 mnt->mnt_flags = current_mnt_flags;
195 As a result of this change, the mount or mount tree (a) is read-only;
196 (b) blocks the execution of set-user-ID and set-group-ID programs;
197 (c) allows execution of programs; and (d) allows access to devices.
199 Multiple changes with the same set of flags requested
204 are guaranteed to be idempotent after the changes have been applied.
206 The following mount attributes can be specified in the
215 makes the mount read-only.
218 removes the read-only setting if set on the mount.
223 causes the mount not to honor the set-user-ID and set-group-ID mode bits and
224 file capabilities when executing programs.
227 clears the set-user-ID, set-group-ID,
228 and file capability restriction if set on this mount.
233 prevents access to devices on this mount.
236 removes the restriction that prevented accessing devices on this mount.
241 prevents executing programs on this mount.
244 removes the restriction that prevented executing programs on this mount.
246 .B MOUNT_ATTR_NOSYMFOLLOW
249 prevents following symbolic links on this mount.
252 removes the restriction that prevented following symbolic links on this mount.
254 .B MOUNT_ATTR_NODIRATIME
257 prevents updating access time for directories on this mount.
260 removes the restriction that prevented updating access time for directories.
262 .B MOUNT_ATTR_NODIRATIME
263 can be combined with other access-time settings
264 and is implied by the noatime setting.
265 All other access-time settings are mutually exclusive.
267 .BR MOUNT_ATTR__ATIME " - changing access-time settings"
268 The access-time values listed below are an enumeration that
269 includes the value zero, expressed in the bits defined by the mask
270 .BR MOUNT_ATTR__ATIME .
271 Even though these bits are an enumeration
272 (in contrast to the other mount flags such as
273 .BR MOUNT_ATTR_NOEXEC ),
274 they are nonetheless passed in
280 which introduced this behavior.
283 since the access-time values are an enumeration rather than bit values,
284 a caller wanting to transition to a different access-time setting
285 cannot simply specify the access-time setting in
287 but must also include
292 The kernel will verify that
294 isn't partially set in
296 (i.e., all bits in the
298 bit field are either set or clear), and that
300 doesn't have any access-time bits set if
306 .B MOUNT_ATTR_RELATIME
307 When a file is accessed via this mount,
308 update the file's last access time (atime)
309 only if the current value of atime is less than or equal to
310 the file's last modification time (mtime) or last status change time (ctime).
312 To enable this access-time setting on a mount or mount tree,
313 .B MOUNT_ATTR_RELATIME
322 .B MOUNT_ATTR_NOATIME
323 Do not update access times for (all types of) files on this mount.
325 To enable this access-time setting on a mount or mount tree,
326 .B MOUNT_ATTR_NOATIME
335 .B MOUNT_ATTR_STRICTATIME
336 Always update the last access time (atime)
337 when files are accessed on this mount.
339 To enable this access-time setting on a mount or mount tree,
340 .B MOUNT_ATTR_STRICTATIME
353 creates an ID-mapped mount.
354 The ID mapping is taken from the user namespace specified in
356 and attached to the mount.
358 Since it is not supported to
359 change the ID mapping of a mount after it has been ID mapped,
360 it is invalid to specify
365 For further details, see the subsection "ID-mapped mounts" under NOTES.
369 field is used to specify the propagation type of the mount or mount tree.
370 This field either has the value zero,
371 meaning leave the propagation type unchanged, or it has one of
372 the following values:
375 Turn all mounts into private mounts.
378 Turn all mounts into shared mounts.
381 Turn all mounts into dependent mounts.
384 Turn all mounts into unbindable mounts.
386 For further details on the above propagation types, see
387 .BR mount_namespaces (7).
395 is set to indicate the cause of the error.
404 nor a valid file descriptor.
408 is not a valid file descriptor.
411 The caller tried to change the mount to
412 .BR MOUNT_ATTR_RDONLY ,
413 but the mount still holds files open for writing.
416 The pathname specified via the
425 An unsupported value was set in
429 An unsupported value was specified in the
435 An unsupported value was specified in the
441 An unsupported value was specified in the
459 An access-time setting was specified in the
473 A file descriptor value was specified in
479 A valid file descriptor value was specified in
481 but the file descriptor did not refer to a user namespace.
484 The underlying filesystem does not support ID-mapped mounts.
487 The mount that is to be ID mapped is not a detached mount;
488 that is, the mount has already been visible in a mount namespace.
491 A partial access-time setting was specified in
498 The mount is located outside the caller's mount namespace.
501 The underlying filesystem has been mounted in a mount namespace that is
502 owned by a noninitial user namespace.
505 A pathname was empty or had a nonexistent component.
508 When changing mount propagation to
510 a new peer group ID needs to be allocated for all mounts without a peer group
512 This allocation failed because there was not
513 enough memory to allocate the relevant internal structures.
516 When changing mount propagation to
518 a new peer group ID needs to be allocated for all mounts without a peer group
520 This allocation failed because
521 the kernel has run out of IDs.
522 .\" Christian Brauner: i.e. someone has somehow managed to
523 .\" allocate so many peer groups and managed to keep the kernel running
524 .\" (???) that the ida has run out of ids
525 .\" Note that technically further error codes are possible that are
526 .\" specific to the ID allocation implementation used.
529 One of the mounts had at least one of
530 .BR MOUNT_ATTR_NOATIME ,
531 .BR MOUNT_ATTR_NODEV ,
532 .BR MOUNT_ATTR_NODIRATIME ,
533 .BR MOUNT_ATTR_NOEXEC ,
534 .BR MOUNT_ATTR_NOSUID ,
537 set and the flag is locked.
538 Mount attributes become locked on a mount if:
541 A new mount or mount tree is created causing mount propagation across user
543 (i.e., propagation to a mount namespace owned by a different user namespace).
544 The kernel will lock the aforementioned flags to prevent these sensitive
545 properties from being altered.
547 A new mount and user namespace pair is created.
548 This happens for example when specifying
549 .B CLONE_NEWUSER | CLONE_NEWNS
555 The aforementioned flags become locked in the new mount namespace
556 to prevent sensitive mount properties from being altered.
557 Since the newly created mount namespace will be owned by the
558 newly created user namespace,
559 a calling process that is privileged in the new
560 user namespace would\(emin the absence of such locking\(embe
561 able to alter sensitive mount properties (e.g., to remount a mount
562 that was marked read-only as read-write in the new mount namespace).
566 A valid file descriptor value was specified in
568 but the file descriptor refers to the initial user namespace.
571 An attempt was made to add an ID mapping to a mount that is already ID mapped.
574 The caller does not have
576 in the initial user namespace.
579 first appeared in Linux 5.12.
580 .\" commit 7d6beb71da3cc033649d641e1e608713b8220290
581 .\" commit 2a1867219c7b27f928e2545782b86daaf9ad50bd
582 .\" commit 9caccd41541a6f7d6279928d9f971f6642c361af
588 Creating an ID-mapped mount makes it possible to
589 change the ownership of all files located under a mount.
590 Thus, ID-mapped mounts make it possible to
591 change ownership in a temporary and localized way.
592 It is a localized change because the ownership changes are
593 visible only via a specific mount.
594 All other users and locations where the filesystem is exposed are unaffected.
595 It is a temporary change because
596 the ownership changes are tied to the lifetime of the mount.
598 Whenever callers interact with the filesystem through an ID-mapped mount,
599 the ID mapping of the mount will be applied to
600 user and group IDs associated with filesystem objects.
601 This encompasses the user and group IDs associated with inodes
602 and also the following
606 .IR security.capability ,
607 whenever filesystem capabilities
608 are stored or returned in the
609 .B VFS_CAP_REVISION_3
611 which stores a root user ID alongside the capabilities
613 .BR capabilities (7)).
615 .I system.posix_acl_access
617 .IR system.posix_acl_default ,
618 whenever user IDs or group IDs are stored in
624 The following conditions must be met in order to create an ID-mapped mount:
626 The caller must have the
628 capability in the initial user namespace.
630 The filesystem must be mounted in a mount namespace
631 that is owned by the initial user namespace.
633 The underlying filesystem must support ID-mapped mounts.
639 filesystems support ID-mapped mounts
640 with more filesystems being actively worked on.
642 The mount must not already be ID-mapped.
643 This also implies that the ID mapping of a mount cannot be altered.
645 The mount must be a detached mount;
647 it must have been created by calling
651 flag and it must not already have been visible in a mount namespace.
652 (To put things another way:
653 the mount must not have been attached to the filesystem hierarchy
654 with a system call such as
657 ID mappings can be created for user IDs, group IDs, and project IDs.
658 An ID mapping is essentially a mapping of a range of user or group IDs into
659 another or the same range of user or group IDs.
660 ID mappings are written to map files as three numbers
661 separated by white space.
662 The first two numbers specify the starting user or group ID
663 in each of the two user namespaces.
664 The third number specifies the range of the ID mapping.
666 a mapping for user IDs such as "1000\ 1001\ 1" would indicate that
667 user ID 1000 in the caller's user namespace is mapped to
668 user ID 1001 in its ancestor user namespace.
669 Since the map range is 1,
670 only user ID 1000 is mapped.
672 It is possible to specify up to 340 ID mappings for each ID mapping type.
673 If any user IDs or group IDs are not mapped,
674 all files owned by that unmapped user or group ID will appear as
675 being owned by the overflow user ID or overflow group ID respectively.
677 Further details on setting up ID mappings can be found in
678 .BR user_namespaces (7).
680 In the common case, the user namespace passed in
686 to create an ID-mapped mount will be the user namespace of a container.
687 In other scenarios it will be a dedicated user namespace associated with
688 a user's login session as is the case for portable home directories in
689 .BR systemd-homed.service (8).
690 It is also perfectly fine to create a dedicated user namespace
691 for the sake of ID mapping a mount.
693 ID-mapped mounts can be useful in the following
694 and a variety of other scenarios:
696 Sharing files or filesystems
697 between multiple users or multiple machines,
698 especially in complex scenarios.
700 ID-mapped mounts are used to implement portable home directories in
701 .BR systemd-homed.service (8),
702 where they allow users to move their home directory
703 to an external storage device
704 and use it on multiple computers
705 where they are assigned different user IDs and group IDs.
706 This effectively makes it possible to
707 assign random user IDs and group IDs at login time.
709 Sharing files or filesystems
710 from the host with unprivileged containers.
711 This allows a user to avoid having to change ownership permanently through
714 ID mapping a container's root filesystem.
715 Users don't need to change ownership permanently through
717 Especially for large root filesystems, using
719 can be prohibitively expensive.
721 Sharing files or filesystems
722 between containers with non-overlapping ID mappings.
724 Implementing discretionary access control (DAC) permission checking
725 for filesystems lacking a concept of ownership.
727 Efficiently changing ownership on a per-mount basis.
730 changing ownership of large sets of files is instantaneous with
732 This is especially useful when ownership of
733 an entire root filesystem of a virtual machine or container
734 is to be changed as mentioned above.
735 With ID-mapped mounts,
738 system call will be sufficient to change the ownership of all files.
740 Taking the current ownership into account.
741 ID mappings specify precisely
742 what a user or group ID is supposed to be mapped to.
743 This contrasts with the
745 system call which cannot by itself
746 take the current ownership of the files it changes into account.
747 It simply changes the ownership to the specified user ID and group ID.
749 Locally and temporarily restricted ownership changes.
750 ID-mapped mounts make it possible to change ownership locally,
751 restricting the ownership changes to specific mounts,
752 and temporarily as the ownership changes only apply as long as the mount exists.
754 changing ownership via the
756 system call changes the ownership globally and permanently.
759 In order to allow for future extensibility,
761 requires the user-space application to specify the size of the
763 structure that it is passing.
764 By providing this information, it is possible for
766 to provide both forwards- and backwards-compatibility, with
768 acting as an implicit version number.
769 (Because new extension fields will always
770 be appended, the structure size will always increase.)
771 This extensibility design is very similar to other system calls such as
772 .BR sched_setattr (2),
773 .BR perf_event_open (2),
780 be the size of the structure as specified by the user-space application,
783 be the size of the structure which the kernel supports,
784 then there are three cases to consider:
790 then there is no version mismatch and
792 can be used verbatim.
798 then there are some extension fields that the kernel supports
799 which the user-space application is unaware of.
800 Because a zero value in any added extension field signifies a no-op,
801 the kernel treats all of the extension fields
802 not provided by the user-space application
803 as having zero values.
804 This provides backwards-compatibility.
810 then there are some extension fields which the user-space application is aware
811 of but which the kernel does not support.
812 Because any extension field must have its zero values signify a no-op,
813 the kernel can safely ignore the unsupported extension fields
814 if they are all zero.
815 If any unsupported extension fields are nonzero,
816 then \-1 is returned and
820 This provides forwards-compatibility.
822 Because the definition of
824 may change in the future
825 (with new fields being added when system headers are updated),
826 user-space applications should zero-fill
828 to ensure that recompiling the program with new headers will not result in
829 spurious errors at runtime.
830 The simplest way is to use a designated initializer:
834 struct mount_attr attr = {
835 .attr_set = MOUNT_ATTR_RDONLY,
836 .attr_clr = MOUNT_ATTR_NODEV
841 Alternatively, the structure can be zero-filled using
843 or similar functions:
847 struct mount_attr attr;
848 memset(&attr, 0, sizeof(attr));
849 attr.attr_set = MOUNT_ATTR_RDONLY;
850 attr.attr_clr = MOUNT_ATTR_NODEV;
854 A user-space application that wishes to determine which extensions the running
855 kernel supports can do so by conducting a binary search on
857 with a structure which has every byte nonzero
858 (to find the largest value which doesn't produce an error of
863 * This program allows the caller to create a new detached mount
864 * and set various properties on it.
870 #include <linux/mount.h>
871 #include <linux/types.h>
876 #include <sys/syscall.h>
880 mount_setattr(int dirfd, const char *pathname, unsigned int flags,
881 struct mount_attr *attr, size_t size)
883 return syscall(SYS_mount_setattr, dirfd, pathname, flags,
888 open_tree(int dirfd, const char *filename, unsigned int flags)
890 return syscall(SYS_open_tree, dirfd, filename, flags);
894 move_mount(int from_dirfd, const char *from_pathname,
895 int to_dirfd, const char *to_pathname, unsigned int flags)
897 return syscall(SYS_move_mount, from_dirfd, from_pathname,
898 to_dirfd, to_pathname, flags);
901 static const struct option longopts[] = {
902 {"map\-mount", required_argument, NULL, 'a'},
903 {"recursive", no_argument, NULL, 'b'},
904 {"read\-only", no_argument, NULL, 'c'},
905 {"block\-setid", no_argument, NULL, 'd'},
906 {"block\-devices", no_argument, NULL, 'e'},
907 {"block\-exec", no_argument, NULL, 'f'},
908 {"no\-access\-time", no_argument, NULL, 'g'},
909 { NULL, 0, NULL, 0 },
912 #define exit_log(format, ...) do \e
914 fprintf(stderr, format, ##__VA_ARGS__); \e
915 exit(EXIT_FAILURE); \e
919 main(int argc, char *argv[])
921 struct mount_attr *attr = &(struct mount_attr){};
923 bool recursive = false;
927 while ((ret = getopt_long_only(argc, argv, "",
928 longopts, &index)) != \-1) {
931 fd_userns = open(optarg, O_RDONLY | O_CLOEXEC);
932 if (fd_userns == \-1)
933 exit_log("%m \- Failed top open %s\en", optarg);
939 attr\->attr_set |= MOUNT_ATTR_RDONLY;
942 attr\->attr_set |= MOUNT_ATTR_NOSUID;
945 attr\->attr_set |= MOUNT_ATTR_NODEV;
948 attr\->attr_set |= MOUNT_ATTR_NOEXEC;
951 attr\->attr_set |= MOUNT_ATTR_NOATIME;
952 attr\->attr_clr |= MOUNT_ATTR__ATIME;
955 exit_log("Invalid argument specified");
959 if ((argc \- optind) < 2)
960 exit_log("Missing source or target mount point\en");
962 const char *source = argv[optind];
963 const char *target = argv[optind + 1];
965 /* In the following, \-1 as the \(aqdirfd\(aq argument ensures that
966 open_tree() fails if \(aqsource\(aq is not an absolute pathname. */
967 .\" Christian Brauner
968 .\" When writing programs I like to never use relative paths with AT_FDCWD
969 .\" because making assumptions about the current working directory
970 .\" of the calling process is just too easy to get wrong; especially when
971 .\" pivot_root() or chroot() are in play.
972 .\" My absolut preference (joke intended) is to open a well-known starting
973 .\" point with an absolute path to get a dirfd and then scope all future
974 .\" operations beneath that dirfd. This already works with old-style
975 .\" openat() and _very_ cautious programming but openat2() and its
976 .\" resolve-flag space have made this **chef's kiss**.
977 .\" If I can't operate based on a well-known dirfd I use absolute paths
978 .\" with a -EBADF dirfd passed to *at() functions.
980 int fd_tree = open_tree(\-1, source,
981 OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC |
982 AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0));
984 exit_log("%m \- Failed to open %s\en", source);
986 if (fd_userns >= 0) {
987 attr\->attr_set |= MOUNT_ATTR_IDMAP;
988 attr\->userns_fd = fd_userns;
991 ret = mount_setattr(fd_tree, "",
992 AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0),
993 attr, sizeof(struct mount_attr));
995 exit_log("%m \- Failed to change mount attributes\en");
999 /* In the following, \-1 as the \(aqto_dirfd\(aq argument ensures that
1000 move_mount() fails if \(aqtarget\(aq is not an absolute pathname. */
1002 ret = move_mount(fd_tree, "", \-1, target,
1003 MOVE_MOUNT_F_EMPTY_PATH);
1005 exit_log("%m \- Failed to attach mount to %s\en", target);
1019 .BR capabilities (7),
1020 .BR mount_namespaces (7),
1021 .BR user_namespaces (7),