]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/mount_setattr.2
f6f257e2e4682d921580c0a4c5d947cbe1d1c447
[thirdparty/man-pages.git] / man2 / mount_setattr.2
1 .\" Copyright (c) 2021 by Christian Brauner <christian.brauner@ubuntu.com>
2 .\"
3 .\" SPDX-License-Identifier: Linux-man-pages-copyleft
4 .\"
5 .TH MOUNT_SETATTR 2 2021-08-27 "Linux" "Linux Programmer's Manual"
6 .SH NAME
7 mount_setattr \- change properties of a mount or mount tree
8 .SH LIBRARY
9 Standard C library
10 .RI ( libc ", " \-lc )
11 .SH SYNOPSIS
12 .nf
13
14 .PP
15 .BR "#include <linux/fcntl.h>" " /* Definition of " AT_* " constants */"
16 .BR "#include <linux/mount.h>" " /* Definition of " MOUNT_ATTR_* " constants */"
17 .BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
18 .B #include <unistd.h>
19 .PP
20 .BI "int syscall(SYS_mount_setattr, int " dirfd ", const char *" pathname ,
21 .BI " unsigned int " flags ", struct mount_attr *" attr \
22 ", size_t " size );
23 .fi
24 .PP
25 .IR Note :
26 glibc provides no wrapper for
27 .BR mount_setattr (),
28 necessitating the use of
29 .BR syscall (2).
30 .SH DESCRIPTION
31 The
32 .BR mount_setattr ()
33 system call changes the mount properties of a mount or an entire mount tree.
34 If
35 .I pathname
36 is a relative pathname,
37 then it is interpreted relative to
38 the directory referred to by the file descriptor
39 .IR dirfd .
40 If
41 .I dirfd
42 is the special value
43 .BR AT_FDCWD ,
44 then
45 .I pathname
46 is interpreted relative to
47 the current working directory of the calling process.
48 If
49 .I pathname
50 is the empty string and
51 .B AT_EMPTY_PATH
52 is specified in
53 .IR flags ,
54 then the mount properties of the mount identified by
55 .I dirfd
56 are changed.
57 (See
58 .BR openat (2)
59 for an explanation of why the
60 .I dirfd
61 argument is useful.)
62 .PP
63 The
64 .BR mount_setattr ()
65 system call uses an extensible structure
66 .RI ( "struct mount_attr" )
67 to allow for future extensions.
68 Any non-flag extensions to
69 .BR mount_setattr ()
70 will be implemented as new fields appended to this structure,
71 with a zero value in a new field resulting in the kernel behaving
72 as though that extension field was not present.
73 Therefore,
74 the caller
75 .I must
76 zero-fill this structure on initialization.
77 See the "Extensibility" subsection under
78 .B NOTES
79 for more details.
80 .PP
81 The
82 .I size
83 argument should usually be specified as
84 .IR "sizeof(struct mount_attr)" .
85 However, if the caller is using a kernel that supports an extended
86 .IR "struct mount_attr" ,
87 but the caller does not intend to make use of these features,
88 it is possible to pass the size of an earlier
89 version of the structure together with the extended structure.
90 This allows the kernel to not copy later parts of the structure
91 that aren't used anyway.
92 With each extension that changes the size of
93 .IR "struct mount_attr" ,
94 the kernel will expose a definition of the form
95 .BI MOUNT_ATTR_SIZE_VER number\c
96 \&.
97 For example, the macro for the size of the initial version of
98 .I struct mount_attr
99 is
100 .BR MOUNT_ATTR_SIZE_VER0 .
101 .PP
102 The
103 .I flags
104 argument can be used to alter the pathname resolution behavior.
105 The supported values are:
106 .TP
107 .B AT_EMPTY_PATH
108 If
109 .I pathname
110 is the empty string,
111 change the mount properties on
112 .I dirfd
113 itself.
114 .TP
115 .B AT_RECURSIVE
116 Change the mount properties of the entire mount tree.
117 .TP
118 .B AT_SYMLINK_NOFOLLOW
119 Don't follow trailing symbolic links.
120 .TP
121 .B AT_NO_AUTOMOUNT
122 Don't trigger automounts.
123 .PP
124 The
125 .I attr
126 argument of
127 .BR mount_setattr ()
128 is a structure of the following form:
129 .PP
130 .in +4n
131 .EX
132 struct mount_attr {
133 __u64 attr_set; /* Mount properties to set */
134 __u64 attr_clr; /* Mount properties to clear */
135 __u64 propagation; /* Mount propagation type */
136 __u64 userns_fd; /* User namespace file descriptor */
137 };
138 .EE
139 .in
140 .PP
141 The
142 .I attr_set
143 and
144 .I attr_clr
145 members are used to specify the mount properties that
146 are supposed to be set or cleared for a mount or mount tree.
147 Flags set in
148 .I attr_set
149 enable a property on a mount or mount tree,
150 and flags set in
151 .I attr_clr
152 remove a property from a mount or mount tree.
153 .PP
154 When changing mount properties,
155 the kernel will first clear the flags specified
156 in the
157 .I attr_clr
158 field,
159 and then set the flags specified in the
160 .I attr_set
161 field.
162 For example, these settings:
163 .PP
164 .in +4n
165 .EX
166 struct mount_attr attr = {
167 .attr_clr = MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NODEV,
168 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
169 };
170 .EE
171 .in
172 .PP
173 are equivalent to the following steps:
174 .PP
175 .in +4n
176 .EX
177 unsigned int current_mnt_flags = mnt->mnt_flags;
178
179 /*
180 * Clear all flags set in .attr_clr,
181 * clearing MOUNT_ATTR_NOEXEC and MOUNT_ATTR_NODEV.
182 */
183 current_mnt_flags &= ~attr->attr_clr;
184
185 /*
186 * Now set all flags set in .attr_set,
187 * applying MOUNT_ATTR_RDONLY and MOUNT_ATTR_NOSUID.
188 */
189 current_mnt_flags |= attr->attr_set;
190
191 mnt->mnt_flags = current_mnt_flags;
192 .EE
193 .in
194 .PP
195 As a result of this change, the mount or mount tree (a) is read-only;
196 (b) blocks the execution of set-user-ID and set-group-ID programs;
197 (c) allows execution of programs; and (d) allows access to devices.
198 .PP
199 Multiple changes with the same set of flags requested
200 in
201 .I attr_clr
202 and
203 .I attr_set
204 are guaranteed to be idempotent after the changes have been applied.
205 .PP
206 The following mount attributes can be specified in the
207 .I attr_set
208 or
209 .I attr_clr
210 fields:
211 .TP
212 .B MOUNT_ATTR_RDONLY
213 If set in
214 .IR attr_set ,
215 makes the mount read-only.
216 If set in
217 .IR attr_clr ,
218 removes the read-only setting if set on the mount.
219 .TP
220 .B MOUNT_ATTR_NOSUID
221 If set in
222 .IR attr_set ,
223 causes the mount not to honor the set-user-ID and set-group-ID mode bits and
224 file capabilities when executing programs.
225 If set in
226 .IR attr_clr ,
227 clears the set-user-ID, set-group-ID,
228 and file capability restriction if set on this mount.
229 .TP
230 .B MOUNT_ATTR_NODEV
231 If set in
232 .IR attr_set ,
233 prevents access to devices on this mount.
234 If set in
235 .IR attr_clr ,
236 removes the restriction that prevented accessing devices on this mount.
237 .TP
238 .B MOUNT_ATTR_NOEXEC
239 If set in
240 .IR attr_set ,
241 prevents executing programs on this mount.
242 If set in
243 .IR attr_clr ,
244 removes the restriction that prevented executing programs on this mount.
245 .TP
246 .B MOUNT_ATTR_NOSYMFOLLOW
247 If set in
248 .IR attr_set ,
249 prevents following symbolic links on this mount.
250 If set in
251 .IR attr_clr ,
252 removes the restriction that prevented following symbolic links on this mount.
253 .TP
254 .B MOUNT_ATTR_NODIRATIME
255 If set in
256 .IR attr_set ,
257 prevents updating access time for directories on this mount.
258 If set in
259 .IR attr_clr ,
260 removes the restriction that prevented updating access time for directories.
261 Note that
262 .B MOUNT_ATTR_NODIRATIME
263 can be combined with other access-time settings
264 and is implied by the noatime setting.
265 All other access-time settings are mutually exclusive.
266 .TP
267 .BR MOUNT_ATTR__ATIME " - changing access-time settings"
268 The access-time values listed below are an enumeration that
269 includes the value zero, expressed in the bits defined by the mask
270 .BR MOUNT_ATTR__ATIME .
271 Even though these bits are an enumeration
272 (in contrast to the other mount flags such as
273 .BR MOUNT_ATTR_NOEXEC ),
274 they are nonetheless passed in
275 .I attr_set
276 and
277 .I attr_clr
278 for consistency with
279 .BR fsmount (2),
280 which introduced this behavior.
281 .IP
282 Note that,
283 since the access-time values are an enumeration rather than bit values,
284 a caller wanting to transition to a different access-time setting
285 cannot simply specify the access-time setting in
286 .IR attr_set ,
287 but must also include
288 .B MOUNT_ATTR__ATIME
289 in the
290 .I attr_clr
291 field.
292 The kernel will verify that
293 .B MOUNT_ATTR__ATIME
294 isn't partially set in
295 .IR attr_clr
296 (i.e., all bits in the
297 .B MOUNT_ATTR__ATIME
298 bit field are either set or clear), and that
299 .I attr_set
300 doesn't have any access-time bits set if
301 .B MOUNT_ATTR__ATIME
302 isn't set in
303 .IR attr_clr .
304 .RS
305 .TP
306 .B MOUNT_ATTR_RELATIME
307 When a file is accessed via this mount,
308 update the file's last access time (atime)
309 only if the current value of atime is less than or equal to
310 the file's last modification time (mtime) or last status change time (ctime).
311 .IP
312 To enable this access-time setting on a mount or mount tree,
313 .B MOUNT_ATTR_RELATIME
314 must be set in
315 .I attr_set
316 and
317 .B MOUNT_ATTR__ATIME
318 must be set in the
319 .I attr_clr
320 field.
321 .TP
322 .B MOUNT_ATTR_NOATIME
323 Do not update access times for (all types of) files on this mount.
324 .IP
325 To enable this access-time setting on a mount or mount tree,
326 .B MOUNT_ATTR_NOATIME
327 must be set in
328 .I attr_set
329 and
330 .B MOUNT_ATTR__ATIME
331 must be set in the
332 .I attr_clr
333 field.
334 .TP
335 .B MOUNT_ATTR_STRICTATIME
336 Always update the last access time (atime)
337 when files are accessed on this mount.
338 .IP
339 To enable this access-time setting on a mount or mount tree,
340 .B MOUNT_ATTR_STRICTATIME
341 must be set in
342 .I attr_set
343 and
344 .B MOUNT_ATTR__ATIME
345 must be set in the
346 .I attr_clr
347 field.
348 .RE
349 .TP
350 .B MOUNT_ATTR_IDMAP
351 If set in
352 .IR attr_set ,
353 creates an ID-mapped mount.
354 The ID mapping is taken from the user namespace specified in
355 .I userns_fd
356 and attached to the mount.
357 .IP
358 Since it is not supported to
359 change the ID mapping of a mount after it has been ID mapped,
360 it is invalid to specify
361 .B MOUNT_ATTR_IDMAP
362 in
363 .IR attr_clr .
364 .IP
365 For further details, see the subsection "ID-mapped mounts" under NOTES.
366 .PP
367 The
368 .I propagation
369 field is used to specify the propagation type of the mount or mount tree.
370 This field either has the value zero,
371 meaning leave the propagation type unchanged, or it has one of
372 the following values:
373 .TP
374 .B MS_PRIVATE
375 Turn all mounts into private mounts.
376 .TP
377 .B MS_SHARED
378 Turn all mounts into shared mounts.
379 .TP
380 .B MS_SLAVE
381 Turn all mounts into dependent mounts.
382 .TP
383 .B MS_UNBINDABLE
384 Turn all mounts into unbindable mounts.
385 .PP
386 For further details on the above propagation types, see
387 .BR mount_namespaces (7).
388 .SH RETURN VALUE
389 On success,
390 .BR mount_setattr ()
391 returns zero.
392 On error,
393 \-1 is returned and
394 .I errno
395 is set to indicate the cause of the error.
396 .SH ERRORS
397 .TP
398 .B EBADF
399 .I pathname
400 is relative but
401 .I dirfd
402 is neither
403 .B AT_FDCWD
404 nor a valid file descriptor.
405 .TP
406 .B EBADF
407 .I userns_fd
408 is not a valid file descriptor.
409 .TP
410 .B EBUSY
411 The caller tried to change the mount to
412 .BR MOUNT_ATTR_RDONLY ,
413 but the mount still holds files open for writing.
414 .TP
415 .B EINVAL
416 The pathname specified via the
417 .I dirfd
418 and
419 .I pathname
420 arguments to
421 .BR mount_setattr ()
422 isn't a mount point.
423 .TP
424 .B EINVAL
425 An unsupported value was set in
426 .IR flags .
427 .TP
428 .B EINVAL
429 An unsupported value was specified in the
430 .I attr_set
431 field of
432 .IR mount_attr .
433 .TP
434 .B EINVAL
435 An unsupported value was specified in the
436 .I attr_clr
437 field of
438 .IR mount_attr .
439 .TP
440 .B EINVAL
441 An unsupported value was specified in the
442 .I propagation
443 field of
444 .IR mount_attr .
445 .TP
446 .B EINVAL
447 More than one of
448 .BR MS_SHARED ,
449 .BR MS_SLAVE ,
450 .BR MS_PRIVATE ,
451 or
452 .B MS_UNBINDABLE
453 was set in the
454 .I propagation
455 field of
456 .IR mount_attr .
457 .TP
458 .B EINVAL
459 An access-time setting was specified in the
460 .I attr_set
461 field without
462 .B MOUNT_ATTR__ATIME
463 being set in the
464 .I attr_clr
465 field.
466 .TP
467 .B EINVAL
468 .B MOUNT_ATTR_IDMAP
469 was specified in
470 .IR attr_clr .
471 .TP
472 .B EINVAL
473 A file descriptor value was specified in
474 .I userns_fd
475 which exceeds
476 .BR INT_MAX .
477 .TP
478 .B EINVAL
479 A valid file descriptor value was specified in
480 .IR userns_fd ,
481 but the file descriptor did not refer to a user namespace.
482 .TP
483 .B EINVAL
484 The underlying filesystem does not support ID-mapped mounts.
485 .TP
486 .B EINVAL
487 The mount that is to be ID mapped is not a detached mount
488 (a detached mount is one that has not previously been visible in a mount namespace).
489 .TP
490 .B EINVAL
491 A partial access-time setting was specified in
492 .I attr_clr
493 instead of
494 .B MOUNT_ATTR__ATIME
495 being set.
496 .TP
497 .B EINVAL
498 The mount is located outside the caller's mount namespace.
499 .TP
500 .B EINVAL
501 The underlying filesystem has been mounted in a mount namespace that is
502 owned by a noninitial user namespace.
503 .TP
504 .B ENOENT
505 A pathname was empty or had a nonexistent component.
506 .TP
507 .B ENOMEM
508 When changing mount propagation to
509 .BR MS_SHARED ,
510 a new peer group ID needs to be allocated for all mounts without a peer group
511 ID set.
512 This allocation failed because there was not
513 enough memory to allocate the relevant internal structures.
514 .TP
515 .B ENOSPC
516 When changing mount propagation to
517 .BR MS_SHARED ,
518 a new peer group ID needs to be allocated for all mounts without a peer group
519 ID set.
520 This allocation failed because
521 the kernel has run out of IDs.
522 .\" Christian Brauner: i.e. someone has somehow managed to
523 .\" allocate so many peer groups and managed to keep the kernel running
524 .\" (???) that the ida has run out of ids
525 .\" Note that technically further error codes are possible that are
526 .\" specific to the ID allocation implementation used.
527 .TP
528 .B EPERM
529 One of the mounts had at least one of
530 .BR MOUNT_ATTR_NOATIME ,
531 .BR MOUNT_ATTR_NODEV ,
532 .BR MOUNT_ATTR_NODIRATIME ,
533 .BR MOUNT_ATTR_NOEXEC ,
534 .BR MOUNT_ATTR_NOSUID ,
535 or
536 .B MOUNT_ATTR_RDONLY
537 set and the flag is locked.
538 Mount attributes become locked on a mount if:
539 .RS
540 .IP \(bu 3
541 A new mount or mount tree is created causing mount propagation across user
542 namespaces
543 (i.e., propagation to a mount namespace owned by a different user namespace).
544 The kernel will lock the aforementioned flags to prevent these sensitive
545 properties from being altered.
546 .IP \(bu
547 A new mount and user namespace pair is created.
548 This happens for example when specifying
549 .B CLONE_NEWUSER | CLONE_NEWNS
550 in
551 .BR unshare (2),
552 .BR clone (2),
553 or
554 .BR clone3 (2).
555 The aforementioned flags become locked in the new mount namespace
556 to prevent sensitive mount properties from being altered.
557 Since the newly created mount namespace will be owned by the
558 newly created user namespace,
559 a calling process that is privileged in the new
560 user namespace would\(emin the absence of such locking\(embe
561 able to alter sensitive mount properties (e.g., to remount a mount
562 that was marked read-only as read-write in the new mount namespace).
563 .RE
564 .TP
565 .B EPERM
566 A valid file descriptor value was specified in
567 .IR userns_fd ,
568 but the file descriptor refers to the initial user namespace.
569 .TP
570 .B EPERM
571 An attempt was made to add an ID mapping to a mount that is already ID mapped.
572 .TP
573 .B EPERM
574 The caller does not have
575 .B CAP_SYS_ADMIN
576 in the initial user namespace.
577 .SH VERSIONS
578 .BR mount_setattr ()
579 first appeared in Linux 5.12.
580 .\" commit 7d6beb71da3cc033649d641e1e608713b8220290
581 .\" commit 2a1867219c7b27f928e2545782b86daaf9ad50bd
582 .\" commit 9caccd41541a6f7d6279928d9f971f6642c361af
583 .SH CONFORMING TO
584 .BR mount_setattr ()
585 is Linux-specific.
586 .SH NOTES
587 .SS ID-mapped mounts
588 Creating an ID-mapped mount makes it possible to
589 change the ownership of all files located under a mount.
590 Thus, ID-mapped mounts make it possible to
591 change ownership in a temporary and localized way.
592 It is a localized change because the ownership changes are
593 visible only via a specific mount.
594 All other users and locations where the filesystem is exposed are unaffected.
595 It is a temporary change because
596 the ownership changes are tied to the lifetime of the mount.
597 .PP
598 Whenever callers interact with the filesystem through an ID-mapped mount,
599 the ID mapping of the mount will be applied to
600 user and group IDs associated with filesystem objects.
601 This encompasses the user and group IDs associated with inodes
602 and also the following
603 .BR xattr (7)
604 keys:
605 .IP \(bu 3
606 .IR security.capability ,
607 whenever filesystem capabilities
608 are stored or returned in the
609 .B VFS_CAP_REVISION_3
610 format,
611 which stores a root user ID alongside the capabilities
612 (see
613 .BR capabilities (7)).
614 .IP \(bu
615 .I system.posix_acl_access
616 and
617 .IR system.posix_acl_default ,
618 whenever user IDs or group IDs are stored in
619 .B ACL_USER
620 or
621 .B ACL_GROUP
622 entries.
623 .PP
624 The following conditions must be met in order to create an ID-mapped mount:
625 .IP \(bu 3
626 The caller must have the
627 .B CAP_SYS_ADMIN
628 capability in the initial user namespace.
629 .IP \(bu
630 The filesystem must be mounted in a mount namespace
631 that is owned by the initial user namespace.
632 .IP \(bu
633 The underlying filesystem must support ID-mapped mounts.
634 Currently, the
635 .BR xfs (5),
636 .BR ext4 (5),
637 and
638 .B FAT
639 filesystems support ID-mapped mounts
640 with more filesystems being actively worked on.
641 .IP \(bu
642 The mount must not already be ID-mapped.
643 This also implies that the ID mapping of a mount cannot be altered.
644 .IP \(bu
645 The mount must be a detached mount;
646 that is,
647 it must have been created by calling
648 .BR open_tree (2)
649 with the
650 .B OPEN_TREE_CLONE
651 flag and it must not already have been visible in a mount namespace.
652 (To put things another way:
653 the mount must not have been attached to the filesystem hierarchy
654 with a system call such as
655 .BR move_mount (2).)
656 .PP
657 ID mappings can be created for user IDs, group IDs, and project IDs.
658 An ID mapping is essentially a mapping of a range of user or group IDs into
659 another or the same range of user or group IDs.
660 ID mappings are written to map files as three numbers
661 separated by white space.
662 The first two numbers specify the starting user or group ID
663 in each of the two user namespaces.
664 The third number specifies the range of the ID mapping.
665 For example,
666 a mapping for user IDs such as "1000\ 1001\ 1" would indicate that
667 user ID 1000 in the caller's user namespace is mapped to
668 user ID 1001 in its ancestor user namespace.
669 Since the map range is 1,
670 only user ID 1000 is mapped.
671 .PP
672 It is possible to specify up to 340 ID mappings for each ID mapping type.
673 If any user IDs or group IDs are not mapped,
674 all files owned by that unmapped user or group ID will appear as
675 being owned by the overflow user ID or overflow group ID respectively.
676 .PP
677 Further details on setting up ID mappings can be found in
678 .BR user_namespaces (7).
679 .PP
680 In the common case, the user namespace passed in
681 .I userns_fd
682 (together with
683 .B MOUNT_ATTR_IDMAP
684 in
685 .IR attr_set )
686 to create an ID-mapped mount will be the user namespace of a container.
687 In other scenarios it will be a dedicated user namespace associated with
688 a user's login session as is the case for portable home directories in
689 .BR systemd-homed.service (8).
690 It is also perfectly fine to create a dedicated user namespace
691 for the sake of ID mapping a mount.
692 .PP
693 ID-mapped mounts can be useful in the following
694 and a variety of other scenarios:
695 .IP \(bu 3
696 Sharing files or filesystems
697 between multiple users or multiple machines,
698 especially in complex scenarios.
699 For example,
700 ID-mapped mounts are used to implement portable home directories in
701 .BR systemd-homed.service (8),
702 where they allow users to move their home directory
703 to an external storage device
704 and use it on multiple computers
705 where they are assigned different user IDs and group IDs.
706 This effectively makes it possible to
707 assign random user IDs and group IDs at login time.
708 .IP \(bu
709 Sharing files or filesystems
710 from the host with unprivileged containers.
711 This allows a user to avoid having to change ownership permanently through
712 .BR chown (2).
713 .IP \(bu
714 ID mapping a container's root filesystem.
715 Users don't need to change ownership permanently through
716 .BR chown (2).
717 Especially for large root filesystems, using
718 .BR chown (2)
719 can be prohibitively expensive.
720 .IP \(bu
721 Sharing files or filesystems
722 between containers with non-overlapping ID mappings.
723 .IP \(bu
724 Implementing discretionary access control (DAC) permission checking
725 for filesystems lacking a concept of ownership.
726 .IP \(bu
727 Efficiently changing ownership on a per-mount basis.
728 In contrast to
729 .BR chown (2),
730 changing ownership of large sets of files is instantaneous with
731 ID-mapped mounts.
732 This is especially useful when ownership of
733 an entire root filesystem of a virtual machine or container
734 is to be changed as mentioned above.
735 With ID-mapped mounts,
736 a single
737 .BR mount_setattr ()
738 system call will be sufficient to change the ownership of all files.
739 .IP \(bu
740 Taking the current ownership into account.
741 ID mappings specify precisely
742 what a user or group ID is supposed to be mapped to.
743 This contrasts with the
744 .BR chown (2)
745 system call which cannot by itself
746 take the current ownership of the files it changes into account.
747 It simply changes the ownership to the specified user ID and group ID.
748 .IP \(bu
749 Locally and temporarily restricted ownership changes.
750 ID-mapped mounts make it possible to change ownership locally,
751 restricting the ownership changes to specific mounts,
752 and temporarily as the ownership changes only apply as long as the mount exists.
753 By contrast,
754 changing ownership via the
755 .BR chown (2)
756 system call changes the ownership globally and permanently.
757 .\"
758 .SS Extensibility
759 In order to allow for future extensibility,
760 .BR mount_setattr ()
761 requires the user-space application to specify the size of the
762 .I mount_attr
763 structure that it is passing.
764 By providing this information, it is possible for
765 .BR mount_setattr ()
766 to provide both forwards- and backwards-compatibility, with
767 .I size
768 acting as an implicit version number.
769 (Because new extension fields will always
770 be appended, the structure size will always increase.)
771 This extensibility design is very similar to other system calls such as
772 .BR sched_setattr (2),
773 .BR perf_event_open (2),
774 .BR clone3 (2)
775 and
776 .BR openat2 (2).
777 .PP
778 Let
779 .I usize
780 be the size of the structure as specified by the user-space application,
781 and let
782 .I ksize
783 be the size of the structure which the kernel supports,
784 then there are three cases to consider:
785 .IP \(bu 3
786 If
787 .I ksize
788 equals
789 .IR usize ,
790 then there is no version mismatch and
791 .I attr
792 can be used verbatim.
793 .IP \(bu
794 If
795 .I ksize
796 is larger than
797 .IR usize ,
798 then there are some extension fields that the kernel supports
799 which the user-space application is unaware of.
800 Because a zero value in any added extension field signifies a no-op,
801 the kernel treats all of the extension fields
802 not provided by the user-space application
803 as having zero values.
804 This provides backwards-compatibility.
805 .IP \(bu
806 If
807 .I ksize
808 is smaller than
809 .IR usize ,
810 then there are some extension fields which the user-space application is aware
811 of but which the kernel does not support.
812 Because any extension field must have its zero values signify a no-op,
813 the kernel can safely ignore the unsupported extension fields
814 if they are all zero.
815 If any unsupported extension fields are non-zero,
816 then \-1 is returned and
817 .I errno
818 is set to
819 .BR E2BIG .
820 This provides forwards-compatibility.
821 .PP
822 Because the definition of
823 .I struct mount_attr
824 may change in the future
825 (with new fields being added when system headers are updated),
826 user-space applications should zero-fill
827 .I struct mount_attr
828 to ensure that recompiling the program with new headers will not result in
829 spurious errors at runtime.
830 The simplest way is to use a designated initializer:
831 .PP
832 .in +4n
833 .EX
834 struct mount_attr attr = {
835 .attr_set = MOUNT_ATTR_RDONLY,
836 .attr_clr = MOUNT_ATTR_NODEV
837 };
838 .EE
839 .in
840 .PP
841 Alternatively, the structure can be zero-filled using
842 .BR memset (3)
843 or similar functions:
844 .PP
845 .in +4n
846 .EX
847 struct mount_attr attr;
848 memset(&attr, 0, sizeof(attr));
849 attr.attr_set = MOUNT_ATTR_RDONLY;
850 attr.attr_clr = MOUNT_ATTR_NODEV;
851 .EE
852 .in
853 .PP
854 A user-space application that wishes to determine which extensions the running
855 kernel supports can do so by conducting a binary search on
856 .I size
857 with a structure which has every byte nonzero
858 (to find the largest value which doesn't produce an error of
859 .BR E2BIG ).
860 .SH EXAMPLES
861 .EX
862 /*
863 * This program allows the caller to create a new detached mount
864 * and set various properties on it.
865 */
866 #define _GNU_SOURCE
867 #include <errno.h>
868 #include <fcntl.h>
869 #include <getopt.h>
870 #include <linux/mount.h>
871 #include <linux/types.h>
872 #include <stdbool.h>
873 #include <stdio.h>
874 #include <stdlib.h>
875 #include <string.h>
876 #include <sys/syscall.h>
877 #include <unistd.h>
878
879 static inline int
880 mount_setattr(int dirfd, const char *pathname, unsigned int flags,
881 struct mount_attr *attr, size_t size)
882 {
883 return syscall(SYS_mount_setattr, dirfd, pathname, flags,
884 attr, size);
885 }
886
887 static inline int
888 open_tree(int dirfd, const char *filename, unsigned int flags)
889 {
890 return syscall(SYS_open_tree, dirfd, filename, flags);
891 }
892
893 static inline int
894 move_mount(int from_dirfd, const char *from_pathname,
895 int to_dirfd, const char *to_pathname, unsigned int flags)
896 {
897 return syscall(SYS_move_mount, from_dirfd, from_pathname,
898 to_dirfd, to_pathname, flags);
899 }
900
901 static const struct option longopts[] = {
902 {"map\-mount", required_argument, NULL, 'a'},
903 {"recursive", no_argument, NULL, 'b'},
904 {"read\-only", no_argument, NULL, 'c'},
905 {"block\-setid", no_argument, NULL, 'd'},
906 {"block\-devices", no_argument, NULL, 'e'},
907 {"block\-exec", no_argument, NULL, 'f'},
908 {"no\-access\-time", no_argument, NULL, 'g'},
909 { NULL, 0, NULL, 0 },
910 };
911
912 #define exit_log(format, ...) do \e
913 { \e
914 fprintf(stderr, format, ##__VA_ARGS__); \e
915 exit(EXIT_FAILURE); \e
916 } while (0)
917
918 int
919 main(int argc, char *argv[])
920 {
921 struct mount_attr *attr = &(struct mount_attr){};
922 int fd_userns = \-1;
923 bool recursive = false;
924 int index = 0;
925 int ret;
926
927 while ((ret = getopt_long_only(argc, argv, "",
928 longopts, &index)) != \-1) {
929 switch (ret) {
930 case 'a':
931 fd_userns = open(optarg, O_RDONLY | O_CLOEXEC);
932 if (fd_userns == \-1)
933 exit_log("%m \- Failed to open %s\en", optarg);
934 break;
935 case 'b':
936 recursive = true;
937 break;
938 case 'c':
939 attr\->attr_set |= MOUNT_ATTR_RDONLY;
940 break;
941 case 'd':
942 attr\->attr_set |= MOUNT_ATTR_NOSUID;
943 break;
944 case 'e':
945 attr\->attr_set |= MOUNT_ATTR_NODEV;
946 break;
947 case 'f':
948 attr\->attr_set |= MOUNT_ATTR_NOEXEC;
949 break;
950 case 'g':
951 attr\->attr_set |= MOUNT_ATTR_NOATIME;
952 attr\->attr_clr |= MOUNT_ATTR__ATIME;
953 break;
954 default:
955 exit_log("Invalid argument specified");
956 }
957 }
958
959 if ((argc \- optind) < 2)
960 exit_log("Missing source or target mount point\en");
961
962 const char *source = argv[optind];
963 const char *target = argv[optind + 1];
964
965 /* In the following, \-1 as the \(aqdirfd\(aq argument ensures that
966 open_tree() fails if \(aqsource\(aq is not an absolute pathname. */
967 .\" Christian Brauner
968 .\" When writing programs I like to never use relative paths with AT_FDCWD
969 .\" because making assumptions about the current working directory
970 .\" of the calling process is just too easy to get wrong; especially when
971 .\" pivot_root() or chroot() are in play.
972 .\" My absolut preference (joke intended) is to open a well-known starting
973 .\" point with an absolute path to get a dirfd and then scope all future
974 .\" operations beneath that dirfd. This already works with old-style
975 .\" openat() and _very_ cautious programming but openat2() and its
976 .\" resolve-flag space have made this **chef's kiss**.
977 .\" If I can't operate based on a well-known dirfd I use absolute paths
978 .\" with a -EBADF dirfd passed to *at() functions.
979
980 int fd_tree = open_tree(\-1, source,
981 OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC |
982 AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0));
983 if (fd_tree == \-1)
984 exit_log("%m \- Failed to open %s\en", source);
985
986 if (fd_userns >= 0) {
987 attr\->attr_set |= MOUNT_ATTR_IDMAP;
988 attr\->userns_fd = fd_userns;
989 }
990
991 ret = mount_setattr(fd_tree, "",
992 AT_EMPTY_PATH | (recursive ? AT_RECURSIVE : 0),
993 attr, sizeof(struct mount_attr));
994 if (ret == \-1)
995 exit_log("%m \- Failed to change mount attributes\en");
996
997 close(fd_userns);
998
999 /* In the following, \-1 as the \(aqto_dirfd\(aq argument ensures that
1000 move_mount() fails if \(aqtarget\(aq is not an absolute pathname. */
1001
1002 ret = move_mount(fd_tree, "", \-1, target,
1003 MOVE_MOUNT_F_EMPTY_PATH);
1004 if (ret == \-1)
1005 exit_log("%m \- Failed to attach mount to %s\en", target);
1006
1007 close(fd_tree);
1008
1009 exit(EXIT_SUCCESS);
1010 }
1011 .EE
1012 .SH SEE ALSO
1013 .BR newgidmap (1),
1014 .BR newuidmap (1),
1015 .BR clone (2),
1016 .BR mount (2),
1017 .BR unshare (2),
1018 .BR proc (5),
1019 .BR capabilities (7),
1020 .BR mount_namespaces (7),
1021 .BR user_namespaces (7),
1022 .BR xattr (7)