]>
git.ipfire.org Git - thirdparty/systemd.git/blob - src/nspawn/nspawn-patch-uid.c
2 This file is part of systemd.
4 Copyright 2016 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
21 #include <linux/magic.h>
30 #include "dirent-util.h"
33 #include "nspawn-patch-uid.h"
34 #include "stat-util.h"
35 #include "stdio-util.h"
36 #include "string-util.h"
38 #include "user-util.h"
42 static int get_acl(int fd
, const char *name
, acl_type_t type
, acl_t
*ret
) {
43 char procfs_path
[strlen("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
50 _cleanup_close_
int child_fd
= -1;
52 child_fd
= openat(fd
, name
, O_PATH
|O_CLOEXEC
|O_NOFOLLOW
);
56 xsprintf(procfs_path
, "/proc/self/fd/%i", child_fd
);
57 acl
= acl_get_file(procfs_path
, type
);
58 } else if (type
== ACL_TYPE_ACCESS
)
61 xsprintf(procfs_path
, "/proc/self/fd/%i", fd
);
62 acl
= acl_get_file(procfs_path
, type
);
71 static int set_acl(int fd
, const char *name
, acl_type_t type
, acl_t acl
) {
72 char procfs_path
[strlen("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
79 _cleanup_close_
int child_fd
= -1;
81 child_fd
= openat(fd
, name
, O_PATH
|O_CLOEXEC
|O_NOFOLLOW
);
85 xsprintf(procfs_path
, "/proc/self/fd/%i", child_fd
);
86 r
= acl_set_file(procfs_path
, type
, acl
);
87 } else if (type
== ACL_TYPE_ACCESS
)
88 r
= acl_set_fd(fd
, acl
);
90 xsprintf(procfs_path
, "/proc/self/fd/%i", fd
);
91 r
= acl_set_file(procfs_path
, type
, acl
);
99 static int shift_acl(acl_t acl
, uid_t shift
, acl_t
*ret
) {
100 _cleanup_(acl_freep
) acl_t copy
= NULL
;
107 r
= acl_get_entry(acl
, ACL_FIRST_ENTRY
, &i
);
111 uid_t
*old_uid
, new_uid
;
115 if (acl_get_tag_type(i
, &tag
) < 0)
118 if (IN_SET(tag
, ACL_USER
, ACL_GROUP
)) {
120 /* We don't distinguish here between uid_t and gid_t, let's make sure the compiler checks that
121 * this is actually OK */
122 assert_cc(sizeof(uid_t
) == sizeof(gid_t
));
124 old_uid
= acl_get_qualifier(i
);
128 new_uid
= shift
| (*old_uid
& UINT32_C(0xFFFF));
129 if (!uid_is_valid(new_uid
))
132 modify
= new_uid
!= *old_uid
;
133 if (modify
&& !copy
) {
136 /* There's no copy of the ACL yet? If so, let's create one, and start the loop from the
137 * beginning, so that we copy all entries, starting from the first, this time. */
139 n
= acl_entries(acl
);
147 /* Seek back to the beginning */
148 r
= acl_get_entry(acl
, ACL_FIRST_ENTRY
, &i
);
156 acl_entry_t new_entry
;
158 if (acl_create_entry(&copy
, &new_entry
) < 0)
161 if (acl_copy_entry(new_entry
, i
) < 0)
165 if (acl_set_qualifier(new_entry
, &new_uid
) < 0)
169 r
= acl_get_entry(acl
, ACL_NEXT_ENTRY
, &i
);
180 static int patch_acls(int fd
, const char *name
, const struct stat
*st
, uid_t shift
) {
181 _cleanup_(acl_freep
) acl_t acl
= NULL
, shifted
= NULL
;
182 bool changed
= false;
188 /* ACLs are not supported on symlinks, there's no point in trying */
189 if (S_ISLNK(st
->st_mode
))
192 r
= get_acl(fd
, name
, ACL_TYPE_ACCESS
, &acl
);
193 if (r
== -EOPNOTSUPP
)
198 r
= shift_acl(acl
, shift
, &shifted
);
202 r
= set_acl(fd
, name
, ACL_TYPE_ACCESS
, shifted
);
209 if (S_ISDIR(st
->st_mode
)) {
213 acl
= shifted
= NULL
;
215 r
= get_acl(fd
, name
, ACL_TYPE_DEFAULT
, &acl
);
219 r
= shift_acl(acl
, shift
, &shifted
);
223 r
= set_acl(fd
, name
, ACL_TYPE_DEFAULT
, shifted
);
236 static int patch_acls(int fd
, const char *name
, const struct stat
*st
, uid_t shift
) {
242 static int patch_fd(int fd
, const char *name
, const struct stat
*st
, uid_t shift
) {
245 bool changed
= false;
251 new_uid
= shift
| (st
->st_uid
& UINT32_C(0xFFFF));
252 new_gid
= (gid_t
) shift
| (st
->st_gid
& UINT32_C(0xFFFF));
254 if (!uid_is_valid(new_uid
) || !gid_is_valid(new_gid
))
257 if (st
->st_uid
!= new_uid
|| st
->st_gid
!= new_gid
) {
259 r
= fchownat(fd
, name
, new_uid
, new_gid
, AT_SYMLINK_NOFOLLOW
);
261 r
= fchown(fd
, new_uid
, new_gid
);
265 /* The Linux kernel alters the mode in some cases of chown(). Let's undo this. */
267 if (!S_ISLNK(st
->st_mode
))
268 r
= fchmodat(fd
, name
, st
->st_mode
, 0);
269 else /* AT_SYMLINK_NOFOLLOW is not available for fchmodat() */
272 r
= fchmod(fd
, st
->st_mode
);
279 r
= patch_acls(fd
, name
, st
, shift
);
283 return r
> 0 || changed
;
287 * Check if the filesystem is fully compatible with user namespaces or
288 * UID/GID patching. Some filesystems in this list can be fully mounted inside
289 * user namespaces, however their inodes may relate to host resources or be
290 * valid only in the global user namespace; therefore, no patching should be applied.
292 static int is_fs_fully_userns_compatible(int fd
) {
297 if (fstatfs(fd
, &sfs
) < 0)
300 return F_TYPE_EQUAL(sfs
.f_type
, BINFMTFS_MAGIC
) ||
301 F_TYPE_EQUAL(sfs
.f_type
, CGROUP_SUPER_MAGIC
) ||
302 F_TYPE_EQUAL(sfs
.f_type
, CGROUP2_SUPER_MAGIC
) ||
303 F_TYPE_EQUAL(sfs
.f_type
, DEBUGFS_MAGIC
) ||
304 F_TYPE_EQUAL(sfs
.f_type
, DEVPTS_SUPER_MAGIC
) ||
305 F_TYPE_EQUAL(sfs
.f_type
, EFIVARFS_MAGIC
) ||
306 F_TYPE_EQUAL(sfs
.f_type
, HUGETLBFS_MAGIC
) ||
307 F_TYPE_EQUAL(sfs
.f_type
, MQUEUE_MAGIC
) ||
308 F_TYPE_EQUAL(sfs
.f_type
, PROC_SUPER_MAGIC
) ||
309 F_TYPE_EQUAL(sfs
.f_type
, PSTOREFS_MAGIC
) ||
310 F_TYPE_EQUAL(sfs
.f_type
, SELINUX_MAGIC
) ||
311 F_TYPE_EQUAL(sfs
.f_type
, SMACK_MAGIC
) ||
312 F_TYPE_EQUAL(sfs
.f_type
, SECURITYFS_MAGIC
) ||
313 F_TYPE_EQUAL(sfs
.f_type
, BPF_FS_MAGIC
) ||
314 F_TYPE_EQUAL(sfs
.f_type
, TRACEFS_MAGIC
) ||
315 F_TYPE_EQUAL(sfs
.f_type
, SYSFS_MAGIC
);
318 static int recurse_fd(int fd
, bool donate_fd
, const struct stat
*st
, uid_t shift
, bool is_toplevel
) {
319 bool changed
= false;
324 /* We generally want to permit crossing of mount boundaries when patching the UIDs/GIDs. However, we
325 * probably shouldn't do this for /proc and /sys if that is already mounted into place. Hence, let's
326 * stop the recursion when we hit procfs, sysfs or some other special file systems. */
327 r
= is_fs_fully_userns_compatible(fd
);
331 r
= 0; /* don't recurse */
335 r
= patch_fd(fd
, NULL
, st
, shift
);
337 _cleanup_free_
char *name
= NULL
;
340 /* When we hit a read-only subtree we simply skip it, but log about it. */
341 (void) fd_get_path(fd
, &name
);
342 log_debug("Skippping read-only file or directory %s.", strna(name
));
353 if (S_ISDIR(st
->st_mode
)) {
354 _cleanup_closedir_
DIR *d
= NULL
;
360 copy
= fcntl(fd
, F_DUPFD_CLOEXEC
, 3);
377 FOREACH_DIRENT_ALL(de
, d
, r
= -errno
; goto finish
) {
380 if (dot_or_dot_dot(de
->d_name
))
383 if (fstatat(dirfd(d
), de
->d_name
, &fst
, AT_SYMLINK_NOFOLLOW
) < 0) {
388 if (S_ISDIR(fst
.st_mode
)) {
391 subdir_fd
= openat(dirfd(d
), de
->d_name
, O_RDONLY
|O_NONBLOCK
|O_DIRECTORY
|O_CLOEXEC
|O_NOFOLLOW
|O_NOATIME
);
398 r
= recurse_fd(subdir_fd
, true, &fst
, shift
, false);
405 r
= patch_fd(dirfd(d
), de
->d_name
, &fst
, shift
);
423 static int fd_patch_uid_internal(int fd
, bool donate_fd
, uid_t shift
, uid_t range
) {
429 /* Recursively adjusts the UID/GIDs of all files of a directory tree. This is used to automatically fix up an
430 * OS tree to the used user namespace UID range. Note that this automatic adjustment only works for UID ranges
431 * following the concept that the upper 16bit of a UID identify the container, and the lower 16bit are the actual
432 * UID within the container. */
434 if ((shift
& 0xFFFF) != 0) {
435 /* We only support containers where the shift starts at a 2^16 boundary */
440 if (range
!= 0x10000) {
441 /* We only support containers with 16bit UID ranges for the patching logic */
446 if (fstat(fd
, &st
) < 0) {
451 if ((uint32_t) st
.st_uid
>> 16 != (uint32_t) st
.st_gid
>> 16) {
452 /* We only support containers where the uid/gid container ID match */
457 /* Try to detect if the range is already right. Of course, this is a pretty drastic optimization, as we assume
458 * that if the top-level dir has the right upper 16bit assigned, then everything below will have too... */
459 if (((uint32_t) (st
.st_uid
^ shift
) >> 16) == 0)
462 return recurse_fd(fd
, donate_fd
, &st
, shift
, true);
471 int fd_patch_uid(int fd
, uid_t shift
, uid_t range
) {
472 return fd_patch_uid_internal(fd
, false, shift
, range
);
475 int path_patch_uid(const char *path
, uid_t shift
, uid_t range
) {
478 fd
= open(path
, O_RDONLY
|O_NONBLOCK
|O_DIRECTORY
|O_CLOEXEC
|O_NOFOLLOW
|O_NOATIME
);
482 return fd_patch_uid_internal(fd
, true, shift
, range
);