From: Dorjoy Chowdhury Date: Sat, 16 May 2026 14:42:39 +0000 (+0200) Subject: openat2: new OPENAT2_REGULAR flag support X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8b82cacad92ebae9619872a5a69c570eba30140b;p=thirdparty%2Flinux.git openat2: new OPENAT2_REGULAR flag support This flag indicates the path should be opened if it's a regular file. This is useful to write secure programs that want to avoid being tricked into opening device nodes with special semantics while thinking they operate on regular files. This is a requested feature from the uapi-group[1]. The previously introduced EFTYPE error code is returned when the path doesn't refer to a regular file. For example, if openat2 is called on path /dev/null with OPENAT2_REGULAR in the flag param, it will return -EFTYPE. When used in combination with O_CREAT, either the regular file is created, or if the path already exists, it is opened if it's a regular file. Otherwise, -EFTYPE is returned. When OPENAT2_REGULAR is combined with O_DIRECTORY, -EINVAL is returned as it doesn't make sense to open a path that is both a directory and a regular file. The UAPI bit lives in the upper 32 bits of open_how::flags (((__u64)1 << 32)) so that open(2) and openat(2) -- whose @flags argument is a C int -- cannot physically express it. This is a structural guarantee, not a runtime mask: the bit is unrepresentable in 32 bits. Because the rest of the VFS open path narrows to 32 bits in several places (op->open_flag, f->f_flags, the unsigned open_flag argument of i_op->atomic_open()), build_open_flags() translates OPENAT2_REGULAR into a kernel-internal lower-32-bit carrier __O_REGULAR (bit 30, unused as an O_* on every architecture) before the assignment to op->open_flag. __O_REGULAR then rides through the existing channels exactly like __FMODE_EXEC. do_dentry_open() strips it so it cannot leak back to userspace via fcntl(F_GETFL).
Four BUILD_BUG_ON_MSG() invariants in build_open_flags() prevent any future bit collision or accidental low-32 redefinition: - VALID_OPEN_FLAGS fits in 32 bits. - OPENAT2_REGULAR lives in the upper 32 bits. - OPENAT2_REGULAR does not alias any open()/openat() flag. - __O_REGULAR does not alias any user-visible flag. [1]: https://uapi-group.org/kernel-features/#ability-to-only-open-regular-files Christian Brauner says: Move OPENAT2_REGULAR to the upper 32 bits of open_how::flags with a kernel-internal __O_REGULAR carrier so that open(2)/openat(2) cannot encode the flag; add BUILD_BUG_ON_MSG() invariants and register __O_REGULAR in the fcntl_init() allocation-uniqueness BUILD_BUG_ON() (bit count 21 -> 22). Signed-off-by: Dorjoy Chowdhury Link: https://patch.msgid.link/20260328172314.45807-2-dorjoychy111@gmail.com Reviewed-by: Jeff Layton Reviewed-by: Aleksa Sarai Signed-off-by: Christian Brauner (Amutable) --- diff --git a/fs/ceph/file.c b/fs/ceph/file.c index d54d71669176b..0ad42e1cc3058 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -996,6 +996,10 @@ retry: ceph_init_inode_acls(newino, &as_ctx); file->f_mode |= FMODE_CREATED; } + if ((flags & __O_REGULAR) && !d_is_reg(dentry)) { + err = -EFTYPE; + goto out_req; + } err = finish_open(file, dentry, ceph_open); } out_req: diff --git a/fs/fcntl.c b/fs/fcntl.c index 7d2165855a9c5..b3ea135b74d8b 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -1169,10 +1169,10 @@ static int __init fcntl_init(void) * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. 
*/ - BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != + BUILD_BUG_ON(22 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | - __FMODE_EXEC)); + __FMODE_EXEC | __O_REGULAR)); fasync_cache = kmem_cache_create("fasync_cache", sizeof(struct fasync_struct), 0, diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index e9bf4879c07f7..e9895dea0da49 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -738,6 +738,13 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, inode = gfs2_dir_search(dir, &dentry->d_name, !S_ISREG(mode) || excl); error = PTR_ERR(inode); if (!IS_ERR(inode)) { + if (file && (file->f_flags & __O_REGULAR) && + !S_ISREG(inode->i_mode)) { + iput(inode); + inode = NULL; + error = -EFTYPE; + goto fail_gunlock; + } if (S_ISDIR(inode->i_mode)) { iput(inode); inode = NULL; diff --git a/fs/namei.c b/fs/namei.c index c7fac83c9a85e..e1fe0f28b9236 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4679,6 +4679,10 @@ static int do_open(struct nameidata *nd, if (unlikely(error)) return error; } + + if ((open_flag & __O_REGULAR) && !d_is_reg(nd->path.dentry)) + return -EFTYPE; + if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry)) return -ENOTDIR; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e9ce1883288c5..1b9c368fb1338 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2194,6 +2194,10 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, break; case -EISDIR: case -ENOTDIR: + if (open_flags & __O_REGULAR) { + err = -EFTYPE; + break; + } goto no_open; case -ELOOP: if (!(open_flags & O_NOFOLLOW)) diff --git a/fs/open.c b/fs/open.c index 9e0164a8c1fbe..5458668a68e11 100644 --- a/fs/open.c +++ b/fs/open.c @@ -960,7 +960,7 @@ static int do_dentry_open(struct file *f, if (f->f_mapping->a_ops && f->f_mapping->a_ops->direct_IO) f->f_mode |= FMODE_CAN_ODIRECT; - f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); + f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | __O_REGULAR); 
f->f_iocb_flags = iocb_flags(f); file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); @@ -1184,7 +1184,15 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) int acc_mode = ACC_MODE(flags); BUILD_BUG_ON_MSG(upper_32_bits(VALID_OPEN_FLAGS), - "struct open_flags doesn't yet handle flags > 32 bits"); + "VALID_OPEN_FLAGS must fit in 32 bits"); + /* The whole point: OPENAT2_REGULAR must be unrepresentable in int. */ + BUILD_BUG_ON_MSG(!upper_32_bits(OPENAT2_REGULAR), + "OPENAT2_REGULAR must live in the upper 32 bits of open_how::flags"); + /* Prevent a future bit collision between UAPI and internal carrier. */ + BUILD_BUG_ON_MSG(OPENAT2_REGULAR & VALID_OPEN_FLAGS, + "OPENAT2_REGULAR must not alias any open()/openat() flag"); + BUILD_BUG_ON_MSG(__O_REGULAR & VALID_OPENAT2_FLAGS, + "__O_REGULAR must not alias any user-visible flag"); /* * Strip flags that aren't relevant in determining struct open_flags. @@ -1196,7 +1204,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) * values before calling build_open_flags(), but openat2(2) checks all * of its arguments. */ - if (flags & ~VALID_OPEN_FLAGS) + if (flags & ~VALID_OPENAT2_FLAGS) return -EINVAL; if (how->resolve & ~VALID_RESOLVE_FLAGS) return -EINVAL; @@ -1236,6 +1244,14 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) if (!(acc_mode & MAY_WRITE)) return -EINVAL; } + /* + * Asking to open a directory and a regular file at the same time is + * contradictory. + */ + if ((flags & (O_DIRECTORY | OPENAT2_REGULAR)) == + (O_DIRECTORY | OPENAT2_REGULAR)) + return -EINVAL; + if (flags & O_PATH) { /* O_PATH only permits certain other flags to be set. 
*/ if (flags & ~O_PATH_FLAGS) @@ -1252,6 +1268,19 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) if (flags & __O_SYNC) flags |= O_DSYNC; + /* + * Translate the upper-32-bit UAPI bit OPENAT2_REGULAR into the + * kernel-internal lower-32-bit __O_REGULAR carrier so the bit + * survives the assignment to op->open_flag (an int) below and the + * subsequent flow through f->f_flags (unsigned int) and the + * i_op->atomic_open() callback (unsigned). do_dentry_open() strips + * __O_REGULAR before the file becomes visible to userspace. + */ + if (flags & OPENAT2_REGULAR) { + flags &= ~OPENAT2_REGULAR; + flags |= __O_REGULAR; + } + op->open_flag = flags; /* O_TRUNC implies we need access checks for write permissions */ diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c index e4295a5b55b34..88a4a1787ff04 100644 --- a/fs/smb/client/dir.c +++ b/fs/smb/client/dir.c @@ -241,6 +241,12 @@ static int __cifs_do_create(struct inode *dir, struct dentry *direntry, goto cifs_create_get_file_info; } + if ((oflags & __O_REGULAR) && !S_ISREG(newinode->i_mode)) { + CIFSSMBClose(xid, tcon, fid->netfid); + iput(newinode); + return -EFTYPE; + } + if (S_ISDIR(newinode->i_mode)) { CIFSSMBClose(xid, tcon, fid->netfid); iput(newinode); @@ -458,9 +464,15 @@ cifs_create_set_dentry: goto out_err; } - if (newinode && S_ISDIR(newinode->i_mode)) { - rc = -EISDIR; - goto out_err; + if (newinode) { + if ((oflags & __O_REGULAR) && !S_ISREG(newinode->i_mode)) { + rc = -EFTYPE; + goto out_err; + } + if (S_ISDIR(newinode->i_mode)) { + rc = -EISDIR; + goto out_err; + } } *inode = newinode; diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h index c65c5c73d362f..6ad6b9e7a226a 100644 --- a/include/linux/fcntl.h +++ b/include/linux/fcntl.h @@ -4,6 +4,7 @@ #include #include +#include /* List of all valid flags for the open/openat flags argument: */ #define VALID_OPEN_FLAGS \ @@ -12,6 +13,23 @@ FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \ O_NOATIME | 
O_CLOEXEC | O_PATH | __O_TMPFILE | O_EMPTYPATH) +/* List of all valid flags for openat2(2)'s how->flags argument. */ +#define VALID_OPENAT2_FLAGS (VALID_OPEN_FLAGS | OPENAT2_REGULAR) + +/* + * Kernel-internal carrier for OPENAT2_REGULAR. The UAPI bit lives in the + * upper 32 bits of open_how::flags so open()/openat() cannot encode it. + * build_open_flags() translates it to this internal flag, which then + * propagates through op->open_flag and f->f_flags exactly like __FMODE_EXEC. + * do_dentry_open() strips it so userspace cannot observe it via + * fcntl(F_GETFL). + * + * Bit 30 is not claimed by any O_* flag on any architecture and stays clear + * of the sign bit of the int op->open_flag. fcntl_init() enforces that it + * never aliases an open-flag bit. + */ +#define __O_REGULAR (1 << 30) + /* List of all valid flags for the how->resolve argument: */ #define VALID_RESOLVE_FLAGS \ (RESOLVE_NO_XDEV | RESOLVE_NO_MAGICLINKS | RESOLVE_NO_SYMLINKS | \ diff --git a/include/uapi/linux/openat2.h b/include/uapi/linux/openat2.h index a5feb76049487..575c2c59d14a9 100644 --- a/include/uapi/linux/openat2.h +++ b/include/uapi/linux/openat2.h @@ -22,6 +22,13 @@ struct open_how { __u64 resolve; }; +/* + * how->flags bits exclusive to openat2(2). These live in the upper 32 bits + * of @flags so that they cannot be expressed by open(2) / openat(2), whose + * @flags argument is a C int. + */ +#define OPENAT2_REGULAR ((__u64)1 << 32) /* Only open regular files. */ + /* how->resolve flags for openat2(2). */ #define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings (includes bind-mounts). */