--- /dev/null
+From 11c2a8700cdcabf9b639b7204a1e38e2a0b6798e Mon Sep 17 00:00:00 2001
+From: Christian Brauner <brauner@kernel.org>
+Date: Mon, 17 Oct 2022 17:06:34 +0200
+Subject: attr: add in_group_or_capable()
+
+From: Christian Brauner <brauner@kernel.org>
+
+commit 11c2a8700cdcabf9b639b7204a1e38e2a0b6798e upstream.
+
+In setattr_{copy,prepare}() we need to perform the same permission
+checks to determine whether we need to drop the setgid bit or not.
+Instead of open-coding it twice add a simple helper that encapsulates the
+logic. We will reuse this helper to make dropping the setgid bit during
+write operations more consistent in a follow up patch.
+
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/attr.c | 10 +++++-----
+ fs/inode.c | 28 ++++++++++++++++++++++++----
+ fs/internal.h | 2 ++
+ 3 files changed, 31 insertions(+), 9 deletions(-)
+
+--- a/fs/attr.c
++++ b/fs/attr.c
+@@ -18,6 +18,8 @@
+ #include <linux/evm.h>
+ #include <linux/ima.h>
+
++#include "internal.h"
++
+ /**
+ * chown_ok - verify permissions to chown inode
+ * @mnt_userns: user namespace of the mount @inode was found from
+@@ -140,8 +142,7 @@ int setattr_prepare(struct user_namespac
+ vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+
+ /* Also check the setgid bit! */
+- if (!vfsgid_in_group_p(vfsgid) &&
+- !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
++ if (!in_group_or_capable(mnt_userns, inode, vfsgid))
+ attr->ia_mode &= ~S_ISGID;
+ }
+
+@@ -251,9 +252,8 @@ void setattr_copy(struct user_namespace
+ inode->i_ctime = attr->ia_ctime;
+ if (ia_valid & ATTR_MODE) {
+ umode_t mode = attr->ia_mode;
+- vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+- if (!vfsgid_in_group_p(vfsgid) &&
+- !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
++ if (!in_group_or_capable(mnt_userns, inode,
++ i_gid_into_vfsgid(mnt_userns, inode)))
+ mode &= ~S_ISGID;
+ inode->i_mode = mode;
+ }
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -2488,6 +2488,28 @@ struct timespec64 current_time(struct in
+ EXPORT_SYMBOL(current_time);
+
+ /**
++ * in_group_or_capable - check whether caller is CAP_FSETID privileged
++ * @mnt_userns: user namespace of the mount @inode was found from
++ * @inode: inode to check
++ * @vfsgid: the new/current vfsgid of @inode
++ *
++ * Check wether @vfsgid is in the caller's group list or if the caller is
++ * privileged with CAP_FSETID over @inode. This can be used to determine
++ * whether the setgid bit can be kept or must be dropped.
++ *
++ * Return: true if the caller is sufficiently privileged, false if not.
++ */
++bool in_group_or_capable(struct user_namespace *mnt_userns,
++ const struct inode *inode, vfsgid_t vfsgid)
++{
++ if (vfsgid_in_group_p(vfsgid))
++ return true;
++ if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
++ return true;
++ return false;
++}
++
++/**
+ * mode_strip_sgid - handle the sgid bit for non-directories
+ * @mnt_userns: User namespace of the mount the inode was created from
+ * @dir: parent directory inode
+@@ -2508,11 +2530,9 @@ umode_t mode_strip_sgid(struct user_name
+ return mode;
+ if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
+ return mode;
+- if (in_group_p(i_gid_into_mnt(mnt_userns, dir)))
++ if (in_group_or_capable(mnt_userns, dir,
++ i_gid_into_vfsgid(mnt_userns, dir)))
+ return mode;
+- if (capable_wrt_inode_uidgid(mnt_userns, dir, CAP_FSETID))
+- return mode;
+-
+ return mode & ~S_ISGID;
+ }
+ EXPORT_SYMBOL(mode_strip_sgid);
+--- a/fs/internal.h
++++ b/fs/internal.h
+@@ -151,6 +151,8 @@ extern int vfs_open(const struct path *,
+ */
+ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
+ extern int dentry_needs_remove_privs(struct dentry *dentry);
++bool in_group_or_capable(struct user_namespace *mnt_userns,
++ const struct inode *inode, vfsgid_t vfsgid);
+
+ /*
+ * fs-writeback.c
--- /dev/null
+From 72ae017c5451860443a16fb2a8c243bff3e396b8 Mon Sep 17 00:00:00 2001
+From: Christian Brauner <brauner@kernel.org>
+Date: Mon, 17 Oct 2022 17:06:36 +0200
+Subject: attr: add setattr_should_drop_sgid()
+
+From: Christian Brauner <brauner@kernel.org>
+
+commit 72ae017c5451860443a16fb2a8c243bff3e396b8 upstream.
+
+The current setgid stripping logic during write and ownership change
+operations is inconsistent and strewn over multiple places. In order to
+consolidate it and make it more consistent we'll add a new helper
+setattr_should_drop_sgid(). The function retains the old behavior where
+we remove the S_ISGID bit unconditionally when S_IXGRP is set but also
+when it isn't set and the caller is neither in the group of the inode
+nor privileged over the inode.
+
+We will use this helper both in write operation permission removal such
+as file_remove_privs() as well as in ownership change operations.
+
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/attr.c | 28 ++++++++++++++++++++++++++++
+ fs/internal.h | 6 ++++++
+ 2 files changed, 34 insertions(+)
+
+--- a/fs/attr.c
++++ b/fs/attr.c
+@@ -20,6 +20,34 @@
+
+ #include "internal.h"
+
++/**
++ * setattr_should_drop_sgid - determine whether the setgid bit needs to be
++ * removed
++ * @mnt_userns: user namespace of the mount @inode was found from
++ * @inode: inode to check
++ *
++ * This function determines whether the setgid bit needs to be removed.
++ * We retain backwards compatibility and require setgid bit to be removed
++ * unconditionally if S_IXGRP is set. Otherwise we have the exact same
++ * requirements as setattr_prepare() and setattr_copy().
++ *
++ * Return: ATTR_KILL_SGID if setgid bit needs to be removed, 0 otherwise.
++ */
++int setattr_should_drop_sgid(struct user_namespace *mnt_userns,
++ const struct inode *inode)
++{
++ umode_t mode = inode->i_mode;
++
++ if (!(mode & S_ISGID))
++ return 0;
++ if (mode & S_IXGRP)
++ return ATTR_KILL_SGID;
++ if (!in_group_or_capable(mnt_userns, inode,
++ i_gid_into_vfsgid(mnt_userns, inode)))
++ return ATTR_KILL_SGID;
++ return 0;
++}
++
+ /*
+ * The logic we want is
+ *
+--- a/fs/internal.h
++++ b/fs/internal.h
+@@ -236,3 +236,9 @@ int do_setxattr(struct user_namespace *m
+ struct xattr_ctx *ctx);
+
+ ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos);
++
++/*
++ * fs/attr.c
++ */
++int setattr_should_drop_sgid(struct user_namespace *mnt_userns,
++ const struct inode *inode);
--- /dev/null
+From ed5a7047d2011cb6b2bf84ceb6680124cc6a7d95 Mon Sep 17 00:00:00 2001
+From: Christian Brauner <brauner@kernel.org>
+Date: Mon, 17 Oct 2022 17:06:37 +0200
+Subject: attr: use consistent sgid stripping checks
+
+From: Christian Brauner <brauner@kernel.org>
+
+commit ed5a7047d2011cb6b2bf84ceb6680124cc6a7d95 upstream.
+
+Currently setgid stripping in file_remove_privs()'s should_remove_suid()
+helper is inconsistent with other parts of the vfs. Specifically, it only
+raises ATTR_KILL_SGID if the inode is S_ISGID and S_IXGRP but not if the
+inode isn't in the caller's groups and the caller isn't privileged over the
+inode although we require this already in setattr_prepare() and
+setattr_copy() and so all filesystems implement this requirement implicitly
+because they have to use setattr_{prepare,copy}() anyway.
+
+But the inconsistency shows up in setgid stripping bugs for overlayfs in
+xfstests (e.g., generic/673, generic/683, generic/685, generic/686,
+generic/687). For example, we test whether suid and setgid stripping works
+correctly when performing various write-like operations as an unprivileged
+user (fallocate, reflink, write, etc.):
+
+echo "Test 1 - qa_user, non-exec file $verb"
+setup_testfile
+chmod a+rws $junk_file
+commit_and_check "$qa_user" "$verb" 64k 64k
+
+The test basically creates a file with 6666 permissions. While the file has
+the S_ISUID and S_ISGID bits set it does not have the S_IXGRP set. On a
+regular filesystem like xfs what will happen is:
+
+sys_fallocate()
+-> vfs_fallocate()
+ -> xfs_file_fallocate()
+ -> file_modified()
+ -> __file_remove_privs()
+ -> dentry_needs_remove_privs()
+ -> should_remove_suid()
+ -> __remove_privs()
+ newattrs.ia_valid = ATTR_FORCE | kill;
+ -> notify_change()
+ -> setattr_copy()
+
+In should_remove_suid() we can see that ATTR_KILL_SUID is raised
+unconditionally because the file in the test has S_ISUID set.
+
+But we also see that ATTR_KILL_SGID won't be set because while the file
+is S_ISGID it is not S_IXGRP (see above) which is a condition for
+ATTR_KILL_SGID being raised.
+
+So by the time we call notify_change() we have attr->ia_valid set to
+ATTR_KILL_SUID | ATTR_FORCE. Now notify_change() sees that
+ATTR_KILL_SUID is set and does:
+
+ia_valid = attr->ia_valid |= ATTR_MODE
+attr->ia_mode = (inode->i_mode & ~S_ISUID);
+
+which means that when we call setattr_copy() later we will definitely
+update inode->i_mode. Note that attr->ia_mode still contains S_ISGID.
+
+Now we call into the filesystem's ->setattr() inode operation which will
+end up calling setattr_copy(). Since ATTR_MODE is set we will hit:
+
+if (ia_valid & ATTR_MODE) {
+ umode_t mode = attr->ia_mode;
+ vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode);
+ if (!vfsgid_in_group_p(vfsgid) &&
+ !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
+ mode &= ~S_ISGID;
+ inode->i_mode = mode;
+}
+
+and since the caller in the test is neither capable nor in the group of the
+inode the S_ISGID bit is stripped.
+
+But assume the file isn't suid then ATTR_KILL_SUID won't be raised which
+has the consequence that neither the setgid nor the suid bits are stripped
+even though the setgid bit should be stripped because the inode isn't in the
+caller's groups and the caller isn't privileged over the inode.
+
+If overlayfs is in the mix things become a bit more complicated and the bug
+shows up more clearly. When e.g., ovl_setattr() is hit from
+ovl_fallocate()'s call to file_remove_privs() then ATTR_KILL_SUID and
+ATTR_KILL_SGID might be raised but because the check in notify_change() is
+questioning the ATTR_KILL_SGID flag again by requiring S_IXGRP for it to be
+stripped the S_ISGID bit isn't removed even though it should be stripped:
+
+sys_fallocate()
+-> vfs_fallocate()
+ -> ovl_fallocate()
+ -> file_remove_privs()
+ -> dentry_needs_remove_privs()
+ -> should_remove_suid()
+ -> __remove_privs()
+ newattrs.ia_valid = ATTR_FORCE | kill;
+ -> notify_change()
+ -> ovl_setattr()
+ // TAKE ON MOUNTER'S CREDS
+ -> ovl_do_notify_change()
+ -> notify_change()
+ // GIVE UP MOUNTER'S CREDS
+ // TAKE ON MOUNTER'S CREDS
+ -> vfs_fallocate()
+ -> xfs_file_fallocate()
+ -> file_modified()
+ -> __file_remove_privs()
+ -> dentry_needs_remove_privs()
+ -> should_remove_suid()
+ -> __remove_privs()
+ newattrs.ia_valid = ATTR_FORCE | kill;
+ -> notify_change()
+
+The fix for all of this is to make file_remove_privs()'s
+should_remove_suid() helper perform the same checks as we already
+require in setattr_prepare() and setattr_copy() and have notify_change()
+not pointlessly require S_IXGRP again. It doesn't make any sense in the
+first place because the caller must calculate the flags via
+should_remove_suid() anyway which would raise ATTR_KILL_SGID.
+
+While we're at it we move should_remove_suid() from inode.c to attr.c
+where it belongs with the rest of the iattr helpers. Especially since it
+returns ATTR_KILL_S{G,U}ID flags. We also rename it to
+setattr_should_drop_suidgid() to better reflect that it indicates both
+setuid and setgid bit removal and also that it returns attr flags.
+
+Running xfstests with this doesn't report any regressions. We should really
+try and use consistent checks.
+
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/trace/ftrace.rst | 2 +-
+ fs/attr.c | 33 +++++++++++++++++++--------------
+ fs/fuse/file.c | 2 +-
+ fs/inode.c | 7 ++++---
+ fs/internal.h | 2 +-
+ fs/ocfs2/file.c | 4 ++--
+ fs/open.c | 8 ++++----
+ include/linux/fs.h | 2 +-
+ 8 files changed, 33 insertions(+), 27 deletions(-)
+
+--- a/Documentation/trace/ftrace.rst
++++ b/Documentation/trace/ftrace.rst
+@@ -2940,7 +2940,7 @@ Produces::
+ bash-1994 [000] .... 4342.324898: ima_get_action <-process_measurement
+ bash-1994 [000] .... 4342.324898: ima_match_policy <-ima_get_action
+ bash-1994 [000] .... 4342.324899: do_truncate <-do_last
+- bash-1994 [000] .... 4342.324899: should_remove_suid <-do_truncate
++ bash-1994 [000] .... 4342.324899: setattr_should_drop_suidgid <-do_truncate
+ bash-1994 [000] .... 4342.324899: notify_change <-do_truncate
+ bash-1994 [000] .... 4342.324900: current_fs_time <-notify_change
+ bash-1994 [000] .... 4342.324900: current_kernel_time <-current_fs_time
+--- a/fs/attr.c
++++ b/fs/attr.c
+@@ -48,34 +48,39 @@ int setattr_should_drop_sgid(struct user
+ return 0;
+ }
+
+-/*
+- * The logic we want is
++/**
++ * setattr_should_drop_suidgid - determine whether the set{g,u}id bit needs to
++ * be dropped
++ * @mnt_userns: user namespace of the mount @inode was found from
++ * @inode: inode to check
++ *
++ * This function determines whether the set{g,u}id bits need to be removed.
++ * If the setuid bit needs to be removed ATTR_KILL_SUID is returned. If the
++ * setgid bit needs to be removed ATTR_KILL_SGID is returned. If both
++ * set{g,u}id bits need to be removed the corresponding mask of both flags is
++ * returned.
+ *
+- * if suid or (sgid and xgrp)
+- * remove privs
++ * Return: A mask of ATTR_KILL_S{G,U}ID indicating which - if any - setid bits
++ * to remove, 0 otherwise.
+ */
+-int should_remove_suid(struct dentry *dentry)
++int setattr_should_drop_suidgid(struct user_namespace *mnt_userns,
++ struct inode *inode)
+ {
+- umode_t mode = d_inode(dentry)->i_mode;
++ umode_t mode = inode->i_mode;
+ int kill = 0;
+
+ /* suid always must be killed */
+ if (unlikely(mode & S_ISUID))
+ kill = ATTR_KILL_SUID;
+
+- /*
+- * sgid without any exec bits is just a mandatory locking mark; leave
+- * it alone. If some exec bits are set, it's a real sgid; kill it.
+- */
+- if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
+- kill |= ATTR_KILL_SGID;
++ kill |= setattr_should_drop_sgid(mnt_userns, inode);
+
+ if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
+ return kill;
+
+ return 0;
+ }
+-EXPORT_SYMBOL(should_remove_suid);
++EXPORT_SYMBOL(setattr_should_drop_suidgid);
+
+ /**
+ * chown_ok - verify permissions to chown inode
+@@ -432,7 +437,7 @@ int notify_change(struct user_namespace
+ }
+ }
+ if (ia_valid & ATTR_KILL_SGID) {
+- if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
++ if (mode & S_ISGID) {
+ if (!(ia_valid & ATTR_MODE)) {
+ ia_valid = attr->ia_valid |= ATTR_MODE;
+ attr->ia_mode = inode->i_mode;
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -1313,7 +1313,7 @@ static ssize_t fuse_cache_write_iter(str
+ return err;
+
+ if (fc->handle_killpriv_v2 &&
+- should_remove_suid(file_dentry(file))) {
++ setattr_should_drop_suidgid(&init_user_ns, file_inode(file))) {
+ goto writethrough;
+ }
+
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -1953,7 +1953,8 @@ EXPORT_SYMBOL(touch_atime);
+ * response to write or truncate. Return 0 if nothing has to be changed.
+ * Negative value on error (change should be denied).
+ */
+-int dentry_needs_remove_privs(struct dentry *dentry)
++int dentry_needs_remove_privs(struct user_namespace *mnt_userns,
++ struct dentry *dentry)
+ {
+ struct inode *inode = d_inode(dentry);
+ int mask = 0;
+@@ -1962,7 +1963,7 @@ int dentry_needs_remove_privs(struct den
+ if (IS_NOSEC(inode))
+ return 0;
+
+- mask = should_remove_suid(dentry);
++ mask = setattr_should_drop_suidgid(mnt_userns, inode);
+ ret = security_inode_need_killpriv(dentry);
+ if (ret < 0)
+ return ret;
+@@ -1994,7 +1995,7 @@ static int __file_remove_privs(struct fi
+ if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
+ return 0;
+
+- kill = dentry_needs_remove_privs(dentry);
++ kill = dentry_needs_remove_privs(file_mnt_user_ns(file), dentry);
+ if (kill < 0)
+ return kill;
+
+--- a/fs/internal.h
++++ b/fs/internal.h
+@@ -150,7 +150,7 @@ extern int vfs_open(const struct path *,
+ * inode.c
+ */
+ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
+-extern int dentry_needs_remove_privs(struct dentry *dentry);
++int dentry_needs_remove_privs(struct user_namespace *, struct dentry *dentry);
+ bool in_group_or_capable(struct user_namespace *mnt_userns,
+ const struct inode *inode, vfsgid_t vfsgid);
+
+--- a/fs/ocfs2/file.c
++++ b/fs/ocfs2/file.c
+@@ -1991,7 +1991,7 @@ static int __ocfs2_change_file_space(str
+ }
+ }
+
+- if (file && should_remove_suid(file->f_path.dentry)) {
++ if (file && setattr_should_drop_suidgid(&init_user_ns, file_inode(file))) {
+ ret = __ocfs2_write_remove_suid(inode, di_bh);
+ if (ret) {
+ mlog_errno(ret);
+@@ -2279,7 +2279,7 @@ static int ocfs2_prepare_inode_for_write
+ * inode. There's also the dinode i_size state which
+ * can be lost via setattr during extending writes (we
+ * set inode->i_size at the end of a write. */
+- if (should_remove_suid(dentry)) {
++ if (setattr_should_drop_suidgid(&init_user_ns, inode)) {
+ if (meta_level == 0) {
+ ocfs2_inode_unlock_for_extent_tree(inode,
+ &di_bh,
+--- a/fs/open.c
++++ b/fs/open.c
+@@ -54,7 +54,7 @@ int do_truncate(struct user_namespace *m
+ }
+
+ /* Remove suid, sgid, and file capabilities on truncate too */
+- ret = dentry_needs_remove_privs(dentry);
++ ret = dentry_needs_remove_privs(mnt_userns, dentry);
+ if (ret < 0)
+ return ret;
+ if (ret)
+@@ -723,10 +723,10 @@ retry_deleg:
+ return -EINVAL;
+ if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid))
+ return -EINVAL;
+- if (!S_ISDIR(inode->i_mode))
+- newattrs.ia_valid |=
+- ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
+ inode_lock(inode);
++ if (!S_ISDIR(inode->i_mode))
++ newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV |
++ setattr_should_drop_sgid(mnt_userns, inode);
+ /* Continue to send actual fs values, not the mount values. */
+ error = security_path_chown(
+ path,
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -3118,7 +3118,7 @@ extern void __destroy_inode(struct inode
+ extern struct inode *new_inode_pseudo(struct super_block *sb);
+ extern struct inode *new_inode(struct super_block *sb);
+ extern void free_inode_nonrcu(struct inode *inode);
+-extern int should_remove_suid(struct dentry *);
++extern int setattr_should_drop_suidgid(struct user_namespace *, struct inode *);
+ extern int file_remove_privs(struct file *);
+
+ /*
--- /dev/null
+From 1fe4850b34ab512ff911e2c035c75fb6438f7307 Mon Sep 17 00:00:00 2001
+From: Martin KaFai Lau <martin.lau@kernel.org>
+Date: Thu, 16 Feb 2023 16:41:48 -0800
+Subject: bpf: bpf_fib_lookup should not return neigh in NUD_FAILED state
+
+From: Martin KaFai Lau <martin.lau@kernel.org>
+
+commit 1fe4850b34ab512ff911e2c035c75fb6438f7307 upstream.
+
+The bpf_fib_lookup() helper does not only look up the fib (ie. route)
+but it also looks up the neigh. Before returning the neigh, the helper
+does not check for NUD_VALID. When a neigh state (neigh->nud_state)
+is in NUD_FAILED, its dmac (neigh->ha) could be all zeros. The helper
+still returns SUCCESS instead of NO_NEIGH in this case. Because of the
+SUCCESS return value, the bpf prog directly uses the returned dmac
+and ends up filling all zero in the eth header.
+
+This patch checks for NUD_VALID and returns NO_NEIGH if the neigh is
+not valid.
+
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20230217004150.2980689-3-martin.lau@linux.dev
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/filter.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -5807,7 +5807,7 @@ static int bpf_ipv4_fib_lookup(struct ne
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
+ }
+
+- if (!neigh)
++ if (!neigh || !(neigh->nud_state & NUD_VALID))
+ return BPF_FIB_LKUP_RET_NO_NEIGH;
+
+ return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
+@@ -5922,7 +5922,7 @@ static int bpf_ipv6_fib_lookup(struct ne
+ * not needed here.
+ */
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
+- if (!neigh)
++ if (!neigh || !(neigh->nud_state & NUD_VALID))
+ return BPF_FIB_LKUP_RET_NO_NEIGH;
+
+ return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
--- /dev/null
+From ead08b95fa50f40618c72b93a849c4ae30c9cd50 Mon Sep 17 00:00:00 2001
+From: Stylon Wang <stylon.wang@amd.com>
+Date: Thu, 10 Nov 2022 21:53:01 +0800
+Subject: drm/amd/display: Fix race condition in DPIA AUX transfer
+
+From: Stylon Wang <stylon.wang@amd.com>
+
+commit ead08b95fa50f40618c72b93a849c4ae30c9cd50 upstream.
+
+[Why]
+This fix was intended for improving on coding style but in the process
+uncovers a race condition, which explains why we are getting incorrect
+length in DPIA AUX replies. Due to the call path of DPIA AUX going from
+DC back to DM layer then again into DC and the added complexities on top
+of current DC AUX implementation, a proper fix to rely on current dc_lock
+to address the race condition is difficult without a major overhaul
+on how DPIA AUX is implemented.
+
+[How]
+- Add a mutex dpia_aux_lock to protect DPIA AUX transfers
+- Remove DMUB_ASYNC_TO_SYNC_ACCESS_* codes and rely solely on
+ aux_return_code_type for error reporting and handling
+- Separate SET_CONFIG from DPIA AUX transfer because they have quite
+ different processing logic
+- Remove unnecessary type casting to and from void * type
+
+Reviewed-by: Nicholas Kazlauskas <Nicholas.Kazlauskas@amd.com>
+Acked-by: Jasdeep Dhillon <jdhillon@amd.com>
+Signed-off-by: Stylon Wang <stylon.wang@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: "Limonciello, Mario" <mario.limonciello@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 147 ++++++--------
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 17 +
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 10
+ 3 files changed, 89 insertions(+), 85 deletions(-)
+
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -147,14 +147,6 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
+ /* Number of bytes in PSP footer for firmware. */
+ #define PSP_FOOTER_BYTES 0x100
+
+-/*
+- * DMUB Async to Sync Mechanism Status
+- */
+-#define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1
+-#define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2
+-#define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3
+-#define DMUB_ASYNC_TO_SYNC_ACCESS_INVALID 4
+-
+ /**
+ * DOC: overview
+ *
+@@ -1456,6 +1448,7 @@ static int amdgpu_dm_init(struct amdgpu_
+ memset(&init_params, 0, sizeof(init_params));
+ #endif
+
++ mutex_init(&adev->dm.dpia_aux_lock);
+ mutex_init(&adev->dm.dc_lock);
+ mutex_init(&adev->dm.audio_lock);
+ spin_lock_init(&adev->dm.vblank_lock);
+@@ -1814,6 +1807,7 @@ static void amdgpu_dm_fini(struct amdgpu
+
+ mutex_destroy(&adev->dm.audio_lock);
+ mutex_destroy(&adev->dm.dc_lock);
++ mutex_destroy(&adev->dm.dpia_aux_lock);
+
+ return;
+ }
+@@ -10198,91 +10192,92 @@ uint32_t dm_read_reg_func(const struct d
+ return value;
+ }
+
+-static int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux,
+- struct dc_context *ctx,
+- uint8_t status_type,
+- uint32_t *operation_result)
++int amdgpu_dm_process_dmub_aux_transfer_sync(
++ struct dc_context *ctx,
++ unsigned int link_index,
++ struct aux_payload *payload,
++ enum aux_return_code_type *operation_result)
+ {
+ struct amdgpu_device *adev = ctx->driver_context;
+- int return_status = -1;
+ struct dmub_notification *p_notify = adev->dm.dmub_notify;
++ int ret = -1;
+
+- if (is_cmd_aux) {
+- if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
+- return_status = p_notify->aux_reply.length;
+- *operation_result = p_notify->result;
+- } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT) {
+- *operation_result = AUX_RET_ERROR_TIMEOUT;
+- } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_FAIL) {
+- *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
+- } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_INVALID) {
+- *operation_result = AUX_RET_ERROR_INVALID_REPLY;
+- } else {
+- *operation_result = AUX_RET_ERROR_UNKNOWN;
++ mutex_lock(&adev->dm.dpia_aux_lock);
++ if (!dc_process_dmub_aux_transfer_async(ctx->dc, link_index, payload)) {
++ *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
++ goto out;
++ }
++
++ if (!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
++ DRM_ERROR("wait_for_completion_timeout timeout!");
++ *operation_result = AUX_RET_ERROR_TIMEOUT;
++ goto out;
++ }
++
++ if (p_notify->result != AUX_RET_SUCCESS) {
++ /*
++ * Transient states before tunneling is enabled could
++ * lead to this error. We can ignore this for now.
++ */
++ if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) {
++ DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n",
++ payload->address, payload->length,
++ p_notify->result);
+ }
+- } else {
+- if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
+- return_status = 0;
+- *operation_result = p_notify->sc_status;
+- } else {
+- *operation_result = SET_CONFIG_UNKNOWN_ERROR;
++ *operation_result = AUX_RET_ERROR_INVALID_REPLY;
++ goto out;
++ }
++
++
++ payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
++ if (!payload->write && p_notify->aux_reply.length &&
++ (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) {
++
++ if (payload->length != p_notify->aux_reply.length) {
++ DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n",
++ p_notify->aux_reply.length,
++ payload->address, payload->length);
++ *operation_result = AUX_RET_ERROR_INVALID_REPLY;
++ goto out;
+ }
++
++ memcpy(payload->data, p_notify->aux_reply.data,
++ p_notify->aux_reply.length);
+ }
+
+- return return_status;
++ /* success */
++ ret = p_notify->aux_reply.length;
++ *operation_result = p_notify->result;
++out:
++ mutex_unlock(&adev->dm.dpia_aux_lock);
++ return ret;
+ }
+
+-int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context *ctx,
+- unsigned int link_index, void *cmd_payload, void *operation_result)
++int amdgpu_dm_process_dmub_set_config_sync(
++ struct dc_context *ctx,
++ unsigned int link_index,
++ struct set_config_cmd_payload *payload,
++ enum set_config_status *operation_result)
+ {
+ struct amdgpu_device *adev = ctx->driver_context;
+- int ret = 0;
++ bool is_cmd_complete;
++ int ret;
+
+- if (is_cmd_aux) {
+- dc_process_dmub_aux_transfer_async(ctx->dc,
+- link_index, (struct aux_payload *)cmd_payload);
+- } else if (dc_process_dmub_set_config_async(ctx->dc, link_index,
+- (struct set_config_cmd_payload *)cmd_payload,
+- adev->dm.dmub_notify)) {
+- return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
+- ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
+- (uint32_t *)operation_result);
+- }
++ mutex_lock(&adev->dm.dpia_aux_lock);
++ is_cmd_complete = dc_process_dmub_set_config_async(ctx->dc,
++ link_index, payload, adev->dm.dmub_notify);
+
+- ret = wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ);
+- if (ret == 0) {
++ if (is_cmd_complete || wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
++ ret = 0;
++ *operation_result = adev->dm.dmub_notify->sc_status;
++ } else {
+ DRM_ERROR("wait_for_completion_timeout timeout!");
+- return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
+- ctx, DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT,
+- (uint32_t *)operation_result);
+- }
+-
+- if (is_cmd_aux) {
+- if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) {
+- struct aux_payload *payload = (struct aux_payload *)cmd_payload;
+-
+- payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
+- if (!payload->write && adev->dm.dmub_notify->aux_reply.length &&
+- payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK) {
+-
+- if (payload->length != adev->dm.dmub_notify->aux_reply.length) {
+- DRM_WARN("invalid read from DPIA AUX %x(%d) got length %d!\n",
+- payload->address, payload->length,
+- adev->dm.dmub_notify->aux_reply.length);
+- return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, ctx,
+- DMUB_ASYNC_TO_SYNC_ACCESS_INVALID,
+- (uint32_t *)operation_result);
+- }
+-
+- memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data,
+- adev->dm.dmub_notify->aux_reply.length);
+- }
+- }
++ ret = -1;
++ *operation_result = SET_CONFIG_UNKNOWN_ERROR;
+ }
+
+- return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
+- ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
+- (uint32_t *)operation_result);
++ mutex_unlock(&adev->dm.dpia_aux_lock);
++ return ret;
+ }
+
+ /*
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+@@ -59,7 +59,9 @@
+ #include "signal_types.h"
+ #include "amdgpu_dm_crc.h"
+ struct aux_payload;
++struct set_config_cmd_payload;
+ enum aux_return_code_type;
++enum set_config_status;
+
+ /* Forward declarations */
+ struct amdgpu_device;
+@@ -549,6 +551,13 @@ struct amdgpu_display_manager {
+ * occurred on certain intel platform
+ */
+ bool aux_hpd_discon_quirk;
++
++ /**
++ * @dpia_aux_lock:
++ *
++ * Guards access to DPIA AUX
++ */
++ struct mutex dpia_aux_lock;
+ };
+
+ enum dsc_clock_force_state {
+@@ -792,9 +801,11 @@ void amdgpu_dm_update_connector_after_de
+
+ extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs;
+
+-int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux,
+- struct dc_context *ctx, unsigned int link_index,
+- void *payload, void *operation_result);
++int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index,
++ struct aux_payload *payload, enum aux_return_code_type *operation_result);
++
++int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index,
++ struct set_config_cmd_payload *payload, enum set_config_status *operation_result);
+
+ bool check_seamless_boot_capability(struct amdgpu_device *adev);
+
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+@@ -844,9 +844,8 @@ int dm_helper_dmub_aux_transfer_sync(
+ struct aux_payload *payload,
+ enum aux_return_code_type *operation_result)
+ {
+- return amdgpu_dm_process_dmub_aux_transfer_sync(true, ctx,
+- link->link_index, (void *)payload,
+- (void *)operation_result);
++ return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload,
++ operation_result);
+ }
+
+ int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
+@@ -854,9 +853,8 @@ int dm_helpers_dmub_set_config_sync(stru
+ struct set_config_cmd_payload *payload,
+ enum set_config_status *operation_result)
+ {
+- return amdgpu_dm_process_dmub_aux_transfer_sync(false, ctx,
+- link->link_index, (void *)payload,
+- (void *)operation_result);
++ return amdgpu_dm_process_dmub_set_config_sync(ctx, link->link_index, payload,
++ operation_result);
+ }
+
+ void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)
--- /dev/null
+From e383b12709e32d6494c948422070c2464b637e44 Mon Sep 17 00:00:00 2001
+From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+Date: Mon, 23 Jan 2023 14:59:28 -0500
+Subject: drm/amd/display: Move DCN314 DOMAIN power control to DMCUB
+
+From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+
+commit e383b12709e32d6494c948422070c2464b637e44 upstream.
+
+[Why]
+DOMAIN power gating control is now required to be done via firmware
+due to interlock with other power features. This is to avoid
+intermittent issues in the LB memories.
+
+[How]
+If the firmware supports the command then use the new firmware as
+the sequence can avoid potential display corruption issues.
+
+The command will be ignored on firmware that does not support DOMAIN
+power control and the pipes will remain always on - frequent PG cycling
+can cause the issue to occur on the old sequence, so we should avoid it.
+
+Reviewed-by: Hansen Dsouza <hansen.dsouza@amd.com>
+Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com>
+Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: "Limonciello, Mario" <Mario.Limonciello@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c | 24 ++++++++++++++++++
+ drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h | 2 +
+ drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c | 2 -
+ drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 25 +++++++++++++++++++
+ 4 files changed, 52 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
+@@ -391,3 +391,27 @@ void dcn314_set_pixels_per_cycle(struct
+ pipe_ctx->stream_res.stream_enc->funcs->set_input_mode(pipe_ctx->stream_res.stream_enc,
+ pix_per_cycle);
+ }
++
++void dcn314_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on)
++{
++ struct dc_context *ctx = hws->ctx;
++ union dmub_rb_cmd cmd;
++
++ if (hws->ctx->dc->debug.disable_hubp_power_gate)
++ return;
++
++ PERF_TRACE();
++
++ memset(&cmd, 0, sizeof(cmd));
++ cmd.domain_control.header.type = DMUB_CMD__VBIOS;
++ cmd.domain_control.header.sub_type = DMUB_CMD__VBIOS_DOMAIN_CONTROL;
++ cmd.domain_control.header.payload_bytes = sizeof(cmd.domain_control.data);
++ cmd.domain_control.data.inst = hubp_inst;
++ cmd.domain_control.data.power_gate = !power_on;
++
++ dc_dmub_srv_cmd_queue(ctx->dmub_srv, &cmd);
++ dc_dmub_srv_cmd_execute(ctx->dmub_srv);
++ dc_dmub_srv_wait_idle(ctx->dmub_srv);
++
++ PERF_TRACE();
++}
+--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h
++++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h
+@@ -41,4 +41,6 @@ unsigned int dcn314_calculate_dccg_k1_k2
+
+ void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx);
+
++void dcn314_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on);
++
+ #endif /* __DC_HWSS_DCN314_H__ */
+--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
++++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
+@@ -137,7 +137,7 @@ static const struct hwseq_private_funcs
+ .plane_atomic_disable = dcn20_plane_atomic_disable,
+ .plane_atomic_power_down = dcn10_plane_atomic_power_down,
+ .enable_power_gating_plane = dcn314_enable_power_gating_plane,
+- .hubp_pg_control = dcn31_hubp_pg_control,
++ .hubp_pg_control = dcn314_hubp_pg_control,
+ .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree,
+ .update_odm = dcn314_update_odm,
+ .dsc_pg_control = dcn314_dsc_pg_control,
+--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
++++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+@@ -450,6 +450,10 @@ enum dmub_cmd_vbios_type {
+ * Query DP alt status on a transmitter.
+ */
+ DMUB_CMD__VBIOS_TRANSMITTER_QUERY_DP_ALT = 26,
++ /**
++ * Controls domain power gating
++ */
++ DMUB_CMD__VBIOS_DOMAIN_CONTROL = 28,
+ };
+
+ //==============================================================================
+@@ -1192,6 +1196,23 @@ struct dmub_rb_cmd_dig1_transmitter_cont
+ };
+
+ /**
++ * struct dmub_rb_cmd_domain_control_data - Data for DOMAIN power control
++ */
++struct dmub_rb_cmd_domain_control_data {
++ uint8_t inst : 6; /**< DOMAIN instance to control */
++ uint8_t power_gate : 1; /**< 1=power gate, 0=power up */
++ uint8_t reserved[3]; /**< Reserved for future use */
++};
++
++/**
++ * struct dmub_rb_cmd_domain_control - Controls DOMAIN power gating
++ */
++struct dmub_rb_cmd_domain_control {
++ struct dmub_cmd_header header; /**< header */
++ struct dmub_rb_cmd_domain_control_data data; /**< payload */
++};
++
++/**
+ * DPIA tunnel command parameters.
+ */
+ struct dmub_cmd_dig_dpia_control_data {
+@@ -3188,6 +3209,10 @@ union dmub_rb_cmd {
+ */
+ struct dmub_rb_cmd_dig1_transmitter_control dig1_transmitter_control;
+ /**
++ * Definition of a DMUB_CMD__VBIOS_DOMAIN_CONTROL command.
++ */
++ struct dmub_rb_cmd_domain_control domain_control;
++ /**
+ * Definition of a DMUB_CMD__PSR_SET_VERSION command.
+ */
+ struct dmub_rb_cmd_psr_set_version psr_set_version;
--- /dev/null
+From 0cf8307adbc6beb5ff3b8a76afedc6e4e0b536a9 Mon Sep 17 00:00:00 2001
+From: Stylon Wang <stylon.wang@amd.com>
+Date: Tue, 10 Jan 2023 14:38:33 +0800
+Subject: drm/amd/display: Properly reuse completion structure
+
+From: Stylon Wang <stylon.wang@amd.com>
+
+commit 0cf8307adbc6beb5ff3b8a76afedc6e4e0b536a9 upstream.
+
+[Why]
+Connecting displays to TBT3 docks often produces invalid
+replies for DPIA AUX requests. It turns out the completion
+structure was not re-initialized before reusing it, resulting
+in a premature wake-up of the completion.
+
+[How]
+Properly call reinit_completion() on reused completion structure.
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Solomon Chiu <solomon.chiu@amd.com>
+Acked-by: Alan Liu <HaoPing.Liu@amd.com>
+Signed-off-by: Stylon Wang <stylon.wang@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: "Limonciello, Mario" <mario.limonciello@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+@@ -10249,6 +10249,7 @@ int amdgpu_dm_process_dmub_aux_transfer_
+ ret = p_notify->aux_reply.length;
+ *operation_result = p_notify->result;
+ out:
++ reinit_completion(&adev->dm.dmub_aux_transfer_done);
+ mutex_unlock(&adev->dm.dpia_aux_lock);
+ return ret;
+ }
+@@ -10276,6 +10277,8 @@ int amdgpu_dm_process_dmub_set_config_sy
+ *operation_result = SET_CONFIG_UNKNOWN_ERROR;
+ }
+
++ if (!is_cmd_complete)
++ reinit_completion(&adev->dm.dmub_aux_transfer_done);
+ mutex_unlock(&adev->dm.dpia_aux_lock);
+ return ret;
+ }
--- /dev/null
+From e243e3f94c804ecca9a8241b5babe28f35258ef4 Mon Sep 17 00:00:00 2001
+From: Christian Brauner <brauner@kernel.org>
+Date: Mon, 17 Oct 2022 17:06:35 +0200
+Subject: fs: move should_remove_suid()
+
+From: Christian Brauner <brauner@kernel.org>
+
+commit e243e3f94c804ecca9a8241b5babe28f35258ef4 upstream.
+
+Move the helper from inode.c to attr.c. This keeps the core of the
+set{g,u}id stripping logic in one place when we add follow-up changes.
+It is the better place anyway, since should_remove_suid() returns
+ATTR_KILL_S{G,U}ID flags.
+
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/attr.c | 29 +++++++++++++++++++++++++++++
+ fs/inode.c | 29 -----------------------------
+ 2 files changed, 29 insertions(+), 29 deletions(-)
+
+--- a/fs/attr.c
++++ b/fs/attr.c
+@@ -20,6 +20,35 @@
+
+ #include "internal.h"
+
++/*
++ * The logic we want is
++ *
++ * if suid or (sgid and xgrp)
++ * remove privs
++ */
++int should_remove_suid(struct dentry *dentry)
++{
++ umode_t mode = d_inode(dentry)->i_mode;
++ int kill = 0;
++
++ /* suid always must be killed */
++ if (unlikely(mode & S_ISUID))
++ kill = ATTR_KILL_SUID;
++
++ /*
++ * sgid without any exec bits is just a mandatory locking mark; leave
++ * it alone. If some exec bits are set, it's a real sgid; kill it.
++ */
++ if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
++ kill |= ATTR_KILL_SGID;
++
++ if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
++ return kill;
++
++ return 0;
++}
++EXPORT_SYMBOL(should_remove_suid);
++
+ /**
+ * chown_ok - verify permissions to chown inode
+ * @mnt_userns: user namespace of the mount @inode was found from
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -1949,35 +1949,6 @@ skip_update:
+ EXPORT_SYMBOL(touch_atime);
+
+ /*
+- * The logic we want is
+- *
+- * if suid or (sgid and xgrp)
+- * remove privs
+- */
+-int should_remove_suid(struct dentry *dentry)
+-{
+- umode_t mode = d_inode(dentry)->i_mode;
+- int kill = 0;
+-
+- /* suid always must be killed */
+- if (unlikely(mode & S_ISUID))
+- kill = ATTR_KILL_SUID;
+-
+- /*
+- * sgid without any exec bits is just a mandatory locking mark; leave
+- * it alone. If some exec bits are set, it's a real sgid; kill it.
+- */
+- if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
+- kill |= ATTR_KILL_SGID;
+-
+- if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
+- return kill;
+-
+- return 0;
+-}
+-EXPORT_SYMBOL(should_remove_suid);
+-
+-/*
+ * Return mask of changes for notify_change() that need to be done as a
+ * response to write or truncate. Return 0 if nothing has to be changed.
+ * Negative value on error (change should be denied).
--- /dev/null
+From 8d84e39d76bd83474b26cb44f4b338635676e7e8 Mon Sep 17 00:00:00 2001
+From: Christian Brauner <brauner@kernel.org>
+Date: Tue, 22 Nov 2022 11:40:32 +0100
+Subject: fs: use consistent setgid checks in is_sxid()
+
+From: Christian Brauner <brauner@kernel.org>
+
+commit 8d84e39d76bd83474b26cb44f4b338635676e7e8 upstream.
+
+Now that we made the VFS setgid checking consistent an inode can't be
+marked security irrelevant even if the setgid bit is still set. Make
+this function consistent with all other helpers.
+
+Note that enforcing consistent setgid stripping checks for file
+modification and mode- and ownership changes will cause the setgid bit
+to be lost in more cases than used to be the case. If an unprivileged
+user wrote to a non-executable setgid file that they don't have
+privilege over, the setgid bit will be dropped. This will lead to
+temporary failures in some xfstests until they have been updated.
+
+Reported-by: Miklos Szeredi <miklos@szeredi.hu>
+Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/fs.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -3549,7 +3549,7 @@ int __init list_bdev_fs_names(char *buf,
+
+ static inline bool is_sxid(umode_t mode)
+ {
+- return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
++ return mode & (S_ISUID | S_ISGID);
+ }
+
+ static inline int check_sticky(struct user_namespace *mnt_userns,
--- /dev/null
+From 62ec33b44e0f7168ff2886520fec6fb62d03b5a3 Mon Sep 17 00:00:00 2001
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+Date: Thu, 9 Feb 2023 16:22:02 -0800
+Subject: net: Remove WARN_ON_ONCE(sk->sk_forward_alloc) from sk_stream_kill_queues().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+commit 62ec33b44e0f7168ff2886520fec6fb62d03b5a3 upstream.
+
+Christoph Paasch reported that commit b5fc29233d28 ("inet6: Remove
+inet6_destroy_sock() in sk->sk_prot->destroy().") started triggering
+WARN_ON_ONCE(sk->sk_forward_alloc) in sk_stream_kill_queues(). [0 - 2]
+Also, we can reproduce it by a program in [3].
+
+In the commit, we delay freeing ipv6_pinfo.pktoptions from sk->destroy()
+to sk->sk_destruct(), so sk->sk_forward_alloc is no longer zero in
+inet_csk_destroy_sock().
+
+The same check has been in inet_sock_destruct() from at least v2.6,
+we can just remove the WARN_ON_ONCE(). However, among the users of
+sk_stream_kill_queues(), only CAIF is not calling inet_sock_destruct().
+Thus, we add the same WARN_ON_ONCE() to caif_sock_destructor().
+
+[0]: https://lore.kernel.org/netdev/39725AB4-88F1-41B3-B07F-949C5CAEFF4F@icloud.com/
+[1]: https://github.com/multipath-tcp/mptcp_net-next/issues/341
+[2]:
+WARNING: CPU: 0 PID: 3232 at net/core/stream.c:212 sk_stream_kill_queues+0x2f9/0x3e0
+Modules linked in:
+CPU: 0 PID: 3232 Comm: syz-executor.0 Not tainted 6.2.0-rc5ab24eb4698afbe147b424149c529e2a43ec24eb5 #2
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+RIP: 0010:sk_stream_kill_queues+0x2f9/0x3e0
+Code: 03 0f b6 04 02 84 c0 74 08 3c 03 0f 8e ec 00 00 00 8b ab 08 01 00 00 e9 60 ff ff ff e8 d0 5f b6 fe 0f 0b eb 97 e8 c7 5f b6 fe <0f> 0b eb a0 e8 be 5f b6 fe 0f 0b e9 6a fe ff ff e8 02 07 e3 fe e9
+RSP: 0018:ffff88810570fc68 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
+RDX: ffff888101f38f40 RSI: ffffffff8285e529 RDI: 0000000000000005
+RBP: 0000000000000ce0 R08: 0000000000000005 R09: 0000000000000000
+R10: 0000000000000ce0 R11: 0000000000000001 R12: ffff8881009e9488
+R13: ffffffff84af2cc0 R14: 0000000000000000 R15: ffff8881009e9458
+FS: 00007f7fdfbd5800(0000) GS:ffff88811b600000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000001b32923000 CR3: 00000001062fc006 CR4: 0000000000170ef0
+Call Trace:
+ <TASK>
+ inet_csk_destroy_sock+0x1a1/0x320
+ __tcp_close+0xab6/0xe90
+ tcp_close+0x30/0xc0
+ inet_release+0xe9/0x1f0
+ inet6_release+0x4c/0x70
+ __sock_release+0xd2/0x280
+ sock_close+0x15/0x20
+ __fput+0x252/0xa20
+ task_work_run+0x169/0x250
+ exit_to_user_mode_prepare+0x113/0x120
+ syscall_exit_to_user_mode+0x1d/0x40
+ do_syscall_64+0x48/0x90
+ entry_SYSCALL_64_after_hwframe+0x72/0xdc
+RIP: 0033:0x7f7fdf7ae28d
+Code: c1 20 00 00 75 10 b8 03 00 00 00 0f 05 48 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 ee fb ff ff 48 89 04 24 b8 03 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 37 fc ff ff 48 89 d0 48 83 c4 08 48 3d 01
+RSP: 002b:00000000007dfbb0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003
+RAX: 0000000000000000 RBX: 0000000000000004 RCX: 00007f7fdf7ae28d
+RDX: 0000000000000000 RSI: ffffffffffffffff RDI: 0000000000000003
+RBP: 0000000000000000 R08: 000000007f338e0f R09: 0000000000000e0f
+R10: 000000007f338e13 R11: 0000000000000293 R12: 00007f7fdefff000
+R13: 00007f7fdefffcd8 R14: 00007f7fdefffce0 R15: 00007f7fdefffcd8
+ </TASK>
+
+[3]: https://lore.kernel.org/netdev/20230208004245.83497-1-kuniyu@amazon.com/
+
+Fixes: b5fc29233d28 ("inet6: Remove inet6_destroy_sock() in sk->sk_prot->destroy().")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Reported-by: Christoph Paasch <christophpaasch@icloud.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/caif/caif_socket.c | 1 +
+ net/core/stream.c | 1 -
+ 2 files changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/caif/caif_socket.c
++++ b/net/caif/caif_socket.c
+@@ -1015,6 +1015,7 @@ static void caif_sock_destructor(struct
+ return;
+ }
+ sk_stream_kill_queues(&cf_sk->sk);
++ WARN_ON_ONCE(sk->sk_forward_alloc);
+ caif_free_client(&cf_sk->layer);
+ }
+
+--- a/net/core/stream.c
++++ b/net/core/stream.c
+@@ -209,7 +209,6 @@ void sk_stream_kill_queues(struct sock *
+ sk_mem_reclaim_final(sk);
+
+ WARN_ON_ONCE(sk->sk_wmem_queued);
+- WARN_ON_ONCE(sk->sk_forward_alloc);
+
+ /* It is _impossible_ for the backlog to contain anything
+ * when we get here. All user references to this socket
x86-cpu-add-lunar-lake-m.patch
drm-amd-display-disable-s-g-display-on-dcn-3.1.2-3.patch
pm-sleep-avoid-using-pr_cont-in-the-tasks-freezing-code.patch
+bpf-bpf_fib_lookup-should-not-return-neigh-in-nud_failed-state.patch
+net-remove-warn_on_once-sk-sk_forward_alloc-from-sk_stream_kill_queues.patch
+vc_screen-don-t-clobber-return-value-in-vcs_read.patch
+drm-amd-display-move-dcn314-domain-power-control-to-dmcub.patch
+drm-amd-display-fix-race-condition-in-dpia-aux-transfer.patch
+drm-amd-display-properly-reuse-completion-structure.patch
+attr-add-in_group_or_capable.patch
+fs-move-should_remove_suid.patch
+attr-add-setattr_should_drop_sgid.patch
+attr-use-consistent-sgid-stripping-checks.patch
+fs-use-consistent-setgid-checks-in-is_sxid.patch
--- /dev/null
+From ae3419fbac845b4d3f3a9fae4cc80c68d82cdf6e Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
+Date: Mon, 20 Feb 2023 06:46:12 +0000
+Subject: vc_screen: don't clobber return value in vcs_read
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Thomas Weißschuh <linux@weissschuh.net>
+
+commit ae3419fbac845b4d3f3a9fae4cc80c68d82cdf6e upstream.
+
+Commit 226fae124b2d ("vc_screen: move load of struct vc_data pointer in
+vcs_read() to avoid UAF") moved the call to vcs_vc() into the loop.
+
+While doing this it also moved the unconditional assignment of
+
+ ret = -ENXIO;
+
+This unconditional assignment was valid outside the loop but within it
+it clobbers the actual value of ret.
+
+To avoid this only assign "ret = -ENXIO" when actually needed.
+
+[ Also, the "goto unlock_out" needs to be just a "break", so that it
+ does the right thing when it exits on later iterations when partial
+ success has happened - Linus ]
+
+Reported-by: Storm Dragon <stormdragon2976@gmail.com>
+Link: https://lore.kernel.org/lkml/Y%2FKS6vdql2pIsCiI@hotmail.com/
+Fixes: 226fae124b2d ("vc_screen: move load of struct vc_data pointer in vcs_read() to avoid UAF")
+Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
+Link: https://lore.kernel.org/lkml/64981d94-d00c-4b31-9063-43ad0a384bde@t-8ch.de/
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/tty/vt/vc_screen.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/tty/vt/vc_screen.c
++++ b/drivers/tty/vt/vc_screen.c
+@@ -403,10 +403,11 @@ vcs_read(struct file *file, char __user
+ unsigned int this_round, skip = 0;
+ int size;
+
+- ret = -ENXIO;
+ vc = vcs_vc(inode, &viewed);
+- if (!vc)
+- goto unlock_out;
++ if (!vc) {
++ ret = -ENXIO;
++ break;
++ }
+
+ /* Check whether we are above size each round,
+ * as copy_to_user at the end of this loop