--- /dev/null
+From 1affc01fdc6035189a5ab2a24948c9419ee0ecf2 Mon Sep 17 00:00:00 2001
+From: Edwin Peer <edwin.peer@broadcom.com>
+Date: Sun, 12 Sep 2021 12:34:48 -0400
+Subject: bnxt_en: make bnxt_free_skbs() safe to call after bnxt_free_mem()
+
+From: Edwin Peer <edwin.peer@broadcom.com>
+
+commit 1affc01fdc6035189a5ab2a24948c9419ee0ecf2 upstream.
+
+The call to bnxt_free_mem(..., false) in the bnxt_half_open_nic() error
+path will deallocate ring descriptor memory via bnxt_free_?x_rings(),
+but because irq_re_init is false, the ring info itself is not freed.
+
+To simplify error paths, deallocation functions have generally been
+written to be safe when called on unallocated memory. It should always
+be safe to call dev_close(), which calls bnxt_free_skbs() a second time,
+even in this semi-allocated ring state.
+
+Calling bnxt_free_skbs() a second time with the rings already freed will
+cause a NULL pointer dereference. Fix it by checking the rings are valid
+before proceeding in bnxt_free_tx_skbs() and
+bnxt_free_one_rx_ring_skbs().
+
+Fixes: 975bc99a4a39 ("bnxt_en: Refactor bnxt_free_rx_skbs().")
+Signed-off-by: Edwin Peer <edwin.peer@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -2680,6 +2680,9 @@ static void bnxt_free_tx_skbs(struct bnx
+ struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
+ int j;
+
++ if (!txr->tx_buf_ring)
++ continue;
++
+ for (j = 0; j < max_idx;) {
+ struct bnxt_sw_tx_bd *tx_buf = &txr->tx_buf_ring[j];
+ struct sk_buff *skb;
+@@ -2764,6 +2767,9 @@ static void bnxt_free_one_rx_ring_skbs(s
+ }
+
+ skip_rx_tpa_free:
++ if (!rxr->rx_buf_ring)
++ goto skip_rx_buf_free;
++
+ for (i = 0; i < max_idx; i++) {
+ struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[i];
+ dma_addr_t mapping = rx_buf->mapping;
+@@ -2786,6 +2792,11 @@ skip_rx_tpa_free:
+ kfree(data);
+ }
+ }
++
++skip_rx_buf_free:
++ if (!rxr->rx_agg_ring)
++ goto skip_rx_agg_free;
++
+ for (i = 0; i < max_agg_idx; i++) {
+ struct bnxt_sw_rx_agg_bd *rx_agg_buf = &rxr->rx_agg_ring[i];
+ struct page *page = rx_agg_buf->page;
+@@ -2802,6 +2813,8 @@ skip_rx_tpa_free:
+
+ __free_page(page);
+ }
++
++skip_rx_agg_free:
+ if (rxr->rx_page) {
+ __free_page(rxr->rx_page);
+ rxr->rx_page = NULL;
--- /dev/null
+From 8520e224f547cd070c7c8f97b1fc6d58cff7ccaa Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 14 Sep 2021 01:07:57 +0200
+Subject: bpf, cgroups: Fix cgroup v2 fallback on v1/v2 mixed mode
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 8520e224f547cd070c7c8f97b1fc6d58cff7ccaa upstream.
+
+Fix cgroup v1 interference when non-root cgroup v2 BPF programs are used.
+Back in the days, commit bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup")
+embedded per-socket cgroup information into sock->sk_cgrp_data and in order
+to save 8 bytes in struct sock made both mutually exclusive, that is, when
+cgroup v1 socket tagging (e.g. net_cls/net_prio) is used, then cgroup v2
+falls back to the root cgroup in sock_cgroup_ptr() (&cgrp_dfl_root.cgrp).
+
+The assumption made was "there is no reason to mix the two and this is in line
+with how legacy and v2 compatibility is handled" as stated in bd1060a1d671.
+However, with Kubernetes more widely supporting cgroups v2 as well nowadays,
+this assumption no longer holds, and the possibility of the v1/v2 mixed mode
+with the v2 root fallback being hit becomes a real security issue.
+
+Many of the cgroup v2 BPF programs are also used for policy enforcement, just
+to pick _one_ example, that is, to programmatically deny socket related system
+calls like connect(2) or bind(2). A v2 root fallback would implicitly cause
+a policy bypass for the affected Pods.
+
+In production environments, we have recently seen this case due to various
+circumstances: i) a different 3rd party agent and/or ii) a container runtime
+such as [0] in the user's environment configuring legacy cgroup v1 net_cls
+tags, which triggered implicitly mentioned root fallback. Another case is
+Kubernetes projects like kind [1] which create Kubernetes nodes in a container
+and also add cgroup namespaces to the mix, meaning programs which are attached
+to the cgroup v2 root of the cgroup namespace get attached to a non-root
+cgroup v2 path from init namespace point of view. And the latter's root is
+out of reach for agents on a kind Kubernetes node to configure. Meaning, any
+entity on the node setting cgroup v1 net_cls tag will trigger the bypass
+despite cgroup v2 BPF programs attached to the namespace root.
+
+Generally, this mutual exclusiveness does not hold anymore in today's user
+environments and makes cgroup v2 usage from BPF side fragile and unreliable.
+This fix adds proper struct cgroup pointer for the cgroup v2 case to struct
+sock_cgroup_data in order to address these issues; this implicitly also fixes
+the tradeoffs being made back then with regards to races and refcount leaks
+as stated in bd1060a1d671, and removes the fallback, so that cgroup v2 BPF
+programs always operate as expected.
+
+ [0] https://github.com/nestybox/sysbox/
+ [1] https://kind.sigs.k8s.io/
+
+Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Stanislav Fomichev <sdf@google.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/bpf/20210913230759.2313-1-daniel@iogearbox.net
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/cgroup-defs.h | 107 ++++++++++---------------------------------
+ include/linux/cgroup.h | 22 --------
+ kernel/cgroup/cgroup.c | 50 ++++----------------
+ net/core/netclassid_cgroup.c | 7 --
+ net/core/netprio_cgroup.c | 10 ----
+ 5 files changed, 41 insertions(+), 155 deletions(-)
+
+--- a/include/linux/cgroup-defs.h
++++ b/include/linux/cgroup-defs.h
+@@ -752,107 +752,54 @@ static inline void cgroup_threadgroup_ch
+ * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
+ * per-socket cgroup information except for memcg association.
+ *
+- * On legacy hierarchies, net_prio and net_cls controllers directly set
+- * attributes on each sock which can then be tested by the network layer.
+- * On the default hierarchy, each sock is associated with the cgroup it was
+- * created in and the networking layer can match the cgroup directly.
+- *
+- * To avoid carrying all three cgroup related fields separately in sock,
+- * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
+- * On boot, sock_cgroup_data records the cgroup that the sock was created
+- * in so that cgroup2 matches can be made; however, once either net_prio or
+- * net_cls starts being used, the area is overridden to carry prioidx and/or
+- * classid. The two modes are distinguished by whether the lowest bit is
+- * set. Clear bit indicates cgroup pointer while set bit prioidx and
+- * classid.
+- *
+- * While userland may start using net_prio or net_cls at any time, once
+- * either is used, cgroup2 matching no longer works. There is no reason to
+- * mix the two and this is in line with how legacy and v2 compatibility is
+- * handled. On mode switch, cgroup references which are already being
+- * pointed to by socks may be leaked. While this can be remedied by adding
+- * synchronization around sock_cgroup_data, given that the number of leaked
+- * cgroups is bound and highly unlikely to be high, this seems to be the
+- * better trade-off.
++ * On legacy hierarchies, net_prio and net_cls controllers directly
++ * set attributes on each sock which can then be tested by the network
++ * layer. On the default hierarchy, each sock is associated with the
++ * cgroup it was created in and the networking layer can match the
++ * cgroup directly.
+ */
+ struct sock_cgroup_data {
+- union {
+-#ifdef __LITTLE_ENDIAN
+- struct {
+- u8 is_data : 1;
+- u8 no_refcnt : 1;
+- u8 unused : 6;
+- u8 padding;
+- u16 prioidx;
+- u32 classid;
+- } __packed;
+-#else
+- struct {
+- u32 classid;
+- u16 prioidx;
+- u8 padding;
+- u8 unused : 6;
+- u8 no_refcnt : 1;
+- u8 is_data : 1;
+- } __packed;
++ struct cgroup *cgroup; /* v2 */
++#ifdef CONFIG_CGROUP_NET_CLASSID
++ u32 classid; /* v1 */
++#endif
++#ifdef CONFIG_CGROUP_NET_PRIO
++ u16 prioidx; /* v1 */
+ #endif
+- u64 val;
+- };
+ };
+
+-/*
+- * There's a theoretical window where the following accessors race with
+- * updaters and return part of the previous pointer as the prioidx or
+- * classid. Such races are short-lived and the result isn't critical.
+- */
+ static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
+ {
+- /* fallback to 1 which is always the ID of the root cgroup */
+- return (skcd->is_data & 1) ? skcd->prioidx : 1;
++#ifdef CONFIG_CGROUP_NET_PRIO
++ return READ_ONCE(skcd->prioidx);
++#else
++ return 1;
++#endif
+ }
+
+ static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
+ {
+- /* fallback to 0 which is the unconfigured default classid */
+- return (skcd->is_data & 1) ? skcd->classid : 0;
++#ifdef CONFIG_CGROUP_NET_CLASSID
++ return READ_ONCE(skcd->classid);
++#else
++ return 0;
++#endif
+ }
+
+-/*
+- * If invoked concurrently, the updaters may clobber each other. The
+- * caller is responsible for synchronization.
+- */
+ static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
+ u16 prioidx)
+ {
+- struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
+-
+- if (sock_cgroup_prioidx(&skcd_buf) == prioidx)
+- return;
+-
+- if (!(skcd_buf.is_data & 1)) {
+- skcd_buf.val = 0;
+- skcd_buf.is_data = 1;
+- }
+-
+- skcd_buf.prioidx = prioidx;
+- WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
++#ifdef CONFIG_CGROUP_NET_PRIO
++ WRITE_ONCE(skcd->prioidx, prioidx);
++#endif
+ }
+
+ static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
+ u32 classid)
+ {
+- struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
+-
+- if (sock_cgroup_classid(&skcd_buf) == classid)
+- return;
+-
+- if (!(skcd_buf.is_data & 1)) {
+- skcd_buf.val = 0;
+- skcd_buf.is_data = 1;
+- }
+-
+- skcd_buf.classid = classid;
+- WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
++#ifdef CONFIG_CGROUP_NET_CLASSID
++ WRITE_ONCE(skcd->classid, classid);
++#endif
+ }
+
+ #else /* CONFIG_SOCK_CGROUP_DATA */
+--- a/include/linux/cgroup.h
++++ b/include/linux/cgroup.h
+@@ -829,33 +829,13 @@ static inline void cgroup_account_cputim
+ */
+ #ifdef CONFIG_SOCK_CGROUP_DATA
+
+-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
+-extern spinlock_t cgroup_sk_update_lock;
+-#endif
+-
+-void cgroup_sk_alloc_disable(void);
+ void cgroup_sk_alloc(struct sock_cgroup_data *skcd);
+ void cgroup_sk_clone(struct sock_cgroup_data *skcd);
+ void cgroup_sk_free(struct sock_cgroup_data *skcd);
+
+ static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd)
+ {
+-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
+- unsigned long v;
+-
+- /*
+- * @skcd->val is 64bit but the following is safe on 32bit too as we
+- * just need the lower ulong to be written and read atomically.
+- */
+- v = READ_ONCE(skcd->val);
+-
+- if (v & 3)
+- return &cgrp_dfl_root.cgrp;
+-
+- return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp;
+-#else
+- return (struct cgroup *)(unsigned long)skcd->val;
+-#endif
++ return skcd->cgroup;
+ }
+
+ #else /* CONFIG_CGROUP_DATA */
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -6559,74 +6559,44 @@ int cgroup_parse_float(const char *input
+ */
+ #ifdef CONFIG_SOCK_CGROUP_DATA
+
+-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
+-
+-DEFINE_SPINLOCK(cgroup_sk_update_lock);
+-static bool cgroup_sk_alloc_disabled __read_mostly;
+-
+-void cgroup_sk_alloc_disable(void)
+-{
+- if (cgroup_sk_alloc_disabled)
+- return;
+- pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
+- cgroup_sk_alloc_disabled = true;
+-}
+-
+-#else
+-
+-#define cgroup_sk_alloc_disabled false
+-
+-#endif
+-
+ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
+ {
+- if (cgroup_sk_alloc_disabled) {
+- skcd->no_refcnt = 1;
+- return;
+- }
+-
+ /* Don't associate the sock with unrelated interrupted task's cgroup. */
+ if (in_interrupt())
+ return;
+
+ rcu_read_lock();
+-
+ while (true) {
+ struct css_set *cset;
+
+ cset = task_css_set(current);
+ if (likely(cgroup_tryget(cset->dfl_cgrp))) {
+- skcd->val = (unsigned long)cset->dfl_cgrp;
++ skcd->cgroup = cset->dfl_cgrp;
+ cgroup_bpf_get(cset->dfl_cgrp);
+ break;
+ }
+ cpu_relax();
+ }
+-
+ rcu_read_unlock();
+ }
+
+ void cgroup_sk_clone(struct sock_cgroup_data *skcd)
+ {
+- if (skcd->val) {
+- if (skcd->no_refcnt)
+- return;
+- /*
+- * We might be cloning a socket which is left in an empty
+- * cgroup and the cgroup might have already been rmdir'd.
+- * Don't use cgroup_get_live().
+- */
+- cgroup_get(sock_cgroup_ptr(skcd));
+- cgroup_bpf_get(sock_cgroup_ptr(skcd));
+- }
++ struct cgroup *cgrp = sock_cgroup_ptr(skcd);
++
++ /*
++ * We might be cloning a socket which is left in an empty
++ * cgroup and the cgroup might have already been rmdir'd.
++ * Don't use cgroup_get_live().
++ */
++ cgroup_get(cgrp);
++ cgroup_bpf_get(cgrp);
+ }
+
+ void cgroup_sk_free(struct sock_cgroup_data *skcd)
+ {
+ struct cgroup *cgrp = sock_cgroup_ptr(skcd);
+
+- if (skcd->no_refcnt)
+- return;
+ cgroup_bpf_put(cgrp);
+ cgroup_put(cgrp);
+ }
+--- a/net/core/netclassid_cgroup.c
++++ b/net/core/netclassid_cgroup.c
+@@ -71,11 +71,8 @@ static int update_classid_sock(const voi
+ struct update_classid_context *ctx = (void *)v;
+ struct socket *sock = sock_from_file(file);
+
+- if (sock) {
+- spin_lock(&cgroup_sk_update_lock);
++ if (sock)
+ sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
+- spin_unlock(&cgroup_sk_update_lock);
+- }
+ if (--ctx->batch == 0) {
+ ctx->batch = UPDATE_CLASSID_BATCH;
+ return n + 1;
+@@ -121,8 +118,6 @@ static int write_classid(struct cgroup_s
+ struct css_task_iter it;
+ struct task_struct *p;
+
+- cgroup_sk_alloc_disable();
+-
+ cs->classid = (u32)value;
+
+ css_task_iter_start(css, 0, &it);
+--- a/net/core/netprio_cgroup.c
++++ b/net/core/netprio_cgroup.c
+@@ -207,8 +207,6 @@ static ssize_t write_priomap(struct kern
+ if (!dev)
+ return -ENODEV;
+
+- cgroup_sk_alloc_disable();
+-
+ rtnl_lock();
+
+ ret = netprio_set_prio(of_css(of), dev, prio);
+@@ -221,12 +219,10 @@ static ssize_t write_priomap(struct kern
+ static int update_netprio(const void *v, struct file *file, unsigned n)
+ {
+ struct socket *sock = sock_from_file(file);
+- if (sock) {
+- spin_lock(&cgroup_sk_update_lock);
++
++ if (sock)
+ sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data,
+ (unsigned long)v);
+- spin_unlock(&cgroup_sk_update_lock);
+- }
+ return 0;
+ }
+
+@@ -235,8 +231,6 @@ static void net_prio_attach(struct cgrou
+ struct task_struct *p;
+ struct cgroup_subsys_state *css;
+
+- cgroup_sk_alloc_disable();
+-
+ cgroup_taskset_for_each(p, css, tset) {
+ void *v = (void *)(unsigned long)css->id;
+
--- /dev/null
+From 55c21d57eafb7b379bb7b3e93baf9ca2695895b0 Mon Sep 17 00:00:00 2001
+From: David Heidelberg <david@ixit.cz>
+Date: Sun, 12 Sep 2021 18:51:20 +0200
+Subject: dt-bindings: arm: Fix Toradex compatible typo
+
+From: David Heidelberg <david@ixit.cz>
+
+commit 55c21d57eafb7b379bb7b3e93baf9ca2695895b0 upstream.
+
+Fix board compatible typo reported by dtbs_check.
+
+Fixes: f4d1577e9bc6 ("dt-bindings: arm: Convert Tegra board/soc bindings to json-schema")
+Signed-off-by: David Heidelberg <david@ixit.cz>
+Link: https://lore.kernel.org/r/20210912165120.188490-1-david@ixit.cz
+Signed-off-by: Rob Herring <robh@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/devicetree/bindings/arm/tegra.yaml | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/Documentation/devicetree/bindings/arm/tegra.yaml
++++ b/Documentation/devicetree/bindings/arm/tegra.yaml
+@@ -54,7 +54,7 @@ properties:
+ - const: toradex,apalis_t30
+ - const: nvidia,tegra30
+ - items:
+- - const: toradex,apalis_t30-eval-v1.1
++ - const: toradex,apalis_t30-v1.1-eval
+ - const: toradex,apalis_t30-eval
+ - const: toradex,apalis_t30-v1.1
+ - const: toradex,apalis_t30
--- /dev/null
+From 273c29e944bda9a20a30c26cfc34c9a3f363280b Mon Sep 17 00:00:00 2001
+From: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
+Date: Wed, 8 Sep 2021 09:58:20 -0700
+Subject: ibmvnic: check failover_pending in login response
+
+From: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
+
+commit 273c29e944bda9a20a30c26cfc34c9a3f363280b upstream.
+
+If a failover occurs before a login response is received, the login
+response buffer may be undefined. Check that there was no failover
+before accessing the login response buffer.
+
+Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol")
+Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -4700,6 +4700,14 @@ static int handle_login_rsp(union ibmvni
+ return 0;
+ }
+
++ if (adapter->failover_pending) {
++ adapter->init_done_rc = -EAGAIN;
++ netdev_dbg(netdev, "Failover pending, ignoring login response\n");
++ complete(&adapter->init_done);
++ /* login response buffer will be released on reset */
++ return 0;
++ }
++
+ netdev->mtu = adapter->req_mtu - ETH_HLEN;
+
+ netdev_dbg(adapter->netdev, "Login Response Buffer:\n");
--- /dev/null
+From 267cdfa21385d78c794768233678756e32b39ead Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Wed, 8 Sep 2021 20:17:18 +1000
+Subject: KVM: PPC: Book3S HV: Tolerate treclaim. in fake-suspend mode changing registers
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+commit 267cdfa21385d78c794768233678756e32b39ead upstream.
+
+POWER9 DD2.2 and 2.3 hardware implements a "fake-suspend" mode where
+certain TM instructions executed in HV=0 mode cause softpatch interrupts
+so the hypervisor can emulate them and prevent problematic processor
+conditions. In this fake-suspend mode, the treclaim. instruction does
+not modify registers.
+
+Unfortunately the rfscv instruction executed by the guest does not
+generate softpatch interrupts, which can cause the hypervisor to lose
+track of the fake-suspend mode, and it can execute this treclaim. while
+not in fake-suspend mode. This modifies GPRs and crashes the hypervisor.
+
+It's not trivial to disable scv in the guest with HFSCR now, because
+they assume a POWER9 has scv available. So this fix saves and restores
+checkpointed registers across the treclaim.
+
+Fixes: 7854f7545bff ("KVM: PPC: Book3S: Rework TM save/restore code and make it C-callable")
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20210908101718.118522-2-npiggin@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 36 ++++++++++++++++++++++++++++++--
+ 1 file changed, 34 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
++++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+@@ -2578,7 +2578,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_A
+ /* The following code handles the fake_suspend = 1 case */
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+- stdu r1, -PPC_MIN_STKFRM(r1)
++ stdu r1, -TM_FRAME_SIZE(r1)
+
+ /* Turn on TM. */
+ mfmsr r8
+@@ -2593,10 +2593,42 @@ BEGIN_FTR_SECTION
+ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
+ nop
+
++ /*
++ * It's possible that treclaim. may modify registers, if we have lost
++ * track of fake-suspend state in the guest due to it using rfscv.
++ * Save and restore registers in case this occurs.
++ */
++ mfspr r3, SPRN_DSCR
++ mfspr r4, SPRN_XER
++ mfspr r5, SPRN_AMR
++ /* SPRN_TAR would need to be saved here if the kernel ever used it */
++ mfcr r12
++ SAVE_NVGPRS(r1)
++ SAVE_GPR(2, r1)
++ SAVE_GPR(3, r1)
++ SAVE_GPR(4, r1)
++ SAVE_GPR(5, r1)
++ stw r12, 8(r1)
++ std r1, HSTATE_HOST_R1(r13)
++
+ /* We have to treclaim here because that's the only way to do S->N */
+ li r3, TM_CAUSE_KVM_RESCHED
+ TRECLAIM(R3)
+
++ GET_PACA(r13)
++ ld r1, HSTATE_HOST_R1(r13)
++ REST_GPR(2, r1)
++ REST_GPR(3, r1)
++ REST_GPR(4, r1)
++ REST_GPR(5, r1)
++ lwz r12, 8(r1)
++ REST_NVGPRS(r1)
++ mtspr SPRN_DSCR, r3
++ mtspr SPRN_XER, r4
++ mtspr SPRN_AMR, r5
++ mtcr r12
++ HMT_MEDIUM
++
+ /*
+ * We were in fake suspend, so we are not going to save the
+ * register state as the guest checkpointed state (since
+@@ -2624,7 +2656,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_
+ std r5, VCPU_TFHAR(r9)
+ std r6, VCPU_TFIAR(r9)
+
+- addi r1, r1, PPC_MIN_STKFRM
++ addi r1, r1, TM_FRAME_SIZE
+ ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
--- /dev/null
+From 1dc839ec09d3ab2a4156dc98328b8bc3586f2b70 Mon Sep 17 00:00:00 2001
+From: Yufeng Mo <moyufeng@huawei.com>
+Date: Mon, 13 Sep 2021 21:08:22 +0800
+Subject: net: hns3: change affinity_mask to numa node range
+
+From: Yufeng Mo <moyufeng@huawei.com>
+
+commit 1dc839ec09d3ab2a4156dc98328b8bc3586f2b70 upstream.
+
+Currently, affinity_mask is set to a single cpu. As a result,
+irqbalance becomes invalid in SUBSET or EXACT mode. To solve
+this problem, change affinity_mask to numa node range. In this
+way, irqbalance can be performed on the cpu of the numa node.
+
+Fixes: 0812545487ec ("net: hns3: add interrupt affinity support for misc interrupt")
+Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
+Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -1528,9 +1528,10 @@ static void hclge_init_kdump_kernel_conf
+ static int hclge_configure(struct hclge_dev *hdev)
+ {
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
++ const struct cpumask *cpumask = cpu_online_mask;
+ struct hclge_cfg cfg;
+ unsigned int i;
+- int ret;
++ int node, ret;
+
+ ret = hclge_get_cfg(hdev, &cfg);
+ if (ret)
+@@ -1595,11 +1596,12 @@ static int hclge_configure(struct hclge_
+
+ hclge_init_kdump_kernel_config(hdev);
+
+- /* Set the init affinity based on pci func number */
+- i = cpumask_weight(cpumask_of_node(dev_to_node(&hdev->pdev->dev)));
+- i = i ? PCI_FUNC(hdev->pdev->devfn) % i : 0;
+- cpumask_set_cpu(cpumask_local_spread(i, dev_to_node(&hdev->pdev->dev)),
+- &hdev->affinity_mask);
++ /* Set the affinity based on numa node */
++ node = dev_to_node(&hdev->pdev->dev);
++ if (node != NUMA_NO_NODE)
++ cpumask = cpumask_of_node(node);
++
++ cpumask_copy(&hdev->affinity_mask, cpumask);
+
+ return ret;
+ }
--- /dev/null
+From b81d8948746520f989e86d66292ff72b5056114a Mon Sep 17 00:00:00 2001
+From: Yufeng Mo <moyufeng@huawei.com>
+Date: Mon, 13 Sep 2021 21:08:23 +0800
+Subject: net: hns3: disable mac in flr process
+
+From: Yufeng Mo <moyufeng@huawei.com>
+
+commit b81d8948746520f989e86d66292ff72b5056114a upstream.
+
+The firmware will not disable mac in flr process. Therefore, the driver
+needs to proactively disable mac during flr, which is the same as the
+function reset.
+
+Fixes: 35d93a30040c ("net: hns3: adjust the process of PF reset")
+Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
+Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+@@ -8120,11 +8120,12 @@ static void hclge_ae_stop(struct hnae3_h
+ hclge_clear_arfs_rules(hdev);
+ spin_unlock_bh(&hdev->fd_rule_lock);
+
+- /* If it is not PF reset, the firmware will disable the MAC,
++ /* If it is not PF reset or FLR, the firmware will disable the MAC,
+ * so it only need to stop phy here.
+ */
+ if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) &&
+- hdev->reset_type != HNAE3_FUNC_RESET) {
++ hdev->reset_type != HNAE3_FUNC_RESET &&
++ hdev->reset_type != HNAE3_FLR_RESET) {
+ hclge_mac_stop_phy(hdev);
+ hclge_update_link_status(hdev);
+ return;
--- /dev/null
+From 427900d27d86b820c559037a984bd403f910860f Mon Sep 17 00:00:00 2001
+From: Jiaran Zhang <zhangjiaran@huawei.com>
+Date: Mon, 13 Sep 2021 21:08:25 +0800
+Subject: net: hns3: fix the timing issue of VF clearing interrupt sources
+
+From: Jiaran Zhang <zhangjiaran@huawei.com>
+
+commit 427900d27d86b820c559037a984bd403f910860f upstream.
+
+Currently, the VF does not clear the interrupt source immediately after
+receiving the interrupt. As a result, if the second interrupt task is
+triggered when processing the first interrupt task, clearing the
+interrupt source before exiting will clear the interrupt sources of the
+two tasks at the same time. As a result, no interrupt is triggered for
+the second task. The VF detects the missed message only when the next
+interrupt is generated.
+
+Clearing it immediately after executing check_evt_cause ensures that:
+1. Even if two interrupt tasks are triggered at the same time, they can
+be processed.
+2. If the second task is triggered during the processing of the first
+task and the interrupt source is not cleared, the interrupt is reported
+after vector0 is enabled.
+
+Fixes: b90fcc5bd904 ("net: hns3: add reset handling for VF when doing Core/Global/IMP reset")
+Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
+Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+@@ -2463,6 +2463,8 @@ static irqreturn_t hclgevf_misc_irq_hand
+
+ hclgevf_enable_vector(&hdev->misc_vector, false);
+ event_cause = hclgevf_check_evt_cause(hdev, &clearval);
++ if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER)
++ hclgevf_clear_event_cause(hdev, clearval);
+
+ switch (event_cause) {
+ case HCLGEVF_VECTOR0_EVENT_RST:
+@@ -2475,10 +2477,8 @@ static irqreturn_t hclgevf_misc_irq_hand
+ break;
+ }
+
+- if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER) {
+- hclgevf_clear_event_cause(hdev, clearval);
++ if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER)
+ hclgevf_enable_vector(&hdev->misc_vector, true);
+- }
+
+ return IRQ_HANDLED;
+ }
--- /dev/null
+From d18e81183b1cb9c309266cbbce9acd3e0c528d04 Mon Sep 17 00:00:00 2001
+From: Yufeng Mo <moyufeng@huawei.com>
+Date: Mon, 13 Sep 2021 21:08:21 +0800
+Subject: net: hns3: pad the short tunnel frame before sending to hardware
+
+From: Yufeng Mo <moyufeng@huawei.com>
+
+commit d18e81183b1cb9c309266cbbce9acd3e0c528d04 upstream.
+
+The hardware cannot handle short tunnel frames below 65 bytes,
+and will cause vlan tag missing problem. So pads packet size to
+65 bytes for tunnel frames to fix this bug.
+
+Fixes: 3db084d28dc0 ("net: hns3: Fix for vxlan tx checksum bug")
+Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
+Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+@@ -73,6 +73,7 @@ MODULE_PARM_DESC(tx_sgl, "Minimum number
+ #define HNS3_OUTER_VLAN_TAG 2
+
+ #define HNS3_MIN_TX_LEN 33U
++#define HNS3_MIN_TUN_PKT_LEN 65U
+
+ /* hns3_pci_tbl - PCI Device ID Table
+ *
+@@ -1425,8 +1426,11 @@ static int hns3_set_l2l3l4(struct sk_buf
+ l4.tcp->doff);
+ break;
+ case IPPROTO_UDP:
+- if (hns3_tunnel_csum_bug(skb))
+- return skb_checksum_help(skb);
++ if (hns3_tunnel_csum_bug(skb)) {
++ int ret = skb_put_padto(skb, HNS3_MIN_TUN_PKT_LEN);
++
++ return ret ? ret : skb_checksum_help(skb);
++ }
+
+ hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
+ hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4T_S,
--- /dev/null
+From ae7aaecc3f2f78b76ab3a8d6178610f55aadfa56 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Wed, 8 Sep 2021 20:17:17 +1000
+Subject: powerpc/64s: system call rfscv workaround for TM bugs
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+commit ae7aaecc3f2f78b76ab3a8d6178610f55aadfa56 upstream.
+
+The rfscv instruction does not work correctly with the fake-suspend mode
+in POWER9, which can end up with the hypervisor restoring an incorrect
+checkpoint.
+
+Work around this by setting the _TIF_RESTOREALL flag if a system call
+returns to a transaction active state, causing rfid to be used instead
+of rfscv to return, which will do the right thing. The contents of the
+registers are irrelevant because they will be overwritten in this case
+anyway.
+
+Fixes: 7fa95f9adaee7 ("powerpc/64s: system call support for scv/rfscv instructions")
+Reported-by: Eirik Fuller <efuller@redhat.com>
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20210908101718.118522-1-npiggin@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/kernel/interrupt.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/arch/powerpc/kernel/interrupt.c
++++ b/arch/powerpc/kernel/interrupt.c
+@@ -140,6 +140,19 @@ notrace long system_call_exception(long
+ irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
+
+ /*
++ * If system call is called with TM active, set _TIF_RESTOREALL to
++ * prevent RFSCV being used to return to userspace, because POWER9
++ * TM implementation has problems with this instruction returning to
++ * transactional state. Final register values are not relevant because
++ * the transaction will be aborted upon return anyway. Or in the case
++ * of unsupported_scv SIGILL fault, the return state does not much
++ * matter because it's an edge case.
++ */
++ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
++ unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
++ current_thread_info()->flags |= _TIF_RESTOREALL;
++
++ /*
+ * If the system call was made with a transaction active, doom it and
+ * return without performing the system call. Unless it was an
+ * unsupported scv vector, in which case it's treated like an illegal
--- /dev/null
+From 3a1e92d0896e928ac2a5b58962d05a39afef2e23 Mon Sep 17 00:00:00 2001
+From: Ganesh Goudar <ganeshgr@linux.ibm.com>
+Date: Thu, 9 Sep 2021 12:13:30 +0530
+Subject: powerpc/mce: Fix access error in mce handler
+
+From: Ganesh Goudar <ganeshgr@linux.ibm.com>
+
+commit 3a1e92d0896e928ac2a5b58962d05a39afef2e23 upstream.
+
+We queue an irq work for deferred processing of mce event in realmode
+mce handler, where translation is disabled. Queuing of the work may
+result in accessing memory outside RMO region, such access needs the
+translation to be enabled for an LPAR running with hash mmu else the
+kernel crashes.
+
+After enabling translation in mce_handle_error() we used to leave it
+enabled to avoid crashing here, but now with the commit
+74c3354bc1d89 ("powerpc/pseries/mce: restore msr before returning from
+handler") we are restoring the MSR to disable translation.
+
+Hence to fix this enable the translation before queuing the work.
+
+Without this change following trace is seen on injecting SLB multihit in
+an LPAR running with hash mmu.
+
+ Oops: Kernel access of bad area, sig: 11 [#1]
+ LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
+ CPU: 5 PID: 1883 Comm: insmod Tainted: G OE 5.14.0-mce+ #137
+ NIP: c000000000735d60 LR: c000000000318640 CTR: 0000000000000000
+ REGS: c00000001ebff9a0 TRAP: 0300 Tainted: G OE (5.14.0-mce+)
+ MSR: 8000000000001003 <SF,ME,RI,LE> CR: 28008228 XER: 00000001
+ CFAR: c00000000031863c DAR: c00000027fa8fe08 DSISR: 40000000 IRQMASK: 0
+ ...
+ NIP llist_add_batch+0x0/0x40
+ LR __irq_work_queue_local+0x70/0xc0
+ Call Trace:
+ 0xc00000001ebffc0c (unreliable)
+ irq_work_queue+0x40/0x70
+ machine_check_queue_event+0xbc/0xd0
+ machine_check_early_common+0x16c/0x1f4
+
+Fixes: 74c3354bc1d89 ("powerpc/pseries/mce: restore msr before returning from handler")
+Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
+[mpe: Fix comment formatting, trim oops in change log for readability]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20210909064330.312432-1-ganeshgr@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/kernel/mce.c | 17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/kernel/mce.c
++++ b/arch/powerpc/kernel/mce.c
+@@ -249,6 +249,7 @@ void machine_check_queue_event(void)
+ {
+ int index;
+ struct machine_check_event evt;
++ unsigned long msr;
+
+ if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+ return;
+@@ -262,8 +263,20 @@ void machine_check_queue_event(void)
+ memcpy(&local_paca->mce_info->mce_event_queue[index],
+ &evt, sizeof(evt));
+
+- /* Queue irq work to process this event later. */
+- irq_work_queue(&mce_event_process_work);
++ /*
++ * Queue irq work to process this event later. Before
++ * queuing the work enable translation for non radix LPAR,
++ * as irq_work_queue may try to access memory outside RMO
++ * region.
++ */
++ if (!radix_enabled() && firmware_has_feature(FW_FEATURE_LPAR)) {
++ msr = mfmsr();
++ mtmsr(msr | MSR_IR | MSR_DR);
++ irq_work_queue(&mce_event_process_work);
++ mtmsr(msr);
++ } else {
++ irq_work_queue(&mce_event_process_work);
++ }
+ }
+
+ void mce_common_process_ue(struct pt_regs *regs,
--- /dev/null
+From 20e100f52730cd0db609e559799c1712b5f27582 Mon Sep 17 00:00:00 2001
+From: Shai Malin <smalin@marvell.com>
+Date: Fri, 10 Sep 2021 11:33:56 +0300
+Subject: qed: Handle management FW error
+
+From: Shai Malin <smalin@marvell.com>
+
+commit 20e100f52730cd0db609e559799c1712b5f27582 upstream.
+
+Handle MFW (management FW) error response in order to avoid a crash
+during recovery flows.
+
+Changes from v1:
+- Add "Fixes tag".
+
+Fixes: 5e7ba042fd05 ("qed: Fix reading stale configuration information")
+Signed-off-by: Ariel Elior <aelior@marvell.com>
+Signed-off-by: Shai Malin <smalin@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qlogic/qed/qed_mcp.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+@@ -3368,6 +3368,7 @@ qed_mcp_get_nvm_image_att(struct qed_hwf
+ struct qed_nvm_image_att *p_image_att)
+ {
+ enum nvm_image_type type;
++ int rc;
+ u32 i;
+
+ /* Translate image_id into MFW definitions */
+@@ -3396,7 +3397,10 @@ qed_mcp_get_nvm_image_att(struct qed_hwf
+ return -EINVAL;
+ }
+
+- qed_mcp_nvm_info_populate(p_hwfn);
++ rc = qed_mcp_nvm_info_populate(p_hwfn);
++ if (rc)
++ return rc;
++
+ for (i = 0; i < p_hwfn->nvm_info.num_images; i++)
+ if (type == p_hwfn->nvm_info.image_att[i].image_type)
+ break;
--- /dev/null
+From a8b92b8c1eac8d655a97b1e90f4d83c25d9b9a18 Mon Sep 17 00:00:00 2001
+From: David Hildenbrand <david@redhat.com>
+Date: Thu, 9 Sep 2021 16:59:42 +0200
+Subject: s390/pci_mmio: fully validate the VMA before calling follow_pte()
+
+From: David Hildenbrand <david@redhat.com>
+
+commit a8b92b8c1eac8d655a97b1e90f4d83c25d9b9a18 upstream.
+
+We should not walk/touch page tables outside of VMA boundaries when
+holding only the mmap sem in read mode. Evil user space can modify the
+VMA layout just before this function runs and e.g., trigger races with
+page table removal code since commit dd2283f2605e ("mm: mmap: zap pages
+with read mmap_sem in munmap").
+
+find_vma() does not check if the address is >= the VMA start address;
+use vma_lookup() instead.
+
+Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
+Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Fixes: dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap")
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/pci/pci_mmio.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/s390/pci/pci_mmio.c
++++ b/arch/s390/pci/pci_mmio.c
+@@ -159,7 +159,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, uns
+
+ mmap_read_lock(current->mm);
+ ret = -EINVAL;
+- vma = find_vma(current->mm, mmio_addr);
++ vma = vma_lookup(current->mm, mmio_addr);
+ if (!vma)
+ goto out_unlock_mmap;
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+@@ -298,7 +298,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsi
+
+ mmap_read_lock(current->mm);
+ ret = -EINVAL;
+- vma = find_vma(current->mm, mmio_addr);
++ vma = vma_lookup(current->mm, mmio_addr);
+ if (!vma)
+ goto out_unlock_mmap;
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
--- /dev/null
+From 1b704b27beb11ce147d64b21c914e57afbfb5656 Mon Sep 17 00:00:00 2001
+From: Andrea Claudi <aclaudi@redhat.com>
+Date: Sat, 11 Sep 2021 16:14:18 +0200
+Subject: selftest: net: fix typo in altname test
+
+From: Andrea Claudi <aclaudi@redhat.com>
+
+commit 1b704b27beb11ce147d64b21c914e57afbfb5656 upstream.
+
+If altname deletion of the short alternative name fails, the error
+message printed is: "Failed to add short alternative name".
+This is obviously a typo, as we are testing altname deletion.
+
+Fix this using a proper error message.
+
+Fixes: f95e6c9c4617 ("selftest: net: add alternative names test")
+Signed-off-by: Andrea Claudi <aclaudi@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/altnames.sh | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/net/altnames.sh
++++ b/tools/testing/selftests/net/altnames.sh
+@@ -45,7 +45,7 @@ altnames_test()
+ check_err $? "Got unexpected long alternative name from link show JSON"
+
+ ip link property del $DUMMY_DEV altname $SHORT_NAME
+- check_err $? "Failed to add short alternative name"
++ check_err $? "Failed to delete short alternative name"
+
+ ip -j -p link show $SHORT_NAME &>/dev/null
+ check_fail $? "Unexpected success while trying to do link show with deleted short alternative name"
net-af_unix-fix-a-data-race-in-unix_dgram_poll.patch
net-dsa-destroy-the-phylink-instance-on-any-error-in-dsa_slave_phy_setup.patch
revert-ipv4-fix-memory-leaks-in-ip_cmsg_send-callers.patch
+x86-uaccess-fix-32-bit-__get_user_asm_u64-when-cc_has_asm_goto_output-y.patch
+bpf-cgroups-fix-cgroup-v2-fallback-on-v1-v2-mixed-mode.patch
+tcp-fix-tp-undo_retrans-accounting-in-tcp_sacktag_one.patch
+selftest-net-fix-typo-in-altname-test.patch
+qed-handle-management-fw-error.patch
+udp_tunnel-fix-udp_tunnel_nic-work-queue-type.patch
+dt-bindings-arm-fix-toradex-compatible-typo.patch
+ibmvnic-check-failover_pending-in-login-response.patch
+kvm-ppc-book3s-hv-tolerate-treclaim.-in-fake-suspend-mode-changing-registers.patch
+powerpc-64s-system-call-rfscv-workaround-for-tm-bugs.patch
+powerpc-mce-fix-access-error-in-mce-handler.patch
+s390-pci_mmio-fully-validate-the-vma-before-calling-follow_pte.patch
+bnxt_en-make-bnxt_free_skbs-safe-to-call-after-bnxt_free_mem.patch
+net-hns3-pad-the-short-tunnel-frame-before-sending-to-hardware.patch
+net-hns3-change-affinity_mask-to-numa-node-range.patch
+net-hns3-disable-mac-in-flr-process.patch
+net-hns3-fix-the-timing-issue-of-vf-clearing-interrupt-sources.patch
--- /dev/null
+From 4f884f3962767877d7aabbc1ec124d2c307a4257 Mon Sep 17 00:00:00 2001
+From: zhenggy <zhenggy@chinatelecom.cn>
+Date: Tue, 14 Sep 2021 09:51:15 +0800
+Subject: tcp: fix tp->undo_retrans accounting in tcp_sacktag_one()
+
+From: zhenggy <zhenggy@chinatelecom.cn>
+
+commit 4f884f3962767877d7aabbc1ec124d2c307a4257 upstream.
+
+Commit 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit
+time") may directly retransmit a multi-segment TSO/GSO packet without
+splitting it. Since this commit, we can no longer assume that a
+retransmitted packet is a single segment.
+
+This patch fixes the tp->undo_retrans accounting in tcp_sacktag_one()
+by using the actual segment count (pcount) of the retransmitted packet.
+
+Before that commit (10d3be569243), the assumption underlying the
+tp->undo_retrans-- seems correct.
+
+Fixes: 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time")
+Signed-off-by: zhenggy <zhenggy@chinatelecom.cn>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -1314,7 +1314,7 @@ static u8 tcp_sacktag_one(struct sock *s
+ if (dup_sack && (sacked & TCPCB_RETRANS)) {
+ if (tp->undo_marker && tp->undo_retrans > 0 &&
+ after(end_seq, tp->undo_marker))
+- tp->undo_retrans--;
++ tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount);
+ if ((sacked & TCPCB_SACKED_ACKED) &&
+ before(start_seq, state->reord))
+ state->reord = start_seq;
--- /dev/null
+From e50e711351bdc656a8e6ca1022b4293cae8dcd59 Mon Sep 17 00:00:00 2001
+From: Aya Levin <ayal@nvidia.com>
+Date: Mon, 13 Sep 2021 10:53:49 +0300
+Subject: udp_tunnel: Fix udp_tunnel_nic work-queue type
+
+From: Aya Levin <ayal@nvidia.com>
+
+commit e50e711351bdc656a8e6ca1022b4293cae8dcd59 upstream.
+
+Turn udp_tunnel_nic work-queue to an ordered work-queue. This queue
+holds the UDP-tunnel configuration commands of the different netdevs.
+When the netdevs are functions of the same NIC the order of
+execution may be crucial.
+
+Problem example:
+NIC with 2 PFs, both PFs declare offload quota of up to 3 UDP-ports.
+ $ifconfig eth2 1.1.1.1/16 up
+
+ $ip link add eth2_19503 type vxlan id 5049 remote 1.1.1.2 dev eth2 dstport 19053
+ $ip link set dev eth2_19503 up
+
+ $ip link add eth2_19504 type vxlan id 5049 remote 1.1.1.3 dev eth2 dstport 19054
+ $ip link set dev eth2_19504 up
+
+ $ip link add eth2_19505 type vxlan id 5049 remote 1.1.1.4 dev eth2 dstport 19055
+ $ip link set dev eth2_19505 up
+
+ $ip link add eth2_19506 type vxlan id 5049 remote 1.1.1.5 dev eth2 dstport 19056
+ $ip link set dev eth2_19506 up
+
+NIC RX port offload infrastructure offloads the first 3 UDP-ports (on
+all devices which sets NETIF_F_RX_UDP_TUNNEL_PORT feature) and not
+UDP-port 19056. So both PFs gets this offload configuration.
+
+ $ip link set dev eth2_19504 down
+
+This triggers udp-tunnel-core to remove the UDP-port 19504 from
+offload-ports-list and offload UDP-port 19056 instead.
+
+In this scenario it is important that the UDP-port of 19504 will be
+removed from both PFs before trying to add UDP-port 19056. The NIC can
+stop offloading a UDP-port only when all references are removed.
+Otherwise the NIC may report exceeding of the offload quota.
+
+Fixes: cc4e3835eff4 ("udp_tunnel: add central NIC RX port offload infrastructure")
+Signed-off-by: Aya Levin <ayal@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp_tunnel_nic.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/udp_tunnel_nic.c
++++ b/net/ipv4/udp_tunnel_nic.c
+@@ -935,7 +935,7 @@ static int __init udp_tunnel_nic_init_mo
+ {
+ int err;
+
+- udp_tunnel_nic_workqueue = alloc_workqueue("udp_tunnel_nic", 0, 0);
++ udp_tunnel_nic_workqueue = alloc_ordered_workqueue("udp_tunnel_nic", 0);
+ if (!udp_tunnel_nic_workqueue)
+ return -ENOMEM;
+
--- /dev/null
+From a69ae291e1cc2d08ae77c2029579c59c9bde5061 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will@kernel.org>
+Date: Mon, 13 Sep 2021 17:35:47 +0100
+Subject: x86/uaccess: Fix 32-bit __get_user_asm_u64() when CC_HAS_ASM_GOTO_OUTPUT=y
+
+From: Will Deacon <will@kernel.org>
+
+commit a69ae291e1cc2d08ae77c2029579c59c9bde5061 upstream.
+
+Commit 865c50e1d279 ("x86/uaccess: utilize CONFIG_CC_HAS_ASM_GOTO_OUTPUT")
+added an optimised version of __get_user_asm() for x86 using 'asm goto'.
+
+Like the non-optimised code, the 32-bit implementation of 64-bit
+get_user() expands to a pair of 32-bit accesses. Unlike the
+non-optimised code, the _original_ pointer is incremented to copy the
+high word instead of loading through a new pointer explicitly
+constructed to point at a 32-bit type. Consequently, if the pointer
+points at a 64-bit type then we end up loading the wrong data for the
+upper 32-bits.
+
+This was observed as a mount() failure in Android targeting i686 after
+b0cfcdd9b967 ("d_path: make 'prepend()' fill up the buffer exactly on
+overflow") because the call to copy_from_kernel_nofault() from
+prepend_copy() ends up in __get_kernel_nofault() and casts the source
+pointer to a 'u64 __user *'. An attempt to mount at "/debug_ramdisk"
+therefore ends up failing trying to mount "/debumdismdisk".
+
+Use the existing '__gu_ptr' source pointer to unsigned int for 32-bit
+__get_user_asm_u64() instead of the original pointer.
+
+Cc: Bill Wendling <morbo@google.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Reported-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Fixes: 865c50e1d279 ("x86/uaccess: utilize CONFIG_CC_HAS_ASM_GOTO_OUTPUT")
+Signed-off-by: Will Deacon <will@kernel.org>
+Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
+Tested-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/uaccess.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/uaccess.h
++++ b/arch/x86/include/asm/uaccess.h
+@@ -301,8 +301,8 @@ do { \
+ unsigned int __gu_low, __gu_high; \
+ const unsigned int __user *__gu_ptr; \
+ __gu_ptr = (const void __user *)(ptr); \
+- __get_user_asm(__gu_low, ptr, "l", "=r", label); \
+- __get_user_asm(__gu_high, ptr+1, "l", "=r", label); \
++ __get_user_asm(__gu_low, __gu_ptr, "l", "=r", label); \
++ __get_user_asm(__gu_high, __gu_ptr+1, "l", "=r", label); \
+ (x) = ((unsigned long long)__gu_high << 32) | __gu_low; \
+ } while (0)
+ #else