From: Greg Kroah-Hartman Date: Sun, 28 Jun 2020 12:23:26 +0000 (+0200) Subject: 5.7-stable patches X-Git-Tag: v5.7.7~62 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3f0dacfa9fc8c12eabba3fb7add8f35399e271e3;p=thirdparty%2Fkernel%2Fstable-queue.git 5.7-stable patches added patches: bareudp-fixed-multiproto-mode-configuration.patch bnxt_en-do-not-enable-legacy-tx-push-on-older-firmware.patch bnxt_en-fix-statistics-counters-issue-during-ifdown-with-older-firmware.patch bnxt_en-read-vpd-info-only-for-pfs.patch bnxt_en-store-the-running-firmware-version-code.patch bpf-tcp-bpf_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch enetc-fix-tx-rings-bitmap-iteration-range-irq-handling.patch ethtool-fix-check-in-ethtool_rx_flow_rule_create.patch geneve-allow-changing-df-behavior-after-creation.patch ibmveth-fix-max-mtu-limit.patch ionic-update-the-queue-count-on-open.patch ip6_gre-fix-use-after-free-in-ip6gre_tunnel_lookup.patch ip_tunnel-fix-use-after-free-in-ip_tunnel_lookup.patch mld-fix-memory-leak-in-ipv6_mc_destroy_dev.patch mlxsw-spectrum-do-not-rely-on-machine-endianness.patch mptcp-drop-sndr_key-in-mptcp_syn_options.patch mptcp-fix-memory-leak-in-mptcp_subflow_create_socket.patch mvpp2-ethtool-rxtx-stats-fix.patch net-bridge-enfore-alignment-for-ethernet-address.patch net-core-reduce-recursion-limit-value.patch net-do-not-clear-the-sock-tx-queue-in-sk_set_socket.patch net-dsa-bcm_sf2-fix-node-reference-count.patch net-ethtool-add-missing-netif_f_gso_fraglist-feature-string.patch net-ethtool-add-missing-string-for-netif_f_gso_tunnel_remcsum.patch net-fix-memleak-in-register_netdevice.patch net-fix-the-arp-error-in-some-cases.patch net-increment-xmit_recursion-level-in-dev_direct_xmit.patch net-macb-call-pm_runtime_put_sync-on-failure-path.patch net-macb-undo-operations-in-case-of-failure.patch net-phy-check-harder-for-errors-in-get_phy_id.patch net-phy-mscc-avoid-skcipher-api-for-single-block-aes-encryption.patch 
net-phy-smsc-fix-printing-too-many-logs.patch net-phylink-ensure-manual-pause-mode-configuration-takes-effect.patch net-phylink-fix-ethtool-a-with-attached-phys.patch net-usb-ax88179_178a-fix-packet-alignment-padding.patch of-of_mdio-correct-loop-scanning-logic.patch openvswitch-take-into-account-de-fragmentation-gso_size-in-execute_check_pkt_len.patch r8169-fix-firmware-not-resetting-tp-ocp_base.patch rocker-fix-incorrect-error-handling-in-dma_rings_init.patch rxrpc-fix-notification-call-on-completion-of-discarded-calls.patch sch_cake-don-t-call-diffserv-parsing-code-when-it-is-not-needed.patch sch_cake-don-t-try-to-reallocate-or-unshare-skb-unconditionally.patch sch_cake-fix-a-few-style-nits.patch sctp-don-t-advertise-ipv4-addresses-if-ipv6only-is-set-on-the-socket.patch tcp-don-t-ignore-ecn-cwr-on-pure-ack.patch tcp-grow-window-for-ooo-packets-only-for-sack-flows.patch tcp_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch tg3-driver-sleeps-indefinitely-when-eeh-errors-exceed-eeh_max_freezes.patch wireguard-device-avoid-circular-netns-references.patch --- diff --git a/queue-5.7/bareudp-fixed-multiproto-mode-configuration.patch b/queue-5.7/bareudp-fixed-multiproto-mode-configuration.patch new file mode 100644 index 00000000000..edabd185adc --- /dev/null +++ b/queue-5.7/bareudp-fixed-multiproto-mode-configuration.patch @@ -0,0 +1,31 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Martin +Date: Wed, 17 Jun 2020 22:30:23 +0530 +Subject: bareudp: Fixed multiproto mode configuration + +From: Martin + +[ Upstream commit 4c98045c9b74feab837be58986c0517d3cc661f1 ] + +Code to handle multiproto configuration is missing. + +Fixes: 4b5f67232d95 ("net: Special handling for IP & MPLS") +Signed-off-by: Martin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bareudp.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/bareudp.c ++++ b/drivers/net/bareudp.c +@@ -572,6 +572,9 @@ static int bareudp2info(struct nlattr *d + if (data[IFLA_BAREUDP_SRCPORT_MIN]) + conf->sport_min = nla_get_u16(data[IFLA_BAREUDP_SRCPORT_MIN]); + ++ if (data[IFLA_BAREUDP_MULTIPROTO_MODE]) ++ conf->multi_proto_mode = true; ++ + return 0; + } + diff --git a/queue-5.7/bnxt_en-do-not-enable-legacy-tx-push-on-older-firmware.patch b/queue-5.7/bnxt_en-do-not-enable-legacy-tx-push-on-older-firmware.patch new file mode 100644 index 00000000000..890740f8a80 --- /dev/null +++ b/queue-5.7/bnxt_en-do-not-enable-legacy-tx-push-on-older-firmware.patch @@ -0,0 +1,45 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Michael Chan +Date: Tue, 23 Jun 2020 19:01:36 -0400 +Subject: bnxt_en: Do not enable legacy TX push on older firmware. + +From: Michael Chan + +[ Upstream commit fed7edd18143c68c63ea049999a7e861123de6de ] + +Older firmware may not support legacy TX push properly and may not +be disabling it. So we check certain firmware versions that may +have this problem and disable legacy TX push unconditionally. + +Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") +Reviewed-by: Edwin Peer +Signed-off-by: Michael Chan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- + drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + + 2 files changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -6953,7 +6953,8 @@ static int __bnxt_hwrm_func_qcaps(struct + bp->fw_cap |= BNXT_FW_CAP_ERR_RECOVER_RELOAD; + + bp->tx_push_thresh = 0; +- if (flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) ++ if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) && ++ BNXT_FW_MAJ(bp) > 217) + bp->tx_push_thresh = BNXT_TX_PUSH_THRESH; + + hw_resc->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx); +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +@@ -1732,6 +1732,7 @@ struct bnxt { + u64 fw_ver_code; + #define BNXT_FW_VER_CODE(maj, min, bld, rsv) \ + ((u64)(maj) << 48 | (u64)(min) << 32 | (u64)(bld) << 16 | (rsv)) ++#define BNXT_FW_MAJ(bp) ((bp)->fw_ver_code >> 48) + + __be16 vxlan_port; + u8 vxlan_port_cnt; diff --git a/queue-5.7/bnxt_en-fix-statistics-counters-issue-during-ifdown-with-older-firmware.patch b/queue-5.7/bnxt_en-fix-statistics-counters-issue-during-ifdown-with-older-firmware.patch new file mode 100644 index 00000000000..770a2dacea8 --- /dev/null +++ b/queue-5.7/bnxt_en-fix-statistics-counters-issue-during-ifdown-with-older-firmware.patch @@ -0,0 +1,63 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Michael Chan +Date: Tue, 23 Jun 2020 19:01:37 -0400 +Subject: bnxt_en: Fix statistics counters issue during ifdown with older firmware. + +From: Michael Chan + +[ Upstream commit c2dec363feb41544a76c8083aca2378990e17166 ] + +On older firmware, the hardware statistics are not cleared when the +driver frees the hardware stats contexts during ifdown. The driver +expects these stats to be cleared and saves a copy before freeing +the stats contexts. 
During the next ifup, the driver will likely +allocate the same hardware stats contexts and this will cause a big +increase in the counters as the old counters are added back to the +saved counters. + +We fix it by making an additional firmware call to clear the counters +before freeing the hw stats contexts when the firmware is the older +20.x firmware. + +Fixes: b8875ca356f1 ("bnxt_en: Save ring statistics before reset.") +Reported-by: Jakub Kicinski +Reviewed-by: Vasundhara Volam +Signed-off-by: Michael Chan +Tested-by: Jakub Kicinski +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -6293,6 +6293,7 @@ int bnxt_hwrm_set_coal(struct bnxt *bp) + + static void bnxt_hwrm_stat_ctx_free(struct bnxt *bp) + { ++ struct hwrm_stat_ctx_clr_stats_input req0 = {0}; + struct hwrm_stat_ctx_free_input req = {0}; + int i; + +@@ -6302,6 +6303,7 @@ static void bnxt_hwrm_stat_ctx_free(stru + if (BNXT_CHIP_TYPE_NITRO_A0(bp)) + return; + ++ bnxt_hwrm_cmd_hdr_init(bp, &req0, HWRM_STAT_CTX_CLR_STATS, -1, -1); + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_FREE, -1, -1); + + mutex_lock(&bp->hwrm_cmd_lock); +@@ -6311,7 +6313,11 @@ static void bnxt_hwrm_stat_ctx_free(stru + + if (cpr->hw_stats_ctx_id != INVALID_STATS_CTX_ID) { + req.stat_ctx_id = cpu_to_le32(cpr->hw_stats_ctx_id); +- ++ if (BNXT_FW_MAJ(bp) <= 20) { ++ req0.stat_ctx_id = req.stat_ctx_id; ++ _hwrm_send_message(bp, &req0, sizeof(req0), ++ HWRM_CMD_TIMEOUT); ++ } + _hwrm_send_message(bp, &req, sizeof(req), + HWRM_CMD_TIMEOUT); + diff --git a/queue-5.7/bnxt_en-read-vpd-info-only-for-pfs.patch b/queue-5.7/bnxt_en-read-vpd-info-only-for-pfs.patch new file mode 100644 index 00000000000..9de83660cb3 --- /dev/null +++ b/queue-5.7/bnxt_en-read-vpd-info-only-for-pfs.patch @@ -0,0 +1,34 @@ +From foo@baz 
Sun 28 Jun 2020 02:22:12 PM CEST +From: Vasundhara Volam +Date: Tue, 23 Jun 2020 19:01:38 -0400 +Subject: bnxt_en: Read VPD info only for PFs + +From: Vasundhara Volam + +[ Upstream commit c55e28a8b43fcd7dc71868bd165705bc7741a7ca ] + +Virtual functions does not have VPD information. This patch modifies +calling bnxt_read_vpd_info() only for PFs and avoids an unnecessary +error log. + +Fixes: a0d0fd70fed5 ("bnxt_en: Read partno and serialno of the board from VPD") +Signed-off-by: Vasundhara Volam +Signed-off-by: Michael Chan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -11884,7 +11884,8 @@ static int bnxt_init_one(struct pci_dev + dev->ethtool_ops = &bnxt_ethtool_ops; + pci_set_drvdata(pdev, dev); + +- bnxt_vpd_read_info(bp); ++ if (BNXT_PF(bp)) ++ bnxt_vpd_read_info(bp); + + rc = bnxt_alloc_hwrm_resources(bp); + if (rc) diff --git a/queue-5.7/bnxt_en-store-the-running-firmware-version-code.patch b/queue-5.7/bnxt_en-store-the-running-firmware-version-code.patch new file mode 100644 index 00000000000..ab012bdde1d --- /dev/null +++ b/queue-5.7/bnxt_en-store-the-running-firmware-version-code.patch @@ -0,0 +1,78 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Michael Chan +Date: Tue, 23 Jun 2020 19:01:35 -0400 +Subject: bnxt_en: Store the running firmware version code. + +From: Michael Chan + +[ Upstream commit d0ad2ea2bc185835f8a749302ad07b70528d2a09 ] + +We currently only store the firmware version as a string for ethtool +and devlink info. Store it also as a version code. The next 2 +patches will need to check the firmware major version to determine +some workarounds. + +We also use the 16-bit firmware version fields if the firmware is newer +and provides the 16-bit fields. 
+ +Reviewed-by: Edwin Peer +Signed-off-by: Michael Chan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 22 ++++++++++++++++++---- + drivers/net/ethernet/broadcom/bnxt/bnxt.h | 4 ++++ + 2 files changed, 22 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -7217,8 +7217,9 @@ static int __bnxt_hwrm_ver_get(struct bn + static int bnxt_hwrm_ver_get(struct bnxt *bp) + { + struct hwrm_ver_get_output *resp = bp->hwrm_cmd_resp_addr; ++ u16 fw_maj, fw_min, fw_bld, fw_rsv; + u32 dev_caps_cfg, hwrm_ver; +- int rc; ++ int rc, len; + + bp->hwrm_max_req_len = HWRM_MAX_REQ_LEN; + mutex_lock(&bp->hwrm_cmd_lock); +@@ -7250,9 +7251,22 @@ static int bnxt_hwrm_ver_get(struct bnxt + resp->hwrm_intf_maj_8b, resp->hwrm_intf_min_8b, + resp->hwrm_intf_upd_8b); + +- snprintf(bp->fw_ver_str, BC_HWRM_STR_LEN, "%d.%d.%d.%d", +- resp->hwrm_fw_maj_8b, resp->hwrm_fw_min_8b, +- resp->hwrm_fw_bld_8b, resp->hwrm_fw_rsvd_8b); ++ fw_maj = le16_to_cpu(resp->hwrm_fw_major); ++ if (bp->hwrm_spec_code > 0x10803 && fw_maj) { ++ fw_min = le16_to_cpu(resp->hwrm_fw_minor); ++ fw_bld = le16_to_cpu(resp->hwrm_fw_build); ++ fw_rsv = le16_to_cpu(resp->hwrm_fw_patch); ++ len = FW_VER_STR_LEN; ++ } else { ++ fw_maj = resp->hwrm_fw_maj_8b; ++ fw_min = resp->hwrm_fw_min_8b; ++ fw_bld = resp->hwrm_fw_bld_8b; ++ fw_rsv = resp->hwrm_fw_rsvd_8b; ++ len = BC_HWRM_STR_LEN; ++ } ++ bp->fw_ver_code = BNXT_FW_VER_CODE(fw_maj, fw_min, fw_bld, fw_rsv); ++ snprintf(bp->fw_ver_str, len, "%d.%d.%d.%d", fw_maj, fw_min, fw_bld, ++ fw_rsv); + + if (strlen(resp->active_pkg_name)) { + int fw_ver_len = strlen(bp->fw_ver_str); +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +@@ -1729,6 +1729,10 @@ struct bnxt { + #define PHY_VER_STR_LEN (FW_VER_STR_LEN - BC_HWRM_STR_LEN) + char fw_ver_str[FW_VER_STR_LEN]; + char 
hwrm_ver_supp[FW_VER_STR_LEN]; ++ u64 fw_ver_code; ++#define BNXT_FW_VER_CODE(maj, min, bld, rsv) \ ++ ((u64)(maj) << 48 | (u64)(min) << 32 | (u64)(bld) << 16 | (rsv)) ++ + __be16 vxlan_port; + u8 vxlan_port_cnt; + __le16 vxlan_fw_dst_port_id; diff --git a/queue-5.7/bpf-tcp-bpf_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch b/queue-5.7/bpf-tcp-bpf_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch new file mode 100644 index 00000000000..457955c069f --- /dev/null +++ b/queue-5.7/bpf-tcp-bpf_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch @@ -0,0 +1,60 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Neal Cardwell +Date: Wed, 24 Jun 2020 12:42:03 -0400 +Subject: bpf: tcp: bpf_cubic: fix spurious HYSTART_DELAY exit upon drop in min RTT + +From: Neal Cardwell + +[ Upstream commit 7d21d54d624777358ab6c7be7ff778808fef70ba ] + +Apply the fix from: + "tcp_cubic: fix spurious HYSTART_DELAY exit upon drop in min RTT" +to the BPF implementation of TCP CUBIC congestion control. + +Repeating the commit description here for completeness: + +Mirja Kuehlewind reported a bug in Linux TCP CUBIC Hystart, where +Hystart HYSTART_DELAY mechanism can exit Slow Start spuriously on an +ACK when the minimum rtt of a connection goes down. From inspection it +is clear from the existing code that this could happen in an example +like the following: + +o The first 8 RTT samples in a round trip are 150ms, resulting in a + curr_rtt of 150ms and a delay_min of 150ms. + +o The 9th RTT sample is 100ms. The curr_rtt does not change after the + first 8 samples, so curr_rtt remains 150ms. But delay_min can be + lowered at any time, so delay_min falls to 100ms. The code executes + the HYSTART_DELAY comparison between curr_rtt of 150ms and delay_min + of 100ms, and the curr_rtt is declared far enough above delay_min to + force a (spurious) exit of Slow start. + +The fix here is simple: allow every RTT sample in a round trip to +lower the curr_rtt. 
+ +Fixes: 6de4a9c430b5 ("bpf: tcp: Add bpf_cubic example") +Reported-by: Mirja Kuehlewind +Signed-off-by: Neal Cardwell +Signed-off-by: Eric Dumazet +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/bpf/progs/bpf_cubic.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c ++++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c +@@ -480,10 +480,9 @@ static __always_inline void hystart_upda + + if (hystart_detect & HYSTART_DELAY) { + /* obtain the minimum delay of more than sampling packets */ ++ if (ca->curr_rtt > delay) ++ ca->curr_rtt = delay; + if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { +- if (ca->curr_rtt > delay) +- ca->curr_rtt = delay; +- + ca->sample_cnt++; + } else { + if (ca->curr_rtt > ca->delay_min + diff --git a/queue-5.7/enetc-fix-tx-rings-bitmap-iteration-range-irq-handling.patch b/queue-5.7/enetc-fix-tx-rings-bitmap-iteration-range-irq-handling.patch new file mode 100644 index 00000000000..f98edfebb9c --- /dev/null +++ b/queue-5.7/enetc-fix-tx-rings-bitmap-iteration-range-irq-handling.patch @@ -0,0 +1,51 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Claudiu Manoil +Date: Fri, 26 Jun 2020 19:17:29 +0300 +Subject: enetc: Fix tx rings bitmap iteration range, irq handling + +From: Claudiu Manoil + +[ Upstream commit 0574e2000fc3103cbc69ba82ec1175ce171fdf5e ] + +The rings bitmap of an interrupt vector encodes +which of the device's rings were assigned to that +interrupt vector. +Hence the iteration range of the tx rings bitmap +(for_each_set_bit()) should be the total number of +Tx rings of that netdevice instead of the number of +rings assigned to the interrupt vector. +Since there are 2 cores, and one interrupt vector for +each core, the number of rings asigned to an interrupt +vector is half the number of available rings. 
+The impact of this error is that the upper half of the +tx rings could still generate interrupts during napi +polling. + +Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers") +Signed-off-by: Claudiu Manoil +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/freescale/enetc/enetc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/freescale/enetc/enetc.c ++++ b/drivers/net/ethernet/freescale/enetc/enetc.c +@@ -266,7 +266,7 @@ static irqreturn_t enetc_msix(int irq, v + /* disable interrupts */ + enetc_wr_reg(v->rbier, 0); + +- for_each_set_bit(i, &v->tx_rings_map, v->count_tx_rings) ++ for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS) + enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i), 0); + + napi_schedule_irqoff(&v->napi); +@@ -302,7 +302,7 @@ static int enetc_poll(struct napi_struct + /* enable interrupts */ + enetc_wr_reg(v->rbier, ENETC_RBIER_RXTIE); + +- for_each_set_bit(i, &v->tx_rings_map, v->count_tx_rings) ++ for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS) + enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i), + ENETC_TBIER_TXTIE); + diff --git a/queue-5.7/ethtool-fix-check-in-ethtool_rx_flow_rule_create.patch b/queue-5.7/ethtool-fix-check-in-ethtool_rx_flow_rule_create.patch new file mode 100644 index 00000000000..a38587c3ab7 --- /dev/null +++ b/queue-5.7/ethtool-fix-check-in-ethtool_rx_flow_rule_create.patch @@ -0,0 +1,30 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Gaurav Singh +Date: Sun, 21 Jun 2020 11:30:17 -0400 +Subject: ethtool: Fix check in ethtool_rx_flow_rule_create + +From: Gaurav Singh + +[ Upstream commit 21a739c64d3e9871186483a0cc3e7b52638c3d59 ] + +Fix check in ethtool_rx_flow_rule_create + +Fixes: eca4205f9ec3 ("ethtool: add ethtool_rx_flow_spec to flow_rule structure translator") +Signed-off-by: Gaurav Singh +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ethtool/ioctl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ethtool/ioctl.c ++++ b/net/ethtool/ioctl.c +@@ -2957,7 +2957,7 @@ ethtool_rx_flow_rule_create(const struct + sizeof(match->mask.ipv6.dst)); + } + if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) || +- memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { ++ memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS); + match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] = diff --git a/queue-5.7/geneve-allow-changing-df-behavior-after-creation.patch b/queue-5.7/geneve-allow-changing-df-behavior-after-creation.patch new file mode 100644 index 00000000000..e3ae713c99f --- /dev/null +++ b/queue-5.7/geneve-allow-changing-df-behavior-after-creation.patch @@ -0,0 +1,43 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Sabrina Dubroca +Date: Thu, 18 Jun 2020 12:13:22 +0200 +Subject: geneve: allow changing DF behavior after creation + +From: Sabrina Dubroca + +[ Upstream commit 56c09de347e40804fc8dad155272fb9609e0a97b ] + +Currently, trying to change the DF parameter of a geneve device does +nothing: + + # ip -d link show geneve1 + 14: geneve1: + link/ether + geneve id 1 remote 10.0.0.1 ttl auto df set dstport 6081 + # ip link set geneve1 type geneve id 1 df unset + # ip -d link show geneve1 + 14: geneve1: + link/ether + geneve id 1 remote 10.0.0.1 ttl auto df set dstport 6081 + +We just need to update the value in geneve_changelink. + +Fixes: a025fb5f49ad ("geneve: Allow configuration of DF behaviour") +Signed-off-by: Sabrina Dubroca +Reviewed-by: Stefano Brivio +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/geneve.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/geneve.c ++++ b/drivers/net/geneve.c +@@ -1649,6 +1649,7 @@ static int geneve_changelink(struct net_ + geneve->collect_md = metadata; + geneve->use_udp6_rx_checksums = use_udp6_rx_checksums; + geneve->ttl_inherit = ttl_inherit; ++ geneve->df = df; + geneve_unquiesce(geneve, gs4, gs6); + + return 0; diff --git a/queue-5.7/ibmveth-fix-max-mtu-limit.patch b/queue-5.7/ibmveth-fix-max-mtu-limit.patch new file mode 100644 index 00000000000..b1320c40198 --- /dev/null +++ b/queue-5.7/ibmveth-fix-max-mtu-limit.patch @@ -0,0 +1,35 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Thomas Falcon +Date: Thu, 18 Jun 2020 10:43:46 -0500 +Subject: ibmveth: Fix max MTU limit + +From: Thomas Falcon + +[ Upstream commit 5948378b26d89f8aa5eac37629dbd0616ce8d7a7 ] + +The max MTU limit defined for ibmveth is not accounting for +virtual ethernet buffer overhead, which is twenty-two additional +bytes set aside for the ethernet header and eight additional bytes +of an opaque handle reserved for use by the hypervisor. Update the +max MTU to reflect this overhead. + +Fixes: d894be57ca92 ("ethernet: use net core MTU range checking in more drivers") +Fixes: 110447f8269a ("ethernet: fix min/max MTU typos") +Signed-off-by: Thomas Falcon +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/ibm/ibmveth.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/ibm/ibmveth.c ++++ b/drivers/net/ethernet/ibm/ibmveth.c +@@ -1715,7 +1715,7 @@ static int ibmveth_probe(struct vio_dev + } + + netdev->min_mtu = IBMVETH_MIN_MTU; +- netdev->max_mtu = ETH_MAX_MTU; ++ netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH; + + memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN); + diff --git a/queue-5.7/ionic-update-the-queue-count-on-open.patch b/queue-5.7/ionic-update-the-queue-count-on-open.patch new file mode 100644 index 00000000000..bbb4043a4c5 --- /dev/null +++ b/queue-5.7/ionic-update-the-queue-count-on-open.patch @@ -0,0 +1,40 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Shannon Nelson +Date: Thu, 25 Jun 2020 22:58:37 -0700 +Subject: ionic: update the queue count on open + +From: Shannon Nelson + +[ Upstream commit fa48494cce5f6360b0f8683cdf258fb45c666287 ] + +Let the network stack know the real number of queues that +we are using. + +v2: added error checking + +Fixes: 49d3b493673a ("ionic: disable the queues on link down") +Signed-off-by: Shannon Nelson +Reviewed-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/pensando/ionic/ionic_lif.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c ++++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c +@@ -1653,6 +1653,14 @@ int ionic_open(struct net_device *netdev + if (err) + goto err_out; + ++ err = netif_set_real_num_tx_queues(netdev, lif->nxqs); ++ if (err) ++ goto err_txrx_deinit; ++ ++ err = netif_set_real_num_rx_queues(netdev, lif->nxqs); ++ if (err) ++ goto err_txrx_deinit; ++ + /* don't start the queues until we have link */ + if (netif_carrier_ok(netdev)) { + err = ionic_start_queues(lif); diff --git a/queue-5.7/ip6_gre-fix-use-after-free-in-ip6gre_tunnel_lookup.patch b/queue-5.7/ip6_gre-fix-use-after-free-in-ip6gre_tunnel_lookup.patch new file mode 100644 index 00000000000..0bd36d0b5f3 --- /dev/null +++ b/queue-5.7/ip6_gre-fix-use-after-free-in-ip6gre_tunnel_lookup.patch @@ -0,0 +1,114 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Taehee Yoo +Date: Tue, 16 Jun 2020 16:04:00 +0000 +Subject: ip6_gre: fix use-after-free in ip6gre_tunnel_lookup() + +From: Taehee Yoo + +[ Upstream commit dafabb6590cb15f300b77c095d50312e2c7c8e0f ] + +In the datapath, the ip6gre_tunnel_lookup() is used and it internally uses +fallback tunnel device pointer, which is fb_tunnel_dev. +This pointer variable should be set to NULL when a fb interface is deleted. +But there is no routine to set fb_tunnel_dev pointer to NULL. +So, this pointer will be still used after interface is deleted and +it eventually results in the use-after-free problem. 
+ +Test commands: + ip netns add A + ip netns add B + ip link add eth0 type veth peer name eth1 + ip link set eth0 netns A + ip link set eth1 netns B + + ip netns exec A ip link set lo up + ip netns exec A ip link set eth0 up + ip netns exec A ip link add ip6gre1 type ip6gre local fc:0::1 \ + remote fc:0::2 + ip netns exec A ip -6 a a fc:100::1/64 dev ip6gre1 + ip netns exec A ip link set ip6gre1 up + ip netns exec A ip -6 a a fc:0::1/64 dev eth0 + ip netns exec A ip link set ip6gre0 up + + ip netns exec B ip link set lo up + ip netns exec B ip link set eth1 up + ip netns exec B ip link add ip6gre1 type ip6gre local fc:0::2 \ + remote fc:0::1 + ip netns exec B ip -6 a a fc:100::2/64 dev ip6gre1 + ip netns exec B ip link set ip6gre1 up + ip netns exec B ip -6 a a fc:0::2/64 dev eth1 + ip netns exec B ip link set ip6gre0 up + ip netns exec A ping fc:100::2 -s 60000 & + ip netns del B + +Splat looks like: +[ 73.087285][ C1] BUG: KASAN: use-after-free in ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] +[ 73.088361][ C1] Read of size 4 at addr ffff888040559218 by task ping/1429 +[ 73.089317][ C1] +[ 73.089638][ C1] CPU: 1 PID: 1429 Comm: ping Not tainted 5.7.0+ #602 +[ 73.090531][ C1] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 +[ 73.091725][ C1] Call Trace: +[ 73.092160][ C1] +[ 73.092556][ C1] dump_stack+0x96/0xdb +[ 73.093122][ C1] print_address_description.constprop.6+0x2cc/0x450 +[ 73.094016][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] +[ 73.094894][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] +[ 73.095767][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] +[ 73.096619][ C1] kasan_report+0x154/0x190 +[ 73.097209][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] +[ 73.097989][ C1] ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] +[ 73.098750][ C1] ? gre_del_protocol+0x60/0x60 [gre] +[ 73.099500][ C1] gre_rcv+0x1c5/0x1450 [ip6_gre] +[ 73.100199][ C1] ? ip6gre_header+0xf00/0xf00 [ip6_gre] +[ 73.100985][ C1] ? 
rcu_read_lock_sched_held+0xc0/0xc0 +[ 73.101830][ C1] ? ip6_input_finish+0x5/0xf0 +[ 73.102483][ C1] ip6_protocol_deliver_rcu+0xcbb/0x1510 +[ 73.103296][ C1] ip6_input_finish+0x5b/0xf0 +[ 73.103920][ C1] ip6_input+0xcd/0x2c0 +[ 73.104473][ C1] ? ip6_input_finish+0xf0/0xf0 +[ 73.105115][ C1] ? rcu_read_lock_held+0x90/0xa0 +[ 73.105783][ C1] ? rcu_read_lock_sched_held+0xc0/0xc0 +[ 73.106548][ C1] ipv6_rcv+0x1f1/0x300 +[ ... ] + +Suggested-by: Eric Dumazet +Fixes: c12b395a4664 ("gre: Support GRE over IPv6") +Signed-off-by: Taehee Yoo +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -127,6 +127,7 @@ static struct ip6_tnl *ip6gre_tunnel_loo + gre_proto == htons(ETH_P_ERSPAN2)) ? + ARPHRD_ETHER : ARPHRD_IP6GRE; + int score, cand_score = 4; ++ struct net_device *ndev; + + for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { + if (!ipv6_addr_equal(local, &t->parms.laddr) || +@@ -238,9 +239,9 @@ static struct ip6_tnl *ip6gre_tunnel_loo + if (t && t->dev->flags & IFF_UP) + return t; + +- dev = ign->fb_tunnel_dev; +- if (dev && dev->flags & IFF_UP) +- return netdev_priv(dev); ++ ndev = READ_ONCE(ign->fb_tunnel_dev); ++ if (ndev && ndev->flags & IFF_UP) ++ return netdev_priv(ndev); + + return NULL; + } +@@ -413,6 +414,8 @@ static void ip6gre_tunnel_uninit(struct + + ip6gre_tunnel_unlink_md(ign, t); + ip6gre_tunnel_unlink(ign, t); ++ if (ign->fb_tunnel_dev == dev) ++ WRITE_ONCE(ign->fb_tunnel_dev, NULL); + dst_cache_reset(&t->dst_cache); + dev_put(dev); + } diff --git a/queue-5.7/ip_tunnel-fix-use-after-free-in-ip_tunnel_lookup.patch b/queue-5.7/ip_tunnel-fix-use-after-free-in-ip_tunnel_lookup.patch new file mode 100644 index 00000000000..9c65b5d6c19 --- /dev/null +++ b/queue-5.7/ip_tunnel-fix-use-after-free-in-ip_tunnel_lookup.patch @@ -0,0 +1,117 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Taehee 
Yoo +Date: Tue, 16 Jun 2020 16:51:51 +0000 +Subject: ip_tunnel: fix use-after-free in ip_tunnel_lookup() + +From: Taehee Yoo + +[ Upstream commit ba61539c6ae57f4146284a5cb4f7b7ed8d42bf45 ] + +In the datapath, the ip_tunnel_lookup() is used and it internally uses +fallback tunnel device pointer, which is fb_tunnel_dev. +This pointer variable should be set to NULL when a fb interface is deleted. +But there is no routine to set fb_tunnel_dev pointer to NULL. +So, this pointer will be still used after interface is deleted and +it eventually results in the use-after-free problem. + +Test commands: + ip netns add A + ip netns add B + ip link add eth0 type veth peer name eth1 + ip link set eth0 netns A + ip link set eth1 netns B + + ip netns exec A ip link set lo up + ip netns exec A ip link set eth0 up + ip netns exec A ip link add gre1 type gre local 10.0.0.1 \ + remote 10.0.0.2 + ip netns exec A ip link set gre1 up + ip netns exec A ip a a 10.0.100.1/24 dev gre1 + ip netns exec A ip a a 10.0.0.1/24 dev eth0 + + ip netns exec B ip link set lo up + ip netns exec B ip link set eth1 up + ip netns exec B ip link add gre1 type gre local 10.0.0.2 \ + remote 10.0.0.1 + ip netns exec B ip link set gre1 up + ip netns exec B ip a a 10.0.100.2/24 dev gre1 + ip netns exec B ip a a 10.0.0.2/24 dev eth1 + ip netns exec A hping3 10.0.100.2 -2 --flood -d 60000 & + ip netns del B + +Splat looks like: +[ 77.793450][ C3] ================================================================== +[ 77.794702][ C3] BUG: KASAN: use-after-free in ip_tunnel_lookup+0xcc4/0xf30 +[ 77.795573][ C3] Read of size 4 at addr ffff888060bd9c84 by task hping3/2905 +[ 77.796398][ C3] +[ 77.796664][ C3] CPU: 3 PID: 2905 Comm: hping3 Not tainted 5.8.0-rc1+ #616 +[ 77.797474][ C3] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 +[ 77.798453][ C3] Call Trace: +[ 77.798815][ C3] +[ 77.799142][ C3] dump_stack+0x9d/0xdb +[ 77.799605][ C3] print_address_description.constprop.7+0x2cc/0x450 +[ 
77.800365][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 +[ 77.800908][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 +[ 77.801517][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 +[ 77.802145][ C3] kasan_report+0x154/0x190 +[ 77.802821][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 +[ 77.803503][ C3] ip_tunnel_lookup+0xcc4/0xf30 +[ 77.804165][ C3] __ipgre_rcv+0x1ab/0xaa0 [ip_gre] +[ 77.804862][ C3] ? rcu_read_lock_sched_held+0xc0/0xc0 +[ 77.805621][ C3] gre_rcv+0x304/0x1910 [ip_gre] +[ 77.806293][ C3] ? lock_acquire+0x1a9/0x870 +[ 77.806925][ C3] ? gre_rcv+0xfe/0x354 [gre] +[ 77.807559][ C3] ? erspan_xmit+0x2e60/0x2e60 [ip_gre] +[ 77.808305][ C3] ? rcu_read_lock_sched_held+0xc0/0xc0 +[ 77.809032][ C3] ? rcu_read_lock_held+0x90/0xa0 +[ 77.809713][ C3] gre_rcv+0x1b8/0x354 [gre] +[ ... ] + +Suggested-by: Eric Dumazet +Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") +Signed-off-by: Taehee Yoo +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_tunnel.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -85,9 +85,10 @@ struct ip_tunnel *ip_tunnel_lookup(struc + __be32 remote, __be32 local, + __be32 key) + { +- unsigned int hash; + struct ip_tunnel *t, *cand = NULL; + struct hlist_head *head; ++ struct net_device *ndev; ++ unsigned int hash; + + hash = ip_tunnel_hash(key, remote); + head = &itn->tunnels[hash]; +@@ -162,8 +163,9 @@ struct ip_tunnel *ip_tunnel_lookup(struc + if (t && t->dev->flags & IFF_UP) + return t; + +- if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) +- return netdev_priv(itn->fb_tunnel_dev); ++ ndev = READ_ONCE(itn->fb_tunnel_dev); ++ if (ndev && ndev->flags & IFF_UP) ++ return netdev_priv(ndev); + + return NULL; + } +@@ -1245,9 +1247,9 @@ void ip_tunnel_uninit(struct net_device + struct ip_tunnel_net *itn; + + itn = net_generic(net, tunnel->ip_tnl_net_id); +- /* fb_tunnel_dev will be unregisted in net-exit call. 
*/ +- if (itn->fb_tunnel_dev != dev) +- ip_tunnel_del(itn, netdev_priv(dev)); ++ ip_tunnel_del(itn, netdev_priv(dev)); ++ if (itn->fb_tunnel_dev == dev) ++ WRITE_ONCE(itn->fb_tunnel_dev, NULL); + + dst_cache_reset(&tunnel->dst_cache); + } diff --git a/queue-5.7/mld-fix-memory-leak-in-ipv6_mc_destroy_dev.patch b/queue-5.7/mld-fix-memory-leak-in-ipv6_mc_destroy_dev.patch new file mode 100644 index 00000000000..987d705f9ab --- /dev/null +++ b/queue-5.7/mld-fix-memory-leak-in-ipv6_mc_destroy_dev.patch @@ -0,0 +1,57 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Wang Hai +Date: Thu, 11 Jun 2020 15:57:50 +0800 +Subject: mld: fix memory leak in ipv6_mc_destroy_dev() + +From: Wang Hai + +[ Upstream commit ea2fce88d2fd678ed9d45354ff49b73f1d5615dd ] + +Commit a84d01647989 ("mld: fix memory leak in mld_del_delrec()") fixed +the memory leak of MLD, but missing the ipv6_mc_destroy_dev() path, in +which mca_sources are leaked after ma_put(). + +Using ip6_mc_clear_src() to take care of the missing free. + +BUG: memory leak +unreferenced object 0xffff8881113d3180 (size 64): + comm "syz-executor071", pid 389, jiffies 4294887985 (age 17.943s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 ff 02 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 00 ................ 
+ backtrace: + [<000000002cbc483c>] kmalloc include/linux/slab.h:555 [inline] + [<000000002cbc483c>] kzalloc include/linux/slab.h:669 [inline] + [<000000002cbc483c>] ip6_mc_add1_src net/ipv6/mcast.c:2237 [inline] + [<000000002cbc483c>] ip6_mc_add_src+0x7f5/0xbb0 net/ipv6/mcast.c:2357 + [<0000000058b8b1ff>] ip6_mc_source+0xe0c/0x1530 net/ipv6/mcast.c:449 + [<000000000bfc4fb5>] do_ipv6_setsockopt.isra.12+0x1b2c/0x3b30 net/ipv6/ipv6_sockglue.c:754 + [<00000000e4e7a722>] ipv6_setsockopt+0xda/0x150 net/ipv6/ipv6_sockglue.c:950 + [<0000000029260d9a>] rawv6_setsockopt+0x45/0x100 net/ipv6/raw.c:1081 + [<000000005c1b46f9>] __sys_setsockopt+0x131/0x210 net/socket.c:2132 + [<000000008491f7db>] __do_sys_setsockopt net/socket.c:2148 [inline] + [<000000008491f7db>] __se_sys_setsockopt net/socket.c:2145 [inline] + [<000000008491f7db>] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2145 + [<00000000c7bc11c5>] do_syscall_64+0xa1/0x530 arch/x86/entry/common.c:295 + [<000000005fb7a3f3>] entry_SYSCALL_64_after_hwframe+0x49/0xb3 + +Fixes: 1666d49e1d41 ("mld: do not remove mld souce list info when set link down") +Reported-by: Hulk Robot +Signed-off-by: Wang Hai +Acked-by: Hangbin Liu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/mcast.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv6/mcast.c ++++ b/net/ipv6/mcast.c +@@ -2618,6 +2618,7 @@ void ipv6_mc_destroy_dev(struct inet6_de + idev->mc_list = i->next; + + write_unlock_bh(&idev->lock); ++ ip6_mc_clear_src(i); + ma_put(i); + write_lock_bh(&idev->lock); + } diff --git a/queue-5.7/mlxsw-spectrum-do-not-rely-on-machine-endianness.patch b/queue-5.7/mlxsw-spectrum-do-not-rely-on-machine-endianness.patch new file mode 100644 index 00000000000..f0b58429fca --- /dev/null +++ b/queue-5.7/mlxsw-spectrum-do-not-rely-on-machine-endianness.patch @@ -0,0 +1,94 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Ido Schimmel +Date: Sun, 21 Jun 2020 11:29:17 +0300 +Subject: mlxsw: spectrum: Do not rely on machine endianness + +From: Ido Schimmel + +[ Upstream commit f3fe412b0a634286a6a3753c3f9ff201e6bec716 ] + +The second commit cited below performed a cast of 'u32 buffsize' to +'(u16 *)' when calling mlxsw_sp_port_headroom_8x_adjust(): + +mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, (u16 *) &buffsize); + +Colin noted that this will behave differently on big endian +architectures compared to little endian architectures. + +Fix this by following Colin's suggestion and have the function accept +and return 'u32' instead of passing the current size by reference. + +Fixes: da382875c616 ("mlxsw: spectrum: Extend to support Spectrum-3 ASIC") +Fixes: 60833d54d56c ("mlxsw: spectrum: Adjust headroom buffers for 8x ports") +Signed-off-by: Ido Schimmel +Reported-by: Colin Ian King +Suggested-by: Colin Ian King +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++-- + drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 8 +++----- + drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 2 +- + drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 2 +- + 4 files changed, 7 insertions(+), 9 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +@@ -990,10 +990,10 @@ int __mlxsw_sp_port_headroom_set(struct + + lossy = !(pfc || pause_en); + thres_cells = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu); +- mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, &thres_cells); ++ thres_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, thres_cells); + delay_cells = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, + pfc, pause_en); +- mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, &delay_cells); ++ delay_cells = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, delay_cells); + total_cells = thres_cells + delay_cells; + + taken_headroom_cells += total_cells; +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +@@ -395,17 +395,15 @@ mlxsw_sp_port_vlan_find_by_vid(const str + return NULL; + } + +-static inline void ++static inline u32 + mlxsw_sp_port_headroom_8x_adjust(const struct mlxsw_sp_port *mlxsw_sp_port, +- u16 *p_size) ++ u32 size_cells) + { + /* Ports with eight lanes use two headroom buffers between which the + * configured headroom size is split. Therefore, multiply the calculated + * headroom size by two. + */ +- if (mlxsw_sp_port->mapping.width != 8) +- return; +- *p_size *= 2; ++ return mlxsw_sp_port->mapping.width == 8 ? 
2 * size_cells : size_cells; + } + + enum mlxsw_sp_flood_type { +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +@@ -312,7 +312,7 @@ static int mlxsw_sp_port_pb_init(struct + + if (i == MLXSW_SP_PB_UNUSED) + continue; +- mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, &size); ++ size = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, size); + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, i, size); + } + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +@@ -776,7 +776,7 @@ mlxsw_sp_span_port_buffsize_update(struc + speed = 0; + + buffsize = mlxsw_sp_span_buffsize_get(mlxsw_sp, speed, mtu); +- mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, (u16 *) &buffsize); ++ buffsize = mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, buffsize); + mlxsw_reg_sbib_pack(sbib_pl, mlxsw_sp_port->local_port, buffsize); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + } diff --git a/queue-5.7/mptcp-drop-sndr_key-in-mptcp_syn_options.patch b/queue-5.7/mptcp-drop-sndr_key-in-mptcp_syn_options.patch new file mode 100644 index 00000000000..a007c305f34 --- /dev/null +++ b/queue-5.7/mptcp-drop-sndr_key-in-mptcp_syn_options.patch @@ -0,0 +1,33 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Geliang Tang +Date: Mon, 22 Jun 2020 19:45:58 +0800 +Subject: mptcp: drop sndr_key in mptcp_syn_options + +From: Geliang Tang + +[ Upstream commit b562f58bbc12444219b74a5d6524977a3d87a022 ] + +In RFC 8684, we don't need to send sndr_key in SYN package anymore, so drop +it. + +Fixes: cc7972ea1932 ("mptcp: parse and emit MP_CAPABLE option according to v1 spec") +Signed-off-by: Geliang Tang +Reviewed-by: Matthieu Baerts +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/options.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/net/mptcp/options.c ++++ b/net/mptcp/options.c +@@ -336,9 +336,7 @@ bool mptcp_syn_options(struct sock *sk, + */ + subflow->snd_isn = TCP_SKB_CB(skb)->end_seq; + if (subflow->request_mptcp) { +- pr_debug("local_key=%llu", subflow->local_key); + opts->suboptions = OPTION_MPTCP_MPC_SYN; +- opts->sndr_key = subflow->local_key; + *size = TCPOLEN_MPTCP_MPC_SYN; + return true; + } else if (subflow->request_join) { diff --git a/queue-5.7/mptcp-fix-memory-leak-in-mptcp_subflow_create_socket.patch b/queue-5.7/mptcp-fix-memory-leak-in-mptcp_subflow_create_socket.patch new file mode 100644 index 00000000000..538907f2c84 --- /dev/null +++ b/queue-5.7/mptcp-fix-memory-leak-in-mptcp_subflow_create_socket.patch @@ -0,0 +1,54 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Wei Yongjun +Date: Mon, 15 Jun 2020 09:35:22 +0800 +Subject: mptcp: fix memory leak in mptcp_subflow_create_socket() + +From: Wei Yongjun + +[ Upstream commit b8ad540dd4e40566c520dff491fc06c71ae6b989 ] + +The socket allocated by sock_create_kern() should be released before +returning in the error handling path; otherwise it causes a memory leak. + +unreferenced object 0xffff88810910c000 (size 1216): + comm "00000003_test_m", pid 12238, jiffies 4295050289 (age 54.237s) + hex dump (first 32 bytes): + 01 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 2f 30 0a 81 88 ff ff ........./0..... 
+ backtrace: + [<00000000e877f89f>] sock_alloc_inode+0x18/0x1c0 + [<0000000093d1dd51>] alloc_inode+0x63/0x1d0 + [<000000005673fec6>] new_inode_pseudo+0x14/0xe0 + [<00000000b5db6be8>] sock_alloc+0x3c/0x260 + [<00000000e7e3cbb2>] __sock_create+0x89/0x620 + [<0000000023e48593>] mptcp_subflow_create_socket+0xc0/0x5e0 + [<00000000419795e4>] __mptcp_socket_create+0x1ad/0x3f0 + [<00000000b2f942e8>] mptcp_stream_connect+0x281/0x4f0 + [<00000000c80cd5cc>] __sys_connect_file+0x14d/0x190 + [<00000000dc761f11>] __sys_connect+0x128/0x160 + [<000000008b14e764>] __x64_sys_connect+0x6f/0xb0 + [<000000007b4f93bd>] do_syscall_64+0xa1/0x530 + [<00000000d3e770b6>] entry_SYSCALL_64_after_hwframe+0x49/0xb3 + +Fixes: 2303f994b3e1 ("mptcp: Associate MPTCP context with TCP socket") +Signed-off-by: Wei Yongjun +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/subflow.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -1015,8 +1015,10 @@ int mptcp_subflow_create_socket(struct s + err = tcp_set_ulp(sf->sk, "mptcp"); + release_sock(sf->sk); + +- if (err) ++ if (err) { ++ sock_release(sf); + return err; ++ } + + /* the newly created socket really belongs to the owning MPTCP master + * socket, even if for additional subflows the allocation is performed diff --git a/queue-5.7/mvpp2-ethtool-rxtx-stats-fix.patch b/queue-5.7/mvpp2-ethtool-rxtx-stats-fix.patch new file mode 100644 index 00000000000..d883908acd4 --- /dev/null +++ b/queue-5.7/mvpp2-ethtool-rxtx-stats-fix.patch @@ -0,0 +1,39 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Sven Auhagen +Date: Sun, 14 Jun 2020 09:19:17 +0200 +Subject: mvpp2: ethtool rxtx stats fix + +From: Sven Auhagen + +[ Upstream commit cc970925feb9a38c2f0d34305518e00a3084ce85 ] + +The ethtool rx and tx queue statistics are reporting wrong values. +Fix reading out the correct ones. + +Signed-off-by: Sven Auhagen +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c ++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +@@ -1544,7 +1544,7 @@ static void mvpp2_read_stats(struct mvpp + for (q = 0; q < port->ntxqs; q++) + for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_txq_regs); i++) + *pstats++ += mvpp2_read_index(port->priv, +- MVPP22_CTRS_TX_CTR(port->id, i), ++ MVPP22_CTRS_TX_CTR(port->id, q), + mvpp2_ethtool_txq_regs[i].offset); + + /* Rxqs are numbered from 0 from the user standpoint, but not from the +@@ -1553,7 +1553,7 @@ static void mvpp2_read_stats(struct mvpp + for (q = 0; q < port->nrxqs; q++) + for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_rxq_regs); i++) + *pstats++ += mvpp2_read_index(port->priv, +- port->first_rxq + i, ++ port->first_rxq + q, + mvpp2_ethtool_rxq_regs[i].offset); + } + diff --git a/queue-5.7/net-bridge-enfore-alignment-for-ethernet-address.patch b/queue-5.7/net-bridge-enfore-alignment-for-ethernet-address.patch new file mode 100644 index 00000000000..b40e807f766 --- /dev/null +++ b/queue-5.7/net-bridge-enfore-alignment-for-ethernet-address.patch @@ -0,0 +1,43 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Thomas Martitz +Date: Thu, 25 Jun 2020 14:26:03 +0200 +Subject: net: bridge: enfore alignment for ethernet address + +From: Thomas Martitz + +[ Upstream commit db7202dec92e6caa2706c21d6fc359af318bde2e ] + +The eth_addr member is passed to ether_addr functions that require +2-byte alignment, therefore the member must be properly aligned +to avoid unaligned accesses. + +The problem is in place since the initial merge of multicast to unicast: +commit 6db6f0eae6052b70885562e1733896647ec1d807 bridge: multicast to unicast + +Fixes: 6db6f0eae605 ("bridge: multicast to unicast") +Cc: Roopa Prabhu +Cc: Nikolay Aleksandrov +Cc: David S. 
Miller +Cc: Jakub Kicinski +Cc: Felix Fietkau +Cc: stable@vger.kernel.org +Signed-off-by: Thomas Martitz +Acked-by: Nikolay Aleksandrov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_private.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -217,8 +217,8 @@ struct net_bridge_port_group { + struct rcu_head rcu; + struct timer_list timer; + struct br_ip addr; ++ unsigned char eth_addr[ETH_ALEN] __aligned(2); + unsigned char flags; +- unsigned char eth_addr[ETH_ALEN]; + }; + + struct net_bridge_mdb_entry { diff --git a/queue-5.7/net-core-reduce-recursion-limit-value.patch b/queue-5.7/net-core-reduce-recursion-limit-value.patch new file mode 100644 index 00000000000..41b303297e3 --- /dev/null +++ b/queue-5.7/net-core-reduce-recursion-limit-value.patch @@ -0,0 +1,81 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Taehee Yoo +Date: Tue, 16 Jun 2020 15:52:05 +0000 +Subject: net: core: reduce recursion limit value + +From: Taehee Yoo + +[ Upstream commit fb7861d14c8d7edac65b2fcb6e8031cb138457b2 ] + +In the current code, ->ndo_start_xmit() can be executed recursively only +10 times because of stack memory. +But, in the case of the vxlan, 10 recursion limit value results in +a stack overflow. +In the current code, the nested interface is limited by 8 depth. +There is no critical reason that the recursion limitation value should +be 10. +So, it would be good to be the same value with the limitation value of +nesting interface depth. 
+ +Test commands: + ip link add vxlan10 type vxlan vni 10 dstport 4789 srcport 4789 4789 + ip link set vxlan10 up + ip a a 192.168.10.1/24 dev vxlan10 + ip n a 192.168.10.2 dev vxlan10 lladdr fc:22:33:44:55:66 nud permanent + + for i in {9..0} + do + let A=$i+1 + ip link add vxlan$i type vxlan vni $i dstport 4789 srcport 4789 4789 + ip link set vxlan$i up + ip a a 192.168.$i.1/24 dev vxlan$i + ip n a 192.168.$i.2 dev vxlan$i lladdr fc:22:33:44:55:66 nud permanent + bridge fdb add fc:22:33:44:55:66 dev vxlan$A dst 192.168.$i.2 self + done + hping3 192.168.10.2 -2 -d 60000 + +Splat looks like: +[ 103.814237][ T1127] ============================================================================= +[ 103.871955][ T1127] BUG kmalloc-2k (Tainted: G B ): Padding overwritten. 0x00000000897a2e4f-0x000 +[ 103.873187][ T1127] ----------------------------------------------------------------------------- +[ 103.873187][ T1127] +[ 103.874252][ T1127] INFO: Slab 0x000000005cccc724 objects=5 used=5 fp=0x0000000000000000 flags=0x10000000001020 +[ 103.881323][ T1127] CPU: 3 PID: 1127 Comm: hping3 Tainted: G B 5.7.0+ #575 +[ 103.882131][ T1127] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 +[ 103.883006][ T1127] Call Trace: +[ 103.883324][ T1127] dump_stack+0x96/0xdb +[ 103.883716][ T1127] slab_err+0xad/0xd0 +[ 103.884106][ T1127] ? _raw_spin_unlock+0x1f/0x30 +[ 103.884620][ T1127] ? get_partial_node.isra.78+0x140/0x360 +[ 103.885214][ T1127] slab_pad_check.part.53+0xf7/0x160 +[ 103.885769][ T1127] ? pskb_expand_head+0x110/0xe10 +[ 103.886316][ T1127] check_slab+0x97/0xb0 +[ 103.886763][ T1127] alloc_debug_processing+0x84/0x1a0 +[ 103.887308][ T1127] ___slab_alloc+0x5a5/0x630 +[ 103.887765][ T1127] ? pskb_expand_head+0x110/0xe10 +[ 103.888265][ T1127] ? lock_downgrade+0x730/0x730 +[ 103.888762][ T1127] ? pskb_expand_head+0x110/0xe10 +[ 103.889244][ T1127] ? 
__slab_alloc+0x3e/0x80 +[ 103.889675][ T1127] __slab_alloc+0x3e/0x80 +[ 103.890108][ T1127] __kmalloc_node_track_caller+0xc7/0x420 +[ ... ] + +Fixes: 11a766ce915f ("net: Increase xmit RECURSION_LIMIT to 10.") +Signed-off-by: Taehee Yoo +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3125,7 +3125,7 @@ static inline int dev_recursion_level(vo + return this_cpu_read(softnet_data.xmit.recursion); + } + +-#define XMIT_RECURSION_LIMIT 10 ++#define XMIT_RECURSION_LIMIT 8 + static inline bool dev_xmit_recursion(void) + { + return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > diff --git a/queue-5.7/net-do-not-clear-the-sock-tx-queue-in-sk_set_socket.patch b/queue-5.7/net-do-not-clear-the-sock-tx-queue-in-sk_set_socket.patch new file mode 100644 index 00000000000..7c27435559b --- /dev/null +++ b/queue-5.7/net-do-not-clear-the-sock-tx-queue-in-sk_set_socket.patch @@ -0,0 +1,57 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Tariq Toukan +Date: Mon, 22 Jun 2020 23:26:04 +0300 +Subject: net: Do not clear the sock TX queue in sk_set_socket() + +From: Tariq Toukan + +[ Upstream commit 41b14fb8724d5a4b382a63cb4a1a61880347ccb8 ] + +Clearing the sock TX queue in sk_set_socket() might cause unexpected +out-of-order transmit when called from sock_orphan(), as outstanding +packets can pick a different TX queue and bypass the ones already queued. + +This is undesired in general. More specifically, it breaks the in-order +scheduling property guarantee for device-offloaded TLS sockets. + +Remove the call to sk_tx_queue_clear() in sk_set_socket(), and add it +explicitly only where needed. + +Fixes: e022f0b4a03f ("net: Introduce sk_tx_queue_mapping") +Signed-off-by: Tariq Toukan +Reviewed-by: Boris Pismenny +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sock.h | 1 - + net/core/sock.c | 2 ++ + 2 files changed, 2 insertions(+), 1 deletion(-) + +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1846,7 +1846,6 @@ static inline int sk_rx_queue_get(const + + static inline void sk_set_socket(struct sock *sk, struct socket *sock) + { +- sk_tx_queue_clear(sk); + sk->sk_socket = sock; + } + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1678,6 +1678,7 @@ struct sock *sk_alloc(struct net *net, i + cgroup_sk_alloc(&sk->sk_cgrp_data); + sock_update_classid(&sk->sk_cgrp_data); + sock_update_netprioidx(&sk->sk_cgrp_data); ++ sk_tx_queue_clear(sk); + } + + return sk; +@@ -1901,6 +1902,7 @@ struct sock *sk_clone_lock(const struct + */ + sk_refcnt_debug_inc(newsk); + sk_set_socket(newsk, NULL); ++ sk_tx_queue_clear(newsk); + RCU_INIT_POINTER(newsk->sk_wq, NULL); + + if (newsk->sk_prot->sockets_allocated) diff --git a/queue-5.7/net-dsa-bcm_sf2-fix-node-reference-count.patch b/queue-5.7/net-dsa-bcm_sf2-fix-node-reference-count.patch new file mode 100644 index 00000000000..5c5a3d60e40 --- /dev/null +++ b/queue-5.7/net-dsa-bcm_sf2-fix-node-reference-count.patch @@ -0,0 +1,72 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Florian Fainelli +Date: Wed, 17 Jun 2020 20:42:44 -0700 +Subject: net: dsa: bcm_sf2: Fix node reference count + +From: Florian Fainelli + +[ Upstream commit 8dbe4c5d5e40fe140221024f7b16bec9f310bf70 ] + +of_find_node_by_name() will do an of_node_put() on the "from" argument. +With CONFIG_OF_DYNAMIC enabled which checks for device_node reference +counts, we would be getting a warning like this: + +[ 6.347230] refcount_t: increment on 0; use-after-free. 
+[ 6.352498] WARNING: CPU: 3 PID: 77 at lib/refcount.c:156 +refcount_inc_checked+0x38/0x44 +[ 6.360601] Modules linked in: +[ 6.363661] CPU: 3 PID: 77 Comm: kworker/3:1 Tainted: G W +5.4.46-gb78b3e9956e6 #13 +[ 6.372546] Hardware name: BCM97278SV (DT) +[ 6.376649] Workqueue: events deferred_probe_work_func +[ 6.381796] pstate: 60000005 (nZCv daif -PAN -UAO) +[ 6.386595] pc : refcount_inc_checked+0x38/0x44 +[ 6.391133] lr : refcount_inc_checked+0x38/0x44 +... +[ 6.478791] Call trace: +[ 6.481243] refcount_inc_checked+0x38/0x44 +[ 6.485433] kobject_get+0x3c/0x4c +[ 6.488840] of_node_get+0x24/0x34 +[ 6.492247] of_irq_find_parent+0x3c/0xe0 +[ 6.496263] of_irq_parse_one+0xe4/0x1d0 +[ 6.500191] irq_of_parse_and_map+0x44/0x84 +[ 6.504381] bcm_sf2_sw_probe+0x22c/0x844 +[ 6.508397] platform_drv_probe+0x58/0xa8 +[ 6.512413] really_probe+0x238/0x3fc +[ 6.516081] driver_probe_device+0x11c/0x12c +[ 6.520358] __device_attach_driver+0xa8/0x100 +[ 6.524808] bus_for_each_drv+0xb4/0xd0 +[ 6.528650] __device_attach+0xd0/0x164 +[ 6.532493] device_initial_probe+0x24/0x30 +[ 6.536682] bus_probe_device+0x38/0x98 +[ 6.540524] deferred_probe_work_func+0xa8/0xd4 +[ 6.545061] process_one_work+0x178/0x288 +[ 6.549078] process_scheduled_works+0x44/0x48 +[ 6.553529] worker_thread+0x218/0x270 +[ 6.557285] kthread+0xdc/0xe4 +[ 6.560344] ret_from_fork+0x10/0x18 +[ 6.563925] ---[ end trace 68f65caf69bb152a ]--- + +Fix this by adding a of_node_get() to increment the reference count +prior to the call. + +Fixes: afa3b592953b ("net: dsa: bcm_sf2: Ensure correct sub-node is parsed") +Signed-off-by: Florian Fainelli +Reviewed-by: Andrew Lunn +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -1147,6 +1147,8 @@ static int bcm_sf2_sw_probe(struct platf + set_bit(0, priv->cfp.used); + set_bit(0, priv->cfp.unique); + ++ /* Balance of_node_put() done by of_find_node_by_name() */ ++ of_node_get(dn); + ports = of_find_node_by_name(dn, "ports"); + if (ports) { + bcm_sf2_identify_ports(priv, ports); diff --git a/queue-5.7/net-ethtool-add-missing-netif_f_gso_fraglist-feature-string.patch b/queue-5.7/net-ethtool-add-missing-netif_f_gso_fraglist-feature-string.patch new file mode 100644 index 00000000000..c117f250bb2 --- /dev/null +++ b/queue-5.7/net-ethtool-add-missing-netif_f_gso_fraglist-feature-string.patch @@ -0,0 +1,34 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Alexander Lobakin +Date: Wed, 17 Jun 2020 20:42:47 +0000 +Subject: net: ethtool: add missing NETIF_F_GSO_FRAGLIST feature string + +From: Alexander Lobakin + +[ Upstream commit eddbf5d0204e550ee59de02bdc19fe90d4203dd6 ] + +Commit 3b33583265ed ("net: Add fraglist GRO/GSO feature flags") missed +an entry for NETIF_F_GSO_FRAGLIST in netdev_features_strings array. As +a result, fraglist GSO feature is not shown in 'ethtool -k' output and +can't be toggled on/off. +The fix is trivial. + +Fixes: 3b33583265ed ("net: Add fraglist GRO/GSO feature flags") +Signed-off-by: Alexander Lobakin +Reviewed-by: Michal Kubecek +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ethtool/common.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ethtool/common.c ++++ b/net/ethtool/common.c +@@ -44,6 +44,7 @@ const char netdev_features_strings[NETDE + [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation", + [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation", + [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation", ++ [NETIF_F_GSO_FRAGLIST_BIT] = "tx-gso-list", + + [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", + [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", diff --git a/queue-5.7/net-ethtool-add-missing-string-for-netif_f_gso_tunnel_remcsum.patch b/queue-5.7/net-ethtool-add-missing-string-for-netif_f_gso_tunnel_remcsum.patch new file mode 100644 index 00000000000..dbcb0e747a9 --- /dev/null +++ b/queue-5.7/net-ethtool-add-missing-string-for-netif_f_gso_tunnel_remcsum.patch @@ -0,0 +1,41 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Alexander Lobakin +Date: Tue, 23 Jun 2020 10:43:48 +0000 +Subject: net: ethtool: add missing string for NETIF_F_GSO_TUNNEL_REMCSUM + +From: Alexander Lobakin + +[ Upstream commit b4730ae6a443afe611afb4fb651c885c51003c15 ] + +Commit e585f2363637 ("udp: Changes to udp_offload to support remote +checksum offload") added new GSO type and a corresponding netdev +feature, but missed Ethtool's 'netdev_features_strings' table. +Give it a name so it will be exposed to userspace and become available +for manual configuration. + +v3: + - decouple from "netdev_features_strings[] cleanup" series; + - no functional changes. + +v2: + - don't split the "Fixes:" tag across lines; + - no functional changes. + +Fixes: e585f2363637 ("udp: Changes to udp_offload to support remote checksum offload") +Signed-off-by: Alexander Lobakin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ethtool/common.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ethtool/common.c ++++ b/net/ethtool/common.c +@@ -40,6 +40,7 @@ const char netdev_features_strings[NETDE + [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", + [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", + [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", ++ [NETIF_F_GSO_TUNNEL_REMCSUM_BIT] = "tx-tunnel-remcsum-segmentation", + [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation", + [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation", + [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation", diff --git a/queue-5.7/net-fix-memleak-in-register_netdevice.patch b/queue-5.7/net-fix-memleak-in-register_netdevice.patch new file mode 100644 index 00000000000..44f9183c189 --- /dev/null +++ b/queue-5.7/net-fix-memleak-in-register_netdevice.patch @@ -0,0 +1,87 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Yang Yingliang +Date: Tue, 16 Jun 2020 09:39:21 +0000 +Subject: net: fix memleak in register_netdevice() + +From: Yang Yingliang + +[ Upstream commit 814152a89ed52c722ab92e9fbabcac3cb8a39245 ] + +I got a memleak report when doing some fuzz test: + +unreferenced object 0xffff888112584000 (size 13599): + comm "ip", pid 3048, jiffies 4294911734 (age 343.491s) + hex dump (first 32 bytes): + 74 61 70 30 00 00 00 00 00 00 00 00 00 00 00 00 tap0............ + 00 ee d9 19 81 88 ff ff 00 00 00 00 00 00 00 00 ................ 
+ backtrace: + [<000000002f60ba65>] __kmalloc_node+0x309/0x3a0 + [<0000000075b211ec>] kvmalloc_node+0x7f/0xc0 + [<00000000d3a97396>] alloc_netdev_mqs+0x76/0xfc0 + [<00000000609c3655>] __tun_chr_ioctl+0x1456/0x3d70 + [<000000001127ca24>] ksys_ioctl+0xe5/0x130 + [<00000000b7d5e66a>] __x64_sys_ioctl+0x6f/0xb0 + [<00000000e1023498>] do_syscall_64+0x56/0xa0 + [<000000009ec0eb12>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +unreferenced object 0xffff888111845cc0 (size 8): + comm "ip", pid 3048, jiffies 4294911734 (age 343.491s) + hex dump (first 8 bytes): + 74 61 70 30 00 88 ff ff tap0.... + backtrace: + [<000000004c159777>] kstrdup+0x35/0x70 + [<00000000d8b496ad>] kstrdup_const+0x3d/0x50 + [<00000000494e884a>] kvasprintf_const+0xf1/0x180 + [<0000000097880a2b>] kobject_set_name_vargs+0x56/0x140 + [<000000008fbdfc7b>] dev_set_name+0xab/0xe0 + [<000000005b99e3b4>] netdev_register_kobject+0xc0/0x390 + [<00000000602704fe>] register_netdevice+0xb61/0x1250 + [<000000002b7ca244>] __tun_chr_ioctl+0x1cd1/0x3d70 + [<000000001127ca24>] ksys_ioctl+0xe5/0x130 + [<00000000b7d5e66a>] __x64_sys_ioctl+0x6f/0xb0 + [<00000000e1023498>] do_syscall_64+0x56/0xa0 + [<000000009ec0eb12>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +unreferenced object 0xffff88811886d800 (size 512): + comm "ip", pid 3048, jiffies 4294911734 (age 343.491s) + hex dump (first 32 bytes): + 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... + ff ff ff ff ff ff ff ff c0 66 3d a3 ff ff ff ff .........f=..... 
+ backtrace: + [<0000000050315800>] device_add+0x61e/0x1950 + [<0000000021008dfb>] netdev_register_kobject+0x17e/0x390 + [<00000000602704fe>] register_netdevice+0xb61/0x1250 + [<000000002b7ca244>] __tun_chr_ioctl+0x1cd1/0x3d70 + [<000000001127ca24>] ksys_ioctl+0xe5/0x130 + [<00000000b7d5e66a>] __x64_sys_ioctl+0x6f/0xb0 + [<00000000e1023498>] do_syscall_64+0x56/0xa0 + [<000000009ec0eb12>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +If call_netdevice_notifiers() failed, then rollback_registered() +calls netdev_unregister_kobject() which holds the kobject. The +reference cannot be put because the netdev won't be add to todo +list, so it will leads a memleak, we need put the reference to +avoid memleak. + +Reported-by: Hulk Robot +Signed-off-by: Yang Yingliang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -9435,6 +9435,13 @@ int register_netdevice(struct net_device + rcu_barrier(); + + dev->reg_state = NETREG_UNREGISTERED; ++ /* We should put the kobject that hold in ++ * netdev_unregister_kobject(), otherwise ++ * the net device cannot be freed when ++ * driver calls free_netdev(), because the ++ * kobject is being hold. 
++ */ ++ kobject_put(&dev->dev.kobj); + } + /* + * Prevent userspace races by waiting until the network diff --git a/queue-5.7/net-fix-the-arp-error-in-some-cases.patch b/queue-5.7/net-fix-the-arp-error-in-some-cases.patch new file mode 100644 index 00000000000..e25ada38caf --- /dev/null +++ b/queue-5.7/net-fix-the-arp-error-in-some-cases.patch @@ -0,0 +1,50 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: guodeqing +Date: Wed, 17 Jun 2020 10:07:16 +0800 +Subject: net: Fix the arp error in some cases + +From: guodeqing + +[ Upstream commit 5eea3a63ff4aba6a26002e657a6d21934b7e2b96 ] + +ie., +$ ifconfig eth0 6.6.6.6 netmask 255.255.255.0 + +$ ip rule add from 6.6.6.6 table 6666 + +$ ip route add 9.9.9.9 via 6.6.6.6 + +$ ping -I 6.6.6.6 9.9.9.9 +PING 9.9.9.9 (9.9.9.9) from 6.6.6.6 : 56(84) bytes of data. + +3 packets transmitted, 0 received, 100% packet loss, time 2079ms + +$ arp +Address HWtype HWaddress Flags Mask Iface +6.6.6.6 (incomplete) eth0 + +The ARP request address is wrong. This is because fib_table_lookup in +fib_check_nh looks up the destination 9.9.9.9 nexthop, and the scope of +the fib result is RT_SCOPE_LINK, while the correct scope is RT_SCOPE_HOST. +Here I add a check of whether this is RT_TABLE_MAIN to solve this problem. + +Fixes: 3bfd847203c6 ("net: Use passed in table for nexthop lookups") +Signed-off-by: guodeqing +Reviewed-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_semantics.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -1109,7 +1109,7 @@ static int fib_check_nh_v4_gw(struct net + if (fl4.flowi4_scope < RT_SCOPE_LINK) + fl4.flowi4_scope = RT_SCOPE_LINK; + +- if (table) ++ if (table && table != RT_TABLE_MAIN) + tbl = fib_get_table(net, table); + + if (tbl) diff --git a/queue-5.7/net-increment-xmit_recursion-level-in-dev_direct_xmit.patch b/queue-5.7/net-increment-xmit_recursion-level-in-dev_direct_xmit.patch new file mode 100644 index 00000000000..a2bb09cd194 --- /dev/null +++ b/queue-5.7/net-increment-xmit_recursion-level-in-dev_direct_xmit.patch @@ -0,0 +1,105 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Eric Dumazet +Date: Wed, 17 Jun 2020 22:23:25 -0700 +Subject: net: increment xmit_recursion level in dev_direct_xmit() + +From: Eric Dumazet + +[ Upstream commit 0ad6f6e767ec2f613418cbc7ebe5ec4c35af540c ] + +Back in commit f60e5990d9c1 ("ipv6: protect skb->sk accesses +from recursive dereference inside the stack") Hannes added code +so that IPv6 stack would not trust skb->sk for typical cases +where packet goes through 'standard' xmit path (__dev_queue_xmit()) + +Alas af_packet had a dev_direct_xmit() path that was not +dealing yet with xmit_recursion level. + +Also change sk_mc_loop() to dump a stack once only. 
+ +Without this patch, syzbot was able to trigger : + +[1] +[ 153.567378] WARNING: CPU: 7 PID: 11273 at net/core/sock.c:721 sk_mc_loop+0x51/0x70 +[ 153.567378] Modules linked in: nfnetlink ip6table_raw ip6table_filter iptable_raw iptable_nat nf_nat nf_conntrack nf_defrag_ipv4 nf_defrag_ipv6 iptable_filter macsec macvtap tap macvlan 8021q hsr wireguard libblake2s blake2s_x86_64 libblake2s_generic udp_tunnel ip6_udp_tunnel libchacha20poly1305 poly1305_x86_64 chacha_x86_64 libchacha curve25519_x86_64 libcurve25519_generic netdevsim batman_adv dummy team bridge stp llc w1_therm wire i2c_mux_pca954x i2c_mux cdc_acm ehci_pci ehci_hcd mlx4_en mlx4_ib ib_uverbs ib_core mlx4_core +[ 153.567386] CPU: 7 PID: 11273 Comm: b159172088 Not tainted 5.8.0-smp-DEV #273 +[ 153.567387] RIP: 0010:sk_mc_loop+0x51/0x70 +[ 153.567388] Code: 66 83 f8 0a 75 24 0f b6 4f 12 b8 01 00 00 00 31 d2 d3 e0 a9 bf ef ff ff 74 07 48 8b 97 f0 02 00 00 0f b6 42 3a 83 e0 01 5d c3 <0f> 0b b8 01 00 00 00 5d c3 0f b6 87 18 03 00 00 5d c0 e8 04 83 e0 +[ 153.567388] RSP: 0018:ffff95c69bb93990 EFLAGS: 00010212 +[ 153.567388] RAX: 0000000000000011 RBX: ffff95c6e0ee3e00 RCX: 0000000000000007 +[ 153.567389] RDX: ffff95c69ae50000 RSI: ffff95c6c30c3000 RDI: ffff95c6c30c3000 +[ 153.567389] RBP: ffff95c69bb93990 R08: ffff95c69a77f000 R09: 0000000000000008 +[ 153.567389] R10: 0000000000000040 R11: 00003e0e00026128 R12: ffff95c6c30c3000 +[ 153.567390] R13: ffff95c6cc4fd500 R14: ffff95c6f84500c0 R15: ffff95c69aa13c00 +[ 153.567390] FS: 00007fdc3a283700(0000) GS:ffff95c6ff9c0000(0000) knlGS:0000000000000000 +[ 153.567390] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 153.567391] CR2: 00007ffee758e890 CR3: 0000001f9ba20003 CR4: 00000000001606e0 +[ 153.567391] Call Trace: +[ 153.567391] ip6_finish_output2+0x34e/0x550 +[ 153.567391] __ip6_finish_output+0xe7/0x110 +[ 153.567391] ip6_finish_output+0x2d/0xb0 +[ 153.567392] ip6_output+0x77/0x120 +[ 153.567392] ? 
__ip6_finish_output+0x110/0x110 +[ 153.567392] ip6_local_out+0x3d/0x50 +[ 153.567392] ipvlan_queue_xmit+0x56c/0x5e0 +[ 153.567393] ? ksize+0x19/0x30 +[ 153.567393] ipvlan_start_xmit+0x18/0x50 +[ 153.567393] dev_direct_xmit+0xf3/0x1c0 +[ 153.567393] packet_direct_xmit+0x69/0xa0 +[ 153.567394] packet_sendmsg+0xbf0/0x19b0 +[ 153.567394] ? plist_del+0x62/0xb0 +[ 153.567394] sock_sendmsg+0x65/0x70 +[ 153.567394] sock_write_iter+0x93/0xf0 +[ 153.567394] new_sync_write+0x18e/0x1a0 +[ 153.567395] __vfs_write+0x29/0x40 +[ 153.567395] vfs_write+0xb9/0x1b0 +[ 153.567395] ksys_write+0xb1/0xe0 +[ 153.567395] __x64_sys_write+0x1a/0x20 +[ 153.567395] do_syscall_64+0x43/0x70 +[ 153.567396] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 153.567396] RIP: 0033:0x453549 +[ 153.567396] Code: Bad RIP value. +[ 153.567396] RSP: 002b:00007fdc3a282cc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 +[ 153.567397] RAX: ffffffffffffffda RBX: 00000000004d32d0 RCX: 0000000000453549 +[ 153.567397] RDX: 0000000000000020 RSI: 0000000020000300 RDI: 0000000000000003 +[ 153.567398] RBP: 00000000004d32d8 R08: 0000000000000000 R09: 0000000000000000 +[ 153.567398] R10: 0000000000000000 R11: 0000000000000246 R12: 00000000004d32dc +[ 153.567398] R13: 00007ffee742260f R14: 00007fdc3a282dc0 R15: 00007fdc3a283700 +[ 153.567399] ---[ end trace c1d5ae2b1059ec62 ]--- + +f60e5990d9c1 ("ipv6: protect skb->sk accesses from recursive dereference inside the stack") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 2 ++ + net/core/sock.c | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4109,10 +4109,12 @@ int dev_direct_xmit(struct sk_buff *skb, + + local_bh_disable(); + ++ dev_xmit_recursion_inc(); + HARD_TX_LOCK(dev, txq, smp_processor_id()); + if (!netif_xmit_frozen_or_drv_stopped(txq)) + ret = netdev_start_xmit(skb, dev, txq, false); + HARD_TX_UNLOCK(dev, txq); ++ dev_xmit_recursion_dec(); + + local_bh_enable(); + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -707,7 +707,7 @@ bool sk_mc_loop(struct sock *sk) + return inet6_sk(sk)->mc_loop; + #endif + } +- WARN_ON(1); ++ WARN_ON_ONCE(1); + return true; + } + EXPORT_SYMBOL(sk_mc_loop); diff --git a/queue-5.7/net-macb-call-pm_runtime_put_sync-on-failure-path.patch b/queue-5.7/net-macb-call-pm_runtime_put_sync-on-failure-path.patch new file mode 100644 index 00000000000..d26d36632ed --- /dev/null +++ b/queue-5.7/net-macb-call-pm_runtime_put_sync-on-failure-path.patch @@ -0,0 +1,47 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Claudiu Beznea +Date: Wed, 24 Jun 2020 13:08:17 +0300 +Subject: net: macb: call pm_runtime_put_sync on failure path + +From: Claudiu Beznea + +[ Upstream commit 0eaf228d574bd82a9aed73e3953bfb81721f4227 ] + +Call pm_runtime_put_sync() on failure path of at91ether_open. + +Fixes: e6a41c23df0d ("net: macb: ensure interface is not suspended on at91rm9200") +Signed-off-by: Claudiu Beznea +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/cadence/macb_main.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/cadence/macb_main.c ++++ b/drivers/net/ethernet/cadence/macb_main.c +@@ -3837,7 +3837,7 @@ static int at91ether_open(struct net_dev + + ret = at91ether_start(dev); + if (ret) +- return ret; ++ goto pm_exit; + + /* Enable MAC interrupts */ + macb_writel(lp, IER, MACB_BIT(RCOMP) | +@@ -3850,11 +3850,15 @@ static int at91ether_open(struct net_dev + + ret = macb_phylink_connect(lp); + if (ret) +- return ret; ++ goto pm_exit; + + netif_start_queue(dev); + + return 0; ++ ++pm_exit: ++ pm_runtime_put_sync(&lp->pdev->dev); ++ return ret; + } + + /* Close the interface */ diff --git a/queue-5.7/net-macb-undo-operations-in-case-of-failure.patch b/queue-5.7/net-macb-undo-operations-in-case-of-failure.patch new file mode 100644 index 00000000000..949e0fe3687 --- /dev/null +++ b/queue-5.7/net-macb-undo-operations-in-case-of-failure.patch @@ -0,0 +1,46 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Claudiu Beznea +Date: Thu, 18 Jun 2020 11:37:40 +0300 +Subject: net: macb: undo operations in case of failure + +From: Claudiu Beznea + +[ Upstream commit faa620876b01d6744f1599e279042bb8149247ab ] + +Undo previously done operation in case macb_phylink_connect() +fails. Since macb_reset_hw() is the 1st undo operation the +napi_exit label was renamed to reset_hw. + +Fixes: 7897b071ac3b ("net: macb: convert to phylink") +Signed-off-by: Claudiu Beznea +Acked-by: Nicolas Ferre +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/cadence/macb_main.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/cadence/macb_main.c ++++ b/drivers/net/ethernet/cadence/macb_main.c +@@ -2558,7 +2558,7 @@ static int macb_open(struct net_device * + + err = macb_phylink_connect(bp); + if (err) +- goto napi_exit; ++ goto reset_hw; + + netif_tx_start_all_queues(dev); + +@@ -2567,9 +2567,11 @@ static int macb_open(struct net_device * + + return 0; + +-napi_exit: ++reset_hw: ++ macb_reset_hw(bp); + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) + napi_disable(&queue->napi); ++ macb_free_consistent(bp); + pm_exit: + pm_runtime_put_sync(&bp->pdev->dev); + return err; diff --git a/queue-5.7/net-phy-check-harder-for-errors-in-get_phy_id.patch b/queue-5.7/net-phy-check-harder-for-errors-in-get_phy_id.patch new file mode 100644 index 00000000000..8eef27328d1 --- /dev/null +++ b/queue-5.7/net-phy-check-harder-for-errors-in-get_phy_id.patch @@ -0,0 +1,50 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Florian Fainelli +Date: Fri, 19 Jun 2020 11:47:47 -0700 +Subject: net: phy: Check harder for errors in get_phy_id() + +From: Florian Fainelli + +[ Upstream commit b2ffc75e2e990b09903f9d15ccd53bc5f3a4217c ] + +Commit 02a6efcab675 ("net: phy: allow scanning busses with missing +phys") added a special condition to return -ENODEV in case -ENODEV or +-EIO was returned from the first read of the MII_PHYSID1 register. + +In case the MDIO bus data line pull-up is not strong enough, the MDIO +bus controller will not flag this as a read error. This can happen when +a pluggable daughter card is not connected and weak internal pull-ups +are used (since that is the only option, otherwise the pins are +floating). 
+ +The second read of MII_PHYSID2 will be correctly flagged as an error +though, but now we will return -EIO which will be treated as a hard +error, thus preventing MDIO bus scanning loops to continue successfully. + +Apply the same logic to both register reads, thus allowing the scanning +logic to proceed. + +Fixes: 02a6efcab675 ("net: phy: allow scanning busses with missing phys") +Reviewed-by: Andrew Lunn +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phy_device.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/net/phy/phy_device.c ++++ b/drivers/net/phy/phy_device.c +@@ -798,8 +798,10 @@ static int get_phy_id(struct mii_bus *bu + + /* Grab the bits from PHYIR2, and put them in the lower half */ + phy_reg = mdiobus_read(bus, addr, MII_PHYSID2); +- if (phy_reg < 0) +- return -EIO; ++ if (phy_reg < 0) { ++ /* returning -ENODEV doesn't stop bus scanning */ ++ return (phy_reg == -EIO || phy_reg == -ENODEV) ? -ENODEV : -EIO; ++ } + + *phy_id |= phy_reg; + diff --git a/queue-5.7/net-phy-mscc-avoid-skcipher-api-for-single-block-aes-encryption.patch b/queue-5.7/net-phy-mscc-avoid-skcipher-api-for-single-block-aes-encryption.patch new file mode 100644 index 00000000000..580a9ace951 --- /dev/null +++ b/queue-5.7/net-phy-mscc-avoid-skcipher-api-for-single-block-aes-encryption.patch @@ -0,0 +1,114 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Ard Biesheuvel +Date: Thu, 25 Jun 2020 09:18:16 +0200 +Subject: net: phy: mscc: avoid skcipher API for single block AES encryption + +From: Ard Biesheuvel + +[ Upstream commit 8acd2edbe0e8e36261d98d89ce91b810dd7f4b0d ] + +The skcipher API dynamically instantiates the transformation object +on request that implements the requested algorithm optimally on the +given platform.
This notion of optimality only matters for cases like +bulk network or disk encryption, where performance can be a bottleneck, +or in cases where the algorithm itself is not known at compile time. + +In the mscc case, we are dealing with AES encryption of a single +block, and so neither concern applies, and we are better off using +the AES library interface, which is lightweight and safe for this +kind of use. + +Note that the scatterlist API does not permit references to buffers +that are located on the stack, so the existing code is incorrect in +any case, but avoiding the skcipher and scatterlist APIs entirely is +the most straight-forward approach to fixing this. + +Cc: Antoine Tenart +Cc: Andrew Lunn +Cc: Florian Fainelli +Cc: Heiner Kallweit +Cc: "David S. Miller" +Cc: Jakub Kicinski +Cc: +Fixes: 28c5107aa904e ("net: phy: mscc: macsec support") +Reviewed-by: Eric Biggers +Signed-off-by: Ard Biesheuvel +Tested-by: Antoine Tenart +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/Kconfig | 3 -- + drivers/net/phy/mscc/mscc_macsec.c | 40 ++++++++----------------------------- + 2 files changed, 10 insertions(+), 33 deletions(-) + +--- a/drivers/net/phy/Kconfig ++++ b/drivers/net/phy/Kconfig +@@ -461,8 +461,7 @@ config MICROCHIP_T1_PHY + config MICROSEMI_PHY + tristate "Microsemi PHYs" + depends on MACSEC || MACSEC=n +- select CRYPTO_AES +- select CRYPTO_ECB ++ select CRYPTO_LIB_AES if MACSEC + ---help--- + Currently supports VSC8514, VSC8530, VSC8531, VSC8540 and VSC8541 PHYs + +--- a/drivers/net/phy/mscc/mscc_macsec.c ++++ b/drivers/net/phy/mscc/mscc_macsec.c +@@ -10,7 +10,7 @@ + #include + #include + +-#include ++#include + + #include + +@@ -500,39 +500,17 @@ static u32 vsc8584_macsec_flow_context_i + static int vsc8584_macsec_derive_key(const u8 key[MACSEC_KEYID_LEN], + u16 key_len, u8 hkey[16]) + { +- struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0); +- struct skcipher_request *req = NULL; +- struct 
scatterlist src, dst; +- DECLARE_CRYPTO_WAIT(wait); +- u32 input[4] = {0}; ++ const u8 input[AES_BLOCK_SIZE] = {0}; ++ struct crypto_aes_ctx ctx; + int ret; + +- if (IS_ERR(tfm)) +- return PTR_ERR(tfm); ++ ret = aes_expandkey(&ctx, key, key_len); ++ if (ret) ++ return ret; + +- req = skcipher_request_alloc(tfm, GFP_KERNEL); +- if (!req) { +- ret = -ENOMEM; +- goto out; +- } +- +- skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | +- CRYPTO_TFM_REQ_MAY_SLEEP, crypto_req_done, +- &wait); +- ret = crypto_skcipher_setkey(tfm, key, key_len); +- if (ret < 0) +- goto out; +- +- sg_init_one(&src, input, 16); +- sg_init_one(&dst, hkey, 16); +- skcipher_request_set_crypt(req, &src, &dst, 16, NULL); +- +- ret = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); +- +-out: +- skcipher_request_free(req); +- crypto_free_skcipher(tfm); +- return ret; ++ aes_encrypt(&ctx, hkey, input); ++ memzero_explicit(&ctx, sizeof(ctx)); ++ return 0; + } + + static int vsc8584_macsec_transformation(struct phy_device *phydev, diff --git a/queue-5.7/net-phy-smsc-fix-printing-too-many-logs.patch b/queue-5.7/net-phy-smsc-fix-printing-too-many-logs.patch new file mode 100644 index 00000000000..03b25a3cabc --- /dev/null +++ b/queue-5.7/net-phy-smsc-fix-printing-too-many-logs.patch @@ -0,0 +1,64 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Dejin Zheng +Date: Sat, 20 Jun 2020 22:55:34 +0800 +Subject: net: phy: smsc: fix printing too many logs + +From: Dejin Zheng + +[ Upstream commit 6d61f483f148b856d47a6c96d5d84054d5a9f849 ] + +Commit 7ae7ad2f11ef47 ("net: phy: smsc: use phy_read_poll_timeout() +to simplify the code") will print a lot of logs as follows when Ethernet +cable is not connected: + +[ 4.473105] SMSC LAN8710/LAN8720 2188000.ethernet-1:00: lan87xx_read_status failed: -110 + +When wait 640 ms for check ENERGYON bit, the timeout should not be +regarded as an actual error and an error message also should not be +printed. 
Due to a hardware bug in the LAN87XX device, detection of a plugged-in +Ethernet cable is unstable when the LAN87xx is in Energy Detect +Power-Down mode. The workaround for it involves, when the link is down, +and at each read_status() call: + +- disabling EDPD mode, forcing the PHY out of low-power mode +- waiting 640ms to see if we have any energy detected from the media +- re-enabling entry to EDPD mode + +This is presumably enough to allow the PHY to notice that a cable is +connected, and resume normal operations to negotiate with the partner. +The problem is that when no media is detected, the 640ms wait times +out and, since that commit, an error message is printed. The conversion +to phy_read_poll_timeout() was inappropriate and introduced +this bug. So fix this issue by using read_poll_timeout() instead of +phy_read_poll_timeout(). + +Fixes: 7ae7ad2f11ef47 ("net: phy: smsc: use phy_read_poll_timeout() to simplify the code") +Reported-by: Kevin Groeneveld +Signed-off-by: Dejin Zheng +Reviewed-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/smsc.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/drivers/net/phy/smsc.c ++++ b/drivers/net/phy/smsc.c +@@ -122,10 +122,13 @@ static int lan87xx_read_status(struct ph + if (rc < 0) + return rc; + +- /* Wait max 640 ms to detect energy */ +- phy_read_poll_timeout(phydev, MII_LAN83C185_CTRL_STATUS, rc, +- rc & MII_LAN83C185_ENERGYON, 10000, +- 640000, true); ++ /* Wait max 640 ms to detect energy and the timeout is not ++ * an actual error.
++ */ ++ read_poll_timeout(phy_read, rc, ++ rc & MII_LAN83C185_ENERGYON || rc < 0, ++ 10000, 640000, true, phydev, ++ MII_LAN83C185_CTRL_STATUS); + if (rc < 0) + return rc; + diff --git a/queue-5.7/net-phylink-ensure-manual-pause-mode-configuration-takes-effect.patch b/queue-5.7/net-phylink-ensure-manual-pause-mode-configuration-takes-effect.patch new file mode 100644 index 00000000000..d5c1129937c --- /dev/null +++ b/queue-5.7/net-phylink-ensure-manual-pause-mode-configuration-takes-effect.patch @@ -0,0 +1,86 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Russell King +Date: Tue, 23 Jun 2020 17:47:29 +0100 +Subject: net: phylink: ensure manual pause mode configuration takes effect + +From: Russell King + +[ Upstream commit 2e919bc446faee429ac862a6cdb5e40017051f6b ] + +We have been relying on link events and mac_config() when the manual +pause modes are changed. With recent developments, such as moving +the programming of link state to mac_link_up(), this no longer works. + +To ensure that we update the MAC, we must generate a link-down followed +by a link-up event; we can do that by setting mac_link_dropped and +triggering a resolve. + +Fixes: 91a208f2185a ("net: phylink: propagate resolved link config via mac_link_up()") +Signed-off-by: Russell King +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phylink.c | 27 ++++++++++++++++++++++----- + 1 file changed, 22 insertions(+), 5 deletions(-) + +--- a/drivers/net/phy/phylink.c ++++ b/drivers/net/phy/phylink.c +@@ -1480,6 +1480,8 @@ int phylink_ethtool_set_pauseparam(struc + struct ethtool_pauseparam *pause) + { + struct phylink_link_state *config = &pl->link_config; ++ bool manual_changed; ++ int pause_state; + + ASSERT_RTNL(); + +@@ -1494,15 +1496,15 @@ int phylink_ethtool_set_pauseparam(struc + !pause->autoneg && pause->rx_pause != pause->tx_pause) + return -EINVAL; + +- mutex_lock(&pl->state_mutex); +- config->pause = 0; ++ pause_state = 0; + if (pause->autoneg) +- config->pause |= MLO_PAUSE_AN; ++ pause_state |= MLO_PAUSE_AN; + if (pause->rx_pause) +- config->pause |= MLO_PAUSE_RX; ++ pause_state |= MLO_PAUSE_RX; + if (pause->tx_pause) +- config->pause |= MLO_PAUSE_TX; ++ pause_state |= MLO_PAUSE_TX; + ++ mutex_lock(&pl->state_mutex); + /* + * See the comments for linkmode_set_pause(), wrt the deficiencies + * with the current implementation. A solution to this issue would +@@ -1519,6 +1521,12 @@ int phylink_ethtool_set_pauseparam(struc + linkmode_set_pause(config->advertising, pause->tx_pause, + pause->rx_pause); + ++ manual_changed = (config->pause ^ pause_state) & MLO_PAUSE_AN || ++ (!(pause_state & MLO_PAUSE_AN) && ++ (config->pause ^ pause_state) & MLO_PAUSE_TXRX_MASK); ++ ++ config->pause = pause_state; ++ + if (!pl->phydev && !test_bit(PHYLINK_DISABLE_STOPPED, + &pl->phylink_disable_state)) + phylink_pcs_config(pl, true, &pl->link_config); +@@ -1534,6 +1542,15 @@ int phylink_ethtool_set_pauseparam(struc + phy_set_asym_pause(pl->phydev, pause->rx_pause, + pause->tx_pause); + ++ /* If the manual pause settings changed, make sure we trigger a ++ * resolve to update their state; we can not guarantee that the ++ * link will cycle. 
++ */ ++ if (manual_changed) { ++ pl->mac_link_dropped = true; ++ phylink_run_resolve(pl); ++ } ++ + return 0; + } + EXPORT_SYMBOL_GPL(phylink_ethtool_set_pauseparam); diff --git a/queue-5.7/net-phylink-fix-ethtool-a-with-attached-phys.patch b/queue-5.7/net-phylink-fix-ethtool-a-with-attached-phys.patch new file mode 100644 index 00000000000..c27aa0b17b1 --- /dev/null +++ b/queue-5.7/net-phylink-fix-ethtool-a-with-attached-phys.patch @@ -0,0 +1,53 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Russell King +Date: Tue, 23 Jun 2020 17:47:23 +0100 +Subject: net: phylink: fix ethtool -A with attached PHYs + +From: Russell King + +[ Upstream commit c718af2d00a37587b09e5958d142da7569f3d55b ] + +Fix a phylink's ethtool set_pauseparam support deadlock caused by phylib +interacting with phylink: we must not hold the state lock while calling +phylib functions that may call into phylink_phy_change(). + +Fixes: f904f15ea9b5 ("net: phylink: allow ethtool -A to change flow control advertisement") +Signed-off-by: Russell King +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phylink.c | 20 +++++++++++--------- + 1 file changed, 11 insertions(+), 9 deletions(-) + +--- a/drivers/net/phy/phylink.c ++++ b/drivers/net/phy/phylink.c +@@ -1519,18 +1519,20 @@ int phylink_ethtool_set_pauseparam(struc + linkmode_set_pause(config->advertising, pause->tx_pause, + pause->rx_pause); + +- /* If we have a PHY, phylib will call our link state function if the +- * mode has changed, which will trigger a resolve and update the MAC +- * configuration. ++ if (!pl->phydev && !test_bit(PHYLINK_DISABLE_STOPPED, ++ &pl->phylink_disable_state)) ++ phylink_pcs_config(pl, true, &pl->link_config); ++ ++ mutex_unlock(&pl->state_mutex); ++ ++ /* If we have a PHY, a change of the pause frame advertisement will ++ * cause phylib to renegotiate (if AN is enabled) which will in turn ++ * call our phylink_phy_change() and trigger a resolve. 
Note that ++ * we can't hold our state mutex while calling phy_set_asym_pause(). + */ +- if (pl->phydev) { ++ if (pl->phydev) + phy_set_asym_pause(pl->phydev, pause->rx_pause, + pause->tx_pause); +- } else if (!test_bit(PHYLINK_DISABLE_STOPPED, +- &pl->phylink_disable_state)) { +- phylink_pcs_config(pl, true, &pl->link_config); +- } +- mutex_unlock(&pl->state_mutex); + + return 0; + } diff --git a/queue-5.7/net-usb-ax88179_178a-fix-packet-alignment-padding.patch b/queue-5.7/net-usb-ax88179_178a-fix-packet-alignment-padding.patch new file mode 100644 index 00000000000..5573a629433 --- /dev/null +++ b/queue-5.7/net-usb-ax88179_178a-fix-packet-alignment-padding.patch @@ -0,0 +1,69 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Jeremy Kerr +Date: Mon, 15 Jun 2020 10:54:56 +0800 +Subject: net: usb: ax88179_178a: fix packet alignment padding + +From: Jeremy Kerr + +[ Upstream commit e869e7a17798d85829fa7d4f9bbe1eebd4b2d3f6 ] + +Using a AX88179 device (0b95:1790), I see two bytes of appended data on +every RX packet. For example, this 48-byte ping, using 0xff as a +payload byte: + + 04:20:22.528472 IP 192.168.1.1 > 192.168.1.2: ICMP echo request, id 2447, seq 1, length 64 + 0x0000: 000a cd35 ea50 000a cd35 ea4f 0800 4500 + 0x0010: 0054 c116 4000 4001 f63e c0a8 0101 c0a8 + 0x0020: 0102 0800 b633 098f 0001 87ea cd5e 0000 + 0x0030: 0000 dcf2 0600 0000 0000 ffff ffff ffff + 0x0040: ffff ffff ffff ffff ffff ffff ffff ffff + 0x0050: ffff ffff ffff ffff ffff ffff ffff ffff + 0x0060: ffff 961f + +Those last two bytes - 96 1f - aren't part of the original packet. + +In the ax88179 RX path, the usbnet rx_fixup function trims a 2-byte +'alignment pseudo header' from the start of the packet, and sets the +length from a per-packet field populated by hardware. It looks like that +length field *includes* the 2-byte header; the current driver assumes +that it's excluded. 
+ +This change trims the 2-byte alignment header after we've set the packet +length, so the resulting packet length is correct. While we're moving +the comment around, this also fixes the spelling of 'pseudo'. + +Signed-off-by: Jeremy Kerr +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/ax88179_178a.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/net/usb/ax88179_178a.c ++++ b/drivers/net/usb/ax88179_178a.c +@@ -1414,10 +1414,10 @@ static int ax88179_rx_fixup(struct usbne + } + + if (pkt_cnt == 0) { +- /* Skip IP alignment psudo header */ +- skb_pull(skb, 2); + skb->len = pkt_len; +- skb_set_tail_pointer(skb, pkt_len); ++ /* Skip IP alignment pseudo header */ ++ skb_pull(skb, 2); ++ skb_set_tail_pointer(skb, skb->len); + skb->truesize = pkt_len + sizeof(struct sk_buff); + ax88179_rx_checksum(skb, pkt_hdr); + return 1; +@@ -1426,8 +1426,9 @@ static int ax88179_rx_fixup(struct usbne + ax_skb = skb_clone(skb, GFP_ATOMIC); + if (ax_skb) { + ax_skb->len = pkt_len; +- ax_skb->data = skb->data + 2; +- skb_set_tail_pointer(ax_skb, pkt_len); ++ /* Skip IP alignment pseudo header */ ++ skb_pull(ax_skb, 2); ++ skb_set_tail_pointer(ax_skb, ax_skb->len); + ax_skb->truesize = pkt_len + sizeof(struct sk_buff); + ax88179_rx_checksum(ax_skb, pkt_hdr); + usbnet_skb_return(dev, ax_skb); diff --git a/queue-5.7/of-of_mdio-correct-loop-scanning-logic.patch b/queue-5.7/of-of_mdio-correct-loop-scanning-logic.patch new file mode 100644 index 00000000000..7c3a21321ed --- /dev/null +++ b/queue-5.7/of-of_mdio-correct-loop-scanning-logic.patch @@ -0,0 +1,49 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Florian Fainelli +Date: Fri, 19 Jun 2020 11:47:46 -0700 +Subject: of: of_mdio: Correct loop scanning logic + +From: Florian Fainelli + +[ Upstream commit 5a8d7f126c97d04d893f5e5be2b286437a0d01b0 ] + +Commit 209c65b61d94 ("drivers/of/of_mdio.c:fix of_mdiobus_register()") +introduced a break of the loop on 
the premise that a successful +registration should exit the loop. The premise is correct but not to +code, because rc && rc != -ENODEV is just a special error condition, +that means we would exit the loop even with rc == -ENODEV which is +absolutely not correct since this is the error code to indicate to the +MDIO bus layer that scanning should continue. + +Fix this by explicitly checking for rc = 0 as the only valid condition +to break out of the loop. + +Fixes: 209c65b61d94 ("drivers/of/of_mdio.c:fix of_mdiobus_register()") +Reviewed-by: Andrew Lunn +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/of/of_mdio.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/drivers/of/of_mdio.c ++++ b/drivers/of/of_mdio.c +@@ -303,10 +303,15 @@ int of_mdiobus_register(struct mii_bus * + child, addr); + + if (of_mdiobus_child_is_phy(child)) { ++ /* -ENODEV is the return code that PHYLIB has ++ * standardized on to indicate that bus ++ * scanning should continue. 
++ */ + rc = of_mdiobus_register_phy(mdio, child, addr); +- if (rc && rc != -ENODEV) ++ if (!rc) ++ break; ++ if (rc != -ENODEV) + goto unregister; +- break; + } + } + } diff --git a/queue-5.7/openvswitch-take-into-account-de-fragmentation-gso_size-in-execute_check_pkt_len.patch b/queue-5.7/openvswitch-take-into-account-de-fragmentation-gso_size-in-execute_check_pkt_len.patch new file mode 100644 index 00000000000..eadab2d1a2f --- /dev/null +++ b/queue-5.7/openvswitch-take-into-account-de-fragmentation-gso_size-in-execute_check_pkt_len.patch @@ -0,0 +1,53 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Lorenzo Bianconi +Date: Tue, 23 Jun 2020 18:33:15 +0200 +Subject: openvswitch: take into account de-fragmentation/gso_size in execute_check_pkt_len + +From: Lorenzo Bianconi + +[ Upstream commit 17843655708e1941c0653af3cd61be6948e36f43 ] + +ovs connection tracking module performs de-fragmentation on incoming +fragmented traffic. Take into account if traffic has been de-fragmented +in execute_check_pkt_len action otherwise we will perform the wrong +nested action considering the original packet size. This issue typically +occurs if ovs-vswitchd adds a rule in the pipeline that requires connection +tracking (e.g. OVN stateful ACLs) before execute_check_pkt_len action. +Moreover take into account GSO fragment size for GSO packet in +execute_check_pkt_len routine. + +Fixes: 4d5ec89fc8d14 ("net: openvswitch: Add a new action check_pkt_len") +Signed-off-by: Lorenzo Bianconi +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/actions.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/net/openvswitch/actions.c ++++ b/net/openvswitch/actions.c +@@ -1169,9 +1169,10 @@ static int execute_check_pkt_len(struct + struct sw_flow_key *key, + const struct nlattr *attr, bool last) + { ++ struct ovs_skb_cb *ovs_cb = OVS_CB(skb); + const struct nlattr *actions, *cpl_arg; ++ int len, max_len, rem = nla_len(attr); + const struct check_pkt_len_arg *arg; +- int rem = nla_len(attr); + bool clone_flow_key; + + /* The first netlink attribute in 'attr' is always +@@ -1180,7 +1181,11 @@ static int execute_check_pkt_len(struct + cpl_arg = nla_data(attr); + arg = nla_data(cpl_arg); + +- if (skb->len <= arg->pkt_len) { ++ len = ovs_cb->mru ? ovs_cb->mru + skb->mac_len : skb->len; ++ max_len = arg->pkt_len; ++ ++ if ((skb_is_gso(skb) && skb_gso_validate_mac_len(skb, max_len)) || ++ len <= max_len) { + /* Second netlink attribute in 'attr' is always + * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'. + */ diff --git a/queue-5.7/r8169-fix-firmware-not-resetting-tp-ocp_base.patch b/queue-5.7/r8169-fix-firmware-not-resetting-tp-ocp_base.patch new file mode 100644 index 00000000000..c7d594d9ac2 --- /dev/null +++ b/queue-5.7/r8169-fix-firmware-not-resetting-tp-ocp_base.patch @@ -0,0 +1,39 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Heiner Kallweit +Date: Thu, 18 Jun 2020 23:25:50 +0200 +Subject: r8169: fix firmware not resetting tp->ocp_base + +From: Heiner Kallweit + +[ Upstream commit 89fbd26cca7ec9e82ec4787a4b6e95939b57d073 ] + +Typically the firmware takes care that tp->ocp_base is reset to its +default value. That's not the case (at least) for RTL8117. +As a result subsequent PHY access reads/writes the wrong page and +the link is broken. Fix this by resetting tp->ocp_base explicitly.
+ +Fixes: 229c1e0dfd3d ("r8169: load firmware for RTL8168fp/RTL8117") +Reported-by: Aaron Ma +Tested-by: Aaron Ma +Signed-off-by: Heiner Kallweit +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/r8169_main.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/realtek/r8169_main.c ++++ b/drivers/net/ethernet/realtek/r8169_main.c +@@ -2192,8 +2192,11 @@ static void rtl_release_firmware(struct + void r8169_apply_firmware(struct rtl8169_private *tp) + { + /* TODO: release firmware if rtl_fw_write_firmware signals failure. */ +- if (tp->rtl_fw) ++ if (tp->rtl_fw) { + rtl_fw_write_firmware(tp, tp->rtl_fw); ++ /* At least one firmware doesn't reset tp->ocp_base. */ ++ tp->ocp_base = OCP_STD_PHY_BASE; ++ } + } + + static void rtl8168_config_eee_mac(struct rtl8169_private *tp) diff --git a/queue-5.7/rocker-fix-incorrect-error-handling-in-dma_rings_init.patch b/queue-5.7/rocker-fix-incorrect-error-handling-in-dma_rings_init.patch new file mode 100644 index 00000000000..d8d8721f594 --- /dev/null +++ b/queue-5.7/rocker-fix-incorrect-error-handling-in-dma_rings_init.patch @@ -0,0 +1,36 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Aditya Pakki +Date: Fri, 12 Jun 2020 15:27:55 -0500 +Subject: rocker: fix incorrect error handling in dma_rings_init + +From: Aditya Pakki + +[ Upstream commit 58d0c864e1a759a15c9df78f50ea5a5c32b3989e ] + +In rocker_dma_rings_init, the goto blocks in case of errors +caused by the functions rocker_dma_cmd_ring_waits_alloc() and +rocker_dma_ring_create() are incorrect. The patch fixes the +order consistent with cleanup in rocker_dma_rings_fini(). + +Signed-off-by: Aditya Pakki +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/rocker/rocker_main.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/rocker/rocker_main.c ++++ b/drivers/net/ethernet/rocker/rocker_main.c +@@ -647,10 +647,10 @@ static int rocker_dma_rings_init(struct + err_dma_event_ring_bufs_alloc: + rocker_dma_ring_destroy(rocker, &rocker->event_ring); + err_dma_event_ring_create: ++ rocker_dma_cmd_ring_waits_free(rocker); ++err_dma_cmd_ring_waits_alloc: + rocker_dma_ring_bufs_free(rocker, &rocker->cmd_ring, + PCI_DMA_BIDIRECTIONAL); +-err_dma_cmd_ring_waits_alloc: +- rocker_dma_cmd_ring_waits_free(rocker); + err_dma_cmd_ring_bufs_alloc: + rocker_dma_ring_destroy(rocker, &rocker->cmd_ring); + return err; diff --git a/queue-5.7/rxrpc-fix-notification-call-on-completion-of-discarded-calls.patch b/queue-5.7/rxrpc-fix-notification-call-on-completion-of-discarded-calls.patch new file mode 100644 index 00000000000..76ca4663bfc --- /dev/null +++ b/queue-5.7/rxrpc-fix-notification-call-on-completion-of-discarded-calls.patch @@ -0,0 +1,146 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: David Howells +Date: Fri, 19 Jun 2020 23:38:16 +0100 +Subject: rxrpc: Fix notification call on completion of discarded calls + +From: David Howells + +[ Upstream commit 0041cd5a50442db6e456b145892a0eaf2dff061f ] + +When preallocated service calls are being discarded, they're passed to +->discard_new_call() to have the caller clean up any attached higher-layer +preallocated pieces before being marked completed. However, the act of +marking them completed now invokes the call's notification function - which +causes a problem because that function might assume that the previously +freed pieces of memory are still there. + +Fix this by setting a dummy notification function on the socket after +calling ->discard_new_call(). + +This results in the following kasan message when the kafs module is +removed. 
+ +================================================================== +BUG: KASAN: use-after-free in afs_wake_up_async_call+0x6aa/0x770 fs/afs/rxrpc.c:707 +Write of size 1 at addr ffff8880946c39e4 by task kworker/u4:1/21 + +CPU: 0 PID: 21 Comm: kworker/u4:1 Not tainted 5.8.0-rc1-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Workqueue: netns cleanup_net +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x18f/0x20d lib/dump_stack.c:118 + print_address_description.constprop.0.cold+0xd3/0x413 mm/kasan/report.c:383 + __kasan_report mm/kasan/report.c:513 [inline] + kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 + afs_wake_up_async_call+0x6aa/0x770 fs/afs/rxrpc.c:707 + rxrpc_notify_socket+0x1db/0x5d0 net/rxrpc/recvmsg.c:40 + __rxrpc_set_call_completion.part.0+0x172/0x410 net/rxrpc/recvmsg.c:76 + __rxrpc_call_completed net/rxrpc/recvmsg.c:112 [inline] + rxrpc_call_completed+0xca/0xf0 net/rxrpc/recvmsg.c:111 + rxrpc_discard_prealloc+0x781/0xab0 net/rxrpc/call_accept.c:233 + rxrpc_listen+0x147/0x360 net/rxrpc/af_rxrpc.c:245 + afs_close_socket+0x95/0x320 fs/afs/rxrpc.c:110 + afs_net_exit+0x1bc/0x310 fs/afs/main.c:155 + ops_exit_list.isra.0+0xa8/0x150 net/core/net_namespace.c:186 + cleanup_net+0x511/0xa50 net/core/net_namespace.c:603 + process_one_work+0x965/0x1690 kernel/workqueue.c:2269 + worker_thread+0x96/0xe10 kernel/workqueue.c:2415 + kthread+0x3b5/0x4a0 kernel/kthread.c:291 + ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 + +Allocated by task 6820: + save_stack+0x1b/0x40 mm/kasan/common.c:48 + set_track mm/kasan/common.c:56 [inline] + __kasan_kmalloc mm/kasan/common.c:494 [inline] + __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:467 + kmem_cache_alloc_trace+0x153/0x7d0 mm/slab.c:3551 + kmalloc include/linux/slab.h:555 [inline] + kzalloc include/linux/slab.h:669 [inline] + afs_alloc_call+0x55/0x630 fs/afs/rxrpc.c:141 + afs_charge_preallocation+0xe9/0x2d0 fs/afs/rxrpc.c:757 + 
afs_open_socket+0x292/0x360 fs/afs/rxrpc.c:92 + afs_net_init+0xa6c/0xe30 fs/afs/main.c:125 + ops_init+0xaf/0x420 net/core/net_namespace.c:151 + setup_net+0x2de/0x860 net/core/net_namespace.c:341 + copy_net_ns+0x293/0x590 net/core/net_namespace.c:482 + create_new_namespaces+0x3fb/0xb30 kernel/nsproxy.c:110 + unshare_nsproxy_namespaces+0xbd/0x1f0 kernel/nsproxy.c:231 + ksys_unshare+0x43d/0x8e0 kernel/fork.c:2983 + __do_sys_unshare kernel/fork.c:3051 [inline] + __se_sys_unshare kernel/fork.c:3049 [inline] + __x64_sys_unshare+0x2d/0x40 kernel/fork.c:3049 + do_syscall_64+0x60/0xe0 arch/x86/entry/common.c:359 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Freed by task 21: + save_stack+0x1b/0x40 mm/kasan/common.c:48 + set_track mm/kasan/common.c:56 [inline] + kasan_set_free_info mm/kasan/common.c:316 [inline] + __kasan_slab_free+0xf7/0x140 mm/kasan/common.c:455 + __cache_free mm/slab.c:3426 [inline] + kfree+0x109/0x2b0 mm/slab.c:3757 + afs_put_call+0x585/0xa40 fs/afs/rxrpc.c:190 + rxrpc_discard_prealloc+0x764/0xab0 net/rxrpc/call_accept.c:230 + rxrpc_listen+0x147/0x360 net/rxrpc/af_rxrpc.c:245 + afs_close_socket+0x95/0x320 fs/afs/rxrpc.c:110 + afs_net_exit+0x1bc/0x310 fs/afs/main.c:155 + ops_exit_list.isra.0+0xa8/0x150 net/core/net_namespace.c:186 + cleanup_net+0x511/0xa50 net/core/net_namespace.c:603 + process_one_work+0x965/0x1690 kernel/workqueue.c:2269 + worker_thread+0x96/0xe10 kernel/workqueue.c:2415 + kthread+0x3b5/0x4a0 kernel/kthread.c:291 + ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 + +The buggy address belongs to the object at ffff8880946c3800 + which belongs to the cache kmalloc-1k of size 1024 +The buggy address is located 484 bytes inside of + 1024-byte region [ffff8880946c3800, ffff8880946c3c00) +The buggy address belongs to the page: +page:ffffea000251b0c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 +flags: 0xfffe0000000200(slab) +raw: 00fffe0000000200 ffffea0002546508 ffffea00024fa248 ffff8880aa000c40 +raw: 0000000000000000 
ffff8880946c3000 0000000100000002 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff8880946c3880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8880946c3900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +>ffff8880946c3980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff8880946c3a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8880946c3a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +================================================================== + +Reported-by: syzbot+d3eccef36ddbd02713e9@syzkaller.appspotmail.com +Fixes: 5ac0d62226a0 ("rxrpc: Fix missing notification") +Signed-off-by: David Howells +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rxrpc/call_accept.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/net/rxrpc/call_accept.c ++++ b/net/rxrpc/call_accept.c +@@ -22,6 +22,11 @@ + #include + #include "ar-internal.h" + ++static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call, ++ unsigned long user_call_ID) ++{ ++} ++ + /* + * Preallocate a single service call, connection and peer and, if possible, + * give them a user ID and attach the user's side of the ID to them. 
+@@ -228,6 +233,8 @@ void rxrpc_discard_prealloc(struct rxrpc + if (rx->discard_new_call) { + _debug("discard %lx", call->user_call_ID); + rx->discard_new_call(call, call->user_call_ID); ++ if (call->notify_rx) ++ call->notify_rx = rxrpc_dummy_notify; + rxrpc_put_call(call, rxrpc_call_put_kernel); + } + rxrpc_call_completed(call); diff --git a/queue-5.7/sch_cake-don-t-call-diffserv-parsing-code-when-it-is-not-needed.patch b/queue-5.7/sch_cake-don-t-call-diffserv-parsing-code-when-it-is-not-needed.patch new file mode 100644 index 00000000000..890b8cff6bc --- /dev/null +++ b/queue-5.7/sch_cake-don-t-call-diffserv-parsing-code-when-it-is-not-needed.patch @@ -0,0 +1,62 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: "Toke Høiland-Jørgensen" +Date: Thu, 25 Jun 2020 22:12:08 +0200 +Subject: sch_cake: don't call diffserv parsing code when it is not needed + +From: "Toke Høiland-Jørgensen" + +[ Upstream commit 8c95eca0bb8c4bd2231a0d581f1ad0d50c90488c ] + +As a further optimisation of the diffserv parsing codepath, we can skip it +entirely if CAKE is configured to neither use diffserv-based +classification, nor to zero out the diffserv bits. + +Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits") +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_cake.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_cake.c ++++ b/net/sched/sch_cake.c +@@ -1514,7 +1514,7 @@ static unsigned int cake_drop(struct Qdi + return idx + (tin << 16); + } + +-static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) ++static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash) + { + const int offset = skb_network_offset(skb); + u16 *buf, buf_; +@@ -1575,14 +1575,17 @@ static struct cake_tin_data *cake_select + { + struct cake_sched_data *q = qdisc_priv(sch); + u32 tin, mark; ++ bool wash; + u8 dscp; + + /* Tin selection: Default to diffserv-based selection, allow overriding +- * using firewall marks or skb->priority. ++ * using firewall marks or skb->priority. Call DSCP parsing early if ++ * wash is enabled, otherwise defer to below to skip unneeded parsing. + */ +- dscp = cake_handle_diffserv(skb, +- q->rate_flags & CAKE_FLAG_WASH); + mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft; ++ wash = !!(q->rate_flags & CAKE_FLAG_WASH); ++ if (wash) ++ dscp = cake_handle_diffserv(skb, wash); + + if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT) + tin = 0; +@@ -1596,6 +1599,8 @@ static struct cake_tin_data *cake_select + tin = q->tin_order[TC_H_MIN(skb->priority) - 1]; + + else { ++ if (!wash) ++ dscp = cake_handle_diffserv(skb, wash); + tin = q->tin_index[dscp]; + + if (unlikely(tin >= q->tin_cnt)) diff --git a/queue-5.7/sch_cake-don-t-try-to-reallocate-or-unshare-skb-unconditionally.patch b/queue-5.7/sch_cake-don-t-try-to-reallocate-or-unshare-skb-unconditionally.patch new file mode 100644 index 00000000000..f23e347e2f8 --- /dev/null +++ b/queue-5.7/sch_cake-don-t-try-to-reallocate-or-unshare-skb-unconditionally.patch @@ -0,0 +1,96 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Ilya Ponetayev +Date: Thu, 25 Jun 2020 22:12:07 +0200 +Subject: sch_cake: don't try to reallocate or unshare skb unconditionally + +From: Ilya 
Ponetayev + +[ Upstream commit 9208d2863ac689a563b92f2161d8d1e7127d0add ] + +cake_handle_diffserv() tries to linearize mac and network header parts of +skb and to make it writable unconditionally. In some cases it leads to full +skb reallocation, which reduces throughput and increases CPU load. Some +measurements of IPv4 forward + NAPT on MIPS router with 580 MHz single-core +CPU was conducted. It appears that on kernel 4.9 skb_try_make_writable() +reallocates skb, if skb was allocated in ethernet driver via so-called +'build skb' method from page cache (it was discovered by strange increase +of kmalloc-2048 slab at first). + +Obtain DSCP value via read-only skb_header_pointer() call, and leave +linearization only for DSCP bleaching or ECN CE setting. And, as an +additional optimisation, skip diffserv parsing entirely if it is not needed +by the current configuration. + +Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits") +Signed-off-by: Ilya Ponetayev +[ fix a few style issues, reflow commit message ] +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_cake.c | 41 ++++++++++++++++++++++++++++++----------- + 1 file changed, 30 insertions(+), 11 deletions(-) + +--- a/net/sched/sch_cake.c ++++ b/net/sched/sch_cake.c +@@ -1516,30 +1516,49 @@ static unsigned int cake_drop(struct Qdi + + static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) + { +- int wlen = skb_network_offset(skb); ++ const int offset = skb_network_offset(skb); ++ u16 *buf, buf_; + u8 dscp; + + switch (tc_skb_protocol(skb)) { + case htons(ETH_P_IP): +- wlen += sizeof(struct iphdr); +- if (!pskb_may_pull(skb, wlen) || +- skb_try_make_writable(skb, wlen)) ++ buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_); ++ if (unlikely(!buf)) + return 0; + +- dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; +- if (wash && dscp) ++ /* ToS is in the second byte of iphdr */ ++ dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2; ++ ++ if (wash && dscp) { ++ const int wlen = offset + sizeof(struct iphdr); ++ ++ if (!pskb_may_pull(skb, wlen) || ++ skb_try_make_writable(skb, wlen)) ++ return 0; ++ + ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); ++ } ++ + return dscp; + + case htons(ETH_P_IPV6): +- wlen += sizeof(struct ipv6hdr); +- if (!pskb_may_pull(skb, wlen) || +- skb_try_make_writable(skb, wlen)) ++ buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_); ++ if (unlikely(!buf)) + return 0; + +- dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; +- if (wash && dscp) ++ /* Traffic class is in the first and second bytes of ipv6hdr */ ++ dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2; ++ ++ if (wash && dscp) { ++ const int wlen = offset + sizeof(struct ipv6hdr); ++ ++ if (!pskb_may_pull(skb, wlen) || ++ skb_try_make_writable(skb, wlen)) ++ return 0; ++ + ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); ++ } ++ + return dscp; + + case htons(ETH_P_ARP): diff --git a/queue-5.7/sch_cake-fix-a-few-style-nits.patch b/queue-5.7/sch_cake-fix-a-few-style-nits.patch new file mode 100644 index 
00000000000..0196e8263b6 --- /dev/null +++ b/queue-5.7/sch_cake-fix-a-few-style-nits.patch @@ -0,0 +1,41 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: "Toke Høiland-Jørgensen" +Date: Thu, 25 Jun 2020 22:12:09 +0200 +Subject: sch_cake: fix a few style nits + +From: "Toke Høiland-Jørgensen" + +[ Upstream commit 3f608f0c41360b11b04c763f348b712f651c8bac ] + +I spotted a few nits when comparing the in-tree version of sch_cake with +the out-of-tree one: A redundant error variable declaration shadowing an +outer declaration, and an indentation alignment issue. Fix both of these. + +Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_cake.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/sched/sch_cake.c ++++ b/net/sched/sch_cake.c +@@ -2678,7 +2678,7 @@ static int cake_init(struct Qdisc *sch, + qdisc_watchdog_init(&q->watchdog, sch); + + if (opt) { +- int err = cake_change(sch, opt, extack); ++ err = cake_change(sch, opt, extack); + + if (err) + return err; +@@ -2995,7 +2995,7 @@ static int cake_dump_class_stats(struct + PUT_STAT_S32(BLUE_TIMER_US, + ktime_to_us( + ktime_sub(now, +- flow->cvars.blue_timer))); ++ flow->cvars.blue_timer))); + } + if (flow->cvars.dropping) { + PUT_STAT_S32(DROP_NEXT_US, diff --git a/queue-5.7/sctp-don-t-advertise-ipv4-addresses-if-ipv6only-is-set-on-the-socket.patch b/queue-5.7/sctp-don-t-advertise-ipv4-addresses-if-ipv6only-is-set-on-the-socket.patch new file mode 100644 index 00000000000..e0b13a33088 --- /dev/null +++ b/queue-5.7/sctp-don-t-advertise-ipv4-addresses-if-ipv6only-is-set-on-the-socket.patch @@ -0,0 +1,88 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Marcelo Ricardo Leitner +Date: Wed, 24 Jun 2020 17:34:18 -0300 +Subject: sctp: Don't advertise IPv4 addresses if ipv6only is set on the socket + +From: Marcelo Ricardo Leitner + +[ 
Upstream commit 471e39df96b9a4c4ba88a2da9e25a126624d7a9c ] + +If a socket is set ipv6only, it will still send IPv4 addresses in the +INIT and INIT_ACK packets. This potentially misleads the peer into using +them, which then would cause association termination. + +The fix is to not add IPv4 addresses to ipv6only sockets. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: Corey Minyard +Signed-off-by: Marcelo Ricardo Leitner +Tested-by: Corey Minyard +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sctp/constants.h | 8 +++++--- + net/sctp/associola.c | 5 ++++- + net/sctp/bind_addr.c | 1 + + net/sctp/protocol.c | 3 ++- + 4 files changed, 12 insertions(+), 5 deletions(-) + +--- a/include/net/sctp/constants.h ++++ b/include/net/sctp/constants.h +@@ -353,11 +353,13 @@ enum { + ipv4_is_anycast_6to4(a)) + + /* Flags used for the bind address copy functions. */ +-#define SCTP_ADDR6_ALLOWED 0x00000001 /* IPv6 address is allowed by ++#define SCTP_ADDR4_ALLOWED 0x00000001 /* IPv4 address is allowed by + local sock family */ +-#define SCTP_ADDR4_PEERSUPP 0x00000002 /* IPv4 address is supported by ++#define SCTP_ADDR6_ALLOWED 0x00000002 /* IPv6 address is allowed by ++ local sock family */ ++#define SCTP_ADDR4_PEERSUPP 0x00000004 /* IPv4 address is supported by + peer */ +-#define SCTP_ADDR6_PEERSUPP 0x00000004 /* IPv6 address is supported by ++#define SCTP_ADDR6_PEERSUPP 0x00000008 /* IPv6 address is supported by + peer */ + + /* Reasons to retransmit. */ +--- a/net/sctp/associola.c ++++ b/net/sctp/associola.c +@@ -1565,12 +1565,15 @@ void sctp_assoc_rwnd_decrease(struct sct + int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, + enum sctp_scope scope, gfp_t gfp) + { ++ struct sock *sk = asoc->base.sk; + int flags; + + /* Use scoping rules to determine the subset of addresses from + * the endpoint. + */ +- flags = (PF_INET6 == asoc->base.sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0; ++ flags = (PF_INET6 == sk->sk_family) ? 
SCTP_ADDR6_ALLOWED : 0; ++ if (!inet_v6_ipv6only(sk)) ++ flags |= SCTP_ADDR4_ALLOWED; + if (asoc->peer.ipv4_address) + flags |= SCTP_ADDR4_PEERSUPP; + if (asoc->peer.ipv6_address) +--- a/net/sctp/bind_addr.c ++++ b/net/sctp/bind_addr.c +@@ -461,6 +461,7 @@ static int sctp_copy_one_addr(struct net + * well as the remote peer. + */ + if ((((AF_INET == addr->sa.sa_family) && ++ (flags & SCTP_ADDR4_ALLOWED) && + (flags & SCTP_ADDR4_PEERSUPP))) || + (((AF_INET6 == addr->sa.sa_family) && + (flags & SCTP_ADDR6_ALLOWED) && +--- a/net/sctp/protocol.c ++++ b/net/sctp/protocol.c +@@ -148,7 +148,8 @@ int sctp_copy_local_addr_list(struct net + * sock as well as the remote peer. + */ + if (addr->a.sa.sa_family == AF_INET && +- !(copy_flags & SCTP_ADDR4_PEERSUPP)) ++ (!(copy_flags & SCTP_ADDR4_ALLOWED) || ++ !(copy_flags & SCTP_ADDR4_PEERSUPP))) + continue; + if (addr->a.sa.sa_family == AF_INET6 && + (!(copy_flags & SCTP_ADDR6_ALLOWED) || diff --git a/queue-5.7/series b/queue-5.7/series index 527592dfea3..c50018ff551 100644 --- a/queue-5.7/series +++ b/queue-5.7/series @@ -1,2 +1,51 @@ spi-spi-fsl-dspi-free-dma-memory-with-matching-function.patch block-bio-integrity-don-t-free-buf-if-bio_integrity_add_page-failed.patch +enetc-fix-tx-rings-bitmap-iteration-range-irq-handling.patch +ethtool-fix-check-in-ethtool_rx_flow_rule_create.patch +geneve-allow-changing-df-behavior-after-creation.patch +ibmveth-fix-max-mtu-limit.patch +mld-fix-memory-leak-in-ipv6_mc_destroy_dev.patch +mlxsw-spectrum-do-not-rely-on-machine-endianness.patch +mvpp2-ethtool-rxtx-stats-fix.patch +net-bridge-enfore-alignment-for-ethernet-address.patch +net-core-reduce-recursion-limit-value.patch +net-do-not-clear-the-sock-tx-queue-in-sk_set_socket.patch +net-ethtool-add-missing-string-for-netif_f_gso_tunnel_remcsum.patch +net-fix-memleak-in-register_netdevice.patch +net-fix-the-arp-error-in-some-cases.patch +net-increment-xmit_recursion-level-in-dev_direct_xmit.patch 
+net-usb-ax88179_178a-fix-packet-alignment-padding.patch +openvswitch-take-into-account-de-fragmentation-gso_size-in-execute_check_pkt_len.patch +rocker-fix-incorrect-error-handling-in-dma_rings_init.patch +rxrpc-fix-notification-call-on-completion-of-discarded-calls.patch +sctp-don-t-advertise-ipv4-addresses-if-ipv6only-is-set-on-the-socket.patch +tcp-don-t-ignore-ecn-cwr-on-pure-ack.patch +tcp-grow-window-for-ooo-packets-only-for-sack-flows.patch +tg3-driver-sleeps-indefinitely-when-eeh-errors-exceed-eeh_max_freezes.patch +ip6_gre-fix-use-after-free-in-ip6gre_tunnel_lookup.patch +net-phy-check-harder-for-errors-in-get_phy_id.patch +ip_tunnel-fix-use-after-free-in-ip_tunnel_lookup.patch +bnxt_en-store-the-running-firmware-version-code.patch +bnxt_en-do-not-enable-legacy-tx-push-on-older-firmware.patch +bnxt_en-fix-statistics-counters-issue-during-ifdown-with-older-firmware.patch +bnxt_en-read-vpd-info-only-for-pfs.patch +net-phylink-fix-ethtool-a-with-attached-phys.patch +net-phylink-ensure-manual-pause-mode-configuration-takes-effect.patch +sch_cake-don-t-try-to-reallocate-or-unshare-skb-unconditionally.patch +sch_cake-don-t-call-diffserv-parsing-code-when-it-is-not-needed.patch +sch_cake-fix-a-few-style-nits.patch +tcp_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch +bpf-tcp-bpf_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch +net-macb-undo-operations-in-case-of-failure.patch +r8169-fix-firmware-not-resetting-tp-ocp_base.patch +mptcp-drop-sndr_key-in-mptcp_syn_options.patch +mptcp-fix-memory-leak-in-mptcp_subflow_create_socket.patch +net-ethtool-add-missing-netif_f_gso_fraglist-feature-string.patch +net-macb-call-pm_runtime_put_sync-on-failure-path.patch +net-phy-mscc-avoid-skcipher-api-for-single-block-aes-encryption.patch +of-of_mdio-correct-loop-scanning-logic.patch +wireguard-device-avoid-circular-netns-references.patch +bareudp-fixed-multiproto-mode-configuration.patch +ionic-update-the-queue-count-on-open.patch 
+net-dsa-bcm_sf2-fix-node-reference-count.patch +net-phy-smsc-fix-printing-too-many-logs.patch diff --git a/queue-5.7/tcp-don-t-ignore-ecn-cwr-on-pure-ack.patch b/queue-5.7/tcp-don-t-ignore-ecn-cwr-on-pure-ack.patch new file mode 100644 index 00000000000..aafa4630949 --- /dev/null +++ b/queue-5.7/tcp-don-t-ignore-ecn-cwr-on-pure-ack.patch @@ -0,0 +1,97 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Denis Kirjanov +Date: Thu, 25 Jun 2020 14:51:06 +0300 +Subject: tcp: don't ignore ECN CWR on pure ACK + +From: Denis Kirjanov + +[ Upstream commit 2570284060b48f3f79d8f1a2698792f36c385e9a ] + +there is a problem with the CWR flag set in an incoming ACK segment +and it leads to the situation when the ECE flag is latched forever + +the following packetdrill script shows what happens: + +// Stack receives incoming segments with CE set ++0.1 <[ect0] . 11001:12001(1000) ack 1001 win 65535 ++0.0 <[ce] . 12001:13001(1000) ack 1001 win 65535 ++0.0 <[ect0] P. 13001:14001(1000) ack 1001 win 65535 + +// Stack responds with ECN ECHO ++0.0 >[noecn] . 1001:1001(0) ack 12001 ++0.0 >[noecn] E. 1001:1001(0) ack 13001 ++0.0 >[noecn] E. 1001:1001(0) ack 14001 + +// Write a packet ++0.1 write(3, ..., 1000) = 1000 ++0.0 >[ect0] PE. 1001:2001(1000) ack 14001 + +// Pure ACK received ++0.01 <[noecn] W. 14001:14001(0) ack 2001 win 65535 + +// Since CWR was sent, this packet should NOT have ECE set + ++0.1 write(3, ..., 1000) = 1000 ++0.0 >[ect0] P. 2001:3001(1000) ack 14001 +// but Linux will still keep ECE latched here, with packetdrill +// flagging a missing ECE flag, expecting +// >[ect0] PE. 2001:3001(1000) ack 14001 +// in the script + +In the situation above we will continue to send ECN ECHO packets +and trigger the peer to reduce the congestion window. To avoid that +we can check CWR on pure ACKs received. 
+ +v3: +- Add a sequence check to avoid sending an ACK to an ACK + +v2: +- Adjusted the comment +- move CWR check before checking for unacknowledged packets + +Signed-off-by: Denis Kirjanov +Acked-by: Neal Cardwell +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -261,7 +261,8 @@ static void tcp_ecn_accept_cwr(struct so + * cwnd may be very low (even just 1 packet), so we should ACK + * immediately. + */ +- inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; ++ if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) ++ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; + } + } + +@@ -3683,6 +3684,15 @@ static int tcp_ack(struct sock *sk, cons + tcp_in_ack_event(sk, ack_ev_flags); + } + ++ /* This is a deviation from RFC3168 since it states that: ++ * "When the TCP data sender is ready to set the CWR bit after reducing ++ * the congestion window, it SHOULD set the CWR bit only on the first ++ * new data packet that it transmits." ++ * We accept CWR on pure ACKs to be more robust ++ * with widely-deployed TCP implementations that do this. ++ */ ++ tcp_ecn_accept_cwr(sk, skb); ++ + /* We passed data and got it acked, remove any soft error + * log. Something worked... + */ +@@ -4780,8 +4790,6 @@ static void tcp_data_queue(struct sock * + skb_dst_drop(skb); + __skb_pull(skb, tcp_hdr(skb)->doff * 4); + +- tcp_ecn_accept_cwr(sk, skb); +- + tp->rx_opt.dsack = 0; + + /* Queue data for delivery to the user. 
diff --git a/queue-5.7/tcp-grow-window-for-ooo-packets-only-for-sack-flows.patch b/queue-5.7/tcp-grow-window-for-ooo-packets-only-for-sack-flows.patch new file mode 100644 index 00000000000..430f895e9d3 --- /dev/null +++ b/queue-5.7/tcp-grow-window-for-ooo-packets-only-for-sack-flows.patch @@ -0,0 +1,94 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Eric Dumazet +Date: Mon, 15 Jun 2020 20:37:07 -0700 +Subject: tcp: grow window for OOO packets only for SACK flows + +From: Eric Dumazet + +[ Upstream commit 662051215c758ae8545451628816204ed6cd372d ] + +Back in 2013, we made a change that broke fast retransmit +for non SACK flows. + +Indeed, for these flows, a sender needs to receive three duplicate +ACK before starting fast retransmit. Sending ACK with different +receive window do not count. + +Even if enabling SACK is strongly recommended these days, +there still are some cases where it has to be disabled. + +Not increasing the window seems better than having to +rely on RTO. + +After the fix, following packetdrill test gives : + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +0 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 < . 1:1001(1000) ack 1 win 514 +// Quick ack + +0 > . 1:1(0) ack 1001 win 264 + + +0 < . 2001:3001(1000) ack 1 win 514 +// DUPACK : Normally we should not change the window + +0 > . 1:1(0) ack 1001 win 264 + + +0 < . 3001:4001(1000) ack 1 win 514 +// DUPACK : Normally we should not change the window + +0 > . 1:1(0) ack 1001 win 264 + + +0 < . 4001:5001(1000) ack 1 win 514 +// DUPACK : Normally we should not change the window + +0 > . 1:1(0) ack 1001 win 264 + + +0 < . 1001:2001(1000) ack 1 win 514 +// Hole is repaired. + +0 > . 
1:1(0) ack 5001 win 272 + +Fixes: 4e4f1fc22681 ("tcp: properly increase rcv_ssthresh for ofo packets") +Signed-off-by: Eric Dumazet +Reported-by: Venkat Venkatsubra +Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -4603,7 +4603,11 @@ static void tcp_data_queue_ofo(struct so + if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb, + skb, &fragstolen)) { + coalesce_done: +- tcp_grow_window(sk, skb); ++ /* For non sack flows, do not grow window to force DUPACK ++ * and trigger fast retransmit. ++ */ ++ if (tcp_is_sack(tp)) ++ tcp_grow_window(sk, skb); + kfree_skb_partial(skb, fragstolen); + skb = NULL; + goto add_sack; +@@ -4687,7 +4691,11 @@ add_sack: + tcp_sack_new_ofo_skb(sk, seq, end_seq); + end: + if (skb) { +- tcp_grow_window(sk, skb); ++ /* For non sack flows, do not grow window to force DUPACK ++ * and trigger fast retransmit. ++ */ ++ if (tcp_is_sack(tp)) ++ tcp_grow_window(sk, skb); + skb_condense(skb); + skb_set_owner_r(skb, sk); + } diff --git a/queue-5.7/tcp_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch b/queue-5.7/tcp_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch new file mode 100644 index 00000000000..1e301910993 --- /dev/null +++ b/queue-5.7/tcp_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch @@ -0,0 +1,54 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: Neal Cardwell +Date: Wed, 24 Jun 2020 12:42:02 -0400 +Subject: tcp_cubic: fix spurious HYSTART_DELAY exit upon drop in min RTT + +From: Neal Cardwell + +[ Upstream commit b344579ca8478598937215f7005d6c7b84d28aee ] + +Mirja Kuehlewind reported a bug in Linux TCP CUBIC Hystart, where +Hystart HYSTART_DELAY mechanism can exit Slow Start spuriously on an +ACK when the minimum rtt of a connection goes down. 
From inspection it +is clear from the existing code that this could happen in an example +like the following: + +o The first 8 RTT samples in a round trip are 150ms, resulting in a + curr_rtt of 150ms and a delay_min of 150ms. + +o The 9th RTT sample is 100ms. The curr_rtt does not change after the + first 8 samples, so curr_rtt remains 150ms. But delay_min can be + lowered at any time, so delay_min falls to 100ms. The code executes + the HYSTART_DELAY comparison between curr_rtt of 150ms and delay_min + of 100ms, and the curr_rtt is declared far enough above delay_min to + force a (spurious) exit of Slow start. + +The fix here is simple: allow every RTT sample in a round trip to +lower the curr_rtt. + +Fixes: ae27e98a5152 ("[TCP] CUBIC v2.3") +Reported-by: Mirja Kuehlewind +Signed-off-by: Neal Cardwell +Signed-off-by: Eric Dumazet +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_cubic.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/net/ipv4/tcp_cubic.c ++++ b/net/ipv4/tcp_cubic.c +@@ -432,10 +432,9 @@ static void hystart_update(struct sock * + + if (hystart_detect & HYSTART_DELAY) { + /* obtain the minimum delay of more than sampling packets */ ++ if (ca->curr_rtt > delay) ++ ca->curr_rtt = delay; + if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { +- if (ca->curr_rtt > delay) +- ca->curr_rtt = delay; +- + ca->sample_cnt++; + } else { + if (ca->curr_rtt > ca->delay_min + diff --git a/queue-5.7/tg3-driver-sleeps-indefinitely-when-eeh-errors-exceed-eeh_max_freezes.patch b/queue-5.7/tg3-driver-sleeps-indefinitely-when-eeh-errors-exceed-eeh_max_freezes.patch new file mode 100644 index 00000000000..3d0bc0a4b58 --- /dev/null +++ b/queue-5.7/tg3-driver-sleeps-indefinitely-when-eeh-errors-exceed-eeh_max_freezes.patch @@ -0,0 +1,37 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: David Christensen +Date: Wed, 17 Jun 2020 11:51:17 -0700 +Subject: tg3: driver sleeps indefinitely 
when EEH errors exceed eeh_max_freezes + +From: David Christensen + +[ Upstream commit 3a2656a211caf35e56afc9425e6e518fa52f7fbc ] + +The driver function tg3_io_error_detected() calls napi_disable twice, +without an intervening napi_enable, when the number of EEH errors exceeds +eeh_max_freezes, resulting in an indefinite sleep while holding rtnl_lock. + +Add check for pcierr_recovery which skips code already executed for the +"Frozen" state. + +Signed-off-by: David Christensen +Reviewed-by: Michael Chan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/tg3.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -18170,8 +18170,8 @@ static pci_ers_result_t tg3_io_error_det + + rtnl_lock(); + +- /* We probably don't have netdev yet */ +- if (!netdev || !netif_running(netdev)) ++ /* Could be second call or maybe we don't have netdev yet */ ++ if (!netdev || tp->pcierr_recovery || !netif_running(netdev)) + goto done; + + /* We needn't recover from permanent error */ diff --git a/queue-5.7/wireguard-device-avoid-circular-netns-references.patch b/queue-5.7/wireguard-device-avoid-circular-netns-references.patch new file mode 100644 index 00000000000..326d7b6a3d7 --- /dev/null +++ b/queue-5.7/wireguard-device-avoid-circular-netns-references.patch @@ -0,0 +1,298 @@ +From foo@baz Sun 28 Jun 2020 02:22:12 PM CEST +From: "Jason A. Donenfeld" +Date: Tue, 23 Jun 2020 03:59:45 -0600 +Subject: wireguard: device: avoid circular netns references + +From: "Jason A. Donenfeld" + +[ Upstream commit 900575aa33a3eaaef802b31de187a85c4a4b4bd0 ] + +Before, we took a reference to the creating netns if the new netns was +different. This caused issues with circular references, with two +wireguard interfaces swapping namespaces. 
The solution is to rather not +take any extra references at all, but instead simply invalidate the +creating netns pointer when that netns is deleted. + +In order to prevent this from happening again, this commit improves the +rough object leak tracking by allowing it to account for created and +destroyed interfaces, aside from just peers and keys. That then makes it +possible to check for the object leak when having two interfaces take a +reference to each others' namespaces. + +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") +Signed-off-by: Jason A. Donenfeld +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireguard/device.c | 58 +++++++++++++---------------- + drivers/net/wireguard/device.h | 3 - + drivers/net/wireguard/netlink.c | 14 ++++--- + drivers/net/wireguard/socket.c | 25 +++++++++--- + tools/testing/selftests/wireguard/netns.sh | 13 ++++++ + 5 files changed, 67 insertions(+), 46 deletions(-) + +--- a/drivers/net/wireguard/device.c ++++ b/drivers/net/wireguard/device.c +@@ -45,17 +45,18 @@ static int wg_open(struct net_device *de + if (dev_v6) + dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE; + ++ mutex_lock(&wg->device_update_lock); + ret = wg_socket_init(wg, wg->incoming_port); + if (ret < 0) +- return ret; +- mutex_lock(&wg->device_update_lock); ++ goto out; + list_for_each_entry(peer, &wg->peer_list, peer_list) { + wg_packet_send_staged_packets(peer); + if (peer->persistent_keepalive_interval) + wg_packet_send_keepalive(peer); + } ++out: + mutex_unlock(&wg->device_update_lock); +- return 0; ++ return ret; + } + + #ifdef CONFIG_PM_SLEEP +@@ -225,6 +226,7 @@ static void wg_destruct(struct net_devic + list_del(&wg->device_list); + rtnl_unlock(); + mutex_lock(&wg->device_update_lock); ++ rcu_assign_pointer(wg->creating_net, NULL); + wg->incoming_port = 0; + wg_socket_reinit(wg, NULL, NULL); + /* The final references are cleared in the below calls to destroy_workqueue. 
*/ +@@ -240,13 +242,11 @@ static void wg_destruct(struct net_devic + skb_queue_purge(&wg->incoming_handshakes); + free_percpu(dev->tstats); + free_percpu(wg->incoming_handshakes_worker); +- if (wg->have_creating_net_ref) +- put_net(wg->creating_net); + kvfree(wg->index_hashtable); + kvfree(wg->peer_hashtable); + mutex_unlock(&wg->device_update_lock); + +- pr_debug("%s: Interface deleted\n", dev->name); ++ pr_debug("%s: Interface destroyed\n", dev->name); + free_netdev(dev); + } + +@@ -292,7 +292,7 @@ static int wg_newlink(struct net *src_ne + struct wg_device *wg = netdev_priv(dev); + int ret = -ENOMEM; + +- wg->creating_net = src_net; ++ rcu_assign_pointer(wg->creating_net, src_net); + init_rwsem(&wg->static_identity.lock); + mutex_init(&wg->socket_update_lock); + mutex_init(&wg->device_update_lock); +@@ -393,30 +393,26 @@ static struct rtnl_link_ops link_ops __r + .newlink = wg_newlink, + }; + +-static int wg_netdevice_notification(struct notifier_block *nb, +- unsigned long action, void *data) ++static void wg_netns_pre_exit(struct net *net) + { +- struct net_device *dev = ((struct netdev_notifier_info *)data)->dev; +- struct wg_device *wg = netdev_priv(dev); +- +- ASSERT_RTNL(); +- +- if (action != NETDEV_REGISTER || dev->netdev_ops != &netdev_ops) +- return 0; ++ struct wg_device *wg; + +- if (dev_net(dev) == wg->creating_net && wg->have_creating_net_ref) { +- put_net(wg->creating_net); +- wg->have_creating_net_ref = false; +- } else if (dev_net(dev) != wg->creating_net && +- !wg->have_creating_net_ref) { +- wg->have_creating_net_ref = true; +- get_net(wg->creating_net); ++ rtnl_lock(); ++ list_for_each_entry(wg, &device_list, device_list) { ++ if (rcu_access_pointer(wg->creating_net) == net) { ++ pr_debug("%s: Creating namespace exiting\n", wg->dev->name); ++ netif_carrier_off(wg->dev); ++ mutex_lock(&wg->device_update_lock); ++ rcu_assign_pointer(wg->creating_net, NULL); ++ wg_socket_reinit(wg, NULL, NULL); ++ mutex_unlock(&wg->device_update_lock); ++ } + } 
+- return 0; ++ rtnl_unlock(); + } + +-static struct notifier_block netdevice_notifier = { +- .notifier_call = wg_netdevice_notification ++static struct pernet_operations pernet_ops = { ++ .pre_exit = wg_netns_pre_exit + }; + + int __init wg_device_init(void) +@@ -429,18 +425,18 @@ int __init wg_device_init(void) + return ret; + #endif + +- ret = register_netdevice_notifier(&netdevice_notifier); ++ ret = register_pernet_device(&pernet_ops); + if (ret) + goto error_pm; + + ret = rtnl_link_register(&link_ops); + if (ret) +- goto error_netdevice; ++ goto error_pernet; + + return 0; + +-error_netdevice: +- unregister_netdevice_notifier(&netdevice_notifier); ++error_pernet: ++ unregister_pernet_device(&pernet_ops); + error_pm: + #ifdef CONFIG_PM_SLEEP + unregister_pm_notifier(&pm_notifier); +@@ -451,7 +447,7 @@ error_pm: + void wg_device_uninit(void) + { + rtnl_link_unregister(&link_ops); +- unregister_netdevice_notifier(&netdevice_notifier); ++ unregister_pernet_device(&pernet_ops); + #ifdef CONFIG_PM_SLEEP + unregister_pm_notifier(&pm_notifier); + #endif +--- a/drivers/net/wireguard/device.h ++++ b/drivers/net/wireguard/device.h +@@ -40,7 +40,7 @@ struct wg_device { + struct net_device *dev; + struct crypt_queue encrypt_queue, decrypt_queue; + struct sock __rcu *sock4, *sock6; +- struct net *creating_net; ++ struct net __rcu *creating_net; + struct noise_static_identity static_identity; + struct workqueue_struct *handshake_receive_wq, *handshake_send_wq; + struct workqueue_struct *packet_crypt_wq; +@@ -56,7 +56,6 @@ struct wg_device { + unsigned int num_peers, device_update_gen; + u32 fwmark; + u16 incoming_port; +- bool have_creating_net_ref; + }; + + int wg_device_init(void); +--- a/drivers/net/wireguard/netlink.c ++++ b/drivers/net/wireguard/netlink.c +@@ -511,11 +511,15 @@ static int wg_set_device(struct sk_buff + if (flags & ~__WGDEVICE_F_ALL) + goto out; + +- ret = -EPERM; +- if ((info->attrs[WGDEVICE_A_LISTEN_PORT] || +- info->attrs[WGDEVICE_A_FWMARK]) && +- 
!ns_capable(wg->creating_net->user_ns, CAP_NET_ADMIN)) +- goto out; ++ if (info->attrs[WGDEVICE_A_LISTEN_PORT] || info->attrs[WGDEVICE_A_FWMARK]) { ++ struct net *net; ++ rcu_read_lock(); ++ net = rcu_dereference(wg->creating_net); ++ ret = !net || !ns_capable(net->user_ns, CAP_NET_ADMIN) ? -EPERM : 0; ++ rcu_read_unlock(); ++ if (ret) ++ goto out; ++ } + + ++wg->device_update_gen; + +--- a/drivers/net/wireguard/socket.c ++++ b/drivers/net/wireguard/socket.c +@@ -347,6 +347,7 @@ static void set_sock_opts(struct socket + + int wg_socket_init(struct wg_device *wg, u16 port) + { ++ struct net *net; + int ret; + struct udp_tunnel_sock_cfg cfg = { + .sk_user_data = wg, +@@ -371,37 +372,47 @@ int wg_socket_init(struct wg_device *wg, + }; + #endif + ++ rcu_read_lock(); ++ net = rcu_dereference(wg->creating_net); ++ net = net ? maybe_get_net(net) : NULL; ++ rcu_read_unlock(); ++ if (unlikely(!net)) ++ return -ENONET; ++ + #if IS_ENABLED(CONFIG_IPV6) + retry: + #endif + +- ret = udp_sock_create(wg->creating_net, &port4, &new4); ++ ret = udp_sock_create(net, &port4, &new4); + if (ret < 0) { + pr_err("%s: Could not create IPv4 socket\n", wg->dev->name); +- return ret; ++ goto out; + } + set_sock_opts(new4); +- setup_udp_tunnel_sock(wg->creating_net, new4, &cfg); ++ setup_udp_tunnel_sock(net, new4, &cfg); + + #if IS_ENABLED(CONFIG_IPV6) + if (ipv6_mod_enabled()) { + port6.local_udp_port = inet_sk(new4->sk)->inet_sport; +- ret = udp_sock_create(wg->creating_net, &port6, &new6); ++ ret = udp_sock_create(net, &port6, &new6); + if (ret < 0) { + udp_tunnel_sock_release(new4); + if (ret == -EADDRINUSE && !port && retries++ < 100) + goto retry; + pr_err("%s: Could not create IPv6 socket\n", + wg->dev->name); +- return ret; ++ goto out; + } + set_sock_opts(new6); +- setup_udp_tunnel_sock(wg->creating_net, new6, &cfg); ++ setup_udp_tunnel_sock(net, new6, &cfg); + } + #endif + + wg_socket_reinit(wg, new4->sk, new6 ? 
new6->sk : NULL); +- return 0; ++ ret = 0; ++out: ++ put_net(net); ++ return ret; + } + + void wg_socket_reinit(struct wg_device *wg, struct sock *new4, +--- a/tools/testing/selftests/wireguard/netns.sh ++++ b/tools/testing/selftests/wireguard/netns.sh +@@ -587,9 +587,20 @@ ip0 link set wg0 up + kill $ncat_pid + ip0 link del wg0 + ++# Ensure there aren't circular reference loops ++ip1 link add wg1 type wireguard ++ip2 link add wg2 type wireguard ++ip1 link set wg1 netns $netns2 ++ip2 link set wg2 netns $netns1 ++pp ip netns delete $netns1 ++pp ip netns delete $netns2 ++pp ip netns add $netns1 ++pp ip netns add $netns2 ++ ++sleep 2 # Wait for cleanup and grace periods + declare -A objects + while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do +- [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue ++ [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ ?[0-9]*)\ .*(created|destroyed).* ]] || continue + objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}" + done < /dev/kmsg + alldeleted=1