From: Greg Kroah-Hartman Date: Sat, 26 Jan 2019 09:28:16 +0000 (+0100) Subject: 4.19-stable patches X-Git-Tag: v4.9.154~72 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0c7949a038b72c6b0de5e3d8c609c2fe646e4c2f;p=thirdparty%2Fkernel%2Fstable-queue.git 4.19-stable patches added patches: amd-xgbe-fix-mdio-access-for-non-zero-ports-and-clause-45-phys.patch erspan-build-the-header-with-the-right-proto-according-to-erspan_ver.patch ip6_gre-fix-tunnel-list-corruption-for-x-netns.patch ip6_gre-update-version-related-info-when-changing-link.patch mlxsw-pci-increase-pci-sw-reset-timeout.patch mlxsw-pci-ring-cq-s-doorbell-before-rdq-s.patch mlxsw-spectrum_fid-update-dummy-fid-index.patch net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch net-fix-usage-of-pskb_trim_rcsum.patch net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch net-phy-marvell-errata-for-mv88e6390-internal-phys.patch net-phy-marvell-fix-deadlock-from-wrong-locking.patch net-phy-mdio_bus-add-missing-device_del-in-mdiobus_register-error-handling.patch net-phy-phy-driver-features-are-mandatory.patch net-sched-act_tunnel_key-fix-memory-leak-in-case-of-action-replace.patch net-sched-cls_flower-allocate-mask-dynamically-in-fl_change.patch net_sched-refetch-skb-protocol-for-each-filter.patch openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch tcp-allow-msg_zerocopy-transmission-also-in-close_wait-state.patch udp-with-udp_segment-release-on-error-path.patch vhost-log-dirty-page-correctly.patch --- diff --git a/queue-4.19/amd-xgbe-fix-mdio-access-for-non-zero-ports-and-clause-45-phys.patch b/queue-4.19/amd-xgbe-fix-mdio-access-for-non-zero-ports-and-clause-45-phys.patch new file mode 100644 index 00000000000..57c9af4d706 --- /dev/null +++ b/queue-4.19/amd-xgbe-fix-mdio-access-for-non-zero-ports-and-clause-45-phys.patch @@ -0,0 +1,90 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: "Lendacky, Thomas" +Date: Thu, 17 Jan 2019 14:20:14 +0000 +Subject: amd-xgbe: Fix mdio access for non-zero ports and clause 45 PHYs + +From: "Lendacky, Thomas" + +[ Upstream commit 5ab3121beeb76aa6090195b67d237115860dd9ec ] + +The XGBE hardware has support for performing MDIO operations using an +MDIO command request. The driver mistakenly uses the mdio port address +as the MDIO command request device address instead of the MDIO command +request port address. Additionally, the driver does not properly check +for and create a clause 45 MDIO command. + +Check the supplied MDIO register to determine if the request is a clause +45 operation (MII_ADDR_C45). For a clause 45 operation, extract the device +address and register number from the supplied MDIO register and use them +to set the MDIO command request device address and register number fields. +For a clause 22 operation, the MDIO request device address is set to zero +and the MDIO command request register number is set to the supplied MDIO +register. In either case, the supplied MDIO port address is used as the +MDIO command request port address. + +Fixes: 732f2ab7afb9 ("amd-xgbe: Add support for MDIO attached PHYs") +Signed-off-by: Tom Lendacky +Tested-by: Shyam Sundar S K +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/amd/xgbe/xgbe-common.h | 2 -- + drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 22 ++++++++++++++++------ + 2 files changed, 16 insertions(+), 8 deletions(-) + +--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h +@@ -431,8 +431,6 @@ + #define MAC_MDIOSCAR_PA_WIDTH 5 + #define MAC_MDIOSCAR_RA_INDEX 0 + #define MAC_MDIOSCAR_RA_WIDTH 16 +-#define MAC_MDIOSCAR_REG_INDEX 0 +-#define MAC_MDIOSCAR_REG_WIDTH 21 + #define MAC_MDIOSCCDR_BUSY_INDEX 22 + #define MAC_MDIOSCCDR_BUSY_WIDTH 1 + #define MAC_MDIOSCCDR_CMD_INDEX 16 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +@@ -1284,6 +1284,20 @@ static void xgbe_write_mmd_regs(struct x + } + } + ++static unsigned int xgbe_create_mdio_sca(int port, int reg) ++{ ++ unsigned int mdio_sca, da; ++ ++ da = (reg & MII_ADDR_C45) ? reg >> 16 : 0; ++ ++ mdio_sca = 0; ++ XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, RA, reg); ++ XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, PA, port); ++ XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, da); ++ ++ return mdio_sca; ++} ++ + static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, + int reg, u16 val) + { +@@ -1291,9 +1305,7 @@ static int xgbe_write_ext_mii_regs(struc + + reinit_completion(&pdata->mdio_complete); + +- mdio_sca = 0; +- XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); +- XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); ++ mdio_sca = xgbe_create_mdio_sca(addr, reg); + XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); + + mdio_sccd = 0; +@@ -1317,9 +1329,7 @@ static int xgbe_read_ext_mii_regs(struct + + reinit_completion(&pdata->mdio_complete); + +- mdio_sca = 0; +- XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); +- XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); ++ mdio_sca = xgbe_create_mdio_sca(addr, reg); + XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); + + mdio_sccd = 0; diff --git a/queue-4.19/erspan-build-the-header-with-the-right-proto-according-to-erspan_ver.patch b/queue-4.19/erspan-build-the-header-with-the-right-proto-according-to-erspan_ver.patch new file mode 100644 index 00000000000..2bee0431427 --- /dev/null +++ b/queue-4.19/erspan-build-the-header-with-the-right-proto-according-to-erspan_ver.patch @@ -0,0 +1,138 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Xin Long +Date: Mon, 14 Jan 2019 18:10:06 +0800 +Subject: erspan: build the header with the right proto according to erspan_ver + +From: Xin Long + +[ Upstream commit 20704bd1633dd5afb29a321d3a615c9c8e9c9d05 ] + +As said in draft-foschiano-erspan-03#section4: + + Different frame variants known as "ERSPAN Types" can be + distinguished based on the GRE "Protocol Type" field value: Type I + and II's value is 0x88BE while Type III's is 0x22EB [ETYPES]. + +So set it properly in erspan_xmit() according to erspan_ver. While at +it, also remove the unused parameter 'proto' in erspan_fb_xmit(). + +Fixes: 94d7d8f29287 ("ip6_gre: add erspan v2 support") +Reported-by: Jianlin Shi +Signed-off-by: Xin Long +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_gre.c | 23 ++++++++++++++--------- + net/ipv6/ip6_gre.c | 6 ++++-- + 2 files changed, 18 insertions(+), 11 deletions(-) + +--- a/net/ipv4/ip_gre.c ++++ b/net/ipv4/ip_gre.c +@@ -570,8 +570,7 @@ err_free_skb: + dev->stats.tx_dropped++; + } + +-static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, +- __be16 proto) ++static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_info *tun_info; +@@ -579,10 +578,10 @@ static void erspan_fb_xmit(struct sk_buf + struct erspan_metadata *md; + struct rtable *rt = NULL; + bool truncate = false; ++ __be16 df, proto; + struct flowi4 fl; + int tunnel_hlen; + int version; +- __be16 df; + int nhoff; + int thoff; + +@@ -627,18 +626,20 @@ static void erspan_fb_xmit(struct sk_buf + if (version == 1) { + erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), + ntohl(md->u.index), truncate, true); ++ proto = htons(ETH_P_ERSPAN); + } else if (version == 2) { + erspan_build_header_v2(skb, + ntohl(tunnel_id_to_key32(key->tun_id)), + md->u.md2.dir, + get_hwid(&md->u.md2), + truncate, true); ++ proto = htons(ETH_P_ERSPAN2); + } else { + goto err_free_rt; + } + + gre_build_header(skb, 8, TUNNEL_SEQ, +- htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++)); ++ proto, 0, htonl(tunnel->o_seqno++)); + + df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + +@@ -722,12 +723,13 @@ static netdev_tx_t erspan_xmit(struct sk + { + struct ip_tunnel *tunnel = netdev_priv(dev); + bool truncate = false; ++ __be16 proto; + + if (!pskb_inet_may_pull(skb)) + goto free_skb; + + if (tunnel->collect_md) { +- erspan_fb_xmit(skb, dev, skb->protocol); ++ erspan_fb_xmit(skb, dev); + return NETDEV_TX_OK; + } + +@@ -743,19 +745,22 @@ static netdev_tx_t erspan_xmit(struct sk + } + + /* Push ERSPAN header */ +- if (tunnel->erspan_ver == 1) ++ if (tunnel->erspan_ver == 1) { + erspan_build_header(skb, ntohl(tunnel->parms.o_key), + tunnel->index, + truncate, true); +- else if (tunnel->erspan_ver == 2) ++ proto = htons(ETH_P_ERSPAN); ++ } else if (tunnel->erspan_ver == 2) { + erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key), + tunnel->dir, tunnel->hwid, + truncate, true); +- else ++ proto = htons(ETH_P_ERSPAN2); ++ } else { + goto free_skb; ++ } + + tunnel->parms.o_flags &= ~TUNNEL_KEY; +- __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN)); ++ __gre_xmit(skb, dev, &tunnel->parms.iph, proto); + return NETDEV_TX_OK; + + free_skb: +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -938,6 +938,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit + __u8 dsfield = false; + struct flowi6 fl6; + int err = -EINVAL; ++ __be16 proto; + __u32 mtu; + int nhoff; + int thoff; +@@ -1051,8 +1052,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit + } + + /* Push GRE header. */ +- gre_build_header(skb, 8, TUNNEL_SEQ, +- htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++)); ++ proto = (t->parms.erspan_ver == 1) ? 
htons(ETH_P_ERSPAN) ++ : htons(ETH_P_ERSPAN2); ++ gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++)); + + /* TooBig packet may have updated dst->dev's mtu */ + if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) diff --git a/queue-4.19/ip6_gre-fix-tunnel-list-corruption-for-x-netns.patch b/queue-4.19/ip6_gre-fix-tunnel-list-corruption-for-x-netns.patch new file mode 100644 index 00000000000..4a709768e58 --- /dev/null +++ b/queue-4.19/ip6_gre-fix-tunnel-list-corruption-for-x-netns.patch @@ -0,0 +1,41 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Olivier Matz +Date: Wed, 9 Jan 2019 10:57:21 +0100 +Subject: ip6_gre: fix tunnel list corruption for x-netns + +From: Olivier Matz + +[ Upstream commit ab5098fa25b91cb6fe0a0676f17abb64f2bbf024 ] + +In changelink ops, the ip6gre_net pointer is retrieved from +dev_net(dev), which is wrong in case of x-netns. Thus, the tunnel is not +unlinked from its current list and is relinked into another net +namespace. This corrupts the tunnel lists and can later trigger a kernel +oops. + +Fix this by retrieving the netns from device private area. + +Fixes: c8632fc30bb0 ("net: ip6_gre: Split up ip6gre_changelink()") +Cc: Petr Machata +Signed-off-by: Olivier Matz +Acked-by: Nicolas Dichtel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -2047,9 +2047,9 @@ static int ip6gre_changelink(struct net_ + struct nlattr *data[], + struct netlink_ext_ack *extack) + { +- struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); ++ struct ip6_tnl *t = netdev_priv(dev); ++ struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); + struct __ip6_tnl_parm p; +- struct ip6_tnl *t; + + t = ip6gre_changelink_common(dev, tb, data, &p, extack); + if (IS_ERR(t)) diff --git a/queue-4.19/ip6_gre-update-version-related-info-when-changing-link.patch b/queue-4.19/ip6_gre-update-version-related-info-when-changing-link.patch new file mode 100644 index 00000000000..8cfe3e28db7 --- /dev/null +++ b/queue-4.19/ip6_gre-update-version-related-info-when-changing-link.patch @@ -0,0 +1,34 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Hangbin Liu +Date: Thu, 10 Jan 2019 11:17:42 +0800 +Subject: ip6_gre: update version related info when changing link + +From: Hangbin Liu + +[ Upstream commit 80b3671e9377916bf2b02e56113fa7377ce5705a ] + +We forgot to update ip6erspan version related info when changing link, +which will cause setting new hwid failed. + +Reported-by: Jianlin Shi +Fixes: 94d7d8f292870 ("ip6_gre: add erspan v2 support") +Signed-off-by: Hangbin Liu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -1187,6 +1187,10 @@ static void ip6gre_tnl_copy_tnl_parm(str + t->parms.i_flags = p->i_flags; + t->parms.o_flags = p->o_flags; + t->parms.fwmark = p->fwmark; ++ t->parms.erspan_ver = p->erspan_ver; ++ t->parms.index = p->index; ++ t->parms.dir = p->dir; ++ t->parms.hwid = p->hwid; + dst_cache_reset(&t->dst_cache); + } + diff --git a/queue-4.19/mlxsw-pci-increase-pci-sw-reset-timeout.patch b/queue-4.19/mlxsw-pci-increase-pci-sw-reset-timeout.patch new file mode 100644 index 00000000000..066f0207e3a --- /dev/null +++ b/queue-4.19/mlxsw-pci-increase-pci-sw-reset-timeout.patch @@ -0,0 +1,35 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Nir Dotan +Date: Fri, 18 Jan 2019 15:57:56 +0000 +Subject: mlxsw: pci: Increase PCI SW reset timeout + +From: Nir Dotan + +[ Upstream commit d2f372ba0914e5722ac28e15f2ed2db61bcf0e44 ] + +Spectrum-2 PHY layer introduces a calibration period which is a part of the +Spectrum-2 firmware boot process. Hence increase the SW timeout waiting for +the firmware to come out of boot. This does not increase system boot time +in cases where the firmware PHY calibration process is done quickly. + +Fixes: c3ab435466d5 ("mlxsw: spectrum: Extend to support Spectrum-2 ASIC") +Signed-off-by: Nir Dotan +Acked-by: Jiri Pirko +Signed-off-by: Ido Schimmel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h ++++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +@@ -27,7 +27,7 @@ + + #define MLXSW_PCI_SW_RESET 0xF0010 + #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) +-#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 5000 ++#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 13000 + #define MLXSW_PCI_SW_RESET_WAIT_MSECS 100 + #define MLXSW_PCI_FW_READY 0xA1844 + #define MLXSW_PCI_FW_READY_MASK 0xFFFF diff --git a/queue-4.19/mlxsw-pci-ring-cq-s-doorbell-before-rdq-s.patch b/queue-4.19/mlxsw-pci-ring-cq-s-doorbell-before-rdq-s.patch new file mode 100644 index 00000000000..d6301c2c230 --- /dev/null +++ b/queue-4.19/mlxsw-pci-ring-cq-s-doorbell-before-rdq-s.patch @@ -0,0 +1,100 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Ido Schimmel +Date: Fri, 18 Jan 2019 15:57:55 +0000 +Subject: mlxsw: pci: Ring CQ's doorbell before RDQ's + +From: Ido Schimmel + +When a packet should be trapped to the CPU the device consumes a WQE +(work queue element) from an RDQ (receive descriptor queue) and copies +the packet to the address specified in the WQE. The device then tries to +post a CQE (completion queue element) that contains various metadata +(e.g., ingress port) about the packet to a CQ (completion queue). + +In case the device managed to consume a WQE, but did not manage to post +the corresponding CQE, it will get stuck. This unlikely situation can be +triggered due to the scheme the driver is currently using to process +CQEs. + +The driver will consume up to 512 CQEs at a time and after processing +each corresponding WQE it will ring the RDQ's doorbell, letting the +device know that a new WQE was posted for it to consume. Only after +processing all the CQEs (up to 512), the driver will ring the CQ's +doorbell, letting the device know that new ones can be posted. 
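+
+A rough sketch of that scheme (illustrative pseudocode; the helper
+names are not the driver's actual API):
+
+        while ((cqe = next_cqe(cq)) && items++ < credits) {
+                handle_rdq(rdq, cqe);   /* consume WQE, repost buffer */
+                ring_doorbell(rdq);     /* device may take the new WQE... */
+        }
+        ring_doorbell(cq);              /* ...before any CQE slot is freed */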
+ +Fix this by having the driver ring the CQ's doorbell for every processed +CQE, but before ringing the RDQ's doorbell. This guarantees that +whenever we post a new WQE, there is a corresponding CQE available. Copy +the currently processed CQE to prevent the device from overwriting it +with a new CQE after ringing the doorbell. + +Note that the driver still arms the CQ only after processing all the +pending CQEs, so that interrupts for this CQ will only be delivered +after the driver finished its processing. + +Before commit 8404f6f2e8ed ("mlxsw: pci: Allow to use CQEs of version 1 +and version 2") the issue was virtually impossible to trigger since the +number of CQEs was twice the number of WQEs and the number of CQEs +processed at a time was equal to the number of available WQEs. + +Fixes: 8404f6f2e8ed ("mlxsw: pci: Allow to use CQEs of version 1 and version 2") +Signed-off-by: Ido Schimmel +Reported-by: Semion Lisyansky +Tested-by: Semion Lisyansky +Acked-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/pci.c | 12 +++++++----- + drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 1 + + 2 files changed, 8 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c +@@ -604,29 +604,31 @@ static void mlxsw_pci_cq_tasklet(unsigne + u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe); + u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe); + u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe); ++ char ncqe[MLXSW_PCI_CQE_SIZE_MAX]; ++ ++ memcpy(ncqe, cqe, q->elem_size); ++ mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + + if (sendq) { + struct mlxsw_pci_queue *sdq; + + sdq = mlxsw_pci_sdq_get(mlxsw_pci, dqn); + mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq, +- wqe_counter, cqe); ++ wqe_counter, ncqe); + q->u.cq.comp_sdq_count++; + } else { + struct mlxsw_pci_queue *rdq; + + rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn); + mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, +- wqe_counter, q->u.cq.v, cqe); ++ wqe_counter, q->u.cq.v, ncqe); + q->u.cq.comp_rdq_count++; + } + if (++items == credits) + break; + } +- if (items) { +- mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); ++ if (items) + mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); +- } + } + + static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q) +--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h ++++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +@@ -53,6 +53,7 @@ + #define MLXSW_PCI_WQE_SIZE 32 /* 32 bytes per element */ + #define MLXSW_PCI_CQE01_SIZE 16 /* 16 bytes per element */ + #define MLXSW_PCI_CQE2_SIZE 32 /* 32 bytes per element */ ++#define MLXSW_PCI_CQE_SIZE_MAX MLXSW_PCI_CQE2_SIZE + #define MLXSW_PCI_EQE_SIZE 16 /* 16 bytes per element */ + #define MLXSW_PCI_WQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE) + #define MLXSW_PCI_CQE01_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE01_SIZE) diff --git a/queue-4.19/mlxsw-spectrum_fid-update-dummy-fid-index.patch b/queue-4.19/mlxsw-spectrum_fid-update-dummy-fid-index.patch new file mode 100644 index 00000000000..f8fd36c7713 --- /dev/null +++ b/queue-4.19/mlxsw-spectrum_fid-update-dummy-fid-index.patch @@ -0,0 +1,43 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Nir Dotan +Date: Fri, 18 Jan 2019 15:57:59 +0000 +Subject: mlxsw: spectrum_fid: Update dummy FID index + +From: Nir Dotan + +[ Upstream commit a11dcd6497915ba79d95ef4fe2541aaac27f6201 ] + +When using a tc flower action of egress mirred redirect, the driver adds +an implicit 
FID setting action. This implicit action sets a dummy FID to +the packet and is used as part of a design for trapping unmatched flows +in OVS. While this implicit FID setting action is supposed to be a NOP +when a redirect action is added, in Spectrum-2 the FID record is +consulted as the dummy FID index is an 802.1D FID index and the packet +is dropped instead of being redirected. + +Set the dummy FID index value to be within 802.1Q range. This satisfies +both Spectrum-1 which ignores the FID and Spectrum-2 which identifies it +as an 802.1Q FID and will then follow the redirect action. + +Fixes: c3ab435466d5 ("mlxsw: spectrum: Extend to support Spectrum-2 ASIC") +Signed-off-by: Nir Dotan +Signed-off-by: Ido Schimmel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +@@ -696,8 +696,8 @@ static const struct mlxsw_sp_fid_ops mlx + static const struct mlxsw_sp_fid_family mlxsw_sp_fid_dummy_family = { + .type = MLXSW_SP_FID_TYPE_DUMMY, + .fid_size = sizeof(struct mlxsw_sp_fid), +- .start_index = MLXSW_SP_RFID_BASE - 1, +- .end_index = MLXSW_SP_RFID_BASE - 1, ++ .start_index = VLAN_N_VID - 1, ++ .end_index = VLAN_N_VID - 1, + .ops = &mlxsw_sp_fid_dummy_ops, + }; + diff --git a/queue-4.19/net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch b/queue-4.19/net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch new file mode 100644 index 00000000000..1877cd9ab22 --- /dev/null +++ b/queue-4.19/net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch @@ -0,0 +1,69 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Yunjian Wang +Date: Thu, 17 Jan 2019 09:46:41 +0800 +Subject: net: bridge: Fix ethernet header pointer before check skb forwardable + +From: Yunjian Wang + +[ Upstream commit 28c1382fa28f2e2d9d0d6f25ae879b5af2ecbd03 ] + +The skb header should be set to ethernet header before using +is_skb_forwardable. Because the ethernet header length has been +considered in is_skb_forwardable(including dev->hard_header_len +length). + +To reproduce the issue: +1, add 2 ports on linux bridge br using following commands: +$ brctl addbr br +$ brctl addif br eth0 +$ brctl addif br eth1 +2, the MTU of eth0 and eth1 is 1500 +3, send a packet(Data 1480, UDP 8, IP 20, Ethernet 14, VLAN 4) +from eth0 to eth1 + +So the expect result is packet larger than 1500 cannot pass through +eth0 and eth1. But currently, the packet passes through success, it +means eth1's MTU limit doesn't take effect. + +Fixes: f6367b4660dd ("bridge: use is_skb_forwardable in forward path") +Cc: bridge@lists.linux-foundation.org +Cc: Nkolay Aleksandrov +Cc: Roopa Prabhu +Cc: Stephen Hemminger +Signed-off-by: Yunjian Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_forward.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/net/bridge/br_forward.c ++++ b/net/bridge/br_forward.c +@@ -36,10 +36,10 @@ static inline int should_deliver(const s + + int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) + { ++ skb_push(skb, ETH_HLEN); + if (!is_skb_forwardable(skb->dev, skb)) + goto drop; + +- skb_push(skb, ETH_HLEN); + br_drop_fake_rtable(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL && +@@ -98,12 +98,11 @@ static void __br_forward(const struct ne + net = dev_net(indev); + } else { + if (unlikely(netpoll_tx_running(to->br->dev))) { +- if (!is_skb_forwardable(skb->dev, skb)) { ++ skb_push(skb, ETH_HLEN); ++ if (!is_skb_forwardable(skb->dev, skb)) + kfree_skb(skb); +- } else { +- skb_push(skb, ETH_HLEN); ++ else + br_netpoll_send_skb(to, skb); +- } + return; + } + br_hook = NF_BR_LOCAL_OUT; diff --git a/queue-4.19/net-fix-usage-of-pskb_trim_rcsum.patch b/queue-4.19/net-fix-usage-of-pskb_trim_rcsum.patch new file mode 100644 index 00000000000..8c4be88d3e3 --- /dev/null +++ b/queue-4.19/net-fix-usage-of-pskb_trim_rcsum.patch @@ -0,0 +1,75 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Ross Lagerwall +Date: Thu, 17 Jan 2019 15:34:38 +0000 +Subject: net: Fix usage of pskb_trim_rcsum + +From: Ross Lagerwall + +[ Upstream commit 6c57f0458022298e4da1729c67bd33ce41c14e7a ] + +In certain cases, pskb_trim_rcsum() may change skb pointers. +Reinitialize header pointers afterwards to avoid potential +use-after-frees. Add a note in the documentation of +pskb_trim_rcsum(). Found by KASAN. + +Signed-off-by: Ross Lagerwall +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/pppoe.c | 1 + + include/linux/skbuff.h | 1 + + net/bridge/br_netfilter_ipv6.c | 1 + + net/bridge/netfilter/nft_reject_bridge.c | 1 + + net/ipv4/ip_input.c | 1 + + 5 files changed, 5 insertions(+) + +--- a/drivers/net/ppp/pppoe.c ++++ b/drivers/net/ppp/pppoe.c +@@ -445,6 +445,7 @@ static int pppoe_rcv(struct sk_buff *skb + if (pskb_trim_rcsum(skb, len)) + goto drop; + ++ ph = pppoe_hdr(skb); + pn = pppoe_pernet(dev_net(dev)); + + /* Note that get_item does a sock_hold(), so sk_pppox(po) +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -3178,6 +3178,7 @@ int pskb_trim_rcsum_slow(struct sk_buff + * + * This is exactly the same as pskb_trim except that it ensures the + * checksum of received packets are still valid after the operation. ++ * It can change skb pointers. 
+ */ + + static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) +--- a/net/bridge/br_netfilter_ipv6.c ++++ b/net/bridge/br_netfilter_ipv6.c +@@ -131,6 +131,7 @@ int br_validate_ipv6(struct net *net, st + IPSTATS_MIB_INDISCARDS); + goto drop; + } ++ hdr = ipv6_hdr(skb); + } + if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) + goto drop; +--- a/net/bridge/netfilter/nft_reject_bridge.c ++++ b/net/bridge/netfilter/nft_reject_bridge.c +@@ -229,6 +229,7 @@ static bool reject6_br_csum_ok(struct sk + pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) + return false; + ++ ip6h = ipv6_hdr(skb); + thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) + return false; +--- a/net/ipv4/ip_input.c ++++ b/net/ipv4/ip_input.c +@@ -489,6 +489,7 @@ static struct sk_buff *ip_rcv_core(struc + goto drop; + } + ++ iph = ip_hdr(skb); + skb->transport_header = skb->network_header + iph->ihl*4; + + /* Remove any debris in the socket control block */ diff --git a/queue-4.19/net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch b/queue-4.19/net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch new file mode 100644 index 00000000000..b709b59a91c --- /dev/null +++ b/queue-4.19/net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch @@ -0,0 +1,148 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Ido Schimmel +Date: Wed, 9 Jan 2019 09:57:39 +0000 +Subject: net: ipv4: Fix memory leak in network namespace dismantle + +From: Ido Schimmel + +[ Upstream commit f97f4dd8b3bb9d0993d2491e0f22024c68109184 ] + +IPv4 routing tables are flushed in two cases: + +1. In response to events in the netdev and inetaddr notification chains +2. When a network namespace is being dismantled + +In both cases only routes associated with a dead nexthop group are +flushed. However, a nexthop group will only be marked as dead in case it +is populated with actual nexthops using a nexthop device. This is not +the case when the route in question is an error route (e.g., +'blackhole', 'unreachable'). + +Therefore, when a network namespace is being dismantled such routes are +not flushed and leaked [1]. + +To reproduce: +# ip netns add blue +# ip -n blue route add unreachable 192.0.2.0/24 +# ip netns del blue + +Fix this by not skipping error routes that are not marked with +RTNH_F_DEAD when flushing the routing tables. + +To prevent the flushing of such routes in case #1, add a parameter to +fib_table_flush() that indicates if the table is flushed as part of +namespace dismantle or not. + +Note that this problem does not exist in IPv6 since error routes are +associated with the loopback device. + +[1] +unreferenced object 0xffff888066650338 (size 56): + comm "ip", pid 1206, jiffies 4294786063 (age 26.235s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 b0 1c 62 61 80 88 ff ff ..........ba.... + e8 8b a1 64 80 88 ff ff 00 07 00 08 fe 00 00 00 ...d............ 
+ backtrace: + [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220 + [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20 + [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380 + [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690 + [<0000000014f62875>] netlink_sendmsg+0x929/0xe10 + [<00000000bac9d967>] sock_sendmsg+0xc8/0x110 + [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0 + [<000000002e94f880>] __sys_sendmsg+0xf7/0x250 + [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610 + [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe + [<000000003a8b605b>] 0xffffffffffffffff +unreferenced object 0xffff888061621c88 (size 48): + comm "ip", pid 1206, jiffies 4294786063 (age 26.235s) + hex dump (first 32 bytes): + 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk + 6b 6b 6b 6b 6b 6b 6b 6b d8 8e 26 5f 80 88 ff ff kkkkkkkk..&_.... + backtrace: + [<00000000733609e3>] fib_table_insert+0x978/0x1500 + [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220 + [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20 + [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380 + [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690 + [<0000000014f62875>] netlink_sendmsg+0x929/0xe10 + [<00000000bac9d967>] sock_sendmsg+0xc8/0x110 + [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0 + [<000000002e94f880>] __sys_sendmsg+0xf7/0x250 + [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610 + [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe + [<000000003a8b605b>] 0xffffffffffffffff + +Fixes: 8cced9eff1d4 ("[NETNS]: Enable routing configuration in non-initial namespace.") +Signed-off-by: Ido Schimmel +Reviewed-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip_fib.h | 2 +- + net/ipv4/fib_frontend.c | 4 ++-- + net/ipv4/fib_trie.c | 15 ++++++++++++--- + 3 files changed, 15 insertions(+), 6 deletions(-) + +--- a/include/net/ip_fib.h ++++ b/include/net/ip_fib.h +@@ -230,7 +230,7 @@ int fib_table_delete(struct net *, struc + struct netlink_ext_ack *extack); + int fib_table_dump(struct fib_table *table, struct sk_buff *skb, + struct netlink_callback *cb); +-int fib_table_flush(struct net *net, struct fib_table *table); ++int fib_table_flush(struct net *net, struct fib_table *table, bool flush_all); + struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); + void fib_table_flush_external(struct fib_table *table); + void fib_free_table(struct fib_table *tb); +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -203,7 +203,7 @@ static void fib_flush(struct net *net) + struct fib_table *tb; + + hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) +- flushed += fib_table_flush(net, tb); ++ flushed += fib_table_flush(net, tb, false); + } + + if (flushed) +@@ -1357,7 +1357,7 @@ static void ip_fib_net_exit(struct net * + + hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { + hlist_del(&tb->tb_hlist); +- fib_table_flush(net, tb); ++ fib_table_flush(net, tb, true); + fib_free_table(tb); + } + } +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -1856,7 +1856,7 @@ void fib_table_flush_external(struct fib + } + + /* Caller must hold RTNL. 
*/ +-int fib_table_flush(struct net *net, struct fib_table *tb) ++int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) + { + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *pn = t->kv; +@@ -1904,8 +1904,17 @@ int fib_table_flush(struct net *net, str + hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { + struct fib_info *fi = fa->fa_info; + +- if (!fi || !(fi->fib_flags & RTNH_F_DEAD) || +- tb->tb_id != fa->tb_id) { ++ if (!fi || tb->tb_id != fa->tb_id || ++ (!(fi->fib_flags & RTNH_F_DEAD) && ++ !fib_props[fa->fa_type].error)) { ++ slen = fa->fa_slen; ++ continue; ++ } ++ ++ /* Do not flush error routes if network namespace is ++ * not being dismantled ++ */ ++ if (!flush_all && fib_props[fa->fa_type].error) { + slen = fa->fa_slen; + continue; + } diff --git a/queue-4.19/net-phy-marvell-errata-for-mv88e6390-internal-phys.patch b/queue-4.19/net-phy-marvell-errata-for-mv88e6390-internal-phys.patch new file mode 100644 index 00000000000..58095ca9cd9 --- /dev/null +++ b/queue-4.19/net-phy-marvell-errata-for-mv88e6390-internal-phys.patch @@ -0,0 +1,71 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Andrew Lunn +Date: Thu, 10 Jan 2019 22:48:36 +0100 +Subject: net: phy: marvell: Errata for mv88e6390 internal PHYs + +From: Andrew Lunn + +[ Upstream commit 8cbcdc1a51999ca81db2956608b917aacd28d837 ] + +The VOD can be out of spec, unless some magic value is poked into an +undocumented register in an undocumented page. + +Fixes: e4cf8a38fc0d ("net: phy: Marvell: Add mv88e6390 internal PHY") +Signed-off-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/marvell.c | 35 ++++++++++++++++++++++++++++++++++- + 1 file changed, 34 insertions(+), 1 deletion(-) + +--- a/drivers/net/phy/marvell.c ++++ b/drivers/net/phy/marvell.c +@@ -1063,6 +1063,39 @@ static int m88e1145_config_init(struct p + return 0; + } + ++/* The VOD can be out of specification on link up. Poke an ++ * undocumented register, in an undocumented page, with a magic value ++ * to fix this. 
++ */ ++static int m88e6390_errata(struct phy_device *phydev) ++{ ++ int err; ++ ++ err = phy_write(phydev, MII_BMCR, ++ BMCR_ANENABLE | BMCR_SPEED1000 | BMCR_FULLDPLX); ++ if (err) ++ return err; ++ ++ usleep_range(300, 400); ++ ++ err = phy_write_paged(phydev, 0xf8, 0x08, 0x36); ++ if (err) ++ return err; ++ ++ return genphy_soft_reset(phydev); ++} ++ ++static int m88e6390_config_aneg(struct phy_device *phydev) ++{ ++ int err; ++ ++ err = m88e6390_errata(phydev); ++ if (err) ++ return err; ++ ++ return m88e1510_config_aneg(phydev); ++} ++ + /** + * fiber_lpa_to_ethtool_lpa_t + * @lpa: value of the MII_LPA register for fiber link +@@ -2313,7 +2346,7 @@ static struct phy_driver marvell_drivers + .flags = PHY_HAS_INTERRUPT, + .probe = m88e6390_probe, + .config_init = &marvell_config_init, +- .config_aneg = &m88e1510_config_aneg, ++ .config_aneg = &m88e6390_config_aneg, + .read_status = &marvell_read_status, + .ack_interrupt = &marvell_ack_interrupt, + .config_intr = &marvell_config_intr, diff --git a/queue-4.19/net-phy-marvell-fix-deadlock-from-wrong-locking.patch b/queue-4.19/net-phy-marvell-fix-deadlock-from-wrong-locking.patch new file mode 100644 index 00000000000..4f574d26a01 --- /dev/null +++ b/queue-4.19/net-phy-marvell-fix-deadlock-from-wrong-locking.patch @@ -0,0 +1,32 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Andrew Lunn +Date: Fri, 11 Jan 2019 00:15:21 +0100 +Subject: net: phy: marvell: Fix deadlock from wrong locking + +From: Andrew Lunn + +[ Upstream commit e0a7328fad9979104f73e19bedca821ef3262ae1 ] + +m88e1318_set_wol() takes the lock as part of phy_select_page(). Don't +take the lock again with phy_read(), use the unlocked __phy_read(). + +Fixes: 424ca4c55121 ("net: phy: marvell: fix paged access races") +Reported-by: Åke Rehnman +Signed-off-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/marvell.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/phy/marvell.c ++++ b/drivers/net/phy/marvell.c +@@ -1451,7 +1451,7 @@ static int m88e1318_set_wol(struct phy_d + * before enabling it if !phy_interrupt_is_valid() + */ + if (!phy_interrupt_is_valid(phydev)) +- phy_read(phydev, MII_M1011_IEVENT); ++ __phy_read(phydev, MII_M1011_IEVENT); + + /* Enable the WOL interrupt */ + err = __phy_modify(phydev, MII_88E1318S_PHY_CSIER, 0, diff --git a/queue-4.19/net-phy-mdio_bus-add-missing-device_del-in-mdiobus_register-error-handling.patch b/queue-4.19/net-phy-mdio_bus-add-missing-device_del-in-mdiobus_register-error-handling.patch new file mode 100644 index 00000000000..dff6d973786 --- /dev/null +++ b/queue-4.19/net-phy-mdio_bus-add-missing-device_del-in-mdiobus_register-error-handling.patch @@ -0,0 +1,92 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Thomas Petazzoni +Date: Wed, 16 Jan 2019 10:53:58 +0100 +Subject: net: phy: mdio_bus: add missing device_del() in mdiobus_register() error handling + +From: Thomas Petazzoni + +[ Upstream commit e40e2a2e78664fa90ea4b9bdf4a84efce2fea9d9 ] + +The current code in __mdiobus_register() doesn't properly handle +failures returned by the devm_gpiod_get_optional() call: it returns +immediately, without unregistering the device that was added by the +call to device_register() earlier in the function. 
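+
+In outline, the problematic path looks like this (condensed sketch of
+__mdiobus_register(), not the verbatim source):
+
+        err = device_register(&bus->dev);
+        ...
+        gpiod = devm_gpiod_get_optional(&bus->dev, "reset", GPIOD_OUT_LOW);
+        if (IS_ERR(gpiod))
+                return PTR_ERR(gpiod);  /* bus->dev is never device_del()ed */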
+ +This leaves a stale device, which then causes a NULL pointer +dereference in the code that handles deferred probing: + +[ 1.489982] Unable to handle kernel NULL pointer dereference at virtual address 00000074 +[ 1.498110] pgd = (ptrval) +[ 1.500838] [00000074] *pgd=00000000 +[ 1.504432] Internal error: Oops: 17 [#1] SMP ARM +[ 1.509133] Modules linked in: +[ 1.512192] CPU: 1 PID: 51 Comm: kworker/1:3 Not tainted 4.20.0-00039-g3b73a4cc8b3e-dirty #99 +[ 1.520708] Hardware name: Xilinx Zynq Platform +[ 1.525261] Workqueue: events deferred_probe_work_func +[ 1.530403] PC is at klist_next+0x10/0xfc +[ 1.534403] LR is at device_for_each_child+0x40/0x94 +[ 1.539361] pc : [] lr : [] psr: 200e0013 +[ 1.545628] sp : ceeefe68 ip : 00000001 fp : ffffe000 +[ 1.550863] r10: 00000000 r9 : c0c66790 r8 : 00000000 +[ 1.556079] r7 : c0457d44 r6 : 00000000 r5 : ceeefe8c r4 : cfa2ec78 +[ 1.562604] r3 : 00000064 r2 : c0457d44 r1 : ceeefe8c r0 : 00000064 +[ 1.569129] Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none +[ 1.576263] Control: 18c5387d Table: 0ed7804a DAC: 00000051 +[ 1.582013] Process kworker/1:3 (pid: 51, stack limit = 0x(ptrval)) +[ 1.588280] Stack: (0xceeefe68 to 0xceef0000) +[ 1.592630] fe60: cfa2ec78 c0c03c08 00000000 c0457d44 00000000 c0c66790 +[ 1.600814] fe80: 00000000 c0455d90 ceeefeac 00000064 00000000 0d7a542e cee9d494 cfa2ec78 +[ 1.608998] fea0: cfa2ec78 00000000 c0457d44 c0457d7c cee9d494 c0c03c08 00000000 c0455dac +[ 1.617182] fec0: cf98ba44 cf926a00 cee9d494 0d7a542e 00000000 cf935a10 cf935a10 cf935a10 +[ 1.625366] fee0: c0c4e9b8 c0457d7c c0c4e80c 00000001 cf935a10 c0457df4 cf935a10 c0c4e99c +[ 1.633550] ff00: c0c4e99c c045a27c c0c4e9c4 ced63f80 cfde8a80 cfdebc00 00000000 c013893c +[ 1.641734] ff20: cfde8a80 cfde8a80 c07bd354 ced63f80 ced63f94 cfde8a80 00000008 c0c02d00 +[ 1.649936] ff40: cfde8a98 cfde8a80 ffffe000 c0139a30 ffffe000 c0c6624a c07bd354 00000000 +[ 1.658120] ff60: ffffe000 cee9e780 ceebfe00 00000000 ceeee000 ced63f80 c0139788 cf8cdea4 +[ 1.666304] ff80: cee9e79c c013e598 00000001 ceebfe00 c013e44c 00000000 00000000 00000000 +[ 1.674488] ffa0: 00000000 00000000 00000000 c01010e8 00000000 00000000 00000000 00000000 +[ 1.682671] ffc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 +[ 1.690855] ffe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 +[ 1.699058] [] (klist_next) from [] (device_for_each_child+0x40/0x94) +[ 1.707241] [] (device_for_each_child) from [] (device_reorder_to_tail+0x38/0x88) +[ 1.716476] [] (device_reorder_to_tail) from [] (device_for_each_child+0x5c/0x94) +[ 1.725692] [] (device_for_each_child) from [] (device_reorder_to_tail+0x38/0x88) +[ 1.734927] [] (device_reorder_to_tail) from [] (device_pm_move_to_tail+0x28/0x40) +[ 1.744235] [] (device_pm_move_to_tail) from [] (deferred_probe_work_func+0x58/0x8c) +[ 1.753746] [] (deferred_probe_work_func) from [] (process_one_work+0x210/0x4fc) +[ 1.762888] [] (process_one_work) from [] (worker_thread+0x2a8/0x5c0) +[ 1.771072] [] (worker_thread) from [] (kthread+0x14c/0x154) +[ 1.778482] [] (kthread) from [] (ret_from_fork+0x14/0x2c) +[ 1.785689] Exception stack(0xceeeffb0 to 0xceeefff8) +[ 1.790739] ffa0: 00000000 00000000 00000000 00000000 +[ 1.798923] ffc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 +[ 1.807107] ffe0: 00000000 00000000 00000000 00000000 00000013 00000000 +[ 1.813724] Code: e92d47f0 e1a05000 e8900048 e1a00003 (e5937010) +[ 1.819844] ---[ end trace 3c2c0c8b65399ec9 ]--- + +The actual error that we had 
from devm_gpiod_get_optional() was +-EPROBE_DEFER, due to the GPIO being provided by a driver that is +probed later than the Ethernet controller driver. + +To fix this, we simply add the missing device_del() invocation in the +error path. + +Fixes: 69226896ad636 ("mdio_bus: Issue GPIO RESET to PHYs") +Signed-off-by: Thomas Petazzoni +Reviewed-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/mdio_bus.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/phy/mdio_bus.c ++++ b/drivers/net/phy/mdio_bus.c +@@ -391,6 +391,7 @@ int __mdiobus_register(struct mii_bus *b + if (IS_ERR(gpiod)) { + dev_err(&bus->dev, "mii_bus %s couldn't get reset GPIO\n", + bus->id); ++ device_del(&bus->dev); + return PTR_ERR(gpiod); + } else if (gpiod) { + bus->reset_gpiod = gpiod; diff --git a/queue-4.19/net-phy-phy-driver-features-are-mandatory.patch b/queue-4.19/net-phy-phy-driver-features-are-mandatory.patch new file mode 100644 index 00000000000..d7020ae3e8e --- /dev/null +++ b/queue-4.19/net-phy-phy-driver-features-are-mandatory.patch @@ -0,0 +1,52 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Camelia Groza +Date: Thu, 17 Jan 2019 14:22:36 +0200 +Subject: net: phy: phy driver features are mandatory + +From: Camelia Groza + +[ Upstream commit 3e64cf7a435ed0500e3adaa8aada2272d3ae8abc ] + +Since phy driver features became a link_mode bitmap, phy drivers that +don't have a list of features configured will cause the kernel to crash +when probed. + +Prevent the phy driver from registering if the features field is missing. + +Fixes: 719655a14971 ("net: phy: Replace phy driver features u32 with link_mode bitmap") +Reported-by: Scott Wood +Signed-off-by: Camelia Groza +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phy_device.c | 5 +++++ + include/linux/phy.h | 4 ++-- + 2 files changed, 7 insertions(+), 2 deletions(-) + +--- a/drivers/net/phy/phy_device.c ++++ b/drivers/net/phy/phy_device.c +@@ -1917,6 +1917,11 @@ int phy_driver_register(struct phy_drive + { + int retval; + ++ if (WARN_ON(!new_driver->features)) { ++ pr_err("%s: Driver features are missing\n", new_driver->name); ++ return -EINVAL; ++ } ++ + new_driver->mdiodrv.flags |= MDIO_DEVICE_IS_PHY; + new_driver->mdiodrv.driver.name = new_driver->name; + new_driver->mdiodrv.driver.bus = &mdio_bus_type; +--- a/include/linux/phy.h ++++ b/include/linux/phy.h +@@ -491,8 +491,8 @@ struct phy_device { + * only works for PHYs with IDs which match this field + * name: The friendly name of this PHY type + * phy_id_mask: Defines the important bits of the phy_id +- * features: A list of features (speed, duplex, etc) supported +- * by this PHY ++ * features: A mandatory list of features (speed, duplex, etc) ++ * supported by this PHY + * flags: A bitfield defining certain other features this PHY + * supports (like interrupts) + * diff --git a/queue-4.19/net-sched-act_tunnel_key-fix-memory-leak-in-case-of-action-replace.patch b/queue-4.19/net-sched-act_tunnel_key-fix-memory-leak-in-case-of-action-replace.patch new file mode 100644 index 00000000000..4c53b15a808 --- /dev/null +++ b/queue-4.19/net-sched-act_tunnel_key-fix-memory-leak-in-case-of-action-replace.patch @@ -0,0 +1,93 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Davide Caratti +Date: Thu, 10 Jan 2019 20:21:02 +0100 +Subject: net/sched: act_tunnel_key: fix memory leak in case of action replace + +From: Davide Caratti + +[ Upstream commit 9174c3df1cd181c14913138d50ccbe539bb08335 ] + +running the 
following TDC test cases: + + 7afc - Replace tunnel_key set action with all parameters + 364d - Replace tunnel_key set action with all parameters and cookie + +it's possible to trigger kmemleak warnings like: + + unreferenced object 0xffff94797127ab40 (size 192): + comm "tc", pid 3248, jiffies 4300565293 (age 1006.862s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 c0 93 f9 8a ff ff ff ff ................ + 41 84 ee 89 ff ff ff ff 00 00 00 00 00 00 00 00 A............... + backtrace: + [<000000001e85b61c>] tunnel_key_init+0x31d/0x820 [act_tunnel_key] + [<000000007f3f6ee7>] tcf_action_init_1+0x384/0x4c0 + [<00000000e89e3ded>] tcf_action_init+0x12b/0x1a0 + [<00000000c1c8c0f8>] tcf_action_add+0x73/0x170 + [<0000000095a9fc28>] tc_ctl_action+0x122/0x160 + [<000000004bebeac5>] rtnetlink_rcv_msg+0x263/0x2d0 + [<000000009fd862dd>] netlink_rcv_skb+0x4a/0x110 + [<00000000b55199e7>] netlink_unicast+0x1a0/0x250 + [<000000004996cd21>] netlink_sendmsg+0x2c1/0x3c0 + [<000000004d6a94b4>] sock_sendmsg+0x36/0x40 + [<000000005d9f0208>] ___sys_sendmsg+0x280/0x2f0 + [<00000000dec19023>] __sys_sendmsg+0x5e/0xa0 + [<000000004b82ac81>] do_syscall_64+0x5b/0x180 + [<00000000a0f1209a>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + [<000000002926b2ab>] 0xffffffffffffffff + +when the tunnel_key action is replaced, the kernel forgets to release the +dst metadata: ensure they are released by tunnel_key_init(), the same way +it's done in tunnel_key_release(). + +Fixes: d0f6dd8a914f4 ("net/sched: Introduce act_tunnel_key") +Signed-off-by: Davide Caratti +Acked-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_tunnel_key.c | 19 +++++++++++-------- + 1 file changed, 11 insertions(+), 8 deletions(-) + +--- a/net/sched/act_tunnel_key.c ++++ b/net/sched/act_tunnel_key.c +@@ -197,6 +197,15 @@ static const struct nla_policy tunnel_ke + [TCA_TUNNEL_KEY_ENC_TTL] = { .type = NLA_U8 }, + }; + ++static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) ++{ ++ if (!p) ++ return; ++ if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) ++ dst_release(&p->tcft_enc_metadata->dst); ++ kfree_rcu(p, rcu); ++} ++ + static int tunnel_key_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind, bool rtnl_held, +@@ -360,8 +369,7 @@ static int tunnel_key_init(struct net *n + rcu_swap_protected(t->params, params_new, + lockdep_is_held(&t->tcf_lock)); + spin_unlock_bh(&t->tcf_lock); +- if (params_new) +- kfree_rcu(params_new, rcu); ++ tunnel_key_release_params(params_new); + + if (ret == ACT_P_CREATED) + tcf_idr_insert(tn, *a); +@@ -385,12 +393,7 @@ static void tunnel_key_release(struct tc + struct tcf_tunnel_key_params *params; + + params = rcu_dereference_protected(t->params, 1); +- if (params) { +- if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) +- dst_release(¶ms->tcft_enc_metadata->dst); +- +- kfree_rcu(params, rcu); +- } ++ tunnel_key_release_params(params); + } + + static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, diff --git a/queue-4.19/net-sched-cls_flower-allocate-mask-dynamically-in-fl_change.patch b/queue-4.19/net-sched-cls_flower-allocate-mask-dynamically-in-fl_change.patch new file mode 100644 index 00000000000..4d59e16105d --- /dev/null +++ b/queue-4.19/net-sched-cls_flower-allocate-mask-dynamically-in-fl_change.patch @@ -0,0 +1,92 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Ivan Vecera +Date: Wed, 16 Jan 2019 16:53:52 +0100 +Subject: net/sched: cls_flower: allocate mask dynamically in fl_change() + 
+From: Ivan Vecera + +[ Upstream commit 2cddd20147826aef283115abb00012d4dafe3cdb ] + +Recent changes (especially 05cd271fd61a ("cls_flower: Support multiple +masks per priority")) in the fl_flow_mask structure grow it and its +current size e.g. on x86_64 with defconfig is 760 bytes and more than +1024 bytes with some debug options enabled. Prior the mentioned commit +its size was 176 bytes (using defconfig on x86_64). +With regard to this fact it's reasonable to allocate this structure +dynamically in fl_change() to reduce its stack size. + +v2: +- use kzalloc() instead of kcalloc() + +Fixes: 05cd271fd61a ("cls_flower: Support multiple masks per priority") +Cc: Jiri Pirko +Cc: Paul Blakey +Acked-by: Jiri Pirko +Signed-off-by: Ivan Vecera +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_flower.c | 19 ++++++++++++++----- + 1 file changed, 14 insertions(+), 5 deletions(-) + +--- a/net/sched/cls_flower.c ++++ b/net/sched/cls_flower.c +@@ -1176,17 +1176,23 @@ static int fl_change(struct net *net, st + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *fold = *arg; + struct cls_fl_filter *fnew; ++ struct fl_flow_mask *mask; + struct nlattr **tb; +- struct fl_flow_mask mask = {}; + int err; + + if (!tca[TCA_OPTIONS]) + return -EINVAL; + +- tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); +- if (!tb) ++ mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL); ++ if (!mask) + return -ENOBUFS; + ++ tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); ++ if (!tb) { ++ err = -ENOBUFS; ++ goto errout_mask_alloc; ++ } ++ + err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], + fl_policy, NULL); + if (err < 0) +@@ -1229,12 +1235,12 @@ static int fl_change(struct net *net, st + } + } + +- err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr, ++ err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr, + tp->chain->tmplt_priv, extack); + if (err) + goto errout_idr; + +- err = fl_check_assign_mask(head, fnew, fold, &mask); ++ err = fl_check_assign_mask(head, fnew, fold, mask); + if (err) + goto errout_idr; + +@@ -1281,6 +1287,7 @@ static int fl_change(struct net *net, st + } + + kfree(tb); ++ kfree(mask); + return 0; + + errout_mask: +@@ -1294,6 +1301,8 @@ errout: + kfree(fnew); + errout_tb: + kfree(tb); ++errout_mask_alloc: ++ kfree(mask); + return err; + } + diff --git a/queue-4.19/net_sched-refetch-skb-protocol-for-each-filter.patch b/queue-4.19/net_sched-refetch-skb-protocol-for-each-filter.patch new file mode 100644 index 00000000000..1557eba52bb --- /dev/null +++ b/queue-4.19/net_sched-refetch-skb-protocol-for-each-filter.patch @@ -0,0 +1,60 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Cong Wang +Date: Fri, 11 Jan 2019 18:55:42 -0800 +Subject: net_sched: refetch skb protocol for each filter + +From: Cong Wang + +[ Upstream commit cd0c4e70fc0ccfa705cdf55efb27519ce9337a26 ] + +Martin reported a set of filters don't work after changing +from reclassify to continue. Looking into the code, it +looks like skb protocol is not always fetched for each +iteration of the filters. But, as demonstrated by Martin, +TC actions could modify skb->protocol, for example act_vlan, +this means we have to refetch skb protocol in each iteration, +rather than using the one we fetch in the beginning of the loop. 
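+
+Schematically, the change moves the fetch into the loop (this mirrors
+the hunk below):
+
+        /* before: read once, goes stale if an action rewrites it */
+        __be16 protocol = tc_skb_protocol(skb);
+        for (; tp; tp = rcu_dereference_bh(tp->next)) { ... }
+
+        /* after: re-read for every filter */
+        for (; tp; tp = rcu_dereference_bh(tp->next)) {
+                __be16 protocol = tc_skb_protocol(skb);
+                ...
+        }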
+ +This bug is _not_ introduced by commit 3b3ae880266d +("net: sched: consolidate tc_classify{,_compat}"), technically, +if act_vlan is the only action that modifies skb protocol, then +it is commit c7e2b9689ef8 ("sched: introduce vlan action") which +introduced this bug. + +Reported-by: Martin Olsson +Cc: Jamal Hadi Salim +Cc: Jiri Pirko +Signed-off-by: Cong Wang +Acked-by: Jamal Hadi Salim +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_api.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/sched/cls_api.c ++++ b/net/sched/cls_api.c +@@ -960,7 +960,6 @@ static int tcf_block_cb_call(struct tcf_ + int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) + { +- __be16 protocol = tc_skb_protocol(skb); + #ifdef CONFIG_NET_CLS_ACT + const int max_reclassify_loop = 4; + const struct tcf_proto *orig_tp = tp; +@@ -970,6 +969,7 @@ int tcf_classify(struct sk_buff *skb, co + reclassify: + #endif + for (; tp; tp = rcu_dereference_bh(tp->next)) { ++ __be16 protocol = tc_skb_protocol(skb); + int err; + + if (tp->protocol != protocol && +@@ -1002,7 +1002,6 @@ reset: + } + + tp = first_tp; +- protocol = tc_skb_protocol(skb); + goto reclassify; + #endif + } diff --git a/queue-4.19/openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch b/queue-4.19/openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch new file mode 100644 index 00000000000..ae1207e9462 --- /dev/null +++ b/queue-4.19/openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch @@ -0,0 +1,34 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Ross Lagerwall +Date: Mon, 14 Jan 2019 09:16:56 +0000 +Subject: openvswitch: Avoid OOB read when parsing flow nlattrs + +From: Ross Lagerwall + +[ Upstream commit 04a4af334b971814eedf4e4a413343ad3287d9a9 ] + +For nested and variable attributes, the expected length of an attribute +is not known and marked by a negative number. This results in an OOB +read when the expected length is later used to check if the attribute is +all zeros. Fix this by using the actual length of the attribute rather +than the expected length. + +Signed-off-by: Ross Lagerwall +Acked-by: Pravin B Shelar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/flow_netlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/openvswitch/flow_netlink.c ++++ b/net/openvswitch/flow_netlink.c +@@ -500,7 +500,7 @@ static int __parse_flow_nlattrs(const st + return -EINVAL; + } + +- if (!nz || !is_all_zero(nla_data(nla), expected_len)) { ++ if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) { + attrs |= 1 << type; + a[type] = nla; + } diff --git a/queue-4.19/series b/queue-4.19/series new file mode 100644 index 00000000000..186abb90116 --- /dev/null +++ b/queue-4.19/series @@ -0,0 +1,21 @@ +amd-xgbe-fix-mdio-access-for-non-zero-ports-and-clause-45-phys.patch +net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch +net-fix-usage-of-pskb_trim_rcsum.patch +net-phy-marvell-errata-for-mv88e6390-internal-phys.patch +net-phy-mdio_bus-add-missing-device_del-in-mdiobus_register-error-handling.patch +net-phy-phy-driver-features-are-mandatory.patch +net-sched-act_tunnel_key-fix-memory-leak-in-case-of-action-replace.patch +net_sched-refetch-skb-protocol-for-each-filter.patch +openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch +vhost-log-dirty-page-correctly.patch +mlxsw-pci-increase-pci-sw-reset-timeout.patch +net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch +mlxsw-spectrum_fid-update-dummy-fid-index.patch +mlxsw-pci-ring-cq-s-doorbell-before-rdq-s.patch +net-sched-cls_flower-allocate-mask-dynamically-in-fl_change.patch +udp-with-udp_segment-release-on-error-path.patch +ip6_gre-fix-tunnel-list-corruption-for-x-netns.patch +erspan-build-the-header-with-the-right-proto-according-to-erspan_ver.patch +net-phy-marvell-fix-deadlock-from-wrong-locking.patch +ip6_gre-update-version-related-info-when-changing-link.patch +tcp-allow-msg_zerocopy-transmission-also-in-close_wait-state.patch diff --git a/queue-4.19/tcp-allow-msg_zerocopy-transmission-also-in-close_wait-state.patch b/queue-4.19/tcp-allow-msg_zerocopy-transmission-also-in-close_wait-state.patch new file mode 100644 index 00000000000..9d1c397a44f --- /dev/null +++ b/queue-4.19/tcp-allow-msg_zerocopy-transmission-also-in-close_wait-state.patch @@ -0,0 +1,43 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Willem de Bruijn +Date: Thu, 10 Jan 2019 14:40:33 -0500 +Subject: tcp: allow MSG_ZEROCOPY transmission also in CLOSE_WAIT state + +From: Willem de Bruijn + +[ Upstream commit 13d7f46386e060df31b727c9975e38306fa51e7a ] + +TCP transmission with MSG_ZEROCOPY fails if the peer closes its end of +the connection and so transitions this socket to CLOSE_WAIT state. + +Transmission in close wait state is acceptable. Other similar tests in +the stack (e.g., in FastOpen) accept both states. Relax this test, too. + +Link: https://www.mail-archive.com/netdev@vger.kernel.org/msg276886.html +Link: https://www.mail-archive.com/netdev@vger.kernel.org/msg227390.html +Fixes: f214f915e7db ("tcp: enable MSG_ZEROCOPY") +Reported-by: Marek Majkowski +Signed-off-by: Willem de Bruijn +CC: Yuchung Cheng +CC: Neal Cardwell +CC: Soheil Hassas Yeganeh +CC: Alexey Kodanev +Acked-by: Soheil Hassas Yeganeh +Reviewed-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1186,7 +1186,7 @@ int tcp_sendmsg_locked(struct sock *sk, + flags = msg->msg_flags; + + if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) { +- if (sk->sk_state != TCP_ESTABLISHED) { ++ if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { + err = -EINVAL; + goto out_err; + } diff --git a/queue-4.19/udp-with-udp_segment-release-on-error-path.patch b/queue-4.19/udp-with-udp_segment-release-on-error-path.patch new file mode 100644 index 00000000000..3a2d4bab5fa --- /dev/null +++ b/queue-4.19/udp-with-udp_segment-release-on-error-path.patch @@ -0,0 +1,82 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Willem de Bruijn +Date: Tue, 15 Jan 2019 11:40:02 -0500 +Subject: udp: with udp_segment release on error path + +From: Willem de Bruijn + +[ Upstream commit 0f149c9fec3cd720628ecde83bfc6f64c1e7dcb6 ] + +Failure __ip_append_data triggers udp_flush_pending_frames, but these +tests happen later. The skb must be freed directly. + +Fixes: bec1f6f697362 ("udp: generate gso with UDP_SEGMENT") +Reported-by: Eric Dumazet +Signed-off-by: Willem de Bruijn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/udp.c | 16 ++++++++++++---- + net/ipv6/udp.c | 16 ++++++++++++---- + 2 files changed, 24 insertions(+), 8 deletions(-) + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -785,15 +785,23 @@ static int udp_send_skb(struct sk_buff * + const int hlen = skb_network_header_len(skb) + + sizeof(struct udphdr); + +- if (hlen + cork->gso_size > cork->fragsize) ++ if (hlen + cork->gso_size > cork->fragsize) { ++ kfree_skb(skb); + return -EINVAL; +- if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) ++ } ++ if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { ++ kfree_skb(skb); + return -EINVAL; +- if (sk->sk_no_check_tx) ++ } ++ if (sk->sk_no_check_tx) { ++ kfree_skb(skb); + return -EINVAL; ++ } + if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || +- dst_xfrm(skb_dst(skb))) ++ dst_xfrm(skb_dst(skb))) { ++ kfree_skb(skb); + return -EIO; ++ } + + skb_shinfo(skb)->gso_size = cork->gso_size; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -1056,15 +1056,23 @@ static int udp_v6_send_skb(struct sk_buf + const int hlen = skb_network_header_len(skb) + + sizeof(struct udphdr); + +- if (hlen + cork->gso_size > cork->fragsize) ++ if (hlen + cork->gso_size > cork->fragsize) { ++ kfree_skb(skb); + return -EINVAL; +- if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) ++ } ++ if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { ++ kfree_skb(skb); + return -EINVAL; +- if (udp_sk(sk)->no_check6_tx) ++ } ++ if (udp_sk(sk)->no_check6_tx) { ++ kfree_skb(skb); + return -EINVAL; ++ } + if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || +- dst_xfrm(skb_dst(skb))) ++ dst_xfrm(skb_dst(skb))) { ++ kfree_skb(skb); + return -EIO; ++ } + + skb_shinfo(skb)->gso_size = cork->gso_size; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; diff --git a/queue-4.19/vhost-log-dirty-page-correctly.patch b/queue-4.19/vhost-log-dirty-page-correctly.patch new file mode 100644 index 00000000000..96d050bc1bc --- /dev/null +++ b/queue-4.19/vhost-log-dirty-page-correctly.patch @@ -0,0 +1,202 @@ +From foo@baz Sat Jan 26 10:22:29 CET 2019 +From: Jason Wang +Date: Wed, 16 Jan 2019 16:54:42 +0800 +Subject: vhost: log dirty page correctly + +From: Jason Wang + +[ Upstream commit 
diff --git a/queue-4.19/udp-with-udp_segment-release-on-error-path.patch b/queue-4.19/udp-with-udp_segment-release-on-error-path.patch
new file mode 100644
index 00000000000..3a2d4bab5fa
--- /dev/null
+++ b/queue-4.19/udp-with-udp_segment-release-on-error-path.patch
@@ -0,0 +1,82 @@
+From foo@baz Sat Jan 26 10:22:29 CET 2019
+From: Willem de Bruijn
+Date: Tue, 15 Jan 2019 11:40:02 -0500
+Subject: udp: with udp_segment release on error path
+
+From: Willem de Bruijn
+
+[ Upstream commit 0f149c9fec3cd720628ecde83bfc6f64c1e7dcb6 ]
+
+Failure of __ip_append_data triggers udp_flush_pending_frames, but
+these checks happen later. The skb must be freed directly.
+
+Fixes: bec1f6f697362 ("udp: generate gso with UDP_SEGMENT")
+Reported-by: Eric Dumazet
+Signed-off-by: Willem de Bruijn
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/udp.c | 16 ++++++++++++----
+ net/ipv6/udp.c | 16 ++++++++++++----
+ 2 files changed, 24 insertions(+), 8 deletions(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -785,15 +785,23 @@ static int udp_send_skb(struct sk_buff *
+                 const int hlen = skb_network_header_len(skb) +
+                                  sizeof(struct udphdr);
+
+-                if (hlen + cork->gso_size > cork->fragsize)
++                if (hlen + cork->gso_size > cork->fragsize) {
++                        kfree_skb(skb);
+                         return -EINVAL;
+-                if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
++                }
++                if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
++                        kfree_skb(skb);
+                         return -EINVAL;
+-                if (sk->sk_no_check_tx)
++                }
++                if (sk->sk_no_check_tx) {
++                        kfree_skb(skb);
+                         return -EINVAL;
++                }
+                 if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
+-                    dst_xfrm(skb_dst(skb)))
++                    dst_xfrm(skb_dst(skb))) {
++                        kfree_skb(skb);
+                         return -EIO;
++                }
+
+                 skb_shinfo(skb)->gso_size = cork->gso_size;
+                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1056,15 +1056,23 @@ static int udp_v6_send_skb(struct sk_buf
+                 const int hlen = skb_network_header_len(skb) +
+                                  sizeof(struct udphdr);
+
+-                if (hlen + cork->gso_size > cork->fragsize)
++                if (hlen + cork->gso_size > cork->fragsize) {
++                        kfree_skb(skb);
+                         return -EINVAL;
+-                if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
++                }
++                if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
++                        kfree_skb(skb);
+                         return -EINVAL;
+-                if (udp_sk(sk)->no_check6_tx)
++                }
++                if (udp_sk(sk)->no_check6_tx) {
++                        kfree_skb(skb);
+                         return -EINVAL;
++                }
+                 if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
+-                    dst_xfrm(skb_dst(skb)))
++                    dst_xfrm(skb_dst(skb))) {
++                        kfree_skb(skb);
+                         return -EIO;
++                }
+
+                 skb_shinfo(skb)->gso_size = cork->gso_size;
+                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
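The fix encodes an ownership rule: by the time these GSO sanity checks
run, the skb has already been detached from the socket's pending queue,
so the udp_flush_pending_frames() cleanup cannot reach it and every
early error return must free it explicitly. A distilled sketch of that
convention, with all names hypothetical:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct buf {
        size_t len;
        unsigned char *data;
};

static void buf_free(struct buf *b)
{
        free(b->data);
        free(b);
}

/* Takes ownership of b: every exit path either hands the buffer on or
 * frees it, so the caller never has to guess. */
static int send_buf(struct buf *b, size_t max_len)
{
        if (b->len > max_len) {
                buf_free(b);            /* release on the error path */
                return -EINVAL;
        }
        /* ... hand off to the transmit layer, which then owns b ... */
        buf_free(b);                    /* stand-in for that hand-off */
        return 0;
}

int main(void)
{
        struct buf *b = malloc(sizeof(*b));

        b->data = malloc(128);
        b->len = 128;
        printf("send: %d\n", send_buf(b, 64)); /* -22, buffer freed */
        return 0;
}

Before the patch, udp_send_skb() mixed the two conventions: it returned
errors as if the caller would clean up, while the caller assumed the
callee had consumed the skb, so on these paths nobody freed it.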
diff --git a/queue-4.19/vhost-log-dirty-page-correctly.patch b/queue-4.19/vhost-log-dirty-page-correctly.patch
new file mode 100644
index 00000000000..96d050bc1bc
--- /dev/null
+++ b/queue-4.19/vhost-log-dirty-page-correctly.patch
@@ -0,0 +1,202 @@
+From foo@baz Sat Jan 26 10:22:29 CET 2019
+From: Jason Wang
+Date: Wed, 16 Jan 2019 16:54:42 +0800
+Subject: vhost: log dirty page correctly
+
+From: Jason Wang
+
+[ Upstream commit cc5e710759470bc7f3c61d11fd54586f15fdbdf4 ]
+
+The vhost dirty page logging API is designed to sync through GPA. But
+we try to log the GIOVA when device IOTLB is enabled. This is wrong and
+may lead to missing data after migration.
+
+To solve this issue, when logging with device IOTLB enabled, we will:
+
+1) reuse the device IOTLB translation result of the GIOVA->HVA mapping
+   to get the HVA; for a writable descriptor, get the HVA through the
+   iovec, and for a used ring update, translate its GIOVA to an HVA
+2) traverse the GPA->HVA mapping to get the possible GPAs and log
+   through the GPA. Note that this reverse mapping is not guaranteed
+   to be unique, so we should log each possible GPA in this case.
+
+This fixes the failure of scp to a guest during migration. In -next, we
+will probably support passing GIOVA->GPA instead of GIOVA->HVA.
+
+Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
+Reported-by: Jintack Lim
+Cc: Jintack Lim
+Signed-off-by: Jason Wang
+Acked-by: Michael S. Tsirkin
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/vhost/net.c | 3 +
+ drivers/vhost/vhost.c | 97 ++++++++++++++++++++++++++++++++++++++++++--------
+ drivers/vhost/vhost.h | 3 +
+ 3 files changed, 87 insertions(+), 16 deletions(-)
+
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -1024,7 +1024,8 @@ static void handle_rx(struct vhost_net *
+                 if (nvq->done_idx > VHOST_NET_BATCH)
+                         vhost_net_signal_used(nvq);
+                 if (unlikely(vq_log))
+-                        vhost_log_write(vq, vq_log, log, vhost_len);
++                        vhost_log_write(vq, vq_log, log, vhost_len,
++                                        vq->iov, in);
+                 total_len += vhost_len;
+                 if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) {
+                         vhost_poll_queue(&vq->poll);
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -1733,13 +1733,87 @@ static int log_write(void __user *log_ba
+         return r;
+ }
+
++static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len)
++{
++        struct vhost_umem *umem = vq->umem;
++        struct vhost_umem_node *u;
++        u64 start, end, l, min;
++        int r;
++        bool hit = false;
++
++        while (len) {
++                min = len;
++                /* More than one GPAs can be mapped into a single HVA. So
++                 * iterate all possible umems here to be safe.
++                 */
++                list_for_each_entry(u, &umem->umem_list, link) {
++                        if (u->userspace_addr > hva - 1 + len ||
++                            u->userspace_addr - 1 + u->size < hva)
++                                continue;
++                        start = max(u->userspace_addr, hva);
++                        end = min(u->userspace_addr - 1 + u->size,
++                                  hva - 1 + len);
++                        l = end - start + 1;
++                        r = log_write(vq->log_base,
++                                      u->start + start - u->userspace_addr,
++                                      l);
++                        if (r < 0)
++                                return r;
++                        hit = true;
++                        min = min(l, min);
++                }
++
++                if (!hit)
++                        return -EFAULT;
++
++                len -= min;
++                hva += min;
++        }
++
++        return 0;
++}
++
++static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len)
++{
++        struct iovec iov[64];
++        int i, ret;
++
++        if (!vq->iotlb)
++                return log_write(vq->log_base, vq->log_addr + used_offset, len);
++
++        ret = translate_desc(vq, (uintptr_t)vq->used + used_offset,
++                             len, iov, 64, VHOST_ACCESS_WO);
++        if (ret < 0)
++                return ret;
++
++        for (i = 0; i < ret; i++) {
++                ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
++                                    iov[i].iov_len);
++                if (ret)
++                        return ret;
++        }
++
++        return 0;
++}
++
+ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
+-                    unsigned int log_num, u64 len)
++                    unsigned int log_num, u64 len, struct iovec *iov, int count)
+ {
+         int i, r;
+
+         /* Make sure data written is seen before log. */
+         smp_wmb();
++
++        if (vq->iotlb) {
++                for (i = 0; i < count; i++) {
++                        r = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
++                                          iov[i].iov_len);
++                        if (r < 0)
++                                return r;
++                }
++                return 0;
++        }
++
+         for (i = 0; i < log_num; ++i) {
+                 u64 l = min(log[i].len, len);
+                 r = log_write(vq->log_base, log[i].addr, l);
+@@ -1769,9 +1843,8 @@ static int vhost_update_used_flags(struc
+                 smp_wmb();
+                 /* Log used flag write. */
+                 used = &vq->used->flags;
+-                log_write(vq->log_base, vq->log_addr +
+-                          (used - (void __user *)vq->used),
+-                          sizeof vq->used->flags);
++                log_used(vq, (used - (void __user *)vq->used),
++                         sizeof vq->used->flags);
+                 if (vq->log_ctx)
+                         eventfd_signal(vq->log_ctx, 1);
+         }
+@@ -1789,9 +1862,8 @@ static int vhost_update_avail_event(stru
+                 smp_wmb();
+                 /* Log avail event write */
+                 used = vhost_avail_event(vq);
+-                log_write(vq->log_base, vq->log_addr +
+-                          (used - (void __user *)vq->used),
+-                          sizeof *vhost_avail_event(vq));
++                log_used(vq, (used - (void __user *)vq->used),
++                         sizeof *vhost_avail_event(vq));
+                 if (vq->log_ctx)
+                         eventfd_signal(vq->log_ctx, 1);
+         }
+@@ -2191,10 +2263,8 @@ static int __vhost_add_used_n(struct vho
+                 /* Make sure data is seen before log. */
+                 smp_wmb();
+                 /* Log used ring entry write. */
+-                log_write(vq->log_base,
+-                          vq->log_addr +
+-                           ((void __user *)used - (void __user *)vq->used),
+-                          count * sizeof *used);
++                log_used(vq, ((void __user *)used - (void __user *)vq->used),
++                         count * sizeof *used);
+         }
+         old = vq->last_used_idx;
+         new = (vq->last_used_idx += count);
+@@ -2236,9 +2306,8 @@ int vhost_add_used_n(struct vhost_virtqu
+                 /* Make sure used idx is seen before log. */
+                 smp_wmb();
+                 /* Log used index update. */
+-                log_write(vq->log_base,
+-                          vq->log_addr + offsetof(struct vring_used, idx),
+-                          sizeof vq->used->idx);
++                log_used(vq, offsetof(struct vring_used, idx),
++                         sizeof vq->used->idx);
+                 if (vq->log_ctx)
+                         eventfd_signal(vq->log_ctx, 1);
+         }
+--- a/drivers/vhost/vhost.h
++++ b/drivers/vhost/vhost.h
+@@ -205,7 +205,8 @@ bool vhost_vq_avail_empty(struct vhost_d
+ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
+
+ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
+-                    unsigned int log_num, u64 len);
++                    unsigned int log_num, u64 len,
++                    struct iovec *iov, int count);
+ int vq_iotlb_prefetch(struct vhost_virtqueue *vq);
+
+ struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type);
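The heart of the patch is log_write_hva() above: since more than one
guest-physical range can map to the same host virtual address, the loop
logs every mapping that overlaps the current position, then advances by
the smallest overlap so no aliased range is skipped. A self-contained
sketch of that traversal over an invented mapping table; the kernel
version writes dirty-bitmap entries through log_write() and uses the
overflow-safe "addr - 1 + len" bounds, while this sketch keeps the
plain form and a printf for clarity:

#include <stdint.h>
#include <stdio.h>

struct map {
        uint64_t hva;   /* host virtual start   */
        uint64_t gpa;   /* guest physical start */
        uint64_t size;
};

/* Log [hva, hva + len) through every GPA range that maps it. */
static int log_range(const struct map *m, int n, uint64_t hva, uint64_t len)
{
        while (len) {
                uint64_t step = len;
                int hit = 0;

                for (int i = 0; i < n; i++) {
                        if (m[i].hva >= hva + len ||
                            m[i].hva + m[i].size <= hva)
                                continue;   /* no overlap with the window */
                        uint64_t start = m[i].hva > hva ? m[i].hva : hva;
                        uint64_t end = m[i].hva + m[i].size < hva + len ?
                                       m[i].hva + m[i].size : hva + len;
                        printf("log gpa 0x%llx len 0x%llx\n",
                               (unsigned long long)(m[i].gpa + start - m[i].hva),
                               (unsigned long long)(end - start));
                        if (end - start < step)
                                step = end - start;
                        hit = 1;
                }
                if (!hit)
                        return -1;          /* hole: like the -EFAULT above */
                hva += step;
                len -= step;
        }
        return 0;
}

int main(void)
{
        /* Two GPA ranges aliasing one HVA window. */
        const struct map m[] = {
                { 0x1000, 0x10000, 0x2000 },
                { 0x1000, 0x80000, 0x1000 },
        };

        return log_range(m, 2, 0x1800, 0x1000) ? 1 : 0;
}

For the window 0x1800..0x2800 this logs each dirty stretch through
every mapping that covers it; re-logging a stretch is harmless because
the log is a bitmap of dirty pages, and it is exactly the "log each
possible GPA" behavior the commit message calls for.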