From: Greg Kroah-Hartman Date: Sat, 26 Jan 2019 09:51:25 +0000 (+0100) Subject: 4.14-stable patches X-Git-Tag: v4.9.154~71 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6ff85d3ee93971b8a47acec433e7a4352d61a910;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: amd-xgbe-fix-mdio-access-for-non-zero-ports-and-clause-45-phys.patch net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch net-fix-usage-of-pskb_trim_rcsum.patch net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch net-phy-mdio_bus-add-missing-device_del-in-mdiobus_register-error-handling.patch net_sched-refetch-skb-protocol-for-each-filter.patch openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch tcp-allow-msg_zerocopy-transmission-also-in-close_wait-state.patch vhost-log-dirty-page-correctly.patch --- diff --git a/queue-4.14/amd-xgbe-fix-mdio-access-for-non-zero-ports-and-clause-45-phys.patch b/queue-4.14/amd-xgbe-fix-mdio-access-for-non-zero-ports-and-clause-45-phys.patch new file mode 100644 index 00000000000..d954fbd3274 --- /dev/null +++ b/queue-4.14/amd-xgbe-fix-mdio-access-for-non-zero-ports-and-clause-45-phys.patch @@ -0,0 +1,90 @@ +From foo@baz Sat Jan 26 10:34:35 CET 2019 +From: "Lendacky, Thomas" +Date: Thu, 17 Jan 2019 14:20:14 +0000 +Subject: amd-xgbe: Fix mdio access for non-zero ports and clause 45 PHYs + +From: "Lendacky, Thomas" + +[ Upstream commit 5ab3121beeb76aa6090195b67d237115860dd9ec ] + +The XGBE hardware has support for performing MDIO operations using an +MDIO command request. The driver mistakenly uses the mdio port address +as the MDIO command request device address instead of the MDIO command +request port address. Additionally, the driver does not properly check +for and create a clause 45 MDIO command. + +Check the supplied MDIO register to determine if the request is a clause +45 operation (MII_ADDR_C45). 
For a clause 45 operation, extract the device +address and register number from the supplied MDIO register and use them +to set the MDIO command request device address and register number fields. +For a clause 22 operation, the MDIO request device address is set to zero +and the MDIO command request register number is set to the supplied MDIO +register. In either case, the supplied MDIO port address is used as the +MDIO command request port address. + +Fixes: 732f2ab7afb9 ("amd-xgbe: Add support for MDIO attached PHYs") +Signed-off-by: Tom Lendacky +Tested-by: Shyam Sundar S K +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/amd/xgbe/xgbe-common.h | 2 -- + drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 22 ++++++++++++++++------ + 2 files changed, 16 insertions(+), 8 deletions(-) + +--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h +@@ -431,8 +431,6 @@ + #define MAC_MDIOSCAR_PA_WIDTH 5 + #define MAC_MDIOSCAR_RA_INDEX 0 + #define MAC_MDIOSCAR_RA_WIDTH 16 +-#define MAC_MDIOSCAR_REG_INDEX 0 +-#define MAC_MDIOSCAR_REG_WIDTH 21 + #define MAC_MDIOSCCDR_BUSY_INDEX 22 + #define MAC_MDIOSCCDR_BUSY_WIDTH 1 + #define MAC_MDIOSCCDR_CMD_INDEX 16 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +@@ -1284,6 +1284,20 @@ static void xgbe_write_mmd_regs(struct x + } + } + ++static unsigned int xgbe_create_mdio_sca(int port, int reg) ++{ ++ unsigned int mdio_sca, da; ++ ++ da = (reg & MII_ADDR_C45) ? 
reg >> 16 : 0; ++ ++ mdio_sca = 0; ++ XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, RA, reg); ++ XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, PA, port); ++ XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, da); ++ ++ return mdio_sca; ++} ++ + static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, + int reg, u16 val) + { +@@ -1291,9 +1305,7 @@ static int xgbe_write_ext_mii_regs(struc + + reinit_completion(&pdata->mdio_complete); + +- mdio_sca = 0; +- XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); +- XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); ++ mdio_sca = xgbe_create_mdio_sca(addr, reg); + XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); + + mdio_sccd = 0; +@@ -1317,9 +1329,7 @@ static int xgbe_read_ext_mii_regs(struct + + reinit_completion(&pdata->mdio_complete); + +- mdio_sca = 0; +- XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg); +- XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr); ++ mdio_sca = xgbe_create_mdio_sca(addr, reg); + XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca); + + mdio_sccd = 0; diff --git a/queue-4.14/net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch b/queue-4.14/net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch new file mode 100644 index 00000000000..f10c0f347cf --- /dev/null +++ b/queue-4.14/net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch @@ -0,0 +1,69 @@ +From foo@baz Sat Jan 26 10:34:35 CET 2019 +From: Yunjian Wang +Date: Thu, 17 Jan 2019 09:46:41 +0800 +Subject: net: bridge: Fix ethernet header pointer before check skb forwardable + +From: Yunjian Wang + +[ Upstream commit 28c1382fa28f2e2d9d0d6f25ae879b5af2ecbd03 ] + +The skb header should be set to ethernet header before using +is_skb_forwardable. Because the ethernet header length has been +considered in is_skb_forwardable(including dev->hard_header_len +length). 
+ +To reproduce the issue: +1, add 2 ports on linux bridge br using the following commands: +$ brctl addbr br +$ brctl addif br eth0 +$ brctl addif br eth1 +2, the MTU of eth0 and eth1 is 1500 +3, send a packet(Data 1480, UDP 8, IP 20, Ethernet 14, VLAN 4) +from eth0 to eth1 + +So the expected result is that a packet larger than 1500 cannot pass through +eth0 and eth1. But currently, the packet passes through successfully, which +means eth1's MTU limit doesn't take effect. + +Fixes: f6367b4660dd ("bridge: use is_skb_forwardable in forward path") +Cc: bridge@lists.linux-foundation.org +Cc: Nikolay Aleksandrov +Cc: Roopa Prabhu +Cc: Stephen Hemminger +Signed-off-by: Yunjian Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_forward.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/net/bridge/br_forward.c ++++ b/net/bridge/br_forward.c +@@ -35,10 +35,10 @@ static inline int should_deliver(const s + + int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) + { ++ skb_push(skb, ETH_HLEN); + if (!is_skb_forwardable(skb->dev, skb)) + goto drop; + +- skb_push(skb, ETH_HLEN); + br_drop_fake_rtable(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL && +@@ -96,12 +96,11 @@ static void __br_forward(const struct ne + net = dev_net(indev); + } else { + if (unlikely(netpoll_tx_running(to->br->dev))) { +- if (!is_skb_forwardable(skb->dev, skb)) { ++ skb_push(skb, ETH_HLEN); ++ if (!is_skb_forwardable(skb->dev, skb)) + kfree_skb(skb); +- } else { +- skb_push(skb, ETH_HLEN); ++ else + br_netpoll_send_skb(to, skb); +- } + return; + } + br_hook = NF_BR_LOCAL_OUT; diff --git a/queue-4.14/net-fix-usage-of-pskb_trim_rcsum.patch b/queue-4.14/net-fix-usage-of-pskb_trim_rcsum.patch new file mode 100644 index 00000000000..54921ae6855 --- /dev/null +++ b/queue-4.14/net-fix-usage-of-pskb_trim_rcsum.patch @@ -0,0 +1,75 @@ +From foo@baz Sat Jan 26 10:34:35 CET 2019 +From: Ross Lagerwall +Date: Thu, 17 Jan 2019 15:34:38 +0000
+Subject: net: Fix usage of pskb_trim_rcsum + +From: Ross Lagerwall + +[ Upstream commit 6c57f0458022298e4da1729c67bd33ce41c14e7a ] + +In certain cases, pskb_trim_rcsum() may change skb pointers. +Reinitialize header pointers afterwards to avoid potential +use-after-frees. Add a note in the documentation of +pskb_trim_rcsum(). Found by KASAN. + +Signed-off-by: Ross Lagerwall +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/pppoe.c | 1 + + include/linux/skbuff.h | 1 + + net/bridge/br_netfilter_ipv6.c | 1 + + net/bridge/netfilter/nft_reject_bridge.c | 1 + + net/ipv4/ip_input.c | 1 + + 5 files changed, 5 insertions(+) + +--- a/drivers/net/ppp/pppoe.c ++++ b/drivers/net/ppp/pppoe.c +@@ -445,6 +445,7 @@ static int pppoe_rcv(struct sk_buff *skb + if (pskb_trim_rcsum(skb, len)) + goto drop; + ++ ph = pppoe_hdr(skb); + pn = pppoe_pernet(dev_net(dev)); + + /* Note that get_item does a sock_hold(), so sk_pppox(po) +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -3163,6 +3163,7 @@ int pskb_trim_rcsum_slow(struct sk_buff + * + * This is exactly the same as pskb_trim except that it ensures the + * checksum of received packets are still valid after the operation. ++ * It can change skb pointers. 
+ */ + + static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) +--- a/net/bridge/br_netfilter_ipv6.c ++++ b/net/bridge/br_netfilter_ipv6.c +@@ -131,6 +131,7 @@ int br_validate_ipv6(struct net *net, st + IPSTATS_MIB_INDISCARDS); + goto drop; + } ++ hdr = ipv6_hdr(skb); + } + if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) + goto drop; +--- a/net/bridge/netfilter/nft_reject_bridge.c ++++ b/net/bridge/netfilter/nft_reject_bridge.c +@@ -230,6 +230,7 @@ static bool reject6_br_csum_ok(struct sk + pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) + return false; + ++ ip6h = ipv6_hdr(skb); + thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) + return false; +--- a/net/ipv4/ip_input.c ++++ b/net/ipv4/ip_input.c +@@ -481,6 +481,7 @@ int ip_rcv(struct sk_buff *skb, struct n + goto drop; + } + ++ iph = ip_hdr(skb); + skb->transport_header = skb->network_header + iph->ihl*4; + + /* Remove any debris in the socket control block */ diff --git a/queue-4.14/net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch b/queue-4.14/net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch new file mode 100644 index 00000000000..4dc256052b6 --- /dev/null +++ b/queue-4.14/net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch @@ -0,0 +1,148 @@ +From foo@baz Sat Jan 26 10:34:35 CET 2019 +From: Ido Schimmel +Date: Wed, 9 Jan 2019 09:57:39 +0000 +Subject: net: ipv4: Fix memory leak in network namespace dismantle + +From: Ido Schimmel + +[ Upstream commit f97f4dd8b3bb9d0993d2491e0f22024c68109184 ] + +IPv4 routing tables are flushed in two cases: + +1. In response to events in the netdev and inetaddr notification chains +2. When a network namespace is being dismantled + +In both cases only routes associated with a dead nexthop group are +flushed. 
However, a nexthop group will only be marked as dead in case it +is populated with actual nexthops using a nexthop device. This is not +the case when the route in question is an error route (e.g., +'blackhole', 'unreachable'). + +Therefore, when a network namespace is being dismantled such routes are +not flushed and leaked [1]. + +To reproduce: +# ip netns add blue +# ip -n blue route add unreachable 192.0.2.0/24 +# ip netns del blue + +Fix this by not skipping error routes that are not marked with +RTNH_F_DEAD when flushing the routing tables. + +To prevent the flushing of such routes in case #1, add a parameter to +fib_table_flush() that indicates if the table is flushed as part of +namespace dismantle or not. + +Note that this problem does not exist in IPv6 since error routes are +associated with the loopback device. + +[1] +unreferenced object 0xffff888066650338 (size 56): + comm "ip", pid 1206, jiffies 4294786063 (age 26.235s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 b0 1c 62 61 80 88 ff ff ..........ba.... + e8 8b a1 64 80 88 ff ff 00 07 00 08 fe 00 00 00 ...d............ + backtrace: + [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220 + [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20 + [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380 + [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690 + [<0000000014f62875>] netlink_sendmsg+0x929/0xe10 + [<00000000bac9d967>] sock_sendmsg+0xc8/0x110 + [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0 + [<000000002e94f880>] __sys_sendmsg+0xf7/0x250 + [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610 + [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe + [<000000003a8b605b>] 0xffffffffffffffff +unreferenced object 0xffff888061621c88 (size 48): + comm "ip", pid 1206, jiffies 4294786063 (age 26.235s) + hex dump (first 32 bytes): + 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk + 6b 6b 6b 6b 6b 6b 6b 6b d8 8e 26 5f 80 88 ff ff kkkkkkkk..&_.... 
+ backtrace: + [<00000000733609e3>] fib_table_insert+0x978/0x1500 + [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220 + [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20 + [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380 + [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690 + [<0000000014f62875>] netlink_sendmsg+0x929/0xe10 + [<00000000bac9d967>] sock_sendmsg+0xc8/0x110 + [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0 + [<000000002e94f880>] __sys_sendmsg+0xf7/0x250 + [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610 + [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe + [<000000003a8b605b>] 0xffffffffffffffff + +Fixes: 8cced9eff1d4 ("[NETNS]: Enable routing configuration in non-initial namespace.") +Signed-off-by: Ido Schimmel +Reviewed-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip_fib.h | 2 +- + net/ipv4/fib_frontend.c | 4 ++-- + net/ipv4/fib_trie.c | 15 ++++++++++++--- + 3 files changed, 15 insertions(+), 6 deletions(-) + +--- a/include/net/ip_fib.h ++++ b/include/net/ip_fib.h +@@ -233,7 +233,7 @@ int fib_table_delete(struct net *, struc + struct netlink_ext_ack *extack); + int fib_table_dump(struct fib_table *table, struct sk_buff *skb, + struct netlink_callback *cb); +-int fib_table_flush(struct net *net, struct fib_table *table); ++int fib_table_flush(struct net *net, struct fib_table *table, bool flush_all); + struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); + void fib_table_flush_external(struct fib_table *table); + void fib_free_table(struct fib_table *tb); +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -193,7 +193,7 @@ static void fib_flush(struct net *net) + struct fib_table *tb; + + hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) +- flushed += fib_table_flush(net, tb); ++ flushed += fib_table_flush(net, tb, false); + } + + if (flushed) +@@ -1299,7 +1299,7 @@ static void ip_fib_net_exit(struct net * + + hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) 
{ + hlist_del(&tb->tb_hlist); +- fib_table_flush(net, tb); ++ fib_table_flush(net, tb, true); + fib_free_table(tb); + } + } +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -1836,7 +1836,7 @@ void fib_table_flush_external(struct fib + } + + /* Caller must hold RTNL. */ +-int fib_table_flush(struct net *net, struct fib_table *tb) ++int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) + { + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *pn = t->kv; +@@ -1884,8 +1884,17 @@ int fib_table_flush(struct net *net, str + hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { + struct fib_info *fi = fa->fa_info; + +- if (!fi || !(fi->fib_flags & RTNH_F_DEAD) || +- tb->tb_id != fa->tb_id) { ++ if (!fi || tb->tb_id != fa->tb_id || ++ (!(fi->fib_flags & RTNH_F_DEAD) && ++ !fib_props[fa->fa_type].error)) { ++ slen = fa->fa_slen; ++ continue; ++ } ++ ++ /* Do not flush error routes if network namespace is ++ * not being dismantled ++ */ ++ if (!flush_all && fib_props[fa->fa_type].error) { + slen = fa->fa_slen; + continue; + } diff --git a/queue-4.14/net-phy-mdio_bus-add-missing-device_del-in-mdiobus_register-error-handling.patch b/queue-4.14/net-phy-mdio_bus-add-missing-device_del-in-mdiobus_register-error-handling.patch new file mode 100644 index 00000000000..ab78a9d9057 --- /dev/null +++ b/queue-4.14/net-phy-mdio_bus-add-missing-device_del-in-mdiobus_register-error-handling.patch @@ -0,0 +1,92 @@ +From foo@baz Sat Jan 26 10:34:35 CET 2019 +From: Thomas Petazzoni +Date: Wed, 16 Jan 2019 10:53:58 +0100 +Subject: net: phy: mdio_bus: add missing device_del() in mdiobus_register() error handling + +From: Thomas Petazzoni + +[ Upstream commit e40e2a2e78664fa90ea4b9bdf4a84efce2fea9d9 ] + +The current code in __mdiobus_register() doesn't properly handle +failures returned by the devm_gpiod_get_optional() call: it returns +immediately, without unregistering the device that was added by the +call to device_register() earlier in the 
function. + +This leaves a stale device, which then causes a NULL pointer +dereference in the code that handles deferred probing: + +[ 1.489982] Unable to handle kernel NULL pointer dereference at virtual address 00000074 +[ 1.498110] pgd = (ptrval) +[ 1.500838] [00000074] *pgd=00000000 +[ 1.504432] Internal error: Oops: 17 [#1] SMP ARM +[ 1.509133] Modules linked in: +[ 1.512192] CPU: 1 PID: 51 Comm: kworker/1:3 Not tainted 4.20.0-00039-g3b73a4cc8b3e-dirty #99 +[ 1.520708] Hardware name: Xilinx Zynq Platform +[ 1.525261] Workqueue: events deferred_probe_work_func +[ 1.530403] PC is at klist_next+0x10/0xfc +[ 1.534403] LR is at device_for_each_child+0x40/0x94 +[ 1.539361] pc : [] lr : [] psr: 200e0013 +[ 1.545628] sp : ceeefe68 ip : 00000001 fp : ffffe000 +[ 1.550863] r10: 00000000 r9 : c0c66790 r8 : 00000000 +[ 1.556079] r7 : c0457d44 r6 : 00000000 r5 : ceeefe8c r4 : cfa2ec78 +[ 1.562604] r3 : 00000064 r2 : c0457d44 r1 : ceeefe8c r0 : 00000064 +[ 1.569129] Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none +[ 1.576263] Control: 18c5387d Table: 0ed7804a DAC: 00000051 +[ 1.582013] Process kworker/1:3 (pid: 51, stack limit = 0x(ptrval)) +[ 1.588280] Stack: (0xceeefe68 to 0xceef0000) +[ 1.592630] fe60: cfa2ec78 c0c03c08 00000000 c0457d44 00000000 c0c66790 +[ 1.600814] fe80: 00000000 c0455d90 ceeefeac 00000064 00000000 0d7a542e cee9d494 cfa2ec78 +[ 1.608998] fea0: cfa2ec78 00000000 c0457d44 c0457d7c cee9d494 c0c03c08 00000000 c0455dac +[ 1.617182] fec0: cf98ba44 cf926a00 cee9d494 0d7a542e 00000000 cf935a10 cf935a10 cf935a10 +[ 1.625366] fee0: c0c4e9b8 c0457d7c c0c4e80c 00000001 cf935a10 c0457df4 cf935a10 c0c4e99c +[ 1.633550] ff00: c0c4e99c c045a27c c0c4e9c4 ced63f80 cfde8a80 cfdebc00 00000000 c013893c +[ 1.641734] ff20: cfde8a80 cfde8a80 c07bd354 ced63f80 ced63f94 cfde8a80 00000008 c0c02d00 +[ 1.649936] ff40: cfde8a98 cfde8a80 ffffe000 c0139a30 ffffe000 c0c6624a c07bd354 00000000 +[ 1.658120] ff60: ffffe000 cee9e780 ceebfe00 00000000 ceeee000 ced63f80 
c0139788 cf8cdea4 +[ 1.666304] ff80: cee9e79c c013e598 00000001 ceebfe00 c013e44c 00000000 00000000 00000000 +[ 1.674488] ffa0: 00000000 00000000 00000000 c01010e8 00000000 00000000 00000000 00000000 +[ 1.682671] ffc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 +[ 1.690855] ffe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 +[ 1.699058] [] (klist_next) from [] (device_for_each_child+0x40/0x94) +[ 1.707241] [] (device_for_each_child) from [] (device_reorder_to_tail+0x38/0x88) +[ 1.716476] [] (device_reorder_to_tail) from [] (device_for_each_child+0x5c/0x94) +[ 1.725692] [] (device_for_each_child) from [] (device_reorder_to_tail+0x38/0x88) +[ 1.734927] [] (device_reorder_to_tail) from [] (device_pm_move_to_tail+0x28/0x40) +[ 1.744235] [] (device_pm_move_to_tail) from [] (deferred_probe_work_func+0x58/0x8c) +[ 1.753746] [] (deferred_probe_work_func) from [] (process_one_work+0x210/0x4fc) +[ 1.762888] [] (process_one_work) from [] (worker_thread+0x2a8/0x5c0) +[ 1.771072] [] (worker_thread) from [] (kthread+0x14c/0x154) +[ 1.778482] [] (kthread) from [] (ret_from_fork+0x14/0x2c) +[ 1.785689] Exception stack(0xceeeffb0 to 0xceeefff8) +[ 1.790739] ffa0: 00000000 00000000 00000000 00000000 +[ 1.798923] ffc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 +[ 1.807107] ffe0: 00000000 00000000 00000000 00000000 00000013 00000000 +[ 1.813724] Code: e92d47f0 e1a05000 e8900048 e1a00003 (e5937010) +[ 1.819844] ---[ end trace 3c2c0c8b65399ec9 ]--- + +The actual error that we had from devm_gpiod_get_optional() was +-EPROBE_DEFER, due to the GPIO being provided by a driver that is +probed later than the Ethernet controller driver. + +To fix this, we simply add the missing device_del() invocation in the +error path. + +Fixes: 69226896ad636 ("mdio_bus: Issue GPIO RESET to PHYs") +Signed-off-by: Thomas Petazzoni +Reviewed-by: Andrew Lunn +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/mdio_bus.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/phy/mdio_bus.c ++++ b/drivers/net/phy/mdio_bus.c +@@ -358,6 +358,7 @@ int __mdiobus_register(struct mii_bus *b + if (IS_ERR(gpiod)) { + dev_err(&bus->dev, "mii_bus %s couldn't get reset GPIO\n", + bus->id); ++ device_del(&bus->dev); + return PTR_ERR(gpiod); + } else if (gpiod) { + bus->reset_gpiod = gpiod; diff --git a/queue-4.14/net_sched-refetch-skb-protocol-for-each-filter.patch b/queue-4.14/net_sched-refetch-skb-protocol-for-each-filter.patch new file mode 100644 index 00000000000..5e1f0137632 --- /dev/null +++ b/queue-4.14/net_sched-refetch-skb-protocol-for-each-filter.patch @@ -0,0 +1,60 @@ +From foo@baz Sat Jan 26 10:34:35 CET 2019 +From: Cong Wang +Date: Fri, 11 Jan 2019 18:55:42 -0800 +Subject: net_sched: refetch skb protocol for each filter + +From: Cong Wang + +[ Upstream commit cd0c4e70fc0ccfa705cdf55efb27519ce9337a26 ] + +Martin reported a set of filters don't work after changing +from reclassify to continue. Looking into the code, it +looks like skb protocol is not always fetched for each +iteration of the filters. But, as demonstrated by Martin, +TC actions could modify skb->protocol, for example act_vlan, +this means we have to refetch skb protocol in each iteration, +rather than using the one we fetch in the beginning of the loop. + +This bug is _not_ introduced by commit 3b3ae880266d +("net: sched: consolidate tc_classify{,_compat}"), technically, +if act_vlan is the only action that modifies skb protocol, then +it is commit c7e2b9689ef8 ("sched: introduce vlan action") which +introduced this bug. + +Reported-by: Martin Olsson +Cc: Jamal Hadi Salim +Cc: Jiri Pirko +Signed-off-by: Cong Wang +Acked-by: Jamal Hadi Salim +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_api.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/sched/cls_api.c ++++ b/net/sched/cls_api.c +@@ -318,7 +318,6 @@ EXPORT_SYMBOL(tcf_block_put); + int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) + { +- __be16 protocol = tc_skb_protocol(skb); + #ifdef CONFIG_NET_CLS_ACT + const int max_reclassify_loop = 4; + const struct tcf_proto *orig_tp = tp; +@@ -328,6 +327,7 @@ int tcf_classify(struct sk_buff *skb, co + reclassify: + #endif + for (; tp; tp = rcu_dereference_bh(tp->next)) { ++ __be16 protocol = tc_skb_protocol(skb); + int err; + + if (tp->protocol != protocol && +@@ -359,7 +359,6 @@ reset: + } + + tp = first_tp; +- protocol = tc_skb_protocol(skb); + goto reclassify; + #endif + } diff --git a/queue-4.14/openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch b/queue-4.14/openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch new file mode 100644 index 00000000000..0642e914a8b --- /dev/null +++ b/queue-4.14/openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch @@ -0,0 +1,34 @@ +From foo@baz Sat Jan 26 10:34:35 CET 2019 +From: Ross Lagerwall +Date: Mon, 14 Jan 2019 09:16:56 +0000 +Subject: openvswitch: Avoid OOB read when parsing flow nlattrs + +From: Ross Lagerwall + +[ Upstream commit 04a4af334b971814eedf4e4a413343ad3287d9a9 ] + +For nested and variable attributes, the expected length of an attribute +is not known and marked by a negative number. This results in an OOB +read when the expected length is later used to check if the attribute is +all zeros. Fix this by using the actual length of the attribute rather +than the expected length. + +Signed-off-by: Ross Lagerwall +Acked-by: Pravin B Shelar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/flow_netlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/openvswitch/flow_netlink.c ++++ b/net/openvswitch/flow_netlink.c +@@ -459,7 +459,7 @@ static int __parse_flow_nlattrs(const st + return -EINVAL; + } + +- if (!nz || !is_all_zero(nla_data(nla), expected_len)) { ++ if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) { + attrs |= 1 << type; + a[type] = nla; + } diff --git a/queue-4.14/series b/queue-4.14/series new file mode 100644 index 00000000000..b88600c35c1 --- /dev/null +++ b/queue-4.14/series @@ -0,0 +1,9 @@ +amd-xgbe-fix-mdio-access-for-non-zero-ports-and-clause-45-phys.patch +net-bridge-fix-ethernet-header-pointer-before-check-skb-forwardable.patch +net-fix-usage-of-pskb_trim_rcsum.patch +net-phy-mdio_bus-add-missing-device_del-in-mdiobus_register-error-handling.patch +net_sched-refetch-skb-protocol-for-each-filter.patch +openvswitch-avoid-oob-read-when-parsing-flow-nlattrs.patch +vhost-log-dirty-page-correctly.patch +net-ipv4-fix-memory-leak-in-network-namespace-dismantle.patch +tcp-allow-msg_zerocopy-transmission-also-in-close_wait-state.patch diff --git a/queue-4.14/tcp-allow-msg_zerocopy-transmission-also-in-close_wait-state.patch b/queue-4.14/tcp-allow-msg_zerocopy-transmission-also-in-close_wait-state.patch new file mode 100644 index 00000000000..94e855eb146 --- /dev/null +++ b/queue-4.14/tcp-allow-msg_zerocopy-transmission-also-in-close_wait-state.patch @@ -0,0 +1,43 @@ +From foo@baz Sat Jan 26 10:34:35 CET 2019 +From: Willem de Bruijn +Date: Thu, 10 Jan 2019 14:40:33 -0500 +Subject: tcp: allow MSG_ZEROCOPY transmission also in CLOSE_WAIT state + +From: Willem de Bruijn + +[ Upstream commit 13d7f46386e060df31b727c9975e38306fa51e7a ] + +TCP transmission with MSG_ZEROCOPY fails if the peer closes its end of +the connection and so transitions this socket to CLOSE_WAIT state. + +Transmission in close wait state is acceptable. 
Other similar tests in +the stack (e.g., in FastOpen) accept both states. Relax this test, too. + +Link: https://www.mail-archive.com/netdev@vger.kernel.org/msg276886.html +Link: https://www.mail-archive.com/netdev@vger.kernel.org/msg227390.html +Fixes: f214f915e7db ("tcp: enable MSG_ZEROCOPY") +Reported-by: Marek Majkowski +Signed-off-by: Willem de Bruijn +CC: Yuchung Cheng +CC: Neal Cardwell +CC: Soheil Hassas Yeganeh +CC: Alexey Kodanev +Acked-by: Soheil Hassas Yeganeh +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1178,7 +1178,7 @@ int tcp_sendmsg_locked(struct sock *sk, + flags = msg->msg_flags; + + if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) { +- if (sk->sk_state != TCP_ESTABLISHED) { ++ if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { + err = -EINVAL; + goto out_err; + } diff --git a/queue-4.14/vhost-log-dirty-page-correctly.patch b/queue-4.14/vhost-log-dirty-page-correctly.patch new file mode 100644 index 00000000000..feca7822b05 --- /dev/null +++ b/queue-4.14/vhost-log-dirty-page-correctly.patch @@ -0,0 +1,202 @@ +From foo@baz Sat Jan 26 10:34:35 CET 2019 +From: Jason Wang +Date: Wed, 16 Jan 2019 16:54:42 +0800 +Subject: vhost: log dirty page correctly + +From: Jason Wang + +[ Upstream commit cc5e710759470bc7f3c61d11fd54586f15fdbdf4 ] + +Vhost dirty page logging API is designed to sync through GPA. But we +try to log GIOVA when device IOTLB is enabled. This is wrong and may +lead to missing data after migration. + +To solve this issue, when logging with device IOTLB enabled, we will: + +1) reuse the device IOTLB translation result of GIOVA->HVA mapping to + get HVA, for writable descriptor, get HVA through iovec. For used + ring update, translate its GIOVA to HVA +2) traverse the GPA->HVA mapping to get the possible GPA and log + through GPA. 
Note that this reverse mapping is not guaranteed + to be unique, so we should log each possible GPA in this case. + +This fixes the failure of scp to guest during migration. In -next, we +will probably support passing GIOVA->GPA instead of GIOVA->HVA. + +Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API") +Reported-by: Jintack Lim +Cc: Jintack Lim +Signed-off-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vhost/net.c | 3 + + drivers/vhost/vhost.c | 97 ++++++++++++++++++++++++++++++++++++++++++-------- + drivers/vhost/vhost.h | 3 + + 3 files changed, 87 insertions(+), 16 deletions(-) + +--- a/drivers/vhost/net.c ++++ b/drivers/vhost/net.c +@@ -851,7 +851,8 @@ static void handle_rx(struct vhost_net * + vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, + headcount); + if (unlikely(vq_log)) +- vhost_log_write(vq, vq_log, log, vhost_len); ++ vhost_log_write(vq, vq_log, log, vhost_len, ++ vq->iov, in); + total_len += vhost_len; + if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + vhost_poll_queue(&vq->poll); +--- a/drivers/vhost/vhost.c ++++ b/drivers/vhost/vhost.c +@@ -1726,13 +1726,87 @@ static int log_write(void __user *log_ba + return r; + } + ++static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len) ++{ ++ struct vhost_umem *umem = vq->umem; ++ struct vhost_umem_node *u; ++ u64 start, end, l, min; ++ int r; ++ bool hit = false; ++ ++ while (len) { ++ min = len; ++ /* More than one GPAs can be mapped into a single HVA. So ++ * iterate all possible umems here to be safe.
++ */ ++ list_for_each_entry(u, &umem->umem_list, link) { ++ if (u->userspace_addr > hva - 1 + len || ++ u->userspace_addr - 1 + u->size < hva) ++ continue; ++ start = max(u->userspace_addr, hva); ++ end = min(u->userspace_addr - 1 + u->size, ++ hva - 1 + len); ++ l = end - start + 1; ++ r = log_write(vq->log_base, ++ u->start + start - u->userspace_addr, ++ l); ++ if (r < 0) ++ return r; ++ hit = true; ++ min = min(l, min); ++ } ++ ++ if (!hit) ++ return -EFAULT; ++ ++ len -= min; ++ hva += min; ++ } ++ ++ return 0; ++} ++ ++static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) ++{ ++ struct iovec iov[64]; ++ int i, ret; ++ ++ if (!vq->iotlb) ++ return log_write(vq->log_base, vq->log_addr + used_offset, len); ++ ++ ret = translate_desc(vq, (uintptr_t)vq->used + used_offset, ++ len, iov, 64, VHOST_ACCESS_WO); ++ if (ret) ++ return ret; ++ ++ for (i = 0; i < ret; i++) { ++ ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base, ++ iov[i].iov_len); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ + int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, +- unsigned int log_num, u64 len) ++ unsigned int log_num, u64 len, struct iovec *iov, int count) + { + int i, r; + + /* Make sure data written is seen before log. */ + smp_wmb(); ++ ++ if (vq->iotlb) { ++ for (i = 0; i < count; i++) { ++ r = log_write_hva(vq, (uintptr_t)iov[i].iov_base, ++ iov[i].iov_len); ++ if (r < 0) ++ return r; ++ } ++ return 0; ++ } ++ + for (i = 0; i < log_num; ++i) { + u64 l = min(log[i].len, len); + r = log_write(vq->log_base, log[i].addr, l); +@@ -1762,9 +1836,8 @@ static int vhost_update_used_flags(struc + smp_wmb(); + /* Log used flag write. 
*/ + used = &vq->used->flags; +- log_write(vq->log_base, vq->log_addr + +- (used - (void __user *)vq->used), +- sizeof vq->used->flags); ++ log_used(vq, (used - (void __user *)vq->used), ++ sizeof vq->used->flags); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + } +@@ -1782,9 +1855,8 @@ static int vhost_update_avail_event(stru + smp_wmb(); + /* Log avail event write */ + used = vhost_avail_event(vq); +- log_write(vq->log_base, vq->log_addr + +- (used - (void __user *)vq->used), +- sizeof *vhost_avail_event(vq)); ++ log_used(vq, (used - (void __user *)vq->used), ++ sizeof *vhost_avail_event(vq)); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + } +@@ -2189,10 +2261,8 @@ static int __vhost_add_used_n(struct vho + /* Make sure data is seen before log. */ + smp_wmb(); + /* Log used ring entry write. */ +- log_write(vq->log_base, +- vq->log_addr + +- ((void __user *)used - (void __user *)vq->used), +- count * sizeof *used); ++ log_used(vq, ((void __user *)used - (void __user *)vq->used), ++ count * sizeof *used); + } + old = vq->last_used_idx; + new = (vq->last_used_idx += count); +@@ -2234,9 +2304,8 @@ int vhost_add_used_n(struct vhost_virtqu + /* Make sure used idx is seen before log. */ + smp_wmb(); + /* Log used index update. 
*/ +- log_write(vq->log_base, +- vq->log_addr + offsetof(struct vring_used, idx), +- sizeof vq->used->idx); ++ log_used(vq, offsetof(struct vring_used, idx), ++ sizeof vq->used->idx); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + } +--- a/drivers/vhost/vhost.h ++++ b/drivers/vhost/vhost.h +@@ -208,7 +208,8 @@ bool vhost_vq_avail_empty(struct vhost_d + bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); + + int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, +- unsigned int log_num, u64 len); ++ unsigned int log_num, u64 len, ++ struct iovec *iov, int count); + int vq_iotlb_prefetch(struct vhost_virtqueue *vq); + + struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type);