From: Greg Kroah-Hartman Date: Wed, 21 Sep 2016 11:06:42 +0000 (+0200) Subject: potential 4.4 network patches... X-Git-Tag: v4.4.22~17 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ffb6a34649d0f6612f912558861bedc6de25070d;p=thirdparty%2Fkernel%2Fstable-queue.git potential 4.4 network patches... --- diff --git a/net-4.4/bonding-fix-bonding-crash.patch b/net-4.4/bonding-fix-bonding-crash.patch new file mode 100644 index 00000000000..73d1a4889f5 --- /dev/null +++ b/net-4.4/bonding-fix-bonding-crash.patch @@ -0,0 +1,93 @@ +From foo@baz Wed Sep 21 12:45:10 CEST 2016 +From: Mahesh Bandewar +Date: Thu, 1 Sep 2016 22:18:34 -0700 +Subject: bonding: Fix bonding crash + +From: Mahesh Bandewar + + +[ Upstream commit 24b27fc4cdf9e10c5e79e5923b6b7c2c5c95096c ] + +Following few steps will crash kernel - + + (a) Create bonding master + > modprobe bonding miimon=50 + (b) Create macvlan bridge on eth2 + > ip link add link eth2 dev mvl0 address aa:0:0:0:0:01 \ + type macvlan + (c) Now try adding eth2 into the bond + > echo +eth2 > /sys/class/net/bond0/bonding/slaves + + +Bonding does lots of things before checking if the device enslaved is +busy or not. + +In this case when the notifier call-chain sends notifications, the +bond_netdev_event() assumes that the rx_handler /rx_handler_data is +registered while the bond_enslave() hasn't progressed far enough to +register rx_handler for the new slave. + +This patch adds a rx_handler check that can be performed right at the +beginning of the enslave code to avoid getting into this situation. + +Signed-off-by: Mahesh Bandewar +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 7 ++++--- + include/linux/netdevice.h | 1 + + net/core/dev.c | 16 ++++++++++++++++ + 3 files changed, 21 insertions(+), 3 deletions(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1317,9 +1317,10 @@ int bond_enslave(struct net_device *bond + slave_dev->name); + } + +- /* already enslaved */ +- if (slave_dev->flags & IFF_SLAVE) { +- netdev_dbg(bond_dev, "Error: Device was already enslaved\n"); ++ /* already in-use? */ ++ if (netdev_is_rx_handler_busy(slave_dev)) { ++ netdev_err(bond_dev, ++ "Error: Device is in use and cannot be enslaved\n"); + return -EBUSY; + } + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3037,6 +3037,7 @@ static inline void napi_free_frags(struc + napi->skb = NULL; + } + ++bool netdev_is_rx_handler_busy(struct net_device *dev); + int netdev_rx_handler_register(struct net_device *dev, + rx_handler_func_t *rx_handler, + void *rx_handler_data); +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3722,6 +3722,22 @@ static inline struct sk_buff *handle_ing + } + + /** ++ * netdev_is_rx_handler_busy - check if receive handler is registered ++ * @dev: device to check ++ * ++ * Check if a receive handler is already registered for a given device. ++ * Return true if there one. ++ * ++ * The caller must hold the rtnl_mutex. 
++ */ ++bool netdev_is_rx_handler_busy(struct net_device *dev) ++{ ++ ASSERT_RTNL(); ++ return dev && rtnl_dereference(dev->rx_handler); ++} ++EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy); ++ ++/** + * netdev_rx_handler_register - register receive handler + * @dev: device to register a handler for + * @rx_handler: receive handler to register diff --git a/net-4.4/bridge-re-introduce-fix-parsing-of-mldv2-reports.patch b/net-4.4/bridge-re-introduce-fix-parsing-of-mldv2-reports.patch new file mode 100644 index 00000000000..537516c614c --- /dev/null +++ b/net-4.4/bridge-re-introduce-fix-parsing-of-mldv2-reports.patch @@ -0,0 +1,38 @@ +From foo@baz Wed Sep 21 12:45:10 CEST 2016 +From: Davide Caratti +Date: Wed, 31 Aug 2016 14:16:44 +0200 +Subject: bridge: re-introduce 'fix parsing of MLDv2 reports' + +From: Davide Caratti + + +[ Upstream commit 9264251ee2a55bce8fb93826b3f581fb9eb7e2c2 ] + +commit bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with +INCLUDE and no sources as a leave") seems to have accidentally reverted +commit 47cc84ce0c2f ("bridge: fix parsing of MLDv2 reports"). This +commit brings back a change to br_ip6_multicast_mld2_report() where +parsing of MLDv2 reports stops when the first group is successfully +added to the MDB cache. + +Fixes: bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with INCLUDE and no sources as a leave") +Signed-off-by: Davide Caratti +Acked-by: Nikolay Aleksandrov +Acked-by: Thadeu Lima de Souza Cascardo +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_multicast.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -1113,7 +1113,7 @@ static int br_ip6_multicast_mld2_report( + } else { + err = br_ip6_multicast_add_group(br, port, + &grec->grec_mca, vid); +- if (!err) ++ if (err) + break; + } + } diff --git a/net-4.4/ipv4-panic-in-leaf_walk_rcu-due-to-stale-node-pointer.patch b/net-4.4/ipv4-panic-in-leaf_walk_rcu-due-to-stale-node-pointer.patch new file mode 100644 index 00000000000..15aefb9d432 --- /dev/null +++ b/net-4.4/ipv4-panic-in-leaf_walk_rcu-due-to-stale-node-pointer.patch @@ -0,0 +1,101 @@ +From foo@baz Wed Sep 21 12:45:10 CEST 2016 +From: David Forster +Date: Wed, 3 Aug 2016 15:13:01 +0100 +Subject: ipv4: panic in leaf_walk_rcu due to stale node pointer + +From: David Forster + + +[ Upstream commit 94d9f1c5906b20053efe375b6d66610bca4b8b64 ] + +Panic occurs when issuing "cat /proc/net/route" whilst +populating FIB with > 1M routes. + +Use of cached node pointer in fib_route_get_idx is unsafe. 
+ + BUG: unable to handle kernel paging request at ffffc90001630024 + IP: [] leaf_walk_rcu+0x10/0xe0 + PGD 11b08d067 PUD 11b08e067 PMD dac4b067 PTE 0 + Oops: 0000 [#1] SMP + Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscac + snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep virti + acpi_cpufreq button parport_pc ppdev lp parport autofs4 ext4 crc16 mbcache jbd +tio_ring virtio floppy uhci_hcd ehci_hcd usbcore usb_common libata scsi_mod + CPU: 1 PID: 785 Comm: cat Not tainted 4.2.0-rc8+ #4 + Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 + task: ffff8800da1c0bc0 ti: ffff88011a05c000 task.ti: ffff88011a05c000 + RIP: 0010:[] [] leaf_walk_rcu+0x10/0xe0 + RSP: 0018:ffff88011a05fda0 EFLAGS: 00010202 + RAX: ffff8800d8a40c00 RBX: ffff8800da4af940 RCX: ffff88011a05ff20 + RDX: ffffc90001630020 RSI: 0000000001013531 RDI: ffff8800da4af950 + RBP: 0000000000000000 R08: ffff8800da1f9a00 R09: 0000000000000000 + R10: ffff8800db45b7e4 R11: 0000000000000246 R12: ffff8800da4af950 + R13: ffff8800d97a74c0 R14: 0000000000000000 R15: ffff8800d97a7480 + FS: 00007fd3970e0700(0000) GS:ffff88011fd00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b + CR2: ffffc90001630024 CR3: 000000011a7e4000 CR4: 00000000000006e0 + Stack: + ffffffff814d00d3 0000000000000000 ffff88011a05ff20 ffff8800da1f9a00 + ffffffff811dd8b9 0000000000000800 0000000000020000 00007fd396f35000 + ffffffff811f8714 0000000000003431 ffffffff8138dce0 0000000000000f80 + Call Trace: + [] ? fib_route_seq_start+0x93/0xc0 + [] ? seq_read+0x149/0x380 + [] ? fsnotify+0x3b4/0x500 + [] ? process_echoes+0x70/0x70 + [] ? proc_reg_read+0x47/0x70 + [] ? __vfs_read+0x23/0xd0 + [] ? rw_verify_area+0x52/0xf0 + [] ? vfs_read+0x81/0x120 + [] ? SyS_read+0x42/0xa0 + [] ? 
entry_SYSCALL_64_fastpath+0x16/0x75 + Code: 48 85 c0 75 d8 f3 c3 31 c0 c3 f3 c3 66 66 66 66 66 66 2e 0f 1f 84 00 00 +a 04 89 f0 33 02 44 89 c9 48 d3 e8 0f b6 4a 05 49 89 + RIP [] leaf_walk_rcu+0x10/0xe0 + RSP + CR2: ffffc90001630024 + +Signed-off-by: Dave Forster +Acked-by: Alexander Duyck +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_trie.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -2453,9 +2453,7 @@ struct fib_route_iter { + static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, + loff_t pos) + { +- struct fib_table *tb = iter->main_tb; + struct key_vector *l, **tp = &iter->tnode; +- struct trie *t; + t_key key; + + /* use cache location of next-to-find key */ +@@ -2463,8 +2461,6 @@ static struct key_vector *fib_route_get_ + pos -= iter->pos; + key = iter->key; + } else { +- t = (struct trie *)tb->tb_data; +- iter->tnode = t->kv; + iter->pos = 0; + key = 0; + } +@@ -2505,12 +2501,12 @@ static void *fib_route_seq_start(struct + return NULL; + + iter->main_tb = tb; ++ t = (struct trie *)tb->tb_data; ++ iter->tnode = t->kv; + + if (*pos != 0) + return fib_route_get_idx(iter, *pos); + +- t = (struct trie *)tb->tb_data; +- iter->tnode = t->kv; + iter->pos = 0; + iter->key = 0; + diff --git a/net-4.4/ipv6-addrconf-fix-dev-refcont-leak-when-dad-failed.patch b/net-4.4/ipv6-addrconf-fix-dev-refcont-leak-when-dad-failed.patch new file mode 100644 index 00000000000..b37a6c3ac94 --- /dev/null +++ b/net-4.4/ipv6-addrconf-fix-dev-refcont-leak-when-dad-failed.patch @@ -0,0 +1,60 @@ +From foo@baz Wed Sep 21 12:45:10 CEST 2016 +From: Wei Yongjun +Date: Mon, 5 Sep 2016 16:06:31 +0800 +Subject: ipv6: addrconf: fix dev refcont leak when DAD failed + +From: Wei Yongjun + + +[ Upstream commit 751eb6b6042a596b0080967c1a529a9fe98dac1d ] + +In general, when DAD detected IPv6 duplicate address, ifp->state +will be set to INET6_IFADDR_STATE_ERRDAD and 
DAD is stopped by a +delayed work, the call tree should be like this: + +ndisc_recv_ns + -> addrconf_dad_failure <- missing ifp put + -> addrconf_mod_dad_work + -> schedule addrconf_dad_work() + -> addrconf_dad_stop() <- missing ifp hold before call it + +addrconf_dad_failure() called with ifp refcount holding but not put. +addrconf_dad_work() calls addrconf_dad_stop() without extra holding +refcount. This will not cause any issue normally. + +But the race between addrconf_dad_failure() and addrconf_dad_work() +may cause ifp refcount leak and netdevice cannot be unregistered, +dmesg shows the following messages: + +IPv6: eth0: IPv6 duplicate address fe80::XX:XXXX:XXXX:XX detected! +... +unregister_netdevice: waiting for eth0 to become free. Usage count = 1 + +Cc: stable@vger.kernel.org +Fixes: c15b1ccadb32 ("ipv6: move DAD and addrconf_verify processing +to workqueue") +Signed-off-by: Wei Yongjun +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -1898,6 +1898,7 @@ errdad: + spin_unlock_bh(&ifp->lock); + + addrconf_mod_dad_work(ifp, 0); ++ in6_ifa_put(ifp); + } + + /* Join to solicited addr multicast group. 
+@@ -3609,6 +3610,7 @@ static void addrconf_dad_work(struct wor + addrconf_dad_begin(ifp); + goto out; + } else if (action == DAD_ABORT) { ++ in6_ifa_hold(ifp); + addrconf_dad_stop(ifp, 1); + goto out; + } diff --git a/net-4.4/ipv6-release-dst-in-ping_v6_sendmsg.patch b/net-4.4/ipv6-release-dst-in-ping_v6_sendmsg.patch new file mode 100644 index 00000000000..6dfc011fc71 --- /dev/null +++ b/net-4.4/ipv6-release-dst-in-ping_v6_sendmsg.patch @@ -0,0 +1,53 @@ +From foo@baz Wed Sep 21 12:45:10 CEST 2016 +From: Dave Jones +Date: Fri, 2 Sep 2016 14:39:50 -0400 +Subject: ipv6: release dst in ping_v6_sendmsg + +From: Dave Jones + + +[ Upstream commit 03c2778a938aaba0893f6d6cdc29511d91a79848 ] + +Neither the failure or success paths of ping_v6_sendmsg release +the dst it acquires. This leads to a flood of warnings from +"net/core/dst.c:288 dst_release" on older kernels that +don't have 8bf4ada2e21378816b28205427ee6b0e1ca4c5f1 backported. + +That patch optimistically hoped this had been fixed post 3.10, but +it seems at least one case wasn't, where I've seen this triggered +a lot from machines doing unprivileged icmp sockets. + +Cc: Martin Lau +Signed-off-by: Dave Jones +Acked-by: Martin KaFai Lau +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ping.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ping.c ++++ b/net/ipv6/ping.c +@@ -150,8 +150,10 @@ int ping_v6_sendmsg(struct sock *sk, str + rt = (struct rt6_info *) dst; + + np = inet6_sk(sk); +- if (!np) +- return -EBADF; ++ if (!np) { ++ err = -EBADF; ++ goto dst_err_out; ++ } + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; +@@ -186,6 +188,9 @@ int ping_v6_sendmsg(struct sock *sk, str + } + release_sock(sk); + ++dst_err_out: ++ dst_release(dst); ++ + if (err) + return err; + diff --git a/net-4.4/net-dsa-bcm_sf2-fix-race-condition-while-unmasking-interrupts.patch b/net-4.4/net-dsa-bcm_sf2-fix-race-condition-while-unmasking-interrupts.patch new file mode 100644 index 00000000000..93f39beb07d --- /dev/null +++ b/net-4.4/net-dsa-bcm_sf2-fix-race-condition-while-unmasking-interrupts.patch @@ -0,0 +1,47 @@ +From foo@baz Wed Sep 21 12:45:10 CEST 2016 +From: Florian Fainelli +Date: Wed, 24 Aug 2016 11:01:20 -0700 +Subject: net: dsa: bcm_sf2: Fix race condition while unmasking interrupts + +From: Florian Fainelli + + +[ Upstream commit 4f101c47791cdcb831b3ef1f831b1cc51e4fe03c ] + +We kept shadow copies of which interrupt sources we have enabled and +disabled, but due to an order bug in how intrl2_mask_clear was defined, +we could run into the following scenario: + +CPU0 CPU1 +intrl2_1_mask_clear(..) +sets INTRL2_CPU_MASK_CLEAR + bcm_sf2_switch_1_isr + read INTRL2_CPU_STATUS and masks with stale + irq1_mask value +updates irq1_mask value + +Which would make us loop again and again trying to process an interrupt +we are not clearing since our copy of whether it was enabled before +still indicates it was not. Fix this by updating the shadow copy first, +and then unmasking at the HW level. + +Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/dsa/bcm_sf2.h ++++ b/drivers/net/dsa/bcm_sf2.h +@@ -187,8 +187,8 @@ static inline void name##_writeq(struct + static inline void intrl2_##which##_mask_clear(struct bcm_sf2_priv *priv, \ + u32 mask) \ + { \ +- intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR); \ + priv->irq##which##_mask &= ~(mask); \ ++ intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR); \ + } \ + static inline void intrl2_##which##_mask_set(struct bcm_sf2_priv *priv, \ + u32 mask) \ diff --git a/net-4.4/net-irda-handle-iriap_register_lsap-allocation-failure.patch b/net-4.4/net-irda-handle-iriap_register_lsap-allocation-failure.patch new file mode 100644 index 00000000000..e257d771044 --- /dev/null +++ b/net-4.4/net-irda-handle-iriap_register_lsap-allocation-failure.patch @@ -0,0 +1,74 @@ +From foo@baz Wed Sep 21 12:45:10 CEST 2016 +From: Vegard Nossum +Date: Fri, 12 Aug 2016 10:29:13 +0200 +Subject: net/irda: handle iriap_register_lsap() allocation failure + +From: Vegard Nossum + + +[ Upstream commit 5ba092efc7ddff040777ae7162f1d195f513571b ] + +If iriap_register_lsap() fails to allocate memory, self->lsap is +set to NULL. However, none of the callers handle the failure and +irlmp_connect_request() will happily dereference it: + + iriap_register_lsap: Unable to allocated LSAP! 
+ ================================================================================ + UBSAN: Undefined behaviour in net/irda/irlmp.c:378:2 + member access within null pointer of type 'struct lsap_cb' + CPU: 1 PID: 15403 Comm: trinity-c0 Not tainted 4.8.0-rc1+ #81 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org + 04/01/2014 + 0000000000000000 ffff88010c7e78a8 ffffffff82344f40 0000000041b58ab3 + ffffffff84f98000 ffffffff82344e94 ffff88010c7e78d0 ffff88010c7e7880 + ffff88010630ad00 ffffffff84a5fae0 ffffffff84d3f5c0 000000000000017a + Call Trace: + [] dump_stack+0xac/0xfc + [] ubsan_epilogue+0xd/0x8a + [] __ubsan_handle_type_mismatch+0x157/0x411 + [] irlmp_connect_request+0x7ac/0x970 + [] iriap_connect_request+0xa0/0x160 + [] state_s_disconnect+0x88/0xd0 + [] iriap_do_client_event+0x94/0x120 + [] iriap_getvaluebyclass_request+0x3e0/0x6d0 + [] irda_find_lsap_sel+0x1eb/0x630 + [] irda_connect+0x828/0x12d0 + [] SYSC_connect+0x22b/0x340 + [] SyS_connect+0x9/0x10 + [] do_syscall_64+0x1b3/0x4b0 + [] entry_SYSCALL64_slow_path+0x25/0x25 + ================================================================================ + +The bug seems to have been around since forever. + +There's more problems with missing error checks in iriap_init() (and +indeed all of irda_init()), but that's a bigger problem that needs +very careful review and testing. This patch will fix the most serious +bug (as it's easily reached from unprivileged userspace). + +I have tested my patch with a reproducer. + +Signed-off-by: Vegard Nossum +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/irda/iriap.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/irda/iriap.c ++++ b/net/irda/iriap.c +@@ -185,8 +185,12 @@ struct iriap_cb *iriap_open(__u8 slsap_s + + self->magic = IAS_MAGIC; + self->mode = mode; +- if (mode == IAS_CLIENT) +- iriap_register_lsap(self, slsap_sel, mode); ++ if (mode == IAS_CLIENT) { ++ if (iriap_register_lsap(self, slsap_sel, mode)) { ++ kfree(self); ++ return NULL; ++ } ++ } + + self->confirm = callback; + self->priv = priv; diff --git a/net-4.4/net-mlx5-added-missing-check-of-msg-length-in-verifying-its-signature.patch b/net-4.4/net-mlx5-added-missing-check-of-msg-length-in-verifying-its-signature.patch new file mode 100644 index 00000000000..cad493a01fd --- /dev/null +++ b/net-4.4/net-mlx5-added-missing-check-of-msg-length-in-verifying-its-signature.patch @@ -0,0 +1,258 @@ +From foo@baz Wed Sep 21 12:45:10 CEST 2016 +From: Paul Blakey +Date: Thu, 18 Aug 2016 21:09:05 +0300 +Subject: net/mlx5: Added missing check of msg length in verifying its signature + +From: Paul Blakey + + +[ Upstream commit 2c0f8ce1b584a4d7b8ff53140d21dfed99834940 ] + +Set and verify signature calculates the signature for each of the +mailbox nodes, even for those that are unused (from cache). Added +a missing length check to set and verify only those which are used. + +While here, also moved the setting of msg's nodes token to where we +already go over them. This saves a pass because checksum is disabled, +and the only useful thing remaining that set signature does is setting +the token. + +Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB +adapters') +Signed-off-by: Paul Blakey +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 83 ++++++++++++++++---------- + 1 file changed, 53 insertions(+), 30 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +@@ -143,13 +143,14 @@ static struct mlx5_cmd_layout *get_inst( + return cmd->cmd_buf + (idx << cmd->log_stride); + } + +-static u8 xor8_buf(void *buf, int len) ++static u8 xor8_buf(void *buf, size_t offset, int len) + { + u8 *ptr = buf; + u8 sum = 0; + int i; ++ int end = len + offset; + +- for (i = 0; i < len; i++) ++ for (i = offset; i < end; i++) + sum ^= ptr[i]; + + return sum; +@@ -157,41 +158,49 @@ static u8 xor8_buf(void *buf, int len) + + static int verify_block_sig(struct mlx5_cmd_prot_block *block) + { +- if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff) ++ size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); ++ int xor_len = sizeof(*block) - sizeof(block->data) - 1; ++ ++ if (xor8_buf(block, rsvd0_off, xor_len) != 0xff) + return -EINVAL; + +- if (xor8_buf(block, sizeof(*block)) != 0xff) ++ if (xor8_buf(block, 0, sizeof(*block)) != 0xff) + return -EINVAL; + + return 0; + } + +-static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token, +- int csum) ++static void calc_block_sig(struct mlx5_cmd_prot_block *block) + { +- block->token = token; +- if (csum) { +- block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - +- sizeof(block->data) - 2); +- block->sig = ~xor8_buf(block, sizeof(*block) - 1); +- } ++ int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2; ++ size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); ++ ++ block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len); ++ block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1); + } + +-static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum) ++static void calc_chain_sig(struct mlx5_cmd_msg *msg) + { + struct mlx5_cmd_mailbox *next = 
msg->next; ++ int size = msg->len; ++ int blen = size - min_t(int, sizeof(msg->first.data), size); ++ int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) ++ / MLX5_CMD_DATA_BLOCK_SIZE; ++ int i = 0; + +- while (next) { +- calc_block_sig(next->buf, token, csum); ++ for (i = 0; i < n && next; i++) { ++ calc_block_sig(next->buf); + next = next->next; + } + } + + static void set_signature(struct mlx5_cmd_work_ent *ent, int csum) + { +- ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay)); +- calc_chain_sig(ent->in, ent->token, csum); +- calc_chain_sig(ent->out, ent->token, csum); ++ ent->lay->sig = ~xor8_buf(ent->lay, 0, sizeof(*ent->lay)); ++ if (csum) { ++ calc_chain_sig(ent->in); ++ calc_chain_sig(ent->out); ++ } + } + + static void poll_timeout(struct mlx5_cmd_work_ent *ent) +@@ -222,12 +231,17 @@ static int verify_signature(struct mlx5_ + struct mlx5_cmd_mailbox *next = ent->out->next; + int err; + u8 sig; ++ int size = ent->out->len; ++ int blen = size - min_t(int, sizeof(ent->out->first.data), size); ++ int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) ++ / MLX5_CMD_DATA_BLOCK_SIZE; ++ int i = 0; + +- sig = xor8_buf(ent->lay, sizeof(*ent->lay)); ++ sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay)); + if (sig != 0xff) + return -EINVAL; + +- while (next) { ++ for (i = 0; i < n && next; i++) { + err = verify_block_sig(next->buf); + if (err) + return err; +@@ -641,7 +655,6 @@ static void cmd_work_handler(struct work + spin_unlock_irqrestore(&cmd->alloc_lock, flags); + } + +- ent->token = alloc_token(cmd); + cmd->ent_arr[ent->idx] = ent; + lay = get_inst(cmd, ent->idx); + ent->lay = lay; +@@ -755,7 +768,8 @@ static u8 *get_status_ptr(struct mlx5_ou + static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, + struct mlx5_cmd_msg *out, void *uout, int uout_size, + mlx5_cmd_cbk_t callback, +- void *context, int page_queue, u8 *status) ++ void *context, int page_queue, u8 *status, ++ u8 token) + { + struct mlx5_cmd *cmd = &dev->cmd; + struct mlx5_cmd_work_ent 
*ent; +@@ -772,6 +786,8 @@ static int mlx5_cmd_invoke(struct mlx5_c + if (IS_ERR(ent)) + return PTR_ERR(ent); + ++ ent->token = token; ++ + if (!callback) + init_completion(&ent->done); + +@@ -844,7 +860,8 @@ static const struct file_operations fops + .write = dbg_write, + }; + +-static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size) ++static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size, ++ u8 token) + { + struct mlx5_cmd_prot_block *block; + struct mlx5_cmd_mailbox *next; +@@ -870,6 +887,7 @@ static int mlx5_copy_to_msg(struct mlx5_ + memcpy(block->data, from, copy); + from += copy; + size -= copy; ++ block->token = token; + next = next->next; + } + +@@ -939,7 +957,8 @@ static void free_cmd_box(struct mlx5_cor + } + + static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev, +- gfp_t flags, int size) ++ gfp_t flags, int size, ++ u8 token) + { + struct mlx5_cmd_mailbox *tmp, *head = NULL; + struct mlx5_cmd_prot_block *block; +@@ -968,6 +987,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_c + tmp->next = head; + block->next = cpu_to_be64(tmp->next ? 
tmp->next->dma : 0); + block->block_num = cpu_to_be32(n - i - 1); ++ block->token = token; + head = tmp; + } + msg->next = head; +@@ -1351,7 +1371,7 @@ static struct mlx5_cmd_msg *alloc_msg(st + } + + if (IS_ERR(msg)) +- msg = mlx5_alloc_cmd_msg(dev, gfp, in_size); ++ msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0); + + return msg; + } +@@ -1376,6 +1396,7 @@ static int cmd_exec(struct mlx5_core_dev + int err; + u8 status = 0; + u32 drv_synd; ++ u8 token; + + if (pci_channel_offline(dev->pdev) || + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { +@@ -1394,20 +1415,22 @@ static int cmd_exec(struct mlx5_core_dev + return err; + } + +- err = mlx5_copy_to_msg(inb, in, in_size); ++ token = alloc_token(&dev->cmd); ++ ++ err = mlx5_copy_to_msg(inb, in, in_size, token); + if (err) { + mlx5_core_warn(dev, "err %d\n", err); + goto out_in; + } + +- outb = mlx5_alloc_cmd_msg(dev, gfp, out_size); ++ outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token); + if (IS_ERR(outb)) { + err = PTR_ERR(outb); + goto out_in; + } + + err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, +- pages_queue, &status); ++ pages_queue, &status, token); + if (err) + goto out_out; + +@@ -1475,7 +1498,7 @@ static int create_msg_cache(struct mlx5_ + INIT_LIST_HEAD(&cmd->cache.med.head); + + for (i = 0; i < NUM_LONG_LISTS; i++) { +- msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE); ++ msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE, 0); + if (IS_ERR(msg)) { + err = PTR_ERR(msg); + goto ex_err; +@@ -1485,7 +1508,7 @@ static int create_msg_cache(struct mlx5_ + } + + for (i = 0; i < NUM_MED_LISTS; i++) { +- msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE); ++ msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE, 0); + if (IS_ERR(msg)) { + err = PTR_ERR(msg); + goto ex_err; diff --git a/net-4.4/net-smc91x-fix-smc-accesses.patch b/net-4.4/net-smc91x-fix-smc-accesses.patch new file mode 100644 index 00000000000..0a283ff7e00 --- /dev/null +++ 
b/net-4.4/net-smc91x-fix-smc-accesses.patch @@ -0,0 +1,263 @@ +From foo@baz Wed Sep 21 12:45:10 CEST 2016 +From: Russell King +Date: Sat, 27 Aug 2016 17:33:03 +0100 +Subject: net: smc91x: fix SMC accesses + +From: Russell King + + +[ Upstream commit 2fb04fdf30192ff1e2b5834e9b7745889ea8bbcb ] + +Commit b70661c70830 ("net: smc91x: use run-time configuration on all ARM +machines") broke some ARM platforms through several mistakes. Firstly, +the access size must correspond to the following rule: + +(a) at least one of 16-bit or 8-bit access size must be supported +(b) 32-bit accesses are optional, and may be enabled in addition to + the above. + +Secondly, it provides no emulation of 16-bit accesses, instead blindly +making 16-bit accesses even when the platform specifies that only 8-bit +is supported. + +Reorganise smc91x.h so we can make use of the existing 16-bit access +emulation already provided - if 16-bit accesses are supported, use +16-bit accesses directly, otherwise if 8-bit accesses are supported, +use the provided 16-bit access emulation. If neither, BUG(). This +exactly reflects the driver behaviour prior to the commit being fixed. + +Since the conversion incorrectly cut down the available access sizes on +several platforms, we also need to go through every platform and fix up +the overly-restrictive access size: Arnd assumed that if a platform can +perform 32-bit, 16-bit and 8-bit accesses, then only a 32-bit access +size needed to be specified - not so, all available access sizes must +be specified. + +This likely fixes some performance regressions in doing this: if a +platform does not support 8-bit accesses, 8-bit accesses have been +emulated by performing a 16-bit read-modify-write access. + +Tested on the Intel Assabet/Neponset platform, which supports only 8-bit +accesses, which was broken by the original commit. 
+ +Fixes: b70661c70830 ("net: smc91x: use run-time configuration on all ARM machines") +Signed-off-by: Russell King +Tested-by: Robert Jarzmik +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/mach-pxa/idp.c | 3 - + arch/arm/mach-pxa/xcep.c | 3 - + arch/arm/mach-realview/core.c | 3 - + arch/arm/mach-sa1100/pleb.c | 2 + arch/blackfin/mach-bf561/boards/cm_bf561.c | 3 - + arch/blackfin/mach-bf561/boards/ezkit.c | 3 - + drivers/net/ethernet/smsc/smc91x.c | 7 +++ + drivers/net/ethernet/smsc/smc91x.h | 65 ++++++++++++++++++++--------- + include/linux/smc91x.h | 10 ++++ + 9 files changed, 73 insertions(+), 26 deletions(-) + +--- a/arch/arm/mach-pxa/idp.c ++++ b/arch/arm/mach-pxa/idp.c +@@ -83,7 +83,8 @@ static struct resource smc91x_resources[ + }; + + static struct smc91x_platdata smc91x_platdata = { +- .flags = SMC91X_USE_32BIT | SMC91X_USE_DMA | SMC91X_NOWAIT, ++ .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | ++ SMC91X_USE_DMA | SMC91X_NOWAIT, + }; + + static struct platform_device smc91x_device = { +--- a/arch/arm/mach-pxa/xcep.c ++++ b/arch/arm/mach-pxa/xcep.c +@@ -120,7 +120,8 @@ static struct resource smc91x_resources[ + }; + + static struct smc91x_platdata xcep_smc91x_info = { +- .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA, ++ .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | ++ SMC91X_NOWAIT | SMC91X_USE_DMA, + }; + + static struct platform_device smc91x_device = { +--- a/arch/arm/mach-realview/core.c ++++ b/arch/arm/mach-realview/core.c +@@ -95,7 +95,8 @@ static struct smsc911x_platform_config s + }; + + static struct smc91x_platdata smc91x_platdata = { +- .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT, ++ .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | ++ SMC91X_NOWAIT, + }; + + static struct platform_device realview_eth_device = { +--- a/arch/arm/mach-sa1100/pleb.c ++++ b/arch/arm/mach-sa1100/pleb.c +@@ -45,7 +45,7 @@ static struct resource smc91x_resources[ + }; + + 
static struct smc91x_platdata smc91x_platdata = { +- .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT, ++ .flags = SMC91X_USE_16BIT | SMC91X_USE_8BIT | SMC91X_NOWAIT, + }; + + static struct platform_device smc91x_device = { +--- a/arch/blackfin/mach-bf561/boards/cm_bf561.c ++++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c +@@ -146,7 +146,8 @@ static struct platform_device hitachi_fb + #include + + static struct smc91x_platdata smc91x_info = { +- .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT, ++ .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | ++ SMC91X_NOWAIT, + .leda = RPC_LED_100_10, + .ledb = RPC_LED_TX_RX, + }; +--- a/arch/blackfin/mach-bf561/boards/ezkit.c ++++ b/arch/blackfin/mach-bf561/boards/ezkit.c +@@ -134,7 +134,8 @@ static struct platform_device net2272_bf + #include + + static struct smc91x_platdata smc91x_info = { +- .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT, ++ .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | ++ SMC91X_NOWAIT, + .leda = RPC_LED_100_10, + .ledb = RPC_LED_TX_RX, + }; +--- a/drivers/net/ethernet/smsc/smc91x.c ++++ b/drivers/net/ethernet/smsc/smc91x.c +@@ -2269,6 +2269,13 @@ static int smc_drv_probe(struct platform + if (pd) { + memcpy(&lp->cfg, pd, sizeof(lp->cfg)); + lp->io_shift = SMC91X_IO_SHIFT(lp->cfg.flags); ++ ++ if (!SMC_8BIT(lp) && !SMC_16BIT(lp)) { ++ dev_err(&pdev->dev, ++ "at least one of 8-bit or 16-bit access support is required.\n"); ++ ret = -ENXIO; ++ goto out_free_netdev; ++ } + } + + #if IS_BUILTIN(CONFIG_OF) +--- a/drivers/net/ethernet/smsc/smc91x.h ++++ b/drivers/net/ethernet/smsc/smc91x.h +@@ -37,6 +37,27 @@ + #include + + /* ++ * Any 16-bit access is performed with two 8-bit accesses if the hardware ++ * can't do it directly. Most registers are 16-bit so those are mandatory. 
++ */ ++#define SMC_outw_b(x, a, r) \ ++ do { \ ++ unsigned int __val16 = (x); \ ++ unsigned int __reg = (r); \ ++ SMC_outb(__val16, a, __reg); \ ++ SMC_outb(__val16 >> 8, a, __reg + (1 << SMC_IO_SHIFT)); \ ++ } while (0) ++ ++#define SMC_inw_b(a, r) \ ++ ({ \ ++ unsigned int __val16; \ ++ unsigned int __reg = r; \ ++ __val16 = SMC_inb(a, __reg); \ ++ __val16 |= SMC_inb(a, __reg + (1 << SMC_IO_SHIFT)) << 8; \ ++ __val16; \ ++ }) ++ ++/* + * Define your architecture specific bus configuration parameters here. + */ + +@@ -55,10 +76,30 @@ + #define SMC_IO_SHIFT (lp->io_shift) + + #define SMC_inb(a, r) readb((a) + (r)) +-#define SMC_inw(a, r) readw((a) + (r)) ++#define SMC_inw(a, r) \ ++ ({ \ ++ unsigned int __smc_r = r; \ ++ SMC_16BIT(lp) ? readw((a) + __smc_r) : \ ++ SMC_8BIT(lp) ? SMC_inw_b(a, __smc_r) : \ ++ ({ BUG(); 0; }); \ ++ }) ++ + #define SMC_inl(a, r) readl((a) + (r)) + #define SMC_outb(v, a, r) writeb(v, (a) + (r)) ++#define SMC_outw(v, a, r) \ ++ do { \ ++ unsigned int __v = v, __smc_r = r; \ ++ if (SMC_16BIT(lp)) \ ++ __SMC_outw(__v, a, __smc_r); \ ++ else if (SMC_8BIT(lp)) \ ++ SMC_outw_b(__v, a, __smc_r); \ ++ else \ ++ BUG(); \ ++ } while (0) ++ + #define SMC_outl(v, a, r) writel(v, (a) + (r)) ++#define SMC_insb(a, r, p, l) readsb((a) + (r), p, l) ++#define SMC_outsb(a, r, p, l) writesb((a) + (r), p, l) + #define SMC_insw(a, r, p, l) readsw((a) + (r), p, l) + #define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) + #define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) +@@ -66,7 +107,7 @@ + #define SMC_IRQ_FLAGS (-1) /* from resource */ + + /* We actually can't write halfwords properly if not word aligned */ +-static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg) ++static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg) + { + if ((machine_is_mainstone() || machine_is_stargate2() || + machine_is_pxa_idp()) && reg & 2) { +@@ -405,24 +446,8 @@ smc_pxa_dma_insw(void __iomem *ioaddr, s + + #if ! 
SMC_CAN_USE_16BIT + +-/* +- * Any 16-bit access is performed with two 8-bit accesses if the hardware +- * can't do it directly. Most registers are 16-bit so those are mandatory. +- */ +-#define SMC_outw(x, ioaddr, reg) \ +- do { \ +- unsigned int __val16 = (x); \ +- SMC_outb( __val16, ioaddr, reg ); \ +- SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\ +- } while (0) +-#define SMC_inw(ioaddr, reg) \ +- ({ \ +- unsigned int __val16; \ +- __val16 = SMC_inb( ioaddr, reg ); \ +- __val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \ +- __val16; \ +- }) +- ++#define SMC_outw(x, ioaddr, reg) SMC_outw_b(x, ioaddr, reg) ++#define SMC_inw(ioaddr, reg) SMC_inw_b(ioaddr, reg) + #define SMC_insw(a, r, p, l) BUG() + #define SMC_outsw(a, r, p, l) BUG() + +--- a/include/linux/smc91x.h ++++ b/include/linux/smc91x.h +@@ -1,6 +1,16 @@ + #ifndef __SMC91X_H__ + #define __SMC91X_H__ + ++/* ++ * These bits define which access sizes a platform can support, rather ++ * than the maximal access size. So, if your platform can do 16-bit ++ * and 32-bit accesses to the SMC91x device, but not 8-bit, set both ++ * SMC91X_USE_16BIT and SMC91X_USE_32BIT. ++ * ++ * The SMC91x driver requires at least one of SMC91X_USE_8BIT or ++ * SMC91X_USE_16BIT to be supported - just setting SMC91X_USE_32BIT is ++ * an invalid configuration. 
++ */ + #define SMC91X_USE_8BIT (1 << 0) + #define SMC91X_USE_16BIT (1 << 1) + #define SMC91X_USE_32BIT (1 << 2) diff --git a/net-4.4/revert-phy-irq-cannot-be-shared.patch b/net-4.4/revert-phy-irq-cannot-be-shared.patch new file mode 100644 index 00000000000..ebaf78d91aa --- /dev/null +++ b/net-4.4/revert-phy-irq-cannot-be-shared.patch @@ -0,0 +1,45 @@ +From foo@baz Wed Sep 21 12:45:10 CEST 2016 +From: Xander Huff +Date: Wed, 24 Aug 2016 16:47:53 -0500 +Subject: Revert "phy: IRQ cannot be shared" + +From: Xander Huff + + +[ Upstream commit c3e70edd7c2eed6acd234627a6007627f5c76e8e ] + +This reverts: + commit 33c133cc7598 ("phy: IRQ cannot be shared") + +On hardware with multiple PHY devices hooked up to the same IRQ line, allow +them to share it. + +Sergei Shtylyov says: + "I'm not sure now what was the reason I concluded that the IRQ sharing + was impossible... most probably I thought that the kernel IRQ handling + code exited the loop over the IRQ actions once IRQ_HANDLED was returned + -- which is obviously not so in reality..." + +Signed-off-by: Xander Huff +Signed-off-by: Nathan Sullivan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phy.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/net/phy/phy.c ++++ b/drivers/net/phy/phy.c +@@ -640,8 +640,10 @@ phy_err: + int phy_start_interrupts(struct phy_device *phydev) + { + atomic_set(&phydev->irq_disable, 0); +- if (request_irq(phydev->irq, phy_interrupt, 0, "phy_interrupt", +- phydev) < 0) { ++ if (request_irq(phydev->irq, phy_interrupt, ++ IRQF_SHARED, ++ "phy_interrupt", ++ phydev) < 0) { + pr_warn("%s: Can't get IRQ %d (PHY)\n", + phydev->bus->name, phydev->irq); + phydev->irq = PHY_POLL; diff --git a/net-4.4/series b/net-4.4/series new file mode 100644 index 00000000000..ae06bd3c24d --- /dev/null +++ b/net-4.4/series @@ -0,0 +1,15 @@ +ipv4-panic-in-leaf_walk_rcu-due-to-stale-node-pointer.patch +vti-flush-x-netns-xfrm-cache-when-vti-interface-is-removed.patch +net-irda-handle-iriap_register_lsap-allocation-failure.patch +tipc-fix-null-pointer-dereference-in-shutdown.patch +tcp-fix-use-after-free-in-tcp_xmit_retransmit_queue.patch +net-mlx5-added-missing-check-of-msg-length-in-verifying-its-signature.patch +tcp-properly-scale-window-in-tcp_v_reqsk_send_ack.patch +net-dsa-bcm_sf2-fix-race-condition-while-unmasking-interrupts.patch +revert-phy-irq-cannot-be-shared.patch +net-smc91x-fix-smc-accesses.patch +bridge-re-introduce-fix-parsing-of-mldv2-reports.patch +bonding-fix-bonding-crash.patch +ipv6-release-dst-in-ping_v6_sendmsg.patch +ipv6-addrconf-fix-dev-refcont-leak-when-dad-failed.patch +tcp-cwnd-does-not-increase-in-tcp-yeah.patch diff --git a/net-4.4/tcp-cwnd-does-not-increase-in-tcp-yeah.patch b/net-4.4/tcp-cwnd-does-not-increase-in-tcp-yeah.patch new file mode 100644 index 00000000000..9fe20184559 --- /dev/null +++ b/net-4.4/tcp-cwnd-does-not-increase-in-tcp-yeah.patch @@ -0,0 +1,38 @@ +From foo@baz Wed Sep 21 12:45:10 CEST 2016 +From: Artem Germanov +Date: Wed, 7 Sep 2016 10:49:36 -0700 +Subject: tcp: cwnd does not increase in TCP YeAH + +From: 
Artem Germanov + + +[ Upstream commit db7196a0d0984b933ccf2cd6a60e26abf466e8a3 ] + +Commit 76174004a0f19785a328f40388e87e982bbf69b9 +(tcp: do not slow start when cwnd equals ssthresh ) +introduced regression in TCP YeAH. Using 100ms delay 1% loss virtual +ethernet link kernel 4.2 shows bandwidth ~500KB/s for single TCP +connection and kernel 4.3 and above (including 4.8-rc4) shows bandwidth +~100KB/s. + That is caused by stalled cwnd when cwnd equals ssthresh. This patch +fixes it by proper increasing cwnd in this case. + +Signed-off-by: Artem Germanov +Acked-by: Dmitry Adamushko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_yeah.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp_yeah.c ++++ b/net/ipv4/tcp_yeah.c +@@ -75,7 +75,7 @@ static void tcp_yeah_cong_avoid(struct s + if (!tcp_is_cwnd_limited(sk)) + return; + +- if (tp->snd_cwnd <= tp->snd_ssthresh) ++ if (tcp_in_slow_start(tp)) + tcp_slow_start(tp, acked); + + else if (!yeah->doing_reno_now) { diff --git a/net-4.4/tcp-fix-use-after-free-in-tcp_xmit_retransmit_queue.patch b/net-4.4/tcp-fix-use-after-free-in-tcp_xmit_retransmit_queue.patch new file mode 100644 index 00000000000..b6a54a4e2d5 --- /dev/null +++ b/net-4.4/tcp-fix-use-after-free-in-tcp_xmit_retransmit_queue.patch @@ -0,0 +1,53 @@ +From foo@baz Wed Sep 21 12:45:10 CEST 2016 +From: Eric Dumazet +Date: Wed, 17 Aug 2016 05:56:26 -0700 +Subject: tcp: fix use after free in tcp_xmit_retransmit_queue() + +From: Eric Dumazet + + +[ Upstream commit bb1fceca22492109be12640d49f5ea5a544c6bb4 ] + +When tcp_sendmsg() allocates a fresh and empty skb, it puts it at the +tail of the write queue using tcp_add_write_queue_tail() + +Then it attempts to copy user data into this fresh skb. + +If the copy fails, we undo the work and remove the fresh skb. 
+ +Unfortunately, this undo lacks the change done to tp->highest_sack and +we can leave a dangling pointer (to a freed skb) + +Later, tcp_xmit_retransmit_queue() can dereference this pointer and +access freed memory. For regular kernels where memory is not unmapped, +this might cause SACK bugs because tcp_highest_sack_seq() is buggy, +returning garbage instead of tp->snd_nxt, but with various debug +features like CONFIG_DEBUG_PAGEALLOC, this can crash the kernel. + +This bug was found by Marco Grassi thanks to syzkaller. + +Fixes: 6859d49475d4 ("[TCP]: Abstract tp->highest_sack accessing & point to next skb") +Reported-by: Marco Grassi +Signed-off-by: Eric Dumazet +Cc: Ilpo Järvinen +Cc: Yuchung Cheng +Cc: Neal Cardwell +Acked-by: Neal Cardwell +Reviewed-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tcp.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -1510,6 +1510,8 @@ static inline void tcp_check_send_head(s + { + if (sk->sk_send_head == skb_unlinked) + sk->sk_send_head = NULL; ++ if (tcp_sk(sk)->highest_sack == skb_unlinked) ++ tcp_sk(sk)->highest_sack = NULL; + } + + static inline void tcp_init_send_head(struct sock *sk) diff --git a/net-4.4/tcp-properly-scale-window-in-tcp_v_reqsk_send_ack.patch b/net-4.4/tcp-properly-scale-window-in-tcp_v_reqsk_send_ack.patch new file mode 100644 index 00000000000..4c784a1f126 --- /dev/null +++ b/net-4.4/tcp-properly-scale-window-in-tcp_v_reqsk_send_ack.patch @@ -0,0 +1,79 @@ +From foo@baz Wed Sep 21 12:45:10 CEST 2016 +From: Eric Dumazet +Date: Mon, 22 Aug 2016 11:31:10 -0700 +Subject: tcp: properly scale window in tcp_v[46]_reqsk_send_ack() + +From: Eric Dumazet + + +[ Upstream commit 20a2b49fc538540819a0c552877086548cff8d8d ] + +When sending an ack in SYN_RECV state, we must scale the offered +window if wscale option was negotiated and accepted. 
+ +Tested: + Following packetdrill test demonstrates the issue : + +0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + +// Establish a connection. ++0 < S 0:0(0) win 20000 ++0 > S. 0:0(0) ack 1 win 28960 + ++0 < . 1:11(10) ack 1 win 156 +// check that window is properly scaled ! ++0 > . 1:1(0) ack 1 win 226 + +Signed-off-by: Eric Dumazet +Cc: Yuchung Cheng +Cc: Neal Cardwell +Acked-by: Yuchung Cheng +Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_ipv4.c | 8 +++++++- + net/ipv6/tcp_ipv6.c | 8 +++++++- + 2 files changed, 14 insertions(+), 2 deletions(-) + +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -808,8 +808,14 @@ static void tcp_v4_reqsk_send_ack(const + u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : + tcp_sk(sk)->snd_nxt; + ++ /* RFC 7323 2.3 ++ * The window field (SEG.WND) of every outgoing segment, with the ++ * exception of <SYN> segments, MUST be right-shifted by ++ * Rcv.Wind.Shift bits: ++ */ + tcp_v4_send_ack(sock_net(sk), skb, seq, +- tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, ++ tcp_rsk(req)->rcv_nxt, ++ req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, + tcp_time_stamp, + req->ts_recent, + 0, +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -932,9 +932,15 @@ static void tcp_v6_reqsk_send_ack(const + /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV + * sk->sk_state == TCP_SYN_RECV -> for Fast Open. + */ ++ /* RFC 7323 2.3 ++ * The window field (SEG.WND) of every outgoing segment, with the ++ * exception of <SYN> segments, MUST be right-shifted by ++ * Rcv.Wind.Shift bits: ++ */ + tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 
+ tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, +- tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, ++ tcp_rsk(req)->rcv_nxt, ++ req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, + tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, + tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), + 0, 0); diff --git a/net-4.4/tipc-fix-null-pointer-dereference-in-shutdown.patch b/net-4.4/tipc-fix-null-pointer-dereference-in-shutdown.patch new file mode 100644 index 00000000000..f9772d75be3 --- /dev/null +++ b/net-4.4/tipc-fix-null-pointer-dereference-in-shutdown.patch @@ -0,0 +1,68 @@ +From foo@baz Wed Sep 21 12:45:10 CEST 2016 +From: Vegard Nossum +Date: Sat, 23 Jul 2016 08:15:04 +0200 +Subject: tipc: fix NULL pointer dereference in shutdown() + +From: Vegard Nossum + + +[ Upstream commit d2fbdf76b85bcdfe57b8ef2ba09d20e8ada79abd ] + +tipc_msg_create() can return a NULL skb and if so, we shouldn't try to +call tipc_node_xmit_skb() on it. + + general protection fault: 0000 [#1] PREEMPT SMP KASAN + CPU: 3 PID: 30298 Comm: trinity-c0 Not tainted 4.7.0-rc7+ #19 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 + task: ffff8800baf09980 ti: ffff8800595b8000 task.ti: ffff8800595b8000 + RIP: 0010:[] [] tipc_node_xmit_skb+0x6b/0x140 + RSP: 0018:ffff8800595bfce8 EFLAGS: 00010246 + RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000003023b0e0 + RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffffffff83d12580 + RBP: ffff8800595bfd78 R08: ffffed000b2b7f32 R09: 0000000000000000 + R10: fffffbfff0759725 R11: 0000000000000000 R12: 1ffff1000b2b7f9f + R13: ffff8800595bfd58 R14: ffffffff83d12580 R15: dffffc0000000000 + FS: 00007fcdde242700(0000) GS:ffff88011af80000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007fcddde1db10 CR3: 000000006874b000 CR4: 00000000000006e0 + DR0: 00007fcdde248000 DR1: 00007fcddd73d000 DR2: 00007fcdde248000 + DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000090602 + Stack: + 
0000000000000018 0000000000000018 0000000041b58ab3 ffffffff83954208 + ffffffff830bb400 ffff8800595bfd30 ffffffff8309d767 0000000000000018 + 0000000000000018 ffff8800595bfd78 ffffffff8309da1a 00000000810ee611 + Call Trace: + [] tipc_shutdown+0x553/0x880 + [] SyS_shutdown+0x14b/0x170 + [] do_syscall_64+0x19c/0x410 + [] entry_SYSCALL64_slow_path+0x25/0x25 + Code: 90 00 b4 0b 83 c7 00 f1 f1 f1 f1 4c 8d 6d e0 c7 40 04 00 00 00 f4 c7 40 08 f3 f3 f3 f3 48 89 d8 48 c1 e8 03 c7 45 b4 00 00 00 00 <80> 3c 30 00 75 78 48 8d 7b 08 49 8d 75 c0 48 b8 00 00 00 00 00 + RIP [] tipc_node_xmit_skb+0x6b/0x140 + RSP + ---[ end trace 57b0484e351e71f1 ]--- + +I feel like we should maybe return -ENOMEM or -ENOBUFS, but I'm not sure +userspace is equipped to handle that. Anyway, this is better than a GPF +and looks somewhat consistent with other tipc_msg_create() callers. + +Signed-off-by: Vegard Nossum +Acked-by: Ying Xue +Acked-by: Jon Maloy +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/socket.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/tipc/socket.c ++++ b/net/tipc/socket.c +@@ -2111,7 +2111,8 @@ restart: + TIPC_CONN_MSG, SHORT_H_SIZE, + 0, dnode, onode, dport, oport, + TIPC_CONN_SHUTDOWN); +- tipc_node_xmit_skb(net, skb, dnode, tsk->portid); ++ if (skb) ++ tipc_node_xmit_skb(net, skb, dnode, tsk->portid); + } + tsk->connected = 0; + sock->state = SS_DISCONNECTING; diff --git a/net-4.4/vti-flush-x-netns-xfrm-cache-when-vti-interface-is-removed.patch b/net-4.4/vti-flush-x-netns-xfrm-cache-when-vti-interface-is-removed.patch new file mode 100644 index 00000000000..8f6f8ff0bfe --- /dev/null +++ b/net-4.4/vti-flush-x-netns-xfrm-cache-when-vti-interface-is-removed.patch @@ -0,0 +1,159 @@ +From foo@baz Wed Sep 21 12:45:10 CEST 2016 +From: Lance Richardson +Date: Tue, 9 Aug 2016 15:29:42 -0400 +Subject: vti: flush x-netns xfrm cache when vti interface is removed + +From: Lance Richardson + + +[ Upstream commit 
a5d0dc810abf3d6b241777467ee1d6efb02575fc ] + +When executing the script included below, the netns delete operation +hangs with the following message (repeated at 10 second intervals): + + kernel:unregister_netdevice: waiting for lo to become free. Usage count = 1 + +This occurs because a reference to the lo interface in the "secure" netns +is still held by a dst entry in the xfrm bundle cache in the init netns. + +Address this problem by garbage collecting the tunnel netns flow cache +when a cross-namespace vti interface receives a NETDEV_DOWN notification. + +A more detailed description of the problem scenario (referencing commands +in the script below): + +(1) ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1 + + The vti_test interface is created in the init namespace. vti_tunnel_init() + attaches a struct ip_tunnel to the vti interface's netdev_priv(dev), + setting the tunnel net to &init_net. + +(2) ip link set vti_test netns secure + + The vti_test interface is moved to the "secure" netns. Note that + the associated struct ip_tunnel still has tunnel->net set to &init_net. + +(3) ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1 + + The first packet sent using the vti device causes xfrm_lookup() to be + called as follows: + + dst = xfrm_lookup(tunnel->net, skb_dst(skb), fl, NULL, 0); + + Note that tunnel->net is the init namespace, while skb_dst(skb) references + the vti_test interface in the "secure" namespace. The returned dst + references an interface in the init namespace. + + Also note that the first parameter to xfrm_lookup() determines which flow + cache is used to store the computed xfrm bundle, so after xfrm_lookup() + returns there will be a cached bundle in the init namespace flow cache + with a dst referencing a device in the "secure" namespace. + +(4) ip netns del secure + + Kernel begins to delete the "secure" namespace. 
At some point the + vti_test interface is deleted, at which point dst_ifdown() changes + the dst->dev in the cached xfrm bundle flow from vti_test to lo (still + in the "secure" namespace however). + Since nothing has happened to cause the init namespace's flow cache + to be garbage collected, this dst remains attached to the flow cache, + so the kernel loops waiting for the last reference to lo to go away. + + +ip link add br1 type bridge +ip link set dev br1 up +ip addr add dev br1 1.1.1.1/8 + +ip netns add secure +ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1 +ip link set vti_test netns secure +ip netns exec secure ip link set vti_test up +ip netns exec secure ip link s lo up +ip netns exec secure ip addr add dev lo 192.168.100.1/24 +ip netns exec secure ip route add 192.168.200.0/24 dev vti_test +ip xfrm policy flush +ip xfrm state flush +ip xfrm policy add dir out tmpl src 1.1.1.1 dst 1.1.1.2 \ + proto esp mode tunnel mark 1 +ip xfrm policy add dir in tmpl src 1.1.1.2 dst 1.1.1.1 \ + proto esp mode tunnel mark 1 +ip xfrm state add src 1.1.1.1 dst 1.1.1.2 proto esp spi 1 \ + mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788 +ip xfrm state add src 1.1.1.2 dst 1.1.1.1 proto esp spi 1 \ + mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788 + +ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1 + +ip netns del secure + + +Reported-by: Hangbin Liu +Reported-by: Jan Tluka +Signed-off-by: Lance Richardson +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_vti.c | 31 +++++++++++++++++++++++++++++++ + 1 file changed, 31 insertions(+) + +--- a/net/ipv4/ip_vti.c ++++ b/net/ipv4/ip_vti.c +@@ -540,6 +540,33 @@ static struct rtnl_link_ops vti_link_ops + .get_link_net = ip_tunnel_get_link_net, + }; + ++static bool is_vti_tunnel(const struct net_device *dev) ++{ ++ return dev->netdev_ops == &vti_netdev_ops; ++} ++ ++static int vti_device_event(struct notifier_block *unused, ++ unsigned long event, void *ptr) ++{ ++ struct net_device *dev = netdev_notifier_info_to_dev(ptr); ++ struct ip_tunnel *tunnel = netdev_priv(dev); ++ ++ if (!is_vti_tunnel(dev)) ++ return NOTIFY_DONE; ++ ++ switch (event) { ++ case NETDEV_DOWN: ++ if (!net_eq(tunnel->net, dev_net(dev))) ++ xfrm_garbage_collect(tunnel->net); ++ break; ++ } ++ return NOTIFY_DONE; ++} ++ ++static struct notifier_block vti_notifier_block __read_mostly = { ++ .notifier_call = vti_device_event, ++}; ++ + static int __init vti_init(void) + { + const char *msg; +@@ -547,6 +574,8 @@ static int __init vti_init(void) + + pr_info("IPv4 over IPsec tunneling driver\n"); + ++ register_netdevice_notifier(&vti_notifier_block); ++ + msg = "tunnel device"; + err = register_pernet_device(&vti_net_ops); + if (err < 0) +@@ -579,6 +608,7 @@ xfrm_proto_ah_failed: + xfrm_proto_esp_failed: + unregister_pernet_device(&vti_net_ops); + pernet_dev_failed: ++ unregister_netdevice_notifier(&vti_notifier_block); + pr_err("vti init: failed to register %s\n", msg); + return err; + } +@@ -590,6 +620,7 @@ static void __exit vti_fini(void) + xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); + xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); + unregister_pernet_device(&vti_net_ops); ++ unregister_netdevice_notifier(&vti_notifier_block); + } + + module_init(vti_init);