From: Greg Kroah-Hartman Date: Fri, 12 Aug 2016 07:35:30 +0000 (+0200) Subject: 4.6-stable patches X-Git-Tag: v3.14.76~23 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=eecad1ec16f51411c9b6295b74d494271ffea879;p=thirdparty%2Fkernel%2Fstable-queue.git 4.6-stable patches added patches: bonding-set-carrier-off-for-devices-created-through-netlink.patch bridge-fix-incorrect-re-injection-of-lldp-packets.patch macsec-ensure-rx_sa-is-set-when-validation-is-disabled.patch net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch net-ipv6-always-leave-anycast-and-multicast-groups-on-link-down.patch net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch ppp-defer-netns-reference-release-for-ppp-channel.patch qed-fix-setting-clearing-bit-in-completion-bitmap.patch tcp-consider-recv-buf-for-the-initial-window-scale.patch tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch tcp-make-challenge-acks-less-predictable.patch vlan-use-a-valid-default-mtu-value-for-vlan-over-macsec.patch --- diff --git a/queue-4.6/bonding-set-carrier-off-for-devices-created-through-netlink.patch b/queue-4.6/bonding-set-carrier-off-for-devices-created-through-netlink.patch new file mode 100644 index 00000000000..4ead61e6364 --- /dev/null +++ b/queue-4.6/bonding-set-carrier-off-for-devices-created-through-netlink.patch @@ -0,0 +1,47 @@ +From foo@baz Fri Aug 12 09:34:33 CEST 2016 +From: Beniamino Galvani +Date: Wed, 13 Jul 2016 18:25:08 +0200 +Subject: bonding: set carrier off for devices created through netlink + +From: Beniamino Galvani + +[ Upstream commit 005db31d5f5f7c31cfdc43505d77eb3ca5cf8ec6 ] + +Commit e826eafa65c6 ("bonding: Call netif_carrier_off after +register_netdevice") moved netif_carrier_off() from bond_init() to +bond_create(), but the latter is called only for initial default +devices and ones created through sysfs: + + $ modprobe bonding + $ echo +bond1 > /sys/class/net/bonding_masters + $ ip link add bond2 type bond + $ grep "MII Status" 
/proc/net/bonding/* + /proc/net/bonding/bond0:MII Status: down + /proc/net/bonding/bond1:MII Status: down + /proc/net/bonding/bond2:MII Status: up + +Ensure that carrier is initially off also for devices created through +netlink. + +Signed-off-by: Beniamino Galvani +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_netlink.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/net/bonding/bond_netlink.c ++++ b/drivers/net/bonding/bond_netlink.c +@@ -446,7 +446,11 @@ static int bond_newlink(struct net *src_ + if (err < 0) + return err; + +- return register_netdevice(bond_dev); ++ err = register_netdevice(bond_dev); ++ ++ netif_carrier_off(bond_dev); ++ ++ return err; + } + + static size_t bond_get_size(const struct net_device *bond_dev) diff --git a/queue-4.6/bridge-fix-incorrect-re-injection-of-lldp-packets.patch b/queue-4.6/bridge-fix-incorrect-re-injection-of-lldp-packets.patch new file mode 100644 index 00000000000..d3f7b7e68f7 --- /dev/null +++ b/queue-4.6/bridge-fix-incorrect-re-injection-of-lldp-packets.patch @@ -0,0 +1,77 @@ +From foo@baz Fri Aug 12 09:34:33 CEST 2016 +From: Ido Schimmel +Date: Fri, 22 Jul 2016 14:56:20 +0300 +Subject: bridge: Fix incorrect re-injection of LLDP packets + +From: Ido Schimmel + +[ Upstream commit baedbe55884c003819f5c8c063ec3d2569414296 ] + +Commit 8626c56c8279 ("bridge: fix potential use-after-free when hook +returns QUEUE or STOLEN verdict") caused LLDP packets arriving through a +bridge port to be re-injected to the Rx path with skb->dev set to the +bridge device, but this breaks the lldpad daemon. + +The lldpad daemon opens a packet socket with protocol set to ETH_P_LLDP +for any valid device on the system, which does not include soft +devices such as bridge and VLAN. + +Since packet sockets (ptype_base) are processed in the Rx path after the +Rx handler, LLDP packets with skb->dev set to the bridge device never +reach the lldpad daemon.
+ +Fix this by making the bridge's Rx handler re-inject LLDP packets with +RX_HANDLER_PASS, which effectively restores the behaviour prior to the +mentioned commit. + +This means netfilter will never receive LLDP packets coming through a +bridge port, as I don't see a way in which we can have okfn() consume +the packet without breaking existing behaviour. I've already carried out +a similar fix for STP packets in commit 56fae404fb2c ("bridge: Fix +incorrect re-injection of STP packets"). + +Fixes: 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict") +Signed-off-by: Ido Schimmel +Reviewed-by: Jiri Pirko +Cc: Florian Westphal +Cc: John Fastabend +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_input.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +--- a/net/bridge/br_input.c ++++ b/net/bridge/br_input.c +@@ -213,6 +213,16 @@ drop: + } + EXPORT_SYMBOL_GPL(br_handle_frame_finish); + ++static void __br_handle_local_finish(struct sk_buff *skb) ++{ ++ struct net_bridge_port *p = br_port_get_rcu(skb->dev); ++ u16 vid = 0; ++ ++ /* check if vlan is allowed, to avoid spoofing */ ++ if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) ++ br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); ++} ++ + /* note: already called with rcu_read_lock */ + static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) + { +@@ -279,6 +289,14 @@ rx_handler_result_t br_handle_frame(stru + case 0x01: /* IEEE MAC (Pause) */ + goto drop; + ++ case 0x0E: /* 802.1AB LLDP */ ++ fwd_mask |= p->br->group_fwd_mask; ++ if (fwd_mask & (1u << dest[5])) ++ goto forward; ++ *pskb = skb; ++ __br_handle_local_finish(skb); ++ return RX_HANDLER_PASS; ++ + default: + /* Allow selective forwarding for most other protocols */ + fwd_mask |= p->br->group_fwd_mask; diff --git a/queue-4.6/macsec-ensure-rx_sa-is-set-when-validation-is-disabled.patch 
b/queue-4.6/macsec-ensure-rx_sa-is-set-when-validation-is-disabled.patch new file mode 100644 index 00000000000..d298a36a9f7 --- /dev/null +++ b/queue-4.6/macsec-ensure-rx_sa-is-set-when-validation-is-disabled.patch @@ -0,0 +1,41 @@ +From foo@baz Fri Aug 12 09:34:33 CEST 2016 +From: Beniamino Galvani +Date: Tue, 26 Jul 2016 12:24:53 +0200 +Subject: macsec: ensure rx_sa is set when validation is disabled + +From: Beniamino Galvani + +[ Upstream commit e3a3b626010a14fe067f163c2c43409d5afcd2a9 ] + +macsec_decrypt() is not called when validation is disabled and so +macsec_skb_cb(skb)->rx_sa is not set; but it is used later in +macsec_post_decrypt(), ensure that it's always initialized. + +Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") +Signed-off-by: Beniamino Galvani +Acked-by: Sabrina Dubroca +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macsec.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -914,7 +914,6 @@ static struct sk_buff *macsec_decrypt(st + } + + macsec_skb_cb(skb)->req = req; +- macsec_skb_cb(skb)->rx_sa = rx_sa; + skb->dev = dev; + aead_request_set_callback(req, 0, macsec_decrypt_done, skb); + +@@ -1141,6 +1140,8 @@ static rx_handler_result_t macsec_handle + } + } + ++ macsec_skb_cb(skb)->rx_sa = rx_sa; ++ + /* Disabled && !changed text => skip validation */ + if (hdr->tci_an & MACSEC_TCI_C || + secy->validate_frames != MACSEC_VALIDATE_DISABLED) diff --git a/queue-4.6/net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch b/queue-4.6/net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch new file mode 100644 index 00000000000..418e71e66ee --- /dev/null +++ b/queue-4.6/net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch @@ -0,0 +1,32 @@ +From foo@baz Fri Aug 12 09:34:33 CEST 2016 +From: Florian Fainelli +Date: Fri, 15 Jul 2016 15:42:52 -0700 +Subject: net: bgmac: Fix infinite loop in bgmac_dma_tx_add() + +From: Florian Fainelli + 
+[ Upstream commit e86663c475d384ab5f46cb5637e9b7ad08c5c505 ] + +Nothing is decrementing the index "i" while we are cleaning up the +fragments we could not successfully transmit. + +Fixes: 9cde94506eacf ("bgmac: implement scatter/gather support") +Reported-by: coverity (CID 1352048) +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bgmac.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/bgmac.c ++++ b/drivers/net/ethernet/broadcom/bgmac.c +@@ -231,7 +231,7 @@ err_dma: + dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb), + DMA_TO_DEVICE); + +- while (i > 0) { ++ while (i-- > 0) { + int index = (ring->end + i) % BGMAC_TX_RING_SLOTS; + struct bgmac_slot_info *slot = &ring->slots[index]; + u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1); diff --git a/queue-4.6/net-ipv6-always-leave-anycast-and-multicast-groups-on-link-down.patch b/queue-4.6/net-ipv6-always-leave-anycast-and-multicast-groups-on-link-down.patch new file mode 100644 index 00000000000..6be08d8273c --- /dev/null +++ b/queue-4.6/net-ipv6-always-leave-anycast-and-multicast-groups-on-link-down.patch @@ -0,0 +1,42 @@ +From foo@baz Fri Aug 12 09:34:33 CEST 2016 +From: Mike Manning +Date: Fri, 22 Jul 2016 18:32:11 +0100 +Subject: net: ipv6: Always leave anycast and multicast groups on link down + +From: Mike Manning + +[ Upstream commit ea06f7176413e2538d13bb85b65387d0917943d9 ] + +Default kernel behavior is to delete IPv6 addresses on link +down, which entails deletion of the multicast and the +subnet-router anycast addresses. These deletions do not +happen with sysctl setting to keep global IPv6 addresses on +link down, so every link down/up causes an increment of the +anycast and multicast refcounts. These bogus refcounts may +stop these addrs from being removed on subsequent calls to +delete them.
The solution is to leave the groups for the +multicast and subnet anycast on link down for the callflow +when global IPv6 addresses are kept. + +Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional") +Signed-off-by: Mike Manning +Acked-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -3563,6 +3563,10 @@ restart: + if (state != INET6_IFADDR_STATE_DEAD) { + __ipv6_ifa_notify(RTM_DELADDR, ifa); + inet6addr_notifier_call_chain(NETDEV_DOWN, ifa); ++ } else { ++ if (idev->cnf.forwarding) ++ addrconf_leave_anycast(ifa); ++ addrconf_leave_solict(ifa->idev, &ifa->addr); + } + + write_lock_bh(&idev->lock); diff --git a/queue-4.6/net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch b/queue-4.6/net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch new file mode 100644 index 00000000000..7b7f01ba12a --- /dev/null +++ b/queue-4.6/net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch @@ -0,0 +1,69 @@ +From foo@baz Fri Aug 12 09:34:33 CEST 2016 +From: Vegard Nossum +Date: Sat, 23 Jul 2016 07:43:50 +0200 +Subject: net/irda: fix NULL pointer dereference on memory allocation failure + +From: Vegard Nossum + +[ Upstream commit d3e6952cfb7ba5f4bfa29d4803ba91f96ce1204d ] + +I ran into this: + + kasan: CONFIG_KASAN_INLINE enabled + kasan: GPF could be caused by NULL-ptr deref or user memory access + general protection fault: 0000 [#1] PREEMPT SMP KASAN + CPU: 2 PID: 2012 Comm: trinity-c3 Not tainted 4.7.0-rc7+ #19 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 + task: ffff8800b745f2c0 ti: ffff880111740000 task.ti: ffff880111740000 + RIP: 0010:[] [] irttp_connect_request+0x36/0x710 + RSP: 0018:ffff880111747bb8 EFLAGS: 00010286 + RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000069dd8358 + RDX: 
0000000000000009 RSI: 0000000000000027 RDI: 0000000000000048 + RBP: ffff880111747c00 R08: 0000000000000000 R09: 0000000000000000 + R10: 0000000069dd8358 R11: 1ffffffff0759723 R12: 0000000000000000 + R13: ffff88011a7e4780 R14: 0000000000000027 R15: 0000000000000000 + FS: 00007fc738404700(0000) GS:ffff88011af00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007fc737fdfb10 CR3: 0000000118087000 CR4: 00000000000006e0 + Stack: + 0000000000000200 ffff880111747bd8 ffffffff810ee611 ffff880119f1f220 + ffff880119f1f4f8 ffff880119f1f4f0 ffff88011a7e4780 ffff880119f1f232 + ffff880119f1f220 ffff880111747d58 ffffffff82bca542 0000000000000000 + Call Trace: + [] irda_connect+0x562/0x1190 + [] SYSC_connect+0x202/0x2a0 + [] SyS_connect+0x9/0x10 + [] do_syscall_64+0x19c/0x410 + [] entry_SYSCALL64_slow_path+0x25/0x25 + Code: 41 89 ca 48 89 e5 41 57 41 56 41 55 41 54 41 89 d7 53 48 89 fb 48 83 c7 48 48 89 fa 41 89 f6 48 c1 ea 03 48 83 ec 20 4c 8b 65 10 <0f> b6 04 02 84 c0 74 08 84 c0 0f 8e 4c 04 00 00 80 7b 48 00 74 + RIP [] irttp_connect_request+0x36/0x710 + RSP + ---[ end trace 4cda2588bc055b30 ]--- + +The problem is that irda_open_tsap() can fail and leave self->tsap = NULL, +and then irttp_connect_request() almost immediately dereferences it. + +Cc: stable@vger.kernel.org +Signed-off-by: Vegard Nossum +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/irda/af_irda.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/irda/af_irda.c ++++ b/net/irda/af_irda.c +@@ -1024,8 +1024,11 @@ static int irda_connect(struct socket *s + } + + /* Check if we have opened a local TSAP */ +- if (!self->tsap) +- irda_open_tsap(self, LSAP_ANY, addr->sir_name); ++ if (!self->tsap) { ++ err = irda_open_tsap(self, LSAP_ANY, addr->sir_name); ++ if (err) ++ goto out; ++ } + + /* Move to connecting socket, start sending Connect Requests */ + sock->state = SS_CONNECTING; diff --git a/queue-4.6/ppp-defer-netns-reference-release-for-ppp-channel.patch b/queue-4.6/ppp-defer-netns-reference-release-for-ppp-channel.patch new file mode 100644 index 00000000000..118b5f98717 --- /dev/null +++ b/queue-4.6/ppp-defer-netns-reference-release-for-ppp-channel.patch @@ -0,0 +1,55 @@ +From foo@baz Fri Aug 12 09:34:33 CEST 2016 +From: WANG Cong +Date: Tue, 5 Jul 2016 22:12:36 -0700 +Subject: ppp: defer netns reference release for ppp channel + +From: WANG Cong + +[ Upstream commit 205e1e255c479f3fd77446415706463b282f94e4 ] + +Matt reported that we have a NULL pointer dereference +in ppp_pernet() from ppp_connect_channel(), +i.e. pch->chan_net is NULL. + +This is because a parallel ppp_unregister_channel() +could happen while we are in ppp_connect_channel(), during +which pch->chan_net is set to NULL. Since we need a reference +to net per channel, it makes sense to sync the refcnt +with the life time of the channel, therefore we should +release this reference when we destroy it. + +Fixes: 1f461dcdd296 ("ppp: take reference on channels netns") +Reported-by: Matt Bennett +Cc: Paul Mackerras +Cc: linux-ppp@vger.kernel.org +Cc: Guillaume Nault +Cc: Cyrill Gorcunov +Signed-off-by: Cong Wang +Reviewed-by: Cyrill Gorcunov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/ppp_generic.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/net/ppp/ppp_generic.c ++++ b/drivers/net/ppp/ppp_generic.c +@@ -2404,8 +2404,6 @@ ppp_unregister_channel(struct ppp_channe + spin_lock_bh(&pn->all_channels_lock); + list_del(&pch->list); + spin_unlock_bh(&pn->all_channels_lock); +- put_net(pch->chan_net); +- pch->chan_net = NULL; + + pch->file.dead = 1; + wake_up_interruptible(&pch->file.rwait); +@@ -2999,6 +2997,9 @@ ppp_disconnect_channel(struct channel *p + */ + static void ppp_destroy_channel(struct channel *pch) + { ++ put_net(pch->chan_net); ++ pch->chan_net = NULL; ++ + atomic_dec(&channel_count); + + if (!pch->file.dead) { diff --git a/queue-4.6/qed-fix-setting-clearing-bit-in-completion-bitmap.patch b/queue-4.6/qed-fix-setting-clearing-bit-in-completion-bitmap.patch new file mode 100644 index 00000000000..31f8928ecce --- /dev/null +++ b/queue-4.6/qed-fix-setting-clearing-bit-in-completion-bitmap.patch @@ -0,0 +1,40 @@ +From foo@baz Fri Aug 12 09:34:33 CEST 2016 +From: Manish Chopra +Date: Mon, 25 Jul 2016 19:07:46 +0300 +Subject: qed: Fix setting/clearing bit in completion bitmap + +From: Manish Chopra + +[ Upstream commit 59d3f1ceb69b54569685d0c34dff16a1e0816b19 ] + +Slowpath completion handling is incorrectly changing +SPQ_RING_SIZE bits instead of a single one. + +Fixes: 76a9a3642a0b ("qed: fix handling of concurrent ramrods") +Signed-off-by: Manish Chopra +Signed-off-by: Yuval Mintz +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qed/qed_spq.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c +@@ -791,13 +791,12 @@ int qed_spq_completion(struct qed_hwfn * + * in a bitmap and increasing the chain consumer only + * for the first successive completed entries. 
+ */ +- bitmap_set(p_spq->p_comp_bitmap, pos, SPQ_RING_SIZE); ++ __set_bit(pos, p_spq->p_comp_bitmap); + + while (test_bit(p_spq->comp_bitmap_idx, + p_spq->p_comp_bitmap)) { +- bitmap_clear(p_spq->p_comp_bitmap, +- p_spq->comp_bitmap_idx, +- SPQ_RING_SIZE); ++ __clear_bit(p_spq->comp_bitmap_idx, ++ p_spq->p_comp_bitmap); + p_spq->comp_bitmap_idx++; + qed_chain_return_produced(&p_spq->chain); + } diff --git a/queue-4.6/series b/queue-4.6/series index b3708f9e5f1..858e1af22b5 100644 --- a/queue-4.6/series +++ b/queue-4.6/series @@ -1,2 +1,14 @@ libnvdimm-dax-record-the-specified-alignment-of-a-dax-device-instance.patch libnvdimm-pfn-dax-fix-initialization-vs-autodetect-for-mode-alignment.patch +ppp-defer-netns-reference-release-for-ppp-channel.patch +tcp-make-challenge-acks-less-predictable.patch +tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch +bonding-set-carrier-off-for-devices-created-through-netlink.patch +net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch +vlan-use-a-valid-default-mtu-value-for-vlan-over-macsec.patch +bridge-fix-incorrect-re-injection-of-lldp-packets.patch +net-ipv6-always-leave-anycast-and-multicast-groups-on-link-down.patch +net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch +qed-fix-setting-clearing-bit-in-completion-bitmap.patch +macsec-ensure-rx_sa-is-set-when-validation-is-disabled.patch +tcp-consider-recv-buf-for-the-initial-window-scale.patch diff --git a/queue-4.6/tcp-consider-recv-buf-for-the-initial-window-scale.patch b/queue-4.6/tcp-consider-recv-buf-for-the-initial-window-scale.patch new file mode 100644 index 00000000000..ca1c3acc748 --- /dev/null +++ b/queue-4.6/tcp-consider-recv-buf-for-the-initial-window-scale.patch @@ -0,0 +1,44 @@ +From foo@baz Fri Aug 12 09:34:33 CEST 2016 +From: Soheil Hassas Yeganeh +Date: Fri, 29 Jul 2016 09:34:02 -0400 +Subject: tcp: consider recv buf for the initial window scale + +From: Soheil Hassas Yeganeh + +[ Upstream commit 
f626300a3e776ccc9671b0dd94698fb3aa315966 ] + +tcp_select_initial_window() intends to advertise a window +scaling for the maximum possible window size. To do so, +it considers the maximum of net.ipv4.tcp_rmem[2] and +net.core.rmem_max as the only possible upper-bounds. +However, users with CAP_NET_ADMIN can use SO_RCVBUFFORCE +to set the socket's receive buffer size to values +larger than net.ipv4.tcp_rmem[2] and net.core.rmem_max. +Thus, SO_RCVBUFFORCE is effectively ignored by +tcp_select_initial_window(). + +To fix this, consider the maximum of net.ipv4.tcp_rmem[2], +net.core.rmem_max and socket's initial buffer space. + +Fixes: b0573dea1fb3 ("[NET]: Introduce SO_{SND,RCV}BUFFORCE socket options") +Signed-off-by: Soheil Hassas Yeganeh +Suggested-by: Neal Cardwell +Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -236,7 +236,8 @@ void tcp_select_initial_window(int __spa + /* Set window scaling on max possible window + * See RFC1323 for an explanation of the limit to 14 + */ +- space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max); ++ space = max_t(u32, space, sysctl_tcp_rmem[2]); ++ space = max_t(u32, space, sysctl_rmem_max); + space = min_t(u32, space, *window_clamp); + while (space > 65535 && (*rcv_wscale) < 14) { + space >>= 1; diff --git a/queue-4.6/tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch b/queue-4.6/tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch new file mode 100644 index 00000000000..bf1b36c51c4 --- /dev/null +++ b/queue-4.6/tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch @@ -0,0 +1,102 @@ +From foo@baz Fri Aug 12 09:34:33 CEST 2016 +From: Jason Baron +Date: Thu, 14 Jul 2016 11:38:40 -0400 +Subject: tcp: enable per-socket rate limiting of all 'challenge acks' + +From: Jason Baron + +[ Upstream commit 
083ae308280d13d187512b9babe3454342a7987e ] + +The per-socket rate limit for 'challenge acks' was introduced in the +context of limiting ack loops: + +commit f2b2c582e824 ("tcp: mitigate ACK loops for connections as tcp_sock") + +And I think it can be extended to rate limit all 'challenge acks' on a +per-socket basis. + +Since we have the global tcp_challenge_ack_limit, this patch allows for +tcp_challenge_ack_limit to be set to a large value and effectively rely on +the per-socket limit, or set tcp_challenge_ack_limit to a lower value and +still prevents a single connection from consuming the entire challenge ack +quota. + +It further moves in the direction of eliminating the global limit at some +point, as Eric Dumazet has suggested. This is a follow-up to: +Subject: tcp: make challenge acks less predictable + +Cc: Eric Dumazet +Cc: David S. Miller +Cc: Neal Cardwell +Cc: Yuchung Cheng +Cc: Yue Cao +Signed-off-by: Jason Baron +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 39 ++++++++++++++++++++++----------------- + 1 file changed, 22 insertions(+), 17 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -3423,6 +3423,23 @@ static int tcp_ack_update_window(struct + return flag; + } + ++static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, ++ u32 *last_oow_ack_time) ++{ ++ if (*last_oow_ack_time) { ++ s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); ++ ++ if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { ++ NET_INC_STATS_BH(net, mib_idx); ++ return true; /* rate-limited: don't send yet! */ ++ } ++ } ++ ++ *last_oow_ack_time = tcp_time_stamp; ++ ++ return false; /* not rate-limited: go ahead, send dupack now! */ ++} ++ + /* Return true if we're currently rate-limiting out-of-window ACKs and + * thus shouldn't send a dupack right now. 
We rate-limit dupacks in + * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS +@@ -3436,21 +3453,9 @@ bool tcp_oow_rate_limited(struct net *ne + /* Data packets without SYNs are not likely part of an ACK loop. */ + if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) && + !tcp_hdr(skb)->syn) +- goto not_rate_limited; +- +- if (*last_oow_ack_time) { +- s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); ++ return false; + +- if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { +- NET_INC_STATS_BH(net, mib_idx); +- return true; /* rate-limited: don't send yet! */ +- } +- } +- +- *last_oow_ack_time = tcp_time_stamp; +- +-not_rate_limited: +- return false; /* not rate-limited: go ahead, send dupack now! */ ++ return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time); + } + + /* RFC 5961 7 [ACK Throttling] */ +@@ -3463,9 +3468,9 @@ static void tcp_send_challenge_ack(struc + u32 count, now; + + /* First check our per-socket dupack rate limit. */ +- if (tcp_oow_rate_limited(sock_net(sk), skb, +- LINUX_MIB_TCPACKSKIPPEDCHALLENGE, +- &tp->last_oow_ack_time)) ++ if (__tcp_oow_rate_limited(sock_net(sk), ++ LINUX_MIB_TCPACKSKIPPEDCHALLENGE, ++ &tp->last_oow_ack_time)) + return; + + /* Then check host-wide RFC 5961 rate limit. */ diff --git a/queue-4.6/tcp-make-challenge-acks-less-predictable.patch b/queue-4.6/tcp-make-challenge-acks-less-predictable.patch new file mode 100644 index 00000000000..8ca30d18057 --- /dev/null +++ b/queue-4.6/tcp-make-challenge-acks-less-predictable.patch @@ -0,0 +1,81 @@ +From foo@baz Fri Aug 12 09:34:33 CEST 2016 +From: Eric Dumazet +Date: Sun, 10 Jul 2016 10:04:02 +0200 +Subject: tcp: make challenge acks less predictable + +From: Eric Dumazet + +[ Upstream commit 75ff39ccc1bd5d3c455b6822ab09e533c551f758 ] + +Yue Cao claims that current host rate limiting of challenge ACKS +(RFC 5961) could leak enough information to allow a patient attacker +to hijack TCP sessions. 
He will soon provide details in an academic +paper. + +This patch increases the default limit from 100 to 1000, and adds +some randomization so that the attacker can no longer hijack +sessions without spending a considerable amount of probes. + +Based on initial analysis and patch from Linus. + +Note that we also have per socket rate limiting, so it is tempting +to remove the host limit in the future. + +v2: randomize the count of challenge acks per second, not the period. + +Fixes: 282f23c6ee34 ("tcp: implement RFC 5961 3.2") +Reported-by: Yue Cao +Signed-off-by: Eric Dumazet +Suggested-by: Linus Torvalds +Cc: Yuchung Cheng +Cc: Neal Cardwell +Acked-by: Neal Cardwell +Acked-by: Yuchung Cheng +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -87,7 +87,7 @@ int sysctl_tcp_adv_win_scale __read_most + EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); + + /* rfc5961 challenge ack rate limiting */ +-int sysctl_tcp_challenge_ack_limit = 100; ++int sysctl_tcp_challenge_ack_limit = 1000; + + int sysctl_tcp_stdurg __read_mostly; + int sysctl_tcp_rfc1337 __read_mostly; +@@ -3460,7 +3460,7 @@ static void tcp_send_challenge_ack(struc + static u32 challenge_timestamp; + static unsigned int challenge_count; + struct tcp_sock *tp = tcp_sk(sk); +- u32 now; ++ u32 count, now; + + /* First check our per-socket dupack rate limit. */ + if (tcp_oow_rate_limited(sock_net(sk), skb, +@@ -3468,13 +3468,18 @@ static void tcp_send_challenge_ack(struc + &tp->last_oow_ack_time)) + return; + +- /* Then check the check host-wide RFC 5961 rate limit. */ ++ /* Then check host-wide RFC 5961 rate limit. 
*/ + now = jiffies / HZ; + if (now != challenge_timestamp) { ++ u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1; ++ + challenge_timestamp = now; +- challenge_count = 0; ++ WRITE_ONCE(challenge_count, half + ++ prandom_u32_max(sysctl_tcp_challenge_ack_limit)); + } +- if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { ++ count = READ_ONCE(challenge_count); ++ if (count > 0) { ++ WRITE_ONCE(challenge_count, count - 1); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + tcp_send_ack(sk); + } diff --git a/queue-4.6/vlan-use-a-valid-default-mtu-value-for-vlan-over-macsec.patch b/queue-4.6/vlan-use-a-valid-default-mtu-value-for-vlan-over-macsec.patch new file mode 100644 index 00000000000..1e315dc9890 --- /dev/null +++ b/queue-4.6/vlan-use-a-valid-default-mtu-value-for-vlan-over-macsec.patch @@ -0,0 +1,89 @@ +From foo@baz Fri Aug 12 09:34:33 CEST 2016 +From: Paolo Abeni +Date: Thu, 14 Jul 2016 18:00:10 +0200 +Subject: vlan: use a valid default mtu value for vlan over macsec + +From: Paolo Abeni + +[ Upstream commit 18d3df3eab23796d7f852f9c6bb60962b8372ced ] + +macsec can't cope with mtu frames which need vlan tag insertion, and +vlan devices set the default mtu equal to the underlying dev's one. +By default vlan over macsec devices use invalid mtu, dropping +all the large packets. +This patch adds a netif helper to check if an upper vlan device +needs mtu reduction. The helper is used during vlan devices +initialization to set a valid default and during mtu updating to +forbid invalid, too big, mtu values. +The helper currently only checks if the lower dev is a macsec device, +if we get more users, we need to update only the helper (possibly +reserving an additional IFF bit). + +Signed-off-by: Paolo Abeni +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 7 +++++++ + net/8021q/vlan_dev.c | 10 ++++++---- + net/8021q/vlan_netlink.c | 7 +++++-- + 3 files changed, 18 insertions(+), 6 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -4156,6 +4156,13 @@ static inline void netif_keep_dst(struct + dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM); + } + ++/* return true if dev can't cope with mtu frames that need vlan tag insertion */ ++static inline bool netif_reduces_vlan_mtu(struct net_device *dev) ++{ ++ /* TODO: reserve and use an additional IFF bit, if we get more users */ ++ return dev->priv_flags & IFF_MACSEC; ++} ++ + extern struct pernet_operations __net_initdata loopback_net_ops; + + /* Logging, debugging and troubleshooting/diagnostic helpers. */ +--- a/net/8021q/vlan_dev.c ++++ b/net/8021q/vlan_dev.c +@@ -146,10 +146,12 @@ static netdev_tx_t vlan_dev_hard_start_x + + static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) + { +- /* TODO: gotta make sure the underlying layer can handle it, +- * maybe an IFF_VLAN_CAPABLE flag for devices? +- */ +- if (vlan_dev_priv(dev)->real_dev->mtu < new_mtu) ++ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; ++ unsigned int max_mtu = real_dev->mtu; ++ ++ if (netif_reduces_vlan_mtu(real_dev)) ++ max_mtu -= VLAN_HLEN; ++ if (max_mtu < new_mtu) + return -ERANGE; + + dev->mtu = new_mtu; +--- a/net/8021q/vlan_netlink.c ++++ b/net/8021q/vlan_netlink.c +@@ -118,6 +118,7 @@ static int vlan_newlink(struct net *src_ + { + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + struct net_device *real_dev; ++ unsigned int max_mtu; + __be16 proto; + int err; + +@@ -144,9 +145,11 @@ static int vlan_newlink(struct net *src_ + if (err < 0) + return err; + ++ max_mtu = netif_reduces_vlan_mtu(real_dev) ? 
real_dev->mtu - VLAN_HLEN : ++ real_dev->mtu; + if (!tb[IFLA_MTU]) +- dev->mtu = real_dev->mtu; +- else if (dev->mtu > real_dev->mtu) ++ dev->mtu = max_mtu; ++ else if (dev->mtu > max_mtu) + return -EINVAL; + + err = vlan_changelink(dev, tb, data);