--- /dev/null
+From foo@baz Fri Aug 12 09:34:33 CEST 2016
+From: Beniamino Galvani <bgalvani@redhat.com>
+Date: Wed, 13 Jul 2016 18:25:08 +0200
+Subject: bonding: set carrier off for devices created through netlink
+
+From: Beniamino Galvani <bgalvani@redhat.com>
+
+[ Upstream commit 005db31d5f5f7c31cfdc43505d77eb3ca5cf8ec6 ]
+
+Commit e826eafa65c6 ("bonding: Call netif_carrier_off after
+register_netdevice") moved netif_carrier_off() from bond_init() to
+bond_create(), but the latter is called only for initial default
+devices and ones created through sysfs:
+
+ $ modprobe bonding
+ $ echo +bond1 > /sys/class/net/bonding_masters
+ $ ip link add bond2 type bond
+ $ grep "MII Status" /proc/net/bonding/*
+ /proc/net/bonding/bond0:MII Status: down
+ /proc/net/bonding/bond1:MII Status: down
+ /proc/net/bonding/bond2:MII Status: up
+
+Ensure that the carrier is initially off also for devices created
+through netlink.
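+
+As an illustration only (a compilable userspace model, not kernel
+code): a freshly registered net_device starts with carrier up, so a
+creation path that skips netif_carrier_off() leaves the bond
+reporting "MII Status: up":
+
+	#include <stdbool.h>
+	#include <stdio.h>
+
+	struct bond { const char *name; bool carrier; };
+
+	int main(void)
+	{
+		/* a registered net_device starts with carrier up */
+		struct bond b1 = { "bond1 (sysfs)", true };
+		struct bond b2 = { "bond2 (netlink)", true };
+
+		b1.carrier = false;	/* bond_create(): netif_carrier_off() */
+		/* unfixed bond_newlink(): no netif_carrier_off() call */
+
+		printf("%s: MII Status: %s\n", b1.name,
+		       b1.carrier ? "up" : "down");
+		printf("%s: MII Status: %s\n", b2.name,
+		       b2.carrier ? "up" : "down");
+		return 0;
+	}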
+
+Signed-off-by: Beniamino Galvani <bgalvani@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_netlink.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/bonding/bond_netlink.c
++++ b/drivers/net/bonding/bond_netlink.c
+@@ -446,7 +446,11 @@ static int bond_newlink(struct net *src_
+ if (err < 0)
+ return err;
+
+- return register_netdevice(bond_dev);
++ err = register_netdevice(bond_dev);
++
++ netif_carrier_off(bond_dev);
++
++ return err;
+ }
+
+ static size_t bond_get_size(const struct net_device *bond_dev)
--- /dev/null
+From foo@baz Fri Aug 12 09:34:33 CEST 2016
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Fri, 22 Jul 2016 14:56:20 +0300
+Subject: bridge: Fix incorrect re-injection of LLDP packets
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit baedbe55884c003819f5c8c063ec3d2569414296 ]
+
+Commit 8626c56c8279 ("bridge: fix potential use-after-free when hook
+returns QUEUE or STOLEN verdict") caused LLDP packets arriving through a
+bridge port to be re-injected to the Rx path with skb->dev set to the
+bridge device, but this breaks the lldpad daemon.
+
+The lldpad daemon opens a packet socket with protocol set to ETH_P_LLDP
+for any valid device on the system, which does not include soft
+devices such as bridge and VLAN.
+
+Since packet sockets (ptype_base) are processed in the Rx path after the
+Rx handler, LLDP packets with skb->dev set to the bridge device never
+reach the lldpad daemon.
+
+Fix this by making the bridge's Rx handler re-inject LLDP packets with
+RX_HANDLER_PASS, which effectively restores the behaviour prior to the
+mentioned commit.
+
+This means netfilter will never receive LLDP packets coming through a
+bridge port, as I don't see a way in which we can have okfn() consume
+the packet without breaking existing behaviour. I've already carried out
+a similar fix for STP packets in commit 56fae404fb2c ("bridge: Fix
+incorrect re-injection of STP packets").
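+
+For reference, a minimal standalone sketch (illustration only, not the
+bridge code itself) of how the last octet of the 01:80:C2:00:00:0X
+link-local range selects the group_fwd_mask bit, with LLDP at 0x0E:
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		/* LLDP destination MAC: 01:80:C2:00:00:0E */
+		unsigned char dest[6] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x0e };
+		unsigned int group_fwd_mask = 0;	/* no forwarding set */
+
+		if (group_fwd_mask & (1u << dest[5]))
+			printf("goto forward\n");
+		else
+			printf("re-inject with RX_HANDLER_PASS\n");
+		return 0;
+	}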
+
+Fixes: 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Reviewed-by: Jiri Pirko <jiri@mellanox.com>
+Cc: Florian Westphal <fw@strlen.de>
+Cc: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_input.c | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+--- a/net/bridge/br_input.c
++++ b/net/bridge/br_input.c
+@@ -213,6 +213,16 @@ drop:
+ }
+ EXPORT_SYMBOL_GPL(br_handle_frame_finish);
+
++static void __br_handle_local_finish(struct sk_buff *skb)
++{
++ struct net_bridge_port *p = br_port_get_rcu(skb->dev);
++ u16 vid = 0;
++
++ /* check if vlan is allowed, to avoid spoofing */
++ if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid))
++ br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
++}
++
+ /* note: already called with rcu_read_lock */
+ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+ {
+@@ -279,6 +289,14 @@ rx_handler_result_t br_handle_frame(stru
+ case 0x01: /* IEEE MAC (Pause) */
+ goto drop;
+
++ case 0x0E: /* 802.1AB LLDP */
++ fwd_mask |= p->br->group_fwd_mask;
++ if (fwd_mask & (1u << dest[5]))
++ goto forward;
++ *pskb = skb;
++ __br_handle_local_finish(skb);
++ return RX_HANDLER_PASS;
++
+ default:
+ /* Allow selective forwarding for most other protocols */
+ fwd_mask |= p->br->group_fwd_mask;
--- /dev/null
+From foo@baz Fri Aug 12 09:34:33 CEST 2016
+From: Beniamino Galvani <bgalvani@redhat.com>
+Date: Tue, 26 Jul 2016 12:24:53 +0200
+Subject: macsec: ensure rx_sa is set when validation is disabled
+
+From: Beniamino Galvani <bgalvani@redhat.com>
+
+[ Upstream commit e3a3b626010a14fe067f163c2c43409d5afcd2a9 ]
+
+macsec_decrypt() is not called when validation is disabled, and so
+macsec_skb_cb(skb)->rx_sa is not set; but it is used later in
+macsec_post_decrypt(), so ensure that it is always initialized.
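+
+The shape of the bug, as a standalone model (illustration only,
+hypothetical names): a control-block field written only on the
+decrypt path is read unconditionally afterwards, so it has to be
+initialized on the common path:
+
+	#include <stdbool.h>
+	#include <stdio.h>
+
+	struct cb_model { void *rx_sa; };
+
+	int main(void)
+	{
+		bool decrypting = false;	/* validation disabled */
+		struct cb_model cb = { (void *)0x1 };	/* stale skb->cb */
+		void *rx_sa = NULL;		/* SA looked up earlier */
+
+		cb.rx_sa = rx_sa;		/* fix: set unconditionally */
+		if (decrypting)
+			cb.rx_sa = rx_sa;	/* old: only on this path */
+
+		/* macsec_post_decrypt() reads rx_sa either way */
+		printf("rx_sa = %p\n", cb.rx_sa);
+		return 0;
+	}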
+
+Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
+Signed-off-by: Beniamino Galvani <bgalvani@redhat.com>
+Acked-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/macsec.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -914,7 +914,6 @@ static struct sk_buff *macsec_decrypt(st
+ }
+
+ macsec_skb_cb(skb)->req = req;
+- macsec_skb_cb(skb)->rx_sa = rx_sa;
+ skb->dev = dev;
+ aead_request_set_callback(req, 0, macsec_decrypt_done, skb);
+
+@@ -1141,6 +1140,8 @@ static rx_handler_result_t macsec_handle
+ }
+ }
+
++ macsec_skb_cb(skb)->rx_sa = rx_sa;
++
+ /* Disabled && !changed text => skip validation */
+ if (hdr->tci_an & MACSEC_TCI_C ||
+ secy->validate_frames != MACSEC_VALIDATE_DISABLED)
--- /dev/null
+From foo@baz Fri Aug 12 09:34:33 CEST 2016
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Fri, 15 Jul 2016 15:42:52 -0700
+Subject: net: bgmac: Fix infinite loop in bgmac_dma_tx_add()
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit e86663c475d384ab5f46cb5637e9b7ad08c5c505 ]
+
+Nothing is decrementing the index "i" while we are cleaning up the
+fragments we could not successfully transmit.
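+
+A standalone illustration of the loop fix: without the decrement the
+condition never changes and the loop spins forever, while
+"while (i-- > 0)" walks the already-mapped fragments from i-1 down
+to 0:
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		int i = 3;	/* fragments mapped before the DMA error */
+
+		/* buggy form: "while (i > 0)" with no decrement spins */
+		while (i-- > 0)
+			printf("unmapping fragment %d\n", i);
+
+		return 0;
+	}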
+
+Fixes: 9cde94506eacf ("bgmac: implement scatter/gather support")
+Reported-by: coverity (CID 1352048)
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bgmac.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -231,7 +231,7 @@ err_dma:
+ dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
+ DMA_TO_DEVICE);
+
+- while (i > 0) {
++ while (i-- > 0) {
+ int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
+ struct bgmac_slot_info *slot = &ring->slots[index];
+ u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
--- /dev/null
+From foo@baz Fri Aug 12 09:34:33 CEST 2016
+From: Mike Manning <mmanning@brocade.com>
+Date: Fri, 22 Jul 2016 18:32:11 +0100
+Subject: net: ipv6: Always leave anycast and multicast groups on link down
+
+From: Mike Manning <mmanning@brocade.com>
+
+[ Upstream commit ea06f7176413e2538d13bb85b65387d0917943d9 ]
+
+Default kernel behavior is to delete IPv6 addresses on link
+down, which entails deletion of the multicast and the
+subnet-router anycast addresses. These deletions do not
+happen with the sysctl setting to keep global IPv6 addresses
+on link down, so every link down/up causes an increment of
+the anycast and multicast refcounts. These bogus refcounts
+may stop these addresses from being removed on subsequent
+calls to delete them. The solution is to leave the groups
+for the multicast and subnet anycast on link down for the
+callflow when global IPv6 addresses are kept.
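+
+A standalone illustration of the refcount drift: joins on link up
+that are not balanced by leaves on link down push the group refcount
+up by one per flap:
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		int refcnt = 0, flap;
+
+		for (flap = 0; flap < 3; flap++) {
+			refcnt++;	/* link up: join the group */
+			/* link down: leave skipped, addresses kept */
+		}
+		/* one reference is legitimate; the rest are bogus */
+		printf("refcount after 3 flaps: %d (expected 1)\n", refcnt);
+		return 0;
+	}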
+
+Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional")
+Signed-off-by: Mike Manning <mmanning@brocade.com>
+Acked-by: David Ahern <dsa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -3563,6 +3563,10 @@ restart:
+ if (state != INET6_IFADDR_STATE_DEAD) {
+ __ipv6_ifa_notify(RTM_DELADDR, ifa);
+ inet6addr_notifier_call_chain(NETDEV_DOWN, ifa);
++ } else {
++ if (idev->cnf.forwarding)
++ addrconf_leave_anycast(ifa);
++ addrconf_leave_solict(ifa->idev, &ifa->addr);
+ }
+
+ write_lock_bh(&idev->lock);
--- /dev/null
+From foo@baz Fri Aug 12 09:34:33 CEST 2016
+From: Vegard Nossum <vegard.nossum@oracle.com>
+Date: Sat, 23 Jul 2016 07:43:50 +0200
+Subject: net/irda: fix NULL pointer dereference on memory allocation failure
+
+From: Vegard Nossum <vegard.nossum@oracle.com>
+
+[ Upstream commit d3e6952cfb7ba5f4bfa29d4803ba91f96ce1204d ]
+
+I ran into this:
+
+ kasan: CONFIG_KASAN_INLINE enabled
+ kasan: GPF could be caused by NULL-ptr deref or user memory access
+ general protection fault: 0000 [#1] PREEMPT SMP KASAN
+ CPU: 2 PID: 2012 Comm: trinity-c3 Not tainted 4.7.0-rc7+ #19
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+ task: ffff8800b745f2c0 ti: ffff880111740000 task.ti: ffff880111740000
+ RIP: 0010:[<ffffffff82bbf066>] [<ffffffff82bbf066>] irttp_connect_request+0x36/0x710
+ RSP: 0018:ffff880111747bb8 EFLAGS: 00010286
+ RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000069dd8358
+ RDX: 0000000000000009 RSI: 0000000000000027 RDI: 0000000000000048
+ RBP: ffff880111747c00 R08: 0000000000000000 R09: 0000000000000000
+ R10: 0000000069dd8358 R11: 1ffffffff0759723 R12: 0000000000000000
+ R13: ffff88011a7e4780 R14: 0000000000000027 R15: 0000000000000000
+ FS: 00007fc738404700(0000) GS:ffff88011af00000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 00007fc737fdfb10 CR3: 0000000118087000 CR4: 00000000000006e0
+ Stack:
+ 0000000000000200 ffff880111747bd8 ffffffff810ee611 ffff880119f1f220
+ ffff880119f1f4f8 ffff880119f1f4f0 ffff88011a7e4780 ffff880119f1f232
+ ffff880119f1f220 ffff880111747d58 ffffffff82bca542 0000000000000000
+ Call Trace:
+ [<ffffffff82bca542>] irda_connect+0x562/0x1190
+ [<ffffffff825ae582>] SYSC_connect+0x202/0x2a0
+ [<ffffffff825b4489>] SyS_connect+0x9/0x10
+ [<ffffffff8100334c>] do_syscall_64+0x19c/0x410
+ [<ffffffff83295ca5>] entry_SYSCALL64_slow_path+0x25/0x25
+ Code: 41 89 ca 48 89 e5 41 57 41 56 41 55 41 54 41 89 d7 53 48 89 fb 48 83 c7 48 48 89 fa 41 89 f6 48 c1 ea 03 48 83 ec 20 4c 8b 65 10 <0f> b6 04 02 84 c0 74 08 84 c0 0f 8e 4c 04 00 00 80 7b 48 00 74
+ RIP [<ffffffff82bbf066>] irttp_connect_request+0x36/0x710
+ RSP <ffff880111747bb8>
+ ---[ end trace 4cda2588bc055b30 ]---
+
+The problem is that irda_open_tsap() can fail and leave self->tsap = NULL,
+and then irttp_connect_request() almost immediately dereferences it.
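+
+The generic pattern of the fix, as a userspace analogue with
+hypothetical names (illustration only): propagate the setup error
+instead of continuing with a NULL pointer:
+
+	#include <errno.h>
+	#include <stdio.h>
+	#include <stdlib.h>
+
+	struct self_model { void *tsap; };
+
+	static int open_tsap_model(struct self_model *s)
+	{
+		s->tsap = malloc(16);	/* may fail under memory pressure */
+		return s->tsap ? 0 : -ENOMEM;
+	}
+
+	int main(void)
+	{
+		struct self_model s = { NULL };
+		int err = 0;
+
+		if (!s.tsap)
+			err = open_tsap_model(&s);
+		if (err) {		/* fixed: bail out on failure... */
+			fprintf(stderr, "connect failed: %d\n", err);
+			return 1;	/* ...instead of dereferencing NULL */
+		}
+		free(s.tsap);
+		return 0;
+	}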
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/irda/af_irda.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/irda/af_irda.c
++++ b/net/irda/af_irda.c
+@@ -1024,8 +1024,11 @@ static int irda_connect(struct socket *s
+ }
+
+ /* Check if we have opened a local TSAP */
+- if (!self->tsap)
+- irda_open_tsap(self, LSAP_ANY, addr->sir_name);
++ if (!self->tsap) {
++ err = irda_open_tsap(self, LSAP_ANY, addr->sir_name);
++ if (err)
++ goto out;
++ }
+
+ /* Move to connecting socket, start sending Connect Requests */
+ sock->state = SS_CONNECTING;
--- /dev/null
+From foo@baz Fri Aug 12 09:34:33 CEST 2016
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Tue, 5 Jul 2016 22:12:36 -0700
+Subject: ppp: defer netns reference release for ppp channel
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 205e1e255c479f3fd77446415706463b282f94e4 ]
+
+Matt reported that we have a NULL pointer dereference
+in ppp_pernet() from ppp_connect_channel(),
+i.e. pch->chan_net is NULL.
+
+This is because a parallel ppp_unregister_channel()
+could happen while we are in ppp_connect_channel(), during
+which pch->chan_net is set to NULL. Since we need a
+reference to net per channel, it makes sense to sync the
+refcnt with the lifetime of the channel; therefore we
+should release this reference when we destroy it.
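+
+A single-threaded model of the race window (illustration only; the
+real bug needs a parallel ppp_unregister_channel()):
+
+	#include <stdio.h>
+
+	struct channel_model { const char *chan_net; };
+
+	static void connect_channel_model(struct channel_model *pch)
+	{
+		if (!pch->chan_net)
+			printf("BUG: chan_net already released\n");
+		else
+			printf("connect using %s\n", pch->chan_net);
+	}
+
+	int main(void)
+	{
+		struct channel_model pch = { "netns reference" };
+
+		/* old order: reference dropped at unregister time... */
+		pch.chan_net = NULL;
+		/* ...while ppp_connect_channel() is still running: */
+		connect_channel_model(&pch);
+		return 0;
+	}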
+
+Fixes: 1f461dcdd296 ("ppp: take reference on channels netns")
+Reported-by: Matt Bennett <Matt.Bennett@alliedtelesis.co.nz>
+Cc: Paul Mackerras <paulus@samba.org>
+Cc: linux-ppp@vger.kernel.org
+Cc: Guillaume Nault <g.nault@alphalink.fr>
+Cc: Cyrill Gorcunov <gorcunov@openvz.org>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ppp/ppp_generic.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -2404,8 +2404,6 @@ ppp_unregister_channel(struct ppp_channe
+ spin_lock_bh(&pn->all_channels_lock);
+ list_del(&pch->list);
+ spin_unlock_bh(&pn->all_channels_lock);
+- put_net(pch->chan_net);
+- pch->chan_net = NULL;
+
+ pch->file.dead = 1;
+ wake_up_interruptible(&pch->file.rwait);
+@@ -2999,6 +2997,9 @@ ppp_disconnect_channel(struct channel *p
+ */
+ static void ppp_destroy_channel(struct channel *pch)
+ {
++ put_net(pch->chan_net);
++ pch->chan_net = NULL;
++
+ atomic_dec(&channel_count);
+
+ if (!pch->file.dead) {
--- /dev/null
+From foo@baz Fri Aug 12 09:34:33 CEST 2016
+From: Manish Chopra <manish.chopra@qlogic.com>
+Date: Mon, 25 Jul 2016 19:07:46 +0300
+Subject: qed: Fix setting/clearing bit in completion bitmap
+
+From: Manish Chopra <manish.chopra@qlogic.com>
+
+[ Upstream commit 59d3f1ceb69b54569685d0c34dff16a1e0816b19 ]
+
+Slowpath completion handling is incorrectly changing
+SPQ_RING_SIZE bits instead of a single one.
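+
+A userspace model of the bitmap semantics (illustration only):
+bitmap_set()/bitmap_clear() operate on a run of bits starting at the
+given position, while __set_bit()/__clear_bit() touch exactly one,
+which is what a single completion should do:
+
+	#include <stdio.h>
+
+	#define SPQ_RING_SIZE 8	/* example size for the demo */
+
+	static unsigned long set_range(unsigned long map, int pos, int n)
+	{
+		while (n--)
+			map |= 1UL << pos++;
+		return map;
+	}
+
+	int main(void)
+	{
+		int pos = 2;	/* the completed entry */
+
+		/* buggy: marks SPQ_RING_SIZE entries starting at pos */
+		printf("bitmap_set-like: %#lx\n",
+		       set_range(0, pos, SPQ_RING_SIZE));
+		/* fixed: marks only the completed entry */
+		printf("__set_bit-like:  %#lx\n", 1UL << pos);
+		return 0;
+	}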
+
+Fixes: 76a9a3642a0b ("qed: fix handling of concurrent ramrods")
+Signed-off-by: Manish Chopra <manish.chopra@qlogic.com>
+Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qlogic/qed/qed_spq.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
+@@ -791,13 +791,12 @@ int qed_spq_completion(struct qed_hwfn *
+ * in a bitmap and increasing the chain consumer only
+ * for the first successive completed entries.
+ */
+- bitmap_set(p_spq->p_comp_bitmap, pos, SPQ_RING_SIZE);
++ __set_bit(pos, p_spq->p_comp_bitmap);
+
+ while (test_bit(p_spq->comp_bitmap_idx,
+ p_spq->p_comp_bitmap)) {
+- bitmap_clear(p_spq->p_comp_bitmap,
+- p_spq->comp_bitmap_idx,
+- SPQ_RING_SIZE);
++ __clear_bit(p_spq->comp_bitmap_idx,
++ p_spq->p_comp_bitmap);
+ p_spq->comp_bitmap_idx++;
+ qed_chain_return_produced(&p_spq->chain);
+ }
libnvdimm-dax-record-the-specified-alignment-of-a-dax-device-instance.patch
libnvdimm-pfn-dax-fix-initialization-vs-autodetect-for-mode-alignment.patch
+ppp-defer-netns-reference-release-for-ppp-channel.patch
+tcp-make-challenge-acks-less-predictable.patch
+tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch
+bonding-set-carrier-off-for-devices-created-through-netlink.patch
+net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch
+vlan-use-a-valid-default-mtu-value-for-vlan-over-macsec.patch
+bridge-fix-incorrect-re-injection-of-lldp-packets.patch
+net-ipv6-always-leave-anycast-and-multicast-groups-on-link-down.patch
+net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch
+qed-fix-setting-clearing-bit-in-completion-bitmap.patch
+macsec-ensure-rx_sa-is-set-when-validation-is-disabled.patch
+tcp-consider-recv-buf-for-the-initial-window-scale.patch
--- /dev/null
+From foo@baz Fri Aug 12 09:34:33 CEST 2016
+From: Soheil Hassas Yeganeh <soheil@google.com>
+Date: Fri, 29 Jul 2016 09:34:02 -0400
+Subject: tcp: consider recv buf for the initial window scale
+
+From: Soheil Hassas Yeganeh <soheil@google.com>
+
+[ Upstream commit f626300a3e776ccc9671b0dd94698fb3aa315966 ]
+
+tcp_select_initial_window() intends to advertise a window
+scaling for the maximum possible window size. To do so,
+it considers the maximum of net.ipv4.tcp_rmem[2] and
+net.core.rmem_max as the only possible upper-bounds.
+However, users with CAP_NET_ADMIN can use SO_RCVBUFFORCE
+to set the socket's receive buffer size to values
+larger than net.ipv4.tcp_rmem[2] and net.core.rmem_max.
+Thus, SO_RCVBUFFORCE is effectively ignored by
+tcp_select_initial_window().
+
+To fix this, consider the maximum of net.ipv4.tcp_rmem[2],
+net.core.rmem_max, and the socket's initial buffer space.
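+
+A standalone sketch with assumed example values (not from the
+commit): the scale-selection loop from tcp_select_initial_window(),
+showing that a forced receive buffer now raises the advertised
+window scale:
+
+	#include <stdio.h>
+
+	static unsigned int wscale_for(unsigned int space)
+	{
+		unsigned int rcv_wscale = 0;
+
+		while (space > 65535 && rcv_wscale < 14) {
+			space >>= 1;
+			rcv_wscale++;
+		}
+		return rcv_wscale;
+	}
+
+	int main(void)
+	{
+		unsigned int tcp_rmem2 = 6291456;  /* example tcp_rmem[2] */
+		unsigned int rmem_max = 212992;    /* example rmem_max */
+		unsigned int forced = 64 << 20;    /* SO_RCVBUFFORCE value */
+		unsigned int old = tcp_rmem2 > rmem_max ? tcp_rmem2 : rmem_max;
+		unsigned int new = forced > old ? forced : old;
+
+		printf("old scale: %u\n", wscale_for(old));
+		printf("new scale: %u\n", wscale_for(new));
+		return 0;
+	}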
+
+Fixes: b0573dea1fb3 ("[NET]: Introduce SO_{SND,RCV}BUFFORCE socket options")
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Suggested-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -236,7 +236,8 @@ void tcp_select_initial_window(int __spa
+ /* Set window scaling on max possible window
+ * See RFC1323 for an explanation of the limit to 14
+ */
+- space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
++ space = max_t(u32, space, sysctl_tcp_rmem[2]);
++ space = max_t(u32, space, sysctl_rmem_max);
+ space = min_t(u32, space, *window_clamp);
+ while (space > 65535 && (*rcv_wscale) < 14) {
+ space >>= 1;
--- /dev/null
+From foo@baz Fri Aug 12 09:34:33 CEST 2016
+From: Jason Baron <jbaron@akamai.com>
+Date: Thu, 14 Jul 2016 11:38:40 -0400
+Subject: tcp: enable per-socket rate limiting of all 'challenge acks'
+
+From: Jason Baron <jbaron@akamai.com>
+
+[ Upstream commit 083ae308280d13d187512b9babe3454342a7987e ]
+
+The per-socket rate limit for 'challenge acks' was introduced in the
+context of limiting ack loops:
+
+commit f2b2c582e824 ("tcp: mitigate ACK loops for connections as tcp_sock")
+
+And I think it can be extended to rate limit all 'challenge acks' on a
+per-socket basis.
+
+Since we have the global tcp_challenge_ack_limit, this patch allows
+tcp_challenge_ack_limit to be set to a large value so that we effectively
+rely on the per-socket limit, or to a lower value that still prevents a
+single connection from consuming the entire challenge ack quota.
+
+It further moves in the direction of eliminating the global limit at some
+point, as Eric Dumazet has suggested. This is a follow-up to:
+Subject: tcp: make challenge acks less predictable
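+
+A standalone model of the core test in __tcp_oow_rate_limited(), with
+an example ratelimit value (illustration only):
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		int invalid_ratelimit = 500;	/* example: 500 ms */
+		unsigned int last_oow_ack_time = 1000;	/* ms stamps */
+		unsigned int now = 1200;
+		int elapsed = (int)(now - last_oow_ack_time);
+
+		if (elapsed >= 0 && elapsed < invalid_ratelimit)
+			printf("rate-limited: don't send yet\n");
+		else
+			printf("send challenge ack now\n");
+		return 0;
+	}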
+
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: David S. Miller <davem@davemloft.net>
+Cc: Neal Cardwell <ncardwell@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Yue Cao <ycao009@ucr.edu>
+Signed-off-by: Jason Baron <jbaron@akamai.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 39 ++++++++++++++++++++++-----------------
+ 1 file changed, 22 insertions(+), 17 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3423,6 +3423,23 @@ static int tcp_ack_update_window(struct
+ return flag;
+ }
+
++static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
++ u32 *last_oow_ack_time)
++{
++ if (*last_oow_ack_time) {
++ s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
++
++ if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
++ NET_INC_STATS_BH(net, mib_idx);
++ return true; /* rate-limited: don't send yet! */
++ }
++ }
++
++ *last_oow_ack_time = tcp_time_stamp;
++
++ return false; /* not rate-limited: go ahead, send dupack now! */
++}
++
+ /* Return true if we're currently rate-limiting out-of-window ACKs and
+ * thus shouldn't send a dupack right now. We rate-limit dupacks in
+ * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
+@@ -3436,21 +3453,9 @@ bool tcp_oow_rate_limited(struct net *ne
+ /* Data packets without SYNs are not likely part of an ACK loop. */
+ if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
+ !tcp_hdr(skb)->syn)
+- goto not_rate_limited;
+-
+- if (*last_oow_ack_time) {
+- s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
++ return false;
+
+- if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+- NET_INC_STATS_BH(net, mib_idx);
+- return true; /* rate-limited: don't send yet! */
+- }
+- }
+-
+- *last_oow_ack_time = tcp_time_stamp;
+-
+-not_rate_limited:
+- return false; /* not rate-limited: go ahead, send dupack now! */
++ return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
+ }
+
+ /* RFC 5961 7 [ACK Throttling] */
+@@ -3463,9 +3468,9 @@ static void tcp_send_challenge_ack(struc
+ u32 count, now;
+
+ /* First check our per-socket dupack rate limit. */
+- if (tcp_oow_rate_limited(sock_net(sk), skb,
+- LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
+- &tp->last_oow_ack_time))
++ if (__tcp_oow_rate_limited(sock_net(sk),
++ LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
++ &tp->last_oow_ack_time))
+ return;
+
+ /* Then check host-wide RFC 5961 rate limit. */
--- /dev/null
+From foo@baz Fri Aug 12 09:34:33 CEST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 10 Jul 2016 10:04:02 +0200
+Subject: tcp: make challenge acks less predictable
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 75ff39ccc1bd5d3c455b6822ab09e533c551f758 ]
+
+Yue Cao claims that the current host rate limiting of challenge ACKs
+(RFC 5961) could leak enough information to allow a patient attacker
+to hijack TCP sessions. He will soon provide details in an academic
+paper.
+
+This patch increases the default limit from 100 to 1000, and adds
+some randomization so that the attacker can no longer hijack
+sessions without spending a considerable amount of probes.
+
+Based on initial analysis and patch from Linus.
+
+Note that we also have per socket rate limiting, so it is tempting
+to remove the host limit in the future.
+
+v2: randomize the count of challenge acks per second, not the period.
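+
+A standalone illustration (prandom_u32_max() modeled with rand()):
+with the new default limit of 1000, the per-second budget is drawn
+uniformly from [500, 1499] instead of being a fixed, probeable
+threshold:
+
+	#include <stdio.h>
+	#include <stdlib.h>
+	#include <time.h>
+
+	int main(void)
+	{
+		unsigned int limit = 1000;	/* new default */
+		unsigned int half = (limit + 1) >> 1;
+		unsigned int count;
+
+		srand((unsigned int)time(NULL));
+		count = half + (unsigned int)rand() % limit;
+		printf("challenge ack budget this second: %u\n", count);
+		return 0;
+	}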
+
+Fixes: 282f23c6ee34 ("tcp: implement RFC 5961 3.2")
+Reported-by: Yue Cao <ycao009@ucr.edu>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -87,7 +87,7 @@ int sysctl_tcp_adv_win_scale __read_most
+ EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
+
+ /* rfc5961 challenge ack rate limiting */
+-int sysctl_tcp_challenge_ack_limit = 100;
++int sysctl_tcp_challenge_ack_limit = 1000;
+
+ int sysctl_tcp_stdurg __read_mostly;
+ int sysctl_tcp_rfc1337 __read_mostly;
+@@ -3460,7 +3460,7 @@ static void tcp_send_challenge_ack(struc
+ static u32 challenge_timestamp;
+ static unsigned int challenge_count;
+ struct tcp_sock *tp = tcp_sk(sk);
+- u32 now;
++ u32 count, now;
+
+ /* First check our per-socket dupack rate limit. */
+ if (tcp_oow_rate_limited(sock_net(sk), skb,
+@@ -3468,13 +3468,18 @@ static void tcp_send_challenge_ack(struc
+ &tp->last_oow_ack_time))
+ return;
+
+- /* Then check the check host-wide RFC 5961 rate limit. */
++ /* Then check host-wide RFC 5961 rate limit. */
+ now = jiffies / HZ;
+ if (now != challenge_timestamp) {
++ u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
++
+ challenge_timestamp = now;
+- challenge_count = 0;
++ WRITE_ONCE(challenge_count, half +
++ prandom_u32_max(sysctl_tcp_challenge_ack_limit));
+ }
+- if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
++ count = READ_ONCE(challenge_count);
++ if (count > 0) {
++ WRITE_ONCE(challenge_count, count - 1);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+ tcp_send_ack(sk);
+ }
--- /dev/null
+From foo@baz Fri Aug 12 09:34:33 CEST 2016
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Thu, 14 Jul 2016 18:00:10 +0200
+Subject: vlan: use a valid default mtu value for vlan over macsec
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 18d3df3eab23796d7f852f9c6bb60962b8372ced ]
+
+macsec can't cope with mtu frames which need vlan tag insertion, and
+the vlan device sets its default mtu equal to the underlying dev's one.
+By default, vlan over macsec devices therefore use an invalid mtu,
+dropping all the large packets.
+This patch adds a netif helper to check if an upper vlan device
+needs mtu reduction. The helper is used during vlan device
+initialization to set a valid default and during mtu updating to
+forbid invalid, too big, mtu values.
+The helper currently only checks if the lower dev is a macsec device;
+if we get more users, we need to update only the helper (possibly
+reserving an additional IFF bit).
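+
+A standalone sketch with an assumed macsec MTU (the exact value
+depends on the configuration): the new max_mtu computation subtracts
+VLAN_HLEN when the lower device reduces the vlan MTU:
+
+	#include <stdio.h>
+
+	#define VLAN_HLEN 4	/* 802.1Q tag length */
+
+	int main(void)
+	{
+		unsigned int real_mtu = 1468;	/* assumed macsec MTU */
+		int reduces = 1;	/* netif_reduces_vlan_mtu() true */
+		unsigned int max_mtu =
+			reduces ? real_mtu - VLAN_HLEN : real_mtu;
+		unsigned int requested = 1468;	/* e.g. from IFLA_MTU */
+
+		if (requested > max_mtu)
+			printf("rejected: %u > max %u\n", requested, max_mtu);
+		else
+			printf("accepted: %u\n", requested);
+		return 0;
+	}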
+
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/netdevice.h | 7 +++++++
+ net/8021q/vlan_dev.c | 10 ++++++----
+ net/8021q/vlan_netlink.c | 7 +++++--
+ 3 files changed, 18 insertions(+), 6 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -4156,6 +4156,13 @@ static inline void netif_keep_dst(struct
+ dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM);
+ }
+
++/* return true if dev can't cope with mtu frames that need vlan tag insertion */
++static inline bool netif_reduces_vlan_mtu(struct net_device *dev)
++{
++ /* TODO: reserve and use an additional IFF bit, if we get more users */
++ return dev->priv_flags & IFF_MACSEC;
++}
++
+ extern struct pernet_operations __net_initdata loopback_net_ops;
+
+ /* Logging, debugging and troubleshooting/diagnostic helpers. */
+--- a/net/8021q/vlan_dev.c
++++ b/net/8021q/vlan_dev.c
+@@ -146,10 +146,12 @@ static netdev_tx_t vlan_dev_hard_start_x
+
+ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
+ {
+- /* TODO: gotta make sure the underlying layer can handle it,
+- * maybe an IFF_VLAN_CAPABLE flag for devices?
+- */
+- if (vlan_dev_priv(dev)->real_dev->mtu < new_mtu)
++ struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
++ unsigned int max_mtu = real_dev->mtu;
++
++ if (netif_reduces_vlan_mtu(real_dev))
++ max_mtu -= VLAN_HLEN;
++ if (max_mtu < new_mtu)
+ return -ERANGE;
+
+ dev->mtu = new_mtu;
+--- a/net/8021q/vlan_netlink.c
++++ b/net/8021q/vlan_netlink.c
+@@ -118,6 +118,7 @@ static int vlan_newlink(struct net *src_
+ {
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+ struct net_device *real_dev;
++ unsigned int max_mtu;
+ __be16 proto;
+ int err;
+
+@@ -144,9 +145,11 @@ static int vlan_newlink(struct net *src_
+ if (err < 0)
+ return err;
+
++ max_mtu = netif_reduces_vlan_mtu(real_dev) ? real_dev->mtu - VLAN_HLEN :
++ real_dev->mtu;
+ if (!tb[IFLA_MTU])
+- dev->mtu = real_dev->mtu;
+- else if (dev->mtu > real_dev->mtu)
++ dev->mtu = max_mtu;
++ else if (dev->mtu > max_mtu)
+ return -EINVAL;
+
+ err = vlan_changelink(dev, tb, data);