git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 12 Aug 2016 07:35:01 +0000 (09:35 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 12 Aug 2016 07:35:01 +0000 (09:35 +0200)
added patches:
bonding-set-carrier-off-for-devices-created-through-netlink.patch
ipv4-reject-rtnh_f_dead-and-rtnh_f_linkdown-from-user-space.patch
net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch
net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch
qed-fix-setting-clearing-bit-in-completion-bitmap.patch
tcp-consider-recv-buf-for-the-initial-window-scale.patch
tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch
tcp-make-challenge-acks-less-predictable.patch

queue-4.4/bonding-set-carrier-off-for-devices-created-through-netlink.patch [new file with mode: 0644]
queue-4.4/ipv4-reject-rtnh_f_dead-and-rtnh_f_linkdown-from-user-space.patch [new file with mode: 0644]
queue-4.4/net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch [new file with mode: 0644]
queue-4.4/net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch [new file with mode: 0644]
queue-4.4/qed-fix-setting-clearing-bit-in-completion-bitmap.patch [new file with mode: 0644]
queue-4.4/series [new file with mode: 0644]
queue-4.4/tcp-consider-recv-buf-for-the-initial-window-scale.patch [new file with mode: 0644]
queue-4.4/tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch [new file with mode: 0644]
queue-4.4/tcp-make-challenge-acks-less-predictable.patch [new file with mode: 0644]

diff --git a/queue-4.4/bonding-set-carrier-off-for-devices-created-through-netlink.patch b/queue-4.4/bonding-set-carrier-off-for-devices-created-through-netlink.patch
new file mode 100644 (file)
index 0000000..bfb62fa
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Fri Aug 12 09:33:59 CEST 2016
+From: Beniamino Galvani <bgalvani@redhat.com>
+Date: Wed, 13 Jul 2016 18:25:08 +0200
+Subject: bonding: set carrier off for devices created through netlink
+
+From: Beniamino Galvani <bgalvani@redhat.com>
+
+[ Upstream commit 005db31d5f5f7c31cfdc43505d77eb3ca5cf8ec6 ]
+
+Commit e826eafa65c6 ("bonding: Call netif_carrier_off after
+register_netdevice") moved netif_carrier_off() from bond_init() to
+bond_create(), but the latter is called only for initial default
+devices and ones created through sysfs:
+
+ $ modprobe bonding
+ $ echo +bond1 > /sys/class/net/bonding_masters
+ $ ip link add bond2 type bond
+ $ grep "MII Status" /proc/net/bonding/*
+ /proc/net/bonding/bond0:MII Status: down
+ /proc/net/bonding/bond1:MII Status: down
+ /proc/net/bonding/bond2:MII Status: up
+
+Ensure that carrier is initially off also for devices created through
+netlink.
+
+Signed-off-by: Beniamino Galvani <bgalvani@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_netlink.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/bonding/bond_netlink.c
++++ b/drivers/net/bonding/bond_netlink.c
+@@ -446,7 +446,11 @@ static int bond_newlink(struct net *src_
+       if (err < 0)
+               return err;
+-      return register_netdevice(bond_dev);
++      err = register_netdevice(bond_dev);
++
++      netif_carrier_off(bond_dev);
++
++      return err;
+ }
+ static size_t bond_get_size(const struct net_device *bond_dev)
diff --git a/queue-4.4/ipv4-reject-rtnh_f_dead-and-rtnh_f_linkdown-from-user-space.patch b/queue-4.4/ipv4-reject-rtnh_f_dead-and-rtnh_f_linkdown-from-user-space.patch
new file mode 100644 (file)
index 0000000..0ae43f5
--- /dev/null
@@ -0,0 +1,91 @@
+From foo@baz Fri Aug 12 09:33:59 CEST 2016
+From: Julian Anastasov <ja@ssi.bg>
+Date: Sun, 10 Jul 2016 21:11:55 +0300
+Subject: ipv4: reject RTNH_F_DEAD and RTNH_F_LINKDOWN from user space
+
+From: Julian Anastasov <ja@ssi.bg>
+
+[ Upstream commit 80610229ef7b26615dbb6cb6e873709a60bacc9f ]
+
+Vegard Nossum is reporting a crash in fib_dump_info
+when nh_dev = NULL and fib_nhs == 1:
+
+Pid: 50, comm: netlink.exe Not tainted 4.7.0-rc5+
+RIP: 0033:[<00000000602b3d18>]
+RSP: 0000000062623890  EFLAGS: 00010202
+RAX: 0000000000000000 RBX: 000000006261b800 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: 0000000000000024 RDI: 000000006245ba00
+RBP: 00000000626238f0 R08: 000000000000029c R09: 0000000000000000
+R10: 0000000062468038 R11: 000000006245ba00 R12: 000000006245ba00
+R13: 00000000625f96c0 R14: 00000000601e16f0 R15: 0000000000000000
+Kernel panic - not syncing: Kernel mode fault at addr 0x2e0, ip 0x602b3d18
+CPU: 0 PID: 50 Comm: netlink.exe Not tainted 4.7.0-rc5+ #581
+Stack:
+ 626238f0 960226a02 00000400 000000fe
+ 62623910 600afca7 62623970 62623a48
+ 62468038 00000018 00000000 00000000
+Call Trace:
+ [<602b3e93>] rtmsg_fib+0xd3/0x190
+ [<602b6680>] fib_table_insert+0x260/0x500
+ [<602b0e5d>] inet_rtm_newroute+0x4d/0x60
+ [<60250def>] rtnetlink_rcv_msg+0x8f/0x270
+ [<60267079>] netlink_rcv_skb+0xc9/0xe0
+ [<60250d4b>] rtnetlink_rcv+0x3b/0x50
+ [<60265400>] netlink_unicast+0x1a0/0x2c0
+ [<60265e47>] netlink_sendmsg+0x3f7/0x470
+ [<6021dc9a>] sock_sendmsg+0x3a/0x90
+ [<6021e0d0>] ___sys_sendmsg+0x300/0x360
+ [<6021fa64>] __sys_sendmsg+0x54/0xa0
+ [<6021fac0>] SyS_sendmsg+0x10/0x20
+ [<6001ea68>] handle_syscall+0x88/0x90
+ [<600295fd>] userspace+0x3fd/0x500
+ [<6001ac55>] fork_handler+0x85/0x90
+
+$ addr2line -e vmlinux -i 0x602b3d18
+include/linux/inetdevice.h:222
+net/ipv4/fib_semantics.c:1264
+
+The problem happens when RTNH_F_LINKDOWN is provided from user space
+when creating routes that do not use the flag; it was caught with a
+netlink fuzzer.
+
+Currently, the kernel allows user space to set both flags
+to nh_flags and fib_flags but this is not intentional, the
+assumption was that they are not set. Fix this by rejecting
+both flags with EINVAL.
+
+Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
+Fixes: 0eeb075fad73 ("net: ipv4 sysctl option to ignore routes when nexthop link is down")
+Signed-off-by: Julian Anastasov <ja@ssi.bg>
+Cc: Andy Gospodarek <gospo@cumulusnetworks.com>
+Cc: Dinesh Dutt <ddutt@cumulusnetworks.com>
+Cc: Scott Feldman <sfeldma@gmail.com>
+Reviewed-by: Andy Gospodarek <gospo@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_semantics.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -479,6 +479,9 @@ static int fib_get_nhs(struct fib_info *
+               if (!rtnh_ok(rtnh, remaining))
+                       return -EINVAL;
++              if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
++                      return -EINVAL;
++
+               nexthop_nh->nh_flags =
+                       (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
+               nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
+@@ -1003,6 +1006,9 @@ struct fib_info *fib_create_info(struct
+       if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
+               goto err_inval;
++      if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
++              goto err_inval;
++
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH
+       if (cfg->fc_mp) {
+               nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
diff --git a/queue-4.4/net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch b/queue-4.4/net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch
new file mode 100644 (file)
index 0000000..cbc335d
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Fri Aug 12 09:33:59 CEST 2016
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Fri, 15 Jul 2016 15:42:52 -0700
+Subject: net: bgmac: Fix infinite loop in bgmac_dma_tx_add()
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit e86663c475d384ab5f46cb5637e9b7ad08c5c505 ]
+
+Nothing is decrementing the index "i" while we are cleaning up the
+fragments we could not successfully transmit.
+
+Fixes: 9cde94506eacf ("bgmac: implement scatter/gather support")
+Reported-by: coverity (CID 1352048)
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bgmac.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -219,7 +219,7 @@ err_dma:
+       dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
+                        DMA_TO_DEVICE);
+-      while (i > 0) {
++      while (i-- > 0) {
+               int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
+               struct bgmac_slot_info *slot = &ring->slots[index];
+               u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
diff --git a/queue-4.4/net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch b/queue-4.4/net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch
new file mode 100644 (file)
index 0000000..61601ba
--- /dev/null
@@ -0,0 +1,69 @@
+From foo@baz Fri Aug 12 09:33:59 CEST 2016
+From: Vegard Nossum <vegard.nossum@oracle.com>
+Date: Sat, 23 Jul 2016 07:43:50 +0200
+Subject: net/irda: fix NULL pointer dereference on memory allocation failure
+
+From: Vegard Nossum <vegard.nossum@oracle.com>
+
+[ Upstream commit d3e6952cfb7ba5f4bfa29d4803ba91f96ce1204d ]
+
+I ran into this:
+
+    kasan: CONFIG_KASAN_INLINE enabled
+    kasan: GPF could be caused by NULL-ptr deref or user memory access
+    general protection fault: 0000 [#1] PREEMPT SMP KASAN
+    CPU: 2 PID: 2012 Comm: trinity-c3 Not tainted 4.7.0-rc7+ #19
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
+    task: ffff8800b745f2c0 ti: ffff880111740000 task.ti: ffff880111740000
+    RIP: 0010:[<ffffffff82bbf066>]  [<ffffffff82bbf066>] irttp_connect_request+0x36/0x710
+    RSP: 0018:ffff880111747bb8  EFLAGS: 00010286
+    RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000069dd8358
+    RDX: 0000000000000009 RSI: 0000000000000027 RDI: 0000000000000048
+    RBP: ffff880111747c00 R08: 0000000000000000 R09: 0000000000000000
+    R10: 0000000069dd8358 R11: 1ffffffff0759723 R12: 0000000000000000
+    R13: ffff88011a7e4780 R14: 0000000000000027 R15: 0000000000000000
+    FS:  00007fc738404700(0000) GS:ffff88011af00000(0000) knlGS:0000000000000000
+    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+    CR2: 00007fc737fdfb10 CR3: 0000000118087000 CR4: 00000000000006e0
+    Stack:
+     0000000000000200 ffff880111747bd8 ffffffff810ee611 ffff880119f1f220
+     ffff880119f1f4f8 ffff880119f1f4f0 ffff88011a7e4780 ffff880119f1f232
+     ffff880119f1f220 ffff880111747d58 ffffffff82bca542 0000000000000000
+    Call Trace:
+     [<ffffffff82bca542>] irda_connect+0x562/0x1190
+     [<ffffffff825ae582>] SYSC_connect+0x202/0x2a0
+     [<ffffffff825b4489>] SyS_connect+0x9/0x10
+     [<ffffffff8100334c>] do_syscall_64+0x19c/0x410
+     [<ffffffff83295ca5>] entry_SYSCALL64_slow_path+0x25/0x25
+    Code: 41 89 ca 48 89 e5 41 57 41 56 41 55 41 54 41 89 d7 53 48 89 fb 48 83 c7 48 48 89 fa 41 89 f6 48 c1 ea 03 48 83 ec 20 4c 8b 65 10 <0f> b6 04 02 84 c0 74 08 84 c0 0f 8e 4c 04 00 00 80 7b 48 00 74
+    RIP  [<ffffffff82bbf066>] irttp_connect_request+0x36/0x710
+     RSP <ffff880111747bb8>
+    ---[ end trace 4cda2588bc055b30 ]---
+
+The problem is that irda_open_tsap() can fail and leave self->tsap = NULL,
+and then irttp_connect_request() almost immediately dereferences it.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/irda/af_irda.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/irda/af_irda.c
++++ b/net/irda/af_irda.c
+@@ -1024,8 +1024,11 @@ static int irda_connect(struct socket *s
+       }
+       /* Check if we have opened a local TSAP */
+-      if (!self->tsap)
+-              irda_open_tsap(self, LSAP_ANY, addr->sir_name);
++      if (!self->tsap) {
++              err = irda_open_tsap(self, LSAP_ANY, addr->sir_name);
++              if (err)
++                      goto out;
++      }
+       /* Move to connecting socket, start sending Connect Requests */
+       sock->state = SS_CONNECTING;
diff --git a/queue-4.4/qed-fix-setting-clearing-bit-in-completion-bitmap.patch b/queue-4.4/qed-fix-setting-clearing-bit-in-completion-bitmap.patch
new file mode 100644 (file)
index 0000000..2e73c3a
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Fri Aug 12 09:33:59 CEST 2016
+From: Manish Chopra <manish.chopra@qlogic.com>
+Date: Mon, 25 Jul 2016 19:07:46 +0300
+Subject: qed: Fix setting/clearing bit in completion bitmap
+
+From: Manish Chopra <manish.chopra@qlogic.com>
+
+[ Upstream commit 59d3f1ceb69b54569685d0c34dff16a1e0816b19 ]
+
+Slowpath completion handling is incorrectly changing
+SPQ_RING_SIZE bits instead of a single one.
+
+Fixes: 76a9a3642a0b ("qed: fix handling of concurrent ramrods")
+Signed-off-by: Manish Chopra <manish.chopra@qlogic.com>
+Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qlogic/qed/qed_spq.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
+@@ -794,13 +794,12 @@ int qed_spq_completion(struct qed_hwfn *
+                        * in a bitmap and increasing the chain consumer only
+                        * for the first successive completed entries.
+                        */
+-                      bitmap_set(p_spq->p_comp_bitmap, pos, SPQ_RING_SIZE);
++                      __set_bit(pos, p_spq->p_comp_bitmap);
+                       while (test_bit(p_spq->comp_bitmap_idx,
+                                       p_spq->p_comp_bitmap)) {
+-                              bitmap_clear(p_spq->p_comp_bitmap,
+-                                           p_spq->comp_bitmap_idx,
+-                                           SPQ_RING_SIZE);
++                              __clear_bit(p_spq->comp_bitmap_idx,
++                                          p_spq->p_comp_bitmap);
+                               p_spq->comp_bitmap_idx++;
+                               qed_chain_return_produced(&p_spq->chain);
+                       }
diff --git a/queue-4.4/series b/queue-4.4/series
new file mode 100644 (file)
index 0000000..8de901e
--- /dev/null
@@ -0,0 +1,8 @@
+tcp-make-challenge-acks-less-predictable.patch
+tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch
+ipv4-reject-rtnh_f_dead-and-rtnh_f_linkdown-from-user-space.patch
+bonding-set-carrier-off-for-devices-created-through-netlink.patch
+net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch
+net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch
+qed-fix-setting-clearing-bit-in-completion-bitmap.patch
+tcp-consider-recv-buf-for-the-initial-window-scale.patch
diff --git a/queue-4.4/tcp-consider-recv-buf-for-the-initial-window-scale.patch b/queue-4.4/tcp-consider-recv-buf-for-the-initial-window-scale.patch
new file mode 100644 (file)
index 0000000..6c5ad5d
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Fri Aug 12 09:33:59 CEST 2016
+From: Soheil Hassas Yeganeh <soheil@google.com>
+Date: Fri, 29 Jul 2016 09:34:02 -0400
+Subject: tcp: consider recv buf for the initial window scale
+
+From: Soheil Hassas Yeganeh <soheil@google.com>
+
+[ Upstream commit f626300a3e776ccc9671b0dd94698fb3aa315966 ]
+
+tcp_select_initial_window() intends to advertise a window
+scaling for the maximum possible window size. To do so,
+it considers the maximum of net.ipv4.tcp_rmem[2] and
+net.core.rmem_max as the only possible upper-bounds.
+However, users with CAP_NET_ADMIN can use SO_RCVBUFFORCE
+to set the socket's receive buffer size to values
+larger than net.ipv4.tcp_rmem[2] and net.core.rmem_max.
+Thus, SO_RCVBUFFORCE is effectively ignored by
+tcp_select_initial_window().
+
+To fix this, consider the maximum of net.ipv4.tcp_rmem[2],
+net.core.rmem_max and socket's initial buffer space.
+
+Fixes: b0573dea1fb3 ("[NET]: Introduce SO_{SND,RCV}BUFFORCE socket options")
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Suggested-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -239,7 +239,8 @@ void tcp_select_initial_window(int __spa
+               /* Set window scaling on max possible window
+                * See RFC1323 for an explanation of the limit to 14
+                */
+-              space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
++              space = max_t(u32, space, sysctl_tcp_rmem[2]);
++              space = max_t(u32, space, sysctl_rmem_max);
+               space = min_t(u32, space, *window_clamp);
+               while (space > 65535 && (*rcv_wscale) < 14) {
+                       space >>= 1;
diff --git a/queue-4.4/tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch b/queue-4.4/tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch
new file mode 100644 (file)
index 0000000..65302c4
--- /dev/null
@@ -0,0 +1,102 @@
+From foo@baz Fri Aug 12 09:33:59 CEST 2016
+From: Jason Baron <jbaron@akamai.com>
+Date: Thu, 14 Jul 2016 11:38:40 -0400
+Subject: tcp: enable per-socket rate limiting of all 'challenge acks'
+
+From: Jason Baron <jbaron@akamai.com>
+
+[ Upstream commit 083ae308280d13d187512b9babe3454342a7987e ]
+
+The per-socket rate limit for 'challenge acks' was introduced in the
+context of limiting ack loops:
+
+commit f2b2c582e824 ("tcp: mitigate ACK loops for connections as tcp_sock")
+
+And I think it can be extended to rate limit all 'challenge acks' on a
+per-socket basis.
+
+Since we have the global tcp_challenge_ack_limit, this patch allows for
+tcp_challenge_ack_limit to be set to a large value and effectively rely on
+the per-socket limit, or set tcp_challenge_ack_limit to a lower value and
+still prevent a single connection from consuming the entire challenge ack
+quota.
+
+It further moves in the direction of eliminating the global limit at some
+point, as Eric Dumazet has suggested. This is a follow-up to:
+Subject: tcp: make challenge acks less predictable
+
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: David S. Miller <davem@davemloft.net>
+Cc: Neal Cardwell <ncardwell@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Yue Cao <ycao009@ucr.edu>
+Signed-off-by: Jason Baron <jbaron@akamai.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |   39 ++++++++++++++++++++++-----------------
+ 1 file changed, 22 insertions(+), 17 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3390,6 +3390,23 @@ static int tcp_ack_update_window(struct
+       return flag;
+ }
++static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
++                                 u32 *last_oow_ack_time)
++{
++      if (*last_oow_ack_time) {
++              s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
++
++              if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
++                      NET_INC_STATS_BH(net, mib_idx);
++                      return true;    /* rate-limited: don't send yet! */
++              }
++      }
++
++      *last_oow_ack_time = tcp_time_stamp;
++
++      return false;   /* not rate-limited: go ahead, send dupack now! */
++}
++
+ /* Return true if we're currently rate-limiting out-of-window ACKs and
+  * thus shouldn't send a dupack right now. We rate-limit dupacks in
+  * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
+@@ -3403,21 +3420,9 @@ bool tcp_oow_rate_limited(struct net *ne
+       /* Data packets without SYNs are not likely part of an ACK loop. */
+       if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
+           !tcp_hdr(skb)->syn)
+-              goto not_rate_limited;
+-
+-      if (*last_oow_ack_time) {
+-              s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
++              return false;
+-              if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+-                      NET_INC_STATS_BH(net, mib_idx);
+-                      return true;    /* rate-limited: don't send yet! */
+-              }
+-      }
+-
+-      *last_oow_ack_time = tcp_time_stamp;
+-
+-not_rate_limited:
+-      return false;   /* not rate-limited: go ahead, send dupack now! */
++      return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
+ }
+ /* RFC 5961 7 [ACK Throttling] */
+@@ -3430,9 +3435,9 @@ static void tcp_send_challenge_ack(struc
+       u32 count, now;
+       /* First check our per-socket dupack rate limit. */
+-      if (tcp_oow_rate_limited(sock_net(sk), skb,
+-                               LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
+-                               &tp->last_oow_ack_time))
++      if (__tcp_oow_rate_limited(sock_net(sk),
++                                 LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
++                                 &tp->last_oow_ack_time))
+               return;
+       /* Then check host-wide RFC 5961 rate limit. */
diff --git a/queue-4.4/tcp-make-challenge-acks-less-predictable.patch b/queue-4.4/tcp-make-challenge-acks-less-predictable.patch
new file mode 100644 (file)
index 0000000..f56f20f
--- /dev/null
@@ -0,0 +1,81 @@
+From foo@baz Fri Aug 12 09:33:59 CEST 2016
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 10 Jul 2016 10:04:02 +0200
+Subject: tcp: make challenge acks less predictable
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 75ff39ccc1bd5d3c455b6822ab09e533c551f758 ]
+
+Yue Cao claims that current host rate limiting of challenge ACKS
+(RFC 5961) could leak enough information to allow a patient attacker
+to hijack TCP sessions. He will soon provide details in an academic
+paper.
+
+This patch increases the default limit from 100 to 1000, and adds
+some randomization so that the attacker can no longer hijack
+sessions without spending a considerable amount of probes.
+
+Based on initial analysis and patch from Linus.
+
+Note that we also have per socket rate limiting, so it is tempting
+to remove the host limit in the future.
+
+v2: randomize the count of challenge acks per second, not the period.
+
+Fixes: 282f23c6ee34 ("tcp: implement RFC 5961 3.2")
+Reported-by: Yue Cao <ycao009@ucr.edu>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Yuchung Cheng <ycheng@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |   15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -89,7 +89,7 @@ int sysctl_tcp_adv_win_scale __read_most
+ EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
+ /* rfc5961 challenge ack rate limiting */
+-int sysctl_tcp_challenge_ack_limit = 100;
++int sysctl_tcp_challenge_ack_limit = 1000;
+ int sysctl_tcp_stdurg __read_mostly;
+ int sysctl_tcp_rfc1337 __read_mostly;
+@@ -3427,7 +3427,7 @@ static void tcp_send_challenge_ack(struc
+       static u32 challenge_timestamp;
+       static unsigned int challenge_count;
+       struct tcp_sock *tp = tcp_sk(sk);
+-      u32 now;
++      u32 count, now;
+       /* First check our per-socket dupack rate limit. */
+       if (tcp_oow_rate_limited(sock_net(sk), skb,
+@@ -3435,13 +3435,18 @@ static void tcp_send_challenge_ack(struc
+                                &tp->last_oow_ack_time))
+               return;
+-      /* Then check the check host-wide RFC 5961 rate limit. */
++      /* Then check host-wide RFC 5961 rate limit. */
+       now = jiffies / HZ;
+       if (now != challenge_timestamp) {
++              u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
++
+               challenge_timestamp = now;
+-              challenge_count = 0;
++              WRITE_ONCE(challenge_count, half +
++                         prandom_u32_max(sysctl_tcp_challenge_ack_limit));
+       }
+-      if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
++      count = READ_ONCE(challenge_count);
++      if (count > 0) {
++              WRITE_ONCE(challenge_count, count - 1);
+               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+               tcp_send_ack(sk);
+       }