From: Greg Kroah-Hartman Date: Fri, 12 Aug 2016 07:35:01 +0000 (+0200) Subject: 4.4-stable patches X-Git-Tag: v3.14.76~24 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=cea89570ca086430fdd3a52a3b7a011a661b7dbf;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: bonding-set-carrier-off-for-devices-created-through-netlink.patch ipv4-reject-rtnh_f_dead-and-rtnh_f_linkdown-from-user-space.patch net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch qed-fix-setting-clearing-bit-in-completion-bitmap.patch tcp-consider-recv-buf-for-the-initial-window-scale.patch tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch tcp-make-challenge-acks-less-predictable.patch --- diff --git a/queue-4.4/bonding-set-carrier-off-for-devices-created-through-netlink.patch b/queue-4.4/bonding-set-carrier-off-for-devices-created-through-netlink.patch new file mode 100644 index 00000000000..bfb62fa96c2 --- /dev/null +++ b/queue-4.4/bonding-set-carrier-off-for-devices-created-through-netlink.patch @@ -0,0 +1,47 @@ +From foo@baz Fri Aug 12 09:33:59 CEST 2016 +From: Beniamino Galvani +Date: Wed, 13 Jul 2016 18:25:08 +0200 +Subject: bonding: set carrier off for devices created through netlink + +From: Beniamino Galvani + +[ Upstream commit 005db31d5f5f7c31cfdc43505d77eb3ca5cf8ec6 ] + +Commit e826eafa65c6 ("bonding: Call netif_carrier_off after +register_netdevice") moved netif_carrier_off() from bond_init() to +bond_create(), but the latter is called only for initial default +devices and ones created through sysfs: + + $ modprobe bonding + $ echo +bond1 > /sys/class/net/bonding_masters + $ ip link add bond2 type bond + $ grep "MII Status" /proc/net/bonding/* + /proc/net/bonding/bond0:MII Status: down + /proc/net/bonding/bond1:MII Status: down + /proc/net/bonding/bond2:MII Status: up + +Ensure that carrier is initially off also for devices created through +netlink. 
+ +Signed-off-by: Beniamino Galvani +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_netlink.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/net/bonding/bond_netlink.c ++++ b/drivers/net/bonding/bond_netlink.c +@@ -446,7 +446,11 @@ static int bond_newlink(struct net *src_ + if (err < 0) + return err; + +- return register_netdevice(bond_dev); ++ err = register_netdevice(bond_dev); ++ ++ netif_carrier_off(bond_dev); ++ ++ return err; + } + + static size_t bond_get_size(const struct net_device *bond_dev) diff --git a/queue-4.4/ipv4-reject-rtnh_f_dead-and-rtnh_f_linkdown-from-user-space.patch b/queue-4.4/ipv4-reject-rtnh_f_dead-and-rtnh_f_linkdown-from-user-space.patch new file mode 100644 index 00000000000..0ae43f524be --- /dev/null +++ b/queue-4.4/ipv4-reject-rtnh_f_dead-and-rtnh_f_linkdown-from-user-space.patch @@ -0,0 +1,91 @@ +From foo@baz Fri Aug 12 09:33:59 CEST 2016 +From: Julian Anastasov +Date: Sun, 10 Jul 2016 21:11:55 +0300 +Subject: ipv4: reject RTNH_F_DEAD and RTNH_F_LINKDOWN from user space + +From: Julian Anastasov + +[ Upstream commit 80610229ef7b26615dbb6cb6e873709a60bacc9f ] + +Vegard Nossum is reporting for a crash in fib_dump_info +when nh_dev = NULL and fib_nhs == 1: + +Pid: 50, comm: netlink.exe Not tainted 4.7.0-rc5+ +RIP: 0033:[<00000000602b3d18>] +RSP: 0000000062623890 EFLAGS: 00010202 +RAX: 0000000000000000 RBX: 000000006261b800 RCX: 0000000000000000 +RDX: 0000000000000000 RSI: 0000000000000024 RDI: 000000006245ba00 +RBP: 00000000626238f0 R08: 000000000000029c R09: 0000000000000000 +R10: 0000000062468038 R11: 000000006245ba00 R12: 000000006245ba00 +R13: 00000000625f96c0 R14: 00000000601e16f0 R15: 0000000000000000 +Kernel panic - not syncing: Kernel mode fault at addr 0x2e0, ip 0x602b3d18 +CPU: 0 PID: 50 Comm: netlink.exe Not tainted 4.7.0-rc5+ #581 +Stack: + 626238f0 960226a02 00000400 000000fe + 62623910 600afca7 62623970 62623a48 + 62468038 00000018 00000000 
00000000 +Call Trace: + [<602b3e93>] rtmsg_fib+0xd3/0x190 + [<602b6680>] fib_table_insert+0x260/0x500 + [<602b0e5d>] inet_rtm_newroute+0x4d/0x60 + [<60250def>] rtnetlink_rcv_msg+0x8f/0x270 + [<60267079>] netlink_rcv_skb+0xc9/0xe0 + [<60250d4b>] rtnetlink_rcv+0x3b/0x50 + [<60265400>] netlink_unicast+0x1a0/0x2c0 + [<60265e47>] netlink_sendmsg+0x3f7/0x470 + [<6021dc9a>] sock_sendmsg+0x3a/0x90 + [<6021e0d0>] ___sys_sendmsg+0x300/0x360 + [<6021fa64>] __sys_sendmsg+0x54/0xa0 + [<6021fac0>] SyS_sendmsg+0x10/0x20 + [<6001ea68>] handle_syscall+0x88/0x90 + [<600295fd>] userspace+0x3fd/0x500 + [<6001ac55>] fork_handler+0x85/0x90 + +$ addr2line -e vmlinux -i 0x602b3d18 +include/linux/inetdevice.h:222 +net/ipv4/fib_semantics.c:1264 + +Problem happens when RTNH_F_LINKDOWN is provided from user space +when creating routes that do not use the flag, caught with a +netlink fuzzer. + +Currently, the kernel allows user space to set both flags +to nh_flags and fib_flags but this is not intentional, the +assumption was that they are not set. Fix this by rejecting +both flags with EINVAL. + +Reported-by: Vegard Nossum +Fixes: 0eeb075fad73 ("net: ipv4 sysctl option to ignore routes when nexthop link is down") +Signed-off-by: Julian Anastasov +Cc: Andy Gospodarek +Cc: Dinesh Dutt +Cc: Scott Feldman +Reviewed-by: Andy Gospodarek +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_semantics.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -479,6 +479,9 @@ static int fib_get_nhs(struct fib_info * + if (!rtnh_ok(rtnh, remaining)) + return -EINVAL; + ++ if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) ++ return -EINVAL; ++ + nexthop_nh->nh_flags = + (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; + nexthop_nh->nh_oif = rtnh->rtnh_ifindex; +@@ -1003,6 +1006,9 @@ struct fib_info *fib_create_info(struct + if (fib_props[cfg->fc_type].scope > cfg->fc_scope) + goto err_inval; + ++ if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) ++ goto err_inval; ++ + #ifdef CONFIG_IP_ROUTE_MULTIPATH + if (cfg->fc_mp) { + nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); diff --git a/queue-4.4/net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch b/queue-4.4/net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch new file mode 100644 index 00000000000..cbc335d9379 --- /dev/null +++ b/queue-4.4/net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch @@ -0,0 +1,32 @@ +From foo@baz Fri Aug 12 09:33:59 CEST 2016 +From: Florian Fainelli +Date: Fri, 15 Jul 2016 15:42:52 -0700 +Subject: net: bgmac: Fix infinite loop in bgmac_dma_tx_add() + +From: Florian Fainelli + +[ Upstream commit e86663c475d384ab5f46cb5637e9b7ad08c5c505 ] + +Nothing is decrementing the index "i" while we are cleaning up the +fragments we could not successfully transmit. + +Fixes: 9cde94506eacf ("bgmac: implement scatter/gather support") +Reported-by: coverity (CID 1352048) +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bgmac.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/bgmac.c ++++ b/drivers/net/ethernet/broadcom/bgmac.c +@@ -219,7 +219,7 @@ err_dma: + dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb), + DMA_TO_DEVICE); + +- while (i > 0) { ++ while (i-- > 0) { + int index = (ring->end + i) % BGMAC_TX_RING_SLOTS; + struct bgmac_slot_info *slot = &ring->slots[index]; + u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1); diff --git a/queue-4.4/net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch b/queue-4.4/net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch new file mode 100644 index 00000000000..61601bab779 --- /dev/null +++ b/queue-4.4/net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch @@ -0,0 +1,69 @@ +From foo@baz Fri Aug 12 09:33:59 CEST 2016 +From: Vegard Nossum +Date: Sat, 23 Jul 2016 07:43:50 +0200 +Subject: net/irda: fix NULL pointer dereference on memory allocation failure + +From: Vegard Nossum + +[ Upstream commit d3e6952cfb7ba5f4bfa29d4803ba91f96ce1204d ] + +I ran into this: + + kasan: CONFIG_KASAN_INLINE enabled + kasan: GPF could be caused by NULL-ptr deref or user memory access + general protection fault: 0000 [#1] PREEMPT SMP KASAN + CPU: 2 PID: 2012 Comm: trinity-c3 Not tainted 4.7.0-rc7+ #19 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 + task: ffff8800b745f2c0 ti: ffff880111740000 task.ti: ffff880111740000 + RIP: 0010:[] [] irttp_connect_request+0x36/0x710 + RSP: 0018:ffff880111747bb8 EFLAGS: 00010286 + RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000069dd8358 + RDX: 0000000000000009 RSI: 0000000000000027 RDI: 0000000000000048 + RBP: ffff880111747c00 R08: 0000000000000000 R09: 0000000000000000 + R10: 0000000069dd8358 R11: 1ffffffff0759723 R12: 0000000000000000 + R13: ffff88011a7e4780 R14: 0000000000000027 
R15: 0000000000000000 + FS: 00007fc738404700(0000) GS:ffff88011af00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007fc737fdfb10 CR3: 0000000118087000 CR4: 00000000000006e0 + Stack: + 0000000000000200 ffff880111747bd8 ffffffff810ee611 ffff880119f1f220 + ffff880119f1f4f8 ffff880119f1f4f0 ffff88011a7e4780 ffff880119f1f232 + ffff880119f1f220 ffff880111747d58 ffffffff82bca542 0000000000000000 + Call Trace: + [] irda_connect+0x562/0x1190 + [] SYSC_connect+0x202/0x2a0 + [] SyS_connect+0x9/0x10 + [] do_syscall_64+0x19c/0x410 + [] entry_SYSCALL64_slow_path+0x25/0x25 + Code: 41 89 ca 48 89 e5 41 57 41 56 41 55 41 54 41 89 d7 53 48 89 fb 48 83 c7 48 48 89 fa 41 89 f6 48 c1 ea 03 48 83 ec 20 4c 8b 65 10 <0f> b6 04 02 84 c0 74 08 84 c0 0f 8e 4c 04 00 00 80 7b 48 00 74 + RIP [] irttp_connect_request+0x36/0x710 + RSP + ---[ end trace 4cda2588bc055b30 ]--- + +The problem is that irda_open_tsap() can fail and leave self->tsap = NULL, +and then irttp_connect_request() almost immediately dereferences it. + +Cc: stable@vger.kernel.org +Signed-off-by: Vegard Nossum +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/irda/af_irda.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/irda/af_irda.c ++++ b/net/irda/af_irda.c +@@ -1024,8 +1024,11 @@ static int irda_connect(struct socket *s + } + + /* Check if we have opened a local TSAP */ +- if (!self->tsap) +- irda_open_tsap(self, LSAP_ANY, addr->sir_name); ++ if (!self->tsap) { ++ err = irda_open_tsap(self, LSAP_ANY, addr->sir_name); ++ if (err) ++ goto out; ++ } + + /* Move to connecting socket, start sending Connect Requests */ + sock->state = SS_CONNECTING; diff --git a/queue-4.4/qed-fix-setting-clearing-bit-in-completion-bitmap.patch b/queue-4.4/qed-fix-setting-clearing-bit-in-completion-bitmap.patch new file mode 100644 index 00000000000..2e73c3a0730 --- /dev/null +++ b/queue-4.4/qed-fix-setting-clearing-bit-in-completion-bitmap.patch @@ -0,0 +1,40 @@ +From foo@baz Fri Aug 12 09:33:59 CEST 2016 +From: Manish Chopra +Date: Mon, 25 Jul 2016 19:07:46 +0300 +Subject: qed: Fix setting/clearing bit in completion bitmap + +From: Manish Chopra + +[ Upstream commit 59d3f1ceb69b54569685d0c34dff16a1e0816b19 ] + +Slowpath completion handling is incorrectly changing +SPQ_RING_SIZE bits instead of a single one. + +Fixes: 76a9a3642a0b ("qed: fix handling of concurrent ramrods") +Signed-off-by: Manish Chopra +Signed-off-by: Yuval Mintz +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qed/qed_spq.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c +@@ -794,13 +794,12 @@ int qed_spq_completion(struct qed_hwfn * + * in a bitmap and increasing the chain consumer only + * for the first successive completed entries. 
+ */ +- bitmap_set(p_spq->p_comp_bitmap, pos, SPQ_RING_SIZE); ++ __set_bit(pos, p_spq->p_comp_bitmap); + + while (test_bit(p_spq->comp_bitmap_idx, + p_spq->p_comp_bitmap)) { +- bitmap_clear(p_spq->p_comp_bitmap, +- p_spq->comp_bitmap_idx, +- SPQ_RING_SIZE); ++ __clear_bit(p_spq->comp_bitmap_idx, ++ p_spq->p_comp_bitmap); + p_spq->comp_bitmap_idx++; + qed_chain_return_produced(&p_spq->chain); + } diff --git a/queue-4.4/series b/queue-4.4/series new file mode 100644 index 00000000000..8de901e68f1 --- /dev/null +++ b/queue-4.4/series @@ -0,0 +1,8 @@ +tcp-make-challenge-acks-less-predictable.patch +tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch +ipv4-reject-rtnh_f_dead-and-rtnh_f_linkdown-from-user-space.patch +bonding-set-carrier-off-for-devices-created-through-netlink.patch +net-bgmac-fix-infinite-loop-in-bgmac_dma_tx_add.patch +net-irda-fix-null-pointer-dereference-on-memory-allocation-failure.patch +qed-fix-setting-clearing-bit-in-completion-bitmap.patch +tcp-consider-recv-buf-for-the-initial-window-scale.patch diff --git a/queue-4.4/tcp-consider-recv-buf-for-the-initial-window-scale.patch b/queue-4.4/tcp-consider-recv-buf-for-the-initial-window-scale.patch new file mode 100644 index 00000000000..6c5ad5d79fe --- /dev/null +++ b/queue-4.4/tcp-consider-recv-buf-for-the-initial-window-scale.patch @@ -0,0 +1,44 @@ +From foo@baz Fri Aug 12 09:33:59 CEST 2016 +From: Soheil Hassas Yeganeh +Date: Fri, 29 Jul 2016 09:34:02 -0400 +Subject: tcp: consider recv buf for the initial window scale + +From: Soheil Hassas Yeganeh + +[ Upstream commit f626300a3e776ccc9671b0dd94698fb3aa315966 ] + +tcp_select_initial_window() intends to advertise a window +scaling for the maximum possible window size. To do so, +it considers the maximum of net.ipv4.tcp_rmem[2] and +net.core.rmem_max as the only possible upper-bounds. 
+However, users with CAP_NET_ADMIN can use SO_RCVBUFFORCE +to set the socket's receive buffer size to values +larger than net.ipv4.tcp_rmem[2] and net.core.rmem_max. +Thus, SO_RCVBUFFORCE is effectively ignored by +tcp_select_initial_window(). + +To fix this, consider the maximum of net.ipv4.tcp_rmem[2], +net.core.rmem_max and socket's initial buffer space. + +Fixes: b0573dea1fb3 ("[NET]: Introduce SO_{SND,RCV}BUFFORCE socket options") +Signed-off-by: Soheil Hassas Yeganeh +Suggested-by: Neal Cardwell +Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -239,7 +239,8 @@ void tcp_select_initial_window(int __spa + /* Set window scaling on max possible window + * See RFC1323 for an explanation of the limit to 14 + */ +- space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max); ++ space = max_t(u32, space, sysctl_tcp_rmem[2]); ++ space = max_t(u32, space, sysctl_rmem_max); + space = min_t(u32, space, *window_clamp); + while (space > 65535 && (*rcv_wscale) < 14) { + space >>= 1; diff --git a/queue-4.4/tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch b/queue-4.4/tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch new file mode 100644 index 00000000000..65302c45346 --- /dev/null +++ b/queue-4.4/tcp-enable-per-socket-rate-limiting-of-all-challenge-acks.patch @@ -0,0 +1,102 @@ +From foo@baz Fri Aug 12 09:33:59 CEST 2016 +From: Jason Baron +Date: Thu, 14 Jul 2016 11:38:40 -0400 +Subject: tcp: enable per-socket rate limiting of all 'challenge acks' + +From: Jason Baron + +[ Upstream commit 083ae308280d13d187512b9babe3454342a7987e ] + +The per-socket rate limit for 'challenge acks' was introduced in the +context of limiting ack loops: + +commit f2b2c582e824 ("tcp: mitigate ACK loops for connections as tcp_sock") + +And I think it can be extended to rate limit all 
'challenge acks' on a +per-socket basis. + +Since we have the global tcp_challenge_ack_limit, this patch allows for +tcp_challenge_ack_limit to be set to a large value and effectively rely on +the per-socket limit, or set tcp_challenge_ack_limit to a lower value and +still prevent a single connection from consuming the entire challenge ack +quota. + +It further moves in the direction of eliminating the global limit at some +point, as Eric Dumazet has suggested. This is a follow-up to: +Subject: tcp: make challenge acks less predictable + +Cc: Eric Dumazet +Cc: David S. Miller +Cc: Neal Cardwell +Cc: Yuchung Cheng +Cc: Yue Cao +Signed-off-by: Jason Baron +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 39 ++++++++++++++++++++++----------------- + 1 file changed, 22 insertions(+), 17 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -3390,6 +3390,23 @@ static int tcp_ack_update_window(struct + return flag; + } + ++static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, ++ u32 *last_oow_ack_time) ++{ ++ if (*last_oow_ack_time) { ++ s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); ++ ++ if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { ++ NET_INC_STATS_BH(net, mib_idx); ++ return true; /* rate-limited: don't send yet! */ ++ } ++ } ++ ++ *last_oow_ack_time = tcp_time_stamp; ++ ++ return false; /* not rate-limited: go ahead, send dupack now! */ ++} ++ + /* Return true if we're currently rate-limiting out-of-window ACKs and + * thus shouldn't send a dupack right now. We rate-limit dupacks in + * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS +@@ -3403,21 +3420,9 @@ bool tcp_oow_rate_limited(struct net *ne + /* Data packets without SYNs are not likely part of an ACK loop. 
*/ + if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) && + !tcp_hdr(skb)->syn) +- goto not_rate_limited; +- +- if (*last_oow_ack_time) { +- s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); ++ return false; + +- if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { +- NET_INC_STATS_BH(net, mib_idx); +- return true; /* rate-limited: don't send yet! */ +- } +- } +- +- *last_oow_ack_time = tcp_time_stamp; +- +-not_rate_limited: +- return false; /* not rate-limited: go ahead, send dupack now! */ ++ return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time); + } + + /* RFC 5961 7 [ACK Throttling] */ +@@ -3430,9 +3435,9 @@ static void tcp_send_challenge_ack(struc + u32 count, now; + + /* First check our per-socket dupack rate limit. */ +- if (tcp_oow_rate_limited(sock_net(sk), skb, +- LINUX_MIB_TCPACKSKIPPEDCHALLENGE, +- &tp->last_oow_ack_time)) ++ if (__tcp_oow_rate_limited(sock_net(sk), ++ LINUX_MIB_TCPACKSKIPPEDCHALLENGE, ++ &tp->last_oow_ack_time)) + return; + + /* Then check host-wide RFC 5961 rate limit. */ diff --git a/queue-4.4/tcp-make-challenge-acks-less-predictable.patch b/queue-4.4/tcp-make-challenge-acks-less-predictable.patch new file mode 100644 index 00000000000..f56f20fdee1 --- /dev/null +++ b/queue-4.4/tcp-make-challenge-acks-less-predictable.patch @@ -0,0 +1,81 @@ +From foo@baz Fri Aug 12 09:33:59 CEST 2016 +From: Eric Dumazet +Date: Sun, 10 Jul 2016 10:04:02 +0200 +Subject: tcp: make challenge acks less predictable + +From: Eric Dumazet + +[ Upstream commit 75ff39ccc1bd5d3c455b6822ab09e533c551f758 ] + +Yue Cao claims that current host rate limiting of challenge ACKS +(RFC 5961) could leak enough information to allow a patient attacker +to hijack TCP sessions. He will soon provide details in an academic +paper. + +This patch increases the default limit from 100 to 1000, and adds +some randomization so that the attacker can no longer hijack +sessions without spending a considerable amount of probes. 
+ +Based on initial analysis and patch from Linus. + +Note that we also have per socket rate limiting, so it is tempting +to remove the host limit in the future. + +v2: randomize the count of challenge acks per second, not the period. + +Fixes: 282f23c6ee34 ("tcp: implement RFC 5961 3.2") +Reported-by: Yue Cao +Signed-off-by: Eric Dumazet +Suggested-by: Linus Torvalds +Cc: Yuchung Cheng +Cc: Neal Cardwell +Acked-by: Neal Cardwell +Acked-by: Yuchung Cheng +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -89,7 +89,7 @@ int sysctl_tcp_adv_win_scale __read_most + EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); + + /* rfc5961 challenge ack rate limiting */ +-int sysctl_tcp_challenge_ack_limit = 100; ++int sysctl_tcp_challenge_ack_limit = 1000; + + int sysctl_tcp_stdurg __read_mostly; + int sysctl_tcp_rfc1337 __read_mostly; +@@ -3427,7 +3427,7 @@ static void tcp_send_challenge_ack(struc + static u32 challenge_timestamp; + static unsigned int challenge_count; + struct tcp_sock *tp = tcp_sk(sk); +- u32 now; ++ u32 count, now; + + /* First check our per-socket dupack rate limit. */ + if (tcp_oow_rate_limited(sock_net(sk), skb, +@@ -3435,13 +3435,18 @@ static void tcp_send_challenge_ack(struc + &tp->last_oow_ack_time)) + return; + +- /* Then check the check host-wide RFC 5961 rate limit. */ ++ /* Then check host-wide RFC 5961 rate limit. 
*/ + now = jiffies / HZ; + if (now != challenge_timestamp) { ++ u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1; ++ + challenge_timestamp = now; +- challenge_count = 0; ++ WRITE_ONCE(challenge_count, half + ++ prandom_u32_max(sysctl_tcp_challenge_ack_limit)); + } +- if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { ++ count = READ_ONCE(challenge_count); ++ if (count > 0) { ++ WRITE_ONCE(challenge_count, count - 1); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + tcp_send_ack(sk); + }