From 993ecce03098ade6e9ac835d4625051adf54cc13 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 18 Dec 2019 13:38:09 +0100 Subject: [PATCH] 4.4-stable patches added patches: inet-protect-against-too-small-mtu-values.patch net-bridge-deny-dev_set_mac_address-when-unregistering.patch net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch --- ...protect-against-too-small-mtu-values.patch | 176 ++++++++++++++++++ ...wakeup-processes-in-module_wq-on-mod.patch | 9 +- ...v_set_mac_address-when-unregistering.patch | 76 ++++++++ ...ernet-ti-cpsw-fix-extra-rx-interrupt.patch | 36 ++++ queue-4.4/series | 8 + ...d-syncookies-due-to-stale-timestamps.patch | 107 +++++++++++ ...l-overestimation-of-tcp-option-space.patch | 46 +++++ ...s_recent_stamp-with-read-write-_once.patch | 50 +++++ ...-of-acks-not-matching-a-child-socket.patch | 76 ++++++++ ...of-tipc-module-init-and-exit-routine.patch | 159 ++++++++++++++++ 10 files changed, 736 insertions(+), 7 deletions(-) create mode 100644 queue-4.4/inet-protect-against-too-small-mtu-values.patch create mode 100644 queue-4.4/net-bridge-deny-dev_set_mac_address-when-unregistering.patch create mode 100644 queue-4.4/net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch create mode 100644 queue-4.4/tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch create mode 100644 queue-4.4/tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch create mode 100644 queue-4.4/tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch create mode 100644 queue-4.4/tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch create mode 100644 queue-4.4/tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch diff 
--git a/queue-4.4/inet-protect-against-too-small-mtu-values.patch b/queue-4.4/inet-protect-against-too-small-mtu-values.patch new file mode 100644 index 00000000000..e50da1385e3 --- /dev/null +++ b/queue-4.4/inet-protect-against-too-small-mtu-values.patch @@ -0,0 +1,176 @@ +From foo@baz Tue 17 Dec 2019 09:44:32 PM CET +From: Eric Dumazet +Date: Thu, 5 Dec 2019 20:43:46 -0800 +Subject: inet: protect against too small mtu values. + +From: Eric Dumazet + +[ Upstream commit 501a90c945103e8627406763dac418f20f3837b2 ] + +syzbot was once again able to crash a host by setting a very small mtu +on loopback device. + +Let's make inetdev_valid_mtu() available in include/net/ip.h, +and use it in ip_setup_cork(), so that we protect both ip_append_page() +and __ip_append_data() + +Also add a READ_ONCE() when the device mtu is read. + +Pairs this lockless read with one WRITE_ONCE() in __dev_set_mtu(), +even if other code paths might write over this field. + +Add a big comment in include/linux/netdevice.h about dev->mtu +needing READ_ONCE()/WRITE_ONCE() annotations. + +Hopefully we will add the missing ones in followup patches. + +[1] + +refcount_t: saturated; leaking memory. +WARNING: CPU: 0 PID: 9464 at lib/refcount.c:22 refcount_warn_saturate+0x138/0x1f0 lib/refcount.c:22 +Kernel panic - not syncing: panic_on_warn set ... 
+CPU: 0 PID: 9464 Comm: syz-executor850 Not tainted 5.4.0-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x197/0x210 lib/dump_stack.c:118 + panic+0x2e3/0x75c kernel/panic.c:221 + __warn.cold+0x2f/0x3e kernel/panic.c:582 + report_bug+0x289/0x300 lib/bug.c:195 + fixup_bug arch/x86/kernel/traps.c:174 [inline] + fixup_bug arch/x86/kernel/traps.c:169 [inline] + do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:267 + do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:286 + invalid_op+0x23/0x30 arch/x86/entry/entry_64.S:1027 +RIP: 0010:refcount_warn_saturate+0x138/0x1f0 lib/refcount.c:22 +Code: 06 31 ff 89 de e8 c8 f5 e6 fd 84 db 0f 85 6f ff ff ff e8 7b f4 e6 fd 48 c7 c7 e0 71 4f 88 c6 05 56 a6 a4 06 01 e8 c7 a8 b7 fd <0f> 0b e9 50 ff ff ff e8 5c f4 e6 fd 0f b6 1d 3d a6 a4 06 31 ff 89 +RSP: 0018:ffff88809689f550 EFLAGS: 00010286 +RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 +RDX: 0000000000000000 RSI: ffffffff815e4336 RDI: ffffed1012d13e9c +RBP: ffff88809689f560 R08: ffff88809c50a3c0 R09: fffffbfff15d31b1 +R10: fffffbfff15d31b0 R11: ffffffff8ae98d87 R12: 0000000000000001 +R13: 0000000000040100 R14: ffff888099041104 R15: ffff888218d96e40 + refcount_add include/linux/refcount.h:193 [inline] + skb_set_owner_w+0x2b6/0x410 net/core/sock.c:1999 + sock_wmalloc+0xf1/0x120 net/core/sock.c:2096 + ip_append_page+0x7ef/0x1190 net/ipv4/ip_output.c:1383 + udp_sendpage+0x1c7/0x480 net/ipv4/udp.c:1276 + inet_sendpage+0xdb/0x150 net/ipv4/af_inet.c:821 + kernel_sendpage+0x92/0xf0 net/socket.c:3794 + sock_sendpage+0x8b/0xc0 net/socket.c:936 + pipe_to_sendpage+0x2da/0x3c0 fs/splice.c:458 + splice_from_pipe_feed fs/splice.c:512 [inline] + __splice_from_pipe+0x3ee/0x7c0 fs/splice.c:636 + splice_from_pipe+0x108/0x170 fs/splice.c:671 + generic_splice_sendpage+0x3c/0x50 fs/splice.c:842 + do_splice_from fs/splice.c:861 [inline] + 
direct_splice_actor+0x123/0x190 fs/splice.c:1035 + splice_direct_to_actor+0x3b4/0xa30 fs/splice.c:990 + do_splice_direct+0x1da/0x2a0 fs/splice.c:1078 + do_sendfile+0x597/0xd00 fs/read_write.c:1464 + __do_sys_sendfile64 fs/read_write.c:1525 [inline] + __se_sys_sendfile64 fs/read_write.c:1511 [inline] + __x64_sys_sendfile64+0x1dd/0x220 fs/read_write.c:1511 + do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x441409 +Code: e8 ac e8 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007fffb64c4f78 EFLAGS: 00000246 ORIG_RAX: 0000000000000028 +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000441409 +RDX: 0000000000000000 RSI: 0000000000000006 RDI: 0000000000000005 +RBP: 0000000000073b8a R08: 0000000000000010 R09: 0000000000000010 +R10: 0000000000010001 R11: 0000000000000246 R12: 0000000000402180 +R13: 0000000000402210 R14: 0000000000000000 R15: 0000000000000000 +Kernel Offset: disabled +Rebooting in 86400 seconds.. + +Fixes: 1470ddf7f8ce ("inet: Remove explicit write references to sk/inet in ip_append_data") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 5 +++++ + include/net/ip.h | 5 +++++ + net/core/dev.c | 3 ++- + net/ipv4/devinet.c | 5 ----- + net/ipv4/ip_output.c | 14 +++++++++----- + 5 files changed, 21 insertions(+), 11 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -1617,6 +1617,11 @@ struct net_device { + unsigned char if_port; + unsigned char dma; + ++ /* Note : dev->mtu is often read without holding a lock. ++ * Writers usually hold RTNL. ++ * It is recommended to use READ_ONCE() to annotate the reads, ++ * and to use WRITE_ONCE() to annotate the writes. 
++ */ + unsigned int mtu; + unsigned short type; + unsigned short hard_header_len; +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -596,4 +596,9 @@ extern int sysctl_icmp_msgs_burst; + int ip_misc_proc_init(void); + #endif + ++static inline bool inetdev_valid_mtu(unsigned int mtu) ++{ ++ return likely(mtu >= IPV4_MIN_MTU); ++} ++ + #endif /* _IP_H */ +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -6126,7 +6126,8 @@ static int __dev_set_mtu(struct net_devi + if (ops->ndo_change_mtu) + return ops->ndo_change_mtu(dev, new_mtu); + +- dev->mtu = new_mtu; ++ /* Pairs with all the lockless reads of dev->mtu in the stack */ ++ WRITE_ONCE(dev->mtu, new_mtu); + return 0; + } + +--- a/net/ipv4/devinet.c ++++ b/net/ipv4/devinet.c +@@ -1364,11 +1364,6 @@ skip: + } + } + +-static bool inetdev_valid_mtu(unsigned int mtu) +-{ +- return mtu >= IPV4_MIN_MTU; +-} +- + static void inetdev_send_gratuitous_arp(struct net_device *dev, + struct in_device *in_dev) + +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -1145,13 +1145,17 @@ static int ip_setup_cork(struct sock *sk + rt = *rtp; + if (unlikely(!rt)) + return -EFAULT; +- /* +- * We steal reference to this route, caller should not release it +- */ +- *rtp = NULL; ++ + cork->fragsize = ip_sk_use_pmtu(sk) ? +- dst_mtu(&rt->dst) : rt->dst.dev->mtu; ++ dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); ++ ++ if (!inetdev_valid_mtu(cork->fragsize)) ++ return -ENETUNREACH; ++ + cork->dst = &rt->dst; ++ /* We stole this route, caller should not release it. 
*/ ++ *rtp = NULL; ++ + cork->length = 0; + cork->ttl = ipc->ttl; + cork->tos = ipc->tos; diff --git a/queue-4.4/kernel-module.c-wakeup-processes-in-module_wq-on-mod.patch b/queue-4.4/kernel-module.c-wakeup-processes-in-module_wq-on-mod.patch index a0944d3637d..d5b689d59fb 100644 --- a/queue-4.4/kernel-module.c-wakeup-processes-in-module_wq-on-mod.patch +++ b/queue-4.4/kernel-module.c-wakeup-processes-in-module_wq-on-mod.patch @@ -40,14 +40,12 @@ Signed-off-by: Konstantin Khorenko Signed-off-by: Jessica Yu Signed-off-by: Sasha Levin --- - kernel/module.c | 2 ++ + kernel/module.c | 2 ++ 1 file changed, 2 insertions(+) -diff --git a/kernel/module.c b/kernel/module.c -index b940b2825b7b3..2f695b6e1a3e0 100644 --- a/kernel/module.c +++ b/kernel/module.c -@@ -1014,6 +1014,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, +@@ -1014,6 +1014,8 @@ SYSCALL_DEFINE2(delete_module, const cha strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); free_module(mod); @@ -56,6 +54,3 @@ index b940b2825b7b3..2f695b6e1a3e0 100644 return 0; out: mutex_unlock(&module_mutex); --- -2.20.1 - diff --git a/queue-4.4/net-bridge-deny-dev_set_mac_address-when-unregistering.patch b/queue-4.4/net-bridge-deny-dev_set_mac_address-when-unregistering.patch new file mode 100644 index 00000000000..ccf29a11bb0 --- /dev/null +++ b/queue-4.4/net-bridge-deny-dev_set_mac_address-when-unregistering.patch @@ -0,0 +1,76 @@ +From foo@baz Wed 18 Dec 2019 01:33:13 PM CET +From: Nikolay Aleksandrov +Date: Tue, 3 Dec 2019 16:48:06 +0200 +Subject: net: bridge: deny dev_set_mac_address() when unregistering + +From: Nikolay Aleksandrov + +[ Upstream commit c4b4c421857dc7b1cf0dccbd738472360ff2cd70 ] + +We have an interesting memory leak in the bridge when it is being +unregistered and is a slave to a master device which would change the +mac of its slaves on unregister (e.g. bond, team). 
This is a very +unusual setup but we do end up leaking 1 fdb entry because +dev_set_mac_address() would cause the bridge to insert the new mac address +into its table after all fdbs are flushed, i.e. after dellink() on the +bridge has finished and we call NETDEV_UNREGISTER the bond/team would +release it and will call dev_set_mac_address() to restore its original +address and that in turn will add an fdb in the bridge. +One fix is to check for the bridge dev's reg_state in its +ndo_set_mac_address callback and return an error if the bridge is not in +NETREG_REGISTERED. + +Easy steps to reproduce: + 1. add bond in mode != A/B + 2. add any slave to the bond + 3. add bridge dev as a slave to the bond + 4. destroy the bridge device + +Trace: + unreferenced object 0xffff888035c4d080 (size 128): + comm "ip", pid 4068, jiffies 4296209429 (age 1413.753s) + hex dump (first 32 bytes): + 41 1d c9 36 80 88 ff ff 00 00 00 00 00 00 00 00 A..6............ + d2 19 c9 5e 3f d7 00 00 00 00 00 00 00 00 00 00 ...^?........... 
+ backtrace: + [<00000000ddb525dc>] kmem_cache_alloc+0x155/0x26f + [<00000000633ff1e0>] fdb_create+0x21/0x486 [bridge] + [<0000000092b17e9c>] fdb_insert+0x91/0xdc [bridge] + [<00000000f2a0f0ff>] br_fdb_change_mac_address+0xb3/0x175 [bridge] + [<000000001de02dbd>] br_stp_change_bridge_id+0xf/0xff [bridge] + [<00000000ac0e32b1>] br_set_mac_address+0x76/0x99 [bridge] + [<000000006846a77f>] dev_set_mac_address+0x63/0x9b + [<00000000d30738fc>] __bond_release_one+0x3f6/0x455 [bonding] + [<00000000fc7ec01d>] bond_netdev_event+0x2f2/0x400 [bonding] + [<00000000305d7795>] notifier_call_chain+0x38/0x56 + [<0000000028885d4a>] call_netdevice_notifiers+0x1e/0x23 + [<000000008279477b>] rollback_registered_many+0x353/0x6a4 + [<0000000018ef753a>] unregister_netdevice_many+0x17/0x6f + [<00000000ba854b7a>] rtnl_delete_link+0x3c/0x43 + [<00000000adf8618d>] rtnl_dellink+0x1dc/0x20a + [<000000009b6395fd>] rtnetlink_rcv_msg+0x23d/0x268 + +Fixes: 43598813386f ("bridge: add local MAC address to forwarding table (v2)") +Reported-by: syzbot+2add91c08eb181fea1bf@syzkaller.appspotmail.com +Signed-off-by: Nikolay Aleksandrov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_device.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/bridge/br_device.c ++++ b/net/bridge/br_device.c +@@ -199,6 +199,12 @@ static int br_set_mac_address(struct net + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + ++ /* dev_set_mac_address() can be called by a master device on bridge's ++ * NETDEV_UNREGISTER, but since it's being destroyed do nothing ++ */ ++ if (dev->reg_state != NETREG_REGISTERED) ++ return -EBUSY; ++ + spin_lock_bh(&br->lock); + if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) { + /* Mac address will be changed in br_stp_change_bridge_id(). 
*/ diff --git a/queue-4.4/net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch b/queue-4.4/net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch new file mode 100644 index 00000000000..25337ae302e --- /dev/null +++ b/queue-4.4/net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch @@ -0,0 +1,36 @@ +From foo@baz Wed 18 Dec 2019 01:33:13 PM CET +From: Grygorii Strashko +Date: Fri, 6 Dec 2019 14:28:20 +0200 +Subject: net: ethernet: ti: cpsw: fix extra rx interrupt + +From: Grygorii Strashko + +[ Upstream commit 51302f77bedab8768b761ed1899c08f89af9e4e2 ] + +Now RX interrupt is triggered twice every time, because in +cpsw_rx_interrupt() it is acked first and then disabled. So there will be +pending interrupt always, when RX interrupt is enabled again in NAPI +handler. + +Fix it by first disabling IRQ and then do the ack. + +Fixes: 870915feabdc ("drivers: net: cpsw: remove disable_irq/enable_irq as irq can be masked from cpsw itself") +Signed-off-by: Grygorii Strashko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/ti/cpsw.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/ti/cpsw.c ++++ b/drivers/net/ethernet/ti/cpsw.c +@@ -777,8 +777,8 @@ static irqreturn_t cpsw_rx_interrupt(int + { + struct cpsw_priv *priv = dev_id; + +- cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); + writel(0, &priv->wr_regs->rx_en); ++ cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); + + if (priv->quirk_irq) { + disable_irq_nosync(priv->irqs_table[0]); diff --git a/queue-4.4/series b/queue-4.4/series index dba33146671..9d126d56c12 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -140,3 +140,11 @@ blk-mq-make-sure-that-line-break-can-be-printed.patch workqueue-fix-missing-kfree-rescuer-in-destroy_workqueue.patch sunrpc-fix-crash-when-cache_head-become-valid-before.patch kernel-module.c-wakeup-processes-in-module_wq-on-mod.patch +net-bridge-deny-dev_set_mac_address-when-unregistering.patch 
+tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch +tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch +inet-protect-against-too-small-mtu-values.patch +tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch +tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch +tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch +net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch diff --git a/queue-4.4/tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch b/queue-4.4/tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch new file mode 100644 index 00000000000..f5e94fa1929 --- /dev/null +++ b/queue-4.4/tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch @@ -0,0 +1,107 @@ +From foo@baz Tue 17 Dec 2019 09:30:11 PM CET +From: Guillaume Nault +Date: Fri, 6 Dec 2019 12:38:36 +0100 +Subject: tcp: fix rejected syncookies due to stale timestamps + +From: Guillaume Nault + +[ Upstream commit 04d26e7b159a396372646a480f4caa166d1b6720 ] + +If no synflood happens for a long enough period of time, then the +synflood timestamp isn't refreshed and jiffies can advance so much +that time_after32() can't accurately compare them any more. + +Therefore, we can end up in a situation where time_after32(now, +last_overflow + HZ) returns false, just because these two values are +too far apart. In that case, the synflood timestamp isn't updated as +it should be, which can trick tcp_synq_no_recent_overflow() into +rejecting valid syncookies. + +For example, let's consider the following scenario on a system +with HZ=1000: + + * The synflood timestamp is 0, either because that's the timestamp + of the last synflood or, more commonly, because we're working with + a freshly created socket. + + * We receive a new SYN, which triggers synflood protection. Let's say + that this happens when jiffies == 2147484649 (that is, + 'synflood timestamp' + HZ + 2^31 + 1). 
+ + * Then tcp_synq_overflow() doesn't update the synflood timestamp, + because time_after32(2147484649, 1000) returns false. + With: + - 2147484649: the value of jiffies, aka. 'now'. + - 1000: the value of 'last_overflow' + HZ. + + * A bit later, we receive the ACK completing the 3WHS. But + cookie_v[46]_check() rejects it because tcp_synq_no_recent_overflow() + says that we're not under synflood. That's because + time_after32(2147484649, 120000) returns false. + With: + - 2147484649: the value of jiffies, aka. 'now'. + - 120000: the value of 'last_overflow' + TCP_SYNCOOKIE_VALID. + + Of course, in reality jiffies would have increased a bit, but this + condition will last for the next 119 seconds, which is far enough + to accommodate jiffies' growth. + +Fix this by updating the overflow timestamp whenever jiffies isn't +within the [last_overflow, last_overflow + HZ] range. That shouldn't +have any performance impact since the update still happens at most once +per second. + +Now we're guaranteed to have fresh timestamps while under synflood, so +tcp_synq_no_recent_overflow() can safely use it with time_after32() in +such situations. + +Stale timestamps can still make tcp_synq_no_recent_overflow() return +the wrong verdict when not under synflood. This will be handled in the +next patch. + +For 64-bit architectures, the problem was introduced with the +conversion of ->tw_ts_recent_stamp to a 32-bit integer by commit +cca9bab1b72c ("tcp: use monotonic timestamps for PAWS"). +The problem has always been there on 32-bit architectures. + +Fixes: cca9bab1b72c ("tcp: use monotonic timestamps for PAWS") +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Guillaume Nault +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/time.h | 12 ++++++++++++ + include/net/tcp.h | 2 +- + 2 files changed, 13 insertions(+), 1 deletion(-) + +--- a/include/linux/time.h ++++ b/include/linux/time.h +@@ -262,4 +262,16 @@ static __always_inline void timespec_add + a->tv_nsec = ns; + } + ++/** ++ * time_between32 - check if a 32-bit timestamp is within a given time range ++ * @t: the time which may be within [l,h] ++ * @l: the lower bound of the range ++ * @h: the higher bound of the range ++ * ++ * time_between32(t, l, h) returns true if @l <= @t <= @h. All operands are ++ * treated as 32-bit integers. ++ * ++ * Equivalent to !(time_before32(@t, @l) || time_after32(@t, @h)). ++ */ ++#define time_between32(t, l, h) ((u32)(h) - (u32)(l) >= (u32)(t) - (u32)(l)) + #endif +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -505,7 +505,7 @@ static inline void tcp_synq_overflow(con + unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + unsigned long now = jiffies; + +- if (time_after(now, last_overflow + HZ)) ++ if (!time_between32(now, last_overflow, last_overflow + HZ)) + tcp_sk(sk)->rx_opt.ts_recent_stamp = now; + } + diff --git a/queue-4.4/tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch b/queue-4.4/tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch new file mode 100644 index 00000000000..92eda369658 --- /dev/null +++ b/queue-4.4/tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch @@ -0,0 +1,46 @@ +From foo@baz Wed 18 Dec 2019 01:33:13 PM CET +From: Eric Dumazet +Date: Thu, 5 Dec 2019 10:10:15 -0800 +Subject: tcp: md5: fix potential overestimation of TCP option space + +From: Eric Dumazet + +[ Upstream commit 9424e2e7ad93ffffa88f882c9bc5023570904b55 ] + +Back in 2008, Adam Langley fixed the corner case of packets for flows +having all of the following options : MD5 TS SACK + +Since MD5 needs 20 bytes, and TS needs 12 bytes, no sack block +can be cooked from the remaining 8 bytes. 
+ +tcp_established_options() correctly sets opts->num_sack_blocks +to zero, but returns 36 instead of 32. + +This means TCP cooks packets with 4 extra bytes at the end +of options, containing uninitialized bytes. + +Fixes: 33ad798c924b ("tcp: options clean up") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Acked-by: Neal Cardwell +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -710,8 +710,9 @@ static unsigned int tcp_established_opti + min_t(unsigned int, eff_sacks, + (remaining - TCPOLEN_SACK_BASE_ALIGNED) / + TCPOLEN_SACK_PERBLOCK); +- size += TCPOLEN_SACK_BASE_ALIGNED + +- opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; ++ if (likely(opts->num_sack_blocks)) ++ size += TCPOLEN_SACK_BASE_ALIGNED + ++ opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; + } + + return size; diff --git a/queue-4.4/tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch b/queue-4.4/tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch new file mode 100644 index 00000000000..7a82ea9ee95 --- /dev/null +++ b/queue-4.4/tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch @@ -0,0 +1,50 @@ +From foo@baz Tue 17 Dec 2019 09:44:32 PM CET +From: Guillaume Nault +Date: Fri, 6 Dec 2019 12:38:49 +0100 +Subject: tcp: Protect accesses to .ts_recent_stamp with {READ,WRITE}_ONCE() + +From: Guillaume Nault + +[ Upstream commit 721c8dafad26ccfa90ff659ee19755e3377b829d ] + +Syncookies borrow the ->rx_opt.ts_recent_stamp field to store the +timestamp of the last synflood. Protect them with READ_ONCE() and +WRITE_ONCE() since reads and writes aren't serialised. + +Use of .rx_opt.ts_recent_stamp for storing the synflood timestamp was +introduced by a0f82f64e269 ("syncookies: remove last_synq_overflow from +struct tcp_sock"). 
But unprotected accesses were already there when +timestamp was stored in .last_synq_overflow. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Guillaume Nault +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tcp.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -502,17 +502,17 @@ struct sock *cookie_v4_check(struct sock + */ + static inline void tcp_synq_overflow(const struct sock *sk) + { +- unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; ++ unsigned long last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp); + unsigned long now = jiffies; + + if (!time_between32(now, last_overflow, last_overflow + HZ)) +- tcp_sk(sk)->rx_opt.ts_recent_stamp = now; ++ WRITE_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp, now); + } + + /* syncookies: no recent synqueue overflow on this listening socket? */ + static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) + { +- unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; ++ unsigned long last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp); + + /* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID, + * then we're under synflood. However, we have to use diff --git a/queue-4.4/tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch b/queue-4.4/tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch new file mode 100644 index 00000000000..4c30c2fcec2 --- /dev/null +++ b/queue-4.4/tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch @@ -0,0 +1,76 @@ +From foo@baz Tue 17 Dec 2019 09:44:32 PM CET +From: Guillaume Nault +Date: Fri, 6 Dec 2019 12:38:43 +0100 +Subject: tcp: tighten acceptance of ACKs not matching a child socket + +From: Guillaume Nault + +[ Upstream commit cb44a08f8647fd2e8db5cc9ac27cd8355fa392d8 ] + +When no synflood occurs, the synflood timestamp isn't updated. 
+Therefore it can be so old that time_after32() can consider it to be +in the future. + +That's a problem for tcp_synq_no_recent_overflow() as it may report +that a recent overflow occurred while, in fact, it's just that jiffies +has grown past 'last_overflow' + TCP_SYNCOOKIE_VALID + 2^31. + +Spurious detection of recent overflows leads to extra syncookie +verification in cookie_v[46]_check(). At that point, the verification +should fail and the packet be dropped. But we should have dropped the +packet earlier as we didn't even send a syncookie. + +Let's refine tcp_synq_no_recent_overflow() to report a recent overflow +only if jiffies is within the +[last_overflow, last_overflow + TCP_SYNCOOKIE_VALID] interval. This +way, no spurious recent overflow is reported when jiffies wraps and +'last_overflow' becomes in the future from the point of view of +time_after32(). + +However, if jiffies wraps and enters the +[last_overflow, last_overflow + TCP_SYNCOOKIE_VALID] interval (with +'last_overflow' being a stale synflood timestamp), then +tcp_synq_no_recent_overflow() still erroneously reports an +overflow. In such cases, we have to rely on syncookie verification +to drop the packet. We unfortunately have no way to differentiate +between a fresh and a stale syncookie timestamp. + +In practice, using last_overflow as lower bound is problematic. +If the synflood timestamp is concurrently updated between the time +we read jiffies and the moment we store the timestamp in +'last_overflow', then 'now' becomes smaller than 'last_overflow' and +tcp_synq_no_recent_overflow() returns true, potentially dropping a +valid syncookie. + +Reading jiffies after loading the timestamp could fix the problem, +but that'd require a memory barrier. Let's just accommodate for +potential timestamp growth instead and extend the interval using +'last_overflow - HZ' as lower bound. + +Signed-off-by: Guillaume Nault +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tcp.h | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -514,7 +514,15 @@ static inline bool tcp_synq_no_recent_ov + { + unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + +- return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID); ++ /* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID, ++ * then we're under synflood. However, we have to use ++ * 'last_overflow - HZ' as lower bound. That's because a concurrent ++ * tcp_synq_overflow() could update .ts_recent_stamp after we read ++ * jiffies but before we store .ts_recent_stamp into last_overflow, ++ * which could lead to rejecting a valid syncookie. ++ */ ++ return !time_between32(jiffies, last_overflow - HZ, ++ last_overflow + TCP_SYNCOOKIE_VALID); + } + + static inline u32 tcp_cookie_time(void) diff --git a/queue-4.4/tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch b/queue-4.4/tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch new file mode 100644 index 00000000000..13573b9fc43 --- /dev/null +++ b/queue-4.4/tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch @@ -0,0 +1,159 @@ +From foo@baz Wed 18 Dec 2019 01:33:13 PM CET +From: Taehee Yoo +Date: Fri, 6 Dec 2019 05:25:48 +0000 +Subject: tipc: fix ordering of tipc module init and exit routine + +From: Taehee Yoo + +[ Upstream commit 9cf1cd8ee3ee09ef2859017df2058e2f53c5347f ] + +In order to set/get/dump, the tipc uses the generic netlink +infrastructure. So, when tipc module is inserted, init function +calls genl_register_family(). +After genl_register_family(), set/get/dump commands are immediately +allowed and these callbacks internally use the net_generic. +net_generic is allocated by register_pernet_device() but this +is called after genl_register_family() in the __init function. +So, these callbacks would use un-initialized net_generic. 
+ +Test commands: + #SHELL1 + while : + do + modprobe tipc + modprobe -rv tipc + done + + #SHELL2 + while : + do + tipc link list + done + +Splat looks like: +[ 59.616322][ T2788] kasan: CONFIG_KASAN_INLINE enabled +[ 59.617234][ T2788] kasan: GPF could be caused by NULL-ptr deref or user memory access +[ 59.618398][ T2788] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI +[ 59.619389][ T2788] CPU: 3 PID: 2788 Comm: tipc Not tainted 5.4.0+ #194 +[ 59.620231][ T2788] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 +[ 59.621428][ T2788] RIP: 0010:tipc_bcast_get_broadcast_mode+0x131/0x310 [tipc] +[ 59.622379][ T2788] Code: c7 c6 ef 8b 38 c0 65 ff 0d 84 83 c9 3f e8 d7 a5 f2 e3 48 8d bb 38 11 00 00 48 b8 00 00 00 00 +[ 59.622550][ T2780] NET: Registered protocol family 30 +[ 59.624627][ T2788] RSP: 0018:ffff88804b09f578 EFLAGS: 00010202 +[ 59.624630][ T2788] RAX: dffffc0000000000 RBX: 0000000000000011 RCX: 000000008bc66907 +[ 59.624631][ T2788] RDX: 0000000000000229 RSI: 000000004b3cf4cc RDI: 0000000000001149 +[ 59.624633][ T2788] RBP: ffff88804b09f588 R08: 0000000000000003 R09: fffffbfff4fb3df1 +[ 59.624635][ T2788] R10: fffffbfff50318f8 R11: ffff888066cadc18 R12: ffffffffa6cc2f40 +[ 59.624637][ T2788] R13: 1ffff11009613eba R14: ffff8880662e9328 R15: ffff8880662e9328 +[ 59.624639][ T2788] FS: 00007f57d8f7b740(0000) GS:ffff88806cc00000(0000) knlGS:0000000000000000 +[ 59.624645][ T2788] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 59.625875][ T2780] tipc: Started in single node mode +[ 59.626128][ T2788] CR2: 00007f57d887a8c0 CR3: 000000004b140002 CR4: 00000000000606e0 +[ 59.633991][ T2788] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 59.635195][ T2788] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 59.636478][ T2788] Call Trace: +[ 59.637025][ T2788] tipc_nl_add_bc_link+0x179/0x1470 [tipc] +[ 59.638219][ T2788] ? lock_downgrade+0x6e0/0x6e0 +[ 59.638923][ T2788] ? 
__tipc_nl_add_link+0xf90/0xf90 [tipc] +[ 59.639533][ T2788] ? tipc_nl_node_dump_link+0x318/0xa50 [tipc] +[ 59.640160][ T2788] ? mutex_lock_io_nested+0x1380/0x1380 +[ 59.640746][ T2788] tipc_nl_node_dump_link+0x4fd/0xa50 [tipc] +[ 59.641356][ T2788] ? tipc_nl_node_reset_link_stats+0x340/0x340 [tipc] +[ 59.642088][ T2788] ? __skb_ext_del+0x270/0x270 +[ 59.642594][ T2788] genl_lock_dumpit+0x85/0xb0 +[ 59.643050][ T2788] netlink_dump+0x49c/0xed0 +[ 59.643529][ T2788] ? __netlink_sendskb+0xc0/0xc0 +[ 59.644044][ T2788] ? __netlink_dump_start+0x190/0x800 +[ 59.644617][ T2788] ? __mutex_unlock_slowpath+0xd0/0x670 +[ 59.645177][ T2788] __netlink_dump_start+0x5a0/0x800 +[ 59.645692][ T2788] genl_rcv_msg+0xa75/0xe90 +[ 59.646144][ T2788] ? __lock_acquire+0xdfe/0x3de0 +[ 59.646692][ T2788] ? genl_family_rcv_msg_attrs_parse+0x320/0x320 +[ 59.647340][ T2788] ? genl_lock_dumpit+0xb0/0xb0 +[ 59.647821][ T2788] ? genl_unlock+0x20/0x20 +[ 59.648290][ T2788] ? genl_parallel_done+0xe0/0xe0 +[ 59.648787][ T2788] ? find_held_lock+0x39/0x1d0 +[ 59.649276][ T2788] ? genl_rcv+0x15/0x40 +[ 59.649722][ T2788] ? lock_contended+0xcd0/0xcd0 +[ 59.650296][ T2788] netlink_rcv_skb+0x121/0x350 +[ 59.650828][ T2788] ? genl_family_rcv_msg_attrs_parse+0x320/0x320 +[ 59.651491][ T2788] ? netlink_ack+0x940/0x940 +[ 59.651953][ T2788] ? lock_acquire+0x164/0x3b0 +[ 59.652449][ T2788] genl_rcv+0x24/0x40 +[ 59.652841][ T2788] netlink_unicast+0x421/0x600 +[ ... ] + +Fixes: 7e4369057806 ("tipc: fix a slab object leak") +Fixes: a62fbccecd62 ("tipc: make subscriber server support net namespace") +Signed-off-by: Taehee Yoo +Acked-by: Jon Maloy +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/core.c | 29 +++++++++++++++-------------- + 1 file changed, 15 insertions(+), 14 deletions(-) + +--- a/net/tipc/core.c ++++ b/net/tipc/core.c +@@ -117,14 +117,6 @@ static int __init tipc_init(void) + TIPC_CRITICAL_IMPORTANCE; + sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; + +- err = tipc_netlink_start(); +- if (err) +- goto out_netlink; +- +- err = tipc_netlink_compat_start(); +- if (err) +- goto out_netlink_compat; +- + err = tipc_register_sysctl(); + if (err) + goto out_sysctl; +@@ -145,8 +137,21 @@ static int __init tipc_init(void) + if (err) + goto out_bearer; + ++ err = tipc_netlink_start(); ++ if (err) ++ goto out_netlink; ++ ++ err = tipc_netlink_compat_start(); ++ if (err) ++ goto out_netlink_compat; ++ + pr_info("Started in single node mode\n"); + return 0; ++ ++out_netlink_compat: ++ tipc_netlink_stop(); ++out_netlink: ++ tipc_bearer_cleanup(); + out_bearer: + unregister_pernet_device(&tipc_topsrv_net_ops); + out_pernet_topsrv: +@@ -156,22 +161,18 @@ out_socket: + out_pernet: + tipc_unregister_sysctl(); + out_sysctl: +- tipc_netlink_compat_stop(); +-out_netlink_compat: +- tipc_netlink_stop(); +-out_netlink: + pr_err("Unable to start in single node mode\n"); + return err; + } + + static void __exit tipc_exit(void) + { ++ tipc_netlink_compat_stop(); ++ tipc_netlink_stop(); + tipc_bearer_cleanup(); + unregister_pernet_device(&tipc_topsrv_net_ops); + tipc_socket_stop(); + unregister_pernet_device(&tipc_net_ops); +- tipc_netlink_stop(); +- tipc_netlink_compat_stop(); + tipc_unregister_sysctl(); + + pr_info("Deactivated\n"); -- 2.47.3