git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 18 Dec 2019 12:32:05 +0000 (13:32 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 18 Dec 2019 12:32:05 +0000 (13:32 +0100)
added patches:
inet-protect-against-too-small-mtu-values.patch
net-bridge-deny-dev_set_mac_address-when-unregistering.patch
net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch
openvswitch-support-asymmetric-conntrack.patch
tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch
tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch
tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch
tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch
tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch

queue-4.9/inet-protect-against-too-small-mtu-values.patch [new file with mode: 0644]
queue-4.9/net-bridge-deny-dev_set_mac_address-when-unregistering.patch [new file with mode: 0644]
queue-4.9/net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch [new file with mode: 0644]
queue-4.9/openvswitch-support-asymmetric-conntrack.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch [new file with mode: 0644]
queue-4.9/tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch [new file with mode: 0644]
queue-4.9/tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch [new file with mode: 0644]
queue-4.9/tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch [new file with mode: 0644]
queue-4.9/tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch [new file with mode: 0644]

diff --git a/queue-4.9/inet-protect-against-too-small-mtu-values.patch b/queue-4.9/inet-protect-against-too-small-mtu-values.patch
new file mode 100644 (file)
index 0000000..bd74d5e
--- /dev/null
@@ -0,0 +1,176 @@
+From foo@baz Tue 17 Dec 2019 09:44:32 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 5 Dec 2019 20:43:46 -0800
+Subject: inet: protect against too small mtu values.
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 501a90c945103e8627406763dac418f20f3837b2 ]
+
+syzbot was once again able to crash a host by setting a very small mtu
+on loopback device.
+
+Let's make inetdev_valid_mtu() available in include/net/ip.h,
+and use it in ip_setup_cork(), so that we protect both ip_append_page()
+and __ip_append_data()
+
+Also add a READ_ONCE() when the device mtu is read.
+
+Pairs this lockless read with one WRITE_ONCE() in __dev_set_mtu(),
+even if other code paths might write over this field.
+
+Add a big comment in include/linux/netdevice.h about dev->mtu
+needing READ_ONCE()/WRITE_ONCE() annotations.
+
+Hopefully we will add the missing ones in followup patches.
+
+[1]
+
+refcount_t: saturated; leaking memory.
+WARNING: CPU: 0 PID: 9464 at lib/refcount.c:22 refcount_warn_saturate+0x138/0x1f0 lib/refcount.c:22
+Kernel panic - not syncing: panic_on_warn set ...
+CPU: 0 PID: 9464 Comm: syz-executor850 Not tainted 5.4.0-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x197/0x210 lib/dump_stack.c:118
+ panic+0x2e3/0x75c kernel/panic.c:221
+ __warn.cold+0x2f/0x3e kernel/panic.c:582
+ report_bug+0x289/0x300 lib/bug.c:195
+ fixup_bug arch/x86/kernel/traps.c:174 [inline]
+ fixup_bug arch/x86/kernel/traps.c:169 [inline]
+ do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:267
+ do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:286
+ invalid_op+0x23/0x30 arch/x86/entry/entry_64.S:1027
+RIP: 0010:refcount_warn_saturate+0x138/0x1f0 lib/refcount.c:22
+Code: 06 31 ff 89 de e8 c8 f5 e6 fd 84 db 0f 85 6f ff ff ff e8 7b f4 e6 fd 48 c7 c7 e0 71 4f 88 c6 05 56 a6 a4 06 01 e8 c7 a8 b7 fd <0f> 0b e9 50 ff ff ff e8 5c f4 e6 fd 0f b6 1d 3d a6 a4 06 31 ff 89
+RSP: 0018:ffff88809689f550 EFLAGS: 00010286
+RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: ffffffff815e4336 RDI: ffffed1012d13e9c
+RBP: ffff88809689f560 R08: ffff88809c50a3c0 R09: fffffbfff15d31b1
+R10: fffffbfff15d31b0 R11: ffffffff8ae98d87 R12: 0000000000000001
+R13: 0000000000040100 R14: ffff888099041104 R15: ffff888218d96e40
+ refcount_add include/linux/refcount.h:193 [inline]
+ skb_set_owner_w+0x2b6/0x410 net/core/sock.c:1999
+ sock_wmalloc+0xf1/0x120 net/core/sock.c:2096
+ ip_append_page+0x7ef/0x1190 net/ipv4/ip_output.c:1383
+ udp_sendpage+0x1c7/0x480 net/ipv4/udp.c:1276
+ inet_sendpage+0xdb/0x150 net/ipv4/af_inet.c:821
+ kernel_sendpage+0x92/0xf0 net/socket.c:3794
+ sock_sendpage+0x8b/0xc0 net/socket.c:936
+ pipe_to_sendpage+0x2da/0x3c0 fs/splice.c:458
+ splice_from_pipe_feed fs/splice.c:512 [inline]
+ __splice_from_pipe+0x3ee/0x7c0 fs/splice.c:636
+ splice_from_pipe+0x108/0x170 fs/splice.c:671
+ generic_splice_sendpage+0x3c/0x50 fs/splice.c:842
+ do_splice_from fs/splice.c:861 [inline]
+ direct_splice_actor+0x123/0x190 fs/splice.c:1035
+ splice_direct_to_actor+0x3b4/0xa30 fs/splice.c:990
+ do_splice_direct+0x1da/0x2a0 fs/splice.c:1078
+ do_sendfile+0x597/0xd00 fs/read_write.c:1464
+ __do_sys_sendfile64 fs/read_write.c:1525 [inline]
+ __se_sys_sendfile64 fs/read_write.c:1511 [inline]
+ __x64_sys_sendfile64+0x1dd/0x220 fs/read_write.c:1511
+ do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294
+ entry_SYSCALL_64_after_hwframe+0x49/0xbe
+RIP: 0033:0x441409
+Code: e8 ac e8 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007fffb64c4f78 EFLAGS: 00000246 ORIG_RAX: 0000000000000028
+RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000441409
+RDX: 0000000000000000 RSI: 0000000000000006 RDI: 0000000000000005
+RBP: 0000000000073b8a R08: 0000000000000010 R09: 0000000000000010
+R10: 0000000000010001 R11: 0000000000000246 R12: 0000000000402180
+R13: 0000000000402210 R14: 0000000000000000 R15: 0000000000000000
+Kernel Offset: disabled
+Rebooting in 86400 seconds..
+
+Fixes: 1470ddf7f8ce ("inet: Remove explicit write references to sk/inet in ip_append_data")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/netdevice.h |    5 +++++
+ include/net/ip.h          |    5 +++++
+ net/core/dev.c            |    3 ++-
+ net/ipv4/devinet.c        |    5 -----
+ net/ipv4/ip_output.c      |   14 +++++++++-----
+ 5 files changed, 21 insertions(+), 11 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1730,6 +1730,11 @@ struct net_device {
+       unsigned char           if_port;
+       unsigned char           dma;
++      /* Note : dev->mtu is often read without holding a lock.
++       * Writers usually hold RTNL.
++       * It is recommended to use READ_ONCE() to annotate the reads,
++       * and to use WRITE_ONCE() to annotate the writes.
++       */
+       unsigned int            mtu;
+       unsigned short          type;
+       unsigned short          hard_header_len;
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -620,4 +620,9 @@ extern int sysctl_icmp_msgs_burst;
+ int ip_misc_proc_init(void);
+ #endif
++static inline bool inetdev_valid_mtu(unsigned int mtu)
++{
++      return likely(mtu >= IPV4_MIN_MTU);
++}
++
+ #endif        /* _IP_H */
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -6584,7 +6584,8 @@ static int __dev_set_mtu(struct net_devi
+       if (ops->ndo_change_mtu)
+               return ops->ndo_change_mtu(dev, new_mtu);
+-      dev->mtu = new_mtu;
++      /* Pairs with all the lockless reads of dev->mtu in the stack */
++      WRITE_ONCE(dev->mtu, new_mtu);
+       return 0;
+ }
+--- a/net/ipv4/devinet.c
++++ b/net/ipv4/devinet.c
+@@ -1386,11 +1386,6 @@ skip:
+       }
+ }
+-static bool inetdev_valid_mtu(unsigned int mtu)
+-{
+-      return mtu >= IPV4_MIN_MTU;
+-}
+-
+ static void inetdev_send_gratuitous_arp(struct net_device *dev,
+                                       struct in_device *in_dev)
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -1159,13 +1159,17 @@ static int ip_setup_cork(struct sock *sk
+       rt = *rtp;
+       if (unlikely(!rt))
+               return -EFAULT;
+-      /*
+-       * We steal reference to this route, caller should not release it
+-       */
+-      *rtp = NULL;
++
+       cork->fragsize = ip_sk_use_pmtu(sk) ?
+-                       dst_mtu(&rt->dst) : rt->dst.dev->mtu;
++                       dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
++
++      if (!inetdev_valid_mtu(cork->fragsize))
++              return -ENETUNREACH;
++
+       cork->dst = &rt->dst;
++      /* We stole this route, caller should not release it. */
++      *rtp = NULL;
++
+       cork->length = 0;
+       cork->ttl = ipc->ttl;
+       cork->tos = ipc->tos;
diff --git a/queue-4.9/net-bridge-deny-dev_set_mac_address-when-unregistering.patch b/queue-4.9/net-bridge-deny-dev_set_mac_address-when-unregistering.patch
new file mode 100644 (file)
index 0000000..b6c14c0
--- /dev/null
@@ -0,0 +1,76 @@
+From foo@baz Wed 18 Dec 2019 01:25:23 PM CET
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Tue, 3 Dec 2019 16:48:06 +0200
+Subject: net: bridge: deny dev_set_mac_address() when unregistering
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+[ Upstream commit c4b4c421857dc7b1cf0dccbd738472360ff2cd70 ]
+
+We have an interesting memory leak in the bridge when it is being
+unregistered and is a slave to a master device which would change the
+mac of its slaves on unregister (e.g. bond, team). This is a very
+unusual setup but we do end up leaking 1 fdb entry because
+dev_set_mac_address() would cause the bridge to insert the new mac address
+into its table after all fdbs are flushed, i.e. after dellink() on the
+bridge has finished and we call NETDEV_UNREGISTER the bond/team would
+release it and will call dev_set_mac_address() to restore its original
+address and that in turn will add an fdb in the bridge.
+One fix is to check for the bridge dev's reg_state in its
+ndo_set_mac_address callback and return an error if the bridge is not in
+NETREG_REGISTERED.
+
+Easy steps to reproduce:
+ 1. add bond in mode != A/B
+ 2. add any slave to the bond
+ 3. add bridge dev as a slave to the bond
+ 4. destroy the bridge device
+
+Trace:
+ unreferenced object 0xffff888035c4d080 (size 128):
+   comm "ip", pid 4068, jiffies 4296209429 (age 1413.753s)
+   hex dump (first 32 bytes):
+     41 1d c9 36 80 88 ff ff 00 00 00 00 00 00 00 00  A..6............
+     d2 19 c9 5e 3f d7 00 00 00 00 00 00 00 00 00 00  ...^?...........
+   backtrace:
+     [<00000000ddb525dc>] kmem_cache_alloc+0x155/0x26f
+     [<00000000633ff1e0>] fdb_create+0x21/0x486 [bridge]
+     [<0000000092b17e9c>] fdb_insert+0x91/0xdc [bridge]
+     [<00000000f2a0f0ff>] br_fdb_change_mac_address+0xb3/0x175 [bridge]
+     [<000000001de02dbd>] br_stp_change_bridge_id+0xf/0xff [bridge]
+     [<00000000ac0e32b1>] br_set_mac_address+0x76/0x99 [bridge]
+     [<000000006846a77f>] dev_set_mac_address+0x63/0x9b
+     [<00000000d30738fc>] __bond_release_one+0x3f6/0x455 [bonding]
+     [<00000000fc7ec01d>] bond_netdev_event+0x2f2/0x400 [bonding]
+     [<00000000305d7795>] notifier_call_chain+0x38/0x56
+     [<0000000028885d4a>] call_netdevice_notifiers+0x1e/0x23
+     [<000000008279477b>] rollback_registered_many+0x353/0x6a4
+     [<0000000018ef753a>] unregister_netdevice_many+0x17/0x6f
+     [<00000000ba854b7a>] rtnl_delete_link+0x3c/0x43
+     [<00000000adf8618d>] rtnl_dellink+0x1dc/0x20a
+     [<000000009b6395fd>] rtnetlink_rcv_msg+0x23d/0x268
+
+Fixes: 43598813386f ("bridge: add local MAC address to forwarding table (v2)")
+Reported-by: syzbot+2add91c08eb181fea1bf@syzkaller.appspotmail.com
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_device.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/net/bridge/br_device.c
++++ b/net/bridge/br_device.c
+@@ -210,6 +210,12 @@ static int br_set_mac_address(struct net
+       if (!is_valid_ether_addr(addr->sa_data))
+               return -EADDRNOTAVAIL;
+      /* dev_set_mac_address() can be called by a master device on bridge's
++       * NETDEV_UNREGISTER, but since it's being destroyed do nothing
++       */
++      if (dev->reg_state != NETREG_REGISTERED)
++              return -EBUSY;
++
+       spin_lock_bh(&br->lock);
+       if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) {
+               /* Mac address will be changed in br_stp_change_bridge_id(). */
diff --git a/queue-4.9/net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch b/queue-4.9/net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch
new file mode 100644 (file)
index 0000000..7229d84
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Wed 18 Dec 2019 01:25:23 PM CET
+From: Grygorii Strashko <grygorii.strashko@ti.com>
+Date: Fri, 6 Dec 2019 14:28:20 +0200
+Subject: net: ethernet: ti: cpsw: fix extra rx interrupt
+
+From: Grygorii Strashko <grygorii.strashko@ti.com>
+
+[ Upstream commit 51302f77bedab8768b761ed1899c08f89af9e4e2 ]
+
+Now RX interrupt is triggered twice every time, because in
+cpsw_rx_interrupt() it is acked first and then disabled. So there will be
+pending interrupt always, when RX interrupt is enabled again in NAPI
+handler.
+
+Fix it by first disabling IRQ and then do ack.
+
+Fixes: 870915feabdc ("drivers: net: cpsw: remove disable_irq/enable_irq as irq can be masked from cpsw itself")
+Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ti/cpsw.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/ti/cpsw.c
++++ b/drivers/net/ethernet/ti/cpsw.c
+@@ -773,8 +773,8 @@ static irqreturn_t cpsw_rx_interrupt(int
+ {
+       struct cpsw_common *cpsw = dev_id;
+-      cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_RX);
+       writel(0, &cpsw->wr_regs->rx_en);
++      cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_RX);
+       if (cpsw->quirk_irq) {
+               disable_irq_nosync(cpsw->irqs_table[0]);
diff --git a/queue-4.9/openvswitch-support-asymmetric-conntrack.patch b/queue-4.9/openvswitch-support-asymmetric-conntrack.patch
new file mode 100644 (file)
index 0000000..709c514
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Wed 18 Dec 2019 01:25:23 PM CET
+From: Aaron Conole <aconole@redhat.com>
+Date: Tue, 3 Dec 2019 16:34:13 -0500
+Subject: openvswitch: support asymmetric conntrack
+
+From: Aaron Conole <aconole@redhat.com>
+
+[ Upstream commit 5d50aa83e2c8e91ced2cca77c198b468ca9210f4 ]
+
+The openvswitch module shares a common conntrack and NAT infrastructure
+exposed via netfilter.  It's possible that a packet needs both SNAT and
+DNAT manipulation, due to e.g. tuple collision.  Netfilter can support
+this because it runs through the NAT table twice - once on ingress and
+again after egress.  The openvswitch module doesn't have such capability.
+
+Like netfilter hook infrastructure, we should run through NAT twice to
+keep the symmetry.
+
+Fixes: 05752523e565 ("openvswitch: Interface with NAT.")
+Signed-off-by: Aaron Conole <aconole@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/conntrack.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/net/openvswitch/conntrack.c
++++ b/net/openvswitch/conntrack.c
+@@ -709,6 +709,17 @@ static int ovs_ct_nat(struct net *net, s
+       }
+       err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
++      if (err == NF_ACCEPT &&
++          ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
++              if (maniptype == NF_NAT_MANIP_SRC)
++                      maniptype = NF_NAT_MANIP_DST;
++              else
++                      maniptype = NF_NAT_MANIP_SRC;
++
++              err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
++                                       maniptype);
++      }
++
+       /* Mark NAT done if successful and update the flow key. */
+       if (err == NF_ACCEPT)
+               ovs_nat_update_key(key, skb, maniptype);
index 1c24d881b33ace06731e2b410365ae486742676c..b00bcef375b4955fc49c64f77898abc230c7571d 100644 (file)
@@ -173,3 +173,12 @@ sunrpc-fix-crash-when-cache_head-become-valid-before.patch
 net-mlx5e-fix-sff-8472-eeprom-length.patch
 kernel-module.c-wakeup-processes-in-module_wq-on-mod.patch
 nvme-host-core-fix-precedence-of-ternary-operator.patch
+net-bridge-deny-dev_set_mac_address-when-unregistering.patch
+net-ethernet-ti-cpsw-fix-extra-rx-interrupt.patch
+openvswitch-support-asymmetric-conntrack.patch
+tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch
+tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch
+inet-protect-against-too-small-mtu-values.patch
+tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch
+tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch
+tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch
diff --git a/queue-4.9/tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch b/queue-4.9/tcp-fix-rejected-syncookies-due-to-stale-timestamps.patch
new file mode 100644 (file)
index 0000000..f020bf5
--- /dev/null
@@ -0,0 +1,107 @@
+From foo@baz Tue 17 Dec 2019 09:30:11 PM CET
+From: Guillaume Nault <gnault@redhat.com>
+Date: Fri, 6 Dec 2019 12:38:36 +0100
+Subject: tcp: fix rejected syncookies due to stale timestamps
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[ Upstream commit 04d26e7b159a396372646a480f4caa166d1b6720 ]
+
+If no synflood happens for a long enough period of time, then the
+synflood timestamp isn't refreshed and jiffies can advance so much
+that time_after32() can't accurately compare them any more.
+
+Therefore, we can end up in a situation where time_after32(now,
+last_overflow + HZ) returns false, just because these two values are
+too far apart. In that case, the synflood timestamp isn't updated as
+it should be, which can trick tcp_synq_no_recent_overflow() into
+rejecting valid syncookies.
+
+For example, let's consider the following scenario on a system
+with HZ=1000:
+
+  * The synflood timestamp is 0, either because that's the timestamp
+    of the last synflood or, more commonly, because we're working with
+    a freshly created socket.
+
+  * We receive a new SYN, which triggers synflood protection. Let's say
+    that this happens when jiffies == 2147484649 (that is,
+    'synflood timestamp' + HZ + 2^31 + 1).
+
+  * Then tcp_synq_overflow() doesn't update the synflood timestamp,
+    because time_after32(2147484649, 1000) returns false.
+    With:
+      - 2147484649: the value of jiffies, aka. 'now'.
+      - 1000: the value of 'last_overflow' + HZ.
+
+  * A bit later, we receive the ACK completing the 3WHS. But
+    cookie_v[46]_check() rejects it because tcp_synq_no_recent_overflow()
+    says that we're not under synflood. That's because
+    time_after32(2147484649, 120000) returns false.
+    With:
+      - 2147484649: the value of jiffies, aka. 'now'.
+      - 120000: the value of 'last_overflow' + TCP_SYNCOOKIE_VALID.
+
+    Of course, in reality jiffies would have increased a bit, but this
+    condition will last for the next 119 seconds, which is far enough
+    to accommodate jiffies' growth.
+
+Fix this by updating the overflow timestamp whenever jiffies isn't
+within the [last_overflow, last_overflow + HZ] range. That shouldn't
+have any performance impact since the update still happens at most once
+per second.
+
+Now we're guaranteed to have fresh timestamps while under synflood, so
+tcp_synq_no_recent_overflow() can safely use it with time_after32() in
+such situations.
+
+Stale timestamps can still make tcp_synq_no_recent_overflow() return
+the wrong verdict when not under synflood. This will be handled in the
+next patch.
+
+For 64 bits architectures, the problem was introduced with the
+conversion of ->tw_ts_recent_stamp to 32 bits integer by commit
+cca9bab1b72c ("tcp: use monotonic timestamps for PAWS").
+The problem has always been there on 32 bits architectures.
+
+Fixes: cca9bab1b72c ("tcp: use monotonic timestamps for PAWS")
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/time.h |   12 ++++++++++++
+ include/net/tcp.h    |    2 +-
+ 2 files changed, 13 insertions(+), 1 deletion(-)
+
+--- a/include/linux/time.h
++++ b/include/linux/time.h
+@@ -275,4 +275,16 @@ static __always_inline void timespec_add
+       a->tv_nsec = ns;
+ }
++/**
++ * time_between32 - check if a 32-bit timestamp is within a given time range
++ * @t:        the time which may be within [l,h]
++ * @l:        the lower bound of the range
++ * @h:        the higher bound of the range
++ *
+ * time_between32(t, l, h) returns true if @l <= @t <= @h. All operands are
++ * treated as 32-bit integers.
++ *
++ * Equivalent to !(time_before32(@t, @l) || time_after32(@t, @h)).
++ */
++#define time_between32(t, l, h) ((u32)(h) - (u32)(l) >= (u32)(t) - (u32)(l))
+ #endif
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -497,7 +497,7 @@ static inline void tcp_synq_overflow(con
+       unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+       unsigned long now = jiffies;
+-      if (time_after(now, last_overflow + HZ))
++      if (!time_between32(now, last_overflow, last_overflow + HZ))
+               tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
+ }
diff --git a/queue-4.9/tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch b/queue-4.9/tcp-md5-fix-potential-overestimation-of-tcp-option-space.patch
new file mode 100644 (file)
index 0000000..7a6729d
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Wed 18 Dec 2019 01:25:23 PM CET
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 5 Dec 2019 10:10:15 -0800
+Subject: tcp: md5: fix potential overestimation of TCP option space
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9424e2e7ad93ffffa88f882c9bc5023570904b55 ]
+
+Back in 2008, Adam Langley fixed the corner case of packets for flows
+having all of the following options : MD5 TS SACK
+
+Since MD5 needs 20 bytes, and TS needs 12 bytes, no sack block
+can be cooked from the remaining 8 bytes.
+
+tcp_established_options() correctly sets opts->num_sack_blocks
+to zero, but returns 36 instead of 32.
+
+This means TCP cooks packets with 4 extra bytes at the end
+of options, containing uninitialized bytes.
+
+Fixes: 33ad798c924b ("tcp: options clean up")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -707,8 +707,9 @@ static unsigned int tcp_established_opti
+                       min_t(unsigned int, eff_sacks,
+                             (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
+                             TCPOLEN_SACK_PERBLOCK);
+-              size += TCPOLEN_SACK_BASE_ALIGNED +
+-                      opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
++              if (likely(opts->num_sack_blocks))
++                      size += TCPOLEN_SACK_BASE_ALIGNED +
++                              opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
+       }
+       return size;
diff --git a/queue-4.9/tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch b/queue-4.9/tcp-protect-accesses-to-.ts_recent_stamp-with-read-write-_once.patch
new file mode 100644 (file)
index 0000000..732684f
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Tue 17 Dec 2019 09:44:32 PM CET
+From: Guillaume Nault <gnault@redhat.com>
+Date: Fri, 6 Dec 2019 12:38:49 +0100
+Subject: tcp: Protect accesses to .ts_recent_stamp with {READ,WRITE}_ONCE()
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[ Upstream commit 721c8dafad26ccfa90ff659ee19755e3377b829d ]
+
+Syncookies borrow the ->rx_opt.ts_recent_stamp field to store the
+timestamp of the last synflood. Protect them with READ_ONCE() and
+WRITE_ONCE() since reads and writes aren't serialised.
+
+Use of .rx_opt.ts_recent_stamp for storing the synflood timestamp was
+introduced by a0f82f64e269 ("syncookies: remove last_synq_overflow from
+struct tcp_sock"). But unprotected accesses were already there when
+timestamp was stored in .last_synq_overflow.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -494,17 +494,17 @@ struct sock *cookie_v4_check(struct sock
+  */
+ static inline void tcp_synq_overflow(const struct sock *sk)
+ {
+-      unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
++      unsigned long last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
+       unsigned long now = jiffies;
+       if (!time_between32(now, last_overflow, last_overflow + HZ))
+-              tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
++              WRITE_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp, now);
+ }
+ /* syncookies: no recent synqueue overflow on this listening socket? */
+ static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
+ {
+-      unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
++      unsigned long last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
+       /* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID,
+        * then we're under synflood. However, we have to use
diff --git a/queue-4.9/tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch b/queue-4.9/tcp-tighten-acceptance-of-acks-not-matching-a-child-socket.patch
new file mode 100644 (file)
index 0000000..2f32880
--- /dev/null
@@ -0,0 +1,76 @@
+From foo@baz Tue 17 Dec 2019 09:44:32 PM CET
+From: Guillaume Nault <gnault@redhat.com>
+Date: Fri, 6 Dec 2019 12:38:43 +0100
+Subject: tcp: tighten acceptance of ACKs not matching a child socket
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[ Upstream commit cb44a08f8647fd2e8db5cc9ac27cd8355fa392d8 ]
+
+When no synflood occurs, the synflood timestamp isn't updated.
+Therefore it can be so old that time_after32() can consider it to be
+in the future.
+
+That's a problem for tcp_synq_no_recent_overflow() as it may report
+that a recent overflow occurred while, in fact, it's just that jiffies
+has grown past 'last_overflow' + TCP_SYNCOOKIE_VALID + 2^31.
+
+Spurious detection of recent overflows leads to extra syncookie
+verification in cookie_v[46]_check(). At that point, the verification
+should fail and the packet dropped. But we should have dropped the
+packet earlier as we didn't even send a syncookie.
+
+Let's refine tcp_synq_no_recent_overflow() to report a recent overflow
+only if jiffies is within the
+[last_overflow, last_overflow + TCP_SYNCOOKIE_VALID] interval. This
+way, no spurious recent overflow is reported when jiffies wraps and
+'last_overflow' becomes in the future from the point of view of
+time_after32().
+
+However, if jiffies wraps and enters the
+[last_overflow, last_overflow + TCP_SYNCOOKIE_VALID] interval (with
+'last_overflow' being a stale synflood timestamp), then
+tcp_synq_no_recent_overflow() still erroneously reports an
+overflow. In such cases, we have to rely on syncookie verification
+to drop the packet. We unfortunately have no way to differentiate
+between a fresh and a stale syncookie timestamp.
+
+In practice, using last_overflow as lower bound is problematic.
+If the synflood timestamp is concurrently updated between the time
+we read jiffies and the moment we store the timestamp in
+'last_overflow', then 'now' becomes smaller than 'last_overflow' and
+tcp_synq_no_recent_overflow() returns true, potentially dropping a
+valid syncookie.
+
+Reading jiffies after loading the timestamp could fix the problem,
+but that'd require a memory barrier. Let's just accommodate for
+potential timestamp growth instead and extend the interval using
+'last_overflow - HZ' as lower bound.
+
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -506,7 +506,15 @@ static inline bool tcp_synq_no_recent_ov
+ {
+       unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+-      return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID);
++      /* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID,
++       * then we're under synflood. However, we have to use
++       * 'last_overflow - HZ' as lower bound. That's because a concurrent
++       * tcp_synq_overflow() could update .ts_recent_stamp after we read
++       * jiffies but before we store .ts_recent_stamp into last_overflow,
++       * which could lead to rejecting a valid syncookie.
++       */
++      return !time_between32(jiffies, last_overflow - HZ,
++                             last_overflow + TCP_SYNCOOKIE_VALID);
+ }
+ static inline u32 tcp_cookie_time(void)
diff --git a/queue-4.9/tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch b/queue-4.9/tipc-fix-ordering-of-tipc-module-init-and-exit-routine.patch
new file mode 100644 (file)
index 0000000..a56d2b8
--- /dev/null
@@ -0,0 +1,159 @@
+From foo@baz Wed 18 Dec 2019 01:25:23 PM CET
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Fri, 6 Dec 2019 05:25:48 +0000
+Subject: tipc: fix ordering of tipc module init and exit routine
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 9cf1cd8ee3ee09ef2859017df2058e2f53c5347f ]
+
+To implement its set/get/dump commands, tipc uses the generic netlink
+infrastructure. So, when the tipc module is inserted, its init function
+calls genl_register_family().
+After genl_register_family(), set/get/dump commands are immediately
+allowed and their callbacks internally use net_generic.
+net_generic is allocated by register_pernet_device(), but this
+is called after genl_register_family() in the __init function.
+So, these callbacks could use an uninitialized net_generic.
+
+Test commands:
+    #SHELL1
+    while :
+    do
+        modprobe tipc
+        modprobe -rv tipc
+    done
+
+    #SHELL2
+    while :
+    do
+        tipc link list
+    done
+
+Splat looks like:
+[   59.616322][ T2788] kasan: CONFIG_KASAN_INLINE enabled
+[   59.617234][ T2788] kasan: GPF could be caused by NULL-ptr deref or user memory access
+[   59.618398][ T2788] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
+[   59.619389][ T2788] CPU: 3 PID: 2788 Comm: tipc Not tainted 5.4.0+ #194
+[   59.620231][ T2788] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[   59.621428][ T2788] RIP: 0010:tipc_bcast_get_broadcast_mode+0x131/0x310 [tipc]
+[   59.622379][ T2788] Code: c7 c6 ef 8b 38 c0 65 ff 0d 84 83 c9 3f e8 d7 a5 f2 e3 48 8d bb 38 11 00 00 48 b8 00 00 00 00
+[   59.622550][ T2780] NET: Registered protocol family 30
+[   59.624627][ T2788] RSP: 0018:ffff88804b09f578 EFLAGS: 00010202
+[   59.624630][ T2788] RAX: dffffc0000000000 RBX: 0000000000000011 RCX: 000000008bc66907
+[   59.624631][ T2788] RDX: 0000000000000229 RSI: 000000004b3cf4cc RDI: 0000000000001149
+[   59.624633][ T2788] RBP: ffff88804b09f588 R08: 0000000000000003 R09: fffffbfff4fb3df1
+[   59.624635][ T2788] R10: fffffbfff50318f8 R11: ffff888066cadc18 R12: ffffffffa6cc2f40
+[   59.624637][ T2788] R13: 1ffff11009613eba R14: ffff8880662e9328 R15: ffff8880662e9328
+[   59.624639][ T2788] FS:  00007f57d8f7b740(0000) GS:ffff88806cc00000(0000) knlGS:0000000000000000
+[   59.624645][ T2788] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[   59.625875][ T2780] tipc: Started in single node mode
+[   59.626128][ T2788] CR2: 00007f57d887a8c0 CR3: 000000004b140002 CR4: 00000000000606e0
+[   59.633991][ T2788] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[   59.635195][ T2788] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[   59.636478][ T2788] Call Trace:
+[   59.637025][ T2788]  tipc_nl_add_bc_link+0x179/0x1470 [tipc]
+[   59.638219][ T2788]  ? lock_downgrade+0x6e0/0x6e0
+[   59.638923][ T2788]  ? __tipc_nl_add_link+0xf90/0xf90 [tipc]
+[   59.639533][ T2788]  ? tipc_nl_node_dump_link+0x318/0xa50 [tipc]
+[   59.640160][ T2788]  ? mutex_lock_io_nested+0x1380/0x1380
+[   59.640746][ T2788]  tipc_nl_node_dump_link+0x4fd/0xa50 [tipc]
+[   59.641356][ T2788]  ? tipc_nl_node_reset_link_stats+0x340/0x340 [tipc]
+[   59.642088][ T2788]  ? __skb_ext_del+0x270/0x270
+[   59.642594][ T2788]  genl_lock_dumpit+0x85/0xb0
+[   59.643050][ T2788]  netlink_dump+0x49c/0xed0
+[   59.643529][ T2788]  ? __netlink_sendskb+0xc0/0xc0
+[   59.644044][ T2788]  ? __netlink_dump_start+0x190/0x800
+[   59.644617][ T2788]  ? __mutex_unlock_slowpath+0xd0/0x670
+[   59.645177][ T2788]  __netlink_dump_start+0x5a0/0x800
+[   59.645692][ T2788]  genl_rcv_msg+0xa75/0xe90
+[   59.646144][ T2788]  ? __lock_acquire+0xdfe/0x3de0
+[   59.646692][ T2788]  ? genl_family_rcv_msg_attrs_parse+0x320/0x320
+[   59.647340][ T2788]  ? genl_lock_dumpit+0xb0/0xb0
+[   59.647821][ T2788]  ? genl_unlock+0x20/0x20
+[   59.648290][ T2788]  ? genl_parallel_done+0xe0/0xe0
+[   59.648787][ T2788]  ? find_held_lock+0x39/0x1d0
+[   59.649276][ T2788]  ? genl_rcv+0x15/0x40
+[   59.649722][ T2788]  ? lock_contended+0xcd0/0xcd0
+[   59.650296][ T2788]  netlink_rcv_skb+0x121/0x350
+[   59.650828][ T2788]  ? genl_family_rcv_msg_attrs_parse+0x320/0x320
+[   59.651491][ T2788]  ? netlink_ack+0x940/0x940
+[   59.651953][ T2788]  ? lock_acquire+0x164/0x3b0
+[   59.652449][ T2788]  genl_rcv+0x24/0x40
+[   59.652841][ T2788]  netlink_unicast+0x421/0x600
+[ ... ]
+
+Fixes: 7e4369057806 ("tipc: fix a slab object leak")
+Fixes: a62fbccecd62 ("tipc: make subscriber server support net namespace")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Acked-by: Jon Maloy <jon.maloy@ericsson.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/core.c |   29 +++++++++++++++--------------
+ 1 file changed, 15 insertions(+), 14 deletions(-)
+
+--- a/net/tipc/core.c
++++ b/net/tipc/core.c
+@@ -116,14 +116,6 @@ static int __init tipc_init(void)
+       sysctl_tipc_rmem[1] = RCVBUF_DEF;
+       sysctl_tipc_rmem[2] = RCVBUF_MAX;
+-      err = tipc_netlink_start();
+-      if (err)
+-              goto out_netlink;
+-
+-      err = tipc_netlink_compat_start();
+-      if (err)
+-              goto out_netlink_compat;
+-
+       err = tipc_register_sysctl();
+       if (err)
+               goto out_sysctl;
+@@ -144,8 +136,21 @@ static int __init tipc_init(void)
+       if (err)
+               goto out_bearer;
++      err = tipc_netlink_start();
++      if (err)
++              goto out_netlink;
++
++      err = tipc_netlink_compat_start();
++      if (err)
++              goto out_netlink_compat;
++
+       pr_info("Started in single node mode\n");
+       return 0;
++
++out_netlink_compat:
++      tipc_netlink_stop();
++out_netlink:
++      tipc_bearer_cleanup();
+ out_bearer:
+       unregister_pernet_device(&tipc_topsrv_net_ops);
+ out_pernet_topsrv:
+@@ -155,22 +160,18 @@ out_socket:
+ out_pernet:
+       tipc_unregister_sysctl();
+ out_sysctl:
+-      tipc_netlink_compat_stop();
+-out_netlink_compat:
+-      tipc_netlink_stop();
+-out_netlink:
+       pr_err("Unable to start in single node mode\n");
+       return err;
+ }
+ static void __exit tipc_exit(void)
+ {
++      tipc_netlink_compat_stop();
++      tipc_netlink_stop();
+       tipc_bearer_cleanup();
+       unregister_pernet_device(&tipc_topsrv_net_ops);
+       tipc_socket_stop();
+       unregister_pernet_device(&tipc_net_ops);
+-      tipc_netlink_stop();
+-      tipc_netlink_compat_stop();
+       tipc_unregister_sysctl();
+       pr_info("Deactivated\n");