4.4-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 31 Dec 2017 11:29:58 +0000 (12:29 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 31 Dec 2017 11:29:58 +0000 (12:29 +0100)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 31 Dec 2017 11:29:58 +0000 (12:29 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 31 Dec 2017 11:29:58 +0000 (12:29 +0100)
diff --git a/queue-4.4/ipv4-fix-use-after-free-when-flushing-fib-tables.patch b/queue-4.4/ipv4-fix-use-after-free-when-flushing-fib-tables.patch

new file mode 100644 (file)

index 0000000..c39915c
--- /dev/null
+++ b/queue-4.4/ipv4-fix-use-after-free-when-flushing-fib-tables.patch
@@ -0,0 +1,60 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Wed, 20 Dec 2017 19:34:19 +0200
+Subject: ipv4: Fix use-after-free when flushing FIB tables
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+
+[ Upstream commit b4681c2829e24943aadd1a7bb3a30d41d0a20050 ]
+
+Since commit 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") the
+local table uses the same trie allocated for the main table when custom
+rules are not in use.
+
+When a net namespace is dismantled, the main table is flushed and freed
+(via an RCU callback) before the local table. In case the callback is
+invoked before the local table is iterated, a use-after-free can occur.
+
+Fix this by iterating over the FIB tables in reverse order, so that the
+main table is always freed after the local table.
+
+v3: Reworded comment according to Alex's suggestion.
+v2: Add a comment to make the fix more explicit per Dave's and Alex's
+feedback.
+
+Fixes: 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Reported-by: Fengguang Wu <fengguang.wu@intel.com>
+Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_frontend.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -1252,7 +1252,7 @@ fail:
+ 
+ static void ip_fib_net_exit(struct net *net)
+ {
+-      unsigned int i;
++      int i;
+ 
+       rtnl_lock();
+ #ifdef CONFIG_IP_MULTIPLE_TABLES
+@@ -1260,7 +1260,12 @@ static void ip_fib_net_exit(struct net *
+       RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
+       RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
+ #endif
+-      for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
++      /* Destroy the tables in reverse order to guarantee that the
++       * local table, ID 255, is destroyed before the main table, ID
++       * 254. This is necessary as the local table may contain
++       * references to data contained in the main table.
++       */
++      for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
+               struct hlist_head *head = &net->ipv4.fib_table_hash[i];
+               struct hlist_node *tmp;
+               struct fib_table *tb;
diff --git a/queue-4.4/ipv4-igmp-guard-against-silly-mtu-values.patch b/queue-4.4/ipv4-igmp-guard-against-silly-mtu-values.patch

new file mode 100644 (file)

index 0000000..e65150d
--- /dev/null
+++ b/queue-4.4/ipv4-igmp-guard-against-silly-mtu-values.patch
@@ -0,0 +1,143 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 11 Dec 2017 07:17:39 -0800
+Subject: ipv4: igmp: guard against silly MTU values
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit b5476022bbada3764609368f03329ca287528dc8 ]
+
+IPv4 stack reacts to changes to small MTU, by disabling itself under
+RTNL.
+
+But there is a window where threads not using RTNL can see a wrong
+device mtu. This can lead to surprises, in igmp code where it is
+assumed the mtu is suitable.
+
+Fix this by reading device mtu once and checking IPv4 minimal MTU.
+
+This patch adds missing IPV4_MIN_MTU define, to not abuse
+ETH_MIN_MTU anymore.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/ip.h     |    2 ++
+ net/ipv4/devinet.c   |    2 +-
+ net/ipv4/igmp.c      |   24 +++++++++++++++---------
+ net/ipv4/ip_tunnel.c |    4 ++--
+ 4 files changed, 20 insertions(+), 12 deletions(-)
+
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -33,6 +33,8 @@
+ #include <net/flow.h>
+ #include <net/flow_dissector.h>
+ 
++#define IPV4_MIN_MTU          68                      /* RFC 791 */
++
+ struct sock;
+ 
+ struct inet_skb_parm {
+--- a/net/ipv4/devinet.c
++++ b/net/ipv4/devinet.c
+@@ -1358,7 +1358,7 @@ skip:
+ 
+ static bool inetdev_valid_mtu(unsigned int mtu)
+ {
+-      return mtu >= 68;
++      return mtu >= IPV4_MIN_MTU;
+ }
+ 
+ static void inetdev_send_gratuitous_arp(struct net_device *dev,
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -410,16 +410,17 @@ static int grec_size(struct ip_mc_list *
+ }
+ 
+ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
+-      int type, struct igmpv3_grec **ppgr)
++      int type, struct igmpv3_grec **ppgr, unsigned int mtu)
+ {
+       struct net_device *dev = pmc->interface->dev;
+       struct igmpv3_report *pih;
+       struct igmpv3_grec *pgr;
+ 
+-      if (!skb)
+-              skb = igmpv3_newpack(dev, dev->mtu);
+-      if (!skb)
+-              return NULL;
++      if (!skb) {
++              skb = igmpv3_newpack(dev, mtu);
++              if (!skb)
++                      return NULL;
++      }
+       pgr = (struct igmpv3_grec *)skb_put(skb, sizeof(struct igmpv3_grec));
+       pgr->grec_type = type;
+       pgr->grec_auxwords = 0;
+@@ -441,12 +442,17 @@ static struct sk_buff *add_grec(struct s
+       struct igmpv3_grec *pgr = NULL;
+       struct ip_sf_list *psf, *psf_next, *psf_prev, **psf_list;
+       int scount, stotal, first, isquery, truncate;
++      unsigned int mtu;
+ 
+       if (pmc->multiaddr == IGMP_ALL_HOSTS)
+               return skb;
+       if (ipv4_is_local_multicast(pmc->multiaddr) && !sysctl_igmp_llm_reports)
+               return skb;
+ 
++      mtu = READ_ONCE(dev->mtu);
++      if (mtu < IPV4_MIN_MTU)
++              return skb;
++
+       isquery = type == IGMPV3_MODE_IS_INCLUDE ||
+                 type == IGMPV3_MODE_IS_EXCLUDE;
+       truncate = type == IGMPV3_MODE_IS_EXCLUDE ||
+@@ -467,7 +473,7 @@ static struct sk_buff *add_grec(struct s
+                   AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
+                       if (skb)
+                               igmpv3_sendpack(skb);
+-                      skb = igmpv3_newpack(dev, dev->mtu);
++                      skb = igmpv3_newpack(dev, mtu);
+               }
+       }
+       first = 1;
+@@ -494,12 +500,12 @@ static struct sk_buff *add_grec(struct s
+                               pgr->grec_nsrcs = htons(scount);
+                       if (skb)
+                               igmpv3_sendpack(skb);
+-                      skb = igmpv3_newpack(dev, dev->mtu);
++                      skb = igmpv3_newpack(dev, mtu);
+                       first = 1;
+                       scount = 0;
+               }
+               if (first) {
+-                      skb = add_grhead(skb, pmc, type, &pgr);
++                      skb = add_grhead(skb, pmc, type, &pgr, mtu);
+                       first = 0;
+               }
+               if (!skb)
+@@ -533,7 +539,7 @@ empty_source:
+                               igmpv3_sendpack(skb);
+                               skb = NULL; /* add_grhead will get a new one */
+                       }
+-                      skb = add_grhead(skb, pmc, type, &pgr);
++                      skb = add_grhead(skb, pmc, type, &pgr, mtu);
+               }
+       }
+       if (pgr)
+--- a/net/ipv4/ip_tunnel.c
++++ b/net/ipv4/ip_tunnel.c
+@@ -400,8 +400,8 @@ static int ip_tunnel_bind_dev(struct net
+       dev->needed_headroom = t_hlen + hlen;
+       mtu -= (dev->hard_header_len + t_hlen);
+ 
+-      if (mtu < 68)
+-              mtu = 68;
++      if (mtu < IPV4_MIN_MTU)
++              mtu = IPV4_MIN_MTU;
+ 
+       return mtu;
+ }
diff --git a/queue-4.4/ipv6-mcast-better-catch-silly-mtu-values.patch b/queue-4.4/ipv6-mcast-better-catch-silly-mtu-values.patch

new file mode 100644 (file)

index 0000000..44b2d1e
--- /dev/null
+++ b/queue-4.4/ipv6-mcast-better-catch-silly-mtu-values.patch
@@ -0,0 +1,149 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 11 Dec 2017 07:03:38 -0800
+Subject: ipv6: mcast: better catch silly mtu values
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit b9b312a7a451e9c098921856e7cfbc201120e1a7 ]
+
+syzkaller reported crashes in IPv6 stack [1]
+
+Xin Long found that lo MTU was set to silly values.
+
+IPv6 stack reacts to changes to small MTU, by disabling itself under
+RTNL.
+
+But there is a window where threads not using RTNL can see a wrong
+device mtu. This can lead to surprises, in mld code where it is assumed
+the mtu is suitable.
+
+Fix this by reading device mtu once and checking IPv6 minimal MTU.
+
+[1]
+ skbuff: skb_over_panic: text:0000000010b86b8d len:196 put:20
+ head:000000003b477e60 data:000000000e85441e tail:0xd4 end:0xc0 dev:lo
+ ------------[ cut here ]------------
+ kernel BUG at net/core/skbuff.c:104!
+ invalid opcode: 0000 [#1] SMP KASAN
+ Dumping ftrace buffer:
+    (ftrace buffer empty)
+ Modules linked in:
+ CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.15.0-rc2-mm1+ #39
+ Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
+ Google 01/01/2011
+ RIP: 0010:skb_panic+0x15c/0x1f0 net/core/skbuff.c:100
+ RSP: 0018:ffff8801db307508 EFLAGS: 00010286
+ RAX: 0000000000000082 RBX: ffff8801c517e840 RCX: 0000000000000000
+ RDX: 0000000000000082 RSI: 1ffff1003b660e61 RDI: ffffed003b660e95
+ RBP: ffff8801db307570 R08: 1ffff1003b660e23 R09: 0000000000000000
+ R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff85bd4020
+ R13: ffffffff84754ed2 R14: 0000000000000014 R15: ffff8801c4e26540
+ FS:  0000000000000000(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000
+ CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000000463610 CR3: 00000001c6698000 CR4: 00000000001406e0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+  <IRQ>
+  skb_over_panic net/core/skbuff.c:109 [inline]
+  skb_put+0x181/0x1c0 net/core/skbuff.c:1694
+  add_grhead.isra.24+0x42/0x3b0 net/ipv6/mcast.c:1695
+  add_grec+0xa55/0x1060 net/ipv6/mcast.c:1817
+  mld_send_cr net/ipv6/mcast.c:1903 [inline]
+  mld_ifc_timer_expire+0x4d2/0x770 net/ipv6/mcast.c:2448
+  call_timer_fn+0x23b/0x840 kernel/time/timer.c:1320
+  expire_timers kernel/time/timer.c:1357 [inline]
+  __run_timers+0x7e1/0xb60 kernel/time/timer.c:1660
+  run_timer_softirq+0x4c/0xb0 kernel/time/timer.c:1686
+  __do_softirq+0x29d/0xbb2 kernel/softirq.c:285
+  invoke_softirq kernel/softirq.c:365 [inline]
+  irq_exit+0x1d3/0x210 kernel/softirq.c:405
+  exiting_irq arch/x86/include/asm/apic.h:540 [inline]
+  smp_apic_timer_interrupt+0x16b/0x700 arch/x86/kernel/apic/apic.c:1052
+  apic_timer_interrupt+0xa9/0xb0 arch/x86/entry/entry_64.S:920
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Tested-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/mcast.c |   25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+--- a/net/ipv6/mcast.c
++++ b/net/ipv6/mcast.c
+@@ -1668,16 +1668,16 @@ static int grec_size(struct ifmcaddr6 *p
+ }
+ 
+ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
+-      int type, struct mld2_grec **ppgr)
++      int type, struct mld2_grec **ppgr, unsigned int mtu)
+ {
+-      struct net_device *dev = pmc->idev->dev;
+       struct mld2_report *pmr;
+       struct mld2_grec *pgr;
+ 
+-      if (!skb)
+-              skb = mld_newpack(pmc->idev, dev->mtu);
+-      if (!skb)
+-              return NULL;
++      if (!skb) {
++              skb = mld_newpack(pmc->idev, mtu);
++              if (!skb)
++                      return NULL;
++      }
+       pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec));
+       pgr->grec_type = type;
+       pgr->grec_auxwords = 0;
+@@ -1700,10 +1700,15 @@ static struct sk_buff *add_grec(struct s
+       struct mld2_grec *pgr = NULL;
+       struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
+       int scount, stotal, first, isquery, truncate;
++      unsigned int mtu;
+ 
+       if (pmc->mca_flags & MAF_NOREPORT)
+               return skb;
+ 
++      mtu = READ_ONCE(dev->mtu);
++      if (mtu < IPV6_MIN_MTU)
++              return skb;
++
+       isquery = type == MLD2_MODE_IS_INCLUDE ||
+                 type == MLD2_MODE_IS_EXCLUDE;
+       truncate = type == MLD2_MODE_IS_EXCLUDE ||
+@@ -1724,7 +1729,7 @@ static struct sk_buff *add_grec(struct s
+                   AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
+                       if (skb)
+                               mld_sendpack(skb);
+-                      skb = mld_newpack(idev, dev->mtu);
++                      skb = mld_newpack(idev, mtu);
+               }
+       }
+       first = 1;
+@@ -1751,12 +1756,12 @@ static struct sk_buff *add_grec(struct s
+                               pgr->grec_nsrcs = htons(scount);
+                       if (skb)
+                               mld_sendpack(skb);
+-                      skb = mld_newpack(idev, dev->mtu);
++                      skb = mld_newpack(idev, mtu);
+                       first = 1;
+                       scount = 0;
+               }
+               if (first) {
+-                      skb = add_grhead(skb, pmc, type, &pgr);
++                      skb = add_grhead(skb, pmc, type, &pgr, mtu);
+                       first = 0;
+               }
+               if (!skb)
+@@ -1790,7 +1795,7 @@ empty_source:
+                               mld_sendpack(skb);
+                               skb = NULL; /* add_grhead will get a new one */
+                       }
+-                      skb = add_grhead(skb, pmc, type, &pgr);
++                      skb = add_grhead(skb, pmc, type, &pgr, mtu);
+               }
+       }
+       if (pgr)
diff --git a/queue-4.4/net-bridge-fix-early-call-to-br_stp_change_bridge_id-and-plug-newlink-leaks.patch b/queue-4.4/net-bridge-fix-early-call-to-br_stp_change_bridge_id-and-plug-newlink-leaks.patch

new file mode 100644 (file)

index 0000000..f21485d
--- /dev/null
+++ b/queue-4.4/net-bridge-fix-early-call-to-br_stp_change_bridge_id-and-plug-newlink-leaks.patch
@@ -0,0 +1,94 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Mon, 18 Dec 2017 17:35:09 +0200
+Subject: net: bridge: fix early call to br_stp_change_bridge_id and plug newlink leaks
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+
+[ Upstream commit 84aeb437ab98a2bce3d4b2111c79723aedfceb33 ]
+
+The early call to br_stp_change_bridge_id in bridge's newlink can cause
+a memory leak if an error occurs during the newlink because the fdb
+entries are not cleaned up if a different lladdr was specified, also
+another minor issue is that it generates fdb notifications with
+ifindex = 0. Another unrelated memory leak is the bridge sysfs entries
+which get added on NETDEV_REGISTER event, but are not cleaned up in the
+newlink error path. To remove this special case the call to
+br_stp_change_bridge_id is done after netdev register and we cleanup the
+bridge on changelink error via br_dev_delete to plug all leaks.
+
+This patch makes netlink bridge destruction on newlink error the same as
+dellink and ioctl del which is necessary since at that point we have a
+fully initialized bridge device.
+
+To reproduce the issue:
+$ ip l add br0 address 00:11:22:33:44:55 type bridge group_fwd_mask 1
+RTNETLINK answers: Invalid argument
+
+$ rmmod bridge
+[ 1822.142525] =============================================================================
+[ 1822.143640] BUG bridge_fdb_cache (Tainted: G           O    ): Objects remaining in bridge_fdb_cache on __kmem_cache_shutdown()
+[ 1822.144821] -----------------------------------------------------------------------------
+
+[ 1822.145990] Disabling lock debugging due to kernel taint
+[ 1822.146732] INFO: Slab 0x0000000092a844b2 objects=32 used=2 fp=0x00000000fef011b0 flags=0x1ffff8000000100
+[ 1822.147700] CPU: 2 PID: 13584 Comm: rmmod Tainted: G    B      O     4.15.0-rc2+ #87
+[ 1822.148578] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014
+[ 1822.150008] Call Trace:
+[ 1822.150510]  dump_stack+0x78/0xa9
+[ 1822.151156]  slab_err+0xb1/0xd3
+[ 1822.151834]  ? __kmalloc+0x1bb/0x1ce
+[ 1822.152546]  __kmem_cache_shutdown+0x151/0x28b
+[ 1822.153395]  shutdown_cache+0x13/0x144
+[ 1822.154126]  kmem_cache_destroy+0x1c0/0x1fb
+[ 1822.154669]  SyS_delete_module+0x194/0x244
+[ 1822.155199]  ? trace_hardirqs_on_thunk+0x1a/0x1c
+[ 1822.155773]  entry_SYSCALL_64_fastpath+0x23/0x9a
+[ 1822.156343] RIP: 0033:0x7f929bd38b17
+[ 1822.156859] RSP: 002b:00007ffd160e9a98 EFLAGS: 00000202 ORIG_RAX: 00000000000000b0
+[ 1822.157728] RAX: ffffffffffffffda RBX: 00005578316ba090 RCX: 00007f929bd38b17
+[ 1822.158422] RDX: 00007f929bd9ec60 RSI: 0000000000000800 RDI: 00005578316ba0f0
+[ 1822.159114] RBP: 0000000000000003 R08: 00007f929bff5f20 R09: 00007ffd160e8a11
+[ 1822.159808] R10: 00007ffd160e9860 R11: 0000000000000202 R12: 00007ffd160e8a80
+[ 1822.160513] R13: 0000000000000000 R14: 0000000000000000 R15: 00005578316ba090
+[ 1822.161278] INFO: Object 0x000000007645de29 @offset=0
+[ 1822.161666] INFO: Object 0x00000000d5df2ab5 @offset=128
+
+Fixes: 30313a3d5794 ("bridge: Handle IFLA_ADDRESS correctly when creating bridge device")
+Fixes: 5b8d5429daa0 ("bridge: netlink: register netdevice before executing changelink")
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_netlink.c |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/net/bridge/br_netlink.c
++++ b/net/bridge/br_netlink.c
+@@ -1067,19 +1067,20 @@ static int br_dev_newlink(struct net *sr
+       struct net_bridge *br = netdev_priv(dev);
+       int err;
+ 
++      err = register_netdevice(dev);
++      if (err)
++              return err;
++
+       if (tb[IFLA_ADDRESS]) {
+               spin_lock_bh(&br->lock);
+               br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
+               spin_unlock_bh(&br->lock);
+       }
+ 
+-      err = register_netdevice(dev);
+-      if (err)
+-              return err;
+-
+       err = br_changelink(dev, tb, data);
+       if (err)
+-              unregister_netdevice(dev);
++              br_dev_delete(dev, NULL);
++
+       return err;
+ }
+ 
diff --git a/queue-4.4/net-fix-double-free-and-memory-corruption-in-get_net_ns_by_id.patch b/queue-4.4/net-fix-double-free-and-memory-corruption-in-get_net_ns_by_id.patch

new file mode 100644 (file)

index 0000000..f305f53
--- /dev/null
+++ b/queue-4.4/net-fix-double-free-and-memory-corruption-in-get_net_ns_by_id.patch
@@ -0,0 +1,100 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Tue, 19 Dec 2017 11:27:56 -0600
+Subject: net: Fix double free and memory corruption in get_net_ns_by_id()
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+
+[ Upstream commit 21b5944350052d2583e82dd59b19a9ba94a007f0 ]
+
+(I can trivially verify that that idr_remove in cleanup_net happens
+ after the network namespace count has dropped to zero --EWB)
+
+Function get_net_ns_by_id() does not check for net::count
+after it has found a peer in netns_ids idr.
+
+It may dereference a peer, after its count has already been
+finally decremented. This leads to double free and memory
+corruption:
+
+put_net(peer)                                   rtnl_lock()
+atomic_dec_and_test(&peer->count) [count=0]     ...
+__put_net(peer)                                 get_net_ns_by_id(net, id)
+  spin_lock(&cleanup_list_lock)
+  list_add(&net->cleanup_list, &cleanup_list)
+  spin_unlock(&cleanup_list_lock)
+queue_work()                                      peer = idr_find(&net->netns_ids, id)
+  |                                               get_net(peer) [count=1]
+  |                                               ...
+  |                                               (use after final put)
+  v                                               ...
+  cleanup_net()                                   ...
+    spin_lock(&cleanup_list_lock)                 ...
+    list_replace_init(&cleanup_list, ..)          ...
+    spin_unlock(&cleanup_list_lock)               ...
+    ...                                           ...
+    ...                                           put_net(peer)
+    ...                                             atomic_dec_and_test(&peer->count) [count=0]
+    ...                                               spin_lock(&cleanup_list_lock)
+    ...                                               list_add(&net->cleanup_list, &cleanup_list)
+    ...                                               spin_unlock(&cleanup_list_lock)
+    ...                                             queue_work()
+    ...                                           rtnl_unlock()
+    rtnl_lock()                                   ...
+    for_each_net(tmp) {                           ...
+      id = __peernet2id(tmp, peer)                ...
+      spin_lock_irq(&tmp->nsid_lock)              ...
+      idr_remove(&tmp->netns_ids, id)             ...
+      ...                                         ...
+      net_drop_ns()                               ...
+       net_free(peer)                            ...
+    }                                             ...
+  |
+  v
+  cleanup_net()
+    ...
+    (Second free of peer)
+
+Also, put_net() on the right cpu may reorder with the left cpu's
+list_replace_init(&cleanup_list, ..), and then cleanup_list
+will be corrupted.
+
+Since cleanup_net() is executed in worker thread, while
+put_net(peer) can happen everywhere, there should be
+enough time for concurrent get_net_ns_by_id() to pick
+the peer up, and the race does not seem to be unlikely.
+The patch fixes the problem in standard way.
+
+(Also, there is possible problem in peernet2id_alloc(), which requires
+check for net::count under nsid_lock and maybe_get_net(peer), but
+in current stable kernel it's used under rtnl_lock() and it has to be
+safe. Openvswitch has begun to use peernet2id_alloc(), and possibly it should
+be fixed too. This is not in the stable kernel yet, so I'll send
+a separate message to netdev@ later).
+
+Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
+Fixes: 0c7aecd4bde4 "netns: add rtnl cmd to add and get peer netns ids"
+Reviewed-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
+Reviewed-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/net_namespace.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/net_namespace.c
++++ b/net/core/net_namespace.c
+@@ -261,7 +261,7 @@ struct net *get_net_ns_by_id(struct net
+       spin_lock_irqsave(&net->nsid_lock, flags);
+       peer = idr_find(&net->netns_ids, id);
+       if (peer)
+-              get_net(peer);
++              peer = maybe_get_net(peer);
+       spin_unlock_irqrestore(&net->nsid_lock, flags);
+       rcu_read_unlock();
+ 
diff --git a/queue-4.4/net-igmp-use-correct-source-address-on-igmpv3-reports.patch b/queue-4.4/net-igmp-use-correct-source-address-on-igmpv3-reports.patch

new file mode 100644 (file)

index 0000000..63031d8
--- /dev/null
+++ b/queue-4.4/net-igmp-use-correct-source-address-on-igmpv3-reports.patch
@@ -0,0 +1,88 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: Kevin Cernekee <cernekee@chromium.org>
+Date: Mon, 11 Dec 2017 11:13:45 -0800
+Subject: net: igmp: Use correct source address on IGMPv3 reports
+
+From: Kevin Cernekee <cernekee@chromium.org>
+
+
+[ Upstream commit a46182b00290839fa3fa159d54fd3237bd8669f0 ]
+
+Closing a multicast socket after the final IPv4 address is deleted
+from an interface can generate a membership report that uses the
+source IP from a different interface.  The following test script, run
+from an isolated netns, reproduces the issue:
+
+    #!/bin/bash
+
+    ip link add dummy0 type dummy
+    ip link add dummy1 type dummy
+    ip link set dummy0 up
+    ip link set dummy1 up
+    ip addr add 10.1.1.1/24 dev dummy0
+    ip addr add 192.168.99.99/24 dev dummy1
+
+    tcpdump -U -i dummy0 &
+    socat EXEC:"sleep 2" \
+        UDP4-DATAGRAM:239.101.1.68:8889,ip-add-membership=239.0.1.68:10.1.1.1 &
+
+    sleep 1
+    ip addr del 10.1.1.1/24 dev dummy0
+    sleep 5
+    kill %tcpdump
+
+RFC 3376 specifies that the report must be sent with a valid IP source
+address from the destination subnet, or from address 0.0.0.0.  Add an
+extra check to make sure this is the case.
+
+Signed-off-by: Kevin Cernekee <cernekee@chromium.org>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/igmp.c |   20 +++++++++++++++++++-
+ 1 file changed, 19 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -89,6 +89,7 @@
+ #include <linux/rtnetlink.h>
+ #include <linux/times.h>
+ #include <linux/pkt_sched.h>
++#include <linux/byteorder/generic.h>
+ 
+ #include <net/net_namespace.h>
+ #include <net/arp.h>
+@@ -327,6 +328,23 @@ igmp_scount(struct ip_mc_list *pmc, int
+       return scount;
+ }
+ 
++/* source address selection per RFC 3376 section 4.2.13 */
++static __be32 igmpv3_get_srcaddr(struct net_device *dev,
++                               const struct flowi4 *fl4)
++{
++      struct in_device *in_dev = __in_dev_get_rcu(dev);
++
++      if (!in_dev)
++              return htonl(INADDR_ANY);
++
++      for_ifa(in_dev) {
++              if (inet_ifa_match(fl4->saddr, ifa))
++                      return fl4->saddr;
++      } endfor_ifa(in_dev);
++
++      return htonl(INADDR_ANY);
++}
++
+ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
+ {
+       struct sk_buff *skb;
+@@ -374,7 +392,7 @@ static struct sk_buff *igmpv3_newpack(st
+       pip->frag_off = htons(IP_DF);
+       pip->ttl      = 1;
+       pip->daddr    = fl4.daddr;
+-      pip->saddr    = fl4.saddr;
++      pip->saddr    = igmpv3_get_srcaddr(dev, &fl4);
+       pip->protocol = IPPROTO_IGMP;
+       pip->tot_len  = 0;      /* filled in later */
+       ip_select_ident(net, skb, NULL);
diff --git a/queue-4.4/net-ipv4-fix-for-a-race-condition-in-raw_sendmsg.patch b/queue-4.4/net-ipv4-fix-for-a-race-condition-in-raw_sendmsg.patch

new file mode 100644 (file)

index 0000000..0e4c74f
--- /dev/null
+++ b/queue-4.4/net-ipv4-fix-for-a-race-condition-in-raw_sendmsg.patch
@@ -0,0 +1,80 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: Mohamed Ghannam <simo.ghannam@gmail.com>
+Date: Sun, 10 Dec 2017 03:50:58 +0000
+Subject: net: ipv4: fix for a race condition in raw_sendmsg
+
+From: Mohamed Ghannam <simo.ghannam@gmail.com>
+
+
+[ Upstream commit 8f659a03a0ba9289b9aeb9b4470e6fb263d6f483 ]
+
+inet->hdrincl is racy, and could lead to uninitialized stack pointer
+usage, so its value should be read only once.
+
+Fixes: c008ba5bdc9f ("ipv4: Avoid reading user iov twice after raw_probe_proto_opt")
+Signed-off-by: Mohamed Ghannam <simo.ghannam@gmail.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/raw.c |   15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/raw.c
++++ b/net/ipv4/raw.c
+@@ -500,11 +500,16 @@ static int raw_sendmsg(struct sock *sk,
+       int err;
+       struct ip_options_data opt_copy;
+       struct raw_frag_vec rfv;
++      int hdrincl;
+ 
+       err = -EMSGSIZE;
+       if (len > 0xFFFF)
+               goto out;
+ 
++      /* hdrincl should be READ_ONCE(inet->hdrincl)
++       * but READ_ONCE() doesn't work with bit fields
++       */
++      hdrincl = inet->hdrincl;
+       /*
+        *      Check the flags.
+        */
+@@ -579,7 +584,7 @@ static int raw_sendmsg(struct sock *sk,
+               /* Linux does not mangle headers on raw sockets,
+                * so that IP options + IP_HDRINCL is non-sense.
+                */
+-              if (inet->hdrincl)
++              if (hdrincl)
+                       goto done;
+               if (ipc.opt->opt.srr) {
+                       if (!daddr)
+@@ -601,9 +606,9 @@ static int raw_sendmsg(struct sock *sk,
+ 
+       flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
+                          RT_SCOPE_UNIVERSE,
+-                         inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
++                         hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+                          inet_sk_flowi_flags(sk) |
+-                          (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
++                          (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
+                          daddr, saddr, 0, 0);
+ 
+       if (!saddr && ipc.oif) {
+@@ -612,7 +617,7 @@ static int raw_sendmsg(struct sock *sk,
+                       goto done;
+       }
+ 
+-      if (!inet->hdrincl) {
++      if (!hdrincl) {
+               rfv.msg = msg;
+               rfv.hlen = 0;
+ 
+@@ -637,7 +642,7 @@ static int raw_sendmsg(struct sock *sk,
+               goto do_confirm;
+ back_from_confirm:
+ 
+-      if (inet->hdrincl)
++      if (hdrincl)
+               err = raw_send_hdrinc(sk, &fl4, msg, len,
+                                     &rt, msg->msg_flags);
+ 
diff --git a/queue-4.4/net-mvmdio-disable-unprepare-clocks-in-eprobe_defer-case.patch b/queue-4.4/net-mvmdio-disable-unprepare-clocks-in-eprobe_defer-case.patch

new file mode 100644 (file)

index 0000000..cb8c8ec
--- /dev/null
+++ b/queue-4.4/net-mvmdio-disable-unprepare-clocks-in-eprobe_defer-case.patch
@@ -0,0 +1,36 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: Tobias Jordan <Tobias.Jordan@elektrobit.com>
+Date: Wed, 6 Dec 2017 15:23:23 +0100
+Subject: net: mvmdio: disable/unprepare clocks in EPROBE_DEFER case
+
+From: Tobias Jordan <Tobias.Jordan@elektrobit.com>
+
+
+[ Upstream commit 589bf32f09852041fbd3b7ce1a9e703f95c230ba ]
+
+add appropriate calls to clk_disable_unprepare() by jumping to out_mdio
+in case orion_mdio_probe() returns -EPROBE_DEFER.
+
+Found by Linux Driver Verification project (linuxtesting.org).
+
+Fixes: 3d604da1e954 ("net: mvmdio: get and enable optional clock")
+Signed-off-by: Tobias Jordan <Tobias.Jordan@elektrobit.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvmdio.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvmdio.c
++++ b/drivers/net/ethernet/marvell/mvmdio.c
+@@ -241,7 +241,8 @@ static int orion_mdio_probe(struct platf
+                       dev->regs + MVMDIO_ERR_INT_MASK);
+ 
+       } else if (dev->err_interrupt == -EPROBE_DEFER) {
+-              return -EPROBE_DEFER;
++              ret = -EPROBE_DEFER;
++              goto out_mdio;
+       }
+ 
+       mutex_init(&dev->lock);
diff --git a/queue-4.4/net-phy-micrel-ksz9031-reconfigure-autoneg-after-phy-autoneg-workaround.patch b/queue-4.4/net-phy-micrel-ksz9031-reconfigure-autoneg-after-phy-autoneg-workaround.patch

new file mode 100644 (file)

index 0000000..8e6858b
--- /dev/null
+++ b/queue-4.4/net-phy-micrel-ksz9031-reconfigure-autoneg-after-phy-autoneg-workaround.patch
@@ -0,0 +1,38 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: Grygorii Strashko <grygorii.strashko@ti.com>
+Date: Wed, 20 Dec 2017 18:45:10 -0600
+Subject: net: phy: micrel: ksz9031: reconfigure autoneg after phy autoneg workaround
+
+From: Grygorii Strashko <grygorii.strashko@ti.com>
+
+
+[ Upstream commit c1a8d0a3accf64a014d605e6806ce05d1c17adf1 ]
+
+Under some circumstances driver will perform PHY reset in
+ksz9031_read_status() to fix autoneg failure case (idle error count =
+0xFF). When this happens ksz9031 will not detect link status change any
+more when connecting to Netgear 1G switch (link can be recovered sometimes by
+restarting netdevice "ifconfig down up"). Reproduced with TI am572x board
+equipped with ksz9031 PHY while connecting to Netgear 1G switch.
+
+Fix the issue by reconfiguring autonegotiation after PHY reset in
+ksz9031_read_status().
+
+Fixes: d2fd719bcb0e ("net/phy: micrel: Add workaround for bad autoneg")
+Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/micrel.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/phy/micrel.c
++++ b/drivers/net/phy/micrel.c
+@@ -541,6 +541,7 @@ static int ksz9031_read_status(struct ph
+               phydev->link = 0;
+               if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
+                       phydev->drv->config_intr(phydev);
++              return genphy_config_aneg(phydev);
+       }
+ 
+       return 0;
diff --git a/queue-4.4/net-qmi_wwan-add-sierra-em7565-1199-9091.patch b/queue-4.4/net-qmi_wwan-add-sierra-em7565-1199-9091.patch

new file mode 100644 (file)

index 0000000..5658551
--- /dev/null
+++ b/queue-4.4/net-qmi_wwan-add-sierra-em7565-1199-9091.patch
@@ -0,0 +1,32 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: Sebastian Sjoholm <ssjoholm@mac.com>
+Date: Mon, 11 Dec 2017 21:51:14 +0100
+Subject: net: qmi_wwan: add Sierra EM7565 1199:9091
+
+From: Sebastian Sjoholm <ssjoholm@mac.com>
+
+
+[ Upstream commit aceef61ee56898cfa7b6960fb60b9326c3860441 ]
+
+Sierra Wireless EM7565 is a Qualcomm MDM9x50 based M.2 modem.
+The USB id is added to qmi_wwan.c to allow QMI communication
+with the EM7565.
+
+Signed-off-by: Sebastian Sjoholm <ssjoholm@mac.com>
+Acked-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/qmi_wwan.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -737,6 +737,7 @@ static const struct usb_device_id produc
+       {QMI_FIXED_INTF(0x1199, 0x9079, 10)},   /* Sierra Wireless EM74xx */
+       {QMI_FIXED_INTF(0x1199, 0x907b, 8)},    /* Sierra Wireless EM74xx */
+       {QMI_FIXED_INTF(0x1199, 0x907b, 10)},   /* Sierra Wireless EM74xx */
++      {QMI_FIXED_INTF(0x1199, 0x9091, 8)},    /* Sierra Wireless EM7565 */
+       {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)},    /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
+       {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)},    /* Alcatel L800MA */
+       {QMI_FIXED_INTF(0x2357, 0x0201, 4)},    /* TP-LINK HSUPA Modem MA180 */
diff --git a/queue-4.4/net-reevalulate-autoflowlabel-setting-after-sysctl-setting.patch b/queue-4.4/net-reevalulate-autoflowlabel-setting-after-sysctl-setting.patch

new file mode 100644 (file)

index 0000000..ebc0531
--- /dev/null
+++ b/queue-4.4/net-reevalulate-autoflowlabel-setting-after-sysctl-setting.patch
@@ -0,0 +1,118 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: Shaohua Li <shli@fb.com>
+Date: Wed, 20 Dec 2017 12:10:21 -0800
+Subject: net: reevalulate autoflowlabel setting after sysctl setting
+
+From: Shaohua Li <shli@fb.com>
+
+
+[ Upstream commit 513674b5a2c9c7a67501506419da5c3c77ac6f08 ]
+
+sysctl.ipv6.auto_flowlabels is 1 by default. On our hosts, we set it to 2.
+If sockopt doesn't set autoflowlabel, outgoing packets from the hosts are
+supposed to not include flowlabel. This is true for normal packets, but
+not for reset packets.
+
+The reason is ipv6_pinfo.autoflowlabel is set in sock creation. Later if
+we change sysctl.ipv6.auto_flowlabels, the ipv6_pinfo.autoflowlabel isn't
+changed, so the sock will keep the old behavior in terms of auto
+flowlabel. Reset packet is suffering from this problem, because reset
+packet is sent from a special control socket, which is created at boot
+time. Since sysctl.ipv6.auto_flowlabels is 1 by default, the control
+socket will always have its ipv6_pinfo.autoflowlabel set, even after
+user set sysctl.ipv6.auto_flowlabels to 2, so reset packet will always
+have flowlabel. Normal sock created before sysctl setting suffers from
+the same issue. We can't even turn off autoflowlabel unless we kill all
+socks in the hosts.
+
+To fix this, if IPV6_AUTOFLOWLABEL sockopt is used, we use the
+autoflowlabel setting from user, otherwise we always call
+ip6_default_np_autolabel() which has the new settings of sysctl.
+
+Note, this changes behavior a little bit. Before commit 42240901f7c4
+(ipv6: Implement different admin modes for automatic flow labels), the
+autoflowlabel behavior of a sock isn't sticky, eg, if sysctl changes,
+existing connection will change autoflowlabel behavior. After that
+commit, autoflowlabel behavior is sticky in the whole life of the sock.
+With this patch, the behavior isn't sticky again.
+
+Cc: Martin KaFai Lau <kafai@fb.com>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Cc: Tom Herbert <tom@quantonium.net>
+Signed-off-by: Shaohua Li <shli@fb.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ipv6.h     |    3 ++-
+ net/ipv6/af_inet6.c      |    1 -
+ net/ipv6/ip6_output.c    |   12 ++++++++++--
+ net/ipv6/ipv6_sockglue.c |    1 +
+ 4 files changed, 13 insertions(+), 4 deletions(-)
+
+--- a/include/linux/ipv6.h
++++ b/include/linux/ipv6.h
+@@ -215,7 +215,8 @@ struct ipv6_pinfo {
+                                                * 100: prefer care-of address
+                                                */
+                               dontfrag:1,
+-                              autoflowlabel:1;
++                              autoflowlabel:1,
++                              autoflowlabel_set:1;
+       __u8                    min_hopcount;
+       __u8                    tclass;
+       __be32                  rcv_flowinfo;
+--- a/net/ipv6/af_inet6.c
++++ b/net/ipv6/af_inet6.c
+@@ -200,7 +200,6 @@ lookup_protocol:
+       np->mcast_hops  = IPV6_DEFAULT_MCASTHOPS;
+       np->mc_loop     = 1;
+       np->pmtudisc    = IPV6_PMTUDISC_WANT;
+-      np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk));
+       sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
+ 
+       /* Init the ipv4 part of the socket since we can have sockets
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -148,6 +148,14 @@ int ip6_output(struct net *net, struct s
+                           !(IP6CB(skb)->flags & IP6SKB_REROUTED));
+ }
+ 
++static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
++{
++      if (!np->autoflowlabel_set)
++              return ip6_default_np_autolabel(net);
++      else
++              return np->autoflowlabel;
++}
++
+ /*
+  * xmit an sk_buff (used by TCP, SCTP and DCCP)
+  * Note : socket lock is not held for SYNACK packets, but might be modified
+@@ -211,7 +219,7 @@ int ip6_xmit(const struct sock *sk, stru
+               hlimit = ip6_dst_hoplimit(dst);
+ 
+       ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
+-                                                   np->autoflowlabel, fl6));
++                              ip6_autoflowlabel(net, np), fl6));
+ 
+       hdr->payload_len = htons(seg_len);
+       hdr->nexthdr = proto;
+@@ -1675,7 +1683,7 @@ struct sk_buff *__ip6_make_skb(struct so
+ 
+       ip6_flow_hdr(hdr, v6_cork->tclass,
+                    ip6_make_flowlabel(net, skb, fl6->flowlabel,
+-                                      np->autoflowlabel, fl6));
++                                      ip6_autoflowlabel(net, np), fl6));
+       hdr->hop_limit = v6_cork->hop_limit;
+       hdr->nexthdr = proto;
+       hdr->saddr = fl6->saddr;
+--- a/net/ipv6/ipv6_sockglue.c
++++ b/net/ipv6/ipv6_sockglue.c
+@@ -872,6 +872,7 @@ pref_skip_coa:
+               break;
+       case IPV6_AUTOFLOWLABEL:
+               np->autoflowlabel = valbool;
++              np->autoflowlabel_set = 1;
+               retv = 0;
+               break;
+       }
diff --git a/queue-4.4/netlink-add-netns-check-on-taps.patch b/queue-4.4/netlink-add-netns-check-on-taps.patch

new file mode 100644 (file)

index 0000000..85afd5a
--- /dev/null
+++ b/queue-4.4/netlink-add-netns-check-on-taps.patch
@@ -0,0 +1,44 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: Kevin Cernekee <cernekee@chromium.org>
+Date: Wed, 6 Dec 2017 12:12:27 -0800
+Subject: netlink: Add netns check on taps
+
+From: Kevin Cernekee <cernekee@chromium.org>
+
+
+[ Upstream commit 93c647643b48f0131f02e45da3bd367d80443291 ]
+
+Currently, a nlmon link inside a child namespace can observe systemwide
+netlink activity.  Filter the traffic so that nlmon can only sniff
+netlink messages from its own netns.
+
+Test case:
+
+    vpnns -- bash -c "ip link add nlmon0 type nlmon; \
+                      ip link set nlmon0 up; \
+                      tcpdump -i nlmon0 -q -w /tmp/nlmon.pcap -U" &
+    sudo ip xfrm state add src 10.1.1.1 dst 10.1.1.2 proto esp \
+        spi 0x1 mode transport \
+        auth sha1 0x6162633132330000000000000000000000000000 \
+        enc aes 0x00000000000000000000000000000000
+    grep --binary abc123 /tmp/nlmon.pcap
+
+Signed-off-by: Kevin Cernekee <cernekee@chromium.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -261,6 +261,9 @@ static int __netlink_deliver_tap_skb(str
+       struct sock *sk = skb->sk;
+       int ret = -ENOMEM;
+ 
++      if (!net_eq(dev_net(dev), sock_net(sk)))
++              return 0;
++
+       dev_hold(dev);
+ 
+       if (is_vmalloc_addr(skb->head))
diff --git a/queue-4.4/sctp-replace-use-of-sockets_allocated-with-specified-macro.patch b/queue-4.4/sctp-replace-use-of-sockets_allocated-with-specified-macro.patch

new file mode 100644 (file)

index 0000000..b78a7d7
--- /dev/null
+++ b/queue-4.4/sctp-replace-use-of-sockets_allocated-with-specified-macro.patch
@@ -0,0 +1,44 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
+Date: Fri, 22 Dec 2017 10:15:20 -0800
+Subject: sctp: Replace use of sockets_allocated with specified macro.
+
+From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
+
+
+[ Upstream commit 8cb38a602478e9f806571f6920b0a3298aabf042 ]
+
+The patch (180d8cd942ce) replaced all uses of the struct sock fields
+memory_pressure, memory_allocated, sockets_allocated, and sysctl_mem
+with accessor macros. But the sockets_allocated field of the sctp sock was
+not replaced at all. Replace it now to unify the code.
+
+Fixes: 180d8cd942ce ("foundations of per-cgroup memory pressure controlling.")
+Cc: Glauber Costa <glommer@parallels.com>
+Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -4153,7 +4153,7 @@ static int sctp_init_sock(struct sock *s
+       SCTP_DBG_OBJCNT_INC(sock);
+ 
+       local_bh_disable();
+-      percpu_counter_inc(&sctp_sockets_allocated);
++      sk_sockets_allocated_inc(sk);
+       sock_prot_inuse_add(net, sk->sk_prot, 1);
+ 
+       /* Nothing can fail after this block, otherwise
+@@ -4197,7 +4197,7 @@ static void sctp_destroy_sock(struct soc
+       }
+       sctp_endpoint_free(sp->ep);
+       local_bh_disable();
+-      percpu_counter_dec(&sctp_sockets_allocated);
++      sk_sockets_allocated_dec(sk);
+       sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+       local_bh_enable();
+ }
diff --git a/queue-4.4/series b/queue-4.4/series

index 6f7b7b35a2f090b55d5193439f0b05b63e7718dc..c74abf176d3a17cd7e1ecb7c087148dba8ae12e2 100644 (file)
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -31,3 +31,19 @@ x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
  x86-mm-enable-cr4.pcide-on-supported-systems.patch
  x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch
  kbuild-add-fno-stack-check-to-kernel-build-options.patch
+ipv4-igmp-guard-against-silly-mtu-values.patch
+ipv6-mcast-better-catch-silly-mtu-values.patch
+net-igmp-use-correct-source-address-on-igmpv3-reports.patch
+netlink-add-netns-check-on-taps.patch
+net-qmi_wwan-add-sierra-em7565-1199-9091.patch
+net-reevalulate-autoflowlabel-setting-after-sysctl-setting.patch
+tcp-md5sig-use-skb-s-saddr-when-replying-to-an-incoming-segment.patch
+tg3-fix-rx-hang-on-mtu-change-with-5717-5719.patch
+net-ipv4-fix-for-a-race-condition-in-raw_sendmsg.patch
+net-mvmdio-disable-unprepare-clocks-in-eprobe_defer-case.patch
+sctp-replace-use-of-sockets_allocated-with-specified-macro.patch
+ipv4-fix-use-after-free-when-flushing-fib-tables.patch
+net-bridge-fix-early-call-to-br_stp_change_bridge_id-and-plug-newlink-leaks.patch
+net-fix-double-free-and-memory-corruption-in-get_net_ns_by_id.patch
+net-phy-micrel-ksz9031-reconfigure-autoneg-after-phy-autoneg-workaround.patch
+sock-free-skb-in-skb_complete_tx_timestamp-on-error.patch
diff --git a/queue-4.4/sock-free-skb-in-skb_complete_tx_timestamp-on-error.patch b/queue-4.4/sock-free-skb-in-skb_complete_tx_timestamp-on-error.patch

new file mode 100644 (file)

index 0000000..8f8e71b
--- /dev/null
+++ b/queue-4.4/sock-free-skb-in-skb_complete_tx_timestamp-on-error.patch
@@ -0,0 +1,47 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: Willem de Bruijn <willemb@google.com>
+Date: Wed, 13 Dec 2017 14:41:06 -0500
+Subject: sock: free skb in skb_complete_tx_timestamp on error
+
+From: Willem de Bruijn <willemb@google.com>
+
+
+[ Upstream commit 35b99dffc3f710cafceee6c8c6ac6a98eb2cb4bf ]
+
+skb_complete_tx_timestamp must ingest the skb it is passed. Call
+kfree_skb if the skb cannot be enqueued.
+
+Fixes: b245be1f4db1 ("net-timestamp: no-payload only sysctl")
+Fixes: 9ac25fc06375 ("net: fix socket refcounting in skb_complete_tx_timestamp()")
+Reported-by: Richard Cochran <richardcochran@gmail.com>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/skbuff.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -3676,7 +3676,7 @@ void skb_complete_tx_timestamp(struct sk
+       struct sock *sk = skb->sk;
+ 
+       if (!skb_may_tx_timestamp(sk, false))
+-              return;
++              goto err;
+ 
+       /* Take a reference to prevent skb_orphan() from freeing the socket,
+        * but only if the socket refcount is not zero.
+@@ -3685,7 +3685,11 @@ void skb_complete_tx_timestamp(struct sk
+               *skb_hwtstamps(skb) = *hwtstamps;
+               __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
+               sock_put(sk);
++              return;
+       }
++
++err:
++      kfree_skb(skb);
+ }
+ EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
+ 
diff --git a/queue-4.4/tcp-md5sig-use-skb-s-saddr-when-replying-to-an-incoming-segment.patch b/queue-4.4/tcp-md5sig-use-skb-s-saddr-when-replying-to-an-incoming-segment.patch

new file mode 100644 (file)

index 0000000..f38dc61
--- /dev/null
+++ b/queue-4.4/tcp-md5sig-use-skb-s-saddr-when-replying-to-an-incoming-segment.patch
@@ -0,0 +1,55 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: Christoph Paasch <cpaasch@apple.com>
+Date: Mon, 11 Dec 2017 00:05:46 -0800
+Subject: tcp md5sig: Use skb's saddr when replying to an incoming segment
+
+From: Christoph Paasch <cpaasch@apple.com>
+
+
+[ Upstream commit 30791ac41927ebd3e75486f9504b6d2280463bf0 ]
+
+The MD5-key that belongs to a connection is identified by the peer's
+IP-address. When we are in tcp_v4(6)_reqsk_send_ack(), we are replying
+to an incoming segment from tcp_check_req() that failed the seq-number
+checks.
+
+Thus, to find the correct key, we need to use the skb's saddr and not
+the daddr.
+
+This bug seems to have been there for quite a while, but probably went
+unnoticed because the consequences are not catastrophic. We will call
+tcp_v4_reqsk_send_ack only to send a challenge-ACK back to the peer,
+thus the connection doesn't really fail.
+
+Fixes: 9501f9722922 ("tcp md5sig: Let the caller pass appropriate key for tcp_v{4,6}_do_calc_md5_hash().")
+Signed-off-by: Christoph Paasch <cpaasch@apple.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_ipv4.c |    2 +-
+ net/ipv6/tcp_ipv6.c |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -823,7 +823,7 @@ static void tcp_v4_reqsk_send_ack(const
+                       tcp_time_stamp,
+                       req->ts_recent,
+                       0,
+-                      tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
++                      tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
+                                         AF_INET),
+                       inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
+                       ip_hdr(skb)->tos);
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -949,7 +949,7 @@ static void tcp_v6_reqsk_send_ack(const
+                       tcp_rsk(req)->rcv_nxt,
+                       req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+                       tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
+-                      tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
++                      tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
+                       0, 0);
+ }
+ 
diff --git a/queue-4.4/tg3-fix-rx-hang-on-mtu-change-with-5717-5719.patch b/queue-4.4/tg3-fix-rx-hang-on-mtu-change-with-5717-5719.patch

new file mode 100644 (file)

index 0000000..964b078
--- /dev/null
+++ b/queue-4.4/tg3-fix-rx-hang-on-mtu-change-with-5717-5719.patch
@@ -0,0 +1,37 @@
+From foo@baz Sun Dec 31 11:20:35 CET 2017
+From: Brian King <brking@linux.vnet.ibm.com>
+Date: Fri, 15 Dec 2017 15:21:50 -0600
+Subject: tg3: Fix rx hang on MTU change with 5717/5719
+
+From: Brian King <brking@linux.vnet.ibm.com>
+
+
+[ Upstream commit 748a240c589824e9121befb1cba5341c319885bc ]
+
+This fixes a hang issue seen when changing the MTU size from 1500 MTU
+to 9000 MTU on both 5717 and 5719 chips. In discussion with Broadcom,
+they've indicated that these chipsets have the same phy as the 57766
+chipset, so the same workarounds apply. This has been tested by IBM
+on both Power 8 and Power 9 systems as well as by Broadcom on x86
+hardware and has been confirmed to resolve the hang issue.
+
+Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -14228,7 +14228,9 @@ static int tg3_change_mtu(struct net_dev
+       /* Reset PHY, otherwise the read DMA engine will be in a mode that
+        * breaks all requests to 256 bytes.
+        */
+-      if (tg3_asic_rev(tp) == ASIC_REV_57766)
++      if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
++          tg3_asic_rev(tp) == ASIC_REV_5717 ||
++          tg3_asic_rev(tp) == ASIC_REV_5719)
+               reset_phy = true;
+ 
+       err = tg3_restart_hw(tp, reset_phy);
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 31 Dec 2017 11:29:58 +0000 (12:29 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 31 Dec 2017 11:29:58 +0000 (12:29 +0100)
queue-4.4/ipv4-fix-use-after-free-when-flushing-fib-tables.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/ipv4-igmp-guard-against-silly-mtu-values.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/ipv6-mcast-better-catch-silly-mtu-values.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/net-bridge-fix-early-call-to-br_stp_change_bridge_id-and-plug-newlink-leaks.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/net-fix-double-free-and-memory-corruption-in-get_net_ns_by_id.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/net-igmp-use-correct-source-address-on-igmpv3-reports.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/net-ipv4-fix-for-a-race-condition-in-raw_sendmsg.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/net-mvmdio-disable-unprepare-clocks-in-eprobe_defer-case.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/net-phy-micrel-ksz9031-reconfigure-autoneg-after-phy-autoneg-workaround.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/net-qmi_wwan-add-sierra-em7565-1199-9091.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/net-reevalulate-autoflowlabel-setting-after-sysctl-setting.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/netlink-add-netns-check-on-taps.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/sctp-replace-use-of-sockets_allocated-with-specified-macro.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/series		patch \| blob \| blame \| history
queue-4.4/sock-free-skb-in-skb_complete_tx_timestamp-on-error.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/tcp-md5sig-use-skb-s-saddr-when-replying-to-an-incoming-segment.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/tg3-fix-rx-hang-on-mtu-change-with-5717-5719.patch	[new file with mode: 0644]	patch \| blob