4.15-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 7 Feb 2018 19:30:05 +0000 (11:30 -0800)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 7 Feb 2018 19:30:05 +0000 (11:30 -0800)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 7 Feb 2018 19:30:05 +0000 (11:30 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 7 Feb 2018 19:30:05 +0000 (11:30 -0800)
diff --git a/queue-4.15/cls_u32-add-missing-rcu-annotation.patch b/queue-4.15/cls_u32-add-missing-rcu-annotation.patch

new file mode 100644 (file)

index 0000000..2de5c0c
--- /dev/null
+++ b/queue-4.15/cls_u32-add-missing-rcu-annotation.patch
@@ -0,0 +1,70 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 2 Feb 2018 16:02:22 +0100
+Subject: cls_u32: add missing RCU annotation.
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+
+[ Upstream commit 058a6c033488494a6b1477b05fe8e1a16e344462 ]
+
+In a couple of points of the control path, n->ht_down is currently
+accessed without the required RCU annotation. The accesses are
+safe, but sparse complains. Since we already hold the
+rtnl lock, let's use rtnl_dereference().
+
+Fixes: a1b7c5fd7fe9 ("net: sched: add cls_u32 offload hooks for netdevs")
+Fixes: de5df63228fc ("net: sched: cls_u32 changes to knode must appear atomic to readers")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_u32.c |   12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -544,6 +544,7 @@ static void u32_remove_hw_knode(struct t
+ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
+                               u32 flags)
+ {
++      struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
+       struct tcf_block *block = tp->chain->block;
+       struct tc_cls_u32_offload cls_u32 = {};
+       bool skip_sw = tc_skip_sw(flags);
+@@ -563,7 +564,7 @@ static int u32_replace_hw_knode(struct t
+       cls_u32.knode.sel = &n->sel;
+       cls_u32.knode.exts = &n->exts;
+       if (n->ht_down)
+-              cls_u32.knode.link_handle = n->ht_down->handle;
++              cls_u32.knode.link_handle = ht->handle;
+ 
+       err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+       if (err < 0) {
+@@ -840,8 +841,9 @@ static void u32_replace_knode(struct tcf
+ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
+                                        struct tc_u_knode *n)
+ {
+-      struct tc_u_knode *new;
++      struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
+       struct tc_u32_sel *s = &n->sel;
++      struct tc_u_knode *new;
+ 
+       new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
+                     GFP_KERNEL);
+@@ -859,11 +861,11 @@ static struct tc_u_knode *u32_init_knode
+       new->fshift = n->fshift;
+       new->res = n->res;
+       new->flags = n->flags;
+-      RCU_INIT_POINTER(new->ht_down, n->ht_down);
++      RCU_INIT_POINTER(new->ht_down, ht);
+ 
+       /* bump reference count as long as we hold pointer to structure */
+-      if (new->ht_down)
+-              new->ht_down->refcnt++;
++      if (ht)
++              ht->refcnt++;
+ 
+ #ifdef CONFIG_CLS_U32_PERF
+       /* Statistics may be incremented by readers during update
diff --git a/queue-4.15/ip6mr-fix-stale-iterator.patch b/queue-4.15/ip6mr-fix-stale-iterator.patch

new file mode 100644 (file)

index 0000000..fe375f7
--- /dev/null
+++ b/queue-4.15/ip6mr-fix-stale-iterator.patch
@@ -0,0 +1,114 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Wed, 31 Jan 2018 16:29:30 +0200
+Subject: ip6mr: fix stale iterator
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+
+[ Upstream commit 4adfa79fc254efb7b0eb3cd58f62c2c3f805f1ba ]
+
+When we dump the ip6mr mfc entries via proc, we initialize an iterator
+with the table to dump but we don't clear the cache pointer which might
+be initialized from a prior read on the same descriptor that ended. This
+can result in lock imbalance (an unnecessary unlock) leading to other
+crashes and hangs. Clear the cache pointer like ipmr does to fix the issue.
+Thanks for the reliable reproducer.
+
+Here's syzbot's trace:
+ WARNING: bad unlock balance detected!
+ 4.15.0-rc3+ #128 Not tainted
+ syzkaller971460/3195 is trying to release lock (mrt_lock) at:
+ [<000000006898068d>] ipmr_mfc_seq_stop+0xe1/0x130 net/ipv6/ip6mr.c:553
+ but there are no more locks to release!
+
+ other info that might help us debug this:
+ 1 lock held by syzkaller971460/3195:
+  #0:  (&p->lock){+.+.}, at: [<00000000744a6565>] seq_read+0xd5/0x13d0
+ fs/seq_file.c:165
+
+ stack backtrace:
+ CPU: 1 PID: 3195 Comm: syzkaller971460 Not tainted 4.15.0-rc3+ #128
+ Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
+ Google 01/01/2011
+ Call Trace:
+  __dump_stack lib/dump_stack.c:17 [inline]
+  dump_stack+0x194/0x257 lib/dump_stack.c:53
+  print_unlock_imbalance_bug+0x12f/0x140 kernel/locking/lockdep.c:3561
+  __lock_release kernel/locking/lockdep.c:3775 [inline]
+  lock_release+0x5f9/0xda0 kernel/locking/lockdep.c:4023
+  __raw_read_unlock include/linux/rwlock_api_smp.h:225 [inline]
+  _raw_read_unlock+0x1a/0x30 kernel/locking/spinlock.c:255
+  ipmr_mfc_seq_stop+0xe1/0x130 net/ipv6/ip6mr.c:553
+  traverse+0x3bc/0xa00 fs/seq_file.c:135
+  seq_read+0x96a/0x13d0 fs/seq_file.c:189
+  proc_reg_read+0xef/0x170 fs/proc/inode.c:217
+  do_loop_readv_writev fs/read_write.c:673 [inline]
+  do_iter_read+0x3db/0x5b0 fs/read_write.c:897
+  compat_readv+0x1bf/0x270 fs/read_write.c:1140
+  do_compat_preadv64+0xdc/0x100 fs/read_write.c:1189
+  C_SYSC_preadv fs/read_write.c:1209 [inline]
+  compat_SyS_preadv+0x3b/0x50 fs/read_write.c:1203
+  do_syscall_32_irqs_on arch/x86/entry/common.c:327 [inline]
+  do_fast_syscall_32+0x3ee/0xf9d arch/x86/entry/common.c:389
+  entry_SYSENTER_compat+0x51/0x60 arch/x86/entry/entry_64_compat.S:125
+ RIP: 0023:0xf7f73c79
+ RSP: 002b:00000000e574a15c EFLAGS: 00000292 ORIG_RAX: 000000000000014d
+ RAX: ffffffffffffffda RBX: 000000000000000f RCX: 0000000020a3afb0
+ RDX: 0000000000000001 RSI: 0000000000000067 RDI: 0000000000000000
+ RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
+ R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
+ R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
+ BUG: sleeping function called from invalid context at lib/usercopy.c:25
+ in_atomic(): 1, irqs_disabled(): 0, pid: 3195, name: syzkaller971460
+ INFO: lockdep is turned off.
+ CPU: 1 PID: 3195 Comm: syzkaller971460 Not tainted 4.15.0-rc3+ #128
+ Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
+ Google 01/01/2011
+ Call Trace:
+  __dump_stack lib/dump_stack.c:17 [inline]
+  dump_stack+0x194/0x257 lib/dump_stack.c:53
+  ___might_sleep+0x2b2/0x470 kernel/sched/core.c:6060
+  __might_sleep+0x95/0x190 kernel/sched/core.c:6013
+  __might_fault+0xab/0x1d0 mm/memory.c:4525
+  _copy_to_user+0x2c/0xc0 lib/usercopy.c:25
+  copy_to_user include/linux/uaccess.h:155 [inline]
+  seq_read+0xcb4/0x13d0 fs/seq_file.c:279
+  proc_reg_read+0xef/0x170 fs/proc/inode.c:217
+  do_loop_readv_writev fs/read_write.c:673 [inline]
+  do_iter_read+0x3db/0x5b0 fs/read_write.c:897
+  compat_readv+0x1bf/0x270 fs/read_write.c:1140
+  do_compat_preadv64+0xdc/0x100 fs/read_write.c:1189
+  C_SYSC_preadv fs/read_write.c:1209 [inline]
+  compat_SyS_preadv+0x3b/0x50 fs/read_write.c:1203
+  do_syscall_32_irqs_on arch/x86/entry/common.c:327 [inline]
+  do_fast_syscall_32+0x3ee/0xf9d arch/x86/entry/common.c:389
+  entry_SYSENTER_compat+0x51/0x60 arch/x86/entry/entry_64_compat.S:125
+ RIP: 0023:0xf7f73c79
+ RSP: 002b:00000000e574a15c EFLAGS: 00000292 ORIG_RAX: 000000000000014d
+ RAX: ffffffffffffffda RBX: 000000000000000f RCX: 0000000020a3afb0
+ RDX: 0000000000000001 RSI: 0000000000000067 RDI: 0000000000000000
+ RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
+ R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
+ R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
+ WARNING: CPU: 1 PID: 3195 at lib/usercopy.c:26 _copy_to_user+0xb5/0xc0
+ lib/usercopy.c:26
+
+Reported-by: syzbot <bot+eceb3204562c41a438fa1f2335e0fe4f6886d669@syzkaller.appspotmail.com>
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6mr.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/ip6mr.c
++++ b/net/ipv6/ip6mr.c
+@@ -495,6 +495,7 @@ static void *ipmr_mfc_seq_start(struct s
+               return ERR_PTR(-ENOENT);
+ 
+       it->mrt = mrt;
++      it->cache = NULL;
+       return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
+               : SEQ_START_TOKEN;
+ }
diff --git a/queue-4.15/ipv6-addrconf-break-critical-section-in-addrconf_verify_rtnl.patch b/queue-4.15/ipv6-addrconf-break-critical-section-in-addrconf_verify_rtnl.patch

new file mode 100644 (file)

index 0000000..df6b7a7
--- /dev/null
+++ b/queue-4.15/ipv6-addrconf-break-critical-section-in-addrconf_verify_rtnl.patch
@@ -0,0 +1,116 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 26 Jan 2018 16:10:43 -0800
+Subject: ipv6: addrconf: break critical section in addrconf_verify_rtnl()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit e64e469b9a2c22d41b3dd7172118760cec22d473 ]
+
+Heiner reported a lockdep splat [1]
+
+This is caused by attempting GFP_KERNEL allocation while RCU lock is
+held and BH blocked.
+
+We believe that addrconf_verify_rtnl() could run for a long period,
+so instead of using GFP_ATOMIC here as Ido suggested, we should break
+the critical section and restart it after the allocation.
+
+[1]
+[86220.125562] =============================
+[86220.125586] WARNING: suspicious RCU usage
+[86220.125612] 4.15.0-rc7-next-20180110+ #7 Not tainted
+[86220.125641] -----------------------------
+[86220.125666] kernel/sched/core.c:6026 Illegal context switch in RCU-bh read-side critical section!
+[86220.125711]
+               other info that might help us debug this:
+
+[86220.125755]
+               rcu_scheduler_active = 2, debug_locks = 1
+[86220.125792] 4 locks held by kworker/0:2/1003:
+[86220.125817]  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000da8e9b73>] process_one_work+0x1de/0x680
+[86220.125895]  #1:  ((addr_chk_work).work){+.+.}, at: [<00000000da8e9b73>] process_one_work+0x1de/0x680
+[86220.125959]  #2:  (rtnl_mutex){+.+.}, at: [<00000000b06d9510>] rtnl_lock+0x12/0x20
+[86220.126017]  #3:  (rcu_read_lock_bh){....}, at: [<00000000aef52299>] addrconf_verify_rtnl+0x1e/0x510 [ipv6]
+[86220.126111]
+               stack backtrace:
+[86220.126142] CPU: 0 PID: 1003 Comm: kworker/0:2 Not tainted 4.15.0-rc7-next-20180110+ #7
+[86220.126185] Hardware name: ZOTAC ZBOX-CI321NANO/ZBOX-CI321NANO, BIOS B246P105 06/01/2015
+[86220.126250] Workqueue: ipv6_addrconf addrconf_verify_work [ipv6]
+[86220.126288] Call Trace:
+[86220.126312]  dump_stack+0x70/0x9e
+[86220.126337]  lockdep_rcu_suspicious+0xce/0xf0
+[86220.126365]  ___might_sleep+0x1d3/0x240
+[86220.126390]  __might_sleep+0x45/0x80
+[86220.126416]  kmem_cache_alloc_trace+0x53/0x250
+[86220.126458]  ? ipv6_add_addr+0xfe/0x6e0 [ipv6]
+[86220.126498]  ipv6_add_addr+0xfe/0x6e0 [ipv6]
+[86220.126538]  ipv6_create_tempaddr+0x24d/0x430 [ipv6]
+[86220.126580]  ? ipv6_create_tempaddr+0x24d/0x430 [ipv6]
+[86220.126623]  addrconf_verify_rtnl+0x339/0x510 [ipv6]
+[86220.126664]  ? addrconf_verify_rtnl+0x339/0x510 [ipv6]
+[86220.126708]  addrconf_verify_work+0xe/0x20 [ipv6]
+[86220.126738]  process_one_work+0x258/0x680
+[86220.126765]  worker_thread+0x35/0x3f0
+[86220.126790]  kthread+0x124/0x140
+[86220.126813]  ? process_one_work+0x680/0x680
+[86220.126839]  ? kthread_create_worker_on_cpu+0x40/0x40
+[86220.126869]  ? umh_complete+0x40/0x40
+[86220.126893]  ? call_usermodehelper_exec_async+0x12a/0x160
+[86220.126926]  ret_from_fork+0x4b/0x60
+[86220.126999] BUG: sleeping function called from invalid context at mm/slab.h:420
+[86220.127041] in_atomic(): 1, irqs_disabled(): 0, pid: 1003, name: kworker/0:2
+[86220.127082] 4 locks held by kworker/0:2/1003:
+[86220.127107]  #0:  ((wq_completion)"%s"("ipv6_addrconf")){+.+.}, at: [<00000000da8e9b73>] process_one_work+0x1de/0x680
+[86220.127179]  #1:  ((addr_chk_work).work){+.+.}, at: [<00000000da8e9b73>] process_one_work+0x1de/0x680
+[86220.127242]  #2:  (rtnl_mutex){+.+.}, at: [<00000000b06d9510>] rtnl_lock+0x12/0x20
+[86220.127300]  #3:  (rcu_read_lock_bh){....}, at: [<00000000aef52299>] addrconf_verify_rtnl+0x1e/0x510 [ipv6]
+[86220.127414] CPU: 0 PID: 1003 Comm: kworker/0:2 Not tainted 4.15.0-rc7-next-20180110+ #7
+[86220.127463] Hardware name: ZOTAC ZBOX-CI321NANO/ZBOX-CI321NANO, BIOS B246P105 06/01/2015
+[86220.127528] Workqueue: ipv6_addrconf addrconf_verify_work [ipv6]
+[86220.127568] Call Trace:
+[86220.127591]  dump_stack+0x70/0x9e
+[86220.127616]  ___might_sleep+0x14d/0x240
+[86220.127644]  __might_sleep+0x45/0x80
+[86220.127672]  kmem_cache_alloc_trace+0x53/0x250
+[86220.127717]  ? ipv6_add_addr+0xfe/0x6e0 [ipv6]
+[86220.127762]  ipv6_add_addr+0xfe/0x6e0 [ipv6]
+[86220.127807]  ipv6_create_tempaddr+0x24d/0x430 [ipv6]
+[86220.127854]  ? ipv6_create_tempaddr+0x24d/0x430 [ipv6]
+[86220.127903]  addrconf_verify_rtnl+0x339/0x510 [ipv6]
+[86220.127950]  ? addrconf_verify_rtnl+0x339/0x510 [ipv6]
+[86220.127998]  addrconf_verify_work+0xe/0x20 [ipv6]
+[86220.128032]  process_one_work+0x258/0x680
+[86220.128063]  worker_thread+0x35/0x3f0
+[86220.128091]  kthread+0x124/0x140
+[86220.128117]  ? process_one_work+0x680/0x680
+[86220.128146]  ? kthread_create_worker_on_cpu+0x40/0x40
+[86220.128180]  ? umh_complete+0x40/0x40
+[86220.128207]  ? call_usermodehelper_exec_async+0x12a/0x160
+[86220.128243]  ret_from_fork+0x4b/0x60
+
+Fixes: f3d9832e56c4 ("ipv6: addrconf: cleanup locking in ipv6_add_addr")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Heiner Kallweit <hkallweit1@gmail.com>
+Reviewed-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -4352,9 +4352,11 @@ restart:
+                                               spin_lock(&ifpub->lock);
+                                               ifpub->regen_count = 0;
+                                               spin_unlock(&ifpub->lock);
++                                              rcu_read_unlock_bh();
+                                               ipv6_create_tempaddr(ifpub, ifp, true);
+                                               in6_ifa_put(ifpub);
+                                               in6_ifa_put(ifp);
++                                              rcu_read_lock_bh();
+                                               goto restart;
+                                       }
+                               } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
diff --git a/queue-4.15/ipv6-change-route-cache-aging-logic.patch b/queue-4.15/ipv6-change-route-cache-aging-logic.patch

new file mode 100644 (file)

index 0000000..08784ad
--- /dev/null
+++ b/queue-4.15/ipv6-change-route-cache-aging-logic.patch
@@ -0,0 +1,68 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Wei Wang <weiwan@google.com>
+Date: Fri, 26 Jan 2018 11:40:17 -0800
+Subject: ipv6: change route cache aging logic
+
+From: Wei Wang <weiwan@google.com>
+
+
+[ Upstream commit 31afeb425f7fad8bcf9561aeb0b8405479f97a98 ]
+
+In current route cache aging logic, if a route has both RTF_EXPIRE and
+RTF_GATEWAY set, the route will only be removed if the neighbor cache
+has no NTF_ROUTER flag. Otherwise, even if the route has expired, it
+won't get deleted.
+Fix this logic to always check if the route has expired first and then
+do the gateway neighbor cache check if the previous check decides not to
+remove the exception entry.
+
+Fixes: 1859bac04fb6 ("ipv6: remove from fib tree aged out RTF_CACHE dst")
+Signed-off-by: Wei Wang <weiwan@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/route.c |   20 ++++++++++++--------
+ 1 file changed, 12 insertions(+), 8 deletions(-)
+
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1586,12 +1586,19 @@ static void rt6_age_examine_exception(st
+        * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
+        * expired, independently from their aging, as per RFC 8201 section 4
+        */
+-      if (!(rt->rt6i_flags & RTF_EXPIRES) &&
+-          time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
+-              RT6_TRACE("aging clone %p\n", rt);
++      if (!(rt->rt6i_flags & RTF_EXPIRES)) {
++              if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
++                      RT6_TRACE("aging clone %p\n", rt);
++                      rt6_remove_exception(bucket, rt6_ex);
++                      return;
++              }
++      } else if (time_after(jiffies, rt->dst.expires)) {
++              RT6_TRACE("purging expired route %p\n", rt);
+               rt6_remove_exception(bucket, rt6_ex);
+               return;
+-      } else if (rt->rt6i_flags & RTF_GATEWAY) {
++      }
++
++      if (rt->rt6i_flags & RTF_GATEWAY) {
+               struct neighbour *neigh;
+               __u8 neigh_flags = 0;
+ 
+@@ -1606,11 +1613,8 @@ static void rt6_age_examine_exception(st
+                       rt6_remove_exception(bucket, rt6_ex);
+                       return;
+               }
+-      } else if (__rt6_check_expired(rt)) {
+-              RT6_TRACE("purging expired route %p\n", rt);
+-              rt6_remove_exception(bucket, rt6_ex);
+-              return;
+       }
++
+       gc_args->more++;
+ }
+ 
diff --git a/queue-4.15/ipv6-fix-so_reuseport-udp-socket-with-implicit-sk_ipv6only.patch b/queue-4.15/ipv6-fix-so_reuseport-udp-socket-with-implicit-sk_ipv6only.patch

new file mode 100644 (file)

index 0000000..efe6446
--- /dev/null
+++ b/queue-4.15/ipv6-fix-so_reuseport-udp-socket-with-implicit-sk_ipv6only.patch
@@ -0,0 +1,82 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Martin KaFai Lau <kafai@fb.com>
+Date: Wed, 24 Jan 2018 23:15:27 -0800
+Subject: ipv6: Fix SO_REUSEPORT UDP socket with implicit sk_ipv6only
+
+From: Martin KaFai Lau <kafai@fb.com>
+
+
+[ Upstream commit 7ece54a60ee2ba7a386308cae73c790bd580589c ]
+
+If a sk_v6_rcv_saddr is !IPV6_ADDR_ANY and !IPV6_ADDR_MAPPED, it
+implicitly implies it is an ipv6only socket.  However, in inet6_bind(),
+this addr_type checking and setting sk->sk_ipv6only to 1 are only done
+after sk->sk_prot->get_port(sk, snum) has been completed successfully.
+
+This inconsistency between sk_v6_rcv_saddr and sk_ipv6only confuses
+the 'get_port()'.
+
+In particular, when binding SO_REUSEPORT UDP sockets,
+udp_reuseport_add_sock(sk,...) is called.  udp_reuseport_add_sock()
+checks "ipv6_only_sock(sk2) == ipv6_only_sock(sk)" before adding sk to
+sk2->sk_reuseport_cb.  In this case, ipv6_only_sock(sk2) could be
+1 while ipv6_only_sock(sk) is still 0 here.  The end result is,
+reuseport_alloc(sk) is called instead of adding sk to the existing
+sk2->sk_reuseport_cb.
+
+It can be reproduced by binding two SO_REUSEPORT UDP sockets on an
+IPv6 address (!ANY and !MAPPED).  Only one of the sockets will
+receive packets.
+
+The fix is to set the implicit sk_ipv6only before calling get_port().
+The original sk_ipv6only has to be saved such that it can be restored
+in case get_port() failed.  The situation is similar to the
+inet_reset_saddr(sk) after get_port() has failed.
+
+Thanks to Calvin Owens <calvinowens@fb.com> who created an easy
+reproduction which leads to a fix.
+
+Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
+Signed-off-by: Martin KaFai Lau <kafai@fb.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/af_inet6.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/net/ipv6/af_inet6.c
++++ b/net/ipv6/af_inet6.c
+@@ -284,6 +284,7 @@ int inet6_bind(struct socket *sock, stru
+       struct net *net = sock_net(sk);
+       __be32 v4addr = 0;
+       unsigned short snum;
++      bool saved_ipv6only;
+       int addr_type = 0;
+       int err = 0;
+ 
+@@ -389,19 +390,21 @@ int inet6_bind(struct socket *sock, stru
+       if (!(addr_type & IPV6_ADDR_MULTICAST))
+               np->saddr = addr->sin6_addr;
+ 
++      saved_ipv6only = sk->sk_ipv6only;
++      if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED)
++              sk->sk_ipv6only = 1;
++
+       /* Make sure we are allowed to bind here. */
+       if ((snum || !inet->bind_address_no_port) &&
+           sk->sk_prot->get_port(sk, snum)) {
++              sk->sk_ipv6only = saved_ipv6only;
+               inet_reset_saddr(sk);
+               err = -EADDRINUSE;
+               goto out;
+       }
+ 
+-      if (addr_type != IPV6_ADDR_ANY) {
++      if (addr_type != IPV6_ADDR_ANY)
+               sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
+-              if (addr_type != IPV6_ADDR_MAPPED)
+-                      sk->sk_ipv6only = 1;
+-      }
+       if (snum)
+               sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
+       inet->inet_sport = htons(inet->inet_num);
diff --git a/queue-4.15/net-igmp-add-a-missing-rcu-locking-section.patch b/queue-4.15/net-igmp-add-a-missing-rcu-locking-section.patch

new file mode 100644 (file)

index 0000000..70b1aa1
--- /dev/null
+++ b/queue-4.15/net-igmp-add-a-missing-rcu-locking-section.patch
@@ -0,0 +1,81 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 1 Feb 2018 10:26:57 -0800
+Subject: net: igmp: add a missing rcu locking section
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit e7aadb27a5415e8125834b84a74477bfbee4eff5 ]
+
+Newly added igmpv3_get_srcaddr() needs to be called under rcu lock.
+
+Timer callbacks do not ensure this locking.
+
+=============================
+WARNING: suspicious RCU usage
+4.15.0+ #200 Not tainted
+-----------------------------
+./include/linux/inetdevice.h:216 suspicious rcu_dereference_check() usage!
+
+other info that might help us debug this:
+
+rcu_scheduler_active = 2, debug_locks = 1
+3 locks held by syzkaller616973/4074:
+ #0:  (&mm->mmap_sem){++++}, at: [<00000000bfce669e>] __do_page_fault+0x32d/0xc90 arch/x86/mm/fault.c:1355
+ #1:  ((&im->timer)){+.-.}, at: [<00000000619d2f71>] lockdep_copy_map include/linux/lockdep.h:178 [inline]
+ #1:  ((&im->timer)){+.-.}, at: [<00000000619d2f71>] call_timer_fn+0x1c6/0x820 kernel/time/timer.c:1316
+ #2:  (&(&im->lock)->rlock){+.-.}, at: [<000000005f833c5c>] spin_lock_bh include/linux/spinlock.h:315 [inline]
+ #2:  (&(&im->lock)->rlock){+.-.}, at: [<000000005f833c5c>] igmpv3_send_report+0x98/0x5b0 net/ipv4/igmp.c:600
+
+stack backtrace:
+CPU: 0 PID: 4074 Comm: syzkaller616973 Not tainted 4.15.0+ #200
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:17 [inline]
+ dump_stack+0x194/0x257 lib/dump_stack.c:53
+ lockdep_rcu_suspicious+0x123/0x170 kernel/locking/lockdep.c:4592
+ __in_dev_get_rcu include/linux/inetdevice.h:216 [inline]
+ igmpv3_get_srcaddr net/ipv4/igmp.c:329 [inline]
+ igmpv3_newpack+0xeef/0x12e0 net/ipv4/igmp.c:389
+ add_grhead.isra.27+0x235/0x300 net/ipv4/igmp.c:432
+ add_grec+0xbd3/0x1170 net/ipv4/igmp.c:565
+ igmpv3_send_report+0xd5/0x5b0 net/ipv4/igmp.c:605
+ igmp_send_report+0xc43/0x1050 net/ipv4/igmp.c:722
+ igmp_timer_expire+0x322/0x5c0 net/ipv4/igmp.c:831
+ call_timer_fn+0x228/0x820 kernel/time/timer.c:1326
+ expire_timers kernel/time/timer.c:1363 [inline]
+ __run_timers+0x7ee/0xb70 kernel/time/timer.c:1666
+ run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692
+ __do_softirq+0x2d7/0xb85 kernel/softirq.c:285
+ invoke_softirq kernel/softirq.c:365 [inline]
+ irq_exit+0x1cc/0x200 kernel/softirq.c:405
+ exiting_irq arch/x86/include/asm/apic.h:541 [inline]
+ smp_apic_timer_interrupt+0x16b/0x700 arch/x86/kernel/apic/apic.c:1052
+ apic_timer_interrupt+0xa9/0xb0 arch/x86/entry/entry_64.S:938
+
+Fixes: a46182b00290 ("net: igmp: Use correct source address on IGMPv3 reports")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/igmp.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -386,7 +386,11 @@ static struct sk_buff *igmpv3_newpack(st
+       pip->frag_off = htons(IP_DF);
+       pip->ttl      = 1;
+       pip->daddr    = fl4.daddr;
++
++      rcu_read_lock();
+       pip->saddr    = igmpv3_get_srcaddr(dev, &fl4);
++      rcu_read_unlock();
++
+       pip->protocol = IPPROTO_IGMP;
+       pip->tot_len  = 0;      /* filled in later */
+       ip_select_ident(net, skb, NULL);
diff --git a/queue-4.15/net-ipv6-send-unsolicited-na-after-dad.patch b/queue-4.15/net-ipv6-send-unsolicited-na-after-dad.patch

new file mode 100644 (file)

index 0000000..24bf5ee
--- /dev/null
+++ b/queue-4.15/net-ipv6-send-unsolicited-na-after-dad.patch
@@ -0,0 +1,119 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: David Ahern <dsahern@gmail.com>
+Date: Thu, 25 Jan 2018 20:16:29 -0800
+Subject: net: ipv6: send unsolicited NA after DAD
+
+From: David Ahern <dsahern@gmail.com>
+
+
+[ Upstream commit c76fe2d98c726224a975a0d0198c3fb50406d325 ]
+
+Unsolicited IPv6 neighbor advertisements should be sent after DAD
+completes. Update ndisc_send_unsol_na to skip tentative, non-optimistic
+addresses and have those sent by addrconf_dad_completed after DAD.
+
+Fixes: 4a6e3c5def13c ("net: ipv6: send unsolicited NA on admin up")
+Reported-by: Vivek Venkatraman <vivek@cumulusnetworks.com>
+Signed-off-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c |   30 ++++++++++++++++++++++++++----
+ net/ipv6/ndisc.c    |    5 +++++
+ 2 files changed, 31 insertions(+), 4 deletions(-)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -186,7 +186,8 @@ static struct rt6_info *addrconf_get_pre
+ 
+ static void addrconf_dad_start(struct inet6_ifaddr *ifp);
+ static void addrconf_dad_work(struct work_struct *w);
+-static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id);
++static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
++                                 bool send_na);
+ static void addrconf_dad_run(struct inet6_dev *idev);
+ static void addrconf_rs_timer(struct timer_list *t);
+ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
+@@ -3833,12 +3834,17 @@ static void addrconf_dad_begin(struct in
+            idev->cnf.accept_dad < 1) ||
+           !(ifp->flags&IFA_F_TENTATIVE) ||
+           ifp->flags & IFA_F_NODAD) {
++              bool send_na = false;
++
++              if (ifp->flags & IFA_F_TENTATIVE &&
++                  !(ifp->flags & IFA_F_OPTIMISTIC))
++                      send_na = true;
+               bump_id = ifp->flags & IFA_F_TENTATIVE;
+               ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
+               spin_unlock(&ifp->lock);
+               read_unlock_bh(&idev->lock);
+ 
+-              addrconf_dad_completed(ifp, bump_id);
++              addrconf_dad_completed(ifp, bump_id, send_na);
+               return;
+       }
+ 
+@@ -3967,16 +3973,21 @@ static void addrconf_dad_work(struct wor
+       }
+ 
+       if (ifp->dad_probes == 0) {
++              bool send_na = false;
++
+               /*
+                * DAD was successful
+                */
+ 
++              if (ifp->flags & IFA_F_TENTATIVE &&
++                  !(ifp->flags & IFA_F_OPTIMISTIC))
++                      send_na = true;
+               bump_id = ifp->flags & IFA_F_TENTATIVE;
+               ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
+               spin_unlock(&ifp->lock);
+               write_unlock_bh(&idev->lock);
+ 
+-              addrconf_dad_completed(ifp, bump_id);
++              addrconf_dad_completed(ifp, bump_id, send_na);
+ 
+               goto out;
+       }
+@@ -4014,7 +4025,8 @@ static bool ipv6_lonely_lladdr(struct in
+       return true;
+ }
+ 
+-static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id)
++static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
++                                 bool send_na)
+ {
+       struct net_device *dev = ifp->idev->dev;
+       struct in6_addr lladdr;
+@@ -4046,6 +4058,16 @@ static void addrconf_dad_completed(struc
+       if (send_mld)
+               ipv6_mc_dad_complete(ifp->idev);
+ 
++      /* send unsolicited NA if enabled */
++      if (send_na &&
++          (ifp->idev->cnf.ndisc_notify ||
++           dev_net(dev)->ipv6.devconf_all->ndisc_notify)) {
++              ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr,
++                            /*router=*/ !!ifp->idev->cnf.forwarding,
++                            /*solicited=*/ false, /*override=*/ true,
++                            /*inc_opt=*/ true);
++      }
++
+       if (send_rs) {
+               /*
+                *      If a host as already performed a random delay
+--- a/net/ipv6/ndisc.c
++++ b/net/ipv6/ndisc.c
+@@ -566,6 +566,11 @@ static void ndisc_send_unsol_na(struct n
+ 
+       read_lock_bh(&idev->lock);
+       list_for_each_entry(ifa, &idev->addr_list, if_list) {
++              /* skip tentative addresses until dad completes */
++              if (ifa->flags & IFA_F_TENTATIVE &&
++                  !(ifa->flags & IFA_F_OPTIMISTIC))
++                      continue;
++
+               ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr,
+                             /*router=*/ !!idev->cnf.forwarding,
+                             /*solicited=*/ false, /*override=*/ true,
diff --git a/queue-4.15/net-sched-fix-use-after-free-in-tcf_block_put_ext.patch b/queue-4.15/net-sched-fix-use-after-free-in-tcf_block_put_ext.patch

new file mode 100644 (file)

index 0000000..26160c5
--- /dev/null
+++ b/queue-4.15/net-sched-fix-use-after-free-in-tcf_block_put_ext.patch
@@ -0,0 +1,84 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Jiri Pirko <jiri@mellanox.com>
+Date: Fri, 8 Dec 2017 19:27:27 +0100
+Subject: net: sched: fix use-after-free in tcf_block_put_ext
+
+From: Jiri Pirko <jiri@mellanox.com>
+
+
+[ Upstream commit df45bf84e4f5a48f23d4b1a07d21d566e8b587b2 ]
+
+Since the block is freed with last chain being put, once we reach the
+end of iteration of list_for_each_entry_safe, the block may be
+already freed. I'm hitting this only by creating and deleting clsact:
+
+[  202.171952] ==================================================================
+[  202.180182] BUG: KASAN: use-after-free in tcf_block_put_ext+0x240/0x390
+[  202.187590] Read of size 8 at addr ffff880225539a80 by task tc/796
+[  202.194508]
+[  202.196185] CPU: 0 PID: 796 Comm: tc Not tainted 4.15.0-rc2jiri+ #5
+[  202.203200] Hardware name: Mellanox Technologies Ltd. "MSN2100-CB2F"/"SA001017", BIOS 5.6.5 06/07/2016
+[  202.213613] Call Trace:
+[  202.216369]  dump_stack+0xda/0x169
+[  202.220192]  ? dma_virt_map_sg+0x147/0x147
+[  202.224790]  ? show_regs_print_info+0x54/0x54
+[  202.229691]  ? tcf_chain_destroy+0x1dc/0x250
+[  202.234494]  print_address_description+0x83/0x3d0
+[  202.239781]  ? tcf_block_put_ext+0x240/0x390
+[  202.244575]  kasan_report+0x1ba/0x460
+[  202.248707]  ? tcf_block_put_ext+0x240/0x390
+[  202.253518]  tcf_block_put_ext+0x240/0x390
+[  202.258117]  ? tcf_chain_flush+0x290/0x290
+[  202.262708]  ? qdisc_hash_del+0x82/0x1a0
+[  202.267111]  ? qdisc_hash_add+0x50/0x50
+[  202.271411]  ? __lock_is_held+0x5f/0x1a0
+[  202.275843]  clsact_destroy+0x3d/0x80 [sch_ingress]
+[  202.281323]  qdisc_destroy+0xcb/0x240
+[  202.285445]  qdisc_graft+0x216/0x7b0
+[  202.289497]  tc_get_qdisc+0x260/0x560
+
+Fix this by holding the block also by chain 0 and put chain 0
+explicitly, out of the list_for_each_entry_safe loop at the very
+end of tcf_block_put_ext.
+
+Fixes: efbf78973978 ("net_sched: get rid of rcu_barrier() in tcf_block_put_ext()")
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_api.c |   13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -343,23 +343,24 @@ void tcf_block_put_ext(struct tcf_block
+ 
+       if (!block)
+               return;
+-      /* Hold a refcnt for all chains, except 0, so that they don't disappear
++      /* Hold a refcnt for all chains, so that they don't disappear
+        * while we are iterating.
+        */
+       list_for_each_entry(chain, &block->chain_list, list)
+-              if (chain->index)
+-                      tcf_chain_hold(chain);
++              tcf_chain_hold(chain);
+ 
+       list_for_each_entry(chain, &block->chain_list, list)
+               tcf_chain_flush(chain);
+ 
+       tcf_block_offload_unbind(block, q, ei);
+ 
+-      /* At this point, all the chains should have refcnt >= 1. Block will be
+-       * freed after all chains are gone.
+-       */
++      /* At this point, all the chains should have refcnt >= 1. */
+       list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+               tcf_chain_put(chain);
++
++      /* Finally, put chain 0 and allow block to be freed. */
++      chain = list_first_entry(&block->chain_list, struct tcf_chain, list);
++      tcf_chain_put(chain);
+ }
+ EXPORT_SYMBOL(tcf_block_put_ext);
+ 
diff --git a/queue-4.15/net_sched-get-rid-of-rcu_barrier-in-tcf_block_put_ext.patch b/queue-4.15/net_sched-get-rid-of-rcu_barrier-in-tcf_block_put_ext.patch

new file mode 100644 (file)

index 0000000..56feead
--- /dev/null
+++ b/queue-4.15/net_sched-get-rid-of-rcu_barrier-in-tcf_block_put_ext.patch
@@ -0,0 +1,126 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Mon, 4 Dec 2017 10:48:18 -0800
+Subject: net_sched: get rid of rcu_barrier() in tcf_block_put_ext()
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit efbf78973978b0d25af59bc26c8013a942af6e64 ]
+
+Both Eric and Paolo noticed the rcu_barrier() we use in
+tcf_block_put_ext() could be a performance bottleneck when
+we have a lot of tc classes.
+
+Paolo provided the following to demonstrate the issue:
+
+tc qdisc add dev lo root htb
+for I in `seq 1 1000`; do
+        tc class add dev lo parent 1: classid 1:$I htb rate 100kbit
+        tc qdisc add dev lo parent 1:$I handle $((I + 1)): htb
+        for J in `seq 1 10`; do
+                tc filter add dev lo parent $((I + 1)): u32 match ip src 1.1.1.$J
+        done
+done
+time tc qdisc del dev lo root
+
+real    0m54.764s
+user    0m0.023s
+sys     0m0.000s
+
+The rcu_barrier() there is to ensure we free the block after all chains
+are gone, that is, to queue tcf_block_put_final() at the tail of workqueue.
+We can achieve this ordering requirement by refcnt'ing tcf block instead,
+that is, the tcf block is freed only when the last chain in this block is
+gone. This also simplifies the code.
+
+Paolo reported after this patch we get:
+
+real    0m0.017s
+user    0m0.000s
+sys     0m0.017s
+
+Tested-by: Paolo Abeni <pabeni@redhat.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Jiri Pirko <jiri@mellanox.com>
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sch_generic.h |    1 -
+ net/sched/cls_api.c       |   30 +++++++++---------------------
+ 2 files changed, 9 insertions(+), 22 deletions(-)
+
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -280,7 +280,6 @@ struct tcf_block {
+       struct net *net;
+       struct Qdisc *q;
+       struct list_head cb_list;
+-      struct work_struct work;
+ };
+ 
+ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -217,8 +217,12 @@ static void tcf_chain_flush(struct tcf_c
+ 
+ static void tcf_chain_destroy(struct tcf_chain *chain)
+ {
++      struct tcf_block *block = chain->block;
++
+       list_del(&chain->list);
+       kfree(chain);
++      if (list_empty(&block->chain_list))
++              kfree(block);
+ }
+ 
+ static void tcf_chain_hold(struct tcf_chain *chain)
+@@ -329,27 +333,13 @@ int tcf_block_get(struct tcf_block **p_b
+ }
+ EXPORT_SYMBOL(tcf_block_get);
+ 
+-static void tcf_block_put_final(struct work_struct *work)
+-{
+-      struct tcf_block *block = container_of(work, struct tcf_block, work);
+-      struct tcf_chain *chain, *tmp;
+-
+-      rtnl_lock();
+-
+-      /* At this point, all the chains should have refcnt == 1. */
+-      list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+-              tcf_chain_put(chain);
+-      rtnl_unlock();
+-      kfree(block);
+-}
+-
+ /* XXX: Standalone actions are not allowed to jump to any chain, and bound
+  * actions should be all removed after flushing.
+  */
+ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
+                      struct tcf_block_ext_info *ei)
+ {
+-      struct tcf_chain *chain;
++      struct tcf_chain *chain, *tmp;
+ 
+       if (!block)
+               return;
+@@ -365,13 +355,11 @@ void tcf_block_put_ext(struct tcf_block
+ 
+       tcf_block_offload_unbind(block, q, ei);
+ 
+-      INIT_WORK(&block->work, tcf_block_put_final);
+-      /* Wait for existing RCU callbacks to cool down, make sure their works
+-       * have been queued before this. We can not flush pending works here
+-       * because we are holding the RTNL lock.
++      /* At this point, all the chains should have refcnt >= 1. Block will be
++       * freed after all chains are gone.
+        */
+-      rcu_barrier();
+-      tcf_queue_work(&block->work);
++      list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
++              tcf_chain_put(chain);
+ }
+ EXPORT_SYMBOL(tcf_block_put_ext);
+ 
diff --git a/queue-4.15/qlcnic-fix-deadlock-bug.patch b/queue-4.15/qlcnic-fix-deadlock-bug.patch

new file mode 100644 (file)

index 0000000..9ad72c3
--- /dev/null
+++ b/queue-4.15/qlcnic-fix-deadlock-bug.patch
@@ -0,0 +1,191 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Junxiao Bi <junxiao.bi@oracle.com>
+Date: Mon, 29 Jan 2018 17:53:42 +0800
+Subject: qlcnic: fix deadlock bug
+
+From: Junxiao Bi <junxiao.bi@oracle.com>
+
+
+[ Upstream commit 233ac3891607f501f08879134d623b303838f478 ]
+
+The following soft lockup was caught. This is a deadlock caused by
+recursive locking.
+
+Process kworker/u40:1:28016 was holding spin lock "mbx->queue_lock" in
+qlcnic_83xx_mailbox_worker() when a softirq came in and tried to take the
+same spin lock in qlcnic_83xx_enqueue_mbx_cmd(). This lock should be taken
+with bottom halves disabled.
+
+[161846.962125] NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [kworker/u40:1:28016]
+[161846.962367] Modules linked in: tun ocfs2 xen_netback xen_blkback xen_gntalloc xen_gntdev xen_evtchn xenfs xen_privcmd autofs4 ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs bnx2fc fcoe libfcoe libfc sunrpc 8021q mrp garp bridge stp llc bonding dm_round_robin dm_multipath iTCO_wdt iTCO_vendor_support pcspkr sb_edac edac_core i2c_i801 shpchp lpc_ich mfd_core ioatdma ipmi_devintf ipmi_si ipmi_msghandler sg ext4 jbd2 mbcache2 sr_mod cdrom sd_mod igb i2c_algo_bit i2c_core ahci libahci megaraid_sas ixgbe dca ptp pps_core vxlan udp_tunnel ip6_udp_tunnel qla2xxx scsi_transport_fc qlcnic crc32c_intel be2iscsi bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi ipv6 cxgb3 mdio libiscsi_tcp qla4xxx iscsi_boot_sysfs libiscsi scsi_transport_iscsi dm_mirror dm_region_hash dm_log dm_mod
+[161846.962454]
+[161846.962460] CPU: 1 PID: 28016 Comm: kworker/u40:1 Not tainted 4.1.12-94.5.9.el6uek.x86_64 #2
+[161846.962463] Hardware name: Oracle Corporation SUN SERVER X4-2L      /ASSY,MB,X4-2L         , BIOS 26050100 09/19/2017
+[161846.962489] Workqueue: qlcnic_mailbox qlcnic_83xx_mailbox_worker [qlcnic]
+[161846.962493] task: ffff8801f2e34600 ti: ffff88004ca5c000 task.ti: ffff88004ca5c000
+[161846.962496] RIP: e030:[<ffffffff810013aa>]  [<ffffffff810013aa>] xen_hypercall_sched_op+0xa/0x20
+[161846.962506] RSP: e02b:ffff880202e43388  EFLAGS: 00000206
+[161846.962509] RAX: 0000000000000000 RBX: ffff8801f6996b70 RCX: ffffffff810013aa
+[161846.962511] RDX: ffff880202e433cc RSI: ffff880202e433b0 RDI: 0000000000000003
+[161846.962513] RBP: ffff880202e433d0 R08: 0000000000000000 R09: ffff8801fe893200
+[161846.962516] R10: ffff8801fe400538 R11: 0000000000000206 R12: ffff880202e4b000
+[161846.962518] R13: 0000000000000050 R14: 0000000000000001 R15: 000000000000020d
+[161846.962528] FS:  0000000000000000(0000) GS:ffff880202e40000(0000) knlGS:ffff880202e40000
+[161846.962531] CS:  e033 DS: 0000 ES: 0000 CR0: 0000000080050033
+[161846.962533] CR2: 0000000002612640 CR3: 00000001bb796000 CR4: 0000000000042660
+[161846.962536] Stack:
+[161846.962538]  ffff880202e43608 0000000000000000 ffffffff813f0442 ffff880202e433b0
+[161846.962543]  0000000000000000 ffff880202e433cc ffffffff00000001 0000000000000000
+[161846.962547]  00000009813f03d6 ffff880202e433e0 ffffffff813f0460 ffff880202e43440
+[161846.962552] Call Trace:
+[161846.962555]  <IRQ>
+[161846.962565]  [<ffffffff813f0442>] ? xen_poll_irq_timeout+0x42/0x50
+[161846.962570]  [<ffffffff813f0460>] xen_poll_irq+0x10/0x20
+[161846.962578]  [<ffffffff81014222>] xen_lock_spinning+0xe2/0x110
+[161846.962583]  [<ffffffff81013f01>] __raw_callee_save_xen_lock_spinning+0x11/0x20
+[161846.962592]  [<ffffffff816e5c57>] ? _raw_spin_lock+0x57/0x80
+[161846.962609]  [<ffffffffa028acfc>] qlcnic_83xx_enqueue_mbx_cmd+0x7c/0xe0 [qlcnic]
+[161846.962623]  [<ffffffffa028e008>] qlcnic_83xx_issue_cmd+0x58/0x210 [qlcnic]
+[161846.962636]  [<ffffffffa028caf2>] qlcnic_83xx_sre_macaddr_change+0x162/0x1d0 [qlcnic]
+[161846.962649]  [<ffffffffa028cb8b>] qlcnic_83xx_change_l2_filter+0x2b/0x30 [qlcnic]
+[161846.962657]  [<ffffffff8160248b>] ? __skb_flow_dissect+0x18b/0x650
+[161846.962670]  [<ffffffffa02856e5>] qlcnic_send_filter+0x205/0x250 [qlcnic]
+[161846.962682]  [<ffffffffa0285c77>] qlcnic_xmit_frame+0x547/0x7b0 [qlcnic]
+[161846.962691]  [<ffffffff8160ac22>] xmit_one+0x82/0x1a0
+[161846.962696]  [<ffffffff8160ad90>] dev_hard_start_xmit+0x50/0xa0
+[161846.962701]  [<ffffffff81630112>] sch_direct_xmit+0x112/0x220
+[161846.962706]  [<ffffffff8160b80f>] __dev_queue_xmit+0x1df/0x5e0
+[161846.962710]  [<ffffffff8160bc33>] dev_queue_xmit_sk+0x13/0x20
+[161846.962721]  [<ffffffffa0575bd5>] bond_dev_queue_xmit+0x35/0x80 [bonding]
+[161846.962729]  [<ffffffffa05769fb>] __bond_start_xmit+0x1cb/0x210 [bonding]
+[161846.962736]  [<ffffffffa0576a71>] bond_start_xmit+0x31/0x60 [bonding]
+[161846.962740]  [<ffffffff8160ac22>] xmit_one+0x82/0x1a0
+[161846.962745]  [<ffffffff8160ad90>] dev_hard_start_xmit+0x50/0xa0
+[161846.962749]  [<ffffffff8160bb1e>] __dev_queue_xmit+0x4ee/0x5e0
+[161846.962754]  [<ffffffff8160bc33>] dev_queue_xmit_sk+0x13/0x20
+[161846.962760]  [<ffffffffa05cfa72>] vlan_dev_hard_start_xmit+0xb2/0x150 [8021q]
+[161846.962764]  [<ffffffff8160ac22>] xmit_one+0x82/0x1a0
+[161846.962769]  [<ffffffff8160ad90>] dev_hard_start_xmit+0x50/0xa0
+[161846.962773]  [<ffffffff8160bb1e>] __dev_queue_xmit+0x4ee/0x5e0
+[161846.962777]  [<ffffffff8160bc33>] dev_queue_xmit_sk+0x13/0x20
+[161846.962789]  [<ffffffffa05adf74>] br_dev_queue_push_xmit+0x54/0xa0 [bridge]
+[161846.962797]  [<ffffffffa05ae4ff>] br_forward_finish+0x2f/0x90 [bridge]
+[161846.962807]  [<ffffffff810b0dad>] ? ttwu_do_wakeup+0x1d/0x100
+[161846.962811]  [<ffffffff815f929b>] ? __alloc_skb+0x8b/0x1f0
+[161846.962818]  [<ffffffffa05ae04d>] __br_forward+0x8d/0x120 [bridge]
+[161846.962822]  [<ffffffff815f613b>] ? __kmalloc_reserve+0x3b/0xa0
+[161846.962829]  [<ffffffff810be55e>] ? update_rq_runnable_avg+0xee/0x230
+[161846.962836]  [<ffffffffa05ae176>] br_forward+0x96/0xb0 [bridge]
+[161846.962845]  [<ffffffffa05af85e>] br_handle_frame_finish+0x1ae/0x420 [bridge]
+[161846.962853]  [<ffffffffa05afc4f>] br_handle_frame+0x17f/0x260 [bridge]
+[161846.962862]  [<ffffffffa05afad0>] ? br_handle_frame_finish+0x420/0x420 [bridge]
+[161846.962867]  [<ffffffff8160d057>] __netif_receive_skb_core+0x1f7/0x870
+[161846.962872]  [<ffffffff8160d6f2>] __netif_receive_skb+0x22/0x70
+[161846.962877]  [<ffffffff8160d913>] netif_receive_skb_internal+0x23/0x90
+[161846.962884]  [<ffffffffa07512ea>] ? xenvif_idx_release+0xea/0x100 [xen_netback]
+[161846.962889]  [<ffffffff816e5a10>] ? _raw_spin_unlock_irqrestore+0x20/0x50
+[161846.962893]  [<ffffffff8160e624>] netif_receive_skb_sk+0x24/0x90
+[161846.962899]  [<ffffffffa075269a>] xenvif_tx_submit+0x2ca/0x3f0 [xen_netback]
+[161846.962906]  [<ffffffffa0753f0c>] xenvif_tx_action+0x9c/0xd0 [xen_netback]
+[161846.962915]  [<ffffffffa07567f5>] xenvif_poll+0x35/0x70 [xen_netback]
+[161846.962920]  [<ffffffff8160e01b>] napi_poll+0xcb/0x1e0
+[161846.962925]  [<ffffffff8160e1c0>] net_rx_action+0x90/0x1c0
+[161846.962931]  [<ffffffff8108aaba>] __do_softirq+0x10a/0x350
+[161846.962938]  [<ffffffff8108ae75>] irq_exit+0x125/0x130
+[161846.962943]  [<ffffffff813f03a9>] xen_evtchn_do_upcall+0x39/0x50
+[161846.962950]  [<ffffffff816e7ffe>] xen_do_hypervisor_callback+0x1e/0x40
+[161846.962952]  <EOI>
+[161846.962959]  [<ffffffff816e5c4a>] ? _raw_spin_lock+0x4a/0x80
+[161846.962964]  [<ffffffff816e5b1e>] ? _raw_spin_lock_irqsave+0x1e/0xa0
+[161846.962978]  [<ffffffffa028e279>] ? qlcnic_83xx_mailbox_worker+0xb9/0x2a0 [qlcnic]
+[161846.962991]  [<ffffffff810a14e1>] ? process_one_work+0x151/0x4b0
+[161846.962995]  [<ffffffff8100c3f2>] ? check_events+0x12/0x20
+[161846.963001]  [<ffffffff810a1960>] ? worker_thread+0x120/0x480
+[161846.963005]  [<ffffffff816e187b>] ? __schedule+0x30b/0x890
+[161846.963010]  [<ffffffff810a1840>] ? process_one_work+0x4b0/0x4b0
+[161846.963015]  [<ffffffff810a1840>] ? process_one_work+0x4b0/0x4b0
+[161846.963021]  [<ffffffff810a6b3e>] ? kthread+0xce/0xf0
+[161846.963025]  [<ffffffff810a6a70>] ? kthread_freezable_should_stop+0x70/0x70
+[161846.963031]  [<ffffffff816e6522>] ? ret_from_fork+0x42/0x70
+[161846.963035]  [<ffffffff810a6a70>] ? kthread_freezable_should_stop+0x70/0x70
+[161846.963037] Code: cc 51 41 53 b8 1c 00 00 00 0f 05 41 5b 59 c3 cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc 51 41 53 b8 1d 00 00 00 0f 05 <41> 5b 59 c3 cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc
+
+Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c |   18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+@@ -3891,7 +3891,7 @@ static void qlcnic_83xx_flush_mbx_queue(
+       struct list_head *head = &mbx->cmd_q;
+       struct qlcnic_cmd_args *cmd = NULL;
+ 
+-      spin_lock(&mbx->queue_lock);
++      spin_lock_bh(&mbx->queue_lock);
+ 
+       while (!list_empty(head)) {
+               cmd = list_entry(head->next, struct qlcnic_cmd_args, list);
+@@ -3902,7 +3902,7 @@ static void qlcnic_83xx_flush_mbx_queue(
+               qlcnic_83xx_notify_cmd_completion(adapter, cmd);
+       }
+ 
+-      spin_unlock(&mbx->queue_lock);
++      spin_unlock_bh(&mbx->queue_lock);
+ }
+ 
+ static int qlcnic_83xx_check_mbx_status(struct qlcnic_adapter *adapter)
+@@ -3938,12 +3938,12 @@ static void qlcnic_83xx_dequeue_mbx_cmd(
+ {
+       struct qlcnic_mailbox *mbx = adapter->ahw->mailbox;
+ 
+-      spin_lock(&mbx->queue_lock);
++      spin_lock_bh(&mbx->queue_lock);
+ 
+       list_del(&cmd->list);
+       mbx->num_cmds--;
+ 
+-      spin_unlock(&mbx->queue_lock);
++      spin_unlock_bh(&mbx->queue_lock);
+ 
+       qlcnic_83xx_notify_cmd_completion(adapter, cmd);
+ }
+@@ -4008,7 +4008,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(s
+               init_completion(&cmd->completion);
+               cmd->rsp_opcode = QLC_83XX_MBX_RESPONSE_UNKNOWN;
+ 
+-              spin_lock(&mbx->queue_lock);
++              spin_lock_bh(&mbx->queue_lock);
+ 
+               list_add_tail(&cmd->list, &mbx->cmd_q);
+               mbx->num_cmds++;
+@@ -4016,7 +4016,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(s
+               *timeout = cmd->total_cmds * QLC_83XX_MBX_TIMEOUT;
+               queue_work(mbx->work_q, &mbx->work);
+ 
+-              spin_unlock(&mbx->queue_lock);
++              spin_unlock_bh(&mbx->queue_lock);
+ 
+               return 0;
+       }
+@@ -4112,15 +4112,15 @@ static void qlcnic_83xx_mailbox_worker(s
+               mbx->rsp_status = QLC_83XX_MBX_RESPONSE_WAIT;
+               spin_unlock_irqrestore(&mbx->aen_lock, flags);
+ 
+-              spin_lock(&mbx->queue_lock);
++              spin_lock_bh(&mbx->queue_lock);
+ 
+               if (list_empty(head)) {
+-                      spin_unlock(&mbx->queue_lock);
++                      spin_unlock_bh(&mbx->queue_lock);
+                       return;
+               }
+               cmd = list_entry(head->next, struct qlcnic_cmd_args, list);
+ 
+-              spin_unlock(&mbx->queue_lock);
++              spin_unlock_bh(&mbx->queue_lock);
+ 
+               mbx_ops->encode_cmd(adapter, cmd);
+               mbx_ops->nofity_fw(adapter, QLC_83XX_MBX_REQUEST);
diff --git a/queue-4.15/qmi_wwan-add-support-for-quectel-ep06.patch b/queue-4.15/qmi_wwan-add-support-for-quectel-ep06.patch

new file mode 100644 (file)

index 0000000..1882c2b
--- /dev/null
+++ b/queue-4.15/qmi_wwan-add-support-for-quectel-ep06.patch
@@ -0,0 +1,31 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Kristian Evensen <kristian.evensen@gmail.com>
+Date: Tue, 30 Jan 2018 14:12:55 +0100
+Subject: qmi_wwan: Add support for Quectel EP06
+
+From: Kristian Evensen <kristian.evensen@gmail.com>
+
+
+[ Upstream commit c0b91a56a2e57a5a370655b25d677ae0ebf8a2d0 ]
+
+The Quectel EP06 is a Cat. 6 LTE modem. It uses the same interface as
+the EC20/EC25 for QMI, and requires the same "set DTR"-quirk to work.
+
+Signed-off-by: Kristian Evensen <kristian.evensen@gmail.com>
+Acked-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/qmi_wwan.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -1245,6 +1245,7 @@ static const struct usb_device_id produc
+       {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0  Mini PCIe */
+       {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */
+       {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)},    /* Quectel BG96 */
++      {QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)}, /* Quectel EP06 Mini PCIe */
+ 
+       /* 4. Gobi 1000 devices */
+       {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)},    /* Acer Gobi Modem Device */
diff --git a/queue-4.15/r8169-fix-rtl8168ep-take-too-long-to-complete-driver-initialization.patch b/queue-4.15/r8169-fix-rtl8168ep-take-too-long-to-complete-driver-initialization.patch

new file mode 100644 (file)

index 0000000..8483119
--- /dev/null
+++ b/queue-4.15/r8169-fix-rtl8168ep-take-too-long-to-complete-driver-initialization.patch
@@ -0,0 +1,42 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Chunhao Lin <hau@realtek.com>
+Date: Wed, 31 Jan 2018 01:32:36 +0800
+Subject: r8169: fix RTL8168EP take too long to complete driver initialization.
+
+From: Chunhao Lin <hau@realtek.com>
+
+
+[ Upstream commit 086ca23d03c0d2f4088f472386778d293e15c5f6 ]
+
+The driver checks the wrong register bit in rtl_ocp_tx_cond(), which keeps
+the driver waiting until timeout.
+
+Fix this by waiting for the right register bit.
+
+Signed-off-by: Chunhao Lin <hau@realtek.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/r8169.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/realtek/r8169.c
++++ b/drivers/net/ethernet/realtek/r8169.c
+@@ -1395,7 +1395,7 @@ DECLARE_RTL_COND(rtl_ocp_tx_cond)
+ {
+       void __iomem *ioaddr = tp->mmio_addr;
+ 
+-      return RTL_R8(IBISR0) & 0x02;
++      return RTL_R8(IBISR0) & 0x20;
+ }
+ 
+ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
+@@ -1403,7 +1403,7 @@ static void rtl8168ep_stop_cmac(struct r
+       void __iomem *ioaddr = tp->mmio_addr;
+ 
+       RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01);
+-      rtl_msleep_loop_wait_low(tp, &rtl_ocp_tx_cond, 50, 2000);
++      rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000);
+       RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20);
+       RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01);
+ }
diff --git a/queue-4.15/revert-defer-call-to-mem_cgroup_sk_alloc.patch b/queue-4.15/revert-defer-call-to-mem_cgroup_sk_alloc.patch

new file mode 100644 (file)

index 0000000..f7ee20e
--- /dev/null
+++ b/queue-4.15/revert-defer-call-to-mem_cgroup_sk_alloc.patch
@@ -0,0 +1,97 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Roman Gushchin <guro@fb.com>
+Date: Fri, 2 Feb 2018 15:26:57 +0000
+Subject: Revert "defer call to mem_cgroup_sk_alloc()"
+
+From: Roman Gushchin <guro@fb.com>
+
+
+[ Upstream commit edbe69ef2c90fc86998a74b08319a01c508bd497 ]
+
+This patch effectively reverts commit 9f1c2674b328 ("net: memcontrol:
+defer call to mem_cgroup_sk_alloc()").
+
+Moving mem_cgroup_sk_alloc() to the inet_csk_accept() completely breaks
+memcg socket memory accounting, as packets received before memcg
+pointer initialization are not accounted and are causing refcounting
+underflow on socket release.
+
+Actually the use-after-free problem was fixed by
+commit c0576e397508 ("net: call cgroup_sk_alloc() earlier in
+sk_clone_lock()") for the cgroup pointer.
+
+So, let's revert it and call mem_cgroup_sk_alloc() just before
+cgroup_sk_alloc(). This is safe, as we hold a reference to the socket
+we're cloning, and it holds a reference to the memcg.
+
+Also, let's drop BUG_ON(mem_cgroup_is_root()) check from
+mem_cgroup_sk_alloc(). I see no reasons why bumping the root
+memcg counter is a good reason to panic, and there are no realistic
+ways to hit it.
+
+Signed-off-by: Roman Gushchin <guro@fb.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: David S. Miller <davem@davemloft.net>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Tejun Heo <tj@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memcontrol.c                 |   14 ++++++++++++++
+ net/core/sock.c                 |    5 +----
+ net/ipv4/inet_connection_sock.c |    1 -
+ 3 files changed, 15 insertions(+), 5 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -5828,6 +5828,20 @@ void mem_cgroup_sk_alloc(struct sock *sk
+       if (!mem_cgroup_sockets_enabled)
+               return;
+ 
++      /*
++       * Socket cloning can throw us here with sk_memcg already
++       * filled. It won't however, necessarily happen from
++       * process context. So the test for root memcg given
++       * the current task's memcg won't help us in this case.
++       *
++       * Respecting the original socket's memcg is a better
++       * decision in this case.
++       */
++      if (sk->sk_memcg) {
++              css_get(&sk->sk_memcg->css);
++              return;
++      }
++
+       rcu_read_lock();
+       memcg = mem_cgroup_from_task(current);
+       if (memcg == root_mem_cgroup)
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1675,16 +1675,13 @@ struct sock *sk_clone_lock(const struct
+               newsk->sk_dst_pending_confirm = 0;
+               newsk->sk_wmem_queued   = 0;
+               newsk->sk_forward_alloc = 0;
+-
+-              /* sk->sk_memcg will be populated at accept() time */
+-              newsk->sk_memcg = NULL;
+-
+               atomic_set(&newsk->sk_drops, 0);
+               newsk->sk_send_head     = NULL;
+               newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+               atomic_set(&newsk->sk_zckey, 0);
+ 
+               sock_reset_flag(newsk, SOCK_DONE);
++              mem_cgroup_sk_alloc(newsk);
+               cgroup_sk_alloc(&newsk->sk_cgrp_data);
+ 
+               rcu_read_lock();
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -475,7 +475,6 @@ struct sock *inet_csk_accept(struct sock
+               }
+               spin_unlock_bh(&queue->fastopenq.lock);
+       }
+-      mem_cgroup_sk_alloc(newsk);
+ out:
+       release_sock(sk);
+       if (req)
diff --git a/queue-4.15/rocker-fix-possible-null-pointer-dereference-in-rocker_router_fib_event_work.patch b/queue-4.15/rocker-fix-possible-null-pointer-dereference-in-rocker_router_fib_event_work.patch

new file mode 100644 (file)

index 0000000..4a471bc
--- /dev/null
+++ b/queue-4.15/rocker-fix-possible-null-pointer-dereference-in-rocker_router_fib_event_work.patch
@@ -0,0 +1,79 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Jiri Pirko <jiri@mellanox.com>
+Date: Thu, 1 Feb 2018 12:21:15 +0100
+Subject: rocker: fix possible null pointer dereference in rocker_router_fib_event_work
+
+From: Jiri Pirko <jiri@mellanox.com>
+
+
+[ Upstream commit a83165f00f16c0e0ef5b7cec3cbd0d4788699265 ]
+
+Currently, a rocker user may experience the following null pointer
+dereference bug:
+
+[    3.062141] BUG: unable to handle kernel NULL pointer dereference at 00000000000000d0
+[    3.065163] IP: rocker_router_fib_event_work+0x36/0x110 [rocker]
+
+The problem is uninitialized rocker->wops pointer that is initialized
+only with the first initialized port. So move the port initialization
+before registering the fib events.
+
+Fixes: 936bd486564a ("rocker: use FIB notifications instead of switchdev calls")
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/rocker/rocker_main.c |   18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ethernet/rocker/rocker_main.c
++++ b/drivers/net/ethernet/rocker/rocker_main.c
+@@ -2902,6 +2902,12 @@ static int rocker_probe(struct pci_dev *
+               goto err_alloc_ordered_workqueue;
+       }
+ 
++      err = rocker_probe_ports(rocker);
++      if (err) {
++              dev_err(&pdev->dev, "failed to probe ports\n");
++              goto err_probe_ports;
++      }
++
+       /* Only FIBs pointing to our own netdevs are programmed into
+        * the device, so no need to pass a callback.
+        */
+@@ -2918,22 +2924,16 @@ static int rocker_probe(struct pci_dev *
+ 
+       rocker->hw.id = rocker_read64(rocker, SWITCH_ID);
+ 
+-      err = rocker_probe_ports(rocker);
+-      if (err) {
+-              dev_err(&pdev->dev, "failed to probe ports\n");
+-              goto err_probe_ports;
+-      }
+-
+       dev_info(&pdev->dev, "Rocker switch with id %*phN\n",
+                (int)sizeof(rocker->hw.id), &rocker->hw.id);
+ 
+       return 0;
+ 
+-err_probe_ports:
+-      unregister_switchdev_notifier(&rocker_switchdev_notifier);
+ err_register_switchdev_notifier:
+       unregister_fib_notifier(&rocker->fib_nb);
+ err_register_fib_notifier:
++      rocker_remove_ports(rocker);
++err_probe_ports:
+       destroy_workqueue(rocker->rocker_owq);
+ err_alloc_ordered_workqueue:
+       free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
+@@ -2961,9 +2961,9 @@ static void rocker_remove(struct pci_dev
+ {
+       struct rocker *rocker = pci_get_drvdata(pdev);
+ 
+-      rocker_remove_ports(rocker);
+       unregister_switchdev_notifier(&rocker_switchdev_notifier);
+       unregister_fib_notifier(&rocker->fib_nb);
++      rocker_remove_ports(rocker);
+       rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET);
+       destroy_workqueue(rocker->rocker_owq);
+       free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
diff --git a/queue-4.15/series b/queue-4.15/series

new file mode 100644 (file)

index 0000000..bb059e1
--- /dev/null
+++ b/queue-4.15/series
@@ -0,0 +1,18 @@
+ip6mr-fix-stale-iterator.patch
+net-igmp-add-a-missing-rcu-locking-section.patch
+qlcnic-fix-deadlock-bug.patch
+qmi_wwan-add-support-for-quectel-ep06.patch
+r8169-fix-rtl8168ep-take-too-long-to-complete-driver-initialization.patch
+tcp-release-sk_frag.page-in-tcp_disconnect.patch
+vhost_net-stop-device-during-reset-owner.patch
+ipv6-addrconf-break-critical-section-in-addrconf_verify_rtnl.patch
+ipv6-change-route-cache-aging-logic.patch
+revert-defer-call-to-mem_cgroup_sk_alloc.patch
+net-ipv6-send-unsolicited-na-after-dad.patch
+rocker-fix-possible-null-pointer-dereference-in-rocker_router_fib_event_work.patch
+tcp_bbr-fix-pacing_gain-to-always-be-unity-when-using-lt_bw.patch
+cls_u32-add-missing-rcu-annotation.patch
+ipv6-fix-so_reuseport-udp-socket-with-implicit-sk_ipv6only.patch
+soreuseport-fix-mem-leak-in-reuseport_add_sock.patch
+net_sched-get-rid-of-rcu_barrier-in-tcf_block_put_ext.patch
+net-sched-fix-use-after-free-in-tcf_block_put_ext.patch
diff --git a/queue-4.15/soreuseport-fix-mem-leak-in-reuseport_add_sock.patch b/queue-4.15/soreuseport-fix-mem-leak-in-reuseport_add_sock.patch

new file mode 100644 (file)

index 0000000..27545a8
--- /dev/null
+++ b/queue-4.15/soreuseport-fix-mem-leak-in-reuseport_add_sock.patch
@@ -0,0 +1,120 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 2 Feb 2018 10:27:27 -0800
+Subject: soreuseport: fix mem leak in reuseport_add_sock()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 4db428a7c9ab07e08783e0fcdc4ca0f555da0567 ]
+
+reuseport_add_sock() needs to deal with attaching a socket having
+its own sk_reuseport_cb, after a prior
+setsockopt(SO_ATTACH_REUSEPORT_?BPF)
+
+Without this fix, not only a WARN_ONCE() was issued, but we were also
+leaking memory.
+
+Thanks to syzbot and Eric Biggers for providing us with nice C repros.
+
+------------[ cut here ]------------
+socket already in reuseport group
+WARNING: CPU: 0 PID: 3496 at net/core/sock_reuseport.c:119  
+reuseport_add_sock+0x742/0x9b0 net/core/sock_reuseport.c:117
+Kernel panic - not syncing: panic_on_warn set ...
+
+CPU: 0 PID: 3496 Comm: syzkaller869503 Not tainted 4.15.0-rc6+ #245
+Hardware name: Google Google Compute Engine/Google Compute Engine,
+BIOS  
+Google 01/01/2011
+Call Trace:
+  __dump_stack lib/dump_stack.c:17 [inline]
+  dump_stack+0x194/0x257 lib/dump_stack.c:53
+  panic+0x1e4/0x41c kernel/panic.c:183
+  __warn+0x1dc/0x200 kernel/panic.c:547
+  report_bug+0x211/0x2d0 lib/bug.c:184
+  fixup_bug.part.11+0x37/0x80 arch/x86/kernel/traps.c:178
+  fixup_bug arch/x86/kernel/traps.c:247 [inline]
+  do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296
+  do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315
+  invalid_op+0x22/0x40 arch/x86/entry/entry_64.S:1079
+
+Fixes: ef456144da8e ("soreuseport: define reuseport groups")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot+c0ea2226f77a42936bf7@syzkaller.appspotmail.com
+Acked-by: Craig Gallek <kraig@google.com>
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock_reuseport.c |   35 ++++++++++++++++++++---------------
+ 1 file changed, 20 insertions(+), 15 deletions(-)
+
+--- a/net/core/sock_reuseport.c
++++ b/net/core/sock_reuseport.c
+@@ -94,6 +94,16 @@ static struct sock_reuseport *reuseport_
+       return more_reuse;
+ }
+ 
++static void reuseport_free_rcu(struct rcu_head *head)
++{
++      struct sock_reuseport *reuse;
++
++      reuse = container_of(head, struct sock_reuseport, rcu);
++      if (reuse->prog)
++              bpf_prog_destroy(reuse->prog);
++      kfree(reuse);
++}
++
+ /**
+  *  reuseport_add_sock - Add a socket to the reuseport group of another.
+  *  @sk:  New socket to add to the group.
+@@ -102,7 +112,7 @@ static struct sock_reuseport *reuseport_
+  */
+ int reuseport_add_sock(struct sock *sk, struct sock *sk2)
+ {
+-      struct sock_reuseport *reuse;
++      struct sock_reuseport *old_reuse, *reuse;
+ 
+       if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
+               int err = reuseport_alloc(sk2);
+@@ -113,10 +123,13 @@ int reuseport_add_sock(struct sock *sk,
+ 
+       spin_lock_bh(&reuseport_lock);
+       reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
+-                                        lockdep_is_held(&reuseport_lock)),
+-      WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
+-                                          lockdep_is_held(&reuseport_lock)),
+-                "socket already in reuseport group");
++                                        lockdep_is_held(&reuseport_lock));
++      old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
++                                           lockdep_is_held(&reuseport_lock));
++      if (old_reuse && old_reuse->num_socks != 1) {
++              spin_unlock_bh(&reuseport_lock);
++              return -EBUSY;
++      }
+ 
+       if (reuse->num_socks == reuse->max_socks) {
+               reuse = reuseport_grow(reuse);
+@@ -134,19 +147,11 @@ int reuseport_add_sock(struct sock *sk,
+ 
+       spin_unlock_bh(&reuseport_lock);
+ 
++      if (old_reuse)
++              call_rcu(&old_reuse->rcu, reuseport_free_rcu);
+       return 0;
+ }
+ 
+-static void reuseport_free_rcu(struct rcu_head *head)
+-{
+-      struct sock_reuseport *reuse;
+-
+-      reuse = container_of(head, struct sock_reuseport, rcu);
+-      if (reuse->prog)
+-              bpf_prog_destroy(reuse->prog);
+-      kfree(reuse);
+-}
+-
+ void reuseport_detach_sock(struct sock *sk)
+ {
+       struct sock_reuseport *reuse;
diff --git a/queue-4.15/tcp-release-sk_frag.page-in-tcp_disconnect.patch b/queue-4.15/tcp-release-sk_frag.page-in-tcp_disconnect.patch

new file mode 100644 (file)

index 0000000..b240c60
--- /dev/null
+++ b/queue-4.15/tcp-release-sk_frag.page-in-tcp_disconnect.patch
@@ -0,0 +1,38 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Li RongQing <lirongqing@baidu.com>
+Date: Fri, 26 Jan 2018 16:40:41 +0800
+Subject: tcp: release sk_frag.page in tcp_disconnect
+
+From: Li RongQing <lirongqing@baidu.com>
+
+
+[ Upstream commit 9b42d55a66d388e4dd5550107df051a9637564fc ]
+
+A socket can be disconnected and transformed back into a listening
+socket. If sk_frag.page is not released, it will be cloned into
+a new socket by sk_clone_lock, but the reference count of this page
+is increased, leading to a use-after-free or double-free issue.
+
+Signed-off-by: Li RongQing <lirongqing@baidu.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -2434,6 +2434,12 @@ int tcp_disconnect(struct sock *sk, int
+ 
+       WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
+ 
++      if (sk->sk_frag.page) {
++              put_page(sk->sk_frag.page);
++              sk->sk_frag.page = NULL;
++              sk->sk_frag.offset = 0;
++      }
++
+       sk->sk_error_report(sk);
+       return err;
+ }
diff --git a/queue-4.15/tcp_bbr-fix-pacing_gain-to-always-be-unity-when-using-lt_bw.patch b/queue-4.15/tcp_bbr-fix-pacing_gain-to-always-be-unity-when-using-lt_bw.patch

new file mode 100644 (file)

index 0000000..93e47f9
--- /dev/null
+++ b/queue-4.15/tcp_bbr-fix-pacing_gain-to-always-be-unity-when-using-lt_bw.patch
@@ -0,0 +1,56 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Neal Cardwell <ncardwell@google.com>
+Date: Wed, 31 Jan 2018 15:43:05 -0500
+Subject: tcp_bbr: fix pacing_gain to always be unity when using lt_bw
+
+From: Neal Cardwell <ncardwell@google.com>
+
+
+[ Upstream commit 3aff3b4b986e51bcf4ab249e5d48d39596e0df6a ]
+
+This commit fixes the pacing_gain to remain at BBR_UNIT (1.0) when
+using lt_bw and returning from the PROBE_RTT state to PROBE_BW.
+
+Previously, when using lt_bw, upon exiting PROBE_RTT and entering
+PROBE_BW the bbr_reset_probe_bw_mode() code could sometimes randomly
+end up with a cycle_idx of 0 and hence have bbr_advance_cycle_phase()
+set a pacing gain above 1.0. In such cases this would result in a
+pacing rate that is 1.25x higher than intended, potentially resulting
+in a high loss rate for a little while until we stop using the lt_bw a
+bit later.
+
+This commit is a stable candidate for kernels back as far as 4.9.
+
+Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Reported-by: Beyers Cronje <bcronje@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_bbr.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -481,7 +481,8 @@ static void bbr_advance_cycle_phase(stru
+ 
+       bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
+       bbr->cycle_mstamp = tp->delivered_mstamp;
+-      bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx];
++      bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT :
++                                          bbr_pacing_gain[bbr->cycle_idx];
+ }
+ 
+ /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
+@@ -490,8 +491,7 @@ static void bbr_update_cycle_phase(struc
+ {
+       struct bbr *bbr = inet_csk_ca(sk);
+ 
+-      if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw &&
+-          bbr_is_next_cycle_phase(sk, rs))
++      if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs))
+               bbr_advance_cycle_phase(sk);
+ }
+ 
diff --git a/queue-4.15/vhost_net-stop-device-during-reset-owner.patch b/queue-4.15/vhost_net-stop-device-during-reset-owner.patch

new file mode 100644 (file)

index 0000000..9b76b85
--- /dev/null
+++ b/queue-4.15/vhost_net-stop-device-during-reset-owner.patch
@@ -0,0 +1,34 @@
+From foo@baz Wed Feb  7 11:29:33 PST 2018
+From: Jason Wang <jasowang@redhat.com>
+Date: Thu, 25 Jan 2018 22:03:52 +0800
+Subject: vhost_net: stop device during reset owner
+
+From: Jason Wang <jasowang@redhat.com>
+
+
+[ Upstream commit 4cd879515d686849eec5f718aeac62a70b067d82 ]
+
+We don't stop the device before resetting the owner, which means we could
+try to serve a virtqueue kick before dev->worker is reset. This results in
+a warning since the work was pending on the llist during owner reset. Fix
+this by stopping the device during owner reset.
+
+Reported-by: syzbot+eb17c6162478cc50632c@syzkaller.appspotmail.com
+Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/net.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -1208,6 +1208,7 @@ static long vhost_net_reset_owner(struct
+       }
+       vhost_net_stop(n, &tx_sock, &rx_sock);
+       vhost_net_flush(n);
++      vhost_dev_stop(&n->dev);
+       vhost_dev_reset_owner(&n->dev, umem);
+       vhost_net_vq_reset(n);
+ done:
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 7 Feb 2018 19:30:05 +0000 (11:30 -0800)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 7 Feb 2018 19:30:05 +0000 (11:30 -0800)
queue-4.15/cls_u32-add-missing-rcu-annotation.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/ip6mr-fix-stale-iterator.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/ipv6-addrconf-break-critical-section-in-addrconf_verify_rtnl.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/ipv6-change-route-cache-aging-logic.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/ipv6-fix-so_reuseport-udp-socket-with-implicit-sk_ipv6only.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/net-igmp-add-a-missing-rcu-locking-section.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/net-ipv6-send-unsolicited-na-after-dad.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/net-sched-fix-use-after-free-in-tcf_block_put_ext.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/net_sched-get-rid-of-rcu_barrier-in-tcf_block_put_ext.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/qlcnic-fix-deadlock-bug.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/qmi_wwan-add-support-for-quectel-ep06.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/r8169-fix-rtl8168ep-take-too-long-to-complete-driver-initialization.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/revert-defer-call-to-mem_cgroup_sk_alloc.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/rocker-fix-possible-null-pointer-dereference-in-rocker_router_fib_event_work.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/series	[new file with mode: 0644]	patch \| blob
queue-4.15/soreuseport-fix-mem-leak-in-reuseport_add_sock.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/tcp-release-sk_frag.page-in-tcp_disconnect.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/tcp_bbr-fix-pacing_gain-to-always-be-unity-when-using-lt_bw.patch	[new file with mode: 0644]	patch \| blob
queue-4.15/vhost_net-stop-device-during-reset-owner.patch	[new file with mode: 0644]	patch \| blob