4.9-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 25 Aug 2017 00:45:34 +0000 (17:45 -0700)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 25 Aug 2017 00:45:34 +0000 (17:45 -0700)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 25 Aug 2017 00:45:34 +0000 (17:45 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 25 Aug 2017 00:45:34 +0000 (17:45 -0700)
diff --git a/queue-4.9/af_key-do-not-use-gfp_kernel-in-atomic-contexts.patch b/queue-4.9/af_key-do-not-use-gfp_kernel-in-atomic-contexts.patch

new file mode 100644 (file)

index 0000000..420e69f
--- /dev/null
+++ b/queue-4.9/af_key-do-not-use-gfp_kernel-in-atomic-contexts.patch
@@ -0,0 +1,268 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 14 Aug 2017 10:16:45 -0700
+Subject: af_key: do not use GFP_KERNEL in atomic contexts
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 36f41f8fc6d8aa9f8c9072d66ff7cf9055f5e69b ]
+
+pfkey_broadcast() might be called from non-process contexts;
+we cannot use GFP_KERNEL in these cases [1].
+
+This patch partially reverts commit ba51b6be38c1 ("net: Fix RCU splat in
+af_key"), only keeping the GFP_ATOMIC forcing under rcu_read_lock()
+section.
+
+[1] : syzkaller reported :
+
+in_atomic(): 1, irqs_disabled(): 0, pid: 2932, name: syzkaller183439
+3 locks held by syzkaller183439/2932:
+ #0:  (&net->xfrm.xfrm_cfg_mutex){+.+.+.}, at: [<ffffffff83b43888>] pfkey_sendmsg+0x4c8/0x9f0 net/key/af_key.c:3649
+ #1:  (&pfk->dump_lock){+.+.+.}, at: [<ffffffff83b467f6>] pfkey_do_dump+0x76/0x3f0 net/key/af_key.c:293
+ #2:  (&(&net->xfrm.xfrm_policy_lock)->rlock){+...+.}, at: [<ffffffff83957632>] spin_lock_bh include/linux/spinlock.h:304 [inline]
+ #2:  (&(&net->xfrm.xfrm_policy_lock)->rlock){+...+.}, at: [<ffffffff83957632>] xfrm_policy_walk+0x192/0xa30 net/xfrm/xfrm_policy.c:1028
+CPU: 0 PID: 2932 Comm: syzkaller183439 Not tainted 4.13.0-rc4+ #24
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:16 [inline]
+ dump_stack+0x194/0x257 lib/dump_stack.c:52
+ ___might_sleep+0x2b2/0x470 kernel/sched/core.c:5994
+ __might_sleep+0x95/0x190 kernel/sched/core.c:5947
+ slab_pre_alloc_hook mm/slab.h:416 [inline]
+ slab_alloc mm/slab.c:3383 [inline]
+ kmem_cache_alloc+0x24b/0x6e0 mm/slab.c:3559
+ skb_clone+0x1a0/0x400 net/core/skbuff.c:1037
+ pfkey_broadcast_one+0x4b2/0x6f0 net/key/af_key.c:207
+ pfkey_broadcast+0x4ba/0x770 net/key/af_key.c:281
+ dump_sp+0x3d6/0x500 net/key/af_key.c:2685
+ xfrm_policy_walk+0x2f1/0xa30 net/xfrm/xfrm_policy.c:1042
+ pfkey_dump_sp+0x42/0x50 net/key/af_key.c:2695
+ pfkey_do_dump+0xaa/0x3f0 net/key/af_key.c:299
+ pfkey_spddump+0x1a0/0x210 net/key/af_key.c:2722
+ pfkey_process+0x606/0x710 net/key/af_key.c:2814
+ pfkey_sendmsg+0x4d6/0x9f0 net/key/af_key.c:3650
+ sock_sendmsg_nosec net/socket.c:633 [inline]
+ sock_sendmsg+0xca/0x110 net/socket.c:643
+ ___sys_sendmsg+0x755/0x890 net/socket.c:2035
+ __sys_sendmsg+0xe5/0x210 net/socket.c:2069
+ SYSC_sendmsg net/socket.c:2080 [inline]
+ SyS_sendmsg+0x2d/0x50 net/socket.c:2076
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+RIP: 0033:0x445d79
+RSP: 002b:00007f32447c1dc8 EFLAGS: 00000202 ORIG_RAX: 000000000000002e
+RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000445d79
+RDX: 0000000000000000 RSI: 000000002023dfc8 RDI: 0000000000000008
+RBP: 0000000000000086 R08: 00007f32447c2700 R09: 00007f32447c2700
+R10: 00007f32447c2700 R11: 0000000000000202 R12: 0000000000000000
+R13: 00007ffe33edec4f R14: 00007f32447c29c0 R15: 0000000000000000
+
+Fixes: ba51b6be38c1 ("net: Fix RCU splat in af_key")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: David Ahern <dsa@cumulusnetworks.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/key/af_key.c |   48 ++++++++++++++++++++++++++----------------------
+ 1 file changed, 26 insertions(+), 22 deletions(-)
+
+--- a/net/key/af_key.c
++++ b/net/key/af_key.c
+@@ -228,7 +228,7 @@ static int pfkey_broadcast_one(struct sk
+ #define BROADCAST_ONE         1
+ #define BROADCAST_REGISTERED  2
+ #define BROADCAST_PROMISC_ONLY        4
+-static int pfkey_broadcast(struct sk_buff *skb,
++static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
+                          int broadcast_flags, struct sock *one_sk,
+                          struct net *net)
+ {
+@@ -278,7 +278,7 @@ static int pfkey_broadcast(struct sk_buf
+       rcu_read_unlock();
+ 
+       if (one_sk != NULL)
+-              err = pfkey_broadcast_one(skb, &skb2, GFP_KERNEL, one_sk);
++              err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
+ 
+       kfree_skb(skb2);
+       kfree_skb(skb);
+@@ -311,7 +311,7 @@ static int pfkey_do_dump(struct pfkey_so
+               hdr = (struct sadb_msg *) pfk->dump.skb->data;
+               hdr->sadb_msg_seq = 0;
+               hdr->sadb_msg_errno = rc;
+-              pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
++              pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
+                               &pfk->sk, sock_net(&pfk->sk));
+               pfk->dump.skb = NULL;
+       }
+@@ -355,7 +355,7 @@ static int pfkey_error(const struct sadb
+       hdr->sadb_msg_len = (sizeof(struct sadb_msg) /
+                            sizeof(uint64_t));
+ 
+-      pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk));
++      pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk));
+ 
+       return 0;
+ }
+@@ -1396,7 +1396,7 @@ static int pfkey_getspi(struct sock *sk,
+ 
+       xfrm_state_put(x);
+ 
+-      pfkey_broadcast(resp_skb, BROADCAST_ONE, sk, net);
++      pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net);
+ 
+       return 0;
+ }
+@@ -1483,7 +1483,7 @@ static int key_notify_sa(struct xfrm_sta
+       hdr->sadb_msg_seq = c->seq;
+       hdr->sadb_msg_pid = c->portid;
+ 
+-      pfkey_broadcast(skb, BROADCAST_ALL, NULL, xs_net(x));
++      pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x));
+ 
+       return 0;
+ }
+@@ -1596,7 +1596,7 @@ static int pfkey_get(struct sock *sk, st
+       out_hdr->sadb_msg_reserved = 0;
+       out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
+       out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
+-      pfkey_broadcast(out_skb, BROADCAST_ONE, sk, sock_net(sk));
++      pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk));
+ 
+       return 0;
+ }
+@@ -1701,8 +1701,8 @@ static int pfkey_register(struct sock *s
+               return -ENOBUFS;
+       }
+ 
+-      pfkey_broadcast(supp_skb, BROADCAST_REGISTERED, sk, sock_net(sk));
+-
++      pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk,
++                      sock_net(sk));
+       return 0;
+ }
+ 
+@@ -1720,7 +1720,8 @@ static int unicast_flush_resp(struct soc
+       hdr->sadb_msg_errno = (uint8_t) 0;
+       hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
+ 
+-      return pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk));
++      return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk,
++                             sock_net(sk));
+ }
+ 
+ static int key_notify_sa_flush(const struct km_event *c)
+@@ -1741,7 +1742,7 @@ static int key_notify_sa_flush(const str
+       hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
+       hdr->sadb_msg_reserved = 0;
+ 
+-      pfkey_broadcast(skb, BROADCAST_ALL, NULL, c->net);
++      pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
+ 
+       return 0;
+ }
+@@ -1798,7 +1799,7 @@ static int dump_sa(struct xfrm_state *x,
+       out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
+ 
+       if (pfk->dump.skb)
+-              pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
++              pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
+                               &pfk->sk, sock_net(&pfk->sk));
+       pfk->dump.skb = out_skb;
+ 
+@@ -1886,7 +1887,7 @@ static int pfkey_promisc(struct sock *sk
+               new_hdr->sadb_msg_errno = 0;
+       }
+ 
+-      pfkey_broadcast(skb, BROADCAST_ALL, NULL, sock_net(sk));
++      pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk));
+       return 0;
+ }
+ 
+@@ -2219,7 +2220,7 @@ static int key_notify_policy(struct xfrm
+       out_hdr->sadb_msg_errno = 0;
+       out_hdr->sadb_msg_seq = c->seq;
+       out_hdr->sadb_msg_pid = c->portid;
+-      pfkey_broadcast(out_skb, BROADCAST_ALL, NULL, xp_net(xp));
++      pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp));
+       return 0;
+ 
+ }
+@@ -2439,7 +2440,7 @@ static int key_pol_get_resp(struct sock
+       out_hdr->sadb_msg_errno = 0;
+       out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
+       out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
+-      pfkey_broadcast(out_skb, BROADCAST_ONE, sk, xp_net(xp));
++      pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp));
+       err = 0;
+ 
+ out:
+@@ -2695,7 +2696,7 @@ static int dump_sp(struct xfrm_policy *x
+       out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
+ 
+       if (pfk->dump.skb)
+-              pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
++              pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
+                               &pfk->sk, sock_net(&pfk->sk));
+       pfk->dump.skb = out_skb;
+ 
+@@ -2752,7 +2753,7 @@ static int key_notify_policy_flush(const
+       hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
+       hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
+       hdr->sadb_msg_reserved = 0;
+-      pfkey_broadcast(skb_out, BROADCAST_ALL, NULL, c->net);
++      pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
+       return 0;
+ 
+ }
+@@ -2814,7 +2815,7 @@ static int pfkey_process(struct sock *sk
+       void *ext_hdrs[SADB_EXT_MAX];
+       int err;
+ 
+-      pfkey_broadcast(skb_clone(skb, GFP_KERNEL),
++      pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
+                       BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
+ 
+       memset(ext_hdrs, 0, sizeof(ext_hdrs));
+@@ -3036,7 +3037,8 @@ static int key_notify_sa_expire(struct x
+       out_hdr->sadb_msg_seq = 0;
+       out_hdr->sadb_msg_pid = 0;
+ 
+-      pfkey_broadcast(out_skb, BROADCAST_REGISTERED, NULL, xs_net(x));
++      pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL,
++                      xs_net(x));
+       return 0;
+ }
+ 
+@@ -3226,7 +3228,8 @@ static int pfkey_send_acquire(struct xfr
+                      xfrm_ctx->ctx_len);
+       }
+ 
+-      return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x));
++      return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL,
++                             xs_net(x));
+ }
+ 
+ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
+@@ -3424,7 +3427,8 @@ static int pfkey_send_new_mapping(struct
+       n_port->sadb_x_nat_t_port_port = sport;
+       n_port->sadb_x_nat_t_port_reserved = 0;
+ 
+-      return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x));
++      return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL,
++                             xs_net(x));
+ }
+ 
+ #ifdef CONFIG_NET_KEY_MIGRATE
+@@ -3616,7 +3620,7 @@ static int pfkey_send_migrate(const stru
+       }
+ 
+       /* broadcast migrate message to sockets */
+-      pfkey_broadcast(skb, BROADCAST_ALL, NULL, &init_net);
++      pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net);
+ 
+       return 0;
+ 
diff --git a/queue-4.9/bpf-adjust-verifier-heuristics.patch b/queue-4.9/bpf-adjust-verifier-heuristics.patch

new file mode 100644 (file)

index 0000000..086b063
--- /dev/null
+++ b/queue-4.9/bpf-adjust-verifier-heuristics.patch
@@ -0,0 +1,101 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Thu, 18 May 2017 03:00:06 +0200
+Subject: bpf: adjust verifier heuristics
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit 3c2ce60bdd3d57051bf85615deec04a694473840 ]
+
+Current limits with regards to processing program paths do not
+really reflect today's needs anymore due to programs becoming
+more complex and verifier smarter, keeping track of more data
+such as const ALU operations, alignment tracking, spilling of
+PTR_TO_MAP_VALUE_ADJ registers, and other features allowing for
+smarter matching of what LLVM generates.
+
+This also comes with the side-effect that we result in fewer
+opportunities to prune search states and thus often need to do
+more work to prove safety than in the past due to different
+register states and stack layout where we mismatch. Generally,
+it's quite hard to determine what caused a sudden increase in
+complexity, it could be caused by something as trivial as a
+single branch somewhere at the beginning of the program where
+LLVM assigned a stack slot that is marked differently throughout
+other branches and thus causing a mismatch, where verifier
+then needs to prove safety for the whole rest of the program.
+Subsequently, programs with even less than half the insn size
+limit can get rejected. We noticed that while some programs
+load fine under pre 4.11, they get rejected due to hitting
+limits on more recent kernels. We saw that in the vast majority
+of cases (90+%) pruning failed due to register mismatches. In
+case of stack mismatches, majority of cases failed due to
+different stack slot types (invalid, spill, misc) rather than
+differences in spilled registers.
+
+This patch makes pruning more aggressive by also adding markers
+that sit at conditional jumps as well. Currently, we only mark
+jump targets for pruning. For example in direct packet access,
+these are usually error paths where we bail out. We found that
+adding these markers can reduce the number of processed insns
+by up to 30%. Another option is to ignore reg->id in probing
+PTR_TO_MAP_VALUE_OR_NULL registers, which can help pruning
+slightly as well by up to 7% observed complexity reduction as
+stand-alone. Meaning, if a previous path with register type
+PTR_TO_MAP_VALUE_OR_NULL for map X was found to be safe, then
+in the current state a PTR_TO_MAP_VALUE_OR_NULL register for
+the same map X must be safe as well. Last but not least the
+patch also adds a scheduling point and bumps the current limit
+for instructions to be processed to a more adequate value.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -139,7 +139,7 @@ struct bpf_verifier_stack_elem {
+       struct bpf_verifier_stack_elem *next;
+ };
+ 
+-#define BPF_COMPLEXITY_LIMIT_INSNS    65536
++#define BPF_COMPLEXITY_LIMIT_INSNS    98304
+ #define BPF_COMPLEXITY_LIMIT_STACK    1024
+ 
+ struct bpf_call_arg_meta {
+@@ -2452,6 +2452,7 @@ peek_stack:
+                               env->explored_states[t + 1] = STATE_LIST_MARK;
+               } else {
+                       /* conditional jump with two edges */
++                      env->explored_states[t] = STATE_LIST_MARK;
+                       ret = push_insn(t, t + 1, FALLTHROUGH, env);
+                       if (ret == 1)
+                               goto peek_stack;
+@@ -2610,6 +2611,12 @@ static bool states_equal(struct bpf_veri
+                    rcur->type != NOT_INIT))
+                       continue;
+ 
++              /* Don't care about the reg->id in this case. */
++              if (rold->type == PTR_TO_MAP_VALUE_OR_NULL &&
++                  rcur->type == PTR_TO_MAP_VALUE_OR_NULL &&
++                  rold->map_ptr == rcur->map_ptr)
++                      continue;
++
+               if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
+                   compare_ptrs_to_packet(rold, rcur))
+                       continue;
+@@ -2744,6 +2751,9 @@ static int do_check(struct bpf_verifier_
+                       goto process_bpf_exit;
+               }
+ 
++              if (need_resched())
++                      cond_resched();
++
+               if (log_level && do_print_state) {
+                       verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx);
+                       print_verifier_state(&env->cur_state);
diff --git a/queue-4.9/bpf-fix-bpf_trace_printk-on-32-bit-archs.patch b/queue-4.9/bpf-fix-bpf_trace_printk-on-32-bit-archs.patch

new file mode 100644 (file)

index 0000000..798b8d3
--- /dev/null
+++ b/queue-4.9/bpf-fix-bpf_trace_printk-on-32-bit-archs.patch
@@ -0,0 +1,90 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 16 Aug 2017 01:45:33 +0200
+Subject: bpf: fix bpf_trace_printk on 32 bit archs
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit 88a5c690b66110ad255380d8f629c629cf6ca559 ]
+
+James reported that on MIPS32 bpf_trace_printk() is currently
+broken while MIPS64 works fine:
+
+  bpf_trace_printk() uses conditional operators to attempt to
+  pass different types to __trace_printk() depending on the
+  format operators. This doesn't work as intended on 32-bit
+  architectures where u32 and long are passed differently to
+  u64, since the result of C conditional operators follows the
+  "usual arithmetic conversions" rules, such that the values
+  passed to __trace_printk() will always be u64 [causing issues
+  later in the va_list handling for vscnprintf()].
+
+  For example the samples/bpf/tracex5 test printed lines like
+  below on MIPS32, where the fd and buf have come from the u64
+  fd argument, and the size from the buf argument:
+
+    [...] 1180.941542: 0x00000001: write(fd=1, buf=  (null), size=6258688)
+
+  Instead of this:
+
+    [...] 1625.616026: 0x00000001: write(fd=1, buf=009e4000, size=512)
+
+One way to get it working is to expand various combinations
+of argument types into 8 different combinations for 32 bit
+and 64 bit kernels. James tested the fix on MIPS32 and MIPS64
+and confirmed that it resolves the issue.
+
+Fixes: 9c959c863f82 ("tracing: Allow BPF programs to call bpf_trace_printk()")
+Reported-by: James Hogan <james.hogan@imgtec.com>
+Tested-by: James Hogan <james.hogan@imgtec.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/bpf_trace.c |   34 ++++++++++++++++++++++++++++++----
+ 1 file changed, 30 insertions(+), 4 deletions(-)
+
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -203,10 +203,36 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt
+               fmt_cnt++;
+       }
+ 
+-      return __trace_printk(1/* fake ip will not be printed */, fmt,
+-                            mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1,
+-                            mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2,
+-                            mod[2] == 2 ? arg3 : mod[2] == 1 ? (long) arg3 : (u32) arg3);
++/* Horrid workaround for getting va_list handling working with different
++ * argument type combinations generically for 32 and 64 bit archs.
++ */
++#define __BPF_TP_EMIT()       __BPF_ARG3_TP()
++#define __BPF_TP(...)                                                 \
++      __trace_printk(1 /* Fake ip will not be printed. */,            \
++                     fmt, ##__VA_ARGS__)
++
++#define __BPF_ARG1_TP(...)                                            \
++      ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64))        \
++        ? __BPF_TP(arg1, ##__VA_ARGS__)                               \
++        : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32))    \
++            ? __BPF_TP((long)arg1, ##__VA_ARGS__)                     \
++            : __BPF_TP((u32)arg1, ##__VA_ARGS__)))
++
++#define __BPF_ARG2_TP(...)                                            \
++      ((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64))        \
++        ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__)                          \
++        : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32))    \
++            ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__)                \
++            : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))
++
++#define __BPF_ARG3_TP(...)                                            \
++      ((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64))        \
++        ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__)                          \
++        : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32))    \
++            ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__)                \
++            : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))
++
++      return __BPF_TP_EMIT();
+ }
+ 
+ static const struct bpf_func_proto bpf_trace_printk_proto = {
diff --git a/queue-4.9/bpf-fix-mixed-signed-unsigned-derived-min-max-value-bounds.patch b/queue-4.9/bpf-fix-mixed-signed-unsigned-derived-min-max-value-bounds.patch

new file mode 100644 (file)

index 0000000..fc53dd9
--- /dev/null
+++ b/queue-4.9/bpf-fix-mixed-signed-unsigned-derived-min-max-value-bounds.patch
@@ -0,0 +1,458 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 21 Jul 2017 00:00:21 +0200
+Subject: bpf: fix mixed signed/unsigned derived min/max value bounds
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit 4cabc5b186b5427b9ee5a7495172542af105f02b ]
+
+Edward reported that there's an issue in min/max value bounds
+tracking when signed and unsigned compares both provide hints
+on limits when having unknown variables. E.g. a program such
+as the following should have been rejected:
+
+   0: (7a) *(u64 *)(r10 -8) = 0
+   1: (bf) r2 = r10
+   2: (07) r2 += -8
+   3: (18) r1 = 0xffff8a94cda93400
+   5: (85) call bpf_map_lookup_elem#1
+   6: (15) if r0 == 0x0 goto pc+7
+  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp
+   7: (7a) *(u64 *)(r10 -16) = -8
+   8: (79) r1 = *(u64 *)(r10 -16)
+   9: (b7) r2 = -1
+  10: (2d) if r1 > r2 goto pc+3
+  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0
+  R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
+  11: (65) if r1 s> 0x1 goto pc+2
+  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0,max_value=1
+  R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
+  12: (0f) r0 += r1
+  13: (72) *(u8 *)(r0 +0) = 0
+  R0=map_value_adj(ks=8,vs=8,id=0),min_value=0,max_value=1 R1=inv,min_value=0,max_value=1
+  R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
+  14: (b7) r0 = 0
+  15: (95) exit
+
+What happens is that in the first part ...
+
+   8: (79) r1 = *(u64 *)(r10 -16)
+   9: (b7) r2 = -1
+  10: (2d) if r1 > r2 goto pc+3
+
+... r1 carries an unsigned value, and is compared as unsigned
+against a register carrying an immediate. Verifier deduces in
+reg_set_min_max() that since the compare is unsigned and operation
+is greater than (>), that in the fall-through/false case, r1's
+minimum bound must be 0 and maximum bound must be r2. Latter is
+larger than the bound and thus max value is reset back to being
+'invalid' aka BPF_REGISTER_MAX_RANGE. Thus, r1 state is now
+'R1=inv,min_value=0'. The subsequent test ...
+
+  11: (65) if r1 s> 0x1 goto pc+2
+
+... is a signed compare of r1 with immediate value 1. Here,
+verifier deduces in reg_set_min_max() that since the compare
+is signed this time and operation is greater than (>), that
+in the fall-through/false case, we can deduce that r1's maximum
+bound must be 1, meaning with prior test, we result in r1 having
+the following state: R1=inv,min_value=0,max_value=1. Given that
+the actual value this holds is -8, the bounds are wrongly deduced.
+When this is being added to r0 which holds the map_value(_adj)
+type, then subsequent store access in above case will go through
+check_mem_access() which invokes check_map_access_adj(), that
+will then probe whether the map memory is in bounds based
+on the min_value and max_value as well as access size since
+the actual unknown value is min_value <= x <= max_value; commit
+fce366a9dd0d ("bpf, verifier: fix alu ops against map_value{,
+_adj} register types") provides some more explanation on the
+semantics.
+
+It's worth noting in this context that in the current code,
+min_value and max_value tracking are used for two things, i)
+dynamic map value access via check_map_access_adj() and since
+commit 06c1c049721a ("bpf: allow helpers access to variable memory")
+ii) also enforced at check_helper_mem_access() when passing a
+memory address (pointer to packet, map value, stack) and length
+pair to a helper and the length in this case is an unknown value
+defining an access range through min_value/max_value in that
+case. The min_value/max_value tracking is /not/ used in the
+direct packet access case to track ranges. However, the issue
+also affects case ii), for example, the following crafted program
+based on the same principle must be rejected as well:
+
+   0: (b7) r2 = 0
+   1: (bf) r3 = r10
+   2: (07) r3 += -512
+   3: (7a) *(u64 *)(r10 -16) = -8
+   4: (79) r4 = *(u64 *)(r10 -16)
+   5: (b7) r6 = -1
+   6: (2d) if r4 > r6 goto pc+5
+  R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512
+  R4=inv,min_value=0 R6=imm-1,max_value=18446744073709551615,min_align=1 R10=fp
+   7: (65) if r4 s> 0x1 goto pc+4
+  R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512
+  R4=inv,min_value=0,max_value=1 R6=imm-1,max_value=18446744073709551615,min_align=1
+  R10=fp
+   8: (07) r4 += 1
+   9: (b7) r5 = 0
+  10: (6a) *(u16 *)(r10 -512) = 0
+  11: (85) call bpf_skb_load_bytes#26
+  12: (b7) r0 = 0
+  13: (95) exit
+
+Meaning, while we initialize the max_value stack slot that the
+verifier thinks we access in the [1,2] range, in reality we
+pass -7 as length which is interpreted as u32 in the helper.
+Thus, this issue is relevant also for the case of helper ranges.
+Resetting both bounds in check_reg_overflow() in case only one
+of them exceeds limits is also not enough as similar test can be
+created that uses values which are within range, thus also here
+learned min value in r1 is incorrect when mixed with later signed
+test to create a range:
+
+   0: (7a) *(u64 *)(r10 -8) = 0
+   1: (bf) r2 = r10
+   2: (07) r2 += -8
+   3: (18) r1 = 0xffff880ad081fa00
+   5: (85) call bpf_map_lookup_elem#1
+   6: (15) if r0 == 0x0 goto pc+7
+  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp
+   7: (7a) *(u64 *)(r10 -16) = -8
+   8: (79) r1 = *(u64 *)(r10 -16)
+   9: (b7) r2 = 2
+  10: (3d) if r2 >= r1 goto pc+3
+  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
+  R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
+  11: (65) if r1 s> 0x4 goto pc+2
+  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0
+  R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
+  12: (0f) r0 += r1
+  13: (72) *(u8 *)(r0 +0) = 0
+  R0=map_value_adj(ks=8,vs=8,id=0),min_value=3,max_value=4
+  R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
+  14: (b7) r0 = 0
+  15: (95) exit
+
+This leaves us with two options for fixing this: i) to invalidate
+all prior learned information once we switch signed context, ii)
+to track min/max signed and unsigned boundaries separately as
+done in [0]. (Given latter introduces major changes throughout
+the whole verifier, it's rather net-next material, thus this
+patch follows option i), meaning we can derive bounds either
+from only signed tests or only unsigned tests.) There is still the
+case of adjust_reg_min_max_vals(), where we adjust bounds on ALU
+operations, meaning programs like the following where boundaries
+on the reg get mixed in context later on when bounds are merged
+on the dst reg must get rejected, too:
+
+   0: (7a) *(u64 *)(r10 -8) = 0
+   1: (bf) r2 = r10
+   2: (07) r2 += -8
+   3: (18) r1 = 0xffff89b2bf87ce00
+   5: (85) call bpf_map_lookup_elem#1
+   6: (15) if r0 == 0x0 goto pc+6
+  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp
+   7: (7a) *(u64 *)(r10 -16) = -8
+   8: (79) r1 = *(u64 *)(r10 -16)
+   9: (b7) r2 = 2
+  10: (3d) if r2 >= r1 goto pc+2
+  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
+  R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp
+  11: (b7) r7 = 1
+  12: (65) if r7 s> 0x0 goto pc+2
+  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
+  R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,max_value=0 R10=fp
+  13: (b7) r0 = 0
+  14: (95) exit
+
+  from 12 to 15: R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0
+  R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,min_value=1 R10=fp
+  15: (0f) r7 += r1
+  16: (65) if r7 s> 0x4 goto pc+2
+  R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3
+  R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp
+  17: (0f) r0 += r7
+  18: (72) *(u8 *)(r0 +0) = 0
+  R0=map_value_adj(ks=8,vs=8,id=0),min_value=4,max_value=4 R1=inv,min_value=3
+  R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp
+  19: (b7) r0 = 0
+  20: (95) exit
+
+Meaning, in adjust_reg_min_max_vals() we must also reset range
+values on the dst when src/dst registers have mixed signed/
+unsigned derived min/max value bounds with one unbounded value
+as otherwise they can be added together deducing false boundaries.
+Once both boundaries are established from either ALU ops or
+compare operations w/o mixing signed/unsigned insns, then they
+can safely be added to other regs also having both boundaries
+established. Adding regs with one unbounded side to a map value
+where the bounded side has been learned w/o mixing ops is
+possible, but the resulting map value won't recover from that,
+meaning such op is considered invalid at the time of actual
+access. Invalid bounds are set on the dst reg in case i) src reg,
+or ii) in case dst reg already had them. The only way to recover
+would be to perform i) ALU ops but only 'add' is allowed on map
+value types or ii) comparisons, but these are disallowed on
+pointers in case they span a range. This is fine as only BPF_JEQ
+and BPF_JNE may be performed on PTR_TO_MAP_VALUE_OR_NULL registers
+which potentially turn them into PTR_TO_MAP_VALUE type depending
+on the branch, so only here min/max value cannot be invalidated
+for them.
+
+In terms of state pruning, value_from_signed is considered
+as well in states_equal() when dealing with adjusted map values.
+With regards to breaking existing programs, there is a small
+risk, but use-cases are rather quite narrow where this could
+occur and mixing compares is probably unlikely.
+
+Joint work with Josef and Edward.
+
+  [0] https://lists.iovisor.org/pipermail/iovisor-dev/2017-June/000822.html
+
+Fixes: 484611357c19 ("bpf: allow access into map value arrays")
+Reported-by: Edward Cree <ecree@solarflare.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Edward Cree <ecree@solarflare.com>
+Signed-off-by: Josef Bacik <jbacik@fb.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/bpf_verifier.h |    1 
+ kernel/bpf/verifier.c        |  110 +++++++++++++++++++++++++++++++++++++------
+ 2 files changed, 97 insertions(+), 14 deletions(-)
+
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -40,6 +40,7 @@ struct bpf_reg_state {
+        */
+       s64 min_value;
+       u64 max_value;
++      bool value_from_signed;
+ };
+ 
+ enum bpf_stack_slot_type {
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -682,12 +682,13 @@ static int check_ctx_access(struct bpf_v
+       return -EACCES;
+ }
+ 
+-static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
++static bool __is_pointer_value(bool allow_ptr_leaks,
++                             const struct bpf_reg_state *reg)
+ {
+-      if (env->allow_ptr_leaks)
++      if (allow_ptr_leaks)
+               return false;
+ 
+-      switch (env->cur_state.regs[regno].type) {
++      switch (reg->type) {
+       case UNKNOWN_VALUE:
+       case CONST_IMM:
+               return false;
+@@ -696,6 +697,11 @@ static bool is_pointer_value(struct bpf_
+       }
+ }
+ 
++static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
++{
++      return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]);
++}
++
+ static int check_ptr_alignment(struct bpf_verifier_env *env,
+                              struct bpf_reg_state *reg, int off, int size)
+ {
+@@ -1592,10 +1598,24 @@ static void adjust_reg_min_max_vals(stru
+       }
+ 
+       /* We don't know anything about what was done to this register, mark it
+-       * as unknown.
++       * as unknown. Also, if both derived bounds came from signed/unsigned
++       * mixed compares and one side is unbounded, we cannot really do anything
++       * with them as boundaries cannot be trusted. Thus, arithmetic of two
++       * regs of such kind will get invalidated bounds on the dst side.
+        */
+-      if (min_val == BPF_REGISTER_MIN_RANGE &&
+-          max_val == BPF_REGISTER_MAX_RANGE) {
++      if ((min_val == BPF_REGISTER_MIN_RANGE &&
++           max_val == BPF_REGISTER_MAX_RANGE) ||
++          (BPF_SRC(insn->code) == BPF_X &&
++           ((min_val != BPF_REGISTER_MIN_RANGE &&
++             max_val == BPF_REGISTER_MAX_RANGE) ||
++            (min_val == BPF_REGISTER_MIN_RANGE &&
++             max_val != BPF_REGISTER_MAX_RANGE) ||
++            (dst_reg->min_value != BPF_REGISTER_MIN_RANGE &&
++             dst_reg->max_value == BPF_REGISTER_MAX_RANGE) ||
++            (dst_reg->min_value == BPF_REGISTER_MIN_RANGE &&
++             dst_reg->max_value != BPF_REGISTER_MAX_RANGE)) &&
++           regs[insn->dst_reg].value_from_signed !=
++           regs[insn->src_reg].value_from_signed)) {
+               reset_reg_range_values(regs, insn->dst_reg);
+               return;
+       }
+@@ -1939,38 +1959,63 @@ static void reg_set_min_max(struct bpf_r
+                           struct bpf_reg_state *false_reg, u64 val,
+                           u8 opcode)
+ {
++      bool value_from_signed = true;
++      bool is_range = true;
++
+       switch (opcode) {
+       case BPF_JEQ:
+               /* If this is false then we know nothing Jon Snow, but if it is
+                * true then we know for sure.
+                */
+               true_reg->max_value = true_reg->min_value = val;
++              is_range = false;
+               break;
+       case BPF_JNE:
+               /* If this is true we know nothing Jon Snow, but if it is false
+                * we know the value for sure;
+                */
+               false_reg->max_value = false_reg->min_value = val;
++              is_range = false;
+               break;
+       case BPF_JGT:
+-              /* Unsigned comparison, the minimum value is 0. */
+-              false_reg->min_value = 0;
++              value_from_signed = false;
++              /* fallthrough */
+       case BPF_JSGT:
++              if (true_reg->value_from_signed != value_from_signed)
++                      reset_reg_range_values(true_reg, 0);
++              if (false_reg->value_from_signed != value_from_signed)
++                      reset_reg_range_values(false_reg, 0);
++              if (opcode == BPF_JGT) {
++                      /* Unsigned comparison, the minimum value is 0. */
++                      false_reg->min_value = 0;
++              }
+               /* If this is false then we know the maximum val is val,
+                * otherwise we know the min val is val+1.
+                */
+               false_reg->max_value = val;
++              false_reg->value_from_signed = value_from_signed;
+               true_reg->min_value = val + 1;
++              true_reg->value_from_signed = value_from_signed;
+               break;
+       case BPF_JGE:
+-              /* Unsigned comparison, the minimum value is 0. */
+-              false_reg->min_value = 0;
++              value_from_signed = false;
++              /* fallthrough */
+       case BPF_JSGE:
++              if (true_reg->value_from_signed != value_from_signed)
++                      reset_reg_range_values(true_reg, 0);
++              if (false_reg->value_from_signed != value_from_signed)
++                      reset_reg_range_values(false_reg, 0);
++              if (opcode == BPF_JGE) {
++                      /* Unsigned comparison, the minimum value is 0. */
++                      false_reg->min_value = 0;
++              }
+               /* If this is false then we know the maximum value is val - 1,
+                * otherwise we know the mimimum value is val.
+                */
+               false_reg->max_value = val - 1;
++              false_reg->value_from_signed = value_from_signed;
+               true_reg->min_value = val;
++              true_reg->value_from_signed = value_from_signed;
+               break;
+       default:
+               break;
+@@ -1978,6 +2023,12 @@ static void reg_set_min_max(struct bpf_r
+ 
+       check_reg_overflow(false_reg);
+       check_reg_overflow(true_reg);
++      if (is_range) {
++              if (__is_pointer_value(false, false_reg))
++                      reset_reg_range_values(false_reg, 0);
++              if (__is_pointer_value(false, true_reg))
++                      reset_reg_range_values(true_reg, 0);
++      }
+ }
+ 
+ /* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg
+@@ -1987,39 +2038,64 @@ static void reg_set_min_max_inv(struct b
+                               struct bpf_reg_state *false_reg, u64 val,
+                               u8 opcode)
+ {
++      bool value_from_signed = true;
++      bool is_range = true;
++
+       switch (opcode) {
+       case BPF_JEQ:
+               /* If this is false then we know nothing Jon Snow, but if it is
+                * true then we know for sure.
+                */
+               true_reg->max_value = true_reg->min_value = val;
++              is_range = false;
+               break;
+       case BPF_JNE:
+               /* If this is true we know nothing Jon Snow, but if it is false
+                * we know the value for sure;
+                */
+               false_reg->max_value = false_reg->min_value = val;
++              is_range = false;
+               break;
+       case BPF_JGT:
+-              /* Unsigned comparison, the minimum value is 0. */
+-              true_reg->min_value = 0;
++              value_from_signed = false;
++              /* fallthrough */
+       case BPF_JSGT:
++              if (true_reg->value_from_signed != value_from_signed)
++                      reset_reg_range_values(true_reg, 0);
++              if (false_reg->value_from_signed != value_from_signed)
++                      reset_reg_range_values(false_reg, 0);
++              if (opcode == BPF_JGT) {
++                      /* Unsigned comparison, the minimum value is 0. */
++                      true_reg->min_value = 0;
++              }
+               /*
+                * If this is false, then the val is <= the register, if it is
+                * true the register <= to the val.
+                */
+               false_reg->min_value = val;
++              false_reg->value_from_signed = value_from_signed;
+               true_reg->max_value = val - 1;
++              true_reg->value_from_signed = value_from_signed;
+               break;
+       case BPF_JGE:
+-              /* Unsigned comparison, the minimum value is 0. */
+-              true_reg->min_value = 0;
++              value_from_signed = false;
++              /* fallthrough */
+       case BPF_JSGE:
++              if (true_reg->value_from_signed != value_from_signed)
++                      reset_reg_range_values(true_reg, 0);
++              if (false_reg->value_from_signed != value_from_signed)
++                      reset_reg_range_values(false_reg, 0);
++              if (opcode == BPF_JGE) {
++                      /* Unsigned comparison, the minimum value is 0. */
++                      true_reg->min_value = 0;
++              }
+               /* If this is false then constant < register, if it is true then
+                * the register < constant.
+                */
+               false_reg->min_value = val + 1;
++              false_reg->value_from_signed = value_from_signed;
+               true_reg->max_value = val;
++              true_reg->value_from_signed = value_from_signed;
+               break;
+       default:
+               break;
+@@ -2027,6 +2103,12 @@ static void reg_set_min_max_inv(struct b
+ 
+       check_reg_overflow(false_reg);
+       check_reg_overflow(true_reg);
++      if (is_range) {
++              if (__is_pointer_value(false, false_reg))
++                      reset_reg_range_values(false_reg, 0);
++              if (__is_pointer_value(false, true_reg))
++                      reset_reg_range_values(true_reg, 0);
++      }
+ }
+ 
+ static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
diff --git a/queue-4.9/bpf-verifier-add-additional-patterns-to-evaluate_reg_imm_alu.patch b/queue-4.9/bpf-verifier-add-additional-patterns-to-evaluate_reg_imm_alu.patch

new file mode 100644 (file)

index 0000000..d8afcaa
--- /dev/null
+++ b/queue-4.9/bpf-verifier-add-additional-patterns-to-evaluate_reg_imm_alu.patch
@@ -0,0 +1,102 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: John Fastabend <john.fastabend@gmail.com>
+Date: Sun, 2 Jul 2017 02:13:30 +0200
+Subject: bpf, verifier: add additional patterns to evaluate_reg_imm_alu
+
+From: John Fastabend <john.fastabend@gmail.com>
+
+
+[ Upstream commit 43188702b3d98d2792969a3377a30957f05695e6 ]
+
+Currently the verifier does not track imm across alu operations when
+the source register is of unknown type. This adds additional pattern
+matching to catch this and track imm. We've seen LLVM generating this
+pattern while working on cilium.
+
+Signed-off-by: John Fastabend <john.fastabend@gmail.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c |   62 ++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 62 insertions(+)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1467,6 +1467,65 @@ static int evaluate_reg_alu(struct bpf_v
+       return 0;
+ }
+ 
++static int evaluate_reg_imm_alu_unknown(struct bpf_verifier_env *env,
++                                      struct bpf_insn *insn)
++{
++      struct bpf_reg_state *regs = env->cur_state.regs;
++      struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
++      struct bpf_reg_state *src_reg = &regs[insn->src_reg];
++      u8 opcode = BPF_OP(insn->code);
++      s64 imm_log2 = __ilog2_u64((long long)dst_reg->imm);
++
++      /* BPF_X code with src_reg->type UNKNOWN_VALUE here. */
++      if (src_reg->imm > 0 && dst_reg->imm) {
++              switch (opcode) {
++              case BPF_ADD:
++                      /* dreg += sreg
++                       * where both have zero upper bits. Adding them
++                       * can only result making one more bit non-zero
++                       * in the larger value.
++                       * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47)
++                       *     0xffff (imm=48) + 0xffff = 0x1fffe (imm=47)
++                       */
++                      dst_reg->imm = min(src_reg->imm, 63 - imm_log2);
++                      dst_reg->imm--;
++                      break;
++              case BPF_AND:
++                      /* dreg &= sreg
++                       * AND can not extend zero bits only shrink
++                       * Ex.  0x00..00ffffff
++                       *    & 0x0f..ffffffff
++                       *     ----------------
++                       *      0x00..00ffffff
++                       */
++                      dst_reg->imm = max(src_reg->imm, 63 - imm_log2);
++                      break;
++              case BPF_OR:
++                      /* dreg |= sreg
++                       * OR can only extend zero bits
++                       * Ex.  0x00..00ffffff
++                       *    | 0x0f..ffffffff
++                       *     ----------------
++                       *      0x0f..00ffffff
++                       */
++                      dst_reg->imm = min(src_reg->imm, 63 - imm_log2);
++                      break;
++              case BPF_SUB:
++              case BPF_MUL:
++              case BPF_RSH:
++              case BPF_LSH:
++                      /* These may be flushed out later */
++              default:
++                      mark_reg_unknown_value(regs, insn->dst_reg);
++              }
++      } else {
++              mark_reg_unknown_value(regs, insn->dst_reg);
++      }
++
++      dst_reg->type = UNKNOWN_VALUE;
++      return 0;
++}
++
+ static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
+                               struct bpf_insn *insn)
+ {
+@@ -1475,6 +1534,9 @@ static int evaluate_reg_imm_alu(struct b
+       struct bpf_reg_state *src_reg = &regs[insn->src_reg];
+       u8 opcode = BPF_OP(insn->code);
+ 
++      if (BPF_SRC(insn->code) == BPF_X && src_reg->type == UNKNOWN_VALUE)
++              return evaluate_reg_imm_alu_unknown(env, insn);
++
+       /* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn.
+        * Don't care about overflow or negative values, just add them
+        */
diff --git a/queue-4.9/bpf-verifier-fix-alu-ops-against-map_value-_adj-register-types.patch b/queue-4.9/bpf-verifier-fix-alu-ops-against-map_value-_adj-register-types.patch

new file mode 100644 (file)

index 0000000..d15b60b
--- /dev/null
+++ b/queue-4.9/bpf-verifier-fix-alu-ops-against-map_value-_adj-register-types.patch
@@ -0,0 +1,104 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Fri, 31 Mar 2017 02:24:02 +0200
+Subject: bpf, verifier: fix alu ops against map_value{, _adj} register types
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+
+[ Upstream commit fce366a9dd0ddc47e7ce05611c266e8574a45116 ]
+
+While looking into map_value_adj, I noticed that alu operations
+directly on the map_value() resp. map_value_adj() register (any
+alu operation on a map_value() register will turn it into a
+map_value_adj() typed register) are not sufficiently protected
+against some of the operations. Two non-exhaustive examples are
+provided that the verifier needs to reject:
+
+ i) BPF_AND on r0 (map_value_adj):
+
+  0: (bf) r2 = r10
+  1: (07) r2 += -8
+  2: (7a) *(u64 *)(r2 +0) = 0
+  3: (18) r1 = 0xbf842a00
+  5: (85) call bpf_map_lookup_elem#1
+  6: (15) if r0 == 0x0 goto pc+2
+   R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp
+  7: (57) r0 &= 8
+  8: (7a) *(u64 *)(r0 +0) = 22
+   R0=map_value_adj(ks=8,vs=48,id=0),min_value=0,max_value=8 R10=fp
+  9: (95) exit
+
+  from 6 to 9: R0=inv,min_value=0,max_value=0 R10=fp
+  9: (95) exit
+  processed 10 insns
+
+ii) BPF_ADD in 32 bit mode on r0 (map_value_adj):
+
+  0: (bf) r2 = r10
+  1: (07) r2 += -8
+  2: (7a) *(u64 *)(r2 +0) = 0
+  3: (18) r1 = 0xc24eee00
+  5: (85) call bpf_map_lookup_elem#1
+  6: (15) if r0 == 0x0 goto pc+2
+   R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp
+  7: (04) (u32) r0 += (u32) 0
+  8: (7a) *(u64 *)(r0 +0) = 22
+   R0=map_value_adj(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp
+  9: (95) exit
+
+  from 6 to 9: R0=inv,min_value=0,max_value=0 R10=fp
+  9: (95) exit
+  processed 10 insns
+
+Issue is, while min_value / max_value boundaries for the access
+are adjusted appropriately, we change the pointer value in a way
+that cannot be sufficiently tracked anymore from its origin.
+Operations like BPF_{AND,OR,DIV,MUL,etc} on a destination register
+that is PTR_TO_MAP_VALUE{,_ADJ} were probably unintended; in fact,
+all the test cases coming with 484611357c19 ("bpf: allow access
+into map value arrays") perform BPF_ADD only on the destination
+register that is PTR_TO_MAP_VALUE_ADJ.
+
+Only for UNKNOWN_VALUE register types such operations make sense,
+f.e. with unknown memory content fetched initially from a constant
+offset from the map value memory into a register. That register is
+then later tested against lower / upper bounds, so that the verifier
+can then do the tracking of min_value / max_value, and properly
+check once that UNKNOWN_VALUE register is added to the destination
+register with type PTR_TO_MAP_VALUE{,_ADJ}. This is also what the
+original use-case is solving. Note, tracking on what is being
+added is done through adjust_reg_min_max_vals() and later access
+to the map value enforced with these boundaries and the given offset
+from the insn through check_map_access_adj().
+
+Tests will fail for non-root environment due to prohibited pointer
+arithmetic, in particular in check_alu_op(), we bail out on the
+is_pointer_value() check on the dst_reg (which is false in root
+case as we allow for pointer arithmetic via env->allow_ptr_leaks).
+
+Similarly to PTR_TO_PACKET, one way to fix it is to restrict the
+allowed operations on PTR_TO_MAP_VALUE{,_ADJ} registers to 64 bit
+mode BPF_ADD. The test_verifier suite runs fine after the patch
+and it also rejects mentioned test cases.
+
+Fixes: 484611357c19 ("bpf: allow access into map value arrays")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Josef Bacik <jbacik@fb.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1870,6 +1870,7 @@ static int check_alu_op(struct bpf_verif
+                * register as unknown.
+                */
+               if (env->allow_ptr_leaks &&
++                  BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD &&
+                   (dst_reg->type == PTR_TO_MAP_VALUE ||
+                    dst_reg->type == PTR_TO_MAP_VALUE_ADJ))
+                       dst_reg->type = PTR_TO_MAP_VALUE_ADJ;
diff --git a/queue-4.9/bpf-verifier-fix-min-max-handling-in-bpf_sub.patch b/queue-4.9/bpf-verifier-fix-min-max-handling-in-bpf_sub.patch

new file mode 100644 (file)

index 0000000..6d23d07
--- /dev/null
+++ b/queue-4.9/bpf-verifier-fix-min-max-handling-in-bpf_sub.patch
@@ -0,0 +1,61 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Edward Cree <ecree@solarflare.com>
+Date: Fri, 21 Jul 2017 14:37:34 +0100
+Subject: bpf/verifier: fix min/max handling in BPF_SUB
+
+From: Edward Cree <ecree@solarflare.com>
+
+
+[ Upstream commit 9305706c2e808ae59f1eb201867f82f1ddf6d7a6 ]
+
+We have to subtract the src max from the dst min, and vice-versa, since
+ (e.g.) the smallest result comes from the largest subtrahend.
+
+Fixes: 484611357c19 ("bpf: allow access into map value arrays")
+Signed-off-by: Edward Cree <ecree@solarflare.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c |   21 +++++++++++++++------
+ 1 file changed, 15 insertions(+), 6 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1624,10 +1624,12 @@ static void adjust_reg_min_max_vals(stru
+        * do our normal operations to the register, we need to set the values
+        * to the min/max since they are undefined.
+        */
+-      if (min_val == BPF_REGISTER_MIN_RANGE)
+-              dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+-      if (max_val == BPF_REGISTER_MAX_RANGE)
+-              dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
++      if (opcode != BPF_SUB) {
++              if (min_val == BPF_REGISTER_MIN_RANGE)
++                      dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
++              if (max_val == BPF_REGISTER_MAX_RANGE)
++                      dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
++      }
+ 
+       switch (opcode) {
+       case BPF_ADD:
+@@ -1637,10 +1639,17 @@ static void adjust_reg_min_max_vals(stru
+                       dst_reg->max_value += max_val;
+               break;
+       case BPF_SUB:
++              /* If one of our values was at the end of our ranges, then the
++               * _opposite_ value in the dst_reg goes to the end of our range.
++               */
++              if (min_val == BPF_REGISTER_MIN_RANGE)
++                      dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
++              if (max_val == BPF_REGISTER_MAX_RANGE)
++                      dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+               if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
+-                      dst_reg->min_value -= min_val;
++                      dst_reg->min_value -= max_val;
+               if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
+-                      dst_reg->max_value -= max_val;
++                      dst_reg->max_value -= min_val;
+               break;
+       case BPF_MUL:
+               if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
diff --git a/queue-4.9/dccp-defer-ccid_hc_tx_delete-at-dismantle-time.patch b/queue-4.9/dccp-defer-ccid_hc_tx_delete-at-dismantle-time.patch

new file mode 100644 (file)

index 0000000..2135c21
--- /dev/null
+++ b/queue-4.9/dccp-defer-ccid_hc_tx_delete-at-dismantle-time.patch
@@ -0,0 +1,204 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 16 Aug 2017 07:03:15 -0700
+Subject: dccp: defer ccid_hc_tx_delete() at dismantle time
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 120e9dabaf551c6dc03d3a10a1f026376cb1811c ]
+
+syzkaller team reported another problem in DCCP [1]
+
+Problem here is that the structure holding RTO timer
+(ccid2_hc_tx_rto_expire() handler) is freed too soon.
+
+We cannot use del_timer_sync() to cancel the timer
+since this timer wants to grab the socket lock (that would risk a deadlock).
+
+The solution is to defer the freeing of memory until all references
+to the socket have been released. Socket timers do own a reference,
+so this should fix the issue.
+
+[1]
+
+==================================================================
+BUG: KASAN: use-after-free in ccid2_hc_tx_rto_expire+0x51c/0x5c0 net/dccp/ccids/ccid2.c:144
+Read of size 4 at addr ffff8801d2660540 by task kworker/u4:7/3365
+
+CPU: 1 PID: 3365 Comm: kworker/u4:7 Not tainted 4.13.0-rc4+ #3
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Workqueue: events_unbound call_usermodehelper_exec_work
+Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:16 [inline]
+ dump_stack+0x194/0x257 lib/dump_stack.c:52
+ print_address_description+0x73/0x250 mm/kasan/report.c:252
+ kasan_report_error mm/kasan/report.c:351 [inline]
+ kasan_report+0x24e/0x340 mm/kasan/report.c:409
+ __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:429
+ ccid2_hc_tx_rto_expire+0x51c/0x5c0 net/dccp/ccids/ccid2.c:144
+ call_timer_fn+0x233/0x830 kernel/time/timer.c:1268
+ expire_timers kernel/time/timer.c:1307 [inline]
+ __run_timers+0x7fd/0xb90 kernel/time/timer.c:1601
+ run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614
+ __do_softirq+0x2f5/0xba3 kernel/softirq.c:284
+ invoke_softirq kernel/softirq.c:364 [inline]
+ irq_exit+0x1cc/0x200 kernel/softirq.c:405
+ exiting_irq arch/x86/include/asm/apic.h:638 [inline]
+ smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:1044
+ apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:702
+RIP: 0010:arch_local_irq_enable arch/x86/include/asm/paravirt.h:824 [inline]
+RIP: 0010:__raw_write_unlock_irq include/linux/rwlock_api_smp.h:267 [inline]
+RIP: 0010:_raw_write_unlock_irq+0x56/0x70 kernel/locking/spinlock.c:343
+RSP: 0018:ffff8801cd50eaa8 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff10
+RAX: dffffc0000000000 RBX: ffffffff85a090c0 RCX: 0000000000000006
+RDX: 1ffffffff0b595f3 RSI: 1ffff1003962f989 RDI: ffffffff85acaf98
+RBP: ffff8801cd50eab0 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801cc96ea60
+R13: dffffc0000000000 R14: ffff8801cc96e4c0 R15: ffff8801cc96e4c0
+ </IRQ>
+ release_task+0xe9e/0x1a40 kernel/exit.c:220
+ wait_task_zombie kernel/exit.c:1162 [inline]
+ wait_consider_task+0x29b8/0x33c0 kernel/exit.c:1389
+ do_wait_thread kernel/exit.c:1452 [inline]
+ do_wait+0x441/0xa90 kernel/exit.c:1523
+ kernel_wait4+0x1f5/0x370 kernel/exit.c:1665
+ SYSC_wait4+0x134/0x140 kernel/exit.c:1677
+ SyS_wait4+0x2c/0x40 kernel/exit.c:1673
+ call_usermodehelper_exec_sync kernel/kmod.c:286 [inline]
+ call_usermodehelper_exec_work+0x1a0/0x2c0 kernel/kmod.c:323
+ process_one_work+0xbf3/0x1bc0 kernel/workqueue.c:2097
+ worker_thread+0x223/0x1860 kernel/workqueue.c:2231
+ kthread+0x35e/0x430 kernel/kthread.c:231
+ ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:425
+
+Allocated by task 21267:
+ save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:447
+ set_track mm/kasan/kasan.c:459 [inline]
+ kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
+ kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:489
+ kmem_cache_alloc+0x127/0x750 mm/slab.c:3561
+ ccid_new+0x20e/0x390 net/dccp/ccid.c:151
+ dccp_hdlr_ccid+0x27/0x140 net/dccp/feat.c:44
+ __dccp_feat_activate+0x142/0x2a0 net/dccp/feat.c:344
+ dccp_feat_activate_values+0x34e/0xa90 net/dccp/feat.c:1538
+ dccp_rcv_request_sent_state_process net/dccp/input.c:472 [inline]
+ dccp_rcv_state_process+0xed1/0x1620 net/dccp/input.c:677
+ dccp_v4_do_rcv+0xeb/0x160 net/dccp/ipv4.c:679
+ sk_backlog_rcv include/net/sock.h:911 [inline]
+ __release_sock+0x124/0x360 net/core/sock.c:2269
+ release_sock+0xa4/0x2a0 net/core/sock.c:2784
+ inet_wait_for_connect net/ipv4/af_inet.c:557 [inline]
+ __inet_stream_connect+0x671/0xf00 net/ipv4/af_inet.c:643
+ inet_stream_connect+0x58/0xa0 net/ipv4/af_inet.c:682
+ SYSC_connect+0x204/0x470 net/socket.c:1642
+ SyS_connect+0x24/0x30 net/socket.c:1623
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+Freed by task 3049:
+ save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:447
+ set_track mm/kasan/kasan.c:459 [inline]
+ kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
+ __cache_free mm/slab.c:3503 [inline]
+ kmem_cache_free+0x77/0x280 mm/slab.c:3763
+ ccid_hc_tx_delete+0xc5/0x100 net/dccp/ccid.c:190
+ dccp_destroy_sock+0x1d1/0x2b0 net/dccp/proto.c:225
+ inet_csk_destroy_sock+0x166/0x3f0 net/ipv4/inet_connection_sock.c:833
+ dccp_done+0xb7/0xd0 net/dccp/proto.c:145
+ dccp_time_wait+0x13d/0x300 net/dccp/minisocks.c:72
+ dccp_rcv_reset+0x1d1/0x5b0 net/dccp/input.c:160
+ dccp_rcv_state_process+0x8fc/0x1620 net/dccp/input.c:663
+ dccp_v4_do_rcv+0xeb/0x160 net/dccp/ipv4.c:679
+ sk_backlog_rcv include/net/sock.h:911 [inline]
+ __sk_receive_skb+0x33e/0xc00 net/core/sock.c:521
+ dccp_v4_rcv+0xef1/0x1c00 net/dccp/ipv4.c:871
+ ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
+ NF_HOOK include/linux/netfilter.h:248 [inline]
+ ip_local_deliver+0x1ce/0x6d0 net/ipv4/ip_input.c:257
+ dst_input include/net/dst.h:477 [inline]
+ ip_rcv_finish+0x8db/0x19c0 net/ipv4/ip_input.c:397
+ NF_HOOK include/linux/netfilter.h:248 [inline]
+ ip_rcv+0xc3f/0x17d0 net/ipv4/ip_input.c:488
+ __netif_receive_skb_core+0x19af/0x33d0 net/core/dev.c:4417
+ __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4455
+ process_backlog+0x203/0x740 net/core/dev.c:5130
+ napi_poll net/core/dev.c:5527 [inline]
+ net_rx_action+0x792/0x1910 net/core/dev.c:5593
+ __do_softirq+0x2f5/0xba3 kernel/softirq.c:284
+
+The buggy address belongs to the object at ffff8801d2660100
+ which belongs to the cache ccid2_hc_tx_sock of size 1240
+The buggy address is located 1088 bytes inside of
+ 1240-byte region [ffff8801d2660100, ffff8801d26605d8)
+The buggy address belongs to the page:
+page:ffffea0007499800 count:1 mapcount:0 mapping:ffff8801d2660100 index:0x0 compound_mapcount: 0
+flags: 0x200000000008100(slab|head)
+raw: 0200000000008100 ffff8801d2660100 0000000000000000 0000000100000005
+raw: ffffea00075271a0 ffffea0007538820 ffff8801d3aef9c0 0000000000000000
+page dumped because: kasan: bad access detected
+
+Memory state around the buggy address:
+ ffff8801d2660400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8801d2660480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+>ffff8801d2660500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+                                           ^
+ ffff8801d2660580: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc
+ ffff8801d2660600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+==================================================================
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Gerrit Renker <gerrit@erg.abdn.ac.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/proto.c |   14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/net/dccp/proto.c
++++ b/net/dccp/proto.c
+@@ -24,6 +24,7 @@
+ #include <net/checksum.h>
+ 
+ #include <net/inet_sock.h>
++#include <net/inet_common.h>
+ #include <net/sock.h>
+ #include <net/xfrm.h>
+ 
+@@ -170,6 +171,15 @@ const char *dccp_packet_name(const int t
+ 
+ EXPORT_SYMBOL_GPL(dccp_packet_name);
+ 
++static void dccp_sk_destruct(struct sock *sk)
++{
++      struct dccp_sock *dp = dccp_sk(sk);
++
++      ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
++      dp->dccps_hc_tx_ccid = NULL;
++      inet_sock_destruct(sk);
++}
++
+ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
+ {
+       struct dccp_sock *dp = dccp_sk(sk);
+@@ -179,6 +189,7 @@ int dccp_init_sock(struct sock *sk, cons
+       icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
+       sk->sk_state            = DCCP_CLOSED;
+       sk->sk_write_space      = dccp_write_space;
++      sk->sk_destruct         = dccp_sk_destruct;
+       icsk->icsk_sync_mss     = dccp_sync_mss;
+       dp->dccps_mss_cache     = 536;
+       dp->dccps_rate_last     = jiffies;
+@@ -219,8 +230,7 @@ void dccp_destroy_sock(struct sock *sk)
+               dp->dccps_hc_rx_ackvec = NULL;
+       }
+       ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+-      ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+-      dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
++      dp->dccps_hc_rx_ccid = NULL;
+ 
+       /* clean up feature negotiation state */
+       dccp_feat_list_purge(&dp->dccps_featneg);
diff --git a/queue-4.9/dccp-purge-write-queue-in-dccp_destroy_sock.patch b/queue-4.9/dccp-purge-write-queue-in-dccp_destroy_sock.patch

new file mode 100644 (file)

index 0000000..4fd8316
--- /dev/null
+++ b/queue-4.9/dccp-purge-write-queue-in-dccp_destroy_sock.patch
@@ -0,0 +1,76 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 14 Aug 2017 14:10:25 -0700
+Subject: dccp: purge write queue in dccp_destroy_sock()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 7749d4ff88d31b0be17c8683143135adaaadc6a7 ]
+
+syzkaller reported that DCCP could have a non empty
+write queue at dismantle time.
+
+WARNING: CPU: 1 PID: 2953 at net/core/stream.c:199 sk_stream_kill_queues+0x3ce/0x520 net/core/stream.c:199
+Kernel panic - not syncing: panic_on_warn set ...
+
+CPU: 1 PID: 2953 Comm: syz-executor0 Not tainted 4.13.0-rc4+ #2
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:16 [inline]
+ dump_stack+0x194/0x257 lib/dump_stack.c:52
+ panic+0x1e4/0x417 kernel/panic.c:180
+ __warn+0x1c4/0x1d9 kernel/panic.c:541
+ report_bug+0x211/0x2d0 lib/bug.c:183
+ fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:190
+ do_trap_no_signal arch/x86/kernel/traps.c:224 [inline]
+ do_trap+0x260/0x390 arch/x86/kernel/traps.c:273
+ do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:310
+ do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:323
+ invalid_op+0x1e/0x30 arch/x86/entry/entry_64.S:846
+RIP: 0010:sk_stream_kill_queues+0x3ce/0x520 net/core/stream.c:199
+RSP: 0018:ffff8801d182f108 EFLAGS: 00010297
+RAX: ffff8801d1144140 RBX: ffff8801d13cb280 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: ffffffff85137b00 RDI: ffff8801d13cb280
+RBP: ffff8801d182f148 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801d13cb4d0
+R13: ffff8801d13cb3b8 R14: ffff8801d13cb300 R15: ffff8801d13cb3b8
+ inet_csk_destroy_sock+0x175/0x3f0 net/ipv4/inet_connection_sock.c:835
+ dccp_close+0x84d/0xc10 net/dccp/proto.c:1067
+ inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425
+ sock_release+0x8d/0x1e0 net/socket.c:597
+ sock_close+0x16/0x20 net/socket.c:1126
+ __fput+0x327/0x7e0 fs/file_table.c:210
+ ____fput+0x15/0x20 fs/file_table.c:246
+ task_work_run+0x18a/0x260 kernel/task_work.c:116
+ exit_task_work include/linux/task_work.h:21 [inline]
+ do_exit+0xa32/0x1b10 kernel/exit.c:865
+ do_group_exit+0x149/0x400 kernel/exit.c:969
+ get_signal+0x7e8/0x17e0 kernel/signal.c:2330
+ do_signal+0x94/0x1ee0 arch/x86/kernel/signal.c:808
+ exit_to_usermode_loop+0x21c/0x2d0 arch/x86/entry/common.c:157
+ prepare_exit_to_usermode arch/x86/entry/common.c:194 [inline]
+ syscall_return_slowpath+0x3a7/0x450 arch/x86/entry/common.c:263
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/proto.c |    5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/net/dccp/proto.c
++++ b/net/dccp/proto.c
+@@ -201,10 +201,7 @@ void dccp_destroy_sock(struct sock *sk)
+ {
+       struct dccp_sock *dp = dccp_sk(sk);
+ 
+-      /*
+-       * DCCP doesn't use sk_write_queue, just sk_send_head
+-       * for retransmissions
+-       */
++      __skb_queue_purge(&sk->sk_write_queue);
+       if (sk->sk_send_head != NULL) {
+               kfree_skb(sk->sk_send_head);
+               sk->sk_send_head = NULL;
diff --git a/queue-4.9/ipv4-better-ip_max_mtu-enforcement.patch b/queue-4.9/ipv4-better-ip_max_mtu-enforcement.patch

new file mode 100644 (file)

index 0000000..d26ac20
--- /dev/null
+++ b/queue-4.9/ipv4-better-ip_max_mtu-enforcement.patch
@@ -0,0 +1,61 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 16 Aug 2017 11:09:12 -0700
+Subject: ipv4: better IP_MAX_MTU enforcement
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit c780a049f9bf442314335372c9abc4548bfe3e44 ]
+
+While working on yet another syzkaller report, I found
+that our IP_MAX_MTU enforcements were not properly done.
+
+gcc seems to reload dev->mtu for min(dev->mtu, IP_MAX_MTU), and
+final result can be bigger than IP_MAX_MTU :/
+
+This is a problem because device mtu can be changed on other cpus or
+threads.
+
+While this patch does not fix the issue I am working on, it is
+probably worth addressing it.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/ip.h |    4 ++--
+ net/ipv4/route.c |    2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -339,7 +339,7 @@ static inline unsigned int ip_dst_mtu_ma
+           !forwarding)
+               return dst_mtu(dst);
+ 
+-      return min(dst->dev->mtu, IP_MAX_MTU);
++      return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU);
+ }
+ 
+ static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
+@@ -351,7 +351,7 @@ static inline unsigned int ip_skb_dst_mt
+               return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding);
+       }
+ 
+-      return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU);
++      return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
+ }
+ 
+ u32 ip_idents_reserve(u32 hash, int segs);
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1247,7 +1247,7 @@ static unsigned int ipv4_mtu(const struc
+       if (mtu)
+               return mtu;
+ 
+-      mtu = dst->dev->mtu;
++      mtu = READ_ONCE(dst->dev->mtu);
+ 
+       if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
+               if (rt->rt_uses_gateway && mtu > 576)
diff --git a/queue-4.9/ipv4-fix-null-dereference-in-free_fib_info_rcu.patch b/queue-4.9/ipv4-fix-null-dereference-in-free_fib_info_rcu.patch

new file mode 100644 (file)

index 0000000..19f9a44
--- /dev/null
+++ b/queue-4.9/ipv4-fix-null-dereference-in-free_fib_info_rcu.patch
@@ -0,0 +1,59 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 15 Aug 2017 05:26:17 -0700
+Subject: ipv4: fix NULL dereference in free_fib_info_rcu()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 187e5b3ac84d3421d2de3aca949b2791fbcad554 ]
+
+If fi->fib_metrics could not be allocated in fib_create_info()
+we attempt to dereference a NULL pointer in free_fib_info_rcu() :
+
+    m = fi->fib_metrics;
+    if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt))
+            kfree(m);
+
+Before my recent patch, we used to call kfree(NULL) and nothing wrong
+happened.
+
+Instead of using RCU to defer freeing while we are under memory stress,
+it seems better to take immediate action.
+
+This was reported by syzkaller team.
+
+Fixes: 3fb07daff8e9 ("ipv4: add reference counting to metrics")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_semantics.c |   12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -1044,15 +1044,17 @@ struct fib_info *fib_create_info(struct
+       fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
+       if (!fi)
+               goto failure;
+-      fib_info_cnt++;
+       if (cfg->fc_mx) {
+               fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
+-              if (!fi->fib_metrics)
+-                      goto failure;
++              if (unlikely(!fi->fib_metrics)) {
++                      kfree(fi);
++                      return ERR_PTR(err);
++              }
+               atomic_set(&fi->fib_metrics->refcnt, 1);
+-      } else
++      } else {
+               fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
+-
++      }
++      fib_info_cnt++;
+       fi->fib_net = net;
+       fi->fib_protocol = cfg->fc_protocol;
+       fi->fib_scope = cfg->fc_scope;
diff --git a/queue-4.9/ipv6-repair-fib6-tree-in-failure-case.patch b/queue-4.9/ipv6-repair-fib6-tree-in-failure-case.patch

new file mode 100644 (file)

index 0000000..2e63fd3
--- /dev/null
+++ b/queue-4.9/ipv6-repair-fib6-tree-in-failure-case.patch
@@ -0,0 +1,140 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Wei Wang <weiwan@google.com>
+Date: Fri, 18 Aug 2017 17:14:49 -0700
+Subject: ipv6: repair fib6 tree in failure case
+
+From: Wei Wang <weiwan@google.com>
+
+
+[ Upstream commit 348a4002729ccab8b888b38cbc099efa2f2a2036 ]
+
+In fib6_add(), it is possible that fib6_add_1() picks an intermediate
+node and sets the node's fn->leaf to NULL in order to add this new
+route. However, if fib6_add_rt2node() fails to add the new
+route for some reason, fn->leaf will be left as NULL and could
+potentially cause crash when fn->leaf is accessed in fib6_locate().
+This patch makes sure fib6_repair_tree() is called to properly repair
+fn->leaf in the above failure case.
+
+Here is the syzkaller reported general protection fault in fib6_locate:
+kasan: CONFIG_KASAN_INLINE enabled
+kasan: GPF could be caused by NULL-ptr deref or user memory access
+general protection fault: 0000 [#1] SMP KASAN
+Modules linked in:
+CPU: 0 PID: 40937 Comm: syz-executor3 Not tainted
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+task: ffff8801d7d64100 ti: ffff8801d01a0000 task.ti: ffff8801d01a0000
+RIP: 0010:[<ffffffff82a3e0e1>]  [<ffffffff82a3e0e1>] __ipv6_prefix_equal64_half include/net/ipv6.h:475 [inline]
+RIP: 0010:[<ffffffff82a3e0e1>]  [<ffffffff82a3e0e1>] ipv6_prefix_equal include/net/ipv6.h:492 [inline]
+RIP: 0010:[<ffffffff82a3e0e1>]  [<ffffffff82a3e0e1>] fib6_locate_1 net/ipv6/ip6_fib.c:1210 [inline]
+RIP: 0010:[<ffffffff82a3e0e1>]  [<ffffffff82a3e0e1>] fib6_locate+0x281/0x3c0 net/ipv6/ip6_fib.c:1233
+RSP: 0018:ffff8801d01a36a8  EFLAGS: 00010202
+RAX: 0000000000000020 RBX: ffff8801bc790e00 RCX: ffffc90002983000
+RDX: 0000000000001219 RSI: ffff8801d01a37a0 RDI: 0000000000000100
+RBP: ffff8801d01a36f0 R08: 00000000000000ff R09: 0000000000000000
+R10: 0000000000000003 R11: 0000000000000000 R12: 0000000000000001
+R13: dffffc0000000000 R14: ffff8801d01a37a0 R15: 0000000000000000
+FS:  00007f6afd68c700(0000) GS:ffff8801db400000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00000000004c6340 CR3: 00000000ba41f000 CR4: 00000000001426f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Stack:
+ ffff8801d01a37a8 ffff8801d01a3780 ffffed003a0346f5 0000000c82a23ea0
+ ffff8800b7bd7700 ffff8801d01a3780 ffff8800b6a1c940 ffffffff82a23ea0
+ ffff8801d01a3920 ffff8801d01a3748 ffffffff82a223d6 ffff8801d7d64988
+Call Trace:
+ [<ffffffff82a223d6>] ip6_route_del+0x106/0x570 net/ipv6/route.c:2109
+ [<ffffffff82a23f9d>] inet6_rtm_delroute+0xfd/0x100 net/ipv6/route.c:3075
+ [<ffffffff82621359>] rtnetlink_rcv_msg+0x549/0x7a0 net/core/rtnetlink.c:3450
+ [<ffffffff8274c1d1>] netlink_rcv_skb+0x141/0x370 net/netlink/af_netlink.c:2281
+ [<ffffffff82613ddf>] rtnetlink_rcv+0x2f/0x40 net/core/rtnetlink.c:3456
+ [<ffffffff8274ad38>] netlink_unicast_kernel net/netlink/af_netlink.c:1206 [inline]
+ [<ffffffff8274ad38>] netlink_unicast+0x518/0x750 net/netlink/af_netlink.c:1232
+ [<ffffffff8274b83e>] netlink_sendmsg+0x8ce/0xc30 net/netlink/af_netlink.c:1778
+ [<ffffffff82564aff>] sock_sendmsg_nosec net/socket.c:609 [inline]
+ [<ffffffff82564aff>] sock_sendmsg+0xcf/0x110 net/socket.c:619
+ [<ffffffff82564d62>] sock_write_iter+0x222/0x3a0 net/socket.c:834
+ [<ffffffff8178523d>] new_sync_write+0x1dd/0x2b0 fs/read_write.c:478
+ [<ffffffff817853f4>] __vfs_write+0xe4/0x110 fs/read_write.c:491
+ [<ffffffff81786c38>] vfs_write+0x178/0x4b0 fs/read_write.c:538
+ [<ffffffff817892a9>] SYSC_write fs/read_write.c:585 [inline]
+ [<ffffffff817892a9>] SyS_write+0xd9/0x1b0 fs/read_write.c:577
+ [<ffffffff82c71e32>] entry_SYSCALL_64_fastpath+0x12/0x17
+
+Note: there is no "Fixes" tag as this seems to be a bug introduced
+very early.
+
+Signed-off-by: Wei Wang <weiwan@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_fib.c |   22 +++++++++++-----------
+ 1 file changed, 11 insertions(+), 11 deletions(-)
+
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -1001,7 +1001,7 @@ int fib6_add(struct fib6_node *root, str
+                       /* Create subtree root node */
+                       sfn = node_alloc();
+                       if (!sfn)
+-                              goto st_failure;
++                              goto failure;
+ 
+                       sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
+                       atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
+@@ -1017,12 +1017,12 @@ int fib6_add(struct fib6_node *root, str
+ 
+                       if (IS_ERR(sn)) {
+                               /* If it is failed, discard just allocated
+-                                 root, and then (in st_failure) stale node
++                                 root, and then (in failure) stale node
+                                  in main tree.
+                                */
+                               node_free(sfn);
+                               err = PTR_ERR(sn);
+-                              goto st_failure;
++                              goto failure;
+                       }
+ 
+                       /* Now link new subtree to main tree */
+@@ -1036,7 +1036,7 @@ int fib6_add(struct fib6_node *root, str
+ 
+                       if (IS_ERR(sn)) {
+                               err = PTR_ERR(sn);
+-                              goto st_failure;
++                              goto failure;
+                       }
+               }
+ 
+@@ -1078,22 +1078,22 @@ out:
+                       atomic_inc(&pn->leaf->rt6i_ref);
+               }
+ #endif
+-              if (!(rt->dst.flags & DST_NOCACHE))
+-                      dst_free(&rt->dst);
++              goto failure;
+       }
+       return err;
+ 
+-#ifdef CONFIG_IPV6_SUBTREES
+-      /* Subtree creation failed, probably main tree node
+-         is orphan. If it is, shoot it.
++failure:
++      /* fn->leaf could be NULL if fn is an intermediate node and we
++       * failed to add the new route to it in both subtree creation
++       * failure and fib6_add_rt2node() failure case.
++       * In both cases, fib6_repair_tree() should be called to fix
++       * fn->leaf.
+        */
+-st_failure:
+       if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
+               fib6_repair_tree(info->nl_net, fn);
+       if (!(rt->dst.flags & DST_NOCACHE))
+               dst_free(&rt->dst);
+       return err;
+-#endif
+ }
+ 
+ /*
diff --git a/queue-4.9/ipv6-reset-fn-rr_ptr-when-replacing-route.patch b/queue-4.9/ipv6-reset-fn-rr_ptr-when-replacing-route.patch

new file mode 100644 (file)

index 0000000..8996a36
--- /dev/null
+++ b/queue-4.9/ipv6-reset-fn-rr_ptr-when-replacing-route.patch
@@ -0,0 +1,82 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Wei Wang <weiwan@google.com>
+Date: Wed, 16 Aug 2017 11:18:09 -0700
+Subject: ipv6: reset fn->rr_ptr when replacing route
+
+From: Wei Wang <weiwan@google.com>
+
+
+[ Upstream commit 383143f31d7d3525a1dbff733d52fff917f82f15 ]
+
+syzkaller reported the following use-after-free issue in rt6_select():
+BUG: KASAN: use-after-free in rt6_select net/ipv6/route.c:755 [inline] at addr ffff8800bc6994e8
+BUG: KASAN: use-after-free in ip6_pol_route.isra.46+0x1429/0x1470 net/ipv6/route.c:1084 at addr ffff8800bc6994e8
+Read of size 4 by task syz-executor1/439628
+CPU: 0 PID: 439628 Comm: syz-executor1 Not tainted 4.3.5+ #8
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+ 0000000000000000 ffff88018fe435b0 ffffffff81ca384d ffff8801d3588c00
+ ffff8800bc699380 ffff8800bc699500 dffffc0000000000 ffff8801d40a47c0
+ ffff88018fe435d8 ffffffff81735751 ffff88018fe43660 ffff8800bc699380
+Call Trace:
+ [<ffffffff81ca384d>] __dump_stack lib/dump_stack.c:15 [inline]
+ [<ffffffff81ca384d>] dump_stack+0xc1/0x124 lib/dump_stack.c:51
+sctp: [Deprecated]: syz-executor0 (pid 439615) Use of struct sctp_assoc_value in delayed_ack socket option.
+Use struct sctp_sack_info instead
+ [<ffffffff81735751>] kasan_object_err+0x21/0x70 mm/kasan/report.c:158
+ [<ffffffff817359c4>] print_address_description mm/kasan/report.c:196 [inline]
+ [<ffffffff817359c4>] kasan_report_error+0x1b4/0x4a0 mm/kasan/report.c:285
+ [<ffffffff81735d93>] kasan_report mm/kasan/report.c:305 [inline]
+ [<ffffffff81735d93>] __asan_report_load4_noabort+0x43/0x50 mm/kasan/report.c:325
+ [<ffffffff82a28e39>] rt6_select net/ipv6/route.c:755 [inline]
+ [<ffffffff82a28e39>] ip6_pol_route.isra.46+0x1429/0x1470 net/ipv6/route.c:1084
+ [<ffffffff82a28fb1>] ip6_pol_route_output+0x81/0xb0 net/ipv6/route.c:1203
+ [<ffffffff82ab0a50>] fib6_rule_action+0x1f0/0x680 net/ipv6/fib6_rules.c:95
+ [<ffffffff8265cbb6>] fib_rules_lookup+0x2a6/0x7a0 net/core/fib_rules.c:223
+ [<ffffffff82ab1430>] fib6_rule_lookup+0xd0/0x250 net/ipv6/fib6_rules.c:41
+ [<ffffffff82a22006>] ip6_route_output+0x1d6/0x2c0 net/ipv6/route.c:1224
+ [<ffffffff829e83d2>] ip6_dst_lookup_tail+0x4d2/0x890 net/ipv6/ip6_output.c:943
+ [<ffffffff829e889a>] ip6_dst_lookup_flow+0x9a/0x250 net/ipv6/ip6_output.c:1079
+ [<ffffffff82a9f7d8>] ip6_datagram_dst_update+0x538/0xd40 net/ipv6/datagram.c:91
+ [<ffffffff82aa0978>] __ip6_datagram_connect net/ipv6/datagram.c:251 [inline]
+ [<ffffffff82aa0978>] ip6_datagram_connect+0x518/0xe50 net/ipv6/datagram.c:272
+ [<ffffffff82aa1313>] ip6_datagram_connect_v6_only+0x63/0x90 net/ipv6/datagram.c:284
+ [<ffffffff8292f790>] inet_dgram_connect+0x170/0x1f0 net/ipv4/af_inet.c:564
+ [<ffffffff82565547>] SYSC_connect+0x1a7/0x2f0 net/socket.c:1582
+ [<ffffffff8256a649>] SyS_connect+0x29/0x30 net/socket.c:1563
+ [<ffffffff82c72032>] entry_SYSCALL_64_fastpath+0x12/0x17
+Object at ffff8800bc699380, in cache ip6_dst_cache size: 384
+
+The root cause of it is that in fib6_add_rt2node(), when it replaces an
+existing route with the new one, it does not update fn->rr_ptr.
+This commit resets fn->rr_ptr to NULL when it points to a route which is
+replaced in fib6_add_rt2node().
+
+Fixes: 27596472473a ("ipv6: fix ECMP route replacement")
+Signed-off-by: Wei Wang <weiwan@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_fib.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -897,6 +897,8 @@ add:
+               }
+               nsiblings = iter->rt6i_nsiblings;
+               fib6_purge_rt(iter, fn, info->nl_net);
++              if (fn->rr_ptr == iter)
++                      fn->rr_ptr = NULL;
+               rt6_release(iter);
+ 
+               if (nsiblings) {
+@@ -909,6 +911,8 @@ add:
+                               if (rt6_qualify_for_ecmp(iter)) {
+                                       *ins = iter->dst.rt6_next;
+                                       fib6_purge_rt(iter, fn, info->nl_net);
++                                      if (fn->rr_ptr == iter)
++                                              fn->rr_ptr = NULL;
+                                       rt6_release(iter);
+                                       nsiblings--;
+                               } else {
diff --git a/queue-4.9/irda-do-not-leak-initialized-list.dev-to-userspace.patch b/queue-4.9/irda-do-not-leak-initialized-list.dev-to-userspace.patch

new file mode 100644 (file)

index 0000000..842177c
--- /dev/null
+++ b/queue-4.9/irda-do-not-leak-initialized-list.dev-to-userspace.patch
@@ -0,0 +1,35 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Colin Ian King <colin.king@canonical.com>
+Date: Thu, 17 Aug 2017 23:14:58 +0100
+Subject: irda: do not leak initialized list.dev to userspace
+
+From: Colin Ian King <colin.king@canonical.com>
+
+
+[ Upstream commit b024d949a3c24255a7ef1a470420eb478949aa4c ]
+
+list.dev has not been initialized and so the copy_to_user is copying
+data from the stack back to user space which is a potential
+information leak. Fix this by ensuring all of list is initialized to
+zero.
+
+Detected by CoverityScan, CID#1357894 ("Uninitialized scalar variable")
+
+Signed-off-by: Colin Ian King <colin.king@canonical.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/irda/af_irda.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/irda/af_irda.c
++++ b/net/irda/af_irda.c
+@@ -2223,7 +2223,7 @@ static int irda_getsockopt(struct socket
+ {
+       struct sock *sk = sock->sk;
+       struct irda_sock *self = irda_sk(sk);
+-      struct irda_device_list list;
++      struct irda_device_list list = { 0 };
+       struct irda_device_info *discoveries;
+       struct irda_ias_set *   ias_opt;        /* IAS get/query params */
+       struct ias_object *     ias_obj;        /* Object in IAS */
diff --git a/queue-4.9/net-mlx4_core-enable-4k-uar-if-sriov-module-parameter-is-not-enabled.patch b/queue-4.9/net-mlx4_core-enable-4k-uar-if-sriov-module-parameter-is-not-enabled.patch

new file mode 100644 (file)

index 0000000..098f8c1
--- /dev/null
+++ b/queue-4.9/net-mlx4_core-enable-4k-uar-if-sriov-module-parameter-is-not-enabled.patch
@@ -0,0 +1,49 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Huy Nguyen <huyn@mellanox.com>
+Date: Thu, 17 Aug 2017 18:29:52 +0300
+Subject: net/mlx4_core: Enable 4K UAR if SRIOV module parameter is not enabled
+
+From: Huy Nguyen <huyn@mellanox.com>
+
+
+[ Upstream commit ca3d89a3ebe79367bd41b6b8ba37664478ae2dba ]
+
+enable_4k_uar module parameter was added in patch cited below to
+address the backward compatibility issue in SRIOV when the VM has
+system's PAGE_SIZE uar implementation and the Hypervisor has 4k uar
+implementation.
+
+The above compatibility issue does not exist in the non SRIOV case.
+In this patch, we always enable 4k uar implementation if SRIOV
+is not enabled on mlx4's supported cards.
+
+Fixes: 76e39ccf9c36 ("net/mlx4_core: Fix backward compatibility on VFs")
+Signed-off-by: Huy Nguyen <huyn@mellanox.com>
+Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/main.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/main.c
++++ b/drivers/net/ethernet/mellanox/mlx4/main.c
+@@ -430,7 +430,7 @@ static int mlx4_dev_cap(struct mlx4_dev
+               /* Virtual PCI function needs to determine UAR page size from
+                * firmware. Only master PCI function can set the uar page size
+                */
+-              if (enable_4k_uar)
++              if (enable_4k_uar || !dev->persist->num_vfs)
+                       dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT;
+               else
+                       dev->uar_page_shift = PAGE_SHIFT;
+@@ -2269,7 +2269,7 @@ static int mlx4_init_hca(struct mlx4_dev
+ 
+               dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
+ 
+-              if (enable_4k_uar) {
++              if (enable_4k_uar || !dev->persist->num_vfs) {
+                       init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
+                                                   PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT;
+                       init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
diff --git a/queue-4.9/net-sched-fix-null-pointer-dereference-when-action-calls-some-targets.patch b/queue-4.9/net-sched-fix-null-pointer-dereference-when-action-calls-some-targets.patch

new file mode 100644 (file)

index 0000000..3bd38a8
--- /dev/null
+++ b/queue-4.9/net-sched-fix-null-pointer-dereference-when-action-calls-some-targets.patch
@@ -0,0 +1,54 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Fri, 18 Aug 2017 11:01:36 +0800
+Subject: net: sched: fix NULL pointer dereference when action calls some targets
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 4f8a881acc9d1adaf1e552349a0b1df28933a04c ]
+
+As we know in some target's checkentry it may dereference par.entryinfo
+to check entry stuff inside. But when sched action calls xt_check_target,
+par.entryinfo is set with NULL. It would cause kernel panic when calling
+some targets.
+
+It can be reproduced with:
+  # tc qd add dev eth1 ingress handle ffff:
+  # tc filter add dev eth1 parent ffff: u32 match u32 0 0 action xt \
+    -j ECN --ecn-tcp-remove
+
+It could also crash kernel when using target CLUSTERIP or TPROXY.
+
+For now there's no proper value for par.entryinfo in ipt_init_target,
+but it must not be set to NULL. This patch avoids all these
+panics by setting it to an ipt_entry object with all members zeroed.
+
+Note that this issue has been there since the very beginning.
+
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_ipt.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/sched/act_ipt.c
++++ b/net/sched/act_ipt.c
+@@ -41,6 +41,7 @@ static int ipt_init_target(struct xt_ent
+ {
+       struct xt_tgchk_param par;
+       struct xt_target *target;
++      struct ipt_entry e = {};
+       int ret = 0;
+ 
+       target = xt_request_find_target(AF_INET, t->u.user.name,
+@@ -51,6 +52,7 @@ static int ipt_init_target(struct xt_ent
+       t->u.kernel.target = target;
+       memset(&par, 0, sizeof(par));
+       par.table     = table;
++      par.entryinfo = &e;
+       par.target    = target;
+       par.targinfo  = t->data;
+       par.hook_mask = hook;
diff --git a/queue-4.9/net_sched-fix-order-of-queue-length-updates-in-qdisc_replace.patch b/queue-4.9/net_sched-fix-order-of-queue-length-updates-in-qdisc_replace.patch

new file mode 100644 (file)

index 0000000..9ee2d6e
--- /dev/null
+++ b/queue-4.9/net_sched-fix-order-of-queue-length-updates-in-qdisc_replace.patch
@@ -0,0 +1,41 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Date: Sat, 19 Aug 2017 15:37:07 +0300
+Subject: net_sched: fix order of queue length updates in qdisc_replace()
+
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+
+
+[ Upstream commit 68a66d149a8c78ec6720f268597302883e48e9fa ]
+
+It is important to call qdisc_tree_reduce_backlog() after changing queue
+length. Parent qdisc should deactivate class in ->qlen_notify() called from
+qdisc_tree_reduce_backlog() but this happens only if qdisc->q.qlen is zero.
+
+Missed class deactivations lead to crashes/warnings when picking packets
+from an empty qdisc and to corrupted state when reactivating this class later.
+
+Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Fixes: 86a7996cc8a0 ("net_sched: introduce qdisc_replace() helper")
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sch_generic.h |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -768,8 +768,11 @@ static inline struct Qdisc *qdisc_replac
+       old = *pold;
+       *pold = new;
+       if (old != NULL) {
+-              qdisc_tree_reduce_backlog(old, old->q.qlen, old->qstats.backlog);
++              unsigned int qlen = old->q.qlen;
++              unsigned int backlog = old->qstats.backlog;
++
+               qdisc_reset(old);
++              qdisc_tree_reduce_backlog(old, qlen, backlog);
+       }
+       sch_tree_unlock(sch);
+ 
diff --git a/queue-4.9/net_sched-remove-warning-from-qdisc_hash_add.patch b/queue-4.9/net_sched-remove-warning-from-qdisc_hash_add.patch

new file mode 100644 (file)

index 0000000..83e4b15
--- /dev/null
+++ b/queue-4.9/net_sched-remove-warning-from-qdisc_hash_add.patch
@@ -0,0 +1,40 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Date: Tue, 15 Aug 2017 16:39:05 +0300
+Subject: net_sched: remove warning from qdisc_hash_add
+
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+
+
+[ Upstream commit c90e95147c27b1780e76c6e8fea1b5c78d7d387f ]
+
+It was added in commit e57a784d8cae ("pkt_sched: set root qdisc
+before change() in attach_default_qdiscs()") to hide duplicates
+from "tc qdisc show" for inactive devices.
+
+After 59cc1f61f ("net: sched: convert qdisc linked list to hashtable")
+it triggered when classful qdisc is added to inactive device because
+default qdiscs are added before switching root qdisc.
+
+Anyway after commit ea3274695353 ("net: sched: avoid duplicates in
+qdisc dump") duplicates are filtered right in dumper.
+
+Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_api.c |    3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -277,9 +277,6 @@ static struct Qdisc *qdisc_match_from_ro
+ void qdisc_hash_add(struct Qdisc *q)
+ {
+       if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
+-              struct Qdisc *root = qdisc_dev(q)->qdisc;
+-
+-              WARN_ON_ONCE(root == &noop_qdisc);
+               ASSERT_RTNL();
+               hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
+       }
diff --git a/queue-4.9/net_sched-sfq-update-hierarchical-backlog-when-drop-packet.patch b/queue-4.9/net_sched-sfq-update-hierarchical-backlog-when-drop-packet.patch

new file mode 100644 (file)

index 0000000..effa7ba
--- /dev/null
+++ b/queue-4.9/net_sched-sfq-update-hierarchical-backlog-when-drop-packet.patch
@@ -0,0 +1,44 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Date: Tue, 15 Aug 2017 16:37:04 +0300
+Subject: net_sched/sfq: update hierarchical backlog when drop packet
+
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+
+
+[ Upstream commit 325d5dc3f7e7c2840b65e4a2988c082c2c0025c5 ]
+
+When sfq_enqueue() drops the head packet or a packet from another queue, it
+has to update the backlog at upper qdiscs too.
+
+Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too")
+Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_sfq.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -434,6 +434,7 @@ congestion_drop:
+               qdisc_drop(head, sch, to_free);
+ 
+               slot_queue_add(slot, skb);
++              qdisc_tree_reduce_backlog(sch, 0, delta);
+               return NET_XMIT_CN;
+       }
+ 
+@@ -465,8 +466,10 @@ enqueue:
+       /* Return Congestion Notification only if we dropped a packet
+        * from this flow.
+        */
+-      if (qlen != slot->qlen)
++      if (qlen != slot->qlen) {
++              qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb));
+               return NET_XMIT_CN;
++      }
+ 
+       /* As we dropped a packet, better let upper stack know this */
+       qdisc_tree_reduce_backlog(sch, 1, dropped);
diff --git a/queue-4.9/nfp-fix-infinite-loop-on-umapping-cleanup.patch b/queue-4.9/nfp-fix-infinite-loop-on-umapping-cleanup.patch

new file mode 100644 (file)

index 0000000..0935a7e
--- /dev/null
+++ b/queue-4.9/nfp-fix-infinite-loop-on-umapping-cleanup.patch
@@ -0,0 +1,37 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Colin Ian King <colin.king@canonical.com>
+Date: Fri, 18 Aug 2017 12:11:50 +0100
+Subject: nfp: fix infinite loop on umapping cleanup
+
+From: Colin Ian King <colin.king@canonical.com>
+
+
+[ Upstream commit eac2c68d663effb077210218788952b5a0c1f60e ]
+
+The while loop that performs the dma page unmapping never decrements
+index counter f and hence loops forever. Fix this with a pre-decrement
+on f.
+
+Detected by CoverityScan, CID#1357309 ("Infinite loop")
+
+Fixes: 4c3523623dc0 ("net: add driver for Netronome NFP4000/NFP6000 NIC VFs")
+Signed-off-by: Colin Ian King <colin.king@canonical.com>
+Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/netronome/nfp/nfp_net_common.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+@@ -871,8 +871,7 @@ static int nfp_net_tx(struct sk_buff *sk
+       return NETDEV_TX_OK;
+ 
+ err_unmap:
+-      --f;
+-      while (f >= 0) {
++      while (--f >= 0) {
+               frag = &skb_shinfo(skb)->frags[f];
+               dma_unmap_page(&nn->pdev->dev,
+                              tx_ring->txbufs[wr_idx].dma_addr,
diff --git a/queue-4.9/openvswitch-fix-skb_panic-due-to-the-incorrect-actions-attrlen.patch b/queue-4.9/openvswitch-fix-skb_panic-due-to-the-incorrect-actions-attrlen.patch

new file mode 100644 (file)

index 0000000..033e90e
--- /dev/null
+++ b/queue-4.9/openvswitch-fix-skb_panic-due-to-the-incorrect-actions-attrlen.patch
@@ -0,0 +1,125 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Liping Zhang <zlpnobody@gmail.com>
+Date: Wed, 16 Aug 2017 13:30:07 +0800
+Subject: openvswitch: fix skb_panic due to the incorrect actions attrlen
+
+From: Liping Zhang <zlpnobody@gmail.com>
+
+
+[ Upstream commit 494bea39f3201776cdfddc232705f54a0bd210c4 ]
+
+For sw_flow_actions, the actions_len only represents the kernel part's
+size, and when we dump the actions to the userspace, we will do the
+conversions, so its true size may become bigger than the actions_len.
+
+But unfortunately, for OVS_PACKET_ATTR_ACTIONS, we use the actions_len
+to alloc the skbuff, so the user_skb's size may become insufficient and
+oops will happen like this:
+  skbuff: skb_over_panic: text:ffffffff8148fabf len:1749 put:157 head:
+  ffff881300f39000 data:ffff881300f39000 tail:0x6d5 end:0x6c0 dev:<NULL>
+  ------------[ cut here ]------------
+  kernel BUG at net/core/skbuff.c:129!
+  [...]
+  Call Trace:
+   <IRQ>
+   [<ffffffff8148be82>] skb_put+0x43/0x44
+   [<ffffffff8148fabf>] skb_zerocopy+0x6c/0x1f4
+   [<ffffffffa0290d36>] queue_userspace_packet+0x3a3/0x448 [openvswitch]
+   [<ffffffffa0292023>] ovs_dp_upcall+0x30/0x5c [openvswitch]
+   [<ffffffffa028d435>] output_userspace+0x132/0x158 [openvswitch]
+   [<ffffffffa01e6890>] ? ip6_rcv_finish+0x74/0x77 [ipv6]
+   [<ffffffffa028e277>] do_execute_actions+0xcc1/0xdc8 [openvswitch]
+   [<ffffffffa028e3f2>] ovs_execute_actions+0x74/0x106 [openvswitch]
+   [<ffffffffa0292130>] ovs_dp_process_packet+0xe1/0xfd [openvswitch]
+   [<ffffffffa0292b77>] ? key_extract+0x63c/0x8d5 [openvswitch]
+   [<ffffffffa029848b>] ovs_vport_receive+0xa1/0xc3 [openvswitch]
+  [...]
+
+Also we can find that the actions_len is much smaller than the orig_len:
+  crash> struct sw_flow_actions 0xffff8812f539d000
+  struct sw_flow_actions {
+    rcu = {
+      next = 0xffff8812f5398800,
+      func = 0xffffe3b00035db32
+    },
+    orig_len = 1384,
+    actions_len = 592,
+    actions = 0xffff8812f539d01c
+  }
+
+So as a quick fix, use the orig_len instead of the actions_len to alloc
+the user_skb.
+
+Last, this oops happened on our system running a relatively old kernel, but
+the same risk still exists on the mainline, since we use the wrong
+actions_len from the beginning.
+
+Fixes: ccea74457bbd ("openvswitch: include datapath actions with sampled-packet upcall to userspace")
+Cc: Neil McKee <neil.mckee@inmon.com>
+Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
+Acked-by: Pravin B Shelar <pshelar@ovn.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/actions.c  |    1 +
+ net/openvswitch/datapath.c |    7 ++++---
+ net/openvswitch/datapath.h |    2 ++
+ 3 files changed, 7 insertions(+), 3 deletions(-)
+
+--- a/net/openvswitch/actions.c
++++ b/net/openvswitch/actions.c
+@@ -1240,6 +1240,7 @@ int ovs_execute_actions(struct datapath
+               goto out;
+       }
+ 
++      OVS_CB(skb)->acts_origlen = acts->orig_len;
+       err = do_execute_actions(dp, skb, key,
+                                acts->actions, acts->actions_len);
+ 
+--- a/net/openvswitch/datapath.c
++++ b/net/openvswitch/datapath.c
+@@ -383,7 +383,7 @@ static int queue_gso_packets(struct data
+ }
+ 
+ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
+-                            unsigned int hdrlen)
++                            unsigned int hdrlen, int actions_attrlen)
+ {
+       size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+               + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
+@@ -400,7 +400,7 @@ static size_t upcall_msg_size(const stru
+ 
+       /* OVS_PACKET_ATTR_ACTIONS */
+       if (upcall_info->actions_len)
+-              size += nla_total_size(upcall_info->actions_len);
++              size += nla_total_size(actions_attrlen);
+ 
+       /* OVS_PACKET_ATTR_MRU */
+       if (upcall_info->mru)
+@@ -467,7 +467,8 @@ static int queue_userspace_packet(struct
+       else
+               hlen = skb->len;
+ 
+-      len = upcall_msg_size(upcall_info, hlen - cutlen);
++      len = upcall_msg_size(upcall_info, hlen - cutlen,
++                            OVS_CB(skb)->acts_origlen);
+       user_skb = genlmsg_new(len, GFP_ATOMIC);
+       if (!user_skb) {
+               err = -ENOMEM;
+--- a/net/openvswitch/datapath.h
++++ b/net/openvswitch/datapath.h
+@@ -100,12 +100,14 @@ struct datapath {
+  * @input_vport: The original vport packet came in on. This value is cached
+  * when a packet is received by OVS.
+  * @mru: The maximum received fragement size; 0 if the packet is not
++ * @acts_origlen: The netlink size of the flow actions applied to this skb.
+  * @cutlen: The number of bytes from the packet end to be removed.
+  * fragmented.
+  */
+ struct ovs_skb_cb {
+       struct vport            *input_vport;
+       u16                     mru;
++      u16                     acts_origlen;
+       u32                     cutlen;
+ };
+ #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
diff --git a/queue-4.9/ptr_ring-use-kmalloc_array.patch b/queue-4.9/ptr_ring-use-kmalloc_array.patch

new file mode 100644 (file)

index 0000000..a1d66b8
--- /dev/null
+++ b/queue-4.9/ptr_ring-use-kmalloc_array.patch
@@ -0,0 +1,73 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 16 Aug 2017 10:36:47 -0700
+Subject: ptr_ring: use kmalloc_array()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 81fbfe8adaf38d4f5a98c19bebfd41c5d6acaee8 ]
+
+As found by syzkaller, malicious users can set whatever tx_queue_len
+on a tun device and eventually crash the kernel.
+
+Let's remove the ALIGN(XXX, SMP_CACHE_BYTES) thing since a small
+ring buffer is not fast anyway.
+
+Fixes: 2e0ab8ca83c1 ("ptr_ring: array based FIFO for pointers")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Cc: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/ptr_ring.h  |    9 +++++----
+ include/linux/skb_array.h |    3 ++-
+ 2 files changed, 7 insertions(+), 5 deletions(-)
+
+--- a/include/linux/ptr_ring.h
++++ b/include/linux/ptr_ring.h
+@@ -340,9 +340,9 @@ static inline void *ptr_ring_consume_bh(
+       __PTR_RING_PEEK_CALL_v; \
+ })
+ 
+-static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp)
++static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
+ {
+-      return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp);
++      return kcalloc(size, sizeof(void *), gfp);
+ }
+ 
+ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
+@@ -417,7 +417,8 @@ static inline int ptr_ring_resize(struct
+  * In particular if you consume ring in interrupt or BH context, you must
+  * disable interrupts/BH when doing so.
+  */
+-static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings,
++static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
++                                         unsigned int nrings,
+                                          int size,
+                                          gfp_t gfp, void (*destroy)(void *))
+ {
+@@ -425,7 +426,7 @@ static inline int ptr_ring_resize_multip
+       void ***queues;
+       int i;
+ 
+-      queues = kmalloc(nrings * sizeof *queues, gfp);
++      queues = kmalloc_array(nrings, sizeof(*queues), gfp);
+       if (!queues)
+               goto noqueues;
+ 
+--- a/include/linux/skb_array.h
++++ b/include/linux/skb_array.h
+@@ -162,7 +162,8 @@ static inline int skb_array_resize(struc
+ }
+ 
+ static inline int skb_array_resize_multiple(struct skb_array **rings,
+-                                          int nrings, int size, gfp_t gfp)
++                                          int nrings, unsigned int size,
++                                          gfp_t gfp)
+ {
+       BUILD_BUG_ON(offsetof(struct skb_array, ring));
+       return ptr_ring_resize_multiple((struct ptr_ring **)rings,
diff --git a/queue-4.9/sctp-fully-initialize-the-ipv6-address-in-sctp_v6_to_addr.patch b/queue-4.9/sctp-fully-initialize-the-ipv6-address-in-sctp_v6_to_addr.patch

new file mode 100644 (file)

index 0000000..d10fa1a
--- /dev/null
+++ b/queue-4.9/sctp-fully-initialize-the-ipv6-address-in-sctp_v6_to_addr.patch
@@ -0,0 +1,114 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Alexander Potapenko <glider@google.com>
+Date: Wed, 16 Aug 2017 20:16:40 +0200
+Subject: sctp: fully initialize the IPv6 address in sctp_v6_to_addr()
+
+From: Alexander Potapenko <glider@google.com>
+
+
+[ Upstream commit 15339e441ec46fbc3bf3486bb1ae4845b0f1bb8d ]
+
+KMSAN reported use of uninitialized sctp_addr->v4.sin_addr.s_addr and
+sctp_addr->v6.sin6_scope_id in sctp_v6_cmp_addr() (see below).
+Make sure all fields of an IPv6 address are initialized, which
+guarantees that the IPv4 fields are also initialized.
+
+==================================================================
+ BUG: KMSAN: use of uninitialized memory in sctp_v6_cmp_addr+0x8d4/0x9f0
+ net/sctp/ipv6.c:517
+ CPU: 2 PID: 31056 Comm: syz-executor1 Not tainted 4.11.0-rc5+ #2944
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
+ 01/01/2011
+ Call Trace:
+  dump_stack+0x172/0x1c0 lib/dump_stack.c:42
+  is_logbuf_locked mm/kmsan/kmsan.c:59 [inline]
+  kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:938
+  native_save_fl arch/x86/include/asm/irqflags.h:18 [inline]
+  arch_local_save_flags arch/x86/include/asm/irqflags.h:72 [inline]
+  arch_local_irq_save arch/x86/include/asm/irqflags.h:113 [inline]
+  __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:467
+  sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517
+  sctp_v6_get_dst+0x8c7/0x1630 net/sctp/ipv6.c:290
+  sctp_transport_route+0x101/0x570 net/sctp/transport.c:292
+  sctp_assoc_add_peer+0x66d/0x16f0 net/sctp/associola.c:651
+  sctp_sendmsg+0x35a5/0x4f90 net/sctp/socket.c:1871
+  inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762
+  sock_sendmsg_nosec net/socket.c:633 [inline]
+  sock_sendmsg net/socket.c:643 [inline]
+  SYSC_sendto+0x608/0x710 net/socket.c:1696
+  SyS_sendto+0x8a/0xb0 net/socket.c:1664
+  entry_SYSCALL_64_fastpath+0x13/0x94
+ RIP: 0033:0x44b479
+ RSP: 002b:00007f6213f21c08 EFLAGS: 00000286 ORIG_RAX: 000000000000002c
+ RAX: ffffffffffffffda RBX: 0000000020000000 RCX: 000000000044b479
+ RDX: 0000000000000041 RSI: 0000000020edd000 RDI: 0000000000000006
+ RBP: 00000000007080a8 R08: 0000000020b85fe4 R09: 000000000000001c
+ R10: 0000000000040005 R11: 0000000000000286 R12: 00000000ffffffff
+ R13: 0000000000003760 R14: 00000000006e5820 R15: 0000000000ff8000
+ origin description: ----dst_saddr@sctp_v6_get_dst
+ local variable created at:
+  sk_fullsock include/net/sock.h:2321 [inline]
+  inet6_sk include/linux/ipv6.h:309 [inline]
+  sctp_v6_get_dst+0x91/0x1630 net/sctp/ipv6.c:241
+  sctp_transport_route+0x101/0x570 net/sctp/transport.c:292
+==================================================================
+ BUG: KMSAN: use of uninitialized memory in sctp_v6_cmp_addr+0x8d4/0x9f0
+ net/sctp/ipv6.c:517
+ CPU: 2 PID: 31056 Comm: syz-executor1 Not tainted 4.11.0-rc5+ #2944
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
+ 01/01/2011
+ Call Trace:
+  dump_stack+0x172/0x1c0 lib/dump_stack.c:42
+  is_logbuf_locked mm/kmsan/kmsan.c:59 [inline]
+  kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:938
+  native_save_fl arch/x86/include/asm/irqflags.h:18 [inline]
+  arch_local_save_flags arch/x86/include/asm/irqflags.h:72 [inline]
+  arch_local_irq_save arch/x86/include/asm/irqflags.h:113 [inline]
+  __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:467
+  sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517
+  sctp_v6_get_dst+0x8c7/0x1630 net/sctp/ipv6.c:290
+  sctp_transport_route+0x101/0x570 net/sctp/transport.c:292
+  sctp_assoc_add_peer+0x66d/0x16f0 net/sctp/associola.c:651
+  sctp_sendmsg+0x35a5/0x4f90 net/sctp/socket.c:1871
+  inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762
+  sock_sendmsg_nosec net/socket.c:633 [inline]
+  sock_sendmsg net/socket.c:643 [inline]
+  SYSC_sendto+0x608/0x710 net/socket.c:1696
+  SyS_sendto+0x8a/0xb0 net/socket.c:1664
+  entry_SYSCALL_64_fastpath+0x13/0x94
+ RIP: 0033:0x44b479
+ RSP: 002b:00007f6213f21c08 EFLAGS: 00000286 ORIG_RAX: 000000000000002c
+ RAX: ffffffffffffffda RBX: 0000000020000000 RCX: 000000000044b479
+ RDX: 0000000000000041 RSI: 0000000020edd000 RDI: 0000000000000006
+ RBP: 00000000007080a8 R08: 0000000020b85fe4 R09: 000000000000001c
+ R10: 0000000000040005 R11: 0000000000000286 R12: 00000000ffffffff
+ R13: 0000000000003760 R14: 00000000006e5820 R15: 0000000000ff8000
+ origin description: ----dst_saddr@sctp_v6_get_dst
+ local variable created at:
+  sk_fullsock include/net/sock.h:2321 [inline]
+  inet6_sk include/linux/ipv6.h:309 [inline]
+  sctp_v6_get_dst+0x91/0x1630 net/sctp/ipv6.c:241
+  sctp_transport_route+0x101/0x570 net/sctp/transport.c:292
+==================================================================
+
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Reviewed-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/ipv6.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/sctp/ipv6.c
++++ b/net/sctp/ipv6.c
+@@ -512,7 +512,9 @@ static void sctp_v6_to_addr(union sctp_a
+ {
+       addr->sa.sa_family = AF_INET6;
+       addr->v6.sin6_port = port;
++      addr->v6.sin6_flowinfo = 0;
+       addr->v6.sin6_addr = *saddr;
++      addr->v6.sin6_scope_id = 0;
+ }
+ 
+ /* Compare addresses exactly.
diff --git a/queue-4.9/series b/queue-4.9/series

index ea3d43eaaebe4c23d59862f1c538964c89ed0bdf..281f050a4b2286d7ca9a9ce0df8ebae79651b709 100644 (file)
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -1 +1,26 @@
  sparc64-remove-unnecessary-log-message.patch
+af_key-do-not-use-gfp_kernel-in-atomic-contexts.patch
+dccp-purge-write-queue-in-dccp_destroy_sock.patch
+dccp-defer-ccid_hc_tx_delete-at-dismantle-time.patch
+ipv4-fix-null-dereference-in-free_fib_info_rcu.patch
+net_sched-sfq-update-hierarchical-backlog-when-drop-packet.patch
+net_sched-remove-warning-from-qdisc_hash_add.patch
+bpf-fix-bpf_trace_printk-on-32-bit-archs.patch
+openvswitch-fix-skb_panic-due-to-the-incorrect-actions-attrlen.patch
+ptr_ring-use-kmalloc_array.patch
+ipv4-better-ip_max_mtu-enforcement.patch
+nfp-fix-infinite-loop-on-umapping-cleanup.patch
+sctp-fully-initialize-the-ipv6-address-in-sctp_v6_to_addr.patch
+tipc-fix-use-after-free.patch
+ipv6-reset-fn-rr_ptr-when-replacing-route.patch
+ipv6-repair-fib6-tree-in-failure-case.patch
+tcp-when-rearming-rto-if-rto-time-is-in-past-then-fire-rto-asap.patch
+net-mlx4_core-enable-4k-uar-if-sriov-module-parameter-is-not-enabled.patch
+irda-do-not-leak-initialized-list.dev-to-userspace.patch
+net-sched-fix-null-pointer-dereference-when-action-calls-some-targets.patch
+net_sched-fix-order-of-queue-length-updates-in-qdisc_replace.patch
+bpf-verifier-add-additional-patterns-to-evaluate_reg_imm_alu.patch
+bpf-adjust-verifier-heuristics.patch
+bpf-verifier-fix-alu-ops-against-map_value-_adj-register-types.patch
+bpf-fix-mixed-signed-unsigned-derived-min-max-value-bounds.patch
+bpf-verifier-fix-min-max-handling-in-bpf_sub.patch
diff --git a/queue-4.9/tcp-when-rearming-rto-if-rto-time-is-in-past-then-fire-rto-asap.patch b/queue-4.9/tcp-when-rearming-rto-if-rto-time-is-in-past-then-fire-rto-asap.patch

new file mode 100644 (file)

index 0000000..bce614b
--- /dev/null
+++ b/queue-4.9/tcp-when-rearming-rto-if-rto-time-is-in-past-then-fire-rto-asap.patch
@@ -0,0 +1,44 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Neal Cardwell <ncardwell@google.com>
+Date: Wed, 16 Aug 2017 17:53:36 -0400
+Subject: tcp: when rearming RTO, if RTO time is in past then fire RTO ASAP
+
+From: Neal Cardwell <ncardwell@google.com>
+
+
+[ Upstream commit cdbeb633ca71a02b7b63bfeb94994bf4e1a0b894 ]
+
+In some situations tcp_send_loss_probe() can realize that it's unable
+to send a loss probe (TLP), and falls back to calling tcp_rearm_rto()
+to schedule an RTO timer. In such cases, sometimes tcp_rearm_rto()
+realizes that the RTO was eligible to fire immediately or at some
+point in the past (delta_us <= 0). Previously in such cases
+tcp_rearm_rto() was scheduling such "overdue" RTOs to happen at now +
+icsk_rto, which caused needless delays of hundreds of milliseconds
+(and non-linear behavior that made reproducible testing
+difficult). This commit changes the logic to schedule "overdue" RTOs
+ASAP, rather than at now + icsk_rto.
+
+Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)")
+Suggested-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3036,8 +3036,7 @@ void tcp_rearm_rto(struct sock *sk)
+                       /* delta may not be positive if the socket is locked
+                        * when the retrans timer fires and is rescheduled.
+                        */
+-                      if (delta > 0)
+-                              rto = delta;
++                      rto = max(delta, 1);
+               }
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
+                                         TCP_RTO_MAX);
diff --git a/queue-4.9/tipc-fix-use-after-free.patch b/queue-4.9/tipc-fix-use-after-free.patch

new file mode 100644 (file)

index 0000000..5cef1ef
--- /dev/null
+++ b/queue-4.9/tipc-fix-use-after-free.patch
@@ -0,0 +1,169 @@
+From foo@baz Thu Aug 24 17:44:02 PDT 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 16 Aug 2017 09:41:54 -0700
+Subject: tipc: fix use-after-free
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 5bfd37b4de5c98e86b12bd13be5aa46c7484a125 ]
+
+syzkaller reported use-after-free in tipc [1]
+
+When msg->rep skb is freed, set the pointer to NULL,
+so that caller does not free it again.
+
+[1]
+
+==================================================================
+BUG: KASAN: use-after-free in skb_push+0xd4/0xe0 net/core/skbuff.c:1466
+Read of size 8 at addr ffff8801c6e71e90 by task syz-executor5/4115
+
+CPU: 1 PID: 4115 Comm: syz-executor5 Not tainted 4.13.0-rc4+ #32
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:16 [inline]
+ dump_stack+0x194/0x257 lib/dump_stack.c:52
+ print_address_description+0x73/0x250 mm/kasan/report.c:252
+ kasan_report_error mm/kasan/report.c:351 [inline]
+ kasan_report+0x24e/0x340 mm/kasan/report.c:409
+ __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430
+ skb_push+0xd4/0xe0 net/core/skbuff.c:1466
+ tipc_nl_compat_recv+0x833/0x18f0 net/tipc/netlink_compat.c:1209
+ genl_family_rcv_msg+0x7b7/0xfb0 net/netlink/genetlink.c:598
+ genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:623
+ netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2397
+ genl_rcv+0x28/0x40 net/netlink/genetlink.c:634
+ netlink_unicast_kernel net/netlink/af_netlink.c:1265 [inline]
+ netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1291
+ netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1854
+ sock_sendmsg_nosec net/socket.c:633 [inline]
+ sock_sendmsg+0xca/0x110 net/socket.c:643
+ sock_write_iter+0x31a/0x5d0 net/socket.c:898
+ call_write_iter include/linux/fs.h:1743 [inline]
+ new_sync_write fs/read_write.c:457 [inline]
+ __vfs_write+0x684/0x970 fs/read_write.c:470
+ vfs_write+0x189/0x510 fs/read_write.c:518
+ SYSC_write fs/read_write.c:565 [inline]
+ SyS_write+0xef/0x220 fs/read_write.c:557
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+RIP: 0033:0x4512e9
+RSP: 002b:00007f3bc8184c08 EFLAGS: 00000216 ORIG_RAX: 0000000000000001
+RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 00000000004512e9
+RDX: 0000000000000020 RSI: 0000000020fdb000 RDI: 0000000000000006
+RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000216 R12: 00000000004b5e76
+R13: 00007f3bc8184b48 R14: 00000000004b5e86 R15: 0000000000000000
+
+Allocated by task 4115:
+ save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:447
+ set_track mm/kasan/kasan.c:459 [inline]
+ kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
+ kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:489
+ kmem_cache_alloc_node+0x13d/0x750 mm/slab.c:3651
+ __alloc_skb+0xf1/0x740 net/core/skbuff.c:219
+ alloc_skb include/linux/skbuff.h:903 [inline]
+ tipc_tlv_alloc+0x26/0xb0 net/tipc/netlink_compat.c:148
+ tipc_nl_compat_dumpit+0xf2/0x3c0 net/tipc/netlink_compat.c:248
+ tipc_nl_compat_handle net/tipc/netlink_compat.c:1130 [inline]
+ tipc_nl_compat_recv+0x756/0x18f0 net/tipc/netlink_compat.c:1199
+ genl_family_rcv_msg+0x7b7/0xfb0 net/netlink/genetlink.c:598
+ genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:623
+ netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2397
+ genl_rcv+0x28/0x40 net/netlink/genetlink.c:634
+ netlink_unicast_kernel net/netlink/af_netlink.c:1265 [inline]
+ netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1291
+ netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1854
+ sock_sendmsg_nosec net/socket.c:633 [inline]
+ sock_sendmsg+0xca/0x110 net/socket.c:643
+ sock_write_iter+0x31a/0x5d0 net/socket.c:898
+ call_write_iter include/linux/fs.h:1743 [inline]
+ new_sync_write fs/read_write.c:457 [inline]
+ __vfs_write+0x684/0x970 fs/read_write.c:470
+ vfs_write+0x189/0x510 fs/read_write.c:518
+ SYSC_write fs/read_write.c:565 [inline]
+ SyS_write+0xef/0x220 fs/read_write.c:557
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+Freed by task 4115:
+ save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:447
+ set_track mm/kasan/kasan.c:459 [inline]
+ kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
+ __cache_free mm/slab.c:3503 [inline]
+ kmem_cache_free+0x77/0x280 mm/slab.c:3763
+ kfree_skbmem+0x1a1/0x1d0 net/core/skbuff.c:622
+ __kfree_skb net/core/skbuff.c:682 [inline]
+ kfree_skb+0x165/0x4c0 net/core/skbuff.c:699
+ tipc_nl_compat_dumpit+0x36a/0x3c0 net/tipc/netlink_compat.c:260
+ tipc_nl_compat_handle net/tipc/netlink_compat.c:1130 [inline]
+ tipc_nl_compat_recv+0x756/0x18f0 net/tipc/netlink_compat.c:1199
+ genl_family_rcv_msg+0x7b7/0xfb0 net/netlink/genetlink.c:598
+ genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:623
+ netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2397
+ genl_rcv+0x28/0x40 net/netlink/genetlink.c:634
+ netlink_unicast_kernel net/netlink/af_netlink.c:1265 [inline]
+ netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1291
+ netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1854
+ sock_sendmsg_nosec net/socket.c:633 [inline]
+ sock_sendmsg+0xca/0x110 net/socket.c:643
+ sock_write_iter+0x31a/0x5d0 net/socket.c:898
+ call_write_iter include/linux/fs.h:1743 [inline]
+ new_sync_write fs/read_write.c:457 [inline]
+ __vfs_write+0x684/0x970 fs/read_write.c:470
+ vfs_write+0x189/0x510 fs/read_write.c:518
+ SYSC_write fs/read_write.c:565 [inline]
+ SyS_write+0xef/0x220 fs/read_write.c:557
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+The buggy address belongs to the object at ffff8801c6e71dc0
+ which belongs to the cache skbuff_head_cache of size 224
+The buggy address is located 208 bytes inside of
+ 224-byte region [ffff8801c6e71dc0, ffff8801c6e71ea0)
+The buggy address belongs to the page:
+page:ffffea00071b9c40 count:1 mapcount:0 mapping:ffff8801c6e71000 index:0x0
+flags: 0x200000000000100(slab)
+raw: 0200000000000100 ffff8801c6e71000 0000000000000000 000000010000000c
+raw: ffffea0007224a20 ffff8801d98caf48 ffff8801d9e79040 0000000000000000
+page dumped because: kasan: bad access detected
+
+Memory state around the buggy address:
+ ffff8801c6e71d80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
+ ffff8801c6e71e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+>ffff8801c6e71e80: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc
+                         ^
+ ffff8801c6e71f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff8801c6e71f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+==================================================================
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Dmitry Vyukov  <dvyukov@google.com>
+Cc: Jon Maloy <jon.maloy@ericsson.com>
+Cc: Ying Xue <ying.xue@windriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/netlink_compat.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/tipc/netlink_compat.c
++++ b/net/tipc/netlink_compat.c
+@@ -258,13 +258,15 @@ static int tipc_nl_compat_dumpit(struct
+       arg = nlmsg_new(0, GFP_KERNEL);
+       if (!arg) {
+               kfree_skb(msg->rep);
++              msg->rep = NULL;
+               return -ENOMEM;
+       }
+ 
+       err = __tipc_nl_compat_dumpit(cmd, msg, arg);
+-      if (err)
++      if (err) {
+               kfree_skb(msg->rep);
+-
++              msg->rep = NULL;
++      }
+       kfree_skb(arg);
+ 
+       return err;
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 25 Aug 2017 00:45:34 +0000 (17:45 -0700)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 25 Aug 2017 00:45:34 +0000 (17:45 -0700)
queue-4.9/af_key-do-not-use-gfp_kernel-in-atomic-contexts.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/bpf-adjust-verifier-heuristics.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/bpf-fix-bpf_trace_printk-on-32-bit-archs.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/bpf-fix-mixed-signed-unsigned-derived-min-max-value-bounds.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/bpf-verifier-add-additional-patterns-to-evaluate_reg_imm_alu.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/bpf-verifier-fix-alu-ops-against-map_value-_adj-register-types.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/bpf-verifier-fix-min-max-handling-in-bpf_sub.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/dccp-defer-ccid_hc_tx_delete-at-dismantle-time.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/dccp-purge-write-queue-in-dccp_destroy_sock.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ipv4-better-ip_max_mtu-enforcement.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ipv4-fix-null-dereference-in-free_fib_info_rcu.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ipv6-repair-fib6-tree-in-failure-case.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ipv6-reset-fn-rr_ptr-when-replacing-route.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/irda-do-not-leak-initialized-list.dev-to-userspace.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-mlx4_core-enable-4k-uar-if-sriov-module-parameter-is-not-enabled.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net-sched-fix-null-pointer-dereference-when-action-calls-some-targets.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net_sched-fix-order-of-queue-length-updates-in-qdisc_replace.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net_sched-remove-warning-from-qdisc_hash_add.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/net_sched-sfq-update-hierarchical-backlog-when-drop-packet.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/nfp-fix-infinite-loop-on-umapping-cleanup.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/openvswitch-fix-skb_panic-due-to-the-incorrect-actions-attrlen.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ptr_ring-use-kmalloc_array.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/sctp-fully-initialize-the-ipv6-address-in-sctp_v6_to_addr.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/series		patch \| blob \| blame \| history
queue-4.9/tcp-when-rearming-rto-if-rto-time-is-in-past-then-fire-rto-asap.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/tipc-fix-use-after-free.patch	[new file with mode: 0644]	patch \| blob