From c08ed6f7b12c0cc3195d4cfe62f57be1556d1111 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 24 Aug 2017 17:45:34 -0700 Subject: [PATCH] 4.9-stable patches added patches: af_key-do-not-use-gfp_kernel-in-atomic-contexts.patch bpf-adjust-verifier-heuristics.patch bpf-fix-bpf_trace_printk-on-32-bit-archs.patch bpf-fix-mixed-signed-unsigned-derived-min-max-value-bounds.patch bpf-verifier-add-additional-patterns-to-evaluate_reg_imm_alu.patch bpf-verifier-fix-alu-ops-against-map_value-_adj-register-types.patch bpf-verifier-fix-min-max-handling-in-bpf_sub.patch dccp-defer-ccid_hc_tx_delete-at-dismantle-time.patch dccp-purge-write-queue-in-dccp_destroy_sock.patch ipv4-better-ip_max_mtu-enforcement.patch ipv4-fix-null-dereference-in-free_fib_info_rcu.patch ipv6-repair-fib6-tree-in-failure-case.patch ipv6-reset-fn-rr_ptr-when-replacing-route.patch irda-do-not-leak-initialized-list.dev-to-userspace.patch net-mlx4_core-enable-4k-uar-if-sriov-module-parameter-is-not-enabled.patch net-sched-fix-null-pointer-dereference-when-action-calls-some-targets.patch net_sched-fix-order-of-queue-length-updates-in-qdisc_replace.patch net_sched-remove-warning-from-qdisc_hash_add.patch net_sched-sfq-update-hierarchical-backlog-when-drop-packet.patch nfp-fix-infinite-loop-on-umapping-cleanup.patch openvswitch-fix-skb_panic-due-to-the-incorrect-actions-attrlen.patch ptr_ring-use-kmalloc_array.patch sctp-fully-initialize-the-ipv6-address-in-sctp_v6_to_addr.patch tcp-when-rearming-rto-if-rto-time-is-in-past-then-fire-rto-asap.patch tipc-fix-use-after-free.patch --- ...ot-use-gfp_kernel-in-atomic-contexts.patch | 268 ++++++++++ .../bpf-adjust-verifier-heuristics.patch | 101 ++++ ...fix-bpf_trace_printk-on-32-bit-archs.patch | 90 ++++ ...nsigned-derived-min-max-value-bounds.patch | 458 ++++++++++++++++++ ...nal-patterns-to-evaluate_reg_imm_alu.patch | 102 ++++ ...gainst-map_value-_adj-register-types.patch | 104 ++++ ...fier-fix-min-max-handling-in-bpf_sub.patch | 61 +++ 
...-ccid_hc_tx_delete-at-dismantle-time.patch | 204 ++++++++ ...rge-write-queue-in-dccp_destroy_sock.patch | 76 +++ .../ipv4-better-ip_max_mtu-enforcement.patch | 61 +++ ...ull-dereference-in-free_fib_info_rcu.patch | 59 +++ ...pv6-repair-fib6-tree-in-failure-case.patch | 140 ++++++ ...reset-fn-rr_ptr-when-replacing-route.patch | 82 ++++ ...ak-initialized-list.dev-to-userspace.patch | 35 ++ ...riov-module-parameter-is-not-enabled.patch | 49 ++ ...rence-when-action-calls-some-targets.patch | 54 +++ ...ueue-length-updates-in-qdisc_replace.patch | 41 ++ ...d-remove-warning-from-qdisc_hash_add.patch | 40 ++ ...ierarchical-backlog-when-drop-packet.patch | 44 ++ ...ix-infinite-loop-on-umapping-cleanup.patch | 37 ++ ...due-to-the-incorrect-actions-attrlen.patch | 125 +++++ queue-4.9/ptr_ring-use-kmalloc_array.patch | 73 +++ ...-the-ipv6-address-in-sctp_v6_to_addr.patch | 114 +++++ queue-4.9/series | 25 + ...o-time-is-in-past-then-fire-rto-asap.patch | 44 ++ queue-4.9/tipc-fix-use-after-free.patch | 169 +++++++ 26 files changed, 2656 insertions(+) create mode 100644 queue-4.9/af_key-do-not-use-gfp_kernel-in-atomic-contexts.patch create mode 100644 queue-4.9/bpf-adjust-verifier-heuristics.patch create mode 100644 queue-4.9/bpf-fix-bpf_trace_printk-on-32-bit-archs.patch create mode 100644 queue-4.9/bpf-fix-mixed-signed-unsigned-derived-min-max-value-bounds.patch create mode 100644 queue-4.9/bpf-verifier-add-additional-patterns-to-evaluate_reg_imm_alu.patch create mode 100644 queue-4.9/bpf-verifier-fix-alu-ops-against-map_value-_adj-register-types.patch create mode 100644 queue-4.9/bpf-verifier-fix-min-max-handling-in-bpf_sub.patch create mode 100644 queue-4.9/dccp-defer-ccid_hc_tx_delete-at-dismantle-time.patch create mode 100644 queue-4.9/dccp-purge-write-queue-in-dccp_destroy_sock.patch create mode 100644 queue-4.9/ipv4-better-ip_max_mtu-enforcement.patch create mode 100644 queue-4.9/ipv4-fix-null-dereference-in-free_fib_info_rcu.patch create mode 100644 
queue-4.9/ipv6-repair-fib6-tree-in-failure-case.patch create mode 100644 queue-4.9/ipv6-reset-fn-rr_ptr-when-replacing-route.patch create mode 100644 queue-4.9/irda-do-not-leak-initialized-list.dev-to-userspace.patch create mode 100644 queue-4.9/net-mlx4_core-enable-4k-uar-if-sriov-module-parameter-is-not-enabled.patch create mode 100644 queue-4.9/net-sched-fix-null-pointer-dereference-when-action-calls-some-targets.patch create mode 100644 queue-4.9/net_sched-fix-order-of-queue-length-updates-in-qdisc_replace.patch create mode 100644 queue-4.9/net_sched-remove-warning-from-qdisc_hash_add.patch create mode 100644 queue-4.9/net_sched-sfq-update-hierarchical-backlog-when-drop-packet.patch create mode 100644 queue-4.9/nfp-fix-infinite-loop-on-umapping-cleanup.patch create mode 100644 queue-4.9/openvswitch-fix-skb_panic-due-to-the-incorrect-actions-attrlen.patch create mode 100644 queue-4.9/ptr_ring-use-kmalloc_array.patch create mode 100644 queue-4.9/sctp-fully-initialize-the-ipv6-address-in-sctp_v6_to_addr.patch create mode 100644 queue-4.9/tcp-when-rearming-rto-if-rto-time-is-in-past-then-fire-rto-asap.patch create mode 100644 queue-4.9/tipc-fix-use-after-free.patch diff --git a/queue-4.9/af_key-do-not-use-gfp_kernel-in-atomic-contexts.patch b/queue-4.9/af_key-do-not-use-gfp_kernel-in-atomic-contexts.patch new file mode 100644 index 00000000000..420e69f012a --- /dev/null +++ b/queue-4.9/af_key-do-not-use-gfp_kernel-in-atomic-contexts.patch @@ -0,0 +1,268 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Eric Dumazet +Date: Mon, 14 Aug 2017 10:16:45 -0700 +Subject: af_key: do not use GFP_KERNEL in atomic contexts + +From: Eric Dumazet + + +[ Upstream commit 36f41f8fc6d8aa9f8c9072d66ff7cf9055f5e69b ] + +pfkey_broadcast() might be called from non process contexts, +we can not use GFP_KERNEL in these cases [1]. + +This patch partially reverts commit ba51b6be38c1 ("net: Fix RCU splat in +af_key"), only keeping the GFP_ATOMIC forcing under rcu_read_lock() +section. 
+ +[1] : syzkaller reported : + +in_atomic(): 1, irqs_disabled(): 0, pid: 2932, name: syzkaller183439 +3 locks held by syzkaller183439/2932: + #0: (&net->xfrm.xfrm_cfg_mutex){+.+.+.}, at: [] pfkey_sendmsg+0x4c8/0x9f0 net/key/af_key.c:3649 + #1: (&pfk->dump_lock){+.+.+.}, at: [] pfkey_do_dump+0x76/0x3f0 net/key/af_key.c:293 + #2: (&(&net->xfrm.xfrm_policy_lock)->rlock){+...+.}, at: [] spin_lock_bh include/linux/spinlock.h:304 [inline] + #2: (&(&net->xfrm.xfrm_policy_lock)->rlock){+...+.}, at: [] xfrm_policy_walk+0x192/0xa30 net/xfrm/xfrm_policy.c:1028 +CPU: 0 PID: 2932 Comm: syzkaller183439 Not tainted 4.13.0-rc4+ #24 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:16 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:52 + ___might_sleep+0x2b2/0x470 kernel/sched/core.c:5994 + __might_sleep+0x95/0x190 kernel/sched/core.c:5947 + slab_pre_alloc_hook mm/slab.h:416 [inline] + slab_alloc mm/slab.c:3383 [inline] + kmem_cache_alloc+0x24b/0x6e0 mm/slab.c:3559 + skb_clone+0x1a0/0x400 net/core/skbuff.c:1037 + pfkey_broadcast_one+0x4b2/0x6f0 net/key/af_key.c:207 + pfkey_broadcast+0x4ba/0x770 net/key/af_key.c:281 + dump_sp+0x3d6/0x500 net/key/af_key.c:2685 + xfrm_policy_walk+0x2f1/0xa30 net/xfrm/xfrm_policy.c:1042 + pfkey_dump_sp+0x42/0x50 net/key/af_key.c:2695 + pfkey_do_dump+0xaa/0x3f0 net/key/af_key.c:299 + pfkey_spddump+0x1a0/0x210 net/key/af_key.c:2722 + pfkey_process+0x606/0x710 net/key/af_key.c:2814 + pfkey_sendmsg+0x4d6/0x9f0 net/key/af_key.c:3650 +sock_sendmsg_nosec net/socket.c:633 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:643 + ___sys_sendmsg+0x755/0x890 net/socket.c:2035 + __sys_sendmsg+0xe5/0x210 net/socket.c:2069 + SYSC_sendmsg net/socket.c:2080 [inline] + SyS_sendmsg+0x2d/0x50 net/socket.c:2076 + entry_SYSCALL_64_fastpath+0x1f/0xbe +RIP: 0033:0x445d79 +RSP: 002b:00007f32447c1dc8 EFLAGS: 00000202 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 
0000000000445d79 +RDX: 0000000000000000 RSI: 000000002023dfc8 RDI: 0000000000000008 +RBP: 0000000000000086 R08: 00007f32447c2700 R09: 00007f32447c2700 +R10: 00007f32447c2700 R11: 0000000000000202 R12: 0000000000000000 +R13: 00007ffe33edec4f R14: 00007f32447c29c0 R15: 0000000000000000 + +Fixes: ba51b6be38c1 ("net: Fix RCU splat in af_key") +Signed-off-by: Eric Dumazet +Reported-by: Dmitry Vyukov +Cc: David Ahern +Acked-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/key/af_key.c | 48 ++++++++++++++++++++++++++---------------------- + 1 file changed, 26 insertions(+), 22 deletions(-) + +--- a/net/key/af_key.c ++++ b/net/key/af_key.c +@@ -228,7 +228,7 @@ static int pfkey_broadcast_one(struct sk + #define BROADCAST_ONE 1 + #define BROADCAST_REGISTERED 2 + #define BROADCAST_PROMISC_ONLY 4 +-static int pfkey_broadcast(struct sk_buff *skb, ++static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, + int broadcast_flags, struct sock *one_sk, + struct net *net) + { +@@ -278,7 +278,7 @@ static int pfkey_broadcast(struct sk_buf + rcu_read_unlock(); + + if (one_sk != NULL) +- err = pfkey_broadcast_one(skb, &skb2, GFP_KERNEL, one_sk); ++ err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); + + kfree_skb(skb2); + kfree_skb(skb); +@@ -311,7 +311,7 @@ static int pfkey_do_dump(struct pfkey_so + hdr = (struct sadb_msg *) pfk->dump.skb->data; + hdr->sadb_msg_seq = 0; + hdr->sadb_msg_errno = rc; +- pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, ++ pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk, sock_net(&pfk->sk)); + pfk->dump.skb = NULL; + } +@@ -355,7 +355,7 @@ static int pfkey_error(const struct sadb + hdr->sadb_msg_len = (sizeof(struct sadb_msg) / + sizeof(uint64_t)); + +- pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk)); ++ pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk)); + + return 0; + } +@@ -1396,7 +1396,7 @@ static int pfkey_getspi(struct sock *sk, + + xfrm_state_put(x); + 
+- pfkey_broadcast(resp_skb, BROADCAST_ONE, sk, net); ++ pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net); + + return 0; + } +@@ -1483,7 +1483,7 @@ static int key_notify_sa(struct xfrm_sta + hdr->sadb_msg_seq = c->seq; + hdr->sadb_msg_pid = c->portid; + +- pfkey_broadcast(skb, BROADCAST_ALL, NULL, xs_net(x)); ++ pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); + + return 0; + } +@@ -1596,7 +1596,7 @@ static int pfkey_get(struct sock *sk, st + out_hdr->sadb_msg_reserved = 0; + out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; + out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; +- pfkey_broadcast(out_skb, BROADCAST_ONE, sk, sock_net(sk)); ++ pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); + + return 0; + } +@@ -1701,8 +1701,8 @@ static int pfkey_register(struct sock *s + return -ENOBUFS; + } + +- pfkey_broadcast(supp_skb, BROADCAST_REGISTERED, sk, sock_net(sk)); +- ++ pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk, ++ sock_net(sk)); + return 0; + } + +@@ -1720,7 +1720,8 @@ static int unicast_flush_resp(struct soc + hdr->sadb_msg_errno = (uint8_t) 0; + hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); + +- return pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk)); ++ return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, ++ sock_net(sk)); + } + + static int key_notify_sa_flush(const struct km_event *c) +@@ -1741,7 +1742,7 @@ static int key_notify_sa_flush(const str + hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); + hdr->sadb_msg_reserved = 0; + +- pfkey_broadcast(skb, BROADCAST_ALL, NULL, c->net); ++ pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); + + return 0; + } +@@ -1798,7 +1799,7 @@ static int dump_sa(struct xfrm_state *x, + out_hdr->sadb_msg_pid = pfk->dump.msg_portid; + + if (pfk->dump.skb) +- pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, ++ pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk, sock_net(&pfk->sk)); + pfk->dump.skb = 
out_skb; + +@@ -1886,7 +1887,7 @@ static int pfkey_promisc(struct sock *sk + new_hdr->sadb_msg_errno = 0; + } + +- pfkey_broadcast(skb, BROADCAST_ALL, NULL, sock_net(sk)); ++ pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); + return 0; + } + +@@ -2219,7 +2220,7 @@ static int key_notify_policy(struct xfrm + out_hdr->sadb_msg_errno = 0; + out_hdr->sadb_msg_seq = c->seq; + out_hdr->sadb_msg_pid = c->portid; +- pfkey_broadcast(out_skb, BROADCAST_ALL, NULL, xp_net(xp)); ++ pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); + return 0; + + } +@@ -2439,7 +2440,7 @@ static int key_pol_get_resp(struct sock + out_hdr->sadb_msg_errno = 0; + out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; + out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; +- pfkey_broadcast(out_skb, BROADCAST_ONE, sk, xp_net(xp)); ++ pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp)); + err = 0; + + out: +@@ -2695,7 +2696,7 @@ static int dump_sp(struct xfrm_policy *x + out_hdr->sadb_msg_pid = pfk->dump.msg_portid; + + if (pfk->dump.skb) +- pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, ++ pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk, sock_net(&pfk->sk)); + pfk->dump.skb = out_skb; + +@@ -2752,7 +2753,7 @@ static int key_notify_policy_flush(const + hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; + hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); + hdr->sadb_msg_reserved = 0; +- pfkey_broadcast(skb_out, BROADCAST_ALL, NULL, c->net); ++ pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); + return 0; + + } +@@ -2814,7 +2815,7 @@ static int pfkey_process(struct sock *sk + void *ext_hdrs[SADB_EXT_MAX]; + int err; + +- pfkey_broadcast(skb_clone(skb, GFP_KERNEL), ++ pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, + BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); + + memset(ext_hdrs, 0, sizeof(ext_hdrs)); +@@ -3036,7 +3037,8 @@ static int key_notify_sa_expire(struct x + out_hdr->sadb_msg_seq = 0; + out_hdr->sadb_msg_pid 
= 0; + +- pfkey_broadcast(out_skb, BROADCAST_REGISTERED, NULL, xs_net(x)); ++ pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, ++ xs_net(x)); + return 0; + } + +@@ -3226,7 +3228,8 @@ static int pfkey_send_acquire(struct xfr + xfrm_ctx->ctx_len); + } + +- return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x)); ++ return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, ++ xs_net(x)); + } + + static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, +@@ -3424,7 +3427,8 @@ static int pfkey_send_new_mapping(struct + n_port->sadb_x_nat_t_port_port = sport; + n_port->sadb_x_nat_t_port_reserved = 0; + +- return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x)); ++ return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, ++ xs_net(x)); + } + + #ifdef CONFIG_NET_KEY_MIGRATE +@@ -3616,7 +3620,7 @@ static int pfkey_send_migrate(const stru + } + + /* broadcast migrate message to sockets */ +- pfkey_broadcast(skb, BROADCAST_ALL, NULL, &init_net); ++ pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net); + + return 0; + diff --git a/queue-4.9/bpf-adjust-verifier-heuristics.patch b/queue-4.9/bpf-adjust-verifier-heuristics.patch new file mode 100644 index 00000000000..086b063c0fb --- /dev/null +++ b/queue-4.9/bpf-adjust-verifier-heuristics.patch @@ -0,0 +1,101 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Daniel Borkmann +Date: Thu, 18 May 2017 03:00:06 +0200 +Subject: bpf: adjust verifier heuristics + +From: Daniel Borkmann + + +[ Upstream commit 3c2ce60bdd3d57051bf85615deec04a694473840 ] + +Current limits with regards to processing program paths do not +really reflect today's needs anymore due to programs becoming +more complex and verifier smarter, keeping track of more data +such as const ALU operations, alignment tracking, spilling of +PTR_TO_MAP_VALUE_ADJ registers, and other features allowing for +smarter matching of what LLVM generates. 
+ +This also comes with the side-effect that we end up with fewer +opportunities to prune search states and thus often need to do +more work to prove safety than in the past due to different +register states and stack layout where we mismatch. Generally, +it's quite hard to determine what caused a sudden increase in +complexity; it could be caused by something as trivial as a +single branch somewhere at the beginning of the program where +LLVM assigned a stack slot that is marked differently throughout +other branches, thus causing a mismatch, where the verifier +then needs to prove safety for the whole rest of the program. +Subsequently, programs with even less than half the insn size +limit can get rejected. We noticed that while some programs +load fine under pre 4.11, they get rejected due to hitting +limits on more recent kernels. We saw that in the vast majority +of cases (90+%) pruning failed due to register mismatches. In +case of stack mismatches, the majority of cases failed due to +different stack slot types (invalid, spill, misc) rather than +differences in spilled registers. + +This patch makes pruning more aggressive by also adding markers +that sit at conditional jumps. Currently, we only mark +jump targets for pruning. For example in direct packet access, +these are usually error paths where we bail out. We found that +adding these markers can reduce the number of processed insns +by up to 30%. Another option is to ignore reg->id in probing +PTR_TO_MAP_VALUE_OR_NULL registers, which can help pruning +slightly as well by up to 7% observed complexity reduction as +stand-alone. Meaning, if a previous path with register type +PTR_TO_MAP_VALUE_OR_NULL for map X was found to be safe, then +in the current state a PTR_TO_MAP_VALUE_OR_NULL register for +the same map X must be safe as well. Last but not least, the +patch also adds a scheduling point and bumps the current limit +for instructions to be processed to a more adequate value. 
+ +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -139,7 +139,7 @@ struct bpf_verifier_stack_elem { + struct bpf_verifier_stack_elem *next; + }; + +-#define BPF_COMPLEXITY_LIMIT_INSNS 65536 ++#define BPF_COMPLEXITY_LIMIT_INSNS 98304 + #define BPF_COMPLEXITY_LIMIT_STACK 1024 + + struct bpf_call_arg_meta { +@@ -2452,6 +2452,7 @@ peek_stack: + env->explored_states[t + 1] = STATE_LIST_MARK; + } else { + /* conditional jump with two edges */ ++ env->explored_states[t] = STATE_LIST_MARK; + ret = push_insn(t, t + 1, FALLTHROUGH, env); + if (ret == 1) + goto peek_stack; +@@ -2610,6 +2611,12 @@ static bool states_equal(struct bpf_veri + rcur->type != NOT_INIT)) + continue; + ++ /* Don't care about the reg->id in this case. */ ++ if (rold->type == PTR_TO_MAP_VALUE_OR_NULL && ++ rcur->type == PTR_TO_MAP_VALUE_OR_NULL && ++ rold->map_ptr == rcur->map_ptr) ++ continue; ++ + if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET && + compare_ptrs_to_packet(rold, rcur)) + continue; +@@ -2744,6 +2751,9 @@ static int do_check(struct bpf_verifier_ + goto process_bpf_exit; + } + ++ if (need_resched()) ++ cond_resched(); ++ + if (log_level && do_print_state) { + verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx); + print_verifier_state(&env->cur_state); diff --git a/queue-4.9/bpf-fix-bpf_trace_printk-on-32-bit-archs.patch b/queue-4.9/bpf-fix-bpf_trace_printk-on-32-bit-archs.patch new file mode 100644 index 00000000000..798b8d37504 --- /dev/null +++ b/queue-4.9/bpf-fix-bpf_trace_printk-on-32-bit-archs.patch @@ -0,0 +1,90 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Daniel Borkmann +Date: Wed, 16 Aug 2017 01:45:33 +0200 +Subject: bpf: fix bpf_trace_printk on 32 bit archs + +From: Daniel Borkmann + + +[ Upstream commit 
88a5c690b66110ad255380d8f629c629cf6ca559 ] + +James reported that on MIPS32 bpf_trace_printk() is currently +broken while MIPS64 works fine: + + bpf_trace_printk() uses conditional operators to attempt to + pass different types to __trace_printk() depending on the + format operators. This doesn't work as intended on 32-bit + architectures where u32 and long are passed differently to + u64, since the result of C conditional operators follows the + "usual arithmetic conversions" rules, such that the values + passed to __trace_printk() will always be u64 [causing issues + later in the va_list handling for vscnprintf()]. + + For example the samples/bpf/tracex5 test printed lines like + below on MIPS32, where the fd and buf have come from the u64 + fd argument, and the size from the buf argument: + + [...] 1180.941542: 0x00000001: write(fd=1, buf= (null), size=6258688) + + Instead of this: + + [...] 1625.616026: 0x00000001: write(fd=1, buf=009e4000, size=512) + +One way to get it working is to expand various combinations +of argument types into 8 different combinations for 32 bit +and 64 bit kernels. Fix tested by James on MIPS32 and MIPS64 +as well that it resolves the issue. + +Fixes: 9c959c863f82 ("tracing: Allow BPF programs to call bpf_trace_printk()") +Reported-by: James Hogan +Tested-by: James Hogan +Signed-off-by: Daniel Borkmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++++++++++++---- + 1 file changed, 30 insertions(+), 4 deletions(-) + +--- a/kernel/trace/bpf_trace.c ++++ b/kernel/trace/bpf_trace.c +@@ -203,10 +203,36 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt + fmt_cnt++; + } + +- return __trace_printk(1/* fake ip will not be printed */, fmt, +- mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1, +- mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2, +- mod[2] == 2 ? arg3 : mod[2] == 1 ? 
(long) arg3 : (u32) arg3); ++/* Horrid workaround for getting va_list handling working with different ++ * argument type combinations generically for 32 and 64 bit archs. ++ */ ++#define __BPF_TP_EMIT() __BPF_ARG3_TP() ++#define __BPF_TP(...) \ ++ __trace_printk(1 /* Fake ip will not be printed. */, \ ++ fmt, ##__VA_ARGS__) ++ ++#define __BPF_ARG1_TP(...) \ ++ ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \ ++ ? __BPF_TP(arg1, ##__VA_ARGS__) \ ++ : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \ ++ ? __BPF_TP((long)arg1, ##__VA_ARGS__) \ ++ : __BPF_TP((u32)arg1, ##__VA_ARGS__))) ++ ++#define __BPF_ARG2_TP(...) \ ++ ((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \ ++ ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__) \ ++ : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \ ++ ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__) \ ++ : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__))) ++ ++#define __BPF_ARG3_TP(...) \ ++ ((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64)) \ ++ ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__) \ ++ : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32)) \ ++ ? 
__BPF_ARG2_TP((long)arg3, ##__VA_ARGS__) \ ++ : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__))) ++ ++ return __BPF_TP_EMIT(); + } + + static const struct bpf_func_proto bpf_trace_printk_proto = { diff --git a/queue-4.9/bpf-fix-mixed-signed-unsigned-derived-min-max-value-bounds.patch b/queue-4.9/bpf-fix-mixed-signed-unsigned-derived-min-max-value-bounds.patch new file mode 100644 index 00000000000..fc53dd916de --- /dev/null +++ b/queue-4.9/bpf-fix-mixed-signed-unsigned-derived-min-max-value-bounds.patch @@ -0,0 +1,458 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Daniel Borkmann +Date: Fri, 21 Jul 2017 00:00:21 +0200 +Subject: bpf: fix mixed signed/unsigned derived min/max value bounds + +From: Daniel Borkmann + + +[ Upstream commit 4cabc5b186b5427b9ee5a7495172542af105f02b ] + +Edward reported that there's an issue in min/max value bounds +tracking when signed and unsigned compares both provide hints +on limits when having unknown variables. E.g. a program such +as the following should have been rejected: + + 0: (7a) *(u64 *)(r10 -8) = 0 + 1: (bf) r2 = r10 + 2: (07) r2 += -8 + 3: (18) r1 = 0xffff8a94cda93400 + 5: (85) call bpf_map_lookup_elem#1 + 6: (15) if r0 == 0x0 goto pc+7 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp + 7: (7a) *(u64 *)(r10 -16) = -8 + 8: (79) r1 = *(u64 *)(r10 -16) + 9: (b7) r2 = -1 + 10: (2d) if r1 > r2 goto pc+3 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0 + R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp + 11: (65) if r1 s> 0x1 goto pc+2 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0,max_value=1 + R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp + 12: (0f) r0 += r1 + 13: (72) *(u8 *)(r0 +0) = 0 + R0=map_value_adj(ks=8,vs=8,id=0),min_value=0,max_value=1 R1=inv,min_value=0,max_value=1 + R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp + 14: (b7) r0 = 0 + 15: (95) exit + +What happens is that in the first part ... 
+ + 8: (79) r1 = *(u64 *)(r10 -16) + 9: (b7) r2 = -1 + 10: (2d) if r1 > r2 goto pc+3 + +... r1 carries an unsigned value, and is compared as unsigned +against a register carrying an immediate. Verifier deduces in +reg_set_min_max() that since the compare is unsigned and operation +is greater than (>), that in the fall-through/false case, r1's +minimum bound must be 0 and maximum bound must be r2. Latter is +larger than the bound and thus max value is reset back to being +'invalid' aka BPF_REGISTER_MAX_RANGE. Thus, r1 state is now +'R1=inv,min_value=0'. The subsequent test ... + + 11: (65) if r1 s> 0x1 goto pc+2 + +... is a signed compare of r1 with immediate value 1. Here, +verifier deduces in reg_set_min_max() that since the compare +is signed this time and operation is greater than (>), that +in the fall-through/false case, we can deduce that r1's maximum +bound must be 1, meaning with prior test, we result in r1 having +the following state: R1=inv,min_value=0,max_value=1. Given that +the actual value this holds is -8, the bounds are wrongly deduced. +When this is being added to r0 which holds the map_value(_adj) +type, then subsequent store access in above case will go through +check_mem_access() which invokes check_map_access_adj(), that +will then probe whether the map memory is in bounds based +on the min_value and max_value as well as access size since +the actual unknown value is min_value <= x <= max_value; commit +fce366a9dd0d ("bpf, verifier: fix alu ops against map_value{, +_adj} register types") provides some more explanation on the +semantics. 
+ +It's worth noting in this context that in the current code, +min_value and max_value tracking are used for two things, i) +dynamic map value access via check_map_access_adj() and since +commit 06c1c049721a ("bpf: allow helpers access to variable memory") +ii) also enforced at check_helper_mem_access() when passing a +memory address (pointer to packet, map value, stack) and length +pair to a helper and the length in this case is an unknown value +defining an access range through min_value/max_value in that +case. The min_value/max_value tracking is /not/ used in the +direct packet access case to track ranges. However, the issue +also affects case ii), for example, the following crafted program +based on the same principle must be rejected as well: + + 0: (b7) r2 = 0 + 1: (bf) r3 = r10 + 2: (07) r3 += -512 + 3: (7a) *(u64 *)(r10 -16) = -8 + 4: (79) r4 = *(u64 *)(r10 -16) + 5: (b7) r6 = -1 + 6: (2d) if r4 > r6 goto pc+5 + R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512 + R4=inv,min_value=0 R6=imm-1,max_value=18446744073709551615,min_align=1 R10=fp + 7: (65) if r4 s> 0x1 goto pc+4 + R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512 + R4=inv,min_value=0,max_value=1 R6=imm-1,max_value=18446744073709551615,min_align=1 + R10=fp + 8: (07) r4 += 1 + 9: (b7) r5 = 0 + 10: (6a) *(u16 *)(r10 -512) = 0 + 11: (85) call bpf_skb_load_bytes#26 + 12: (b7) r0 = 0 + 13: (95) exit + +Meaning, while we initialize the max_value stack slot that the +verifier thinks we access in the [1,2] range, in reality we +pass -7 as length which is interpreted as u32 in the helper. +Thus, this issue is relevant also for the case of helper ranges. 
+Resetting both bounds in check_reg_overflow() in case only one +of them exceeds limits is also not enough as similar test can be +created that uses values which are within range, thus also here +learned min value in r1 is incorrect when mixed with later signed +test to create a range: + + 0: (7a) *(u64 *)(r10 -8) = 0 + 1: (bf) r2 = r10 + 2: (07) r2 += -8 + 3: (18) r1 = 0xffff880ad081fa00 + 5: (85) call bpf_map_lookup_elem#1 + 6: (15) if r0 == 0x0 goto pc+7 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp + 7: (7a) *(u64 *)(r10 -16) = -8 + 8: (79) r1 = *(u64 *)(r10 -16) + 9: (b7) r2 = 2 + 10: (3d) if r2 >= r1 goto pc+3 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 + R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp + 11: (65) if r1 s> 0x4 goto pc+2 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 + R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp + 12: (0f) r0 += r1 + 13: (72) *(u8 *)(r0 +0) = 0 + R0=map_value_adj(ks=8,vs=8,id=0),min_value=3,max_value=4 + R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp + 14: (b7) r0 = 0 + 15: (95) exit + +This leaves us with two options for fixing this: i) to invalidate +all prior learned information once we switch signed context, ii) +to track min/max signed and unsigned boundaries separately as +done in [0]. (Given latter introduces major changes throughout +the whole verifier, it's rather net-next material, thus this +patch follows option i), meaning we can derive bounds either +from only signed tests or only unsigned tests.) 
There is still the +case of adjust_reg_min_max_vals(), where we adjust bounds on ALU +operations, meaning programs like the following where boundaries +on the reg get mixed in context later on when bounds are merged +on the dst reg must get rejected, too: + + 0: (7a) *(u64 *)(r10 -8) = 0 + 1: (bf) r2 = r10 + 2: (07) r2 += -8 + 3: (18) r1 = 0xffff89b2bf87ce00 + 5: (85) call bpf_map_lookup_elem#1 + 6: (15) if r0 == 0x0 goto pc+6 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp + 7: (7a) *(u64 *)(r10 -16) = -8 + 8: (79) r1 = *(u64 *)(r10 -16) + 9: (b7) r2 = 2 + 10: (3d) if r2 >= r1 goto pc+2 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 + R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp + 11: (b7) r7 = 1 + 12: (65) if r7 s> 0x0 goto pc+2 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 + R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,max_value=0 R10=fp + 13: (b7) r0 = 0 + 14: (95) exit + + from 12 to 15: R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 + R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,min_value=1 R10=fp + 15: (0f) r7 += r1 + 16: (65) if r7 s> 0x4 goto pc+2 + R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 + R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp + 17: (0f) r0 += r7 + 18: (72) *(u8 *)(r0 +0) = 0 + R0=map_value_adj(ks=8,vs=8,id=0),min_value=4,max_value=4 R1=inv,min_value=3 + R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp + 19: (b7) r0 = 0 + 20: (95) exit + +Meaning, in adjust_reg_min_max_vals() we must also reset range +values on the dst when src/dst registers have mixed signed/ +unsigned derived min/max value bounds with one unbounded value +as otherwise they can be added together deducing false boundaries. 
+Once both boundaries are established from either ALU ops or +compare operations w/o mixing signed/unsigned insns, then they +can safely be added to other regs also having both boundaries +established. Adding regs with one unbounded side to a map value +where the bounded side has been learned w/o mixing ops is +possible, but the resulting map value won't recover from that, +meaning such op is considered invalid at the time of actual +access. Invalid bounds are set on the dst reg in case i) src reg, +or ii) in case dst reg already had them. The only way to recover +would be to perform i) ALU ops but only 'add' is allowed on map +value types or ii) comparisons, but these are disallowed on +pointers in case they span a range. This is fine as only BPF_JEQ +and BPF_JNE may be performed on PTR_TO_MAP_VALUE_OR_NULL registers +which potentially turn them into PTR_TO_MAP_VALUE type depending +on the branch, so only here min/max value cannot be invalidated +for them. + +In terms of state pruning, value_from_signed is considered +as well in states_equal() when dealing with adjusted map values. +With regards to breaking existing programs, there is a small +risk, but use-cases are quite narrow where this could +occur and mixing compares is probably unlikely. + +Joint work with Josef and Edward. + + [0] https://lists.iovisor.org/pipermail/iovisor-dev/2017-June/000822.html + +Fixes: 484611357c19 ("bpf: allow access into map value arrays") +Reported-by: Edward Cree +Signed-off-by: Daniel Borkmann +Signed-off-by: Edward Cree +Signed-off-by: Josef Bacik +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 110 +++++++++++++++++++++++++++++++++++++------ + 2 files changed, 97 insertions(+), 14 deletions(-) + +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -40,6 +40,7 @@ struct bpf_reg_state { + */ + s64 min_value; + u64 max_value; ++ bool value_from_signed; + }; + + enum bpf_stack_slot_type { +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -682,12 +682,13 @@ static int check_ctx_access(struct bpf_v + return -EACCES; + } + +-static bool is_pointer_value(struct bpf_verifier_env *env, int regno) ++static bool __is_pointer_value(bool allow_ptr_leaks, ++ const struct bpf_reg_state *reg) + { +- if (env->allow_ptr_leaks) ++ if (allow_ptr_leaks) + return false; + +- switch (env->cur_state.regs[regno].type) { ++ switch (reg->type) { + case UNKNOWN_VALUE: + case CONST_IMM: + return false; +@@ -696,6 +697,11 @@ static bool is_pointer_value(struct bpf_ + } + } + ++static bool is_pointer_value(struct bpf_verifier_env *env, int regno) ++{ ++ return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); ++} ++ + static int check_ptr_alignment(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int off, int size) + { +@@ -1592,10 +1598,24 @@ static void adjust_reg_min_max_vals(stru + } + + /* We don't know anything about what was done to this register, mark it +- * as unknown. ++ * as unknown. Also, if both derived bounds came from signed/unsigned ++ * mixed compares and one side is unbounded, we cannot really do anything ++ * with them as boundaries cannot be trusted. Thus, arithmetic of two ++ * regs of such kind will get invalidated bounds on the dst side. 
+ */ +- if (min_val == BPF_REGISTER_MIN_RANGE && +- max_val == BPF_REGISTER_MAX_RANGE) { ++ if ((min_val == BPF_REGISTER_MIN_RANGE && ++ max_val == BPF_REGISTER_MAX_RANGE) || ++ (BPF_SRC(insn->code) == BPF_X && ++ ((min_val != BPF_REGISTER_MIN_RANGE && ++ max_val == BPF_REGISTER_MAX_RANGE) || ++ (min_val == BPF_REGISTER_MIN_RANGE && ++ max_val != BPF_REGISTER_MAX_RANGE) || ++ (dst_reg->min_value != BPF_REGISTER_MIN_RANGE && ++ dst_reg->max_value == BPF_REGISTER_MAX_RANGE) || ++ (dst_reg->min_value == BPF_REGISTER_MIN_RANGE && ++ dst_reg->max_value != BPF_REGISTER_MAX_RANGE)) && ++ regs[insn->dst_reg].value_from_signed != ++ regs[insn->src_reg].value_from_signed)) { + reset_reg_range_values(regs, insn->dst_reg); + return; + } +@@ -1939,38 +1959,63 @@ static void reg_set_min_max(struct bpf_r + struct bpf_reg_state *false_reg, u64 val, + u8 opcode) + { ++ bool value_from_signed = true; ++ bool is_range = true; ++ + switch (opcode) { + case BPF_JEQ: + /* If this is false then we know nothing Jon Snow, but if it is + * true then we know for sure. + */ + true_reg->max_value = true_reg->min_value = val; ++ is_range = false; + break; + case BPF_JNE: + /* If this is true we know nothing Jon Snow, but if it is false + * we know the value for sure; + */ + false_reg->max_value = false_reg->min_value = val; ++ is_range = false; + break; + case BPF_JGT: +- /* Unsigned comparison, the minimum value is 0. */ +- false_reg->min_value = 0; ++ value_from_signed = false; ++ /* fallthrough */ + case BPF_JSGT: ++ if (true_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(true_reg, 0); ++ if (false_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(false_reg, 0); ++ if (opcode == BPF_JGT) { ++ /* Unsigned comparison, the minimum value is 0. */ ++ false_reg->min_value = 0; ++ } + /* If this is false then we know the maximum val is val, + * otherwise we know the min val is val+1. 
+ */ + false_reg->max_value = val; ++ false_reg->value_from_signed = value_from_signed; + true_reg->min_value = val + 1; ++ true_reg->value_from_signed = value_from_signed; + break; + case BPF_JGE: +- /* Unsigned comparison, the minimum value is 0. */ +- false_reg->min_value = 0; ++ value_from_signed = false; ++ /* fallthrough */ + case BPF_JSGE: ++ if (true_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(true_reg, 0); ++ if (false_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(false_reg, 0); ++ if (opcode == BPF_JGE) { ++ /* Unsigned comparison, the minimum value is 0. */ ++ false_reg->min_value = 0; ++ } + /* If this is false then we know the maximum value is val - 1, + * otherwise we know the mimimum value is val. + */ + false_reg->max_value = val - 1; ++ false_reg->value_from_signed = value_from_signed; + true_reg->min_value = val; ++ true_reg->value_from_signed = value_from_signed; + break; + default: + break; +@@ -1978,6 +2023,12 @@ static void reg_set_min_max(struct bpf_r + + check_reg_overflow(false_reg); + check_reg_overflow(true_reg); ++ if (is_range) { ++ if (__is_pointer_value(false, false_reg)) ++ reset_reg_range_values(false_reg, 0); ++ if (__is_pointer_value(false, true_reg)) ++ reset_reg_range_values(true_reg, 0); ++ } + } + + /* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg +@@ -1987,39 +2038,64 @@ static void reg_set_min_max_inv(struct b + struct bpf_reg_state *false_reg, u64 val, + u8 opcode) + { ++ bool value_from_signed = true; ++ bool is_range = true; ++ + switch (opcode) { + case BPF_JEQ: + /* If this is false then we know nothing Jon Snow, but if it is + * true then we know for sure. 
+ */ + true_reg->max_value = true_reg->min_value = val; ++ is_range = false; + break; + case BPF_JNE: + /* If this is true we know nothing Jon Snow, but if it is false + * we know the value for sure; + */ + false_reg->max_value = false_reg->min_value = val; ++ is_range = false; + break; + case BPF_JGT: +- /* Unsigned comparison, the minimum value is 0. */ +- true_reg->min_value = 0; ++ value_from_signed = false; ++ /* fallthrough */ + case BPF_JSGT: ++ if (true_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(true_reg, 0); ++ if (false_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(false_reg, 0); ++ if (opcode == BPF_JGT) { ++ /* Unsigned comparison, the minimum value is 0. */ ++ true_reg->min_value = 0; ++ } + /* + * If this is false, then the val is <= the register, if it is + * true the register <= to the val. + */ + false_reg->min_value = val; ++ false_reg->value_from_signed = value_from_signed; + true_reg->max_value = val - 1; ++ true_reg->value_from_signed = value_from_signed; + break; + case BPF_JGE: +- /* Unsigned comparison, the minimum value is 0. */ +- true_reg->min_value = 0; ++ value_from_signed = false; ++ /* fallthrough */ + case BPF_JSGE: ++ if (true_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(true_reg, 0); ++ if (false_reg->value_from_signed != value_from_signed) ++ reset_reg_range_values(false_reg, 0); ++ if (opcode == BPF_JGE) { ++ /* Unsigned comparison, the minimum value is 0. */ ++ true_reg->min_value = 0; ++ } + /* If this is false then constant < register, if it is true then + * the register < constant. 
+ */ + false_reg->min_value = val + 1; ++ false_reg->value_from_signed = value_from_signed; + true_reg->max_value = val; ++ true_reg->value_from_signed = value_from_signed; + break; + default: + break; +@@ -2027,6 +2103,12 @@ static void reg_set_min_max_inv(struct b + + check_reg_overflow(false_reg); + check_reg_overflow(true_reg); ++ if (is_range) { ++ if (__is_pointer_value(false, false_reg)) ++ reset_reg_range_values(false_reg, 0); ++ if (__is_pointer_value(false, true_reg)) ++ reset_reg_range_values(true_reg, 0); ++ } + } + + static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, diff --git a/queue-4.9/bpf-verifier-add-additional-patterns-to-evaluate_reg_imm_alu.patch b/queue-4.9/bpf-verifier-add-additional-patterns-to-evaluate_reg_imm_alu.patch new file mode 100644 index 00000000000..d8afcaaa01c --- /dev/null +++ b/queue-4.9/bpf-verifier-add-additional-patterns-to-evaluate_reg_imm_alu.patch @@ -0,0 +1,102 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: John Fastabend +Date: Sun, 2 Jul 2017 02:13:30 +0200 +Subject: bpf, verifier: add additional patterns to evaluate_reg_imm_alu + +From: John Fastabend + + +[ Upstream commit 43188702b3d98d2792969a3377a30957f05695e6 ] + +Currently the verifier does not track imm across alu operations when +the source register is of unknown type. This adds additional pattern +matching to catch this and track imm. We've seen LLVM generating this +pattern while working on cilium. + +Signed-off-by: John Fastabend +Acked-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 62 insertions(+) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1467,6 +1467,65 @@ static int evaluate_reg_alu(struct bpf_v + return 0; + } + ++static int evaluate_reg_imm_alu_unknown(struct bpf_verifier_env *env, ++ struct bpf_insn *insn) ++{ ++ struct bpf_reg_state *regs = env->cur_state.regs; ++ struct bpf_reg_state *dst_reg = &regs[insn->dst_reg]; ++ struct bpf_reg_state *src_reg = &regs[insn->src_reg]; ++ u8 opcode = BPF_OP(insn->code); ++ s64 imm_log2 = __ilog2_u64((long long)dst_reg->imm); ++ ++ /* BPF_X code with src_reg->type UNKNOWN_VALUE here. */ ++ if (src_reg->imm > 0 && dst_reg->imm) { ++ switch (opcode) { ++ case BPF_ADD: ++ /* dreg += sreg ++ * where both have zero upper bits. Adding them ++ * can only result making one more bit non-zero ++ * in the larger value. ++ * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47) ++ * 0xffff (imm=48) + 0xffff = 0x1fffe (imm=47) ++ */ ++ dst_reg->imm = min(src_reg->imm, 63 - imm_log2); ++ dst_reg->imm--; ++ break; ++ case BPF_AND: ++ /* dreg &= sreg ++ * AND can not extend zero bits only shrink ++ * Ex. 0x00..00ffffff ++ * & 0x0f..ffffffff ++ * ---------------- ++ * 0x00..00ffffff ++ */ ++ dst_reg->imm = max(src_reg->imm, 63 - imm_log2); ++ break; ++ case BPF_OR: ++ /* dreg |= sreg ++ * OR can only extend zero bits ++ * Ex.
0x00..00ffffff ++ * | 0x0f..ffffffff ++ * ---------------- ++ * 0x0f..00ffffff ++ */ ++ dst_reg->imm = min(src_reg->imm, 63 - imm_log2); ++ break; ++ case BPF_SUB: ++ case BPF_MUL: ++ case BPF_RSH: ++ case BPF_LSH: ++ /* These may be flushed out later */ ++ default: ++ mark_reg_unknown_value(regs, insn->dst_reg); ++ } ++ } else { ++ mark_reg_unknown_value(regs, insn->dst_reg); ++ } ++ ++ dst_reg->type = UNKNOWN_VALUE; ++ return 0; ++} ++ + static int evaluate_reg_imm_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn) + { +@@ -1475,6 +1534,9 @@ static int evaluate_reg_imm_alu(struct b + struct bpf_reg_state *src_reg = &regs[insn->src_reg]; + u8 opcode = BPF_OP(insn->code); + ++ if (BPF_SRC(insn->code) == BPF_X && src_reg->type == UNKNOWN_VALUE) ++ return evaluate_reg_imm_alu_unknown(env, insn); ++ + /* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn. + * Don't care about overflow or negative values, just add them + */ diff --git a/queue-4.9/bpf-verifier-fix-alu-ops-against-map_value-_adj-register-types.patch b/queue-4.9/bpf-verifier-fix-alu-ops-against-map_value-_adj-register-types.patch new file mode 100644 index 00000000000..d15b60b97bb --- /dev/null +++ b/queue-4.9/bpf-verifier-fix-alu-ops-against-map_value-_adj-register-types.patch @@ -0,0 +1,104 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Daniel Borkmann +Date: Fri, 31 Mar 2017 02:24:02 +0200 +Subject: bpf, verifier: fix alu ops against map_value{, _adj} register types + +From: Daniel Borkmann + + +[ Upstream commit fce366a9dd0ddc47e7ce05611c266e8574a45116 ] + +While looking into map_value_adj, I noticed that alu operations +directly on the map_value() resp. map_value_adj() register (any +alu operation on a map_value() register will turn it into a +map_value_adj() typed register) are not sufficiently protected +against some of the operations.
Two non-exhaustive examples are +provided that the verifier needs to reject: + + i) BPF_AND on r0 (map_value_adj): + + 0: (bf) r2 = r10 + 1: (07) r2 += -8 + 2: (7a) *(u64 *)(r2 +0) = 0 + 3: (18) r1 = 0xbf842a00 + 5: (85) call bpf_map_lookup_elem#1 + 6: (15) if r0 == 0x0 goto pc+2 + R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp + 7: (57) r0 &= 8 + 8: (7a) *(u64 *)(r0 +0) = 22 + R0=map_value_adj(ks=8,vs=48,id=0),min_value=0,max_value=8 R10=fp + 9: (95) exit + + from 6 to 9: R0=inv,min_value=0,max_value=0 R10=fp + 9: (95) exit + processed 10 insns + +ii) BPF_ADD in 32 bit mode on r0 (map_value_adj): + + 0: (bf) r2 = r10 + 1: (07) r2 += -8 + 2: (7a) *(u64 *)(r2 +0) = 0 + 3: (18) r1 = 0xc24eee00 + 5: (85) call bpf_map_lookup_elem#1 + 6: (15) if r0 == 0x0 goto pc+2 + R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp + 7: (04) (u32) r0 += (u32) 0 + 8: (7a) *(u64 *)(r0 +0) = 22 + R0=map_value_adj(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp + 9: (95) exit + + from 6 to 9: R0=inv,min_value=0,max_value=0 R10=fp + 9: (95) exit + processed 10 insns + +Issue is, while min_value / max_value boundaries for the access +are adjusted appropriately, we change the pointer value in a way +that cannot be sufficiently tracked anymore from its origin. +Operations like BPF_{AND,OR,DIV,MUL,etc} on a destination register +that is PTR_TO_MAP_VALUE{,_ADJ} was probably unintended, in fact, +all the test cases coming with 484611357c19 ("bpf: allow access +into map value arrays") perform BPF_ADD only on the destination +register that is PTR_TO_MAP_VALUE_ADJ. + +Only for UNKNOWN_VALUE register types such operations make sense, +f.e. with unknown memory content fetched initially from a constant +offset from the map value memory into a register. 
That register is +then later tested against lower / upper bounds, so that the verifier +can then do the tracking of min_value / max_value, and properly +check once that UNKNOWN_VALUE register is added to the destination +register with type PTR_TO_MAP_VALUE{,_ADJ}. This is also what the +original use-case is solving. Note, tracking on what is being +added is done through adjust_reg_min_max_vals() and later access +to the map value enforced with these boundaries and the given offset +from the insn through check_map_access_adj(). + +Tests will fail for non-root environment due to prohibited pointer +arithmetic, in particular in check_alu_op(), we bail out on the +is_pointer_value() check on the dst_reg (which is false in root +case as we allow for pointer arithmetic via env->allow_ptr_leaks). + +Similarly to PTR_TO_PACKET, one way to fix it is to restrict the +allowed operations on PTR_TO_MAP_VALUE{,_ADJ} registers to 64 bit +mode BPF_ADD. The test_verifier suite runs fine after the patch +and it also rejects mentioned test cases. + +Fixes: 484611357c19 ("bpf: allow access into map value arrays") +Signed-off-by: Daniel Borkmann +Reviewed-by: Josef Bacik +Acked-by: Alexei Starovoitov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1870,6 +1870,7 @@ static int check_alu_op(struct bpf_verif + * register as unknown. 
+ */ + if (env->allow_ptr_leaks && ++ BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD && + (dst_reg->type == PTR_TO_MAP_VALUE || + dst_reg->type == PTR_TO_MAP_VALUE_ADJ)) + dst_reg->type = PTR_TO_MAP_VALUE_ADJ; diff --git a/queue-4.9/bpf-verifier-fix-min-max-handling-in-bpf_sub.patch b/queue-4.9/bpf-verifier-fix-min-max-handling-in-bpf_sub.patch new file mode 100644 index 00000000000..6d23d07d5de --- /dev/null +++ b/queue-4.9/bpf-verifier-fix-min-max-handling-in-bpf_sub.patch @@ -0,0 +1,61 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Edward Cree +Date: Fri, 21 Jul 2017 14:37:34 +0100 +Subject: bpf/verifier: fix min/max handling in BPF_SUB + +From: Edward Cree + + +[ Upstream commit 9305706c2e808ae59f1eb201867f82f1ddf6d7a6 ] + +We have to subtract the src max from the dst min, and vice-versa, since + (e.g.) the smallest result comes from the largest subtrahend. + +Fixes: 484611357c19 ("bpf: allow access into map value arrays") +Signed-off-by: Edward Cree +Acked-by: Daniel Borkmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 21 +++++++++++++++------ + 1 file changed, 15 insertions(+), 6 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1624,10 +1624,12 @@ static void adjust_reg_min_max_vals(stru + * do our normal operations to the register, we need to set the values + * to the min/max since they are undefined. 
+ */ +- if (min_val == BPF_REGISTER_MIN_RANGE) +- dst_reg->min_value = BPF_REGISTER_MIN_RANGE; +- if (max_val == BPF_REGISTER_MAX_RANGE) +- dst_reg->max_value = BPF_REGISTER_MAX_RANGE; ++ if (opcode != BPF_SUB) { ++ if (min_val == BPF_REGISTER_MIN_RANGE) ++ dst_reg->min_value = BPF_REGISTER_MIN_RANGE; ++ if (max_val == BPF_REGISTER_MAX_RANGE) ++ dst_reg->max_value = BPF_REGISTER_MAX_RANGE; ++ } + + switch (opcode) { + case BPF_ADD: +@@ -1637,10 +1639,17 @@ static void adjust_reg_min_max_vals(stru + dst_reg->max_value += max_val; + break; + case BPF_SUB: ++ /* If one of our values was at the end of our ranges, then the ++ * _opposite_ value in the dst_reg goes to the end of our range. ++ */ ++ if (min_val == BPF_REGISTER_MIN_RANGE) ++ dst_reg->max_value = BPF_REGISTER_MAX_RANGE; ++ if (max_val == BPF_REGISTER_MAX_RANGE) ++ dst_reg->min_value = BPF_REGISTER_MIN_RANGE; + if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) +- dst_reg->min_value -= min_val; ++ dst_reg->min_value -= max_val; + if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) +- dst_reg->max_value -= max_val; ++ dst_reg->max_value -= min_val; + break; + case BPF_MUL: + if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) diff --git a/queue-4.9/dccp-defer-ccid_hc_tx_delete-at-dismantle-time.patch b/queue-4.9/dccp-defer-ccid_hc_tx_delete-at-dismantle-time.patch new file mode 100644 index 00000000000..2135c210c44 --- /dev/null +++ b/queue-4.9/dccp-defer-ccid_hc_tx_delete-at-dismantle-time.patch @@ -0,0 +1,204 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Eric Dumazet +Date: Wed, 16 Aug 2017 07:03:15 -0700 +Subject: dccp: defer ccid_hc_tx_delete() at dismantle time + +From: Eric Dumazet + + +[ Upstream commit 120e9dabaf551c6dc03d3a10a1f026376cb1811c ] + +syszkaller team reported another problem in DCCP [1] + +Problem here is that the structure holding RTO timer +(ccid2_hc_tx_rto_expire() handler) is freed too soon. 
+ +We can not use del_timer_sync() to cancel the timer +since this timer wants to grab socket lock (that would risk a dead lock) + +Solution is to defer the freeing of memory when all references to +the socket were released. Socket timers do own a reference, so this +should fix the issue. + +[1] + +================================================================== +BUG: KASAN: use-after-free in ccid2_hc_tx_rto_expire+0x51c/0x5c0 net/dccp/ccids/ccid2.c:144 +Read of size 4 at addr ffff8801d2660540 by task kworker/u4:7/3365 + +CPU: 1 PID: 3365 Comm: kworker/u4:7 Not tainted 4.13.0-rc4+ #3 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Workqueue: events_unbound call_usermodehelper_exec_work +Call Trace: + + __dump_stack lib/dump_stack.c:16 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:52 + print_address_description+0x73/0x250 mm/kasan/report.c:252 + kasan_report_error mm/kasan/report.c:351 [inline] + kasan_report+0x24e/0x340 mm/kasan/report.c:409 + __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:429 + ccid2_hc_tx_rto_expire+0x51c/0x5c0 net/dccp/ccids/ccid2.c:144 + call_timer_fn+0x233/0x830 kernel/time/timer.c:1268 + expire_timers kernel/time/timer.c:1307 [inline] + __run_timers+0x7fd/0xb90 kernel/time/timer.c:1601 + run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614 + __do_softirq+0x2f5/0xba3 kernel/softirq.c:284 + invoke_softirq kernel/softirq.c:364 [inline] + irq_exit+0x1cc/0x200 kernel/softirq.c:405 + exiting_irq arch/x86/include/asm/apic.h:638 [inline] + smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:1044 + apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:702 +RIP: 0010:arch_local_irq_enable arch/x86/include/asm/paravirt.h:824 [inline] +RIP: 0010:__raw_write_unlock_irq include/linux/rwlock_api_smp.h:267 [inline] +RIP: 0010:_raw_write_unlock_irq+0x56/0x70 kernel/locking/spinlock.c:343 +RSP: 0018:ffff8801cd50eaa8 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff10 +RAX: dffffc0000000000 RBX: 
ffffffff85a090c0 RCX: 0000000000000006 +RDX: 1ffffffff0b595f3 RSI: 1ffff1003962f989 RDI: ffffffff85acaf98 +RBP: ffff8801cd50eab0 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801cc96ea60 +R13: dffffc0000000000 R14: ffff8801cc96e4c0 R15: ffff8801cc96e4c0 + + release_task+0xe9e/0x1a40 kernel/exit.c:220 + wait_task_zombie kernel/exit.c:1162 [inline] + wait_consider_task+0x29b8/0x33c0 kernel/exit.c:1389 + do_wait_thread kernel/exit.c:1452 [inline] + do_wait+0x441/0xa90 kernel/exit.c:1523 + kernel_wait4+0x1f5/0x370 kernel/exit.c:1665 + SYSC_wait4+0x134/0x140 kernel/exit.c:1677 + SyS_wait4+0x2c/0x40 kernel/exit.c:1673 + call_usermodehelper_exec_sync kernel/kmod.c:286 [inline] + call_usermodehelper_exec_work+0x1a0/0x2c0 kernel/kmod.c:323 + process_one_work+0xbf3/0x1bc0 kernel/workqueue.c:2097 + worker_thread+0x223/0x1860 kernel/workqueue.c:2231 + kthread+0x35e/0x430 kernel/kthread.c:231 + ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:425 + +Allocated by task 21267: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 + save_stack+0x43/0xd0 mm/kasan/kasan.c:447 + set_track mm/kasan/kasan.c:459 [inline] + kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 + kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:489 + kmem_cache_alloc+0x127/0x750 mm/slab.c:3561 + ccid_new+0x20e/0x390 net/dccp/ccid.c:151 + dccp_hdlr_ccid+0x27/0x140 net/dccp/feat.c:44 + __dccp_feat_activate+0x142/0x2a0 net/dccp/feat.c:344 + dccp_feat_activate_values+0x34e/0xa90 net/dccp/feat.c:1538 + dccp_rcv_request_sent_state_process net/dccp/input.c:472 [inline] + dccp_rcv_state_process+0xed1/0x1620 net/dccp/input.c:677 + dccp_v4_do_rcv+0xeb/0x160 net/dccp/ipv4.c:679 + sk_backlog_rcv include/net/sock.h:911 [inline] + __release_sock+0x124/0x360 net/core/sock.c:2269 + release_sock+0xa4/0x2a0 net/core/sock.c:2784 + inet_wait_for_connect net/ipv4/af_inet.c:557 [inline] + __inet_stream_connect+0x671/0xf00 net/ipv4/af_inet.c:643 + inet_stream_connect+0x58/0xa0 
net/ipv4/af_inet.c:682 + SYSC_connect+0x204/0x470 net/socket.c:1642 + SyS_connect+0x24/0x30 net/socket.c:1623 + entry_SYSCALL_64_fastpath+0x1f/0xbe + +Freed by task 3049: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 + save_stack+0x43/0xd0 mm/kasan/kasan.c:447 + set_track mm/kasan/kasan.c:459 [inline] + kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 + __cache_free mm/slab.c:3503 [inline] + kmem_cache_free+0x77/0x280 mm/slab.c:3763 + ccid_hc_tx_delete+0xc5/0x100 net/dccp/ccid.c:190 + dccp_destroy_sock+0x1d1/0x2b0 net/dccp/proto.c:225 + inet_csk_destroy_sock+0x166/0x3f0 net/ipv4/inet_connection_sock.c:833 + dccp_done+0xb7/0xd0 net/dccp/proto.c:145 + dccp_time_wait+0x13d/0x300 net/dccp/minisocks.c:72 + dccp_rcv_reset+0x1d1/0x5b0 net/dccp/input.c:160 + dccp_rcv_state_process+0x8fc/0x1620 net/dccp/input.c:663 + dccp_v4_do_rcv+0xeb/0x160 net/dccp/ipv4.c:679 + sk_backlog_rcv include/net/sock.h:911 [inline] + __sk_receive_skb+0x33e/0xc00 net/core/sock.c:521 + dccp_v4_rcv+0xef1/0x1c00 net/dccp/ipv4.c:871 + ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 + NF_HOOK include/linux/netfilter.h:248 [inline] + ip_local_deliver+0x1ce/0x6d0 net/ipv4/ip_input.c:257 + dst_input include/net/dst.h:477 [inline] + ip_rcv_finish+0x8db/0x19c0 net/ipv4/ip_input.c:397 + NF_HOOK include/linux/netfilter.h:248 [inline] + ip_rcv+0xc3f/0x17d0 net/ipv4/ip_input.c:488 + __netif_receive_skb_core+0x19af/0x33d0 net/core/dev.c:4417 + __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4455 + process_backlog+0x203/0x740 net/core/dev.c:5130 + napi_poll net/core/dev.c:5527 [inline] + net_rx_action+0x792/0x1910 net/core/dev.c:5593 + __do_softirq+0x2f5/0xba3 kernel/softirq.c:284 + +The buggy address belongs to the object at ffff8801d2660100 + which belongs to the cache ccid2_hc_tx_sock of size 1240 +The buggy address is located 1088 bytes inside of + 1240-byte region [ffff8801d2660100, ffff8801d26605d8) +The buggy address belongs to the page: +page:ffffea0007499800 count:1 mapcount:0 
mapping:ffff8801d2660100 index:0x0 compound_mapcount: 0 +flags: 0x200000000008100(slab|head) +raw: 0200000000008100 ffff8801d2660100 0000000000000000 0000000100000005 +raw: ffffea00075271a0 ffffea0007538820 ffff8801d3aef9c0 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff8801d2660400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8801d2660480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +>ffff8801d2660500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff8801d2660580: fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc fc + ffff8801d2660600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +================================================================== + +Signed-off-by: Eric Dumazet +Reported-by: Dmitry Vyukov +Cc: Gerrit Renker +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/proto.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +--- a/net/dccp/proto.c ++++ b/net/dccp/proto.c +@@ -24,6 +24,7 @@ + #include + + #include ++#include + #include + #include + +@@ -170,6 +171,15 @@ const char *dccp_packet_name(const int t + + EXPORT_SYMBOL_GPL(dccp_packet_name); + ++static void dccp_sk_destruct(struct sock *sk) ++{ ++ struct dccp_sock *dp = dccp_sk(sk); ++ ++ ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); ++ dp->dccps_hc_tx_ccid = NULL; ++ inet_sock_destruct(sk); ++} ++ + int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) + { + struct dccp_sock *dp = dccp_sk(sk); +@@ -179,6 +189,7 @@ int dccp_init_sock(struct sock *sk, cons + icsk->icsk_syn_retries = sysctl_dccp_request_retries; + sk->sk_state = DCCP_CLOSED; + sk->sk_write_space = dccp_write_space; ++ sk->sk_destruct = dccp_sk_destruct; + icsk->icsk_sync_mss = dccp_sync_mss; + dp->dccps_mss_cache = 536; + dp->dccps_rate_last = jiffies; +@@ -219,8 +230,7 @@ void dccp_destroy_sock(struct sock *sk) + dp->dccps_hc_rx_ackvec = NULL; + } + 
ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); +- ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); +- dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; ++ dp->dccps_hc_rx_ccid = NULL; + + /* clean up feature negotiation state */ + dccp_feat_list_purge(&dp->dccps_featneg); diff --git a/queue-4.9/dccp-purge-write-queue-in-dccp_destroy_sock.patch b/queue-4.9/dccp-purge-write-queue-in-dccp_destroy_sock.patch new file mode 100644 index 00000000000..4fd83160518 --- /dev/null +++ b/queue-4.9/dccp-purge-write-queue-in-dccp_destroy_sock.patch @@ -0,0 +1,76 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Eric Dumazet +Date: Mon, 14 Aug 2017 14:10:25 -0700 +Subject: dccp: purge write queue in dccp_destroy_sock() + +From: Eric Dumazet + + +[ Upstream commit 7749d4ff88d31b0be17c8683143135adaaadc6a7 ] + +syzkaller reported that DCCP could have a non empty +write queue at dismantle time. + +WARNING: CPU: 1 PID: 2953 at net/core/stream.c:199 sk_stream_kill_queues+0x3ce/0x520 net/core/stream.c:199 +Kernel panic - not syncing: panic_on_warn set ... 
+ +CPU: 1 PID: 2953 Comm: syz-executor0 Not tainted 4.13.0-rc4+ #2 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:16 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:52 + panic+0x1e4/0x417 kernel/panic.c:180 + __warn+0x1c4/0x1d9 kernel/panic.c:541 + report_bug+0x211/0x2d0 lib/bug.c:183 + fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:190 + do_trap_no_signal arch/x86/kernel/traps.c:224 [inline] + do_trap+0x260/0x390 arch/x86/kernel/traps.c:273 + do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:310 + do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:323 + invalid_op+0x1e/0x30 arch/x86/entry/entry_64.S:846 +RIP: 0010:sk_stream_kill_queues+0x3ce/0x520 net/core/stream.c:199 +RSP: 0018:ffff8801d182f108 EFLAGS: 00010297 +RAX: ffff8801d1144140 RBX: ffff8801d13cb280 RCX: 0000000000000000 +RDX: 0000000000000000 RSI: ffffffff85137b00 RDI: ffff8801d13cb280 +RBP: ffff8801d182f148 R08: 0000000000000001 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801d13cb4d0 +R13: ffff8801d13cb3b8 R14: ffff8801d13cb300 R15: ffff8801d13cb3b8 + inet_csk_destroy_sock+0x175/0x3f0 net/ipv4/inet_connection_sock.c:835 + dccp_close+0x84d/0xc10 net/dccp/proto.c:1067 + inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 + sock_release+0x8d/0x1e0 net/socket.c:597 + sock_close+0x16/0x20 net/socket.c:1126 + __fput+0x327/0x7e0 fs/file_table.c:210 + ____fput+0x15/0x20 fs/file_table.c:246 + task_work_run+0x18a/0x260 kernel/task_work.c:116 + exit_task_work include/linux/task_work.h:21 [inline] + do_exit+0xa32/0x1b10 kernel/exit.c:865 + do_group_exit+0x149/0x400 kernel/exit.c:969 + get_signal+0x7e8/0x17e0 kernel/signal.c:2330 + do_signal+0x94/0x1ee0 arch/x86/kernel/signal.c:808 + exit_to_usermode_loop+0x21c/0x2d0 arch/x86/entry/common.c:157 + prepare_exit_to_usermode arch/x86/entry/common.c:194 [inline] + syscall_return_slowpath+0x3a7/0x450 arch/x86/entry/common.c:263 + +Signed-off-by: Eric Dumazet 
+Reported-by: Dmitry Vyukov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/proto.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/net/dccp/proto.c ++++ b/net/dccp/proto.c +@@ -201,10 +201,7 @@ void dccp_destroy_sock(struct sock *sk) + { + struct dccp_sock *dp = dccp_sk(sk); + +- /* +- * DCCP doesn't use sk_write_queue, just sk_send_head +- * for retransmissions +- */ ++ __skb_queue_purge(&sk->sk_write_queue); + if (sk->sk_send_head != NULL) { + kfree_skb(sk->sk_send_head); + sk->sk_send_head = NULL; diff --git a/queue-4.9/ipv4-better-ip_max_mtu-enforcement.patch b/queue-4.9/ipv4-better-ip_max_mtu-enforcement.patch new file mode 100644 index 00000000000..d26ac20bf99 --- /dev/null +++ b/queue-4.9/ipv4-better-ip_max_mtu-enforcement.patch @@ -0,0 +1,61 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Eric Dumazet +Date: Wed, 16 Aug 2017 11:09:12 -0700 +Subject: ipv4: better IP_MAX_MTU enforcement + +From: Eric Dumazet + + +[ Upstream commit c780a049f9bf442314335372c9abc4548bfe3e44 ] + +While working on yet another syzkaller report, I found +that our IP_MAX_MTU enforcements were not properly done. + +gcc seems to reload dev->mtu for min(dev->mtu, IP_MAX_MTU), and +final result can be bigger than IP_MAX_MTU :/ + +This is a problem because device mtu can be changed on other cpus or +threads. + +While this patch does not fix the issue I am working on, it is +probably worth addressing it. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip.h | 4 ++-- + net/ipv4/route.c | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -339,7 +339,7 @@ static inline unsigned int ip_dst_mtu_ma + !forwarding) + return dst_mtu(dst); + +- return min(dst->dev->mtu, IP_MAX_MTU); ++ return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); + } + + static inline unsigned int ip_skb_dst_mtu(struct sock *sk, +@@ -351,7 +351,7 @@ static inline unsigned int ip_skb_dst_mt + return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding); + } + +- return min(skb_dst(skb)->dev->mtu, IP_MAX_MTU); ++ return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); + } + + u32 ip_idents_reserve(u32 hash, int segs); +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1247,7 +1247,7 @@ static unsigned int ipv4_mtu(const struc + if (mtu) + return mtu; + +- mtu = dst->dev->mtu; ++ mtu = READ_ONCE(dst->dev->mtu); + + if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { + if (rt->rt_uses_gateway && mtu > 576) diff --git a/queue-4.9/ipv4-fix-null-dereference-in-free_fib_info_rcu.patch b/queue-4.9/ipv4-fix-null-dereference-in-free_fib_info_rcu.patch new file mode 100644 index 00000000000..19f9a44a937 --- /dev/null +++ b/queue-4.9/ipv4-fix-null-dereference-in-free_fib_info_rcu.patch @@ -0,0 +1,59 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Eric Dumazet +Date: Tue, 15 Aug 2017 05:26:17 -0700 +Subject: ipv4: fix NULL dereference in free_fib_info_rcu() + +From: Eric Dumazet + + +[ Upstream commit 187e5b3ac84d3421d2de3aca949b2791fbcad554 ] + +If fi->fib_metrics could not be allocated in fib_create_info() +we attempt to dereference a NULL pointer in free_fib_info_rcu() : + + m = fi->fib_metrics; + if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt)) + kfree(m); + +Before my recent patch, we used to call kfree(NULL) and nothing wrong +happened. 
+ +Instead of using RCU to defer freeing while we are under memory stress, +it seems better to take immediate action. + +This was reported by syzkaller team. + +Fixes: 3fb07daff8e9 ("ipv4: add reference counting to metrics") +Signed-off-by: Eric Dumazet +Reported-by: Dmitry Vyukov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_semantics.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -1044,15 +1044,17 @@ struct fib_info *fib_create_info(struct + fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); + if (!fi) + goto failure; +- fib_info_cnt++; + if (cfg->fc_mx) { + fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); +- if (!fi->fib_metrics) +- goto failure; ++ if (unlikely(!fi->fib_metrics)) { ++ kfree(fi); ++ return ERR_PTR(err); ++ } + atomic_set(&fi->fib_metrics->refcnt, 1); +- } else ++ } else { + fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; +- ++ } ++ fib_info_cnt++; + fi->fib_net = net; + fi->fib_protocol = cfg->fc_protocol; + fi->fib_scope = cfg->fc_scope; diff --git a/queue-4.9/ipv6-repair-fib6-tree-in-failure-case.patch b/queue-4.9/ipv6-repair-fib6-tree-in-failure-case.patch new file mode 100644 index 00000000000..2e63fd3e1e3 --- /dev/null +++ b/queue-4.9/ipv6-repair-fib6-tree-in-failure-case.patch @@ -0,0 +1,140 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Wei Wang +Date: Fri, 18 Aug 2017 17:14:49 -0700 +Subject: ipv6: repair fib6 tree in failure case + +From: Wei Wang + + +[ Upstream commit 348a4002729ccab8b888b38cbc099efa2f2a2036 ] + +In fib6_add(), it is possible that fib6_add_1() picks an intermediate +node and sets the node's fn->leaf to NULL in order to add this new +route. However, if fib6_add_rt2node() fails to add the new +route for some reason, fn->leaf will be left as NULL and could +potentially cause crash when fn->leaf is accessed in fib6_locate(). 
+This patch makes sure fib6_repair_tree() is called to properly repair +fn->leaf in the above failure case. + +Here is the syzkaller reported general protection fault in fib6_locate: +kasan: CONFIG_KASAN_INLINE enabled +kasan: GPF could be caused by NULL-ptr deref or user memory access +general protection fault: 0000 [#1] SMP KASAN +Modules linked in: +CPU: 0 PID: 40937 Comm: syz-executor3 Not tainted +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +task: ffff8801d7d64100 ti: ffff8801d01a0000 task.ti: ffff8801d01a0000 +RIP: 0010:[] [] __ipv6_prefix_equal64_half include/net/ipv6.h:475 [inline] +RIP: 0010:[] [] ipv6_prefix_equal include/net/ipv6.h:492 [inline] +RIP: 0010:[] [] fib6_locate_1 net/ipv6/ip6_fib.c:1210 [inline] +RIP: 0010:[] [] fib6_locate+0x281/0x3c0 net/ipv6/ip6_fib.c:1233 +RSP: 0018:ffff8801d01a36a8 EFLAGS: 00010202 +RAX: 0000000000000020 RBX: ffff8801bc790e00 RCX: ffffc90002983000 +RDX: 0000000000001219 RSI: ffff8801d01a37a0 RDI: 0000000000000100 +RBP: ffff8801d01a36f0 R08: 00000000000000ff R09: 0000000000000000 +R10: 0000000000000003 R11: 0000000000000000 R12: 0000000000000001 +R13: dffffc0000000000 R14: ffff8801d01a37a0 R15: 0000000000000000 +FS: 00007f6afd68c700(0000) GS:ffff8801db400000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00000000004c6340 CR3: 00000000ba41f000 CR4: 00000000001426f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Stack: + ffff8801d01a37a8 ffff8801d01a3780 ffffed003a0346f5 0000000c82a23ea0 + ffff8800b7bd7700 ffff8801d01a3780 ffff8800b6a1c940 ffffffff82a23ea0 + ffff8801d01a3920 ffff8801d01a3748 ffffffff82a223d6 ffff8801d7d64988 +Call Trace: + [] ip6_route_del+0x106/0x570 net/ipv6/route.c:2109 + [] inet6_rtm_delroute+0xfd/0x100 net/ipv6/route.c:3075 + [] rtnetlink_rcv_msg+0x549/0x7a0 net/core/rtnetlink.c:3450 + [] netlink_rcv_skb+0x141/0x370 
net/netlink/af_netlink.c:2281 + [] rtnetlink_rcv+0x2f/0x40 net/core/rtnetlink.c:3456 + [] netlink_unicast_kernel net/netlink/af_netlink.c:1206 [inline] + [] netlink_unicast+0x518/0x750 net/netlink/af_netlink.c:1232 + [] netlink_sendmsg+0x8ce/0xc30 net/netlink/af_netlink.c:1778 + [] sock_sendmsg_nosec net/socket.c:609 [inline] + [] sock_sendmsg+0xcf/0x110 net/socket.c:619 + [] sock_write_iter+0x222/0x3a0 net/socket.c:834 + [] new_sync_write+0x1dd/0x2b0 fs/read_write.c:478 + [] __vfs_write+0xe4/0x110 fs/read_write.c:491 + [] vfs_write+0x178/0x4b0 fs/read_write.c:538 + [] SYSC_write fs/read_write.c:585 [inline] + [] SyS_write+0xd9/0x1b0 fs/read_write.c:577 + [] entry_SYSCALL_64_fastpath+0x12/0x17 + +Note: there is no "Fixes" tag as this seems to be a bug introduced +very early. + +Signed-off-by: Wei Wang +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_fib.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) + +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -1001,7 +1001,7 @@ int fib6_add(struct fib6_node *root, str + /* Create subtree root node */ + sfn = node_alloc(); + if (!sfn) +- goto st_failure; ++ goto failure; + + sfn->leaf = info->nl_net->ipv6.ip6_null_entry; + atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); +@@ -1017,12 +1017,12 @@ int fib6_add(struct fib6_node *root, str + + if (IS_ERR(sn)) { + /* If it is failed, discard just allocated +- root, and then (in st_failure) stale node ++ root, and then (in failure) stale node + in main tree. 
+ */ + node_free(sfn); + err = PTR_ERR(sn); +- goto st_failure; ++ goto failure; + } + + /* Now link new subtree to main tree */ +@@ -1036,7 +1036,7 @@ int fib6_add(struct fib6_node *root, str + + if (IS_ERR(sn)) { + err = PTR_ERR(sn); +- goto st_failure; ++ goto failure; + } + } + +@@ -1078,22 +1078,22 @@ out: + atomic_inc(&pn->leaf->rt6i_ref); + } + #endif +- if (!(rt->dst.flags & DST_NOCACHE)) +- dst_free(&rt->dst); ++ goto failure; + } + return err; + +-#ifdef CONFIG_IPV6_SUBTREES +- /* Subtree creation failed, probably main tree node +- is orphan. If it is, shoot it. ++failure: ++ /* fn->leaf could be NULL if fn is an intermediate node and we ++ * failed to add the new route to it in both subtree creation ++ * failure and fib6_add_rt2node() failure case. ++ * In both cases, fib6_repair_tree() should be called to fix ++ * fn->leaf. + */ +-st_failure: + if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) + fib6_repair_tree(info->nl_net, fn); + if (!(rt->dst.flags & DST_NOCACHE)) + dst_free(&rt->dst); + return err; +-#endif + } + + /* diff --git a/queue-4.9/ipv6-reset-fn-rr_ptr-when-replacing-route.patch b/queue-4.9/ipv6-reset-fn-rr_ptr-when-replacing-route.patch new file mode 100644 index 00000000000..8996a36170f --- /dev/null +++ b/queue-4.9/ipv6-reset-fn-rr_ptr-when-replacing-route.patch @@ -0,0 +1,82 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Wei Wang +Date: Wed, 16 Aug 2017 11:18:09 -0700 +Subject: ipv6: reset fn->rr_ptr when replacing route + +From: Wei Wang + + +[ Upstream commit 383143f31d7d3525a1dbff733d52fff917f82f15 ] + +syzcaller reported the following use-after-free issue in rt6_select(): +BUG: KASAN: use-after-free in rt6_select net/ipv6/route.c:755 [inline] at addr ffff8800bc6994e8 +BUG: KASAN: use-after-free in ip6_pol_route.isra.46+0x1429/0x1470 net/ipv6/route.c:1084 at addr ffff8800bc6994e8 +Read of size 4 by task syz-executor1/439628 +CPU: 0 PID: 439628 Comm: syz-executor1 Not tainted 4.3.5+ #8 +Hardware name: Google Google Compute 
Engine/Google Compute Engine, BIOS Google 01/01/2011 + 0000000000000000 ffff88018fe435b0 ffffffff81ca384d ffff8801d3588c00 + ffff8800bc699380 ffff8800bc699500 dffffc0000000000 ffff8801d40a47c0 + ffff88018fe435d8 ffffffff81735751 ffff88018fe43660 ffff8800bc699380 +Call Trace: + [] __dump_stack lib/dump_stack.c:15 [inline] + [] dump_stack+0xc1/0x124 lib/dump_stack.c:51 +sctp: [Deprecated]: syz-executor0 (pid 439615) Use of struct sctp_assoc_value in delayed_ack socket option. +Use struct sctp_sack_info instead + [] kasan_object_err+0x21/0x70 mm/kasan/report.c:158 + [] print_address_description mm/kasan/report.c:196 [inline] + [] kasan_report_error+0x1b4/0x4a0 mm/kasan/report.c:285 + [] kasan_report mm/kasan/report.c:305 [inline] + [] __asan_report_load4_noabort+0x43/0x50 mm/kasan/report.c:325 + [] rt6_select net/ipv6/route.c:755 [inline] + [] ip6_pol_route.isra.46+0x1429/0x1470 net/ipv6/route.c:1084 + [] ip6_pol_route_output+0x81/0xb0 net/ipv6/route.c:1203 + [] fib6_rule_action+0x1f0/0x680 net/ipv6/fib6_rules.c:95 + [] fib_rules_lookup+0x2a6/0x7a0 net/core/fib_rules.c:223 + [] fib6_rule_lookup+0xd0/0x250 net/ipv6/fib6_rules.c:41 + [] ip6_route_output+0x1d6/0x2c0 net/ipv6/route.c:1224 + [] ip6_dst_lookup_tail+0x4d2/0x890 net/ipv6/ip6_output.c:943 + [] ip6_dst_lookup_flow+0x9a/0x250 net/ipv6/ip6_output.c:1079 + [] ip6_datagram_dst_update+0x538/0xd40 net/ipv6/datagram.c:91 + [] __ip6_datagram_connect net/ipv6/datagram.c:251 [inline] + [] ip6_datagram_connect+0x518/0xe50 net/ipv6/datagram.c:272 + [] ip6_datagram_connect_v6_only+0x63/0x90 net/ipv6/datagram.c:284 + [] inet_dgram_connect+0x170/0x1f0 net/ipv4/af_inet.c:564 + [] SYSC_connect+0x1a7/0x2f0 net/socket.c:1582 + [] SyS_connect+0x29/0x30 net/socket.c:1563 + [] entry_SYSCALL_64_fastpath+0x12/0x17 +Object at ffff8800bc699380, in cache ip6_dst_cache size: 384 + +The root cause of it is that in fib6_add_rt2node(), when it replaces an +existing route with the new one, it does not update fn->rr_ptr. 
+This commit resets fn->rr_ptr to NULL when it points to a route which is +replaced in fib6_add_rt2node(). + +Fixes: 27596472473a ("ipv6: fix ECMP route replacement") +Signed-off-by: Wei Wang +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_fib.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ipv6/ip6_fib.c ++++ b/net/ipv6/ip6_fib.c +@@ -897,6 +897,8 @@ add: + } + nsiblings = iter->rt6i_nsiblings; + fib6_purge_rt(iter, fn, info->nl_net); ++ if (fn->rr_ptr == iter) ++ fn->rr_ptr = NULL; + rt6_release(iter); + + if (nsiblings) { +@@ -909,6 +911,8 @@ add: + if (rt6_qualify_for_ecmp(iter)) { + *ins = iter->dst.rt6_next; + fib6_purge_rt(iter, fn, info->nl_net); ++ if (fn->rr_ptr == iter) ++ fn->rr_ptr = NULL; + rt6_release(iter); + nsiblings--; + } else { diff --git a/queue-4.9/irda-do-not-leak-initialized-list.dev-to-userspace.patch b/queue-4.9/irda-do-not-leak-initialized-list.dev-to-userspace.patch new file mode 100644 index 00000000000..842177c4e25 --- /dev/null +++ b/queue-4.9/irda-do-not-leak-initialized-list.dev-to-userspace.patch @@ -0,0 +1,35 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Colin Ian King +Date: Thu, 17 Aug 2017 23:14:58 +0100 +Subject: irda: do not leak initialized list.dev to userspace + +From: Colin Ian King + + +[ Upstream commit b024d949a3c24255a7ef1a470420eb478949aa4c ] + +list.dev has not been initialized and so the copy_to_user is copying +data from the stack back to user space which is a potential +information leak. Fix this ensuring all of list is initialized to +zero. + +Detected by CoverityScan, CID#1357894 ("Uninitialized scalar variable") + +Signed-off-by: Colin Ian King +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/irda/af_irda.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/irda/af_irda.c ++++ b/net/irda/af_irda.c +@@ -2223,7 +2223,7 @@ static int irda_getsockopt(struct socket + { + struct sock *sk = sock->sk; + struct irda_sock *self = irda_sk(sk); +- struct irda_device_list list; ++ struct irda_device_list list = { 0 }; + struct irda_device_info *discoveries; + struct irda_ias_set * ias_opt; /* IAS get/query params */ + struct ias_object * ias_obj; /* Object in IAS */ diff --git a/queue-4.9/net-mlx4_core-enable-4k-uar-if-sriov-module-parameter-is-not-enabled.patch b/queue-4.9/net-mlx4_core-enable-4k-uar-if-sriov-module-parameter-is-not-enabled.patch new file mode 100644 index 00000000000..098f8c159be --- /dev/null +++ b/queue-4.9/net-mlx4_core-enable-4k-uar-if-sriov-module-parameter-is-not-enabled.patch @@ -0,0 +1,49 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Huy Nguyen +Date: Thu, 17 Aug 2017 18:29:52 +0300 +Subject: net/mlx4_core: Enable 4K UAR if SRIOV module parameter is not enabled + +From: Huy Nguyen + + +[ Upstream commit ca3d89a3ebe79367bd41b6b8ba37664478ae2dba ] + +enable_4k_uar module parameter was added in patch cited below to +address the backward compatibility issue in SRIOV when the VM has +system's PAGE_SIZE uar implementation and the Hypervisor has 4k uar +implementation. + +The above compatibility issue does not exist in the non SRIOV case. +In this patch, we always enable 4k uar implementation if SRIOV +is not enabled on mlx4's supported cards. + +Fixes: 76e39ccf9c36 ("net/mlx4_core: Fix backward compatibility on VFs") +Signed-off-by: Huy Nguyen +Reviewed-by: Daniel Jurgens +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/main.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/main.c ++++ b/drivers/net/ethernet/mellanox/mlx4/main.c +@@ -430,7 +430,7 @@ static int mlx4_dev_cap(struct mlx4_dev + /* Virtual PCI function needs to determine UAR page size from + * firmware. Only master PCI function can set the uar page size + */ +- if (enable_4k_uar) ++ if (enable_4k_uar || !dev->persist->num_vfs) + dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT; + else + dev->uar_page_shift = PAGE_SHIFT; +@@ -2269,7 +2269,7 @@ static int mlx4_init_hca(struct mlx4_dev + + dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; + +- if (enable_4k_uar) { ++ if (enable_4k_uar || !dev->persist->num_vfs) { + init_hca.log_uar_sz = ilog2(dev->caps.num_uars) + + PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT; + init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12; diff --git a/queue-4.9/net-sched-fix-null-pointer-dereference-when-action-calls-some-targets.patch b/queue-4.9/net-sched-fix-null-pointer-dereference-when-action-calls-some-targets.patch new file mode 100644 index 00000000000..3bd38a8820b --- /dev/null +++ b/queue-4.9/net-sched-fix-null-pointer-dereference-when-action-calls-some-targets.patch @@ -0,0 +1,54 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Xin Long +Date: Fri, 18 Aug 2017 11:01:36 +0800 +Subject: net: sched: fix NULL pointer dereference when action calls some targets + +From: Xin Long + + +[ Upstream commit 4f8a881acc9d1adaf1e552349a0b1df28933a04c ] + +As we know in some target's checkentry it may dereference par.entryinfo +to check entry stuff inside. But when sched action calls xt_check_target, +par.entryinfo is set with NULL. It would cause kernel panic when calling +some targets. 
+ +It can be reproduced with: + # tc qd add dev eth1 ingress handle ffff: + # tc filter add dev eth1 parent ffff: u32 match u32 0 0 action xt \ + -j ECN --ecn-tcp-remove + +It could also crash kernel when using target CLUSTERIP or TPROXY. + +By now there's no proper value for par.entryinfo in ipt_init_target, +but it can not be set with NULL. This patch is to avoid all these +panics by setting it with an ipt_entry obj with all members = 0. + +Note that this issue has been there since the very beginning. + +Signed-off-by: Xin Long +Acked-by: Pablo Neira Ayuso +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_ipt.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/sched/act_ipt.c ++++ b/net/sched/act_ipt.c +@@ -41,6 +41,7 @@ static int ipt_init_target(struct xt_ent + { + struct xt_tgchk_param par; + struct xt_target *target; ++ struct ipt_entry e = {}; + int ret = 0; + + target = xt_request_find_target(AF_INET, t->u.user.name, +@@ -51,6 +52,7 @@ static int ipt_init_target(struct xt_ent + t->u.kernel.target = target; + memset(&par, 0, sizeof(par)); + par.table = table; ++ par.entryinfo = &e; + par.target = target; + par.targinfo = t->data; + par.hook_mask = hook; diff --git a/queue-4.9/net_sched-fix-order-of-queue-length-updates-in-qdisc_replace.patch b/queue-4.9/net_sched-fix-order-of-queue-length-updates-in-qdisc_replace.patch new file mode 100644 index 00000000000..9ee2d6e34ca --- /dev/null +++ b/queue-4.9/net_sched-fix-order-of-queue-length-updates-in-qdisc_replace.patch @@ -0,0 +1,41 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Konstantin Khlebnikov +Date: Sat, 19 Aug 2017 15:37:07 +0300 +Subject: net_sched: fix order of queue length updates in qdisc_replace() + +From: Konstantin Khlebnikov + + +[ Upstream commit 68a66d149a8c78ec6720f268597302883e48e9fa ] + +It is important to call qdisc_tree_reduce_backlog() after changing queue +length.
Parent qdisc should deactivate class in ->qlen_notify() called from +qdisc_tree_reduce_backlog() but this happens only if qdisc->q.qlen is zero. + +Missed class deactivations lead to crashes/warnings at picking packets +from empty qdisc and corrupting state at reactivating this class in future. + +Signed-off-by: Konstantin Khlebnikov +Fixes: 86a7996cc8a0 ("net_sched: introduce qdisc_replace() helper") +Acked-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sch_generic.h | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/include/net/sch_generic.h ++++ b/include/net/sch_generic.h +@@ -768,8 +768,11 @@ static inline struct Qdisc *qdisc_replac + old = *pold; + *pold = new; + if (old != NULL) { +- qdisc_tree_reduce_backlog(old, old->q.qlen, old->qstats.backlog); ++ unsigned int qlen = old->q.qlen; ++ unsigned int backlog = old->qstats.backlog; ++ + qdisc_reset(old); ++ qdisc_tree_reduce_backlog(old, qlen, backlog); + } + sch_tree_unlock(sch); + diff --git a/queue-4.9/net_sched-remove-warning-from-qdisc_hash_add.patch b/queue-4.9/net_sched-remove-warning-from-qdisc_hash_add.patch new file mode 100644 index 00000000000..83e4b1542dd --- /dev/null +++ b/queue-4.9/net_sched-remove-warning-from-qdisc_hash_add.patch @@ -0,0 +1,40 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Konstantin Khlebnikov +Date: Tue, 15 Aug 2017 16:39:05 +0300 +Subject: net_sched: remove warning from qdisc_hash_add + +From: Konstantin Khlebnikov + + +[ Upstream commit c90e95147c27b1780e76c6e8fea1b5c78d7d387f ] + +It was added in commit e57a784d8cae ("pkt_sched: set root qdisc +before change() in attach_default_qdiscs()") to hide duplicates +from "tc qdisc show" for inactive devices. + +After 59cc1f61f ("net: sched: convert qdisc linked list to hashtable") +it triggered when classful qdisc is added to inactive device because +default qdiscs are added before switching root qdisc.
+ +Anyway after commit ea3274695353 ("net: sched: avoid duplicates in +qdisc dump") duplicates are filtered right in dumper. + +Signed-off-by: Konstantin Khlebnikov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_api.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -277,9 +277,6 @@ static struct Qdisc *qdisc_match_from_ro + void qdisc_hash_add(struct Qdisc *q) + { + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { +- struct Qdisc *root = qdisc_dev(q)->qdisc; +- +- WARN_ON_ONCE(root == &noop_qdisc); + ASSERT_RTNL(); + hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); + } diff --git a/queue-4.9/net_sched-sfq-update-hierarchical-backlog-when-drop-packet.patch b/queue-4.9/net_sched-sfq-update-hierarchical-backlog-when-drop-packet.patch new file mode 100644 index 00000000000..effa7badf31 --- /dev/null +++ b/queue-4.9/net_sched-sfq-update-hierarchical-backlog-when-drop-packet.patch @@ -0,0 +1,44 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Konstantin Khlebnikov +Date: Tue, 15 Aug 2017 16:37:04 +0300 +Subject: net_sched/sfq: update hierarchical backlog when drop packet + +From: Konstantin Khlebnikov + + +[ Upstream commit 325d5dc3f7e7c2840b65e4a2988c082c2c0025c5 ] + +When sfq_enqueue() drops head packet or packet from another queue it +have to update backlog at upper qdiscs too. + +Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") +Signed-off-by: Konstantin Khlebnikov +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_sfq.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/sched/sch_sfq.c ++++ b/net/sched/sch_sfq.c +@@ -434,6 +434,7 @@ congestion_drop: + qdisc_drop(head, sch, to_free); + + slot_queue_add(slot, skb); ++ qdisc_tree_reduce_backlog(sch, 0, delta); + return NET_XMIT_CN; + } + +@@ -465,8 +466,10 @@ enqueue: + /* Return Congestion Notification only if we dropped a packet + * from this flow. + */ +- if (qlen != slot->qlen) ++ if (qlen != slot->qlen) { ++ qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb)); + return NET_XMIT_CN; ++ } + + /* As we dropped a packet, better let upper stack know this */ + qdisc_tree_reduce_backlog(sch, 1, dropped); diff --git a/queue-4.9/nfp-fix-infinite-loop-on-umapping-cleanup.patch b/queue-4.9/nfp-fix-infinite-loop-on-umapping-cleanup.patch new file mode 100644 index 00000000000..0935a7e74db --- /dev/null +++ b/queue-4.9/nfp-fix-infinite-loop-on-umapping-cleanup.patch @@ -0,0 +1,37 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Colin Ian King +Date: Fri, 18 Aug 2017 12:11:50 +0100 +Subject: nfp: fix infinite loop on umapping cleanup + +From: Colin Ian King + + +[ Upstream commit eac2c68d663effb077210218788952b5a0c1f60e ] + +The while loop that performs the dma page unmapping never decrements +index counter f and hence loops forever. Fix this with a pre-decrement +on f. + +Detected by CoverityScan, CID#1357309 ("Infinite loop") + +Fixes: 4c3523623dc0 ("net: add driver for Netronome NFP4000/NFP6000 NIC VFs") +Signed-off-by: Colin Ian King +Acked-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c ++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +@@ -871,8 +871,7 @@ static int nfp_net_tx(struct sk_buff *sk + return NETDEV_TX_OK; + + err_unmap: +- --f; +- while (f >= 0) { ++ while (--f >= 0) { + frag = &skb_shinfo(skb)->frags[f]; + dma_unmap_page(&nn->pdev->dev, + tx_ring->txbufs[wr_idx].dma_addr, diff --git a/queue-4.9/openvswitch-fix-skb_panic-due-to-the-incorrect-actions-attrlen.patch b/queue-4.9/openvswitch-fix-skb_panic-due-to-the-incorrect-actions-attrlen.patch new file mode 100644 index 00000000000..033e90e61e9 --- /dev/null +++ b/queue-4.9/openvswitch-fix-skb_panic-due-to-the-incorrect-actions-attrlen.patch @@ -0,0 +1,125 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Liping Zhang +Date: Wed, 16 Aug 2017 13:30:07 +0800 +Subject: openvswitch: fix skb_panic due to the incorrect actions attrlen + +From: Liping Zhang + + +[ Upstream commit 494bea39f3201776cdfddc232705f54a0bd210c4 ] + +For sw_flow_actions, the actions_len only represents the kernel part's +size, and when we dump the actions to the userspace, we will do the +convertions, so it's true size may become bigger than the actions_len. + +But unfortunately, for OVS_PACKET_ATTR_ACTIONS, we use the actions_len +to alloc the skbuff, so the user_skb's size may become insufficient and +oops will happen like this: + skbuff: skb_over_panic: text:ffffffff8148fabf len:1749 put:157 head: + ffff881300f39000 data:ffff881300f39000 tail:0x6d5 end:0x6c0 dev: + ------------[ cut here ]------------ + kernel BUG at net/core/skbuff.c:129! + [...] + Call Trace: + + [] skb_put+0x43/0x44 + [] skb_zerocopy+0x6c/0x1f4 + [] queue_userspace_packet+0x3a3/0x448 [openvswitch] + [] ovs_dp_upcall+0x30/0x5c [openvswitch] + [] output_userspace+0x132/0x158 [openvswitch] + [] ? 
ip6_rcv_finish+0x74/0x77 [ipv6] + [] do_execute_actions+0xcc1/0xdc8 [openvswitch] + [] ovs_execute_actions+0x74/0x106 [openvswitch] + [] ovs_dp_process_packet+0xe1/0xfd [openvswitch] + [] ? key_extract+0x63c/0x8d5 [openvswitch] + [] ovs_vport_receive+0xa1/0xc3 [openvswitch] + [...] + +Also we can find that the actions_len is much little than the orig_len: + crash> struct sw_flow_actions 0xffff8812f539d000 + struct sw_flow_actions { + rcu = { + next = 0xffff8812f5398800, + func = 0xffffe3b00035db32 + }, + orig_len = 1384, + actions_len = 592, + actions = 0xffff8812f539d01c + } + +So as a quick fix, use the orig_len instead of the actions_len to alloc +the user_skb. + +Last, this oops happened on our system running a relative old kernel, but +the same risk still exists on the mainline, since we use the wrong +actions_len from the beginning. + +Fixes: ccea74457bbd ("openvswitch: include datapath actions with sampled-packet upcall to userspace") +Cc: Neil McKee +Signed-off-by: Liping Zhang +Acked-by: Pravin B Shelar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/actions.c | 1 + + net/openvswitch/datapath.c | 7 ++++--- + net/openvswitch/datapath.h | 2 ++ + 3 files changed, 7 insertions(+), 3 deletions(-) + +--- a/net/openvswitch/actions.c ++++ b/net/openvswitch/actions.c +@@ -1240,6 +1240,7 @@ int ovs_execute_actions(struct datapath + goto out; + } + ++ OVS_CB(skb)->acts_origlen = acts->orig_len; + err = do_execute_actions(dp, skb, key, + acts->actions, acts->actions_len); + +--- a/net/openvswitch/datapath.c ++++ b/net/openvswitch/datapath.c +@@ -383,7 +383,7 @@ static int queue_gso_packets(struct data + } + + static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, +- unsigned int hdrlen) ++ unsigned int hdrlen, int actions_attrlen) + { + size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) + + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ +@@ -400,7 +400,7 @@ static size_t upcall_msg_size(const stru + + /* OVS_PACKET_ATTR_ACTIONS */ + if (upcall_info->actions_len) +- size += nla_total_size(upcall_info->actions_len); ++ size += nla_total_size(actions_attrlen); + + /* OVS_PACKET_ATTR_MRU */ + if (upcall_info->mru) +@@ -467,7 +467,8 @@ static int queue_userspace_packet(struct + else + hlen = skb->len; + +- len = upcall_msg_size(upcall_info, hlen - cutlen); ++ len = upcall_msg_size(upcall_info, hlen - cutlen, ++ OVS_CB(skb)->acts_origlen); + user_skb = genlmsg_new(len, GFP_ATOMIC); + if (!user_skb) { + err = -ENOMEM; +--- a/net/openvswitch/datapath.h ++++ b/net/openvswitch/datapath.h +@@ -100,12 +100,14 @@ struct datapath { + * @input_vport: The original vport packet came in on. This value is cached + * when a packet is received by OVS. + * @mru: The maximum received fragement size; 0 if the packet is not ++ * @acts_origlen: The netlink size of the flow actions applied to this skb. + * @cutlen: The number of bytes from the packet end to be removed. + * fragmented. 
+ */ + struct ovs_skb_cb { + struct vport *input_vport; + u16 mru; ++ u16 acts_origlen; + u32 cutlen; + }; + #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) diff --git a/queue-4.9/ptr_ring-use-kmalloc_array.patch b/queue-4.9/ptr_ring-use-kmalloc_array.patch new file mode 100644 index 00000000000..a1d66b8ecb6 --- /dev/null +++ b/queue-4.9/ptr_ring-use-kmalloc_array.patch @@ -0,0 +1,73 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Eric Dumazet +Date: Wed, 16 Aug 2017 10:36:47 -0700 +Subject: ptr_ring: use kmalloc_array() + +From: Eric Dumazet + + +[ Upstream commit 81fbfe8adaf38d4f5a98c19bebfd41c5d6acaee8 ] + +As found by syzkaller, malicious users can set whatever tx_queue_len +on a tun device and eventually crash the kernel. + +Lets remove the ALIGN(XXX, SMP_CACHE_BYTES) thing since a small +ring buffer is not fast anyway. + +Fixes: 2e0ab8ca83c1 ("ptr_ring: array based FIFO for pointers") +Signed-off-by: Eric Dumazet +Reported-by: Dmitry Vyukov +Cc: Michael S. Tsirkin +Cc: Jason Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/ptr_ring.h | 9 +++++---- + include/linux/skb_array.h | 3 ++- + 2 files changed, 7 insertions(+), 5 deletions(-) + +--- a/include/linux/ptr_ring.h ++++ b/include/linux/ptr_ring.h +@@ -340,9 +340,9 @@ static inline void *ptr_ring_consume_bh( + __PTR_RING_PEEK_CALL_v; \ + }) + +-static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp) ++static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) + { +- return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp); ++ return kcalloc(size, sizeof(void *), gfp); + } + + static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) +@@ -417,7 +417,8 @@ static inline int ptr_ring_resize(struct + * In particular if you consume ring in interrupt or BH context, you must + * disable interrupts/BH when doing so. 
+ */ +-static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings, ++static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, ++ unsigned int nrings, + int size, + gfp_t gfp, void (*destroy)(void *)) + { +@@ -425,7 +426,7 @@ static inline int ptr_ring_resize_multip + void ***queues; + int i; + +- queues = kmalloc(nrings * sizeof *queues, gfp); ++ queues = kmalloc_array(nrings, sizeof(*queues), gfp); + if (!queues) + goto noqueues; + +--- a/include/linux/skb_array.h ++++ b/include/linux/skb_array.h +@@ -162,7 +162,8 @@ static inline int skb_array_resize(struc + } + + static inline int skb_array_resize_multiple(struct skb_array **rings, +- int nrings, int size, gfp_t gfp) ++ int nrings, unsigned int size, ++ gfp_t gfp) + { + BUILD_BUG_ON(offsetof(struct skb_array, ring)); + return ptr_ring_resize_multiple((struct ptr_ring **)rings, diff --git a/queue-4.9/sctp-fully-initialize-the-ipv6-address-in-sctp_v6_to_addr.patch b/queue-4.9/sctp-fully-initialize-the-ipv6-address-in-sctp_v6_to_addr.patch new file mode 100644 index 00000000000..d10fa1a871a --- /dev/null +++ b/queue-4.9/sctp-fully-initialize-the-ipv6-address-in-sctp_v6_to_addr.patch @@ -0,0 +1,114 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Alexander Potapenko +Date: Wed, 16 Aug 2017 20:16:40 +0200 +Subject: sctp: fully initialize the IPv6 address in sctp_v6_to_addr() + +From: Alexander Potapenko + + +[ Upstream commit 15339e441ec46fbc3bf3486bb1ae4845b0f1bb8d ] + +KMSAN reported use of uninitialized sctp_addr->v4.sin_addr.s_addr and +sctp_addr->v6.sin6_scope_id in sctp_v6_cmp_addr() (see below). +Make sure all fields of an IPv6 address are initialized, which +guarantees that the IPv4 fields are also initialized. 
+ +================================================================== + BUG: KMSAN: use of uninitialized memory in sctp_v6_cmp_addr+0x8d4/0x9f0 + net/sctp/ipv6.c:517 + CPU: 2 PID: 31056 Comm: syz-executor1 Not tainted 4.11.0-rc5+ #2944 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs + 01/01/2011 + Call Trace: + dump_stack+0x172/0x1c0 lib/dump_stack.c:42 + is_logbuf_locked mm/kmsan/kmsan.c:59 [inline] + kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:938 + native_save_fl arch/x86/include/asm/irqflags.h:18 [inline] + arch_local_save_flags arch/x86/include/asm/irqflags.h:72 [inline] + arch_local_irq_save arch/x86/include/asm/irqflags.h:113 [inline] + __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:467 + sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517 + sctp_v6_get_dst+0x8c7/0x1630 net/sctp/ipv6.c:290 + sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 + sctp_assoc_add_peer+0x66d/0x16f0 net/sctp/associola.c:651 + sctp_sendmsg+0x35a5/0x4f90 net/sctp/socket.c:1871 + inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 + sock_sendmsg_nosec net/socket.c:633 [inline] + sock_sendmsg net/socket.c:643 [inline] + SYSC_sendto+0x608/0x710 net/socket.c:1696 + SyS_sendto+0x8a/0xb0 net/socket.c:1664 + entry_SYSCALL_64_fastpath+0x13/0x94 + RIP: 0033:0x44b479 + RSP: 002b:00007f6213f21c08 EFLAGS: 00000286 ORIG_RAX: 000000000000002c + RAX: ffffffffffffffda RBX: 0000000020000000 RCX: 000000000044b479 + RDX: 0000000000000041 RSI: 0000000020edd000 RDI: 0000000000000006 + RBP: 00000000007080a8 R08: 0000000020b85fe4 R09: 000000000000001c + R10: 0000000000040005 R11: 0000000000000286 R12: 00000000ffffffff + R13: 0000000000003760 R14: 00000000006e5820 R15: 0000000000ff8000 + origin description: ----dst_saddr@sctp_v6_get_dst + local variable created at: + sk_fullsock include/net/sock.h:2321 [inline] + inet6_sk include/linux/ipv6.h:309 [inline] + sctp_v6_get_dst+0x91/0x1630 net/sctp/ipv6.c:241 + sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 
+================================================================== + BUG: KMSAN: use of uninitialized memory in sctp_v6_cmp_addr+0x8d4/0x9f0 + net/sctp/ipv6.c:517 + CPU: 2 PID: 31056 Comm: syz-executor1 Not tainted 4.11.0-rc5+ #2944 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs + 01/01/2011 + Call Trace: + dump_stack+0x172/0x1c0 lib/dump_stack.c:42 + is_logbuf_locked mm/kmsan/kmsan.c:59 [inline] + kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:938 + native_save_fl arch/x86/include/asm/irqflags.h:18 [inline] + arch_local_save_flags arch/x86/include/asm/irqflags.h:72 [inline] + arch_local_irq_save arch/x86/include/asm/irqflags.h:113 [inline] + __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:467 + sctp_v6_cmp_addr+0x8d4/0x9f0 net/sctp/ipv6.c:517 + sctp_v6_get_dst+0x8c7/0x1630 net/sctp/ipv6.c:290 + sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 + sctp_assoc_add_peer+0x66d/0x16f0 net/sctp/associola.c:651 + sctp_sendmsg+0x35a5/0x4f90 net/sctp/socket.c:1871 + inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 + sock_sendmsg_nosec net/socket.c:633 [inline] + sock_sendmsg net/socket.c:643 [inline] + SYSC_sendto+0x608/0x710 net/socket.c:1696 + SyS_sendto+0x8a/0xb0 net/socket.c:1664 + entry_SYSCALL_64_fastpath+0x13/0x94 + RIP: 0033:0x44b479 + RSP: 002b:00007f6213f21c08 EFLAGS: 00000286 ORIG_RAX: 000000000000002c + RAX: ffffffffffffffda RBX: 0000000020000000 RCX: 000000000044b479 + RDX: 0000000000000041 RSI: 0000000020edd000 RDI: 0000000000000006 + RBP: 00000000007080a8 R08: 0000000020b85fe4 R09: 000000000000001c + R10: 0000000000040005 R11: 0000000000000286 R12: 00000000ffffffff + R13: 0000000000003760 R14: 00000000006e5820 R15: 0000000000ff8000 + origin description: ----dst_saddr@sctp_v6_get_dst + local variable created at: + sk_fullsock include/net/sock.h:2321 [inline] + inet6_sk include/linux/ipv6.h:309 [inline] + sctp_v6_get_dst+0x91/0x1630 net/sctp/ipv6.c:241 + sctp_transport_route+0x101/0x570 net/sctp/transport.c:292 
+================================================================== + +Signed-off-by: Alexander Potapenko +Reviewed-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/ipv6.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/sctp/ipv6.c ++++ b/net/sctp/ipv6.c +@@ -512,7 +512,9 @@ static void sctp_v6_to_addr(union sctp_a + { + addr->sa.sa_family = AF_INET6; + addr->v6.sin6_port = port; ++ addr->v6.sin6_flowinfo = 0; + addr->v6.sin6_addr = *saddr; ++ addr->v6.sin6_scope_id = 0; + } + + /* Compare addresses exactly. diff --git a/queue-4.9/series b/queue-4.9/series index ea3d43eaaeb..281f050a4b2 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -1 +1,26 @@ sparc64-remove-unnecessary-log-message.patch +af_key-do-not-use-gfp_kernel-in-atomic-contexts.patch +dccp-purge-write-queue-in-dccp_destroy_sock.patch +dccp-defer-ccid_hc_tx_delete-at-dismantle-time.patch +ipv4-fix-null-dereference-in-free_fib_info_rcu.patch +net_sched-sfq-update-hierarchical-backlog-when-drop-packet.patch +net_sched-remove-warning-from-qdisc_hash_add.patch +bpf-fix-bpf_trace_printk-on-32-bit-archs.patch +openvswitch-fix-skb_panic-due-to-the-incorrect-actions-attrlen.patch +ptr_ring-use-kmalloc_array.patch +ipv4-better-ip_max_mtu-enforcement.patch +nfp-fix-infinite-loop-on-umapping-cleanup.patch +sctp-fully-initialize-the-ipv6-address-in-sctp_v6_to_addr.patch +tipc-fix-use-after-free.patch +ipv6-reset-fn-rr_ptr-when-replacing-route.patch +ipv6-repair-fib6-tree-in-failure-case.patch +tcp-when-rearming-rto-if-rto-time-is-in-past-then-fire-rto-asap.patch +net-mlx4_core-enable-4k-uar-if-sriov-module-parameter-is-not-enabled.patch +irda-do-not-leak-initialized-list.dev-to-userspace.patch +net-sched-fix-null-pointer-dereference-when-action-calls-some-targets.patch +net_sched-fix-order-of-queue-length-updates-in-qdisc_replace.patch +bpf-verifier-add-additional-patterns-to-evaluate_reg_imm_alu.patch 
+bpf-adjust-verifier-heuristics.patch +bpf-verifier-fix-alu-ops-against-map_value-_adj-register-types.patch +bpf-fix-mixed-signed-unsigned-derived-min-max-value-bounds.patch +bpf-verifier-fix-min-max-handling-in-bpf_sub.patch diff --git a/queue-4.9/tcp-when-rearming-rto-if-rto-time-is-in-past-then-fire-rto-asap.patch b/queue-4.9/tcp-when-rearming-rto-if-rto-time-is-in-past-then-fire-rto-asap.patch new file mode 100644 index 00000000000..bce614bf152 --- /dev/null +++ b/queue-4.9/tcp-when-rearming-rto-if-rto-time-is-in-past-then-fire-rto-asap.patch @@ -0,0 +1,44 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Neal Cardwell +Date: Wed, 16 Aug 2017 17:53:36 -0400 +Subject: tcp: when rearming RTO, if RTO time is in past then fire RTO ASAP + +From: Neal Cardwell + + +[ Upstream commit cdbeb633ca71a02b7b63bfeb94994bf4e1a0b894 ] + +In some situations tcp_send_loss_probe() can realize that it's unable +to send a loss probe (TLP), and falls back to calling tcp_rearm_rto() +to schedule an RTO timer. In such cases, sometimes tcp_rearm_rto() +realizes that the RTO was eligible to fire immediately or at some +point in the past (delta_us <= 0). Previously in such cases +tcp_rearm_rto() was scheduling such "overdue" RTOs to happen at now + +icsk_rto, which caused needless delays of hundreds of milliseconds +(and non-linear behavior that made reproducible testing +difficult). This commit changes the logic to schedule "overdue" RTOs +ASAP, rather than at now + icsk_rto. + +Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") +Suggested-by: Yuchung Cheng +Signed-off-by: Neal Cardwell +Signed-off-by: Yuchung Cheng +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -3036,8 +3036,7 @@ void tcp_rearm_rto(struct sock *sk) + /* delta may not be positive if the socket is locked + * when the retrans timer fires and is rescheduled. + */ +- if (delta > 0) +- rto = delta; ++ rto = max(delta, 1); + } + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, + TCP_RTO_MAX); diff --git a/queue-4.9/tipc-fix-use-after-free.patch b/queue-4.9/tipc-fix-use-after-free.patch new file mode 100644 index 00000000000..5cef1ef72a2 --- /dev/null +++ b/queue-4.9/tipc-fix-use-after-free.patch @@ -0,0 +1,169 @@ +From foo@baz Thu Aug 24 17:44:02 PDT 2017 +From: Eric Dumazet +Date: Wed, 16 Aug 2017 09:41:54 -0700 +Subject: tipc: fix use-after-free + +From: Eric Dumazet + + +[ Upstream commit 5bfd37b4de5c98e86b12bd13be5aa46c7484a125 ] + +syszkaller reported use-after-free in tipc [1] + +When msg->rep skb is freed, set the pointer to NULL, +so that caller does not free it again.
+ +[1] + +================================================================== +BUG: KASAN: use-after-free in skb_push+0xd4/0xe0 net/core/skbuff.c:1466 +Read of size 8 at addr ffff8801c6e71e90 by task syz-executor5/4115 + +CPU: 1 PID: 4115 Comm: syz-executor5 Not tainted 4.13.0-rc4+ #32 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:16 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:52 + print_address_description+0x73/0x250 mm/kasan/report.c:252 + kasan_report_error mm/kasan/report.c:351 [inline] + kasan_report+0x24e/0x340 mm/kasan/report.c:409 + __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:430 + skb_push+0xd4/0xe0 net/core/skbuff.c:1466 + tipc_nl_compat_recv+0x833/0x18f0 net/tipc/netlink_compat.c:1209 + genl_family_rcv_msg+0x7b7/0xfb0 net/netlink/genetlink.c:598 + genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:623 + netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2397 + genl_rcv+0x28/0x40 net/netlink/genetlink.c:634 + netlink_unicast_kernel net/netlink/af_netlink.c:1265 [inline] + netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1291 + netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1854 + sock_sendmsg_nosec net/socket.c:633 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:643 + sock_write_iter+0x31a/0x5d0 net/socket.c:898 + call_write_iter include/linux/fs.h:1743 [inline] + new_sync_write fs/read_write.c:457 [inline] + __vfs_write+0x684/0x970 fs/read_write.c:470 + vfs_write+0x189/0x510 fs/read_write.c:518 + SYSC_write fs/read_write.c:565 [inline] + SyS_write+0xef/0x220 fs/read_write.c:557 + entry_SYSCALL_64_fastpath+0x1f/0xbe +RIP: 0033:0x4512e9 +RSP: 002b:00007f3bc8184c08 EFLAGS: 00000216 ORIG_RAX: 0000000000000001 +RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 00000000004512e9 +RDX: 0000000000000020 RSI: 0000000020fdb000 RDI: 0000000000000006 +RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 
0000000000000216 R12: 00000000004b5e76 +R13: 00007f3bc8184b48 R14: 00000000004b5e86 R15: 0000000000000000 + +Allocated by task 4115: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 + save_stack+0x43/0xd0 mm/kasan/kasan.c:447 + set_track mm/kasan/kasan.c:459 [inline] + kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 + kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:489 + kmem_cache_alloc_node+0x13d/0x750 mm/slab.c:3651 + __alloc_skb+0xf1/0x740 net/core/skbuff.c:219 + alloc_skb include/linux/skbuff.h:903 [inline] + tipc_tlv_alloc+0x26/0xb0 net/tipc/netlink_compat.c:148 + tipc_nl_compat_dumpit+0xf2/0x3c0 net/tipc/netlink_compat.c:248 + tipc_nl_compat_handle net/tipc/netlink_compat.c:1130 [inline] + tipc_nl_compat_recv+0x756/0x18f0 net/tipc/netlink_compat.c:1199 + genl_family_rcv_msg+0x7b7/0xfb0 net/netlink/genetlink.c:598 + genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:623 + netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2397 + genl_rcv+0x28/0x40 net/netlink/genetlink.c:634 + netlink_unicast_kernel net/netlink/af_netlink.c:1265 [inline] + netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1291 + netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1854 + sock_sendmsg_nosec net/socket.c:633 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:643 + sock_write_iter+0x31a/0x5d0 net/socket.c:898 + call_write_iter include/linux/fs.h:1743 [inline] + new_sync_write fs/read_write.c:457 [inline] + __vfs_write+0x684/0x970 fs/read_write.c:470 + vfs_write+0x189/0x510 fs/read_write.c:518 + SYSC_write fs/read_write.c:565 [inline] + SyS_write+0xef/0x220 fs/read_write.c:557 + entry_SYSCALL_64_fastpath+0x1f/0xbe + +Freed by task 4115: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 + save_stack+0x43/0xd0 mm/kasan/kasan.c:447 + set_track mm/kasan/kasan.c:459 [inline] + kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 + __cache_free mm/slab.c:3503 [inline] + kmem_cache_free+0x77/0x280 mm/slab.c:3763 + kfree_skbmem+0x1a1/0x1d0 net/core/skbuff.c:622 + __kfree_skb 
net/core/skbuff.c:682 [inline] + kfree_skb+0x165/0x4c0 net/core/skbuff.c:699 + tipc_nl_compat_dumpit+0x36a/0x3c0 net/tipc/netlink_compat.c:260 + tipc_nl_compat_handle net/tipc/netlink_compat.c:1130 [inline] + tipc_nl_compat_recv+0x756/0x18f0 net/tipc/netlink_compat.c:1199 + genl_family_rcv_msg+0x7b7/0xfb0 net/netlink/genetlink.c:598 + genl_rcv_msg+0xb2/0x140 net/netlink/genetlink.c:623 + netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2397 + genl_rcv+0x28/0x40 net/netlink/genetlink.c:634 + netlink_unicast_kernel net/netlink/af_netlink.c:1265 [inline] + netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1291 + netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1854 + sock_sendmsg_nosec net/socket.c:633 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:643 + sock_write_iter+0x31a/0x5d0 net/socket.c:898 + call_write_iter include/linux/fs.h:1743 [inline] + new_sync_write fs/read_write.c:457 [inline] + __vfs_write+0x684/0x970 fs/read_write.c:470 + vfs_write+0x189/0x510 fs/read_write.c:518 + SYSC_write fs/read_write.c:565 [inline] + SyS_write+0xef/0x220 fs/read_write.c:557 + entry_SYSCALL_64_fastpath+0x1f/0xbe + +The buggy address belongs to the object at ffff8801c6e71dc0 + which belongs to the cache skbuff_head_cache of size 224 +The buggy address is located 208 bytes inside of + 224-byte region [ffff8801c6e71dc0, ffff8801c6e71ea0) +The buggy address belongs to the page: +page:ffffea00071b9c40 count:1 mapcount:0 mapping:ffff8801c6e71000 index:0x0 +flags: 0x200000000000100(slab) +raw: 0200000000000100 ffff8801c6e71000 0000000000000000 000000010000000c +raw: ffffea0007224a20 ffff8801d98caf48 ffff8801d9e79040 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff8801c6e71d80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb + ffff8801c6e71e00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +>ffff8801c6e71e80: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc + ^ + ffff8801c6e71f00: fc fc fc fc fc fc fc fc fc 
fc fc fc fc fc fc fc + ffff8801c6e71f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +================================================================== + +Signed-off-by: Eric Dumazet +Reported-by: Dmitry Vyukov +Cc: Jon Maloy +Cc: Ying Xue +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/netlink_compat.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/tipc/netlink_compat.c ++++ b/net/tipc/netlink_compat.c +@@ -258,13 +258,15 @@ static int tipc_nl_compat_dumpit(struct + arg = nlmsg_new(0, GFP_KERNEL); + if (!arg) { + kfree_skb(msg->rep); ++ msg->rep = NULL; + return -ENOMEM; + } + + err = __tipc_nl_compat_dumpit(cmd, msg, arg); +- if (err) ++ if (err) { + kfree_skb(msg->rep); +- ++ msg->rep = NULL; ++ } + kfree_skb(arg); + + return err; -- 2.47.3