From: Greg Kroah-Hartman Date: Wed, 9 Aug 2017 00:18:52 +0000 (-0700) Subject: 4.4-stable patches X-Git-Tag: v4.12.6~18 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=270cdee74218e52cc042fe888826e98f1146572f;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch net-zero-terminate-ifr_name-in-dev_ifname.patch openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch sparc64-prevent-perf-from-running-during-super-critical-sections.patch xen-netback-correctly-schedule-rate-limited-queues.patch --- diff --git a/queue-4.4/dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch b/queue-4.4/dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch new file mode 100644 index 00000000000..7cb5e024345 --- /dev/null +++ b/queue-4.4/dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch @@ -0,0 +1,38 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: Xin Long +Date: Wed, 26 Jul 2017 14:20:15 +0800 
+Subject: dccp: fix a memleak for dccp_feat_init err process + +From: Xin Long + + +[ Upstream commit e90ce2fc27cad7e7b1e72b9e66201a7a4c124c2b ] + +In dccp_feat_init, when ccid_get_builtin_ccids fails to alloc +memory for rx.val, it should free tx.val before returning an +error. + +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/feat.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/dccp/feat.c ++++ b/net/dccp/feat.c +@@ -1471,9 +1471,12 @@ int dccp_feat_init(struct sock *sk) + * singleton values (which always leads to failure). + * These settings can still (later) be overridden via sockopts. + */ +- if (ccid_get_builtin_ccids(&tx.val, &tx.len) || +- ccid_get_builtin_ccids(&rx.val, &rx.len)) ++ if (ccid_get_builtin_ccids(&tx.val, &tx.len)) + return -ENOBUFS; ++ if (ccid_get_builtin_ccids(&rx.val, &rx.len)) { ++ kfree(tx.val); ++ return -ENOBUFS; ++ } + + if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) || + !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len)) diff --git a/queue-4.4/dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch b/queue-4.4/dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch new file mode 100644 index 00000000000..e2e50e437bf --- /dev/null +++ b/queue-4.4/dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch @@ -0,0 +1,33 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: Xin Long +Date: Wed, 26 Jul 2017 14:19:46 +0800 +Subject: dccp: fix a memleak that dccp_ipv4 doesn't put reqsk properly + +From: Xin Long + + +[ Upstream commit b7953d3c0e30a5fc944f6b7bd0bcceb0794bcd85 ] + +The patch "dccp: fix a memleak that dccp_ipv6 doesn't put reqsk +properly" fixed reqsk refcnt leak for dccp_ipv6. The same issue +exists on dccp_ipv4. + +This patch is to fix it for dccp_ipv4. + +Signed-off-by: Xin Long +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ipv4.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -635,6 +635,7 @@ int dccp_v4_conn_request(struct sock *sk + goto drop_and_free; + + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); ++ reqsk_put(req); + return 0; + + drop_and_free: diff --git a/queue-4.4/dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch b/queue-4.4/dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch new file mode 100644 index 00000000000..cfad10ec849 --- /dev/null +++ b/queue-4.4/dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch @@ -0,0 +1,51 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: Xin Long +Date: Wed, 26 Jul 2017 14:19:09 +0800 +Subject: dccp: fix a memleak that dccp_ipv6 doesn't put reqsk properly + +From: Xin Long + + +[ Upstream commit 0c2232b0a71db0ac1d22f751aa1ac0cadb950fd2 ] + +In dccp_v6_conn_request, after reqsk gets alloced and hashed into +ehash table, reqsk's refcnt is set 3. one is for req->rsk_timer, +one is for hlist, and the other one is for current using. + +The problem is when dccp_v6_conn_request returns and finishes using +reqsk, it doesn't put reqsk. This will cause reqsk refcnt leaks and +reqsk obj never gets freed. + +Jianlin found this issue when running dccp_memleak.c in a loop, the +system memory would run out. + +dccp_memleak.c: + int s1 = socket(PF_INET6, 6, IPPROTO_IP); + bind(s1, &sa1, 0x20); + listen(s1, 0x9); + int s2 = socket(PF_INET6, 6, IPPROTO_IP); + connect(s2, &sa1, 0x20); + close(s1); + close(s2); + +This patch is to put the reqsk before dccp_v6_conn_request returns, +just as what tcp_conn_request does. + +Reported-by: Jianlin Shi +Signed-off-by: Xin Long +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ipv6.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -376,6 +376,7 @@ static int dccp_v6_conn_request(struct s + goto drop_and_free; + + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); ++ reqsk_put(req); + return 0; + + drop_and_free: diff --git a/queue-4.4/ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch b/queue-4.4/ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch new file mode 100644 index 00000000000..c8436fce064 --- /dev/null +++ b/queue-4.4/ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch @@ -0,0 +1,71 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: Mahesh Bandewar +Date: Wed, 19 Jul 2017 15:41:33 -0700 +Subject: ipv4: initialize fib_trie prior to register_netdev_notifier call. + +From: Mahesh Bandewar + + +[ Upstream commit 8799a221f5944a7d74516ecf46d58c28ec1d1f75 ] + +Net stack initialization currently initializes fib-trie after the +first call to netdevice_notifier() call. In fact fib_trie initialization +needs to happen before first rtnl_register(). It does not cause any problem +since there are no devices UP at this moment, but trying to bring 'lo' +UP at initialization would make this assumption wrong and exposes the issue. + +Fixes following crash + + Call Trace: + ? alternate_node_alloc+0x76/0xa0 + fib_table_insert+0x1b7/0x4b0 + fib_magic.isra.17+0xea/0x120 + fib_add_ifaddr+0x7b/0x190 + fib_netdev_event+0xc0/0x130 + register_netdevice_notifier+0x1c1/0x1d0 + ip_fib_init+0x72/0x85 + ip_rt_init+0x187/0x1e9 + ip_init+0xe/0x1a + inet_init+0x171/0x26c + ? ipv4_offload_init+0x66/0x66 + do_one_initcall+0x43/0x160 + kernel_init_freeable+0x191/0x219 + ? 
rest_init+0x80/0x80 + kernel_init+0xe/0x150 + ret_from_fork+0x22/0x30 + Code: f6 46 23 04 74 86 4c 89 f7 e8 ae 45 01 00 49 89 c7 4d 85 ff 0f 85 7b ff ff ff 31 db eb 08 4c 89 ff e8 16 47 01 00 48 8b 44 24 38 <45> 8b 6e 14 4d 63 76 74 48 89 04 24 0f 1f 44 00 00 48 83 c4 08 + RIP: kmem_cache_alloc+0xcf/0x1c0 RSP: ffff9b1500017c28 + CR2: 0000000000000014 + +Fixes: 7b1a74fdbb9e ("[NETNS]: Refactor fib initialization so it can handle multiple namespaces.") +Fixes: 7f9b80529b8a ("[IPV4]: fib hash|trie initialization") + +Signed-off-by: Mahesh Bandewar +Acked-by: "Eric W. Biederman" +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_frontend.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -1319,13 +1319,14 @@ static struct pernet_operations fib_net_ + + void __init ip_fib_init(void) + { +- rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); +- rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); +- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); ++ fib_trie_init(); + + register_pernet_subsys(&fib_net_ops); ++ + register_netdevice_notifier(&fib_netdev_notifier); + register_inetaddr_notifier(&fib_inetaddr_notifier); + +- fib_trie_init(); ++ rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); ++ rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); ++ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); + } diff --git a/queue-4.4/ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch b/queue-4.4/ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch new file mode 100644 index 00000000000..974191c5555 --- /dev/null +++ b/queue-4.4/ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch @@ -0,0 +1,146 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: Alexander Potapenko +Date: Mon, 17 Jul 2017 12:35:58 +0200 +Subject: ipv4: ipv6: initialize treq->txhash in 
cookie_v[46]_check() + +From: Alexander Potapenko + + +[ Upstream commit 18bcf2907df935981266532e1e0d052aff2e6fae ] + +KMSAN reported use of uninitialized memory in skb_set_hash_from_sk(), +which originated from the TCP request socket created in +cookie_v6_check(): + + ================================================================== + BUG: KMSAN: use of uninitialized memory in tcp_transmit_skb+0xf77/0x3ec0 + CPU: 1 PID: 2949 Comm: syz-execprog Not tainted 4.11.0-rc5+ #2931 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 + TCP: request_sock_TCPv6: Possible SYN flooding on port 20028. Sending cookies. Check SNMP counters. + Call Trace: + + __dump_stack lib/dump_stack.c:16 + dump_stack+0x172/0x1c0 lib/dump_stack.c:52 + kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927 + __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469 + skb_set_hash_from_sk ./include/net/sock.h:2011 + tcp_transmit_skb+0xf77/0x3ec0 net/ipv4/tcp_output.c:983 + tcp_send_ack+0x75b/0x830 net/ipv4/tcp_output.c:3493 + tcp_delack_timer_handler+0x9a6/0xb90 net/ipv4/tcp_timer.c:284 + tcp_delack_timer+0x1b0/0x310 net/ipv4/tcp_timer.c:309 + call_timer_fn+0x240/0x520 kernel/time/timer.c:1268 + expire_timers kernel/time/timer.c:1307 + __run_timers+0xc13/0xf10 kernel/time/timer.c:1601 + run_timer_softirq+0x36/0xa0 kernel/time/timer.c:1614 + __do_softirq+0x485/0x942 kernel/softirq.c:284 + invoke_softirq kernel/softirq.c:364 + irq_exit+0x1fa/0x230 kernel/softirq.c:405 + exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:657 + smp_apic_timer_interrupt+0x5a/0x80 arch/x86/kernel/apic/apic.c:966 + apic_timer_interrupt+0x86/0x90 arch/x86/entry/entry_64.S:489 + RIP: 0010:native_restore_fl ./arch/x86/include/asm/irqflags.h:36 + RIP: 0010:arch_local_irq_restore ./arch/x86/include/asm/irqflags.h:77 + RIP: 0010:__msan_poison_alloca+0xed/0x120 mm/kmsan/kmsan_instr.c:440 + RSP: 0018:ffff880024917cd8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff10 + RAX: 0000000000000246 RBX: ffff8800224c0000 RCX: 
0000000000000005 + RDX: 0000000000000004 RSI: ffff880000000000 RDI: ffffea0000b6d770 + RBP: ffff880024917d58 R08: 0000000000000dd8 R09: 0000000000000004 + R10: 0000160000000000 R11: 0000000000000000 R12: ffffffff85abf810 + R13: ffff880024917dd8 R14: 0000000000000010 R15: ffffffff81cabde4 + + poll_select_copy_remaining+0xac/0x6b0 fs/select.c:293 + SYSC_select+0x4b4/0x4e0 fs/select.c:653 + SyS_select+0x76/0xa0 fs/select.c:634 + entry_SYSCALL_64_fastpath+0x13/0x94 arch/x86/entry/entry_64.S:204 + RIP: 0033:0x4597e7 + RSP: 002b:000000c420037ee0 EFLAGS: 00000246 ORIG_RAX: 0000000000000017 + RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004597e7 + RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 + RBP: 000000c420037ef0 R08: 000000c420037ee0 R09: 0000000000000059 + R10: 0000000000000000 R11: 0000000000000246 R12: 000000000042dc20 + R13: 00000000000000f3 R14: 0000000000000030 R15: 0000000000000003 + chained origin: + save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 + kmsan_save_stack mm/kmsan/kmsan.c:317 + kmsan_internal_chain_origin+0x12a/0x1f0 mm/kmsan/kmsan.c:547 + __msan_store_shadow_origin_4+0xac/0x110 mm/kmsan/kmsan_instr.c:259 + tcp_create_openreq_child+0x709/0x1ae0 net/ipv4/tcp_minisocks.c:472 + tcp_v6_syn_recv_sock+0x7eb/0x2a30 net/ipv6/tcp_ipv6.c:1103 + tcp_get_cookie_sock+0x136/0x5f0 net/ipv4/syncookies.c:212 + cookie_v6_check+0x17a9/0x1b50 net/ipv6/syncookies.c:245 + tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989 + tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298 + tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487 + ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279 + NF_HOOK ./include/linux/netfilter.h:257 + ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322 + dst_input ./include/net/dst.h:492 + ip6_rcv_finish net/ipv6/ip6_input.c:69 + NF_HOOK ./include/linux/netfilter.h:257 + ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203 + __netif_receive_skb_core+0x2f6f/0x3a20 
net/core/dev.c:4208 + __netif_receive_skb net/core/dev.c:4246 + process_backlog+0x667/0xba0 net/core/dev.c:4866 + napi_poll net/core/dev.c:5268 + net_rx_action+0xc95/0x1590 net/core/dev.c:5333 + __do_softirq+0x485/0x942 kernel/softirq.c:284 + origin: + save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 + kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198 + kmsan_kmalloc+0x7f/0xe0 mm/kmsan/kmsan.c:337 + kmem_cache_alloc+0x1c2/0x1e0 mm/slub.c:2766 + reqsk_alloc ./include/net/request_sock.h:87 + inet_reqsk_alloc+0xa4/0x5b0 net/ipv4/tcp_input.c:6200 + cookie_v6_check+0x4f4/0x1b50 net/ipv6/syncookies.c:169 + tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989 + tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298 + tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487 + ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279 + NF_HOOK ./include/linux/netfilter.h:257 + ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322 + dst_input ./include/net/dst.h:492 + ip6_rcv_finish net/ipv6/ip6_input.c:69 + NF_HOOK ./include/linux/netfilter.h:257 + ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203 + __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208 + __netif_receive_skb net/core/dev.c:4246 + process_backlog+0x667/0xba0 net/core/dev.c:4866 + napi_poll net/core/dev.c:5268 + net_rx_action+0xc95/0x1590 net/core/dev.c:5333 + __do_softirq+0x485/0x942 kernel/softirq.c:284 + ================================================================== + +Similar error is reported for cookie_v4_check(). + +Fixes: 58d607d3e52f ("tcp: provide skb->hash to synack packets") +Signed-off-by: Alexander Potapenko +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/syncookies.c | 1 + + net/ipv6/syncookies.c | 1 + + 2 files changed, 2 insertions(+) + +--- a/net/ipv4/syncookies.c ++++ b/net/ipv4/syncookies.c +@@ -337,6 +337,7 @@ struct sock *cookie_v4_check(struct sock + treq = tcp_rsk(req); + treq->rcv_isn = ntohl(th->seq) - 1; + treq->snt_isn = cookie; ++ treq->txhash = net_tx_rndhash(); + req->mss = mss; + ireq->ir_num = ntohs(th->dest); + ireq->ir_rmt_port = th->source; +--- a/net/ipv6/syncookies.c ++++ b/net/ipv6/syncookies.c +@@ -210,6 +210,7 @@ struct sock *cookie_v6_check(struct sock + treq->snt_synack.v64 = 0; + treq->rcv_isn = ntohl(th->seq) - 1; + treq->snt_isn = cookie; ++ treq->txhash = net_tx_rndhash(); + + /* + * We need to lookup the dst_entry to get the correct window size. diff --git a/queue-4.4/ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch b/queue-4.4/ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch new file mode 100644 index 00000000000..64a6d7df31f --- /dev/null +++ b/queue-4.4/ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch @@ -0,0 +1,55 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: Sabrina Dubroca +Date: Wed, 19 Jul 2017 22:28:55 +0200 +Subject: ipv6: avoid overflow of offset in ip6_find_1stfragopt + +From: Sabrina Dubroca + + +[ Upstream commit 6399f1fae4ec29fab5ec76070435555e256ca3a6 ] + +In some cases, offset can overflow and can cause an infinite loop in +ip6_find_1stfragopt(). Make it unsigned int to prevent the overflow, and +cap it at IPV6_MAXPLEN, since packets larger than that should be invalid. + +This problem has been here since before the beginning of git history. + +Signed-off-by: Sabrina Dubroca +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/output_core.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/ipv6/output_core.c ++++ b/net/ipv6/output_core.c +@@ -78,7 +78,7 @@ EXPORT_SYMBOL(ipv6_select_ident); + + int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) + { +- u16 offset = sizeof(struct ipv6hdr); ++ unsigned int offset = sizeof(struct ipv6hdr); + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); + int found_rhdr = 0; +@@ -86,6 +86,7 @@ int ip6_find_1stfragopt(struct sk_buff * + + while (offset <= packet_len) { + struct ipv6_opt_hdr *exthdr; ++ unsigned int len; + + switch (**nexthdr) { + +@@ -111,7 +112,10 @@ int ip6_find_1stfragopt(struct sk_buff * + + exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + + offset); +- offset += ipv6_optlen(exthdr); ++ len = ipv6_optlen(exthdr); ++ if (len + offset >= IPV6_MAXPLEN) ++ return -EINVAL; ++ offset += len; + *nexthdr = &exthdr->nexthdr; + } + diff --git a/queue-4.4/ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch b/queue-4.4/ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch new file mode 100644 index 00000000000..3ccf4a8c554 --- /dev/null +++ b/queue-4.4/ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch @@ -0,0 +1,54 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: Stefano Brivio +Date: Mon, 24 Jul 2017 23:14:28 +0200 +Subject: ipv6: Don't increase IPSTATS_MIB_FRAGFAILS twice in ip6_fragment() + +From: Stefano Brivio + + +[ Upstream commit afce615aaabfbaad02550e75c0bec106dafa1adf ] + +RFC 2465 defines ipv6IfStatsOutFragFails as: + + "The number of IPv6 datagrams that have been discarded + because they needed to be fragmented at this output + interface but could not be." + +The existing implementation, instead, would increase the counter +twice in case we fail to allocate room for single fragments: +once for the fragment, once for the datagram. 
+ +This didn't look intentional though. In one of the two affected +failure paths, the double increase was simply a result +of a new 'goto fail' statement, introduced to avoid a skb leak. +The other path appears to be affected since at least 2.6.12-rc2. + +Reported-by: Sabrina Dubroca +Fixes: 1d325d217c7f ("ipv6: ip6_fragment: fix headroom tests and skb leak") +Signed-off-by: Stefano Brivio +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_output.c | 4 ---- + 1 file changed, 4 deletions(-) + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -647,8 +647,6 @@ int ip6_fragment(struct net *net, struct + *prevhdr = NEXTHDR_FRAGMENT; + tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); + if (!tmp_hdr) { +- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), +- IPSTATS_MIB_FRAGFAILS); + err = -ENOMEM; + goto fail; + } +@@ -767,8 +765,6 @@ slow_path: + frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + + hroom + troom, GFP_ATOMIC); + if (!frag) { +- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), +- IPSTATS_MIB_FRAGFAILS); + err = -ENOMEM; + goto fail; + } diff --git a/queue-4.4/mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch b/queue-4.4/mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch new file mode 100644 index 00000000000..385fe14ddd1 --- /dev/null +++ b/queue-4.4/mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch @@ -0,0 +1,81 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: Thomas Jarosch +Date: Sat, 22 Jul 2017 17:14:34 +0200 +Subject: mcs7780: Fix initialization when CONFIG_VMAP_STACK is enabled + +From: Thomas Jarosch + + +[ Upstream commit 9476d393667968b4a02afbe9d35a3558482b943e ] + +DMA transfers are not allowed to buffers that are on the stack. +Therefore allocate a buffer to store the result of usb_control_msg(). 
+ +Fixes these bugreports: +https://bugzilla.kernel.org/show_bug.cgi?id=195217 + +https://bugzilla.redhat.com/show_bug.cgi?id=1421387 +https://bugzilla.redhat.com/show_bug.cgi?id=1427398 + +Shortened kernel backtrace from 4.11.9-200.fc25.x86_64: +kernel: ------------[ cut here ]------------ +kernel: WARNING: CPU: 3 PID: 2957 at drivers/usb/core/hcd.c:1587 +kernel: transfer buffer not dma capable +kernel: Call Trace: +kernel: dump_stack+0x63/0x86 +kernel: __warn+0xcb/0xf0 +kernel: warn_slowpath_fmt+0x5a/0x80 +kernel: usb_hcd_map_urb_for_dma+0x37f/0x570 +kernel: ? try_to_del_timer_sync+0x53/0x80 +kernel: usb_hcd_submit_urb+0x34e/0xb90 +kernel: ? schedule_timeout+0x17e/0x300 +kernel: ? del_timer_sync+0x50/0x50 +kernel: ? __slab_free+0xa9/0x300 +kernel: usb_submit_urb+0x2f4/0x560 +kernel: ? urb_destroy+0x24/0x30 +kernel: usb_start_wait_urb+0x6e/0x170 +kernel: usb_control_msg+0xdc/0x120 +kernel: mcs_get_reg+0x36/0x40 [mcs7780] +kernel: mcs_net_open+0xb5/0x5c0 [mcs7780] +... + +Regression goes back to 4.9, so it's a good candidate for -stable. +Though it's the decision of the maintainer. + +Thanks to Dan Williams for adding the "transfer buffer not dma capable" +warning in the first place. It instantly pointed me in the right direction. + +Patch has been tested with transferring data from a Polar watch. + +Signed-off-by: Thomas Jarosch +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/irda/mcs7780.c | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +--- a/drivers/net/irda/mcs7780.c ++++ b/drivers/net/irda/mcs7780.c +@@ -141,9 +141,19 @@ static int mcs_set_reg(struct mcs_cb *mc + static int mcs_get_reg(struct mcs_cb *mcs, __u16 reg, __u16 * val) + { + struct usb_device *dev = mcs->usbdev; +- int ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ, +- MCS_RD_RTYPE, 0, reg, val, 2, +- msecs_to_jiffies(MCS_CTRL_TIMEOUT)); ++ void *dmabuf; ++ int ret; ++ ++ dmabuf = kmalloc(sizeof(__u16), GFP_KERNEL); ++ if (!dmabuf) ++ return -ENOMEM; ++ ++ ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ, ++ MCS_RD_RTYPE, 0, reg, dmabuf, 2, ++ msecs_to_jiffies(MCS_CTRL_TIMEOUT)); ++ ++ memcpy(val, dmabuf, sizeof(__u16)); ++ kfree(dmabuf); + + return ret; + } diff --git a/queue-4.4/net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch b/queue-4.4/net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch new file mode 100644 index 00000000000..420eda85ea5 --- /dev/null +++ b/queue-4.4/net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch @@ -0,0 +1,51 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: Marc Gonzalez +Date: Tue, 25 Jul 2017 14:35:03 +0200 +Subject: net: ethernet: nb8800: Handle all 4 RGMII modes identically + +From: Marc Gonzalez + + +[ Upstream commit 4813497b537c6208c90d6cbecac5072d347de900 ] + +Before commit bf8f6952a233 ("Add blurb about RGMII") it was unclear +whose responsibility it was to insert the required clock skew, and +in hindsight, some PHY drivers got it wrong. The solution forward +is to introduce a new property, explicitly requiring skew from the +node to which it is attached. In the interim, this driver will handle +all 4 RGMII modes identically (no skew). + +Fixes: 52dfc8301248 ("net: ethernet: add driver for Aurora VLSI NB8800 Ethernet controller") +Signed-off-by: Marc Gonzalez +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/aurora/nb8800.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/aurora/nb8800.c ++++ b/drivers/net/ethernet/aurora/nb8800.c +@@ -608,7 +608,7 @@ static void nb8800_mac_config(struct net + mac_mode |= HALF_DUPLEX; + + if (gigabit) { +- if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII) ++ if (phy_interface_is_rgmii(dev->phydev)) + mac_mode |= RGMII_MODE; + + mac_mode |= GMAC_MODE; +@@ -1295,11 +1295,10 @@ static int nb8800_tangox_init(struct net + break; + + case PHY_INTERFACE_MODE_RGMII: +- pad_mode = PAD_MODE_RGMII; +- break; +- ++ case PHY_INTERFACE_MODE_RGMII_ID: ++ case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: +- pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY; ++ pad_mode = PAD_MODE_RGMII; + break; + + default: diff --git a/queue-4.4/net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch b/queue-4.4/net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch new file mode 100644 index 00000000000..bf05a531901 --- /dev/null +++ b/queue-4.4/net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch @@ -0,0 +1,66 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: Moshe Shemesh +Date: Sun, 25 Jun 2017 18:45:32 +0300 +Subject: net/mlx5: Fix command bad flow on command entry allocation failure + +From: Moshe Shemesh + + +[ Upstream commit 219c81f7d1d5a89656cb3b53d3b4e11e93608d80 ] + +When driver fail to allocate an entry to send command to FW, it must +notify the calling function and release the memory allocated for +this command. 
+ +Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB adapters') +Signed-off-by: Moshe Shemesh +Cc: kernel-team@fb.com +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +@@ -630,6 +630,10 @@ static void dump_command(struct mlx5_cor + pr_debug("\n"); + } + ++static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); ++static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, ++ struct mlx5_cmd_msg *msg); ++ + static void cmd_work_handler(struct work_struct *work) + { + struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); +@@ -638,16 +642,27 @@ static void cmd_work_handler(struct work + struct mlx5_cmd_layout *lay; + struct semaphore *sem; + unsigned long flags; ++ int alloc_ret; + + sem = ent->page_queue ? 
&cmd->pages_sem : &cmd->sem; + down(sem); + if (!ent->page_queue) { +- ent->idx = alloc_ent(cmd); +- if (ent->idx < 0) { ++ alloc_ret = alloc_ent(cmd); ++ if (alloc_ret < 0) { ++ if (ent->callback) { ++ ent->callback(-EAGAIN, ent->context); ++ mlx5_free_cmd_msg(dev, ent->out); ++ free_msg(dev, ent->in); ++ free_cmd(ent); ++ } else { ++ ent->ret = -EAGAIN; ++ complete(&ent->done); ++ } + mlx5_core_err(dev, "failed to allocate command entry\n"); + up(sem); + return; + } ++ ent->idx = alloc_ret; + } else { + ent->idx = cmd->max_reg_cmds; + spin_lock_irqsave(&cmd->alloc_lock, flags); diff --git a/queue-4.4/net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch b/queue-4.4/net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch new file mode 100644 index 00000000000..065c051186f --- /dev/null +++ b/queue-4.4/net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch @@ -0,0 +1,43 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: Florian Fainelli +Date: Fri, 28 Jul 2017 11:58:36 -0700 +Subject: net: phy: Correctly process PHY_HALTED in phy_stop_machine() + +From: Florian Fainelli + + +[ Upstream commit 7ad813f208533cebfcc32d3d7474dc1677d1b09a ] + +Marc reported that he was not getting the PHY library adjust_link() +callback function to run when calling phy_stop() + phy_disconnect() +which does not indeed happen because we set the state machine to +PHY_HALTED but we don't get to run it to process this state past that +point. + +Fix this with a synchronous call to phy_state_machine() in order to have +the state machine actually act on PHY_HALTED, set the PHY device's link +down, turn the network device's carrier off and finally call the +adjust_link() function. + +Reported-by: Marc Gonzalez +Fixes: a390d1f379cf ("phylib: convert state_queue work to delayed_work") +Signed-off-by: Florian Fainelli +Signed-off-by: Marc Gonzalez +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phy.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/phy/phy.c ++++ b/drivers/net/phy/phy.c +@@ -541,6 +541,9 @@ void phy_stop_machine(struct phy_device + if (phydev->state > PHY_UP && phydev->state != PHY_HALTED) + phydev->state = PHY_UP; + mutex_unlock(&phydev->lock); ++ ++ /* Now we can run the state machine synchronously */ ++ phy_state_machine(&phydev->state_queue.work); + } + + /** diff --git a/queue-4.4/net-zero-terminate-ifr_name-in-dev_ifname.patch b/queue-4.4/net-zero-terminate-ifr_name-in-dev_ifname.patch new file mode 100644 index 00000000000..a29bf4cd290 --- /dev/null +++ b/queue-4.4/net-zero-terminate-ifr_name-in-dev_ifname.patch @@ -0,0 +1,28 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: "David S. Miller" +Date: Wed, 19 Jul 2017 13:33:24 -0700 +Subject: net: Zero terminate ifr_name in dev_ifname(). + +From: "David S. Miller" + + +[ Upstream commit 63679112c536289826fec61c917621de95ba2ade ] + +The ifr.ifr_name is passed around and assumed to be NULL terminated. + +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev_ioctl.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/dev_ioctl.c ++++ b/net/core/dev_ioctl.c +@@ -28,6 +28,7 @@ static int dev_ifname(struct net *net, s + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; ++ ifr.ifr_name[IFNAMSIZ-1] = 0; + + error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex); + if (error) diff --git a/queue-4.4/openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch b/queue-4.4/openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch new file mode 100644 index 00000000000..f9ad01ab8ab --- /dev/null +++ b/queue-4.4/openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch @@ -0,0 +1,46 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: Liping Zhang +Date: Sun, 23 Jul 2017 17:52:23 +0800 +Subject: openvswitch: fix potential out of bound access in parse_ct + +From: Liping Zhang + + +[ Upstream commit 69ec932e364b1ba9c3a2085fe96b76c8a3f71e7c ] + +Before the 'type' is validated, we shouldn't use it to fetch the +ovs_ct_attr_lens's minlen and maxlen, else, out of bound access +may happen. + +Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") +Signed-off-by: Liping Zhang +Acked-by: Pravin B Shelar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/conntrack.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/openvswitch/conntrack.c ++++ b/net/openvswitch/conntrack.c +@@ -577,8 +577,8 @@ static int parse_ct(const struct nlattr + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); +- int maxlen = ovs_ct_attr_lens[type].maxlen; +- int minlen = ovs_ct_attr_lens[type].minlen; ++ int maxlen; ++ int minlen; + + if (type > OVS_CT_ATTR_MAX) { + OVS_NLERR(log, +@@ -586,6 +586,9 @@ static int parse_ct(const struct nlattr + type, OVS_CT_ATTR_MAX); + return -EINVAL; + } ++ ++ maxlen = ovs_ct_attr_lens[type].maxlen; ++ minlen = ovs_ct_attr_lens[type].minlen; + if (nla_len(a) < minlen || nla_len(a) > maxlen) { + OVS_NLERR(log, + "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)", diff --git a/queue-4.4/packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch b/queue-4.4/packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch new file mode 100644 index 00000000000..e6263d31765 --- /dev/null +++ b/queue-4.4/packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch @@ -0,0 +1,60 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: WANG Cong +Date: Mon, 24 Jul 2017 10:07:32 -0700 +Subject: packet: fix use-after-free in prb_retire_rx_blk_timer_expired() + +From: WANG Cong + + +[ Upstream commit c800aaf8d869f2b9b47b10c5c312fe19f0a94042 ] + +There are multiple reports showing we have a use-after-free in +the timer prb_retire_rx_blk_timer_expired(), where we use struct +tpacket_kbdq_core::pkbdq, a pg_vec, after it gets freed by +free_pg_vec(). + +The interesting part is it is not freed via packet_release() but +via packet_setsockopt(), which means we are not closing the socket. +Looking into the big and fat function packet_set_ring(), this could +happen if we satisfy the following conditions: + +1. closing == 0, not on packet_release() path +2. 
req->tp_block_nr == 0, we don't allocate a new pg_vec +3. rx_ring->pg_vec is already set as V3, which means we already called + packet_set_ring() with req->tp_block_nr > 0 previously +4. req->tp_frame_nr == 0, pass sanity check +5. po->mapped == 0, never called mmap() + +In this scenario we are clearing the old rx_ring->pg_vec, so we need +to free this pg_vec, but we don't stop the timer on this path because +of closing==0. + +The timer has to be stopped as long as we need to free pg_vec, therefore +the check on closing!=0 is wrong, we should check pg_vec!=NULL instead. + +Thanks to liujian for testing different fixes. + +Reported-by: alexander.levin@verizon.com +Reported-by: Dave Jones +Reported-by: liujian (CE) +Tested-by: liujian (CE) +Cc: Ding Tianhong +Cc: Willem de Bruijn +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -4225,7 +4225,7 @@ static int packet_set_ring(struct sock * + register_prot_hook(sk); + } + spin_unlock(&po->bind_lock); +- if (closing && (po->tp_version > TPACKET_V2)) { ++ if (pg_vec && (po->tp_version > TPACKET_V2)) { + /* Because we don't support block-based V3 on tx-ring */ + if (!tx_ring) + prb_shutdown_retire_blk_timer(po, rb_queue); diff --git a/queue-4.4/rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch b/queue-4.4/rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch new file mode 100644 index 00000000000..a3411ec9cbc --- /dev/null +++ b/queue-4.4/rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch @@ -0,0 +1,38 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: WANG Cong +Date: Thu, 20 Jul 2017 11:27:57 -0700 +Subject: rtnetlink: allocate more memory for dev_set_mac_address() + +From: WANG Cong + + +[ Upstream commit 153711f9421be5dbc973dc57a4109dc9d54c89b1 ] + +virtnet_set_mac_address() interprets mac address as 
struct +sockaddr, but upper layer only allocates dev->addr_len +which is ETH_ALEN + sizeof(sa_family_t) in this case. + +We lack a unified definition for mac address, so just fix +the upper layer, this also allows drivers to interpret it +to struct sockaddr freely. + +Reported-by: David Ahern +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -1742,7 +1742,8 @@ static int do_setlink(const struct sk_bu + struct sockaddr *sa; + int len; + +- len = sizeof(sa_family_t) + dev->addr_len; ++ len = sizeof(sa_family_t) + max_t(size_t, dev->addr_len, ++ sizeof(*sa)); + sa = kmalloc(len, GFP_KERNEL); + if (!sa) { + err = -ENOMEM; diff --git a/queue-4.4/sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch b/queue-4.4/sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch new file mode 100644 index 00000000000..226f917a395 --- /dev/null +++ b/queue-4.4/sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch @@ -0,0 +1,140 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: Alexander Potapenko +Date: Fri, 14 Jul 2017 18:32:45 +0200 +Subject: sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}() + +From: Alexander Potapenko + + +[ Upstream commit b1f5bfc27a19f214006b9b4db7b9126df2dfdf5a ] + +If the length field of the iterator (|pos.p| or |err|) is past the end +of the chunk, we shouldn't access it. + +This bug has been detected by KMSAN. For the following pair of system +calls: + + socket(PF_INET6, SOCK_STREAM, 0x84 /* IPPROTO_??? 
*/) = 3 + sendto(3, "A", 1, MSG_OOB, {sa_family=AF_INET6, sin6_port=htons(0), + inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, + sin6_scope_id=0}, 28) = 1 + +the tool has reported a use of uninitialized memory: + + ================================================================== + BUG: KMSAN: use of uninitialized memory in sctp_rcv+0x17b8/0x43b0 + CPU: 1 PID: 2940 Comm: probe Not tainted 4.11.0-rc5+ #2926 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs + 01/01/2011 + Call Trace: + + __dump_stack lib/dump_stack.c:16 + dump_stack+0x172/0x1c0 lib/dump_stack.c:52 + kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927 + __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469 + __sctp_rcv_init_lookup net/sctp/input.c:1074 + __sctp_rcv_lookup_harder net/sctp/input.c:1233 + __sctp_rcv_lookup net/sctp/input.c:1255 + sctp_rcv+0x17b8/0x43b0 net/sctp/input.c:170 + sctp6_rcv+0x32/0x70 net/sctp/ipv6.c:984 + ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279 + NF_HOOK ./include/linux/netfilter.h:257 + ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322 + dst_input ./include/net/dst.h:492 + ip6_rcv_finish net/ipv6/ip6_input.c:69 + NF_HOOK ./include/linux/netfilter.h:257 + ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203 + __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208 + __netif_receive_skb net/core/dev.c:4246 + process_backlog+0x667/0xba0 net/core/dev.c:4866 + napi_poll net/core/dev.c:5268 + net_rx_action+0xc95/0x1590 net/core/dev.c:5333 + __do_softirq+0x485/0x942 kernel/softirq.c:284 + do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 + + do_softirq kernel/softirq.c:328 + __local_bh_enable_ip+0x25b/0x290 kernel/softirq.c:181 + local_bh_enable+0x37/0x40 ./include/linux/bottom_half.h:31 + rcu_read_unlock_bh ./include/linux/rcupdate.h:931 + ip6_finish_output2+0x19b2/0x1cf0 net/ipv6/ip6_output.c:124 + ip6_finish_output+0x764/0x970 net/ipv6/ip6_output.c:149 + NF_HOOK_COND ./include/linux/netfilter.h:246 + ip6_output+0x456/0x520 
net/ipv6/ip6_output.c:163 + dst_output ./include/net/dst.h:486 + NF_HOOK ./include/linux/netfilter.h:257 + ip6_xmit+0x1841/0x1c00 net/ipv6/ip6_output.c:261 + sctp_v6_xmit+0x3b7/0x470 net/sctp/ipv6.c:225 + sctp_packet_transmit+0x38cb/0x3a20 net/sctp/output.c:632 + sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885 + sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750 + sctp_side_effects net/sctp/sm_sideeffect.c:1773 + sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147 + sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88 + sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954 + inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 + sock_sendmsg_nosec net/socket.c:633 + sock_sendmsg net/socket.c:643 + SYSC_sendto+0x608/0x710 net/socket.c:1696 + SyS_sendto+0x8a/0xb0 net/socket.c:1664 + do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285 + entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:246 + RIP: 0033:0x401133 + RSP: 002b:00007fff6d99cd38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c + RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000401133 + RDX: 0000000000000001 RSI: 0000000000494088 RDI: 0000000000000003 + RBP: 00007fff6d99cd90 R08: 00007fff6d99cd50 R09: 000000000000001c + R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000000 + R13: 00000000004063d0 R14: 0000000000406460 R15: 0000000000000000 + origin: + save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 + kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198 + kmsan_poison_shadow+0x6d/0xc0 mm/kmsan/kmsan.c:211 + slab_alloc_node mm/slub.c:2743 + __kmalloc_node_track_caller+0x200/0x360 mm/slub.c:4351 + __kmalloc_reserve net/core/skbuff.c:138 + __alloc_skb+0x26b/0x840 net/core/skbuff.c:231 + alloc_skb ./include/linux/skbuff.h:933 + sctp_packet_transmit+0x31e/0x3a20 net/sctp/output.c:570 + sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885 + sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750 + sctp_side_effects 
net/sctp/sm_sideeffect.c:1773 + sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147 + sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88 + sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954 + inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 + sock_sendmsg_nosec net/socket.c:633 + sock_sendmsg net/socket.c:643 + SYSC_sendto+0x608/0x710 net/socket.c:1696 + SyS_sendto+0x8a/0xb0 net/socket.c:1664 + do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285 + return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:246 + ================================================================== + +Signed-off-by: Alexander Potapenko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sctp/sctp.h | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/include/net/sctp/sctp.h ++++ b/include/net/sctp/sctp.h +@@ -444,6 +444,8 @@ _sctp_walk_params((pos), (chunk), ntohs( + + #define _sctp_walk_params(pos, chunk, end, member)\ + for (pos.v = chunk->member;\ ++ (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\ ++ (void *)chunk + end) &&\ + pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ + ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ + pos.v += WORD_ROUND(ntohs(pos.p->length))) +@@ -454,6 +456,8 @@ _sctp_walk_errors((err), (chunk_hdr), nt + #define _sctp_walk_errors(err, chunk_hdr, end)\ + for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ + sizeof(sctp_chunkhdr_t));\ ++ ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\ ++ (void *)chunk_hdr + end) &&\ + (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ + ntohs(err->length) >= sizeof(sctp_errhdr_t); \ + err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length)))) diff --git a/queue-4.4/sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch b/queue-4.4/sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch new file mode 100644 index 00000000000..64381c472d2 --- /dev/null +++ 
b/queue-4.4/sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch @@ -0,0 +1,59 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: Xin Long +Date: Wed, 26 Jul 2017 16:24:59 +0800 +Subject: sctp: fix the check for _sctp_walk_params and _sctp_walk_errors + +From: Xin Long + + +[ Upstream commit 6b84202c946cd3da3a8daa92c682510e9ed80321 ] + +Commit b1f5bfc27a19 ("sctp: don't dereference ptr before leaving +_sctp_walk_{params, errors}()") tried to fix the issue that it +may overstep the chunk end for _sctp_walk_{params, errors} with +'chunk_end > offset(length) + sizeof(length)'. + +But it introduced a side effect: When processing INIT, it verifies +the chunks with 'param.v == chunk_end' after iterating all params +by sctp_walk_params(). With the check 'chunk_end > offset(length) ++ sizeof(length)', it would return when the last param is not yet +accessed, because the last param is usually the fwdtsn supported param, +whose size is 4, so 'chunk_end == offset(length) + sizeof(length)'. + +This is a bad issue, even preventing sctp from completing the 4-way handshake. +Client would always get abort when connecting to server, due to +the failure of INIT chunk verification on server. + +The patch is to use 'chunk_end >= offset(length) + sizeof(length)' +instead of 'chunk_end > offset(length) + sizeof(length)' for both +_sctp_walk_params and _sctp_walk_errors. + +Fixes: b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()") +Signed-off-by: Xin Long +Acked-by: Neil Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sctp/sctp.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/include/net/sctp/sctp.h ++++ b/include/net/sctp/sctp.h +@@ -444,7 +444,7 @@ _sctp_walk_params((pos), (chunk), ntohs( + + #define _sctp_walk_params(pos, chunk, end, member)\ + for (pos.v = chunk->member;\ +- (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\ ++ (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\ + (void *)chunk + end) &&\ + pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ + ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ +@@ -456,7 +456,7 @@ _sctp_walk_errors((err), (chunk_hdr), nt + #define _sctp_walk_errors(err, chunk_hdr, end)\ + for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ + sizeof(sctp_chunkhdr_t));\ +- ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\ ++ ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\ + (void *)chunk_hdr + end) &&\ + (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ + ntohs(err->length) >= sizeof(sctp_errhdr_t); \ diff --git a/queue-4.4/series b/queue-4.4/series index e018215a7b1..2a5b46bec2d 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -20,3 +20,23 @@ media-lirc-lirc_get_rec_resolution-should-return-microseconds.patch f2fs-sanity-check-checkpoint-segno-and-blkoff.patch drm-rcar-du-fix-backport-bug.patch saa7164-fix-double-fetch-pcie-access-condition.patch +ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch +net-zero-terminate-ifr_name-in-dev_ifname.patch +ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch +ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch +rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch +mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch +openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch +packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch 
+ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch +net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch +dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch +dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch +dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch +sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch +sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch +net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch +net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch +xen-netback-correctly-schedule-rate-limited-queues.patch +sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch +sparc64-prevent-perf-from-running-during-super-critical-sections.patch diff --git a/queue-4.4/sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch b/queue-4.4/sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch new file mode 100644 index 00000000000..9246691f963 --- /dev/null +++ b/queue-4.4/sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch @@ -0,0 +1,335 @@ +From foo@baz Tue Aug 8 16:56:08 PDT 2017 +From: Jane Chu +Date: Tue, 11 Jul 2017 12:00:54 -0600 +Subject: sparc64: Measure receiver forward progress to avoid send mondo timeout + +From: Jane Chu + + +[ Upstream commit 9d53caec84c7c5700e7c1ed744ea584fff55f9ac ] + +A large sun4v SPARC system may have moments of intensive xcall activities, +usually caused by unmapping many pages on many CPUs concurrently. This can +flood receivers with CPU mondo interrupts for an extended period, causing +some unlucky senders to hit send-mondo timeout. This problem gets worse +as cpu count increases because sometimes mappings must be invalidated on +all CPUs, and sometimes all CPUs may gang up on a single CPU. + +But a busy system is not a broken system. 
In the above scenario, as long +as the receiver is making forward progress processing mondo interrupts, +the sender should continue to retry. + +This patch implements the receiver's forward progress meter by introducing +a per cpu counter 'cpu_mondo_counter[cpu]' where 'cpu' is in the range +of 0..NR_CPUS-1. The receiver increments its counter as soon as it receives +a mondo and the sender tracks the receiver's counter. If the receiver has +stopped making forward progress when the retry limit is reached, the sender +declares send-mondo-timeout and panics; otherwise, the receiver is allowed +to keep making forward progress. + +In addition, it's been observed that PCIe hotplug events generate Correctable +Errors that are handled by hypervisor and then OS. Hypervisor 'borrows' +a guest cpu strand briefly to provide the service. If the cpu strand is +simultaneously the only cpu targeted by a mondo, it may not be available +for the mondo in 20msec, causing SUN4V mondo timeout. It appears that 1 second +is the agreed wait time between hypervisor and guest OS; this patch makes +the adjustment. + +Orabug: 25476541 +Orabug: 26417466 + +Signed-off-by: Jane Chu +Reviewed-by: Steve Sistare +Reviewed-by: Anthony Yznaga +Reviewed-by: Rob Gardner +Reviewed-by: Thomas Tai +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/trap_block.h | 1 + arch/sparc/kernel/smp_64.c | 189 ++++++++++++++++++++++-------------- + arch/sparc/kernel/sun4v_ivec.S | 15 ++ + arch/sparc/kernel/traps_64.c | 1 + 4 files changed, 134 insertions(+), 72 deletions(-) + +--- a/arch/sparc/include/asm/trap_block.h ++++ b/arch/sparc/include/asm/trap_block.h +@@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR + void init_cur_cpu_trap(struct thread_info *); + void setup_tba(void); + extern int ncpus_probed; ++extern u64 cpu_mondo_counter[NR_CPUS]; + + unsigned long real_hard_smp_processor_id(void); + +--- a/arch/sparc/kernel/smp_64.c ++++ b/arch/sparc/kernel/smp_64.c +@@ -617,22 +617,48 @@ retry: + } + } + +-/* Multi-cpu list version. */ ++#define CPU_MONDO_COUNTER(cpuid) (cpu_mondo_counter[cpuid]) ++#define MONDO_USEC_WAIT_MIN 2 ++#define MONDO_USEC_WAIT_MAX 100 ++#define MONDO_RETRY_LIMIT 500000 ++ ++/* Multi-cpu list version. ++ * ++ * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'. ++ * Sometimes not all cpus receive the mondo, requiring us to re-send ++ * the mondo until all cpus have received, or cpus are truly stuck ++ * unable to receive mondo, and we timeout. ++ * Occasionally a target cpu strand is borrowed briefly by hypervisor to ++ * perform guest service, such as PCIe error handling. Consider the ++ * service time, 1 second overall wait is reasonable for 1 cpu. ++ * Here two in-between mondo check wait time are defined: 2 usec for ++ * single cpu quick turn around and up to 100usec for large cpu count. ++ * Deliver mondo to large number of cpus could take longer, we adjusts ++ * the retry count as long as target cpus are making forward progress. 
++ */ + static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) + { +- int retries, this_cpu, prev_sent, i, saw_cpu_error; ++ int this_cpu, tot_cpus, prev_sent, i, rem; ++ int usec_wait, retries, tot_retries; ++ u16 first_cpu = 0xffff; ++ unsigned long xc_rcvd = 0; + unsigned long status; ++ int ecpuerror_id = 0; ++ int enocpu_id = 0; + u16 *cpu_list; ++ u16 cpu; + + this_cpu = smp_processor_id(); +- + cpu_list = __va(tb->cpu_list_pa); +- +- saw_cpu_error = 0; +- retries = 0; ++ usec_wait = cnt * MONDO_USEC_WAIT_MIN; ++ if (usec_wait > MONDO_USEC_WAIT_MAX) ++ usec_wait = MONDO_USEC_WAIT_MAX; ++ retries = tot_retries = 0; ++ tot_cpus = cnt; + prev_sent = 0; ++ + do { +- int forward_progress, n_sent; ++ int n_sent, mondo_delivered, target_cpu_busy; + + status = sun4v_cpu_mondo_send(cnt, + tb->cpu_list_pa, +@@ -640,94 +666,113 @@ static void hypervisor_xcall_deliver(str + + /* HV_EOK means all cpus received the xcall, we're done. */ + if (likely(status == HV_EOK)) +- break; ++ goto xcall_done; ++ ++ /* If not these non-fatal errors, panic */ ++ if (unlikely((status != HV_EWOULDBLOCK) && ++ (status != HV_ECPUERROR) && ++ (status != HV_ENOCPU))) ++ goto fatal_errors; + + /* First, see if we made any forward progress. + * ++ * Go through the cpu_list, count the target cpus that have ++ * received our mondo (n_sent), and those that did not (rem). ++ * Re-pack cpu_list with the cpus remain to be retried in the ++ * front - this simplifies tracking the truly stalled cpus. ++ * + * The hypervisor indicates successful sends by setting + * cpu list entries to the value 0xffff. ++ * ++ * EWOULDBLOCK means some target cpus did not receive the ++ * mondo and retry usually helps. ++ * ++ * ECPUERROR means at least one target cpu is in error state, ++ * it's usually safe to skip the faulty cpu and retry. 
++ * ++ * ENOCPU means one of the target cpu doesn't belong to the ++ * domain, perhaps offlined which is unexpected, but not ++ * fatal and it's okay to skip the offlined cpu. + */ ++ rem = 0; + n_sent = 0; + for (i = 0; i < cnt; i++) { +- if (likely(cpu_list[i] == 0xffff)) ++ cpu = cpu_list[i]; ++ if (likely(cpu == 0xffff)) { + n_sent++; ++ } else if ((status == HV_ECPUERROR) && ++ (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) { ++ ecpuerror_id = cpu + 1; ++ } else if (status == HV_ENOCPU && !cpu_online(cpu)) { ++ enocpu_id = cpu + 1; ++ } else { ++ cpu_list[rem++] = cpu; ++ } + } + +- forward_progress = 0; +- if (n_sent > prev_sent) +- forward_progress = 1; ++ /* No cpu remained, we're done. */ ++ if (rem == 0) ++ break; + +- prev_sent = n_sent; ++ /* Otherwise, update the cpu count for retry. */ ++ cnt = rem; + +- /* If we get a HV_ECPUERROR, then one or more of the cpus +- * in the list are in error state. Use the cpu_state() +- * hypervisor call to find out which cpus are in error state. ++ /* Record the overall number of mondos received by the ++ * first of the remaining cpus. + */ +- if (unlikely(status == HV_ECPUERROR)) { +- for (i = 0; i < cnt; i++) { +- long err; +- u16 cpu; +- +- cpu = cpu_list[i]; +- if (cpu == 0xffff) +- continue; +- +- err = sun4v_cpu_state(cpu); +- if (err == HV_CPU_STATE_ERROR) { +- saw_cpu_error = (cpu + 1); +- cpu_list[i] = 0xffff; +- } +- } +- } else if (unlikely(status != HV_EWOULDBLOCK)) +- goto fatal_mondo_error; ++ if (first_cpu != cpu_list[0]) { ++ first_cpu = cpu_list[0]; ++ xc_rcvd = CPU_MONDO_COUNTER(first_cpu); ++ } + +- /* Don't bother rewriting the CPU list, just leave the +- * 0xffff and non-0xffff entries in there and the +- * hypervisor will do the right thing. +- * +- * Only advance timeout state if we didn't make any +- * forward progress. ++ /* Was any mondo delivered successfully? */ ++ mondo_delivered = (n_sent > prev_sent); ++ prev_sent = n_sent; ++ ++ /* or, was any target cpu busy processing other mondos? 
*/ ++ target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu)); ++ xc_rcvd = CPU_MONDO_COUNTER(first_cpu); ++ ++ /* Retry count is for no progress. If we're making progress, ++ * reset the retry count. + */ +- if (unlikely(!forward_progress)) { +- if (unlikely(++retries > 10000)) +- goto fatal_mondo_timeout; +- +- /* Delay a little bit to let other cpus catch up +- * on their cpu mondo queue work. +- */ +- udelay(2 * cnt); ++ if (likely(mondo_delivered || target_cpu_busy)) { ++ tot_retries += retries; ++ retries = 0; ++ } else if (unlikely(retries > MONDO_RETRY_LIMIT)) { ++ goto fatal_mondo_timeout; + } +- } while (1); + +- if (unlikely(saw_cpu_error)) +- goto fatal_mondo_cpu_error; ++ /* Delay a little bit to let other cpus catch up on ++ * their cpu mondo queue work. ++ */ ++ if (!mondo_delivered) ++ udelay(usec_wait); + +- return; ++ retries++; ++ } while (1); + +-fatal_mondo_cpu_error: +- printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " +- "(including %d) were in error state\n", +- this_cpu, saw_cpu_error - 1); ++xcall_done: ++ if (unlikely(ecpuerror_id > 0)) { ++ pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n", ++ this_cpu, ecpuerror_id - 1); ++ } else if (unlikely(enocpu_id > 0)) { ++ pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n", ++ this_cpu, enocpu_id - 1); ++ } + return; + ++fatal_errors: ++ /* fatal errors include bad alignment, etc */ ++ pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n", ++ this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa); ++ panic("Unexpected SUN4V mondo error %lu\n", status); ++ + fatal_mondo_timeout: +- printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " +- " progress after %d retries.\n", +- this_cpu, retries); +- goto dump_cpu_list_and_out; +- +-fatal_mondo_error: +- printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n", +- this_cpu, status); +- printk(KERN_CRIT "CPU[%d]: Args were 
cnt(%d) cpulist_pa(%lx) " +- "mondo_block_pa(%lx)\n", +- this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa); +- +-dump_cpu_list_and_out: +- printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu); +- for (i = 0; i < cnt; i++) +- printk("%u ", cpu_list[i]); +- printk("]\n"); ++ /* some cpus being non-responsive to the cpu mondo */ ++ pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n", ++ this_cpu, first_cpu, (tot_retries + retries), tot_cpus); ++ panic("SUN4V mondo timeout panic\n"); + } + + static void (*xcall_deliver_impl)(struct trap_per_cpu *, int); +--- a/arch/sparc/kernel/sun4v_ivec.S ++++ b/arch/sparc/kernel/sun4v_ivec.S +@@ -26,6 +26,21 @@ sun4v_cpu_mondo: + ldxa [%g0] ASI_SCRATCHPAD, %g4 + sub %g4, TRAP_PER_CPU_FAULT_INFO, %g4 + ++ /* Get smp_processor_id() into %g3 */ ++ sethi %hi(trap_block), %g5 ++ or %g5, %lo(trap_block), %g5 ++ sub %g4, %g5, %g3 ++ srlx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 ++ ++ /* Increment cpu_mondo_counter[smp_processor_id()] */ ++ sethi %hi(cpu_mondo_counter), %g5 ++ or %g5, %lo(cpu_mondo_counter), %g5 ++ sllx %g3, 3, %g3 ++ add %g5, %g3, %g5 ++ ldx [%g5], %g3 ++ add %g3, 1, %g3 ++ stx %g3, [%g5] ++ + /* Get CPU mondo queue base phys address into %g7. 
*/ + ldx [%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7 + +--- a/arch/sparc/kernel/traps_64.c ++++ b/arch/sparc/kernel/traps_64.c +@@ -2659,6 +2659,7 @@ void do_getpsr(struct pt_regs *regs) + } + } + ++u64 cpu_mondo_counter[NR_CPUS] = {0}; + struct trap_per_cpu trap_block[NR_CPUS]; + EXPORT_SYMBOL(trap_block); + diff --git a/queue-4.4/sparc64-prevent-perf-from-running-during-super-critical-sections.patch b/queue-4.4/sparc64-prevent-perf-from-running-during-super-critical-sections.patch new file mode 100644 index 00000000000..a6bb818b6b4 --- /dev/null +++ b/queue-4.4/sparc64-prevent-perf-from-running-during-super-critical-sections.patch @@ -0,0 +1,133 @@ +From foo@baz Tue Aug 8 16:56:08 PDT 2017 +From: Rob Gardner +Date: Mon, 17 Jul 2017 09:22:27 -0600 +Subject: sparc64: Prevent perf from running during super critical sections + +From: Rob Gardner + + +[ Upstream commit fc290a114fc6034b0f6a5a46e2fb7d54976cf87a ] + +This fixes another cause of random segfaults and bus errors that may +occur while running perf with the callgraph option. + +Critical sections beginning with spin_lock_irqsave() raise the interrupt +level to PIL_NORMAL_MAX (14) and intentionally do not block performance +counter interrupts, which arrive at PIL_NMI (15). + +But some sections of code are "super critical" with respect to perf +because the perf_callchain_user() path accesses user space and may cause +TLB activity as well as faults as it unwinds the user stack. + +One particular critical section occurs in switch_mm: + + spin_lock_irqsave(&mm->context.lock, flags); + ... + load_secondary_context(mm); + tsb_context_switch(mm); + ... + spin_unlock_irqrestore(&mm->context.lock, flags); + +If a perf interrupt arrives in between load_secondary_context() and +tsb_context_switch(), then perf_callchain_user() could execute with +the context ID of one process, but with an active TSB for a different +process. 
When the user stack is accessed, it is very likely to +incur a TLB miss, since the h/w context ID has been changed. The TLB +will then be reloaded with a translation from the TSB for one process, +but using a context ID for another process. This exposes memory from +one process to another, and since it is a mapping for stack memory, +this usually causes the new process to crash quickly. + +This super critical section needs more protection than is provided +by spin_lock_irqsave() since perf interrupts must not be allowed in. + +Since __tsb_context_switch already goes through the trouble of +disabling interrupts completely, we fix this by moving the secondary +context load down into this better protected region. + +Orabug: 25577560 + +Signed-off-by: Dave Aldridge +Signed-off-by: Rob Gardner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/mmu_context_64.h | 12 +++++++----- + arch/sparc/kernel/tsb.S | 12 ++++++++++++ + arch/sparc/power/hibernate.c | 3 +-- + 3 files changed, 20 insertions(+), 7 deletions(-) + +--- a/arch/sparc/include/asm/mmu_context_64.h ++++ b/arch/sparc/include/asm/mmu_context_64.h +@@ -25,9 +25,11 @@ void destroy_context(struct mm_struct *m + void __tsb_context_switch(unsigned long pgd_pa, + struct tsb_config *tsb_base, + struct tsb_config *tsb_huge, +- unsigned long tsb_descr_pa); ++ unsigned long tsb_descr_pa, ++ unsigned long secondary_ctx); + +-static inline void tsb_context_switch(struct mm_struct *mm) ++static inline void tsb_context_switch_ctx(struct mm_struct *mm, ++ unsigned long ctx) + { + __tsb_context_switch(__pa(mm->pgd), + &mm->context.tsb_block[0], +@@ -38,7 +40,8 @@ static inline void tsb_context_switch(st + #else + NULL + #endif +- , __pa(&mm->context.tsb_descr[0])); ++ , __pa(&mm->context.tsb_descr[0]), ++ ctx); + } + + void tsb_grow(struct mm_struct *mm, +@@ -110,8 +113,7 @@ static inline void switch_mm(struct mm_s + * cpu0 to update it's TSB because at that point the cpu_vm_mask + 
* only had cpu1 set in it. + */ +- load_secondary_context(mm); +- tsb_context_switch(mm); ++ tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); + + /* Any time a processor runs a context on an address space + * for the first time, we must flush that context out of the +--- a/arch/sparc/kernel/tsb.S ++++ b/arch/sparc/kernel/tsb.S +@@ -375,6 +375,7 @@ tsb_flush: + * %o1: TSB base config pointer + * %o2: TSB huge config pointer, or NULL if none + * %o3: Hypervisor TSB descriptor physical address ++ * %o4: Secondary context to load, if non-zero + * + * We have to run this whole thing with interrupts + * disabled so that the current cpu doesn't change +@@ -387,6 +388,17 @@ __tsb_context_switch: + rdpr %pstate, %g1 + wrpr %g1, PSTATE_IE, %pstate + ++ brz,pn %o4, 1f ++ mov SECONDARY_CONTEXT, %o5 ++ ++661: stxa %o4, [%o5] ASI_DMMU ++ .section .sun4v_1insn_patch, "ax" ++ .word 661b ++ stxa %o4, [%o5] ASI_MMU ++ .previous ++ flush %g6 ++ ++1: + TRAP_LOAD_TRAP_BLOCK(%g2, %g3) + + stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR] +--- a/arch/sparc/power/hibernate.c ++++ b/arch/sparc/power/hibernate.c +@@ -35,6 +35,5 @@ void restore_processor_state(void) + { + struct mm_struct *mm = current->active_mm; + +- load_secondary_context(mm); +- tsb_context_switch(mm); ++ tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); + } diff --git a/queue-4.4/xen-netback-correctly-schedule-rate-limited-queues.patch b/queue-4.4/xen-netback-correctly-schedule-rate-limited-queues.patch new file mode 100644 index 00000000000..e738bdc0473 --- /dev/null +++ b/queue-4.4/xen-netback-correctly-schedule-rate-limited-queues.patch @@ -0,0 +1,82 @@ +From foo@baz Tue Aug 8 16:51:58 PDT 2017 +From: Wei Liu +Date: Wed, 21 Jun 2017 10:21:22 +0100 +Subject: xen-netback: correctly schedule rate-limited queues + +From: Wei Liu + + +[ Upstream commit dfa523ae9f2542bee4cddaea37b3be3e157f6e6b ] + +Add a flag to indicate if a queue is rate-limited. 
Test the flag in +NAPI poll handler and avoid rescheduling the queue if true, otherwise +we risk locking up the host. The rescheduling will be done in the +timer callback function. + +Reported-by: Jean-Louis Dupond +Signed-off-by: Wei Liu +Tested-by: Jean-Louis Dupond +Reviewed-by: Paul Durrant +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netback/common.h | 1 + + drivers/net/xen-netback/interface.c | 6 +++++- + drivers/net/xen-netback/netback.c | 6 +++++- + 3 files changed, 11 insertions(+), 2 deletions(-) + +--- a/drivers/net/xen-netback/common.h ++++ b/drivers/net/xen-netback/common.h +@@ -201,6 +201,7 @@ struct xenvif_queue { /* Per-queue data + unsigned long remaining_credit; + struct timer_list credit_timeout; + u64 credit_window_start; ++ bool rate_limited; + + /* Statistics */ + struct xenvif_stats stats; +--- a/drivers/net/xen-netback/interface.c ++++ b/drivers/net/xen-netback/interface.c +@@ -105,7 +105,11 @@ static int xenvif_poll(struct napi_struc + + if (work_done < budget) { + napi_complete(napi); +- xenvif_napi_schedule_or_enable_events(queue); ++ /* If the queue is rate-limited, it shall be ++ * rescheduled in the timer callback. ++ */ ++ if (likely(!queue->rate_limited)) ++ xenvif_napi_schedule_or_enable_events(queue); + } + + return work_done; +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -687,6 +687,7 @@ static void tx_add_credit(struct xenvif_ + max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ + + queue->remaining_credit = min(max_credit, max_burst); ++ queue->rate_limited = false; + } + + void xenvif_tx_credit_callback(unsigned long data) +@@ -1184,8 +1185,10 @@ static bool tx_credit_exceeded(struct xe + msecs_to_jiffies(queue->credit_usec / 1000); + + /* Timer could already be pending in rare cases. 
*/ +- if (timer_pending(&queue->credit_timeout)) ++ if (timer_pending(&queue->credit_timeout)) { ++ queue->rate_limited = true; + return true; ++ } + + /* Passed the point where we can replenish credit? */ + if (time_after_eq64(now, next_credit)) { +@@ -1200,6 +1203,7 @@ static bool tx_credit_exceeded(struct xe + mod_timer(&queue->credit_timeout, + next_credit); + queue->credit_window_start = next_credit; ++ queue->rate_limited = true; + + return true; + }