]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 9 Aug 2017 00:18:52 +0000 (17:18 -0700)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 9 Aug 2017 00:18:52 +0000 (17:18 -0700)
added patches:
dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch
dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch
dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch
ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch
ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch
ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch
ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch
mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch
net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch
net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch
net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch
net-zero-terminate-ifr_name-in-dev_ifname.patch
openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch
packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch
rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch
sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch
sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch
sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch
sparc64-prevent-perf-from-running-during-super-critical-sections.patch
xen-netback-correctly-schedule-rate-limited-queues.patch

21 files changed:
queue-4.4/dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch [new file with mode: 0644]
queue-4.4/dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch [new file with mode: 0644]
queue-4.4/dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch [new file with mode: 0644]
queue-4.4/ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch [new file with mode: 0644]
queue-4.4/ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch [new file with mode: 0644]
queue-4.4/ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch [new file with mode: 0644]
queue-4.4/ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch [new file with mode: 0644]
queue-4.4/mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch [new file with mode: 0644]
queue-4.4/net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch [new file with mode: 0644]
queue-4.4/net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch [new file with mode: 0644]
queue-4.4/net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch [new file with mode: 0644]
queue-4.4/net-zero-terminate-ifr_name-in-dev_ifname.patch [new file with mode: 0644]
queue-4.4/openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch [new file with mode: 0644]
queue-4.4/packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch [new file with mode: 0644]
queue-4.4/rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch [new file with mode: 0644]
queue-4.4/sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch [new file with mode: 0644]
queue-4.4/sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch [new file with mode: 0644]
queue-4.4/series
queue-4.4/sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch [new file with mode: 0644]
queue-4.4/sparc64-prevent-perf-from-running-during-super-critical-sections.patch [new file with mode: 0644]
queue-4.4/xen-netback-correctly-schedule-rate-limited-queues.patch [new file with mode: 0644]

diff --git a/queue-4.4/dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch b/queue-4.4/dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch
new file mode 100644 (file)
index 0000000..7cb5e02
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 26 Jul 2017 14:20:15 +0800
+Subject: dccp: fix a memleak for dccp_feat_init err process
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit e90ce2fc27cad7e7b1e72b9e66201a7a4c124c2b ]
+
+In dccp_feat_init, when ccid_get_builtin_ccids fails to alloc
+memory for rx.val, it should free tx.val before returning an
+error.
+
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/feat.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/dccp/feat.c
++++ b/net/dccp/feat.c
+@@ -1471,9 +1471,12 @@ int dccp_feat_init(struct sock *sk)
+        * singleton values (which always leads to failure).
+        * These settings can still (later) be overridden via sockopts.
+        */
+-      if (ccid_get_builtin_ccids(&tx.val, &tx.len) ||
+-          ccid_get_builtin_ccids(&rx.val, &rx.len))
++      if (ccid_get_builtin_ccids(&tx.val, &tx.len))
+               return -ENOBUFS;
++      if (ccid_get_builtin_ccids(&rx.val, &rx.len)) {
++              kfree(tx.val);
++              return -ENOBUFS;
++      }
+       if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) ||
+           !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len))
diff --git a/queue-4.4/dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch b/queue-4.4/dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch
new file mode 100644 (file)
index 0000000..e2e50e4
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 26 Jul 2017 14:19:46 +0800
+Subject: dccp: fix a memleak that dccp_ipv4 doesn't put reqsk properly
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit b7953d3c0e30a5fc944f6b7bd0bcceb0794bcd85 ]
+
+The patch "dccp: fix a memleak that dccp_ipv6 doesn't put reqsk
+properly" fixed reqsk refcnt leak for dccp_ipv6. The same issue
+exists on dccp_ipv4.
+
+This patch is to fix it for dccp_ipv4.
+
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv4.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -635,6 +635,7 @@ int dccp_v4_conn_request(struct sock *sk
+               goto drop_and_free;
+       inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
++      reqsk_put(req);
+       return 0;
+ drop_and_free:
diff --git a/queue-4.4/dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch b/queue-4.4/dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch
new file mode 100644 (file)
index 0000000..cfad10e
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 26 Jul 2017 14:19:09 +0800
+Subject: dccp: fix a memleak that dccp_ipv6 doesn't put reqsk properly
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 0c2232b0a71db0ac1d22f751aa1ac0cadb950fd2 ]
+
+In dccp_v6_conn_request, after reqsk gets alloced and hashed into
+ehash table, reqsk's refcnt is set 3. one is for req->rsk_timer,
+one is for hlist, and the other one is for current using.
+
+The problem is when dccp_v6_conn_request returns and finishes using
+reqsk, it doesn't put reqsk. This will cause reqsk refcnt leaks and
+reqsk obj never gets freed.
+
+Jianlin found this issue when running dccp_memleak.c in a loop, the
+system memory would run out.
+
+dccp_memleak.c:
+  int s1 = socket(PF_INET6, 6, IPPROTO_IP);
+  bind(s1, &sa1, 0x20);
+  listen(s1, 0x9);
+  int s2 = socket(PF_INET6, 6, IPPROTO_IP);
+  connect(s2, &sa1, 0x20);
+  close(s1);
+  close(s2);
+
+This patch is to put the reqsk before dccp_v6_conn_request returns,
+just as what tcp_conn_request does.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv6.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -376,6 +376,7 @@ static int dccp_v6_conn_request(struct s
+               goto drop_and_free;
+       inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
++      reqsk_put(req);
+       return 0;
+ drop_and_free:
diff --git a/queue-4.4/ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch b/queue-4.4/ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch
new file mode 100644 (file)
index 0000000..c8436fc
--- /dev/null
@@ -0,0 +1,71 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: Mahesh Bandewar <maheshb@google.com>
+Date: Wed, 19 Jul 2017 15:41:33 -0700
+Subject: ipv4: initialize fib_trie prior to register_netdev_notifier call.
+
+From: Mahesh Bandewar <maheshb@google.com>
+
+
+[ Upstream commit 8799a221f5944a7d74516ecf46d58c28ec1d1f75 ]
+
+Net stack initialization currently initializes fib-trie after the
+first call to netdevice_notifier() call. In fact fib_trie initialization
+needs to happen before first rtnl_register(). It does not cause any problem
+since there are no devices UP at this moment, but trying to bring 'lo'
+UP at initialization would make this assumption wrong and exposes the issue.
+
+Fixes following crash
+
+ Call Trace:
+  ? alternate_node_alloc+0x76/0xa0
+  fib_table_insert+0x1b7/0x4b0
+  fib_magic.isra.17+0xea/0x120
+  fib_add_ifaddr+0x7b/0x190
+  fib_netdev_event+0xc0/0x130
+  register_netdevice_notifier+0x1c1/0x1d0
+  ip_fib_init+0x72/0x85
+  ip_rt_init+0x187/0x1e9
+  ip_init+0xe/0x1a
+  inet_init+0x171/0x26c
+  ? ipv4_offload_init+0x66/0x66
+  do_one_initcall+0x43/0x160
+  kernel_init_freeable+0x191/0x219
+  ? rest_init+0x80/0x80
+  kernel_init+0xe/0x150
+  ret_from_fork+0x22/0x30
+ Code: f6 46 23 04 74 86 4c 89 f7 e8 ae 45 01 00 49 89 c7 4d 85 ff 0f 85 7b ff ff ff 31 db eb 08 4c 89 ff e8 16 47 01 00 48 8b 44 24 38 <45> 8b 6e 14 4d 63 76 74 48 89 04 24 0f 1f 44 00 00 48 83 c4 08
+ RIP: kmem_cache_alloc+0xcf/0x1c0 RSP: ffff9b1500017c28
+ CR2: 0000000000000014
+
+Fixes: 7b1a74fdbb9e ("[NETNS]: Refactor fib initialization so it can handle multiple namespaces.")
+Fixes: 7f9b80529b8a ("[IPV4]: fib hash|trie initialization")
+
+Signed-off-by: Mahesh Bandewar <maheshb@google.com>
+Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_frontend.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -1319,13 +1319,14 @@ static struct pernet_operations fib_net_
+ void __init ip_fib_init(void)
+ {
+-      rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
+-      rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
+-      rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
++      fib_trie_init();
+       register_pernet_subsys(&fib_net_ops);
++
+       register_netdevice_notifier(&fib_netdev_notifier);
+       register_inetaddr_notifier(&fib_inetaddr_notifier);
+-      fib_trie_init();
++      rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
++      rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
++      rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
+ }
diff --git a/queue-4.4/ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch b/queue-4.4/ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch
new file mode 100644 (file)
index 0000000..974191c
--- /dev/null
@@ -0,0 +1,146 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: Alexander Potapenko <glider@google.com>
+Date: Mon, 17 Jul 2017 12:35:58 +0200
+Subject: ipv4: ipv6: initialize treq->txhash in cookie_v[46]_check()
+
+From: Alexander Potapenko <glider@google.com>
+
+
+[ Upstream commit 18bcf2907df935981266532e1e0d052aff2e6fae ]
+
+KMSAN reported use of uninitialized memory in skb_set_hash_from_sk(),
+which originated from the TCP request socket created in
+cookie_v6_check():
+
+ ==================================================================
+ BUG: KMSAN: use of uninitialized memory in tcp_transmit_skb+0xf77/0x3ec0
+ CPU: 1 PID: 2949 Comm: syz-execprog Not tainted 4.11.0-rc5+ #2931
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
+ TCP: request_sock_TCPv6: Possible SYN flooding on port 20028. Sending cookies.  Check SNMP counters.
+ Call Trace:
+  <IRQ>
+  __dump_stack lib/dump_stack.c:16
+  dump_stack+0x172/0x1c0 lib/dump_stack.c:52
+  kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927
+  __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469
+  skb_set_hash_from_sk ./include/net/sock.h:2011
+  tcp_transmit_skb+0xf77/0x3ec0 net/ipv4/tcp_output.c:983
+  tcp_send_ack+0x75b/0x830 net/ipv4/tcp_output.c:3493
+  tcp_delack_timer_handler+0x9a6/0xb90 net/ipv4/tcp_timer.c:284
+  tcp_delack_timer+0x1b0/0x310 net/ipv4/tcp_timer.c:309
+  call_timer_fn+0x240/0x520 kernel/time/timer.c:1268
+  expire_timers kernel/time/timer.c:1307
+  __run_timers+0xc13/0xf10 kernel/time/timer.c:1601
+  run_timer_softirq+0x36/0xa0 kernel/time/timer.c:1614
+  __do_softirq+0x485/0x942 kernel/softirq.c:284
+  invoke_softirq kernel/softirq.c:364
+  irq_exit+0x1fa/0x230 kernel/softirq.c:405
+  exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:657
+  smp_apic_timer_interrupt+0x5a/0x80 arch/x86/kernel/apic/apic.c:966
+  apic_timer_interrupt+0x86/0x90 arch/x86/entry/entry_64.S:489
+ RIP: 0010:native_restore_fl ./arch/x86/include/asm/irqflags.h:36
+ RIP: 0010:arch_local_irq_restore ./arch/x86/include/asm/irqflags.h:77
+ RIP: 0010:__msan_poison_alloca+0xed/0x120 mm/kmsan/kmsan_instr.c:440
+ RSP: 0018:ffff880024917cd8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff10
+ RAX: 0000000000000246 RBX: ffff8800224c0000 RCX: 0000000000000005
+ RDX: 0000000000000004 RSI: ffff880000000000 RDI: ffffea0000b6d770
+ RBP: ffff880024917d58 R08: 0000000000000dd8 R09: 0000000000000004
+ R10: 0000160000000000 R11: 0000000000000000 R12: ffffffff85abf810
+ R13: ffff880024917dd8 R14: 0000000000000010 R15: ffffffff81cabde4
+  </IRQ>
+  poll_select_copy_remaining+0xac/0x6b0 fs/select.c:293
+  SYSC_select+0x4b4/0x4e0 fs/select.c:653
+  SyS_select+0x76/0xa0 fs/select.c:634
+  entry_SYSCALL_64_fastpath+0x13/0x94 arch/x86/entry/entry_64.S:204
+ RIP: 0033:0x4597e7
+ RSP: 002b:000000c420037ee0 EFLAGS: 00000246 ORIG_RAX: 0000000000000017
+ RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004597e7
+ RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
+ RBP: 000000c420037ef0 R08: 000000c420037ee0 R09: 0000000000000059
+ R10: 0000000000000000 R11: 0000000000000246 R12: 000000000042dc20
+ R13: 00000000000000f3 R14: 0000000000000030 R15: 0000000000000003
+ chained origin:
+  save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59
+  kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302
+  kmsan_save_stack mm/kmsan/kmsan.c:317
+  kmsan_internal_chain_origin+0x12a/0x1f0 mm/kmsan/kmsan.c:547
+  __msan_store_shadow_origin_4+0xac/0x110 mm/kmsan/kmsan_instr.c:259
+  tcp_create_openreq_child+0x709/0x1ae0 net/ipv4/tcp_minisocks.c:472
+  tcp_v6_syn_recv_sock+0x7eb/0x2a30 net/ipv6/tcp_ipv6.c:1103
+  tcp_get_cookie_sock+0x136/0x5f0 net/ipv4/syncookies.c:212
+  cookie_v6_check+0x17a9/0x1b50 net/ipv6/syncookies.c:245
+  tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989
+  tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298
+  tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487
+  ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279
+  NF_HOOK ./include/linux/netfilter.h:257
+  ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322
+  dst_input ./include/net/dst.h:492
+  ip6_rcv_finish net/ipv6/ip6_input.c:69
+  NF_HOOK ./include/linux/netfilter.h:257
+  ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203
+  __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208
+  __netif_receive_skb net/core/dev.c:4246
+  process_backlog+0x667/0xba0 net/core/dev.c:4866
+  napi_poll net/core/dev.c:5268
+  net_rx_action+0xc95/0x1590 net/core/dev.c:5333
+  __do_softirq+0x485/0x942 kernel/softirq.c:284
+ origin:
+  save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59
+  kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302
+  kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198
+  kmsan_kmalloc+0x7f/0xe0 mm/kmsan/kmsan.c:337
+  kmem_cache_alloc+0x1c2/0x1e0 mm/slub.c:2766
+  reqsk_alloc ./include/net/request_sock.h:87
+  inet_reqsk_alloc+0xa4/0x5b0 net/ipv4/tcp_input.c:6200
+  cookie_v6_check+0x4f4/0x1b50 net/ipv6/syncookies.c:169
+  tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989
+  tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298
+  tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487
+  ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279
+  NF_HOOK ./include/linux/netfilter.h:257
+  ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322
+  dst_input ./include/net/dst.h:492
+  ip6_rcv_finish net/ipv6/ip6_input.c:69
+  NF_HOOK ./include/linux/netfilter.h:257
+  ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203
+  __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208
+  __netif_receive_skb net/core/dev.c:4246
+  process_backlog+0x667/0xba0 net/core/dev.c:4866
+  napi_poll net/core/dev.c:5268
+  net_rx_action+0xc95/0x1590 net/core/dev.c:5333
+  __do_softirq+0x485/0x942 kernel/softirq.c:284
+ ==================================================================
+
+Similar error is reported for cookie_v4_check().
+
+Fixes: 58d607d3e52f ("tcp: provide skb->hash to synack packets")
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/syncookies.c |    1 +
+ net/ipv6/syncookies.c |    1 +
+ 2 files changed, 2 insertions(+)
+
+--- a/net/ipv4/syncookies.c
++++ b/net/ipv4/syncookies.c
+@@ -337,6 +337,7 @@ struct sock *cookie_v4_check(struct sock
+       treq = tcp_rsk(req);
+       treq->rcv_isn           = ntohl(th->seq) - 1;
+       treq->snt_isn           = cookie;
++      treq->txhash            = net_tx_rndhash();
+       req->mss                = mss;
+       ireq->ir_num            = ntohs(th->dest);
+       ireq->ir_rmt_port       = th->source;
+--- a/net/ipv6/syncookies.c
++++ b/net/ipv6/syncookies.c
+@@ -210,6 +210,7 @@ struct sock *cookie_v6_check(struct sock
+       treq->snt_synack.v64    = 0;
+       treq->rcv_isn = ntohl(th->seq) - 1;
+       treq->snt_isn = cookie;
++      treq->txhash = net_tx_rndhash();
+       /*
+        * We need to lookup the dst_entry to get the correct window size.
diff --git a/queue-4.4/ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch b/queue-4.4/ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch
new file mode 100644 (file)
index 0000000..64a6d7d
--- /dev/null
@@ -0,0 +1,55 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Wed, 19 Jul 2017 22:28:55 +0200
+Subject: ipv6: avoid overflow of offset in ip6_find_1stfragopt
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+
+[ Upstream commit 6399f1fae4ec29fab5ec76070435555e256ca3a6 ]
+
+In some cases, offset can overflow and can cause an infinite loop in
+ip6_find_1stfragopt(). Make it unsigned int to prevent the overflow, and
+cap it at IPV6_MAXPLEN, since packets larger than that should be invalid.
+
+This problem has been here since before the beginning of git history.
+
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/output_core.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/output_core.c
++++ b/net/ipv6/output_core.c
+@@ -78,7 +78,7 @@ EXPORT_SYMBOL(ipv6_select_ident);
+ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
+ {
+-      u16 offset = sizeof(struct ipv6hdr);
++      unsigned int offset = sizeof(struct ipv6hdr);
+       unsigned int packet_len = skb_tail_pointer(skb) -
+               skb_network_header(skb);
+       int found_rhdr = 0;
+@@ -86,6 +86,7 @@ int ip6_find_1stfragopt(struct sk_buff *
+       while (offset <= packet_len) {
+               struct ipv6_opt_hdr *exthdr;
++              unsigned int len;
+               switch (**nexthdr) {
+@@ -111,7 +112,10 @@ int ip6_find_1stfragopt(struct sk_buff *
+               exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+                                                offset);
+-              offset += ipv6_optlen(exthdr);
++              len = ipv6_optlen(exthdr);
++              if (len + offset >= IPV6_MAXPLEN)
++                      return -EINVAL;
++              offset += len;
+               *nexthdr = &exthdr->nexthdr;
+       }
diff --git a/queue-4.4/ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch b/queue-4.4/ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch
new file mode 100644 (file)
index 0000000..3ccf4a8
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Mon, 24 Jul 2017 23:14:28 +0200
+Subject: ipv6: Don't increase IPSTATS_MIB_FRAGFAILS twice in ip6_fragment()
+
+From: Stefano Brivio <sbrivio@redhat.com>
+
+
+[ Upstream commit afce615aaabfbaad02550e75c0bec106dafa1adf ]
+
+RFC 2465 defines ipv6IfStatsOutFragFails as:
+
+       "The number of IPv6 datagrams that have been discarded
+        because they needed to be fragmented at this output
+        interface but could not be."
+
+The existing implementation, instead, would increase the counter
+twice in case we fail to allocate room for single fragments:
+once for the fragment, once for the datagram.
+
+This didn't look intentional though. In one of the two affected
+failure paths, the double increase was simply a result
+of a new 'goto fail' statement, introduced to avoid a skb leak.
+The other path appears to be affected since at least 2.6.12-rc2.
+
+Reported-by: Sabrina Dubroca <sdubroca@redhat.com>
+Fixes: 1d325d217c7f ("ipv6: ip6_fragment: fix headroom tests and skb leak")
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_output.c |    4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -647,8 +647,6 @@ int ip6_fragment(struct net *net, struct
+               *prevhdr = NEXTHDR_FRAGMENT;
+               tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
+               if (!tmp_hdr) {
+-                      IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+-                                    IPSTATS_MIB_FRAGFAILS);
+                       err = -ENOMEM;
+                       goto fail;
+               }
+@@ -767,8 +765,6 @@ slow_path:
+               frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
+                                hroom + troom, GFP_ATOMIC);
+               if (!frag) {
+-                      IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+-                                    IPSTATS_MIB_FRAGFAILS);
+                       err = -ENOMEM;
+                       goto fail;
+               }
diff --git a/queue-4.4/mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch b/queue-4.4/mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch
new file mode 100644 (file)
index 0000000..385fe14
--- /dev/null
@@ -0,0 +1,81 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: Thomas Jarosch <thomas.jarosch@intra2net.com>
+Date: Sat, 22 Jul 2017 17:14:34 +0200
+Subject: mcs7780: Fix initialization when CONFIG_VMAP_STACK is enabled
+
+From: Thomas Jarosch <thomas.jarosch@intra2net.com>
+
+
+[ Upstream commit 9476d393667968b4a02afbe9d35a3558482b943e ]
+
+DMA transfers are not allowed to buffers that are on the stack.
+Therefore allocate a buffer to store the result of usb_control_message().
+
+Fixes these bugreports:
+https://bugzilla.kernel.org/show_bug.cgi?id=195217
+
+https://bugzilla.redhat.com/show_bug.cgi?id=1421387
+https://bugzilla.redhat.com/show_bug.cgi?id=1427398
+
+Shortened kernel backtrace from 4.11.9-200.fc25.x86_64:
+kernel: ------------[ cut here ]------------
+kernel: WARNING: CPU: 3 PID: 2957 at drivers/usb/core/hcd.c:1587
+kernel: transfer buffer not dma capable
+kernel: Call Trace:
+kernel: dump_stack+0x63/0x86
+kernel: __warn+0xcb/0xf0
+kernel: warn_slowpath_fmt+0x5a/0x80
+kernel: usb_hcd_map_urb_for_dma+0x37f/0x570
+kernel: ? try_to_del_timer_sync+0x53/0x80
+kernel: usb_hcd_submit_urb+0x34e/0xb90
+kernel: ? schedule_timeout+0x17e/0x300
+kernel: ? del_timer_sync+0x50/0x50
+kernel: ? __slab_free+0xa9/0x300
+kernel: usb_submit_urb+0x2f4/0x560
+kernel: ? urb_destroy+0x24/0x30
+kernel: usb_start_wait_urb+0x6e/0x170
+kernel: usb_control_msg+0xdc/0x120
+kernel: mcs_get_reg+0x36/0x40 [mcs7780]
+kernel: mcs_net_open+0xb5/0x5c0 [mcs7780]
+...
+
+Regression goes back to 4.9, so it's a good candidate for -stable.
+Though it's the decision of the maintainer.
+
+Thanks to Dan Williams for adding the "transfer buffer not dma capable"
+warning in the first place. It instantly pointed me in the right direction.
+
+Patch has been tested with transferring data from a Polar watch.
+
+Signed-off-by: Thomas Jarosch <thomas.jarosch@intra2net.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/irda/mcs7780.c |   16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/irda/mcs7780.c
++++ b/drivers/net/irda/mcs7780.c
+@@ -141,9 +141,19 @@ static int mcs_set_reg(struct mcs_cb *mc
+ static int mcs_get_reg(struct mcs_cb *mcs, __u16 reg, __u16 * val)
+ {
+       struct usb_device *dev = mcs->usbdev;
+-      int ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ,
+-                                MCS_RD_RTYPE, 0, reg, val, 2,
+-                                msecs_to_jiffies(MCS_CTRL_TIMEOUT));
++      void *dmabuf;
++      int ret;
++
++      dmabuf = kmalloc(sizeof(__u16), GFP_KERNEL);
++      if (!dmabuf)
++              return -ENOMEM;
++
++      ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ,
++                            MCS_RD_RTYPE, 0, reg, dmabuf, 2,
++                            msecs_to_jiffies(MCS_CTRL_TIMEOUT));
++
++      memcpy(val, dmabuf, sizeof(__u16));
++      kfree(dmabuf);
+       return ret;
+ }
diff --git a/queue-4.4/net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch b/queue-4.4/net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch
new file mode 100644 (file)
index 0000000..420eda8
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+Date: Tue, 25 Jul 2017 14:35:03 +0200
+Subject: net: ethernet: nb8800: Handle all 4 RGMII modes identically
+
+From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+
+
+[ Upstream commit 4813497b537c6208c90d6cbecac5072d347de900 ]
+
+Before commit bf8f6952a233 ("Add blurb about RGMII") it was unclear
+whose responsibility it was to insert the required clock skew, and
+in hindsight, some PHY drivers got it wrong. The solution forward
+is to introduce a new property, explicitly requiring skew from the
+node to which it is attached. In the interim, this driver will handle
+all 4 RGMII modes identically (no skew).
+
+Fixes: 52dfc8301248 ("net: ethernet: add driver for Aurora VLSI NB8800 Ethernet controller")
+Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/aurora/nb8800.c |    9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/aurora/nb8800.c
++++ b/drivers/net/ethernet/aurora/nb8800.c
+@@ -608,7 +608,7 @@ static void nb8800_mac_config(struct net
+               mac_mode |= HALF_DUPLEX;
+       if (gigabit) {
+-              if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII)
++              if (phy_interface_is_rgmii(dev->phydev))
+                       mac_mode |= RGMII_MODE;
+               mac_mode |= GMAC_MODE;
+@@ -1295,11 +1295,10 @@ static int nb8800_tangox_init(struct net
+               break;
+       case PHY_INTERFACE_MODE_RGMII:
+-              pad_mode = PAD_MODE_RGMII;
+-              break;
+-
++      case PHY_INTERFACE_MODE_RGMII_ID:
++      case PHY_INTERFACE_MODE_RGMII_RXID:
+       case PHY_INTERFACE_MODE_RGMII_TXID:
+-              pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
++              pad_mode = PAD_MODE_RGMII;
+               break;
+       default:
diff --git a/queue-4.4/net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch b/queue-4.4/net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch
new file mode 100644 (file)
index 0000000..bf05a53
--- /dev/null
@@ -0,0 +1,66 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: Moshe Shemesh <moshe@mellanox.com>
+Date: Sun, 25 Jun 2017 18:45:32 +0300
+Subject: net/mlx5: Fix command bad flow on command entry allocation failure
+
+From: Moshe Shemesh <moshe@mellanox.com>
+
+
+[ Upstream commit 219c81f7d1d5a89656cb3b53d3b4e11e93608d80 ]
+
+When driver fail to allocate an entry to send command to FW, it must
+notify the calling function and release the memory allocated for
+this command.
+
+Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB adapters')
+Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
+Cc: kernel-team@fb.com
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c |   19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -630,6 +630,10 @@ static void dump_command(struct mlx5_cor
+               pr_debug("\n");
+ }
++static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
++static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
++                            struct mlx5_cmd_msg *msg);
++
+ static void cmd_work_handler(struct work_struct *work)
+ {
+       struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
+@@ -638,16 +642,27 @@ static void cmd_work_handler(struct work
+       struct mlx5_cmd_layout *lay;
+       struct semaphore *sem;
+       unsigned long flags;
++      int alloc_ret;
+       sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
+       down(sem);
+       if (!ent->page_queue) {
+-              ent->idx = alloc_ent(cmd);
+-              if (ent->idx < 0) {
++              alloc_ret = alloc_ent(cmd);
++              if (alloc_ret < 0) {
++                      if (ent->callback) {
++                              ent->callback(-EAGAIN, ent->context);
++                              mlx5_free_cmd_msg(dev, ent->out);
++                              free_msg(dev, ent->in);
++                              free_cmd(ent);
++                      } else {
++                              ent->ret = -EAGAIN;
++                              complete(&ent->done);
++                      }
+                       mlx5_core_err(dev, "failed to allocate command entry\n");
+                       up(sem);
+                       return;
+               }
++              ent->idx = alloc_ret;
+       } else {
+               ent->idx = cmd->max_reg_cmds;
+               spin_lock_irqsave(&cmd->alloc_lock, flags);
diff --git a/queue-4.4/net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch b/queue-4.4/net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch
new file mode 100644 (file)
index 0000000..065c051
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Fri, 28 Jul 2017 11:58:36 -0700
+Subject: net: phy: Correctly process PHY_HALTED in phy_stop_machine()
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit 7ad813f208533cebfcc32d3d7474dc1677d1b09a ]
+
+Marc reported that he was not getting the PHY library adjust_link()
+callback function to run when calling phy_stop() + phy_disconnect()
+which does not indeed happen because we set the state machine to
+PHY_HALTED but we don't get to run it to process this state past that
+point.
+
+Fix this with a synchronous call to phy_state_machine() in order to have
+the state machine actually act on PHY_HALTED, set the PHY device's link
+down, turn the network device's carrier off and finally call the
+adjust_link() function.
+
+Reported-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+Fixes: a390d1f379cf ("phylib: convert state_queue work to delayed_work")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/phy/phy.c
++++ b/drivers/net/phy/phy.c
+@@ -541,6 +541,9 @@ void phy_stop_machine(struct phy_device
+       if (phydev->state > PHY_UP && phydev->state != PHY_HALTED)
+               phydev->state = PHY_UP;
+       mutex_unlock(&phydev->lock);
++
++      /* Now we can run the state machine synchronously */
++      phy_state_machine(&phydev->state_queue.work);
+ }
+ /**
diff --git a/queue-4.4/net-zero-terminate-ifr_name-in-dev_ifname.patch b/queue-4.4/net-zero-terminate-ifr_name-in-dev_ifname.patch
new file mode 100644 (file)
index 0000000..a29bf4c
--- /dev/null
@@ -0,0 +1,28 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: "David S. Miller" <davem@davemloft.net>
+Date: Wed, 19 Jul 2017 13:33:24 -0700
+Subject: net: Zero terminate ifr_name in dev_ifname().
+
+From: "David S. Miller" <davem@davemloft.net>
+
+
+[ Upstream commit 63679112c536289826fec61c917621de95ba2ade ]
+
+The ifr.ifr_name is passed around and assumed to be NULL terminated.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev_ioctl.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/dev_ioctl.c
++++ b/net/core/dev_ioctl.c
+@@ -28,6 +28,7 @@ static int dev_ifname(struct net *net, s
+       if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+               return -EFAULT;
++      ifr.ifr_name[IFNAMSIZ-1] = 0;
+       error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex);
+       if (error)
diff --git a/queue-4.4/openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch b/queue-4.4/openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch
new file mode 100644 (file)
index 0000000..f9ad01a
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: Liping Zhang <zlpnobody@gmail.com>
+Date: Sun, 23 Jul 2017 17:52:23 +0800
+Subject: openvswitch: fix potential out of bound access in parse_ct
+
+From: Liping Zhang <zlpnobody@gmail.com>
+
+
+[ Upstream commit 69ec932e364b1ba9c3a2085fe96b76c8a3f71e7c ]
+
+Before the 'type' is validated, we shouldn't use it to fetch the
+ovs_ct_attr_lens's minlen and maxlen, else, out of bound access
+may happen.
+
+Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action")
+Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
+Acked-by: Pravin B Shelar <pshelar@ovn.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/conntrack.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/openvswitch/conntrack.c
++++ b/net/openvswitch/conntrack.c
+@@ -577,8 +577,8 @@ static int parse_ct(const struct nlattr
+       nla_for_each_nested(a, attr, rem) {
+               int type = nla_type(a);
+-              int maxlen = ovs_ct_attr_lens[type].maxlen;
+-              int minlen = ovs_ct_attr_lens[type].minlen;
++              int maxlen;
++              int minlen;
+               if (type > OVS_CT_ATTR_MAX) {
+                       OVS_NLERR(log,
+@@ -586,6 +586,9 @@ static int parse_ct(const struct nlattr
+                                 type, OVS_CT_ATTR_MAX);
+                       return -EINVAL;
+               }
++
++              maxlen = ovs_ct_attr_lens[type].maxlen;
++              minlen = ovs_ct_attr_lens[type].minlen;
+               if (nla_len(a) < minlen || nla_len(a) > maxlen) {
+                       OVS_NLERR(log,
+                                 "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
diff --git a/queue-4.4/packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch b/queue-4.4/packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch
new file mode 100644 (file)
index 0000000..e6263d3
--- /dev/null
@@ -0,0 +1,60 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Mon, 24 Jul 2017 10:07:32 -0700
+Subject: packet: fix use-after-free in prb_retire_rx_blk_timer_expired()
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit c800aaf8d869f2b9b47b10c5c312fe19f0a94042 ]
+
+There are multiple reports showing we have a use-after-free in
+the timer prb_retire_rx_blk_timer_expired(), where we use struct
+tpacket_kbdq_core::pkbdq, a pg_vec, after it gets freed by
+free_pg_vec().
+
+The interesting part is it is not freed via packet_release() but
+via packet_setsockopt(), which means we are not closing the socket.
+Looking into the big and fat function packet_set_ring(), this could
+happen if we satisfy the following conditions:
+
+1. closing == 0, not on packet_release() path
+2. req->tp_block_nr == 0, we don't allocate a new pg_vec
+3. rx_ring->pg_vec is already set as V3, which means we already called
+   packet_set_ring() with req->tp_block_nr > 0 previously
+4. req->tp_frame_nr == 0, pass sanity check
+5. po->mapped == 0, never called mmap()
+
+In this scenario we are clearing the old rx_ring->pg_vec, so we need
+to free this pg_vec, but we don't stop the timer on this path because
+of closing==0.
+
+The timer has to be stopped as long as we need to free pg_vec, therefore
+the check on closing!=0 is wrong, we should check pg_vec!=NULL instead.
+
+Thanks to liujian for testing different fixes.
+
+Reported-by: alexander.levin@verizon.com
+Reported-by: Dave Jones <davej@codemonkey.org.uk>
+Reported-by: liujian (CE) <liujian56@huawei.com>
+Tested-by: liujian (CE) <liujian56@huawei.com>
+Cc: Ding Tianhong <dingtianhong@huawei.com>
+Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -4225,7 +4225,7 @@ static int packet_set_ring(struct sock *
+               register_prot_hook(sk);
+       }
+       spin_unlock(&po->bind_lock);
+-      if (closing && (po->tp_version > TPACKET_V2)) {
++      if (pg_vec && (po->tp_version > TPACKET_V2)) {
+               /* Because we don't support block-based V3 on tx-ring */
+               if (!tx_ring)
+                       prb_shutdown_retire_blk_timer(po, rb_queue);
diff --git a/queue-4.4/rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch b/queue-4.4/rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch
new file mode 100644 (file)
index 0000000..a3411ec
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Thu, 20 Jul 2017 11:27:57 -0700
+Subject: rtnetlink: allocate more memory for dev_set_mac_address()
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit 153711f9421be5dbc973dc57a4109dc9d54c89b1 ]
+
+virtnet_set_mac_address() interprets mac address as struct
+sockaddr, but upper layer only allocates dev->addr_len
+which is ETH_ALEN + sizeof(sa_family_t) in this case.
+
+We lack a unified definition for mac address, so just fix
+the upper layer, this also allows drivers to interpret it
+to struct sockaddr freely.
+
+Reported-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -1742,7 +1742,8 @@ static int do_setlink(const struct sk_bu
+               struct sockaddr *sa;
+               int len;
+-              len = sizeof(sa_family_t) + dev->addr_len;
++              len = sizeof(sa_family_t) + max_t(size_t, dev->addr_len,
++                                                sizeof(*sa));
+               sa = kmalloc(len, GFP_KERNEL);
+               if (!sa) {
+                       err = -ENOMEM;
diff --git a/queue-4.4/sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch b/queue-4.4/sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch
new file mode 100644 (file)
index 0000000..226f917
--- /dev/null
@@ -0,0 +1,140 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: Alexander Potapenko <glider@google.com>
+Date: Fri, 14 Jul 2017 18:32:45 +0200
+Subject: sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()
+
+From: Alexander Potapenko <glider@google.com>
+
+
+[ Upstream commit b1f5bfc27a19f214006b9b4db7b9126df2dfdf5a ]
+
+If the length field of the iterator (|pos.p| or |err|) is past the end
+of the chunk, we shouldn't access it.
+
+This bug has been detected by KMSAN. For the following pair of system
+calls:
+
+  socket(PF_INET6, SOCK_STREAM, 0x84 /* IPPROTO_??? */) = 3
+  sendto(3, "A", 1, MSG_OOB, {sa_family=AF_INET6, sin6_port=htons(0),
+         inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0,
+         sin6_scope_id=0}, 28) = 1
+
+the tool has reported a use of uninitialized memory:
+
+  ==================================================================
+  BUG: KMSAN: use of uninitialized memory in sctp_rcv+0x17b8/0x43b0
+  CPU: 1 PID: 2940 Comm: probe Not tainted 4.11.0-rc5+ #2926
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
+  01/01/2011
+  Call Trace:
+   <IRQ>
+   __dump_stack lib/dump_stack.c:16
+   dump_stack+0x172/0x1c0 lib/dump_stack.c:52
+   kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927
+   __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469
+   __sctp_rcv_init_lookup net/sctp/input.c:1074
+   __sctp_rcv_lookup_harder net/sctp/input.c:1233
+   __sctp_rcv_lookup net/sctp/input.c:1255
+   sctp_rcv+0x17b8/0x43b0 net/sctp/input.c:170
+   sctp6_rcv+0x32/0x70 net/sctp/ipv6.c:984
+   ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279
+   NF_HOOK ./include/linux/netfilter.h:257
+   ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322
+   dst_input ./include/net/dst.h:492
+   ip6_rcv_finish net/ipv6/ip6_input.c:69
+   NF_HOOK ./include/linux/netfilter.h:257
+   ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203
+   __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208
+   __netif_receive_skb net/core/dev.c:4246
+   process_backlog+0x667/0xba0 net/core/dev.c:4866
+   napi_poll net/core/dev.c:5268
+   net_rx_action+0xc95/0x1590 net/core/dev.c:5333
+   __do_softirq+0x485/0x942 kernel/softirq.c:284
+   do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902
+   </IRQ>
+   do_softirq kernel/softirq.c:328
+   __local_bh_enable_ip+0x25b/0x290 kernel/softirq.c:181
+   local_bh_enable+0x37/0x40 ./include/linux/bottom_half.h:31
+   rcu_read_unlock_bh ./include/linux/rcupdate.h:931
+   ip6_finish_output2+0x19b2/0x1cf0 net/ipv6/ip6_output.c:124
+   ip6_finish_output+0x764/0x970 net/ipv6/ip6_output.c:149
+   NF_HOOK_COND ./include/linux/netfilter.h:246
+   ip6_output+0x456/0x520 net/ipv6/ip6_output.c:163
+   dst_output ./include/net/dst.h:486
+   NF_HOOK ./include/linux/netfilter.h:257
+   ip6_xmit+0x1841/0x1c00 net/ipv6/ip6_output.c:261
+   sctp_v6_xmit+0x3b7/0x470 net/sctp/ipv6.c:225
+   sctp_packet_transmit+0x38cb/0x3a20 net/sctp/output.c:632
+   sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885
+   sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750
+   sctp_side_effects net/sctp/sm_sideeffect.c:1773
+   sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147
+   sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88
+   sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954
+   inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762
+   sock_sendmsg_nosec net/socket.c:633
+   sock_sendmsg net/socket.c:643
+   SYSC_sendto+0x608/0x710 net/socket.c:1696
+   SyS_sendto+0x8a/0xb0 net/socket.c:1664
+   do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285
+   entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:246
+  RIP: 0033:0x401133
+  RSP: 002b:00007fff6d99cd38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
+  RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000401133
+  RDX: 0000000000000001 RSI: 0000000000494088 RDI: 0000000000000003
+  RBP: 00007fff6d99cd90 R08: 00007fff6d99cd50 R09: 000000000000001c
+  R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000000
+  R13: 00000000004063d0 R14: 0000000000406460 R15: 0000000000000000
+  origin:
+   save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59
+   kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302
+   kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198
+   kmsan_poison_shadow+0x6d/0xc0 mm/kmsan/kmsan.c:211
+   slab_alloc_node mm/slub.c:2743
+   __kmalloc_node_track_caller+0x200/0x360 mm/slub.c:4351
+   __kmalloc_reserve net/core/skbuff.c:138
+   __alloc_skb+0x26b/0x840 net/core/skbuff.c:231
+   alloc_skb ./include/linux/skbuff.h:933
+   sctp_packet_transmit+0x31e/0x3a20 net/sctp/output.c:570
+   sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885
+   sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750
+   sctp_side_effects net/sctp/sm_sideeffect.c:1773
+   sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147
+   sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88
+   sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954
+   inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762
+   sock_sendmsg_nosec net/socket.c:633
+   sock_sendmsg net/socket.c:643
+   SYSC_sendto+0x608/0x710 net/socket.c:1696
+   SyS_sendto+0x8a/0xb0 net/socket.c:1664
+   do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285
+   return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:246
+  ==================================================================
+
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sctp/sctp.h |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/include/net/sctp/sctp.h
++++ b/include/net/sctp/sctp.h
+@@ -444,6 +444,8 @@ _sctp_walk_params((pos), (chunk), ntohs(
+ #define _sctp_walk_params(pos, chunk, end, member)\
+ for (pos.v = chunk->member;\
++     (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\
++      (void *)chunk + end) &&\
+      pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
+      ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
+      pos.v += WORD_ROUND(ntohs(pos.p->length)))
+@@ -454,6 +456,8 @@ _sctp_walk_errors((err), (chunk_hdr), nt
+ #define _sctp_walk_errors(err, chunk_hdr, end)\
+ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
+           sizeof(sctp_chunkhdr_t));\
++     ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\
++      (void *)chunk_hdr + end) &&\
+      (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
+      ntohs(err->length) >= sizeof(sctp_errhdr_t); \
+      err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length))))
diff --git a/queue-4.4/sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch b/queue-4.4/sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch
new file mode 100644 (file)
index 0000000..64381c4
--- /dev/null
@@ -0,0 +1,59 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 26 Jul 2017 16:24:59 +0800
+Subject: sctp: fix the check for _sctp_walk_params and _sctp_walk_errors
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 6b84202c946cd3da3a8daa92c682510e9ed80321 ]
+
+Commit b1f5bfc27a19 ("sctp: don't dereference ptr before leaving
+_sctp_walk_{params, errors}()") tried to fix the issue that it
+may overstep the chunk end for _sctp_walk_{params, errors} with
+'chunk_end > offset(length) + sizeof(length)'.
+
+But it introduced a side effect: When processing INIT, it verifies
+the chunks with 'param.v == chunk_end' after iterating all params
+by sctp_walk_params(). With the check 'chunk_end > offset(length)
++ sizeof(length)', it would return when the last param is not yet
+accessed. Because the last param usually is fwdtsn supported param
+whose size is 4 and 'chunk_end == offset(length) + sizeof(length)'
+
+This is a bad issue, even preventing sctp from completing the 4-way handshake.
+Client would always get an abort when connecting to the server, due to
+the failure of INIT chunk verification on server.
+
+The patch is to use 'chunk_end <= offset(length) + sizeof(length)'
+instead of 'chunk_end < offset(length) + sizeof(length)' for both
+_sctp_walk_params and _sctp_walk_errors.
+
+Fixes: b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sctp/sctp.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/include/net/sctp/sctp.h
++++ b/include/net/sctp/sctp.h
+@@ -444,7 +444,7 @@ _sctp_walk_params((pos), (chunk), ntohs(
+ #define _sctp_walk_params(pos, chunk, end, member)\
+ for (pos.v = chunk->member;\
+-     (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\
++     (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\
+       (void *)chunk + end) &&\
+      pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
+      ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
+@@ -456,7 +456,7 @@ _sctp_walk_errors((err), (chunk_hdr), nt
+ #define _sctp_walk_errors(err, chunk_hdr, end)\
+ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
+           sizeof(sctp_chunkhdr_t));\
+-     ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\
++     ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\
+       (void *)chunk_hdr + end) &&\
+      (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
+      ntohs(err->length) >= sizeof(sctp_errhdr_t); \
index e018215a7b1e494ac027b5cc813ffc364fafd266..2a5b46bec2dd8049ce0c8bac9ed537bbb18e3f92 100644 (file)
@@ -20,3 +20,23 @@ media-lirc-lirc_get_rec_resolution-should-return-microseconds.patch
 f2fs-sanity-check-checkpoint-segno-and-blkoff.patch
 drm-rcar-du-fix-backport-bug.patch
 saa7164-fix-double-fetch-pcie-access-condition.patch
+ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch
+net-zero-terminate-ifr_name-in-dev_ifname.patch
+ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch
+ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch
+rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch
+mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch
+openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch
+packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch
+ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch
+net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch
+dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch
+dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch
+dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch
+sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch
+sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch
+net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch
+net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch
+xen-netback-correctly-schedule-rate-limited-queues.patch
+sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch
+sparc64-prevent-perf-from-running-during-super-critical-sections.patch
diff --git a/queue-4.4/sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch b/queue-4.4/sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch
new file mode 100644 (file)
index 0000000..9246691
--- /dev/null
@@ -0,0 +1,335 @@
+From foo@baz Tue Aug  8 16:56:08 PDT 2017
+From: Jane Chu <jane.chu@oracle.com>
+Date: Tue, 11 Jul 2017 12:00:54 -0600
+Subject: sparc64: Measure receiver forward progress to avoid send mondo timeout
+
+From: Jane Chu <jane.chu@oracle.com>
+
+
+[ Upstream commit 9d53caec84c7c5700e7c1ed744ea584fff55f9ac ]
+
+A large sun4v SPARC system may have moments of intensive xcall activities,
+usually caused by unmapping many pages on many CPUs concurrently. This can
+flood receivers with CPU mondo interrupts for an extended period, causing
+some unlucky senders to hit send-mondo timeout. This problem gets worse
+as cpu count increases because sometimes mappings must be invalidated on
+all CPUs, and sometimes all CPUs may gang up on a single CPU.
+
+But a busy system is not a broken system. In the above scenario, as long
+as the receiver is making forward progress processing mondo interrupts,
+the sender should continue to retry.
+
+This patch implements the receiver's forward progress meter by introducing
+a per cpu counter 'cpu_mondo_counter[cpu]' where 'cpu' is in the range
+of 0..NR_CPUS. The receiver increments its counter as soon as it receives
+a mondo and the sender tracks the receiver's counter. If the receiver has
+stopped making forward progress when the retry limit is reached, the sender
+declares send-mondo-timeout and panic; otherwise, the receiver is allowed
+to keep making forward progress.
+
+In addition, it's been observed that PCIe hotplug events generate Correctable
+Errors that are handled by hypervisor and then OS. Hypervisor 'borrows'
+a guest cpu strand briefly to provide the service. If the cpu strand is
+simultaneously the only cpu targeted by a mondo, it may not be available
+for the mondo in 20msec, causing SUN4V mondo timeout. It appears that 1 second
+is the agreed wait time between hypervisor and guest OS, this patch makes
+the adjustment.
+
+Orabug: 25476541
+Orabug: 26417466
+
+Signed-off-by: Jane Chu <jane.chu@oracle.com>
+Reviewed-by: Steve Sistare <steven.sistare@oracle.com>
+Reviewed-by: Anthony Yznaga <anthony.yznaga@oracle.com>
+Reviewed-by: Rob Gardner <rob.gardner@oracle.com>
+Reviewed-by: Thomas Tai <thomas.tai@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/trap_block.h |    1 
+ arch/sparc/kernel/smp_64.c          |  189 ++++++++++++++++++++++--------------
+ arch/sparc/kernel/sun4v_ivec.S      |   15 ++
+ arch/sparc/kernel/traps_64.c        |    1 
+ 4 files changed, 134 insertions(+), 72 deletions(-)
+
+--- a/arch/sparc/include/asm/trap_block.h
++++ b/arch/sparc/include/asm/trap_block.h
+@@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR
+ void init_cur_cpu_trap(struct thread_info *);
+ void setup_tba(void);
+ extern int ncpus_probed;
++extern u64 cpu_mondo_counter[NR_CPUS];
+ unsigned long real_hard_smp_processor_id(void);
+--- a/arch/sparc/kernel/smp_64.c
++++ b/arch/sparc/kernel/smp_64.c
+@@ -617,22 +617,48 @@ retry:
+       }
+ }
+-/* Multi-cpu list version.  */
++#define       CPU_MONDO_COUNTER(cpuid)        (cpu_mondo_counter[cpuid])
++#define       MONDO_USEC_WAIT_MIN             2
++#define       MONDO_USEC_WAIT_MAX             100
++#define       MONDO_RETRY_LIMIT               500000
++
++/* Multi-cpu list version.
++ *
++ * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'.
++ * Sometimes not all cpus receive the mondo, requiring us to re-send
++ * the mondo until all cpus have received, or cpus are truly stuck
++ * unable to receive mondo, and we timeout.
++ * Occasionally a target cpu strand is borrowed briefly by hypervisor to
++ * perform guest service, such as PCIe error handling. Consider the
++ * service time, 1 second overall wait is reasonable for 1 cpu.
++ * Here two in-between mondo check wait time are defined: 2 usec for
++ * single cpu quick turn around and up to 100usec for large cpu count.
++ * Deliver mondo to large number of cpus could take longer, we adjusts
++ * the retry count as long as target cpus are making forward progress.
++ */
+ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
+ {
+-      int retries, this_cpu, prev_sent, i, saw_cpu_error;
++      int this_cpu, tot_cpus, prev_sent, i, rem;
++      int usec_wait, retries, tot_retries;
++      u16 first_cpu = 0xffff;
++      unsigned long xc_rcvd = 0;
+       unsigned long status;
++      int ecpuerror_id = 0;
++      int enocpu_id = 0;
+       u16 *cpu_list;
++      u16 cpu;
+       this_cpu = smp_processor_id();
+-
+       cpu_list = __va(tb->cpu_list_pa);
+-
+-      saw_cpu_error = 0;
+-      retries = 0;
++      usec_wait = cnt * MONDO_USEC_WAIT_MIN;
++      if (usec_wait > MONDO_USEC_WAIT_MAX)
++              usec_wait = MONDO_USEC_WAIT_MAX;
++      retries = tot_retries = 0;
++      tot_cpus = cnt;
+       prev_sent = 0;
++
+       do {
+-              int forward_progress, n_sent;
++              int n_sent, mondo_delivered, target_cpu_busy;
+               status = sun4v_cpu_mondo_send(cnt,
+                                             tb->cpu_list_pa,
+@@ -640,94 +666,113 @@ static void hypervisor_xcall_deliver(str
+               /* HV_EOK means all cpus received the xcall, we're done.  */
+               if (likely(status == HV_EOK))
+-                      break;
++                      goto xcall_done;
++
++              /* If not these non-fatal errors, panic */
++              if (unlikely((status != HV_EWOULDBLOCK) &&
++                      (status != HV_ECPUERROR) &&
++                      (status != HV_ENOCPU)))
++                      goto fatal_errors;
+               /* First, see if we made any forward progress.
+                *
++               * Go through the cpu_list, count the target cpus that have
++               * received our mondo (n_sent), and those that did not (rem).
++               * Re-pack cpu_list with the cpus remain to be retried in the
++               * front - this simplifies tracking the truly stalled cpus.
++               *
+                * The hypervisor indicates successful sends by setting
+                * cpu list entries to the value 0xffff.
++               *
++               * EWOULDBLOCK means some target cpus did not receive the
++               * mondo and retry usually helps.
++               *
++               * ECPUERROR means at least one target cpu is in error state,
++               * it's usually safe to skip the faulty cpu and retry.
++               *
++               * ENOCPU means one of the target cpu doesn't belong to the
++               * domain, perhaps offlined which is unexpected, but not
++               * fatal and it's okay to skip the offlined cpu.
+                */
++              rem = 0;
+               n_sent = 0;
+               for (i = 0; i < cnt; i++) {
+-                      if (likely(cpu_list[i] == 0xffff))
++                      cpu = cpu_list[i];
++                      if (likely(cpu == 0xffff)) {
+                               n_sent++;
++                      } else if ((status == HV_ECPUERROR) &&
++                              (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) {
++                              ecpuerror_id = cpu + 1;
++                      } else if (status == HV_ENOCPU && !cpu_online(cpu)) {
++                              enocpu_id = cpu + 1;
++                      } else {
++                              cpu_list[rem++] = cpu;
++                      }
+               }
+-              forward_progress = 0;
+-              if (n_sent > prev_sent)
+-                      forward_progress = 1;
++              /* No cpu remained, we're done. */
++              if (rem == 0)
++                      break;
+-              prev_sent = n_sent;
++              /* Otherwise, update the cpu count for retry. */
++              cnt = rem;
+-              /* If we get a HV_ECPUERROR, then one or more of the cpus
+-               * in the list are in error state.  Use the cpu_state()
+-               * hypervisor call to find out which cpus are in error state.
++              /* Record the overall number of mondos received by the
++               * first of the remaining cpus.
+                */
+-              if (unlikely(status == HV_ECPUERROR)) {
+-                      for (i = 0; i < cnt; i++) {
+-                              long err;
+-                              u16 cpu;
+-
+-                              cpu = cpu_list[i];
+-                              if (cpu == 0xffff)
+-                                      continue;
+-
+-                              err = sun4v_cpu_state(cpu);
+-                              if (err == HV_CPU_STATE_ERROR) {
+-                                      saw_cpu_error = (cpu + 1);
+-                                      cpu_list[i] = 0xffff;
+-                              }
+-                      }
+-              } else if (unlikely(status != HV_EWOULDBLOCK))
+-                      goto fatal_mondo_error;
++              if (first_cpu != cpu_list[0]) {
++                      first_cpu = cpu_list[0];
++                      xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
++              }
+-              /* Don't bother rewriting the CPU list, just leave the
+-               * 0xffff and non-0xffff entries in there and the
+-               * hypervisor will do the right thing.
+-               *
+-               * Only advance timeout state if we didn't make any
+-               * forward progress.
++              /* Was any mondo delivered successfully? */
++              mondo_delivered = (n_sent > prev_sent);
++              prev_sent = n_sent;
++
++              /* or, was any target cpu busy processing other mondos? */
++              target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu));
++              xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
++
++              /* Retry count is for no progress. If we're making progress,
++               * reset the retry count.
+                */
+-              if (unlikely(!forward_progress)) {
+-                      if (unlikely(++retries > 10000))
+-                              goto fatal_mondo_timeout;
+-
+-                      /* Delay a little bit to let other cpus catch up
+-                       * on their cpu mondo queue work.
+-                       */
+-                      udelay(2 * cnt);
++              if (likely(mondo_delivered || target_cpu_busy)) {
++                      tot_retries += retries;
++                      retries = 0;
++              } else if (unlikely(retries > MONDO_RETRY_LIMIT)) {
++                      goto fatal_mondo_timeout;
+               }
+-      } while (1);
+-      if (unlikely(saw_cpu_error))
+-              goto fatal_mondo_cpu_error;
++              /* Delay a little bit to let other cpus catch up on
++               * their cpu mondo queue work.
++               */
++              if (!mondo_delivered)
++                      udelay(usec_wait);
+-      return;
++              retries++;
++      } while (1);
+-fatal_mondo_cpu_error:
+-      printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
+-             "(including %d) were in error state\n",
+-             this_cpu, saw_cpu_error - 1);
++xcall_done:
++      if (unlikely(ecpuerror_id > 0)) {
++              pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n",
++                     this_cpu, ecpuerror_id - 1);
++      } else if (unlikely(enocpu_id > 0)) {
++              pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n",
++                     this_cpu, enocpu_id - 1);
++      }
+       return;
++fatal_errors:
++      /* fatal errors include bad alignment, etc */
++      pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n",
++             this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
++      panic("Unexpected SUN4V mondo error %lu\n", status);
++
+ fatal_mondo_timeout:
+-      printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
+-             " progress after %d retries.\n",
+-             this_cpu, retries);
+-      goto dump_cpu_list_and_out;
+-
+-fatal_mondo_error:
+-      printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
+-             this_cpu, status);
+-      printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
+-             "mondo_block_pa(%lx)\n",
+-             this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
+-
+-dump_cpu_list_and_out:
+-      printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
+-      for (i = 0; i < cnt; i++)
+-              printk("%u ", cpu_list[i]);
+-      printk("]\n");
++      /* some cpus being non-responsive to the cpu mondo */
++      pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n",
++             this_cpu, first_cpu, (tot_retries + retries), tot_cpus);
++      panic("SUN4V mondo timeout panic\n");
+ }
+ static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
+--- a/arch/sparc/kernel/sun4v_ivec.S
++++ b/arch/sparc/kernel/sun4v_ivec.S
+@@ -26,6 +26,21 @@ sun4v_cpu_mondo:
+       ldxa    [%g0] ASI_SCRATCHPAD, %g4
+       sub     %g4, TRAP_PER_CPU_FAULT_INFO, %g4
++      /* Get smp_processor_id() into %g3 */
++      sethi   %hi(trap_block), %g5
++      or      %g5, %lo(trap_block), %g5
++      sub     %g4, %g5, %g3
++      srlx    %g3, TRAP_BLOCK_SZ_SHIFT, %g3
++
++      /* Increment cpu_mondo_counter[smp_processor_id()] */
++      sethi   %hi(cpu_mondo_counter), %g5
++      or      %g5, %lo(cpu_mondo_counter), %g5
++      sllx    %g3, 3, %g3
++      add     %g5, %g3, %g5
++      ldx     [%g5], %g3
++      add     %g3, 1, %g3
++      stx     %g3, [%g5]
++
+       /* Get CPU mondo queue base phys address into %g7.  */
+       ldx     [%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7
+--- a/arch/sparc/kernel/traps_64.c
++++ b/arch/sparc/kernel/traps_64.c
+@@ -2659,6 +2659,7 @@ void do_getpsr(struct pt_regs *regs)
+       }
+ }
++u64 cpu_mondo_counter[NR_CPUS] = {0};
+ struct trap_per_cpu trap_block[NR_CPUS];
+ EXPORT_SYMBOL(trap_block);
diff --git a/queue-4.4/sparc64-prevent-perf-from-running-during-super-critical-sections.patch b/queue-4.4/sparc64-prevent-perf-from-running-during-super-critical-sections.patch
new file mode 100644 (file)
index 0000000..a6bb818
--- /dev/null
@@ -0,0 +1,133 @@
+From foo@baz Tue Aug  8 16:56:08 PDT 2017
+From: Rob Gardner <rob.gardner@oracle.com>
+Date: Mon, 17 Jul 2017 09:22:27 -0600
+Subject: sparc64: Prevent perf from running during super critical sections
+
+From: Rob Gardner <rob.gardner@oracle.com>
+
+
+[ Upstream commit fc290a114fc6034b0f6a5a46e2fb7d54976cf87a ]
+
+This fixes another cause of random segfaults and bus errors that may
+occur while running perf with the callgraph option.
+
+Critical sections beginning with spin_lock_irqsave() raise the interrupt
+level to PIL_NORMAL_MAX (14) and intentionally do not block performance
+counter interrupts, which arrive at PIL_NMI (15).
+
+But some sections of code are "super critical" with respect to perf
+because the perf_callchain_user() path accesses user space and may cause
+TLB activity as well as faults as it unwinds the user stack.
+
+One particular critical section occurs in switch_mm:
+
+        spin_lock_irqsave(&mm->context.lock, flags);
+        ...
+        load_secondary_context(mm);
+        tsb_context_switch(mm);
+        ...
+        spin_unlock_irqrestore(&mm->context.lock, flags);
+
+If a perf interrupt arrives in between load_secondary_context() and
+tsb_context_switch(), then perf_callchain_user() could execute with
+the context ID of one process, but with an active TSB for a different
+process. When the user stack is accessed, it is very likely to
+incur a TLB miss, since the h/w context ID has been changed. The TLB
+will then be reloaded with a translation from the TSB for one process,
+but using a context ID for another process. This exposes memory from
+one process to another, and since it is a mapping for stack memory,
+this usually causes the new process to crash quickly.
+
+This super critical section needs more protection than is provided
+by spin_lock_irqsave() since perf interrupts must not be allowed in.
+
+Since __tsb_context_switch already goes through the trouble of
+disabling interrupts completely, we fix this by moving the secondary
+context load down into this better protected region.
+
+Orabug: 25577560
+
+Signed-off-by: Dave Aldridge <david.j.aldridge@oracle.com>
+Signed-off-by: Rob Gardner <rob.gardner@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/mmu_context_64.h |   12 +++++++-----
+ arch/sparc/kernel/tsb.S                 |   12 ++++++++++++
+ arch/sparc/power/hibernate.c            |    3 +--
+ 3 files changed, 20 insertions(+), 7 deletions(-)
+
+--- a/arch/sparc/include/asm/mmu_context_64.h
++++ b/arch/sparc/include/asm/mmu_context_64.h
+@@ -25,9 +25,11 @@ void destroy_context(struct mm_struct *m
+ void __tsb_context_switch(unsigned long pgd_pa,
+                         struct tsb_config *tsb_base,
+                         struct tsb_config *tsb_huge,
+-                        unsigned long tsb_descr_pa);
++                        unsigned long tsb_descr_pa,
++                        unsigned long secondary_ctx);
+-static inline void tsb_context_switch(struct mm_struct *mm)
++static inline void tsb_context_switch_ctx(struct mm_struct *mm,
++                                        unsigned long ctx)
+ {
+       __tsb_context_switch(__pa(mm->pgd),
+                            &mm->context.tsb_block[0],
+@@ -38,7 +40,8 @@ static inline void tsb_context_switch(st
+ #else
+                            NULL
+ #endif
+-                           , __pa(&mm->context.tsb_descr[0]));
++                           , __pa(&mm->context.tsb_descr[0]),
++                           ctx);
+ }
+ void tsb_grow(struct mm_struct *mm,
+@@ -110,8 +113,7 @@ static inline void switch_mm(struct mm_s
+        * cpu0 to update it's TSB because at that point the cpu_vm_mask
+        * only had cpu1 set in it.
+        */
+-      load_secondary_context(mm);
+-      tsb_context_switch(mm);
++      tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context));
+       /* Any time a processor runs a context on an address space
+        * for the first time, we must flush that context out of the
+--- a/arch/sparc/kernel/tsb.S
++++ b/arch/sparc/kernel/tsb.S
+@@ -375,6 +375,7 @@ tsb_flush:
+        * %o1: TSB base config pointer
+        * %o2: TSB huge config pointer, or NULL if none
+        * %o3: Hypervisor TSB descriptor physical address
++       * %o4: Secondary context to load, if non-zero
+        *
+        * We have to run this whole thing with interrupts
+        * disabled so that the current cpu doesn't change
+@@ -387,6 +388,17 @@ __tsb_context_switch:
+       rdpr    %pstate, %g1
+       wrpr    %g1, PSTATE_IE, %pstate
++      brz,pn  %o4, 1f
++       mov    SECONDARY_CONTEXT, %o5
++
++661:  stxa    %o4, [%o5] ASI_DMMU
++      .section .sun4v_1insn_patch, "ax"
++      .word   661b
++      stxa    %o4, [%o5] ASI_MMU
++      .previous
++      flush   %g6
++
++1:
+       TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
+       stx     %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR]
+--- a/arch/sparc/power/hibernate.c
++++ b/arch/sparc/power/hibernate.c
+@@ -35,6 +35,5 @@ void restore_processor_state(void)
+ {
+       struct mm_struct *mm = current->active_mm;
+-      load_secondary_context(mm);
+-      tsb_context_switch(mm);
++      tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context));
+ }
diff --git a/queue-4.4/xen-netback-correctly-schedule-rate-limited-queues.patch b/queue-4.4/xen-netback-correctly-schedule-rate-limited-queues.patch
new file mode 100644 (file)
index 0000000..e738bdc
--- /dev/null
@@ -0,0 +1,82 @@
+From foo@baz Tue Aug  8 16:51:58 PDT 2017
+From: Wei Liu <wei.liu2@citrix.com>
+Date: Wed, 21 Jun 2017 10:21:22 +0100
+Subject: xen-netback: correctly schedule rate-limited queues
+
+From: Wei Liu <wei.liu2@citrix.com>
+
+
+[ Upstream commit dfa523ae9f2542bee4cddaea37b3be3e157f6e6b ]
+
+Add a flag to indicate if a queue is rate-limited. Test the flag in
+NAPI poll handler and avoid rescheduling the queue if true, otherwise
+we risk locking up the host. The rescheduling will be done in the
+timer callback function.
+
+Reported-by: Jean-Louis Dupond <jean-louis@dupond.be>
+Signed-off-by: Wei Liu <wei.liu2@citrix.com>
+Tested-by: Jean-Louis Dupond <jean-louis@dupond.be>
+Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/xen-netback/common.h    |    1 +
+ drivers/net/xen-netback/interface.c |    6 +++++-
+ drivers/net/xen-netback/netback.c   |    6 +++++-
+ 3 files changed, 11 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/xen-netback/common.h
++++ b/drivers/net/xen-netback/common.h
+@@ -201,6 +201,7 @@ struct xenvif_queue { /* Per-queue data
+       unsigned long   remaining_credit;
+       struct timer_list credit_timeout;
+       u64 credit_window_start;
++      bool rate_limited;
+       /* Statistics */
+       struct xenvif_stats stats;
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -105,7 +105,11 @@ static int xenvif_poll(struct napi_struc
+       if (work_done < budget) {
+               napi_complete(napi);
+-              xenvif_napi_schedule_or_enable_events(queue);
++              /* If the queue is rate-limited, it shall be
++               * rescheduled in the timer callback.
++               */
++              if (likely(!queue->rate_limited))
++                      xenvif_napi_schedule_or_enable_events(queue);
+       }
+       return work_done;
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -687,6 +687,7 @@ static void tx_add_credit(struct xenvif_
+               max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
+       queue->remaining_credit = min(max_credit, max_burst);
++      queue->rate_limited = false;
+ }
+ void xenvif_tx_credit_callback(unsigned long data)
+@@ -1184,8 +1185,10 @@ static bool tx_credit_exceeded(struct xe
+               msecs_to_jiffies(queue->credit_usec / 1000);
+       /* Timer could already be pending in rare cases. */
+-      if (timer_pending(&queue->credit_timeout))
++      if (timer_pending(&queue->credit_timeout)) {
++              queue->rate_limited = true;
+               return true;
++      }
+       /* Passed the point where we can replenish credit? */
+       if (time_after_eq64(now, next_credit)) {
+@@ -1200,6 +1203,7 @@ static bool tx_credit_exceeded(struct xe
+               mod_timer(&queue->credit_timeout,
+                         next_credit);
+               queue->credit_window_start = next_credit;
++              queue->rate_limited = true;
+               return true;
+       }