--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Tue, 25 Jul 2017 09:44:25 -0700
+Subject: bonding: commit link status change after propose
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit d94708a553022bf012fa95af10532a134eeb5a52 ]
+
+Commit de77ecd4ef02 ("bonding: improve link-status update in mii-monitoring")
+moves link status commitment into bond_mii_monitor(), but it still relies
+on the return value of bond_miimon_inspect() as the hint. We need to return
+non-zero whenever we propose a link status change.
+
+Fixes: de77ecd4ef02 ("bonding: improve link-status update in mii-monitoring")
+Reported-by: Benjamin Gilbert <benjamin.gilbert@coreos.com>
+Tested-by: Benjamin Gilbert <benjamin.gilbert@coreos.com>
+Cc: Mahesh Bandewar <maheshb@google.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Acked-by: Mahesh Bandewar <maheshb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -2047,6 +2047,7 @@ static int bond_miimon_inspect(struct bo
+ continue;
+
+ bond_propose_link_state(slave, BOND_LINK_FAIL);
++ commit++;
+ slave->delay = bond->params.downdelay;
+ if (slave->delay) {
+ netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n",
+@@ -2085,6 +2086,7 @@ static int bond_miimon_inspect(struct bo
+ continue;
+
+ bond_propose_link_state(slave, BOND_LINK_BACK);
++ commit++;
+ slave->delay = bond->params.updelay;
+
+ if (slave->delay) {
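+
+To see the contract this restores in isolation: bond_miimon_inspect()
+only records proposed link states and returns a count of proposals, and
+bond_mii_monitor() runs the commit pass only when that count is
+non-zero. A minimal userspace sketch of the pattern (illustrative
+types, not the driver's):
+
+    /* Propose/commit split: inspect counts every proposal so the
+     * monitor knows a commit pass is needed. */
+    #include <stdio.h>
+
+    enum link { LINK_UP, LINK_FAIL };
+    struct slave { enum link state, proposed; };
+
+    static int inspect(struct slave *s, int n)
+    {
+        int commit = 0;
+
+        for (int i = 0; i < n; i++) {
+            if (s[i].state == LINK_UP) {        /* pretend carrier lost */
+                s[i].proposed = LINK_FAIL;      /* propose only */
+                commit++;                       /* the fix: count it */
+            }
+        }
+        return commit;
+    }
+
+    int main(void)
+    {
+        struct slave s[1] = { { LINK_UP, LINK_UP } };
+
+        if (inspect(s, 1))                      /* 0 would skip commit */
+            s[0].state = s[0].proposed;         /* commit phase */
+        printf("state=%d\n", s[0].state);
+        return 0;
+    }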
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 26 Jul 2017 14:20:15 +0800
+Subject: dccp: fix a memleak for dccp_feat_init err process
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit e90ce2fc27cad7e7b1e72b9e66201a7a4c124c2b ]
+
+In dccp_feat_init, when ccid_get_builtin_ccids fails to alloc
+memory for rx.val, it should free tx.val before returning an
+error.
+
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/feat.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/dccp/feat.c
++++ b/net/dccp/feat.c
+@@ -1471,9 +1471,12 @@ int dccp_feat_init(struct sock *sk)
+ * singleton values (which always leads to failure).
+ * These settings can still (later) be overridden via sockopts.
+ */
+- if (ccid_get_builtin_ccids(&tx.val, &tx.len) ||
+- ccid_get_builtin_ccids(&rx.val, &rx.len))
++ if (ccid_get_builtin_ccids(&tx.val, &tx.len))
+ return -ENOBUFS;
++ if (ccid_get_builtin_ccids(&rx.val, &rx.len)) {
++ kfree(tx.val);
++ return -ENOBUFS;
++ }
+
+ if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) ||
+ !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len))
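+
+The fix applies the usual unwind rule: when a later allocation fails,
+everything allocated before it must be released on the way out. The
+shape of the pattern, reduced to standalone C (illustrative names):
+
+    #include <errno.h>
+    #include <stdlib.h>
+
+    struct feat { void *tx_val, *rx_val; };
+
+    static int feat_init(struct feat *f)
+    {
+        f->tx_val = malloc(16);
+        if (!f->tx_val)
+            return -ENOBUFS;        /* nothing to unwind yet */
+
+        f->rx_val = malloc(16);
+        if (!f->rx_val) {
+            free(f->tx_val);        /* the fix: undo the first alloc */
+            f->tx_val = NULL;
+            return -ENOBUFS;
+        }
+        return 0;
+    }
+
+    int main(void)
+    {
+        struct feat f;
+
+        return feat_init(&f) ? 1 : 0;
+    }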
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 26 Jul 2017 14:19:46 +0800
+Subject: dccp: fix a memleak that dccp_ipv4 doesn't put reqsk properly
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit b7953d3c0e30a5fc944f6b7bd0bcceb0794bcd85 ]
+
+The patch "dccp: fix a memleak that dccp_ipv6 doesn't put reqsk
+properly" fixed reqsk refcnt leak for dccp_ipv6. The same issue
+exists on dccp_ipv4.
+
+This patch is to fix it for dccp_ipv4.
+
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv4.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -631,6 +631,7 @@ int dccp_v4_conn_request(struct sock *sk
+ goto drop_and_free;
+
+ inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
++ reqsk_put(req);
+ return 0;
+
+ drop_and_free:
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 26 Jul 2017 14:19:09 +0800
+Subject: dccp: fix a memleak that dccp_ipv6 doesn't put reqsk properly
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 0c2232b0a71db0ac1d22f751aa1ac0cadb950fd2 ]
+
+In dccp_v6_conn_request, after the reqsk gets allocated and hashed into
+the ehash table, its refcnt is set to 3: one reference is for
+req->rsk_timer, one is for the hlist, and the other one is for the
+current caller.
+
+The problem is that when dccp_v6_conn_request returns and finishes using
+the reqsk, it doesn't put it. This leaks the reqsk refcnt, so the reqsk
+object never gets freed.
+
+Jianlin found this issue when running dccp_memleak.c in a loop; the
+system would run out of memory.
+
+dccp_memleak.c:
+ int s1 = socket(PF_INET6, 6, IPPROTO_IP);
+ bind(s1, &sa1, 0x20);
+ listen(s1, 0x9);
+ int s2 = socket(PF_INET6, 6, IPPROTO_IP);
+ connect(s2, &sa1, 0x20);
+ close(s1);
+ close(s2);
+
+This patch puts the reqsk before dccp_v6_conn_request returns, just
+as tcp_conn_request does.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv6.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -380,6 +380,7 @@ static int dccp_v6_conn_request(struct s
+ goto drop_and_free;
+
+ inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
++ reqsk_put(req);
+ return 0;
+
+ drop_and_free:
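+
+The refcount balance the fix restores can be modeled on its own: the
+hash add takes the timer and hlist references, and the caller must
+drop the one it got from allocation. A sketch with hypothetical
+helpers (the real code uses refcount_t and reqsk_put()):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    struct req { int refcnt; };
+
+    static struct req *req_alloc(void)
+    {
+        struct req *r = calloc(1, sizeof(*r));
+
+        if (r)
+            r->refcnt = 1;          /* caller's reference */
+        return r;
+    }
+
+    static void req_put(struct req *r)
+    {
+        if (--r->refcnt == 0)
+            free(r);                /* last reference frees the object */
+    }
+
+    int main(void)
+    {
+        struct req *r = req_alloc();
+
+        r->refcnt += 2;             /* hash add: timer + hlist refs */
+        req_put(r);                 /* the fix: drop the caller's ref */
+        printf("refcnt=%d (timer + hlist remain)\n", r->refcnt);
+        return 0;
+    }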
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Fri, 28 Jul 2017 23:27:44 +0300
+Subject: ipv4: fib: Fix NULL pointer deref during fib_sync_down_dev()
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+
+[ Upstream commit 71ed7ee35ad2c5300f4b51634185a0193b4fb0fa ]
+
+Michał reported a NULL pointer deref during fib_sync_down_dev() when
+unregistering a netdevice. The problem is that we don't check for
+'in_dev' being NULL, which can happen in very specific cases.
+
+Usually routes are flushed upon NETDEV_DOWN sent in either the netdev or
+the inetaddr notification chains. However, if an interface isn't
+configured with any IP address, then it's possible for host routes to be
+flushed following NETDEV_UNREGISTER, after NULLing dev->ip_ptr in
+inetdev_destroy().
+
+To reproduce:
+$ ip link add type dummy
+$ ip route add local 1.1.1.0/24 dev dummy0
+$ ip link del dev dummy0
+
+Fix this by checking for the presence of 'in_dev' before referencing it.
+
+Fixes: 982acb97560c ("ipv4: fib: Notify about nexthop status changes")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Reported-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
+Tested-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_semantics.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -1372,7 +1372,7 @@ static int call_fib_nh_notifiers(struct
+ return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
+ &info.info);
+ case FIB_EVENT_NH_DEL:
+- if ((IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
++ if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+ fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
+ (fib_nh->nh_flags & RTNH_F_DEAD))
+ return call_fib_notifiers(dev_net(fib_nh->nh_dev),
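+
+The fix works because && evaluates left to right and stops early, so
+the macro argument is never dereferenced when in_dev is NULL. In
+isolation (illustrative struct in place of the real in_device):
+
+    #include <stdio.h>
+
+    struct in_device { int ignore_linkdown; };
+
+    static int nh_del_notify(struct in_device *in_dev, int linkdown)
+    {
+        /* if in_dev is NULL, the flag is never read */
+        return (in_dev && in_dev->ignore_linkdown && linkdown);
+    }
+
+    int main(void)
+    {
+        printf("%d\n", nh_del_notify(NULL, 1)); /* 0, no deref */
+        return 0;
+    }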
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Mahesh Bandewar <maheshb@google.com>
+Date: Wed, 19 Jul 2017 15:41:33 -0700
+Subject: ipv4: initialize fib_trie prior to register_netdev_notifier call.
+
+From: Mahesh Bandewar <maheshb@google.com>
+
+
+[ Upstream commit 8799a221f5944a7d74516ecf46d58c28ec1d1f75 ]
+
+Net stack initialization currently initializes the fib-trie after the
+first netdevice_notifier() call. In fact, fib_trie initialization
+needs to happen before the first rtnl_register(). This does not cause any
+problem since no devices are UP at this moment, but trying to bring 'lo'
+UP at initialization would make this assumption wrong and expose the issue.
+
+Fixes the following crash:
+
+ Call Trace:
+ ? alternate_node_alloc+0x76/0xa0
+ fib_table_insert+0x1b7/0x4b0
+ fib_magic.isra.17+0xea/0x120
+ fib_add_ifaddr+0x7b/0x190
+ fib_netdev_event+0xc0/0x130
+ register_netdevice_notifier+0x1c1/0x1d0
+ ip_fib_init+0x72/0x85
+ ip_rt_init+0x187/0x1e9
+ ip_init+0xe/0x1a
+ inet_init+0x171/0x26c
+ ? ipv4_offload_init+0x66/0x66
+ do_one_initcall+0x43/0x160
+ kernel_init_freeable+0x191/0x219
+ ? rest_init+0x80/0x80
+ kernel_init+0xe/0x150
+ ret_from_fork+0x22/0x30
+ Code: f6 46 23 04 74 86 4c 89 f7 e8 ae 45 01 00 49 89 c7 4d 85 ff 0f 85 7b ff ff ff 31 db eb 08 4c 89 ff e8 16 47 01 00 48 8b 44 24 38 <45> 8b 6e 14 4d 63 76 74 48 89 04 24 0f 1f 44 00 00 48 83 c4 08
+ RIP: kmem_cache_alloc+0xcf/0x1c0 RSP: ffff9b1500017c28
+ CR2: 0000000000000014
+
+Fixes: 7b1a74fdbb9e ("[NETNS]: Refactor fib initialization so it can handle multiple namespaces.")
+Fixes: 7f9b80529b8a ("[IPV4]: fib hash|trie initialization")
+
+Signed-off-by: Mahesh Bandewar <maheshb@google.com>
+Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_frontend.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -1327,13 +1327,14 @@ static struct pernet_operations fib_net_
+
+ void __init ip_fib_init(void)
+ {
+- rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
+- rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
+- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
++ fib_trie_init();
+
+ register_pernet_subsys(&fib_net_ops);
++
+ register_netdevice_notifier(&fib_netdev_notifier);
+ register_inetaddr_notifier(&fib_inetaddr_notifier);
+
+- fib_trie_init();
++ rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
++ rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
++ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
+ }
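+
+The general rule behind the reorder: registering a notifier may invoke
+the callback immediately (replaying state that already exists), so
+every structure the callback touches must be initialized beforehand.
+A condensed sketch (illustrative, not the kernel's notifier API):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    static int *trie;                   /* stands in for the fib trie */
+
+    static void fib_event(void)
+    {
+        trie[0]++;                      /* oops if trie is still NULL */
+    }
+
+    static void register_notifier(void (*cb)(void))
+    {
+        cb();       /* replays events for devices that are already up */
+    }
+
+    int main(void)
+    {
+        trie = calloc(1, sizeof(*trie));    /* the fix: init first... */
+        register_notifier(fib_event);       /* ...then register */
+        printf("entries=%d\n", trie[0]);
+        return 0;
+    }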
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Alexander Potapenko <glider@google.com>
+Date: Mon, 17 Jul 2017 12:35:58 +0200
+Subject: ipv4: ipv6: initialize treq->txhash in cookie_v[46]_check()
+
+From: Alexander Potapenko <glider@google.com>
+
+
+[ Upstream commit 18bcf2907df935981266532e1e0d052aff2e6fae ]
+
+KMSAN reported use of uninitialized memory in skb_set_hash_from_sk(),
+which originated from the TCP request socket created in
+cookie_v6_check():
+
+ ==================================================================
+ BUG: KMSAN: use of uninitialized memory in tcp_transmit_skb+0xf77/0x3ec0
+ CPU: 1 PID: 2949 Comm: syz-execprog Not tainted 4.11.0-rc5+ #2931
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
+ TCP: request_sock_TCPv6: Possible SYN flooding on port 20028. Sending cookies. Check SNMP counters.
+ Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:16
+ dump_stack+0x172/0x1c0 lib/dump_stack.c:52
+ kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927
+ __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469
+ skb_set_hash_from_sk ./include/net/sock.h:2011
+ tcp_transmit_skb+0xf77/0x3ec0 net/ipv4/tcp_output.c:983
+ tcp_send_ack+0x75b/0x830 net/ipv4/tcp_output.c:3493
+ tcp_delack_timer_handler+0x9a6/0xb90 net/ipv4/tcp_timer.c:284
+ tcp_delack_timer+0x1b0/0x310 net/ipv4/tcp_timer.c:309
+ call_timer_fn+0x240/0x520 kernel/time/timer.c:1268
+ expire_timers kernel/time/timer.c:1307
+ __run_timers+0xc13/0xf10 kernel/time/timer.c:1601
+ run_timer_softirq+0x36/0xa0 kernel/time/timer.c:1614
+ __do_softirq+0x485/0x942 kernel/softirq.c:284
+ invoke_softirq kernel/softirq.c:364
+ irq_exit+0x1fa/0x230 kernel/softirq.c:405
+ exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:657
+ smp_apic_timer_interrupt+0x5a/0x80 arch/x86/kernel/apic/apic.c:966
+ apic_timer_interrupt+0x86/0x90 arch/x86/entry/entry_64.S:489
+ RIP: 0010:native_restore_fl ./arch/x86/include/asm/irqflags.h:36
+ RIP: 0010:arch_local_irq_restore ./arch/x86/include/asm/irqflags.h:77
+ RIP: 0010:__msan_poison_alloca+0xed/0x120 mm/kmsan/kmsan_instr.c:440
+ RSP: 0018:ffff880024917cd8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff10
+ RAX: 0000000000000246 RBX: ffff8800224c0000 RCX: 0000000000000005
+ RDX: 0000000000000004 RSI: ffff880000000000 RDI: ffffea0000b6d770
+ RBP: ffff880024917d58 R08: 0000000000000dd8 R09: 0000000000000004
+ R10: 0000160000000000 R11: 0000000000000000 R12: ffffffff85abf810
+ R13: ffff880024917dd8 R14: 0000000000000010 R15: ffffffff81cabde4
+ </IRQ>
+ poll_select_copy_remaining+0xac/0x6b0 fs/select.c:293
+ SYSC_select+0x4b4/0x4e0 fs/select.c:653
+ SyS_select+0x76/0xa0 fs/select.c:634
+ entry_SYSCALL_64_fastpath+0x13/0x94 arch/x86/entry/entry_64.S:204
+ RIP: 0033:0x4597e7
+ RSP: 002b:000000c420037ee0 EFLAGS: 00000246 ORIG_RAX: 0000000000000017
+ RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004597e7
+ RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
+ RBP: 000000c420037ef0 R08: 000000c420037ee0 R09: 0000000000000059
+ R10: 0000000000000000 R11: 0000000000000246 R12: 000000000042dc20
+ R13: 00000000000000f3 R14: 0000000000000030 R15: 0000000000000003
+ chained origin:
+ save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59
+ kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302
+ kmsan_save_stack mm/kmsan/kmsan.c:317
+ kmsan_internal_chain_origin+0x12a/0x1f0 mm/kmsan/kmsan.c:547
+ __msan_store_shadow_origin_4+0xac/0x110 mm/kmsan/kmsan_instr.c:259
+ tcp_create_openreq_child+0x709/0x1ae0 net/ipv4/tcp_minisocks.c:472
+ tcp_v6_syn_recv_sock+0x7eb/0x2a30 net/ipv6/tcp_ipv6.c:1103
+ tcp_get_cookie_sock+0x136/0x5f0 net/ipv4/syncookies.c:212
+ cookie_v6_check+0x17a9/0x1b50 net/ipv6/syncookies.c:245
+ tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989
+ tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298
+ tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487
+ ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279
+ NF_HOOK ./include/linux/netfilter.h:257
+ ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322
+ dst_input ./include/net/dst.h:492
+ ip6_rcv_finish net/ipv6/ip6_input.c:69
+ NF_HOOK ./include/linux/netfilter.h:257
+ ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203
+ __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208
+ __netif_receive_skb net/core/dev.c:4246
+ process_backlog+0x667/0xba0 net/core/dev.c:4866
+ napi_poll net/core/dev.c:5268
+ net_rx_action+0xc95/0x1590 net/core/dev.c:5333
+ __do_softirq+0x485/0x942 kernel/softirq.c:284
+ origin:
+ save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59
+ kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302
+ kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198
+ kmsan_kmalloc+0x7f/0xe0 mm/kmsan/kmsan.c:337
+ kmem_cache_alloc+0x1c2/0x1e0 mm/slub.c:2766
+ reqsk_alloc ./include/net/request_sock.h:87
+ inet_reqsk_alloc+0xa4/0x5b0 net/ipv4/tcp_input.c:6200
+ cookie_v6_check+0x4f4/0x1b50 net/ipv6/syncookies.c:169
+ tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989
+ tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298
+ tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487
+ ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279
+ NF_HOOK ./include/linux/netfilter.h:257
+ ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322
+ dst_input ./include/net/dst.h:492
+ ip6_rcv_finish net/ipv6/ip6_input.c:69
+ NF_HOOK ./include/linux/netfilter.h:257
+ ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203
+ __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208
+ __netif_receive_skb net/core/dev.c:4246
+ process_backlog+0x667/0xba0 net/core/dev.c:4866
+ napi_poll net/core/dev.c:5268
+ net_rx_action+0xc95/0x1590 net/core/dev.c:5333
+ __do_softirq+0x485/0x942 kernel/softirq.c:284
+ ==================================================================
+
+Similar error is reported for cookie_v4_check().
+
+Fixes: 58d607d3e52f ("tcp: provide skb->hash to synack packets")
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/syncookies.c | 1 +
+ net/ipv6/syncookies.c | 1 +
+ 2 files changed, 2 insertions(+)
+
+--- a/net/ipv4/syncookies.c
++++ b/net/ipv4/syncookies.c
+@@ -332,6 +332,7 @@ struct sock *cookie_v4_check(struct sock
+ treq->rcv_isn = ntohl(th->seq) - 1;
+ treq->snt_isn = cookie;
+ treq->ts_off = 0;
++ treq->txhash = net_tx_rndhash();
+ req->mss = mss;
+ ireq->ir_num = ntohs(th->dest);
+ ireq->ir_rmt_port = th->source;
+--- a/net/ipv6/syncookies.c
++++ b/net/ipv6/syncookies.c
+@@ -215,6 +215,7 @@ struct sock *cookie_v6_check(struct sock
+ treq->rcv_isn = ntohl(th->seq) - 1;
+ treq->snt_isn = cookie;
+ treq->ts_off = 0;
++ treq->txhash = net_tx_rndhash();
+
+ /*
+ * We need to lookup the dst_entry to get the correct window size.
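+
+The underlying rule: the cookie path builds the request socket by
+hand, so every field the regular SYN path initializes must be
+initialized here too before anything reads it. Reduced to standalone C
+(names illustrative):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    struct treq { unsigned rcv_isn, snt_isn, txhash; };
+
+    int main(void)
+    {
+        struct treq *t = malloc(sizeof(*t));    /* not zeroed */
+
+        t->rcv_isn = 1;
+        t->snt_isn = 2;
+        t->txhash = (unsigned)rand();   /* the fix: seed it, as
+                                         * net_tx_rndhash() does */
+        printf("txhash=%u\n", t->txhash);
+        free(t);
+        return 0;
+    }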
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Wed, 19 Jul 2017 22:28:55 +0200
+Subject: ipv6: avoid overflow of offset in ip6_find_1stfragopt
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+
+[ Upstream commit 6399f1fae4ec29fab5ec76070435555e256ca3a6 ]
+
+In some cases, offset can overflow and can cause an infinite loop in
+ip6_find_1stfragopt(). Make it unsigned int to prevent the overflow, and
+cap it at IPV6_MAXPLEN, since packets larger than that should be invalid.
+
+This problem has been here since before the beginning of git history.
+
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/output_core.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/output_core.c
++++ b/net/ipv6/output_core.c
+@@ -78,7 +78,7 @@ EXPORT_SYMBOL(ipv6_select_ident);
+
+ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
+ {
+- u16 offset = sizeof(struct ipv6hdr);
++ unsigned int offset = sizeof(struct ipv6hdr);
+ unsigned int packet_len = skb_tail_pointer(skb) -
+ skb_network_header(skb);
+ int found_rhdr = 0;
+@@ -86,6 +86,7 @@ int ip6_find_1stfragopt(struct sk_buff *
+
+ while (offset <= packet_len) {
+ struct ipv6_opt_hdr *exthdr;
++ unsigned int len;
+
+ switch (**nexthdr) {
+
+@@ -111,7 +112,10 @@ int ip6_find_1stfragopt(struct sk_buff *
+
+ exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+ offset);
+- offset += ipv6_optlen(exthdr);
++ len = ipv6_optlen(exthdr);
++ if (len + offset >= IPV6_MAXPLEN)
++ return -EINVAL;
++ offset += len;
+ *nexthdr = &exthdr->nexthdr;
+ }
+
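+
+The wrap is easy to demonstrate on its own: a u16 offset plus a large
+extension-header length overflows, and the "offset <= packet_len" loop
+condition then holds forever. Standalone (illustrative values):
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+        unsigned short offset = 65530;  /* the old u16, near its max */
+        unsigned int packet_len = 65535;
+
+        offset += 40;                   /* wraps to 34 */
+        printf("offset=%u <= %u still true\n", offset, packet_len);
+        /* the fix widens offset to unsigned int and rejects
+         * len + offset >= IPV6_MAXPLEN before advancing */
+        return 0;
+    }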
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Mon, 24 Jul 2017 23:14:28 +0200
+Subject: ipv6: Don't increase IPSTATS_MIB_FRAGFAILS twice in ip6_fragment()
+
+From: Stefano Brivio <sbrivio@redhat.com>
+
+
+[ Upstream commit afce615aaabfbaad02550e75c0bec106dafa1adf ]
+
+RFC 2465 defines ipv6IfStatsOutFragFails as:
+
+ "The number of IPv6 datagrams that have been discarded
+ because they needed to be fragmented at this output
+ interface but could not be."
+
+The existing implementation, instead, would increase the counter
+twice when we fail to allocate room for a single fragment:
+once for the fragment, once for the datagram.
+
+This didn't look intentional though. In one of the two affected
+failure paths, the double increase was simply a result of a new
+'goto fail' statement, introduced to avoid a skb leak. The other
+path appears to be affected since at least 2.6.12-rc2.
+
+Reported-by: Sabrina Dubroca <sdubroca@redhat.com>
+Fixes: 1d325d217c7f ("ipv6: ip6_fragment: fix headroom tests and skb leak")
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_output.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -673,8 +673,6 @@ int ip6_fragment(struct net *net, struct
+ *prevhdr = NEXTHDR_FRAGMENT;
+ tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
+ if (!tmp_hdr) {
+- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+- IPSTATS_MIB_FRAGFAILS);
+ err = -ENOMEM;
+ goto fail;
+ }
+@@ -793,8 +791,6 @@ slow_path:
+ frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
+ hroom + troom, GFP_ATOMIC);
+ if (!frag) {
+- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+- IPSTATS_MIB_FRAGFAILS);
+ err = -ENOMEM;
+ goto fail;
+ }
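+
+After the change, the accounting follows the usual goto-fail shape:
+no counter bump at the individual failing allocations, exactly one at
+the common exit that stands for the datagram. In miniature
+(illustrative, the real counter is IPSTATS_MIB_FRAGFAILS):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    static unsigned long frag_fails;
+
+    static int fragment(size_t hlen, size_t flen)
+    {
+        void *hdr = NULL, *frag = NULL;
+        int err = 0;
+
+        hdr = malloc(hlen);
+        if (!hdr) {
+            err = -1;           /* no bump here... */
+            goto fail;
+        }
+        frag = malloc(flen);
+        if (!frag)
+            err = -1;           /* ...nor here */
+    fail:
+        if (err)
+            frag_fails++;       /* once per datagram */
+        free(frag);
+        free(hdr);
+        return err;
+    }
+
+    int main(void)
+    {
+        fragment(40, 1280);
+        printf("frag_fails=%lu\n", frag_fails);
+        return 0;
+    }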
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Thomas Jarosch <thomas.jarosch@intra2net.com>
+Date: Sat, 22 Jul 2017 17:14:34 +0200
+Subject: mcs7780: Fix initialization when CONFIG_VMAP_STACK is enabled
+
+From: Thomas Jarosch <thomas.jarosch@intra2net.com>
+
+
+[ Upstream commit 9476d393667968b4a02afbe9d35a3558482b943e ]
+
+DMA transfers are not allowed to buffers that are on the stack.
+Therefore allocate a buffer to store the result of usb_control_msg().
+
+Fixes these bug reports:
+https://bugzilla.kernel.org/show_bug.cgi?id=195217
+
+https://bugzilla.redhat.com/show_bug.cgi?id=1421387
+https://bugzilla.redhat.com/show_bug.cgi?id=1427398
+
+Shortened kernel backtrace from 4.11.9-200.fc25.x86_64:
+kernel: ------------[ cut here ]------------
+kernel: WARNING: CPU: 3 PID: 2957 at drivers/usb/core/hcd.c:1587
+kernel: transfer buffer not dma capable
+kernel: Call Trace:
+kernel: dump_stack+0x63/0x86
+kernel: __warn+0xcb/0xf0
+kernel: warn_slowpath_fmt+0x5a/0x80
+kernel: usb_hcd_map_urb_for_dma+0x37f/0x570
+kernel: ? try_to_del_timer_sync+0x53/0x80
+kernel: usb_hcd_submit_urb+0x34e/0xb90
+kernel: ? schedule_timeout+0x17e/0x300
+kernel: ? del_timer_sync+0x50/0x50
+kernel: ? __slab_free+0xa9/0x300
+kernel: usb_submit_urb+0x2f4/0x560
+kernel: ? urb_destroy+0x24/0x30
+kernel: usb_start_wait_urb+0x6e/0x170
+kernel: usb_control_msg+0xdc/0x120
+kernel: mcs_get_reg+0x36/0x40 [mcs7780]
+kernel: mcs_net_open+0xb5/0x5c0 [mcs7780]
+...
+
+The regression goes back to 4.9, so it's a good candidate for -stable,
+though that's the maintainer's decision.
+
+Thanks to Dan Williams for adding the "transfer buffer not dma capable"
+warning in the first place. It instantly pointed me in the right direction.
+
+Patch has been tested with transferring data from a Polar watch.
+
+Signed-off-by: Thomas Jarosch <thomas.jarosch@intra2net.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/irda/mcs7780.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/irda/mcs7780.c
++++ b/drivers/net/irda/mcs7780.c
+@@ -141,9 +141,19 @@ static int mcs_set_reg(struct mcs_cb *mc
+ static int mcs_get_reg(struct mcs_cb *mcs, __u16 reg, __u16 * val)
+ {
+ struct usb_device *dev = mcs->usbdev;
+- int ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ,
+- MCS_RD_RTYPE, 0, reg, val, 2,
+- msecs_to_jiffies(MCS_CTRL_TIMEOUT));
++ void *dmabuf;
++ int ret;
++
++ dmabuf = kmalloc(sizeof(__u16), GFP_KERNEL);
++ if (!dmabuf)
++ return -ENOMEM;
++
++ ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ,
++ MCS_RD_RTYPE, 0, reg, dmabuf, 2,
++ msecs_to_jiffies(MCS_CTRL_TIMEOUT));
++
++ memcpy(val, dmabuf, sizeof(__u16));
++ kfree(dmabuf);
+
+ return ret;
+ }
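+
+The same bounce-buffer shape applies to any USB control transfer:
+with CONFIG_VMAP_STACK the stack can live in vmalloc space, which the
+DMA API cannot map, so results must round-trip through a heap buffer.
+A condensed sketch, with a hypothetical usb_read() standing in for
+usb_control_msg():
+
+    #include <stdint.h>
+    #include <stdlib.h>
+    #include <string.h>
+
+    static int usb_read(void *buf, size_t len)  /* pretend DMA target */
+    {
+        memset(buf, 0xab, len);
+        return (int)len;
+    }
+
+    static int get_reg(uint16_t *val)
+    {
+        void *dmabuf = malloc(sizeof(*val));    /* heap: DMA-able */
+        int ret;
+
+        if (!dmabuf)
+            return -1;
+
+        ret = usb_read(dmabuf, sizeof(*val));
+        memcpy(val, dmabuf, sizeof(*val));      /* copy out to caller */
+        free(dmabuf);
+        return ret;
+    }
+
+    int main(void)
+    {
+        uint16_t v;             /* may sit on a vmalloc'ed stack... */
+        return get_reg(&v) < 0; /* ...but DMA never touches it */
+    }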
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Kosuke Tatsukawa <tatsu@ab.jp.nec.com>
+Date: Thu, 20 Jul 2017 05:20:40 +0000
+Subject: net: bonding: Fix transmit load balancing in balance-alb mode
+
+From: Kosuke Tatsukawa <tatsu@ab.jp.nec.com>
+
+
+[ Upstream commit cbf5ecb305601d063dc94a57680dfbc3f96c188d ]
+
+balance-alb mode used to have transmit dynamic load balancing feature
+enabled by default. However, transmit dynamic load balancing no longer
+works in balance-alb after commit 8b426dc54cf4 ("bonding: remove
+hardcoded value").
+
+Both balance-tlb and balance-alb use the function bond_do_alb_xmit() to
+send packets. This function uses the parameter tlb_dynamic_lb.
+tlb_dynamic_lb used to have the default value of 1 for balance-alb, but
+now the value is set to 0 except in balance-tlb.
+
+Re-enable transmit dynamic load balancing by initializing tlb_dynamic_lb
+for balance-alb similarly to balance-tlb.
+
+Fixes: 8b426dc54cf4 ("bonding: remove hardcoded value")
+Signed-off-by: Kosuke Tatsukawa <tatsu@ab.jp.nec.com>
+Acked-by: Andy Gospodarek <andy@greyhouse.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -4598,7 +4598,7 @@ static int bond_check_params(struct bond
+ }
+ ad_user_port_key = valptr->value;
+
+- if (bond_mode == BOND_MODE_TLB) {
++ if ((bond_mode == BOND_MODE_TLB) || (bond_mode == BOND_MODE_ALB)) {
+ bond_opt_initstr(&newval, "default");
+ valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB),
+ &newval);
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Thu, 20 Jul 2017 12:25:22 -0700
+Subject: net: dsa: b53: Add missing ARL entries for BCM53125
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit be35e8c516c1915a3035d266a2015b41f73ba3f9 ]
+
+The BCM53125 entry was missing an arl_entries member which would
+basically prevent the ARL search from terminating properly. This switch
+has 4 ARL entries, so add that.
+
+Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/b53/b53_common.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/dsa/b53/b53_common.c
++++ b/drivers/net/dsa/b53/b53_common.c
+@@ -1668,6 +1668,7 @@ static const struct b53_chip_data b53_sw
+ .dev_name = "BCM53125",
+ .vlans = 4096,
+ .enabled_ports = 0xff,
++ .arl_entries = 4,
+ .cpu_port = B53_CPU_PORT,
+ .vta_regs = B53_VTA_REGS,
+ .duplex_reg = B53_DUPLEX_STAT_GE,
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Martin Hundebøll <mnhu@prevas.dk>
+Date: Wed, 19 Jul 2017 08:17:02 +0200
+Subject: net: dsa: mv88e6xxx: Enable CMODE config support for 6390X
+
+From: Martin Hundebøll <mnhu@prevas.dk>
+
+
+[ Upstream commit bb0a2675f72b458e64f47071e8aabdb225a6af4d ]
+
+Commit f39908d3b1c45 ('net: dsa: mv88e6xxx: Set the CMODE for mv88e6390
+ports 9 & 10') added support for setting the CMODE for the 6390X family,
+but only enabled it for 6290 and 6390 - and left out 6390X.
+
+Fix support for setting the CMODE on 6390X by assigning
+mv88e6390x_port_set_cmode() to the .port_set_cmode function pointer
+in mv88e6390x_ops too.
+
+Fixes: f39908d3b1c4 ("net: dsa: mv88e6xxx: Set the CMODE for mv88e6390 ports 9 & 10")
+Signed-off-by: Martin Hundebøll <mnhu@prevas.dk>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/mv88e6xxx/chip.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/dsa/mv88e6xxx/chip.c
++++ b/drivers/net/dsa/mv88e6xxx/chip.c
+@@ -3377,6 +3377,7 @@ static const struct mv88e6xxx_ops mv88e6
+ .port_jumbo_config = mv88e6165_port_jumbo_config,
+ .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
+ .port_pause_config = mv88e6390_port_pause_config,
++ .port_set_cmode = mv88e6390x_port_set_cmode,
+ .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+ .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
+ .stats_snapshot = mv88e6390_g1_stats_snapshot,
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+Date: Tue, 25 Jul 2017 14:35:03 +0200
+Subject: net: ethernet: nb8800: Handle all 4 RGMII modes identically
+
+From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+
+
+[ Upstream commit 4813497b537c6208c90d6cbecac5072d347de900 ]
+
+Before commit bf8f6952a233 ("Add blurb about RGMII") it was unclear
+whose responsibility it was to insert the required clock skew, and
+in hindsight, some PHY drivers got it wrong. The solution forward
+is to introduce a new property, explicitly requiring skew from the
+node to which it is attached. In the interim, this driver will handle
+all 4 RGMII modes identically (no skew).
+
+Fixes: 52dfc8301248 ("net: ethernet: add driver for Aurora VLSI NB8800 Ethernet controller")
+Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/aurora/nb8800.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/aurora/nb8800.c
++++ b/drivers/net/ethernet/aurora/nb8800.c
+@@ -609,7 +609,7 @@ static void nb8800_mac_config(struct net
+ mac_mode |= HALF_DUPLEX;
+
+ if (gigabit) {
+- if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII)
++ if (phy_interface_is_rgmii(dev->phydev))
+ mac_mode |= RGMII_MODE;
+
+ mac_mode |= GMAC_MODE;
+@@ -1268,11 +1268,10 @@ static int nb8800_tangox_init(struct net
+ break;
+
+ case PHY_INTERFACE_MODE_RGMII:
+- pad_mode = PAD_MODE_RGMII;
+- break;
+-
++ case PHY_INTERFACE_MODE_RGMII_ID:
++ case PHY_INTERFACE_MODE_RGMII_RXID:
+ case PHY_INTERFACE_MODE_RGMII_TXID:
+- pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
++ pad_mode = PAD_MODE_RGMII;
+ break;
+
+ default:
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Aviv Heller <avivh@mellanox.com>
+Date: Sun, 2 Jul 2017 19:13:43 +0300
+Subject: net/mlx5: Consider tx_enabled in all modes on remap
+
+From: Aviv Heller <avivh@mellanox.com>
+
+
+[ Upstream commit dc798b4cc0f2a06e7ad7d522403de274b86a0a6f ]
+
+The tx_enabled lag event field is used to determine whether a slave is
+active. The current logic uses this value only if the mode is
+active-backup.
+
+However, LACP mode, although considered a load balancing mode, can mark
+a slave as inactive in certain situations (e.g., LACP timeout).
+
+This fix takes the tx_enabled value into account when remapping,
+regardless of the LAG mode (this should not affect the behavior in XOR
+mode, since in this mode both slaves are marked as active).
+
+Fixes: 7907f23adc18 ("net/mlx5: Implement RoCE LAG feature")
+Signed-off-by: Aviv Heller <avivh@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lag.c | 25 ++++++++++---------------
+ 1 file changed, 10 insertions(+), 15 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+@@ -157,22 +157,17 @@ static bool mlx5_lag_is_bonded(struct ml
+ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
+ u8 *port1, u8 *port2)
+ {
+- if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+- if (tracker->netdev_state[0].tx_enabled) {
+- *port1 = 1;
+- *port2 = 1;
+- } else {
+- *port1 = 2;
+- *port2 = 2;
+- }
+- } else {
+- *port1 = 1;
+- *port2 = 2;
+- if (!tracker->netdev_state[0].link_up)
+- *port1 = 2;
+- else if (!tracker->netdev_state[1].link_up)
+- *port2 = 1;
++ *port1 = 1;
++ *port2 = 2;
++ if (!tracker->netdev_state[0].tx_enabled ||
++ !tracker->netdev_state[0].link_up) {
++ *port1 = 2;
++ return;
+ }
++
++ if (!tracker->netdev_state[1].tx_enabled ||
++ !tracker->netdev_state[1].link_up)
++ *port2 = 1;
+ }
+
+ static void mlx5_activate_lag(struct mlx5_lag *ldev,
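+
+The rewritten mapping reads as a small pure function: each port
+prefers its own slave and falls back to the peer when that slave is
+not both tx_enabled and link_up. Restated standalone and exercised
+over the tx_enabled combinations:
+
+    #include <stdio.h>
+
+    struct slave { int tx_enabled, link_up; };
+
+    static void infer_mapping(const struct slave s[2],
+                              unsigned char *port1, unsigned char *port2)
+    {
+        *port1 = 1;
+        *port2 = 2;
+        if (!s[0].tx_enabled || !s[0].link_up) {
+            *port1 = 2;         /* slave 0 inactive: all on port 2 */
+            return;
+        }
+        if (!s[1].tx_enabled || !s[1].link_up)
+            *port2 = 1;         /* slave 1 inactive: all on port 1 */
+    }
+
+    int main(void)
+    {
+        for (int m = 0; m < 4; m++) {
+            struct slave s[2] = { { m & 1, 1 }, { (m >> 1) & 1, 1 } };
+            unsigned char p1, p2;
+
+            infer_mapping(s, &p1, &p2);
+            printf("tx0=%d tx1=%d -> %u,%u\n",
+                   s[0].tx_enabled, s[1].tx_enabled, p1, p2);
+        }
+        return 0;
+    }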
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Moshe Shemesh <moshe@mellanox.com>
+Date: Sun, 25 Jun 2017 18:45:32 +0300
+Subject: net/mlx5: Fix command bad flow on command entry allocation failure
+
+From: Moshe Shemesh <moshe@mellanox.com>
+
+
+[ Upstream commit 219c81f7d1d5a89656cb3b53d3b4e11e93608d80 ]
+
+When the driver fails to allocate an entry to send a command to the FW,
+it must notify the calling function and release the memory allocated
+for this command.
+
+Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB adapters')
+Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
+Cc: kernel-team@fb.com
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -777,6 +777,10 @@ static void cb_timeout_handler(struct wo
+ mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+ }
+
++static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
++static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
++ struct mlx5_cmd_msg *msg);
++
+ static void cmd_work_handler(struct work_struct *work)
+ {
+ struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
+@@ -786,16 +790,27 @@ static void cmd_work_handler(struct work
+ struct mlx5_cmd_layout *lay;
+ struct semaphore *sem;
+ unsigned long flags;
++ int alloc_ret;
+
+ sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
+ down(sem);
+ if (!ent->page_queue) {
+- ent->idx = alloc_ent(cmd);
+- if (ent->idx < 0) {
++ alloc_ret = alloc_ent(cmd);
++ if (alloc_ret < 0) {
+ mlx5_core_err(dev, "failed to allocate command entry\n");
++ if (ent->callback) {
++ ent->callback(-EAGAIN, ent->context);
++ mlx5_free_cmd_msg(dev, ent->out);
++ free_msg(dev, ent->in);
++ free_cmd(ent);
++ } else {
++ ent->ret = -EAGAIN;
++ complete(&ent->done);
++ }
+ up(sem);
+ return;
+ }
++ ent->idx = alloc_ret;
+ } else {
+ ent->idx = cmd->max_reg_cmds;
+ spin_lock_irqsave(&cmd->alloc_lock, flags);
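+
+The added error path mirrors the two completion modes of the command
+interface: asynchronous callers must see their callback invoked with
+-EAGAIN (and their buffers freed), while synchronous callers must have
+the completion signalled so the waiter wakes. A condensed restatement
+with illustrative types:
+
+    #include <stdio.h>
+
+    struct work_ent {
+        void (*callback)(int err, void *ctx);   /* NULL for sync */
+        void *context;
+        int ret, done;
+    };
+
+    static void fail_entry(struct work_ent *ent)
+    {
+        if (ent->callback) {
+            ent->callback(-11 /* -EAGAIN */, ent->context);
+            /* the real code also frees ent->in, ent->out and ent */
+        } else {
+            ent->ret = -11;
+            ent->done = 1;      /* complete(&ent->done) */
+        }
+    }
+
+    static void cb(int err, void *ctx) { printf("async err=%d\n", err); }
+
+    int main(void)
+    {
+        struct work_ent a = { .callback = cb }, s = { 0 };
+
+        fail_entry(&a);
+        fail_entry(&s);
+        printf("sync ret=%d done=%d\n", s.ret, s.done);
+        return 0;
+    }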
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Moshe Shemesh <moshe@mellanox.com>
+Date: Thu, 6 Jul 2017 15:48:40 +0300
+Subject: net/mlx5: Fix command completion after timeout access invalid structure
+
+From: Moshe Shemesh <moshe@mellanox.com>
+
+
+[ Upstream commit 061870800efb4e3d1ad4082a2569363629bdfcfc ]
+
+Completion on timeout should not free the driver command entry structure
+as it will need to access it again once real completion event from FW
+will occur.
+
+Fixes: 73dd3a4839c1 ('net/mlx5: Avoid using pending command interface slots')
+Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
+Cc: kernel-team@fb.com
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -955,7 +955,7 @@ static int mlx5_cmd_invoke(struct mlx5_c
+
+ err = wait_func(dev, ent);
+ if (err == -ETIMEDOUT)
+- goto out_free;
++ goto out;
+
+ ds = ent->ts2 - ent->ts1;
+ op = MLX5_GET(mbox_in, in->first.data, opcode);
+@@ -1419,6 +1419,7 @@ void mlx5_cmd_comp_handler(struct mlx5_c
+ mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
+ ent->idx);
+ free_ent(cmd, ent->idx);
++ free_cmd(ent);
+ }
+ continue;
+ }
+@@ -1477,7 +1478,8 @@ void mlx5_cmd_comp_handler(struct mlx5_c
+ free_msg(dev, ent->in);
+
+ err = err ? err : ent->status;
+- free_cmd(ent);
++ if (!forced)
++ free_cmd(ent);
+ callback(err, context);
+ } else {
+ complete(&ent->done);
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Paul Blakey <paulb@mellanox.com>
+Date: Thu, 6 Jul 2017 16:40:34 +0300
+Subject: net/mlx5: Fix mlx5_add_flow_rules call with correct num of dests
+
+From: Paul Blakey <paulb@mellanox.com>
+
+
+[ Upstream commit bcec601f30fb41e9233674942fa4040a6e63657a ]
+
+When adding an ethtool steering rule with action DISCARD we wrongly
+pass a NULL dest with dest_num 1 to mlx5_add_flow_rules().
+What this error seems to have caused is sending VPORT 0
+(MLX5_FLOW_DESTINATION_TYPE_VPORT) as the fte dest instead of no dests.
+We have the fte action correctly set to DROP, so it might have been
+ignored anyway.
+
+To reproduce use:
+ # sudo ethtool --config-nfc <dev> flow-type ether \
+ dst aa:bb:cc:dd:ee:ff action -1
+
+Fixes: 74491de93712 ("net/mlx5: Add multi dest support")
+Signed-off-by: Paul Blakey <paulb@mellanox.com>
+Reviewed-by: Mark Bloch <markb@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+@@ -320,7 +320,7 @@ add_ethtool_flow_rule(struct mlx5e_priv
+
+ spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
+ flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+- rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1);
++ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n",
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Thu, 25 May 2017 15:11:26 +0300
+Subject: net/mlx5: Fix mlx5_ifc_mtpps_reg_bits structure size
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+
+[ Upstream commit 0b794ffae7afa7c4e5accac8791c4b78e8d080ce ]
+
+Fix the miscalculation in the reserved_at_1a0 field.
+
+Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mlx5/mlx5_ifc.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -8131,7 +8131,7 @@ struct mlx5_ifc_mtpps_reg_bits {
+ u8 out_pulse_duration[0x10];
+ u8 out_periodic_adjustment[0x10];
+
+- u8 reserved_at_1a0[0x60];
++ u8 reserved_at_1a0[0x40];
+ };
+
+ struct mlx5_ifc_mtppse_reg_bits {
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Thu, 25 May 2017 16:09:34 +0300
+Subject: net/mlx5e: Add field select to MTPPS register
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+
+[ Upstream commit fa3676885e3b5be1edfa1b2cc775e20a45b34a19 ]
+
+In order to mark the relevant fields while setting the MTPPS register,
+add field select. Otherwise the write can cause a misconfiguration in
+the firmware.
+
+Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 29 +++++++++++++++-----
+ drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 -
+ drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 5 +++
+ include/linux/mlx5/mlx5_ifc.h | 10 ++++--
+ 4 files changed, 36 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+@@ -53,6 +53,15 @@ enum {
+ MLX5E_EVENT_MODE_ONCE_TILL_ARM = 0x2,
+ };
+
++enum {
++ MLX5E_MTPPS_FS_ENABLE = BIT(0x0),
++ MLX5E_MTPPS_FS_PATTERN = BIT(0x2),
++ MLX5E_MTPPS_FS_PIN_MODE = BIT(0x3),
++ MLX5E_MTPPS_FS_TIME_STAMP = BIT(0x4),
++ MLX5E_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5),
++ MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7),
++};
++
+ void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp,
+ struct skb_shared_hwtstamps *hwts)
+ {
+@@ -222,7 +231,10 @@ static int mlx5e_ptp_adjfreq(struct ptp_
+
+ /* For future use need to add a loop for finding all 1PPS out pins */
+ MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT);
+- MLX5_SET(mtpps_reg, in, out_periodic_adjustment, delta & 0xFFFF);
++ MLX5_SET(mtpps_reg, in, enhanced_out_periodic_adjustment, delta);
++ MLX5_SET(mtpps_reg, in, field_select,
++ MLX5E_MTPPS_FS_PIN_MODE |
++ MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ);
+
+ mlx5_set_mtpps(priv->mdev, in, sizeof(in));
+ }
+@@ -258,8 +270,7 @@ static int mlx5e_extts_configure(struct
+ int pin = -1;
+ int err = 0;
+
+- if (!MLX5_CAP_GEN(priv->mdev, pps) ||
+- !MLX5_CAP_GEN(priv->mdev, pps_modify))
++ if (!MLX5_PPS_CAP(priv->mdev))
+ return -EOPNOTSUPP;
+
+ if (rq->extts.index >= tstamp->ptp_info.n_pins)
+@@ -278,6 +289,9 @@ static int mlx5e_extts_configure(struct
+ MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_IN);
+ MLX5_SET(mtpps_reg, in, pattern, pattern);
+ MLX5_SET(mtpps_reg, in, enable, on);
++ MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE |
++ MLX5E_MTPPS_FS_PATTERN |
++ MLX5E_MTPPS_FS_ENABLE);
+
+ err = mlx5_set_mtpps(priv->mdev, in, sizeof(in));
+ if (err)
+@@ -303,7 +317,7 @@ static int mlx5e_perout_configure(struct
+ int pin = -1;
+ s64 ns;
+
+- if (!MLX5_CAP_GEN(priv->mdev, pps_modify))
++ if (!MLX5_PPS_CAP(priv->mdev))
+ return -EOPNOTSUPP;
+
+ if (rq->perout.index >= tstamp->ptp_info.n_pins)
+@@ -338,7 +352,10 @@ static int mlx5e_perout_configure(struct
+ MLX5_SET(mtpps_reg, in, pattern, MLX5E_OUT_PATTERN_PERIODIC);
+ MLX5_SET(mtpps_reg, in, enable, on);
+ MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
+-
++ MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE |
++ MLX5E_MTPPS_FS_PATTERN |
++ MLX5E_MTPPS_FS_ENABLE |
++ MLX5E_MTPPS_FS_TIME_STAMP);
+ return mlx5_set_mtpps(priv->mdev, in, sizeof(in));
+ }
+
+@@ -488,7 +505,7 @@ void mlx5e_timestamp_init(struct mlx5e_p
+ #define MAX_PIN_NUM 8
+ tstamp->pps_pin_caps = kzalloc(sizeof(u8) * MAX_PIN_NUM, GFP_KERNEL);
+ if (tstamp->pps_pin_caps) {
+- if (MLX5_CAP_GEN(priv->mdev, pps))
++ if (MLX5_PPS_CAP(priv->mdev))
+ mlx5e_get_pps_caps(priv, tstamp);
+ if (tstamp->ptp_info.n_pins)
+ mlx5e_init_pin_config(tstamp);
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -690,7 +690,7 @@ int mlx5_start_eqs(struct mlx5_core_dev
+ else
+ mlx5_core_dbg(dev, "port_module_event is not set\n");
+
+- if (MLX5_CAP_GEN(dev, pps))
++ if (MLX5_PPS_CAP(dev))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);
+
+ err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
+--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+@@ -153,6 +153,11 @@ int mlx5_set_mtpps(struct mlx5_core_dev
+ int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode);
+ int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode);
+
++#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \
++ MLX5_CAP_GEN((mdev), pps_modify) && \
++ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \
++ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj))
++
+ void mlx5e_init(void);
+ void mlx5e_cleanup(void);
+
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -7718,8 +7718,10 @@ struct mlx5_ifc_pcam_reg_bits {
+ };
+
+ struct mlx5_ifc_mcam_enhanced_features_bits {
+- u8 reserved_at_0[0x7f];
++ u8 reserved_at_0[0x7d];
+
++ u8 mtpps_enh_out_per_adj[0x1];
++ u8 mtpps_fs[0x1];
+ u8 pcie_performance_group[0x1];
+ };
+
+@@ -8115,7 +8117,8 @@ struct mlx5_ifc_mtpps_reg_bits {
+ u8 reserved_at_78[0x4];
+ u8 cap_pin_4_mode[0x4];
+
+- u8 reserved_at_80[0x80];
++ u8 field_select[0x20];
++ u8 reserved_at_a0[0x60];
+
+ u8 enable[0x1];
+ u8 reserved_at_101[0xb];
+@@ -8130,8 +8133,9 @@ struct mlx5_ifc_mtpps_reg_bits {
+
+ u8 out_pulse_duration[0x10];
+ u8 out_periodic_adjustment[0x10];
++ u8 enhanced_out_periodic_adjustment[0x20];
+
+- u8 reserved_at_1a0[0x40];
++ u8 reserved_at_1c0[0x20];
+ };
+
+ struct mlx5_ifc_mtppse_reg_bits {
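+
+Field select gives the register read-modify-write semantics without a
+read: only fields whose select bit is set are applied, and everything
+else keeps its current firmware value. A miniature model (illustrative
+layout, not the real MTPPS format):
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    enum { FS_ENABLE = 1 << 0, FS_PATTERN = 1 << 2 };
+
+    struct mtpps { uint8_t enable, pattern; };
+
+    static void reg_write(struct mtpps *hw, const struct mtpps *in,
+                          uint32_t field_select)
+    {
+        if (field_select & FS_ENABLE)
+            hw->enable = in->enable;
+        if (field_select & FS_PATTERN)
+            hw->pattern = in->pattern;
+        /* unselected fields are left untouched */
+    }
+
+    int main(void)
+    {
+        struct mtpps hw = { .enable = 0, .pattern = 3 };
+        struct mtpps in = { .enable = 1, .pattern = 0 };
+
+        reg_write(&hw, &in, FS_ENABLE); /* pattern not selected */
+        printf("enable=%u pattern=%u\n", hw.enable, hw.pattern);
+        return 0;
+    }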
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Sun, 28 May 2017 14:27:02 +0300
+Subject: net/mlx5e: Add missing support for PTP_CLK_REQ_PPS request
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+
+[ Upstream commit cf5033089b078303b102b65e3ccbbfa3ce0f4367 ]
+
+Add the missing option to enable the PTP_CLK_PPS function.
+In this case the pin should be configured as 1PPS IN first, and
+then it will be connected to the PPS mechanism.
+Events will be reported as PTP_CLOCK_PPSUSR events to the relevant
+sysfs.
+
+Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 +
+ drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 20 ++++++++++++++++++++
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 -
+ 3 files changed, 21 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -266,6 +266,7 @@ struct mlx5e_pps {
+ u8 pin_caps[MAX_PIN_NUM];
+ struct work_struct out_work;
+ u64 start[MAX_PIN_NUM];
++ u8 enabled;
+ };
+
+ struct mlx5e_tstamp {
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+@@ -394,6 +394,17 @@ static int mlx5e_perout_configure(struct
+ MLX5E_EVENT_MODE_REPETETIVE & on);
+ }
+
++static int mlx5e_pps_configure(struct ptp_clock_info *ptp,
++ struct ptp_clock_request *rq,
++ int on)
++{
++ struct mlx5e_tstamp *tstamp =
++ container_of(ptp, struct mlx5e_tstamp, ptp_info);
++
++ tstamp->pps_info.enabled = !!on;
++ return 0;
++}
++
+ static int mlx5e_ptp_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+@@ -403,6 +414,8 @@ static int mlx5e_ptp_enable(struct ptp_c
+ return mlx5e_extts_configure(ptp, rq, on);
+ case PTP_CLK_REQ_PEROUT:
+ return mlx5e_perout_configure(ptp, rq, on);
++ case PTP_CLK_REQ_PPS:
++ return mlx5e_pps_configure(ptp, rq, on);
+ default:
+ return -EOPNOTSUPP;
+ }
+@@ -448,6 +461,7 @@ static int mlx5e_init_pin_config(struct
+ return -ENOMEM;
+ tstamp->ptp_info.enable = mlx5e_ptp_enable;
+ tstamp->ptp_info.verify = mlx5e_ptp_verify;
++ tstamp->ptp_info.pps = 1;
+
+ for (i = 0; i < tstamp->ptp_info.n_pins; i++) {
+ snprintf(tstamp->ptp_info.pin_config[i].name,
+@@ -499,6 +513,12 @@ void mlx5e_pps_event_handler(struct mlx5
+
+ switch (tstamp->ptp_info.pin_config[pin].func) {
+ case PTP_PF_EXTTS:
++ if (tstamp->pps_info.enabled) {
++ event->type = PTP_CLOCK_PPSUSR;
++ event->pps_times.ts_real = ns_to_timespec64(event->timestamp);
++ } else {
++ event->type = PTP_CLOCK_EXTTS;
++ }
+ ptp_clock_event(tstamp->ptp, event);
+ break;
+ case PTP_PF_PEROUT:
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -365,7 +365,6 @@ static void mlx5e_async_event(struct mlx
+ break;
+ case MLX5_DEV_EVENT_PPS:
+ eqe = (struct mlx5_eqe *)param;
+- ptp_event.type = PTP_CLOCK_EXTTS;
+ ptp_event.index = eqe->data.pps.pin;
+ ptp_event.timestamp =
+ timecounter_cyc2time(&priv->tstamp.clock,
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Sun, 28 May 2017 14:06:01 +0300
+Subject: net/mlx5e: Change 1PPS out scheme
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+
+[ Upstream commit 4272f9b88db9223216cdf87314f570f6d81295b4 ]
+
+In order to fix the drift in 1PPS out, we need to adjust the next pulse.
+On each 1PPS out falling edge the driver gets the event, and the event
+handler then adjusts the start time of the next pulse.
+
+Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h | 9 +
+ drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 116 ++++++++++++++-------
+ 2 files changed, 87 insertions(+), 38 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -261,6 +261,13 @@ struct mlx5e_dcbx {
+ };
+ #endif
+
++#define MAX_PIN_NUM 8
++struct mlx5e_pps {
++ u8 pin_caps[MAX_PIN_NUM];
++ struct work_struct out_work;
++ u64 start[MAX_PIN_NUM];
++};
++
+ struct mlx5e_tstamp {
+ rwlock_t lock;
+ struct cyclecounter cycles;
+@@ -272,7 +279,7 @@ struct mlx5e_tstamp {
+ struct mlx5_core_dev *mdev;
+ struct ptp_clock *ptp;
+ struct ptp_clock_info ptp_info;
+- u8 *pps_pin_caps;
++ struct mlx5e_pps pps_info;
+ };
+
+ enum {
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+@@ -82,6 +82,33 @@ static u64 mlx5e_read_internal_timer(con
+ return mlx5_read_internal_timer(tstamp->mdev) & cc->mask;
+ }
+
++static void mlx5e_pps_out(struct work_struct *work)
++{
++ struct mlx5e_pps *pps_info = container_of(work, struct mlx5e_pps,
++ out_work);
++ struct mlx5e_tstamp *tstamp = container_of(pps_info, struct mlx5e_tstamp,
++ pps_info);
++ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
++ unsigned long flags;
++ int i;
++
++ for (i = 0; i < tstamp->ptp_info.n_pins; i++) {
++ u64 tstart;
++
++ write_lock_irqsave(&tstamp->lock, flags);
++ tstart = tstamp->pps_info.start[i];
++ tstamp->pps_info.start[i] = 0;
++ write_unlock_irqrestore(&tstamp->lock, flags);
++ if (!tstart)
++ continue;
++
++ MLX5_SET(mtpps_reg, in, pin, i);
++ MLX5_SET64(mtpps_reg, in, time_stamp, tstart);
++ MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_TIME_STAMP);
++ mlx5_set_mtpps(tstamp->mdev, in, sizeof(in));
++ }
++}
++
+ static void mlx5e_timestamp_overflow(struct work_struct *work)
+ {
+ struct delayed_work *dwork = to_delayed_work(work);
+@@ -223,21 +250,6 @@ static int mlx5e_ptp_adjfreq(struct ptp_
+ int neg_adj = 0;
+ struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
+ ptp_info);
+- struct mlx5e_priv *priv =
+- container_of(tstamp, struct mlx5e_priv, tstamp);
+-
+- if (MLX5_CAP_GEN(priv->mdev, pps_modify)) {
+- u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+-
+- /* For future use need to add a loop for finding all 1PPS out pins */
+- MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT);
+- MLX5_SET(mtpps_reg, in, enhanced_out_periodic_adjustment, delta);
+- MLX5_SET(mtpps_reg, in, field_select,
+- MLX5E_MTPPS_FS_PIN_MODE |
+- MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ);
+-
+- mlx5_set_mtpps(priv->mdev, in, sizeof(in));
+- }
+
+ if (delta < 0) {
+ neg_adj = 1;
+@@ -315,7 +327,7 @@ static int mlx5e_perout_configure(struct
+ struct mlx5e_priv *priv =
+ container_of(tstamp, struct mlx5e_priv, tstamp);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+- u64 nsec_now, nsec_delta, time_stamp;
++ u64 nsec_now, nsec_delta, time_stamp = 0;
+ u64 cycles_now, cycles_delta;
+ struct timespec64 ts;
+ unsigned long flags;
+@@ -323,6 +335,7 @@ static int mlx5e_perout_configure(struct
+ u8 pin_mode = 0;
+ u8 pattern = 0;
+ int pin = -1;
++ int err = 0;
+ s64 ns;
+
+ if (!MLX5_PPS_CAP(priv->mdev))
+@@ -373,7 +386,12 @@ static int mlx5e_perout_configure(struct
+ MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
+ MLX5_SET(mtpps_reg, in, field_select, field_select);
+
+- return mlx5_set_mtpps(priv->mdev, in, sizeof(in));
++ err = mlx5_set_mtpps(priv->mdev, in, sizeof(in));
++ if (err)
++ return err;
++
++ return mlx5_set_mtppse(priv->mdev, pin, 0,
++ MLX5E_EVENT_MODE_REPETETIVE & on);
+ }
+
+ static int mlx5e_ptp_enable(struct ptp_clock_info *ptp,
+@@ -457,22 +475,50 @@ static void mlx5e_get_pps_caps(struct ml
+ tstamp->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out,
+ cap_max_num_of_pps_out_pins);
+
+- tstamp->pps_pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
+- tstamp->pps_pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
+- tstamp->pps_pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
+- tstamp->pps_pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode);
+- tstamp->pps_pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode);
+- tstamp->pps_pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode);
+- tstamp->pps_pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode);
+- tstamp->pps_pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
++ tstamp->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
++ tstamp->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
++ tstamp->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
++ tstamp->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode);
++ tstamp->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode);
++ tstamp->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode);
++ tstamp->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode);
++ tstamp->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
+ }
+
+ void mlx5e_pps_event_handler(struct mlx5e_priv *priv,
+ struct ptp_clock_event *event)
+ {
++ struct net_device *netdev = priv->netdev;
+ struct mlx5e_tstamp *tstamp = &priv->tstamp;
++ struct timespec64 ts;
++ u64 nsec_now, nsec_delta;
++ u64 cycles_now, cycles_delta;
++ int pin = event->index;
++ s64 ns;
++ unsigned long flags;
+
+- ptp_clock_event(tstamp->ptp, event);
++ switch (tstamp->ptp_info.pin_config[pin].func) {
++ case PTP_PF_EXTTS:
++ ptp_clock_event(tstamp->ptp, event);
++ break;
++ case PTP_PF_PEROUT:
++ mlx5e_ptp_gettime(&tstamp->ptp_info, &ts);
++ cycles_now = mlx5_read_internal_timer(tstamp->mdev);
++ ts.tv_sec += 1;
++ ts.tv_nsec = 0;
++ ns = timespec64_to_ns(&ts);
++ write_lock_irqsave(&tstamp->lock, flags);
++ nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now);
++ nsec_delta = ns - nsec_now;
++ cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift,
++ tstamp->cycles.mult);
++ tstamp->pps_info.start[pin] = cycles_now + cycles_delta;
++ queue_work(priv->wq, &tstamp->pps_info.out_work);
++ write_unlock_irqrestore(&tstamp->lock, flags);
++ break;
++ default:
++ netdev_err(netdev, "%s: Unhandled event\n", __func__);
++ }
+ }
+
+ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
+@@ -508,6 +554,7 @@ void mlx5e_timestamp_init(struct mlx5e_p
+ do_div(ns, NSEC_PER_SEC / 2 / HZ);
+ tstamp->overflow_period = ns;
+
++ INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out);
+ INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow);
+ if (tstamp->overflow_period)
+ schedule_delayed_work(&tstamp->overflow_work, 0);
+@@ -519,16 +566,10 @@ void mlx5e_timestamp_init(struct mlx5e_p
+ snprintf(tstamp->ptp_info.name, 16, "mlx5 ptp");
+
+ /* Initialize 1PPS data structures */
+-#define MAX_PIN_NUM 8
+- tstamp->pps_pin_caps = kzalloc(sizeof(u8) * MAX_PIN_NUM, GFP_KERNEL);
+- if (tstamp->pps_pin_caps) {
+- if (MLX5_PPS_CAP(priv->mdev))
+- mlx5e_get_pps_caps(priv, tstamp);
+- if (tstamp->ptp_info.n_pins)
+- mlx5e_init_pin_config(tstamp);
+- } else {
+- mlx5_core_warn(priv->mdev, "1PPS initialization failed\n");
+- }
++ if (MLX5_PPS_CAP(priv->mdev))
++ mlx5e_get_pps_caps(priv, tstamp);
++ if (tstamp->ptp_info.n_pins)
++ mlx5e_init_pin_config(tstamp);
+
+ tstamp->ptp = ptp_clock_register(&tstamp->ptp_info,
+ &priv->mdev->pdev->dev);
+@@ -551,7 +592,8 @@ void mlx5e_timestamp_cleanup(struct mlx5
+ priv->tstamp.ptp = NULL;
+ }
+
+- kfree(tstamp->pps_pin_caps);
++ cancel_work_sync(&tstamp->pps_info.out_work);
++
+ kfree(tstamp->ptp_info.pin_config);
+
+ cancel_delayed_work_sync(&tstamp->overflow_work);
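+
+The rescheduling math in mlx5e_pps_event_handler() inverts the
+timecounter relation ns = (cycles * mult) >> shift to convert the
+nanoseconds until the next whole second into a hardware timestamp.
+In isolation, with illustrative mult/shift values:
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    int main(void)
+    {
+        uint32_t mult = 1u << 21, shift = 21;   /* 1 cycle == 1 ns here */
+        uint64_t nsec_delta = 1000000000ULL;    /* next pulse: +1 s */
+
+        /* cycles = (ns << shift) / mult, as div64_u64() does above */
+        uint64_t cycles_delta = (nsec_delta << shift) / mult;
+
+        printf("cycles until next pulse: %llu\n",
+               (unsigned long long)cycles_delta);
+        return 0;
+    }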
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Sun, 28 May 2017 12:01:38 +0300
+Subject: net/mlx5e: Fix broken disable 1PPS flow
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+
+[ Upstream commit 49c5031ca6f0628ef973a11b17e463e088bf859e ]
+
+We need to disable the MTPPS and unsubscribe from the pulse events
+when the user disables the 1PPS functionality.
+
+Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 75 ++++++++++++---------
+ 1 file changed, 46 insertions(+), 29 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+@@ -266,6 +266,8 @@ static int mlx5e_extts_configure(struct
+ struct mlx5e_priv *priv =
+ container_of(tstamp, struct mlx5e_priv, tstamp);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
++ u32 field_select = 0;
++ u8 pin_mode = 0;
+ u8 pattern = 0;
+ int pin = -1;
+ int err = 0;
+@@ -280,18 +282,21 @@ static int mlx5e_extts_configure(struct
+ pin = ptp_find_pin(tstamp->ptp, PTP_PF_EXTTS, rq->extts.index);
+ if (pin < 0)
+ return -EBUSY;
++ pin_mode = MLX5E_PIN_MODE_IN;
++ pattern = !!(rq->extts.flags & PTP_FALLING_EDGE);
++ field_select = MLX5E_MTPPS_FS_PIN_MODE |
++ MLX5E_MTPPS_FS_PATTERN |
++ MLX5E_MTPPS_FS_ENABLE;
++ } else {
++ pin = rq->extts.index;
++ field_select = MLX5E_MTPPS_FS_ENABLE;
+ }
+
+- if (rq->extts.flags & PTP_FALLING_EDGE)
+- pattern = 1;
+-
+ MLX5_SET(mtpps_reg, in, pin, pin);
+- MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_IN);
++ MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
+ MLX5_SET(mtpps_reg, in, pattern, pattern);
+ MLX5_SET(mtpps_reg, in, enable, on);
+- MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE |
+- MLX5E_MTPPS_FS_PATTERN |
+- MLX5E_MTPPS_FS_ENABLE);
++ MLX5_SET(mtpps_reg, in, field_select, field_select);
+
+ err = mlx5_set_mtpps(priv->mdev, in, sizeof(in));
+ if (err)
+@@ -314,6 +319,9 @@ static int mlx5e_perout_configure(struct
+ u64 cycles_now, cycles_delta;
+ struct timespec64 ts;
+ unsigned long flags;
++ u32 field_select = 0;
++ u8 pin_mode = 0;
++ u8 pattern = 0;
+ int pin = -1;
+ s64 ns;
+
+@@ -328,34 +336,43 @@ static int mlx5e_perout_configure(struct
+ rq->perout.index);
+ if (pin < 0)
+ return -EBUSY;
+- }
+
+- ts.tv_sec = rq->perout.period.sec;
+- ts.tv_nsec = rq->perout.period.nsec;
+- ns = timespec64_to_ns(&ts);
+- if (on)
++ pin_mode = MLX5E_PIN_MODE_OUT;
++ pattern = MLX5E_OUT_PATTERN_PERIODIC;
++ ts.tv_sec = rq->perout.period.sec;
++ ts.tv_nsec = rq->perout.period.nsec;
++ ns = timespec64_to_ns(&ts);
++
+ if ((ns >> 1) != 500000000LL)
+ return -EINVAL;
+- ts.tv_sec = rq->perout.start.sec;
+- ts.tv_nsec = rq->perout.start.nsec;
+- ns = timespec64_to_ns(&ts);
+- cycles_now = mlx5_read_internal_timer(tstamp->mdev);
+- write_lock_irqsave(&tstamp->lock, flags);
+- nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now);
+- nsec_delta = ns - nsec_now;
+- cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift,
+- tstamp->cycles.mult);
+- write_unlock_irqrestore(&tstamp->lock, flags);
+- time_stamp = cycles_now + cycles_delta;
++
++ ts.tv_sec = rq->perout.start.sec;
++ ts.tv_nsec = rq->perout.start.nsec;
++ ns = timespec64_to_ns(&ts);
++ cycles_now = mlx5_read_internal_timer(tstamp->mdev);
++ write_lock_irqsave(&tstamp->lock, flags);
++ nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now);
++ nsec_delta = ns - nsec_now;
++ cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift,
++ tstamp->cycles.mult);
++ write_unlock_irqrestore(&tstamp->lock, flags);
++ time_stamp = cycles_now + cycles_delta;
++ field_select = MLX5E_MTPPS_FS_PIN_MODE |
++ MLX5E_MTPPS_FS_PATTERN |
++ MLX5E_MTPPS_FS_ENABLE |
++ MLX5E_MTPPS_FS_TIME_STAMP;
++ } else {
++ pin = rq->perout.index;
++ field_select = MLX5E_MTPPS_FS_ENABLE;
++ }
++
+ MLX5_SET(mtpps_reg, in, pin, pin);
+- MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT);
+- MLX5_SET(mtpps_reg, in, pattern, MLX5E_OUT_PATTERN_PERIODIC);
++ MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
++ MLX5_SET(mtpps_reg, in, pattern, pattern);
+ MLX5_SET(mtpps_reg, in, enable, on);
+ MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
+- MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE |
+- MLX5E_MTPPS_FS_PATTERN |
+- MLX5E_MTPPS_FS_ENABLE |
+- MLX5E_MTPPS_FS_TIME_STAMP);
++ MLX5_SET(mtpps_reg, in, field_select, field_select);
++
+ return mlx5_set_mtpps(priv->mdev, in, sizeof(in));
+ }
+
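
Both the EXTTS and PEROUT paths now share one shape: compute pin_mode,
pattern and a field_select mask in the enable branch, select only the
enable field in the disable branch, then issue a single register write.
A condensed sketch of that split; the MLX5E_MTPPS_FS_* values below are
illustrative stand-ins, not the real bit positions:

  #include <stdint.h>

  #define MLX5E_MTPPS_FS_PIN_MODE (1u << 0) /* illustrative */
  #define MLX5E_MTPPS_FS_PATTERN  (1u << 1) /* illustrative */
  #define MLX5E_MTPPS_FS_ENABLE   (1u << 2) /* illustrative */

  static uint32_t mtpps_field_select(int on)
  {
          /* on disable, firmware should act on the enable field only */
          if (on)
                  return MLX5E_MTPPS_FS_PIN_MODE |
                         MLX5E_MTPPS_FS_PATTERN |
                         MLX5E_MTPPS_FS_ENABLE;
          return MLX5E_MTPPS_FS_ENABLE;
  }
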
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Ilan Tayari <ilant@mellanox.com>
+Date: Wed, 5 Jul 2017 10:17:04 +0300
+Subject: net/mlx5e: Fix outer_header_zero() check size
+
+From: Ilan Tayari <ilant@mellanox.com>
+
+
+[ Upstream commit 0242f4a0bb03906010bbf80495512be00494a0ef ]
+
+The outer_header_zero() routine checks whether the outer_headers match
+of a flow-table entry is all zero.
+
+This function uses the size of the whole fte_match_param instead of just
+the outer_headers member, causing it to fail to detect all-zeros if
+any other member of the fte_match_param is non-zero.
+
+Use the correct size for zero check.
+
+Fixes: 6dc6071cfcde ("net/mlx5e: Add ethtool flow steering support")
+Signed-off-by: Ilan Tayari <ilant@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+@@ -276,7 +276,7 @@ static void add_rule_to_list(struct mlx5
+
+ static bool outer_header_zero(u32 *match_criteria)
+ {
+- int size = MLX5_ST_SZ_BYTES(fte_match_param);
++ int size = MLX5_FLD_SZ_BYTES(fte_match_param, outer_headers);
+ char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers);
+
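
The bug pattern is generic: an "is this region all zero" check must be
sized to the member under test, not to its enclosing structure. A
self-contained illustration with a made-up layout:

  #include <string.h>

  struct match_param { char outer_headers[16]; char misc[16]; };

  static int outer_headers_zero(const struct match_param *p)
  {
          static const struct match_param zero;

          /* sizeof(p->outer_headers), not sizeof(*p): a non-zero
           * 'misc' must not defeat the check
           */
          return memcmp(p->outer_headers, zero.outer_headers,
                        sizeof(p->outer_headers)) == 0;
  }
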
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Wed, 12 Jul 2017 17:27:18 +0300
+Subject: net/mlx5e: Fix wrong delay calculation for overflow check scheduling
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+
+[ Upstream commit d439c84509a510e864fdc6166c760482cd03fc57 ]
+
+The overflow_period is calculated in seconds. In order to use it
+for delayed work scheduling, a translation to jiffies is needed.
+
+Fixes: ef9814deafd0 ('net/mlx5e: Add HW timestamping (TS) support')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+@@ -119,7 +119,8 @@ static void mlx5e_timestamp_overflow(str
+ write_lock_irqsave(&tstamp->lock, flags);
+ timecounter_read(&tstamp->clock);
+ write_unlock_irqrestore(&tstamp->lock, flags);
+- schedule_delayed_work(&tstamp->overflow_work, tstamp->overflow_period);
++ schedule_delayed_work(&tstamp->overflow_work,
++ msecs_to_jiffies(tstamp->overflow_period * 1000));
+ }
+
+ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
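
The unit mismatch is easy to quantify: schedule_delayed_work() takes a
delay in jiffies, so passing raw seconds divides the period by HZ in
effect. A userspace illustration with a simplified (truncating)
msecs_to_jiffies(); the real kernel helper rounds up:

  #include <stdio.h>

  #define HZ 250 /* example CONFIG_HZ value */

  static unsigned long msecs_to_jiffies(unsigned long ms)
  {
          return ms * HZ / 1000; /* simplified; the kernel rounds up */
  }

  int main(void)
  {
          unsigned long period_sec = 10;

          /* bug: 10 is taken as 10 jiffies == 40 ms at HZ=250 */
          printf("buggy delay: %lu ms\n", period_sec * 1000 / HZ);
          /* fix: 10 s -> 2500 jiffies */
          printf("fixed delay: %lu jiffies\n",
                 msecs_to_jiffies(period_sec * 1000));
          return 0;
  }
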
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Alex Vesker <valex@mellanox.com>
+Date: Thu, 6 Jul 2017 15:40:32 +0300
+Subject: net/mlx5e: IPoIB, Modify add/remove underlay QPN flows
+
+From: Alex Vesker <valex@mellanox.com>
+
+
+[ Upstream commit 58569ef8f619761548e7d198f59e8ebe3af91d04 ]
+
+On interface remove, the clean-up was done incorrectly, causing
+an error in the log:
+"SET_FLOW_TABLE_ROOT(0x92f) op_mod(0x0) failed...syndrome (0x7e9f14)"
+
+This was caused by the following flow:
+-ndo_uninit:
+ Move QP state to RST (this disconnects the QP from FT),
+ the QP cannot be attached to any FT unless it is in RTS.
+
+-mlx5_rdma_netdev_free:
+ cleanup_rx: Destroy FT
+ cleanup_tx: Destroy QP and remove QPN from FT
+
+This caused a problem: when destroying the current FT, we tried to
+re-attach the QP to the next FT, which is not needed.
+
+The correct flow is:
+-mlx5_rdma_netdev_free:
+ cleanup_rx: remove QPN from FT & Destroy FT
+ cleanup_tx: Destroy QP
+
+Fixes: 508541146af1 ("net/mlx5: Use underlay QPN from the root name space")
+Signed-off-by: Alex Vesker <valex@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/ipoib.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
+@@ -160,8 +160,6 @@ out:
+
+ static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+ {
+- mlx5_fs_remove_rx_underlay_qpn(mdev, qp->qpn);
+-
+ mlx5_core_destroy_qp(mdev, qp);
+ }
+
+@@ -176,8 +174,6 @@ static int mlx5i_init_tx(struct mlx5e_pr
+ return err;
+ }
+
+- mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
+-
+ err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
+@@ -235,6 +231,7 @@ static void mlx5i_destroy_flow_steering(
+
+ static int mlx5i_init_rx(struct mlx5e_priv *priv)
+ {
++ struct mlx5i_priv *ipriv = priv->ppriv;
+ int err;
+
+ err = mlx5e_create_indirect_rqt(priv);
+@@ -253,12 +250,18 @@ static int mlx5i_init_rx(struct mlx5e_pr
+ if (err)
+ goto err_destroy_indirect_tirs;
+
+- err = mlx5i_create_flow_steering(priv);
++ err = mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
+ if (err)
+ goto err_destroy_direct_tirs;
+
++ err = mlx5i_create_flow_steering(priv);
++ if (err)
++ goto err_remove_rx_underlay_qpn;
++
+ return 0;
+
++err_remove_rx_underlay_qpn:
++ mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
+ err_destroy_direct_tirs:
+ mlx5e_destroy_direct_tirs(priv);
+ err_destroy_indirect_tirs:
+@@ -272,6 +275,9 @@ err_destroy_indirect_rqts:
+
+ static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
+ {
++ struct mlx5i_priv *ipriv = priv->ppriv;
++
++ mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
+ mlx5i_destroy_flow_steering(priv);
+ mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_indirect_tirs(priv);
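
The reworked mlx5i_init_rx() follows the standard kernel unwind idiom:
each acquired resource gets a label, and a failure jumps to the label
that releases everything acquired so far, in reverse order. A generic
sketch of the shape, with placeholder step/undo functions:

  int step_a(void);
  int step_b(void);
  void undo_a(void);

  static int init_resources(void)
  {
          int err;

          err = step_a();
          if (err)
                  return err;

          err = step_b();
          if (err)
                  goto err_undo_a;

          return 0;

  err_undo_a:
          undo_a();
          return err;
  }
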
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Wed, 12 Jul 2017 17:44:07 +0300
+Subject: net/mlx5e: Schedule overflow check work to mlx5e workqueue
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+
+[ Upstream commit f08c39ed0bfb503c7b3e013cd40d036ce6a0941a ]
+
+This is done in order to ensure that work will not run after the cleanup.
+
+Fixes: ef9814deafd0 ('net/mlx5e: Add HW timestamping (TS) support')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+@@ -114,13 +114,14 @@ static void mlx5e_timestamp_overflow(str
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp,
+ overflow_work);
++ struct mlx5e_priv *priv = container_of(tstamp, struct mlx5e_priv, tstamp);
+ unsigned long flags;
+
+ write_lock_irqsave(&tstamp->lock, flags);
+ timecounter_read(&tstamp->clock);
+ write_unlock_irqrestore(&tstamp->lock, flags);
+- schedule_delayed_work(&tstamp->overflow_work,
+- msecs_to_jiffies(tstamp->overflow_period * 1000));
++ queue_delayed_work(priv->wq, &tstamp->overflow_work,
++ msecs_to_jiffies(tstamp->overflow_period * 1000));
+ }
+
+ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
+@@ -578,7 +579,7 @@ void mlx5e_timestamp_init(struct mlx5e_p
+ INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out);
+ INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow);
+ if (tstamp->overflow_period)
+- schedule_delayed_work(&tstamp->overflow_work, 0);
++ queue_delayed_work(priv->wq, &tstamp->overflow_work, 0);
+ else
+ mlx5_core_warn(priv->mdev, "invalid overflow period, overflow_work is not scheduled\n");
+
+@@ -614,8 +615,6 @@ void mlx5e_timestamp_cleanup(struct mlx5
+ }
+
+ cancel_work_sync(&tstamp->pps_info.out_work);
+-
+- kfree(tstamp->ptp_info.pin_config);
+-
+ cancel_delayed_work_sync(&tstamp->overflow_work);
++ kfree(tstamp->ptp_info.pin_config);
+ }
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Fri, 28 Jul 2017 11:58:36 -0700
+Subject: net: phy: Correctly process PHY_HALTED in phy_stop_machine()
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit 7ad813f208533cebfcc32d3d7474dc1677d1b09a ]
+
+Marc reported that the PHY library adjust_link() callback was not
+running when calling phy_stop() + phy_disconnect(). This is indeed the
+case, because we set the state machine to PHY_HALTED but never run it
+to process this state past that point.
+
+Fix this with a synchronous call to phy_state_machine() in order to have
+the state machine actually act on PHY_HALTED, set the PHY device's link
+down, turn the network device's carrier off and finally call the
+adjust_link() function.
+
+Reported-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+Fixes: a390d1f379cf ("phylib: convert state_queue work to delayed_work")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/phy/phy.c
++++ b/drivers/net/phy/phy.c
+@@ -749,6 +749,9 @@ void phy_stop_machine(struct phy_device
+ if (phydev->state > PHY_UP && phydev->state != PHY_HALTED)
+ phydev->state = PHY_UP;
+ mutex_unlock(&phydev->lock);
++
++ /* Now we can run the state machine synchronously */
++ phy_state_machine(&phydev->state_queue.work);
+ }
+
+ /**
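
For context, a typical MAC driver teardown that depends on the behavior
restored here looks roughly like the sketch below (a hypothetical
driver, error handling elided). Without the synchronous state-machine
run, the adjust_link() callback registered at connect time never
observed the final link-down:

  #include <linux/netdevice.h>
  #include <linux/phy.h>

  struct example_priv {
          struct phy_device *phydev;
  };

  static int example_ndo_stop(struct net_device *ndev)
  {
          struct example_priv *priv = netdev_priv(ndev);

          phy_stop(priv->phydev);       /* state -> PHY_HALTED */
          phy_disconnect(priv->phydev); /* adjust_link() must have seen
                                         * link-down by this point */
          return 0;
  }
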
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: "David S. Miller" <davem@davemloft.net>
+Date: Wed, 19 Jul 2017 13:33:24 -0700
+Subject: net: Zero terminate ifr_name in dev_ifname().
+
+From: "David S. Miller" <davem@davemloft.net>
+
+
+[ Upstream commit 63679112c536289826fec61c917621de95ba2ade ]
+
+The ifr.ifr_name is passed around and assumed to be NULL terminated.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev_ioctl.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/dev_ioctl.c
++++ b/net/core/dev_ioctl.c
+@@ -28,6 +28,7 @@ static int dev_ifname(struct net *net, s
+
+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ return -EFAULT;
++ ifr.ifr_name[IFNAMSIZ-1] = 0;
+
+ error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex);
+ if (error)
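
The same hardening applies to any fixed-size name copied from an
untrusted buffer: terminate it before anything treats it as a C string.
A minimal userspace-style sketch of the pattern:

  #include <stdio.h>
  #include <string.h>

  #define IFNAMSIZ 16

  static void handle_request(const char src[IFNAMSIZ])
  {
          char name[IFNAMSIZ];

          memcpy(name, src, IFNAMSIZ);  /* stand-in for copy_from_user() */
          name[IFNAMSIZ - 1] = '\0';    /* the one-line fix above */
          printf("ifname: %s\n", name); /* now safe for str*() use */
  }
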
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Liping Zhang <zlpnobody@gmail.com>
+Date: Sun, 23 Jul 2017 17:52:23 +0800
+Subject: openvswitch: fix potential out of bound access in parse_ct
+
+From: Liping Zhang <zlpnobody@gmail.com>
+
+
+[ Upstream commit 69ec932e364b1ba9c3a2085fe96b76c8a3f71e7c ]
+
+We shouldn't use 'type' to fetch ovs_ct_attr_lens's minlen and maxlen
+before it is validated; otherwise an out-of-bounds access may happen.
+
+Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action")
+Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
+Acked-by: Pravin B Shelar <pshelar@ovn.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/conntrack.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/openvswitch/conntrack.c
++++ b/net/openvswitch/conntrack.c
+@@ -1289,8 +1289,8 @@ static int parse_ct(const struct nlattr
+
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+- int maxlen = ovs_ct_attr_lens[type].maxlen;
+- int minlen = ovs_ct_attr_lens[type].minlen;
++ int maxlen;
++ int minlen;
+
+ if (type > OVS_CT_ATTR_MAX) {
+ OVS_NLERR(log,
+@@ -1298,6 +1298,9 @@ static int parse_ct(const struct nlattr
+ type, OVS_CT_ATTR_MAX);
+ return -EINVAL;
+ }
++
++ maxlen = ovs_ct_attr_lens[type].maxlen;
++ minlen = ovs_ct_attr_lens[type].minlen;
+ if (nla_len(a) < minlen || nla_len(a) > maxlen) {
+ OVS_NLERR(log,
+ "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Mon, 24 Jul 2017 10:07:32 -0700
+Subject: packet: fix use-after-free in prb_retire_rx_blk_timer_expired()
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit c800aaf8d869f2b9b47b10c5c312fe19f0a94042 ]
+
+There are multiple reports showing we have a use-after-free in
+the timer prb_retire_rx_blk_timer_expired(), where we use struct
+tpacket_kbdq_core::pkbdq, a pg_vec, after it gets freed by
+free_pg_vec().
+
+The interesting part is it is not freed via packet_release() but
+via packet_setsockopt(), which means we are not closing the socket.
+Looking into the big and fat function packet_set_ring(), this could
+happen if we satisfy the following conditions:
+
+1. closing == 0, not on packet_release() path
+2. req->tp_block_nr == 0, we don't allocate a new pg_vec
+3. rx_ring->pg_vec is already set as V3, which means we already called
+ packet_set_ring() with req->tp_block_nr > 0 previously
+4. req->tp_frame_nr == 0, pass sanity check
+5. po->mapped == 0, never called mmap()
+
+In this scenario we are clearing the old rx_ring->pg_vec, so we need
+to free this pg_vec, but we don't stop the timer on this path because
+of closing==0.
+
+The timer has to be stopped as long as we need to free pg_vec, therefore
+the check on closing!=0 is wrong, we should check pg_vec!=NULL instead.
+
+Thanks to liujian for testing different fixes.
+
+Reported-by: alexander.levin@verizon.com
+Reported-by: Dave Jones <davej@codemonkey.org.uk>
+Reported-by: liujian (CE) <liujian56@huawei.com>
+Tested-by: liujian (CE) <liujian56@huawei.com>
+Cc: Ding Tianhong <dingtianhong@huawei.com>
+Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -4334,7 +4334,7 @@ static int packet_set_ring(struct sock *
+ register_prot_hook(sk);
+ }
+ spin_unlock(&po->bind_lock);
+- if (closing && (po->tp_version > TPACKET_V2)) {
++ if (pg_vec && (po->tp_version > TPACKET_V2)) {
+ /* Because we don't support block-based V3 on tx-ring */
+ if (!tx_ring)
+ prb_shutdown_retire_blk_timer(po, rb_queue);
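
Putting conditions 1-5 together, a hedged userspace reconstruction of
the trigger could look like the following (requires CAP_NET_RAW, error
handling omitted; on unfixed kernels the second setsockopt() frees the
pg_vec while the retire timer stays armed):

  #include <arpa/inet.h>
  #include <linux/if_ether.h>
  #include <linux/if_packet.h>
  #include <string.h>
  #include <sys/socket.h>
  #include <unistd.h>

  int main(void)
  {
          int ver = TPACKET_V3;
          struct tpacket_req3 req;
          int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

          setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));

          memset(&req, 0, sizeof(req));
          req.tp_block_size = 4096;
          req.tp_block_nr = 1;        /* allocate a V3 rx_ring */
          req.tp_frame_size = 2048;
          req.tp_frame_nr = 2;
          req.tp_retire_blk_tov = 64; /* arms the retire timer */
          setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));

          memset(&req, 0, sizeof(req)); /* tp_block_nr == 0, closing == 0 */
          setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));

          sleep(1); /* timer may now fire on the freed pg_vec */
          return 0;
  }
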
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: David Ahern <dsahern@gmail.com>
+Date: Wed, 19 Jul 2017 10:22:40 -0700
+Subject: Revert "rtnetlink: Do not generate notifications for CHANGEADDR event"
+
+From: David Ahern <dsahern@gmail.com>
+
+
+[ Upstream commit 3753654e541938717b13f2b25791c3171a3a06aa ]
+
+This reverts commit cd8966e75ed3c6b41a37047a904617bc44fa481f.
+
+The duplicate CHANGEADDR event message is sent regardless of link
+status whereas the setlink changes only generate a notification when
+the link is up. Not sending a notification when the link is down breaks
+dhcpcd which only processes hwaddr changes when the link is down.
+
+Fixes reported regression:
+ https://bugzilla.kernel.org/show_bug.cgi?id=196355
+
+Reported-by: Yaroslav Isakov <yaroslav.isakov@gmail.com>
+Signed-off-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -4165,6 +4165,7 @@ static int rtnetlink_event(struct notifi
+
+ switch (event) {
+ case NETDEV_REBOOT:
++ case NETDEV_CHANGEADDR:
+ case NETDEV_CHANGENAME:
+ case NETDEV_FEAT_CHANGE:
+ case NETDEV_BONDING_FAILOVER:
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Thu, 20 Jul 2017 11:27:57 -0700
+Subject: rtnetlink: allocate more memory for dev_set_mac_address()
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit 153711f9421be5dbc973dc57a4109dc9d54c89b1 ]
+
+virtnet_set_mac_address() interprets the mac address as a struct
+sockaddr, but the upper layer only allocates sizeof(sa_family_t) +
+dev->addr_len bytes, which is ETH_ALEN + sizeof(sa_family_t) in this
+case.
+
+We lack a unified definition for the mac address, so just fix the
+upper layer; this also allows drivers to freely interpret it as a
+struct sockaddr.
+
+Reported-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -1977,7 +1977,8 @@ static int do_setlink(const struct sk_bu
+ struct sockaddr *sa;
+ int len;
+
+- len = sizeof(sa_family_t) + dev->addr_len;
++ len = sizeof(sa_family_t) + max_t(size_t, dev->addr_len,
++ sizeof(*sa));
+ sa = kmalloc(len, GFP_KERNEL);
+ if (!sa) {
+ err = -ENOMEM;
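
The size gap is concrete: on common 64-bit builds sizeof(sa_family_t)
== 2 and ETH_ALEN == 6, so the old allocation was 8 bytes, while a
driver casting the buffer to struct sockaddr may touch
sizeof(struct sockaddr) == 16 bytes. A small program to verify the
arithmetic on a given ABI:

  #include <stdio.h>
  #include <sys/socket.h>
  #include <linux/if_ether.h>

  int main(void)
  {
          printf("old len        : %zu\n",
                 sizeof(sa_family_t) + ETH_ALEN); /* typically 8 */
          printf("struct sockaddr: %zu\n",
                 sizeof(struct sockaddr));        /* typically 16 */
          return 0;
  }
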
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Alexander Potapenko <glider@google.com>
+Date: Fri, 14 Jul 2017 18:32:45 +0200
+Subject: sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()
+
+From: Alexander Potapenko <glider@google.com>
+
+
+[ Upstream commit b1f5bfc27a19f214006b9b4db7b9126df2dfdf5a ]
+
+If the length field of the iterator (|pos.p| or |err|) is past the end
+of the chunk, we shouldn't access it.
+
+This bug has been detected by KMSAN. For the following pair of system
+calls:
+
+ socket(PF_INET6, SOCK_STREAM, 0x84 /* IPPROTO_??? */) = 3
+ sendto(3, "A", 1, MSG_OOB, {sa_family=AF_INET6, sin6_port=htons(0),
+ inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0,
+ sin6_scope_id=0}, 28) = 1
+
+the tool has reported a use of uninitialized memory:
+
+ ==================================================================
+ BUG: KMSAN: use of uninitialized memory in sctp_rcv+0x17b8/0x43b0
+ CPU: 1 PID: 2940 Comm: probe Not tainted 4.11.0-rc5+ #2926
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
+ 01/01/2011
+ Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:16
+ dump_stack+0x172/0x1c0 lib/dump_stack.c:52
+ kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927
+ __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469
+ __sctp_rcv_init_lookup net/sctp/input.c:1074
+ __sctp_rcv_lookup_harder net/sctp/input.c:1233
+ __sctp_rcv_lookup net/sctp/input.c:1255
+ sctp_rcv+0x17b8/0x43b0 net/sctp/input.c:170
+ sctp6_rcv+0x32/0x70 net/sctp/ipv6.c:984
+ ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279
+ NF_HOOK ./include/linux/netfilter.h:257
+ ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322
+ dst_input ./include/net/dst.h:492
+ ip6_rcv_finish net/ipv6/ip6_input.c:69
+ NF_HOOK ./include/linux/netfilter.h:257
+ ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203
+ __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208
+ __netif_receive_skb net/core/dev.c:4246
+ process_backlog+0x667/0xba0 net/core/dev.c:4866
+ napi_poll net/core/dev.c:5268
+ net_rx_action+0xc95/0x1590 net/core/dev.c:5333
+ __do_softirq+0x485/0x942 kernel/softirq.c:284
+ do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902
+ </IRQ>
+ do_softirq kernel/softirq.c:328
+ __local_bh_enable_ip+0x25b/0x290 kernel/softirq.c:181
+ local_bh_enable+0x37/0x40 ./include/linux/bottom_half.h:31
+ rcu_read_unlock_bh ./include/linux/rcupdate.h:931
+ ip6_finish_output2+0x19b2/0x1cf0 net/ipv6/ip6_output.c:124
+ ip6_finish_output+0x764/0x970 net/ipv6/ip6_output.c:149
+ NF_HOOK_COND ./include/linux/netfilter.h:246
+ ip6_output+0x456/0x520 net/ipv6/ip6_output.c:163
+ dst_output ./include/net/dst.h:486
+ NF_HOOK ./include/linux/netfilter.h:257
+ ip6_xmit+0x1841/0x1c00 net/ipv6/ip6_output.c:261
+ sctp_v6_xmit+0x3b7/0x470 net/sctp/ipv6.c:225
+ sctp_packet_transmit+0x38cb/0x3a20 net/sctp/output.c:632
+ sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885
+ sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750
+ sctp_side_effects net/sctp/sm_sideeffect.c:1773
+ sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147
+ sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88
+ sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954
+ inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762
+ sock_sendmsg_nosec net/socket.c:633
+ sock_sendmsg net/socket.c:643
+ SYSC_sendto+0x608/0x710 net/socket.c:1696
+ SyS_sendto+0x8a/0xb0 net/socket.c:1664
+ do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285
+ entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:246
+ RIP: 0033:0x401133
+ RSP: 002b:00007fff6d99cd38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
+ RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000401133
+ RDX: 0000000000000001 RSI: 0000000000494088 RDI: 0000000000000003
+ RBP: 00007fff6d99cd90 R08: 00007fff6d99cd50 R09: 000000000000001c
+ R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000000
+ R13: 00000000004063d0 R14: 0000000000406460 R15: 0000000000000000
+ origin:
+ save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59
+ kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302
+ kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198
+ kmsan_poison_shadow+0x6d/0xc0 mm/kmsan/kmsan.c:211
+ slab_alloc_node mm/slub.c:2743
+ __kmalloc_node_track_caller+0x200/0x360 mm/slub.c:4351
+ __kmalloc_reserve net/core/skbuff.c:138
+ __alloc_skb+0x26b/0x840 net/core/skbuff.c:231
+ alloc_skb ./include/linux/skbuff.h:933
+ sctp_packet_transmit+0x31e/0x3a20 net/sctp/output.c:570
+ sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885
+ sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750
+ sctp_side_effects net/sctp/sm_sideeffect.c:1773
+ sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147
+ sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88
+ sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954
+ inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762
+ sock_sendmsg_nosec net/socket.c:633
+ sock_sendmsg net/socket.c:643
+ SYSC_sendto+0x608/0x710 net/socket.c:1696
+ SyS_sendto+0x8a/0xb0 net/socket.c:1664
+ do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285
+ return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:246
+ ==================================================================
+
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sctp/sctp.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/include/net/sctp/sctp.h
++++ b/include/net/sctp/sctp.h
+@@ -469,6 +469,8 @@ _sctp_walk_params((pos), (chunk), ntohs(
+
+ #define _sctp_walk_params(pos, chunk, end, member)\
+ for (pos.v = chunk->member;\
++ (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\
++ (void *)chunk + end) &&\
+ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
+ ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
+ pos.v += SCTP_PAD4(ntohs(pos.p->length)))
+@@ -479,6 +481,8 @@ _sctp_walk_errors((err), (chunk_hdr), nt
+ #define _sctp_walk_errors(err, chunk_hdr, end)\
+ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
+ sizeof(sctp_chunkhdr_t));\
++ ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\
++ (void *)chunk_hdr + end) &&\
+ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
+ ntohs(err->length) >= sizeof(sctp_errhdr_t); \
+ err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length))))
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Fri, 14 Jul 2017 22:07:33 +0800
+Subject: sctp: fix an array overflow when all ext chunks are set
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 10b3bf54406bb7f4e78da9bb2a485c5c986678ad ]
+
+Marcelo noticed an array overflow caused by commit c28445c3cb07
+("sctp: add reconf_enable in asoc ep and netns"), in which sctp
+would add SCTP_CID_RECONF into extensions when reconf_enable is
+set in sctp_make_init and sctp_make_init_ack.
+
+Now, when all ext chunks are set, 4 ext chunk ids can be put into the
+extensions array while the extensions array size is 3. This overflow
+would cause a kernel panic.
+
+This patch fixes it by increasing the extensions array size to 4 in
+both sctp_make_init and sctp_make_init_ack.
+
+Fixes: c28445c3cb07 ("sctp: add reconf_enable in asoc ep and netns")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/sm_make_chunk.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/sctp/sm_make_chunk.c
++++ b/net/sctp/sm_make_chunk.c
+@@ -228,7 +228,7 @@ struct sctp_chunk *sctp_make_init(const
+ sctp_adaptation_ind_param_t aiparam;
+ sctp_supported_ext_param_t ext_param;
+ int num_ext = 0;
+- __u8 extensions[3];
++ __u8 extensions[4];
+ sctp_paramhdr_t *auth_chunks = NULL,
+ *auth_hmacs = NULL;
+
+@@ -396,7 +396,7 @@ struct sctp_chunk *sctp_make_init_ack(co
+ sctp_adaptation_ind_param_t aiparam;
+ sctp_supported_ext_param_t ext_param;
+ int num_ext = 0;
+- __u8 extensions[3];
++ __u8 extensions[4];
+ sctp_paramhdr_t *auth_chunks = NULL,
+ *auth_hmacs = NULL,
+ *auth_random = NULL;
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 26 Jul 2017 16:24:59 +0800
+Subject: sctp: fix the check for _sctp_walk_params and _sctp_walk_errors
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 6b84202c946cd3da3a8daa92c682510e9ed80321 ]
+
+Commit b1f5bfc27a19 ("sctp: don't dereference ptr before leaving
+_sctp_walk_{params, errors}()") tried to fix the issue that it
+may overstep the chunk end for _sctp_walk_{params, errors} with
+'chunk_end > offset(length) + sizeof(length)'.
+
+But it introduced a side effect: when processing INIT, the chunk is
+verified with 'param.v == chunk_end' after iterating over all params
+with sctp_walk_params(). With the check 'chunk_end > offset(length)
++ sizeof(length)', the walk returns before the last param has been
+accessed, because the last param is usually the fwdtsn-supported
+param, whose size is 4, so that 'chunk_end == offset(length) +
+sizeof(length)'.
+
+This is a bad issue that even prevents sctp from completing the 4-way
+handshake: a client always gets an abort when connecting to a server,
+due to the failure of INIT chunk verification on the server.
+
+The patch is to use 'chunk_end <= offset(length) + sizeof(length)'
+instead of 'chunk_end < offset(length) + sizeof(length)' for both
+_sctp_walk_params and _sctp_walk_errors.
+
+Fixes: b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sctp/sctp.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/include/net/sctp/sctp.h
++++ b/include/net/sctp/sctp.h
+@@ -469,7 +469,7 @@ _sctp_walk_params((pos), (chunk), ntohs(
+
+ #define _sctp_walk_params(pos, chunk, end, member)\
+ for (pos.v = chunk->member;\
+- (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\
++ (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\
+ (void *)chunk + end) &&\
+ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
+ ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
+@@ -481,7 +481,7 @@ _sctp_walk_errors((err), (chunk_hdr), nt
+ #define _sctp_walk_errors(err, chunk_hdr, end)\
+ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
+ sizeof(sctp_chunkhdr_t));\
+- ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\
++ ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\
+ (void *)chunk_hdr + end) &&\
+ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
+ ntohs(err->length) >= sizeof(sctp_errhdr_t); \
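
The off-by-one is easiest to see with a 4-byte parameter (such as the
fwdtsn-supported param) sitting flush at the end of the chunk, where
offsetof(struct sctp_paramhdr, length) == 2 and sizeof(pos.p->length)
== 2:

  pos.v + 2 + 2 == chunk_end   ->  '<'  rejects the final param (bug)
                               ->  '<=' accepts it, while both bytes of
                                   the length field still lie inside
                                   the chunk

So '<=' is the tightest guard that still prevents dereferencing a
length field that straddles the chunk end.
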
blk-mq-create-hctx-for-each-present-cpu.patch
block-disable-runtime-pm-for-blk-mq.patch
saa7164-fix-double-fetch-pcie-access-condition.patch
+sctp-fix-an-array-overflow-when-all-ext-chunks-are-set.patch
+tcp_bbr-cut-pacing-rate-only-if-filled-pipe.patch
+tcp_bbr-introduce-bbr_bw_to_pacing_rate-helper.patch
+tcp_bbr-introduce-bbr_init_pacing_rate_from_rtt-helper.patch
+tcp_bbr-remove-sk_pacing_rate-0-transient-during-init.patch
+tcp_bbr-init-pacing-rate-on-first-rtt-sample.patch
+ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch
+wireless-wext-terminate-ifr-name-coming-from-userspace.patch
+net-zero-terminate-ifr_name-in-dev_ifname.patch
+net-dsa-mv88e6xxx-enable-cmode-config-support-for-6390x.patch
+revert-rtnetlink-do-not-generate-notifications-for-changeaddr-event.patch
+ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch
+net-dsa-b53-add-missing-arl-entries-for-bcm53125.patch
+ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch
+rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch
+net-bonding-fix-transmit-load-balancing-in-balance-alb-mode.patch
+mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch
+openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch
+packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch
+ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch
+net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch
+bonding-commit-link-status-change-after-propose.patch
+dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch
+dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch
+dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch
+net-mlx5-consider-tx_enabled-in-all-modes-on-remap.patch
+net-mlx5-fix-command-completion-after-timeout-access-invalid-structure.patch
+net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch
+sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch
+sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch
+net-mlx5e-ipoib-modify-add-remove-underlay-qpn-flows.patch
+net-mlx5e-fix-outer_header_zero-check-size.patch
+net-mlx5-fix-mlx5_ifc_mtpps_reg_bits-structure-size.patch
+net-mlx5e-add-field-select-to-mtpps-register.patch
+net-mlx5e-fix-broken-disable-1pps-flow.patch
+net-mlx5e-change-1pps-out-scheme.patch
+net-mlx5e-add-missing-support-for-ptp_clk_req_pps-request.patch
+net-mlx5e-fix-wrong-delay-calculation-for-overflow-check-scheduling.patch
+net-mlx5e-schedule-overflow-check-work-to-mlx5e-workqueue.patch
+net-mlx5-fix-mlx5_add_flow_rules-call-with-correct-num-of-dests.patch
+udp6-fix-socket-leak-on-early-demux.patch
+net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch
+ipv4-fib-fix-null-pointer-deref-during-fib_sync_down_dev.patch
+virtio_net-fix-truesize-for-mergeable-buffers.patch
+sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch
+sparc64-prevent-perf-from-running-during-super-critical-sections.patch
+sparc64-register-hugepages-during-arch-init.patch
+sparc64-fix-exception-handling-in-ultrasparc-iii-memcpy.patch
--- /dev/null
+From foo@baz Tue Aug 8 16:28:31 PDT 2017
+From: "David S. Miller" <davem@davemloft.net>
+Date: Fri, 4 Aug 2017 09:47:52 -0700
+Subject: sparc64: Fix exception handling in UltraSPARC-III memcpy.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+
+[ Upstream commit 0ede1c401332173ab0693121dc6cde04a4dbf131 ]
+
+Mikael Pettersson reported that some test programs in the strace-4.18
+testsuite cause an OOPS.
+
+After some debugging it turns out that garbage values are returned
+when an exception occurs, causing the fixup memset() to be run with
+bogus arguments.
+
+The problem is that two of the exception handler stubs write the
+successfully copied length into the wrong register.
+
+Fixes: ee841d0aff64 ("sparc64: Convert U3copy_{from,to}_user to accurate exception reporting.")
+Reported-by: Mikael Pettersson <mikpelinux@gmail.com>
+Tested-by: Mikael Pettersson <mikpelinux@gmail.com>
+Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/lib/U3memcpy.S | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/sparc/lib/U3memcpy.S
++++ b/arch/sparc/lib/U3memcpy.S
+@@ -145,13 +145,13 @@ ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
+ ENTRY(U3_retl_o2_and_7_plus_GS)
+ and %o2, 7, %o2
+ retl
+- add %o2, GLOBAL_SPARE, %o2
++ add %o2, GLOBAL_SPARE, %o0
+ ENDPROC(U3_retl_o2_and_7_plus_GS)
+ ENTRY(U3_retl_o2_and_7_plus_GS_plus_8)
+ add GLOBAL_SPARE, 8, GLOBAL_SPARE
+ and %o2, 7, %o2
+ retl
+- add %o2, GLOBAL_SPARE, %o2
++ add %o2, GLOBAL_SPARE, %o0
+ ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
+ #endif
+
--- /dev/null
+From foo@baz Tue Aug 8 16:28:31 PDT 2017
+From: Jane Chu <jane.chu@oracle.com>
+Date: Tue, 11 Jul 2017 12:00:54 -0600
+Subject: sparc64: Measure receiver forward progress to avoid send mondo timeout
+
+From: Jane Chu <jane.chu@oracle.com>
+
+
+[ Upstream commit 9d53caec84c7c5700e7c1ed744ea584fff55f9ac ]
+
+A large sun4v SPARC system may have moments of intensive xcall activities,
+usually caused by unmapping many pages on many CPUs concurrently. This can
+flood receivers with CPU mondo interrupts for an extended period, causing
+some unlucky senders to hit send-mondo timeout. This problem gets worse
+as cpu count increases because sometimes mappings must be invalidated on
+all CPUs, and sometimes all CPUs may gang up on a single CPU.
+
+But a busy system is not a broken system. In the above scenario, as long
+as the receiver is making forward progress processing mondo interrupts,
+the sender should continue to retry.
+
+This patch implements the receiver's forward progress meter by introducing
+a per cpu counter 'cpu_mondo_counter[cpu]' where 'cpu' is in the range
+of 0..NR_CPUS. The receiver increments its counter as soon as it receives
+a mondo and the sender tracks the receiver's counter. If the receiver has
+stopped making forward progress when the retry limit is reached, the sender
+declares send-mondo-timeout and panic; otherwise, the receiver is allowed
+to keep making forward progress.
+
+In addition, it's been observed that PCIe hotplug events generate Correctable
+Errors that are handled by hypervisor and then OS. Hypervisor 'borrows'
+a guest cpu strand briefly to provide the service. If the cpu strand is
+simultaneously the only cpu targeted by a mondo, it may not be available
+for the mondo in 20msec, causing SUN4V mondo timeout. It appears that 1 second
+is the agreed wait time between hypervisor and guest OS, this patch makes
+the adjustment.
+
+Orabug: 25476541
+Orabug: 26417466
+
+Signed-off-by: Jane Chu <jane.chu@oracle.com>
+Reviewed-by: Steve Sistare <steven.sistare@oracle.com>
+Reviewed-by: Anthony Yznaga <anthony.yznaga@oracle.com>
+Reviewed-by: Rob Gardner <rob.gardner@oracle.com>
+Reviewed-by: Thomas Tai <thomas.tai@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/trap_block.h | 1
+ arch/sparc/kernel/smp_64.c | 189 ++++++++++++++++++++++--------------
+ arch/sparc/kernel/sun4v_ivec.S | 15 ++
+ arch/sparc/kernel/traps_64.c | 1
+ 4 files changed, 134 insertions(+), 72 deletions(-)
+
+--- a/arch/sparc/include/asm/trap_block.h
++++ b/arch/sparc/include/asm/trap_block.h
+@@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR
+ void init_cur_cpu_trap(struct thread_info *);
+ void setup_tba(void);
+ extern int ncpus_probed;
++extern u64 cpu_mondo_counter[NR_CPUS];
+
+ unsigned long real_hard_smp_processor_id(void);
+
+--- a/arch/sparc/kernel/smp_64.c
++++ b/arch/sparc/kernel/smp_64.c
+@@ -622,22 +622,48 @@ retry:
+ }
+ }
+
+-/* Multi-cpu list version. */
++#define CPU_MONDO_COUNTER(cpuid) (cpu_mondo_counter[cpuid])
++#define MONDO_USEC_WAIT_MIN 2
++#define MONDO_USEC_WAIT_MAX 100
++#define MONDO_RETRY_LIMIT 500000
++
++/* Multi-cpu list version.
++ *
++ * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'.
++ * Sometimes not all cpus receive the mondo, requiring us to re-send
++ * the mondo until all cpus have received, or cpus are truly stuck
++ * unable to receive mondo, and we timeout.
++ * Occasionally a target cpu strand is borrowed briefly by hypervisor to
++ * perform guest service, such as PCIe error handling. Consider the
++ * service time, 1 second overall wait is reasonable for 1 cpu.
++ * Here two in-between mondo check wait time are defined: 2 usec for
++ * single cpu quick turn around and up to 100usec for large cpu count.
++ * Deliver mondo to large number of cpus could take longer, we adjusts
++ * the retry count as long as target cpus are making forward progress.
++ */
+ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
+ {
+- int retries, this_cpu, prev_sent, i, saw_cpu_error;
++ int this_cpu, tot_cpus, prev_sent, i, rem;
++ int usec_wait, retries, tot_retries;
++ u16 first_cpu = 0xffff;
++ unsigned long xc_rcvd = 0;
+ unsigned long status;
++ int ecpuerror_id = 0;
++ int enocpu_id = 0;
+ u16 *cpu_list;
++ u16 cpu;
+
+ this_cpu = smp_processor_id();
+-
+ cpu_list = __va(tb->cpu_list_pa);
+-
+- saw_cpu_error = 0;
+- retries = 0;
++ usec_wait = cnt * MONDO_USEC_WAIT_MIN;
++ if (usec_wait > MONDO_USEC_WAIT_MAX)
++ usec_wait = MONDO_USEC_WAIT_MAX;
++ retries = tot_retries = 0;
++ tot_cpus = cnt;
+ prev_sent = 0;
++
+ do {
+- int forward_progress, n_sent;
++ int n_sent, mondo_delivered, target_cpu_busy;
+
+ status = sun4v_cpu_mondo_send(cnt,
+ tb->cpu_list_pa,
+@@ -645,94 +671,113 @@ static void hypervisor_xcall_deliver(str
+
+ /* HV_EOK means all cpus received the xcall, we're done. */
+ if (likely(status == HV_EOK))
+- break;
++ goto xcall_done;
++
++ /* If not these non-fatal errors, panic */
++ if (unlikely((status != HV_EWOULDBLOCK) &&
++ (status != HV_ECPUERROR) &&
++ (status != HV_ENOCPU)))
++ goto fatal_errors;
+
+ /* First, see if we made any forward progress.
+ *
++ * Go through the cpu_list, count the target cpus that have
++ * received our mondo (n_sent), and those that did not (rem).
++ * Re-pack cpu_list with the cpus remain to be retried in the
++ * front - this simplifies tracking the truly stalled cpus.
++ *
+ * The hypervisor indicates successful sends by setting
+ * cpu list entries to the value 0xffff.
++ *
++ * EWOULDBLOCK means some target cpus did not receive the
++ * mondo and retry usually helps.
++ *
++ * ECPUERROR means at least one target cpu is in error state,
++ * it's usually safe to skip the faulty cpu and retry.
++ *
++ * ENOCPU means one of the target cpu doesn't belong to the
++ * domain, perhaps offlined which is unexpected, but not
++ * fatal and it's okay to skip the offlined cpu.
+ */
++ rem = 0;
+ n_sent = 0;
+ for (i = 0; i < cnt; i++) {
+- if (likely(cpu_list[i] == 0xffff))
++ cpu = cpu_list[i];
++ if (likely(cpu == 0xffff)) {
+ n_sent++;
++ } else if ((status == HV_ECPUERROR) &&
++ (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) {
++ ecpuerror_id = cpu + 1;
++ } else if (status == HV_ENOCPU && !cpu_online(cpu)) {
++ enocpu_id = cpu + 1;
++ } else {
++ cpu_list[rem++] = cpu;
++ }
+ }
+
+- forward_progress = 0;
+- if (n_sent > prev_sent)
+- forward_progress = 1;
++ /* No cpu remained, we're done. */
++ if (rem == 0)
++ break;
+
+- prev_sent = n_sent;
++ /* Otherwise, update the cpu count for retry. */
++ cnt = rem;
+
+- /* If we get a HV_ECPUERROR, then one or more of the cpus
+- * in the list are in error state. Use the cpu_state()
+- * hypervisor call to find out which cpus are in error state.
++ /* Record the overall number of mondos received by the
++ * first of the remaining cpus.
+ */
+- if (unlikely(status == HV_ECPUERROR)) {
+- for (i = 0; i < cnt; i++) {
+- long err;
+- u16 cpu;
+-
+- cpu = cpu_list[i];
+- if (cpu == 0xffff)
+- continue;
+-
+- err = sun4v_cpu_state(cpu);
+- if (err == HV_CPU_STATE_ERROR) {
+- saw_cpu_error = (cpu + 1);
+- cpu_list[i] = 0xffff;
+- }
+- }
+- } else if (unlikely(status != HV_EWOULDBLOCK))
+- goto fatal_mondo_error;
++ if (first_cpu != cpu_list[0]) {
++ first_cpu = cpu_list[0];
++ xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
++ }
+
+- /* Don't bother rewriting the CPU list, just leave the
+- * 0xffff and non-0xffff entries in there and the
+- * hypervisor will do the right thing.
+- *
+- * Only advance timeout state if we didn't make any
+- * forward progress.
++ /* Was any mondo delivered successfully? */
++ mondo_delivered = (n_sent > prev_sent);
++ prev_sent = n_sent;
++
++ /* or, was any target cpu busy processing other mondos? */
++ target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu));
++ xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
++
++ /* Retry count is for no progress. If we're making progress,
++ * reset the retry count.
+ */
+- if (unlikely(!forward_progress)) {
+- if (unlikely(++retries > 10000))
+- goto fatal_mondo_timeout;
+-
+- /* Delay a little bit to let other cpus catch up
+- * on their cpu mondo queue work.
+- */
+- udelay(2 * cnt);
++ if (likely(mondo_delivered || target_cpu_busy)) {
++ tot_retries += retries;
++ retries = 0;
++ } else if (unlikely(retries > MONDO_RETRY_LIMIT)) {
++ goto fatal_mondo_timeout;
+ }
+- } while (1);
+
+- if (unlikely(saw_cpu_error))
+- goto fatal_mondo_cpu_error;
++ /* Delay a little bit to let other cpus catch up on
++ * their cpu mondo queue work.
++ */
++ if (!mondo_delivered)
++ udelay(usec_wait);
+
+- return;
++ retries++;
++ } while (1);
+
+-fatal_mondo_cpu_error:
+- printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
+- "(including %d) were in error state\n",
+- this_cpu, saw_cpu_error - 1);
++xcall_done:
++ if (unlikely(ecpuerror_id > 0)) {
++ pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n",
++ this_cpu, ecpuerror_id - 1);
++ } else if (unlikely(enocpu_id > 0)) {
++ pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n",
++ this_cpu, enocpu_id - 1);
++ }
+ return;
+
++fatal_errors:
++ /* fatal errors include bad alignment, etc */
++ pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n",
++ this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
++ panic("Unexpected SUN4V mondo error %lu\n", status);
++
+ fatal_mondo_timeout:
+- printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
+- " progress after %d retries.\n",
+- this_cpu, retries);
+- goto dump_cpu_list_and_out;
+-
+-fatal_mondo_error:
+- printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
+- this_cpu, status);
+- printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
+- "mondo_block_pa(%lx)\n",
+- this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
+-
+-dump_cpu_list_and_out:
+- printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
+- for (i = 0; i < cnt; i++)
+- printk("%u ", cpu_list[i]);
+- printk("]\n");
++ /* some cpus being non-responsive to the cpu mondo */
++ pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n",
++ this_cpu, first_cpu, (tot_retries + retries), tot_cpus);
++ panic("SUN4V mondo timeout panic\n");
+ }
+
+ static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
+--- a/arch/sparc/kernel/sun4v_ivec.S
++++ b/arch/sparc/kernel/sun4v_ivec.S
+@@ -26,6 +26,21 @@ sun4v_cpu_mondo:
+ ldxa [%g0] ASI_SCRATCHPAD, %g4
+ sub %g4, TRAP_PER_CPU_FAULT_INFO, %g4
+
++ /* Get smp_processor_id() into %g3 */
++ sethi %hi(trap_block), %g5
++ or %g5, %lo(trap_block), %g5
++ sub %g4, %g5, %g3
++ srlx %g3, TRAP_BLOCK_SZ_SHIFT, %g3
++
++ /* Increment cpu_mondo_counter[smp_processor_id()] */
++ sethi %hi(cpu_mondo_counter), %g5
++ or %g5, %lo(cpu_mondo_counter), %g5
++ sllx %g3, 3, %g3
++ add %g5, %g3, %g5
++ ldx [%g5], %g3
++ add %g3, 1, %g3
++ stx %g3, [%g5]
++
+ /* Get CPU mondo queue base phys address into %g7. */
+ ldx [%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7
+
+--- a/arch/sparc/kernel/traps_64.c
++++ b/arch/sparc/kernel/traps_64.c
+@@ -2733,6 +2733,7 @@ void do_getpsr(struct pt_regs *regs)
+ }
+ }
+
++u64 cpu_mondo_counter[NR_CPUS] = {0};
+ struct trap_per_cpu trap_block[NR_CPUS];
+ EXPORT_SYMBOL(trap_block);
+
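
Stripped of the hypervisor details, the sender-side policy introduced
above reduces to: reset the retry budget whenever the receiver's
counter moves, and only time out when it does not. A condensed sketch,
with try_send() and wait_a_bit() as placeholders:

  int try_send(void);
  void wait_a_bit(void);

  static int send_with_progress_meter(const volatile unsigned long *rx_count,
                                      int retry_limit)
  {
          unsigned long seen = *rx_count;
          int retries = 0;

          while (!try_send()) {
                  if (*rx_count != seen) {
                          seen = *rx_count; /* receiver made progress */
                          retries = 0;      /* so reset the budget */
                  } else if (++retries > retry_limit) {
                          return -1;        /* truly stuck: give up */
                  }
                  wait_a_bit();
          }
          return 0;
  }
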
--- /dev/null
+From foo@baz Tue Aug 8 16:28:31 PDT 2017
+From: Rob Gardner <rob.gardner@oracle.com>
+Date: Mon, 17 Jul 2017 09:22:27 -0600
+Subject: sparc64: Prevent perf from running during super critical sections
+
+From: Rob Gardner <rob.gardner@oracle.com>
+
+
+[ Upstream commit fc290a114fc6034b0f6a5a46e2fb7d54976cf87a ]
+
+This fixes another cause of random segfaults and bus errors that may
+occur while running perf with the callgraph option.
+
+Critical sections beginning with spin_lock_irqsave() raise the interrupt
+level to PIL_NORMAL_MAX (14) and intentionally do not block performance
+counter interrupts, which arrive at PIL_NMI (15).
+
+But some sections of code are "super critical" with respect to perf
+because the perf_callchain_user() path accesses user space and may cause
+TLB activity as well as faults as it unwinds the user stack.
+
+One particular critical section occurs in switch_mm:
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+ ...
+ load_secondary_context(mm);
+ tsb_context_switch(mm);
+ ...
+ spin_unlock_irqrestore(&mm->context.lock, flags);
+
+If a perf interrupt arrives in between load_secondary_context() and
+tsb_context_switch(), then perf_callchain_user() could execute with
+the context ID of one process, but with an active TSB for a different
+process. When the user stack is accessed, it is very likely to
+incur a TLB miss, since the h/w context ID has been changed. The TLB
+will then be reloaded with a translation from the TSB for one process,
+but using a context ID for another process. This exposes memory from
+one process to another, and since it is a mapping for stack memory,
+this usually causes the new process to crash quickly.
+
+This super critical section needs more protection than is provided
+by spin_lock_irqsave() since perf interrupts must not be allowed in.
+
+Since __tsb_context_switch already goes through the trouble of
+disabling interrupts completely, we fix this by moving the secondary
+context load down into this better protected region.
+
+Orabug: 25577560
+
+Signed-off-by: Dave Aldridge <david.j.aldridge@oracle.com>
+Signed-off-by: Rob Gardner <rob.gardner@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/mmu_context_64.h | 14 +++++++++-----
+ arch/sparc/kernel/tsb.S | 12 ++++++++++++
+ arch/sparc/power/hibernate.c | 3 +--
+ 3 files changed, 22 insertions(+), 7 deletions(-)
+
+--- a/arch/sparc/include/asm/mmu_context_64.h
++++ b/arch/sparc/include/asm/mmu_context_64.h
+@@ -27,9 +27,11 @@ void destroy_context(struct mm_struct *m
+ void __tsb_context_switch(unsigned long pgd_pa,
+ struct tsb_config *tsb_base,
+ struct tsb_config *tsb_huge,
+- unsigned long tsb_descr_pa);
++ unsigned long tsb_descr_pa,
++ unsigned long secondary_ctx);
+
+-static inline void tsb_context_switch(struct mm_struct *mm)
++static inline void tsb_context_switch_ctx(struct mm_struct *mm,
++ unsigned long ctx)
+ {
+ __tsb_context_switch(__pa(mm->pgd),
+ &mm->context.tsb_block[MM_TSB_BASE],
+@@ -40,9 +42,12 @@ static inline void tsb_context_switch(st
+ #else
+ NULL
+ #endif
+- , __pa(&mm->context.tsb_descr[MM_TSB_BASE]));
++ , __pa(&mm->context.tsb_descr[MM_TSB_BASE]),
++ ctx);
+ }
+
++#define tsb_context_switch(X) tsb_context_switch_ctx(X, 0)
++
+ void tsb_grow(struct mm_struct *mm,
+ unsigned long tsb_index,
+ unsigned long mm_rss);
+@@ -112,8 +117,7 @@ static inline void switch_mm(struct mm_s
+ * cpu0 to update it's TSB because at that point the cpu_vm_mask
+ * only had cpu1 set in it.
+ */
+- load_secondary_context(mm);
+- tsb_context_switch(mm);
++ tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context));
+
+ /* Any time a processor runs a context on an address space
+ * for the first time, we must flush that context out of the
+--- a/arch/sparc/kernel/tsb.S
++++ b/arch/sparc/kernel/tsb.S
+@@ -360,6 +360,7 @@ tsb_flush:
+ * %o1: TSB base config pointer
+ * %o2: TSB huge config pointer, or NULL if none
+ * %o3: Hypervisor TSB descriptor physical address
++ * %o4: Secondary context to load, if non-zero
+ *
+ * We have to run this whole thing with interrupts
+ * disabled so that the current cpu doesn't change
+@@ -372,6 +373,17 @@ __tsb_context_switch:
+ rdpr %pstate, %g1
+ wrpr %g1, PSTATE_IE, %pstate
+
++ brz,pn %o4, 1f
++ mov SECONDARY_CONTEXT, %o5
++
++661: stxa %o4, [%o5] ASI_DMMU
++ .section .sun4v_1insn_patch, "ax"
++ .word 661b
++ stxa %o4, [%o5] ASI_MMU
++ .previous
++ flush %g6
++
++1:
+ TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
+
+ stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR]
+--- a/arch/sparc/power/hibernate.c
++++ b/arch/sparc/power/hibernate.c
+@@ -35,6 +35,5 @@ void restore_processor_state(void)
+ {
+ struct mm_struct *mm = current->active_mm;
+
+- load_secondary_context(mm);
+- tsb_context_switch(mm);
++ tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context));
+ }
--- /dev/null
+From foo@baz Tue Aug 8 16:28:31 PDT 2017
+From: Nitin Gupta <nitin.m.gupta@oracle.com>
+Date: Wed, 19 Jul 2017 17:12:54 -0700
+Subject: sparc64: Register hugepages during arch init
+
+From: Nitin Gupta <nitin.m.gupta@oracle.com>
+
+
+[ Upstream commit 8399e4b88a93fc7bc00fff3b8da9b2e718b7f45e ]
+
+Add hstate for each supported hugepage size using
+arch initcall. This change fixes some hugepage
+parameter parsing inconsistencies:
+
+case 1: no hugepage parameters
+
+ Without hugepage parameters, only a hugepages-8192kB entry is visible
+ in sysfs. It's different from x86_64 where both 2M and 1G hugepage
+ sizes are available.
+
+case 2: default_hugepagesz=[64K|256M|2G]
+
+ When specifying only a default_hugepagesz parameter, the default
+ hugepage size isn't really changed and it stays at 8M. This is again
+ different from x86_64.
+
+Orabug: 25869946
+
+Reviewed-by: Bob Picco <bob.picco@oracle.com>
+Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/mm/init_64.c | 25 ++++++++++++++++++++++++-
+ 1 file changed, 24 insertions(+), 1 deletion(-)
+
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -325,6 +325,29 @@ static void __update_mmu_tsb_insert(stru
+ }
+
+ #ifdef CONFIG_HUGETLB_PAGE
++static void __init add_huge_page_size(unsigned long size)
++{
++ unsigned int order;
++
++ if (size_to_hstate(size))
++ return;
++
++ order = ilog2(size) - PAGE_SHIFT;
++ hugetlb_add_hstate(order);
++}
++
++static int __init hugetlbpage_init(void)
++{
++ add_huge_page_size(1UL << HPAGE_64K_SHIFT);
++ add_huge_page_size(1UL << HPAGE_SHIFT);
++ add_huge_page_size(1UL << HPAGE_256MB_SHIFT);
++ add_huge_page_size(1UL << HPAGE_2GB_SHIFT);
++
++ return 0;
++}
++
++arch_initcall(hugetlbpage_init);
++
+ static int __init setup_hugepagesz(char *string)
+ {
+ unsigned long long hugepage_size;
+@@ -364,7 +387,7 @@ static int __init setup_hugepagesz(char
+ goto out;
+ }
+
+- hugetlb_add_hstate(hugepage_shift - PAGE_SHIFT);
++ add_huge_page_size(hugepage_size);
+ rc = 1;
+
+ out:
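
With the hstates registered at arch init, the usual boot parameters
behave as on x86_64. For instance (values illustrative, sizes from the
list registered above):

  default_hugepagesz=2G hugepagesz=2G hugepages=2 hugepagesz=64K hugepages=512
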
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Neal Cardwell <ncardwell@google.com>
+Date: Fri, 14 Jul 2017 17:49:21 -0400
+Subject: tcp_bbr: cut pacing rate only if filled pipe
+
+From: Neal Cardwell <ncardwell@google.com>
+
+
+[ Upstream commit 4aea287e90dd61a48268ff2994b56f9799441b62 ]
+
+In bbr_set_pacing_rate(), which decides whether to cut the pacing
+rate, there was some code that considered exiting STARTUP to be
+equivalent to the notion of filling the pipe (i.e.,
+bbr_full_bw_reached()). Specifically, as the code was structured,
+exiting STARTUP and going into PROBE_RTT could cause us to cut the
+pacing rate down to something silly and low, based on whatever
+bandwidth samples we've had so far, when it's possible that all of
+them have been small app-limited bandwidth samples that are not
+representative of the bandwidth available in the path. (The code was
+correct at the time it was written, but the state machine changed
+without this spot being adjusted correspondingly.)
+
+Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_bbr.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -221,12 +221,11 @@ static u64 bbr_rate_bytes_per_sec(struct
+ */
+ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
+ {
+- struct bbr *bbr = inet_csk_ca(sk);
+ u64 rate = bw;
+
+ rate = bbr_rate_bytes_per_sec(sk, rate, gain);
+ rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+- if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate)
++ if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
+ sk->sk_pacing_rate = rate;
+ }
+
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Neal Cardwell <ncardwell@google.com>
+Date: Fri, 14 Jul 2017 17:49:25 -0400
+Subject: tcp_bbr: init pacing rate on first RTT sample
+
+From: Neal Cardwell <ncardwell@google.com>
+
+
+[ Upstream commit 32984565574da7ed3afa10647bb4020d7a9e6c93 ]
+
+Fixes the following behavior: for connections that had no RTT sample
+at the time of initializing congestion control, BBR was initializing
+the pacing rate to a high nominal rate (based on a guess of RTT=1ms,
+in case this is LAN traffic). Then BBR never adjusted the pacing rate
+downward upon obtaining an actual RTT sample, if the connection never
+filled the pipe (e.g. all sends were small app-limited write()s).
+
+This fix adjusts the pacing rate upon obtaining the first RTT sample.
+
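+The core of the fix (as in the diff below) is a one-shot re-initialization
+of the pacing rate once the first RTT sample appears:
+
+  if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
+          bbr_init_pacing_rate_from_rtt(sk);
+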
+Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_bbr.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -113,7 +113,8 @@ struct bbr {
+ cwnd_gain:10, /* current gain for setting cwnd */
+ full_bw_cnt:3, /* number of rounds without large bw gains */
+ cycle_idx:3, /* current index in pacing_gain cycle array */
+- unused_b:6;
++ has_seen_rtt:1, /* have we seen an RTT sample yet? */
++ unused_b:5;
+ u32 prior_cwnd; /* prior cwnd upon entering loss recovery */
+ u32 full_bw; /* recent bw, to estimate if pipe is full */
+ };
+@@ -226,11 +227,13 @@ static u32 bbr_bw_to_pacing_rate(struct
+ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
++ struct bbr *bbr = inet_csk_ca(sk);
+ u64 bw;
+ u32 rtt_us;
+
+ if (tp->srtt_us) { /* any RTT sample yet? */
+ rtt_us = max(tp->srtt_us >> 3, 1U);
++ bbr->has_seen_rtt = 1;
+ } else { /* no RTT sample yet */
+ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
+ }
+@@ -248,8 +251,12 @@ static void bbr_init_pacing_rate_from_rt
+ */
+ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
+ {
++ struct tcp_sock *tp = tcp_sk(sk);
++ struct bbr *bbr = inet_csk_ca(sk);
+ u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
+
++ if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
++ bbr_init_pacing_rate_from_rtt(sk);
+ if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
+ sk->sk_pacing_rate = rate;
+ }
+@@ -838,6 +845,7 @@ static void bbr_init(struct sock *sk)
+
+ minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
+
++ bbr->has_seen_rtt = 0;
+ bbr_init_pacing_rate_from_rtt(sk);
+
+ bbr->restore_cwnd = 0;
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Neal Cardwell <ncardwell@google.com>
+Date: Fri, 14 Jul 2017 17:49:22 -0400
+Subject: tcp_bbr: introduce bbr_bw_to_pacing_rate() helper
+
+From: Neal Cardwell <ncardwell@google.com>
+
+
+[ Upstream commit f19fd62dafaf1ed6cf615dba655b82fa9df59074 ]
+
+Introduce a helper to convert a BBR bandwidth and gain factor to a
+pacing rate in bytes per second. This is a pure refactor, but is
+needed for the two fixes that follow.
+
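+Callers then collapse the previous two-step clamp-and-convert sequence
+into a single call (as the diff below does in bbr_set_pacing_rate()):
+
+  u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
+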
+Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_bbr.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -212,6 +212,16 @@ static u64 bbr_rate_bytes_per_sec(struct
+ return rate >> BW_SCALE;
+ }
+
++/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
++static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
++{
++ u64 rate = bw;
++
++ rate = bbr_rate_bytes_per_sec(sk, rate, gain);
++ rate = min_t(u64, rate, sk->sk_max_pacing_rate);
++ return rate;
++}
++
+ /* Pace using current bw estimate and a gain factor. In order to help drive the
+ * network toward lower queues while maintaining high utilization and low
+ * latency, the average pacing rate aims to be slightly (~1%) lower than the
+@@ -221,10 +231,8 @@ static u64 bbr_rate_bytes_per_sec(struct
+ */
+ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
+ {
+- u64 rate = bw;
++ u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
+
+- rate = bbr_rate_bytes_per_sec(sk, rate, gain);
+- rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+ if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
+ sk->sk_pacing_rate = rate;
+ }
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Neal Cardwell <ncardwell@google.com>
+Date: Fri, 14 Jul 2017 17:49:23 -0400
+Subject: tcp_bbr: introduce bbr_init_pacing_rate_from_rtt() helper
+
+From: Neal Cardwell <ncardwell@google.com>
+
+
+[ Upstream commit 79135b89b8af304456bd67916b80116ddf03d7b6 ]
+
+Introduce a helper to initialize the BBR pacing rate unconditionally,
+based on the current cwnd and RTT estimate. This is a pure refactor,
+but is needed for the two fixes that follow.
+
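+Condensed, the helper computes rate = high_gain * cwnd / RTT, falling back
+to a nominal 1 ms RTT when no sample exists yet (sketch of the code in the
+diff below):
+
+  bw = (u64)tp->snd_cwnd * BW_UNIT;
+  do_div(bw, tp->srtt_us ? max(tp->srtt_us >> 3, 1U) : USEC_PER_MSEC);
+  sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
+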
+Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_bbr.c | 23 ++++++++++++++++++-----
+ 1 file changed, 18 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -222,6 +222,23 @@ static u32 bbr_bw_to_pacing_rate(struct
+ return rate;
+ }
+
++/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
++static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
++{
++ struct tcp_sock *tp = tcp_sk(sk);
++ u64 bw;
++ u32 rtt_us;
++
++ if (tp->srtt_us) { /* any RTT sample yet? */
++ rtt_us = max(tp->srtt_us >> 3, 1U);
++ } else { /* no RTT sample yet */
++ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
++ }
++ bw = (u64)tp->snd_cwnd * BW_UNIT;
++ do_div(bw, rtt_us);
++ sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
++}
++
+ /* Pace using current bw estimate and a gain factor. In order to help drive the
+ * network toward lower queues while maintaining high utilization and low
+ * latency, the average pacing rate aims to be slightly (~1%) lower than the
+@@ -806,7 +823,6 @@ static void bbr_init(struct sock *sk)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+- u64 bw;
+
+ bbr->prior_cwnd = 0;
+ bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
+@@ -822,11 +838,8 @@ static void bbr_init(struct sock *sk)
+
+ minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
+
+- /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
+- bw = (u64)tp->snd_cwnd * BW_UNIT;
+- do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC);
+ sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */
+- bbr_set_pacing_rate(sk, bw, bbr_high_gain);
++ bbr_init_pacing_rate_from_rtt(sk);
+
+ bbr->restore_cwnd = 0;
+ bbr->round_start = 0;
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Neal Cardwell <ncardwell@google.com>
+Date: Fri, 14 Jul 2017 17:49:24 -0400
+Subject: tcp_bbr: remove sk_pacing_rate=0 transient during init
+
+From: Neal Cardwell <ncardwell@google.com>
+
+
+[ Upstream commit 1d3648eb5d1fe9ed3d095ed8fa19ad11ca4c8bc0 ]
+
+Fix a corner case noticed by Eric Dumazet, where BBR's setting
+sk->sk_pacing_rate to 0 during initialization could theoretically
+cause packets in the sending host to hang if there were packets "in
+flight" in the pacing infrastructure at the time the BBR congestion
+control state is initialized. This could occur if the pacing
+infrastructure happened to race with bbr_init() in a way such that the
+pacer read the 0 rather than the immediately following non-zero pacing
+rate.
+
+Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
+Reported-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_bbr.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -838,7 +838,6 @@ static void bbr_init(struct sock *sk)
+
+ minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
+
+- sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */
+ bbr_init_pacing_rate_from_rtt(sk);
+
+ bbr->restore_cwnd = 0;
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Thu, 27 Jul 2017 14:45:09 +0200
+Subject: udp6: fix socket leak on early demux
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+
+[ Upstream commit c9f2c1ae123a751d4e4f949144500219354d5ee1 ]
+
+When an early demuxed packet reaches __udp6_lib_lookup_skb(), the
+sk reference is retrieved and used, but the relevant reference
+count is leaked and the socket destructor is never called.
+Beyond leaking the sk memory, if there are pending UDP packets
+in the receive queue, the memory accounted to them is leaked as well.
+
+In the long run, this causes persistent forward allocation errors,
+and no UDP skbs (both ipv4 and ipv6) can reach user space.
+
+Fix this by explicitly accessing the early demux reference before
+the lookup, and properly decreasing the socket reference count
+after usage.
+
+Also drop the skb_steal_sock() call in __udp6_lib_lookup_skb(), and
+the now-obsolete comment about the "socket cache".
+
+The newly added code is derived from the current ipv4 code for the
+similar path.
+
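+The pattern added by the diff below takes over the reference installed at
+early-demux time and releases it once the skb has been queued:
+
+  sk = skb_steal_sock(skb);       /* owns the early-demux reference now */
+  if (sk) {
+          struct dst_entry *dst = skb_dst(skb);
+          int ret;
+
+          if (unlikely(sk->sk_rx_dst != dst))
+                  udp_sk_rx_dst_set(sk, dst);
+
+          ret = udpv6_queue_rcv_skb(sk, skb);
+          sock_put(sk);           /* drop the reference after use */
+          /* a return value > 0 means to resubmit the input */
+          return ret > 0 ? ret : 0;
+  }
+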
+v1 -> v2:
+ fixed the __udp6_lib_rcv() return code for resubmission,
+ as suggested by Eric
+
+Reported-by: Sam Edwards <CFSworks@gmail.com>
+Reported-by: Marc Haber <mh+netdev@zugschlus.de>
+Fixes: 5425077d73e0 ("net: ipv6: Add early demux handler for UDP unicast")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/udp.h | 1 +
+ net/ipv4/udp.c | 3 ++-
+ net/ipv6/udp.c | 27 ++++++++++++++++++---------
+ 3 files changed, 21 insertions(+), 10 deletions(-)
+
+--- a/include/net/udp.h
++++ b/include/net/udp.h
+@@ -265,6 +265,7 @@ static inline struct sk_buff *skb_recv_u
+ }
+
+ void udp_v4_early_demux(struct sk_buff *skb);
++void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
+ int udp_get_port(struct sock *sk, unsigned short snum,
+ int (*saddr_cmp)(const struct sock *,
+ const struct sock *));
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1762,7 +1762,7 @@ drop:
+ /* For TCP sockets, sk_rx_dst is protected by socket lock
+ * For UDP, we use xchg() to guard against concurrent changes.
+ */
+-static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
++void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+ {
+ struct dst_entry *old;
+
+@@ -2120,6 +2120,7 @@ void udp_destroy_sock(struct sock *sk)
+ encap_destroy(sk);
+ }
+ }
++EXPORT_SYMBOL(udp_sk_rx_dst_set);
+
+ /*
+ * Socket option code for UDP
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -291,11 +291,7 @@ static struct sock *__udp6_lib_lookup_sk
+ struct udp_table *udptable)
+ {
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+- struct sock *sk;
+
+- sk = skb_steal_sock(skb);
+- if (unlikely(sk))
+- return sk;
+ return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
+ &iph->daddr, dport, inet6_iif(skb),
+ udptable, skb);
+@@ -798,6 +794,24 @@ int __udp6_lib_rcv(struct sk_buff *skb,
+ if (udp6_csum_init(skb, uh, proto))
+ goto csum_error;
+
++ /* Check if the socket is already available, e.g. due to early demux */
++ sk = skb_steal_sock(skb);
++ if (sk) {
++ struct dst_entry *dst = skb_dst(skb);
++ int ret;
++
++ if (unlikely(sk->sk_rx_dst != dst))
++ udp_sk_rx_dst_set(sk, dst);
++
++ ret = udpv6_queue_rcv_skb(sk, skb);
++ sock_put(sk);
++
++ /* a return value > 0 means to resubmit the input */
++ if (ret > 0)
++ return ret;
++ return 0;
++ }
++
+ /*
+ * Multicast receive code
+ */
+@@ -806,11 +820,6 @@ int __udp6_lib_rcv(struct sk_buff *skb,
+ saddr, daddr, udptable, proto);
+
+ /* Unicast */
+-
+- /*
+- * check socket cache ... must talk to Alan about his plans
+- * for sock caches... i'll skip this for now.
+- */
+ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
+ if (sk) {
+ int ret;
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: "Michael S. Tsirkin" <mst@redhat.com>
+Date: Mon, 31 Jul 2017 21:49:49 +0300
+Subject: virtio_net: fix truesize for mergeable buffers
+
+From: "Michael S. Tsirkin" <mst@redhat.com>
+
+
+[ Upstream commit 1daa8790d0280d2c719658e39bd59fce65efa909 ]
+
+Seth Forshee noticed a performance degradation with some workloads.
+This turns out to be due to packet drops. Euan Kemp noticed that this
+is because we drop all packets where length exceeds the truesize, but
+for some packets we add in extra memory without updating the truesize.
+This behavior was kept unchanged from ab7db91705e95 ("virtio-net:
+auto-tune mergeable rx buffer size for improved performance"). That
+commit had an internal reason not to account for the extra space: there
+were not enough bits to store it. That is no longer true, so account
+for the allocated length exactly.
+
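+The essence of the fix (see the diff below) is to record the truesize in
+ctx only after any tail hole has been folded into the buffer length:
+
+  hole = alloc_frag->size - alloc_frag->offset;
+  if (hole < len + headroom) {
+          len += hole;            /* fold the unusable tail into len */
+          alloc_frag->offset += hole;
+  }
+  sg_init_one(rq->sg, buf, len);
+  ctx = (void *)(unsigned long)len;       /* truesize now includes the hole */
+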
+Many thanks to Seth Forshee for the report and bisecting and Euan Kemp
+for debugging the issue.
+
+Fixes: 680557cf79f8 ("virtio_net: rework mergeable buffer handling")
+Reported-by: Euan Kemp <euan.kemp@coreos.com>
+Tested-by: Euan Kemp <euan.kemp@coreos.com>
+Reported-by: Seth Forshee <seth.forshee@canonical.com>
+Tested-by: Seth Forshee <seth.forshee@canonical.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -889,21 +889,20 @@ static int add_recvbuf_mergeable(struct
+
+ buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+ buf += headroom; /* advance address leaving hole at front of pkt */
+- ctx = (void *)(unsigned long)len;
+ get_page(alloc_frag->page);
+ alloc_frag->offset += len + headroom;
+ hole = alloc_frag->size - alloc_frag->offset;
+ if (hole < len + headroom) {
+ /* To avoid internal fragmentation, if there is very likely not
+ * enough space for another buffer, add the remaining space to
+- * the current buffer. This extra space is not included in
+- * the truesize stored in ctx.
++ * the current buffer.
+ */
+ len += hole;
+ alloc_frag->offset += hole;
+ }
+
+ sg_init_one(rq->sg, buf, len);
++ ctx = (void *)(unsigned long)len;
+ err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
+ if (err < 0)
+ put_page(virt_to_head_page(buf));
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: "Levin, Alexander" <alexander.levin@verizon.com>
+Date: Tue, 18 Jul 2017 04:23:16 +0000
+Subject: wireless: wext: terminate ifr name coming from userspace
+
+From: "Levin, Alexander" <alexander.levin@verizon.com>
+
+
+[ Upstream commit 98de4e0ea47d106846fc0e30ce4e644283fa7fc2 ]
+
+The ifr name is assumed to be a valid string by the kernel, but nothing
+was forcing userspace to pass a valid string.
+
+In turn, this would cause panics as we tried to access the string
+past its valid memory.
+
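+The fix is the usual forced NUL-termination idiom, applied before the name
+is handed to the wireless extensions handler (as in the diff below):
+
+  iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
+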
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev_ioctl.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/core/dev_ioctl.c
++++ b/net/core/dev_ioctl.c
+@@ -423,6 +423,8 @@ int dev_ioctl(struct net *net, unsigned
+ if (copy_from_user(&iwr, arg, sizeof(iwr)))
+ return -EFAULT;
+
++ iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
++
+ return wext_handle_ioctl(net, &iwr, cmd, arg);
+ }
+