--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Tue, 25 Jul 2017 09:44:25 -0700
+Subject: bonding: commit link status change after propose
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit d94708a553022bf012fa95af10532a134eeb5a52 ]
+
+Commit de77ecd4ef02 ("bonding: improve link-status update in mii-monitoring")
+moves link status commitment into bond_mii_monitor(), but it still relies
+on the return value of bond_miimon_inspect() as the hint. We need to return
+non-zero whenever we propose a link status change.
+
+Fixes: de77ecd4ef02 ("bonding: improve link-status update in mii-monitoring")
+Reported-by: Benjamin Gilbert <benjamin.gilbert@coreos.com>
+Tested-by: Benjamin Gilbert <benjamin.gilbert@coreos.com>
+Cc: Mahesh Bandewar <maheshb@google.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Acked-by: Mahesh Bandewar <maheshb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -2047,6 +2047,7 @@ static int bond_miimon_inspect(struct bo
+ continue;
+
+ bond_propose_link_state(slave, BOND_LINK_FAIL);
++ commit++;
+ slave->delay = bond->params.downdelay;
+ if (slave->delay) {
+ netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n",
+@@ -2085,6 +2086,7 @@ static int bond_miimon_inspect(struct bo
+ continue;
+
+ bond_propose_link_state(slave, BOND_LINK_BACK);
++ commit++;
+ slave->delay = bond->params.updelay;
+
+ if (slave->delay) {
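+
+To see the contract this restores in isolation: bond_miimon_inspect()
+only records proposed link states and returns a count of proposals, and
+bond_mii_monitor() runs the commit pass only when that count is
+non-zero. A minimal userspace sketch of the pattern (illustrative
+types, not the driver's):
+
+    /* Propose/commit split: inspect counts every proposal so the
+     * monitor knows a commit pass is needed. */
+    #include <stdio.h>
+
+    enum link { LINK_UP, LINK_FAIL };
+    struct slave { enum link state, proposed; };
+
+    static int inspect(struct slave *s, int n)
+    {
+        int commit = 0;
+
+        for (int i = 0; i < n; i++) {
+            if (s[i].state == LINK_UP) {        /* pretend carrier lost */
+                s[i].proposed = LINK_FAIL;      /* propose only */
+                commit++;                       /* the fix: count it */
+            }
+        }
+        return commit;
+    }
+
+    int main(void)
+    {
+        struct slave s[1] = { { LINK_UP, LINK_UP } };
+
+        if (inspect(s, 1))                      /* 0 would skip commit */
+            s[0].state = s[0].proposed;         /* commit phase */
+        printf("state=%d\n", s[0].state);
+        return 0;
+    }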
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 26 Jul 2017 14:20:15 +0800
+Subject: dccp: fix a memleak for dccp_feat_init err process
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit e90ce2fc27cad7e7b1e72b9e66201a7a4c124c2b ]
+
+In dccp_feat_init, when ccid_get_builtin_ccids fails to alloc
+memory for rx.val, it should free tx.val before returning an
+error.
+
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/feat.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/dccp/feat.c
++++ b/net/dccp/feat.c
+@@ -1471,9 +1471,12 @@ int dccp_feat_init(struct sock *sk)
+ * singleton values (which always leads to failure).
+ * These settings can still (later) be overridden via sockopts.
+ */
+- if (ccid_get_builtin_ccids(&tx.val, &tx.len) ||
+- ccid_get_builtin_ccids(&rx.val, &rx.len))
++ if (ccid_get_builtin_ccids(&tx.val, &tx.len))
+ return -ENOBUFS;
++ if (ccid_get_builtin_ccids(&rx.val, &rx.len)) {
++ kfree(tx.val);
++ return -ENOBUFS;
++ }
+
+ if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) ||
+ !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len))
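+
+The fix applies the usual unwind rule: when a later allocation fails,
+everything allocated before it must be released on the way out. The
+shape of the pattern, reduced to standalone C (illustrative names):
+
+    #include <errno.h>
+    #include <stdlib.h>
+
+    struct feat { void *tx_val, *rx_val; };
+
+    static int feat_init(struct feat *f)
+    {
+        f->tx_val = malloc(16);
+        if (!f->tx_val)
+            return -ENOBUFS;        /* nothing to unwind yet */
+
+        f->rx_val = malloc(16);
+        if (!f->rx_val) {
+            free(f->tx_val);        /* the fix: undo the first alloc */
+            f->tx_val = NULL;
+            return -ENOBUFS;
+        }
+        return 0;
+    }
+
+    int main(void)
+    {
+        struct feat f;
+
+        return feat_init(&f) ? 1 : 0;
+    }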
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 26 Jul 2017 14:19:46 +0800
+Subject: dccp: fix a memleak that dccp_ipv4 doesn't put reqsk properly
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit b7953d3c0e30a5fc944f6b7bd0bcceb0794bcd85 ]
+
+The patch "dccp: fix a memleak that dccp_ipv6 doesn't put reqsk
+properly" fixed reqsk refcnt leak for dccp_ipv6. The same issue
+exists on dccp_ipv4.
+
+This patch is to fix it for dccp_ipv4.
+
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv4.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -631,6 +631,7 @@ int dccp_v4_conn_request(struct sock *sk
+ goto drop_and_free;
+
+ inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
++ reqsk_put(req);
+ return 0;
+
+ drop_and_free:
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 26 Jul 2017 14:19:09 +0800
+Subject: dccp: fix a memleak that dccp_ipv6 doesn't put reqsk properly
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 0c2232b0a71db0ac1d22f751aa1ac0cadb950fd2 ]
+
+In dccp_v6_conn_request, after the reqsk gets allocated and hashed into
+the ehash table, its refcnt is set to 3: one reference is for
+req->rsk_timer, one is for the hlist, and the other one is for the
+current caller.
+
+The problem is that when dccp_v6_conn_request returns and finishes using
+the reqsk, it doesn't put it. This leaks the reqsk refcnt, so the reqsk
+object never gets freed.
+
+Jianlin found this issue when running dccp_memleak.c in a loop; the
+system would run out of memory.
+
+dccp_memleak.c:
+ int s1 = socket(PF_INET6, 6, IPPROTO_IP);
+ bind(s1, &sa1, 0x20);
+ listen(s1, 0x9);
+ int s2 = socket(PF_INET6, 6, IPPROTO_IP);
+ connect(s2, &sa1, 0x20);
+ close(s1);
+ close(s2);
+
+This patch puts the reqsk before dccp_v6_conn_request returns, just
+as tcp_conn_request does.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/ipv6.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -380,6 +380,7 @@ static int dccp_v6_conn_request(struct s
+ goto drop_and_free;
+
+ inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
++ reqsk_put(req);
+ return 0;
+
+ drop_and_free:
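+
+The refcount balance the fix restores can be modeled on its own: the
+hash add takes the timer and hlist references, and the caller must
+drop the one it got from allocation. A sketch with hypothetical
+helpers (the real code uses refcount_t and reqsk_put()):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    struct req { int refcnt; };
+
+    static struct req *req_alloc(void)
+    {
+        struct req *r = calloc(1, sizeof(*r));
+
+        if (r)
+            r->refcnt = 1;          /* caller's reference */
+        return r;
+    }
+
+    static void req_put(struct req *r)
+    {
+        if (--r->refcnt == 0)
+            free(r);                /* last reference frees the object */
+    }
+
+    int main(void)
+    {
+        struct req *r = req_alloc();
+
+        r->refcnt += 2;             /* hash add: timer + hlist refs */
+        req_put(r);                 /* the fix: drop the caller's ref */
+        printf("refcnt=%d (timer + hlist remain)\n", r->refcnt);
+        return 0;
+    }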
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Fri, 28 Jul 2017 23:27:44 +0300
+Subject: ipv4: fib: Fix NULL pointer deref during fib_sync_down_dev()
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+
+[ Upstream commit 71ed7ee35ad2c5300f4b51634185a0193b4fb0fa ]
+
+Michał reported a NULL pointer deref during fib_sync_down_dev() when
+unregistering a netdevice. The problem is that we don't check for
+'in_dev' being NULL, which can happen in very specific cases.
+
+Usually routes are flushed upon NETDEV_DOWN sent in either the netdev or
+the inetaddr notification chains. However, if an interface isn't
+configured with any IP address, then it's possible for host routes to be
+flushed following NETDEV_UNREGISTER, after NULLing dev->ip_ptr in
+inetdev_destroy().
+
+To reproduce:
+$ ip link add type dummy
+$ ip route add local 1.1.1.0/24 dev dummy0
+$ ip link del dev dummy0
+
+Fix this by checking for the presence of 'in_dev' before referencing it.
+
+Fixes: 982acb97560c ("ipv4: fib: Notify about nexthop status changes")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Reported-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
+Tested-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_semantics.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -1372,7 +1372,7 @@ static int call_fib_nh_notifiers(struct
+ return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
+ &info.info);
+ case FIB_EVENT_NH_DEL:
+- if ((IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
++ if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+ fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
+ (fib_nh->nh_flags & RTNH_F_DEAD))
+ return call_fib_notifiers(dev_net(fib_nh->nh_dev),
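+
+The fix works because && evaluates left to right and stops early, so
+the macro argument is never dereferenced when in_dev is NULL. In
+isolation (illustrative struct in place of the real in_device):
+
+    #include <stdio.h>
+
+    struct in_device { int ignore_linkdown; };
+
+    static int nh_del_notify(struct in_device *in_dev, int linkdown)
+    {
+        /* if in_dev is NULL, the flag is never read */
+        return (in_dev && in_dev->ignore_linkdown && linkdown);
+    }
+
+    int main(void)
+    {
+        printf("%d\n", nh_del_notify(NULL, 1)); /* 0, no deref */
+        return 0;
+    }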
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Mahesh Bandewar <maheshb@google.com>
+Date: Wed, 19 Jul 2017 15:41:33 -0700
+Subject: ipv4: initialize fib_trie prior to register_netdev_notifier call.
+
+From: Mahesh Bandewar <maheshb@google.com>
+
+
+[ Upstream commit 8799a221f5944a7d74516ecf46d58c28ec1d1f75 ]
+
+Net stack initialization currently initializes the fib-trie after the
+first netdevice_notifier() call. In fact, fib_trie initialization
+needs to happen before the first rtnl_register(). This does not cause any
+problem since no devices are UP at this moment, but trying to bring 'lo'
+UP at initialization would make this assumption wrong and expose the issue.
+
+Fixes the following crash:
+
+ Call Trace:
+ ? alternate_node_alloc+0x76/0xa0
+ fib_table_insert+0x1b7/0x4b0
+ fib_magic.isra.17+0xea/0x120
+ fib_add_ifaddr+0x7b/0x190
+ fib_netdev_event+0xc0/0x130
+ register_netdevice_notifier+0x1c1/0x1d0
+ ip_fib_init+0x72/0x85
+ ip_rt_init+0x187/0x1e9
+ ip_init+0xe/0x1a
+ inet_init+0x171/0x26c
+ ? ipv4_offload_init+0x66/0x66
+ do_one_initcall+0x43/0x160
+ kernel_init_freeable+0x191/0x219
+ ? rest_init+0x80/0x80
+ kernel_init+0xe/0x150
+ ret_from_fork+0x22/0x30
+ Code: f6 46 23 04 74 86 4c 89 f7 e8 ae 45 01 00 49 89 c7 4d 85 ff 0f 85 7b ff ff ff 31 db eb 08 4c 89 ff e8 16 47 01 00 48 8b 44 24 38 <45> 8b 6e 14 4d 63 76 74 48 89 04 24 0f 1f 44 00 00 48 83 c4 08
+ RIP: kmem_cache_alloc+0xcf/0x1c0 RSP: ffff9b1500017c28
+ CR2: 0000000000000014
+
+Fixes: 7b1a74fdbb9e ("[NETNS]: Refactor fib initialization so it can handle multiple namespaces.")
+Fixes: 7f9b80529b8a ("[IPV4]: fib hash|trie initialization")
+
+Signed-off-by: Mahesh Bandewar <maheshb@google.com>
+Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_frontend.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/fib_frontend.c
++++ b/net/ipv4/fib_frontend.c
+@@ -1327,13 +1327,14 @@ static struct pernet_operations fib_net_
+
+ void __init ip_fib_init(void)
+ {
+- rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
+- rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
+- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
++ fib_trie_init();
+
+ register_pernet_subsys(&fib_net_ops);
++
+ register_netdevice_notifier(&fib_netdev_notifier);
+ register_inetaddr_notifier(&fib_inetaddr_notifier);
+
+- fib_trie_init();
++ rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
++ rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
++ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
+ }
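+
+The general rule behind the reorder: registering a notifier may invoke
+the callback immediately (replaying state that already exists), so
+every structure the callback touches must be initialized beforehand.
+A condensed sketch (illustrative, not the kernel's notifier API):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    static int *trie;                   /* stands in for the fib trie */
+
+    static void fib_event(void)
+    {
+        trie[0]++;                      /* oops if trie is still NULL */
+    }
+
+    static void register_notifier(void (*cb)(void))
+    {
+        cb();       /* replays events for devices that are already up */
+    }
+
+    int main(void)
+    {
+        trie = calloc(1, sizeof(*trie));    /* the fix: init first... */
+        register_notifier(fib_event);       /* ...then register */
+        printf("entries=%d\n", trie[0]);
+        return 0;
+    }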
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Alexander Potapenko <glider@google.com>
+Date: Mon, 17 Jul 2017 12:35:58 +0200
+Subject: ipv4: ipv6: initialize treq->txhash in cookie_v[46]_check()
+
+From: Alexander Potapenko <glider@google.com>
+
+
+[ Upstream commit 18bcf2907df935981266532e1e0d052aff2e6fae ]
+
+KMSAN reported use of uninitialized memory in skb_set_hash_from_sk(),
+which originated from the TCP request socket created in
+cookie_v6_check():
+
+ ==================================================================
+ BUG: KMSAN: use of uninitialized memory in tcp_transmit_skb+0xf77/0x3ec0
+ CPU: 1 PID: 2949 Comm: syz-execprog Not tainted 4.11.0-rc5+ #2931
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
+ TCP: request_sock_TCPv6: Possible SYN flooding on port 20028. Sending cookies. Check SNMP counters.
+ Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:16
+ dump_stack+0x172/0x1c0 lib/dump_stack.c:52
+ kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927
+ __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469
+ skb_set_hash_from_sk ./include/net/sock.h:2011
+ tcp_transmit_skb+0xf77/0x3ec0 net/ipv4/tcp_output.c:983
+ tcp_send_ack+0x75b/0x830 net/ipv4/tcp_output.c:3493
+ tcp_delack_timer_handler+0x9a6/0xb90 net/ipv4/tcp_timer.c:284
+ tcp_delack_timer+0x1b0/0x310 net/ipv4/tcp_timer.c:309
+ call_timer_fn+0x240/0x520 kernel/time/timer.c:1268
+ expire_timers kernel/time/timer.c:1307
+ __run_timers+0xc13/0xf10 kernel/time/timer.c:1601
+ run_timer_softirq+0x36/0xa0 kernel/time/timer.c:1614
+ __do_softirq+0x485/0x942 kernel/softirq.c:284
+ invoke_softirq kernel/softirq.c:364
+ irq_exit+0x1fa/0x230 kernel/softirq.c:405
+ exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:657
+ smp_apic_timer_interrupt+0x5a/0x80 arch/x86/kernel/apic/apic.c:966
+ apic_timer_interrupt+0x86/0x90 arch/x86/entry/entry_64.S:489
+ RIP: 0010:native_restore_fl ./arch/x86/include/asm/irqflags.h:36
+ RIP: 0010:arch_local_irq_restore ./arch/x86/include/asm/irqflags.h:77
+ RIP: 0010:__msan_poison_alloca+0xed/0x120 mm/kmsan/kmsan_instr.c:440
+ RSP: 0018:ffff880024917cd8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff10
+ RAX: 0000000000000246 RBX: ffff8800224c0000 RCX: 0000000000000005
+ RDX: 0000000000000004 RSI: ffff880000000000 RDI: ffffea0000b6d770
+ RBP: ffff880024917d58 R08: 0000000000000dd8 R09: 0000000000000004
+ R10: 0000160000000000 R11: 0000000000000000 R12: ffffffff85abf810
+ R13: ffff880024917dd8 R14: 0000000000000010 R15: ffffffff81cabde4
+ </IRQ>
+ poll_select_copy_remaining+0xac/0x6b0 fs/select.c:293
+ SYSC_select+0x4b4/0x4e0 fs/select.c:653
+ SyS_select+0x76/0xa0 fs/select.c:634
+ entry_SYSCALL_64_fastpath+0x13/0x94 arch/x86/entry/entry_64.S:204
+ RIP: 0033:0x4597e7
+ RSP: 002b:000000c420037ee0 EFLAGS: 00000246 ORIG_RAX: 0000000000000017
+ RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004597e7
+ RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
+ RBP: 000000c420037ef0 R08: 000000c420037ee0 R09: 0000000000000059
+ R10: 0000000000000000 R11: 0000000000000246 R12: 000000000042dc20
+ R13: 00000000000000f3 R14: 0000000000000030 R15: 0000000000000003
+ chained origin:
+ save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59
+ kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302
+ kmsan_save_stack mm/kmsan/kmsan.c:317
+ kmsan_internal_chain_origin+0x12a/0x1f0 mm/kmsan/kmsan.c:547
+ __msan_store_shadow_origin_4+0xac/0x110 mm/kmsan/kmsan_instr.c:259
+ tcp_create_openreq_child+0x709/0x1ae0 net/ipv4/tcp_minisocks.c:472
+ tcp_v6_syn_recv_sock+0x7eb/0x2a30 net/ipv6/tcp_ipv6.c:1103
+ tcp_get_cookie_sock+0x136/0x5f0 net/ipv4/syncookies.c:212
+ cookie_v6_check+0x17a9/0x1b50 net/ipv6/syncookies.c:245
+ tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989
+ tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298
+ tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487
+ ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279
+ NF_HOOK ./include/linux/netfilter.h:257
+ ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322
+ dst_input ./include/net/dst.h:492
+ ip6_rcv_finish net/ipv6/ip6_input.c:69
+ NF_HOOK ./include/linux/netfilter.h:257
+ ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203
+ __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208
+ __netif_receive_skb net/core/dev.c:4246
+ process_backlog+0x667/0xba0 net/core/dev.c:4866
+ napi_poll net/core/dev.c:5268
+ net_rx_action+0xc95/0x1590 net/core/dev.c:5333
+ __do_softirq+0x485/0x942 kernel/softirq.c:284
+ origin:
+ save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59
+ kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302
+ kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198
+ kmsan_kmalloc+0x7f/0xe0 mm/kmsan/kmsan.c:337
+ kmem_cache_alloc+0x1c2/0x1e0 mm/slub.c:2766
+ reqsk_alloc ./include/net/request_sock.h:87
+ inet_reqsk_alloc+0xa4/0x5b0 net/ipv4/tcp_input.c:6200
+ cookie_v6_check+0x4f4/0x1b50 net/ipv6/syncookies.c:169
+ tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989
+ tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298
+ tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487
+ ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279
+ NF_HOOK ./include/linux/netfilter.h:257
+ ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322
+ dst_input ./include/net/dst.h:492
+ ip6_rcv_finish net/ipv6/ip6_input.c:69
+ NF_HOOK ./include/linux/netfilter.h:257
+ ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203
+ __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208
+ __netif_receive_skb net/core/dev.c:4246
+ process_backlog+0x667/0xba0 net/core/dev.c:4866
+ napi_poll net/core/dev.c:5268
+ net_rx_action+0xc95/0x1590 net/core/dev.c:5333
+ __do_softirq+0x485/0x942 kernel/softirq.c:284
+ ==================================================================
+
+Similar error is reported for cookie_v4_check().
+
+Fixes: 58d607d3e52f ("tcp: provide skb->hash to synack packets")
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/syncookies.c | 1 +
+ net/ipv6/syncookies.c | 1 +
+ 2 files changed, 2 insertions(+)
+
+--- a/net/ipv4/syncookies.c
++++ b/net/ipv4/syncookies.c
+@@ -332,6 +332,7 @@ struct sock *cookie_v4_check(struct sock
+ treq->rcv_isn = ntohl(th->seq) - 1;
+ treq->snt_isn = cookie;
+ treq->ts_off = 0;
++ treq->txhash = net_tx_rndhash();
+ req->mss = mss;
+ ireq->ir_num = ntohs(th->dest);
+ ireq->ir_rmt_port = th->source;
+--- a/net/ipv6/syncookies.c
++++ b/net/ipv6/syncookies.c
+@@ -215,6 +215,7 @@ struct sock *cookie_v6_check(struct sock
+ treq->rcv_isn = ntohl(th->seq) - 1;
+ treq->snt_isn = cookie;
+ treq->ts_off = 0;
++ treq->txhash = net_tx_rndhash();
+
+ /*
+ * We need to lookup the dst_entry to get the correct window size.
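+
+The underlying rule: the cookie path builds the request socket by
+hand, so every field the regular SYN path initializes must be
+initialized here too before anything reads it. Reduced to standalone C
+(names illustrative):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    struct treq { unsigned rcv_isn, snt_isn, txhash; };
+
+    int main(void)
+    {
+        struct treq *t = malloc(sizeof(*t));    /* not zeroed */
+
+        t->rcv_isn = 1;
+        t->snt_isn = 2;
+        t->txhash = (unsigned)rand();   /* the fix: seed it, as
+                                         * net_tx_rndhash() does */
+        printf("txhash=%u\n", t->txhash);
+        free(t);
+        return 0;
+    }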
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Wed, 19 Jul 2017 22:28:55 +0200
+Subject: ipv6: avoid overflow of offset in ip6_find_1stfragopt
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+
+[ Upstream commit 6399f1fae4ec29fab5ec76070435555e256ca3a6 ]
+
+In some cases, offset can overflow and can cause an infinite loop in
+ip6_find_1stfragopt(). Make it unsigned int to prevent the overflow, and
+cap it at IPV6_MAXPLEN, since packets larger than that should be invalid.
+
+This problem has been here since before the beginning of git history.
+
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/output_core.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/output_core.c
++++ b/net/ipv6/output_core.c
+@@ -78,7 +78,7 @@ EXPORT_SYMBOL(ipv6_select_ident);
+
+ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
+ {
+- u16 offset = sizeof(struct ipv6hdr);
++ unsigned int offset = sizeof(struct ipv6hdr);
+ unsigned int packet_len = skb_tail_pointer(skb) -
+ skb_network_header(skb);
+ int found_rhdr = 0;
+@@ -86,6 +86,7 @@ int ip6_find_1stfragopt(struct sk_buff *
+
+ while (offset <= packet_len) {
+ struct ipv6_opt_hdr *exthdr;
++ unsigned int len;
+
+ switch (**nexthdr) {
+
+@@ -111,7 +112,10 @@ int ip6_find_1stfragopt(struct sk_buff *
+
+ exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+ offset);
+- offset += ipv6_optlen(exthdr);
++ len = ipv6_optlen(exthdr);
++ if (len + offset >= IPV6_MAXPLEN)
++ return -EINVAL;
++ offset += len;
+ *nexthdr = &exthdr->nexthdr;
+ }
+
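+
+The wrap is easy to demonstrate on its own: a u16 offset plus a large
+extension-header length overflows, and the "offset <= packet_len" loop
+condition then holds forever. Standalone (illustrative values):
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+        unsigned short offset = 65530;  /* the old u16, near its max */
+        unsigned int packet_len = 65535;
+
+        offset += 40;                   /* wraps to 34 */
+        printf("offset=%u <= %u still true\n", offset, packet_len);
+        /* the fix widens offset to unsigned int and rejects
+         * len + offset >= IPV6_MAXPLEN before advancing */
+        return 0;
+    }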
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Mon, 24 Jul 2017 23:14:28 +0200
+Subject: ipv6: Don't increase IPSTATS_MIB_FRAGFAILS twice in ip6_fragment()
+
+From: Stefano Brivio <sbrivio@redhat.com>
+
+
+[ Upstream commit afce615aaabfbaad02550e75c0bec106dafa1adf ]
+
+RFC 2465 defines ipv6IfStatsOutFragFails as:
+
+ "The number of IPv6 datagrams that have been discarded
+ because they needed to be fragmented at this output
+ interface but could not be."
+
+The existing implementation, instead, would increase the counter
+twice when we fail to allocate room for a single fragment:
+once for the fragment, once for the datagram.
+
+This didn't look intentional though. In one of the two affected
+failure paths, the double increase was simply a result of a new
+'goto fail' statement, introduced to avoid a skb leak. The other
+path appears to be affected since at least 2.6.12-rc2.
+
+Reported-by: Sabrina Dubroca <sdubroca@redhat.com>
+Fixes: 1d325d217c7f ("ipv6: ip6_fragment: fix headroom tests and skb leak")
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_output.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -673,8 +673,6 @@ int ip6_fragment(struct net *net, struct
+ *prevhdr = NEXTHDR_FRAGMENT;
+ tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
+ if (!tmp_hdr) {
+- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+- IPSTATS_MIB_FRAGFAILS);
+ err = -ENOMEM;
+ goto fail;
+ }
+@@ -793,8 +791,6 @@ slow_path:
+ frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
+ hroom + troom, GFP_ATOMIC);
+ if (!frag) {
+- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+- IPSTATS_MIB_FRAGFAILS);
+ err = -ENOMEM;
+ goto fail;
+ }
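+
+After the change, the accounting follows the usual goto-fail shape:
+no counter bump at the individual failing allocations, exactly one at
+the common exit that stands for the datagram. In miniature
+(illustrative, the real counter is IPSTATS_MIB_FRAGFAILS):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    static unsigned long frag_fails;
+
+    static int fragment(size_t hlen, size_t flen)
+    {
+        void *hdr = NULL, *frag = NULL;
+        int err = 0;
+
+        hdr = malloc(hlen);
+        if (!hdr) {
+            err = -1;           /* no bump here... */
+            goto fail;
+        }
+        frag = malloc(flen);
+        if (!frag)
+            err = -1;           /* ...nor here */
+    fail:
+        if (err)
+            frag_fails++;       /* once per datagram */
+        free(frag);
+        free(hdr);
+        return err;
+    }
+
+    int main(void)
+    {
+        fragment(40, 1280);
+        printf("frag_fails=%lu\n", frag_fails);
+        return 0;
+    }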
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Thomas Jarosch <thomas.jarosch@intra2net.com>
+Date: Sat, 22 Jul 2017 17:14:34 +0200
+Subject: mcs7780: Fix initialization when CONFIG_VMAP_STACK is enabled
+
+From: Thomas Jarosch <thomas.jarosch@intra2net.com>
+
+
+[ Upstream commit 9476d393667968b4a02afbe9d35a3558482b943e ]
+
+DMA transfers are not allowed to buffers that are on the stack.
+Therefore allocate a buffer to store the result of usb_control_msg().
+
+Fixes these bug reports:
+https://bugzilla.kernel.org/show_bug.cgi?id=195217
+
+https://bugzilla.redhat.com/show_bug.cgi?id=1421387
+https://bugzilla.redhat.com/show_bug.cgi?id=1427398
+
+Shortened kernel backtrace from 4.11.9-200.fc25.x86_64:
+kernel: ------------[ cut here ]------------
+kernel: WARNING: CPU: 3 PID: 2957 at drivers/usb/core/hcd.c:1587
+kernel: transfer buffer not dma capable
+kernel: Call Trace:
+kernel: dump_stack+0x63/0x86
+kernel: __warn+0xcb/0xf0
+kernel: warn_slowpath_fmt+0x5a/0x80
+kernel: usb_hcd_map_urb_for_dma+0x37f/0x570
+kernel: ? try_to_del_timer_sync+0x53/0x80
+kernel: usb_hcd_submit_urb+0x34e/0xb90
+kernel: ? schedule_timeout+0x17e/0x300
+kernel: ? del_timer_sync+0x50/0x50
+kernel: ? __slab_free+0xa9/0x300
+kernel: usb_submit_urb+0x2f4/0x560
+kernel: ? urb_destroy+0x24/0x30
+kernel: usb_start_wait_urb+0x6e/0x170
+kernel: usb_control_msg+0xdc/0x120
+kernel: mcs_get_reg+0x36/0x40 [mcs7780]
+kernel: mcs_net_open+0xb5/0x5c0 [mcs7780]
+...
+
+The regression goes back to 4.9, so it's a good candidate for -stable,
+though that's the maintainer's decision.
+
+Thanks to Dan Williams for adding the "transfer buffer not dma capable"
+warning in the first place. It instantly pointed me in the right direction.
+
+Patch has been tested with transferring data from a Polar watch.
+
+Signed-off-by: Thomas Jarosch <thomas.jarosch@intra2net.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/irda/mcs7780.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/irda/mcs7780.c
++++ b/drivers/net/irda/mcs7780.c
+@@ -141,9 +141,19 @@ static int mcs_set_reg(struct mcs_cb *mc
+ static int mcs_get_reg(struct mcs_cb *mcs, __u16 reg, __u16 * val)
+ {
+ struct usb_device *dev = mcs->usbdev;
+- int ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ,
+- MCS_RD_RTYPE, 0, reg, val, 2,
+- msecs_to_jiffies(MCS_CTRL_TIMEOUT));
++ void *dmabuf;
++ int ret;
++
++ dmabuf = kmalloc(sizeof(__u16), GFP_KERNEL);
++ if (!dmabuf)
++ return -ENOMEM;
++
++ ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ,
++ MCS_RD_RTYPE, 0, reg, dmabuf, 2,
++ msecs_to_jiffies(MCS_CTRL_TIMEOUT));
++
++ memcpy(val, dmabuf, sizeof(__u16));
++ kfree(dmabuf);
+
+ return ret;
+ }
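+
+The same bounce-buffer shape applies to any USB control transfer:
+with CONFIG_VMAP_STACK the stack can live in vmalloc space, which the
+DMA API cannot map, so results must round-trip through a heap buffer.
+A condensed sketch, with a hypothetical usb_read() standing in for
+usb_control_msg():
+
+    #include <stdint.h>
+    #include <stdlib.h>
+    #include <string.h>
+
+    static int usb_read(void *buf, size_t len)  /* pretend DMA target */
+    {
+        memset(buf, 0xab, len);
+        return (int)len;
+    }
+
+    static int get_reg(uint16_t *val)
+    {
+        void *dmabuf = malloc(sizeof(*val));    /* heap: DMA-able */
+        int ret;
+
+        if (!dmabuf)
+            return -1;
+
+        ret = usb_read(dmabuf, sizeof(*val));
+        memcpy(val, dmabuf, sizeof(*val));      /* copy out to caller */
+        free(dmabuf);
+        return ret;
+    }
+
+    int main(void)
+    {
+        uint16_t v;             /* may sit on a vmalloc'ed stack... */
+        return get_reg(&v) < 0; /* ...but DMA never touches it */
+    }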
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Kosuke Tatsukawa <tatsu@ab.jp.nec.com>
+Date: Thu, 20 Jul 2017 05:20:40 +0000
+Subject: net: bonding: Fix transmit load balancing in balance-alb mode
+
+From: Kosuke Tatsukawa <tatsu@ab.jp.nec.com>
+
+
+[ Upstream commit cbf5ecb305601d063dc94a57680dfbc3f96c188d ]
+
+balance-alb mode used to have transmit dynamic load balancing feature
+enabled by default. However, transmit dynamic load balancing no longer
+works in balance-alb after commit 8b426dc54cf4 ("bonding: remove
+hardcoded value").
+
+Both balance-tlb and balance-alb use the function bond_do_alb_xmit() to
+send packets. This function uses the parameter tlb_dynamic_lb.
+tlb_dynamic_lb used to have the default value of 1 for balance-alb, but
+now the value is set to 0 except in balance-tlb.
+
+Re-enable transmit dynamic load balancing by initializing tlb_dynamic_lb
+for balance-alb similarly to balance-tlb.
+
+Fixes: 8b426dc54cf4 ("bonding: remove hardcoded value")
+Signed-off-by: Kosuke Tatsukawa <tatsu@ab.jp.nec.com>
+Acked-by: Andy Gospodarek <andy@greyhouse.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -4598,7 +4598,7 @@ static int bond_check_params(struct bond
+ }
+ ad_user_port_key = valptr->value;
+
+- if (bond_mode == BOND_MODE_TLB) {
++ if ((bond_mode == BOND_MODE_TLB) || (bond_mode == BOND_MODE_ALB)) {
+ bond_opt_initstr(&newval, "default");
+ valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB),
+ &newval);
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Thu, 20 Jul 2017 12:25:22 -0700
+Subject: net: dsa: b53: Add missing ARL entries for BCM53125
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit be35e8c516c1915a3035d266a2015b41f73ba3f9 ]
+
+The BCM53125 entry was missing an arl_entries member which would
+basically prevent the ARL search from terminating properly. This switch
+has 4 ARL entries, so add that.
+
+Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/b53/b53_common.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/dsa/b53/b53_common.c
++++ b/drivers/net/dsa/b53/b53_common.c
+@@ -1668,6 +1668,7 @@ static const struct b53_chip_data b53_sw
+ .dev_name = "BCM53125",
+ .vlans = 4096,
+ .enabled_ports = 0xff,
++ .arl_entries = 4,
+ .cpu_port = B53_CPU_PORT,
+ .vta_regs = B53_VTA_REGS,
+ .duplex_reg = B53_DUPLEX_STAT_GE,
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Martin Hundebøll <mnhu@prevas.dk>
+Date: Wed, 19 Jul 2017 08:17:02 +0200
+Subject: net: dsa: mv88e6xxx: Enable CMODE config support for 6390X
+
+From: Martin Hundebøll <mnhu@prevas.dk>
+
+
+[ Upstream commit bb0a2675f72b458e64f47071e8aabdb225a6af4d ]
+
+Commit f39908d3b1c45 ('net: dsa: mv88e6xxx: Set the CMODE for mv88e6390
+ports 9 & 10') added support for setting the CMODE for the 6390X family,
+but only enabled it for 6290 and 6390 - and left out 6390X.
+
+Fix support for setting the CMODE on 6390X by assigning
+mv88e6390x_port_set_cmode() to the .port_set_cmode function pointer
+in mv88e6390x_ops too.
+
+Fixes: f39908d3b1c4 ("net: dsa: mv88e6xxx: Set the CMODE for mv88e6390 ports 9 & 10")
+Signed-off-by: Martin Hundebøll <mnhu@prevas.dk>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/mv88e6xxx/chip.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/dsa/mv88e6xxx/chip.c
++++ b/drivers/net/dsa/mv88e6xxx/chip.c
+@@ -3377,6 +3377,7 @@ static const struct mv88e6xxx_ops mv88e6
+ .port_jumbo_config = mv88e6165_port_jumbo_config,
+ .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
+ .port_pause_config = mv88e6390_port_pause_config,
++ .port_set_cmode = mv88e6390x_port_set_cmode,
+ .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+ .port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
+ .stats_snapshot = mv88e6390_g1_stats_snapshot,
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+Date: Tue, 25 Jul 2017 14:35:03 +0200
+Subject: net: ethernet: nb8800: Handle all 4 RGMII modes identically
+
+From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+
+
+[ Upstream commit 4813497b537c6208c90d6cbecac5072d347de900 ]
+
+Before commit bf8f6952a233 ("Add blurb about RGMII") it was unclear
+whose responsibility it was to insert the required clock skew, and
+in hindsight, some PHY drivers got it wrong. The solution forward
+is to introduce a new property, explicitly requiring skew from the
+node to which it is attached. In the interim, this driver will handle
+all 4 RGMII modes identically (no skew).
+
+Fixes: 52dfc8301248 ("net: ethernet: add driver for Aurora VLSI NB8800 Ethernet controller")
+Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/aurora/nb8800.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/aurora/nb8800.c
++++ b/drivers/net/ethernet/aurora/nb8800.c
+@@ -609,7 +609,7 @@ static void nb8800_mac_config(struct net
+ mac_mode |= HALF_DUPLEX;
+
+ if (gigabit) {
+- if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII)
++ if (phy_interface_is_rgmii(dev->phydev))
+ mac_mode |= RGMII_MODE;
+
+ mac_mode |= GMAC_MODE;
+@@ -1268,11 +1268,10 @@ static int nb8800_tangox_init(struct net
+ break;
+
+ case PHY_INTERFACE_MODE_RGMII:
+- pad_mode = PAD_MODE_RGMII;
+- break;
+-
++ case PHY_INTERFACE_MODE_RGMII_ID:
++ case PHY_INTERFACE_MODE_RGMII_RXID:
+ case PHY_INTERFACE_MODE_RGMII_TXID:
+- pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
++ pad_mode = PAD_MODE_RGMII;
+ break;
+
+ default:
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Aviv Heller <avivh@mellanox.com>
+Date: Sun, 2 Jul 2017 19:13:43 +0300
+Subject: net/mlx5: Consider tx_enabled in all modes on remap
+
+From: Aviv Heller <avivh@mellanox.com>
+
+
+[ Upstream commit dc798b4cc0f2a06e7ad7d522403de274b86a0a6f ]
+
+The tx_enabled lag event field is used to determine whether a slave is
+active. The current logic uses this value only if the mode is
+active-backup.
+
+However, LACP mode, although considered a load balancing mode, can mark
+a slave as inactive in certain situations (e.g., LACP timeout).
+
+This fix takes the tx_enabled value into account when remapping,
+regardless of the LAG mode (this should not affect the behavior in XOR
+mode, since in this mode both slaves are marked as active).
+
+Fixes: 7907f23adc18 ("net/mlx5: Implement RoCE LAG feature")
+Signed-off-by: Aviv Heller <avivh@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lag.c | 25 ++++++++++---------------
+ 1 file changed, 10 insertions(+), 15 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+@@ -157,22 +157,17 @@ static bool mlx5_lag_is_bonded(struct ml
+ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
+ u8 *port1, u8 *port2)
+ {
+- if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+- if (tracker->netdev_state[0].tx_enabled) {
+- *port1 = 1;
+- *port2 = 1;
+- } else {
+- *port1 = 2;
+- *port2 = 2;
+- }
+- } else {
+- *port1 = 1;
+- *port2 = 2;
+- if (!tracker->netdev_state[0].link_up)
+- *port1 = 2;
+- else if (!tracker->netdev_state[1].link_up)
+- *port2 = 1;
++ *port1 = 1;
++ *port2 = 2;
++ if (!tracker->netdev_state[0].tx_enabled ||
++ !tracker->netdev_state[0].link_up) {
++ *port1 = 2;
++ return;
+ }
++
++ if (!tracker->netdev_state[1].tx_enabled ||
++ !tracker->netdev_state[1].link_up)
++ *port2 = 1;
+ }
+
+ static void mlx5_activate_lag(struct mlx5_lag *ldev,
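+
+The rewritten mapping reads as a small pure function: each port
+prefers its own slave and falls back to the peer when that slave is
+not both tx_enabled and link_up. Restated standalone and exercised
+over the tx_enabled combinations:
+
+    #include <stdio.h>
+
+    struct slave { int tx_enabled, link_up; };
+
+    static void infer_mapping(const struct slave s[2],
+                              unsigned char *port1, unsigned char *port2)
+    {
+        *port1 = 1;
+        *port2 = 2;
+        if (!s[0].tx_enabled || !s[0].link_up) {
+            *port1 = 2;         /* slave 0 inactive: all on port 2 */
+            return;
+        }
+        if (!s[1].tx_enabled || !s[1].link_up)
+            *port2 = 1;         /* slave 1 inactive: all on port 1 */
+    }
+
+    int main(void)
+    {
+        for (int m = 0; m < 4; m++) {
+            struct slave s[2] = { { m & 1, 1 }, { (m >> 1) & 1, 1 } };
+            unsigned char p1, p2;
+
+            infer_mapping(s, &p1, &p2);
+            printf("tx0=%d tx1=%d -> %u,%u\n",
+                   s[0].tx_enabled, s[1].tx_enabled, p1, p2);
+        }
+        return 0;
+    }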
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Moshe Shemesh <moshe@mellanox.com>
+Date: Sun, 25 Jun 2017 18:45:32 +0300
+Subject: net/mlx5: Fix command bad flow on command entry allocation failure
+
+From: Moshe Shemesh <moshe@mellanox.com>
+
+
+[ Upstream commit 219c81f7d1d5a89656cb3b53d3b4e11e93608d80 ]
+
+When the driver fails to allocate an entry to send a command to the FW,
+it must notify the calling function and release the memory allocated
+for this command.
+
+Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB adapters')
+Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
+Cc: kernel-team@fb.com
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -777,6 +777,10 @@ static void cb_timeout_handler(struct wo
+ mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+ }
+
++static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
++static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
++ struct mlx5_cmd_msg *msg);
++
+ static void cmd_work_handler(struct work_struct *work)
+ {
+ struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
+@@ -786,16 +790,27 @@ static void cmd_work_handler(struct work
+ struct mlx5_cmd_layout *lay;
+ struct semaphore *sem;
+ unsigned long flags;
++ int alloc_ret;
+
+ sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
+ down(sem);
+ if (!ent->page_queue) {
+- ent->idx = alloc_ent(cmd);
+- if (ent->idx < 0) {
++ alloc_ret = alloc_ent(cmd);
++ if (alloc_ret < 0) {
+ mlx5_core_err(dev, "failed to allocate command entry\n");
++ if (ent->callback) {
++ ent->callback(-EAGAIN, ent->context);
++ mlx5_free_cmd_msg(dev, ent->out);
++ free_msg(dev, ent->in);
++ free_cmd(ent);
++ } else {
++ ent->ret = -EAGAIN;
++ complete(&ent->done);
++ }
+ up(sem);
+ return;
+ }
++ ent->idx = alloc_ret;
+ } else {
+ ent->idx = cmd->max_reg_cmds;
+ spin_lock_irqsave(&cmd->alloc_lock, flags);
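+
+The added error path mirrors the two completion modes of the command
+interface: asynchronous callers must see their callback invoked with
+-EAGAIN (and their buffers freed), while synchronous callers must have
+the completion signalled so the waiter wakes. A condensed restatement
+with illustrative types:
+
+    #include <stdio.h>
+
+    struct work_ent {
+        void (*callback)(int err, void *ctx);   /* NULL for sync */
+        void *context;
+        int ret, done;
+    };
+
+    static void fail_entry(struct work_ent *ent)
+    {
+        if (ent->callback) {
+            ent->callback(-11 /* -EAGAIN */, ent->context);
+            /* the real code also frees ent->in, ent->out and ent */
+        } else {
+            ent->ret = -11;
+            ent->done = 1;      /* complete(&ent->done) */
+        }
+    }
+
+    static void cb(int err, void *ctx) { printf("async err=%d\n", err); }
+
+    int main(void)
+    {
+        struct work_ent a = { .callback = cb }, s = { 0 };
+
+        fail_entry(&a);
+        fail_entry(&s);
+        printf("sync ret=%d done=%d\n", s.ret, s.done);
+        return 0;
+    }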
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Moshe Shemesh <moshe@mellanox.com>
+Date: Thu, 6 Jul 2017 15:48:40 +0300
+Subject: net/mlx5: Fix command completion after timeout access invalid structure
+
+From: Moshe Shemesh <moshe@mellanox.com>
+
+
+[ Upstream commit 061870800efb4e3d1ad4082a2569363629bdfcfc ]
+
+Completion on timeout should not free the driver command entry structure
+as it will need to access it again once real completion event from FW
+will occur.
+
+Fixes: 73dd3a4839c1 ('net/mlx5: Avoid using pending command interface slots')
+Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
+Cc: kernel-team@fb.com
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -955,7 +955,7 @@ static int mlx5_cmd_invoke(struct mlx5_c
+
+ err = wait_func(dev, ent);
+ if (err == -ETIMEDOUT)
+- goto out_free;
++ goto out;
+
+ ds = ent->ts2 - ent->ts1;
+ op = MLX5_GET(mbox_in, in->first.data, opcode);
+@@ -1419,6 +1419,7 @@ void mlx5_cmd_comp_handler(struct mlx5_c
+ mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
+ ent->idx);
+ free_ent(cmd, ent->idx);
++ free_cmd(ent);
+ }
+ continue;
+ }
+@@ -1477,7 +1478,8 @@ void mlx5_cmd_comp_handler(struct mlx5_c
+ free_msg(dev, ent->in);
+
+ err = err ? err : ent->status;
+- free_cmd(ent);
++ if (!forced)
++ free_cmd(ent);
+ callback(err, context);
+ } else {
+ complete(&ent->done);
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Paul Blakey <paulb@mellanox.com>
+Date: Thu, 6 Jul 2017 16:40:34 +0300
+Subject: net/mlx5: Fix mlx5_add_flow_rules call with correct num of dests
+
+From: Paul Blakey <paulb@mellanox.com>
+
+
+[ Upstream commit bcec601f30fb41e9233674942fa4040a6e63657a ]
+
+When adding an ethtool steering rule with action DISCARD we wrongly
+pass a NULL dest with dest_num 1 to mlx5_add_flow_rules().
+What this error seems to have caused is sending VPORT 0
+(MLX5_FLOW_DESTINATION_TYPE_VPORT) as the fte dest instead of no dests.
+We have the fte action correctly set to DROP, so it might have been
+ignored anyway.
+
+To reproduce use:
+ # sudo ethtool --config-nfc <dev> flow-type ether \
+ dst aa:bb:cc:dd:ee:ff action -1
+
+Fixes: 74491de93712 ("net/mlx5: Add multi dest support")
+Signed-off-by: Paul Blakey <paulb@mellanox.com>
+Reviewed-by: Mark Bloch <markb@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+@@ -320,7 +320,7 @@ add_ethtool_flow_rule(struct mlx5e_priv
+
+ spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
+ flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+- rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1);
++ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n",
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Thu, 25 May 2017 15:11:26 +0300
+Subject: net/mlx5: Fix mlx5_ifc_mtpps_reg_bits structure size
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+
+[ Upstream commit 0b794ffae7afa7c4e5accac8791c4b78e8d080ce ]
+
+Fix the miscalculation in the reserved_at_1a0 field.
+
+Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mlx5/mlx5_ifc.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -8131,7 +8131,7 @@ struct mlx5_ifc_mtpps_reg_bits {
+ u8 out_pulse_duration[0x10];
+ u8 out_periodic_adjustment[0x10];
+
+- u8 reserved_at_1a0[0x60];
++ u8 reserved_at_1a0[0x40];
+ };
+
+ struct mlx5_ifc_mtppse_reg_bits {
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Thu, 25 May 2017 16:09:34 +0300
+Subject: net/mlx5e: Add field select to MTPPS register
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+
+[ Upstream commit fa3676885e3b5be1edfa1b2cc775e20a45b34a19 ]
+
+In order to mark the relevant fields while setting the MTPPS register,
+add field select. Otherwise the write can cause a misconfiguration in
+the firmware.
+
+Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 29 +++++++++++++++-----
+ drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 -
+ drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 5 +++
+ include/linux/mlx5/mlx5_ifc.h | 10 ++++--
+ 4 files changed, 36 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+@@ -53,6 +53,15 @@ enum {
+ MLX5E_EVENT_MODE_ONCE_TILL_ARM = 0x2,
+ };
+
++enum {
++ MLX5E_MTPPS_FS_ENABLE = BIT(0x0),
++ MLX5E_MTPPS_FS_PATTERN = BIT(0x2),
++ MLX5E_MTPPS_FS_PIN_MODE = BIT(0x3),
++ MLX5E_MTPPS_FS_TIME_STAMP = BIT(0x4),
++ MLX5E_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5),
++ MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7),
++};
++
+ void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp,
+ struct skb_shared_hwtstamps *hwts)
+ {
+@@ -222,7 +231,10 @@ static int mlx5e_ptp_adjfreq(struct ptp_
+
+ /* For future use need to add a loop for finding all 1PPS out pins */
+ MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT);
+- MLX5_SET(mtpps_reg, in, out_periodic_adjustment, delta & 0xFFFF);
++ MLX5_SET(mtpps_reg, in, enhanced_out_periodic_adjustment, delta);
++ MLX5_SET(mtpps_reg, in, field_select,
++ MLX5E_MTPPS_FS_PIN_MODE |
++ MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ);
+
+ mlx5_set_mtpps(priv->mdev, in, sizeof(in));
+ }
+@@ -258,8 +270,7 @@ static int mlx5e_extts_configure(struct
+ int pin = -1;
+ int err = 0;
+
+- if (!MLX5_CAP_GEN(priv->mdev, pps) ||
+- !MLX5_CAP_GEN(priv->mdev, pps_modify))
++ if (!MLX5_PPS_CAP(priv->mdev))
+ return -EOPNOTSUPP;
+
+ if (rq->extts.index >= tstamp->ptp_info.n_pins)
+@@ -278,6 +289,9 @@ static int mlx5e_extts_configure(struct
+ MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_IN);
+ MLX5_SET(mtpps_reg, in, pattern, pattern);
+ MLX5_SET(mtpps_reg, in, enable, on);
++ MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE |
++ MLX5E_MTPPS_FS_PATTERN |
++ MLX5E_MTPPS_FS_ENABLE);
+
+ err = mlx5_set_mtpps(priv->mdev, in, sizeof(in));
+ if (err)
+@@ -303,7 +317,7 @@ static int mlx5e_perout_configure(struct
+ int pin = -1;
+ s64 ns;
+
+- if (!MLX5_CAP_GEN(priv->mdev, pps_modify))
++ if (!MLX5_PPS_CAP(priv->mdev))
+ return -EOPNOTSUPP;
+
+ if (rq->perout.index >= tstamp->ptp_info.n_pins)
+@@ -338,7 +352,10 @@ static int mlx5e_perout_configure(struct
+ MLX5_SET(mtpps_reg, in, pattern, MLX5E_OUT_PATTERN_PERIODIC);
+ MLX5_SET(mtpps_reg, in, enable, on);
+ MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
+-
++ MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE |
++ MLX5E_MTPPS_FS_PATTERN |
++ MLX5E_MTPPS_FS_ENABLE |
++ MLX5E_MTPPS_FS_TIME_STAMP);
+ return mlx5_set_mtpps(priv->mdev, in, sizeof(in));
+ }
+
+@@ -488,7 +505,7 @@ void mlx5e_timestamp_init(struct mlx5e_p
+ #define MAX_PIN_NUM 8
+ tstamp->pps_pin_caps = kzalloc(sizeof(u8) * MAX_PIN_NUM, GFP_KERNEL);
+ if (tstamp->pps_pin_caps) {
+- if (MLX5_CAP_GEN(priv->mdev, pps))
++ if (MLX5_PPS_CAP(priv->mdev))
+ mlx5e_get_pps_caps(priv, tstamp);
+ if (tstamp->ptp_info.n_pins)
+ mlx5e_init_pin_config(tstamp);
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -690,7 +690,7 @@ int mlx5_start_eqs(struct mlx5_core_dev
+ else
+ mlx5_core_dbg(dev, "port_module_event is not set\n");
+
+- if (MLX5_CAP_GEN(dev, pps))
++ if (MLX5_PPS_CAP(dev))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);
+
+ err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
+--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+@@ -153,6 +153,11 @@ int mlx5_set_mtpps(struct mlx5_core_dev
+ int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode);
+ int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode);
+
++#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \
++ MLX5_CAP_GEN((mdev), pps_modify) && \
++ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \
++ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj))
++
+ void mlx5e_init(void);
+ void mlx5e_cleanup(void);
+
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -7718,8 +7718,10 @@ struct mlx5_ifc_pcam_reg_bits {
+ };
+
+ struct mlx5_ifc_mcam_enhanced_features_bits {
+- u8 reserved_at_0[0x7f];
++ u8 reserved_at_0[0x7d];
+
++ u8 mtpps_enh_out_per_adj[0x1];
++ u8 mtpps_fs[0x1];
+ u8 pcie_performance_group[0x1];
+ };
+
+@@ -8115,7 +8117,8 @@ struct mlx5_ifc_mtpps_reg_bits {
+ u8 reserved_at_78[0x4];
+ u8 cap_pin_4_mode[0x4];
+
+- u8 reserved_at_80[0x80];
++ u8 field_select[0x20];
++ u8 reserved_at_a0[0x60];
+
+ u8 enable[0x1];
+ u8 reserved_at_101[0xb];
+@@ -8130,8 +8133,9 @@ struct mlx5_ifc_mtpps_reg_bits {
+
+ u8 out_pulse_duration[0x10];
+ u8 out_periodic_adjustment[0x10];
++ u8 enhanced_out_periodic_adjustment[0x20];
+
+- u8 reserved_at_1a0[0x40];
++ u8 reserved_at_1c0[0x20];
+ };
+
+ struct mlx5_ifc_mtppse_reg_bits {
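+
+Field select gives the register read-modify-write semantics without a
+read: only fields whose select bit is set are applied, and everything
+else keeps its current firmware value. A miniature model (illustrative
+layout, not the real MTPPS format):
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    enum { FS_ENABLE = 1 << 0, FS_PATTERN = 1 << 2 };
+
+    struct mtpps { uint8_t enable, pattern; };
+
+    static void reg_write(struct mtpps *hw, const struct mtpps *in,
+                          uint32_t field_select)
+    {
+        if (field_select & FS_ENABLE)
+            hw->enable = in->enable;
+        if (field_select & FS_PATTERN)
+            hw->pattern = in->pattern;
+        /* unselected fields are left untouched */
+    }
+
+    int main(void)
+    {
+        struct mtpps hw = { .enable = 0, .pattern = 3 };
+        struct mtpps in = { .enable = 1, .pattern = 0 };
+
+        reg_write(&hw, &in, FS_ENABLE); /* pattern not selected */
+        printf("enable=%u pattern=%u\n", hw.enable, hw.pattern);
+        return 0;
+    }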
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Sun, 28 May 2017 14:27:02 +0300
+Subject: net/mlx5e: Add missing support for PTP_CLK_REQ_PPS request
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+
+[ Upstream commit cf5033089b078303b102b65e3ccbbfa3ce0f4367 ]
+
+Add the missing option to enable the PTP_CLK_PPS function.
+In this case the pin should be configured as 1PPS IN first, and
+then it will be connected to the PPS mechanism.
+Events will be reported as PTP_CLOCK_PPSUSR events to the relevant
+sysfs.
+
+Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 +
+ drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 20 ++++++++++++++++++++
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 -
+ 3 files changed, 21 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -266,6 +266,7 @@ struct mlx5e_pps {
+ u8 pin_caps[MAX_PIN_NUM];
+ struct work_struct out_work;
+ u64 start[MAX_PIN_NUM];
++ u8 enabled;
+ };
+
+ struct mlx5e_tstamp {
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+@@ -394,6 +394,17 @@ static int mlx5e_perout_configure(struct
+ MLX5E_EVENT_MODE_REPETETIVE & on);
+ }
+
++static int mlx5e_pps_configure(struct ptp_clock_info *ptp,
++ struct ptp_clock_request *rq,
++ int on)
++{
++ struct mlx5e_tstamp *tstamp =
++ container_of(ptp, struct mlx5e_tstamp, ptp_info);
++
++ tstamp->pps_info.enabled = !!on;
++ return 0;
++}
++
+ static int mlx5e_ptp_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+@@ -403,6 +414,8 @@ static int mlx5e_ptp_enable(struct ptp_c
+ return mlx5e_extts_configure(ptp, rq, on);
+ case PTP_CLK_REQ_PEROUT:
+ return mlx5e_perout_configure(ptp, rq, on);
++ case PTP_CLK_REQ_PPS:
++ return mlx5e_pps_configure(ptp, rq, on);
+ default:
+ return -EOPNOTSUPP;
+ }
+@@ -448,6 +461,7 @@ static int mlx5e_init_pin_config(struct
+ return -ENOMEM;
+ tstamp->ptp_info.enable = mlx5e_ptp_enable;
+ tstamp->ptp_info.verify = mlx5e_ptp_verify;
++ tstamp->ptp_info.pps = 1;
+
+ for (i = 0; i < tstamp->ptp_info.n_pins; i++) {
+ snprintf(tstamp->ptp_info.pin_config[i].name,
+@@ -499,6 +513,12 @@ void mlx5e_pps_event_handler(struct mlx5
+
+ switch (tstamp->ptp_info.pin_config[pin].func) {
+ case PTP_PF_EXTTS:
++ if (tstamp->pps_info.enabled) {
++ event->type = PTP_CLOCK_PPSUSR;
++ event->pps_times.ts_real = ns_to_timespec64(event->timestamp);
++ } else {
++ event->type = PTP_CLOCK_EXTTS;
++ }
+ ptp_clock_event(tstamp->ptp, event);
+ break;
+ case PTP_PF_PEROUT:
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -365,7 +365,6 @@ static void mlx5e_async_event(struct mlx
+ break;
+ case MLX5_DEV_EVENT_PPS:
+ eqe = (struct mlx5_eqe *)param;
+- ptp_event.type = PTP_CLOCK_EXTTS;
+ ptp_event.index = eqe->data.pps.pin;
+ ptp_event.timestamp =
+ timecounter_cyc2time(&priv->tstamp.clock,
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Sun, 28 May 2017 14:06:01 +0300
+Subject: net/mlx5e: Change 1PPS out scheme
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+
+[ Upstream commit 4272f9b88db9223216cdf87314f570f6d81295b4 ]
+
+In order to fix the drift in 1PPS out, we need to adjust the next pulse.
+On each 1PPS out falling edge the driver gets the event, and the event
+handler then adjusts the start time of the next pulse.
+
+Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en.h | 9 +
+ drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 116 ++++++++++++++-------
+ 2 files changed, 87 insertions(+), 38 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
+@@ -261,6 +261,13 @@ struct mlx5e_dcbx {
+ };
+ #endif
+
++#define MAX_PIN_NUM 8
++struct mlx5e_pps {
++ u8 pin_caps[MAX_PIN_NUM];
++ struct work_struct out_work;
++ u64 start[MAX_PIN_NUM];
++};
++
+ struct mlx5e_tstamp {
+ rwlock_t lock;
+ struct cyclecounter cycles;
+@@ -272,7 +279,7 @@ struct mlx5e_tstamp {
+ struct mlx5_core_dev *mdev;
+ struct ptp_clock *ptp;
+ struct ptp_clock_info ptp_info;
+- u8 *pps_pin_caps;
++ struct mlx5e_pps pps_info;
+ };
+
+ enum {
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+@@ -82,6 +82,33 @@ static u64 mlx5e_read_internal_timer(con
+ return mlx5_read_internal_timer(tstamp->mdev) & cc->mask;
+ }
+
++static void mlx5e_pps_out(struct work_struct *work)
++{
++ struct mlx5e_pps *pps_info = container_of(work, struct mlx5e_pps,
++ out_work);
++ struct mlx5e_tstamp *tstamp = container_of(pps_info, struct mlx5e_tstamp,
++ pps_info);
++ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
++ unsigned long flags;
++ int i;
++
++ for (i = 0; i < tstamp->ptp_info.n_pins; i++) {
++ u64 tstart;
++
++ write_lock_irqsave(&tstamp->lock, flags);
++ tstart = tstamp->pps_info.start[i];
++ tstamp->pps_info.start[i] = 0;
++ write_unlock_irqrestore(&tstamp->lock, flags);
++ if (!tstart)
++ continue;
++
++ MLX5_SET(mtpps_reg, in, pin, i);
++ MLX5_SET64(mtpps_reg, in, time_stamp, tstart);
++ MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_TIME_STAMP);
++ mlx5_set_mtpps(tstamp->mdev, in, sizeof(in));
++ }
++}
++
+ static void mlx5e_timestamp_overflow(struct work_struct *work)
+ {
+ struct delayed_work *dwork = to_delayed_work(work);
+@@ -223,21 +250,6 @@ static int mlx5e_ptp_adjfreq(struct ptp_
+ int neg_adj = 0;
+ struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
+ ptp_info);
+- struct mlx5e_priv *priv =
+- container_of(tstamp, struct mlx5e_priv, tstamp);
+-
+- if (MLX5_CAP_GEN(priv->mdev, pps_modify)) {
+- u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+-
+- /* For future use need to add a loop for finding all 1PPS out pins */
+- MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT);
+- MLX5_SET(mtpps_reg, in, enhanced_out_periodic_adjustment, delta);
+- MLX5_SET(mtpps_reg, in, field_select,
+- MLX5E_MTPPS_FS_PIN_MODE |
+- MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ);
+-
+- mlx5_set_mtpps(priv->mdev, in, sizeof(in));
+- }
+
+ if (delta < 0) {
+ neg_adj = 1;
+@@ -315,7 +327,7 @@ static int mlx5e_perout_configure(struct
+ struct mlx5e_priv *priv =
+ container_of(tstamp, struct mlx5e_priv, tstamp);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+- u64 nsec_now, nsec_delta, time_stamp;
++ u64 nsec_now, nsec_delta, time_stamp = 0;
+ u64 cycles_now, cycles_delta;
+ struct timespec64 ts;
+ unsigned long flags;
+@@ -323,6 +335,7 @@ static int mlx5e_perout_configure(struct
+ u8 pin_mode = 0;
+ u8 pattern = 0;
+ int pin = -1;
++ int err = 0;
+ s64 ns;
+
+ if (!MLX5_PPS_CAP(priv->mdev))
+@@ -373,7 +386,12 @@ static int mlx5e_perout_configure(struct
+ MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
+ MLX5_SET(mtpps_reg, in, field_select, field_select);
+
+- return mlx5_set_mtpps(priv->mdev, in, sizeof(in));
++ err = mlx5_set_mtpps(priv->mdev, in, sizeof(in));
++ if (err)
++ return err;
++
++ return mlx5_set_mtppse(priv->mdev, pin, 0,
++ MLX5E_EVENT_MODE_REPETETIVE & on);
+ }
+
+ static int mlx5e_ptp_enable(struct ptp_clock_info *ptp,
+@@ -457,22 +475,50 @@ static void mlx5e_get_pps_caps(struct ml
+ tstamp->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out,
+ cap_max_num_of_pps_out_pins);
+
+- tstamp->pps_pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
+- tstamp->pps_pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
+- tstamp->pps_pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
+- tstamp->pps_pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode);
+- tstamp->pps_pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode);
+- tstamp->pps_pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode);
+- tstamp->pps_pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode);
+- tstamp->pps_pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
++ tstamp->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
++ tstamp->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
++ tstamp->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
++ tstamp->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode);
++ tstamp->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode);
++ tstamp->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode);
++ tstamp->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode);
++ tstamp->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
+ }
+
+ void mlx5e_pps_event_handler(struct mlx5e_priv *priv,
+ struct ptp_clock_event *event)
+ {
++ struct net_device *netdev = priv->netdev;
+ struct mlx5e_tstamp *tstamp = &priv->tstamp;
++ struct timespec64 ts;
++ u64 nsec_now, nsec_delta;
++ u64 cycles_now, cycles_delta;
++ int pin = event->index;
++ s64 ns;
++ unsigned long flags;
+
+- ptp_clock_event(tstamp->ptp, event);
++ switch (tstamp->ptp_info.pin_config[pin].func) {
++ case PTP_PF_EXTTS:
++ ptp_clock_event(tstamp->ptp, event);
++ break;
++ case PTP_PF_PEROUT:
++ mlx5e_ptp_gettime(&tstamp->ptp_info, &ts);
++ cycles_now = mlx5_read_internal_timer(tstamp->mdev);
++ ts.tv_sec += 1;
++ ts.tv_nsec = 0;
++ ns = timespec64_to_ns(&ts);
++ write_lock_irqsave(&tstamp->lock, flags);
++ nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now);
++ nsec_delta = ns - nsec_now;
++ cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift,
++ tstamp->cycles.mult);
++ tstamp->pps_info.start[pin] = cycles_now + cycles_delta;
++ queue_work(priv->wq, &tstamp->pps_info.out_work);
++ write_unlock_irqrestore(&tstamp->lock, flags);
++ break;
++ default:
++ netdev_err(netdev, "%s: Unhandled event\n", __func__);
++ }
+ }
+
+ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
+@@ -508,6 +554,7 @@ void mlx5e_timestamp_init(struct mlx5e_p
+ do_div(ns, NSEC_PER_SEC / 2 / HZ);
+ tstamp->overflow_period = ns;
+
++ INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out);
+ INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow);
+ if (tstamp->overflow_period)
+ schedule_delayed_work(&tstamp->overflow_work, 0);
+@@ -519,16 +566,10 @@ void mlx5e_timestamp_init(struct mlx5e_p
+ snprintf(tstamp->ptp_info.name, 16, "mlx5 ptp");
+
+ /* Initialize 1PPS data structures */
+-#define MAX_PIN_NUM 8
+- tstamp->pps_pin_caps = kzalloc(sizeof(u8) * MAX_PIN_NUM, GFP_KERNEL);
+- if (tstamp->pps_pin_caps) {
+- if (MLX5_PPS_CAP(priv->mdev))
+- mlx5e_get_pps_caps(priv, tstamp);
+- if (tstamp->ptp_info.n_pins)
+- mlx5e_init_pin_config(tstamp);
+- } else {
+- mlx5_core_warn(priv->mdev, "1PPS initialization failed\n");
+- }
++ if (MLX5_PPS_CAP(priv->mdev))
++ mlx5e_get_pps_caps(priv, tstamp);
++ if (tstamp->ptp_info.n_pins)
++ mlx5e_init_pin_config(tstamp);
+
+ tstamp->ptp = ptp_clock_register(&tstamp->ptp_info,
+ &priv->mdev->pdev->dev);
+@@ -551,7 +592,8 @@ void mlx5e_timestamp_cleanup(struct mlx5
+ priv->tstamp.ptp = NULL;
+ }
+
+- kfree(tstamp->pps_pin_caps);
++ cancel_work_sync(&tstamp->pps_info.out_work);
++
+ kfree(tstamp->ptp_info.pin_config);
+
+ cancel_delayed_work_sync(&tstamp->overflow_work);
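+
+The rescheduling math in mlx5e_pps_event_handler() inverts the
+timecounter relation ns = (cycles * mult) >> shift to convert the
+nanoseconds until the next whole second into a hardware timestamp.
+In isolation, with illustrative mult/shift values:
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    int main(void)
+    {
+        uint32_t mult = 1u << 21, shift = 21;   /* 1 cycle == 1 ns here */
+        uint64_t nsec_delta = 1000000000ULL;    /* next pulse: +1 s */
+
+        /* cycles = (ns << shift) / mult, as div64_u64() does above */
+        uint64_t cycles_delta = (nsec_delta << shift) / mult;
+
+        printf("cycles until next pulse: %llu\n",
+               (unsigned long long)cycles_delta);
+        return 0;
+    }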
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Sun, 28 May 2017 12:01:38 +0300
+Subject: net/mlx5e: Fix broken disable 1PPS flow
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+
+[ Upstream commit 49c5031ca6f0628ef973a11b17e463e088bf859e ]
+
+We need to disable the MTPPS and unsubscribe from the pulse events
+when the user disables the 1PPS functionality.
+
+Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 75 ++++++++++++---------
+ 1 file changed, 46 insertions(+), 29 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+@@ -266,6 +266,8 @@ static int mlx5e_extts_configure(struct
+ struct mlx5e_priv *priv =
+ container_of(tstamp, struct mlx5e_priv, tstamp);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
++ u32 field_select = 0;
++ u8 pin_mode = 0;
+ u8 pattern = 0;
+ int pin = -1;
+ int err = 0;
+@@ -280,18 +282,21 @@ static int mlx5e_extts_configure(struct
+ pin = ptp_find_pin(tstamp->ptp, PTP_PF_EXTTS, rq->extts.index);
+ if (pin < 0)
+ return -EBUSY;
++ pin_mode = MLX5E_PIN_MODE_IN;
++ pattern = !!(rq->extts.flags & PTP_FALLING_EDGE);
++ field_select = MLX5E_MTPPS_FS_PIN_MODE |
++ MLX5E_MTPPS_FS_PATTERN |
++ MLX5E_MTPPS_FS_ENABLE;
++ } else {
++ pin = rq->extts.index;
++ field_select = MLX5E_MTPPS_FS_ENABLE;
+ }
+
+- if (rq->extts.flags & PTP_FALLING_EDGE)
+- pattern = 1;
+-
+ MLX5_SET(mtpps_reg, in, pin, pin);
+- MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_IN);
++ MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
+ MLX5_SET(mtpps_reg, in, pattern, pattern);
+ MLX5_SET(mtpps_reg, in, enable, on);
+- MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE |
+- MLX5E_MTPPS_FS_PATTERN |
+- MLX5E_MTPPS_FS_ENABLE);
++ MLX5_SET(mtpps_reg, in, field_select, field_select);
+
+ err = mlx5_set_mtpps(priv->mdev, in, sizeof(in));
+ if (err)
+@@ -314,6 +319,9 @@ static int mlx5e_perout_configure(struct
+ u64 cycles_now, cycles_delta;
+ struct timespec64 ts;
+ unsigned long flags;
++ u32 field_select = 0;
++ u8 pin_mode = 0;
++ u8 pattern = 0;
+ int pin = -1;
+ s64 ns;
+
+@@ -328,34 +336,43 @@ static int mlx5e_perout_configure(struct
+ rq->perout.index);
+ if (pin < 0)
+ return -EBUSY;
+- }
+
+- ts.tv_sec = rq->perout.period.sec;
+- ts.tv_nsec = rq->perout.period.nsec;
+- ns = timespec64_to_ns(&ts);
+- if (on)
++ pin_mode = MLX5E_PIN_MODE_OUT;
++ pattern = MLX5E_OUT_PATTERN_PERIODIC;
++ ts.tv_sec = rq->perout.period.sec;
++ ts.tv_nsec = rq->perout.period.nsec;
++ ns = timespec64_to_ns(&ts);
++
+ if ((ns >> 1) != 500000000LL)
+ return -EINVAL;
+- ts.tv_sec = rq->perout.start.sec;
+- ts.tv_nsec = rq->perout.start.nsec;
+- ns = timespec64_to_ns(&ts);
+- cycles_now = mlx5_read_internal_timer(tstamp->mdev);
+- write_lock_irqsave(&tstamp->lock, flags);
+- nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now);
+- nsec_delta = ns - nsec_now;
+- cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift,
+- tstamp->cycles.mult);
+- write_unlock_irqrestore(&tstamp->lock, flags);
+- time_stamp = cycles_now + cycles_delta;
++
++ ts.tv_sec = rq->perout.start.sec;
++ ts.tv_nsec = rq->perout.start.nsec;
++ ns = timespec64_to_ns(&ts);
++ cycles_now = mlx5_read_internal_timer(tstamp->mdev);
++ write_lock_irqsave(&tstamp->lock, flags);
++ nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now);
++ nsec_delta = ns - nsec_now;
++ cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift,
++ tstamp->cycles.mult);
++ write_unlock_irqrestore(&tstamp->lock, flags);
++ time_stamp = cycles_now + cycles_delta;
++ field_select = MLX5E_MTPPS_FS_PIN_MODE |
++ MLX5E_MTPPS_FS_PATTERN |
++ MLX5E_MTPPS_FS_ENABLE |
++ MLX5E_MTPPS_FS_TIME_STAMP;
++ } else {
++ pin = rq->perout.index;
++ field_select = MLX5E_MTPPS_FS_ENABLE;
++ }
++
+ MLX5_SET(mtpps_reg, in, pin, pin);
+- MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT);
+- MLX5_SET(mtpps_reg, in, pattern, MLX5E_OUT_PATTERN_PERIODIC);
++ MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
++ MLX5_SET(mtpps_reg, in, pattern, pattern);
+ MLX5_SET(mtpps_reg, in, enable, on);
+ MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
+- MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE |
+- MLX5E_MTPPS_FS_PATTERN |
+- MLX5E_MTPPS_FS_ENABLE |
+- MLX5E_MTPPS_FS_TIME_STAMP);
++ MLX5_SET(mtpps_reg, in, field_select, field_select);
++
+ return mlx5_set_mtpps(priv->mdev, in, sizeof(in));
+ }
+
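
Both the EXTTS and PEROUT paths now share one shape: compute pin_mode,
pattern and a field_select mask in the enable branch, select only the
enable field in the disable branch, then issue a single register write.
A condensed sketch of that split; the MLX5E_MTPPS_FS_* values below are
illustrative stand-ins, not the real bit positions:

  #include <stdint.h>

  #define MLX5E_MTPPS_FS_PIN_MODE (1u << 0) /* illustrative */
  #define MLX5E_MTPPS_FS_PATTERN  (1u << 1) /* illustrative */
  #define MLX5E_MTPPS_FS_ENABLE   (1u << 2) /* illustrative */

  static uint32_t mtpps_field_select(int on)
  {
          /* on disable, firmware should act on the enable field only */
          if (on)
                  return MLX5E_MTPPS_FS_PIN_MODE |
                         MLX5E_MTPPS_FS_PATTERN |
                         MLX5E_MTPPS_FS_ENABLE;
          return MLX5E_MTPPS_FS_ENABLE;
  }
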
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Ilan Tayari <ilant@mellanox.com>
+Date: Wed, 5 Jul 2017 10:17:04 +0300
+Subject: net/mlx5e: Fix outer_header_zero() check size
+
+From: Ilan Tayari <ilant@mellanox.com>
+
+
+[ Upstream commit 0242f4a0bb03906010bbf80495512be00494a0ef ]
+
+The outer_header_zero() routine checks whether the outer_headers match
+of a flow-table entry is all zero.
+
+This function uses the size of the whole fte_match_param instead of just
+the outer_headers member, causing it to fail to detect all-zeros if
+any other member of the fte_match_param is non-zero.
+
+Use the correct size for zero check.
+
+Fixes: 6dc6071cfcde ("net/mlx5e: Add ethtool flow steering support")
+Signed-off-by: Ilan Tayari <ilant@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+@@ -276,7 +276,7 @@ static void add_rule_to_list(struct mlx5
+
+ static bool outer_header_zero(u32 *match_criteria)
+ {
+- int size = MLX5_ST_SZ_BYTES(fte_match_param);
++ int size = MLX5_FLD_SZ_BYTES(fte_match_param, outer_headers);
+ char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria,
+ outer_headers);
+
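
The bug pattern is generic: an "is this region all zero" check must be
sized to the member under test, not to its enclosing structure. A
self-contained illustration with a made-up layout:

  #include <string.h>

  struct match_param { char outer_headers[16]; char misc[16]; };

  static int outer_headers_zero(const struct match_param *p)
  {
          static const struct match_param zero;

          /* sizeof(p->outer_headers), not sizeof(*p): a non-zero
           * 'misc' must not defeat the check
           */
          return memcmp(p->outer_headers, zero.outer_headers,
                        sizeof(p->outer_headers)) == 0;
  }
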
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Wed, 12 Jul 2017 17:27:18 +0300
+Subject: net/mlx5e: Fix wrong delay calculation for overflow check scheduling
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+
+[ Upstream commit d439c84509a510e864fdc6166c760482cd03fc57 ]
+
+The overflow_period is calculated in seconds. In order to use it
+for delayed work scheduling, a translation to jiffies is needed.
+
+Fixes: ef9814deafd0 ('net/mlx5e: Add HW timestamping (TS) support')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+@@ -119,7 +119,8 @@ static void mlx5e_timestamp_overflow(str
+ write_lock_irqsave(&tstamp->lock, flags);
+ timecounter_read(&tstamp->clock);
+ write_unlock_irqrestore(&tstamp->lock, flags);
+- schedule_delayed_work(&tstamp->overflow_work, tstamp->overflow_period);
++ schedule_delayed_work(&tstamp->overflow_work,
++ msecs_to_jiffies(tstamp->overflow_period * 1000));
+ }
+
+ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
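
The unit mismatch is easy to quantify: schedule_delayed_work() takes a
delay in jiffies, so passing raw seconds divides the period by HZ in
effect. A userspace illustration with a simplified (truncating)
msecs_to_jiffies(); the real kernel helper rounds up:

  #include <stdio.h>

  #define HZ 250 /* example CONFIG_HZ value */

  static unsigned long msecs_to_jiffies(unsigned long ms)
  {
          return ms * HZ / 1000; /* simplified; the kernel rounds up */
  }

  int main(void)
  {
          unsigned long period_sec = 10;

          /* bug: 10 is taken as 10 jiffies == 40 ms at HZ=250 */
          printf("buggy delay: %lu ms\n", period_sec * 1000 / HZ);
          /* fix: 10 s -> 2500 jiffies */
          printf("fixed delay: %lu jiffies\n",
                 msecs_to_jiffies(period_sec * 1000));
          return 0;
  }
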
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Alex Vesker <valex@mellanox.com>
+Date: Thu, 6 Jul 2017 15:40:32 +0300
+Subject: net/mlx5e: IPoIB, Modify add/remove underlay QPN flows
+
+From: Alex Vesker <valex@mellanox.com>
+
+
+[ Upstream commit 58569ef8f619761548e7d198f59e8ebe3af91d04 ]
+
+On interface remove, the clean-up was done incorrectly, causing
+an error in the log:
+"SET_FLOW_TABLE_ROOT(0x92f) op_mod(0x0) failed...syndrome (0x7e9f14)"
+
+This was caused by the following flow:
+-ndo_uninit:
+ Move QP state to RST (this disconnects the QP from FT),
+ the QP cannot be attached to any FT unless it is in RTS.
+
+-mlx5_rdma_netdev_free:
+ cleanup_rx: Destroy FT
+ cleanup_tx: Destroy QP and remove QPN from FT
+
+This caused a problem: when destroying the current FT, we tried to
+re-attach the QP to the next FT, which is not needed.
+
+The correct flow is:
+-mlx5_rdma_netdev_free:
+ cleanup_rx: remove QPN from FT & Destroy FT
+ cleanup_tx: Destroy QP
+
+Fixes: 508541146af1 ("net/mlx5: Use underlay QPN from the root name space")
+Signed-off-by: Alex Vesker <valex@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/ipoib.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c
+@@ -160,8 +160,6 @@ out:
+
+ static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+ {
+- mlx5_fs_remove_rx_underlay_qpn(mdev, qp->qpn);
+-
+ mlx5_core_destroy_qp(mdev, qp);
+ }
+
+@@ -176,8 +174,6 @@ static int mlx5i_init_tx(struct mlx5e_pr
+ return err;
+ }
+
+- mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
+-
+ err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
+@@ -235,6 +231,7 @@ static void mlx5i_destroy_flow_steering(
+
+ static int mlx5i_init_rx(struct mlx5e_priv *priv)
+ {
++ struct mlx5i_priv *ipriv = priv->ppriv;
+ int err;
+
+ err = mlx5e_create_indirect_rqt(priv);
+@@ -253,12 +250,18 @@ static int mlx5i_init_rx(struct mlx5e_pr
+ if (err)
+ goto err_destroy_indirect_tirs;
+
+- err = mlx5i_create_flow_steering(priv);
++ err = mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
+ if (err)
+ goto err_destroy_direct_tirs;
+
++ err = mlx5i_create_flow_steering(priv);
++ if (err)
++ goto err_remove_rx_underlay_qpn;
++
+ return 0;
+
++err_remove_rx_underlay_qpn:
++ mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
+ err_destroy_direct_tirs:
+ mlx5e_destroy_direct_tirs(priv);
+ err_destroy_indirect_tirs:
+@@ -272,6 +275,9 @@ err_destroy_indirect_rqts:
+
+ static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
+ {
++ struct mlx5i_priv *ipriv = priv->ppriv;
++
++ mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
+ mlx5i_destroy_flow_steering(priv);
+ mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_indirect_tirs(priv);
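
The reworked mlx5i_init_rx() follows the standard kernel unwind idiom:
each acquired resource gets a label, and a failure jumps to the label
that releases everything acquired so far, in reverse order. A generic
sketch of the shape, with placeholder step/undo functions:

  int step_a(void);
  int step_b(void);
  void undo_a(void);

  static int init_resources(void)
  {
          int err;

          err = step_a();
          if (err)
                  return err;

          err = step_b();
          if (err)
                  goto err_undo_a;

          return 0;

  err_undo_a:
          undo_a();
          return err;
  }
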
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Eugenia Emantayev <eugenia@mellanox.com>
+Date: Wed, 12 Jul 2017 17:44:07 +0300
+Subject: net/mlx5e: Schedule overflow check work to mlx5e workqueue
+
+From: Eugenia Emantayev <eugenia@mellanox.com>
+
+
+[ Upstream commit f08c39ed0bfb503c7b3e013cd40d036ce6a0941a ]
+
+This is done in order to ensure that work will not run after the cleanup.
+
+Fixes: ef9814deafd0 ('net/mlx5e: Add HW timestamping (TS) support')
+Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+@@ -114,13 +114,14 @@ static void mlx5e_timestamp_overflow(str
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp,
+ overflow_work);
++ struct mlx5e_priv *priv = container_of(tstamp, struct mlx5e_priv, tstamp);
+ unsigned long flags;
+
+ write_lock_irqsave(&tstamp->lock, flags);
+ timecounter_read(&tstamp->clock);
+ write_unlock_irqrestore(&tstamp->lock, flags);
+- schedule_delayed_work(&tstamp->overflow_work,
+- msecs_to_jiffies(tstamp->overflow_period * 1000));
++ queue_delayed_work(priv->wq, &tstamp->overflow_work,
++ msecs_to_jiffies(tstamp->overflow_period * 1000));
+ }
+
+ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
+@@ -578,7 +579,7 @@ void mlx5e_timestamp_init(struct mlx5e_p
+ INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out);
+ INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow);
+ if (tstamp->overflow_period)
+- schedule_delayed_work(&tstamp->overflow_work, 0);
++ queue_delayed_work(priv->wq, &tstamp->overflow_work, 0);
+ else
+ mlx5_core_warn(priv->mdev, "invalid overflow period, overflow_work is not scheduled\n");
+
+@@ -614,8 +615,6 @@ void mlx5e_timestamp_cleanup(struct mlx5
+ }
+
+ cancel_work_sync(&tstamp->pps_info.out_work);
+-
+- kfree(tstamp->ptp_info.pin_config);
+-
+ cancel_delayed_work_sync(&tstamp->overflow_work);
++ kfree(tstamp->ptp_info.pin_config);
+ }
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Fri, 28 Jul 2017 11:58:36 -0700
+Subject: net: phy: Correctly process PHY_HALTED in phy_stop_machine()
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit 7ad813f208533cebfcc32d3d7474dc1677d1b09a ]
+
+Marc reported that the PHY library adjust_link() callback was not
+running when calling phy_stop() + phy_disconnect(). This is indeed the
+case, because we set the state machine to PHY_HALTED but never run it
+to process this state past that point.
+
+Fix this with a synchronous call to phy_state_machine() in order to have
+the state machine actually act on PHY_HALTED, set the PHY device's link
+down, turn the network device's carrier off and finally call the
+adjust_link() function.
+
+Reported-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+Fixes: a390d1f379cf ("phylib: convert state_queue work to delayed_work")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/phy/phy.c
++++ b/drivers/net/phy/phy.c
+@@ -749,6 +749,9 @@ void phy_stop_machine(struct phy_device
+ if (phydev->state > PHY_UP && phydev->state != PHY_HALTED)
+ phydev->state = PHY_UP;
+ mutex_unlock(&phydev->lock);
++
++ /* Now we can run the state machine synchronously */
++ phy_state_machine(&phydev->state_queue.work);
+ }
+
+ /**
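
For context, a typical MAC driver teardown that depends on the behavior
restored here looks roughly like the sketch below (a hypothetical
driver, error handling elided). Without the synchronous state-machine
run, the adjust_link() callback registered at connect time never
observed the final link-down:

  #include <linux/netdevice.h>
  #include <linux/phy.h>

  struct example_priv {
          struct phy_device *phydev;
  };

  static int example_ndo_stop(struct net_device *ndev)
  {
          struct example_priv *priv = netdev_priv(ndev);

          phy_stop(priv->phydev);       /* state -> PHY_HALTED */
          phy_disconnect(priv->phydev); /* adjust_link() must have seen
                                         * link-down by this point */
          return 0;
  }
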
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: "David S. Miller" <davem@davemloft.net>
+Date: Wed, 19 Jul 2017 13:33:24 -0700
+Subject: net: Zero terminate ifr_name in dev_ifname().
+
+From: "David S. Miller" <davem@davemloft.net>
+
+
+[ Upstream commit 63679112c536289826fec61c917621de95ba2ade ]
+
+The ifr.ifr_name is passed around and assumed to be NULL terminated.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev_ioctl.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/dev_ioctl.c
++++ b/net/core/dev_ioctl.c
+@@ -28,6 +28,7 @@ static int dev_ifname(struct net *net, s
+
+ if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ return -EFAULT;
++ ifr.ifr_name[IFNAMSIZ-1] = 0;
+
+ error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex);
+ if (error)
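
The same hardening applies to any fixed-size name copied from an
untrusted buffer: terminate it before anything treats it as a C string.
A minimal userspace-style sketch of the pattern:

  #include <stdio.h>
  #include <string.h>

  #define IFNAMSIZ 16

  static void handle_request(const char src[IFNAMSIZ])
  {
          char name[IFNAMSIZ];

          memcpy(name, src, IFNAMSIZ);  /* stand-in for copy_from_user() */
          name[IFNAMSIZ - 1] = '\0';    /* the one-line fix above */
          printf("ifname: %s\n", name); /* now safe for str*() use */
  }
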
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Liping Zhang <zlpnobody@gmail.com>
+Date: Sun, 23 Jul 2017 17:52:23 +0800
+Subject: openvswitch: fix potential out of bound access in parse_ct
+
+From: Liping Zhang <zlpnobody@gmail.com>
+
+
+[ Upstream commit 69ec932e364b1ba9c3a2085fe96b76c8a3f71e7c ]
+
+We shouldn't use 'type' to fetch ovs_ct_attr_lens's minlen and maxlen
+before it is validated; otherwise an out-of-bounds access may happen.
+
+Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action")
+Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
+Acked-by: Pravin B Shelar <pshelar@ovn.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/conntrack.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/openvswitch/conntrack.c
++++ b/net/openvswitch/conntrack.c
+@@ -1289,8 +1289,8 @@ static int parse_ct(const struct nlattr
+
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+- int maxlen = ovs_ct_attr_lens[type].maxlen;
+- int minlen = ovs_ct_attr_lens[type].minlen;
++ int maxlen;
++ int minlen;
+
+ if (type > OVS_CT_ATTR_MAX) {
+ OVS_NLERR(log,
+@@ -1298,6 +1298,9 @@ static int parse_ct(const struct nlattr
+ type, OVS_CT_ATTR_MAX);
+ return -EINVAL;
+ }
++
++ maxlen = ovs_ct_attr_lens[type].maxlen;
++ minlen = ovs_ct_attr_lens[type].minlen;
+ if (nla_len(a) < minlen || nla_len(a) > maxlen) {
+ OVS_NLERR(log,
+ "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Mon, 24 Jul 2017 10:07:32 -0700
+Subject: packet: fix use-after-free in prb_retire_rx_blk_timer_expired()
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit c800aaf8d869f2b9b47b10c5c312fe19f0a94042 ]
+
+There are multiple reports showing we have a use-after-free in
+the timer prb_retire_rx_blk_timer_expired(), where we use struct
+tpacket_kbdq_core::pkbdq, a pg_vec, after it gets freed by
+free_pg_vec().
+
+The interesting part is it is not freed via packet_release() but
+via packet_setsockopt(), which means we are not closing the socket.
+Looking into the big and fat function packet_set_ring(), this could
+happen if we satisfy the following conditions:
+
+1. closing == 0, not on packet_release() path
+2. req->tp_block_nr == 0, we don't allocate a new pg_vec
+3. rx_ring->pg_vec is already set as V3, which means we already called
+ packet_set_ring() with req->tp_block_nr > 0 previously
+4. req->tp_frame_nr == 0, pass sanity check
+5. po->mapped == 0, never called mmap()
+
+In this scenario we are clearing the old rx_ring->pg_vec, so we need
+to free this pg_vec, but we don't stop the timer on this path because
+of closing==0.
+
+The timer has to be stopped as long as we need to free pg_vec, therefore
+the check on closing!=0 is wrong, we should check pg_vec!=NULL instead.
+
+Thanks to liujian for testing different fixes.
+
+Reported-by: alexander.levin@verizon.com
+Reported-by: Dave Jones <davej@codemonkey.org.uk>
+Reported-by: liujian (CE) <liujian56@huawei.com>
+Tested-by: liujian (CE) <liujian56@huawei.com>
+Cc: Ding Tianhong <dingtianhong@huawei.com>
+Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -4334,7 +4334,7 @@ static int packet_set_ring(struct sock *
+ register_prot_hook(sk);
+ }
+ spin_unlock(&po->bind_lock);
+- if (closing && (po->tp_version > TPACKET_V2)) {
++ if (pg_vec && (po->tp_version > TPACKET_V2)) {
+ /* Because we don't support block-based V3 on tx-ring */
+ if (!tx_ring)
+ prb_shutdown_retire_blk_timer(po, rb_queue);
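
Putting conditions 1-5 together, a hedged userspace reconstruction of
the trigger could look like the following (requires CAP_NET_RAW, error
handling omitted; on unfixed kernels the second setsockopt() frees the
pg_vec while the retire timer stays armed):

  #include <arpa/inet.h>
  #include <linux/if_ether.h>
  #include <linux/if_packet.h>
  #include <string.h>
  #include <sys/socket.h>
  #include <unistd.h>

  int main(void)
  {
          int ver = TPACKET_V3;
          struct tpacket_req3 req;
          int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

          setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));

          memset(&req, 0, sizeof(req));
          req.tp_block_size = 4096;
          req.tp_block_nr = 1;        /* allocate a V3 rx_ring */
          req.tp_frame_size = 2048;
          req.tp_frame_nr = 2;
          req.tp_retire_blk_tov = 64; /* arms the retire timer */
          setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));

          memset(&req, 0, sizeof(req)); /* tp_block_nr == 0, closing == 0 */
          setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));

          sleep(1); /* timer may now fire on the freed pg_vec */
          return 0;
  }
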
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: David Ahern <dsahern@gmail.com>
+Date: Wed, 19 Jul 2017 10:22:40 -0700
+Subject: Revert "rtnetlink: Do not generate notifications for CHANGEADDR event"
+
+From: David Ahern <dsahern@gmail.com>
+
+
+[ Upstream commit 3753654e541938717b13f2b25791c3171a3a06aa ]
+
+This reverts commit cd8966e75ed3c6b41a37047a904617bc44fa481f.
+
+The duplicate CHANGEADDR event message is sent regardless of link
+status whereas the setlink changes only generate a notification when
+the link is up. Not sending a notification when the link is down breaks
+dhcpcd which only processes hwaddr changes when the link is down.
+
+Fixes reported regression:
+ https://bugzilla.kernel.org/show_bug.cgi?id=196355
+
+Reported-by: Yaroslav Isakov <yaroslav.isakov@gmail.com>
+Signed-off-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -4165,6 +4165,7 @@ static int rtnetlink_event(struct notifi
+
+ switch (event) {
+ case NETDEV_REBOOT:
++ case NETDEV_CHANGEADDR:
+ case NETDEV_CHANGENAME:
+ case NETDEV_FEAT_CHANGE:
+ case NETDEV_BONDING_FAILOVER:
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: WANG Cong <xiyou.wangcong@gmail.com>
+Date: Thu, 20 Jul 2017 11:27:57 -0700
+Subject: rtnetlink: allocate more memory for dev_set_mac_address()
+
+From: WANG Cong <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit 153711f9421be5dbc973dc57a4109dc9d54c89b1 ]
+
+virtnet_set_mac_address() interprets the mac address as a struct
+sockaddr, but the upper layer only allocates sizeof(sa_family_t) +
+dev->addr_len bytes, which is ETH_ALEN + sizeof(sa_family_t) in this
+case.
+
+We lack a unified definition for the mac address, so just fix the
+upper layer; this also allows drivers to freely interpret it as a
+struct sockaddr.
+
+Reported-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -1977,7 +1977,8 @@ static int do_setlink(const struct sk_bu
+ struct sockaddr *sa;
+ int len;
+
+- len = sizeof(sa_family_t) + dev->addr_len;
++ len = sizeof(sa_family_t) + max_t(size_t, dev->addr_len,
++ sizeof(*sa));
+ sa = kmalloc(len, GFP_KERNEL);
+ if (!sa) {
+ err = -ENOMEM;
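
The size gap is concrete: on common 64-bit builds sizeof(sa_family_t)
== 2 and ETH_ALEN == 6, so the old allocation was 8 bytes, while a
driver casting the buffer to struct sockaddr may touch
sizeof(struct sockaddr) == 16 bytes. A small program to verify the
arithmetic on a given ABI:

  #include <stdio.h>
  #include <sys/socket.h>
  #include <linux/if_ether.h>

  int main(void)
  {
          printf("old len        : %zu\n",
                 sizeof(sa_family_t) + ETH_ALEN); /* typically 8 */
          printf("struct sockaddr: %zu\n",
                 sizeof(struct sockaddr));        /* typically 16 */
          return 0;
  }
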
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Alexander Potapenko <glider@google.com>
+Date: Fri, 14 Jul 2017 18:32:45 +0200
+Subject: sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()
+
+From: Alexander Potapenko <glider@google.com>
+
+
+[ Upstream commit b1f5bfc27a19f214006b9b4db7b9126df2dfdf5a ]
+
+If the length field of the iterator (|pos.p| or |err|) is past the end
+of the chunk, we shouldn't access it.
+
+This bug has been detected by KMSAN. For the following pair of system
+calls:
+
+ socket(PF_INET6, SOCK_STREAM, 0x84 /* IPPROTO_??? */) = 3
+ sendto(3, "A", 1, MSG_OOB, {sa_family=AF_INET6, sin6_port=htons(0),
+ inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0,
+ sin6_scope_id=0}, 28) = 1
+
+the tool has reported a use of uninitialized memory:
+
+ ==================================================================
+ BUG: KMSAN: use of uninitialized memory in sctp_rcv+0x17b8/0x43b0
+ CPU: 1 PID: 2940 Comm: probe Not tainted 4.11.0-rc5+ #2926
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
+ 01/01/2011
+ Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:16
+ dump_stack+0x172/0x1c0 lib/dump_stack.c:52
+ kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927
+ __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469
+ __sctp_rcv_init_lookup net/sctp/input.c:1074
+ __sctp_rcv_lookup_harder net/sctp/input.c:1233
+ __sctp_rcv_lookup net/sctp/input.c:1255
+ sctp_rcv+0x17b8/0x43b0 net/sctp/input.c:170
+ sctp6_rcv+0x32/0x70 net/sctp/ipv6.c:984
+ ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279
+ NF_HOOK ./include/linux/netfilter.h:257
+ ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322
+ dst_input ./include/net/dst.h:492
+ ip6_rcv_finish net/ipv6/ip6_input.c:69
+ NF_HOOK ./include/linux/netfilter.h:257
+ ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203
+ __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208
+ __netif_receive_skb net/core/dev.c:4246
+ process_backlog+0x667/0xba0 net/core/dev.c:4866
+ napi_poll net/core/dev.c:5268
+ net_rx_action+0xc95/0x1590 net/core/dev.c:5333
+ __do_softirq+0x485/0x942 kernel/softirq.c:284
+ do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902
+ </IRQ>
+ do_softirq kernel/softirq.c:328
+ __local_bh_enable_ip+0x25b/0x290 kernel/softirq.c:181
+ local_bh_enable+0x37/0x40 ./include/linux/bottom_half.h:31
+ rcu_read_unlock_bh ./include/linux/rcupdate.h:931
+ ip6_finish_output2+0x19b2/0x1cf0 net/ipv6/ip6_output.c:124
+ ip6_finish_output+0x764/0x970 net/ipv6/ip6_output.c:149
+ NF_HOOK_COND ./include/linux/netfilter.h:246
+ ip6_output+0x456/0x520 net/ipv6/ip6_output.c:163
+ dst_output ./include/net/dst.h:486
+ NF_HOOK ./include/linux/netfilter.h:257
+ ip6_xmit+0x1841/0x1c00 net/ipv6/ip6_output.c:261
+ sctp_v6_xmit+0x3b7/0x470 net/sctp/ipv6.c:225
+ sctp_packet_transmit+0x38cb/0x3a20 net/sctp/output.c:632
+ sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885
+ sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750
+ sctp_side_effects net/sctp/sm_sideeffect.c:1773
+ sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147
+ sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88
+ sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954
+ inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762
+ sock_sendmsg_nosec net/socket.c:633
+ sock_sendmsg net/socket.c:643
+ SYSC_sendto+0x608/0x710 net/socket.c:1696
+ SyS_sendto+0x8a/0xb0 net/socket.c:1664
+ do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285
+ entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:246
+ RIP: 0033:0x401133
+ RSP: 002b:00007fff6d99cd38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
+ RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000401133
+ RDX: 0000000000000001 RSI: 0000000000494088 RDI: 0000000000000003
+ RBP: 00007fff6d99cd90 R08: 00007fff6d99cd50 R09: 000000000000001c
+ R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000000
+ R13: 00000000004063d0 R14: 0000000000406460 R15: 0000000000000000
+ origin:
+ save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59
+ kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302
+ kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198
+ kmsan_poison_shadow+0x6d/0xc0 mm/kmsan/kmsan.c:211
+ slab_alloc_node mm/slub.c:2743
+ __kmalloc_node_track_caller+0x200/0x360 mm/slub.c:4351
+ __kmalloc_reserve net/core/skbuff.c:138
+ __alloc_skb+0x26b/0x840 net/core/skbuff.c:231
+ alloc_skb ./include/linux/skbuff.h:933
+ sctp_packet_transmit+0x31e/0x3a20 net/sctp/output.c:570
+ sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885
+ sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750
+ sctp_side_effects net/sctp/sm_sideeffect.c:1773
+ sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147
+ sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88
+ sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954
+ inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762
+ sock_sendmsg_nosec net/socket.c:633
+ sock_sendmsg net/socket.c:643
+ SYSC_sendto+0x608/0x710 net/socket.c:1696
+ SyS_sendto+0x8a/0xb0 net/socket.c:1664
+ do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285
+ return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:246
+ ==================================================================
+
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sctp/sctp.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/include/net/sctp/sctp.h
++++ b/include/net/sctp/sctp.h
+@@ -469,6 +469,8 @@ _sctp_walk_params((pos), (chunk), ntohs(
+
+ #define _sctp_walk_params(pos, chunk, end, member)\
+ for (pos.v = chunk->member;\
++ (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\
++ (void *)chunk + end) &&\
+ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
+ ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
+ pos.v += SCTP_PAD4(ntohs(pos.p->length)))
+@@ -479,6 +481,8 @@ _sctp_walk_errors((err), (chunk_hdr), nt
+ #define _sctp_walk_errors(err, chunk_hdr, end)\
+ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
+ sizeof(sctp_chunkhdr_t));\
++ ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\
++ (void *)chunk_hdr + end) &&\
+ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
+ ntohs(err->length) >= sizeof(sctp_errhdr_t); \
+ err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length))))
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Fri, 14 Jul 2017 22:07:33 +0800
+Subject: sctp: fix an array overflow when all ext chunks are set
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 10b3bf54406bb7f4e78da9bb2a485c5c986678ad ]
+
+Marcelo noticed an array overflow caused by commit c28445c3cb07
+("sctp: add reconf_enable in asoc ep and netns"), in which sctp
+would add SCTP_CID_RECONF into extensions when reconf_enable is
+set in sctp_make_init and sctp_make_init_ack.
+
+Now, when all ext chunks are set, 4 ext chunk ids can be put into the
+extensions array while the extensions array size is 3. This overflow
+would cause a kernel panic.
+
+This patch fixes it by increasing the extensions array size to 4 in
+both sctp_make_init and sctp_make_init_ack.
+
+Fixes: c28445c3cb07 ("sctp: add reconf_enable in asoc ep and netns")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/sm_make_chunk.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/sctp/sm_make_chunk.c
++++ b/net/sctp/sm_make_chunk.c
+@@ -228,7 +228,7 @@ struct sctp_chunk *sctp_make_init(const
+ sctp_adaptation_ind_param_t aiparam;
+ sctp_supported_ext_param_t ext_param;
+ int num_ext = 0;
+- __u8 extensions[3];
++ __u8 extensions[4];
+ sctp_paramhdr_t *auth_chunks = NULL,
+ *auth_hmacs = NULL;
+
+@@ -396,7 +396,7 @@ struct sctp_chunk *sctp_make_init_ack(co
+ sctp_adaptation_ind_param_t aiparam;
+ sctp_supported_ext_param_t ext_param;
+ int num_ext = 0;
+- __u8 extensions[3];
++ __u8 extensions[4];
+ sctp_paramhdr_t *auth_chunks = NULL,
+ *auth_hmacs = NULL,
+ *auth_random = NULL;
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 26 Jul 2017 16:24:59 +0800
+Subject: sctp: fix the check for _sctp_walk_params and _sctp_walk_errors
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 6b84202c946cd3da3a8daa92c682510e9ed80321 ]
+
+Commit b1f5bfc27a19 ("sctp: don't dereference ptr before leaving
+_sctp_walk_{params, errors}()") tried to fix the issue that it
+may overstep the chunk end for _sctp_walk_{params, errors} with
+'chunk_end > offset(length) + sizeof(length)'.
+
+But it introduced a side effect: when processing INIT, the chunk is
+verified with 'param.v == chunk_end' after iterating over all params
+with sctp_walk_params(). With the check 'chunk_end > offset(length)
++ sizeof(length)', the walk returns before the last param has been
+accessed, because the last param is usually the fwdtsn-supported
+param, whose size is 4, so that 'chunk_end == offset(length) +
+sizeof(length)'.
+
+This is a bad issue that even prevents sctp from completing the 4-way
+handshake: a client always gets an abort when connecting to a server,
+due to the failure of INIT chunk verification on the server.
+
+The patch is to use 'chunk_end <= offset(length) + sizeof(length)'
+instead of 'chunk_end < offset(length) + sizeof(length)' for both
+_sctp_walk_params and _sctp_walk_errors.
+
+Fixes: b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sctp/sctp.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/include/net/sctp/sctp.h
++++ b/include/net/sctp/sctp.h
+@@ -469,7 +469,7 @@ _sctp_walk_params((pos), (chunk), ntohs(
+
+ #define _sctp_walk_params(pos, chunk, end, member)\
+ for (pos.v = chunk->member;\
+- (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\
++ (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\
+ (void *)chunk + end) &&\
+ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
+ ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
+@@ -481,7 +481,7 @@ _sctp_walk_errors((err), (chunk_hdr), nt
+ #define _sctp_walk_errors(err, chunk_hdr, end)\
+ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
+ sizeof(sctp_chunkhdr_t));\
+- ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\
++ ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\
+ (void *)chunk_hdr + end) &&\
+ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
+ ntohs(err->length) >= sizeof(sctp_errhdr_t); \
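
The off-by-one is easiest to see with a 4-byte parameter (such as the
fwdtsn-supported param) sitting flush at the end of the chunk, where
offsetof(struct sctp_paramhdr, length) == 2 and sizeof(pos.p->length)
== 2:

  pos.v + 2 + 2 == chunk_end   ->  '<'  rejects the final param (bug)
                               ->  '<=' accepts it, while both bytes of
                                   the length field still lie inside
                                   the chunk

So '<=' is the tightest guard that still prevents dereferencing a
length field that straddles the chunk end.
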
blk-mq-create-hctx-for-each-present-cpu.patch
block-disable-runtime-pm-for-blk-mq.patch
saa7164-fix-double-fetch-pcie-access-condition.patch
+sctp-fix-an-array-overflow-when-all-ext-chunks-are-set.patch
+tcp_bbr-cut-pacing-rate-only-if-filled-pipe.patch
+tcp_bbr-introduce-bbr_bw_to_pacing_rate-helper.patch
+tcp_bbr-introduce-bbr_init_pacing_rate_from_rtt-helper.patch
+tcp_bbr-remove-sk_pacing_rate-0-transient-during-init.patch
+tcp_bbr-init-pacing-rate-on-first-rtt-sample.patch
+ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch
+wireless-wext-terminate-ifr-name-coming-from-userspace.patch
+net-zero-terminate-ifr_name-in-dev_ifname.patch
+net-dsa-mv88e6xxx-enable-cmode-config-support-for-6390x.patch
+revert-rtnetlink-do-not-generate-notifications-for-changeaddr-event.patch
+ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch
+net-dsa-b53-add-missing-arl-entries-for-bcm53125.patch
+ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch
+rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch
+net-bonding-fix-transmit-load-balancing-in-balance-alb-mode.patch
+mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch
+openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch
+packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch
+ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch
+net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch
+bonding-commit-link-status-change-after-propose.patch
+dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch
+dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch
+dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch
+net-mlx5-consider-tx_enabled-in-all-modes-on-remap.patch
+net-mlx5-fix-command-completion-after-timeout-access-invalid-structure.patch
+net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch
+sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch
+sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch
+net-mlx5e-ipoib-modify-add-remove-underlay-qpn-flows.patch
+net-mlx5e-fix-outer_header_zero-check-size.patch
+net-mlx5-fix-mlx5_ifc_mtpps_reg_bits-structure-size.patch
+net-mlx5e-add-field-select-to-mtpps-register.patch
+net-mlx5e-fix-broken-disable-1pps-flow.patch
+net-mlx5e-change-1pps-out-scheme.patch
+net-mlx5e-add-missing-support-for-ptp_clk_req_pps-request.patch
+net-mlx5e-fix-wrong-delay-calculation-for-overflow-check-scheduling.patch
+net-mlx5e-schedule-overflow-check-work-to-mlx5e-workqueue.patch
+net-mlx5-fix-mlx5_add_flow_rules-call-with-correct-num-of-dests.patch
+udp6-fix-socket-leak-on-early-demux.patch
+net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch
+ipv4-fib-fix-null-pointer-deref-during-fib_sync_down_dev.patch
+virtio_net-fix-truesize-for-mergeable-buffers.patch
+sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch
+sparc64-prevent-perf-from-running-during-super-critical-sections.patch
+sparc64-register-hugepages-during-arch-init.patch
+sparc64-fix-exception-handling-in-ultrasparc-iii-memcpy.patch
--- /dev/null
+From foo@baz Tue Aug 8 16:28:31 PDT 2017
+From: "David S. Miller" <davem@davemloft.net>
+Date: Fri, 4 Aug 2017 09:47:52 -0700
+Subject: sparc64: Fix exception handling in UltraSPARC-III memcpy.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+
+[ Upstream commit 0ede1c401332173ab0693121dc6cde04a4dbf131 ]
+
+Mikael Pettersson reported that some test programs in the strace-4.18
+testsuite cause an OOPS.
+
+After some debugging it turns out that garbage values are returned
+when an exception occurs, causing the fixup memset() to be run with
+bogus arguments.
+
+The problem is that two of the exception handler stubs write the
+successfully copied length into the wrong register.
+
+Fixes: ee841d0aff64 ("sparc64: Convert U3copy_{from,to}_user to accurate exception reporting.")
+Reported-by: Mikael Pettersson <mikpelinux@gmail.com>
+Tested-by: Mikael Pettersson <mikpelinux@gmail.com>
+Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/lib/U3memcpy.S | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/sparc/lib/U3memcpy.S
++++ b/arch/sparc/lib/U3memcpy.S
+@@ -145,13 +145,13 @@ ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
+ ENTRY(U3_retl_o2_and_7_plus_GS)
+ and %o2, 7, %o2
+ retl
+- add %o2, GLOBAL_SPARE, %o2
++ add %o2, GLOBAL_SPARE, %o0
+ ENDPROC(U3_retl_o2_and_7_plus_GS)
+ ENTRY(U3_retl_o2_and_7_plus_GS_plus_8)
+ add GLOBAL_SPARE, 8, GLOBAL_SPARE
+ and %o2, 7, %o2
+ retl
+- add %o2, GLOBAL_SPARE, %o2
++ add %o2, GLOBAL_SPARE, %o0
+ ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
+ #endif
+
--- /dev/null
+From foo@baz Tue Aug 8 16:28:31 PDT 2017
+From: Jane Chu <jane.chu@oracle.com>
+Date: Tue, 11 Jul 2017 12:00:54 -0600
+Subject: sparc64: Measure receiver forward progress to avoid send mondo timeout
+
+From: Jane Chu <jane.chu@oracle.com>
+
+
+[ Upstream commit 9d53caec84c7c5700e7c1ed744ea584fff55f9ac ]
+
+A large sun4v SPARC system may have moments of intensive xcall activities,
+usually caused by unmapping many pages on many CPUs concurrently. This can
+flood receivers with CPU mondo interrupts for an extended period, causing
+some unlucky senders to hit send-mondo timeout. This problem gets worse
+as cpu count increases because sometimes mappings must be invalidated on
+all CPUs, and sometimes all CPUs may gang up on a single CPU.
+
+But a busy system is not a broken system. In the above scenario, as long
+as the receiver is making forward progress processing mondo interrupts,
+the sender should continue to retry.
+
+This patch implements the receiver's forward progress meter by introducing
+a per cpu counter 'cpu_mondo_counter[cpu]' where 'cpu' is in the range
+of 0..NR_CPUS. The receiver increments its counter as soon as it receives
+a mondo and the sender tracks the receiver's counter. If the receiver has
+stopped making forward progress when the retry limit is reached, the sender
+declares send-mondo-timeout and panic; otherwise, the receiver is allowed
+to keep making forward progress.
+
+In addition, it's been observed that PCIe hotplug events generate Correctable
+Errors that are handled by hypervisor and then OS. Hypervisor 'borrows'
+a guest cpu strand briefly to provide the service. If the cpu strand is
+simultaneously the only cpu targeted by a mondo, it may not be available
+for the mondo in 20msec, causing SUN4V mondo timeout. It appears that 1 second
+is the agreed wait time between hypervisor and guest OS, this patch makes
+the adjustment.
+
+Orabug: 25476541
+Orabug: 26417466
+
+Signed-off-by: Jane Chu <jane.chu@oracle.com>
+Reviewed-by: Steve Sistare <steven.sistare@oracle.com>
+Reviewed-by: Anthony Yznaga <anthony.yznaga@oracle.com>
+Reviewed-by: Rob Gardner <rob.gardner@oracle.com>
+Reviewed-by: Thomas Tai <thomas.tai@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/trap_block.h | 1
+ arch/sparc/kernel/smp_64.c | 189 ++++++++++++++++++++++--------------
+ arch/sparc/kernel/sun4v_ivec.S | 15 ++
+ arch/sparc/kernel/traps_64.c | 1
+ 4 files changed, 134 insertions(+), 72 deletions(-)
+
+--- a/arch/sparc/include/asm/trap_block.h
++++ b/arch/sparc/include/asm/trap_block.h
+@@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR
+ void init_cur_cpu_trap(struct thread_info *);
+ void setup_tba(void);
+ extern int ncpus_probed;
++extern u64 cpu_mondo_counter[NR_CPUS];
+
+ unsigned long real_hard_smp_processor_id(void);
+
+--- a/arch/sparc/kernel/smp_64.c
++++ b/arch/sparc/kernel/smp_64.c
+@@ -622,22 +622,48 @@ retry:
+ }
+ }
+
+-/* Multi-cpu list version. */
++#define CPU_MONDO_COUNTER(cpuid) (cpu_mondo_counter[cpuid])
++#define MONDO_USEC_WAIT_MIN 2
++#define MONDO_USEC_WAIT_MAX 100
++#define MONDO_RETRY_LIMIT 500000
++
++/* Multi-cpu list version.
++ *
++ * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'.
++ * Sometimes not all cpus receive the mondo, requiring us to re-send
++ * the mondo until all cpus have received, or cpus are truly stuck
++ * unable to receive mondo, and we timeout.
++ * Occasionally a target cpu strand is borrowed briefly by hypervisor to
++ * perform guest service, such as PCIe error handling. Consider the
++ * service time, 1 second overall wait is reasonable for 1 cpu.
++ * Here two in-between mondo check wait time are defined: 2 usec for
++ * single cpu quick turn around and up to 100usec for large cpu count.
++ * Deliver mondo to large number of cpus could take longer, we adjusts
++ * the retry count as long as target cpus are making forward progress.
++ */
+ static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
+ {
+- int retries, this_cpu, prev_sent, i, saw_cpu_error;
++ int this_cpu, tot_cpus, prev_sent, i, rem;
++ int usec_wait, retries, tot_retries;
++ u16 first_cpu = 0xffff;
++ unsigned long xc_rcvd = 0;
+ unsigned long status;
++ int ecpuerror_id = 0;
++ int enocpu_id = 0;
+ u16 *cpu_list;
++ u16 cpu;
+
+ this_cpu = smp_processor_id();
+-
+ cpu_list = __va(tb->cpu_list_pa);
+-
+- saw_cpu_error = 0;
+- retries = 0;
++ usec_wait = cnt * MONDO_USEC_WAIT_MIN;
++ if (usec_wait > MONDO_USEC_WAIT_MAX)
++ usec_wait = MONDO_USEC_WAIT_MAX;
++ retries = tot_retries = 0;
++ tot_cpus = cnt;
+ prev_sent = 0;
++
+ do {
+- int forward_progress, n_sent;
++ int n_sent, mondo_delivered, target_cpu_busy;
+
+ status = sun4v_cpu_mondo_send(cnt,
+ tb->cpu_list_pa,
+@@ -645,94 +671,113 @@ static void hypervisor_xcall_deliver(str
+
+ /* HV_EOK means all cpus received the xcall, we're done. */
+ if (likely(status == HV_EOK))
+- break;
++ goto xcall_done;
++
++ /* If not these non-fatal errors, panic */
++ if (unlikely((status != HV_EWOULDBLOCK) &&
++ (status != HV_ECPUERROR) &&
++ (status != HV_ENOCPU)))
++ goto fatal_errors;
+
+ /* First, see if we made any forward progress.
+ *
++ * Go through the cpu_list, count the target cpus that have
++ * received our mondo (n_sent), and those that did not (rem).
++ * Re-pack cpu_list with the cpus remain to be retried in the
++ * front - this simplifies tracking the truly stalled cpus.
++ *
+ * The hypervisor indicates successful sends by setting
+ * cpu list entries to the value 0xffff.
++ *
++ * EWOULDBLOCK means some target cpus did not receive the
++ * mondo and retry usually helps.
++ *
++ * ECPUERROR means at least one target cpu is in error state,
++ * it's usually safe to skip the faulty cpu and retry.
++ *
++ * ENOCPU means one of the target cpu doesn't belong to the
++ * domain, perhaps offlined which is unexpected, but not
++ * fatal and it's okay to skip the offlined cpu.
+ */
++ rem = 0;
+ n_sent = 0;
+ for (i = 0; i < cnt; i++) {
+- if (likely(cpu_list[i] == 0xffff))
++ cpu = cpu_list[i];
++ if (likely(cpu == 0xffff)) {
+ n_sent++;
++ } else if ((status == HV_ECPUERROR) &&
++ (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) {
++ ecpuerror_id = cpu + 1;
++ } else if (status == HV_ENOCPU && !cpu_online(cpu)) {
++ enocpu_id = cpu + 1;
++ } else {
++ cpu_list[rem++] = cpu;
++ }
+ }
+
+- forward_progress = 0;
+- if (n_sent > prev_sent)
+- forward_progress = 1;
++ /* No cpu remained, we're done. */
++ if (rem == 0)
++ break;
+
+- prev_sent = n_sent;
++ /* Otherwise, update the cpu count for retry. */
++ cnt = rem;
+
+- /* If we get a HV_ECPUERROR, then one or more of the cpus
+- * in the list are in error state. Use the cpu_state()
+- * hypervisor call to find out which cpus are in error state.
++ /* Record the overall number of mondos received by the
++ * first of the remaining cpus.
+ */
+- if (unlikely(status == HV_ECPUERROR)) {
+- for (i = 0; i < cnt; i++) {
+- long err;
+- u16 cpu;
+-
+- cpu = cpu_list[i];
+- if (cpu == 0xffff)
+- continue;
+-
+- err = sun4v_cpu_state(cpu);
+- if (err == HV_CPU_STATE_ERROR) {
+- saw_cpu_error = (cpu + 1);
+- cpu_list[i] = 0xffff;
+- }
+- }
+- } else if (unlikely(status != HV_EWOULDBLOCK))
+- goto fatal_mondo_error;
++ if (first_cpu != cpu_list[0]) {
++ first_cpu = cpu_list[0];
++ xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
++ }
+
+- /* Don't bother rewriting the CPU list, just leave the
+- * 0xffff and non-0xffff entries in there and the
+- * hypervisor will do the right thing.
+- *
+- * Only advance timeout state if we didn't make any
+- * forward progress.
++ /* Was any mondo delivered successfully? */
++ mondo_delivered = (n_sent > prev_sent);
++ prev_sent = n_sent;
++
++ /* or, was any target cpu busy processing other mondos? */
++ target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu));
++ xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
++
++ /* Retry count is for no progress. If we're making progress,
++ * reset the retry count.
+ */
+- if (unlikely(!forward_progress)) {
+- if (unlikely(++retries > 10000))
+- goto fatal_mondo_timeout;
+-
+- /* Delay a little bit to let other cpus catch up
+- * on their cpu mondo queue work.
+- */
+- udelay(2 * cnt);
++ if (likely(mondo_delivered || target_cpu_busy)) {
++ tot_retries += retries;
++ retries = 0;
++ } else if (unlikely(retries > MONDO_RETRY_LIMIT)) {
++ goto fatal_mondo_timeout;
+ }
+- } while (1);
+
+- if (unlikely(saw_cpu_error))
+- goto fatal_mondo_cpu_error;
++ /* Delay a little bit to let other cpus catch up on
++ * their cpu mondo queue work.
++ */
++ if (!mondo_delivered)
++ udelay(usec_wait);
+
+- return;
++ retries++;
++ } while (1);
+
+-fatal_mondo_cpu_error:
+- printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
+- "(including %d) were in error state\n",
+- this_cpu, saw_cpu_error - 1);
++xcall_done:
++ if (unlikely(ecpuerror_id > 0)) {
++ pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n",
++ this_cpu, ecpuerror_id - 1);
++ } else if (unlikely(enocpu_id > 0)) {
++ pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n",
++ this_cpu, enocpu_id - 1);
++ }
+ return;
+
++fatal_errors:
++ /* fatal errors include bad alignment, etc */
++ pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n",
++ this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
++ panic("Unexpected SUN4V mondo error %lu\n", status);
++
+ fatal_mondo_timeout:
+- printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
+- " progress after %d retries.\n",
+- this_cpu, retries);
+- goto dump_cpu_list_and_out;
+-
+-fatal_mondo_error:
+- printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
+- this_cpu, status);
+- printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
+- "mondo_block_pa(%lx)\n",
+- this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
+-
+-dump_cpu_list_and_out:
+- printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
+- for (i = 0; i < cnt; i++)
+- printk("%u ", cpu_list[i]);
+- printk("]\n");
++ /* some cpus being non-responsive to the cpu mondo */
++ pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n",
++ this_cpu, first_cpu, (tot_retries + retries), tot_cpus);
++ panic("SUN4V mondo timeout panic\n");
+ }
+
+ static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
+--- a/arch/sparc/kernel/sun4v_ivec.S
++++ b/arch/sparc/kernel/sun4v_ivec.S
+@@ -26,6 +26,21 @@ sun4v_cpu_mondo:
+ ldxa [%g0] ASI_SCRATCHPAD, %g4
+ sub %g4, TRAP_PER_CPU_FAULT_INFO, %g4
+
++ /* Get smp_processor_id() into %g3 */
++ sethi %hi(trap_block), %g5
++ or %g5, %lo(trap_block), %g5
++ sub %g4, %g5, %g3
++ srlx %g3, TRAP_BLOCK_SZ_SHIFT, %g3
++
++ /* Increment cpu_mondo_counter[smp_processor_id()] */
++ sethi %hi(cpu_mondo_counter), %g5
++ or %g5, %lo(cpu_mondo_counter), %g5
++ sllx %g3, 3, %g3
++ add %g5, %g3, %g5
++ ldx [%g5], %g3
++ add %g3, 1, %g3
++ stx %g3, [%g5]
++
+ /* Get CPU mondo queue base phys address into %g7. */
+ ldx [%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7
+
+--- a/arch/sparc/kernel/traps_64.c
++++ b/arch/sparc/kernel/traps_64.c
+@@ -2733,6 +2733,7 @@ void do_getpsr(struct pt_regs *regs)
+ }
+ }
+
++u64 cpu_mondo_counter[NR_CPUS] = {0};
+ struct trap_per_cpu trap_block[NR_CPUS];
+ EXPORT_SYMBOL(trap_block);
+
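
Stripped of the hypervisor details, the sender-side policy introduced
above reduces to: reset the retry budget whenever the receiver's
counter moves, and only time out when it does not. A condensed sketch,
with try_send() and wait_a_bit() as placeholders:

  int try_send(void);
  void wait_a_bit(void);

  static int send_with_progress_meter(const volatile unsigned long *rx_count,
                                      int retry_limit)
  {
          unsigned long seen = *rx_count;
          int retries = 0;

          while (!try_send()) {
                  if (*rx_count != seen) {
                          seen = *rx_count; /* receiver made progress */
                          retries = 0;      /* so reset the budget */
                  } else if (++retries > retry_limit) {
                          return -1;        /* truly stuck: give up */
                  }
                  wait_a_bit();
          }
          return 0;
  }
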
--- /dev/null
+From foo@baz Tue Aug 8 16:28:31 PDT 2017
+From: Rob Gardner <rob.gardner@oracle.com>
+Date: Mon, 17 Jul 2017 09:22:27 -0600
+Subject: sparc64: Prevent perf from running during super critical sections
+
+From: Rob Gardner <rob.gardner@oracle.com>
+
+
+[ Upstream commit fc290a114fc6034b0f6a5a46e2fb7d54976cf87a ]
+
+This fixes another cause of random segfaults and bus errors that may
+occur while running perf with the callgraph option.
+
+Critical sections beginning with spin_lock_irqsave() raise the interrupt
+level to PIL_NORMAL_MAX (14) and intentionally do not block performance
+counter interrupts, which arrive at PIL_NMI (15).
+
+But some sections of code are "super critical" with respect to perf
+because the perf_callchain_user() path accesses user space and may cause
+TLB activity as well as faults as it unwinds the user stack.
+
+One particular critical section occurs in switch_mm:
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+ ...
+ load_secondary_context(mm);
+ tsb_context_switch(mm);
+ ...
+ spin_unlock_irqrestore(&mm->context.lock, flags);
+
+If a perf interrupt arrives in between load_secondary_context() and
+tsb_context_switch(), then perf_callchain_user() could execute with
+the context ID of one process, but with an active TSB for a different
+process. When the user stack is accessed, it is very likely to
+incur a TLB miss, since the h/w context ID has been changed. The TLB
+will then be reloaded with a translation from the TSB for one process,
+but using a context ID for another process. This exposes memory from
+one process to another, and since it is a mapping for stack memory,
+this usually causes the new process to crash quickly.
+
+This super critical section needs more protection than is provided
+by spin_lock_irqsave() since perf interrupts must not be allowed in.
+
+Since __tsb_context_switch already goes through the trouble of
+disabling interrupts completely, we fix this by moving the secondary
+context load down into this better protected region.
+
+Orabug: 25577560
+
+Signed-off-by: Dave Aldridge <david.j.aldridge@oracle.com>
+Signed-off-by: Rob Gardner <rob.gardner@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/mmu_context_64.h | 14 +++++++++-----
+ arch/sparc/kernel/tsb.S | 12 ++++++++++++
+ arch/sparc/power/hibernate.c | 3 +--
+ 3 files changed, 22 insertions(+), 7 deletions(-)
+
+--- a/arch/sparc/include/asm/mmu_context_64.h
++++ b/arch/sparc/include/asm/mmu_context_64.h
+@@ -27,9 +27,11 @@ void destroy_context(struct mm_struct *m
+ void __tsb_context_switch(unsigned long pgd_pa,
+ struct tsb_config *tsb_base,
+ struct tsb_config *tsb_huge,
+- unsigned long tsb_descr_pa);
++ unsigned long tsb_descr_pa,
++ unsigned long secondary_ctx);
+
+-static inline void tsb_context_switch(struct mm_struct *mm)
++static inline void tsb_context_switch_ctx(struct mm_struct *mm,
++ unsigned long ctx)
+ {
+ __tsb_context_switch(__pa(mm->pgd),
+ &mm->context.tsb_block[MM_TSB_BASE],
+@@ -40,9 +42,12 @@ static inline void tsb_context_switch(st
+ #else
+ NULL
+ #endif
+- , __pa(&mm->context.tsb_descr[MM_TSB_BASE]));
++ , __pa(&mm->context.tsb_descr[MM_TSB_BASE]),
++ ctx);
+ }
+
++#define tsb_context_switch(X) tsb_context_switch_ctx(X, 0)
++
+ void tsb_grow(struct mm_struct *mm,
+ unsigned long tsb_index,
+ unsigned long mm_rss);
+@@ -112,8 +117,7 @@ static inline void switch_mm(struct mm_s
+ * cpu0 to update it's TSB because at that point the cpu_vm_mask
+ * only had cpu1 set in it.
+ */
+- load_secondary_context(mm);
+- tsb_context_switch(mm);
++ tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context));
+
+ /* Any time a processor runs a context on an address space
+ * for the first time, we must flush that context out of the
+--- a/arch/sparc/kernel/tsb.S
++++ b/arch/sparc/kernel/tsb.S
+@@ -360,6 +360,7 @@ tsb_flush:
+ * %o1: TSB base config pointer
+ * %o2: TSB huge config pointer, or NULL if none
+ * %o3: Hypervisor TSB descriptor physical address
++ * %o4: Secondary context to load, if non-zero
+ *
+ * We have to run this whole thing with interrupts
+ * disabled so that the current cpu doesn't change
+@@ -372,6 +373,17 @@ __tsb_context_switch:
+ rdpr %pstate, %g1
+ wrpr %g1, PSTATE_IE, %pstate
+
++ brz,pn %o4, 1f
++ mov SECONDARY_CONTEXT, %o5
++
++661: stxa %o4, [%o5] ASI_DMMU
++ .section .sun4v_1insn_patch, "ax"
++ .word 661b
++ stxa %o4, [%o5] ASI_MMU
++ .previous
++ flush %g6
++
++1:
+ TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
+
+ stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR]
+--- a/arch/sparc/power/hibernate.c
++++ b/arch/sparc/power/hibernate.c
+@@ -35,6 +35,5 @@ void restore_processor_state(void)
+ {
+ struct mm_struct *mm = current->active_mm;
+
+- load_secondary_context(mm);
+- tsb_context_switch(mm);
++ tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context));
+ }
--- /dev/null
+From foo@baz Tue Aug 8 16:28:31 PDT 2017
+From: Nitin Gupta <nitin.m.gupta@oracle.com>
+Date: Wed, 19 Jul 2017 17:12:54 -0700
+Subject: sparc64: Register hugepages during arch init
+
+From: Nitin Gupta <nitin.m.gupta@oracle.com>
+
+
+[ Upstream commit 8399e4b88a93fc7bc00fff3b8da9b2e718b7f45e ]
+
+Add hstate for each supported hugepage size using
+arch initcall. This change fixes some hugepage
+parameter parsing inconsistencies:
+
+case 1: no hugepage parameters
+
+ Without hugepage parameters, only a hugepages-8192kB entry is visible
+ in sysfs. It's different from x86_64 where both 2M and 1G hugepage
+ sizes are available.
+
+case 2: default_hugepagesz=[64K|256M|2G]
+
+ When specifying only a default_hugepagesz parameter, the default
+ hugepage size isn't really changed and it stays at 8M. This is again
+ different from x86_64.
+
+Orabug: 25869946
+
+Reviewed-by: Bob Picco <bob.picco@oracle.com>
+Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/mm/init_64.c | 25 ++++++++++++++++++++++++-
+ 1 file changed, 24 insertions(+), 1 deletion(-)
+
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -325,6 +325,29 @@ static void __update_mmu_tsb_insert(stru
+ }
+
+ #ifdef CONFIG_HUGETLB_PAGE
++static void __init add_huge_page_size(unsigned long size)
++{
++ unsigned int order;
++
++ if (size_to_hstate(size))
++ return;
++
++ order = ilog2(size) - PAGE_SHIFT;
++ hugetlb_add_hstate(order);
++}
++
++static int __init hugetlbpage_init(void)
++{
++ add_huge_page_size(1UL << HPAGE_64K_SHIFT);
++ add_huge_page_size(1UL << HPAGE_SHIFT);
++ add_huge_page_size(1UL << HPAGE_256MB_SHIFT);
++ add_huge_page_size(1UL << HPAGE_2GB_SHIFT);
++
++ return 0;
++}
++
++arch_initcall(hugetlbpage_init);
++
+ static int __init setup_hugepagesz(char *string)
+ {
+ unsigned long long hugepage_size;
+@@ -364,7 +387,7 @@ static int __init setup_hugepagesz(char
+ goto out;
+ }
+
+- hugetlb_add_hstate(hugepage_shift - PAGE_SHIFT);
++ add_huge_page_size(hugepage_size);
+ rc = 1;
+
+ out:
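
With the hstates registered at arch init, the usual boot parameters
behave as on x86_64. For instance (values illustrative, sizes from the
list registered above):

  default_hugepagesz=2G hugepagesz=2G hugepages=2 hugepagesz=64K hugepages=512
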
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Neal Cardwell <ncardwell@google.com>
+Date: Fri, 14 Jul 2017 17:49:21 -0400
+Subject: tcp_bbr: cut pacing rate only if filled pipe
+
+From: Neal Cardwell <ncardwell@google.com>
+
+
+[ Upstream commit 4aea287e90dd61a48268ff2994b56f9799441b62 ]
+
+In bbr_set_pacing_rate(), which decides whether to cut the pacing
+rate, there was some code that considered exiting STARTUP to be
+equivalent to the notion of filling the pipe (i.e.,
+bbr_full_bw_reached()). Specifically, as the code was structured,
+exiting STARTUP and going into PROBE_RTT could cause us to cut the
+pacing rate down to something silly and low, based on whatever
+bandwidth samples we've had so far, when it's possible that all of
+them have been small app-limited bandwidth samples that are not
+representative of the bandwidth available in the path. (The code was
+correct at the time it was written, but the state machine changed
+without this spot being adjusted correspondingly.)
+
+Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_bbr.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -221,12 +221,11 @@ static u64 bbr_rate_bytes_per_sec(struct
+ */
+ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
+ {
+- struct bbr *bbr = inet_csk_ca(sk);
+ u64 rate = bw;
+
+ rate = bbr_rate_bytes_per_sec(sk, rate, gain);
+ rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+- if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate)
++ if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
+ sk->sk_pacing_rate = rate;
+ }
+
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Neal Cardwell <ncardwell@google.com>
+Date: Fri, 14 Jul 2017 17:49:25 -0400
+Subject: tcp_bbr: init pacing rate on first RTT sample
+
+From: Neal Cardwell <ncardwell@google.com>
+
+
+[ Upstream commit 32984565574da7ed3afa10647bb4020d7a9e6c93 ]
+
+Fixes the following behavior: for connections that had no RTT sample
+at the time of initializing congestion control, BBR was initializing
+the pacing rate to a high nominal rate (based on a guess of RTT=1ms,
+in case this is LAN traffic). Then BBR never adjusted the pacing rate
+downward upon obtaining an actual RTT sample, if the connection never
+filled the pipe (e.g. all sends were small app-limited write()s).
+
+This fix adjusts the pacing rate upon obtaining the first RTT sample.
+
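+The core of the fix (as in the diff below) is a one-shot re-initialization
+of the pacing rate once the first RTT sample appears:
+
+  if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
+          bbr_init_pacing_rate_from_rtt(sk);
+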
+Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_bbr.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -113,7 +113,8 @@ struct bbr {
+ cwnd_gain:10, /* current gain for setting cwnd */
+ full_bw_cnt:3, /* number of rounds without large bw gains */
+ cycle_idx:3, /* current index in pacing_gain cycle array */
+- unused_b:6;
++ has_seen_rtt:1, /* have we seen an RTT sample yet? */
++ unused_b:5;
+ u32 prior_cwnd; /* prior cwnd upon entering loss recovery */
+ u32 full_bw; /* recent bw, to estimate if pipe is full */
+ };
+@@ -226,11 +227,13 @@ static u32 bbr_bw_to_pacing_rate(struct
+ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
++ struct bbr *bbr = inet_csk_ca(sk);
+ u64 bw;
+ u32 rtt_us;
+
+ if (tp->srtt_us) { /* any RTT sample yet? */
+ rtt_us = max(tp->srtt_us >> 3, 1U);
++ bbr->has_seen_rtt = 1;
+ } else { /* no RTT sample yet */
+ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
+ }
+@@ -248,8 +251,12 @@ static void bbr_init_pacing_rate_from_rt
+ */
+ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
+ {
++ struct tcp_sock *tp = tcp_sk(sk);
++ struct bbr *bbr = inet_csk_ca(sk);
+ u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
+
++ if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
++ bbr_init_pacing_rate_from_rtt(sk);
+ if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
+ sk->sk_pacing_rate = rate;
+ }
+@@ -838,6 +845,7 @@ static void bbr_init(struct sock *sk)
+
+ minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
+
++ bbr->has_seen_rtt = 0;
+ bbr_init_pacing_rate_from_rtt(sk);
+
+ bbr->restore_cwnd = 0;
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Neal Cardwell <ncardwell@google.com>
+Date: Fri, 14 Jul 2017 17:49:22 -0400
+Subject: tcp_bbr: introduce bbr_bw_to_pacing_rate() helper
+
+From: Neal Cardwell <ncardwell@google.com>
+
+
+[ Upstream commit f19fd62dafaf1ed6cf615dba655b82fa9df59074 ]
+
+Introduce a helper to convert a BBR bandwidth and gain factor to a
+pacing rate in bytes per second. This is a pure refactor, but is
+needed for the two fixes that follow.
+
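+Callers then collapse the previous two-step clamp-and-convert sequence
+into a single call (as the diff below does in bbr_set_pacing_rate()):
+
+  u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
+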
+Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_bbr.c | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -212,6 +212,16 @@ static u64 bbr_rate_bytes_per_sec(struct
+ return rate >> BW_SCALE;
+ }
+
++/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
++static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
++{
++ u64 rate = bw;
++
++ rate = bbr_rate_bytes_per_sec(sk, rate, gain);
++ rate = min_t(u64, rate, sk->sk_max_pacing_rate);
++ return rate;
++}
++
+ /* Pace using current bw estimate and a gain factor. In order to help drive the
+ * network toward lower queues while maintaining high utilization and low
+ * latency, the average pacing rate aims to be slightly (~1%) lower than the
+@@ -221,10 +231,8 @@ static u64 bbr_rate_bytes_per_sec(struct
+ */
+ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
+ {
+- u64 rate = bw;
++ u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
+
+- rate = bbr_rate_bytes_per_sec(sk, rate, gain);
+- rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+ if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
+ sk->sk_pacing_rate = rate;
+ }
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Neal Cardwell <ncardwell@google.com>
+Date: Fri, 14 Jul 2017 17:49:23 -0400
+Subject: tcp_bbr: introduce bbr_init_pacing_rate_from_rtt() helper
+
+From: Neal Cardwell <ncardwell@google.com>
+
+
+[ Upstream commit 79135b89b8af304456bd67916b80116ddf03d7b6 ]
+
+Introduce a helper to initialize the BBR pacing rate unconditionally,
+based on the current cwnd and RTT estimate. This is a pure refactor,
+but is needed for the two fixes that follow.
+
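+Condensed, the helper computes rate = high_gain * cwnd / RTT, falling back
+to a nominal 1 ms RTT when no sample exists yet (sketch of the code in the
+diff below):
+
+  bw = (u64)tp->snd_cwnd * BW_UNIT;
+  do_div(bw, tp->srtt_us ? max(tp->srtt_us >> 3, 1U) : USEC_PER_MSEC);
+  sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
+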
+Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_bbr.c | 23 ++++++++++++++++++-----
+ 1 file changed, 18 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -222,6 +222,23 @@ static u32 bbr_bw_to_pacing_rate(struct
+ return rate;
+ }
+
++/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
++static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
++{
++ struct tcp_sock *tp = tcp_sk(sk);
++ u64 bw;
++ u32 rtt_us;
++
++ if (tp->srtt_us) { /* any RTT sample yet? */
++ rtt_us = max(tp->srtt_us >> 3, 1U);
++ } else { /* no RTT sample yet */
++ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
++ }
++ bw = (u64)tp->snd_cwnd * BW_UNIT;
++ do_div(bw, rtt_us);
++ sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
++}
++
+ /* Pace using current bw estimate and a gain factor. In order to help drive the
+ * network toward lower queues while maintaining high utilization and low
+ * latency, the average pacing rate aims to be slightly (~1%) lower than the
+@@ -806,7 +823,6 @@ static void bbr_init(struct sock *sk)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+- u64 bw;
+
+ bbr->prior_cwnd = 0;
+ bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
+@@ -822,11 +838,8 @@ static void bbr_init(struct sock *sk)
+
+ minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
+
+- /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
+- bw = (u64)tp->snd_cwnd * BW_UNIT;
+- do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC);
+ sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */
+- bbr_set_pacing_rate(sk, bw, bbr_high_gain);
++ bbr_init_pacing_rate_from_rtt(sk);
+
+ bbr->restore_cwnd = 0;
+ bbr->round_start = 0;
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Neal Cardwell <ncardwell@google.com>
+Date: Fri, 14 Jul 2017 17:49:24 -0400
+Subject: tcp_bbr: remove sk_pacing_rate=0 transient during init
+
+From: Neal Cardwell <ncardwell@google.com>
+
+
+[ Upstream commit 1d3648eb5d1fe9ed3d095ed8fa19ad11ca4c8bc0 ]
+
+Fix a corner case noticed by Eric Dumazet, where BBR's setting
+sk->sk_pacing_rate to 0 during initialization could theoretically
+cause packets in the sending host to hang if there were packets "in
+flight" in the pacing infrastructure at the time the BBR congestion
+control state is initialized. This could occur if the pacing
+infrastructure happened to race with bbr_init() in a way such that the
+pacer read the 0 rather than the immediately following non-zero pacing
+rate.
+
+Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
+Reported-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_bbr.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -838,7 +838,6 @@ static void bbr_init(struct sock *sk)
+
+ minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
+
+- sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */
+ bbr_init_pacing_rate_from_rtt(sk);
+
+ bbr->restore_cwnd = 0;
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Thu, 27 Jul 2017 14:45:09 +0200
+Subject: udp6: fix socket leak on early demux
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+
+[ Upstream commit c9f2c1ae123a751d4e4f949144500219354d5ee1 ]
+
+When an early demuxed packet reaches __udp6_lib_lookup_skb(), the
+sk reference is retrieved and used, but the relevant reference
+count is leaked and the socket destructor is never called.
+Beyond leaking the sk memory, if there are pending UDP packets
+in the receive queue, the memory accounted to them is leaked as well.
+
+In the long run, this causes persistent forward allocation errors,
+and no UDP skbs (both ipv4 and ipv6) can reach user space.
+
+Fix this by explicitly accessing the early demux reference before
+the lookup, and properly decreasing the socket reference count
+after usage.
+
+Also drop the skb_steal_sock() call in __udp6_lib_lookup_skb(), and
+the now-obsolete comment about the "socket cache".
+
+The newly added code is derived from the current ipv4 code for the
+similar path.
+
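+The pattern added by the diff below takes over the reference installed at
+early-demux time and releases it once the skb has been queued:
+
+  sk = skb_steal_sock(skb);       /* owns the early-demux reference now */
+  if (sk) {
+          struct dst_entry *dst = skb_dst(skb);
+          int ret;
+
+          if (unlikely(sk->sk_rx_dst != dst))
+                  udp_sk_rx_dst_set(sk, dst);
+
+          ret = udpv6_queue_rcv_skb(sk, skb);
+          sock_put(sk);           /* drop the reference after use */
+          /* a return value > 0 means to resubmit the input */
+          return ret > 0 ? ret : 0;
+  }
+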
+v1 -> v2:
+ fixed the __udp6_lib_rcv() return code for resubmission,
+ as suggested by Eric
+
+Reported-by: Sam Edwards <CFSworks@gmail.com>
+Reported-by: Marc Haber <mh+netdev@zugschlus.de>
+Fixes: 5425077d73e0 ("net: ipv6: Add early demux handler for UDP unicast")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/udp.h | 1 +
+ net/ipv4/udp.c | 3 ++-
+ net/ipv6/udp.c | 27 ++++++++++++++++++---------
+ 3 files changed, 21 insertions(+), 10 deletions(-)
+
+--- a/include/net/udp.h
++++ b/include/net/udp.h
+@@ -265,6 +265,7 @@ static inline struct sk_buff *skb_recv_u
+ }
+
+ void udp_v4_early_demux(struct sk_buff *skb);
++void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
+ int udp_get_port(struct sock *sk, unsigned short snum,
+ int (*saddr_cmp)(const struct sock *,
+ const struct sock *));
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1762,7 +1762,7 @@ drop:
+ /* For TCP sockets, sk_rx_dst is protected by socket lock
+ * For UDP, we use xchg() to guard against concurrent changes.
+ */
+-static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
++void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+ {
+ struct dst_entry *old;
+
+@@ -2120,6 +2120,7 @@ void udp_destroy_sock(struct sock *sk)
+ encap_destroy(sk);
+ }
+ }
++EXPORT_SYMBOL(udp_sk_rx_dst_set);
+
+ /*
+ * Socket option code for UDP
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -291,11 +291,7 @@ static struct sock *__udp6_lib_lookup_sk
+ struct udp_table *udptable)
+ {
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+- struct sock *sk;
+
+- sk = skb_steal_sock(skb);
+- if (unlikely(sk))
+- return sk;
+ return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
+ &iph->daddr, dport, inet6_iif(skb),
+ udptable, skb);
+@@ -798,6 +794,24 @@ int __udp6_lib_rcv(struct sk_buff *skb,
+ if (udp6_csum_init(skb, uh, proto))
+ goto csum_error;
+
++ /* Check if the socket is already available, e.g. due to early demux */
++ sk = skb_steal_sock(skb);
++ if (sk) {
++ struct dst_entry *dst = skb_dst(skb);
++ int ret;
++
++ if (unlikely(sk->sk_rx_dst != dst))
++ udp_sk_rx_dst_set(sk, dst);
++
++ ret = udpv6_queue_rcv_skb(sk, skb);
++ sock_put(sk);
++
++ /* a return value > 0 means to resubmit the input */
++ if (ret > 0)
++ return ret;
++ return 0;
++ }
++
+ /*
+ * Multicast receive code
+ */
+@@ -806,11 +820,6 @@ int __udp6_lib_rcv(struct sk_buff *skb,
+ saddr, daddr, udptable, proto);
+
+ /* Unicast */
+-
+- /*
+- * check socket cache ... must talk to Alan about his plans
+- * for sock caches... i'll skip this for now.
+- */
+ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
+ if (sk) {
+ int ret;
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: "Michael S. Tsirkin" <mst@redhat.com>
+Date: Mon, 31 Jul 2017 21:49:49 +0300
+Subject: virtio_net: fix truesize for mergeable buffers
+
+From: "Michael S. Tsirkin" <mst@redhat.com>
+
+
+[ Upstream commit 1daa8790d0280d2c719658e39bd59fce65efa909 ]
+
+Seth Forshee noticed a performance degradation with some workloads.
+This turns out to be due to packet drops. Euan Kemp noticed that this
+is because we drop all packets where length exceeds the truesize, but
+for some packets we add in extra memory without updating the truesize.
+This behavior was kept unchanged from ab7db91705e95 ("virtio-net:
+auto-tune mergeable rx buffer size for improved performance"). That
+commit had an internal reason not to account for the extra space: there
+were not enough bits to store it. That is no longer true, so account
+for the allocated length exactly.
+
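+The essence of the fix (see the diff below) is to record the truesize in
+ctx only after any tail hole has been folded into the buffer length:
+
+  hole = alloc_frag->size - alloc_frag->offset;
+  if (hole < len + headroom) {
+          len += hole;            /* fold the unusable tail into len */
+          alloc_frag->offset += hole;
+  }
+  sg_init_one(rq->sg, buf, len);
+  ctx = (void *)(unsigned long)len;       /* truesize now includes the hole */
+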
+Many thanks to Seth Forshee for the report and bisecting and Euan Kemp
+for debugging the issue.
+
+Fixes: 680557cf79f8 ("virtio_net: rework mergeable buffer handling")
+Reported-by: Euan Kemp <euan.kemp@coreos.com>
+Tested-by: Euan Kemp <euan.kemp@coreos.com>
+Reported-by: Seth Forshee <seth.forshee@canonical.com>
+Tested-by: Seth Forshee <seth.forshee@canonical.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -889,21 +889,20 @@ static int add_recvbuf_mergeable(struct
+
+ buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+ buf += headroom; /* advance address leaving hole at front of pkt */
+- ctx = (void *)(unsigned long)len;
+ get_page(alloc_frag->page);
+ alloc_frag->offset += len + headroom;
+ hole = alloc_frag->size - alloc_frag->offset;
+ if (hole < len + headroom) {
+ /* To avoid internal fragmentation, if there is very likely not
+ * enough space for another buffer, add the remaining space to
+- * the current buffer. This extra space is not included in
+- * the truesize stored in ctx.
++ * the current buffer.
+ */
+ len += hole;
+ alloc_frag->offset += hole;
+ }
+
+ sg_init_one(rq->sg, buf, len);
++ ctx = (void *)(unsigned long)len;
+ err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
+ if (err < 0)
+ put_page(virt_to_head_page(buf));
--- /dev/null
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: "Levin, Alexander" <alexander.levin@verizon.com>
+Date: Tue, 18 Jul 2017 04:23:16 +0000
+Subject: wireless: wext: terminate ifr name coming from userspace
+
+From: "Levin, Alexander" <alexander.levin@verizon.com>
+
+
+[ Upstream commit 98de4e0ea47d106846fc0e30ce4e644283fa7fc2 ]
+
+The ifr name is assumed to be a valid string by the kernel, but nothing
+was forcing userspace to pass a valid string.
+
+In turn, this would cause panics as we tried to access the string
+past its valid memory.
+
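+The fix is the usual forced NUL-termination idiom, applied before the name
+is handed to the wireless extensions handler (as in the diff below):
+
+  iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
+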
+Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev_ioctl.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/core/dev_ioctl.c
++++ b/net/core/dev_ioctl.c
+@@ -423,6 +423,8 @@ int dev_ioctl(struct net *net, unsigned
+ if (copy_from_user(&iwr, arg, sizeof(iwr)))
+ return -EFAULT;
+
++ iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
++
+ return wext_handle_ioctl(net, &iwr, cmd, arg);
+ }
+