From 074627ead254b30d2e508b98c3bcd6736d82dbd5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 8 Aug 2017 16:29:05 -0700 Subject: [PATCH] 4.12-stable patches added patches: bonding-commit-link-status-change-after-propose.patch dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch ipv4-fib-fix-null-pointer-deref-during-fib_sync_down_dev.patch ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch net-bonding-fix-transmit-load-balancing-in-balance-alb-mode.patch net-dsa-b53-add-missing-arl-entries-for-bcm53125.patch net-dsa-mv88e6xxx-enable-cmode-config-support-for-6390x.patch net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch net-mlx5-consider-tx_enabled-in-all-modes-on-remap.patch net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch net-mlx5-fix-command-completion-after-timeout-access-invalid-structure.patch net-mlx5-fix-mlx5_add_flow_rules-call-with-correct-num-of-dests.patch net-mlx5-fix-mlx5_ifc_mtpps_reg_bits-structure-size.patch net-mlx5e-add-field-select-to-mtpps-register.patch net-mlx5e-add-missing-support-for-ptp_clk_req_pps-request.patch net-mlx5e-change-1pps-out-scheme.patch net-mlx5e-fix-broken-disable-1pps-flow.patch net-mlx5e-fix-outer_header_zero-check-size.patch net-mlx5e-fix-wrong-delay-calculation-for-overflow-check-scheduling.patch net-mlx5e-ipoib-modify-add-remove-underlay-qpn-flows.patch net-mlx5e-schedule-overflow-check-work-to-mlx5e-workqueue.patch net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch net-zero-terminate-ifr_name-in-dev_ifname.patch openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch revert-rtnetlink-do-not-generate-notifications-for-changeaddr-event.patch rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch sctp-fix-an-array-overflow-when-all-ext-chunks-are-set.patch sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch sparc64-fix-exception-handling-in-ultrasparc-iii-memcpy.patch sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch sparc64-prevent-perf-from-running-during-super-critical-sections.patch sparc64-register-hugepages-during-arch-init.patch tcp_bbr-cut-pacing-rate-only-if-filled-pipe.patch tcp_bbr-init-pacing-rate-on-first-rtt-sample.patch tcp_bbr-introduce-bbr_bw_to_pacing_rate-helper.patch tcp_bbr-introduce-bbr_init_pacing_rate_from_rtt-helper.patch tcp_bbr-remove-sk_pacing_rate-0-transient-during-init.patch udp6-fix-socket-leak-on-early-demux.patch virtio_net-fix-truesize-for-mergeable-buffers.patch wireless-wext-terminate-ifr-name-coming-from-userspace.patch --- ...mit-link-status-change-after-propose.patch | 45 +++ ...mleak-for-dccp_feat_init-err-process.patch | 38 ++ ...dccp_ipv4-doesn-t-put-reqsk-properly.patch | 33 ++ ...dccp_ipv6-doesn-t-put-reqsk-properly.patch | 51 +++ ...inter-deref-during-fib_sync_down_dev.patch | 48 +++ ...ior-to-register_netdev_notifier-call.patch | 71 ++++ ...ialize-treq-txhash-in-cookie_v_check.patch | 146 ++++++++ ...low-of-offset-in-ip6_find_1stfragopt.patch | 55 
+++ ..._mib_fragfails-twice-in-ip6_fragment.patch | 54 +++ ...on-when-config_vmap_stack-is-enabled.patch | 81 +++++ ...t-load-balancing-in-balance-alb-mode.patch | 43 +++ ...add-missing-arl-entries-for-bcm53125.patch | 33 ++ ...nable-cmode-config-support-for-6390x.patch | 38 ++ ...handle-all-4-rgmii-modes-identically.patch | 51 +++ ...der-tx_enabled-in-all-modes-on-remap.patch | 64 ++++ ...-on-command-entry-allocation-failure.patch | 66 ++++ ...ter-timeout-access-invalid-structure.patch | 52 +++ ...rules-call-with-correct-num-of-dests.patch | 41 +++ ...x5_ifc_mtpps_reg_bits-structure-size.patch | 31 ++ ...e-add-field-select-to-mtpps-register.patch | 165 +++++++++ ...-support-for-ptp_clk_req_pps-request.patch | 95 +++++ .../net-mlx5e-change-1pps-out-scheme.patch | 236 ++++++++++++ ...t-mlx5e-fix-broken-disable-1pps-flow.patch | 137 +++++++ ...x5e-fix-outer_header_zero-check-size.patch | 38 ++ ...lation-for-overflow-check-scheduling.patch | 33 ++ ...modify-add-remove-underlay-qpn-flows.patch | 97 +++++ ...erflow-check-work-to-mlx5e-workqueue.patch | 58 +++ ...ocess-phy_halted-in-phy_stop_machine.patch | 43 +++ ...ero-terminate-ifr_name-in-dev_ifname.patch | 28 ++ ...tial-out-of-bound-access-in-parse_ct.patch | 46 +++ ...e-in-prb_retire_rx_blk_timer_expired.patch | 60 ++++ ...e-notifications-for-changeaddr-event.patch | 38 ++ ...-more-memory-for-dev_set_mac_address.patch | 38 ++ ...re-leaving-_sctp_walk_-params-errors.patch | 140 ++++++++ ...overflow-when-all-ext-chunks-are-set.patch | 50 +++ ...tp_walk_params-and-_sctp_walk_errors.patch | 59 +++ queue-4.12/series | 48 +++ ...on-handling-in-ultrasparc-iii-memcpy.patch | 48 +++ ...progress-to-avoid-send-mondo-timeout.patch | 335 ++++++++++++++++++ ...nning-during-super-critical-sections.patch | 137 +++++++ ...-register-hugepages-during-arch-init.patch | 77 ++++ ...-cut-pacing-rate-only-if-filled-pipe.patch | 48 +++ ...init-pacing-rate-on-first-rtt-sample.patch | 76 ++++ ...troduce-bbr_bw_to_pacing_rate-helper.patch | 55 +++ ...bbr_init_pacing_rate_from_rtt-helper.patch | 71 ++++ ..._pacing_rate-0-transient-during-init.patch | 40 +++ .../udp6-fix-socket-leak-on-early-demux.patch | 127 +++++++ ...t-fix-truesize-for-mergeable-buffers.patch | 61 ++++ ...inate-ifr-name-coming-from-userspace.patch | 34 ++ 49 files changed, 3559 insertions(+) create mode 100644 queue-4.12/bonding-commit-link-status-change-after-propose.patch create mode 100644 queue-4.12/dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch create mode 100644 queue-4.12/dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch create mode 100644 queue-4.12/dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch create mode 100644 queue-4.12/ipv4-fib-fix-null-pointer-deref-during-fib_sync_down_dev.patch create mode 100644 queue-4.12/ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch create mode 100644 queue-4.12/ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch create mode 100644 queue-4.12/ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch create mode 100644 queue-4.12/ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch create mode 100644 queue-4.12/mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch create mode 100644 queue-4.12/net-bonding-fix-transmit-load-balancing-in-balance-alb-mode.patch create mode 100644 queue-4.12/net-dsa-b53-add-missing-arl-entries-for-bcm53125.patch create mode 100644 queue-4.12/net-dsa-mv88e6xxx-enable-cmode-config-support-for-6390x.patch create mode 100644 
queue-4.12/net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch create mode 100644 queue-4.12/net-mlx5-consider-tx_enabled-in-all-modes-on-remap.patch create mode 100644 queue-4.12/net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch create mode 100644 queue-4.12/net-mlx5-fix-command-completion-after-timeout-access-invalid-structure.patch create mode 100644 queue-4.12/net-mlx5-fix-mlx5_add_flow_rules-call-with-correct-num-of-dests.patch create mode 100644 queue-4.12/net-mlx5-fix-mlx5_ifc_mtpps_reg_bits-structure-size.patch create mode 100644 queue-4.12/net-mlx5e-add-field-select-to-mtpps-register.patch create mode 100644 queue-4.12/net-mlx5e-add-missing-support-for-ptp_clk_req_pps-request.patch create mode 100644 queue-4.12/net-mlx5e-change-1pps-out-scheme.patch create mode 100644 queue-4.12/net-mlx5e-fix-broken-disable-1pps-flow.patch create mode 100644 queue-4.12/net-mlx5e-fix-outer_header_zero-check-size.patch create mode 100644 queue-4.12/net-mlx5e-fix-wrong-delay-calculation-for-overflow-check-scheduling.patch create mode 100644 queue-4.12/net-mlx5e-ipoib-modify-add-remove-underlay-qpn-flows.patch create mode 100644 queue-4.12/net-mlx5e-schedule-overflow-check-work-to-mlx5e-workqueue.patch create mode 100644 queue-4.12/net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch create mode 100644 queue-4.12/net-zero-terminate-ifr_name-in-dev_ifname.patch create mode 100644 queue-4.12/openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch create mode 100644 queue-4.12/packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch create mode 100644 queue-4.12/revert-rtnetlink-do-not-generate-notifications-for-changeaddr-event.patch create mode 100644 queue-4.12/rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch create mode 100644 queue-4.12/sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch create mode 100644 queue-4.12/sctp-fix-an-array-overflow-when-all-ext-chunks-are-set.patch create mode 100644 queue-4.12/sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch create mode 100644 queue-4.12/sparc64-fix-exception-handling-in-ultrasparc-iii-memcpy.patch create mode 100644 queue-4.12/sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch create mode 100644 queue-4.12/sparc64-prevent-perf-from-running-during-super-critical-sections.patch create mode 100644 queue-4.12/sparc64-register-hugepages-during-arch-init.patch create mode 100644 queue-4.12/tcp_bbr-cut-pacing-rate-only-if-filled-pipe.patch create mode 100644 queue-4.12/tcp_bbr-init-pacing-rate-on-first-rtt-sample.patch create mode 100644 queue-4.12/tcp_bbr-introduce-bbr_bw_to_pacing_rate-helper.patch create mode 100644 queue-4.12/tcp_bbr-introduce-bbr_init_pacing_rate_from_rtt-helper.patch create mode 100644 queue-4.12/tcp_bbr-remove-sk_pacing_rate-0-transient-during-init.patch create mode 100644 queue-4.12/udp6-fix-socket-leak-on-early-demux.patch create mode 100644 queue-4.12/virtio_net-fix-truesize-for-mergeable-buffers.patch create mode 100644 queue-4.12/wireless-wext-terminate-ifr-name-coming-from-userspace.patch diff --git a/queue-4.12/bonding-commit-link-status-change-after-propose.patch b/queue-4.12/bonding-commit-link-status-change-after-propose.patch new file mode 100644 index 00000000000..6c15a523b5a --- /dev/null +++ b/queue-4.12/bonding-commit-link-status-change-after-propose.patch @@ -0,0 +1,45 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: WANG Cong +Date: Tue, 25 Jul 2017 09:44:25 -0700 +Subject: 
bonding: commit link status change after propose + +From: WANG Cong + + +[ Upstream commit d94708a553022bf012fa95af10532a134eeb5a52 ] + +Commit de77ecd4ef02 ("bonding: improve link-status update in mii-monitoring") +moves link status commitment into bond_mii_monitor(), but it still relies +on the return value of bond_miimon_inspect() as the hint. We need to return +non-zero as long as we propose a link status change. + +Fixes: de77ecd4ef02 ("bonding: improve link-status update in mii-monitoring") +Reported-by: Benjamin Gilbert +Tested-by: Benjamin Gilbert +Cc: Mahesh Bandewar +Signed-off-by: Cong Wang +Acked-by: Mahesh Bandewar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -2047,6 +2047,7 @@ static int bond_miimon_inspect(struct bo + continue; + + bond_propose_link_state(slave, BOND_LINK_FAIL); ++ commit++; + slave->delay = bond->params.downdelay; + if (slave->delay) { + netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n", +@@ -2085,6 +2086,7 @@ static int bond_miimon_inspect(struct bo + continue; + + bond_propose_link_state(slave, BOND_LINK_BACK); ++ commit++; + slave->delay = bond->params.updelay; + + if (slave->delay) { diff --git a/queue-4.12/dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch b/queue-4.12/dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch new file mode 100644 index 00000000000..f74c97efdbf --- /dev/null +++ b/queue-4.12/dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch @@ -0,0 +1,38 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Xin Long +Date: Wed, 26 Jul 2017 14:20:15 +0800 +Subject: dccp: fix a memleak for dccp_feat_init err process + +From: Xin Long + + +[ Upstream commit e90ce2fc27cad7e7b1e72b9e66201a7a4c124c2b ] + +In dccp_feat_init, when ccid_get_builtin_ccids fails to alloc +memory for rx.val, it should free tx.val before returning an +error. + +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/feat.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/dccp/feat.c ++++ b/net/dccp/feat.c +@@ -1471,9 +1471,12 @@ int dccp_feat_init(struct sock *sk) + * singleton values (which always leads to failure). + * These settings can still (later) be overridden via sockopts. + */ +- if (ccid_get_builtin_ccids(&tx.val, &tx.len) || +- ccid_get_builtin_ccids(&rx.val, &rx.len)) ++ if (ccid_get_builtin_ccids(&tx.val, &tx.len)) + return -ENOBUFS; ++ if (ccid_get_builtin_ccids(&rx.val, &rx.len)) { ++ kfree(tx.val); ++ return -ENOBUFS; ++ } + + if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) || + !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len)) diff --git a/queue-4.12/dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch b/queue-4.12/dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch new file mode 100644 index 00000000000..2deeb88338a --- /dev/null +++ b/queue-4.12/dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch @@ -0,0 +1,33 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Xin Long +Date: Wed, 26 Jul 2017 14:19:46 +0800 +Subject: dccp: fix a memleak that dccp_ipv4 doesn't put reqsk properly + +From: Xin Long + + +[ Upstream commit b7953d3c0e30a5fc944f6b7bd0bcceb0794bcd85 ] + +The patch "dccp: fix a memleak that dccp_ipv6 doesn't put reqsk +properly" fixed reqsk refcnt leak for dccp_ipv6.
The same issue +exists on dccp_ipv4. + +This patch fixes it for dccp_ipv4. + +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ipv4.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -631,6 +631,7 @@ int dccp_v4_conn_request(struct sock *sk + goto drop_and_free; + + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); ++ reqsk_put(req); + return 0; + + drop_and_free: diff --git a/queue-4.12/dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch b/queue-4.12/dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch new file mode 100644 index 00000000000..453e4623f9c --- /dev/null +++ b/queue-4.12/dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch @@ -0,0 +1,51 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Xin Long +Date: Wed, 26 Jul 2017 14:19:09 +0800 +Subject: dccp: fix a memleak that dccp_ipv6 doesn't put reqsk properly + +From: Xin Long + + +[ Upstream commit 0c2232b0a71db0ac1d22f751aa1ac0cadb950fd2 ] + +In dccp_v6_conn_request, after reqsk gets alloced and hashed into +the ehash table, reqsk's refcnt is set to 3: one is for req->rsk_timer, +one is for the hlist, and the other one is for the current caller. + +The problem is that when dccp_v6_conn_request returns and finishes using +reqsk, it doesn't put reqsk. This causes reqsk refcnt leaks, and the +reqsk obj never gets freed. + +Jianlin found this issue when running dccp_memleak.c in a loop; the +system memory would run out. + +dccp_memleak.c: + int s1 = socket(PF_INET6, 6, IPPROTO_IP); + bind(s1, &sa1, 0x20); + listen(s1, 0x9); + int s2 = socket(PF_INET6, 6, IPPROTO_IP); + connect(s2, &sa1, 0x20); + close(s1); + close(s2); + +This patch puts the reqsk before dccp_v6_conn_request returns, +just as tcp_conn_request does. + +Reported-by: Jianlin Shi +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ipv6.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -380,6 +380,7 @@ static int dccp_v6_conn_request(struct s + goto drop_and_free; + + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); ++ reqsk_put(req); + return 0; + + drop_and_free: diff --git a/queue-4.12/ipv4-fib-fix-null-pointer-deref-during-fib_sync_down_dev.patch b/queue-4.12/ipv4-fib-fix-null-pointer-deref-during-fib_sync_down_dev.patch new file mode 100644 index 00000000000..0177ac23370 --- /dev/null +++ b/queue-4.12/ipv4-fib-fix-null-pointer-deref-during-fib_sync_down_dev.patch @@ -0,0 +1,48 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Ido Schimmel +Date: Fri, 28 Jul 2017 23:27:44 +0300 +Subject: ipv4: fib: Fix NULL pointer deref during fib_sync_down_dev() + +From: Ido Schimmel + + +[ Upstream commit 71ed7ee35ad2c5300f4b51634185a0193b4fb0fa ] + +Michał reported a NULL pointer deref during fib_sync_down_dev() when +unregistering a netdevice. The problem is that we don't check for +'in_dev' being NULL, which can happen in very specific cases. + +Usually routes are flushed upon NETDEV_DOWN sent in either the netdev or +the inetaddr notification chains. However, if an interface isn't +configured with any IP address, then it's possible for host routes to be +flushed following NETDEV_UNREGISTER, after NULLing dev->ip_ptr in +inetdev_destroy(). + +To reproduce: +$ ip link add type dummy +$ ip route add local 1.1.1.0/24 dev dummy0 +$ ip link del dev dummy0 + +Fix this by checking for the presence of 'in_dev' before referencing it. 
+ +Fixes: 982acb97560c ("ipv4: fib: Notify about nexthop status changes") +Signed-off-by: Ido Schimmel +Reported-by: Michał Mirosław +Tested-by: Michał Mirosław +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_semantics.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -1372,7 +1372,7 @@ static int call_fib_nh_notifiers(struct + return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type, + &info.info); + case FIB_EVENT_NH_DEL: +- if ((IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && ++ if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + fib_nh->nh_flags & RTNH_F_LINKDOWN) || + (fib_nh->nh_flags & RTNH_F_DEAD)) + return call_fib_notifiers(dev_net(fib_nh->nh_dev), diff --git a/queue-4.12/ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch b/queue-4.12/ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch new file mode 100644 index 00000000000..b8513b4c864 --- /dev/null +++ b/queue-4.12/ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch @@ -0,0 +1,71 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Mahesh Bandewar +Date: Wed, 19 Jul 2017 15:41:33 -0700 +Subject: ipv4: initialize fib_trie prior to register_netdev_notifier call. + +From: Mahesh Bandewar + + +[ Upstream commit 8799a221f5944a7d74516ecf46d58c28ec1d1f75 ] + +Net stack initialization currently initializes fib-trie after the +first netdevice_notifier() call. In fact, fib_trie initialization +needs to happen before the first rtnl_register(). It does not cause any problem +since there are no devices UP at this moment, but trying to bring 'lo' +UP at initialization would make this assumption wrong and expose the issue. + +Fixes the following crash: + + Call Trace: + ? alternate_node_alloc+0x76/0xa0 + fib_table_insert+0x1b7/0x4b0 + fib_magic.isra.17+0xea/0x120 + fib_add_ifaddr+0x7b/0x190 + fib_netdev_event+0xc0/0x130 + register_netdevice_notifier+0x1c1/0x1d0 + ip_fib_init+0x72/0x85 + ip_rt_init+0x187/0x1e9 + ip_init+0xe/0x1a + inet_init+0x171/0x26c + ? ipv4_offload_init+0x66/0x66 + do_one_initcall+0x43/0x160 + kernel_init_freeable+0x191/0x219 + ? rest_init+0x80/0x80 + kernel_init+0xe/0x150 + ret_from_fork+0x22/0x30 + Code: f6 46 23 04 74 86 4c 89 f7 e8 ae 45 01 00 49 89 c7 4d 85 ff 0f 85 7b ff ff ff 31 db eb 08 4c 89 ff e8 16 47 01 00 48 8b 44 24 38 <45> 8b 6e 14 4d 63 76 74 48 89 04 24 0f 1f 44 00 00 48 83 c4 08 + RIP: kmem_cache_alloc+0xcf/0x1c0 RSP: ffff9b1500017c28 + CR2: 0000000000000014 + +Fixes: 7b1a74fdbb9e ("[NETNS]: Refactor fib initialization so it can handle multiple namespaces.") +Fixes: 7f9b80529b8a ("[IPV4]: fib hash|trie initialization") + +Signed-off-by: Mahesh Bandewar +Acked-by: "Eric W. Biederman" +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_frontend.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -1327,13 +1327,14 @@ static struct pernet_operations fib_net_ + + void __init ip_fib_init(void) + { +- rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); +- rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); +- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); ++ fib_trie_init(); + + register_pernet_subsys(&fib_net_ops); ++ + register_netdevice_notifier(&fib_netdev_notifier); + register_inetaddr_notifier(&fib_inetaddr_notifier); + +- fib_trie_init(); ++ rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); ++ rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); ++ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); + } diff --git a/queue-4.12/ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch b/queue-4.12/ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch new file mode 100644 index 00000000000..10d9d17e1a4 --- /dev/null +++ b/queue-4.12/ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch @@ -0,0 +1,146 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Alexander Potapenko +Date: Mon, 17 Jul 2017 12:35:58 +0200 +Subject: ipv4: ipv6: initialize treq->txhash in cookie_v[46]_check() + +From: Alexander Potapenko + + +[ Upstream commit 18bcf2907df935981266532e1e0d052aff2e6fae ] + +KMSAN reported use of uninitialized memory in skb_set_hash_from_sk(), +which originated from the TCP request socket created in +cookie_v6_check(): + + ================================================================== + BUG: KMSAN: use of uninitialized memory in tcp_transmit_skb+0xf77/0x3ec0 + CPU: 1 PID: 2949 Comm: syz-execprog Not tainted 4.11.0-rc5+ #2931 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 + TCP: request_sock_TCPv6: Possible SYN flooding on port 20028. Sending cookies. Check SNMP counters. 
+ Call Trace: + + __dump_stack lib/dump_stack.c:16 + dump_stack+0x172/0x1c0 lib/dump_stack.c:52 + kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927 + __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469 + skb_set_hash_from_sk ./include/net/sock.h:2011 + tcp_transmit_skb+0xf77/0x3ec0 net/ipv4/tcp_output.c:983 + tcp_send_ack+0x75b/0x830 net/ipv4/tcp_output.c:3493 + tcp_delack_timer_handler+0x9a6/0xb90 net/ipv4/tcp_timer.c:284 + tcp_delack_timer+0x1b0/0x310 net/ipv4/tcp_timer.c:309 + call_timer_fn+0x240/0x520 kernel/time/timer.c:1268 + expire_timers kernel/time/timer.c:1307 + __run_timers+0xc13/0xf10 kernel/time/timer.c:1601 + run_timer_softirq+0x36/0xa0 kernel/time/timer.c:1614 + __do_softirq+0x485/0x942 kernel/softirq.c:284 + invoke_softirq kernel/softirq.c:364 + irq_exit+0x1fa/0x230 kernel/softirq.c:405 + exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:657 + smp_apic_timer_interrupt+0x5a/0x80 arch/x86/kernel/apic/apic.c:966 + apic_timer_interrupt+0x86/0x90 arch/x86/entry/entry_64.S:489 + RIP: 0010:native_restore_fl ./arch/x86/include/asm/irqflags.h:36 + RIP: 0010:arch_local_irq_restore ./arch/x86/include/asm/irqflags.h:77 + RIP: 0010:__msan_poison_alloca+0xed/0x120 mm/kmsan/kmsan_instr.c:440 + RSP: 0018:ffff880024917cd8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff10 + RAX: 0000000000000246 RBX: ffff8800224c0000 RCX: 0000000000000005 + RDX: 0000000000000004 RSI: ffff880000000000 RDI: ffffea0000b6d770 + RBP: ffff880024917d58 R08: 0000000000000dd8 R09: 0000000000000004 + R10: 0000160000000000 R11: 0000000000000000 R12: ffffffff85abf810 + R13: ffff880024917dd8 R14: 0000000000000010 R15: ffffffff81cabde4 + + poll_select_copy_remaining+0xac/0x6b0 fs/select.c:293 + SYSC_select+0x4b4/0x4e0 fs/select.c:653 + SyS_select+0x76/0xa0 fs/select.c:634 + entry_SYSCALL_64_fastpath+0x13/0x94 arch/x86/entry/entry_64.S:204 + RIP: 0033:0x4597e7 + RSP: 002b:000000c420037ee0 EFLAGS: 00000246 ORIG_RAX: 0000000000000017 + RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004597e7 + RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 + RBP: 000000c420037ef0 R08: 000000c420037ee0 R09: 0000000000000059 + R10: 0000000000000000 R11: 0000000000000246 R12: 000000000042dc20 + R13: 00000000000000f3 R14: 0000000000000030 R15: 0000000000000003 + chained origin: + save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 + kmsan_save_stack mm/kmsan/kmsan.c:317 + kmsan_internal_chain_origin+0x12a/0x1f0 mm/kmsan/kmsan.c:547 + __msan_store_shadow_origin_4+0xac/0x110 mm/kmsan/kmsan_instr.c:259 + tcp_create_openreq_child+0x709/0x1ae0 net/ipv4/tcp_minisocks.c:472 + tcp_v6_syn_recv_sock+0x7eb/0x2a30 net/ipv6/tcp_ipv6.c:1103 + tcp_get_cookie_sock+0x136/0x5f0 net/ipv4/syncookies.c:212 + cookie_v6_check+0x17a9/0x1b50 net/ipv6/syncookies.c:245 + tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989 + tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298 + tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487 + ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279 + NF_HOOK ./include/linux/netfilter.h:257 + ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322 + dst_input ./include/net/dst.h:492 + ip6_rcv_finish net/ipv6/ip6_input.c:69 + NF_HOOK ./include/linux/netfilter.h:257 + ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203 + __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208 + __netif_receive_skb net/core/dev.c:4246 + process_backlog+0x667/0xba0 net/core/dev.c:4866 + napi_poll net/core/dev.c:5268 + net_rx_action+0xc95/0x1590 net/core/dev.c:5333 + __do_softirq+0x485/0x942 
kernel/softirq.c:284 + origin: + save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 + kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198 + kmsan_kmalloc+0x7f/0xe0 mm/kmsan/kmsan.c:337 + kmem_cache_alloc+0x1c2/0x1e0 mm/slub.c:2766 + reqsk_alloc ./include/net/request_sock.h:87 + inet_reqsk_alloc+0xa4/0x5b0 net/ipv4/tcp_input.c:6200 + cookie_v6_check+0x4f4/0x1b50 net/ipv6/syncookies.c:169 + tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:989 + tcp_v6_do_rcv+0xdd8/0x1c60 net/ipv6/tcp_ipv6.c:1298 + tcp_v6_rcv+0x41a3/0x4f00 net/ipv6/tcp_ipv6.c:1487 + ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279 + NF_HOOK ./include/linux/netfilter.h:257 + ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322 + dst_input ./include/net/dst.h:492 + ip6_rcv_finish net/ipv6/ip6_input.c:69 + NF_HOOK ./include/linux/netfilter.h:257 + ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203 + __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208 + __netif_receive_skb net/core/dev.c:4246 + process_backlog+0x667/0xba0 net/core/dev.c:4866 + napi_poll net/core/dev.c:5268 + net_rx_action+0xc95/0x1590 net/core/dev.c:5333 + __do_softirq+0x485/0x942 kernel/softirq.c:284 + ================================================================== + +Similar error is reported for cookie_v4_check(). + +Fixes: 58d607d3e52f ("tcp: provide skb->hash to synack packets") +Signed-off-by: Alexander Potapenko +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/syncookies.c | 1 + + net/ipv6/syncookies.c | 1 + + 2 files changed, 2 insertions(+) + +--- a/net/ipv4/syncookies.c ++++ b/net/ipv4/syncookies.c +@@ -332,6 +332,7 @@ struct sock *cookie_v4_check(struct sock + treq->rcv_isn = ntohl(th->seq) - 1; + treq->snt_isn = cookie; + treq->ts_off = 0; ++ treq->txhash = net_tx_rndhash(); + req->mss = mss; + ireq->ir_num = ntohs(th->dest); + ireq->ir_rmt_port = th->source; +--- a/net/ipv6/syncookies.c ++++ b/net/ipv6/syncookies.c +@@ -215,6 +215,7 @@ struct sock *cookie_v6_check(struct sock + treq->rcv_isn = ntohl(th->seq) - 1; + treq->snt_isn = cookie; + treq->ts_off = 0; ++ treq->txhash = net_tx_rndhash(); + + /* + * We need to lookup the dst_entry to get the correct window size. diff --git a/queue-4.12/ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch b/queue-4.12/ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch new file mode 100644 index 00000000000..f23e8769049 --- /dev/null +++ b/queue-4.12/ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch @@ -0,0 +1,55 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Sabrina Dubroca +Date: Wed, 19 Jul 2017 22:28:55 +0200 +Subject: ipv6: avoid overflow of offset in ip6_find_1stfragopt + +From: Sabrina Dubroca + + +[ Upstream commit 6399f1fae4ec29fab5ec76070435555e256ca3a6 ] + +In some cases, offset can overflow and can cause an infinite loop in +ip6_find_1stfragopt(). Make it unsigned int to prevent the overflow, and +cap it at IPV6_MAXPLEN, since packets larger than that should be invalid. + +This problem has been here since before the beginning of git history. + +Signed-off-by: Sabrina Dubroca +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/output_core.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/ipv6/output_core.c ++++ b/net/ipv6/output_core.c +@@ -78,7 +78,7 @@ EXPORT_SYMBOL(ipv6_select_ident); + + int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) + { +- u16 offset = sizeof(struct ipv6hdr); ++ unsigned int offset = sizeof(struct ipv6hdr); + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); + int found_rhdr = 0; +@@ -86,6 +86,7 @@ int ip6_find_1stfragopt(struct sk_buff * + + while (offset <= packet_len) { + struct ipv6_opt_hdr *exthdr; ++ unsigned int len; + + switch (**nexthdr) { + +@@ -111,7 +112,10 @@ int ip6_find_1stfragopt(struct sk_buff * + + exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + + offset); +- offset += ipv6_optlen(exthdr); ++ len = ipv6_optlen(exthdr); ++ if (len + offset >= IPV6_MAXPLEN) ++ return -EINVAL; ++ offset += len; + *nexthdr = &exthdr->nexthdr; + } + diff --git a/queue-4.12/ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch b/queue-4.12/ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch new file mode 100644 index 00000000000..99dbd53b5eb --- /dev/null +++ b/queue-4.12/ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch @@ -0,0 +1,54 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Stefano Brivio +Date: Mon, 24 Jul 2017 23:14:28 +0200 +Subject: ipv6: Don't increase IPSTATS_MIB_FRAGFAILS twice in ip6_fragment() + +From: Stefano Brivio + + +[ Upstream commit afce615aaabfbaad02550e75c0bec106dafa1adf ] + +RFC 2465 defines ipv6IfStatsOutFragFails as: + + "The number of IPv6 datagrams that have been discarded + because they needed to be fragmented at this output + interface but could not be." + +The existing implementation, instead, would increase the counter +twice in case we fail to allocate room for single fragments: +once for the fragment, once for the datagram. + +This didn't look intentional though. In one of the two +affected failure paths, the double increase was simply a result +of a new 'goto fail' statement, introduced to avoid an skb leak. +The other path appears to be affected since at least 2.6.12-rc2. + +Reported-by: Sabrina Dubroca +Fixes: 1d325d217c7f ("ipv6: ip6_fragment: fix headroom tests and skb leak") +Signed-off-by: Stefano Brivio +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_output.c | 4 ---- + 1 file changed, 4 deletions(-) + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -673,8 +673,6 @@ int ip6_fragment(struct net *net, struct + *prevhdr = NEXTHDR_FRAGMENT; + tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); + if (!tmp_hdr) { +- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), +- IPSTATS_MIB_FRAGFAILS); + err = -ENOMEM; + goto fail; + } +@@ -793,8 +791,6 @@ slow_path: + frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + + hroom + troom, GFP_ATOMIC); + if (!frag) { +- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), +- IPSTATS_MIB_FRAGFAILS); + err = -ENOMEM; + goto fail; + } diff --git a/queue-4.12/mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch b/queue-4.12/mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch new file mode 100644 index 00000000000..bb6f8ff4981 --- /dev/null +++ b/queue-4.12/mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch @@ -0,0 +1,81 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Thomas Jarosch +Date: Sat, 22 Jul 2017 17:14:34 +0200 +Subject: mcs7780: Fix initialization when CONFIG_VMAP_STACK is enabled + +From: Thomas Jarosch + + +[ Upstream commit 9476d393667968b4a02afbe9d35a3558482b943e ] + +DMA transfers are not allowed to buffers that are on the stack. +Therefore allocate a buffer to store the result of usb_control_msg(). + +Fixes these bug reports: +https://bugzilla.kernel.org/show_bug.cgi?id=195217 + +https://bugzilla.redhat.com/show_bug.cgi?id=1421387 +https://bugzilla.redhat.com/show_bug.cgi?id=1427398 + +Shortened kernel backtrace from 4.11.9-200.fc25.x86_64: +kernel: ------------[ cut here ]------------ +kernel: WARNING: CPU: 3 PID: 2957 at drivers/usb/core/hcd.c:1587 +kernel: transfer buffer not dma capable +kernel: Call Trace: +kernel: dump_stack+0x63/0x86 +kernel: __warn+0xcb/0xf0 +kernel: warn_slowpath_fmt+0x5a/0x80 +kernel: usb_hcd_map_urb_for_dma+0x37f/0x570 +kernel: ? try_to_del_timer_sync+0x53/0x80 +kernel: usb_hcd_submit_urb+0x34e/0xb90 +kernel: ? schedule_timeout+0x17e/0x300 +kernel: ? del_timer_sync+0x50/0x50 +kernel: ? __slab_free+0xa9/0x300 +kernel: usb_submit_urb+0x2f4/0x560 +kernel: ? urb_destroy+0x24/0x30 +kernel: usb_start_wait_urb+0x6e/0x170 +kernel: usb_control_msg+0xdc/0x120 +kernel: mcs_get_reg+0x36/0x40 [mcs7780] +kernel: mcs_net_open+0xb5/0x5c0 [mcs7780] +... + +The regression goes back to 4.9, so it's a good candidate for -stable, +though that's the decision of the maintainer. + +Thanks to Dan Williams for adding the "transfer buffer not dma capable" +warning in the first place. It instantly pointed me in the right direction. + +Patch has been tested with transferring data from a Polar watch. + +Signed-off-by: Thomas Jarosch +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/irda/mcs7780.c | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +--- a/drivers/net/irda/mcs7780.c ++++ b/drivers/net/irda/mcs7780.c +@@ -141,9 +141,19 @@ static int mcs_set_reg(struct mcs_cb *mc + static int mcs_get_reg(struct mcs_cb *mcs, __u16 reg, __u16 * val) + { + struct usb_device *dev = mcs->usbdev; +- int ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ, +- MCS_RD_RTYPE, 0, reg, val, 2, +- msecs_to_jiffies(MCS_CTRL_TIMEOUT)); ++ void *dmabuf; ++ int ret; ++ ++ dmabuf = kmalloc(sizeof(__u16), GFP_KERNEL); ++ if (!dmabuf) ++ return -ENOMEM; ++ ++ ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ, ++ MCS_RD_RTYPE, 0, reg, dmabuf, 2, ++ msecs_to_jiffies(MCS_CTRL_TIMEOUT)); ++ ++ memcpy(val, dmabuf, sizeof(__u16)); ++ kfree(dmabuf); + + return ret; + } diff --git a/queue-4.12/net-bonding-fix-transmit-load-balancing-in-balance-alb-mode.patch b/queue-4.12/net-bonding-fix-transmit-load-balancing-in-balance-alb-mode.patch new file mode 100644 index 00000000000..8414823995e --- /dev/null +++ b/queue-4.12/net-bonding-fix-transmit-load-balancing-in-balance-alb-mode.patch @@ -0,0 +1,43 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Kosuke Tatsukawa +Date: Thu, 20 Jul 2017 05:20:40 +0000 +Subject: net: bonding: Fix transmit load balancing in balance-alb mode + +From: Kosuke Tatsukawa + + +[ Upstream commit cbf5ecb305601d063dc94a57680dfbc3f96c188d ] + +balance-alb mode used to have the transmit dynamic load balancing feature +enabled by default. However, transmit dynamic load balancing no longer +works in balance-alb after commit 8b426dc54cf4 ("bonding: remove +hardcoded value"). + +Both balance-tlb and balance-alb use the function bond_do_alb_xmit() to +send packets. This function uses the parameter tlb_dynamic_lb. +tlb_dynamic_lb used to have the default value of 1 for balance-alb, but +now the value is set to 0 except in balance-tlb. + +Re-enable transmit dynamic load balancing by initializing tlb_dynamic_lb +for balance-alb similar to balance-tlb. + +Fixes: 8b426dc54cf4 ("bonding: remove hardcoded value") +Signed-off-by: Kosuke Tatsukawa +Acked-by: Andy Gospodarek +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -4598,7 +4598,7 @@ static int bond_check_params(struct bond + } + ad_user_port_key = valptr->value; + +- if (bond_mode == BOND_MODE_TLB) { ++ if ((bond_mode == BOND_MODE_TLB) || (bond_mode == BOND_MODE_ALB)) { + bond_opt_initstr(&newval, "default"); + valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB), + &newval); diff --git a/queue-4.12/net-dsa-b53-add-missing-arl-entries-for-bcm53125.patch b/queue-4.12/net-dsa-b53-add-missing-arl-entries-for-bcm53125.patch new file mode 100644 index 00000000000..340113eef89 --- /dev/null +++ b/queue-4.12/net-dsa-b53-add-missing-arl-entries-for-bcm53125.patch @@ -0,0 +1,33 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Florian Fainelli +Date: Thu, 20 Jul 2017 12:25:22 -0700 +Subject: net: dsa: b53: Add missing ARL entries for BCM53125 + +From: Florian Fainelli + + +[ Upstream commit be35e8c516c1915a3035d266a2015b41f73ba3f9 ] + +The BCM53125 entry was missing an arl_entries member which would +basically prevent the ARL search from terminating properly. This switch +has 4 ARL entries, so add that. 
+ +Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations") +Signed-off-by: Florian Fainelli +Reviewed-by: Vivien Didelot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/b53/b53_common.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/dsa/b53/b53_common.c ++++ b/drivers/net/dsa/b53/b53_common.c +@@ -1668,6 +1668,7 @@ static const struct b53_chip_data b53_sw + .dev_name = "BCM53125", + .vlans = 4096, + .enabled_ports = 0xff, ++ .arl_entries = 4, + .cpu_port = B53_CPU_PORT, + .vta_regs = B53_VTA_REGS, + .duplex_reg = B53_DUPLEX_STAT_GE, diff --git a/queue-4.12/net-dsa-mv88e6xxx-enable-cmode-config-support-for-6390x.patch b/queue-4.12/net-dsa-mv88e6xxx-enable-cmode-config-support-for-6390x.patch new file mode 100644 index 00000000000..7e89cdc3520 --- /dev/null +++ b/queue-4.12/net-dsa-mv88e6xxx-enable-cmode-config-support-for-6390x.patch @@ -0,0 +1,38 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Martin Hundebøll +Date: Wed, 19 Jul 2017 08:17:02 +0200 +Subject: net: dsa: mv88e6xxx: Enable CMODE config support for 6390X + +From: Martin Hundebøll + + +[ Upstream commit bb0a2675f72b458e64f47071e8aabdb225a6af4d ] + +Commit f39908d3b1c45 ('net: dsa: mv88e6xxx: Set the CMODE for mv88e6390 +ports 9 & 10') added support for setting the CMODE for the 6390X family, +but only enabled it for 9290 and 6390 - and left out 6390X. + +Fix support for setting the CMODE on 6390X also by assigning +mv88e6390x_port_set_cmode() to the .port_set_cmode function pointer in +mv88e6390x_ops too. + +Fixes: f39908d3b1c4 ("net: dsa: mv88e6xxx: Set the CMODE for mv88e6390 ports 9 & 10") +Signed-off-by: Martin Hundebøll +Reviewed-by: Andrew Lunn +Reviewed-by: Vivien Didelot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/mv88e6xxx/chip.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/dsa/mv88e6xxx/chip.c ++++ b/drivers/net/dsa/mv88e6xxx/chip.c +@@ -3377,6 +3377,7 @@ static const struct mv88e6xxx_ops mv88e6 + .port_jumbo_config = mv88e6165_port_jumbo_config, + .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, + .port_pause_config = mv88e6390_port_pause_config, ++ .port_set_cmode = mv88e6390x_port_set_cmode, + .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, + .port_disable_pri_override = mv88e6xxx_port_disable_pri_override, + .stats_snapshot = mv88e6390_g1_stats_snapshot, diff --git a/queue-4.12/net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch b/queue-4.12/net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch new file mode 100644 index 00000000000..4f259fa843e --- /dev/null +++ b/queue-4.12/net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch @@ -0,0 +1,51 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Marc Gonzalez +Date: Tue, 25 Jul 2017 14:35:03 +0200 +Subject: net: ethernet: nb8800: Handle all 4 RGMII modes identically + +From: Marc Gonzalez + + +[ Upstream commit 4813497b537c6208c90d6cbecac5072d347de900 ] + +Before commit bf8f6952a233 ("Add blurb about RGMII") it was unclear +whose responsibility it was to insert the required clock skew, and +in hindsight, some PHY drivers got it wrong. The solution forward +is to introduce a new property, explicitly requiring skew from the +node to which it is attached. In the interim, this driver will handle +all 4 RGMII modes identically (no skew). 
+ +Fixes: 52dfc8301248 ("net: ethernet: add driver for Aurora VLSI NB8800 Ethernet controller") +Signed-off-by: Marc Gonzalez +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/aurora/nb8800.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/aurora/nb8800.c ++++ b/drivers/net/ethernet/aurora/nb8800.c +@@ -609,7 +609,7 @@ static void nb8800_mac_config(struct net + mac_mode |= HALF_DUPLEX; + + if (gigabit) { +- if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII) ++ if (phy_interface_is_rgmii(dev->phydev)) + mac_mode |= RGMII_MODE; + + mac_mode |= GMAC_MODE; +@@ -1268,11 +1268,10 @@ static int nb8800_tangox_init(struct net + break; + + case PHY_INTERFACE_MODE_RGMII: +- pad_mode = PAD_MODE_RGMII; +- break; +- ++ case PHY_INTERFACE_MODE_RGMII_ID: ++ case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: +- pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY; ++ pad_mode = PAD_MODE_RGMII; + break; + + default: diff --git a/queue-4.12/net-mlx5-consider-tx_enabled-in-all-modes-on-remap.patch b/queue-4.12/net-mlx5-consider-tx_enabled-in-all-modes-on-remap.patch new file mode 100644 index 00000000000..4b16ba8259c --- /dev/null +++ b/queue-4.12/net-mlx5-consider-tx_enabled-in-all-modes-on-remap.patch @@ -0,0 +1,64 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Aviv Heller +Date: Sun, 2 Jul 2017 19:13:43 +0300 +Subject: net/mlx5: Consider tx_enabled in all modes on remap + +From: Aviv Heller + + +[ Upstream commit dc798b4cc0f2a06e7ad7d522403de274b86a0a6f ] + +The tx_enabled lag event field is used to determine whether a slave is +active. +The current logic uses this value only if the mode is active-backup. + +However, LACP mode, although considered a load balancing mode, can mark +a slave as inactive in certain situations (e.g., LACP timeout). + +This fix takes the tx_enabled value into account when remapping, +regardless of the LAG mode (this should not affect the behavior in XOR +mode, since in this mode both slaves are marked as active). 
+ +Fixes: 7907f23adc18 (net/mlx5: Implement RoCE LAG feature) +Signed-off-by: Aviv Heller +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/lag.c | 25 ++++++++++--------------- + 1 file changed, 10 insertions(+), 15 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c +@@ -157,22 +157,17 @@ static bool mlx5_lag_is_bonded(struct ml + static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, + u8 *port1, u8 *port2) + { +- if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { +- if (tracker->netdev_state[0].tx_enabled) { +- *port1 = 1; +- *port2 = 1; +- } else { +- *port1 = 2; +- *port2 = 2; +- } +- } else { +- *port1 = 1; +- *port2 = 2; +- if (!tracker->netdev_state[0].link_up) +- *port1 = 2; +- else if (!tracker->netdev_state[1].link_up) +- *port2 = 1; ++ *port1 = 1; ++ *port2 = 2; ++ if (!tracker->netdev_state[0].tx_enabled || ++ !tracker->netdev_state[0].link_up) { ++ *port1 = 2; ++ return; + } ++ ++ if (!tracker->netdev_state[1].tx_enabled || ++ !tracker->netdev_state[1].link_up) ++ *port2 = 1; + } + + static void mlx5_activate_lag(struct mlx5_lag *ldev, diff --git a/queue-4.12/net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch b/queue-4.12/net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch new file mode 100644 index 00000000000..13e1c03307e --- /dev/null +++ b/queue-4.12/net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch @@ -0,0 +1,66 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Moshe Shemesh +Date: Sun, 25 Jun 2017 18:45:32 +0300 +Subject: net/mlx5: Fix command bad flow on command entry allocation failure + +From: Moshe Shemesh + + +[ Upstream commit 219c81f7d1d5a89656cb3b53d3b4e11e93608d80 ] + +When the driver fails to allocate an entry to send a command to the FW, +it must notify the calling function and release the memory allocated for +this command. + +Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB adapters') +Signed-off-by: Moshe Shemesh +Cc: kernel-team@fb.com +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +@@ -777,6 +777,10 @@ static void cb_timeout_handler(struct wo + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + } + ++static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); ++static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, ++ struct mlx5_cmd_msg *msg); ++ + static void cmd_work_handler(struct work_struct *work) + { + struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); + struct mlx5_cmd *cmd = ent->cmd; + struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd); + struct mlx5_cmd_layout *lay; + struct semaphore *sem; + unsigned long flags; ++ int alloc_ret; + + sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; + down(sem); + if (!ent->page_queue) { +- ent->idx = alloc_ent(cmd); +- if (ent->idx < 0) { ++ alloc_ret = alloc_ent(cmd); ++ if (alloc_ret < 0) { + mlx5_core_err(dev, "failed to allocate command entry\n"); ++ if (ent->callback) { ++ ent->callback(-EAGAIN, ent->context); ++ mlx5_free_cmd_msg(dev, ent->out); ++ free_msg(dev, ent->in); ++ free_cmd(ent); ++ } else { ++ ent->ret = -EAGAIN; ++ complete(&ent->done); ++ } + up(sem); + return; + } ++ ent->idx = alloc_ret; + } else { + ent->idx = cmd->max_reg_cmds; + spin_lock_irqsave(&cmd->alloc_lock, flags); diff --git a/queue-4.12/net-mlx5-fix-command-completion-after-timeout-access-invalid-structure.patch b/queue-4.12/net-mlx5-fix-command-completion-after-timeout-access-invalid-structure.patch new file mode 100644 index 00000000000..b69bc9ae02a --- /dev/null +++ b/queue-4.12/net-mlx5-fix-command-completion-after-timeout-access-invalid-structure.patch @@ -0,0 +1,52 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Moshe Shemesh +Date: Thu, 6 Jul 2017 15:48:40 +0300 +Subject: net/mlx5: Fix command completion after timeout access invalid structure + +From: Moshe Shemesh + + +[ Upstream commit 061870800efb4e3d1ad4082a2569363629bdfcfc ] + +Completion on timeout should not free the driver command entry structure, +as it will need to access it again once the real completion event from +the FW occurs. + +Fixes: 73dd3a4839c1 ('net/mlx5: Avoid using pending command interface slots') +Signed-off-by: Moshe Shemesh +Cc: kernel-team@fb.com +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +@@ -955,7 +955,7 @@ static int mlx5_cmd_invoke(struct mlx5_c + + err = wait_func(dev, ent); + if (err == -ETIMEDOUT) +- goto out_free; ++ goto out; + + ds = ent->ts2 - ent->ts1; + op = MLX5_GET(mbox_in, in->first.data, opcode); +@@ -1419,6 +1419,7 @@ void mlx5_cmd_comp_handler(struct mlx5_c + mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n", + ent->idx); + free_ent(cmd, ent->idx); ++ free_cmd(ent); + } + continue; + } +@@ -1477,7 +1478,8 @@ void mlx5_cmd_comp_handler(struct mlx5_c + free_msg(dev, ent->in); + + err = err ? err : ent->status; +- free_cmd(ent); ++ if (!forced) ++ free_cmd(ent); + callback(err, context); + } else { + complete(&ent->done); diff --git a/queue-4.12/net-mlx5-fix-mlx5_add_flow_rules-call-with-correct-num-of-dests.patch b/queue-4.12/net-mlx5-fix-mlx5_add_flow_rules-call-with-correct-num-of-dests.patch new file mode 100644 index 00000000000..f1ccbac9c77 --- /dev/null +++ b/queue-4.12/net-mlx5-fix-mlx5_add_flow_rules-call-with-correct-num-of-dests.patch @@ -0,0 +1,41 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Paul Blakey +Date: Thu, 6 Jul 2017 16:40:34 +0300 +Subject: net/mlx5: Fix mlx5_add_flow_rules call with correct num of dests + +From: Paul Blakey + + +[ Upstream commit bcec601f30fb41e9233674942fa4040a6e63657a ] + +When adding an ethtool steering rule with action DISCARD, we wrongly +pass a NULL dest with dest_num 1 to mlx5_add_flow_rules(). +What this error seems to have caused is sending VPORT 0 +(MLX5_FLOW_DESTINATION_TYPE_VPORT) as the fte dest instead of no dests. +We have the fte action correctly set to DROP, so it might have been +ignored anyway. 
+ +To reproduce use: + # sudo ethtool --config-nfc flow-type ether \ + dst aa:bb:cc:dd:ee:ff action -1 + +Fixes: 74491de93712 ("net/mlx5: Add multi dest support") +Signed-off-by: Paul Blakey +Reviewed-by: Mark Bloch +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +@@ -320,7 +320,7 @@ add_ethtool_flow_rule(struct mlx5e_priv + + spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); + flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; +- rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1); ++ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n", diff --git a/queue-4.12/net-mlx5-fix-mlx5_ifc_mtpps_reg_bits-structure-size.patch b/queue-4.12/net-mlx5-fix-mlx5_ifc_mtpps_reg_bits-structure-size.patch new file mode 100644 index 00000000000..f151e02a44b --- /dev/null +++ b/queue-4.12/net-mlx5-fix-mlx5_ifc_mtpps_reg_bits-structure-size.patch @@ -0,0 +1,31 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Eugenia Emantayev +Date: Thu, 25 May 2017 15:11:26 +0300 +Subject: net/mlx5: Fix mlx5_ifc_mtpps_reg_bits structure size + +From: Eugenia Emantayev + + +[ Upstream commit 0b794ffae7afa7c4e5accac8791c4b78e8d080ce ] + +Fix miscalculation in reserved_at_1a0 field. + +Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support') +Signed-off-by: Eugenia Emantayev +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mlx5/mlx5_ifc.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/linux/mlx5/mlx5_ifc.h ++++ b/include/linux/mlx5/mlx5_ifc.h +@@ -8131,7 +8131,7 @@ struct mlx5_ifc_mtpps_reg_bits { + u8 out_pulse_duration[0x10]; + u8 out_periodic_adjustment[0x10]; + +- u8 reserved_at_1a0[0x60]; ++ u8 reserved_at_1a0[0x40]; + }; + + struct mlx5_ifc_mtppse_reg_bits { diff --git a/queue-4.12/net-mlx5e-add-field-select-to-mtpps-register.patch b/queue-4.12/net-mlx5e-add-field-select-to-mtpps-register.patch new file mode 100644 index 00000000000..37e8babe0f2 --- /dev/null +++ b/queue-4.12/net-mlx5e-add-field-select-to-mtpps-register.patch @@ -0,0 +1,165 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Eugenia Emantayev +Date: Thu, 25 May 2017 16:09:34 +0300 +Subject: net/mlx5e: Add field select to MTPPS register + +From: Eugenia Emantayev + + +[ Upstream commit fa3676885e3b5be1edfa1b2cc775e20a45b34a19 ] + +In order to mark the relevant fields while setting the MTPPS register, +add field select. Otherwise it can cause a misconfiguration in the +firmware. 
+ +Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support') +Signed-off-by: Eugenia Emantayev +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 29 +++++++++++++++----- + drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 - + drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 5 +++ + include/linux/mlx5/mlx5_ifc.h | 10 ++++-- + 4 files changed, 36 insertions(+), 10 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +@@ -53,6 +53,15 @@ enum { + MLX5E_EVENT_MODE_ONCE_TILL_ARM = 0x2, + }; + ++enum { ++ MLX5E_MTPPS_FS_ENABLE = BIT(0x0), ++ MLX5E_MTPPS_FS_PATTERN = BIT(0x2), ++ MLX5E_MTPPS_FS_PIN_MODE = BIT(0x3), ++ MLX5E_MTPPS_FS_TIME_STAMP = BIT(0x4), ++ MLX5E_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5), ++ MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), ++}; ++ + void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp, + struct skb_shared_hwtstamps *hwts) + { +@@ -222,7 +231,10 @@ static int mlx5e_ptp_adjfreq(struct ptp_ + + /* For future use need to add a loop for finding all 1PPS out pins */ + MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT); +- MLX5_SET(mtpps_reg, in, out_periodic_adjustment, delta & 0xFFFF); ++ MLX5_SET(mtpps_reg, in, enhanced_out_periodic_adjustment, delta); ++ MLX5_SET(mtpps_reg, in, field_select, ++ MLX5E_MTPPS_FS_PIN_MODE | ++ MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ); + + mlx5_set_mtpps(priv->mdev, in, sizeof(in)); + } +@@ -258,8 +270,7 @@ static int mlx5e_extts_configure(struct + int pin = -1; + int err = 0; + +- if (!MLX5_CAP_GEN(priv->mdev, pps) || +- !MLX5_CAP_GEN(priv->mdev, pps_modify)) ++ if (!MLX5_PPS_CAP(priv->mdev)) + return -EOPNOTSUPP; + + if (rq->extts.index >= tstamp->ptp_info.n_pins) +@@ -278,6 +289,9 @@ static int mlx5e_extts_configure(struct + MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_IN); + MLX5_SET(mtpps_reg, in, pattern, pattern); + MLX5_SET(mtpps_reg, in, enable, on); ++ MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE | ++ MLX5E_MTPPS_FS_PATTERN | ++ MLX5E_MTPPS_FS_ENABLE); + + err = mlx5_set_mtpps(priv->mdev, in, sizeof(in)); + if (err) +@@ -303,7 +317,7 @@ static int mlx5e_perout_configure(struct + int pin = -1; + s64 ns; + +- if (!MLX5_CAP_GEN(priv->mdev, pps_modify)) ++ if (!MLX5_PPS_CAP(priv->mdev)) + return -EOPNOTSUPP; + + if (rq->perout.index >= tstamp->ptp_info.n_pins) +@@ -338,7 +352,10 @@ static int mlx5e_perout_configure(struct + MLX5_SET(mtpps_reg, in, pattern, MLX5E_OUT_PATTERN_PERIODIC); + MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); +- ++ MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE | ++ MLX5E_MTPPS_FS_PATTERN | ++ MLX5E_MTPPS_FS_ENABLE | ++ MLX5E_MTPPS_FS_TIME_STAMP); + return mlx5_set_mtpps(priv->mdev, in, sizeof(in)); + } + +@@ -488,7 +505,7 @@ void mlx5e_timestamp_init(struct mlx5e_p + #define MAX_PIN_NUM 8 + tstamp->pps_pin_caps = kzalloc(sizeof(u8) * MAX_PIN_NUM, GFP_KERNEL); + if (tstamp->pps_pin_caps) { +- if (MLX5_CAP_GEN(priv->mdev, pps)) ++ if (MLX5_PPS_CAP(priv->mdev)) + mlx5e_get_pps_caps(priv, tstamp); + if (tstamp->ptp_info.n_pins) + mlx5e_init_pin_config(tstamp); +--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c +@@ -690,7 +690,7 @@ int mlx5_start_eqs(struct mlx5_core_dev + else + mlx5_core_dbg(dev, "port_module_event is not set\n"); + +- if (MLX5_CAP_GEN(dev, pps)) ++ if (MLX5_PPS_CAP(dev)) + async_event_mask |= (1ull << 
MLX5_EVENT_TYPE_PPS_EVENT); + + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, +--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +@@ -153,6 +153,11 @@ int mlx5_set_mtpps(struct mlx5_core_dev + int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); + int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); + ++#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \ ++ MLX5_CAP_GEN((mdev), pps_modify) && \ ++ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ ++ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj)) ++ + void mlx5e_init(void); + void mlx5e_cleanup(void); + +--- a/include/linux/mlx5/mlx5_ifc.h ++++ b/include/linux/mlx5/mlx5_ifc.h +@@ -7718,8 +7718,10 @@ struct mlx5_ifc_pcam_reg_bits { + }; + + struct mlx5_ifc_mcam_enhanced_features_bits { +- u8 reserved_at_0[0x7f]; ++ u8 reserved_at_0[0x7d]; + ++ u8 mtpps_enh_out_per_adj[0x1]; ++ u8 mtpps_fs[0x1]; + u8 pcie_performance_group[0x1]; + }; + +@@ -8115,7 +8117,8 @@ struct mlx5_ifc_mtpps_reg_bits { + u8 reserved_at_78[0x4]; + u8 cap_pin_4_mode[0x4]; + +- u8 reserved_at_80[0x80]; ++ u8 field_select[0x20]; ++ u8 reserved_at_a0[0x60]; + + u8 enable[0x1]; + u8 reserved_at_101[0xb]; +@@ -8130,8 +8133,9 @@ struct mlx5_ifc_mtpps_reg_bits { + + u8 out_pulse_duration[0x10]; + u8 out_periodic_adjustment[0x10]; ++ u8 enhanced_out_periodic_adjustment[0x20]; + +- u8 reserved_at_1a0[0x40]; ++ u8 reserved_at_1c0[0x20]; + }; + + struct mlx5_ifc_mtppse_reg_bits { diff --git a/queue-4.12/net-mlx5e-add-missing-support-for-ptp_clk_req_pps-request.patch b/queue-4.12/net-mlx5e-add-missing-support-for-ptp_clk_req_pps-request.patch new file mode 100644 index 00000000000..a40ff893207 --- /dev/null +++ b/queue-4.12/net-mlx5e-add-missing-support-for-ptp_clk_req_pps-request.patch @@ -0,0 +1,95 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Eugenia Emantayev +Date: Sun, 28 May 2017 14:27:02 +0300 +Subject: net/mlx5e: Add missing support for PTP_CLK_REQ_PPS request + +From: Eugenia Emantayev + + +[ Upstream commit cf5033089b078303b102b65e3ccbbfa3ce0f4367 ] + +Add the missing option to enable the PTP_CLK_PPS function. +In this case pin should be configured as 1PPS IN first and +then it will be connected to PPS mechanism. +Events will be reported as PTP_CLOCK_PPSUSR events to relevant sysfs. 
+ +Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support') +Signed-off-by: Eugenia Emantayev +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + + drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 20 ++++++++++++++++++++ + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 - + 3 files changed, 21 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h +@@ -266,6 +266,7 @@ struct mlx5e_pps { + u8 pin_caps[MAX_PIN_NUM]; + struct work_struct out_work; + u64 start[MAX_PIN_NUM]; ++ u8 enabled; + }; + + struct mlx5e_tstamp { +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +@@ -394,6 +394,17 @@ static int mlx5e_perout_configure(struct + MLX5E_EVENT_MODE_REPETETIVE & on); + } + ++static int mlx5e_pps_configure(struct ptp_clock_info *ptp, ++ struct ptp_clock_request *rq, ++ int on) ++{ ++ struct mlx5e_tstamp *tstamp = ++ container_of(ptp, struct mlx5e_tstamp, ptp_info); ++ ++ tstamp->pps_info.enabled = !!on; ++ return 0; ++} ++ + static int mlx5e_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +@@ -403,6 +414,8 @@ static int mlx5e_ptp_enable(struct ptp_c + return mlx5e_extts_configure(ptp, rq, on); + case PTP_CLK_REQ_PEROUT: + return mlx5e_perout_configure(ptp, rq, on); ++ case PTP_CLK_REQ_PPS: ++ return mlx5e_pps_configure(ptp, rq, on); + default: + return -EOPNOTSUPP; + } +@@ -448,6 +461,7 @@ static int mlx5e_init_pin_config(struct + return -ENOMEM; + tstamp->ptp_info.enable = mlx5e_ptp_enable; + tstamp->ptp_info.verify = mlx5e_ptp_verify; ++ tstamp->ptp_info.pps = 1; + + for (i = 0; i < tstamp->ptp_info.n_pins; i++) { + snprintf(tstamp->ptp_info.pin_config[i].name, +@@ -499,6 +513,12 @@ void mlx5e_pps_event_handler(struct mlx5 + + switch (tstamp->ptp_info.pin_config[pin].func) { + case PTP_PF_EXTTS: ++ if (tstamp->pps_info.enabled) { ++ event->type = PTP_CLOCK_PPSUSR; ++ event->pps_times.ts_real = ns_to_timespec64(event->timestamp); ++ } else { ++ event->type = PTP_CLOCK_EXTTS; ++ } + ptp_clock_event(tstamp->ptp, event); + break; + case PTP_PF_PEROUT: +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -365,7 +365,6 @@ static void mlx5e_async_event(struct mlx + break; + case MLX5_DEV_EVENT_PPS: + eqe = (struct mlx5_eqe *)param; +- ptp_event.type = PTP_CLOCK_EXTTS; + ptp_event.index = eqe->data.pps.pin; + ptp_event.timestamp = + timecounter_cyc2time(&priv->tstamp.clock, diff --git a/queue-4.12/net-mlx5e-change-1pps-out-scheme.patch b/queue-4.12/net-mlx5e-change-1pps-out-scheme.patch new file mode 100644 index 00000000000..81b650d658c --- /dev/null +++ b/queue-4.12/net-mlx5e-change-1pps-out-scheme.patch @@ -0,0 +1,236 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Eugenia Emantayev +Date: Sun, 28 May 2017 14:06:01 +0300 +Subject: net/mlx5e: Change 1PPS out scheme + +From: Eugenia Emantayev + + +[ Upstream commit 4272f9b88db9223216cdf87314f570f6d81295b4 ] + +In order to fix the drift in 1PPS out need to adjust the next pulse. +On each 1PPS out falling edge driver gets the event, then the event +handler adjusts the next pulse starting time. 
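+
+For reference, such a request originates from user space as a
+PTP_PEROUT_REQUEST ioctl; a minimal sketch (device path and pin index are
+assumptions, the pin must already be assigned PTP_PF_PEROUT, e.g. via
+PTP_PIN_SETFUNC, and this driver accepts only a 1 sec period):
+
+  #include <fcntl.h>
+  #include <stdio.h>
+  #include <string.h>
+  #include <time.h>
+  #include <sys/ioctl.h>
+  #include <linux/ptp_clock.h>
+
+  int main(void)
+  {
+          struct ptp_perout_request req;
+          int fd = open("/dev/ptp0", O_RDWR);
+
+          if (fd < 0) {
+                  perror("open");
+                  return 1;
+          }
+          memset(&req, 0, sizeof(req));
+          req.index = 0;                  /* 1PPS out pin */
+          /* start on the next full second; assumes the PTP clock
+           * roughly tracks CLOCK_REALTIME, which may not hold */
+          req.start.sec = time(NULL) + 1;
+          req.start.nsec = 0;
+          req.period.sec = 1;
+          req.period.nsec = 0;
+          if (ioctl(fd, PTP_PEROUT_REQUEST, &req))
+                  perror("PTP_PEROUT_REQUEST");
+          return 0;
+  }
+
+With this patch, each falling-edge event then re-arms the next pulse from
+mlx5e_pps_event_handler() via the out_work introduced below.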
+ +Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support') +Signed-off-by: Eugenia Emantayev +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en.h | 9 + + drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 116 ++++++++++++++------- + 2 files changed, 87 insertions(+), 38 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h +@@ -261,6 +261,13 @@ struct mlx5e_dcbx { + }; + #endif + ++#define MAX_PIN_NUM 8 ++struct mlx5e_pps { ++ u8 pin_caps[MAX_PIN_NUM]; ++ struct work_struct out_work; ++ u64 start[MAX_PIN_NUM]; ++}; ++ + struct mlx5e_tstamp { + rwlock_t lock; + struct cyclecounter cycles; +@@ -272,7 +279,7 @@ struct mlx5e_tstamp { + struct mlx5_core_dev *mdev; + struct ptp_clock *ptp; + struct ptp_clock_info ptp_info; +- u8 *pps_pin_caps; ++ struct mlx5e_pps pps_info; + }; + + enum { +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +@@ -82,6 +82,33 @@ static u64 mlx5e_read_internal_timer(con + return mlx5_read_internal_timer(tstamp->mdev) & cc->mask; + } + ++static void mlx5e_pps_out(struct work_struct *work) ++{ ++ struct mlx5e_pps *pps_info = container_of(work, struct mlx5e_pps, ++ out_work); ++ struct mlx5e_tstamp *tstamp = container_of(pps_info, struct mlx5e_tstamp, ++ pps_info); ++ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; ++ unsigned long flags; ++ int i; ++ ++ for (i = 0; i < tstamp->ptp_info.n_pins; i++) { ++ u64 tstart; ++ ++ write_lock_irqsave(&tstamp->lock, flags); ++ tstart = tstamp->pps_info.start[i]; ++ tstamp->pps_info.start[i] = 0; ++ write_unlock_irqrestore(&tstamp->lock, flags); ++ if (!tstart) ++ continue; ++ ++ MLX5_SET(mtpps_reg, in, pin, i); ++ MLX5_SET64(mtpps_reg, in, time_stamp, tstart); ++ MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_TIME_STAMP); ++ mlx5_set_mtpps(tstamp->mdev, in, sizeof(in)); ++ } ++} ++ + static void mlx5e_timestamp_overflow(struct work_struct *work) + { + struct delayed_work *dwork = to_delayed_work(work); +@@ -223,21 +250,6 @@ static int mlx5e_ptp_adjfreq(struct ptp_ + int neg_adj = 0; + struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, + ptp_info); +- struct mlx5e_priv *priv = +- container_of(tstamp, struct mlx5e_priv, tstamp); +- +- if (MLX5_CAP_GEN(priv->mdev, pps_modify)) { +- u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; +- +- /* For future use need to add a loop for finding all 1PPS out pins */ +- MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT); +- MLX5_SET(mtpps_reg, in, enhanced_out_periodic_adjustment, delta); +- MLX5_SET(mtpps_reg, in, field_select, +- MLX5E_MTPPS_FS_PIN_MODE | +- MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ); +- +- mlx5_set_mtpps(priv->mdev, in, sizeof(in)); +- } + + if (delta < 0) { + neg_adj = 1; +@@ -315,7 +327,7 @@ static int mlx5e_perout_configure(struct + struct mlx5e_priv *priv = + container_of(tstamp, struct mlx5e_priv, tstamp); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; +- u64 nsec_now, nsec_delta, time_stamp; ++ u64 nsec_now, nsec_delta, time_stamp = 0; + u64 cycles_now, cycles_delta; + struct timespec64 ts; + unsigned long flags; +@@ -323,6 +335,7 @@ static int mlx5e_perout_configure(struct + u8 pin_mode = 0; + u8 pattern = 0; + int pin = -1; ++ int err = 0; + s64 ns; + + if (!MLX5_PPS_CAP(priv->mdev)) +@@ -373,7 +386,12 @@ static int mlx5e_perout_configure(struct + MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); + MLX5_SET(mtpps_reg, in, field_select, field_select); + +- return 
mlx5_set_mtpps(priv->mdev, in, sizeof(in)); ++ err = mlx5_set_mtpps(priv->mdev, in, sizeof(in)); ++ if (err) ++ return err; ++ ++ return mlx5_set_mtppse(priv->mdev, pin, 0, ++ MLX5E_EVENT_MODE_REPETETIVE & on); + } + + static int mlx5e_ptp_enable(struct ptp_clock_info *ptp, +@@ -457,22 +475,50 @@ static void mlx5e_get_pps_caps(struct ml + tstamp->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_out_pins); + +- tstamp->pps_pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); +- tstamp->pps_pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); +- tstamp->pps_pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); +- tstamp->pps_pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode); +- tstamp->pps_pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode); +- tstamp->pps_pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode); +- tstamp->pps_pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode); +- tstamp->pps_pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); ++ tstamp->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); ++ tstamp->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); ++ tstamp->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); ++ tstamp->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode); ++ tstamp->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode); ++ tstamp->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode); ++ tstamp->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode); ++ tstamp->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); + } + + void mlx5e_pps_event_handler(struct mlx5e_priv *priv, + struct ptp_clock_event *event) + { ++ struct net_device *netdev = priv->netdev; + struct mlx5e_tstamp *tstamp = &priv->tstamp; ++ struct timespec64 ts; ++ u64 nsec_now, nsec_delta; ++ u64 cycles_now, cycles_delta; ++ int pin = event->index; ++ s64 ns; ++ unsigned long flags; + +- ptp_clock_event(tstamp->ptp, event); ++ switch (tstamp->ptp_info.pin_config[pin].func) { ++ case PTP_PF_EXTTS: ++ ptp_clock_event(tstamp->ptp, event); ++ break; ++ case PTP_PF_PEROUT: ++ mlx5e_ptp_gettime(&tstamp->ptp_info, &ts); ++ cycles_now = mlx5_read_internal_timer(tstamp->mdev); ++ ts.tv_sec += 1; ++ ts.tv_nsec = 0; ++ ns = timespec64_to_ns(&ts); ++ write_lock_irqsave(&tstamp->lock, flags); ++ nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now); ++ nsec_delta = ns - nsec_now; ++ cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift, ++ tstamp->cycles.mult); ++ tstamp->pps_info.start[pin] = cycles_now + cycles_delta; ++ queue_work(priv->wq, &tstamp->pps_info.out_work); ++ write_unlock_irqrestore(&tstamp->lock, flags); ++ break; ++ default: ++ netdev_err(netdev, "%s: Unhandled event\n", __func__); ++ } + } + + void mlx5e_timestamp_init(struct mlx5e_priv *priv) +@@ -508,6 +554,7 @@ void mlx5e_timestamp_init(struct mlx5e_p + do_div(ns, NSEC_PER_SEC / 2 / HZ); + tstamp->overflow_period = ns; + ++ INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out); + INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow); + if (tstamp->overflow_period) + schedule_delayed_work(&tstamp->overflow_work, 0); +@@ -519,16 +566,10 @@ void mlx5e_timestamp_init(struct mlx5e_p + snprintf(tstamp->ptp_info.name, 16, "mlx5 ptp"); + + /* Initialize 1PPS data structures */ +-#define MAX_PIN_NUM 8 +- tstamp->pps_pin_caps = kzalloc(sizeof(u8) * MAX_PIN_NUM, GFP_KERNEL); +- if (tstamp->pps_pin_caps) { +- if (MLX5_PPS_CAP(priv->mdev)) +- mlx5e_get_pps_caps(priv, tstamp); +- if 
(tstamp->ptp_info.n_pins) +- mlx5e_init_pin_config(tstamp); +- } else { +- mlx5_core_warn(priv->mdev, "1PPS initialization failed\n"); +- } ++ if (MLX5_PPS_CAP(priv->mdev)) ++ mlx5e_get_pps_caps(priv, tstamp); ++ if (tstamp->ptp_info.n_pins) ++ mlx5e_init_pin_config(tstamp); + + tstamp->ptp = ptp_clock_register(&tstamp->ptp_info, + &priv->mdev->pdev->dev); +@@ -551,7 +592,8 @@ void mlx5e_timestamp_cleanup(struct mlx5 + priv->tstamp.ptp = NULL; + } + +- kfree(tstamp->pps_pin_caps); ++ cancel_work_sync(&tstamp->pps_info.out_work); ++ + kfree(tstamp->ptp_info.pin_config); + + cancel_delayed_work_sync(&tstamp->overflow_work); diff --git a/queue-4.12/net-mlx5e-fix-broken-disable-1pps-flow.patch b/queue-4.12/net-mlx5e-fix-broken-disable-1pps-flow.patch new file mode 100644 index 00000000000..a60e56ecb6a --- /dev/null +++ b/queue-4.12/net-mlx5e-fix-broken-disable-1pps-flow.patch @@ -0,0 +1,137 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Eugenia Emantayev +Date: Sun, 28 May 2017 12:01:38 +0300 +Subject: net/mlx5e: Fix broken disable 1PPS flow + +From: Eugenia Emantayev + + +[ Upstream commit 49c5031ca6f0628ef973a11b17e463e088bf859e ] + +Need to disable the MTPPS and unsubscribe from the pulse events +when user disables the 1PPS functionality. + +Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support') +Signed-off-by: Eugenia Emantayev +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 75 ++++++++++++--------- + 1 file changed, 46 insertions(+), 29 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +@@ -266,6 +266,8 @@ static int mlx5e_extts_configure(struct + struct mlx5e_priv *priv = + container_of(tstamp, struct mlx5e_priv, tstamp); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; ++ u32 field_select = 0; ++ u8 pin_mode = 0; + u8 pattern = 0; + int pin = -1; + int err = 0; +@@ -280,18 +282,21 @@ static int mlx5e_extts_configure(struct + pin = ptp_find_pin(tstamp->ptp, PTP_PF_EXTTS, rq->extts.index); + if (pin < 0) + return -EBUSY; ++ pin_mode = MLX5E_PIN_MODE_IN; ++ pattern = !!(rq->extts.flags & PTP_FALLING_EDGE); ++ field_select = MLX5E_MTPPS_FS_PIN_MODE | ++ MLX5E_MTPPS_FS_PATTERN | ++ MLX5E_MTPPS_FS_ENABLE; ++ } else { ++ pin = rq->extts.index; ++ field_select = MLX5E_MTPPS_FS_ENABLE; + } + +- if (rq->extts.flags & PTP_FALLING_EDGE) +- pattern = 1; +- + MLX5_SET(mtpps_reg, in, pin, pin); +- MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_IN); ++ MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); + MLX5_SET(mtpps_reg, in, pattern, pattern); + MLX5_SET(mtpps_reg, in, enable, on); +- MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE | +- MLX5E_MTPPS_FS_PATTERN | +- MLX5E_MTPPS_FS_ENABLE); ++ MLX5_SET(mtpps_reg, in, field_select, field_select); + + err = mlx5_set_mtpps(priv->mdev, in, sizeof(in)); + if (err) +@@ -314,6 +319,9 @@ static int mlx5e_perout_configure(struct + u64 cycles_now, cycles_delta; + struct timespec64 ts; + unsigned long flags; ++ u32 field_select = 0; ++ u8 pin_mode = 0; ++ u8 pattern = 0; + int pin = -1; + s64 ns; + +@@ -328,34 +336,43 @@ static int mlx5e_perout_configure(struct + rq->perout.index); + if (pin < 0) + return -EBUSY; +- } + +- ts.tv_sec = rq->perout.period.sec; +- ts.tv_nsec = rq->perout.period.nsec; +- ns = timespec64_to_ns(&ts); +- if (on) ++ pin_mode = MLX5E_PIN_MODE_OUT; ++ pattern = MLX5E_OUT_PATTERN_PERIODIC; ++ ts.tv_sec = rq->perout.period.sec; ++ ts.tv_nsec = rq->perout.period.nsec; ++ ns = 
timespec64_to_ns(&ts); ++ + if ((ns >> 1) != 500000000LL) + return -EINVAL; +- ts.tv_sec = rq->perout.start.sec; +- ts.tv_nsec = rq->perout.start.nsec; +- ns = timespec64_to_ns(&ts); +- cycles_now = mlx5_read_internal_timer(tstamp->mdev); +- write_lock_irqsave(&tstamp->lock, flags); +- nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now); +- nsec_delta = ns - nsec_now; +- cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift, +- tstamp->cycles.mult); +- write_unlock_irqrestore(&tstamp->lock, flags); +- time_stamp = cycles_now + cycles_delta; ++ ++ ts.tv_sec = rq->perout.start.sec; ++ ts.tv_nsec = rq->perout.start.nsec; ++ ns = timespec64_to_ns(&ts); ++ cycles_now = mlx5_read_internal_timer(tstamp->mdev); ++ write_lock_irqsave(&tstamp->lock, flags); ++ nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now); ++ nsec_delta = ns - nsec_now; ++ cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift, ++ tstamp->cycles.mult); ++ write_unlock_irqrestore(&tstamp->lock, flags); ++ time_stamp = cycles_now + cycles_delta; ++ field_select = MLX5E_MTPPS_FS_PIN_MODE | ++ MLX5E_MTPPS_FS_PATTERN | ++ MLX5E_MTPPS_FS_ENABLE | ++ MLX5E_MTPPS_FS_TIME_STAMP; ++ } else { ++ pin = rq->perout.index; ++ field_select = MLX5E_MTPPS_FS_ENABLE; ++ } ++ + MLX5_SET(mtpps_reg, in, pin, pin); +- MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT); +- MLX5_SET(mtpps_reg, in, pattern, MLX5E_OUT_PATTERN_PERIODIC); ++ MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); ++ MLX5_SET(mtpps_reg, in, pattern, pattern); + MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); +- MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE | +- MLX5E_MTPPS_FS_PATTERN | +- MLX5E_MTPPS_FS_ENABLE | +- MLX5E_MTPPS_FS_TIME_STAMP); ++ MLX5_SET(mtpps_reg, in, field_select, field_select); ++ + return mlx5_set_mtpps(priv->mdev, in, sizeof(in)); + } + diff --git a/queue-4.12/net-mlx5e-fix-outer_header_zero-check-size.patch b/queue-4.12/net-mlx5e-fix-outer_header_zero-check-size.patch new file mode 100644 index 00000000000..81a6dff6fdc --- /dev/null +++ b/queue-4.12/net-mlx5e-fix-outer_header_zero-check-size.patch @@ -0,0 +1,38 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Ilan Tayari +Date: Wed, 5 Jul 2017 10:17:04 +0300 +Subject: net/mlx5e: Fix outer_header_zero() check size + +From: Ilan Tayari + + +[ Upstream commit 0242f4a0bb03906010bbf80495512be00494a0ef ] + +outer_header_zero() routine checks if the outer_headers match of a +flow-table entry are all zero. + +This function uses the size of whole fte_match_param, instead of just +the outer_headers member, causing failure to detect all-zeros if +any other members of the fte_match_param are non-zero. + +Use the correct size for zero check. 
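+
+The mistake generalizes: zero-checking one member with the size of its
+containing struct scans sibling fields too. A standalone sketch of the
+pattern (names invented for illustration):
+
+  #include <stdio.h>
+  #include <string.h>
+
+  struct match {                  /* stand-in for fte_match_param */
+          char outer[16];         /* stand-in for outer_headers */
+          char misc[16];
+  };
+
+  static int all_zero(const char *p, size_t len)
+  {
+          while (len--)
+                  if (*p++)
+                          return 0;
+          return 1;
+  }
+
+  int main(void)
+  {
+          struct match m;
+
+          memset(&m, 0, sizeof(m));
+          m.misc[0] = 1;          /* only a sibling member is non-zero */
+
+          /* wrong: scans past 'outer' into 'misc', false negative */
+          printf("struct size: %d\n", all_zero(m.outer, sizeof(m)));
+          /* right: limit the scan to the member, as the fix does */
+          printf("member size: %d\n", all_zero(m.outer, sizeof(m.outer)));
+          return 0;
+  }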
+ +Fixes: 6dc6071cfcde ("net/mlx5e: Add ethtool flow steering support") +Signed-off-by: Ilan Tayari +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +@@ -276,7 +276,7 @@ static void add_rule_to_list(struct mlx5 + + static bool outer_header_zero(u32 *match_criteria) + { +- int size = MLX5_ST_SZ_BYTES(fte_match_param); ++ int size = MLX5_FLD_SZ_BYTES(fte_match_param, outer_headers); + char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers); + diff --git a/queue-4.12/net-mlx5e-fix-wrong-delay-calculation-for-overflow-check-scheduling.patch b/queue-4.12/net-mlx5e-fix-wrong-delay-calculation-for-overflow-check-scheduling.patch new file mode 100644 index 00000000000..3aca2ef549d --- /dev/null +++ b/queue-4.12/net-mlx5e-fix-wrong-delay-calculation-for-overflow-check-scheduling.patch @@ -0,0 +1,33 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Eugenia Emantayev +Date: Wed, 12 Jul 2017 17:27:18 +0300 +Subject: net/mlx5e: Fix wrong delay calculation for overflow check scheduling + +From: Eugenia Emantayev + + +[ Upstream commit d439c84509a510e864fdc6166c760482cd03fc57 ] + +The overflow_period is calculated in seconds. In order to use it +for delayed work scheduling translation to jiffies is needed. + +Fixes: ef9814deafd0 ('net/mlx5e: Add HW timestamping (TS) support') +Signed-off-by: Eugenia Emantayev +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +@@ -119,7 +119,8 @@ static void mlx5e_timestamp_overflow(str + write_lock_irqsave(&tstamp->lock, flags); + timecounter_read(&tstamp->clock); + write_unlock_irqrestore(&tstamp->lock, flags); +- schedule_delayed_work(&tstamp->overflow_work, tstamp->overflow_period); ++ schedule_delayed_work(&tstamp->overflow_work, ++ msecs_to_jiffies(tstamp->overflow_period * 1000)); + } + + int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) diff --git a/queue-4.12/net-mlx5e-ipoib-modify-add-remove-underlay-qpn-flows.patch b/queue-4.12/net-mlx5e-ipoib-modify-add-remove-underlay-qpn-flows.patch new file mode 100644 index 00000000000..f96530c79bd --- /dev/null +++ b/queue-4.12/net-mlx5e-ipoib-modify-add-remove-underlay-qpn-flows.patch @@ -0,0 +1,97 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Alex Vesker +Date: Thu, 6 Jul 2017 15:40:32 +0300 +Subject: net/mlx5e: IPoIB, Modify add/remove underlay QPN flows + +From: Alex Vesker + + +[ Upstream commit 58569ef8f619761548e7d198f59e8ebe3af91d04 ] + +On interface remove, the clean-up was done incorrectly causing +an error in the log: +"SET_FLOW_TABLE_ROOT(0x92f) op_mod(0x0) failed...syndrome (0x7e9f14)" + +This was caused by the following flow: +-ndo_uninit: + Move QP state to RST (this disconnects the QP from FT), + the QP cannot be attached to any FT unless it is in RTS. + +-mlx5_rdma_netdev_free: + cleanup_rx: Destroy FT + cleanup_tx: Destroy QP and remove QPN from FT + +This caused a problem when destroying current FT we tried to +re-attach the QP to the next FT which is not needed. 
+ +The correct flow is: +-mlx5_rdma_netdev_free: + cleanup_rx: remove QPN from FT & Destroy FT + cleanup_tx: Destroy QP + +Fixes: 508541146af1 ("net/mlx5: Use underlay QPN from the root name space") +Signed-off-by: Alex Vesker +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/ipoib.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c +@@ -160,8 +160,6 @@ out: + + static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) + { +- mlx5_fs_remove_rx_underlay_qpn(mdev, qp->qpn); +- + mlx5_core_destroy_qp(mdev, qp); + } + +@@ -176,8 +174,6 @@ static int mlx5i_init_tx(struct mlx5e_pr + return err; + } + +- mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); +- + err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]); + if (err) { + mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); +@@ -235,6 +231,7 @@ static void mlx5i_destroy_flow_steering( + + static int mlx5i_init_rx(struct mlx5e_priv *priv) + { ++ struct mlx5i_priv *ipriv = priv->ppriv; + int err; + + err = mlx5e_create_indirect_rqt(priv); +@@ -253,12 +250,18 @@ static int mlx5i_init_rx(struct mlx5e_pr + if (err) + goto err_destroy_indirect_tirs; + +- err = mlx5i_create_flow_steering(priv); ++ err = mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); + if (err) + goto err_destroy_direct_tirs; + ++ err = mlx5i_create_flow_steering(priv); ++ if (err) ++ goto err_remove_rx_underlay_qpn; ++ + return 0; + ++err_remove_rx_underlay_qpn: ++ mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); + err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv); + err_destroy_indirect_tirs: +@@ -272,6 +275,9 @@ err_destroy_indirect_rqts: + + static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) + { ++ struct mlx5i_priv *ipriv = priv->ppriv; ++ ++ mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); + mlx5i_destroy_flow_steering(priv); + mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_indirect_tirs(priv); diff --git a/queue-4.12/net-mlx5e-schedule-overflow-check-work-to-mlx5e-workqueue.patch b/queue-4.12/net-mlx5e-schedule-overflow-check-work-to-mlx5e-workqueue.patch new file mode 100644 index 00000000000..1212971c887 --- /dev/null +++ b/queue-4.12/net-mlx5e-schedule-overflow-check-work-to-mlx5e-workqueue.patch @@ -0,0 +1,58 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Eugenia Emantayev +Date: Wed, 12 Jul 2017 17:44:07 +0300 +Subject: net/mlx5e: Schedule overflow check work to mlx5e workqueue + +From: Eugenia Emantayev + + +[ Upstream commit f08c39ed0bfb503c7b3e013cd40d036ce6a0941a ] + +This is done in order to ensure that work will not run after the cleanup. 
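+
+The pattern being relied on is that delayed work queued on a queue the
+driver owns can be drained before the queue goes away; a minimal
+module-style sketch of that pattern (illustrative only, not the mlx5e
+code itself):
+
+  #include <linux/module.h>
+  #include <linux/workqueue.h>
+  #include <linux/jiffies.h>
+
+  static struct workqueue_struct *wq;
+  static struct delayed_work dwork;
+
+  static void dwork_fn(struct work_struct *work)
+  {
+          /* periodic job: re-arm on the private queue only */
+          queue_delayed_work(wq, &dwork, HZ);
+  }
+
+  static int __init demo_init(void)
+  {
+          wq = create_singlethread_workqueue("demo_wq");
+          if (!wq)
+                  return -ENOMEM;
+          INIT_DELAYED_WORK(&dwork, dwork_fn);
+          queue_delayed_work(wq, &dwork, 0);
+          return 0;
+  }
+
+  static void __exit demo_exit(void)
+  {
+          /* cancel before destroy: nothing runs after this point */
+          cancel_delayed_work_sync(&dwork);
+          destroy_workqueue(wq);
+  }
+
+  module_init(demo_init);
+  module_exit(demo_exit);
+  MODULE_LICENSE("GPL");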
+ +Fixes: ef9814deafd0 ('net/mlx5e: Add HW timestamping (TS) support') +Signed-off-by: Eugenia Emantayev +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +@@ -114,13 +114,14 @@ static void mlx5e_timestamp_overflow(str + struct delayed_work *dwork = to_delayed_work(work); + struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp, + overflow_work); ++ struct mlx5e_priv *priv = container_of(tstamp, struct mlx5e_priv, tstamp); + unsigned long flags; + + write_lock_irqsave(&tstamp->lock, flags); + timecounter_read(&tstamp->clock); + write_unlock_irqrestore(&tstamp->lock, flags); +- schedule_delayed_work(&tstamp->overflow_work, +- msecs_to_jiffies(tstamp->overflow_period * 1000)); ++ queue_delayed_work(priv->wq, &tstamp->overflow_work, ++ msecs_to_jiffies(tstamp->overflow_period * 1000)); + } + + int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) +@@ -578,7 +579,7 @@ void mlx5e_timestamp_init(struct mlx5e_p + INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out); + INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow); + if (tstamp->overflow_period) +- schedule_delayed_work(&tstamp->overflow_work, 0); ++ queue_delayed_work(priv->wq, &tstamp->overflow_work, 0); + else + mlx5_core_warn(priv->mdev, "invalid overflow period, overflow_work is not scheduled\n"); + +@@ -614,8 +615,6 @@ void mlx5e_timestamp_cleanup(struct mlx5 + } + + cancel_work_sync(&tstamp->pps_info.out_work); +- +- kfree(tstamp->ptp_info.pin_config); +- + cancel_delayed_work_sync(&tstamp->overflow_work); ++ kfree(tstamp->ptp_info.pin_config); + } diff --git a/queue-4.12/net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch b/queue-4.12/net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch new file mode 100644 index 00000000000..d51200ef790 --- /dev/null +++ b/queue-4.12/net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch @@ -0,0 +1,43 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Florian Fainelli +Date: Fri, 28 Jul 2017 11:58:36 -0700 +Subject: net: phy: Correctly process PHY_HALTED in phy_stop_machine() + +From: Florian Fainelli + + +[ Upstream commit 7ad813f208533cebfcc32d3d7474dc1677d1b09a ] + +Marc reported that he was not getting the PHY library adjust_link() +callback function to run when calling phy_stop() + phy_disconnect() +which does not indeed happen because we set the state machine to +PHY_HALTED but we don't get to run it to process this state past that +point. + +Fix this with a synchronous call to phy_state_machine() in order to have +the state machine actually act on PHY_HALTED, set the PHY device's link +down, turn the network device's carrier off and finally call the +adjust_link() function. + +Reported-by: Marc Gonzalez +Fixes: a390d1f379cf ("phylib: convert state_queue work to delayed_work") +Signed-off-by: Florian Fainelli +Signed-off-by: Marc Gonzalez +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phy.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/phy/phy.c ++++ b/drivers/net/phy/phy.c +@@ -749,6 +749,9 @@ void phy_stop_machine(struct phy_device + if (phydev->state > PHY_UP && phydev->state != PHY_HALTED) + phydev->state = PHY_UP; + mutex_unlock(&phydev->lock); ++ ++ /* Now we can run the state machine synchronously */ ++ phy_state_machine(&phydev->state_queue.work); + } + + /** diff --git a/queue-4.12/net-zero-terminate-ifr_name-in-dev_ifname.patch b/queue-4.12/net-zero-terminate-ifr_name-in-dev_ifname.patch new file mode 100644 index 00000000000..adb26bba415 --- /dev/null +++ b/queue-4.12/net-zero-terminate-ifr_name-in-dev_ifname.patch @@ -0,0 +1,28 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: "David S. Miller" +Date: Wed, 19 Jul 2017 13:33:24 -0700 +Subject: net: Zero terminate ifr_name in dev_ifname(). + +From: "David S. Miller" + + +[ Upstream commit 63679112c536289826fec61c917621de95ba2ade ] + +The ifr.ifr_name is passed around and assumed to be NULL terminated. + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev_ioctl.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/dev_ioctl.c ++++ b/net/core/dev_ioctl.c +@@ -28,6 +28,7 @@ static int dev_ifname(struct net *net, s + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; ++ ifr.ifr_name[IFNAMSIZ-1] = 0; + + error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex); + if (error) diff --git a/queue-4.12/openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch b/queue-4.12/openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch new file mode 100644 index 00000000000..654fe29e68c --- /dev/null +++ b/queue-4.12/openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch @@ -0,0 +1,46 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Liping Zhang +Date: Sun, 23 Jul 2017 17:52:23 +0800 +Subject: openvswitch: fix potential out of bound access in parse_ct + +From: Liping Zhang + + +[ Upstream commit 69ec932e364b1ba9c3a2085fe96b76c8a3f71e7c ] + +Before the 'type' is validated, we shouldn't use it to fetch the +ovs_ct_attr_lens's minlen and maxlen, else, out of bound access +may happen. + +Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") +Signed-off-by: Liping Zhang +Acked-by: Pravin B Shelar +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/conntrack.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/openvswitch/conntrack.c ++++ b/net/openvswitch/conntrack.c +@@ -1289,8 +1289,8 @@ static int parse_ct(const struct nlattr + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); +- int maxlen = ovs_ct_attr_lens[type].maxlen; +- int minlen = ovs_ct_attr_lens[type].minlen; ++ int maxlen; ++ int minlen; + + if (type > OVS_CT_ATTR_MAX) { + OVS_NLERR(log, +@@ -1298,6 +1298,9 @@ static int parse_ct(const struct nlattr + type, OVS_CT_ATTR_MAX); + return -EINVAL; + } ++ ++ maxlen = ovs_ct_attr_lens[type].maxlen; ++ minlen = ovs_ct_attr_lens[type].minlen; + if (nla_len(a) < minlen || nla_len(a) > maxlen) { + OVS_NLERR(log, + "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)", diff --git a/queue-4.12/packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch b/queue-4.12/packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch new file mode 100644 index 00000000000..505a16b915c --- /dev/null +++ b/queue-4.12/packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch @@ -0,0 +1,60 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: WANG Cong +Date: Mon, 24 Jul 2017 10:07:32 -0700 +Subject: packet: fix use-after-free in prb_retire_rx_blk_timer_expired() + +From: WANG Cong + + +[ Upstream commit c800aaf8d869f2b9b47b10c5c312fe19f0a94042 ] + +There are multiple reports showing we have a use-after-free in +the timer prb_retire_rx_blk_timer_expired(), where we use struct +tpacket_kbdq_core::pkbdq, a pg_vec, after it gets freed by +free_pg_vec(). + +The interesting part is it is not freed via packet_release() but +via packet_setsockopt(), which means we are not closing the socket. +Looking into the big and fat function packet_set_ring(), this could +happen if we satisfy the following conditions: + +1. closing == 0, not on packet_release() path +2. req->tp_block_nr == 0, we don't allocate a new pg_vec +3. rx_ring->pg_vec is already set as V3, which means we already called + packet_set_ring() wtih req->tp_block_nr > 0 previously +4. req->tp_frame_nr == 0, pass sanity check +5. po->mapped == 0, never called mmap() + +In this scenario we are clearing the old rx_ring->pg_vec, so we need +to free this pg_vec, but we don't stop the timer on this path because +of closing==0. + +The timer has to be stopped as long as we need to free pg_vec, therefore +the check on closing!=0 is wrong, we should check pg_vec!=NULL instead. + +Thanks to liujian for testing different fixes. + +Reported-by: alexander.levin@verizon.com +Reported-by: Dave Jones +Reported-by: liujian (CE) +Tested-by: liujian (CE) +Cc: Ding Tianhong +Cc: Willem de Bruijn +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -4334,7 +4334,7 @@ static int packet_set_ring(struct sock * + register_prot_hook(sk); + } + spin_unlock(&po->bind_lock); +- if (closing && (po->tp_version > TPACKET_V2)) { ++ if (pg_vec && (po->tp_version > TPACKET_V2)) { + /* Because we don't support block-based V3 on tx-ring */ + if (!tx_ring) + prb_shutdown_retire_blk_timer(po, rb_queue); diff --git a/queue-4.12/revert-rtnetlink-do-not-generate-notifications-for-changeaddr-event.patch b/queue-4.12/revert-rtnetlink-do-not-generate-notifications-for-changeaddr-event.patch new file mode 100644 index 00000000000..c660dcc2671 --- /dev/null +++ b/queue-4.12/revert-rtnetlink-do-not-generate-notifications-for-changeaddr-event.patch @@ -0,0 +1,38 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: David Ahern +Date: Wed, 19 Jul 2017 10:22:40 -0700 +Subject: Revert "rtnetlink: Do not generate notifications for CHANGEADDR event" + +From: David Ahern + + +[ Upstream commit 3753654e541938717b13f2b25791c3171a3a06aa ] + +This reverts commit cd8966e75ed3c6b41a37047a904617bc44fa481f. + +The duplicate CHANGEADDR event message is sent regardless of link +status whereas the setlink changes only generate a notification when +the link is up. Not sending a notification when the link is down breaks +dhcpcd which only processes hwaddr changes when the link is down. + +Fixes reported regression: + https://bugzilla.kernel.org/show_bug.cgi?id=196355 + +Reported-by: Yaroslav Isakov +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -4165,6 +4165,7 @@ static int rtnetlink_event(struct notifi + + switch (event) { + case NETDEV_REBOOT: ++ case NETDEV_CHANGEADDR: + case NETDEV_CHANGENAME: + case NETDEV_FEAT_CHANGE: + case NETDEV_BONDING_FAILOVER: diff --git a/queue-4.12/rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch b/queue-4.12/rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch new file mode 100644 index 00000000000..2a4cebe1e8c --- /dev/null +++ b/queue-4.12/rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch @@ -0,0 +1,38 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: WANG Cong +Date: Thu, 20 Jul 2017 11:27:57 -0700 +Subject: rtnetlink: allocate more memory for dev_set_mac_address() + +From: WANG Cong + + +[ Upstream commit 153711f9421be5dbc973dc57a4109dc9d54c89b1 ] + +virtnet_set_mac_address() interprets mac address as struct +sockaddr, but upper layer only allocates dev->addr_len +which is ETH_ALEN + sizeof(sa_family_t) in this case. + +We lack a unified definition for mac address, so just fix +the upper layer, this also allows drivers to interpret it +to struct sockaddr freely. + +Reported-by: David Ahern +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -1977,7 +1977,8 @@ static int do_setlink(const struct sk_bu + struct sockaddr *sa; + int len; + +- len = sizeof(sa_family_t) + dev->addr_len; ++ len = sizeof(sa_family_t) + max_t(size_t, dev->addr_len, ++ sizeof(*sa)); + sa = kmalloc(len, GFP_KERNEL); + if (!sa) { + err = -ENOMEM; diff --git a/queue-4.12/sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch b/queue-4.12/sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch new file mode 100644 index 00000000000..5a83a0148d6 --- /dev/null +++ b/queue-4.12/sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch @@ -0,0 +1,140 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Alexander Potapenko +Date: Fri, 14 Jul 2017 18:32:45 +0200 +Subject: sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}() + +From: Alexander Potapenko + + +[ Upstream commit b1f5bfc27a19f214006b9b4db7b9126df2dfdf5a ] + +If the length field of the iterator (|pos.p| or |err|) is past the end +of the chunk, we shouldn't access it. + +This bug has been detected by KMSAN. For the following pair of system +calls: + + socket(PF_INET6, SOCK_STREAM, 0x84 /* IPPROTO_??? */) = 3 + sendto(3, "A", 1, MSG_OOB, {sa_family=AF_INET6, sin6_port=htons(0), + inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, + sin6_scope_id=0}, 28) = 1 + +the tool has reported a use of uninitialized memory: + + ================================================================== + BUG: KMSAN: use of uninitialized memory in sctp_rcv+0x17b8/0x43b0 + CPU: 1 PID: 2940 Comm: probe Not tainted 4.11.0-rc5+ #2926 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs + 01/01/2011 + Call Trace: + + __dump_stack lib/dump_stack.c:16 + dump_stack+0x172/0x1c0 lib/dump_stack.c:52 + kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927 + __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469 + __sctp_rcv_init_lookup net/sctp/input.c:1074 + __sctp_rcv_lookup_harder net/sctp/input.c:1233 + __sctp_rcv_lookup net/sctp/input.c:1255 + sctp_rcv+0x17b8/0x43b0 net/sctp/input.c:170 + sctp6_rcv+0x32/0x70 net/sctp/ipv6.c:984 + ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279 + NF_HOOK ./include/linux/netfilter.h:257 + ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322 + dst_input ./include/net/dst.h:492 + ip6_rcv_finish net/ipv6/ip6_input.c:69 + NF_HOOK ./include/linux/netfilter.h:257 + ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203 + __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208 + __netif_receive_skb net/core/dev.c:4246 + process_backlog+0x667/0xba0 net/core/dev.c:4866 + napi_poll net/core/dev.c:5268 + net_rx_action+0xc95/0x1590 net/core/dev.c:5333 + __do_softirq+0x485/0x942 kernel/softirq.c:284 + do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 + + do_softirq kernel/softirq.c:328 + __local_bh_enable_ip+0x25b/0x290 kernel/softirq.c:181 + local_bh_enable+0x37/0x40 ./include/linux/bottom_half.h:31 + rcu_read_unlock_bh ./include/linux/rcupdate.h:931 + ip6_finish_output2+0x19b2/0x1cf0 net/ipv6/ip6_output.c:124 + ip6_finish_output+0x764/0x970 net/ipv6/ip6_output.c:149 + NF_HOOK_COND ./include/linux/netfilter.h:246 + ip6_output+0x456/0x520 net/ipv6/ip6_output.c:163 + dst_output ./include/net/dst.h:486 + NF_HOOK ./include/linux/netfilter.h:257 + ip6_xmit+0x1841/0x1c00 net/ipv6/ip6_output.c:261 + sctp_v6_xmit+0x3b7/0x470 net/sctp/ipv6.c:225 
+ sctp_packet_transmit+0x38cb/0x3a20 net/sctp/output.c:632 + sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885 + sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750 + sctp_side_effects net/sctp/sm_sideeffect.c:1773 + sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147 + sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88 + sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954 + inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 + sock_sendmsg_nosec net/socket.c:633 + sock_sendmsg net/socket.c:643 + SYSC_sendto+0x608/0x710 net/socket.c:1696 + SyS_sendto+0x8a/0xb0 net/socket.c:1664 + do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285 + entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:246 + RIP: 0033:0x401133 + RSP: 002b:00007fff6d99cd38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c + RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000401133 + RDX: 0000000000000001 RSI: 0000000000494088 RDI: 0000000000000003 + RBP: 00007fff6d99cd90 R08: 00007fff6d99cd50 R09: 000000000000001c + R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000000 + R13: 00000000004063d0 R14: 0000000000406460 R15: 0000000000000000 + origin: + save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 + kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198 + kmsan_poison_shadow+0x6d/0xc0 mm/kmsan/kmsan.c:211 + slab_alloc_node mm/slub.c:2743 + __kmalloc_node_track_caller+0x200/0x360 mm/slub.c:4351 + __kmalloc_reserve net/core/skbuff.c:138 + __alloc_skb+0x26b/0x840 net/core/skbuff.c:231 + alloc_skb ./include/linux/skbuff.h:933 + sctp_packet_transmit+0x31e/0x3a20 net/sctp/output.c:570 + sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885 + sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750 + sctp_side_effects net/sctp/sm_sideeffect.c:1773 + sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147 + sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88 + sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954 + inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 + sock_sendmsg_nosec net/socket.c:633 + sock_sendmsg net/socket.c:643 + SYSC_sendto+0x608/0x710 net/socket.c:1696 + SyS_sendto+0x8a/0xb0 net/socket.c:1664 + do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285 + return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:246 + ================================================================== + +Signed-off-by: Alexander Potapenko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sctp/sctp.h | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/include/net/sctp/sctp.h ++++ b/include/net/sctp/sctp.h +@@ -469,6 +469,8 @@ _sctp_walk_params((pos), (chunk), ntohs( + + #define _sctp_walk_params(pos, chunk, end, member)\ + for (pos.v = chunk->member;\ ++ (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\ ++ (void *)chunk + end) &&\ + pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ + ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ + pos.v += SCTP_PAD4(ntohs(pos.p->length))) +@@ -479,6 +481,8 @@ _sctp_walk_errors((err), (chunk_hdr), nt + #define _sctp_walk_errors(err, chunk_hdr, end)\ + for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ + sizeof(sctp_chunkhdr_t));\ ++ ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\ ++ (void *)chunk_hdr + end) &&\ + (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ + ntohs(err->length) >= sizeof(sctp_errhdr_t); \ + err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) diff --git a/queue-4.12/sctp-fix-an-array-overflow-when-all-ext-chunks-are-set.patch b/queue-4.12/sctp-fix-an-array-overflow-when-all-ext-chunks-are-set.patch new file mode 100644 index 00000000000..81f59ec5eaa --- /dev/null +++ b/queue-4.12/sctp-fix-an-array-overflow-when-all-ext-chunks-are-set.patch @@ -0,0 +1,50 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Xin Long +Date: Fri, 14 Jul 2017 22:07:33 +0800 +Subject: sctp: fix an array overflow when all ext chunks are set + +From: Xin Long + + +[ Upstream commit 10b3bf54406bb7f4e78da9bb2a485c5c986678ad ] + +Marcelo noticed an array overflow caused by commit c28445c3cb07 +("sctp: add reconf_enable in asoc ep and netns"), in which sctp +would add SCTP_CID_RECONF into extensions when reconf_enable is +set in sctp_make_init and sctp_make_init_ack. + +Then now when all ext chunks are set, 4 ext chunk ids can be put +into extensions array while extensions array size is 3. It would +cause a kernel panic because of this overflow. + +This patch is to fix it by defining extensions array size is 4 in +both sctp_make_init and sctp_make_init_ack. + +Fixes: c28445c3cb07 ("sctp: add reconf_enable in asoc ep and netns") +Signed-off-by: Xin Long +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/sm_make_chunk.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/sctp/sm_make_chunk.c ++++ b/net/sctp/sm_make_chunk.c +@@ -228,7 +228,7 @@ struct sctp_chunk *sctp_make_init(const + sctp_adaptation_ind_param_t aiparam; + sctp_supported_ext_param_t ext_param; + int num_ext = 0; +- __u8 extensions[3]; ++ __u8 extensions[4]; + sctp_paramhdr_t *auth_chunks = NULL, + *auth_hmacs = NULL; + +@@ -396,7 +396,7 @@ struct sctp_chunk *sctp_make_init_ack(co + sctp_adaptation_ind_param_t aiparam; + sctp_supported_ext_param_t ext_param; + int num_ext = 0; +- __u8 extensions[3]; ++ __u8 extensions[4]; + sctp_paramhdr_t *auth_chunks = NULL, + *auth_hmacs = NULL, + *auth_random = NULL; diff --git a/queue-4.12/sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch b/queue-4.12/sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch new file mode 100644 index 00000000000..e49ad4152bf --- /dev/null +++ b/queue-4.12/sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch @@ -0,0 +1,59 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Xin Long +Date: Wed, 26 Jul 2017 16:24:59 +0800 +Subject: sctp: fix the check for _sctp_walk_params and _sctp_walk_errors + +From: Xin Long + + +[ Upstream commit 6b84202c946cd3da3a8daa92c682510e9ed80321 ] + +Commit b1f5bfc27a19 ("sctp: don't dereference ptr before leaving +_sctp_walk_{params, errors}()") tried to fix the issue that it +may overstep the chunk end for _sctp_walk_{params, errors} with +'chunk_end > offset(length) + sizeof(length)'. + +But it introduced a side effect: When processing INIT, it verifies +the chunks with 'param.v == chunk_end' after iterating all params +by sctp_walk_params(). With the check 'chunk_end > offset(length) ++ sizeof(length)', it would return when the last param is not yet +accessed. Because the last param usually is fwdtsn supported param +whose size is 4 and 'chunk_end == offset(length) + sizeof(length)' + +This is a badly issue even causing sctp couldn't process 4-shakes. +Client would always get abort when connecting to server, due to +the failure of INIT chunk verification on server. + +The patch is to use 'chunk_end <= offset(length) + sizeof(length)' +instead of 'chunk_end < offset(length) + sizeof(length)' for both +_sctp_walk_params and _sctp_walk_errors. + +Fixes: b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()") +Signed-off-by: Xin Long +Acked-by: Neil Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sctp/sctp.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/include/net/sctp/sctp.h ++++ b/include/net/sctp/sctp.h +@@ -469,7 +469,7 @@ _sctp_walk_params((pos), (chunk), ntohs( + + #define _sctp_walk_params(pos, chunk, end, member)\ + for (pos.v = chunk->member;\ +- (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\ ++ (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\ + (void *)chunk + end) &&\ + pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ + ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ +@@ -481,7 +481,7 @@ _sctp_walk_errors((err), (chunk_hdr), nt + #define _sctp_walk_errors(err, chunk_hdr, end)\ + for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ + sizeof(sctp_chunkhdr_t));\ +- ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\ ++ ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\ + (void *)chunk_hdr + end) &&\ + (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ + ntohs(err->length) >= sizeof(sctp_errhdr_t); \ diff --git a/queue-4.12/series b/queue-4.12/series index d2055a0365b..555766e0368 100644 --- a/queue-4.12/series +++ b/queue-4.12/series @@ -55,3 +55,51 @@ blk-mq-include-all-present-cpus-in-the-default-queue-mapping.patch blk-mq-create-hctx-for-each-present-cpu.patch block-disable-runtime-pm-for-blk-mq.patch saa7164-fix-double-fetch-pcie-access-condition.patch +sctp-fix-an-array-overflow-when-all-ext-chunks-are-set.patch +tcp_bbr-cut-pacing-rate-only-if-filled-pipe.patch +tcp_bbr-introduce-bbr_bw_to_pacing_rate-helper.patch +tcp_bbr-introduce-bbr_init_pacing_rate_from_rtt-helper.patch +tcp_bbr-remove-sk_pacing_rate-0-transient-during-init.patch +tcp_bbr-init-pacing-rate-on-first-rtt-sample.patch +ipv4-ipv6-initialize-treq-txhash-in-cookie_v_check.patch +wireless-wext-terminate-ifr-name-coming-from-userspace.patch +net-zero-terminate-ifr_name-in-dev_ifname.patch +net-dsa-mv88e6xxx-enable-cmode-config-support-for-6390x.patch +revert-rtnetlink-do-not-generate-notifications-for-changeaddr-event.patch +ipv6-avoid-overflow-of-offset-in-ip6_find_1stfragopt.patch +net-dsa-b53-add-missing-arl-entries-for-bcm53125.patch +ipv4-initialize-fib_trie-prior-to-register_netdev_notifier-call.patch +rtnetlink-allocate-more-memory-for-dev_set_mac_address.patch +net-bonding-fix-transmit-load-balancing-in-balance-alb-mode.patch +mcs7780-fix-initialization-when-config_vmap_stack-is-enabled.patch +openvswitch-fix-potential-out-of-bound-access-in-parse_ct.patch +packet-fix-use-after-free-in-prb_retire_rx_blk_timer_expired.patch +ipv6-don-t-increase-ipstats_mib_fragfails-twice-in-ip6_fragment.patch +net-ethernet-nb8800-handle-all-4-rgmii-modes-identically.patch +bonding-commit-link-status-change-after-propose.patch +dccp-fix-a-memleak-that-dccp_ipv6-doesn-t-put-reqsk-properly.patch +dccp-fix-a-memleak-that-dccp_ipv4-doesn-t-put-reqsk-properly.patch +dccp-fix-a-memleak-for-dccp_feat_init-err-process.patch +net-mlx5-consider-tx_enabled-in-all-modes-on-remap.patch +net-mlx5-fix-command-completion-after-timeout-access-invalid-structure.patch +net-mlx5-fix-command-bad-flow-on-command-entry-allocation-failure.patch +sctp-don-t-dereference-ptr-before-leaving-_sctp_walk_-params-errors.patch +sctp-fix-the-check-for-_sctp_walk_params-and-_sctp_walk_errors.patch +net-mlx5e-ipoib-modify-add-remove-underlay-qpn-flows.patch +net-mlx5e-fix-outer_header_zero-check-size.patch 
+net-mlx5-fix-mlx5_ifc_mtpps_reg_bits-structure-size.patch +net-mlx5e-add-field-select-to-mtpps-register.patch +net-mlx5e-fix-broken-disable-1pps-flow.patch +net-mlx5e-change-1pps-out-scheme.patch +net-mlx5e-add-missing-support-for-ptp_clk_req_pps-request.patch +net-mlx5e-fix-wrong-delay-calculation-for-overflow-check-scheduling.patch +net-mlx5e-schedule-overflow-check-work-to-mlx5e-workqueue.patch +net-mlx5-fix-mlx5_add_flow_rules-call-with-correct-num-of-dests.patch +udp6-fix-socket-leak-on-early-demux.patch +net-phy-correctly-process-phy_halted-in-phy_stop_machine.patch +ipv4-fib-fix-null-pointer-deref-during-fib_sync_down_dev.patch +virtio_net-fix-truesize-for-mergeable-buffers.patch +sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch +sparc64-prevent-perf-from-running-during-super-critical-sections.patch +sparc64-register-hugepages-during-arch-init.patch +sparc64-fix-exception-handling-in-ultrasparc-iii-memcpy.patch diff --git a/queue-4.12/sparc64-fix-exception-handling-in-ultrasparc-iii-memcpy.patch b/queue-4.12/sparc64-fix-exception-handling-in-ultrasparc-iii-memcpy.patch new file mode 100644 index 00000000000..f2d7951125a --- /dev/null +++ b/queue-4.12/sparc64-fix-exception-handling-in-ultrasparc-iii-memcpy.patch @@ -0,0 +1,48 @@ +From foo@baz Tue Aug 8 16:28:31 PDT 2017 +From: "David S. Miller" +Date: Fri, 4 Aug 2017 09:47:52 -0700 +Subject: sparc64: Fix exception handling in UltraSPARC-III memcpy. + +From: "David S. Miller" + + +[ Upstream commit 0ede1c401332173ab0693121dc6cde04a4dbf131 ] + +Mikael Pettersson reported that some test programs in the strace-4.18 +testsuite cause an OOPS. + +After some debugging it turns out that garbage values are returned +when an exception occurs, causing the fixup memset() to be run with +bogus arguments. + +The problem is that two of the exception handler stubs write the +successfully copied length into the wrong register. + +Fixes: ee841d0aff64 ("sparc64: Convert U3copy_{from,to}_user to accurate exception reporting.") +Reported-by: Mikael Pettersson +Tested-by: Mikael Pettersson +Reviewed-by: Sam Ravnborg +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/lib/U3memcpy.S | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/sparc/lib/U3memcpy.S ++++ b/arch/sparc/lib/U3memcpy.S +@@ -145,13 +145,13 @@ ENDPROC(U3_retl_o2_plus_GS_plus_0x08) + ENTRY(U3_retl_o2_and_7_plus_GS) + and %o2, 7, %o2 + retl +- add %o2, GLOBAL_SPARE, %o2 ++ add %o2, GLOBAL_SPARE, %o0 + ENDPROC(U3_retl_o2_and_7_plus_GS) + ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) + add GLOBAL_SPARE, 8, GLOBAL_SPARE + and %o2, 7, %o2 + retl +- add %o2, GLOBAL_SPARE, %o2 ++ add %o2, GLOBAL_SPARE, %o0 + ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) + #endif + diff --git a/queue-4.12/sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch b/queue-4.12/sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch new file mode 100644 index 00000000000..9223c0097b7 --- /dev/null +++ b/queue-4.12/sparc64-measure-receiver-forward-progress-to-avoid-send-mondo-timeout.patch @@ -0,0 +1,335 @@ +From foo@baz Tue Aug 8 16:28:31 PDT 2017 +From: Jane Chu +Date: Tue, 11 Jul 2017 12:00:54 -0600 +Subject: sparc64: Measure receiver forward progress to avoid send mondo timeout + +From: Jane Chu + + +[ Upstream commit 9d53caec84c7c5700e7c1ed744ea584fff55f9ac ] + +A large sun4v SPARC system may have moments of intensive xcall activities, +usually caused by unmapping many pages on many CPUs concurrently. 
This can +flood receivers with CPU mondo interrupts for an extended period, causing +some unlucky senders to hit send-mondo timeout. This problem gets worse +as cpu count increases because sometimes mappings must be invalidated on +all CPUs, and sometimes all CPUs may gang up on a single CPU. + +But a busy system is not a broken system. In the above scenario, as long +as the receiver is making forward progress processing mondo interrupts, +the sender should continue to retry. + +This patch implements the receiver's forward progress meter by introducing +a per cpu counter 'cpu_mondo_counter[cpu]' where 'cpu' is in the range +of 0..NR_CPUS. The receiver increments its counter as soon as it receives +a mondo and the sender tracks the receiver's counter. If the receiver has +stopped making forward progress when the retry limit is reached, the sender +declares send-mondo-timeout and panic; otherwise, the receiver is allowed +to keep making forward progress. + +In addition, it's been observed that PCIe hotplug events generate Correctable +Errors that are handled by hypervisor and then OS. Hypervisor 'borrows' +a guest cpu strand briefly to provide the service. If the cpu strand is +simultaneously the only cpu targeted by a mondo, it may not be available +for the mondo in 20msec, causing SUN4V mondo timeout. It appears that 1 second +is the agreed wait time between hypervisor and guest OS, this patch makes +the adjustment. + +Orabug: 25476541 +Orabug: 26417466 + +Signed-off-by: Jane Chu +Reviewed-by: Steve Sistare +Reviewed-by: Anthony Yznaga +Reviewed-by: Rob Gardner +Reviewed-by: Thomas Tai +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/trap_block.h | 1 + arch/sparc/kernel/smp_64.c | 189 ++++++++++++++++++++++-------------- + arch/sparc/kernel/sun4v_ivec.S | 15 ++ + arch/sparc/kernel/traps_64.c | 1 + 4 files changed, 134 insertions(+), 72 deletions(-) + +--- a/arch/sparc/include/asm/trap_block.h ++++ b/arch/sparc/include/asm/trap_block.h +@@ -54,6 +54,7 @@ extern struct trap_per_cpu trap_block[NR + void init_cur_cpu_trap(struct thread_info *); + void setup_tba(void); + extern int ncpus_probed; ++extern u64 cpu_mondo_counter[NR_CPUS]; + + unsigned long real_hard_smp_processor_id(void); + +--- a/arch/sparc/kernel/smp_64.c ++++ b/arch/sparc/kernel/smp_64.c +@@ -622,22 +622,48 @@ retry: + } + } + +-/* Multi-cpu list version. */ ++#define CPU_MONDO_COUNTER(cpuid) (cpu_mondo_counter[cpuid]) ++#define MONDO_USEC_WAIT_MIN 2 ++#define MONDO_USEC_WAIT_MAX 100 ++#define MONDO_RETRY_LIMIT 500000 ++ ++/* Multi-cpu list version. ++ * ++ * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'. ++ * Sometimes not all cpus receive the mondo, requiring us to re-send ++ * the mondo until all cpus have received, or cpus are truly stuck ++ * unable to receive mondo, and we timeout. ++ * Occasionally a target cpu strand is borrowed briefly by hypervisor to ++ * perform guest service, such as PCIe error handling. Consider the ++ * service time, 1 second overall wait is reasonable for 1 cpu. ++ * Here two in-between mondo check wait time are defined: 2 usec for ++ * single cpu quick turn around and up to 100usec for large cpu count. ++ * Deliver mondo to large number of cpus could take longer, we adjusts ++ * the retry count as long as target cpus are making forward progress. 
++ */ + static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) + { +- int retries, this_cpu, prev_sent, i, saw_cpu_error; ++ int this_cpu, tot_cpus, prev_sent, i, rem; ++ int usec_wait, retries, tot_retries; ++ u16 first_cpu = 0xffff; ++ unsigned long xc_rcvd = 0; + unsigned long status; ++ int ecpuerror_id = 0; ++ int enocpu_id = 0; + u16 *cpu_list; ++ u16 cpu; + + this_cpu = smp_processor_id(); +- + cpu_list = __va(tb->cpu_list_pa); +- +- saw_cpu_error = 0; +- retries = 0; ++ usec_wait = cnt * MONDO_USEC_WAIT_MIN; ++ if (usec_wait > MONDO_USEC_WAIT_MAX) ++ usec_wait = MONDO_USEC_WAIT_MAX; ++ retries = tot_retries = 0; ++ tot_cpus = cnt; + prev_sent = 0; ++ + do { +- int forward_progress, n_sent; ++ int n_sent, mondo_delivered, target_cpu_busy; + + status = sun4v_cpu_mondo_send(cnt, + tb->cpu_list_pa, +@@ -645,94 +671,113 @@ static void hypervisor_xcall_deliver(str + + /* HV_EOK means all cpus received the xcall, we're done. */ + if (likely(status == HV_EOK)) +- break; ++ goto xcall_done; ++ ++ /* If not these non-fatal errors, panic */ ++ if (unlikely((status != HV_EWOULDBLOCK) && ++ (status != HV_ECPUERROR) && ++ (status != HV_ENOCPU))) ++ goto fatal_errors; + + /* First, see if we made any forward progress. + * ++ * Go through the cpu_list, count the target cpus that have ++ * received our mondo (n_sent), and those that did not (rem). ++ * Re-pack cpu_list with the cpus remain to be retried in the ++ * front - this simplifies tracking the truly stalled cpus. ++ * + * The hypervisor indicates successful sends by setting + * cpu list entries to the value 0xffff. ++ * ++ * EWOULDBLOCK means some target cpus did not receive the ++ * mondo and retry usually helps. ++ * ++ * ECPUERROR means at least one target cpu is in error state, ++ * it's usually safe to skip the faulty cpu and retry. ++ * ++ * ENOCPU means one of the target cpu doesn't belong to the ++ * domain, perhaps offlined which is unexpected, but not ++ * fatal and it's okay to skip the offlined cpu. + */ ++ rem = 0; + n_sent = 0; + for (i = 0; i < cnt; i++) { +- if (likely(cpu_list[i] == 0xffff)) ++ cpu = cpu_list[i]; ++ if (likely(cpu == 0xffff)) { + n_sent++; ++ } else if ((status == HV_ECPUERROR) && ++ (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) { ++ ecpuerror_id = cpu + 1; ++ } else if (status == HV_ENOCPU && !cpu_online(cpu)) { ++ enocpu_id = cpu + 1; ++ } else { ++ cpu_list[rem++] = cpu; ++ } + } + +- forward_progress = 0; +- if (n_sent > prev_sent) +- forward_progress = 1; ++ /* No cpu remained, we're done. */ ++ if (rem == 0) ++ break; + +- prev_sent = n_sent; ++ /* Otherwise, update the cpu count for retry. */ ++ cnt = rem; + +- /* If we get a HV_ECPUERROR, then one or more of the cpus +- * in the list are in error state. Use the cpu_state() +- * hypervisor call to find out which cpus are in error state. ++ /* Record the overall number of mondos received by the ++ * first of the remaining cpus. 
+ */ +- if (unlikely(status == HV_ECPUERROR)) { +- for (i = 0; i < cnt; i++) { +- long err; +- u16 cpu; +- +- cpu = cpu_list[i]; +- if (cpu == 0xffff) +- continue; +- +- err = sun4v_cpu_state(cpu); +- if (err == HV_CPU_STATE_ERROR) { +- saw_cpu_error = (cpu + 1); +- cpu_list[i] = 0xffff; +- } +- } +- } else if (unlikely(status != HV_EWOULDBLOCK)) +- goto fatal_mondo_error; ++ if (first_cpu != cpu_list[0]) { ++ first_cpu = cpu_list[0]; ++ xc_rcvd = CPU_MONDO_COUNTER(first_cpu); ++ } + +- /* Don't bother rewriting the CPU list, just leave the +- * 0xffff and non-0xffff entries in there and the +- * hypervisor will do the right thing. +- * +- * Only advance timeout state if we didn't make any +- * forward progress. ++ /* Was any mondo delivered successfully? */ ++ mondo_delivered = (n_sent > prev_sent); ++ prev_sent = n_sent; ++ ++ /* or, was any target cpu busy processing other mondos? */ ++ target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu)); ++ xc_rcvd = CPU_MONDO_COUNTER(first_cpu); ++ ++ /* Retry count is for no progress. If we're making progress, ++ * reset the retry count. + */ +- if (unlikely(!forward_progress)) { +- if (unlikely(++retries > 10000)) +- goto fatal_mondo_timeout; +- +- /* Delay a little bit to let other cpus catch up +- * on their cpu mondo queue work. +- */ +- udelay(2 * cnt); ++ if (likely(mondo_delivered || target_cpu_busy)) { ++ tot_retries += retries; ++ retries = 0; ++ } else if (unlikely(retries > MONDO_RETRY_LIMIT)) { ++ goto fatal_mondo_timeout; + } +- } while (1); + +- if (unlikely(saw_cpu_error)) +- goto fatal_mondo_cpu_error; ++ /* Delay a little bit to let other cpus catch up on ++ * their cpu mondo queue work. ++ */ ++ if (!mondo_delivered) ++ udelay(usec_wait); + +- return; ++ retries++; ++ } while (1); + +-fatal_mondo_cpu_error: +- printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " +- "(including %d) were in error state\n", +- this_cpu, saw_cpu_error - 1); ++xcall_done: ++ if (unlikely(ecpuerror_id > 0)) { ++ pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n", ++ this_cpu, ecpuerror_id - 1); ++ } else if (unlikely(enocpu_id > 0)) { ++ pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n", ++ this_cpu, enocpu_id - 1); ++ } + return; + ++fatal_errors: ++ /* fatal errors include bad alignment, etc */ ++ pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n", ++ this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa); ++ panic("Unexpected SUN4V mondo error %lu\n", status); ++ + fatal_mondo_timeout: +- printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " +- " progress after %d retries.\n", +- this_cpu, retries); +- goto dump_cpu_list_and_out; +- +-fatal_mondo_error: +- printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n", +- this_cpu, status); +- printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) " +- "mondo_block_pa(%lx)\n", +- this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa); +- +-dump_cpu_list_and_out: +- printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu); +- for (i = 0; i < cnt; i++) +- printk("%u ", cpu_list[i]); +- printk("]\n"); ++ /* some cpus being non-responsive to the cpu mondo */ ++ pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. 
Total target cpus(%d).\n", ++ this_cpu, first_cpu, (tot_retries + retries), tot_cpus); ++ panic("SUN4V mondo timeout panic\n"); + } + + static void (*xcall_deliver_impl)(struct trap_per_cpu *, int); +--- a/arch/sparc/kernel/sun4v_ivec.S ++++ b/arch/sparc/kernel/sun4v_ivec.S +@@ -26,6 +26,21 @@ sun4v_cpu_mondo: + ldxa [%g0] ASI_SCRATCHPAD, %g4 + sub %g4, TRAP_PER_CPU_FAULT_INFO, %g4 + ++ /* Get smp_processor_id() into %g3 */ ++ sethi %hi(trap_block), %g5 ++ or %g5, %lo(trap_block), %g5 ++ sub %g4, %g5, %g3 ++ srlx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 ++ ++ /* Increment cpu_mondo_counter[smp_processor_id()] */ ++ sethi %hi(cpu_mondo_counter), %g5 ++ or %g5, %lo(cpu_mondo_counter), %g5 ++ sllx %g3, 3, %g3 ++ add %g5, %g3, %g5 ++ ldx [%g5], %g3 ++ add %g3, 1, %g3 ++ stx %g3, [%g5] ++ + /* Get CPU mondo queue base phys address into %g7. */ + ldx [%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7 + +--- a/arch/sparc/kernel/traps_64.c ++++ b/arch/sparc/kernel/traps_64.c +@@ -2733,6 +2733,7 @@ void do_getpsr(struct pt_regs *regs) + } + } + ++u64 cpu_mondo_counter[NR_CPUS] = {0}; + struct trap_per_cpu trap_block[NR_CPUS]; + EXPORT_SYMBOL(trap_block); + diff --git a/queue-4.12/sparc64-prevent-perf-from-running-during-super-critical-sections.patch b/queue-4.12/sparc64-prevent-perf-from-running-during-super-critical-sections.patch new file mode 100644 index 00000000000..42914b34b3d --- /dev/null +++ b/queue-4.12/sparc64-prevent-perf-from-running-during-super-critical-sections.patch @@ -0,0 +1,137 @@ +From foo@baz Tue Aug 8 16:28:31 PDT 2017 +From: Rob Gardner +Date: Mon, 17 Jul 2017 09:22:27 -0600 +Subject: sparc64: Prevent perf from running during super critical sections + +From: Rob Gardner + + +[ Upstream commit fc290a114fc6034b0f6a5a46e2fb7d54976cf87a ] + +This fixes another cause of random segfaults and bus errors that may +occur while running perf with the callgraph option. + +Critical sections beginning with spin_lock_irqsave() raise the interrupt +level to PIL_NORMAL_MAX (14) and intentionally do not block performance +counter interrupts, which arrive at PIL_NMI (15). + +But some sections of code are "super critical" with respect to perf +because the perf_callchain_user() path accesses user space and may cause +TLB activity as well as faults as it unwinds the user stack. + +One particular critical section occurs in switch_mm: + + spin_lock_irqsave(&mm->context.lock, flags); + ... + load_secondary_context(mm); + tsb_context_switch(mm); + ... + spin_unlock_irqrestore(&mm->context.lock, flags); + +If a perf interrupt arrives in between load_secondary_context() and +tsb_context_switch(), then perf_callchain_user() could execute with +the context ID of one process, but with an active TSB for a different +process. When the user stack is accessed, it is very likely to +incur a TLB miss, since the h/w context ID has been changed. The TLB +will then be reloaded with a translation from the TSB for one process, +but using a context ID for another process. This exposes memory from +one process to another, and since it is a mapping for stack memory, +this usually causes the new process to crash quickly. + +This super critical section needs more protection than is provided +by spin_lock_irqsave() since perf interrupts must not be allowed in. + +Since __tsb_context_switch already goes through the trouble of +disabling interrupts completely, we fix this by moving the secondary +context load down into this better protected region. 
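+
+As a rough before/after sketch (illustrative only, not the literal kernel
+code; the names are the ones already used in this message and in the hunks
+below, and the actual fix is in the tsb.S assembly), the ordering becomes:
+
+	/* before: a perf NMI can land between the two calls */
+	spin_lock_irqsave(&mm->context.lock, flags);
+	load_secondary_context(mm);	/* <-- perf interrupt window */
+	tsb_context_switch(mm);
+	spin_unlock_irqrestore(&mm->context.lock, flags);
+
+	/* after: __tsb_context_switch clears PSTATE.IE up front, so the
+	 * secondary context and the TSB registers are written with all
+	 * interrupts blocked, perf interrupts at PIL_NMI included.
+	 */
+	spin_lock_irqsave(&mm->context.lock, flags);
+	tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context));
+	spin_unlock_irqrestore(&mm->context.lock, flags);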
+ +Orabug: 25577560 + +Signed-off-by: Dave Aldridge +Signed-off-by: Rob Gardner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/include/asm/mmu_context_64.h | 14 +++++++++----- + arch/sparc/kernel/tsb.S | 12 ++++++++++++ + arch/sparc/power/hibernate.c | 3 +-- + 3 files changed, 22 insertions(+), 7 deletions(-) + +--- a/arch/sparc/include/asm/mmu_context_64.h ++++ b/arch/sparc/include/asm/mmu_context_64.h +@@ -27,9 +27,11 @@ void destroy_context(struct mm_struct *m + void __tsb_context_switch(unsigned long pgd_pa, + struct tsb_config *tsb_base, + struct tsb_config *tsb_huge, +- unsigned long tsb_descr_pa); ++ unsigned long tsb_descr_pa, ++ unsigned long secondary_ctx); + +-static inline void tsb_context_switch(struct mm_struct *mm) ++static inline void tsb_context_switch_ctx(struct mm_struct *mm, ++ unsigned long ctx) + { + __tsb_context_switch(__pa(mm->pgd), + &mm->context.tsb_block[MM_TSB_BASE], +@@ -40,9 +42,12 @@ static inline void tsb_context_switch(st + #else + NULL + #endif +- , __pa(&mm->context.tsb_descr[MM_TSB_BASE])); ++ , __pa(&mm->context.tsb_descr[MM_TSB_BASE]), ++ ctx); + } + ++#define tsb_context_switch(X) tsb_context_switch_ctx(X, 0) ++ + void tsb_grow(struct mm_struct *mm, + unsigned long tsb_index, + unsigned long mm_rss); +@@ -112,8 +117,7 @@ static inline void switch_mm(struct mm_s + * cpu0 to update it's TSB because at that point the cpu_vm_mask + * only had cpu1 set in it. + */ +- load_secondary_context(mm); +- tsb_context_switch(mm); ++ tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); + + /* Any time a processor runs a context on an address space + * for the first time, we must flush that context out of the +--- a/arch/sparc/kernel/tsb.S ++++ b/arch/sparc/kernel/tsb.S +@@ -360,6 +360,7 @@ tsb_flush: + * %o1: TSB base config pointer + * %o2: TSB huge config pointer, or NULL if none + * %o3: Hypervisor TSB descriptor physical address ++ * %o4: Secondary context to load, if non-zero + * + * We have to run this whole thing with interrupts + * disabled so that the current cpu doesn't change +@@ -372,6 +373,17 @@ __tsb_context_switch: + rdpr %pstate, %g1 + wrpr %g1, PSTATE_IE, %pstate + ++ brz,pn %o4, 1f ++ mov SECONDARY_CONTEXT, %o5 ++ ++661: stxa %o4, [%o5] ASI_DMMU ++ .section .sun4v_1insn_patch, "ax" ++ .word 661b ++ stxa %o4, [%o5] ASI_MMU ++ .previous ++ flush %g6 ++ ++1: + TRAP_LOAD_TRAP_BLOCK(%g2, %g3) + + stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR] +--- a/arch/sparc/power/hibernate.c ++++ b/arch/sparc/power/hibernate.c +@@ -35,6 +35,5 @@ void restore_processor_state(void) + { + struct mm_struct *mm = current->active_mm; + +- load_secondary_context(mm); +- tsb_context_switch(mm); ++ tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context)); + } diff --git a/queue-4.12/sparc64-register-hugepages-during-arch-init.patch b/queue-4.12/sparc64-register-hugepages-during-arch-init.patch new file mode 100644 index 00000000000..dbce71aecd5 --- /dev/null +++ b/queue-4.12/sparc64-register-hugepages-during-arch-init.patch @@ -0,0 +1,77 @@ +From foo@baz Tue Aug 8 16:28:31 PDT 2017 +From: Nitin Gupta +Date: Wed, 19 Jul 2017 17:12:54 -0700 +Subject: sparc64: Register hugepages during arch init + +From: Nitin Gupta + + +[ Upstream commit 8399e4b88a93fc7bc00fff3b8da9b2e718b7f45e ] + +Add hstate for each supported hugepage size using +arch initcall. This change fixes some hugepage +parameter parsing inconsistencies: + +case 1: no hugepage parameters + + Without hugepage parameters, only a hugepages-8192kB entry is visible + in sysfs. 
It's different from x86_64 where both 2M and 1G hugepage + sizes are available. + +case 2: default_hugepagesz=[64K|256M|2G] + + When specifying only a default_hugepagesz parameter, the default + hugepage size isn't really changed and it stays at 8M. This is again + different from x86_64. + +Orabug: 25869946 + +Reviewed-by: Bob Picco +Signed-off-by: Nitin Gupta +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/sparc/mm/init_64.c | 25 ++++++++++++++++++++++++- + 1 file changed, 24 insertions(+), 1 deletion(-) + +--- a/arch/sparc/mm/init_64.c ++++ b/arch/sparc/mm/init_64.c +@@ -325,6 +325,29 @@ static void __update_mmu_tsb_insert(stru + } + + #ifdef CONFIG_HUGETLB_PAGE ++static void __init add_huge_page_size(unsigned long size) ++{ ++ unsigned int order; ++ ++ if (size_to_hstate(size)) ++ return; ++ ++ order = ilog2(size) - PAGE_SHIFT; ++ hugetlb_add_hstate(order); ++} ++ ++static int __init hugetlbpage_init(void) ++{ ++ add_huge_page_size(1UL << HPAGE_64K_SHIFT); ++ add_huge_page_size(1UL << HPAGE_SHIFT); ++ add_huge_page_size(1UL << HPAGE_256MB_SHIFT); ++ add_huge_page_size(1UL << HPAGE_2GB_SHIFT); ++ ++ return 0; ++} ++ ++arch_initcall(hugetlbpage_init); ++ + static int __init setup_hugepagesz(char *string) + { + unsigned long long hugepage_size; +@@ -364,7 +387,7 @@ static int __init setup_hugepagesz(char + goto out; + } + +- hugetlb_add_hstate(hugepage_shift - PAGE_SHIFT); ++ add_huge_page_size(hugepage_size); + rc = 1; + + out: diff --git a/queue-4.12/tcp_bbr-cut-pacing-rate-only-if-filled-pipe.patch b/queue-4.12/tcp_bbr-cut-pacing-rate-only-if-filled-pipe.patch new file mode 100644 index 00000000000..d59736b9e68 --- /dev/null +++ b/queue-4.12/tcp_bbr-cut-pacing-rate-only-if-filled-pipe.patch @@ -0,0 +1,48 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Neal Cardwell +Date: Fri, 14 Jul 2017 17:49:21 -0400 +Subject: tcp_bbr: cut pacing rate only if filled pipe + +From: Neal Cardwell + + +[ Upstream commit 4aea287e90dd61a48268ff2994b56f9799441b62 ] + +In bbr_set_pacing_rate(), which decides whether to cut the pacing +rate, there was some code that considered exiting STARTUP to be +equivalent to the notion of filling the pipe (i.e., +bbr_full_bw_reached()). Specifically, as the code was structured, +exiting STARTUP and going into PROBE_RTT could cause us to cut the +pacing rate down to something silly and low, based on whatever +bandwidth samples we've had so far, when it's possible that all of +them have been small app-limited bandwidth samples that are not +representative of the bandwidth available in the path. (The code was +correct at the time it was written, but the state machine changed +without this spot being adjusted correspondingly.) + +Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") +Signed-off-by: Neal Cardwell +Signed-off-by: Yuchung Cheng +Signed-off-by: Soheil Hassas Yeganeh +Signed-off-by: David S. 
Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_bbr.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -221,12 +221,11 @@ static u64 bbr_rate_bytes_per_sec(struct
+  */
+ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
+ {
+-	struct bbr *bbr = inet_csk_ca(sk);
+ 	u64 rate = bw;
+ 
+ 	rate = bbr_rate_bytes_per_sec(sk, rate, gain);
+ 	rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+-	if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate)
++	if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
+ 		sk->sk_pacing_rate = rate;
+ }
+ 
diff --git a/queue-4.12/tcp_bbr-init-pacing-rate-on-first-rtt-sample.patch b/queue-4.12/tcp_bbr-init-pacing-rate-on-first-rtt-sample.patch
new file mode 100644
index 00000000000..dd8fa7d4679
--- /dev/null
+++ b/queue-4.12/tcp_bbr-init-pacing-rate-on-first-rtt-sample.patch
@@ -0,0 +1,76 @@
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Neal Cardwell
+Date: Fri, 14 Jul 2017 17:49:25 -0400
+Subject: tcp_bbr: init pacing rate on first RTT sample
+
+From: Neal Cardwell
+
+
+[ Upstream commit 32984565574da7ed3afa10647bb4020d7a9e6c93 ]
+
+Fixes the following behavior: for connections that had no RTT sample
+at the time of initializing congestion control, BBR was initializing
+the pacing rate to a high nominal rate (based on a guess of RTT=1ms,
+in case this is LAN traffic). Then BBR never adjusted the pacing rate
+downward upon obtaining an actual RTT sample, if the connection never
+filled the pipe (e.g. all sends were small app-limited writes()).
+
+This fix adjusts the pacing rate upon obtaining the first RTT sample.
+
+Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
+Signed-off-by: Neal Cardwell
+Signed-off-by: Yuchung Cheng
+Signed-off-by: Soheil Hassas Yeganeh
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_bbr.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -113,7 +113,8 @@ struct bbr {
+ 		cwnd_gain:10,	/* current gain for setting cwnd */
+ 		full_bw_cnt:3,	/* number of rounds without large bw gains */
+ 		cycle_idx:3,	/* current index in pacing_gain cycle array */
+-		unused_b:6;
++		has_seen_rtt:1, /* have we seen an RTT sample yet? */
++		unused_b:5;
+ 	u32	prior_cwnd;	/* prior cwnd upon entering loss recovery */
+ 	u32	full_bw;	/* recent bw, to estimate if pipe is full */
+ };
+@@ -226,11 +227,13 @@ static u32 bbr_bw_to_pacing_rate(struct
+ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
+ {
+ 	struct tcp_sock *tp = tcp_sk(sk);
++	struct bbr *bbr = inet_csk_ca(sk);
+ 	u64 bw;
+ 	u32 rtt_us;
+ 
+ 	if (tp->srtt_us) {		/* any RTT sample yet?
*/ + rtt_us = max(tp->srtt_us >> 3, 1U); ++ bbr->has_seen_rtt = 1; + } else { /* no RTT sample yet */ + rtt_us = USEC_PER_MSEC; /* use nominal default RTT */ + } +@@ -248,8 +251,12 @@ static void bbr_init_pacing_rate_from_rt + */ + static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) + { ++ struct tcp_sock *tp = tcp_sk(sk); ++ struct bbr *bbr = inet_csk_ca(sk); + u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain); + ++ if (unlikely(!bbr->has_seen_rtt && tp->srtt_us)) ++ bbr_init_pacing_rate_from_rtt(sk); + if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) + sk->sk_pacing_rate = rate; + } +@@ -838,6 +845,7 @@ static void bbr_init(struct sock *sk) + + minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ + ++ bbr->has_seen_rtt = 0; + bbr_init_pacing_rate_from_rtt(sk); + + bbr->restore_cwnd = 0; diff --git a/queue-4.12/tcp_bbr-introduce-bbr_bw_to_pacing_rate-helper.patch b/queue-4.12/tcp_bbr-introduce-bbr_bw_to_pacing_rate-helper.patch new file mode 100644 index 00000000000..7810ebb8a9b --- /dev/null +++ b/queue-4.12/tcp_bbr-introduce-bbr_bw_to_pacing_rate-helper.patch @@ -0,0 +1,55 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Neal Cardwell +Date: Fri, 14 Jul 2017 17:49:22 -0400 +Subject: tcp_bbr: introduce bbr_bw_to_pacing_rate() helper + +From: Neal Cardwell + + +[ Upstream commit f19fd62dafaf1ed6cf615dba655b82fa9df59074 ] + +Introduce a helper to convert a BBR bandwidth and gain factor to a +pacing rate in bytes per second. This is a pure refactor, but is +needed for two following fixes. + +Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") +Signed-off-by: Neal Cardwell +Signed-off-by: Yuchung Cheng +Signed-off-by: Soheil Hassas Yeganeh +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_bbr.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/net/ipv4/tcp_bbr.c ++++ b/net/ipv4/tcp_bbr.c +@@ -212,6 +212,16 @@ static u64 bbr_rate_bytes_per_sec(struct + return rate >> BW_SCALE; + } + ++/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */ ++static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) ++{ ++ u64 rate = bw; ++ ++ rate = bbr_rate_bytes_per_sec(sk, rate, gain); ++ rate = min_t(u64, rate, sk->sk_max_pacing_rate); ++ return rate; ++} ++ + /* Pace using current bw estimate and a gain factor. 
In order to help drive the + * network toward lower queues while maintaining high utilization and low + * latency, the average pacing rate aims to be slightly (~1%) lower than the +@@ -221,10 +231,8 @@ static u64 bbr_rate_bytes_per_sec(struct + */ + static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) + { +- u64 rate = bw; ++ u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain); + +- rate = bbr_rate_bytes_per_sec(sk, rate, gain); +- rate = min_t(u64, rate, sk->sk_max_pacing_rate); + if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) + sk->sk_pacing_rate = rate; + } diff --git a/queue-4.12/tcp_bbr-introduce-bbr_init_pacing_rate_from_rtt-helper.patch b/queue-4.12/tcp_bbr-introduce-bbr_init_pacing_rate_from_rtt-helper.patch new file mode 100644 index 00000000000..8d567ac19a6 --- /dev/null +++ b/queue-4.12/tcp_bbr-introduce-bbr_init_pacing_rate_from_rtt-helper.patch @@ -0,0 +1,71 @@ +From foo@baz Tue Aug 8 16:27:29 PDT 2017 +From: Neal Cardwell +Date: Fri, 14 Jul 2017 17:49:23 -0400 +Subject: tcp_bbr: introduce bbr_init_pacing_rate_from_rtt() helper + +From: Neal Cardwell + + +[ Upstream commit 79135b89b8af304456bd67916b80116ddf03d7b6 ] + +Introduce a helper to initialize the BBR pacing rate unconditionally, +based on the current cwnd and RTT estimate. This is a pure refactor, +but is needed for two following fixes. + +Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") +Signed-off-by: Neal Cardwell +Signed-off-by: Yuchung Cheng +Signed-off-by: Soheil Hassas Yeganeh +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_bbr.c | 23 ++++++++++++++++++----- + 1 file changed, 18 insertions(+), 5 deletions(-) + +--- a/net/ipv4/tcp_bbr.c ++++ b/net/ipv4/tcp_bbr.c +@@ -222,6 +222,23 @@ static u32 bbr_bw_to_pacing_rate(struct + return rate; + } + ++/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ ++static void bbr_init_pacing_rate_from_rtt(struct sock *sk) ++{ ++ struct tcp_sock *tp = tcp_sk(sk); ++ u64 bw; ++ u32 rtt_us; ++ ++ if (tp->srtt_us) { /* any RTT sample yet? */ ++ rtt_us = max(tp->srtt_us >> 3, 1U); ++ } else { /* no RTT sample yet */ ++ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */ ++ } ++ bw = (u64)tp->snd_cwnd * BW_UNIT; ++ do_div(bw, rtt_us); ++ sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain); ++} ++ + /* Pace using current bw estimate and a gain factor. In order to help drive the + * network toward lower queues while maintaining high utilization and low + * latency, the average pacing rate aims to be slightly (~1%) lower than the +@@ -806,7 +823,6 @@ static void bbr_init(struct sock *sk) + { + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); +- u64 bw; + + bbr->prior_cwnd = 0; + bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */ +@@ -822,11 +838,8 @@ static void bbr_init(struct sock *sk) + + minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ + +- /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ +- bw = (u64)tp->snd_cwnd * BW_UNIT; +- do_div(bw, (tp->srtt_us >> 3) ? 
: USEC_PER_MSEC);
+ 	sk->sk_pacing_rate = 0;		/* force an update of sk_pacing_rate */
+-	bbr_set_pacing_rate(sk, bw, bbr_high_gain);
++	bbr_init_pacing_rate_from_rtt(sk);
+ 
+ 	bbr->restore_cwnd = 0;
+ 	bbr->round_start = 0;
diff --git a/queue-4.12/tcp_bbr-remove-sk_pacing_rate-0-transient-during-init.patch b/queue-4.12/tcp_bbr-remove-sk_pacing_rate-0-transient-during-init.patch
new file mode 100644
index 00000000000..dbd33a50e53
--- /dev/null
+++ b/queue-4.12/tcp_bbr-remove-sk_pacing_rate-0-transient-during-init.patch
@@ -0,0 +1,40 @@
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Neal Cardwell
+Date: Fri, 14 Jul 2017 17:49:24 -0400
+Subject: tcp_bbr: remove sk_pacing_rate=0 transient during init
+
+From: Neal Cardwell
+
+
+[ Upstream commit 1d3648eb5d1fe9ed3d095ed8fa19ad11ca4c8bc0 ]
+
+Fix a corner case noticed by Eric Dumazet, where BBR's setting
+sk->sk_pacing_rate to 0 during initialization could theoretically
+cause packets in the sending host to hang if there were packets "in
+flight" in the pacing infrastructure at the time the BBR congestion
+control state is initialized. This could occur if the pacing
+infrastructure happened to race with bbr_init() in a way such that the
+pacer read the 0 rather than the immediately following non-zero pacing
+rate.
+
+Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
+Reported-by: Eric Dumazet
+Signed-off-by: Neal Cardwell
+Signed-off-by: Yuchung Cheng
+Signed-off-by: Soheil Hassas Yeganeh
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_bbr.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/net/ipv4/tcp_bbr.c
++++ b/net/ipv4/tcp_bbr.c
+@@ -838,7 +838,6 @@ static void bbr_init(struct sock *sk)
+ 
+ 	minmax_reset(&bbr->bw, bbr->rtt_cnt, 0);  /* init max bw to 0 */
+ 
+-	sk->sk_pacing_rate = 0;		/* force an update of sk_pacing_rate */
+ 	bbr_init_pacing_rate_from_rtt(sk);
+ 
+ 	bbr->restore_cwnd = 0;
diff --git a/queue-4.12/udp6-fix-socket-leak-on-early-demux.patch b/queue-4.12/udp6-fix-socket-leak-on-early-demux.patch
new file mode 100644
index 00000000000..72c34f36f6a
--- /dev/null
+++ b/queue-4.12/udp6-fix-socket-leak-on-early-demux.patch
@@ -0,0 +1,127 @@
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: Paolo Abeni
+Date: Thu, 27 Jul 2017 14:45:09 +0200
+Subject: udp6: fix socket leak on early demux
+
+From: Paolo Abeni
+
+
+[ Upstream commit c9f2c1ae123a751d4e4f949144500219354d5ee1 ]
+
+When an early demuxed packet reaches __udp6_lib_lookup_skb(), the
+sk reference is retrieved and used, but the relevant reference
+count is leaked and the socket destructor is never called.
+Beyond leaking the sk memory, if there are pending UDP packets
+in the receive queue, even the related accounted memory is leaked.
+
+In the long run, this will cause persistent forward allocation errors
+and no UDP skbs (both ipv4 and ipv6) will be able to reach
+user space.
+
+Fix this by explicitly accessing the early demux reference before
+the lookup, and properly decreasing the socket reference count
+after usage.
+
+Also drop the skb_steal_sock() in __udp6_lib_lookup_skb(), and
+the now-obsolete comment about "socket cache".
+
+The newly added code is derived from the current ipv4 code for the
+similar path.
+
+v1 -> v2:
+  fixed the __udp6_lib_rcv() return code for resubmission,
+  as suggested by Eric
+
+Reported-by: Sam Edwards
+Reported-by: Marc Haber
+Fixes: 5425077d73e0 ("net: ipv6: Add early demux handler for UDP unicast")
+Signed-off-by: Paolo Abeni
+Acked-by: Eric Dumazet
+Signed-off-by: David S.
Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/net/udp.h |    1 +
+ net/ipv4/udp.c    |    3 ++-
+ net/ipv6/udp.c    |   27 ++++++++++++++++++---------
+ 3 files changed, 21 insertions(+), 10 deletions(-)
+
+--- a/include/net/udp.h
++++ b/include/net/udp.h
+@@ -265,6 +265,7 @@ static inline struct sk_buff *skb_recv_u
+ }
+ 
+ void udp_v4_early_demux(struct sk_buff *skb);
++void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
+ int udp_get_port(struct sock *sk, unsigned short snum,
+ 		 int (*saddr_cmp)(const struct sock *,
+ 				  const struct sock *));
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1762,7 +1762,7 @@ drop:
+ /* For TCP sockets, sk_rx_dst is protected by socket lock
+  * For UDP, we use xchg() to guard against concurrent changes.
+  */
+-static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
++void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+ {
+ 	struct dst_entry *old;
+ 
+@@ -2120,6 +2120,7 @@ void udp_destroy_sock(struct sock *sk)
+ 			encap_destroy(sk);
+ 	}
+ }
++EXPORT_SYMBOL(udp_sk_rx_dst_set);
+ 
+ /*
+  *	Socket option code for UDP
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -291,11 +291,7 @@ static struct sock *__udp6_lib_lookup_sk
+ 					  struct udp_table *udptable)
+ {
+ 	const struct ipv6hdr *iph = ipv6_hdr(skb);
+-	struct sock *sk;
+ 
+-	sk = skb_steal_sock(skb);
+-	if (unlikely(sk))
+-		return sk;
+ 	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
+ 				 &iph->daddr, dport, inet6_iif(skb),
+ 				 udptable, skb);
+@@ -798,6 +794,24 @@ int __udp6_lib_rcv(struct sk_buff *skb,
+ 	if (udp6_csum_init(skb, uh, proto))
+ 		goto csum_error;
+ 
++	/* Check if the socket is already available, e.g. due to early demux */
++	sk = skb_steal_sock(skb);
++	if (sk) {
++		struct dst_entry *dst = skb_dst(skb);
++		int ret;
++
++		if (unlikely(sk->sk_rx_dst != dst))
++			udp_sk_rx_dst_set(sk, dst);
++
++		ret = udpv6_queue_rcv_skb(sk, skb);
++		sock_put(sk);
++
++		/* a return value > 0 means to resubmit the input */
++		if (ret > 0)
++			return ret;
++		return 0;
++	}
++
+ 	/*
+ 	 *	Multicast receive code
+ 	 */
+@@ -806,11 +820,6 @@ int __udp6_lib_rcv(struct sk_buff *skb,
+ 				saddr, daddr, udptable, proto);
+ 
+ 	/* Unicast */
+-
+-	/*
+-	 * check socket cache ... must talk to Alan about his plans
+-	 * for sock caches... i'll skip this for now.
+-	 */
+ 	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
+ 	if (sk) {
+ 		int ret;
diff --git a/queue-4.12/virtio_net-fix-truesize-for-mergeable-buffers.patch b/queue-4.12/virtio_net-fix-truesize-for-mergeable-buffers.patch
new file mode 100644
index 00000000000..973d7107ecb
--- /dev/null
+++ b/queue-4.12/virtio_net-fix-truesize-for-mergeable-buffers.patch
@@ -0,0 +1,61 @@
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: "Michael S. Tsirkin"
+Date: Mon, 31 Jul 2017 21:49:49 +0300
+Subject: virtio_net: fix truesize for mergeable buffers
+
+From: "Michael S. Tsirkin"
+
+
+[ Upstream commit 1daa8790d0280d2c719658e39bd59fce65efa909 ]
+
+Seth Forshee noticed a performance degradation with some workloads.
+This turns out to be due to packet drops. Euan Kemp noticed that this
+is because we drop all packets where length exceeds the truesize, but
+for some packets we add in extra memory without updating the truesize.
+This behavior was kept unchanged from ab7db91705e95 ("virtio-net:
+auto-tune mergeable rx buffer size for improved performance"). That
+commit had an internal reason not to account for the extra space: not
+enough bits to do it. That is no longer true, so let's account for the
+allocated length exactly.
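+
+In sketch form (simplified from the hunk below, with the surrounding
+allocation logic and error handling omitted), the fix is just to stash
+the truesize in 'ctx' only after any leftover hole has been folded in:
+
+	/* before: truesize recorded too early */
+	ctx = (void *)(unsigned long)len;
+	...
+	if (hole < len + headroom)
+		len += hole;	/* 'len' grows, stored truesize is stale */
+
+	/* after: record it once 'len' is final, so the receive path's
+	 * "length exceeds truesize" check no longer drops these packets
+	 */
+	if (hole < len + headroom)
+		len += hole;
+	ctx = (void *)(unsigned long)len;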
+
+Many thanks to Seth Forshee for the report and bisecting and Euan Kemp
+for debugging the issue.
+
+Fixes: 680557cf79f8 ("virtio_net: rework mergeable buffer handling")
+Reported-by: Euan Kemp
+Tested-by: Euan Kemp
+Reported-by: Seth Forshee
+Tested-by: Seth Forshee
+Signed-off-by: Michael S. Tsirkin
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/virtio_net.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -889,21 +889,20 @@ static int add_recvbuf_mergeable(struct
+ 
+ 	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+ 	buf += headroom; /* advance address leaving hole at front of pkt */
+-	ctx = (void *)(unsigned long)len;
+ 	get_page(alloc_frag->page);
+ 	alloc_frag->offset += len + headroom;
+ 	hole = alloc_frag->size - alloc_frag->offset;
+ 	if (hole < len + headroom) {
+ 		/* To avoid internal fragmentation, if there is very likely not
+ 		 * enough space for another buffer, add the remaining space to
+-		 * the current buffer. This extra space is not included in
+-		 * the truesize stored in ctx.
++		 * the current buffer.
+ 		 */
+ 		len += hole;
+ 		alloc_frag->offset += hole;
+ 	}
+ 
+ 	sg_init_one(rq->sg, buf, len);
++	ctx = (void *)(unsigned long)len;
+ 	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
+ 	if (err < 0)
+ 		put_page(virt_to_head_page(buf));
diff --git a/queue-4.12/wireless-wext-terminate-ifr-name-coming-from-userspace.patch b/queue-4.12/wireless-wext-terminate-ifr-name-coming-from-userspace.patch
new file mode 100644
index 00000000000..d5ccacce371
--- /dev/null
+++ b/queue-4.12/wireless-wext-terminate-ifr-name-coming-from-userspace.patch
@@ -0,0 +1,34 @@
+From foo@baz Tue Aug 8 16:27:29 PDT 2017
+From: "Levin, Alexander"
+Date: Tue, 18 Jul 2017 04:23:16 +0000
+Subject: wireless: wext: terminate ifr name coming from userspace
+
+From: "Levin, Alexander"
+
+
+[ Upstream commit 98de4e0ea47d106846fc0e30ce4e644283fa7fc2 ]
+
+ifr name is assumed to be a valid string by the kernel, but nothing
+was forcing userspace to pass a valid string.
+
+In turn, this would cause panics as we tried to access the string
+past its valid memory.
+
+Signed-off-by: Sasha Levin
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/core/dev_ioctl.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/core/dev_ioctl.c
++++ b/net/core/dev_ioctl.c
+@@ -423,6 +423,8 @@ int dev_ioctl(struct net *net, unsigned
+ 	if (copy_from_user(&iwr, arg, sizeof(iwr)))
+ 		return -EFAULT;
+ 
++	iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
++
+ 	return wext_handle_ioctl(net, &iwr, cmd, arg);
+ }
+ 
-- 
2.47.3