From c98ee13176c107dddbfa2d703e090223076a3809 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 7 May 2020 12:14:36 +0200 Subject: [PATCH] 4.4-stable patches added patches: bpf-fix-map-not-being-uncharged-during-map-creation-failure.patch mlxsw-switchx2-fix-ethernet-port-initialization.patch mlxsw-switchx2-fix-misuse-of-hard_header_len.patch net-mlx4-fix-uninitialized-fields-in-rule-when-adding-promiscuous-mode-to-device-managed-flow-steering.patch net-mlx4_core-check-device-state-before-unregistering-it.patch net-mlx4_core-do-not-access-comm-channel-if-it-has-not-yet-been-initialized.patch net-mlx4_core-fix-access-to-uninitialized-index.patch net-mlx4_core-fix-potential-corruption-in-counters-database.patch net-mlx4_core-fix-query-func-cap-flags.patch net-mlx4_core-fix-the-resource-type-enum-in-res-tracker-to-conform-to-fw-spec.patch net-mlx4_en-fix-potential-deadlock-in-port-statistics-flow.patch net-mlx4_en-fix-the-return-value-of-a-failure-in-vlan-vid-add-kill.patch net-mlx4_en-process-all-completions-in-rx-rings-after-port-goes-up.patch net_sched-flower-avoid-dissection-of-unmasked-keys.patch pkt_sched-fq-use-proper-locking-in-fq_dump_stats.patch sched-fair-fix-calc_cfs_shares-fixed-point-arithmetics-width-confusion.patch sched-preempt-fix-preempt_count-manipulations.patch --- ...ncharged-during-map-creation-failure.patch | 48 +++++++++ ...hx2-fix-ethernet-port-initialization.patch | 35 ++++++ ...itchx2-fix-misuse-of-hard_header_len.patch | 38 +++++++ ...mode-to-device-managed-flow-steering.patch | 39 +++++++ ...device-state-before-unregistering-it.patch | 36 +++++++ ...l-if-it-has-not-yet-been-initialized.patch | 67 ++++++++++++ ...re-fix-access-to-uninitialized-index.patch | 42 ++++++++ ...tial-corruption-in-counters-database.patch | 44 ++++++++ ...t-mlx4_core-fix-query-func-cap-flags.patch | 58 ++++++++++ ...in-res-tracker-to-conform-to-fw-spec.patch | 42 ++++++++ ...ial-deadlock-in-port-statistics-flow.patch | 101 ++++++++++++++++++ 
...ue-of-a-failure-in-vlan-vid-add-kill.patch | 66 ++++++++++++ ...tions-in-rx-rings-after-port-goes-up.patch | 48 +++++++++ ...er-avoid-dissection-of-unmasked-keys.patch | 83 ++++++++++++++ ...-use-proper-locking-in-fq_dump_stats.patch | 64 +++++++++++ ...ed-point-arithmetics-width-confusion.patch | 95 ++++++++++++++++ ...empt-fix-preempt_count-manipulations.patch | 59 ++++++++++ queue-4.4/series | 17 +++ 18 files changed, 982 insertions(+) create mode 100644 queue-4.4/bpf-fix-map-not-being-uncharged-during-map-creation-failure.patch create mode 100644 queue-4.4/mlxsw-switchx2-fix-ethernet-port-initialization.patch create mode 100644 queue-4.4/mlxsw-switchx2-fix-misuse-of-hard_header_len.patch create mode 100644 queue-4.4/net-mlx4-fix-uninitialized-fields-in-rule-when-adding-promiscuous-mode-to-device-managed-flow-steering.patch create mode 100644 queue-4.4/net-mlx4_core-check-device-state-before-unregistering-it.patch create mode 100644 queue-4.4/net-mlx4_core-do-not-access-comm-channel-if-it-has-not-yet-been-initialized.patch create mode 100644 queue-4.4/net-mlx4_core-fix-access-to-uninitialized-index.patch create mode 100644 queue-4.4/net-mlx4_core-fix-potential-corruption-in-counters-database.patch create mode 100644 queue-4.4/net-mlx4_core-fix-query-func-cap-flags.patch create mode 100644 queue-4.4/net-mlx4_core-fix-the-resource-type-enum-in-res-tracker-to-conform-to-fw-spec.patch create mode 100644 queue-4.4/net-mlx4_en-fix-potential-deadlock-in-port-statistics-flow.patch create mode 100644 queue-4.4/net-mlx4_en-fix-the-return-value-of-a-failure-in-vlan-vid-add-kill.patch create mode 100644 queue-4.4/net-mlx4_en-process-all-completions-in-rx-rings-after-port-goes-up.patch create mode 100644 queue-4.4/net_sched-flower-avoid-dissection-of-unmasked-keys.patch create mode 100644 queue-4.4/pkt_sched-fq-use-proper-locking-in-fq_dump_stats.patch create mode 100644 queue-4.4/sched-fair-fix-calc_cfs_shares-fixed-point-arithmetics-width-confusion.patch create mode 
100644 queue-4.4/sched-preempt-fix-preempt_count-manipulations.patch diff --git a/queue-4.4/bpf-fix-map-not-being-uncharged-during-map-creation-failure.patch b/queue-4.4/bpf-fix-map-not-being-uncharged-during-map-creation-failure.patch new file mode 100644 index 00000000000..e88be1e950d --- /dev/null +++ b/queue-4.4/bpf-fix-map-not-being-uncharged-during-map-creation-failure.patch @@ -0,0 +1,48 @@ +From 20b2b24f91f70e7d3f0918c077546cb21bd73a87 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Fri, 4 Nov 2016 00:56:31 +0100 +Subject: bpf: fix map not being uncharged during map creation failure + +From: Daniel Borkmann + +commit 20b2b24f91f70e7d3f0918c077546cb21bd73a87 upstream. + +In map_create(), we first find and create the map, then once that +succeeded, we charge it to the user's RLIMIT_MEMLOCK, and then fetch +a new anon fd through anon_inode_getfd(). The problem is, once the +latter fails f.e. due to RLIMIT_NOFILE limit, then we only destruct +the map via map->ops->map_free(), but without uncharging the previously +locked memory first. That means that the user_struct allocation is +leaked as well as the accounted RLIMIT_MEMLOCK memory not released. +Make the label names in the fix consistent with bpf_prog_load(). + +Fixes: aaac3ba95e4c ("bpf: charge user for creation of BPF maps and programs") +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/bpf/syscall.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -152,7 +152,7 @@ static int map_create(union bpf_attr *at + + err = bpf_map_charge_memlock(map); + if (err) +- goto free_map; ++ goto free_map_nouncharge; + + err = bpf_map_new_fd(map); + if (err < 0) +@@ -162,6 +162,8 @@ static int map_create(union bpf_attr *at + return err; + + free_map: ++ bpf_map_uncharge_memlock(map); ++free_map_nouncharge: + map->ops->map_free(map); + return err; + } diff --git a/queue-4.4/mlxsw-switchx2-fix-ethernet-port-initialization.patch b/queue-4.4/mlxsw-switchx2-fix-ethernet-port-initialization.patch new file mode 100644 index 00000000000..d3a808cfc69 --- /dev/null +++ b/queue-4.4/mlxsw-switchx2-fix-ethernet-port-initialization.patch @@ -0,0 +1,35 @@ +From 7fb6a36bab6b0b158f93eb13faa1b440f8b26009 Mon Sep 17 00:00:00 2001 +From: Elad Raz +Date: Thu, 20 Oct 2016 16:05:44 +0200 +Subject: mlxsw: switchx2: Fix ethernet port initialization + +From: Elad Raz + +commit 7fb6a36bab6b0b158f93eb13faa1b440f8b26009 upstream. + +When creating an ethernet port fails, we must move the port to disable, +otherwise putting the port in switch partition 0 (ETH) or 1 (IB) will +always fail. + +Fixes: 31557f0f9755 ("mlxsw: Introduce Mellanox SwitchX-2 ASIC support") +Signed-off-by: Elad Raz +Reviewed-by: Jiri Pirko +Reviewed-by: Ido Schimmel +Signed-off-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +@@ -1074,6 +1074,7 @@ err_port_stp_state_set: + err_port_admin_status_set: + err_port_mtu_set: + err_port_speed_set: ++ mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT); + err_port_swid_set: + err_port_system_port_mapping_set: + port_not_usable: diff --git a/queue-4.4/mlxsw-switchx2-fix-misuse-of-hard_header_len.patch b/queue-4.4/mlxsw-switchx2-fix-misuse-of-hard_header_len.patch new file mode 100644 index 00000000000..481bb00a578 --- /dev/null +++ b/queue-4.4/mlxsw-switchx2-fix-misuse-of-hard_header_len.patch @@ -0,0 +1,38 @@ +From 251d41c58b765f00d73b1b4230cad256e25f2735 Mon Sep 17 00:00:00 2001 +From: Yotam Gigi +Date: Tue, 4 Oct 2016 09:46:05 +0200 +Subject: mlxsw: switchx2: Fix misuse of hard_header_len + +From: Yotam Gigi + +commit 251d41c58b765f00d73b1b4230cad256e25f2735 upstream. + +In order to specify that the mlxsw switchx2 driver needs additional +headroom for packets, there have been use of the hard_header_len field of +the netdevice struct. + +This commit changes that to use needed_headroom instead, as this is the +correct way to do that. + +Fixes: 31557f0f9755 ("mlxsw: Introduce Mellanox SwitchX-2 ASIC support") +Signed-off-by: Yotam Gigi +Acked-by: Ido Schimmel +Signed-off-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +@@ -993,7 +993,7 @@ static int mlxsw_sx_port_create(struct m + /* Each packet needs to have a Tx header (metadata) on top all other + * headers. 
+ */ +- dev->hard_header_len += MLXSW_TXHDR_LEN; ++ dev->needed_headroom = MLXSW_TXHDR_LEN; + + err = mlxsw_sx_port_module_check(mlxsw_sx_port, &usable); + if (err) { diff --git a/queue-4.4/net-mlx4-fix-uninitialized-fields-in-rule-when-adding-promiscuous-mode-to-device-managed-flow-steering.patch b/queue-4.4/net-mlx4-fix-uninitialized-fields-in-rule-when-adding-promiscuous-mode-to-device-managed-flow-steering.patch new file mode 100644 index 00000000000..50f52e66325 --- /dev/null +++ b/queue-4.4/net-mlx4-fix-uninitialized-fields-in-rule-when-adding-promiscuous-mode-to-device-managed-flow-steering.patch @@ -0,0 +1,39 @@ +From 44b911e77793d686b481608770d0c55c18055ba0 Mon Sep 17 00:00:00 2001 +From: Jack Morgenstein +Date: Sun, 27 Nov 2016 19:20:52 +0200 +Subject: net/mlx4: Fix uninitialized fields in rule when adding promiscuous mode to device managed flow steering + +From: Jack Morgenstein + +commit 44b911e77793d686b481608770d0c55c18055ba0 upstream. + +In procedure mlx4_flow_steer_promisc_add(), several fields +were left uninitialized in the rule structure. +Correctly initialize these fields. + +Fixes: 592e49dda812 ("net/mlx4: Implement promiscuous mode with device managed flow-steering") +Signed-off-by: Jack Morgenstein +Signed-off-by: Tariq Toukan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlx4/mcg.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c ++++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c +@@ -1464,7 +1464,12 @@ EXPORT_SYMBOL_GPL(mlx4_multicast_detach) + int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, + u32 qpn, enum mlx4_net_trans_promisc_mode mode) + { +- struct mlx4_net_trans_rule rule; ++ struct mlx4_net_trans_rule rule = { ++ .queue_mode = MLX4_NET_TRANS_Q_FIFO, ++ .exclusive = 0, ++ .allow_loopback = 1, ++ }; ++ + u64 *regid_p; + + switch (mode) { diff --git a/queue-4.4/net-mlx4_core-check-device-state-before-unregistering-it.patch b/queue-4.4/net-mlx4_core-check-device-state-before-unregistering-it.patch new file mode 100644 index 00000000000..b7e720eeeb6 --- /dev/null +++ b/queue-4.4/net-mlx4_core-check-device-state-before-unregistering-it.patch @@ -0,0 +1,36 @@ +From 9b022a6e0f26af108b9105b16b310393c898d9bd Mon Sep 17 00:00:00 2001 +From: Alex Vesker +Date: Mon, 25 Jul 2016 15:42:13 +0300 +Subject: net/mlx4_core: Check device state before unregistering it + +From: Alex Vesker + +commit 9b022a6e0f26af108b9105b16b310393c898d9bd upstream. + +Verify that the device state is registered before un-registering it. +This check is required to prevent an OOPS on flows that do +re-registration of the device and its previous state was +unregistered. + +Fixes: 225c7b1feef1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") +Signed-off-by: Alex Vesker +Signed-off-by: Tariq Toukan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlx4/intf.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx4/intf.c ++++ b/drivers/net/ethernet/mellanox/mlx4/intf.c +@@ -217,6 +217,9 @@ void mlx4_unregister_device(struct mlx4_ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_interface *intf; + ++ if (!(dev->persist->interface_state & MLX4_INTERFACE_STATE_UP)) ++ return; ++ + mlx4_stop_catas_poll(dev); + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION && + mlx4_is_slave(dev)) { diff --git a/queue-4.4/net-mlx4_core-do-not-access-comm-channel-if-it-has-not-yet-been-initialized.patch b/queue-4.4/net-mlx4_core-do-not-access-comm-channel-if-it-has-not-yet-been-initialized.patch new file mode 100644 index 00000000000..3cfa4604a92 --- /dev/null +++ b/queue-4.4/net-mlx4_core-do-not-access-comm-channel-if-it-has-not-yet-been-initialized.patch @@ -0,0 +1,67 @@ +From 81d184199e328fdad5633da139a10337327154e0 Mon Sep 17 00:00:00 2001 +From: Jack Morgenstein +Date: Thu, 27 Oct 2016 16:27:19 +0300 +Subject: net/mlx4_core: Do not access comm channel if it has not yet been initialized + +From: Jack Morgenstein + +commit 81d184199e328fdad5633da139a10337327154e0 upstream. + +In the Hypervisor, there are several FW commands which are invoked +before the comm channel is initialized (in mlx4_multi_func_init). +These include MOD_STAT_CONFIG, QUERY_DEV_CAP, INIT_HCA, and others. + +If any of these commands fails, say with a timeout, the Hypervisor +driver enters the internal error reset flow. In this flow, the driver +attempts to notify all slaves via the comm channel that an internal error +has occurred. + +Since the comm channel has not yet been initialized (i.e., mapped via +ioremap), this will cause dereferencing a NULL pointer. + +To fix this, do not access the comm channel in the internal error flow +if it has not yet been initialized. 
+ +Fixes: 55ad359225b2 ("net/mlx4_core: Enable device recovery flow with SRIOV") +Fixes: ab9c17a009ee ("mlx4_core: Modify driver initialization flow to accommodate SRIOV for Ethernet") +Signed-off-by: Jack Morgenstein +Signed-off-by: Tariq Toukan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c +@@ -2451,6 +2451,7 @@ err_comm_admin: + kfree(priv->mfunc.master.slave_state); + err_comm: + iounmap(priv->mfunc.comm); ++ priv->mfunc.comm = NULL; + err_vhcr: + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + priv->mfunc.vhcr, +@@ -2518,6 +2519,13 @@ void mlx4_report_internal_err_comm_event + int slave; + u32 slave_read; + ++ /* If the comm channel has not yet been initialized, ++ * skip reporting the internal error event to all ++ * the communication channels. ++ */ ++ if (!priv->mfunc.comm) ++ return; ++ + /* Report an internal error event to all + * communication channels. + */ +@@ -2552,6 +2560,7 @@ void mlx4_multi_func_cleanup(struct mlx4 + } + + iounmap(priv->mfunc.comm); ++ priv->mfunc.comm = NULL; + } + + void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask) diff --git a/queue-4.4/net-mlx4_core-fix-access-to-uninitialized-index.patch b/queue-4.4/net-mlx4_core-fix-access-to-uninitialized-index.patch new file mode 100644 index 00000000000..8caced661c2 --- /dev/null +++ b/queue-4.4/net-mlx4_core-fix-access-to-uninitialized-index.patch @@ -0,0 +1,42 @@ +From 2bb07e155bb3e0c722c806723f737cf8020961ef Mon Sep 17 00:00:00 2001 +From: Tariq Toukan +Date: Sun, 15 May 2016 10:21:26 +0300 +Subject: net/mlx4_core: Fix access to uninitialized index + +From: Tariq Toukan + +commit 2bb07e155bb3e0c722c806723f737cf8020961ef upstream. + +Prevent using uninitialized or negative index when handling +steering entries. 
+ +Fixes: b12d93d63c32 ('mlx4: Add support for promiscuous mode in the new steering model.') +Signed-off-by: Tariq Toukan +Reported-by: Dan Carpenter +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlx4/mcg.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c ++++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c +@@ -1109,7 +1109,7 @@ int mlx4_qp_attach_common(struct mlx4_de + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mgm *mgm; + u32 members_count; +- int index, prev; ++ int index = -1, prev; + int link = 0; + int i; + int err; +@@ -1188,7 +1188,7 @@ int mlx4_qp_attach_common(struct mlx4_de + goto out; + + out: +- if (prot == MLX4_PROT_ETH) { ++ if (prot == MLX4_PROT_ETH && index != -1) { + /* manage the steering entry for promisc mode */ + if (new_entry) + err = new_steering_entry(dev, port, steer, diff --git a/queue-4.4/net-mlx4_core-fix-potential-corruption-in-counters-database.patch b/queue-4.4/net-mlx4_core-fix-potential-corruption-in-counters-database.patch new file mode 100644 index 00000000000..2d57f7d7e7d --- /dev/null +++ b/queue-4.4/net-mlx4_core-fix-potential-corruption-in-counters-database.patch @@ -0,0 +1,44 @@ +From 6b94bab0ee8d5def6a2aac0ef6204ee6e24386b6 Mon Sep 17 00:00:00 2001 +From: Eran Ben Elisha +Date: Wed, 17 Feb 2016 17:24:24 +0200 +Subject: net/mlx4_core: Fix potential corruption in counters database + +From: Eran Ben Elisha + +commit 6b94bab0ee8d5def6a2aac0ef6204ee6e24386b6 upstream. + +The error flow in procedure handle_existing_counter() is wrong. + +The procedure should exit after encountering the error, not continue +as if everything is OK. + +Fixes: 68230242cdbc ('net/mlx4_core: Add port attribute when tracking counters') +Signed-off-by: Eran Ben Elisha +Signed-off-by: Jack Morgenstein +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c ++++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +@@ -918,11 +918,13 @@ static int handle_existing_counter(struc + + spin_lock_irq(mlx4_tlock(dev)); + r = find_res(dev, counter_index, RES_COUNTER); +- if (!r || r->owner != slave) ++ if (!r || r->owner != slave) { + ret = -EINVAL; +- counter = container_of(r, struct res_counter, com); +- if (!counter->port) +- counter->port = port; ++ } else { ++ counter = container_of(r, struct res_counter, com); ++ if (!counter->port) ++ counter->port = port; ++ } + + spin_unlock_irq(mlx4_tlock(dev)); + return ret; diff --git a/queue-4.4/net-mlx4_core-fix-query-func-cap-flags.patch b/queue-4.4/net-mlx4_core-fix-query-func-cap-flags.patch new file mode 100644 index 00000000000..7012f798c6f --- /dev/null +++ b/queue-4.4/net-mlx4_core-fix-query-func-cap-flags.patch @@ -0,0 +1,58 @@ +From c9cc599a96a6822c52cd72ed31dd7f813d792b4f Mon Sep 17 00:00:00 2001 +From: Moshe Shemesh +Date: Thu, 22 Sep 2016 12:11:12 +0300 +Subject: net/mlx4_core: Fix QUERY FUNC CAP flags + +From: Moshe Shemesh + +commit c9cc599a96a6822c52cd72ed31dd7f813d792b4f upstream. + +Separate QUERY_FUNC_CAP flags0 from QUERY_FUNC_CAP flags, as 'flags' is +already used for another set of flags in FUNC CAP, while phv bit should be +part of a different set of flags. +Remove QUERY_FUNC_CAP port_flags field, as it is not in use. + +Fixes: 77fc29c4bbbb ('net/mlx4_core: Preparations for 802.1ad VLAN support') +Fixes: 5cc914f10851 ('mlx4_core: Added FW commands and their wrappers for supporting SRIOV') +Signed-off-by: Moshe Shemesh +Signed-off-by: Tariq Toukan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlx4/fw.c | 5 ++--- + drivers/net/ethernet/mellanox/mlx4/fw.h | 2 +- + 2 files changed, 3 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/fw.c ++++ b/drivers/net/ethernet/mellanox/mlx4/fw.c +@@ -610,8 +610,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev + MLX4_GET(func_cap->phys_port_id, outbox, + QUERY_FUNC_CAP_PHYS_PORT_ID); + +- MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); +- func_cap->flags |= (field & QUERY_FUNC_CAP_PHV_BIT); ++ MLX4_GET(func_cap->flags0, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); + + /* All other resources are allocated by the master, but we still report + * 'num' and 'reserved' capabilities as follows: +@@ -2840,7 +2839,7 @@ int get_phv_bit(struct mlx4_dev *dev, u8 + memset(&func_cap, 0, sizeof(func_cap)); + err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap); + if (!err) +- *phv = func_cap.flags & QUERY_FUNC_CAP_PHV_BIT; ++ *phv = func_cap.flags0 & QUERY_FUNC_CAP_PHV_BIT; + return err; + } + EXPORT_SYMBOL(get_phv_bit); +--- a/drivers/net/ethernet/mellanox/mlx4/fw.h ++++ b/drivers/net/ethernet/mellanox/mlx4/fw.h +@@ -150,7 +150,7 @@ struct mlx4_func_cap { + u32 qp1_proxy_qpn; + u32 reserved_lkey; + u8 physical_port; +- u8 port_flags; ++ u8 flags0; + u8 flags1; + u64 phys_port_id; + u32 extra_flags; diff --git a/queue-4.4/net-mlx4_core-fix-the-resource-type-enum-in-res-tracker-to-conform-to-fw-spec.patch b/queue-4.4/net-mlx4_core-fix-the-resource-type-enum-in-res-tracker-to-conform-to-fw-spec.patch new file mode 100644 index 00000000000..7b5917fc256 --- /dev/null +++ b/queue-4.4/net-mlx4_core-fix-the-resource-type-enum-in-res-tracker-to-conform-to-fw-spec.patch @@ -0,0 +1,42 @@ +From aa0c08feae8161b945520ada753d0dfe62b14fe7 Mon Sep 17 00:00:00 2001 +From: Jack Morgenstein +Date: Thu, 27 Oct 2016 16:27:13 +0300 +Subject: net/mlx4_core: Fix the resource-type enum in res tracker to conform to FW spec + +From: Jack Morgenstein + +commit 
aa0c08feae8161b945520ada753d0dfe62b14fe7 upstream. + +The resource type enum in the resource tracker was incorrect. +RES_EQ was put in the position of RES_NPORT_ID (a FC resource). + +Since the remaining resources maintain their current values, +and RES_EQ is not passed from slaves to the hypervisor in any +FW command, this change affects only the hypervisor. +Therefore, there is no backwards-compatibility issue. + +Fixes: 623ed84b1f95 ("mlx4_core: initial header-file changes for SRIOV support") +Signed-off-by: Jack Morgenstein +Signed-off-by: Moshe Shemesh +Signed-off-by: Tariq Toukan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h ++++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h +@@ -143,9 +143,10 @@ enum mlx4_resource { + RES_MTT, + RES_MAC, + RES_VLAN, +- RES_EQ, ++ RES_NPORT_ID, + RES_COUNTER, + RES_FS_RULE, ++ RES_EQ, + MLX4_NUM_OF_RESOURCE_TYPE + }; + diff --git a/queue-4.4/net-mlx4_en-fix-potential-deadlock-in-port-statistics-flow.patch b/queue-4.4/net-mlx4_en-fix-potential-deadlock-in-port-statistics-flow.patch new file mode 100644 index 00000000000..a191ed3a380 --- /dev/null +++ b/queue-4.4/net-mlx4_en-fix-potential-deadlock-in-port-statistics-flow.patch @@ -0,0 +1,101 @@ +From d2582a03939ed0a80ffcd3ea5345505bc8067c54 Mon Sep 17 00:00:00 2001 +From: Jack Morgenstein +Date: Thu, 27 Oct 2016 16:27:21 +0300 +Subject: net/mlx4_en: Fix potential deadlock in port statistics flow + +From: Jack Morgenstein + +commit d2582a03939ed0a80ffcd3ea5345505bc8067c54 upstream. + +mlx4_en_DUMP_ETH_STATS took the *counter mutex* and then +called the FW command, with WRAPPED attribute. As a result, the fw command +is wrapped on the Hypervisor when it calls mlx4_en_DUMP_ETH_STATS. +The FW command wrapper flow on the hypervisor takes the *slave_cmd_mutex* +during processing. 
+ +At the same time, a VF could be in the process of coming up, and could +call mlx4_QUERY_FUNC_CAP. On the hypervisor, the command flow takes the +*slave_cmd_mutex*, then executes mlx4_QUERY_FUNC_CAP_wrapper. +mlx4_QUERY_FUNC_CAP wrapper calls mlx4_get_default_counter_index(), +which takes the *counter mutex*. DEADLOCK. + +The fix is that the DUMP_ETH_STATS fw command should be called with +the NATIVE attribute, so that on the hypervisor, this command does not +enter the wrapper flow. + +Since the Hypervisor no longer goes through the wrapper code, we also +simply return 0 in mlx4_DUMP_ETH_STATS_wrapper (i.e. the function succeeds, +but the returned data will be all zeroes). +No need to test if it is the Hypervisor going through the wrapper. + +Fixes: f9baff509f8a ("mlx4_core: Add "native" argument to mlx4_cmd ...") +Signed-off-by: Jack Morgenstein +Signed-off-by: Tariq Toukan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlx4/en_port.c | 4 ++-- + drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 -- + drivers/net/ethernet/mellanox/mlx4/port.c | 13 +------------ + 3 files changed, 3 insertions(+), 16 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c +@@ -164,7 +164,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_e + return PTR_ERR(mailbox); + err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0, + MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B, +- MLX4_CMD_WRAPPED); ++ MLX4_CMD_NATIVE); + if (err) + goto out; + +@@ -325,7 +325,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_e + err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, + in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL, + 0, MLX4_CMD_DUMP_ETH_STATS, +- MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); ++ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) + goto out; + } +--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h ++++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h +@@ -1313,8 +1313,6 @@ int 
mlx4_SET_VLAN_FLTR_wrapper(struct ml + struct mlx4_cmd_info *cmd); + int mlx4_common_set_vlan_fltr(struct mlx4_dev *dev, int function, + int port, void *buf); +-int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave, u32 in_mod, +- struct mlx4_cmd_mailbox *outbox); + int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, +--- a/drivers/net/ethernet/mellanox/mlx4/port.c ++++ b/drivers/net/ethernet/mellanox/mlx4/port.c +@@ -1155,24 +1155,13 @@ int mlx4_SET_VLAN_FLTR_wrapper(struct ml + return err; + } + +-int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave, +- u32 in_mod, struct mlx4_cmd_mailbox *outbox) +-{ +- return mlx4_cmd_box(dev, 0, outbox->dma, in_mod, 0, +- MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B, +- MLX4_CMD_NATIVE); +-} +- + int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) + { +- if (slave != dev->caps.function) +- return 0; +- return mlx4_common_dump_eth_stats(dev, slave, +- vhcr->in_modifier, outbox); ++ return 0; + } + + int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, diff --git a/queue-4.4/net-mlx4_en-fix-the-return-value-of-a-failure-in-vlan-vid-add-kill.patch b/queue-4.4/net-mlx4_en-fix-the-return-value-of-a-failure-in-vlan-vid-add-kill.patch new file mode 100644 index 00000000000..2830fd1415f --- /dev/null +++ b/queue-4.4/net-mlx4_en-fix-the-return-value-of-a-failure-in-vlan-vid-add-kill.patch @@ -0,0 +1,66 @@ +From 93c098af09455ea7bdc6f0f6b08f6ac14fa06cf4 Mon Sep 17 00:00:00 2001 +From: Kamal Heib +Date: Tue, 21 Jun 2016 14:20:02 +0300 +Subject: net/mlx4_en: Fix the return value of a failure in VLAN VID add/kill + +From: Kamal Heib + +commit 93c098af09455ea7bdc6f0f6b08f6ac14fa06cf4 upstream. + +Modify mlx4_en_vlan_rx_[add/kill]_vid to return error value in case of +failure. 
+ +Fixes: 8e586137e6b6 ('net: make vlan ndo_vlan_rx_[add/kill]_vid return error value') +Signed-off-by: Kamal Heib +Signed-off-by: Tariq Toukan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +@@ -424,14 +424,18 @@ static int mlx4_en_vlan_rx_add_vid(struc + mutex_lock(&mdev->state_lock); + if (mdev->device_up && priv->port_up) { + err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); +- if (err) ++ if (err) { + en_err(priv, "Failed configuring VLAN filter\n"); ++ goto out; ++ } + } +- if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx)) +- en_dbg(HW, priv, "failed adding vlan %d\n", vid); +- mutex_unlock(&mdev->state_lock); ++ err = mlx4_register_vlan(mdev->dev, priv->port, vid, &idx); ++ if (err) ++ en_dbg(HW, priv, "Failed adding vlan %d\n", vid); + +- return 0; ++out: ++ mutex_unlock(&mdev->state_lock); ++ return err; + } + + static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, +@@ -439,7 +443,7 @@ static int mlx4_en_vlan_rx_kill_vid(stru + { + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; +- int err; ++ int err = 0; + + en_dbg(HW, priv, "Killing VID:%d\n", vid); + +@@ -456,7 +460,7 @@ static int mlx4_en_vlan_rx_kill_vid(stru + } + mutex_unlock(&mdev->state_lock); + +- return 0; ++ return err; + } + + static void mlx4_en_u64_to_mac(unsigned char dst_mac[ETH_ALEN + 2], u64 src_mac) diff --git a/queue-4.4/net-mlx4_en-process-all-completions-in-rx-rings-after-port-goes-up.patch b/queue-4.4/net-mlx4_en-process-all-completions-in-rx-rings-after-port-goes-up.patch new file mode 100644 index 00000000000..ed4e4effc6d --- /dev/null +++ b/queue-4.4/net-mlx4_en-process-all-completions-in-rx-rings-after-port-goes-up.patch @@ -0,0 +1,48 @@ +From 
8d59de8f7bb3db296331c665779c653b0c8d13ba Mon Sep 17 00:00:00 2001 +From: Erez Shitrit +Date: Thu, 27 Oct 2016 16:27:17 +0300 +Subject: net/mlx4_en: Process all completions in RX rings after port goes up + +From: Erez Shitrit + +commit 8d59de8f7bb3db296331c665779c653b0c8d13ba upstream. + +Currently there is a race between incoming traffic and +initialization flow. HW is able to receive the packets +after INIT_PORT is done and unicast steering is configured. +Before we set priv->port_up NAPI is not scheduled and +receive queues become full. Therefore we never get +new interrupts about the completions. +This issue could happen if running heavy traffic during +bringing port up. +The resolution is to schedule NAPI once port_up is set. +If receive queues were full this will process all cqes +and release them. + +Fixes: c27a02cd94d6 ("mlx4_en: Add driver for Mellanox ConnectX 10GbE NIC") +Signed-off-by: Erez Shitrit +Signed-off-by: Eugenia Emantayev +Signed-off-by: Tariq Toukan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +@@ -1720,6 +1720,13 @@ int mlx4_en_start_port(struct net_device + vxlan_get_rx_port(dev); + #endif + priv->port_up = true; ++ ++ /* Process all completions if exist to prevent ++ * the queues freezing if they are full ++ */ ++ for (i = 0; i < priv->rx_ring_num; i++) ++ napi_schedule(&priv->rx_cq[i]->napi); ++ + netif_tx_start_all_queues(dev); + netif_device_attach(dev); + diff --git a/queue-4.4/net_sched-flower-avoid-dissection-of-unmasked-keys.patch b/queue-4.4/net_sched-flower-avoid-dissection-of-unmasked-keys.patch new file mode 100644 index 00000000000..239d8f959d9 --- /dev/null +++ b/queue-4.4/net_sched-flower-avoid-dissection-of-unmasked-keys.patch @@ -0,0 +1,83 @@ +From 339ba878cfb01b68de3d281ba33fd5e4c9f76546 Mon Sep 17 00:00:00 2001 +From: Hadar Hen Zion +Date: Wed, 17 Aug 2016 13:36:12 +0300 +Subject: net_sched: flower: Avoid dissection of unmasked keys + +From: Hadar Hen Zion + +commit 339ba878cfb01b68de3d281ba33fd5e4c9f76546 upstream. + +The current flower implementation checks the mask range and set all the +keys included in that range as "used_keys", even if a specific key in +the range has a zero mask. + +This behavior can cause a false positive return value of +dissector_uses_key function and unnecessary dissection in +__skb_flow_dissect. + +This patch checks explicitly the mask of each key and "used_keys" will +be set accordingly. + +Fixes: 77b9900ef53a ('tc: introduce Flower classifier') +Signed-off-by: Hadar Hen Zion +Signed-off-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + net/sched/cls_flower.c | 28 +++++++++++++--------------- + 1 file changed, 13 insertions(+), 15 deletions(-) + +--- a/net/sched/cls_flower.c ++++ b/net/sched/cls_flower.c +@@ -351,12 +351,10 @@ static int fl_init_hashtable(struct cls_ + + #define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member) + #define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member)) +-#define FL_KEY_MEMBER_END_OFFSET(member) \ +- (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member)) + +-#define FL_KEY_IN_RANGE(mask, member) \ +- (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end && \ +- FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start) ++#define FL_KEY_IS_MASKED(mask, member) \ ++ memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member), \ ++ 0, FL_KEY_MEMBER_SIZE(member)) \ + + #define FL_KEY_SET(keys, cnt, id, member) \ + do { \ +@@ -365,9 +363,9 @@ static int fl_init_hashtable(struct cls_ + cnt++; \ + } while(0); + +-#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member) \ ++#define FL_KEY_SET_IF_MASKED(mask, keys, cnt, id, member) \ + do { \ +- if (FL_KEY_IN_RANGE(mask, member)) \ ++ if (FL_KEY_IS_MASKED(mask, member)) \ + FL_KEY_SET(keys, cnt, id, member); \ + } while(0); + +@@ -379,14 +377,14 @@ static void fl_init_dissector(struct cls + + FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control); + FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic); +- FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, +- FLOW_DISSECTOR_KEY_ETH_ADDRS, eth); +- FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, +- FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); +- FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, +- FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); +- FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, +- FLOW_DISSECTOR_KEY_PORTS, tp); ++ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, ++ FLOW_DISSECTOR_KEY_ETH_ADDRS, eth); ++ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, ++ FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); ++ FL_KEY_SET_IF_MASKED(&mask->key, 
keys, cnt, ++ FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); ++ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, ++ FLOW_DISSECTOR_KEY_PORTS, tp); + + skb_flow_dissector_init(&head->dissector, keys, cnt); + } diff --git a/queue-4.4/pkt_sched-fq-use-proper-locking-in-fq_dump_stats.patch b/queue-4.4/pkt_sched-fq-use-proper-locking-in-fq_dump_stats.patch new file mode 100644 index 00000000000..3e6639c275c --- /dev/null +++ b/queue-4.4/pkt_sched-fq-use-proper-locking-in-fq_dump_stats.patch @@ -0,0 +1,64 @@ +From 695b4ec0f0a9cf29deabd3ac075911d58b31f42b Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Thu, 15 Sep 2016 16:20:01 -0700 +Subject: pkt_sched: fq: use proper locking in fq_dump_stats() + +From: Eric Dumazet + +commit 695b4ec0f0a9cf29deabd3ac075911d58b31f42b upstream. + +When fq is used on 32bit kernels, we need to lock the qdisc before +copying 64bit fields. + +Otherwise "tc -s qdisc ..." might report bogus values. + +Fixes: afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + net/sched/sch_fq.c | 32 ++++++++++++++++++-------------- + 1 file changed, 18 insertions(+), 14 deletions(-) + +--- a/net/sched/sch_fq.c ++++ b/net/sched/sch_fq.c +@@ -830,20 +830,24 @@ nla_put_failure: + static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) + { + struct fq_sched_data *q = qdisc_priv(sch); +- u64 now = ktime_get_ns(); +- struct tc_fq_qd_stats st = { +- .gc_flows = q->stat_gc_flows, +- .highprio_packets = q->stat_internal_packets, +- .tcp_retrans = q->stat_tcp_retrans, +- .throttled = q->stat_throttled, +- .flows_plimit = q->stat_flows_plimit, +- .pkts_too_long = q->stat_pkts_too_long, +- .allocation_errors = q->stat_allocation_errors, +- .flows = q->flows, +- .inactive_flows = q->inactive_flows, +- .throttled_flows = q->throttled_flows, +- .time_next_delayed_flow = q->time_next_delayed_flow - now, +- }; ++ struct tc_fq_qd_stats st; ++ ++ sch_tree_lock(sch); ++ ++ st.gc_flows = q->stat_gc_flows; ++ st.highprio_packets = q->stat_internal_packets; ++ st.tcp_retrans = q->stat_tcp_retrans; ++ st.throttled = q->stat_throttled; ++ st.flows_plimit = q->stat_flows_plimit; ++ st.pkts_too_long = q->stat_pkts_too_long; ++ st.allocation_errors = q->stat_allocation_errors; ++ st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns(); ++ st.flows = q->flows; ++ st.inactive_flows = q->inactive_flows; ++ st.throttled_flows = q->throttled_flows; ++ st.pad = 0; ++ ++ sch_tree_unlock(sch); + + return gnet_stats_copy_app(d, &st, sizeof(st)); + } diff --git a/queue-4.4/sched-fair-fix-calc_cfs_shares-fixed-point-arithmetics-width-confusion.patch b/queue-4.4/sched-fair-fix-calc_cfs_shares-fixed-point-arithmetics-width-confusion.patch new file mode 100644 index 00000000000..740a6838794 --- /dev/null +++ b/queue-4.4/sched-fair-fix-calc_cfs_shares-fixed-point-arithmetics-width-confusion.patch @@ -0,0 +1,95 @@ +From ea1dc6fc6242f991656e35e2ed3d90ec1cd13418 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra 
+Date: Fri, 24 Jun 2016 16:11:02 +0200 +Subject: sched/fair: Fix calc_cfs_shares() fixed point arithmetics width confusion + +From: Peter Zijlstra + +commit ea1dc6fc6242f991656e35e2ed3d90ec1cd13418 upstream. + +Commit: + + fde7d22e01aa ("sched/fair: Fix overly small weight for interactive group entities") + +did something non-obvious but also did it buggy yet latent. + +The problem was exposed for real by a later commit in the v4.7 merge window: + + 2159197d6677 ("sched/core: Enable increased load resolution on 64-bit kernels") + +... after which tg->load_avg and cfs_rq->load.weight had different +units (10 bit fixed point and 20 bit fixed point resp.). + +Add a comment to explain the use of cfs_rq->load.weight over the +'natural' cfs_rq->avg.load_avg and add scale_load_down() to correct +for the difference in unit. + +Since this is (now, as per a previous commit) the only user of +calc_tg_weight(), collapse it. + +The effects of this bug should be randomly inconsistent SMP-balancing +of cgroups workloads. 
+ +Reported-by: Jirka Hladky +Signed-off-by: Peter Zijlstra (Intel) +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Fixes: 2159197d6677 ("sched/core: Enable increased load resolution on 64-bit kernels") +Fixes: fde7d22e01aa ("sched/fair: Fix overly small weight for interactive group entities") +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/fair.c | 27 +++++++++++---------------- + 1 file changed, 11 insertions(+), 16 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -2394,28 +2394,22 @@ account_entity_dequeue(struct cfs_rq *cf + + #ifdef CONFIG_FAIR_GROUP_SCHED + # ifdef CONFIG_SMP +-static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq) ++static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) + { +- long tg_weight; ++ long tg_weight, load, shares; + + /* +- * Use this CPU's real-time load instead of the last load contribution +- * as the updating of the contribution is delayed, and we will use the +- * the real-time load to calc the share. See update_tg_load_avg(). ++ * This really should be: cfs_rq->avg.load_avg, but instead we use ++ * cfs_rq->load.weight, which is its upper bound. This helps ramp up ++ * the shares for small weight interactive tasks. 
+ */ +- tg_weight = atomic_long_read(&tg->load_avg); +- tg_weight -= cfs_rq->tg_load_avg_contrib; +- tg_weight += cfs_rq->load.weight; +- +- return tg_weight; +-} ++ load = scale_load_down(cfs_rq->load.weight); + +-static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) +-{ +- long tg_weight, load, shares; ++ tg_weight = atomic_long_read(&tg->load_avg); + +- tg_weight = calc_tg_weight(tg, cfs_rq); +- load = cfs_rq->load.weight; ++ /* Ensure tg_weight >= load */ ++ tg_weight -= cfs_rq->tg_load_avg_contrib; ++ tg_weight += load; + + shares = (tg->shares * load); + if (tg_weight) +@@ -2434,6 +2428,7 @@ static inline long calc_cfs_shares(struc + return tg->shares; + } + # endif /* CONFIG_SMP */ ++ + static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, + unsigned long weight) + { diff --git a/queue-4.4/sched-preempt-fix-preempt_count-manipulations.patch b/queue-4.4/sched-preempt-fix-preempt_count-manipulations.patch new file mode 100644 index 00000000000..cb3a5414415 --- /dev/null +++ b/queue-4.4/sched-preempt-fix-preempt_count-manipulations.patch @@ -0,0 +1,59 @@ +From 2e636d5e66c35dfcbaf617aa8fa963f6847478fe Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Mon, 16 May 2016 15:01:11 +0200 +Subject: sched/preempt: Fix preempt_count manipulations + +From: Peter Zijlstra + +commit 2e636d5e66c35dfcbaf617aa8fa963f6847478fe upstream. + +Vikram reported that his ARM64 compiler managed to 'optimize' away the +preempt_count manipulations in code like: + + preempt_enable_no_resched(); + put_user(); + preempt_disable(); + +Irrespective of that fact that that is horrible code that should be +fixed for many reasons, it does highlight a deficiency in the generic +preempt_count manipulators. As it is never right to combine/elide +preempt_count manipulations like this. 
+ +Therefore sprinkle some volatile in the two generic accessors to +ensure the compiler is aware of the fact that the preempt_count is +observed outside of the regular program-order view and thus cannot be +optimized away like this. + +x86; the only arch not using the generic code is not affected as we +do all this in asm in order to use the segment base per-cpu stuff. + +Reported-by: Vikram Mulukutla +Tested-by: Vikram Mulukutla +Signed-off-by: Peter Zijlstra (Intel) +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Fixes: a787870924db ("sched, arch: Create asm/preempt.h") +Link: http://lkml.kernel.org/r/20160516131751.GH3205@twins.programming.kicks-ass.net +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + include/asm-generic/preempt.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/include/asm-generic/preempt.h ++++ b/include/asm-generic/preempt.h +@@ -7,10 +7,10 @@ + + static __always_inline int preempt_count(void) + { +- return current_thread_info()->preempt_count; ++ return READ_ONCE(current_thread_info()->preempt_count); + } + +-static __always_inline int *preempt_count_ptr(void) ++static __always_inline volatile int *preempt_count_ptr(void) + { + return &current_thread_info()->preempt_count; + } diff --git a/queue-4.4/series b/queue-4.4/series index 8e828f4d7f5..0de5f99f6fe 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -213,3 +213,20 @@ clk-xgene-don-t-call-__pa-on-ioremaped-address.patch cls_bpf-reset-class-and-reuse-major-in-da.patch arm64-bpf-jit-jmp_jset_-x-k.patch bpf-trace-check-event-type-in-bpf_perf_event_read.patch +bpf-fix-map-not-being-uncharged-during-map-creation-failure.patch +net-mlx4_core-fix-potential-corruption-in-counters-database.patch +net-mlx4_core-fix-access-to-uninitialized-index.patch +net-mlx4_en-fix-the-return-value-of-a-failure-in-vlan-vid-add-kill.patch +net-mlx4_core-check-device-state-before-unregistering-it.patch 
+net-mlx4_core-fix-the-resource-type-enum-in-res-tracker-to-conform-to-fw-spec.patch +net-mlx4_en-process-all-completions-in-rx-rings-after-port-goes-up.patch +net-mlx4_core-do-not-access-comm-channel-if-it-has-not-yet-been-initialized.patch +net-mlx4_en-fix-potential-deadlock-in-port-statistics-flow.patch +net-mlx4-fix-uninitialized-fields-in-rule-when-adding-promiscuous-mode-to-device-managed-flow-steering.patch +net-mlx4_core-fix-query-func-cap-flags.patch +mlxsw-switchx2-fix-misuse-of-hard_header_len.patch +mlxsw-switchx2-fix-ethernet-port-initialization.patch +sched-fair-fix-calc_cfs_shares-fixed-point-arithmetics-width-confusion.patch +net_sched-flower-avoid-dissection-of-unmasked-keys.patch +pkt_sched-fq-use-proper-locking-in-fq_dump_stats.patch +sched-preempt-fix-preempt_count-manipulations.patch -- 2.47.3