From 77d0cf7357560973352e84496a2e16ae31a652dd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 24 Jun 2018 19:16:36 +0800 Subject: [PATCH] 4.16-stable patches added patches: bdi-move-cgroup-bdi_writeback-to-a-dedicated-low-concurrency-workqueue.patch blk-mq-reinit-q-tag_set_list-entry-only-after-grace-period.patch cifs-511c54a2f69195b28afb9dd119f03787b1625bb4-adds-a-check-for-session-expiry.patch cifs-for-smb2-security-informaion-query-check-for-minimum-sized-security-descriptor-instead-of-sizeof-fileallinformation-class.patch cpufreq-fix-new-policy-initialization-during-limits-updates-via-sysfs.patch cpufreq-governors-fix-long-idle-detection-logic-in-load-calculation.patch cpufreq-ti-cpufreq-fix-an-incorrect-error-return-value.patch genirq-affinity-defer-affinity-setting-if-irq-chip-is-busy.patch genirq-generic_pending-do-not-lose-pending-affinity-update.patch genirq-migration-avoid-out-of-line-call-if-pending-is-not-set.patch irq_remapping-use-apic_ack_irq.patch libata-drop-sandisk-sd7ub3q-g1001-nolpm-quirk.patch libata-zpodd-small-read-overflow-in-eject_tray.patch nbd-fix-nbd-device-deletion.patch nbd-update-size-when-connected.patch nbd-use-bd_set_size-when-updating-disk-size.patch nvme-pci-sync-controller-reset-for-aer-slot_reset.patch smb3-fix-various-xid-leaks.patch smb3-on-reconnect-set-previoussessionid-field.patch w1-mxc_w1-enable-clock-before-calling-clk_get_rate-on-it.patch x86-apic-provide-apic_ack_irq.patch x86-apic-vector-prevent-hlist-corruption-and-leaks.patch x86-intel_rdt-enable-cmt-and-mbm-on-new-skylake-stepping.patch x86-ioapic-use-apic_ack_irq.patch x86-platform-uv-use-apic_ack_irq.patch x86-vector-fix-the-args-of-vector_alloc-tracepoint.patch --- ...-dedicated-low-concurrency-workqueue.patch | 106 ++++++++++++ ...t_list-entry-only-after-grace-period.patch | 102 ++++++++++++ ...5bb4-adds-a-check-for-session-expiry.patch | 36 +++++ ...d-of-sizeof-fileallinformation-class.patch | 60 +++++++ ...tion-during-limits-updates-via-sysfs.patch | 50 ++++++ ...-detection-logic-in-load-calculation.patch | 71 ++++++++ ...-fix-an-incorrect-error-return-value.patch | 37 +++++ ...affinity-setting-if-irq-chip-is-busy.patch | 94 +++++++++++ ...-do-not-lose-pending-affinity-update.patch | 88 ++++++++++ ...t-of-line-call-if-pending-is-not-set.patch | 72 +++++++++ .../irq_remapping-use-apic_ack_irq.patch | 84 ++++++++++ ...op-sandisk-sd7ub3q-g1001-nolpm-quirk.patch | 56 +++++++ ...dd-small-read-overflow-in-eject_tray.patch | 33 ++++ queue-4.16/nbd-fix-nbd-device-deletion.patch | 39 +++++ .../nbd-update-size-when-connected.patch | 34 ++++ ...-bd_set_size-when-updating-disk-size.patch | 53 ++++++ ...-controller-reset-for-aer-slot_reset.patch | 54 +++++++ queue-4.16/series | 26 +++ queue-4.16/smb3-fix-various-xid-leaks.patch | 152 ++++++++++++++++++ ...econnect-set-previoussessionid-field.patch | 34 ++++ ...ck-before-calling-clk_get_rate-on-it.patch | 68 ++++++++ .../x86-apic-provide-apic_ack_irq.patch | 80 +++++++++ ...r-prevent-hlist-corruption-and-leaks.patch | 71 ++++++++ ...-cmt-and-mbm-on-new-skylake-stepping.patch | 37 +++++ queue-4.16/x86-ioapic-use-apic_ack_irq.patch | 44 +++++ .../x86-platform-uv-use-apic_ack_irq.patch | 57 +++++++ ...-the-args-of-vector_alloc-tracepoint.patch | 35 ++++ 27 files changed, 1673 insertions(+) create mode 100644 queue-4.16/bdi-move-cgroup-bdi_writeback-to-a-dedicated-low-concurrency-workqueue.patch create mode 100644 queue-4.16/blk-mq-reinit-q-tag_set_list-entry-only-after-grace-period.patch create mode 100644 
queue-4.16/cifs-511c54a2f69195b28afb9dd119f03787b1625bb4-adds-a-check-for-session-expiry.patch create mode 100644 queue-4.16/cifs-for-smb2-security-informaion-query-check-for-minimum-sized-security-descriptor-instead-of-sizeof-fileallinformation-class.patch create mode 100644 queue-4.16/cpufreq-fix-new-policy-initialization-during-limits-updates-via-sysfs.patch create mode 100644 queue-4.16/cpufreq-governors-fix-long-idle-detection-logic-in-load-calculation.patch create mode 100644 queue-4.16/cpufreq-ti-cpufreq-fix-an-incorrect-error-return-value.patch create mode 100644 queue-4.16/genirq-affinity-defer-affinity-setting-if-irq-chip-is-busy.patch create mode 100644 queue-4.16/genirq-generic_pending-do-not-lose-pending-affinity-update.patch create mode 100644 queue-4.16/genirq-migration-avoid-out-of-line-call-if-pending-is-not-set.patch create mode 100644 queue-4.16/irq_remapping-use-apic_ack_irq.patch create mode 100644 queue-4.16/libata-drop-sandisk-sd7ub3q-g1001-nolpm-quirk.patch create mode 100644 queue-4.16/libata-zpodd-small-read-overflow-in-eject_tray.patch create mode 100644 queue-4.16/nbd-fix-nbd-device-deletion.patch create mode 100644 queue-4.16/nbd-update-size-when-connected.patch create mode 100644 queue-4.16/nbd-use-bd_set_size-when-updating-disk-size.patch create mode 100644 queue-4.16/nvme-pci-sync-controller-reset-for-aer-slot_reset.patch create mode 100644 queue-4.16/smb3-fix-various-xid-leaks.patch create mode 100644 queue-4.16/smb3-on-reconnect-set-previoussessionid-field.patch create mode 100644 queue-4.16/w1-mxc_w1-enable-clock-before-calling-clk_get_rate-on-it.patch create mode 100644 queue-4.16/x86-apic-provide-apic_ack_irq.patch create mode 100644 queue-4.16/x86-apic-vector-prevent-hlist-corruption-and-leaks.patch create mode 100644 queue-4.16/x86-intel_rdt-enable-cmt-and-mbm-on-new-skylake-stepping.patch create mode 100644 queue-4.16/x86-ioapic-use-apic_ack_irq.patch create mode 100644 queue-4.16/x86-platform-uv-use-apic_ack_irq.patch create mode 100644 queue-4.16/x86-vector-fix-the-args-of-vector_alloc-tracepoint.patch diff --git a/queue-4.16/bdi-move-cgroup-bdi_writeback-to-a-dedicated-low-concurrency-workqueue.patch b/queue-4.16/bdi-move-cgroup-bdi_writeback-to-a-dedicated-low-concurrency-workqueue.patch new file mode 100644 index 00000000000..f43ae84257f --- /dev/null +++ b/queue-4.16/bdi-move-cgroup-bdi_writeback-to-a-dedicated-low-concurrency-workqueue.patch @@ -0,0 +1,106 @@ +From f183464684190bacbfb14623bd3e4e51b7575b4c Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Wed, 23 May 2018 10:56:32 -0700 +Subject: bdi: Move cgroup bdi_writeback to a dedicated low concurrency workqueue + +From: Tejun Heo + +commit f183464684190bacbfb14623bd3e4e51b7575b4c upstream. + +From 0aa2e9b921d6db71150633ff290199554f0842a8 Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Wed, 23 May 2018 10:29:00 -0700 + +cgwb_release() punts the actual release to cgwb_release_workfn() on +system_wq. Depending on the number of cgroups or block devices, there +can be a lot of cgwb_release_workfn() in flight at the same time. + +We're periodically seeing close to 256 kworkers getting stuck with the +following stack trace and overtime the entire system gets stuck. 
+ + [] _synchronize_rcu_expedited.constprop.72+0x2fc/0x330 + [] synchronize_rcu_expedited+0x24/0x30 + [] bdi_unregister+0x53/0x290 + [] release_bdi+0x89/0xc0 + [] wb_exit+0x85/0xa0 + [] cgwb_release_workfn+0x54/0xb0 + [] process_one_work+0x150/0x410 + [] worker_thread+0x6d/0x520 + [] kthread+0x12c/0x160 + [] ret_from_fork+0x29/0x40 + [] 0xffffffffffffffff + +The events leading to the lockup are... + +1. A lot of cgwb_release_workfn() is queued at the same time and all + system_wq kworkers are assigned to execute them. + +2. They all end up calling synchronize_rcu_expedited(). One of them + wins and tries to perform the expedited synchronization. + +3. However, that invovles queueing rcu_exp_work to system_wq and + waiting for it. Because #1 is holding all available kworkers on + system_wq, rcu_exp_work can't be executed. cgwb_release_workfn() + is waiting for synchronize_rcu_expedited() which in turn is waiting + for cgwb_release_workfn() to free up some of the kworkers. + +We shouldn't be scheduling hundreds of cgwb_release_workfn() at the +same time. There's nothing to be gained from that. This patch +updates cgwb release path to use a dedicated percpu workqueue with +@max_active of 1. + +While this resolves the problem at hand, it might be a good idea to +isolate rcu_exp_work to its own workqueue too as it can be used from +various paths and is prone to this sort of indirect A-A deadlocks. + +Signed-off-by: Tejun Heo +Cc: "Paul E. McKenney" +Cc: stable@vger.kernel.org +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + mm/backing-dev.c | 18 +++++++++++++++++- + 1 file changed, 17 insertions(+), 1 deletion(-) + +--- a/mm/backing-dev.c ++++ b/mm/backing-dev.c +@@ -423,6 +423,7 @@ static void wb_exit(struct bdi_writeback + * protected. + */ + static DEFINE_SPINLOCK(cgwb_lock); ++static struct workqueue_struct *cgwb_release_wq; + + /** + * wb_congested_get_create - get or create a wb_congested +@@ -533,7 +534,7 @@ static void cgwb_release(struct percpu_r + { + struct bdi_writeback *wb = container_of(refcnt, struct bdi_writeback, + refcnt); +- schedule_work(&wb->release_work); ++ queue_work(cgwb_release_wq, &wb->release_work); + } + + static void cgwb_kill(struct bdi_writeback *wb) +@@ -797,6 +798,21 @@ static void cgwb_bdi_register(struct bac + spin_unlock_irq(&cgwb_lock); + } + ++static int __init cgwb_init(void) ++{ ++ /* ++ * There can be many concurrent release work items overwhelming ++ * system_wq. Put them in a separate wq and limit concurrency. ++ * There's no point in executing many of these in parallel. ++ */ ++ cgwb_release_wq = alloc_workqueue("cgwb_release", 0, 1); ++ if (!cgwb_release_wq) ++ return -ENOMEM; ++ ++ return 0; ++} ++subsys_initcall(cgwb_init); ++ + #else /* CONFIG_CGROUP_WRITEBACK */ + + static int cgwb_bdi_init(struct backing_dev_info *bdi) diff --git a/queue-4.16/blk-mq-reinit-q-tag_set_list-entry-only-after-grace-period.patch b/queue-4.16/blk-mq-reinit-q-tag_set_list-entry-only-after-grace-period.patch new file mode 100644 index 00000000000..f311c33d71f --- /dev/null +++ b/queue-4.16/blk-mq-reinit-q-tag_set_list-entry-only-after-grace-period.patch @@ -0,0 +1,102 @@ +From a347c7ad8edf4c5685154f3fdc3c12fc1db800ba Mon Sep 17 00:00:00 2001 +From: Roman Pen +Date: Sun, 10 Jun 2018 22:38:24 +0200 +Subject: blk-mq: reinit q->tag_set_list entry only after grace period + +From: Roman Pen + +commit a347c7ad8edf4c5685154f3fdc3c12fc1db800ba upstream. 
+ +It is not allowed to reinit q->tag_set_list list entry while RCU grace +period has not completed yet, otherwise the following soft lockup in +blk_mq_sched_restart() happens: + +[ 1064.252652] watchdog: BUG: soft lockup - CPU#12 stuck for 23s! [fio:9270] +[ 1064.254445] task: ffff99b912e8b900 task.stack: ffffa6d54c758000 +[ 1064.254613] RIP: 0010:blk_mq_sched_restart+0x96/0x150 +[ 1064.256510] Call Trace: +[ 1064.256664] +[ 1064.256824] blk_mq_free_request+0xea/0x100 +[ 1064.256987] msg_io_conf+0x59/0xd0 [ibnbd_client] +[ 1064.257175] complete_rdma_req+0xf2/0x230 [ibtrs_client] +[ 1064.257340] ? ibtrs_post_recv_empty+0x4d/0x70 [ibtrs_core] +[ 1064.257502] ibtrs_clt_rdma_done+0xd1/0x1e0 [ibtrs_client] +[ 1064.257669] ib_create_qp+0x321/0x380 [ib_core] +[ 1064.257841] ib_process_cq_direct+0xbd/0x120 [ib_core] +[ 1064.258007] irq_poll_softirq+0xb7/0xe0 +[ 1064.258165] __do_softirq+0x106/0x2a2 +[ 1064.258328] irq_exit+0x92/0xa0 +[ 1064.258509] do_IRQ+0x4a/0xd0 +[ 1064.258660] common_interrupt+0x7a/0x7a +[ 1064.258818] + +Meanwhile another context frees other queue but with the same set of +shared tags: + +[ 1288.201183] INFO: task bash:5910 blocked for more than 180 seconds. +[ 1288.201833] bash D 0 5910 5820 0x00000000 +[ 1288.202016] Call Trace: +[ 1288.202315] schedule+0x32/0x80 +[ 1288.202462] schedule_timeout+0x1e5/0x380 +[ 1288.203838] wait_for_completion+0xb0/0x120 +[ 1288.204137] __wait_rcu_gp+0x125/0x160 +[ 1288.204287] synchronize_sched+0x6e/0x80 +[ 1288.204770] blk_mq_free_queue+0x74/0xe0 +[ 1288.204922] blk_cleanup_queue+0xc7/0x110 +[ 1288.205073] ibnbd_clt_unmap_device+0x1bc/0x280 [ibnbd_client] +[ 1288.205389] ibnbd_clt_unmap_dev_store+0x169/0x1f0 [ibnbd_client] +[ 1288.205548] kernfs_fop_write+0x109/0x180 +[ 1288.206328] vfs_write+0xb3/0x1a0 +[ 1288.206476] SyS_write+0x52/0xc0 +[ 1288.206624] do_syscall_64+0x68/0x1d0 +[ 1288.206774] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 + +What happened is the following: + +1. There are several MQ queues with shared tags. +2. One queue is about to be freed and now task is in + blk_mq_del_queue_tag_set(). +3. Other CPU is in blk_mq_sched_restart() and loops over all queues in + tag list in order to find hctx to restart. + +Because linked list entry was modified in blk_mq_del_queue_tag_set() +without proper waiting for a grace period, blk_mq_sched_restart() +never ends, spining in list_for_each_entry_rcu_rr(), thus soft lockup. + +Fix is simple: reinit list entry after an RCU grace period elapsed. 
+ +Fixes: Fixes: 705cda97ee3a ("blk-mq: Make it safe to use RCU to iterate over blk_mq_tag_set.tag_list") +Cc: stable@vger.kernel.org +Cc: Sagi Grimberg +Cc: linux-block@vger.kernel.org +Reviewed-by: Christoph Hellwig +Reviewed-by: Ming Lei +Reviewed-by: Bart Van Assche +Signed-off-by: Roman Pen +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + block/blk-mq.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2513,7 +2513,6 @@ static void blk_mq_del_queue_tag_set(str + + mutex_lock(&set->tag_list_lock); + list_del_rcu(&q->tag_set_list); +- INIT_LIST_HEAD(&q->tag_set_list); + if (list_is_singular(&set->tag_list)) { + /* just transitioned to unshared */ + set->flags &= ~BLK_MQ_F_TAG_SHARED; +@@ -2521,8 +2520,8 @@ static void blk_mq_del_queue_tag_set(str + blk_mq_update_tag_set_depth(set, false); + } + mutex_unlock(&set->tag_list_lock); +- + synchronize_rcu(); ++ INIT_LIST_HEAD(&q->tag_set_list); + } + + static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, diff --git a/queue-4.16/cifs-511c54a2f69195b28afb9dd119f03787b1625bb4-adds-a-check-for-session-expiry.patch b/queue-4.16/cifs-511c54a2f69195b28afb9dd119f03787b1625bb4-adds-a-check-for-session-expiry.patch new file mode 100644 index 00000000000..d4dd1fad0fd --- /dev/null +++ b/queue-4.16/cifs-511c54a2f69195b28afb9dd119f03787b1625bb4-adds-a-check-for-session-expiry.patch @@ -0,0 +1,36 @@ +From d81243c697ffc71f983736e7da2db31a8be0001f Mon Sep 17 00:00:00 2001 +From: Mark Syms +Date: Thu, 24 May 2018 09:47:31 +0100 +Subject: CIFS: 511c54a2f69195b28afb9dd119f03787b1625bb4 adds a check for session expiry + +From: Mark Syms + +commit d81243c697ffc71f983736e7da2db31a8be0001f upstream. + +Handle this additional status in the same way as SESSION_EXPIRED. + +Signed-off-by: Mark Syms +Signed-off-by: Steve French +CC: Stable +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/smb2ops.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/fs/cifs/smb2ops.c ++++ b/fs/cifs/smb2ops.c +@@ -1277,10 +1277,11 @@ smb2_is_session_expired(char *buf) + { + struct smb2_sync_hdr *shdr = get_sync_hdr(buf); + +- if (shdr->Status != STATUS_NETWORK_SESSION_EXPIRED) ++ if (shdr->Status != STATUS_NETWORK_SESSION_EXPIRED && ++ shdr->Status != STATUS_USER_SESSION_DELETED) + return false; + +- cifs_dbg(FYI, "Session expired\n"); ++ cifs_dbg(FYI, "Session expired or deleted\n"); + return true; + } + diff --git a/queue-4.16/cifs-for-smb2-security-informaion-query-check-for-minimum-sized-security-descriptor-instead-of-sizeof-fileallinformation-class.patch b/queue-4.16/cifs-for-smb2-security-informaion-query-check-for-minimum-sized-security-descriptor-instead-of-sizeof-fileallinformation-class.patch new file mode 100644 index 00000000000..876946359b8 --- /dev/null +++ b/queue-4.16/cifs-for-smb2-security-informaion-query-check-for-minimum-sized-security-descriptor-instead-of-sizeof-fileallinformation-class.patch @@ -0,0 +1,60 @@ +From ee25c6dd7b05113783ce1f4fab6b30fc00d29b8d Mon Sep 17 00:00:00 2001 +From: Shirish Pargaonkar +Date: Mon, 4 Jun 2018 06:46:22 -0500 +Subject: cifs: For SMB2 security informaion query, check for minimum sized security descriptor instead of sizeof FileAllInformation class + +From: Shirish Pargaonkar + +commit ee25c6dd7b05113783ce1f4fab6b30fc00d29b8d upstream. + +Validate_buf () function checks for an expected minimum sized response +passed to query_info() function. 
+For security information, the size of a security descriptor can be +smaller (one subauthority, no ACEs) than the size of the structure +that defines FileInfoClass of FileAllInformation. + +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199725 +Cc: +Signed-off-by: Shirish Pargaonkar +Reviewed-by: Noah Morrison +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/cifsacl.h | 14 ++++++++++++++ + fs/cifs/smb2pdu.c | 3 +-- + 2 files changed, 15 insertions(+), 2 deletions(-) + +--- a/fs/cifs/cifsacl.h ++++ b/fs/cifs/cifsacl.h +@@ -98,4 +98,18 @@ struct cifs_ace { + struct cifs_sid sid; /* ie UUID of user or group who gets these perms */ + } __attribute__((packed)); + ++/* ++ * Minimum security identifier can be one for system defined Users ++ * and Groups such as NULL SID and World or Built-in accounts such ++ * as Administrator and Guest and consists of ++ * Revision + Num (Sub)Auths + Authority + Domain (one Subauthority) ++ */ ++#define MIN_SID_LEN (1 + 1 + 6 + 4) /* in bytes */ ++ ++/* ++ * Minimum security descriptor can be one without any SACL and DACL and can ++ * consist of revision, type, and two sids of minimum size for owner and group ++ */ ++#define MIN_SEC_DESC_LEN (sizeof(struct cifs_ntsd) + (2 * MIN_SID_LEN)) ++ + #endif /* _CIFSACL_H */ +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -2271,8 +2271,7 @@ SMB2_query_acl(const unsigned int xid, s + + return query_info(xid, tcon, persistent_fid, volatile_fid, + 0, SMB2_O_INFO_SECURITY, additional_info, +- SMB2_MAX_BUFFER_SIZE, +- sizeof(struct smb2_file_all_info), data, plen); ++ SMB2_MAX_BUFFER_SIZE, MIN_SEC_DESC_LEN, data, plen); + } + + int diff --git a/queue-4.16/cpufreq-fix-new-policy-initialization-during-limits-updates-via-sysfs.patch b/queue-4.16/cpufreq-fix-new-policy-initialization-during-limits-updates-via-sysfs.patch new file mode 100644 index 00000000000..6c1672ae1a1 --- /dev/null +++ b/queue-4.16/cpufreq-fix-new-policy-initialization-during-limits-updates-via-sysfs.patch @@ -0,0 +1,50 @@ +From c7d1f119c48f64bebf0fa1e326af577c6152fe30 Mon Sep 17 00:00:00 2001 +From: Tao Wang +Date: Sat, 26 May 2018 15:16:48 +0800 +Subject: cpufreq: Fix new policy initialization during limits updates via sysfs + +From: Tao Wang + +commit c7d1f119c48f64bebf0fa1e326af577c6152fe30 upstream. + +If the policy limits are updated via cpufreq_update_policy() and +subsequently via sysfs, the limits stored in user_policy may be +set incorrectly. + +For example, if both min and max are set via sysfs to the maximum +available frequency, user_policy.min and user_policy.max will also +be the maximum. If a policy notifier triggered by +cpufreq_update_policy() lowers both the min and the max at this +point, that change is not reflected by the user_policy limits, so +if the max is updated again via sysfs to the same lower value, +then user_policy.max will be lower than user_policy.min which +shouldn't happen. In particular, if one of the policy CPUs is +then taken offline and back online, cpufreq_set_policy() will +fail for it due to a failing limits check. + +To prevent that from happening, initialize the min and max fields +of the new_policy object to the ones stored in user_policy that +were previously set via sysfs. + +Signed-off-by: Kevin Wangtao +Acked-by: Viresh Kumar +[ rjw: Subject & changelog ] +Cc: All applicable +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/cpufreq/cpufreq.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/cpufreq/cpufreq.c ++++ b/drivers/cpufreq/cpufreq.c +@@ -703,6 +703,8 @@ static ssize_t store_##file_name \ + struct cpufreq_policy new_policy; \ + \ + memcpy(&new_policy, policy, sizeof(*policy)); \ ++ new_policy.min = policy->user_policy.min; \ ++ new_policy.max = policy->user_policy.max; \ + \ + ret = sscanf(buf, "%u", &new_policy.object); \ + if (ret != 1) \ diff --git a/queue-4.16/cpufreq-governors-fix-long-idle-detection-logic-in-load-calculation.patch b/queue-4.16/cpufreq-governors-fix-long-idle-detection-logic-in-load-calculation.patch new file mode 100644 index 00000000000..3a7e119359d --- /dev/null +++ b/queue-4.16/cpufreq-governors-fix-long-idle-detection-logic-in-load-calculation.patch @@ -0,0 +1,71 @@ +From 7592019634f8473f0b0973ce79297183077bdbc2 Mon Sep 17 00:00:00 2001 +From: Chen Yu +Date: Fri, 8 Jun 2018 09:07:33 +0800 +Subject: cpufreq: governors: Fix long idle detection logic in load calculation + +From: Chen Yu + +commit 7592019634f8473f0b0973ce79297183077bdbc2 upstream. + +According to current code implementation, detecting the long +idle period is done by checking if the interval between two +adjacent utilization update handlers is long enough. Although +this mechanism can detect if the idle period is long enough +(no utilization hooks invoked during idle period), it might +not cover a corner case: if the task has occupied the CPU +for too long which causes no context switches during that +period, then no utilization handler will be launched until this +high prio task is scheduled out. As a result, the idle_periods +field might be calculated incorrectly because it regards the +100% load as 0% and makes the conservative governor who uses +this field confusing. + +Change the detection to compare the idle_time with sampling_rate +directly. + +Reported-by: Artem S. Tashkinov +Signed-off-by: Chen Yu +Acked-by: Viresh Kumar +Cc: All applicable +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/cpufreq/cpufreq_governor.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +--- a/drivers/cpufreq/cpufreq_governor.c ++++ b/drivers/cpufreq/cpufreq_governor.c +@@ -165,7 +165,7 @@ unsigned int dbs_update(struct cpufreq_p + * calls, so the previous load value can be used then. + */ + load = j_cdbs->prev_load; +- } else if (unlikely(time_elapsed > 2 * sampling_rate && ++ } else if (unlikely((int)idle_time > 2 * sampling_rate && + j_cdbs->prev_load)) { + /* + * If the CPU had gone completely idle and a task has +@@ -185,10 +185,8 @@ unsigned int dbs_update(struct cpufreq_p + * clear prev_load to guarantee that the load will be + * computed again next time. + * +- * Detecting this situation is easy: the governor's +- * utilization update handler would not have run during +- * CPU-idle periods. Hence, an unusually large +- * 'time_elapsed' (as compared to the sampling rate) ++ * Detecting this situation is easy: an unusually large ++ * 'idle_time' (as compared to the sampling rate) + * indicates this scenario. 
+ */ + load = j_cdbs->prev_load; +@@ -217,8 +215,8 @@ unsigned int dbs_update(struct cpufreq_p + j_cdbs->prev_load = load; + } + +- if (time_elapsed > 2 * sampling_rate) { +- unsigned int periods = time_elapsed / sampling_rate; ++ if (unlikely((int)idle_time > 2 * sampling_rate)) { ++ unsigned int periods = idle_time / sampling_rate; + + if (periods < idle_periods) + idle_periods = periods; diff --git a/queue-4.16/cpufreq-ti-cpufreq-fix-an-incorrect-error-return-value.patch b/queue-4.16/cpufreq-ti-cpufreq-fix-an-incorrect-error-return-value.patch new file mode 100644 index 00000000000..d1b7b1ff503 --- /dev/null +++ b/queue-4.16/cpufreq-ti-cpufreq-fix-an-incorrect-error-return-value.patch @@ -0,0 +1,37 @@ +From e5d295b06d69a1924665a16a4987be475addd00f Mon Sep 17 00:00:00 2001 +From: Suman Anna +Date: Thu, 31 May 2018 17:21:43 -0500 +Subject: cpufreq: ti-cpufreq: Fix an incorrect error return value + +From: Suman Anna + +commit e5d295b06d69a1924665a16a4987be475addd00f upstream. + +Commit 05829d9431df (cpufreq: ti-cpufreq: kfree opp_data when +failure) has fixed a memory leak in the failure path, however +the patch returned a positive value on get_cpu_device() failure +instead of the previous negative value. Fix this incorrect error +return value properly. + +Fixes: 05829d9431df (cpufreq: ti-cpufreq: kfree opp_data when failure) +Cc: 4.14+ # v4.14+ +Signed-off-by: Suman Anna +Acked-by: Viresh Kumar +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/cpufreq/ti-cpufreq.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/cpufreq/ti-cpufreq.c ++++ b/drivers/cpufreq/ti-cpufreq.c +@@ -226,7 +226,7 @@ static int ti_cpufreq_probe(struct platf + opp_data->cpu_dev = get_cpu_device(0); + if (!opp_data->cpu_dev) { + pr_err("%s: Failed to get device for CPU0\n", __func__); +- ret = ENODEV; ++ ret = -ENODEV; + goto free_opp_data; + } + diff --git a/queue-4.16/genirq-affinity-defer-affinity-setting-if-irq-chip-is-busy.patch b/queue-4.16/genirq-affinity-defer-affinity-setting-if-irq-chip-is-busy.patch new file mode 100644 index 00000000000..9973d6b3a5b --- /dev/null +++ b/queue-4.16/genirq-affinity-defer-affinity-setting-if-irq-chip-is-busy.patch @@ -0,0 +1,94 @@ +From 12f47073a40f6aa75119d8f5df4077b7f334cced Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Mon, 4 Jun 2018 17:33:59 +0200 +Subject: genirq/affinity: Defer affinity setting if irq chip is busy + +From: Thomas Gleixner + +commit 12f47073a40f6aa75119d8f5df4077b7f334cced upstream. + +The case that interrupt affinity setting fails with -EBUSY can be handled +in the kernel completely by using the already available generic pending +infrastructure. + +If a irq_chip::set_affinity() fails with -EBUSY, handle it like the +interrupts for which irq_chip::set_affinity() can only be invoked from +interrupt context. Copy the new affinity mask to irq_desc::pending_mask and +set the affinity pending bit. The next raised interrupt for the affected +irq will check the pending bit and try to set the new affinity from the +handler. This avoids that -EBUSY is returned when an affinity change is +requested from user space and the previous change has not been cleaned +up. The new affinity will take effect when the next interrupt is raised +from the device. 
+ +Fixes: dccfe3147b42 ("x86/vector: Simplify vector move cleanup") +Signed-off-by: Thomas Gleixner +Tested-by: Song Liu +Cc: Joerg Roedel +Cc: Peter Zijlstra +Cc: Song Liu +Cc: Dmitry Safonov <0x7f454c46@gmail.com> +Cc: stable@vger.kernel.org +Cc: Mike Travis +Cc: Borislav Petkov +Cc: Tariq Toukan +Link: https://lkml.kernel.org/r/20180604162224.819273597@linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/irq/manage.c | 37 +++++++++++++++++++++++++++++++++++-- + 1 file changed, 35 insertions(+), 2 deletions(-) + +--- a/kernel/irq/manage.c ++++ b/kernel/irq/manage.c +@@ -205,6 +205,39 @@ int irq_do_set_affinity(struct irq_data + return ret; + } + ++#ifdef CONFIG_GENERIC_PENDING_IRQ ++static inline int irq_set_affinity_pending(struct irq_data *data, ++ const struct cpumask *dest) ++{ ++ struct irq_desc *desc = irq_data_to_desc(data); ++ ++ irqd_set_move_pending(data); ++ irq_copy_pending(desc, dest); ++ return 0; ++} ++#else ++static inline int irq_set_affinity_pending(struct irq_data *data, ++ const struct cpumask *dest) ++{ ++ return -EBUSY; ++} ++#endif ++ ++static int irq_try_set_affinity(struct irq_data *data, ++ const struct cpumask *dest, bool force) ++{ ++ int ret = irq_do_set_affinity(data, dest, force); ++ ++ /* ++ * In case that the underlying vector management is busy and the ++ * architecture supports the generic pending mechanism then utilize ++ * this to avoid returning an error to user space. ++ */ ++ if (ret == -EBUSY && !force) ++ ret = irq_set_affinity_pending(data, dest); ++ return ret; ++} ++ + int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, + bool force) + { +@@ -215,8 +248,8 @@ int irq_set_affinity_locked(struct irq_d + if (!chip || !chip->irq_set_affinity) + return -EINVAL; + +- if (irq_can_move_pcntxt(data)) { +- ret = irq_do_set_affinity(data, mask, force); ++ if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) { ++ ret = irq_try_set_affinity(data, mask, force); + } else { + irqd_set_move_pending(data); + irq_copy_pending(desc, mask); diff --git a/queue-4.16/genirq-generic_pending-do-not-lose-pending-affinity-update.patch b/queue-4.16/genirq-generic_pending-do-not-lose-pending-affinity-update.patch new file mode 100644 index 00000000000..60b2c760012 --- /dev/null +++ b/queue-4.16/genirq-generic_pending-do-not-lose-pending-affinity-update.patch @@ -0,0 +1,88 @@ +From a33a5d2d16cb84bea8d5f5510f3a41aa48b5c467 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Mon, 4 Jun 2018 17:33:54 +0200 +Subject: genirq/generic_pending: Do not lose pending affinity update + +From: Thomas Gleixner + +commit a33a5d2d16cb84bea8d5f5510f3a41aa48b5c467 upstream. + +The generic pending interrupt mechanism moves interrupts from the interrupt +handler on the original target CPU to the new destination CPU. This is +required for x86 and ia64 due to the way the interrupt delivery and +acknowledge works if the interrupts are not remapped. + +However that update can fail for various reasons. Some of them are valid +reasons to discard the pending update, but the case, when the previous move +has not been fully cleaned up is not a legit reason to fail. + +Check the return value of irq_do_set_affinity() for -EBUSY, which indicates +a pending cleanup, and rearm the pending move in the irq dexcriptor so it's +tried again when the next interrupt arrives. 
+ +Fixes: 996c591227d9 ("x86/irq: Plug vector cleanup race") +Signed-off-by: Thomas Gleixner +Tested-by: Song Liu +Cc: Joerg Roedel +Cc: Peter Zijlstra +Cc: Song Liu +Cc: Dmitry Safonov <0x7f454c46@gmail.com> +Cc: stable@vger.kernel.org +Cc: Mike Travis +Cc: Borislav Petkov +Cc: Tariq Toukan +Link: https://lkml.kernel.org/r/20180604162224.386544292@linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/irq/migration.c | 24 ++++++++++++++++++------ + 1 file changed, 18 insertions(+), 6 deletions(-) + +--- a/kernel/irq/migration.c ++++ b/kernel/irq/migration.c +@@ -38,17 +38,18 @@ bool irq_fixup_move_pending(struct irq_d + void irq_move_masked_irq(struct irq_data *idata) + { + struct irq_desc *desc = irq_data_to_desc(idata); +- struct irq_chip *chip = desc->irq_data.chip; ++ struct irq_data *data = &desc->irq_data; ++ struct irq_chip *chip = data->chip; + +- if (likely(!irqd_is_setaffinity_pending(&desc->irq_data))) ++ if (likely(!irqd_is_setaffinity_pending(data))) + return; + +- irqd_clr_move_pending(&desc->irq_data); ++ irqd_clr_move_pending(data); + + /* + * Paranoia: cpu-local interrupts shouldn't be calling in here anyway. + */ +- if (irqd_is_per_cpu(&desc->irq_data)) { ++ if (irqd_is_per_cpu(data)) { + WARN_ON(1); + return; + } +@@ -73,9 +74,20 @@ void irq_move_masked_irq(struct irq_data + * For correct operation this depends on the caller + * masking the irqs. + */ +- if (cpumask_any_and(desc->pending_mask, cpu_online_mask) < nr_cpu_ids) +- irq_do_set_affinity(&desc->irq_data, desc->pending_mask, false); ++ if (cpumask_any_and(desc->pending_mask, cpu_online_mask) < nr_cpu_ids) { ++ int ret; + ++ ret = irq_do_set_affinity(data, desc->pending_mask, false); ++ /* ++ * If the there is a cleanup pending in the underlying ++ * vector management, reschedule the move for the next ++ * interrupt. Leave desc->pending_mask intact. ++ */ ++ if (ret == -EBUSY) { ++ irqd_set_move_pending(data); ++ return; ++ } ++ } + cpumask_clear(desc->pending_mask); + } + diff --git a/queue-4.16/genirq-migration-avoid-out-of-line-call-if-pending-is-not-set.patch b/queue-4.16/genirq-migration-avoid-out-of-line-call-if-pending-is-not-set.patch new file mode 100644 index 00000000000..baf4f0fe61a --- /dev/null +++ b/queue-4.16/genirq-migration-avoid-out-of-line-call-if-pending-is-not-set.patch @@ -0,0 +1,72 @@ +From d340ebd696f921d3ad01b8c0c29dd38f2ad2bf3e Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Wed, 6 Jun 2018 14:46:59 +0200 +Subject: genirq/migration: Avoid out of line call if pending is not set + +From: Thomas Gleixner + +commit d340ebd696f921d3ad01b8c0c29dd38f2ad2bf3e upstream. + +The upcoming fix for the -EBUSY return from affinity settings requires to +use the irq_move_irq() functionality even on irq remapped interrupts. To +avoid the out of line call, move the check for the pending bit into an +inline helper. + +Preparatory change for the real fix. No functional change. 
+ +Fixes: dccfe3147b42 ("x86/vector: Simplify vector move cleanup") +Signed-off-by: Thomas Gleixner +Cc: Joerg Roedel +Cc: Peter Zijlstra +Cc: Song Liu +Cc: Dmitry Safonov <0x7f454c46@gmail.com> +Cc: stable@vger.kernel.org +Cc: Mike Travis +Cc: Borislav Petkov +Cc: Tariq Toukan +Cc: Dou Liyang +Link: https://lkml.kernel.org/r/20180604162224.471925894@linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/irq.h | 7 ++++++- + kernel/irq/migration.c | 5 +---- + 2 files changed, 7 insertions(+), 5 deletions(-) + +--- a/include/linux/irq.h ++++ b/include/linux/irq.h +@@ -556,7 +556,12 @@ extern int irq_affinity_online_cpu(unsig + #endif + + #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) +-void irq_move_irq(struct irq_data *data); ++void __irq_move_irq(struct irq_data *data); ++static inline void irq_move_irq(struct irq_data *data) ++{ ++ if (unlikely(irqd_is_setaffinity_pending(data))) ++ __irq_move_irq(data); ++} + void irq_move_masked_irq(struct irq_data *data); + void irq_force_complete_move(struct irq_desc *desc); + #else +--- a/kernel/irq/migration.c ++++ b/kernel/irq/migration.c +@@ -91,7 +91,7 @@ void irq_move_masked_irq(struct irq_data + cpumask_clear(desc->pending_mask); + } + +-void irq_move_irq(struct irq_data *idata) ++void __irq_move_irq(struct irq_data *idata) + { + bool masked; + +@@ -102,9 +102,6 @@ void irq_move_irq(struct irq_data *idata + */ + idata = irq_desc_get_irq_data(irq_data_to_desc(idata)); + +- if (likely(!irqd_is_setaffinity_pending(idata))) +- return; +- + if (unlikely(irqd_irq_disabled(idata))) + return; + diff --git a/queue-4.16/irq_remapping-use-apic_ack_irq.patch b/queue-4.16/irq_remapping-use-apic_ack_irq.patch new file mode 100644 index 00000000000..3f0c5db0c72 --- /dev/null +++ b/queue-4.16/irq_remapping-use-apic_ack_irq.patch @@ -0,0 +1,84 @@ +From 8a2b7d142e7ac477d52f5f92251e59fc136d7ddd Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Mon, 4 Jun 2018 17:33:56 +0200 +Subject: irq_remapping: Use apic_ack_irq() + +From: Thomas Gleixner + +commit 8a2b7d142e7ac477d52f5f92251e59fc136d7ddd upstream. + +To address the EBUSY fail of interrupt affinity settings in case that the +previous setting has not been cleaned up yet, use the new apic_ack_irq() +function instead of the special ir_ack_apic_edge() implementation which is +merily a wrapper around ack_APIC_irq(). 
+ +Preparatory change for the real fix + +Fixes: dccfe3147b42 ("x86/vector: Simplify vector move cleanup") +Signed-off-by: Thomas Gleixner +Tested-by: Song Liu +Cc: Joerg Roedel +Cc: Peter Zijlstra +Cc: Song Liu +Cc: Dmitry Safonov <0x7f454c46@gmail.com> +Cc: stable@vger.kernel.org +Cc: Mike Travis +Cc: Borislav Petkov +Cc: Tariq Toukan +Link: https://lkml.kernel.org/r/20180604162224.555716895@linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/iommu/amd_iommu.c | 2 +- + drivers/iommu/intel_irq_remapping.c | 2 +- + drivers/iommu/irq_remapping.c | 5 ----- + drivers/iommu/irq_remapping.h | 2 -- + 4 files changed, 2 insertions(+), 9 deletions(-) + +--- a/drivers/iommu/amd_iommu.c ++++ b/drivers/iommu/amd_iommu.c +@@ -4352,7 +4352,7 @@ static void ir_compose_msi_msg(struct ir + + static struct irq_chip amd_ir_chip = { + .name = "AMD-IR", +- .irq_ack = ir_ack_apic_edge, ++ .irq_ack = apic_ack_irq, + .irq_set_affinity = amd_ir_set_affinity, + .irq_set_vcpu_affinity = amd_ir_set_vcpu_affinity, + .irq_compose_msi_msg = ir_compose_msi_msg, +--- a/drivers/iommu/intel_irq_remapping.c ++++ b/drivers/iommu/intel_irq_remapping.c +@@ -1223,7 +1223,7 @@ static int intel_ir_set_vcpu_affinity(st + + static struct irq_chip intel_ir_chip = { + .name = "INTEL-IR", +- .irq_ack = ir_ack_apic_edge, ++ .irq_ack = apic_ack_irq, + .irq_set_affinity = intel_ir_set_affinity, + .irq_compose_msi_msg = intel_ir_compose_msi_msg, + .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, +--- a/drivers/iommu/irq_remapping.c ++++ b/drivers/iommu/irq_remapping.c +@@ -156,11 +156,6 @@ void panic_if_irq_remap(const char *msg) + panic(msg); + } + +-void ir_ack_apic_edge(struct irq_data *data) +-{ +- ack_APIC_irq(); +-} +- + /** + * irq_remapping_get_ir_irq_domain - Get the irqdomain associated with the IOMMU + * device serving request @info +--- a/drivers/iommu/irq_remapping.h ++++ b/drivers/iommu/irq_remapping.h +@@ -65,8 +65,6 @@ struct irq_remap_ops { + extern struct irq_remap_ops intel_irq_remap_ops; + extern struct irq_remap_ops amd_iommu_irq_ops; + +-extern void ir_ack_apic_edge(struct irq_data *data); +- + #else /* CONFIG_IRQ_REMAP */ + + #define irq_remapping_enabled 0 diff --git a/queue-4.16/libata-drop-sandisk-sd7ub3q-g1001-nolpm-quirk.patch b/queue-4.16/libata-drop-sandisk-sd7ub3q-g1001-nolpm-quirk.patch new file mode 100644 index 00000000000..0837cd122d0 --- /dev/null +++ b/queue-4.16/libata-drop-sandisk-sd7ub3q-g1001-nolpm-quirk.patch @@ -0,0 +1,56 @@ +From 2cfce3a86b64b53f0a70e92a6a659c720c319b45 Mon Sep 17 00:00:00 2001 +From: Hans de Goede +Date: Thu, 31 May 2018 13:21:07 +0200 +Subject: libata: Drop SanDisk SD7UB3Q*G1001 NOLPM quirk + +From: Hans de Goede + +commit 2cfce3a86b64b53f0a70e92a6a659c720c319b45 upstream. + +Commit 184add2ca23c ("libata: Apply NOLPM quirk for SanDisk +SD7UB3Q*G1001 SSDs") disabled LPM for SanDisk SD7UB3Q*G1001 SSDs. + +This has lead to several reports of users of that SSD where LPM +was working fine and who know have a significantly increased idle +power consumption on their laptops. + +Likely there is another problem on the T450s from the original +reporter which gets exposed by the uncore reaching deeper sleep +states (higher PC-states) due to LPM being enabled. The problem as +reported, a hardfreeze about once a day, already did not sound like +it would be caused by LPM and the reports of the SSD working fine +confirm this. The original reporter is ok with dropping the quirk. 
+ +A X250 user has reported the same hard freeze problem and for him +the problem went away after unrelated updates, I suspect some GPU +driver stack changes fixed things. + +TL;DR: The original reporters problem were triggered by LPM but not +an LPM issue, so drop the quirk for the SSD in question. + +BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1583207 +Cc: stable@vger.kernel.org +Cc: Richard W.M. Jones +Cc: Lorenzo Dalrio +Reported-by: Lorenzo Dalrio +Signed-off-by: Hans de Goede +Signed-off-by: Tejun Heo +Acked-by: "Richard W.M. Jones" +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ata/libata-core.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/drivers/ata/libata-core.c ++++ b/drivers/ata/libata-core.c +@@ -4553,9 +4553,6 @@ static const struct ata_blacklist_entry + ATA_HORKAGE_ZERO_AFTER_TRIM | + ATA_HORKAGE_NOLPM, }, + +- /* Sandisk devices which are known to not handle LPM well */ +- { "SanDisk SD7UB3Q*G1001", NULL, ATA_HORKAGE_NOLPM, }, +- + /* devices that don't properly handle queued TRIM commands */ + { "Micron_M500IT_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, diff --git a/queue-4.16/libata-zpodd-small-read-overflow-in-eject_tray.patch b/queue-4.16/libata-zpodd-small-read-overflow-in-eject_tray.patch new file mode 100644 index 00000000000..17204d8cb89 --- /dev/null +++ b/queue-4.16/libata-zpodd-small-read-overflow-in-eject_tray.patch @@ -0,0 +1,33 @@ +From 18c9a99bce2a57dfd7e881658703b5d7469cc7b9 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Tue, 29 May 2018 12:13:24 +0300 +Subject: libata: zpodd: small read overflow in eject_tray() + +From: Dan Carpenter + +commit 18c9a99bce2a57dfd7e881658703b5d7469cc7b9 upstream. + +We read from the cdb[] buffer in ata_exec_internal_sg(). It has to be +ATAPI_CDB_LEN (16) bytes long, but this buffer is only 12 bytes. + +Fixes: 213342053db5 ("libata: handle power transition of ODD") +Signed-off-by: Dan Carpenter +Signed-off-by: Tejun Heo +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/ata/libata-zpodd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/ata/libata-zpodd.c ++++ b/drivers/ata/libata-zpodd.c +@@ -35,7 +35,7 @@ struct zpodd { + static int eject_tray(struct ata_device *dev) + { + struct ata_taskfile tf; +- static const char cdb[] = { GPCMD_START_STOP_UNIT, ++ static const char cdb[ATAPI_CDB_LEN] = { GPCMD_START_STOP_UNIT, + 0, 0, 0, + 0x02, /* LoEj */ + 0, 0, 0, 0, 0, 0, 0, diff --git a/queue-4.16/nbd-fix-nbd-device-deletion.patch b/queue-4.16/nbd-fix-nbd-device-deletion.patch new file mode 100644 index 00000000000..6efc2c27e54 --- /dev/null +++ b/queue-4.16/nbd-fix-nbd-device-deletion.patch @@ -0,0 +1,39 @@ +From 8364da4751cf22201d74933d5e634176f44ed407 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Wed, 16 May 2018 14:51:17 -0400 +Subject: nbd: fix nbd device deletion + +From: Josef Bacik + +commit 8364da4751cf22201d74933d5e634176f44ed407 upstream. + +This fixes a use after free bug, we shouldn't be doing disk->queue right +after we do del_gendisk(disk). Save the queue and do the cleanup after +the del_gendisk. 
+ +Fixes: c6a4759ea0c9 ("nbd: add device refcounting") +cc: stable@vger.kernel.org +Signed-off-by: Josef Bacik +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/nbd.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/block/nbd.c ++++ b/drivers/block/nbd.c +@@ -173,9 +173,12 @@ static const struct device_attribute pid + static void nbd_dev_remove(struct nbd_device *nbd) + { + struct gendisk *disk = nbd->disk; ++ struct request_queue *q; ++ + if (disk) { ++ q = disk->queue; + del_gendisk(disk); +- blk_cleanup_queue(disk->queue); ++ blk_cleanup_queue(q); + blk_mq_free_tag_set(&nbd->tag_set); + disk->private_data = NULL; + put_disk(disk); diff --git a/queue-4.16/nbd-update-size-when-connected.patch b/queue-4.16/nbd-update-size-when-connected.patch new file mode 100644 index 00000000000..90bb36e071d --- /dev/null +++ b/queue-4.16/nbd-update-size-when-connected.patch @@ -0,0 +1,34 @@ +From c3f7c9397609705ef848cc98a5fb429b3e90c3c4 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Wed, 16 May 2018 14:51:18 -0400 +Subject: nbd: update size when connected + +From: Josef Bacik + +commit c3f7c9397609705ef848cc98a5fb429b3e90c3c4 upstream. + +I messed up changing the size of an NBD device while it was connected by +not actually updating the device or doing the uevent. Fix this by +updating everything if we're connected and we change the size. + +cc: stable@vger.kernel.org +Fixes: 639812a ("nbd: don't set the device size until we're connected") +Signed-off-by: Josef Bacik +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/nbd.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/block/nbd.c ++++ b/drivers/block/nbd.c +@@ -246,6 +246,8 @@ static void nbd_size_set(struct nbd_devi + struct nbd_config *config = nbd->config; + config->blksize = blocksize; + config->bytesize = blocksize * nr_blocks; ++ if (nbd->task_recv != NULL) ++ nbd_size_update(nbd); + } + + static void nbd_complete_rq(struct request *req) diff --git a/queue-4.16/nbd-use-bd_set_size-when-updating-disk-size.patch b/queue-4.16/nbd-use-bd_set_size-when-updating-disk-size.patch new file mode 100644 index 00000000000..72999413b9f --- /dev/null +++ b/queue-4.16/nbd-use-bd_set_size-when-updating-disk-size.patch @@ -0,0 +1,53 @@ +From 9e2b19675d1338d2a38e99194756f2db44a081df Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Wed, 16 May 2018 14:51:19 -0400 +Subject: nbd: use bd_set_size when updating disk size + +From: Josef Bacik + +commit 9e2b19675d1338d2a38e99194756f2db44a081df upstream. + +When we stopped relying on the bdev everywhere I broke updating the +block device size on the fly, which ceph relies on. We can't just do +set_capacity, we also have to do bd_set_size so things like parted will +notice the device size change. 
+ +Fixes: 29eaadc ("nbd: stop using the bdev everywhere") +cc: stable@vger.kernel.org +Signed-off-by: Josef Bacik +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/nbd.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/drivers/block/nbd.c ++++ b/drivers/block/nbd.c +@@ -234,9 +234,18 @@ static void nbd_size_clear(struct nbd_de + static void nbd_size_update(struct nbd_device *nbd) + { + struct nbd_config *config = nbd->config; ++ struct block_device *bdev = bdget_disk(nbd->disk, 0); ++ + blk_queue_logical_block_size(nbd->disk->queue, config->blksize); + blk_queue_physical_block_size(nbd->disk->queue, config->blksize); + set_capacity(nbd->disk, config->bytesize >> 9); ++ if (bdev) { ++ if (bdev->bd_disk) ++ bd_set_size(bdev, config->bytesize); ++ else ++ bdev->bd_invalidated = 1; ++ bdput(bdev); ++ } + kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); + } + +@@ -1114,7 +1123,6 @@ static int nbd_start_device_ioctl(struct + if (ret) + return ret; + +- bd_set_size(bdev, config->bytesize); + if (max_part) + bdev->bd_invalidated = 1; + mutex_unlock(&nbd->config_lock); diff --git a/queue-4.16/nvme-pci-sync-controller-reset-for-aer-slot_reset.patch b/queue-4.16/nvme-pci-sync-controller-reset-for-aer-slot_reset.patch new file mode 100644 index 00000000000..ff961fb76aa --- /dev/null +++ b/queue-4.16/nvme-pci-sync-controller-reset-for-aer-slot_reset.patch @@ -0,0 +1,54 @@ +From cc1d5e749a2e1cf59fa940b976181e631d6985e1 Mon Sep 17 00:00:00 2001 +From: Keith Busch +Date: Thu, 10 May 2018 08:34:20 -0600 +Subject: nvme/pci: Sync controller reset for AER slot_reset + +From: Keith Busch + +commit cc1d5e749a2e1cf59fa940b976181e631d6985e1 upstream. + +AER handling expects a successful return from slot_reset means the +driver made the device functional again. The nvme driver had been using +an asynchronous reset to recover the device, so the device +may still be initializing after control is returned to the +AER handler. This creates problems for subsequent event handling, +causing the initializion to fail. + +This patch fixes that by syncing the controller reset before returning +to the AER driver, and reporting the true state of the reset. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=199657 +Reported-by: Alex Gagniuc +Cc: Sinan Kaya +Cc: Bjorn Helgaas +Cc: stable@vger.kernel.org +Tested-by: Alex Gagniuc +Reviewed-by: Christoph Hellwig +Reviewed-by: Martin K. 
Petersen +Signed-off-by: Keith Busch +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/nvme/host/pci.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -2665,8 +2665,15 @@ static pci_ers_result_t nvme_slot_reset( + + dev_info(dev->ctrl.device, "restart after slot reset\n"); + pci_restore_state(pdev); +- nvme_reset_ctrl(&dev->ctrl); +- return PCI_ERS_RESULT_RECOVERED; ++ nvme_reset_ctrl_sync(&dev->ctrl); ++ ++ switch (dev->ctrl.state) { ++ case NVME_CTRL_LIVE: ++ case NVME_CTRL_ADMIN_ONLY: ++ return PCI_ERS_RESULT_RECOVERED; ++ default: ++ return PCI_ERS_RESULT_DISCONNECT; ++ } + } + + static void nvme_error_resume(struct pci_dev *pdev) diff --git a/queue-4.16/series b/queue-4.16/series index fcace3d3470..c6f0c498d3e 100644 --- a/queue-4.16/series +++ b/queue-4.16/series @@ -28,3 +28,29 @@ alsa-hda-handle-kzalloc-failure-in-snd_hda_attach_pcm_stream.patch alsa-hda-add-dock-and-led-support-for-hp-elitebook-830-g5.patch alsa-hda-add-dock-and-led-support-for-hp-probook-640-g4.patch x86-mce-fix-stack-out-of-bounds-write-in-mce-inject.c-flags_read.patch +smb3-fix-various-xid-leaks.patch +smb3-on-reconnect-set-previoussessionid-field.patch +cifs-511c54a2f69195b28afb9dd119f03787b1625bb4-adds-a-check-for-session-expiry.patch +cifs-for-smb2-security-informaion-query-check-for-minimum-sized-security-descriptor-instead-of-sizeof-fileallinformation-class.patch +nbd-fix-nbd-device-deletion.patch +nbd-update-size-when-connected.patch +nbd-use-bd_set_size-when-updating-disk-size.patch +blk-mq-reinit-q-tag_set_list-entry-only-after-grace-period.patch +bdi-move-cgroup-bdi_writeback-to-a-dedicated-low-concurrency-workqueue.patch +cpufreq-fix-new-policy-initialization-during-limits-updates-via-sysfs.patch +cpufreq-ti-cpufreq-fix-an-incorrect-error-return-value.patch +cpufreq-governors-fix-long-idle-detection-logic-in-load-calculation.patch +libata-zpodd-small-read-overflow-in-eject_tray.patch +libata-drop-sandisk-sd7ub3q-g1001-nolpm-quirk.patch +nvme-pci-sync-controller-reset-for-aer-slot_reset.patch +w1-mxc_w1-enable-clock-before-calling-clk_get_rate-on-it.patch +x86-vector-fix-the-args-of-vector_alloc-tracepoint.patch +x86-apic-vector-prevent-hlist-corruption-and-leaks.patch +x86-apic-provide-apic_ack_irq.patch +x86-ioapic-use-apic_ack_irq.patch +x86-platform-uv-use-apic_ack_irq.patch +irq_remapping-use-apic_ack_irq.patch +genirq-generic_pending-do-not-lose-pending-affinity-update.patch +genirq-affinity-defer-affinity-setting-if-irq-chip-is-busy.patch +genirq-migration-avoid-out-of-line-call-if-pending-is-not-set.patch +x86-intel_rdt-enable-cmt-and-mbm-on-new-skylake-stepping.patch diff --git a/queue-4.16/smb3-fix-various-xid-leaks.patch b/queue-4.16/smb3-fix-various-xid-leaks.patch new file mode 100644 index 00000000000..70bd4c4ea5d --- /dev/null +++ b/queue-4.16/smb3-fix-various-xid-leaks.patch @@ -0,0 +1,152 @@ +From cfe89091644c441a1ade6dae6d2e47b715648615 Mon Sep 17 00:00:00 2001 +From: Steve French +Date: Sat, 19 May 2018 02:04:55 -0500 +Subject: smb3: fix various xid leaks + +From: Steve French + +commit cfe89091644c441a1ade6dae6d2e47b715648615 upstream. + +Fix a few cases where we were not freeing the xid which led to +active requests being non-zero at unmount time. 
+ +Signed-off-by: Steve French +CC: Stable +Reviewed-by: Ronnie Sahlberg +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/smb2ops.c | 63 +++++++++++++++++++++++++++++++++++++----------------- + 1 file changed, 44 insertions(+), 19 deletions(-) + +--- a/fs/cifs/smb2ops.c ++++ b/fs/cifs/smb2ops.c +@@ -1589,8 +1589,11 @@ get_smb2_acl_by_path(struct cifs_sb_info + oparms.create_options = 0; + + utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); +- if (!utf16_path) +- return ERR_PTR(-ENOMEM); ++ if (!utf16_path) { ++ rc = -ENOMEM; ++ free_xid(xid); ++ return ERR_PTR(rc); ++ } + + oparms.tcon = tcon; + oparms.desired_access = READ_CONTROL; +@@ -1648,8 +1651,11 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, _ + access_flags = WRITE_DAC; + + utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); +- if (!utf16_path) +- return -ENOMEM; ++ if (!utf16_path) { ++ rc = -ENOMEM; ++ free_xid(xid); ++ return rc; ++ } + + oparms.tcon = tcon; + oparms.desired_access = access_flags; +@@ -1709,15 +1715,21 @@ static long smb3_zero_range(struct file + + /* if file not oplocked can't be sure whether asking to extend size */ + if (!CIFS_CACHE_READ(cifsi)) +- if (keep_size == false) +- return -EOPNOTSUPP; ++ if (keep_size == false) { ++ rc = -EOPNOTSUPP; ++ free_xid(xid); ++ return rc; ++ } + + /* + * Must check if file sparse since fallocate -z (zero range) assumes + * non-sparse allocation + */ +- if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE)) +- return -EOPNOTSUPP; ++ if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE)) { ++ rc = -EOPNOTSUPP; ++ free_xid(xid); ++ return rc; ++ } + + /* + * need to make sure we are not asked to extend the file since the SMB3 +@@ -1726,8 +1738,11 @@ static long smb3_zero_range(struct file + * which for a non sparse file would zero the newly extended range + */ + if (keep_size == false) +- if (i_size_read(inode) < offset + len) +- return -EOPNOTSUPP; ++ if (i_size_read(inode) < offset + len) { ++ rc = -EOPNOTSUPP; ++ free_xid(xid); ++ return rc; ++ } + + cifs_dbg(FYI, "offset %lld len %lld", offset, len); + +@@ -1760,8 +1775,11 @@ static long smb3_punch_hole(struct file + + /* Need to make file sparse, if not already, before freeing range. 
*/ + /* Consider adding equivalent for compressed since it could also work */ +- if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) +- return -EOPNOTSUPP; ++ if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) { ++ rc = -EOPNOTSUPP; ++ free_xid(xid); ++ return rc; ++ } + + cifs_dbg(FYI, "offset %lld len %lld", offset, len); + +@@ -1792,8 +1810,10 @@ static long smb3_simple_falloc(struct fi + + /* if file not oplocked can't be sure whether asking to extend size */ + if (!CIFS_CACHE_READ(cifsi)) +- if (keep_size == false) +- return -EOPNOTSUPP; ++ if (keep_size == false) { ++ free_xid(xid); ++ return rc; ++ } + + /* + * Files are non-sparse by default so falloc may be a no-op +@@ -1802,14 +1822,16 @@ static long smb3_simple_falloc(struct fi + */ + if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0) { + if (keep_size == true) +- return 0; ++ rc = 0; + /* check if extending file */ + else if (i_size_read(inode) >= off + len) + /* not extending file and already not sparse */ +- return 0; ++ rc = 0; + /* BB: in future add else clause to extend file */ + else +- return -EOPNOTSUPP; ++ rc = -EOPNOTSUPP; ++ free_xid(xid); ++ return rc; + } + + if ((keep_size == true) || (i_size_read(inode) >= off + len)) { +@@ -1821,8 +1843,11 @@ static long smb3_simple_falloc(struct fi + * ie potentially making a few extra pages at the beginning + * or end of the file non-sparse via set_sparse is harmless. + */ +- if ((off > 8192) || (off + len + 8192 < i_size_read(inode))) +- return -EOPNOTSUPP; ++ if ((off > 8192) || (off + len + 8192 < i_size_read(inode))) { ++ rc = -EOPNOTSUPP; ++ free_xid(xid); ++ return rc; ++ } + + rc = smb2_set_sparse(xid, tcon, cfile, inode, false); + } diff --git a/queue-4.16/smb3-on-reconnect-set-previoussessionid-field.patch b/queue-4.16/smb3-on-reconnect-set-previoussessionid-field.patch new file mode 100644 index 00000000000..64220d7909e --- /dev/null +++ b/queue-4.16/smb3-on-reconnect-set-previoussessionid-field.patch @@ -0,0 +1,34 @@ +From b2adf22fdfba85a6701c481faccdbbb3a418ccfc Mon Sep 17 00:00:00 2001 +From: Steve French +Date: Thu, 31 May 2018 15:19:25 -0500 +Subject: smb3: on reconnect set PreviousSessionId field + +From: Steve French + +commit b2adf22fdfba85a6701c481faccdbbb3a418ccfc upstream. 
+ +The server detects reconnect by the (non-zero) value in PreviousSessionId +of SMB2/SMB3 SessionSetup request, but this behavior regressed due +to commit 166cea4dc3a4f66f020cfb9286225ecd228ab61d +("SMB2: Separate RawNTLMSSP authentication from SMB2_sess_setup") + +CC: Stable +CC: Sachin Prabhu +Signed-off-by: Steve French +Reviewed-by: Ronnie Sahlberg +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/smb2pdu.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -1172,6 +1172,7 @@ SMB2_sess_setup(const unsigned int xid, + sess_data->ses = ses; + sess_data->buf0_type = CIFS_NO_BUFFER; + sess_data->nls_cp = (struct nls_table *) nls_cp; ++ sess_data->previous_session = ses->Suid; + + #ifdef CONFIG_CIFS_SMB311 + /* diff --git a/queue-4.16/w1-mxc_w1-enable-clock-before-calling-clk_get_rate-on-it.patch b/queue-4.16/w1-mxc_w1-enable-clock-before-calling-clk_get_rate-on-it.patch new file mode 100644 index 00000000000..8659419f099 --- /dev/null +++ b/queue-4.16/w1-mxc_w1-enable-clock-before-calling-clk_get_rate-on-it.patch @@ -0,0 +1,68 @@ +From 955bc61328dc0a297fb3baccd84e9d3aee501ed8 Mon Sep 17 00:00:00 2001 +From: Stefan Potyra +Date: Wed, 2 May 2018 10:55:31 +0200 +Subject: w1: mxc_w1: Enable clock before calling clk_get_rate() on it + +From: Stefan Potyra + +commit 955bc61328dc0a297fb3baccd84e9d3aee501ed8 upstream. + +According to the API, you may only call clk_get_rate() after actually +enabling it. + +Found by Linux Driver Verification project (linuxtesting.org). + +Fixes: a5fd9139f74c ("w1: add 1-wire master driver for i.MX27 / i.MX31") +Signed-off-by: Stefan Potyra +Acked-by: Evgeniy Polyakov +Cc: stable +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/w1/masters/mxc_w1.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +--- a/drivers/w1/masters/mxc_w1.c ++++ b/drivers/w1/masters/mxc_w1.c +@@ -112,6 +112,10 @@ static int mxc_w1_probe(struct platform_ + if (IS_ERR(mdev->clk)) + return PTR_ERR(mdev->clk); + ++ err = clk_prepare_enable(mdev->clk); ++ if (err) ++ return err; ++ + clkrate = clk_get_rate(mdev->clk); + if (clkrate < 10000000) + dev_warn(&pdev->dev, +@@ -125,12 +129,10 @@ static int mxc_w1_probe(struct platform_ + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + mdev->regs = devm_ioremap_resource(&pdev->dev, res); +- if (IS_ERR(mdev->regs)) +- return PTR_ERR(mdev->regs); +- +- err = clk_prepare_enable(mdev->clk); +- if (err) +- return err; ++ if (IS_ERR(mdev->regs)) { ++ err = PTR_ERR(mdev->regs); ++ goto out_disable_clk; ++ } + + /* Software reset 1-Wire module */ + writeb(MXC_W1_RESET_RST, mdev->regs + MXC_W1_RESET); +@@ -146,8 +148,12 @@ static int mxc_w1_probe(struct platform_ + + err = w1_add_master_device(&mdev->bus_master); + if (err) +- clk_disable_unprepare(mdev->clk); ++ goto out_disable_clk; ++ ++ return 0; + ++out_disable_clk: ++ clk_disable_unprepare(mdev->clk); + return err; + } + diff --git a/queue-4.16/x86-apic-provide-apic_ack_irq.patch b/queue-4.16/x86-apic-provide-apic_ack_irq.patch new file mode 100644 index 00000000000..5dc7fbce158 --- /dev/null +++ b/queue-4.16/x86-apic-provide-apic_ack_irq.patch @@ -0,0 +1,80 @@ +From c0255770ccdc77ef2184d2a0a2e0cde09d2b44a4 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Mon, 4 Jun 2018 17:33:55 +0200 +Subject: x86/apic: Provide apic_ack_irq() + +From: Thomas Gleixner + +commit c0255770ccdc77ef2184d2a0a2e0cde09d2b44a4 upstream. 
+
+apic_ack_edge() is explicitly for handling interrupt affinity cleanup when
+interrupt remapping is not available or disabled.
+
+Remapped interrupts and also some of the platform specific special
+interrupts, e.g. UV, invoke ack_APIC_irq() directly.
+
+To address the issue of failing an affinity update with -EBUSY the delayed
+affinity mechanism can be reused, but ack_APIC_irq() does not handle
+that. Adding this to ack_APIC_irq() is not possible, because that function
+is also used for exceptions and directly handled interrupts like IPIs.
+
+Create a new function, which just contains the conditional invocation of
+irq_move_irq() and the final ack_APIC_irq().
+
+Reuse the new function in apic_ack_edge().
+
+Preparatory change for the real fix.
+
+Fixes: dccfe3147b42 ("x86/vector: Simplify vector move cleanup")
+Signed-off-by: Thomas Gleixner
+Tested-by: Song Liu
+Cc: Joerg Roedel
+Cc: Peter Zijlstra
+Cc: Song Liu
+Cc: Dmitry Safonov <0x7f454c46@gmail.com>
+Cc: stable@vger.kernel.org
+Cc: Mike Travis
+Cc: Borislav Petkov
+Cc: Tariq Toukan
+Link: https://lkml.kernel.org/r/20180604162224.471925894@linutronix.de
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/include/asm/apic.h   |    2 ++
+ arch/x86/kernel/apic/vector.c |    9 +++++++--
+ 2 files changed, 9 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/apic.h
++++ b/arch/x86/include/asm/apic.h
+@@ -442,6 +442,8 @@ static inline void apic_set_eoi_write(vo
+ 
+ #endif /* CONFIG_X86_LOCAL_APIC */
+ 
++extern void apic_ack_irq(struct irq_data *data);
++
+ static inline void ack_APIC_irq(void)
+ {
+ 	/*
+--- a/arch/x86/kernel/apic/vector.c
++++ b/arch/x86/kernel/apic/vector.c
+@@ -809,13 +809,18 @@ static int apic_retrigger_irq(struct irq
+ 	return 1;
+ }
+ 
+-void apic_ack_edge(struct irq_data *irqd)
++void apic_ack_irq(struct irq_data *irqd)
+ {
+-	irq_complete_move(irqd_cfg(irqd));
+ 	irq_move_irq(irqd);
+ 	ack_APIC_irq();
+ }
+ 
++void apic_ack_edge(struct irq_data *irqd)
++{
++	irq_complete_move(irqd_cfg(irqd));
++	apic_ack_irq(irqd);
++}
++
+ static struct irq_chip lapic_controller = {
+ 	.name			= "APIC",
+ 	.irq_ack		= apic_ack_edge,
diff --git a/queue-4.16/x86-apic-vector-prevent-hlist-corruption-and-leaks.patch b/queue-4.16/x86-apic-vector-prevent-hlist-corruption-and-leaks.patch
new file mode 100644
index 00000000000..a900c15885b
--- /dev/null
+++ b/queue-4.16/x86-apic-vector-prevent-hlist-corruption-and-leaks.patch
@@ -0,0 +1,71 @@
+From 80ae7b1a918e78b0bae88b0c0ad413d3fdced968 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner
+Date: Mon, 4 Jun 2018 17:33:53 +0200
+Subject: x86/apic/vector: Prevent hlist corruption and leaks
+
+From: Thomas Gleixner
+
+commit 80ae7b1a918e78b0bae88b0c0ad413d3fdced968 upstream.
+
+Several people observed the WARN_ON() in irq_matrix_free() which triggers
+when the caller tries to free a vector which is not in the allocation
+range. Song provided the trace information which allowed the root cause
+to be decoded.
+
+The rework of the vector allocation mechanism failed to preserve a sanity
+check, which prevents setting a new target vector/CPU when the previous
+affinity change has not fully completed.
+
+As a result a half finished affinity change can be overwritten, which can
+cause the leak of an irq descriptor pointer on the previous target CPU and
+double enqueue of the hlist head into the cleanup lists of two or more
+CPUs. After one CPU cleaned up its vector the next CPU will invoke the
+cleanup handler with vector 0, which triggers the out of range warning in
+the matrix allocator.
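Illustration (editor's sketch, not part of the quoted patch): the double enqueue described above can be pictured with a toy list outside the kernel. The sketch uses a made-up singly linked list rather than the kernel's hlist API; it only shows why queueing the same node on a second list silently rewires its next pointer, so the first list loses entries.

/* Toy user-space illustration, not kernel code. */
#include <stdio.h>
#include <stddef.h>

struct node { struct node *next; const char *name; };
struct list { struct node *head; };

static void push(struct list *l, struct node *n)
{
	/* No check whether @n is already queued somewhere - analogous to
	 * the missing sanity check described above. */
	n->next = l->head;
	l->head = n;
}

int main(void)
{
	struct node pending = { NULL, "pending-cleanup" };
	struct node other = { NULL, "other-entry" };
	struct list cpu0 = { NULL }, cpu1 = { NULL };

	push(&cpu0, &other);
	push(&cpu0, &pending);	/* queued for cleanup on "CPU0" */
	push(&cpu1, &pending);	/* second enqueue overwrites pending.next */

	/* CPU0's list now ends at "pending-cleanup"; "other-entry" is
	 * still logically queued there but no longer reachable. */
	for (struct node *n = cpu0.head; n; n = n->next)
		printf("cpu0 sees: %s\n", n->name);
	return 0;
}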
+
+Prevent this by checking that the apic_data of the interrupt has the
+move_in_progress flag cleared and the hlist node unhashed. Return
+-EBUSY if not.
+
+This prevents the damage and restores the behaviour before the vector
+allocation rework, but due to other changes in that area it also widens the
+chance that user space can observe -EBUSY. In theory this should be fine,
+but actually not all user space tools handle -EBUSY correctly. Addressing
+that is not part of this fix, but will be addressed in follow up patches.
+
+Fixes: 69cde0004a4b ("x86/vector: Use matrix allocator for vector assignment")
+Reported-by: Dmitry Safonov <0x7f454c46@gmail.com>
+Reported-by: Tariq Toukan
+Reported-by: Song Liu
+Signed-off-by: Thomas Gleixner
+Tested-by: Song Liu
+Cc: Joerg Roedel
+Cc: Peter Zijlstra
+Cc: stable@vger.kernel.org
+Cc: Mike Travis
+Cc: Borislav Petkov
+Link: https://lkml.kernel.org/r/20180604162224.303870257@linutronix.de
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kernel/apic/vector.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/arch/x86/kernel/apic/vector.c
++++ b/arch/x86/kernel/apic/vector.c
+@@ -235,6 +235,15 @@ static int allocate_vector(struct irq_da
+ 	if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest))
+ 		return 0;
+ 
++	/*
++	 * Careful here. @apicd might either have move_in_progress set or
++	 * be enqueued for cleanup. Assigning a new vector would either
++	 * leave a stale vector on some CPU around or in case of a pending
++	 * cleanup corrupt the hlist.
++	 */
++	if (apicd->move_in_progress || !hlist_unhashed(&apicd->clist))
++		return -EBUSY;
++
+ 	vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu);
+ 	if (vector > 0)
+ 		apic_update_vector(irqd, vector, cpu);
diff --git a/queue-4.16/x86-intel_rdt-enable-cmt-and-mbm-on-new-skylake-stepping.patch b/queue-4.16/x86-intel_rdt-enable-cmt-and-mbm-on-new-skylake-stepping.patch
new file mode 100644
index 00000000000..905017d8380
--- /dev/null
+++ b/queue-4.16/x86-intel_rdt-enable-cmt-and-mbm-on-new-skylake-stepping.patch
@@ -0,0 +1,37 @@
+From 1d9f3e20a56d33e55748552aeec597f58542f92d Mon Sep 17 00:00:00 2001
+From: Tony Luck
+Date: Fri, 8 Jun 2018 09:07:32 -0700
+Subject: x86/intel_rdt: Enable CMT and MBM on new Skylake stepping
+
+From: Tony Luck
+
+commit 1d9f3e20a56d33e55748552aeec597f58542f92d upstream.
+
+A new stepping of Skylake has fixes for cache occupancy and memory
+bandwidth monitoring.
+
+Update the code to enable these by default on newer steppings.
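Illustration (editor's sketch, not part of the quoted patch): one way to see the effect of this quirk from user space is to read which monitoring events resctrl exposes. The sketch assumes the resctrl filesystem is mounted at /sys/fs/resctrl and that the interesting events are llc_occupancy and the mbm counters; both the path and that expectation are stated for illustration only.

/* Print the RDT monitoring events the kernel left enabled. */
#include <stdio.h>

int main(void)
{
	/* Assumes resctrl is mounted at /sys/fs/resctrl. */
	const char *path = "/sys/fs/resctrl/info/L3_MON/mon_features";
	char line[64];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);	/* not mounted, or monitoring disabled */
		return 1;
	}
	/* On steppings covered by the quirk's else branch this should list
	 * llc_occupancy, mbm_total_bytes and mbm_local_bytes. */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}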
+
+Signed-off-by: Tony Luck
+Signed-off-by: Thomas Gleixner
+Cc: Fenghua Yu
+Cc: stable@vger.kernel.org # v4.14
+Cc: Vikas Shivappa
+Link: https://lkml.kernel.org/r/20180608160732.9842-1-tony.luck@intel.com
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kernel/cpu/intel_rdt.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/kernel/cpu/intel_rdt.c
++++ b/arch/x86/kernel/cpu/intel_rdt.c
+@@ -821,6 +821,8 @@ static __init void rdt_quirks(void)
+ 	case INTEL_FAM6_SKYLAKE_X:
+ 		if (boot_cpu_data.x86_stepping <= 4)
+ 			set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
++		else
++			set_rdt_options("!l3cat");
+ 	}
+ }
+ 
diff --git a/queue-4.16/x86-ioapic-use-apic_ack_irq.patch b/queue-4.16/x86-ioapic-use-apic_ack_irq.patch
new file mode 100644
index 00000000000..f275c1e70e2
--- /dev/null
+++ b/queue-4.16/x86-ioapic-use-apic_ack_irq.patch
@@ -0,0 +1,44 @@
+From 2b04e46d8d0b9b7ac08ded672e3eab823f01d77a Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner
+Date: Mon, 4 Jun 2018 17:33:57 +0200
+Subject: x86/ioapic: Use apic_ack_irq()
+
+From: Thomas Gleixner
+
+commit 2b04e46d8d0b9b7ac08ded672e3eab823f01d77a upstream.
+
+To address the EBUSY failure of interrupt affinity settings when the
+previous setting has not been cleaned up yet, use the new apic_ack_irq()
+function instead of directly invoking ack_APIC_irq().
+
+Preparatory change for the real fix.
+
+Fixes: dccfe3147b42 ("x86/vector: Simplify vector move cleanup")
+Signed-off-by: Thomas Gleixner
+Tested-by: Song Liu
+Cc: Joerg Roedel
+Cc: Peter Zijlstra
+Cc: Song Liu
+Cc: Dmitry Safonov <0x7f454c46@gmail.com>
+Cc: stable@vger.kernel.org
+Cc: Mike Travis
+Cc: Borislav Petkov
+Cc: Tariq Toukan
+Link: https://lkml.kernel.org/r/20180604162224.639011135@linutronix.de
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/kernel/apic/io_apic.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/apic/io_apic.c
++++ b/arch/x86/kernel/apic/io_apic.c
+@@ -1859,7 +1859,7 @@ static void ioapic_ir_ack_level(struct i
+ 	 * intr-remapping table entry. Hence for the io-apic
+ 	 * EOI we use the pin number.
+ 	 */
+-	ack_APIC_irq();
++	apic_ack_irq(irq_data);
+ 	eoi_ioapic_pin(data->entry.vector, data);
+ }
+ 
diff --git a/queue-4.16/x86-platform-uv-use-apic_ack_irq.patch b/queue-4.16/x86-platform-uv-use-apic_ack_irq.patch
new file mode 100644
index 00000000000..c4a7964339d
--- /dev/null
+++ b/queue-4.16/x86-platform-uv-use-apic_ack_irq.patch
@@ -0,0 +1,57 @@
+From 839b0f1c4ef674cd929a42304c078afca278581a Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner
+Date: Mon, 4 Jun 2018 17:33:58 +0200
+Subject: x86/platform/uv: Use apic_ack_irq()
+
+From: Thomas Gleixner
+
+commit 839b0f1c4ef674cd929a42304c078afca278581a upstream.
+
+To address the EBUSY failure of interrupt affinity settings when the
+previous setting has not been cleaned up yet, use the new apic_ack_irq()
+function instead of the special uv_ack_apic() implementation, which is
+merely a wrapper around ack_APIC_irq().
+
+Preparatory change for the real fix.
+
+Fixes: dccfe3147b42 ("x86/vector: Simplify vector move cleanup")
+Reported-by: Song Liu
+Signed-off-by: Thomas Gleixner
+Tested-by: Song Liu
+Cc: Joerg Roedel
+Cc: Peter Zijlstra
+Cc: Dmitry Safonov <0x7f454c46@gmail.com>
+Cc: stable@vger.kernel.org
+Cc: Mike Travis
+Cc: Borislav Petkov
+Cc: Tariq Toukan
+Link: https://lkml.kernel.org/r/20180604162224.721691398@linutronix.de
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/platform/uv/uv_irq.c |    7 +------
+ 1 file changed, 1 insertion(+), 6 deletions(-)
+
+--- a/arch/x86/platform/uv/uv_irq.c
++++ b/arch/x86/platform/uv/uv_irq.c
+@@ -47,11 +47,6 @@ static void uv_program_mmr(struct irq_cf
+ 
+ static void uv_noop(struct irq_data *data) { }
+ 
+-static void uv_ack_apic(struct irq_data *data)
+-{
+-	ack_APIC_irq();
+-}
+-
+ static int
+ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
+ 		    bool force)
+@@ -73,7 +68,7 @@ static struct irq_chip uv_irq_chip = {
+ 	.name			= "UV-CORE",
+ 	.irq_mask		= uv_noop,
+ 	.irq_unmask		= uv_noop,
+-	.irq_eoi		= uv_ack_apic,
++	.irq_eoi		= apic_ack_irq,
+ 	.irq_set_affinity	= uv_set_irq_affinity,
+ };
+ 
diff --git a/queue-4.16/x86-vector-fix-the-args-of-vector_alloc-tracepoint.patch b/queue-4.16/x86-vector-fix-the-args-of-vector_alloc-tracepoint.patch
new file mode 100644
index 00000000000..9ca39eefc4c
--- /dev/null
+++ b/queue-4.16/x86-vector-fix-the-args-of-vector_alloc-tracepoint.patch
@@ -0,0 +1,35 @@
+From 838d76d63ec4eaeaa12bedfa50f261480f615200 Mon Sep 17 00:00:00 2001
+From: Dou Liyang
+Date: Fri, 1 Jun 2018 14:50:31 +0800
+Subject: x86/vector: Fix the args of vector_alloc tracepoint
+
+From: Dou Liyang
+
+commit 838d76d63ec4eaeaa12bedfa50f261480f615200 upstream.
+
+The vector_alloc tracepoint reversed the reserved and ret args, which made
+the trace print wrong. Exchange them.
+
+Fixes: 8d1e3dca7de6 ("x86/vector: Add tracepoints for vector management")
+Signed-off-by: Dou Liyang
+Signed-off-by: Thomas Gleixner
+Cc: hpa@zytor.com
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20180601065031.21872-1-douly.fnst@cn.fujitsu.com
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/include/asm/trace/irq_vectors.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/include/asm/trace/irq_vectors.h
++++ b/arch/x86/include/asm/trace/irq_vectors.h
+@@ -236,7 +236,7 @@ TRACE_EVENT(vector_alloc,
+ 	TP_PROTO(unsigned int irq, unsigned int vector, bool reserved,
+ 		 int ret),
+ 
+-	TP_ARGS(irq, vector, ret, reserved),
++	TP_ARGS(irq, vector, reserved, ret),
+ 
+ 	TP_STRUCT__entry(
+ 		__field(	unsigned int,	irq		)
-- 
2.47.3
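Illustration (editor's sketch, not part of the patches above): several of these changes revolve around affinity updates failing with -EBUSY, and the hlist fix notes that not every user space tool copes with that. Below is a minimal sketch of a tolerant affinity write; the IRQ number and CPU mask are made-up example values and the retry policy is only a suggestion.

/* Write a CPU mask to /proc/irq/<n>/smp_affinity, retrying on EBUSY. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/proc/irq/30/smp_affinity";	/* example IRQ */
	const char *mask = "2\n";				/* CPU1 only */
	int tries;

	for (tries = 0; tries < 10; tries++) {
		int fd = open(path, O_WRONLY);
		ssize_t ret;
		int saved;

		if (fd < 0) {
			perror(path);
			return 1;
		}
		ret = write(fd, mask, strlen(mask));
		saved = errno;
		close(fd);
		if (ret >= 0)
			return 0;	/* affinity accepted */
		if (saved != EBUSY) {	/* only retry the transient case */
			fprintf(stderr, "write: %s\n", strerror(saved));
			return 1;
		}
		usleep(10000);	/* previous vector move not cleaned up yet */
	}
	fprintf(stderr, "smp_affinity still busy after %d tries\n", tries);
	return 1;
}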