From fc0f56e7c9a6611fbf1a14f0de73d3815c3d85f0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 3 Oct 2017 11:02:39 +0200 Subject: [PATCH] 4.9-stable patches added patches: cxl-fix-driver-use-count.patch gfs2-fix-debugfs-glocks-dump.patch kvm-vmx-do-not-change-sn-bit-in-vmx_update_pi_irte.patch kvm-vmx-remove-warn_on_once-in-kvm_vcpu_trigger_posted_interrupt.patch timer-sysclt-restrict-timer-migration-sysctl-values-to-0-and-1.patch --- queue-4.9/cxl-fix-driver-use-count.patch | 83 ++++++++++++++ queue-4.9/gfs2-fix-debugfs-glocks-dump.patch | 108 ++++++++++++++++++ ...-change-sn-bit-in-vmx_update_pi_irte.patch | 50 ++++++++ ...in-kvm_vcpu_trigger_posted_interrupt.patch | 81 +++++++++++++ queue-4.9/series | 5 + ...r-migration-sysctl-values-to-0-and-1.patch | 49 ++++++++ 6 files changed, 376 insertions(+) create mode 100644 queue-4.9/cxl-fix-driver-use-count.patch create mode 100644 queue-4.9/gfs2-fix-debugfs-glocks-dump.patch create mode 100644 queue-4.9/kvm-vmx-do-not-change-sn-bit-in-vmx_update_pi_irte.patch create mode 100644 queue-4.9/kvm-vmx-remove-warn_on_once-in-kvm_vcpu_trigger_posted_interrupt.patch create mode 100644 queue-4.9/timer-sysclt-restrict-timer-migration-sysctl-values-to-0-and-1.patch diff --git a/queue-4.9/cxl-fix-driver-use-count.patch b/queue-4.9/cxl-fix-driver-use-count.patch new file mode 100644 index 00000000000..50e549bad05 --- /dev/null +++ b/queue-4.9/cxl-fix-driver-use-count.patch @@ -0,0 +1,83 @@ +From 197267d0356004a31c4d6b6336598f5dff3301e1 Mon Sep 17 00:00:00 2001 +From: Frederic Barrat +Date: Wed, 30 Aug 2017 12:15:49 +0200 +Subject: cxl: Fix driver use count + +From: Frederic Barrat + +commit 197267d0356004a31c4d6b6336598f5dff3301e1 upstream. + +cxl keeps a driver use count, which is used with the hash memory model +on p8 to know when to upgrade local TLBIs to global and to trigger +callbacks to manage the MMU for PSL8. 
+ +If a process opens a context and closes without attaching or fails the +attachment, the driver use count is never decremented. As a +consequence, TLB invalidations remain global, even if there are no +active cxl contexts. + +We should increment the driver use count when the process is attaching +to the cxl adapter, and not on open. It's not needed before the +adapter starts using the context and the use count is decremented on +the detach path, so it makes more sense. + +It affects only the user api. The kernel api is already doing The +Right Thing. + +Signed-off-by: Frederic Barrat +Cc: stable@vger.kernel.org # v4.2+ +Fixes: 7bb5d91a4dda ("cxl: Rework context lifetimes") +Acked-by: Andrew Donnellan +Signed-off-by: Michael Ellerman +[ajd: backport to stable v4.9 tree] +Signed-off-by: Andrew Donnellan +Signed-off-by: Greg Kroah-Hartman +--- + drivers/misc/cxl/api.c | 4 ++++ + drivers/misc/cxl/file.c | 8 +++++++- + 2 files changed, 11 insertions(+), 1 deletion(-) + +--- a/drivers/misc/cxl/api.c ++++ b/drivers/misc/cxl/api.c +@@ -244,6 +244,10 @@ int cxl_start_context(struct cxl_context + ctx->real_mode = false; + } + ++ /* ++ * Increment driver use count. Enables global TLBIs for hash ++ * and callbacks to handle the segment table ++ */ + cxl_ctx_get(); + + if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) { +--- a/drivers/misc/cxl/file.c ++++ b/drivers/misc/cxl/file.c +@@ -91,7 +91,6 @@ static int __afu_open(struct inode *inod + + pr_devel("afu_open pe: %i\n", ctx->pe); + file->private_data = ctx; +- cxl_ctx_get(); + + /* indicate success */ + rc = 0; +@@ -213,6 +212,12 @@ static long afu_ioctl_start_work(struct + ctx->glpid = get_task_pid(current->group_leader, PIDTYPE_PID); + + ++ /* ++ * Increment driver use count. 
Enables global TLBIs for hash ++ * and callbacks to handle the segment table ++ */ ++ cxl_ctx_get(); ++ + trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr); + + if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor, +@@ -222,6 +227,7 @@ static long afu_ioctl_start_work(struct + put_pid(ctx->glpid); + put_pid(ctx->pid); + ctx->glpid = ctx->pid = NULL; ++ cxl_ctx_put(); + goto out; + } + diff --git a/queue-4.9/gfs2-fix-debugfs-glocks-dump.patch b/queue-4.9/gfs2-fix-debugfs-glocks-dump.patch new file mode 100644 index 00000000000..9c3b3cc0931 --- /dev/null +++ b/queue-4.9/gfs2-fix-debugfs-glocks-dump.patch @@ -0,0 +1,108 @@ +From 10201655b085df8e000822e496e5d4016a167a36 Mon Sep 17 00:00:00 2001 +From: Andreas Gruenbacher +Date: Tue, 19 Sep 2017 07:15:35 -0500 +Subject: gfs2: Fix debugfs glocks dump + +From: Andreas Gruenbacher + +commit 10201655b085df8e000822e496e5d4016a167a36 upstream. + +The switch to rhashtables (commit 88ffbf3e03) broke the debugfs glock +dump (/sys/kernel/debug/gfs2//glocks) for dumps bigger than a +single buffer: the right function for restarting an rhashtable iteration +from the beginning of the hash table is rhashtable_walk_enter; +rhashtable_walk_stop + rhashtable_walk_start will just resume from the +current position. 
+ +The upstream commit doesn't directly apply to 4.9.y because 4.9.y +doesn't have the following mainline commits: + + 92ecd73a887c4a2b94daf5fc35179d75d1c4ef95 + gfs2: Deduplicate gfs2_{glocks,glstats}_open + cc37a62785a584f4875788689f3fd1fa6e4eb291 + gfs2: Replace rhashtable_walk_init with rhashtable_walk_enter + +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Bob Peterson +Signed-off-by: Greg Kroah-Hartman + +--- + fs/gfs2/glock.c | 16 +++++----------- + 1 file changed, 5 insertions(+), 11 deletions(-) + +--- a/fs/gfs2/glock.c ++++ b/fs/gfs2/glock.c +@@ -1836,13 +1836,9 @@ static void *gfs2_glock_seq_start(struct + { + struct gfs2_glock_iter *gi = seq->private; + loff_t n = *pos; +- int ret; + +- if (gi->last_pos <= *pos) +- n = (*pos - gi->last_pos); +- +- ret = rhashtable_walk_start(&gi->hti); +- if (ret) ++ rhashtable_walk_enter(&gl_hash_table, &gi->hti); ++ if (rhashtable_walk_start(&gi->hti) != 0) + return NULL; + + do { +@@ -1850,6 +1846,7 @@ static void *gfs2_glock_seq_start(struct + } while (gi->gl && n--); + + gi->last_pos = *pos; ++ + return gi->gl; + } + +@@ -1861,6 +1858,7 @@ static void *gfs2_glock_seq_next(struct + (*pos)++; + gi->last_pos = *pos; + gfs2_glock_iter_next(gi); ++ + return gi->gl; + } + +@@ -1870,6 +1868,7 @@ static void gfs2_glock_seq_stop(struct s + + gi->gl = NULL; + rhashtable_walk_stop(&gi->hti); ++ rhashtable_walk_exit(&gi->hti); + } + + static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) +@@ -1932,12 +1931,10 @@ static int gfs2_glocks_open(struct inode + struct gfs2_glock_iter *gi = seq->private; + + gi->sdp = inode->i_private; +- gi->last_pos = 0; + seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); + if (seq->buf) + seq->size = GFS2_SEQ_GOODSIZE; + gi->gl = NULL; +- ret = rhashtable_walk_init(&gl_hash_table, &gi->hti, GFP_KERNEL); + } + return ret; + } +@@ -1948,7 +1945,6 @@ static int gfs2_glocks_release(struct in + struct gfs2_glock_iter *gi = seq->private; + + gi->gl = NULL; +- 
rhashtable_walk_exit(&gi->hti); + return seq_release_private(inode, file); + } + +@@ -1960,12 +1956,10 @@ static int gfs2_glstats_open(struct inod + struct seq_file *seq = file->private_data; + struct gfs2_glock_iter *gi = seq->private; + gi->sdp = inode->i_private; +- gi->last_pos = 0; + seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); + if (seq->buf) + seq->size = GFS2_SEQ_GOODSIZE; + gi->gl = NULL; +- ret = rhashtable_walk_init(&gl_hash_table, &gi->hti, GFP_KERNEL); + } + return ret; + } diff --git a/queue-4.9/kvm-vmx-do-not-change-sn-bit-in-vmx_update_pi_irte.patch b/queue-4.9/kvm-vmx-do-not-change-sn-bit-in-vmx_update_pi_irte.patch new file mode 100644 index 00000000000..6477be42935 --- /dev/null +++ b/queue-4.9/kvm-vmx-do-not-change-sn-bit-in-vmx_update_pi_irte.patch @@ -0,0 +1,50 @@ +From dc91f2eb1a4021eb6705c15e474942f84ab9b211 Mon Sep 17 00:00:00 2001 +From: Haozhong Zhang +Date: Mon, 18 Sep 2017 09:56:49 +0800 +Subject: KVM: VMX: do not change SN bit in vmx_update_pi_irte() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Haozhong Zhang + +commit dc91f2eb1a4021eb6705c15e474942f84ab9b211 upstream. + +In kvm_vcpu_trigger_posted_interrupt() and pi_pre_block(), KVM +assumes that PI notification events should not be suppressed when the +target vCPU is not blocked. + +vmx_update_pi_irte() sets the SN field before changing an interrupt +from posting to remapping, but it does not check the vCPU mode. +Therefore, the change of the SN field may break the above assumption. +Besides, I don't see reasons to suppress notification events here, so +remove the changes of the SN field to avoid the race condition. 
+ +Signed-off-by: Haozhong Zhang +Reported-by: "Ramamurthy, Venkatesh" +Reported-by: Dan Williams +Reviewed-by: Paolo Bonzini +Fixes: 28b835d60fcc ("KVM: Update Posted-Interrupts Descriptor when vCPU is preempted") +Signed-off-by: Radim Krčmář +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -11215,12 +11215,8 @@ static int vmx_update_pi_irte(struct kvm + + if (set) + ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); +- else { +- /* suppress notification event before unposting */ +- pi_set_sn(vcpu_to_pi_desc(vcpu)); ++ else + ret = irq_set_vcpu_affinity(host_irq, NULL); +- pi_clear_sn(vcpu_to_pi_desc(vcpu)); +- } + + if (ret < 0) { + printk(KERN_INFO "%s: failed to update PI IRTE\n", diff --git a/queue-4.9/kvm-vmx-remove-warn_on_once-in-kvm_vcpu_trigger_posted_interrupt.patch b/queue-4.9/kvm-vmx-remove-warn_on_once-in-kvm_vcpu_trigger_posted_interrupt.patch new file mode 100644 index 00000000000..7089701b5c8 --- /dev/null +++ b/queue-4.9/kvm-vmx-remove-warn_on_once-in-kvm_vcpu_trigger_posted_interrupt.patch @@ -0,0 +1,81 @@ +From 5753743fa5108b8f98bd61e40dc63f641b26c768 Mon Sep 17 00:00:00 2001 +From: Haozhong Zhang +Date: Mon, 18 Sep 2017 09:56:50 +0800 +Subject: KVM: VMX: remove WARN_ON_ONCE in kvm_vcpu_trigger_posted_interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Haozhong Zhang + +commit 5753743fa5108b8f98bd61e40dc63f641b26c768 upstream. + +WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc)) in kvm_vcpu_trigger_posted_interrupt() +is intended to detect violations of the invariant that VT-d PI +notification events are not suppressed when the vcpu is in guest mode. Because the +two checks for the target vcpu mode and the target suppress field +cannot be performed atomically, the target vcpu mode may change in +between. If that does happen, WARN_ON_ONCE() here may raise false +alarms. 
+ +As the previous patch fixed the real invariant breaker, remove this +WARN_ON_ONCE() to avoid false alarms, and document the allowed cases +instead. + +Signed-off-by: Haozhong Zhang +Reported-by: "Ramamurthy, Venkatesh" +Reported-by: Dan Williams +Reviewed-by: Paolo Bonzini +Fixes: 28b835d60fcc ("KVM: Update Posted-Interrupts Descriptor when vCPU is preempted") +Signed-off-by: Radim Krčmář +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx.c | 33 +++++++++++++++++++++------------ + 1 file changed, 21 insertions(+), 12 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -4759,21 +4759,30 @@ static inline bool kvm_vcpu_trigger_post + { + #ifdef CONFIG_SMP + if (vcpu->mode == IN_GUEST_MODE) { +- struct vcpu_vmx *vmx = to_vmx(vcpu); +- + /* +- * Currently, we don't support urgent interrupt, +- * all interrupts are recognized as non-urgent +- * interrupt, so we cannot post interrupts when +- * 'SN' is set. ++ * The vector of interrupt to be delivered to vcpu had ++ * been set in PIR before this function. ++ * ++ * Following cases will be reached in this block, and ++ * we always send a notification event in all cases as ++ * explained below. ++ * ++ * Case 1: vcpu keeps in non-root mode. Sending a ++ * notification event posts the interrupt to vcpu. ++ * ++ * Case 2: vcpu exits to root mode and is still ++ * runnable. PIR will be synced to vIRR before the ++ * next vcpu entry. Sending a notification event in ++ * this case has no effect, as vcpu is not in root ++ * mode. + * +- * If the vcpu is in guest mode, it means it is +- * running instead of being scheduled out and +- * waiting in the run queue, and that's the only +- * case when 'SN' is set currently, warning if +- * 'SN' is set. ++ * Case 3: vcpu exits to root mode and is blocked. ++ * vcpu_block() has already synced PIR to vIRR and ++ * never blocks vcpu if vIRR is not cleared. 
Therefore, ++ * a blocked vcpu here does not wait for any requested ++ * interrupts in PIR, and sending a notification event ++ * which has no effect is safe here. + */ +- WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc)); + + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), + POSTED_INTR_VECTOR); diff --git a/queue-4.9/series b/queue-4.9/series index 92257aa3283..da1912e49fc 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -54,3 +54,8 @@ btrfs-propagate-error-to-btrfs_cmp_data_prepare-caller.patch btrfs-prevent-to-set-invalid-default-subvolid.patch x86-mm-fix-fault-error-path-using-unsafe-vma-pointer.patch x86-fpu-don-t-let-userspace-set-bogus-xcomp_bv.patch +gfs2-fix-debugfs-glocks-dump.patch +timer-sysclt-restrict-timer-migration-sysctl-values-to-0-and-1.patch +kvm-vmx-do-not-change-sn-bit-in-vmx_update_pi_irte.patch +kvm-vmx-remove-warn_on_once-in-kvm_vcpu_trigger_posted_interrupt.patch +cxl-fix-driver-use-count.patch diff --git a/queue-4.9/timer-sysclt-restrict-timer-migration-sysctl-values-to-0-and-1.patch b/queue-4.9/timer-sysclt-restrict-timer-migration-sysctl-values-to-0-and-1.patch new file mode 100644 index 00000000000..bf4c9bc2322 --- /dev/null +++ b/queue-4.9/timer-sysclt-restrict-timer-migration-sysctl-values-to-0-and-1.patch @@ -0,0 +1,49 @@ +From b94bf594cf8ed67cdd0439e70fa939783471597a Mon Sep 17 00:00:00 2001 +From: Myungho Jung +Date: Wed, 19 Apr 2017 15:24:50 -0700 +Subject: timer/sysclt: Restrict timer migration sysctl values to 0 and 1 + +From: Myungho Jung + +commit b94bf594cf8ed67cdd0439e70fa939783471597a upstream. + +timer_migration sysctl acts as a boolean switch, so the allowed values +should be restricted to 0 and 1. + +Add the necessary extra fields to the sysctl table entry to enforce that. 
+ +[ tglx: Rewrote changelog ] + +Signed-off-by: Myungho Jung +Link: http://lkml.kernel.org/r/1492640690-3550-1-git-send-email-mhjungk@gmail.com +Signed-off-by: Thomas Gleixner +Cc: Kazuhiro Hayashi +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sysctl.c | 2 ++ + kernel/time/timer.c | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +--- a/kernel/sysctl.c ++++ b/kernel/sysctl.c +@@ -1189,6 +1189,8 @@ static struct ctl_table kern_table[] = { + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = timer_migration_handler, ++ .extra1 = &zero, ++ .extra2 = &one, + }, + #endif + #ifdef CONFIG_BPF_SYSCALL +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -240,7 +240,7 @@ int timer_migration_handler(struct ctl_t + int ret; + + mutex_lock(&mutex); +- ret = proc_dointvec(table, write, buffer, lenp, ppos); ++ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (!ret && write) + timers_update_migration(false); + mutex_unlock(&mutex); -- 2.47.3