From: Greg Kroah-Hartman Date: Wed, 19 Jul 2017 08:20:11 +0000 (+0200) Subject: 4.9-stable patches X-Git-Tag: v4.12.3~9 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=443d025bd3daa93c5f8b48a7603d7f7e2fb7dc5c;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch kvm-vmx-check-value-written-to-ia32_bndcfgs.patch kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch pm-qos-return-einval-for-bogus-strings.patch pm-wakeirq-convert-to-srcu.patch sched-topology-fix-building-of-overlapping-sched-groups.patch sched-topology-fix-overlapping-sched_group_mask.patch tracing-use-softirq_offset-for-softirq-dectection-for-more-accurate-results.patch --- diff --git a/queue-4.9/kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch b/queue-4.9/kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch new file mode 100644 index 00000000000..108d7d3b6de --- /dev/null +++ b/queue-4.9/kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch @@ -0,0 +1,43 @@ +From 691bd4340bef49cf7e5855d06cf24444b5bf2d85 Mon Sep 17 00:00:00 2001 +From: Haozhong Zhang +Date: Tue, 4 Jul 2017 10:27:41 +0800 +Subject: kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS + +From: Haozhong Zhang + +commit 691bd4340bef49cf7e5855d06cf24444b5bf2d85 upstream. + +It's easier for host applications, such as QEMU, if they can always +access guest MSR_IA32_BNDCFGS in VMCS, even though MPX is disabled in +guest cpuid. 
+ +Signed-off-by: Haozhong Zhang +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -2987,7 +2987,8 @@ static int vmx_get_msr(struct kvm_vcpu * + msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); + break; + case MSR_IA32_BNDCFGS: +- if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu)) ++ if (!kvm_mpx_supported() || ++ (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) + return 1; + msr_info->data = vmcs_read64(GUEST_BNDCFGS); + break; +@@ -3069,7 +3070,8 @@ static int vmx_set_msr(struct kvm_vcpu * + vmcs_writel(GUEST_SYSENTER_ESP, data); + break; + case MSR_IA32_BNDCFGS: +- if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu)) ++ if (!kvm_mpx_supported() || ++ (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) + return 1; + if (is_noncanonical_address(data & PAGE_MASK) || + (data & MSR_IA32_BNDCFGS_RSVD)) diff --git a/queue-4.9/kvm-vmx-check-value-written-to-ia32_bndcfgs.patch b/queue-4.9/kvm-vmx-check-value-written-to-ia32_bndcfgs.patch new file mode 100644 index 00000000000..63ad95d8c18 --- /dev/null +++ b/queue-4.9/kvm-vmx-check-value-written-to-ia32_bndcfgs.patch @@ -0,0 +1,48 @@ +From 4531662d1abf6c1f0e5c2b86ddb60e61509786c8 Mon Sep 17 00:00:00 2001 +From: Jim Mattson +Date: Tue, 23 May 2017 11:52:54 -0700 +Subject: kvm: vmx: Check value written to IA32_BNDCFGS +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jim Mattson + +commit 4531662d1abf6c1f0e5c2b86ddb60e61509786c8 upstream. + +Bits 11:2 must be zero and the linear address in bits 63:12 must be +canonical. Otherwise, WRMSR(BNDCFGS) should raise #GP. 
+ +Fixes: 0dd376e709975779 ("KVM: x86: add MSR_IA32_BNDCFGS to msrs_to_save") +Signed-off-by: Jim Mattson +Signed-off-by: Radim Krčmář +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/msr-index.h | 2 ++ + arch/x86/kvm/vmx.c | 3 +++ + 2 files changed, 5 insertions(+) + +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -405,6 +405,8 @@ + #define MSR_IA32_TSC_ADJUST 0x0000003b + #define MSR_IA32_BNDCFGS 0x00000d90 + ++#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc ++ + #define MSR_IA32_XSS 0x00000da0 + + #define FEATURE_CONTROL_LOCKED (1<<0) +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -3071,6 +3071,9 @@ static int vmx_set_msr(struct kvm_vcpu * + case MSR_IA32_BNDCFGS: + if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu)) + return 1; ++ if (is_noncanonical_address(data & PAGE_MASK) || ++ (data & MSR_IA32_BNDCFGS_RSVD)) ++ return 1; + vmcs_write64(GUEST_BNDCFGS, data); + break; + case MSR_IA32_TSC: diff --git a/queue-4.9/kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch b/queue-4.9/kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch new file mode 100644 index 00000000000..026c3841832 --- /dev/null +++ b/queue-4.9/kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch @@ -0,0 +1,40 @@ +From a8b6fda38f80e75afa3b125c9e7f2550b579454b Mon Sep 17 00:00:00 2001 +From: Jim Mattson +Date: Tue, 23 May 2017 11:52:52 -0700 +Subject: kvm: vmx: Do not disable intercepts for BNDCFGS +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jim Mattson + +commit a8b6fda38f80e75afa3b125c9e7f2550b579454b upstream. + +The MSR permission bitmaps are shared by all VMs. However, some VMs +may not be configured to support MPX, even when the host does. If the +host supports MPX and the guest does not, we should intercept accesses +to the BNDCFGS MSR, so that we can synthesize a #GP +fault. 
Furthermore, if the host does not support MPX and the +"ignore_msrs" kvm kernel parameter is set, then we should intercept +accesses to the BNDCFGS MSR, so that we can skip over the rdmsr/wrmsr +without raising a #GP fault. + +Fixes: da8999d31818fdc8 ("KVM: x86: Intel MPX vmx and msr handle") +Signed-off-by: Jim Mattson +Signed-off-by: Radim Krčmář +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/vmx.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -6474,7 +6474,6 @@ static __init int hardware_setup(void) + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); +- vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); + + memcpy(vmx_msr_bitmap_legacy_x2apic, + vmx_msr_bitmap_legacy, PAGE_SIZE); diff --git a/queue-4.9/kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch b/queue-4.9/kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch new file mode 100644 index 00000000000..e325d8de05d --- /dev/null +++ b/queue-4.9/kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch @@ -0,0 +1,63 @@ +From 4439af9f911ae0243ffe4e2dfc12bace49605d8b Mon Sep 17 00:00:00 2001 +From: Jim Mattson +Date: Wed, 24 May 2017 10:49:25 -0700 +Subject: kvm: x86: Guest BNDCFGS requires guest MPX support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jim Mattson + +commit 4439af9f911ae0243ffe4e2dfc12bace49605d8b upstream. + +The BNDCFGS MSR should only be exposed to the guest if the guest +supports MPX. (cf. the TSC_AUX MSR and RDTSCP.) 
+ +Fixes: 0dd376e709975779 ("KVM: x86: add MSR_IA32_BNDCFGS to msrs_to_save") +Change-Id: I3ad7c01bda616715137ceac878f3fa7e66b6b387 +Signed-off-by: Jim Mattson +Signed-off-by: Radim Krčmář +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/cpuid.h | 8 ++++++++ + arch/x86/kvm/vmx.c | 4 ++-- + 2 files changed, 10 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/cpuid.h ++++ b/arch/x86/kvm/cpuid.h +@@ -144,6 +144,14 @@ static inline bool guest_cpuid_has_rtm(s + return best && (best->ebx & bit(X86_FEATURE_RTM)); + } + ++static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) ++{ ++ struct kvm_cpuid_entry2 *best; ++ ++ best = kvm_find_cpuid_entry(vcpu, 7, 0); ++ return best && (best->ebx & bit(X86_FEATURE_MPX)); ++} ++ + static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) + { + struct kvm_cpuid_entry2 *best; +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -2987,7 +2987,7 @@ static int vmx_get_msr(struct kvm_vcpu * + msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); + break; + case MSR_IA32_BNDCFGS: +- if (!kvm_mpx_supported()) ++ if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu)) + return 1; + msr_info->data = vmcs_read64(GUEST_BNDCFGS); + break; +@@ -3069,7 +3069,7 @@ static int vmx_set_msr(struct kvm_vcpu * + vmcs_writel(GUEST_SYSENTER_ESP, data); + break; + case MSR_IA32_BNDCFGS: +- if (!kvm_mpx_supported()) ++ if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu)) + return 1; + vmcs_write64(GUEST_BNDCFGS, data); + break; diff --git a/queue-4.9/pm-qos-return-einval-for-bogus-strings.patch b/queue-4.9/pm-qos-return-einval-for-bogus-strings.patch new file mode 100644 index 00000000000..2900c012edd --- /dev/null +++ b/queue-4.9/pm-qos-return-einval-for-bogus-strings.patch @@ -0,0 +1,34 @@ +From 2ca30331c156ca9e97643ad05dd8930b8fe78b01 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Mon, 10 Jul 2017 10:21:40 +0300 +Subject: PM / QoS: return -EINVAL for bogus strings + +From: Dan Carpenter + +commit 
2ca30331c156ca9e97643ad05dd8930b8fe78b01 upstream. + +In the current code, if the user accidentally writes a bogus command to +this sysfs file, then we set the latency tolerance to an uninitialized +variable. + +Fixes: 2d984ad132a8 (PM / QoS: Introcuce latency tolerance device PM QoS type) +Signed-off-by: Dan Carpenter +Acked-by: Pavel Machek +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/base/power/sysfs.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/base/power/sysfs.c ++++ b/drivers/base/power/sysfs.c +@@ -268,6 +268,8 @@ static ssize_t pm_qos_latency_tolerance_ + value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; + else if (!strcmp(buf, "any") || !strcmp(buf, "any\n")) + value = PM_QOS_LATENCY_ANY; ++ else ++ return -EINVAL; + } + ret = dev_pm_qos_update_user_latency_tolerance(dev, value); + return ret < 0 ? ret : n; diff --git a/queue-4.9/pm-wakeirq-convert-to-srcu.patch b/queue-4.9/pm-wakeirq-convert-to-srcu.patch new file mode 100644 index 00000000000..2097776a21e --- /dev/null +++ b/queue-4.9/pm-wakeirq-convert-to-srcu.patch @@ -0,0 +1,147 @@ +From ea0212f40c6bc0594c8eff79266759e3ecd4bacc Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Sun, 25 Jun 2017 19:31:13 +0200 +Subject: PM / wakeirq: Convert to SRCU + +From: Thomas Gleixner + +commit ea0212f40c6bc0594c8eff79266759e3ecd4bacc upstream. + +The wakeirq infrastructure uses RCU to protect the list of wakeirqs. That +breaks the irq bus locking infrastructure, which allows sleeping +functions to be called so interrupt controllers behind slow busses, +e.g. i2c, can be handled. + +The wakeirq functions hold rcu_read_lock and call into irq functions, which +in case of interrupts using the irq bus locking will trigger a +might_sleep() splat. + +Convert the wakeirq infrastructure to Sleepable RCU and unbreak it. + +Fixes: 4990d4fe327b (PM / Wakeirq: Add automated device wake IRQ handling) +Reported-by: Brian Norris +Suggested-by: Paul E. 
McKenney +Signed-off-by: Thomas Gleixner +Reviewed-by: Paul E. McKenney +Tested-by: Tony Lindgren +Tested-by: Brian Norris +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/base/power/wakeup.c | 32 ++++++++++++++++++-------------- + 1 file changed, 18 insertions(+), 14 deletions(-) + +--- a/drivers/base/power/wakeup.c ++++ b/drivers/base/power/wakeup.c +@@ -60,6 +60,8 @@ static LIST_HEAD(wakeup_sources); + + static DECLARE_WAIT_QUEUE_HEAD(wakeup_count_wait_queue); + ++DEFINE_STATIC_SRCU(wakeup_srcu); ++ + static struct wakeup_source deleted_ws = { + .name = "deleted", + .lock = __SPIN_LOCK_UNLOCKED(deleted_ws.lock), +@@ -198,7 +200,7 @@ void wakeup_source_remove(struct wakeup_ + spin_lock_irqsave(&events_lock, flags); + list_del_rcu(&ws->entry); + spin_unlock_irqrestore(&events_lock, flags); +- synchronize_rcu(); ++ synchronize_srcu(&wakeup_srcu); + } + EXPORT_SYMBOL_GPL(wakeup_source_remove); + +@@ -332,12 +334,12 @@ void device_wakeup_detach_irq(struct dev + void device_wakeup_arm_wake_irqs(void) + { + struct wakeup_source *ws; ++ int srcuidx; + +- rcu_read_lock(); ++ srcuidx = srcu_read_lock(&wakeup_srcu); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) + dev_pm_arm_wake_irq(ws->wakeirq); +- +- rcu_read_unlock(); ++ srcu_read_unlock(&wakeup_srcu, srcuidx); + } + + /** +@@ -348,12 +350,12 @@ void device_wakeup_arm_wake_irqs(void) + void device_wakeup_disarm_wake_irqs(void) + { + struct wakeup_source *ws; ++ int srcuidx; + +- rcu_read_lock(); ++ srcuidx = srcu_read_lock(&wakeup_srcu); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) + dev_pm_disarm_wake_irq(ws->wakeirq); +- +- rcu_read_unlock(); ++ srcu_read_unlock(&wakeup_srcu, srcuidx); + } + + /** +@@ -805,10 +807,10 @@ EXPORT_SYMBOL_GPL(pm_wakeup_event); + void pm_print_active_wakeup_sources(void) + { + struct wakeup_source *ws; +- int active = 0; ++ int srcuidx, active = 0; + struct wakeup_source *last_activity_ws = NULL; + +- rcu_read_lock(); ++ srcuidx = 
srcu_read_lock(&wakeup_srcu); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) { + if (ws->active) { + pr_info("active wakeup source: %s\n", ws->name); +@@ -824,7 +826,7 @@ void pm_print_active_wakeup_sources(void + if (!active && last_activity_ws) + pr_info("last active wakeup source: %s\n", + last_activity_ws->name); +- rcu_read_unlock(); ++ srcu_read_unlock(&wakeup_srcu, srcuidx); + } + EXPORT_SYMBOL_GPL(pm_print_active_wakeup_sources); + +@@ -951,8 +953,9 @@ void pm_wakep_autosleep_enabled(bool set + { + struct wakeup_source *ws; + ktime_t now = ktime_get(); ++ int srcuidx; + +- rcu_read_lock(); ++ srcuidx = srcu_read_lock(&wakeup_srcu); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) { + spin_lock_irq(&ws->lock); + if (ws->autosleep_enabled != set) { +@@ -966,7 +969,7 @@ void pm_wakep_autosleep_enabled(bool set + } + spin_unlock_irq(&ws->lock); + } +- rcu_read_unlock(); ++ srcu_read_unlock(&wakeup_srcu, srcuidx); + } + #endif /* CONFIG_PM_AUTOSLEEP */ + +@@ -1027,15 +1030,16 @@ static int print_wakeup_source_stats(str + static int wakeup_sources_stats_show(struct seq_file *m, void *unused) + { + struct wakeup_source *ws; ++ int srcuidx; + + seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" + "expire_count\tactive_since\ttotal_time\tmax_time\t" + "last_change\tprevent_suspend_time\n"); + +- rcu_read_lock(); ++ srcuidx = srcu_read_lock(&wakeup_srcu); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) + print_wakeup_source_stats(m, ws); +- rcu_read_unlock(); ++ srcu_read_unlock(&wakeup_srcu, srcuidx); + + print_wakeup_source_stats(m, &deleted_ws); + diff --git a/queue-4.9/sched-topology-fix-building-of-overlapping-sched-groups.patch b/queue-4.9/sched-topology-fix-building-of-overlapping-sched-groups.patch new file mode 100644 index 00000000000..1b78d317726 --- /dev/null +++ b/queue-4.9/sched-topology-fix-building-of-overlapping-sched-groups.patch @@ -0,0 +1,65 @@ +From 0372dd2736e02672ac6e189c31f7d8c02ad543cd Mon Sep 17 00:00:00 2001 
+From: Peter Zijlstra +Date: Fri, 14 Apr 2017 17:24:02 +0200 +Subject: sched/topology: Fix building of overlapping sched-groups + +From: Peter Zijlstra + +commit 0372dd2736e02672ac6e189c31f7d8c02ad543cd upstream. + +When building the overlapping groups, we very obviously should start +with the previous domain of _this_ @cpu, not CPU-0. + +This can be readily demonstrated with a topology like: + + node 0 1 2 3 + 0: 10 20 30 20 + 1: 20 10 20 30 + 2: 30 20 10 20 + 3: 20 30 20 10 + +Where (for example) CPU1 ends up generating the following nonsensical groups: + + [] CPU1 attaching sched-domain: + [] domain 0: span 0-2 level NUMA + [] groups: 1 2 0 + [] domain 1: span 0-3 level NUMA + [] groups: 1-3 (cpu_capacity = 3072) 0-1,3 (cpu_capacity = 3072) + +Where the fact that domain 1 doesn't include a group with span 0-2 is +the obvious fail. + +With patch this looks like: + + [] CPU1 attaching sched-domain: + [] domain 0: span 0-2 level NUMA + [] groups: 1 0 2 + [] domain 1: span 0-3 level NUMA + [] groups: 0-2 (cpu_capacity = 3072) 0,2-3 (cpu_capacity = 3072) + +Debugged-by: Lauro Ramos Venancio +Signed-off-by: Peter Zijlstra (Intel) +Cc: Linus Torvalds +Cc: Mike Galbraith +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: linux-kernel@vger.kernel.org +Fixes: e3589f6c81e4 ("sched: Allow for overlapping sched_domain spans") +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -6148,7 +6148,7 @@ build_overlap_sched_groups(struct sched_ + + cpumask_clear(covered); + +- for_each_cpu(i, span) { ++ for_each_cpu_wrap(i, span, cpu) { + struct cpumask *sg_span; + + if (cpumask_test_cpu(i, covered)) diff --git a/queue-4.9/sched-topology-fix-overlapping-sched_group_mask.patch b/queue-4.9/sched-topology-fix-overlapping-sched_group_mask.patch new file mode 100644 index 00000000000..d152b9e9512 --- /dev/null +++ 
b/queue-4.9/sched-topology-fix-overlapping-sched_group_mask.patch @@ -0,0 +1,99 @@ +From 73bb059f9b8a00c5e1bf2f7ca83138c05d05e600 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Tue, 25 Apr 2017 14:00:49 +0200 +Subject: sched/topology: Fix overlapping sched_group_mask + +From: Peter Zijlstra + +commit 73bb059f9b8a00c5e1bf2f7ca83138c05d05e600 upstream. + +The point of sched_group_mask is to select those CPUs from +sched_group_cpus that can actually arrive at this balance domain. + +The current code gets it wrong, as can be readily demonstrated with a +topology like: + + node 0 1 2 3 + 0: 10 20 30 20 + 1: 20 10 20 30 + 2: 30 20 10 20 + 3: 20 30 20 10 + +Where (for example) domain 1 on CPU1 ends up with a mask that includes +CPU0: + + [] CPU1 attaching sched-domain: + [] domain 0: span 0-2 level NUMA + [] groups: 1 (mask: 1), 2, 0 + [] domain 1: span 0-3 level NUMA + [] groups: 0-2 (mask: 0-2) (cpu_capacity: 3072), 0,2-3 (cpu_capacity: 3072) + +This causes sched_balance_cpu() to compute the wrong CPU and +consequently should_we_balance() will terminate early resulting in +missed load-balance opportunities. 
+ +The fixed topology looks like: + + [] CPU1 attaching sched-domain: + [] domain 0: span 0-2 level NUMA + [] groups: 1 (mask: 1), 2, 0 + [] domain 1: span 0-3 level NUMA + [] groups: 0-2 (mask: 1) (cpu_capacity: 3072), 0,2-3 (cpu_capacity: 3072) + +(note: this relies on OVERLAP domains to always have children, this is + true because the regular topology domains are still here -- this is + before degenerate trimming) + +Debugged-by: Lauro Ramos Venancio +Signed-off-by: Peter Zijlstra (Intel) +Cc: Linus Torvalds +Cc: Mike Galbraith +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: linux-kernel@vger.kernel.org +Fixes: e3589f6c81e4 ("sched: Allow for overlapping sched_domain spans") +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/sched/core.c | 18 +++++++++++++++++- + 1 file changed, 17 insertions(+), 1 deletion(-) + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -6102,6 +6102,9 @@ enum s_alloc { + * Build an iteration mask that can exclude certain CPUs from the upwards + * domain traversal. + * ++ * Only CPUs that can arrive at this group should be considered to continue ++ * balancing. ++ * + * Asymmetric node setups can result in situations where the domain tree is of + * unequal depth, make sure to skip domains that already cover the entire + * range. +@@ -6120,11 +6123,24 @@ static void build_group_mask(struct sche + + for_each_cpu(i, span) { + sibling = *per_cpu_ptr(sdd->sd, i); +- if (!cpumask_test_cpu(i, sched_domain_span(sibling))) ++ ++ /* ++ * Can happen in the asymmetric case, where these siblings are ++ * unused. The mask will not be empty because those CPUs that ++ * do have the top domain _should_ span the domain. 
++ */ ++ if (!sibling->child) ++ continue; ++ ++ /* If we would not end up here, we can't continue from here */ ++ if (!cpumask_equal(sg_span, sched_domain_span(sibling->child))) + continue; + + cpumask_set_cpu(i, sched_group_mask(sg)); + } ++ ++ /* We must not have empty masks here */ ++ WARN_ON_ONCE(cpumask_empty(sched_group_mask(sg))); + } + + /* diff --git a/queue-4.9/series b/queue-4.9/series index 265fe32a0ef..9abce79e92b 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -59,3 +59,12 @@ crypto-sha1-ssse3-disable-avx2.patch crypto-caam-properly-set-iv-after-en-de-crypt.patch crypto-caam-fix-signals-handling.patch revert-sched-core-optimize-sched_smt.patch +sched-topology-fix-building-of-overlapping-sched-groups.patch +sched-topology-fix-overlapping-sched_group_mask.patch +pm-wakeirq-convert-to-srcu.patch +pm-qos-return-einval-for-bogus-strings.patch +tracing-use-softirq_offset-for-softirq-dectection-for-more-accurate-results.patch +kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch +kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch +kvm-vmx-check-value-written-to-ia32_bndcfgs.patch +kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch diff --git a/queue-4.9/tracing-use-softirq_offset-for-softirq-dectection-for-more-accurate-results.patch b/queue-4.9/tracing-use-softirq_offset-for-softirq-dectection-for-more-accurate-results.patch new file mode 100644 index 00000000000..eeba47f2835 --- /dev/null +++ b/queue-4.9/tracing-use-softirq_offset-for-softirq-dectection-for-more-accurate-results.patch @@ -0,0 +1,38 @@ +From c59f29cb144a6a0dfac16ede9dc8eafc02dc56ca Mon Sep 17 00:00:00 2001 +From: Pavankumar Kondeti +Date: Fri, 9 Dec 2016 21:50:17 +0530 +Subject: tracing: Use SOFTIRQ_OFFSET for softirq dectection for more accurate results + +From: Pavankumar Kondeti + +commit c59f29cb144a6a0dfac16ede9dc8eafc02dc56ca upstream. + +The 's' flag is supposed to indicate that a softirq is running. 
This +can be detected by testing the preempt_count with SOFTIRQ_OFFSET. + +The current code tests the preempt_count with SOFTIRQ_MASK, which +would be true even when softirqs are disabled but not serving a +softirq. + +Link: http://lkml.kernel.org/r/1481300417-3564-1-git-send-email-pkondeti@codeaurora.org + +Signed-off-by: Pavankumar Kondeti +Signed-off-by: Steven Rostedt +Signed-off-by: Amit Pundir +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/trace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -1906,7 +1906,7 @@ tracing_generic_entry_update(struct trac + #endif + ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | + ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | +- ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | ++ ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | + (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | + (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); + }