git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 19 Jul 2017 08:20:11 +0000 (10:20 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 19 Jul 2017 08:20:11 +0000 (10:20 +0200)
added patches:
kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch
kvm-vmx-check-value-written-to-ia32_bndcfgs.patch
kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch
kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch
pm-qos-return-einval-for-bogus-strings.patch
pm-wakeirq-convert-to-srcu.patch
sched-topology-fix-building-of-overlapping-sched-groups.patch
sched-topology-fix-overlapping-sched_group_mask.patch
tracing-use-softirq_offset-for-softirq-dectection-for-more-accurate-results.patch

queue-4.9/kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch [new file with mode: 0644]
queue-4.9/kvm-vmx-check-value-written-to-ia32_bndcfgs.patch [new file with mode: 0644]
queue-4.9/kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch [new file with mode: 0644]
queue-4.9/kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch [new file with mode: 0644]
queue-4.9/pm-qos-return-einval-for-bogus-strings.patch [new file with mode: 0644]
queue-4.9/pm-wakeirq-convert-to-srcu.patch [new file with mode: 0644]
queue-4.9/sched-topology-fix-building-of-overlapping-sched-groups.patch [new file with mode: 0644]
queue-4.9/sched-topology-fix-overlapping-sched_group_mask.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/tracing-use-softirq_offset-for-softirq-dectection-for-more-accurate-results.patch [new file with mode: 0644]

diff --git a/queue-4.9/kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch b/queue-4.9/kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch
new file mode 100644 (file)
index 0000000..108d7d3
--- /dev/null
@@ -0,0 +1,43 @@
+From 691bd4340bef49cf7e5855d06cf24444b5bf2d85 Mon Sep 17 00:00:00 2001
+From: Haozhong Zhang <haozhong.zhang@intel.com>
+Date: Tue, 4 Jul 2017 10:27:41 +0800
+Subject: kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS
+
+From: Haozhong Zhang <haozhong.zhang@intel.com>
+
+commit 691bd4340bef49cf7e5855d06cf24444b5bf2d85 upstream.
+
+It's easier for host applications, such as QEMU, if they can always
+access guest MSR_IA32_BNDCFGS in VMCS, even though MPX is disabled in
+guest cpuid.
+
+Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2987,7 +2987,8 @@ static int vmx_get_msr(struct kvm_vcpu *
+               msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
+               break;
+       case MSR_IA32_BNDCFGS:
+-              if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu))
++              if (!kvm_mpx_supported() ||
++                  (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
+                       return 1;
+               msr_info->data = vmcs_read64(GUEST_BNDCFGS);
+               break;
+@@ -3069,7 +3070,8 @@ static int vmx_set_msr(struct kvm_vcpu *
+               vmcs_writel(GUEST_SYSENTER_ESP, data);
+               break;
+       case MSR_IA32_BNDCFGS:
+-              if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu))
++              if (!kvm_mpx_supported() ||
++                  (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
+                       return 1;
+               if (is_noncanonical_address(data & PAGE_MASK) ||
+                   (data & MSR_IA32_BNDCFGS_RSVD))
diff --git a/queue-4.9/kvm-vmx-check-value-written-to-ia32_bndcfgs.patch b/queue-4.9/kvm-vmx-check-value-written-to-ia32_bndcfgs.patch
new file mode 100644 (file)
index 0000000..63ad95d
--- /dev/null
@@ -0,0 +1,48 @@
+From 4531662d1abf6c1f0e5c2b86ddb60e61509786c8 Mon Sep 17 00:00:00 2001
+From: Jim Mattson <jmattson@google.com>
+Date: Tue, 23 May 2017 11:52:54 -0700
+Subject: kvm: vmx: Check value written to IA32_BNDCFGS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jim Mattson <jmattson@google.com>
+
+commit 4531662d1abf6c1f0e5c2b86ddb60e61509786c8 upstream.
+
+Bits 11:2 must be zero and the linear address in bits 63:12 must be
+canonical. Otherwise, WRMSR(BNDCFGS) should raise #GP.
+
+Fixes: 0dd376e709975779 ("KVM: x86: add MSR_IA32_BNDCFGS to msrs_to_save")
+Signed-off-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/msr-index.h |    2 ++
+ arch/x86/kvm/vmx.c               |    3 +++
+ 2 files changed, 5 insertions(+)
+
+--- a/arch/x86/include/asm/msr-index.h
++++ b/arch/x86/include/asm/msr-index.h
+@@ -405,6 +405,8 @@
+ #define MSR_IA32_TSC_ADJUST             0x0000003b
+ #define MSR_IA32_BNDCFGS              0x00000d90
++#define MSR_IA32_BNDCFGS_RSVD         0x00000ffc
++
+ #define MSR_IA32_XSS                  0x00000da0
+ #define FEATURE_CONTROL_LOCKED                                (1<<0)
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -3071,6 +3071,9 @@ static int vmx_set_msr(struct kvm_vcpu *
+       case MSR_IA32_BNDCFGS:
+               if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu))
+                       return 1;
++              if (is_noncanonical_address(data & PAGE_MASK) ||
++                  (data & MSR_IA32_BNDCFGS_RSVD))
++                      return 1;
+               vmcs_write64(GUEST_BNDCFGS, data);
+               break;
+       case MSR_IA32_TSC:
diff --git a/queue-4.9/kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch b/queue-4.9/kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch
new file mode 100644 (file)
index 0000000..026c384
--- /dev/null
@@ -0,0 +1,40 @@
+From a8b6fda38f80e75afa3b125c9e7f2550b579454b Mon Sep 17 00:00:00 2001
+From: Jim Mattson <jmattson@google.com>
+Date: Tue, 23 May 2017 11:52:52 -0700
+Subject: kvm: vmx: Do not disable intercepts for BNDCFGS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jim Mattson <jmattson@google.com>
+
+commit a8b6fda38f80e75afa3b125c9e7f2550b579454b upstream.
+
+The MSR permission bitmaps are shared by all VMs. However, some VMs
+may not be configured to support MPX, even when the host does. If the
+host supports MPX and the guest does not, we should intercept accesses
+to the BNDCFGS MSR, so that we can synthesize a #GP
+fault. Furthermore, if the host does not support MPX and the
+"ignore_msrs" kvm kernel parameter is set, then we should intercept
+accesses to the BNDCFGS MSR, so that we can skip over the rdmsr/wrmsr
+without raising a #GP fault.
+
+Fixes: da8999d31818fdc8 ("KVM: x86: Intel MPX vmx and msr handle")
+Signed-off-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/vmx.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -6474,7 +6474,6 @@ static __init int hardware_setup(void)
+       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+-      vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+       memcpy(vmx_msr_bitmap_legacy_x2apic,
+                       vmx_msr_bitmap_legacy, PAGE_SIZE);
diff --git a/queue-4.9/kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch b/queue-4.9/kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch
new file mode 100644 (file)
index 0000000..e325d8d
--- /dev/null
@@ -0,0 +1,63 @@
+From 4439af9f911ae0243ffe4e2dfc12bace49605d8b Mon Sep 17 00:00:00 2001
+From: Jim Mattson <jmattson@google.com>
+Date: Wed, 24 May 2017 10:49:25 -0700
+Subject: kvm: x86: Guest BNDCFGS requires guest MPX support
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jim Mattson <jmattson@google.com>
+
+commit 4439af9f911ae0243ffe4e2dfc12bace49605d8b upstream.
+
+The BNDCFGS MSR should only be exposed to the guest if the guest
+supports MPX. (cf. the TSC_AUX MSR and RDTSCP.)
+
+Fixes: 0dd376e709975779 ("KVM: x86: add MSR_IA32_BNDCFGS to msrs_to_save")
+Change-Id: I3ad7c01bda616715137ceac878f3fa7e66b6b387
+Signed-off-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kvm/cpuid.h |    8 ++++++++
+ arch/x86/kvm/vmx.c   |    4 ++--
+ 2 files changed, 10 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/cpuid.h
++++ b/arch/x86/kvm/cpuid.h
+@@ -144,6 +144,14 @@ static inline bool guest_cpuid_has_rtm(s
+       return best && (best->ebx & bit(X86_FEATURE_RTM));
+ }
++static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
++{
++      struct kvm_cpuid_entry2 *best;
++
++      best = kvm_find_cpuid_entry(vcpu, 7, 0);
++      return best && (best->ebx & bit(X86_FEATURE_MPX));
++}
++
+ static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
+ {
+       struct kvm_cpuid_entry2 *best;
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -2987,7 +2987,7 @@ static int vmx_get_msr(struct kvm_vcpu *
+               msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
+               break;
+       case MSR_IA32_BNDCFGS:
+-              if (!kvm_mpx_supported())
++              if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu))
+                       return 1;
+               msr_info->data = vmcs_read64(GUEST_BNDCFGS);
+               break;
+@@ -3069,7 +3069,7 @@ static int vmx_set_msr(struct kvm_vcpu *
+               vmcs_writel(GUEST_SYSENTER_ESP, data);
+               break;
+       case MSR_IA32_BNDCFGS:
+-              if (!kvm_mpx_supported())
++              if (!kvm_mpx_supported() || !guest_cpuid_has_mpx(vcpu))
+                       return 1;
+               vmcs_write64(GUEST_BNDCFGS, data);
+               break;
diff --git a/queue-4.9/pm-qos-return-einval-for-bogus-strings.patch b/queue-4.9/pm-qos-return-einval-for-bogus-strings.patch
new file mode 100644 (file)
index 0000000..2900c01
--- /dev/null
@@ -0,0 +1,34 @@
+From 2ca30331c156ca9e97643ad05dd8930b8fe78b01 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Mon, 10 Jul 2017 10:21:40 +0300
+Subject: PM / QoS: return -EINVAL for bogus strings
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+commit 2ca30331c156ca9e97643ad05dd8930b8fe78b01 upstream.
+
+In the current code, if the user accidentally writes a bogus command to
+this sysfs file, then we set the latency tolerance to an uninitialized
+variable.
+
+Fixes: 2d984ad132a8 (PM / QoS: Introcuce latency tolerance device PM QoS type)
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Acked-by: Pavel Machek <pavel@ucw.cz>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/base/power/sysfs.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/base/power/sysfs.c
++++ b/drivers/base/power/sysfs.c
+@@ -268,6 +268,8 @@ static ssize_t pm_qos_latency_tolerance_
+                       value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT;
+               else if (!strcmp(buf, "any") || !strcmp(buf, "any\n"))
+                       value = PM_QOS_LATENCY_ANY;
++              else
++                      return -EINVAL;
+       }
+       ret = dev_pm_qos_update_user_latency_tolerance(dev, value);
+       return ret < 0 ? ret : n;
diff --git a/queue-4.9/pm-wakeirq-convert-to-srcu.patch b/queue-4.9/pm-wakeirq-convert-to-srcu.patch
new file mode 100644 (file)
index 0000000..2097776
--- /dev/null
@@ -0,0 +1,147 @@
+From ea0212f40c6bc0594c8eff79266759e3ecd4bacc Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 25 Jun 2017 19:31:13 +0200
+Subject: PM / wakeirq: Convert to SRCU
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit ea0212f40c6bc0594c8eff79266759e3ecd4bacc upstream.
+
+The wakeirq infrastructure uses RCU to protect the list of wakeirqs. That
+breaks the irq bus locking infrastructure, which allows sleeping
+functions to be called so interrupt controllers behind slow busses,
+e.g. i2c, can be handled.
+
+The wakeirq functions hold rcu_read_lock and call into irq functions, which
+in case of interrupts using the irq bus locking will trigger a
+might_sleep() splat.
+
+Convert the wakeirq infrastructure to Sleepable RCU and unbreak it.
+
+Fixes: 4990d4fe327b (PM / Wakeirq: Add automated device wake IRQ handling)
+Reported-by: Brian Norris <briannorris@chromium.org>
+Suggested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Tested-by: Tony Lindgren <tony@atomide.com>
+Tested-by: Brian Norris <briannorris@chromium.org>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/base/power/wakeup.c |   32 ++++++++++++++++++--------------
+ 1 file changed, 18 insertions(+), 14 deletions(-)
+
+--- a/drivers/base/power/wakeup.c
++++ b/drivers/base/power/wakeup.c
+@@ -60,6 +60,8 @@ static LIST_HEAD(wakeup_sources);
+ static DECLARE_WAIT_QUEUE_HEAD(wakeup_count_wait_queue);
++DEFINE_STATIC_SRCU(wakeup_srcu);
++
+ static struct wakeup_source deleted_ws = {
+       .name = "deleted",
+       .lock =  __SPIN_LOCK_UNLOCKED(deleted_ws.lock),
+@@ -198,7 +200,7 @@ void wakeup_source_remove(struct wakeup_
+       spin_lock_irqsave(&events_lock, flags);
+       list_del_rcu(&ws->entry);
+       spin_unlock_irqrestore(&events_lock, flags);
+-      synchronize_rcu();
++      synchronize_srcu(&wakeup_srcu);
+ }
+ EXPORT_SYMBOL_GPL(wakeup_source_remove);
+@@ -332,12 +334,12 @@ void device_wakeup_detach_irq(struct dev
+ void device_wakeup_arm_wake_irqs(void)
+ {
+       struct wakeup_source *ws;
++      int srcuidx;
+-      rcu_read_lock();
++      srcuidx = srcu_read_lock(&wakeup_srcu);
+       list_for_each_entry_rcu(ws, &wakeup_sources, entry)
+               dev_pm_arm_wake_irq(ws->wakeirq);
+-
+-      rcu_read_unlock();
++      srcu_read_unlock(&wakeup_srcu, srcuidx);
+ }
+ /**
+@@ -348,12 +350,12 @@ void device_wakeup_arm_wake_irqs(void)
+ void device_wakeup_disarm_wake_irqs(void)
+ {
+       struct wakeup_source *ws;
++      int srcuidx;
+-      rcu_read_lock();
++      srcuidx = srcu_read_lock(&wakeup_srcu);
+       list_for_each_entry_rcu(ws, &wakeup_sources, entry)
+               dev_pm_disarm_wake_irq(ws->wakeirq);
+-
+-      rcu_read_unlock();
++      srcu_read_unlock(&wakeup_srcu, srcuidx);
+ }
+ /**
+@@ -805,10 +807,10 @@ EXPORT_SYMBOL_GPL(pm_wakeup_event);
+ void pm_print_active_wakeup_sources(void)
+ {
+       struct wakeup_source *ws;
+-      int active = 0;
++      int srcuidx, active = 0;
+       struct wakeup_source *last_activity_ws = NULL;
+-      rcu_read_lock();
++      srcuidx = srcu_read_lock(&wakeup_srcu);
+       list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
+               if (ws->active) {
+                       pr_info("active wakeup source: %s\n", ws->name);
+@@ -824,7 +826,7 @@ void pm_print_active_wakeup_sources(void
+       if (!active && last_activity_ws)
+               pr_info("last active wakeup source: %s\n",
+                       last_activity_ws->name);
+-      rcu_read_unlock();
++      srcu_read_unlock(&wakeup_srcu, srcuidx);
+ }
+ EXPORT_SYMBOL_GPL(pm_print_active_wakeup_sources);
+@@ -951,8 +953,9 @@ void pm_wakep_autosleep_enabled(bool set
+ {
+       struct wakeup_source *ws;
+       ktime_t now = ktime_get();
++      int srcuidx;
+-      rcu_read_lock();
++      srcuidx = srcu_read_lock(&wakeup_srcu);
+       list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
+               spin_lock_irq(&ws->lock);
+               if (ws->autosleep_enabled != set) {
+@@ -966,7 +969,7 @@ void pm_wakep_autosleep_enabled(bool set
+               }
+               spin_unlock_irq(&ws->lock);
+       }
+-      rcu_read_unlock();
++      srcu_read_unlock(&wakeup_srcu, srcuidx);
+ }
+ #endif /* CONFIG_PM_AUTOSLEEP */
+@@ -1027,15 +1030,16 @@ static int print_wakeup_source_stats(str
+ static int wakeup_sources_stats_show(struct seq_file *m, void *unused)
+ {
+       struct wakeup_source *ws;
++      int srcuidx;
+       seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t"
+               "expire_count\tactive_since\ttotal_time\tmax_time\t"
+               "last_change\tprevent_suspend_time\n");
+-      rcu_read_lock();
++      srcuidx = srcu_read_lock(&wakeup_srcu);
+       list_for_each_entry_rcu(ws, &wakeup_sources, entry)
+               print_wakeup_source_stats(m, ws);
+-      rcu_read_unlock();
++      srcu_read_unlock(&wakeup_srcu, srcuidx);
+       print_wakeup_source_stats(m, &deleted_ws);
diff --git a/queue-4.9/sched-topology-fix-building-of-overlapping-sched-groups.patch b/queue-4.9/sched-topology-fix-building-of-overlapping-sched-groups.patch
new file mode 100644 (file)
index 0000000..1b78d31
--- /dev/null
@@ -0,0 +1,65 @@
+From 0372dd2736e02672ac6e189c31f7d8c02ad543cd Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Fri, 14 Apr 2017 17:24:02 +0200
+Subject: sched/topology: Fix building of overlapping sched-groups
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 0372dd2736e02672ac6e189c31f7d8c02ad543cd upstream.
+
+When building the overlapping groups, we very obviously should start
+with the previous domain of _this_ @cpu, not CPU-0.
+
+This can be readily demonstrated with a topology like:
+
+  node   0   1   2   3
+    0:  10  20  30  20
+    1:  20  10  20  30
+    2:  30  20  10  20
+    3:  20  30  20  10
+
+Where (for example) CPU1 ends up generating the following nonsensical groups:
+
+  [] CPU1 attaching sched-domain:
+  []  domain 0: span 0-2 level NUMA
+  []   groups: 1 2 0
+  []   domain 1: span 0-3 level NUMA
+  []    groups: 1-3 (cpu_capacity = 3072) 0-1,3 (cpu_capacity = 3072)
+
+Where the fact that domain 1 doesn't include a group with span 0-2 is
+the obvious fail.
+
+With patch this looks like:
+
+  [] CPU1 attaching sched-domain:
+  []  domain 0: span 0-2 level NUMA
+  []   groups: 1 0 2
+  []   domain 1: span 0-3 level NUMA
+  []    groups: 0-2 (cpu_capacity = 3072) 0,2-3 (cpu_capacity = 3072)
+
+Debugged-by: Lauro Ramos Venancio <lvenanci@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-kernel@vger.kernel.org
+Fixes: e3589f6c81e4 ("sched: Allow for overlapping sched_domain spans")
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sched/core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -6148,7 +6148,7 @@ build_overlap_sched_groups(struct sched_
+       cpumask_clear(covered);
+-      for_each_cpu(i, span) {
++      for_each_cpu_wrap(i, span, cpu) {
+               struct cpumask *sg_span;
+               if (cpumask_test_cpu(i, covered))
diff --git a/queue-4.9/sched-topology-fix-overlapping-sched_group_mask.patch b/queue-4.9/sched-topology-fix-overlapping-sched_group_mask.patch
new file mode 100644 (file)
index 0000000..d152b9e
--- /dev/null
@@ -0,0 +1,99 @@
+From 73bb059f9b8a00c5e1bf2f7ca83138c05d05e600 Mon Sep 17 00:00:00 2001
+From: Peter Zijlstra <peterz@infradead.org>
+Date: Tue, 25 Apr 2017 14:00:49 +0200
+Subject: sched/topology: Fix overlapping sched_group_mask
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 73bb059f9b8a00c5e1bf2f7ca83138c05d05e600 upstream.
+
+The point of sched_group_mask is to select those CPUs from
+sched_group_cpus that can actually arrive at this balance domain.
+
+The current code gets it wrong, as can be readily demonstrated with a
+topology like:
+
+  node   0   1   2   3
+    0:  10  20  30  20
+    1:  20  10  20  30
+    2:  30  20  10  20
+    3:  20  30  20  10
+
+Where (for example) domain 1 on CPU1 ends up with a mask that includes
+CPU0:
+
+  [] CPU1 attaching sched-domain:
+  []  domain 0: span 0-2 level NUMA
+  []   groups: 1 (mask: 1), 2, 0
+  []   domain 1: span 0-3 level NUMA
+  []    groups: 0-2 (mask: 0-2) (cpu_capacity: 3072), 0,2-3 (cpu_capacity: 3072)
+
+This causes sched_balance_cpu() to compute the wrong CPU and
+consequently should_we_balance() will terminate early resulting in
+missed load-balance opportunities.
+
+The fixed topology looks like:
+
+  [] CPU1 attaching sched-domain:
+  []  domain 0: span 0-2 level NUMA
+  []   groups: 1 (mask: 1), 2, 0
+  []   domain 1: span 0-3 level NUMA
+  []    groups: 0-2 (mask: 1) (cpu_capacity: 3072), 0,2-3 (cpu_capacity: 3072)
+
+(note: this relies on OVERLAP domains to always have children, this is
+ true because the regular topology domains are still here -- this is
+ before degenerate trimming)
+
+Debugged-by: Lauro Ramos Venancio <lvenanci@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: linux-kernel@vger.kernel.org
+Fixes: e3589f6c81e4 ("sched: Allow for overlapping sched_domain spans")
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/sched/core.c |   18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -6102,6 +6102,9 @@ enum s_alloc {
+  * Build an iteration mask that can exclude certain CPUs from the upwards
+  * domain traversal.
+  *
++ * Only CPUs that can arrive at this group should be considered to continue
++ * balancing.
++ *
+  * Asymmetric node setups can result in situations where the domain tree is of
+  * unequal depth, make sure to skip domains that already cover the entire
+  * range.
+@@ -6120,11 +6123,24 @@ static void build_group_mask(struct sche
+       for_each_cpu(i, span) {
+               sibling = *per_cpu_ptr(sdd->sd, i);
+-              if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
++
++              /*
++               * Can happen in the asymmetric case, where these siblings are
++               * unused. The mask will not be empty because those CPUs that
++               * do have the top domain _should_ span the domain.
++               */
++              if (!sibling->child)
++                      continue;
++
++              /* If we would not end up here, we can't continue from here */
++              if (!cpumask_equal(sg_span, sched_domain_span(sibling->child)))
+                       continue;
+               cpumask_set_cpu(i, sched_group_mask(sg));
+       }
++
++      /* We must not have empty masks here */
++      WARN_ON_ONCE(cpumask_empty(sched_group_mask(sg)));
+ }
+ /*
index 265fe32a0eff28d22338a3716478c49691c1ae36..9abce79e92b1e6021d79b7fdf39409658f7e15c9 100644 (file)
@@ -59,3 +59,12 @@ crypto-sha1-ssse3-disable-avx2.patch
 crypto-caam-properly-set-iv-after-en-de-crypt.patch
 crypto-caam-fix-signals-handling.patch
 revert-sched-core-optimize-sched_smt.patch
+sched-topology-fix-building-of-overlapping-sched-groups.patch
+sched-topology-fix-overlapping-sched_group_mask.patch
+pm-wakeirq-convert-to-srcu.patch
+pm-qos-return-einval-for-bogus-strings.patch
+tracing-use-softirq_offset-for-softirq-dectection-for-more-accurate-results.patch
+kvm-vmx-do-not-disable-intercepts-for-bndcfgs.patch
+kvm-x86-guest-bndcfgs-requires-guest-mpx-support.patch
+kvm-vmx-check-value-written-to-ia32_bndcfgs.patch
+kvm-vmx-allow-host-to-access-guest-msr_ia32_bndcfgs.patch
diff --git a/queue-4.9/tracing-use-softirq_offset-for-softirq-dectection-for-more-accurate-results.patch b/queue-4.9/tracing-use-softirq_offset-for-softirq-dectection-for-more-accurate-results.patch
new file mode 100644 (file)
index 0000000..eeba47f
--- /dev/null
@@ -0,0 +1,38 @@
+From c59f29cb144a6a0dfac16ede9dc8eafc02dc56ca Mon Sep 17 00:00:00 2001
+From: Pavankumar Kondeti <pkondeti@codeaurora.org>
+Date: Fri, 9 Dec 2016 21:50:17 +0530
+Subject: tracing: Use SOFTIRQ_OFFSET for softirq dectection for more accurate results
+
+From: Pavankumar Kondeti <pkondeti@codeaurora.org>
+
+commit c59f29cb144a6a0dfac16ede9dc8eafc02dc56ca upstream.
+
+The 's' flag is supposed to indicate that a softirq is running. This
+can be detected by testing the preempt_count with SOFTIRQ_OFFSET.
+
+The current code tests the preempt_count with SOFTIRQ_MASK, which
+would be true even when softirqs are disabled but not serving a
+softirq.
+
+Link: http://lkml.kernel.org/r/1481300417-3564-1-git-send-email-pkondeti@codeaurora.org
+
+Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/trace/trace.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -1906,7 +1906,7 @@ tracing_generic_entry_update(struct trac
+ #endif
+               ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
+               ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
+-              ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
++              ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
+               (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
+               (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
+ }