git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 5 Aug 2013 03:11:04 +0000 (11:11 +0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 5 Aug 2013 03:11:04 +0000 (11:11 +0800)
added patches:
cpufreq-fix-cpufreq-driver-module-refcount-balance-after-suspend-resume.patch
drm-radeon-disable-dma-rings-for-bo-moves-on-r6xx.patch
pci-pciehp-fix-null-pointer-deref-when-hot-removing-sr-iov-device.patch
pci-retry-allocation-of-only-the-resource-type-that-failed.patch
revert-cpuidle-quickly-notice-prediction-failure-for-repeat-mode.patch
revert-cpuidle-quickly-notice-prediction-failure-in-general-case.patch

queue-3.10/cpufreq-fix-cpufreq-driver-module-refcount-balance-after-suspend-resume.patch [new file with mode: 0644]
queue-3.10/drm-radeon-disable-dma-rings-for-bo-moves-on-r6xx.patch [new file with mode: 0644]
queue-3.10/pci-pciehp-fix-null-pointer-deref-when-hot-removing-sr-iov-device.patch [new file with mode: 0644]
queue-3.10/pci-retry-allocation-of-only-the-resource-type-that-failed.patch [new file with mode: 0644]
queue-3.10/revert-cpuidle-quickly-notice-prediction-failure-for-repeat-mode.patch [new file with mode: 0644]
queue-3.10/revert-cpuidle-quickly-notice-prediction-failure-in-general-case.patch [new file with mode: 0644]
queue-3.10/series

diff --git a/queue-3.10/cpufreq-fix-cpufreq-driver-module-refcount-balance-after-suspend-resume.patch b/queue-3.10/cpufreq-fix-cpufreq-driver-module-refcount-balance-after-suspend-resume.patch
new file mode 100644 (file)
index 0000000..dd96137
--- /dev/null
@@ -0,0 +1,81 @@
+From 2a99859932281ed6c2ecdd988855f8f6838f6743 Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Tue, 30 Jul 2013 00:32:00 +0200
+Subject: cpufreq: Fix cpufreq driver module refcount balance after suspend/resume
+
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+
+commit 2a99859932281ed6c2ecdd988855f8f6838f6743 upstream.
+
+Since cpufreq_cpu_put() called by __cpufreq_remove_dev() drops the
+driver module refcount, __cpufreq_remove_dev() causes that refcount
+to become negative for the cpufreq driver after a suspend/resume
+cycle.
+
+This is not the only bad thing that happens there, however, because
+kobject_put() should only be called for the policy kobject at this
+point if the CPU is not the last one for that policy.
+
+Namely, if the given CPU is the last one for that policy, the
+policy kobject's refcount should be 1 at this point, as set by
+cpufreq_add_dev_interface(), and only needs to be dropped once for
+the kobject to go away.  This actually happens under the cpus == 1
+check, so it need not be done before by cpufreq_cpu_put().
+
+On the other hand, if the given CPU is not the last one for that
+policy, this means that cpufreq_add_policy_cpu() has been called
+at least once for that policy and cpufreq_cpu_get() has been
+called for it too.  To balance that cpufreq_cpu_get(), we need to
+call cpufreq_cpu_put() in that case.
+
+Thus, to fix the described problem and keep the reference
+counters balanced in both cases, move the cpufreq_cpu_put() call
+in __cpufreq_remove_dev() to the code path executed only for
+CPUs that share the policy with other CPUs.
+
+Reported-and-tested-by: Toralf Förster <toralf.foerster@gmx.de>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/cpufreq/cpufreq.c |   19 ++++++++++---------
+ 1 file changed, 10 insertions(+), 9 deletions(-)
+
+--- a/drivers/cpufreq/cpufreq.c
++++ b/drivers/cpufreq/cpufreq.c
+@@ -1075,14 +1075,11 @@ static int __cpufreq_remove_dev(struct d
+                               __func__, cpu_dev->id, cpu);
+       }
+-      if ((cpus == 1) && (cpufreq_driver->target))
+-              __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT);
+-
+-      pr_debug("%s: removing link, cpu: %d\n", __func__, cpu);
+-      cpufreq_cpu_put(data);
+-
+       /* If cpu is last user of policy, free policy */
+       if (cpus == 1) {
++              if (cpufreq_driver->target)
++                      __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT);
++
+               lock_policy_rwsem_read(cpu);
+               kobj = &data->kobj;
+               cmp = &data->kobj_unregister;
+@@ -1103,9 +1100,13 @@ static int __cpufreq_remove_dev(struct d
+               free_cpumask_var(data->related_cpus);
+               free_cpumask_var(data->cpus);
+               kfree(data);
+-      } else if (cpufreq_driver->target) {
+-              __cpufreq_governor(data, CPUFREQ_GOV_START);
+-              __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
++      } else {
++              pr_debug("%s: removing link, cpu: %d\n", __func__, cpu);
++              cpufreq_cpu_put(data);
++              if (cpufreq_driver->target) {
++                      __cpufreq_governor(data, CPUFREQ_GOV_START);
++                      __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
++              }
+       }
+       per_cpu(cpufreq_policy_cpu, cpu) = -1;
diff --git a/queue-3.10/drm-radeon-disable-dma-rings-for-bo-moves-on-r6xx.patch b/queue-3.10/drm-radeon-disable-dma-rings-for-bo-moves-on-r6xx.patch
new file mode 100644 (file)
index 0000000..ec4de79
--- /dev/null
@@ -0,0 +1,49 @@
+From aeea40cbf9388fc829e66fa049f64d97fd72e118 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Thu, 11 Jul 2013 14:20:11 -0400
+Subject: drm/radeon: Disable dma rings for bo moves on r6xx
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit aeea40cbf9388fc829e66fa049f64d97fd72e118 upstream.
+
+They still seem to cause instability on some r6xx parts.
+As a follow up, we can switch to using CP DMA for bo
+moves on r6xx as a lighter weight alternative to using
+the 3D engine.
+
+A version of this patch should also go to stable kernels.
+
+Tested-by: J.N. <golden.fleeced@gmail.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/gpu/drm/radeon/radeon_asic.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/radeon/radeon_asic.c
++++ b/drivers/gpu/drm/radeon/radeon_asic.c
+@@ -986,8 +986,8 @@ static struct radeon_asic r600_asic = {
+               .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+               .dma = &r600_copy_dma,
+               .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+-              .copy = &r600_copy_dma,
+-              .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
++              .copy = &r600_copy_blit,
++              .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+       },
+       .surface = {
+               .set_reg = r600_set_surface_reg,
+@@ -1074,8 +1074,8 @@ static struct radeon_asic rs780_asic = {
+               .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+               .dma = &r600_copy_dma,
+               .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+-              .copy = &r600_copy_dma,
+-              .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
++              .copy = &r600_copy_blit,
++              .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+       },
+       .surface = {
+               .set_reg = r600_set_surface_reg,
diff --git a/queue-3.10/pci-pciehp-fix-null-pointer-deref-when-hot-removing-sr-iov-device.patch b/queue-3.10/pci-pciehp-fix-null-pointer-deref-when-hot-removing-sr-iov-device.patch
new file mode 100644 (file)
index 0000000..a89f638
--- /dev/null
@@ -0,0 +1,55 @@
+From 29ed1f29b68a8395d5679b3c4e38352b617b3236 Mon Sep 17 00:00:00 2001
+From: Yinghai Lu <yinghai@kernel.org>
+Date: Fri, 19 Jul 2013 12:14:16 -0700
+Subject: PCI: pciehp: Fix null pointer deref when hot-removing SR-IOV device
+
+From: Yinghai Lu <yinghai@kernel.org>
+
+commit 29ed1f29b68a8395d5679b3c4e38352b617b3236 upstream.
+
+Hot-removing a device with SR-IOV enabled causes a null pointer dereference
+in v3.9 and v3.10.
+
+This is a regression caused by ba518e3c17 ("PCI: pciehp: Iterate over all
+devices in slot, not functions 0-7").  When we iterate over the
+bus->devices list, we first remove the PF, which also removes all the VFs
+from the list.  Then the list iterator blows up because more than just the
+current entry was removed from the list.
+
+ac205b7bb7 ("PCI: make sriov work with hotplug remove") works around a
+similar problem in pci_stop_bus_devices() by iterating over the list in
+reverse, so the VFs are stopped and removed from the list first, before the
+PF.
+
+This patch changes pciehp_unconfigure_device() to iterate over the list in
+reverse, too.
+
+[bhelgaas: bugzilla, changelog]
+Reference: https://bugzilla.kernel.org/show_bug.cgi?id=60604
+Signed-off-by: Yinghai Lu <yinghai@kernel.org>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Acked-by: Yijing Wang <wangyijing@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/hotplug/pciehp_pci.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/drivers/pci/hotplug/pciehp_pci.c
++++ b/drivers/pci/hotplug/pciehp_pci.c
+@@ -92,7 +92,14 @@ int pciehp_unconfigure_device(struct slo
+       if (ret)
+               presence = 0;
+-      list_for_each_entry_safe(dev, temp, &parent->devices, bus_list) {
++      /*
++       * Stopping an SR-IOV PF device removes all the associated VFs,
++       * which will update the bus->devices list and confuse the
++       * iterator.  Therefore, iterate in reverse so we remove the VFs
++       * first, then the PF.  We do the same in pci_stop_bus_device().
++       */
++      list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
++                                       bus_list) {
+               pci_dev_get(dev);
+               if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && presence) {
+                       pci_read_config_byte(dev, PCI_BRIDGE_CONTROL, &bctl);
diff --git a/queue-3.10/pci-retry-allocation-of-only-the-resource-type-that-failed.patch b/queue-3.10/pci-retry-allocation-of-only-the-resource-type-that-failed.patch
new file mode 100644 (file)
index 0000000..4ec4a48
--- /dev/null
@@ -0,0 +1,137 @@
+From aa914f5ec25e4371ba18b312971314be1b9b1076 Mon Sep 17 00:00:00 2001
+From: Yinghai Lu <yinghai@kernel.org>
+Date: Thu, 25 Jul 2013 06:31:38 -0700
+Subject: PCI: Retry allocation of only the resource type that failed
+
+From: Yinghai Lu <yinghai@kernel.org>
+
+commit aa914f5ec25e4371ba18b312971314be1b9b1076 upstream.
+
+Ben Herrenschmidt reported the following problem:
+
+  - The bus has space for all desired MMIO resources, including optional
+    space for SR-IOV devices
+  - We attempt to allocate I/O port space, but it fails because the bus
+    has no I/O space
+  - Because of the I/O allocation failure, we retry MMIO allocation,
+    requesting only the required space, without the optional SR-IOV space
+
+This means we don't allocate the optional SR-IOV space, even though we
+could.
+
+This is related to 0c5be0cb0e ("PCI: Retry on IORESOURCE_IO type
+allocations").
+
+This patch changes how we handle allocation failures.  We will now retry
+allocation of only the resource type that failed.  If MMIO allocation
+fails, we'll retry only MMIO allocation.  If I/O port allocation fails,
+we'll retry only I/O port allocation.
+
+[bhelgaas: changelog]
+Reference: https://lkml.kernel.org/r/1367712653.11982.19.camel@pasglop
+Reported-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Tested-by: Gavin Shan <shangw@linux.vnet.ibm.com>
+Signed-off-by: Yinghai Lu <yinghai@kernel.org>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/setup-bus.c |   69 +++++++++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 68 insertions(+), 1 deletion(-)
+
+--- a/drivers/pci/setup-bus.c
++++ b/drivers/pci/setup-bus.c
+@@ -300,6 +300,47 @@ static void assign_requested_resources_s
+       }
+ }
++static unsigned long pci_fail_res_type_mask(struct list_head *fail_head)
++{
++      struct pci_dev_resource *fail_res;
++      unsigned long mask = 0;
++
++      /* check failed type */
++      list_for_each_entry(fail_res, fail_head, list)
++              mask |= fail_res->flags;
++
++      /*
++       * one pref failed resource will set IORESOURCE_MEM,
++       * as we can allocate pref in non-pref range.
++       * Will release all assigned non-pref sibling resources
++       * according to that bit.
++       */
++      return mask & (IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH);
++}
++
++static bool pci_need_to_release(unsigned long mask, struct resource *res)
++{
++      if (res->flags & IORESOURCE_IO)
++              return !!(mask & IORESOURCE_IO);
++
++      /* check pref at first */
++      if (res->flags & IORESOURCE_PREFETCH) {
++              if (mask & IORESOURCE_PREFETCH)
++                      return true;
++              /* count pref if its parent is non-pref */
++              else if ((mask & IORESOURCE_MEM) &&
++                       !(res->parent->flags & IORESOURCE_PREFETCH))
++                      return true;
++              else
++                      return false;
++      }
++
++      if (res->flags & IORESOURCE_MEM)
++              return !!(mask & IORESOURCE_MEM);
++
++      return false;   /* should not get here */
++}
++
+ static void __assign_resources_sorted(struct list_head *head,
+                                struct list_head *realloc_head,
+                                struct list_head *fail_head)
+@@ -312,11 +353,24 @@ static void __assign_resources_sorted(st
+        *  if could do that, could get out early.
+        *  if could not do that, we still try to assign requested at first,
+        *    then try to reassign add_size for some resources.
++       *
++       * Separate three resource type checking if we need to release
++       * assigned resource after requested + add_size try.
++       *      1. if there is io port assign fail, will release assigned
++       *         io port.
++       *      2. if there is pref mmio assign fail, release assigned
++       *         pref mmio.
++       *         if assigned pref mmio's parent is non-pref mmio and there
++       *         is non-pref mmio assign fail, will release that assigned
++       *         pref mmio.
++       *      3. if there is non-pref mmio assign fail or pref mmio
++       *         assigned fail, will release assigned non-pref mmio.
+        */
+       LIST_HEAD(save_head);
+       LIST_HEAD(local_fail_head);
+       struct pci_dev_resource *save_res;
+-      struct pci_dev_resource *dev_res;
++      struct pci_dev_resource *dev_res, *tmp_res;
++      unsigned long fail_type;
+       /* Check if optional add_size is there */
+       if (!realloc_head || list_empty(realloc_head))
+@@ -348,6 +402,19 @@ static void __assign_resources_sorted(st
+               return;
+       }
++      /* check failed type */
++      fail_type = pci_fail_res_type_mask(&local_fail_head);
++      /* remove not need to be released assigned res from head list etc */
++      list_for_each_entry_safe(dev_res, tmp_res, head, list)
++              if (dev_res->res->parent &&
++                  !pci_need_to_release(fail_type, dev_res->res)) {
++                      /* remove it from realloc_head list */
++                      remove_from_list(realloc_head, dev_res->res);
++                      remove_from_list(&save_head, dev_res->res);
++                      list_del(&dev_res->list);
++                      kfree(dev_res);
++              }
++
+       free_list(&local_fail_head);
+       /* Release assigned resource */
+       list_for_each_entry(dev_res, head, list)
diff --git a/queue-3.10/revert-cpuidle-quickly-notice-prediction-failure-for-repeat-mode.patch b/queue-3.10/revert-cpuidle-quickly-notice-prediction-failure-for-repeat-mode.patch
new file mode 100644 (file)
index 0000000..99338cd
--- /dev/null
@@ -0,0 +1,351 @@
+From 148519120c6d1f19ad53349683aeae9f228b0b8d Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Sat, 27 Jul 2013 01:41:34 +0200
+Subject: Revert "cpuidle: Quickly notice prediction failure for repeat mode"
+
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+
+commit 148519120c6d1f19ad53349683aeae9f228b0b8d upstream.
+
+Revert commit 69a37bea (cpuidle: Quickly notice prediction failure for
+repeat mode), because it has been identified as the source of a
+significant performance regression in v3.8 and later as explained by
+Jeremy Eder:
+
+  We believe we've identified a particular commit to the cpuidle code
+  that seems to be impacting performance of a variety of workloads.
+  The simplest way to reproduce is using netperf TCP_RR test, so
+  we're using that, on a pair of Sandy Bridge based servers.  We also
+  have data from a large database setup where performance is also
+  measurably/positively impacted, though that test data isn't easily
+  share-able.
+
+  Included below are test results from 3 test kernels:
+
+  kernel       reverts
+  -----------------------------------------------------------
+  1) vanilla   upstream (no reverts)
+
+  2) perfteam2 reverts e11538d1f03914eb92af5a1a378375c05ae8520c
+
+  3) test      reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4
+                       e11538d1f03914eb92af5a1a378375c05ae8520c
+
+  In summary, netperf TCP_RR numbers improve by approximately 4%
+  after reverting 69a37beabf1f0a6705c08e879bdd5d82ff6486c4.  When
+  69a37beabf1f0a6705c08e879bdd5d82ff6486c4 is included, C0 residency
+  never seems to get above 40%.  Taking that patch out gets C0 near
+  100% quite often, and performance increases.
+
+  The below data are histograms representing the %c0 residency @
+  1-second sample rates (using turbostat), while under netperf test.
+
+  - If you look at the first 4 histograms, you can see %c0 residency
+    almost entirely in the 30,40% bin.
+  - The last pair, which reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4,
+    shows %c0 in the 80,90,100% bins.
+
+  Below each kernel name are netperf TCP_RR trans/s numbers for the
+  particular kernel that can be disclosed publicly, comparing the 3
+  test kernels.  We ran a 4th test with the vanilla kernel where
+  we've also set /dev/cpu_dma_latency=0 to show overall impact
+  boosting single-threaded TCP_RR performance over 11% above
+  baseline.
+
+  3.10-rc2 vanilla RX + c0 lock (/dev/cpu_dma_latency=0):
+  TCP_RR trans/s 54323.78
+
+  -----------------------------------------------------------
+  3.10-rc2 vanilla RX (no reverts)
+  TCP_RR trans/s 48192.47
+
+  Receiver %c0
+      0.0000 -    10.0000 [     1]: *
+     10.0000 -    20.0000 [     0]:
+     20.0000 -    30.0000 [     0]:
+     30.0000 -    40.0000 [    59]:
+  ***********************************************************
+     40.0000 -    50.0000 [     1]: *
+     50.0000 -    60.0000 [     0]:
+     60.0000 -    70.0000 [     0]:
+     70.0000 -    80.0000 [     0]:
+     80.0000 -    90.0000 [     0]:
+     90.0000 -   100.0000 [     0]:
+
+  Sender %c0
+      0.0000 -    10.0000 [     1]: *
+     10.0000 -    20.0000 [     0]:
+     20.0000 -    30.0000 [     0]:
+     30.0000 -    40.0000 [    11]: ***********
+     40.0000 -    50.0000 [    49]:
+  *************************************************
+     50.0000 -    60.0000 [     0]:
+     60.0000 -    70.0000 [     0]:
+     70.0000 -    80.0000 [     0]:
+     80.0000 -    90.0000 [     0]:
+     90.0000 -   100.0000 [     0]:
+
+  -----------------------------------------------------------
+  3.10-rc2 perfteam2 RX (reverts commit
+  e11538d1f03914eb92af5a1a378375c05ae8520c)
+  TCP_RR trans/s 49698.69
+
+  Receiver %c0
+      0.0000 -    10.0000 [     1]: *
+     10.0000 -    20.0000 [     1]: *
+     20.0000 -    30.0000 [     0]:
+     30.0000 -    40.0000 [    59]:
+  ***********************************************************
+     40.0000 -    50.0000 [     0]:
+     50.0000 -    60.0000 [     0]:
+     60.0000 -    70.0000 [     0]:
+     70.0000 -    80.0000 [     0]:
+     80.0000 -    90.0000 [     0]:
+     90.0000 -   100.0000 [     0]:
+
+  Sender %c0
+      0.0000 -    10.0000 [     1]: *
+     10.0000 -    20.0000 [     0]:
+     20.0000 -    30.0000 [     0]:
+     30.0000 -    40.0000 [     2]: **
+     40.0000 -    50.0000 [    58]:
+  **********************************************************
+     50.0000 -    60.0000 [     0]:
+     60.0000 -    70.0000 [     0]:
+     70.0000 -    80.0000 [     0]:
+     80.0000 -    90.0000 [     0]:
+     90.0000 -   100.0000 [     0]:
+
+  -----------------------------------------------------------
+  3.10-rc2 test RX (reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4
+  and e11538d1f03914eb92af5a1a378375c05ae8520c)
+  TCP_RR trans/s 47766.95
+
+  Receiver %c0
+      0.0000 -    10.0000 [     1]: *
+     10.0000 -    20.0000 [     1]: *
+     20.0000 -    30.0000 [     0]:
+     30.0000 -    40.0000 [    27]: ***************************
+     40.0000 -    50.0000 [     2]: **
+     50.0000 -    60.0000 [     0]:
+     60.0000 -    70.0000 [     2]: **
+     70.0000 -    80.0000 [     0]:
+     80.0000 -    90.0000 [     0]:
+     90.0000 -   100.0000 [    28]: ****************************
+
+  Sender:
+      0.0000 -    10.0000 [     1]: *
+     10.0000 -    20.0000 [     0]:
+     20.0000 -    30.0000 [     0]:
+     30.0000 -    40.0000 [    11]: ***********
+     40.0000 -    50.0000 [     0]:
+     50.0000 -    60.0000 [     1]: *
+     60.0000 -    70.0000 [     0]:
+     70.0000 -    80.0000 [     3]: ***
+     80.0000 -    90.0000 [     7]: *******
+     90.0000 -   100.0000 [    38]: **************************************
+
+  These results demonstrate gaining back the tendency of the CPU to
+  stay in more responsive, performant C-states (and thus yield
+  measurably better performance), by reverting commit
+  69a37beabf1f0a6705c08e879bdd5d82ff6486c4.
+
+Requested-by: Jeremy Eder <jeder@redhat.com>
+Tested-by: Len Brown <len.brown@intel.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/cpuidle/governors/menu.c |   73 ++-------------------------------------
+ include/linux/tick.h             |    6 ---
+ kernel/time/tick-sched.c         |    9 +---
+ 3 files changed, 6 insertions(+), 82 deletions(-)
+
+--- a/drivers/cpuidle/governors/menu.c
++++ b/drivers/cpuidle/governors/menu.c
+@@ -28,13 +28,6 @@
+ #define MAX_INTERESTING 50000
+ #define STDDEV_THRESH 400
+-/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */
+-#define MAX_DEVIATION 60
+-
+-static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer);
+-static DEFINE_PER_CPU(int, hrtimer_status);
+-/* menu hrtimer mode */
+-enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT};
+ /*
+  * Concepts and ideas behind the menu governor
+@@ -198,42 +191,17 @@ static u64 div_round64(u64 dividend, u32
+       return div_u64(dividend + (divisor / 2), divisor);
+ }
+-/* Cancel the hrtimer if it is not triggered yet */
+-void menu_hrtimer_cancel(void)
+-{
+-      int cpu = smp_processor_id();
+-      struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu);
+-
+-      /* The timer is still not time out*/
+-      if (per_cpu(hrtimer_status, cpu)) {
+-              hrtimer_cancel(hrtmr);
+-              per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP;
+-      }
+-}
+-EXPORT_SYMBOL_GPL(menu_hrtimer_cancel);
+-
+-/* Call back for hrtimer is triggered */
+-static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer)
+-{
+-      int cpu = smp_processor_id();
+-
+-      per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP;
+-
+-      return HRTIMER_NORESTART;
+-}
+-
+ /*
+  * Try detecting repeating patterns by keeping track of the last 8
+  * intervals, and checking if the standard deviation of that set
+  * of points is below a threshold. If it is... then use the
+  * average of these 8 points as the estimated value.
+  */
+-static u32 get_typical_interval(struct menu_device *data)
++static void get_typical_interval(struct menu_device *data)
+ {
+       int i = 0, divisor = 0;
+       uint64_t max = 0, avg = 0, stddev = 0;
+       int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */
+-      unsigned int ret = 0;
+ again:
+@@ -274,16 +242,13 @@ again:
+       if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3))
+                                                       || stddev <= 20) {
+               data->predicted_us = avg;
+-              ret = 1;
+-              return ret;
++              return;
+       } else if ((divisor * 4) > INTERVALS * 3) {
+               /* Exclude the max interval */
+               thresh = max - 1;
+               goto again;
+       }
+-
+-      return ret;
+ }
+ /**
+@@ -298,9 +263,6 @@ static int menu_select(struct cpuidle_dr
+       int i;
+       int multiplier;
+       struct timespec t;
+-      int repeat = 0, low_predicted = 0;
+-      int cpu = smp_processor_id();
+-      struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu);
+       if (data->needs_update) {
+               menu_update(drv, dev);
+@@ -335,7 +297,7 @@ static int menu_select(struct cpuidle_dr
+       data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket],
+                                        RESOLUTION * DECAY);
+-      repeat = get_typical_interval(data);
++      get_typical_interval(data);
+       /*
+        * We want to default to C1 (hlt), not to busy polling
+@@ -356,10 +318,8 @@ static int menu_select(struct cpuidle_dr
+               if (s->disabled || su->disable)
+                       continue;
+-              if (s->target_residency > data->predicted_us) {
+-                      low_predicted = 1;
++              if (s->target_residency > data->predicted_us)
+                       continue;
+-              }
+               if (s->exit_latency > latency_req)
+                       continue;
+               if (s->exit_latency * multiplier > data->predicted_us)
+@@ -369,28 +329,6 @@ static int menu_select(struct cpuidle_dr
+               data->exit_us = s->exit_latency;
+       }
+-      /* not deepest C-state chosen for low predicted residency */
+-      if (low_predicted) {
+-              unsigned int timer_us = 0;
+-
+-              /*
+-               * Set a timer to detect whether this sleep is much
+-               * longer than repeat mode predicted.  If the timer
+-               * triggers, the code will evaluate whether to put
+-               * the CPU into a deeper C-state.
+-               * The timer is cancelled on CPU wakeup.
+-               */
+-              timer_us = 2 * (data->predicted_us + MAX_DEVIATION);
+-
+-              if (repeat && (4 * timer_us < data->expected_us)) {
+-                      RCU_NONIDLE(hrtimer_start(hrtmr,
+-                              ns_to_ktime(1000 * timer_us),
+-                              HRTIMER_MODE_REL_PINNED));
+-                      /* In repeat case, menu hrtimer is started */
+-                      per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT;
+-              }
+-      }
+-
+       return data->last_state_idx;
+ }
+@@ -481,9 +419,6 @@ static int menu_enable_device(struct cpu
+                               struct cpuidle_device *dev)
+ {
+       struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
+-      struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu);
+-      hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+-      t->function = menu_hrtimer_notify;
+       memset(data, 0, sizeof(struct menu_device));
+--- a/include/linux/tick.h
++++ b/include/linux/tick.h
+@@ -174,10 +174,4 @@ static inline void tick_nohz_task_switch
+ #endif
+-# ifdef CONFIG_CPU_IDLE_GOV_MENU
+-extern void menu_hrtimer_cancel(void);
+-# else
+-static inline void menu_hrtimer_cancel(void) {}
+-# endif /* CONFIG_CPU_IDLE_GOV_MENU */
+-
+ #endif
+--- a/kernel/time/tick-sched.c
++++ b/kernel/time/tick-sched.c
+@@ -832,13 +832,10 @@ void tick_nohz_irq_exit(void)
+ {
+       struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+-      if (ts->inidle) {
+-              /* Cancel the timer because CPU already waken up from the C-states*/
+-              menu_hrtimer_cancel();
++      if (ts->inidle)
+               __tick_nohz_idle_enter(ts);
+-      } else {
++      else
+               tick_nohz_full_stop_tick(ts);
+-      }
+ }
+ /**
+@@ -936,8 +933,6 @@ void tick_nohz_idle_exit(void)
+       ts->inidle = 0;
+-      /* Cancel the timer because CPU already waken up from the C-states*/
+-      menu_hrtimer_cancel();
+       if (ts->idle_active || ts->tick_stopped)
+               now = ktime_get();
diff --git a/queue-3.10/revert-cpuidle-quickly-notice-prediction-failure-in-general-case.patch b/queue-3.10/revert-cpuidle-quickly-notice-prediction-failure-in-general-case.patch
new file mode 100644 (file)
index 0000000..fb2ab85
--- /dev/null
@@ -0,0 +1,103 @@
+From 228b30234f258a193317874854eee1ca7807186e Mon Sep 17 00:00:00 2001
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+Date: Sat, 27 Jul 2013 01:13:26 +0200
+Subject: Revert "cpuidle: Quickly notice prediction failure in general case"
+
+From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
+
+commit 228b30234f258a193317874854eee1ca7807186e upstream.
+
+Revert commit e11538d1 (cpuidle: Quickly notice prediction failure in
+general case), since it depends on commit 69a37be (cpuidle: Quickly
+notice prediction failure for repeat mode) that has been identified
+as the source of a significant performance regression in v3.8 and
+later.
+
+Requested-by: Jeremy Eder <jeder@redhat.com>
+Tested-by: Len Brown <len.brown@intel.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/cpuidle/governors/menu.c |   35 +----------------------------------
+ 1 file changed, 1 insertion(+), 34 deletions(-)
+
+--- a/drivers/cpuidle/governors/menu.c
++++ b/drivers/cpuidle/governors/menu.c
+@@ -34,7 +34,7 @@
+ static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer);
+ static DEFINE_PER_CPU(int, hrtimer_status);
+ /* menu hrtimer mode */
+-enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL};
++enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT};
+ /*
+  * Concepts and ideas behind the menu governor
+@@ -116,13 +116,6 @@ enum {MENU_HRTIMER_STOP, MENU_HRTIMER_RE
+  *
+  */
+-/*
+- * The C-state residency is so long that is is worthwhile to exit
+- * from the shallow C-state and re-enter into a deeper C-state.
+- */
+-static unsigned int perfect_cstate_ms __read_mostly = 30;
+-module_param(perfect_cstate_ms, uint, 0000);
+-
+ struct menu_device {
+       int             last_state_idx;
+       int             needs_update;
+@@ -223,16 +216,6 @@ EXPORT_SYMBOL_GPL(menu_hrtimer_cancel);
+ static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer)
+ {
+       int cpu = smp_processor_id();
+-      struct menu_device *data = &per_cpu(menu_devices, cpu);
+-
+-      /* In general case, the expected residency is much larger than
+-       *  deepest C-state target residency, but prediction logic still
+-       *  predicts a small predicted residency, so the prediction
+-       *  history is totally broken if the timer is triggered.
+-       *  So reset the correction factor.
+-       */
+-      if (per_cpu(hrtimer_status, cpu) == MENU_HRTIMER_GENERAL)
+-              data->correction_factor[data->bucket] = RESOLUTION * DECAY;
+       per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP;
+@@ -389,7 +372,6 @@ static int menu_select(struct cpuidle_dr
+       /* not deepest C-state chosen for low predicted residency */
+       if (low_predicted) {
+               unsigned int timer_us = 0;
+-              unsigned int perfect_us = 0;
+               /*
+                * Set a timer to detect whether this sleep is much
+@@ -400,28 +382,13 @@ static int menu_select(struct cpuidle_dr
+                */
+               timer_us = 2 * (data->predicted_us + MAX_DEVIATION);
+-              perfect_us = perfect_cstate_ms * 1000;
+-
+               if (repeat && (4 * timer_us < data->expected_us)) {
+                       RCU_NONIDLE(hrtimer_start(hrtmr,
+                               ns_to_ktime(1000 * timer_us),
+                               HRTIMER_MODE_REL_PINNED));
+                       /* In repeat case, menu hrtimer is started */
+                       per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT;
+-              } else if (perfect_us < data->expected_us) {
+-                      /*
+-                       * The next timer is long. This could be because
+-                       * we did not make a useful prediction.
+-                       * In that case, it makes sense to re-enter
+-                       * into a deeper C-state after some time.
+-                       */
+-                      RCU_NONIDLE(hrtimer_start(hrtmr,
+-                              ns_to_ktime(1000 * timer_us),
+-                              HRTIMER_MODE_REL_PINNED));
+-                      /* In general case, menu hrtimer is started */
+-                      per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_GENERAL;
+               }
+-
+       }
+       return data->last_state_idx;
index 90b71d798d4dc442570549f9a3c71089ed45376f..08ab7ff5035f2bb038e9a101d92de734e86988e6 100644 (file)
@@ -66,3 +66,9 @@ zram-avoid-double-free-in-function-zram_bvec_write.patch
 zram-avoid-access-beyond-the-zram-device.patch
 zram-protect-sysfs-handler-from-invalid-memory-access.patch
 acpi-battery-fix-parsing-_bix-return-value.patch
+revert-cpuidle-quickly-notice-prediction-failure-in-general-case.patch
+cpufreq-fix-cpufreq-driver-module-refcount-balance-after-suspend-resume.patch
+revert-cpuidle-quickly-notice-prediction-failure-for-repeat-mode.patch
+pci-pciehp-fix-null-pointer-deref-when-hot-removing-sr-iov-device.patch
+pci-retry-allocation-of-only-the-resource-type-that-failed.patch
+drm-radeon-disable-dma-rings-for-bo-moves-on-r6xx.patch