From 27369cce20dbc08d33af3897bf85f2b217acbccf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 5 Aug 2013 11:11:04 +0800 Subject: [PATCH] 3.10-stable patches added patches: cpufreq-fix-cpufreq-driver-module-refcount-balance-after-suspend-resume.patch drm-radeon-disable-dma-rings-for-bo-moves-on-r6xx.patch pci-pciehp-fix-null-pointer-deref-when-hot-removing-sr-iov-device.patch pci-retry-allocation-of-only-the-resource-type-that-failed.patch revert-cpuidle-quickly-notice-prediction-failure-for-repeat-mode.patch revert-cpuidle-quickly-notice-prediction-failure-in-general-case.patch --- ...efcount-balance-after-suspend-resume.patch | 81 ++++ ...sable-dma-rings-for-bo-moves-on-r6xx.patch | 49 +++ ...eref-when-hot-removing-sr-iov-device.patch | 55 +++ ...f-only-the-resource-type-that-failed.patch | 137 +++++++ ...e-prediction-failure-for-repeat-mode.patch | 351 ++++++++++++++++++ ...e-prediction-failure-in-general-case.patch | 103 +++++ queue-3.10/series | 6 + 7 files changed, 782 insertions(+) create mode 100644 queue-3.10/cpufreq-fix-cpufreq-driver-module-refcount-balance-after-suspend-resume.patch create mode 100644 queue-3.10/drm-radeon-disable-dma-rings-for-bo-moves-on-r6xx.patch create mode 100644 queue-3.10/pci-pciehp-fix-null-pointer-deref-when-hot-removing-sr-iov-device.patch create mode 100644 queue-3.10/pci-retry-allocation-of-only-the-resource-type-that-failed.patch create mode 100644 queue-3.10/revert-cpuidle-quickly-notice-prediction-failure-for-repeat-mode.patch create mode 100644 queue-3.10/revert-cpuidle-quickly-notice-prediction-failure-in-general-case.patch diff --git a/queue-3.10/cpufreq-fix-cpufreq-driver-module-refcount-balance-after-suspend-resume.patch b/queue-3.10/cpufreq-fix-cpufreq-driver-module-refcount-balance-after-suspend-resume.patch new file mode 100644 index 00000000000..dd961372ed1 --- /dev/null +++ b/queue-3.10/cpufreq-fix-cpufreq-driver-module-refcount-balance-after-suspend-resume.patch @@ -0,0 +1,81 @@ +From 
2a99859932281ed6c2ecdd988855f8f6838f6743 Mon Sep 17 00:00:00 2001 +From: "Rafael J. Wysocki" +Date: Tue, 30 Jul 2013 00:32:00 +0200 +Subject: cpufreq: Fix cpufreq driver module refcount balance after suspend/resume + +From: "Rafael J. Wysocki" + +commit 2a99859932281ed6c2ecdd988855f8f6838f6743 upstream. + +Since cpufreq_cpu_put() called by __cpufreq_remove_dev() drops the +driver module refcount, __cpufreq_remove_dev() causes that refcount +to become negative for the cpufreq driver after a suspend/resume +cycle. + +This is not the only bad thing that happens there, however, because +kobject_put() should only be called for the policy kobject at this +point if the CPU is not the last one for that policy. + +Namely, if the given CPU is the last one for that policy, the +policy kobject's refcount should be 1 at this point, as set by +cpufreq_add_dev_interface(), and only needs to be dropped once for +the kobject to go away. This actually happens under the cpus == 1 +check, so it need not be done before by cpufreq_cpu_put(). + +On the other hand, if the given CPU is not the last one for that +policy, this means that cpufreq_add_policy_cpu() has been called +at least once for that policy and cpufreq_cpu_get() has been +called for it too. To balance that cpufreq_cpu_get(), we need to +call cpufreq_cpu_put() in that case. + +Thus, to fix the described problem and keep the reference +counters balanced in both cases, move the cpufreq_cpu_put() call +in __cpufreq_remove_dev() to the code path executed only for +CPUs that share the policy with other CPUs. + +Reported-and-tested-by: Toralf Förster +Signed-off-by: Rafael J. Wysocki +Reviewed-by: Srivatsa S. 
Bhat +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/cpufreq/cpufreq.c | 19 ++++++++++--------- + 1 file changed, 10 insertions(+), 9 deletions(-) + +--- a/drivers/cpufreq/cpufreq.c ++++ b/drivers/cpufreq/cpufreq.c +@@ -1075,14 +1075,11 @@ static int __cpufreq_remove_dev(struct d + __func__, cpu_dev->id, cpu); + } + +- if ((cpus == 1) && (cpufreq_driver->target)) +- __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); +- +- pr_debug("%s: removing link, cpu: %d\n", __func__, cpu); +- cpufreq_cpu_put(data); +- + /* If cpu is last user of policy, free policy */ + if (cpus == 1) { ++ if (cpufreq_driver->target) ++ __cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT); ++ + lock_policy_rwsem_read(cpu); + kobj = &data->kobj; + cmp = &data->kobj_unregister; +@@ -1103,9 +1100,13 @@ static int __cpufreq_remove_dev(struct d + free_cpumask_var(data->related_cpus); + free_cpumask_var(data->cpus); + kfree(data); +- } else if (cpufreq_driver->target) { +- __cpufreq_governor(data, CPUFREQ_GOV_START); +- __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); ++ } else { ++ pr_debug("%s: removing link, cpu: %d\n", __func__, cpu); ++ cpufreq_cpu_put(data); ++ if (cpufreq_driver->target) { ++ __cpufreq_governor(data, CPUFREQ_GOV_START); ++ __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); ++ } + } + + per_cpu(cpufreq_policy_cpu, cpu) = -1; diff --git a/queue-3.10/drm-radeon-disable-dma-rings-for-bo-moves-on-r6xx.patch b/queue-3.10/drm-radeon-disable-dma-rings-for-bo-moves-on-r6xx.patch new file mode 100644 index 00000000000..ec4de794c40 --- /dev/null +++ b/queue-3.10/drm-radeon-disable-dma-rings-for-bo-moves-on-r6xx.patch @@ -0,0 +1,49 @@ +From aeea40cbf9388fc829e66fa049f64d97fd72e118 Mon Sep 17 00:00:00 2001 +From: Alex Deucher +Date: Thu, 11 Jul 2013 14:20:11 -0400 +Subject: drm/radeon: Disable dma rings for bo moves on r6xx + +From: Alex Deucher + +commit aeea40cbf9388fc829e66fa049f64d97fd72e118 upstream. + +They still seem to cause instability on some r6xx parts. 
+As a follow up, we can switch to using CP DMA for bo +moves on r6xx as a lighter weight alternative to using +the 3D engine. + +A version of this patch should also go to stable kernels. + +Tested-by: J.N. +Reviewed-by: Christian König +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/radeon/radeon_asic.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/radeon/radeon_asic.c ++++ b/drivers/gpu/drm/radeon/radeon_asic.c +@@ -986,8 +986,8 @@ static struct radeon_asic r600_asic = { + .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .dma = &r600_copy_dma, + .dma_ring_index = R600_RING_TYPE_DMA_INDEX, +- .copy = &r600_copy_dma, +- .copy_ring_index = R600_RING_TYPE_DMA_INDEX, ++ .copy = &r600_copy_blit, ++ .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, + }, + .surface = { + .set_reg = r600_set_surface_reg, +@@ -1074,8 +1074,8 @@ static struct radeon_asic rs780_asic = { + .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, + .dma = &r600_copy_dma, + .dma_ring_index = R600_RING_TYPE_DMA_INDEX, +- .copy = &r600_copy_dma, +- .copy_ring_index = R600_RING_TYPE_DMA_INDEX, ++ .copy = &r600_copy_blit, ++ .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, + }, + .surface = { + .set_reg = r600_set_surface_reg, diff --git a/queue-3.10/pci-pciehp-fix-null-pointer-deref-when-hot-removing-sr-iov-device.patch b/queue-3.10/pci-pciehp-fix-null-pointer-deref-when-hot-removing-sr-iov-device.patch new file mode 100644 index 00000000000..a89f638467f --- /dev/null +++ b/queue-3.10/pci-pciehp-fix-null-pointer-deref-when-hot-removing-sr-iov-device.patch @@ -0,0 +1,55 @@ +From 29ed1f29b68a8395d5679b3c4e38352b617b3236 Mon Sep 17 00:00:00 2001 +From: Yinghai Lu +Date: Fri, 19 Jul 2013 12:14:16 -0700 +Subject: PCI: pciehp: Fix null pointer deref when hot-removing SR-IOV device + +From: Yinghai Lu + +commit 29ed1f29b68a8395d5679b3c4e38352b617b3236 upstream. 
+ +Hot-removing a device with SR-IOV enabled causes a null pointer dereference +in v3.9 and v3.10. + +This is a regression caused by ba518e3c17 ("PCI: pciehp: Iterate over all +devices in slot, not functions 0-7"). When we iterate over the +bus->devices list, we first remove the PF, which also removes all the VFs +from the list. Then the list iterator blows up because more than just the +current entry was removed from the list. + +ac205b7bb7 ("PCI: make sriov work with hotplug remove") works around a +similar problem in pci_stop_bus_devices() by iterating over the list in +reverse, so the VFs are stopped and removed from the list first, before the +PF. + +This patch changes pciehp_unconfigure_device() to iterate over the list in +reverse, too. + +[bhelgaas: bugzilla, changelog] +Reference: https://bugzilla.kernel.org/show_bug.cgi?id=60604 +Signed-off-by: Yinghai Lu +Signed-off-by: Bjorn Helgaas +Acked-by: Yijing Wang +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/pci/hotplug/pciehp_pci.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/drivers/pci/hotplug/pciehp_pci.c ++++ b/drivers/pci/hotplug/pciehp_pci.c +@@ -92,7 +92,14 @@ int pciehp_unconfigure_device(struct slo + if (ret) + presence = 0; + +- list_for_each_entry_safe(dev, temp, &parent->devices, bus_list) { ++ /* ++ * Stopping an SR-IOV PF device removes all the associated VFs, ++ * which will update the bus->devices list and confuse the ++ * iterator. Therefore, iterate in reverse so we remove the VFs ++ * first, then the PF. We do the same in pci_stop_bus_device(). 
++ */ ++ list_for_each_entry_safe_reverse(dev, temp, &parent->devices, ++ bus_list) { + pci_dev_get(dev); + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && presence) { + pci_read_config_byte(dev, PCI_BRIDGE_CONTROL, &bctl); diff --git a/queue-3.10/pci-retry-allocation-of-only-the-resource-type-that-failed.patch b/queue-3.10/pci-retry-allocation-of-only-the-resource-type-that-failed.patch new file mode 100644 index 00000000000..4ec4a481003 --- /dev/null +++ b/queue-3.10/pci-retry-allocation-of-only-the-resource-type-that-failed.patch @@ -0,0 +1,137 @@ +From aa914f5ec25e4371ba18b312971314be1b9b1076 Mon Sep 17 00:00:00 2001 +From: Yinghai Lu +Date: Thu, 25 Jul 2013 06:31:38 -0700 +Subject: PCI: Retry allocation of only the resource type that failed + +From: Yinghai Lu + +commit aa914f5ec25e4371ba18b312971314be1b9b1076 upstream. + +Ben Herrenschmidt reported the following problem: + + - The bus has space for all desired MMIO resources, including optional + space for SR-IOV devices + - We attempt to allocate I/O port space, but it fails because the bus + has no I/O space + - Because of the I/O allocation failure, we retry MMIO allocation, + requesting only the required space, without the optional SR-IOV space + +This means we don't allocate the optional SR-IOV space, even though we +could. + +This is related to 0c5be0cb0e ("PCI: Retry on IORESOURCE_IO type +allocations"). + +This patch changes how we handle allocation failures. We will now retry +allocation of only the resource type that failed. If MMIO allocation +fails, we'll retry only MMIO allocation. If I/O port allocation fails, +we'll retry only I/O port allocation. 
+ +[bhelgaas: changelog] +Reference: https://lkml.kernel.org/r/1367712653.11982.19.camel@pasglop +Reported-by: Benjamin Herrenschmidt +Tested-by: Gavin Shan +Signed-off-by: Yinghai Lu +Signed-off-by: Bjorn Helgaas +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/pci/setup-bus.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 68 insertions(+), 1 deletion(-) + +--- a/drivers/pci/setup-bus.c ++++ b/drivers/pci/setup-bus.c +@@ -300,6 +300,47 @@ static void assign_requested_resources_s + } + } + ++static unsigned long pci_fail_res_type_mask(struct list_head *fail_head) ++{ ++ struct pci_dev_resource *fail_res; ++ unsigned long mask = 0; ++ ++ /* check failed type */ ++ list_for_each_entry(fail_res, fail_head, list) ++ mask |= fail_res->flags; ++ ++ /* ++ * one pref failed resource will set IORESOURCE_MEM, ++ * as we can allocate pref in non-pref range. ++ * Will release all assigned non-pref sibling resources ++ * according to that bit. ++ */ ++ return mask & (IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH); ++} ++ ++static bool pci_need_to_release(unsigned long mask, struct resource *res) ++{ ++ if (res->flags & IORESOURCE_IO) ++ return !!(mask & IORESOURCE_IO); ++ ++ /* check pref at first */ ++ if (res->flags & IORESOURCE_PREFETCH) { ++ if (mask & IORESOURCE_PREFETCH) ++ return true; ++ /* count pref if its parent is non-pref */ ++ else if ((mask & IORESOURCE_MEM) && ++ !(res->parent->flags & IORESOURCE_PREFETCH)) ++ return true; ++ else ++ return false; ++ } ++ ++ if (res->flags & IORESOURCE_MEM) ++ return !!(mask & IORESOURCE_MEM); ++ ++ return false; /* should not get here */ ++} ++ + static void __assign_resources_sorted(struct list_head *head, + struct list_head *realloc_head, + struct list_head *fail_head) +@@ -312,11 +353,24 @@ static void __assign_resources_sorted(st + * if could do that, could get out early. 
+ * if could not do that, we still try to assign requested at first, + * then try to reassign add_size for some resources. ++ * ++ * Separate three resource type checking if we need to release ++ * assigned resource after requested + add_size try. ++ * 1. if there is io port assign fail, will release assigned ++ * io port. ++ * 2. if there is pref mmio assign fail, release assigned ++ * pref mmio. ++ * if assigned pref mmio's parent is non-pref mmio and there ++ * is non-pref mmio assign fail, will release that assigned ++ * pref mmio. ++ * 3. if there is non-pref mmio assign fail or pref mmio ++ * assigned fail, will release assigned non-pref mmio. + */ + LIST_HEAD(save_head); + LIST_HEAD(local_fail_head); + struct pci_dev_resource *save_res; +- struct pci_dev_resource *dev_res; ++ struct pci_dev_resource *dev_res, *tmp_res; ++ unsigned long fail_type; + + /* Check if optional add_size is there */ + if (!realloc_head || list_empty(realloc_head)) +@@ -348,6 +402,19 @@ static void __assign_resources_sorted(st + return; + } + ++ /* check failed type */ ++ fail_type = pci_fail_res_type_mask(&local_fail_head); ++ /* remove not need to be released assigned res from head list etc */ ++ list_for_each_entry_safe(dev_res, tmp_res, head, list) ++ if (dev_res->res->parent && ++ !pci_need_to_release(fail_type, dev_res->res)) { ++ /* remove it from realloc_head list */ ++ remove_from_list(realloc_head, dev_res->res); ++ remove_from_list(&save_head, dev_res->res); ++ list_del(&dev_res->list); ++ kfree(dev_res); ++ } ++ + free_list(&local_fail_head); + /* Release assigned resource */ + list_for_each_entry(dev_res, head, list) diff --git a/queue-3.10/revert-cpuidle-quickly-notice-prediction-failure-for-repeat-mode.patch b/queue-3.10/revert-cpuidle-quickly-notice-prediction-failure-for-repeat-mode.patch new file mode 100644 index 00000000000..99338cd5970 --- /dev/null +++ b/queue-3.10/revert-cpuidle-quickly-notice-prediction-failure-for-repeat-mode.patch @@ -0,0 +1,351 @@ +From 
148519120c6d1f19ad53349683aeae9f228b0b8d Mon Sep 17 00:00:00 2001 +From: "Rafael J. Wysocki" +Date: Sat, 27 Jul 2013 01:41:34 +0200 +Subject: Revert "cpuidle: Quickly notice prediction failure for repeat mode" + +From: "Rafael J. Wysocki" + +commit 148519120c6d1f19ad53349683aeae9f228b0b8d upstream. + +Revert commit 69a37bea (cpuidle: Quickly notice prediction failure for +repeat mode), because it has been identified as the source of a +significant performance regression in v3.8 and later as explained by +Jeremy Eder: + + We believe we've identified a particular commit to the cpuidle code + that seems to be impacting performance of variety of workloads. + The simplest way to reproduce is using netperf TCP_RR test, so + we're using that, on a pair of Sandy Bridge based servers. We also + have data from a large database setup where performance is also + measurably/positively impacted, though that test data isn't easily + share-able. + + Included below are test results from 3 test kernels: + + kernel reverts + ----------------------------------------------------------- + 1) vanilla upstream (no reverts) + + 2) perfteam2 reverts e11538d1f03914eb92af5a1a378375c05ae8520c + + 3) test reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 + e11538d1f03914eb92af5a1a378375c05ae8520c + + In summary, netperf TCP_RR numbers improve by approximately 4% + after reverting 69a37beabf1f0a6705c08e879bdd5d82ff6486c4. When + 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 is included, C0 residency + never seems to get above 40%. Taking that patch out gets C0 near + 100% quite often, and performance increases. + + The below data are histograms representing the %c0 residency @ + 1-second sample rates (using turbostat), while under netperf test. + + - If you look at the first 4 histograms, you can see %c0 residency + almost entirely in the 30,40% bin. + - The last pair, which reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4, + shows %c0 in the 80,90,100% bins. 
+ + Below each kernel name are netperf TCP_RR trans/s numbers for the + particular kernel that can be disclosed publicly, comparing the 3 + test kernels. We ran a 4th test with the vanilla kernel where + we've also set /dev/cpu_dma_latency=0 to show overall impact + boosting single-threaded TCP_RR performance over 11% above + baseline. + + 3.10-rc2 vanilla RX + c0 lock (/dev/cpu_dma_latency=0): + TCP_RR trans/s 54323.78 + + ----------------------------------------------------------- + 3.10-rc2 vanilla RX (no reverts) + TCP_RR trans/s 48192.47 + + Receiver %c0 + 0.0000 - 10.0000 [ 1]: * + 10.0000 - 20.0000 [ 0]: + 20.0000 - 30.0000 [ 0]: + 30.0000 - 40.0000 [ 59]: + *********************************************************** + 40.0000 - 50.0000 [ 1]: * + 50.0000 - 60.0000 [ 0]: + 60.0000 - 70.0000 [ 0]: + 70.0000 - 80.0000 [ 0]: + 80.0000 - 90.0000 [ 0]: + 90.0000 - 100.0000 [ 0]: + + Sender %c0 + 0.0000 - 10.0000 [ 1]: * + 10.0000 - 20.0000 [ 0]: + 20.0000 - 30.0000 [ 0]: + 30.0000 - 40.0000 [ 11]: *********** + 40.0000 - 50.0000 [ 49]: + ************************************************* + 50.0000 - 60.0000 [ 0]: + 60.0000 - 70.0000 [ 0]: + 70.0000 - 80.0000 [ 0]: + 80.0000 - 90.0000 [ 0]: + 90.0000 - 100.0000 [ 0]: + + ----------------------------------------------------------- + 3.10-rc2 perfteam2 RX (reverts commit + e11538d1f03914eb92af5a1a378375c05ae8520c) + TCP_RR trans/s 49698.69 + + Receiver %c0 + 0.0000 - 10.0000 [ 1]: * + 10.0000 - 20.0000 [ 1]: * + 20.0000 - 30.0000 [ 0]: + 30.0000 - 40.0000 [ 59]: + *********************************************************** + 40.0000 - 50.0000 [ 0]: + 50.0000 - 60.0000 [ 0]: + 60.0000 - 70.0000 [ 0]: + 70.0000 - 80.0000 [ 0]: + 80.0000 - 90.0000 [ 0]: + 90.0000 - 100.0000 [ 0]: + + Sender %c0 + 0.0000 - 10.0000 [ 1]: * + 10.0000 - 20.0000 [ 0]: + 20.0000 - 30.0000 [ 0]: + 30.0000 - 40.0000 [ 2]: ** + 40.0000 - 50.0000 [ 58]: + ********************************************************** + 50.0000 - 60.0000 [ 0]: + 
60.0000 - 70.0000 [ 0]: + 70.0000 - 80.0000 [ 0]: + 80.0000 - 90.0000 [ 0]: + 90.0000 - 100.0000 [ 0]: + + ----------------------------------------------------------- + 3.10-rc2 test RX (reverts 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 + and e11538d1f03914eb92af5a1a378375c05ae8520c) + TCP_RR trans/s 47766.95 + + Receiver %c0 + 0.0000 - 10.0000 [ 1]: * + 10.0000 - 20.0000 [ 1]: * + 20.0000 - 30.0000 [ 0]: + 30.0000 - 40.0000 [ 27]: *************************** + 40.0000 - 50.0000 [ 2]: ** + 50.0000 - 60.0000 [ 0]: + 60.0000 - 70.0000 [ 2]: ** + 70.0000 - 80.0000 [ 0]: + 80.0000 - 90.0000 [ 0]: + 90.0000 - 100.0000 [ 28]: **************************** + + Sender: + 0.0000 - 10.0000 [ 1]: * + 10.0000 - 20.0000 [ 0]: + 20.0000 - 30.0000 [ 0]: + 30.0000 - 40.0000 [ 11]: *********** + 40.0000 - 50.0000 [ 0]: + 50.0000 - 60.0000 [ 1]: * + 60.0000 - 70.0000 [ 0]: + 70.0000 - 80.0000 [ 3]: *** + 80.0000 - 90.0000 [ 7]: ******* + 90.0000 - 100.0000 [ 38]: ************************************** + + These results demonstrate gaining back the tendency of the CPU to + stay in more responsive, performant C-states (and thus yield + measurably better performance), by reverting commit + 69a37beabf1f0a6705c08e879bdd5d82ff6486c4. + +Requested-by: Jeremy Eder +Tested-by: Len Brown +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/cpuidle/governors/menu.c | 73 ++------------------------------------- + include/linux/tick.h | 6 --- + kernel/time/tick-sched.c | 9 +--- + 3 files changed, 6 insertions(+), 82 deletions(-) + +--- a/drivers/cpuidle/governors/menu.c ++++ b/drivers/cpuidle/governors/menu.c +@@ -28,13 +28,6 @@ + #define MAX_INTERESTING 50000 + #define STDDEV_THRESH 400 + +-/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */ +-#define MAX_DEVIATION 60 +- +-static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer); +-static DEFINE_PER_CPU(int, hrtimer_status); +-/* menu hrtimer mode */ +-enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT}; + + /* + * Concepts and ideas behind the menu governor +@@ -198,42 +191,17 @@ static u64 div_round64(u64 dividend, u32 + return div_u64(dividend + (divisor / 2), divisor); + } + +-/* Cancel the hrtimer if it is not triggered yet */ +-void menu_hrtimer_cancel(void) +-{ +- int cpu = smp_processor_id(); +- struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); +- +- /* The timer is still not time out*/ +- if (per_cpu(hrtimer_status, cpu)) { +- hrtimer_cancel(hrtmr); +- per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; +- } +-} +-EXPORT_SYMBOL_GPL(menu_hrtimer_cancel); +- +-/* Call back for hrtimer is triggered */ +-static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer) +-{ +- int cpu = smp_processor_id(); +- +- per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; +- +- return HRTIMER_NORESTART; +-} +- + /* + * Try detecting repeating patterns by keeping track of the last 8 + * intervals, and checking if the standard deviation of that set + * of points is below a threshold. If it is... then use the + * average of these 8 points as the estimated value. 
+ */ +-static u32 get_typical_interval(struct menu_device *data) ++static void get_typical_interval(struct menu_device *data) + { + int i = 0, divisor = 0; + uint64_t max = 0, avg = 0, stddev = 0; + int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */ +- unsigned int ret = 0; + + again: + +@@ -274,16 +242,13 @@ again: + if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) + || stddev <= 20) { + data->predicted_us = avg; +- ret = 1; +- return ret; ++ return; + + } else if ((divisor * 4) > INTERVALS * 3) { + /* Exclude the max interval */ + thresh = max - 1; + goto again; + } +- +- return ret; + } + + /** +@@ -298,9 +263,6 @@ static int menu_select(struct cpuidle_dr + int i; + int multiplier; + struct timespec t; +- int repeat = 0, low_predicted = 0; +- int cpu = smp_processor_id(); +- struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); + + if (data->needs_update) { + menu_update(drv, dev); +@@ -335,7 +297,7 @@ static int menu_select(struct cpuidle_dr + data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket], + RESOLUTION * DECAY); + +- repeat = get_typical_interval(data); ++ get_typical_interval(data); + + /* + * We want to default to C1 (hlt), not to busy polling +@@ -356,10 +318,8 @@ static int menu_select(struct cpuidle_dr + + if (s->disabled || su->disable) + continue; +- if (s->target_residency > data->predicted_us) { +- low_predicted = 1; ++ if (s->target_residency > data->predicted_us) + continue; +- } + if (s->exit_latency > latency_req) + continue; + if (s->exit_latency * multiplier > data->predicted_us) +@@ -369,28 +329,6 @@ static int menu_select(struct cpuidle_dr + data->exit_us = s->exit_latency; + } + +- /* not deepest C-state chosen for low predicted residency */ +- if (low_predicted) { +- unsigned int timer_us = 0; +- +- /* +- * Set a timer to detect whether this sleep is much +- * longer than repeat mode predicted. 
If the timer +- * triggers, the code will evaluate whether to put +- * the CPU into a deeper C-state. +- * The timer is cancelled on CPU wakeup. +- */ +- timer_us = 2 * (data->predicted_us + MAX_DEVIATION); +- +- if (repeat && (4 * timer_us < data->expected_us)) { +- RCU_NONIDLE(hrtimer_start(hrtmr, +- ns_to_ktime(1000 * timer_us), +- HRTIMER_MODE_REL_PINNED)); +- /* In repeat case, menu hrtimer is started */ +- per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT; +- } +- } +- + return data->last_state_idx; + } + +@@ -481,9 +419,6 @@ static int menu_enable_device(struct cpu + struct cpuidle_device *dev) + { + struct menu_device *data = &per_cpu(menu_devices, dev->cpu); +- struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu); +- hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); +- t->function = menu_hrtimer_notify; + + memset(data, 0, sizeof(struct menu_device)); + +--- a/include/linux/tick.h ++++ b/include/linux/tick.h +@@ -174,10 +174,4 @@ static inline void tick_nohz_task_switch + #endif + + +-# ifdef CONFIG_CPU_IDLE_GOV_MENU +-extern void menu_hrtimer_cancel(void); +-# else +-static inline void menu_hrtimer_cancel(void) {} +-# endif /* CONFIG_CPU_IDLE_GOV_MENU */ +- + #endif +--- a/kernel/time/tick-sched.c ++++ b/kernel/time/tick-sched.c +@@ -832,13 +832,10 @@ void tick_nohz_irq_exit(void) + { + struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + +- if (ts->inidle) { +- /* Cancel the timer because CPU already waken up from the C-states*/ +- menu_hrtimer_cancel(); ++ if (ts->inidle) + __tick_nohz_idle_enter(ts); +- } else { ++ else + tick_nohz_full_stop_tick(ts); +- } + } + + /** +@@ -936,8 +933,6 @@ void tick_nohz_idle_exit(void) + + ts->inidle = 0; + +- /* Cancel the timer because CPU already waken up from the C-states*/ +- menu_hrtimer_cancel(); + if (ts->idle_active || ts->tick_stopped) + now = ktime_get(); + diff --git a/queue-3.10/revert-cpuidle-quickly-notice-prediction-failure-in-general-case.patch 
b/queue-3.10/revert-cpuidle-quickly-notice-prediction-failure-in-general-case.patch new file mode 100644 index 00000000000..fb2ab853968 --- /dev/null +++ b/queue-3.10/revert-cpuidle-quickly-notice-prediction-failure-in-general-case.patch @@ -0,0 +1,103 @@ +From 228b30234f258a193317874854eee1ca7807186e Mon Sep 17 00:00:00 2001 +From: "Rafael J. Wysocki" +Date: Sat, 27 Jul 2013 01:13:26 +0200 +Subject: Revert "cpuidle: Quickly notice prediction failure in general case" + +From: "Rafael J. Wysocki" + +commit 228b30234f258a193317874854eee1ca7807186e upstream. + +Revert commit e11538d1 (cpuidle: Quickly notice prediction failure in +general case), since it depends on commit 69a37be (cpuidle: Quickly +notice prediction failure for repeat mode) that has been identified +as the source of a significant performance regression in v3.8 and +later. + +Requested-by: Jeremy Eder +Tested-by: Len Brown +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/cpuidle/governors/menu.c | 35 +---------------------------------- + 1 file changed, 1 insertion(+), 34 deletions(-) + +--- a/drivers/cpuidle/governors/menu.c ++++ b/drivers/cpuidle/governors/menu.c +@@ -34,7 +34,7 @@ + static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer); + static DEFINE_PER_CPU(int, hrtimer_status); + /* menu hrtimer mode */ +-enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL}; ++enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT}; + + /* + * Concepts and ideas behind the menu governor +@@ -116,13 +116,6 @@ enum {MENU_HRTIMER_STOP, MENU_HRTIMER_RE + * + */ + +-/* +- * The C-state residency is so long that is is worthwhile to exit +- * from the shallow C-state and re-enter into a deeper C-state. 
+- */ +-static unsigned int perfect_cstate_ms __read_mostly = 30; +-module_param(perfect_cstate_ms, uint, 0000); +- + struct menu_device { + int last_state_idx; + int needs_update; +@@ -223,16 +216,6 @@ EXPORT_SYMBOL_GPL(menu_hrtimer_cancel); + static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer) + { + int cpu = smp_processor_id(); +- struct menu_device *data = &per_cpu(menu_devices, cpu); +- +- /* In general case, the expected residency is much larger than +- * deepest C-state target residency, but prediction logic still +- * predicts a small predicted residency, so the prediction +- * history is totally broken if the timer is triggered. +- * So reset the correction factor. +- */ +- if (per_cpu(hrtimer_status, cpu) == MENU_HRTIMER_GENERAL) +- data->correction_factor[data->bucket] = RESOLUTION * DECAY; + + per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; + +@@ -389,7 +372,6 @@ static int menu_select(struct cpuidle_dr + /* not deepest C-state chosen for low predicted residency */ + if (low_predicted) { + unsigned int timer_us = 0; +- unsigned int perfect_us = 0; + + /* + * Set a timer to detect whether this sleep is much +@@ -400,28 +382,13 @@ static int menu_select(struct cpuidle_dr + */ + timer_us = 2 * (data->predicted_us + MAX_DEVIATION); + +- perfect_us = perfect_cstate_ms * 1000; +- + if (repeat && (4 * timer_us < data->expected_us)) { + RCU_NONIDLE(hrtimer_start(hrtmr, + ns_to_ktime(1000 * timer_us), + HRTIMER_MODE_REL_PINNED)); + /* In repeat case, menu hrtimer is started */ + per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT; +- } else if (perfect_us < data->expected_us) { +- /* +- * The next timer is long. This could be because +- * we did not make a useful prediction. +- * In that case, it makes sense to re-enter +- * into a deeper C-state after some time. 
+- */ +- RCU_NONIDLE(hrtimer_start(hrtmr, +- ns_to_ktime(1000 * timer_us), +- HRTIMER_MODE_REL_PINNED)); +- /* In general case, menu hrtimer is started */ +- per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_GENERAL; + } +- + } + + return data->last_state_idx; diff --git a/queue-3.10/series b/queue-3.10/series index 90b71d798d4..08ab7ff5035 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -66,3 +66,9 @@ zram-avoid-double-free-in-function-zram_bvec_write.patch zram-avoid-access-beyond-the-zram-device.patch zram-protect-sysfs-handler-from-invalid-memory-access.patch acpi-battery-fix-parsing-_bix-return-value.patch +revert-cpuidle-quickly-notice-prediction-failure-in-general-case.patch +cpufreq-fix-cpufreq-driver-module-refcount-balance-after-suspend-resume.patch +revert-cpuidle-quickly-notice-prediction-failure-for-repeat-mode.patch +pci-pciehp-fix-null-pointer-deref-when-hot-removing-sr-iov-device.patch +pci-retry-allocation-of-only-the-resource-type-that-failed.patch +drm-radeon-disable-dma-rings-for-bo-moves-on-r6xx.patch -- 2.47.3