Commit 9e44e803 (Greg Kroah-Hartman) — stable-queue patch
Commit | Line | Data |
---|---|---|
1 | From 333c5ae9948194428fe6c5ef5c088304fc98263b Mon Sep 17 00:00:00 2001 |
2 | From: Tim Chen <tim.c.chen@linux.intel.com> | |
3 | Date: Fri, 11 Feb 2011 12:49:04 -0800 | |
4 | Subject: idle governor: Avoid lock acquisition to read pm_qos before | |
5 | entering idle | |
6 | ||
7 | From: Tim Chen <tim.c.chen@linux.intel.com> | |
8 | ||
9 | commit 333c5ae9948194428fe6c5ef5c088304fc98263b upstream. | |
10 | ||
11 | Thanks to the reviews and comments by Rafael, James, Mark and Andi. | |
12 | Here's version 2 of the patch incorporating your comments and also some | |
13 | update to my previous patch comments. | |
14 | ||
15 | I noticed that before entering idle state, the menu idle governor will | |
16 | look up the current pm_qos target value according to the list of qos | |
17 | requests received. This look up currently needs the acquisition of a | |
18 | lock to access the list of qos requests to find the qos target value, | |
19 | slowing down the entrance into idle state due to contention by multiple | |
20 | cpus to access this list. The contention is severe when there are a lot | |
21 | of cpus waking and going into idle. For example, for a simple workload | |
22 | that has 32 pairs of processes ping-ponging messages to each other, where | |
23 | 64 cpu cores are active in test system, I see the following profile with | |
24 | 37.82% of cpu cycles spent in contention of pm_qos_lock: | |
25 | ||
26 | - 37.82% swapper [kernel.kallsyms] [k] | |
27 | _raw_spin_lock_irqsave | |
28 | - _raw_spin_lock_irqsave | |
29 | - 95.65% pm_qos_request | |
30 | menu_select | |
31 | cpuidle_idle_call | |
32 | - cpu_idle | |
33 | 99.98% start_secondary | |
34 | ||
35 | A better approach will be to cache the updated pm_qos target value so | |
36 | reading it does not require lock acquisition as in the patch below. | |
37 | With this patch the contention for pm_qos_lock is removed and I saw a | |
38 | 2.2X increase in throughput for my message passing workload. | |
39 | ||
40 | Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> | |
41 | Acked-by: Andi Kleen <ak@linux.intel.com> | |
42 | Acked-by: James Bottomley <James.Bottomley@suse.de> | |
43 | Acked-by: mark gross <markgross@thegnar.org> | |
44 | Signed-off-by: Len Brown <len.brown@intel.com> | |
45 | Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> | |
46 | ||
47 | --- | |
48 | include/linux/pm_qos_params.h | 4 ++++ | |
49 | kernel/pm_qos_params.c | 37 +++++++++++++++++++++++++------------ | |
50 | 2 files changed, 29 insertions(+), 12 deletions(-) | |
51 | ||
52 | --- a/include/linux/pm_qos_params.h | |
53 | +++ b/include/linux/pm_qos_params.h | |
54 | @@ -16,6 +16,10 @@ | |
55 | #define PM_QOS_NUM_CLASSES 4 | |
56 | #define PM_QOS_DEFAULT_VALUE -1 | |
57 | ||
58 | +#define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) | |
59 | +#define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) | |
60 | +#define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 | |
61 | + | |
62 | struct pm_qos_request_list { | |
63 | struct plist_node list; | |
64 | int pm_qos_class; | |
65 | --- a/kernel/pm_qos_params.c | |
66 | +++ b/kernel/pm_qos_params.c | |
67 | @@ -53,11 +53,17 @@ enum pm_qos_type { | |
68 | PM_QOS_MIN /* return the smallest value */ | |
69 | }; | |
70 | ||
71 | +/* | |
72 | + * Note: The lockless read path depends on the CPU accessing | |
73 | + * target_value atomically. Atomic access is only guaranteed on all CPU | |
74 | + * types linux supports for 32 bit quantites | |
75 | + */ | |
76 | struct pm_qos_object { | |
77 | struct plist_head requests; | |
78 | struct blocking_notifier_head *notifiers; | |
79 | struct miscdevice pm_qos_power_miscdev; | |
80 | char *name; | |
81 | + s32 target_value; /* Do not change to 64 bit */ | |
82 | s32 default_value; | |
83 | enum pm_qos_type type; | |
84 | }; | |
85 | @@ -70,7 +76,8 @@ static struct pm_qos_object cpu_dma_pm_q | |
86 | .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock), | |
87 | .notifiers = &cpu_dma_lat_notifier, | |
88 | .name = "cpu_dma_latency", | |
89 | - .default_value = 2000 * USEC_PER_SEC, | |
90 | + .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, | |
91 | + .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, | |
92 | .type = PM_QOS_MIN, | |
93 | }; | |
94 | ||
95 | @@ -79,7 +86,8 @@ static struct pm_qos_object network_lat_ | |
96 | .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock), | |
97 | .notifiers = &network_lat_notifier, | |
98 | .name = "network_latency", | |
99 | - .default_value = 2000 * USEC_PER_SEC, | |
100 | + .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, | |
101 | + .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, | |
102 | .type = PM_QOS_MIN | |
103 | }; | |
104 | ||
105 | @@ -89,7 +97,8 @@ static struct pm_qos_object network_thro | |
106 | .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock), | |
107 | .notifiers = &network_throughput_notifier, | |
108 | .name = "network_throughput", | |
109 | - .default_value = 0, | |
110 | + .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, | |
111 | + .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, | |
112 | .type = PM_QOS_MAX, | |
113 | }; | |
114 | ||
115 | @@ -132,6 +141,16 @@ static inline int pm_qos_get_value(struc | |
116 | } | |
117 | } | |
118 | ||
119 | +static inline s32 pm_qos_read_value(struct pm_qos_object *o) | |
120 | +{ | |
121 | + return o->target_value; | |
122 | +} | |
123 | + | |
124 | +static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value) | |
125 | +{ | |
126 | + o->target_value = value; | |
127 | +} | |
128 | + | |
129 | static void update_target(struct pm_qos_object *o, struct plist_node *node, | |
130 | int del, int value) | |
131 | { | |
132 | @@ -156,6 +175,7 @@ static void update_target(struct pm_qos_ | |
133 | plist_add(node, &o->requests); | |
134 | } | |
135 | curr_value = pm_qos_get_value(o); | |
136 | + pm_qos_set_value(o, curr_value); | |
137 | spin_unlock_irqrestore(&pm_qos_lock, flags); | |
138 | ||
139 | if (prev_value != curr_value) | |
140 | @@ -190,18 +210,11 @@ static int find_pm_qos_object_by_minor(i | |
141 | * pm_qos_request - returns current system wide qos expectation | |
142 | * @pm_qos_class: identification of which qos value is requested | |
143 | * | |
144 | - * This function returns the current target value in an atomic manner. | |
145 | + * This function returns the current target value. | |
146 | */ | |
147 | int pm_qos_request(int pm_qos_class) | |
148 | { | |
149 | - unsigned long flags; | |
150 | - int value; | |
151 | - | |
152 | - spin_lock_irqsave(&pm_qos_lock, flags); | |
153 | - value = pm_qos_get_value(pm_qos_array[pm_qos_class]); | |
154 | - spin_unlock_irqrestore(&pm_qos_lock, flags); | |
155 | - | |
156 | - return value; | |
157 | + return pm_qos_read_value(pm_qos_array[pm_qos_class]); | |
158 | } | |
159 | EXPORT_SYMBOL_GPL(pm_qos_request); | |
160 |