Subject: [PATCH] powerpc/oprofile: Fix mutex locking for cell spu-oprofile
From: Carl Love <cel@us.ibm.com>
References: 422501 - LTC47617

The issue is that the SPU code does not hold the kernel mutex lock while
adding samples to the kernel buffer.

This patch creates per SPU buffers to hold the data. Data
is added to the buffers in interrupt context. The data
is periodically pushed to the kernel buffer via a new OProfile
function oprofile_put_buff(). The oprofile_put_buff() function
is called via a work queue, enabling the function to acquire the
mutex lock.

The existing user controls for adjusting the per CPU buffer
size are used to control the size of the per SPU buffers.
Similarly, overflows of the SPU buffers are reported by
incrementing the per CPU buffer stats. This eliminates the
need for architecture-specific controls for the per SPU
buffers, which would not be acceptable to the OProfile user tool
maintainer.

The export of the OProfile add_event_entry() function is removed as it
is no longer needed given this patch.

Note: this patch does not address the issue of indexing arrays
by the SPU number. This still needs to be fixed, as the SPU
numbering is not guaranteed to be 0 to max_num_spus-1.

Signed-off-by: Carl Love <carll@us.ibm.com>
Signed-off-by: Maynard Johnson <maynardj@us.ibm.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Olaf Hering <olh@suse.de>

---
 arch/powerpc/oprofile/cell/pr_util.h       |   13 +
 arch/powerpc/oprofile/cell/spu_profiler.c  |    4
 arch/powerpc/oprofile/cell/spu_task_sync.c |  236 +++++++++++++++++++++++++----
 drivers/oprofile/buffer_sync.c             |   24 ++
 drivers/oprofile/cpu_buffer.c              |   15 +
 drivers/oprofile/event_buffer.h            |    7
 include/linux/oprofile.h                   |   16 +
 7 files changed, 279 insertions(+), 36 deletions(-)

--- a/arch/powerpc/oprofile/cell/pr_util.h
+++ b/arch/powerpc/oprofile/cell/pr_util.h
@@ -24,6 +24,11 @@
 #define SKIP_GENERIC_SYNC 0
 #define SYNC_START_ERROR -1
 #define DO_GENERIC_SYNC 1
+#define SPUS_PER_NODE 8
+#define DEFAULT_TIMER_EXPIRE (HZ / 10)
+
+extern struct delayed_work spu_work;
+extern int spu_prof_running;
 
 struct spu_overlay_info {	/* map of sections within an SPU overlay */
 	unsigned int vma;	/* SPU virtual memory address from elf */
@@ -62,6 +67,14 @@ struct vma_to_fileoffset_map { /* map of
 
 };
 
+struct spu_buffer {
+	int last_guard_val;
+	int ctx_sw_seen;
+	unsigned long *buff;
+	unsigned int head, tail;
+};
+
+
 /* The three functions below are for maintaining and accessing
  * the vma-to-fileoffset map.
  */
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -24,12 +24,11 @@
 
 static u32 *samples;
 
-static int spu_prof_running;
+int spu_prof_running;
 static unsigned int profiling_interval;
 
 #define NUM_SPU_BITS_TRBUF 16
 #define SPUS_PER_TB_ENTRY 4
-#define SPUS_PER_NODE 8
 
 #define SPU_PC_MASK 0xFFFF
 
@@ -209,6 +208,7 @@ int start_spu_profiling(unsigned int cyc
 
 	spu_prof_running = 1;
 	hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
+	schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
 
 	return 0;
 }
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -35,7 +35,102 @@ static DEFINE_SPINLOCK(buffer_lock);
 static DEFINE_SPINLOCK(cache_lock);
 static int num_spu_nodes;
 int spu_prof_num_nodes;
-int last_guard_val[MAX_NUMNODES * 8];
+
+struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE];
+struct delayed_work spu_work;
+static unsigned max_spu_buff;
+
+static void spu_buff_add(unsigned long int value, int spu)
+{
+	/* spu buff is a circular buffer. Add entries to the
+	 * head. Head is the index to store the next value.
+	 * The buffer is full when there is one available entry
+	 * in the queue, i.e. head and tail can't be equal.
+	 * That way we can tell the difference between the
+	 * buffer being full versus empty.
+	 *
+	 * ASSUPTION: the buffer_lock is held when this function
+	 * is called to lock the buffer, head and tail.
+	 */
+	int full = 1;
+
+	if (spu_buff[spu].head >= spu_buff[spu].tail) {
+		if ((spu_buff[spu].head - spu_buff[spu].tail)
+		    < (max_spu_buff - 1))
+			full = 0;
+
+	} else if (spu_buff[spu].tail > spu_buff[spu].head) {
+		if ((spu_buff[spu].tail - spu_buff[spu].head)
+		    > 1)
+			full = 0;
+	}
+
+	if (!full) {
+		spu_buff[spu].buff[spu_buff[spu].head] = value;
+		spu_buff[spu].head++;
+
+		if (spu_buff[spu].head >= max_spu_buff)
+			spu_buff[spu].head = 0;
+	} else {
+		/* From the user's perspective make the SPU buffer
+		 * size management/overflow look like we are using
+		 * per cpu buffers. The user uses the same
+		 * per cpu parameter to adjust the SPU buffer size.
+		 * Increment the sample_lost_overflow to inform
+		 * the user the buffer size needs to be increased.
+		 */
+		oprofile_cpu_buffer_inc_smpl_lost();
+	}
+}
+
+/* This function copies the per SPU buffers to the
+ * OProfile kernel buffer.
+ */
+void sync_spu_buff(void)
+{
+	int spu;
+	unsigned long flags;
+	int curr_head;
+
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		/* In case there was an issue and the buffer didn't
+		 * get created skip it.
+		 */
+		if (spu_buff[spu].buff == NULL)
+			continue;
+
+		/* Hold the lock to make sure the head/tail
+		 * doesn't change while spu_buff_add() is
+		 * deciding if the buffer is full or not.
+		 * Being a little paranoid.
+		 */
+		spin_lock_irqsave(&buffer_lock, flags);
+		curr_head = spu_buff[spu].head;
+		spin_unlock_irqrestore(&buffer_lock, flags);
+
+		/* Transfer the current contents to the kernel buffer.
+		 * data can still be added to the head of the buffer.
+		 */
+		oprofile_put_buff(spu_buff[spu].buff,
+				  spu_buff[spu].tail,
+				  curr_head, max_spu_buff);
+
+		spin_lock_irqsave(&buffer_lock, flags);
+		spu_buff[spu].tail = curr_head;
+		spin_unlock_irqrestore(&buffer_lock, flags);
+	}
+
+}
+
+static void wq_sync_spu_buff(struct work_struct *work)
+{
+	/* move data from spu buffers to kernel buffer */
+	sync_spu_buff();
+
+	/* only reschedule if profiling is not done */
+	if (spu_prof_running)
+		schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
+}
 
 /* Container for caching information about an active SPU task. */
 struct cached_info {
@@ -305,14 +400,21 @@ static int process_context_switch(struct
 
 	/* Record context info in event buffer */
 	spin_lock_irqsave(&buffer_lock, flags);
-	add_event_entry(ESCAPE_CODE);
-	add_event_entry(SPU_CTX_SWITCH_CODE);
-	add_event_entry(spu->number);
-	add_event_entry(spu->pid);
-	add_event_entry(spu->tgid);
-	add_event_entry(app_dcookie);
-	add_event_entry(spu_cookie);
-	add_event_entry(offset);
+	spu_buff_add(ESCAPE_CODE, spu->number);
+	spu_buff_add(SPU_CTX_SWITCH_CODE, spu->number);
+	spu_buff_add(spu->number, spu->number);
+	spu_buff_add(spu->pid, spu->number);
+	spu_buff_add(spu->tgid, spu->number);
+	spu_buff_add(app_dcookie, spu->number);
+	spu_buff_add(spu_cookie, spu->number);
+	spu_buff_add(offset, spu->number);
+
+	/* Set flag to indicate SPU PC data can now be written out. If
+	 * the SPU program counter data is seen before an SPU context
+	 * record is seen, the postprocessing will fail.
+	 */
+	spu_buff[spu->number].ctx_sw_seen = 1;
+
 	spin_unlock_irqrestore(&buffer_lock, flags);
 	smp_wmb();	/* insure spu event buffer updates are written */
 			/* don't want entries intermingled... */
@@ -360,6 +462,47 @@ static int number_of_online_nodes(void)
 	return nodes;
 }
 
+static int oprofile_spu_buff_create(void)
+{
+	int spu;
+
+	max_spu_buff = oprofile_get_cpu_buffer_size();
+
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		/* create circular buffers to store the data in.
+		 * use locks to manage accessing the buffers
+		 */
+		spu_buff[spu].head = 0;
+		spu_buff[spu].tail = 0;
+
+		/*
+		 * Create a buffer for each SPU. Can't reliably
+		 * create a single buffer for all spus due to not
+		 * enough contiguous kernel memory.
+		 */
+
+		spu_buff[spu].buff = kzalloc((max_spu_buff
+					      * sizeof(unsigned long)),
+					     GFP_KERNEL);
+
+		if (!spu_buff[spu].buff) {
+			printk(KERN_ERR "SPU_PROF: "
+			       "%s, line %d: oprofile_spu_buff_create "
+			       "failed to allocate spu buffer %d.\n",
+			       __func__, __LINE__, spu);
+
+			/* release the spu buffers that have been allocated */
+			while (spu >= 0) {
+				kfree(spu_buff[spu].buff);
+				spu_buff[spu].buff = 0;
+				spu--;
+			}
+			return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
 /* The main purpose of this function is to synchronize
  * OProfile with SPUFS by registering to be notified of
  * SPU task switches.
@@ -372,20 +515,35 @@ static int number_of_online_nodes(void)
  */
 int spu_sync_start(void)
 {
-	int k;
+	int spu;
 	int ret = SKIP_GENERIC_SYNC;
 	int register_ret;
 	unsigned long flags = 0;
 
 	spu_prof_num_nodes = number_of_online_nodes();
 	num_spu_nodes = spu_prof_num_nodes * 8;
+	INIT_DELAYED_WORK(&spu_work, wq_sync_spu_buff);
+
+	/* create buffer for storing the SPU data to put in
+	 * the kernel buffer.
+	 */
+	ret = oprofile_spu_buff_create();
+	if (ret)
+		goto out;
 
 	spin_lock_irqsave(&buffer_lock, flags);
-	add_event_entry(ESCAPE_CODE);
-	add_event_entry(SPU_PROFILING_CODE);
-	add_event_entry(num_spu_nodes);
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		spu_buff_add(ESCAPE_CODE, spu);
+		spu_buff_add(SPU_PROFILING_CODE, spu);
+		spu_buff_add(num_spu_nodes, spu);
+	}
 	spin_unlock_irqrestore(&buffer_lock, flags);
 
+	for (spu = 0; spu < num_spu_nodes; spu++) {
+		spu_buff[spu].ctx_sw_seen = 0;
+		spu_buff[spu].last_guard_val = 0;
+	}
+
 	/* Register for SPU events */
 	register_ret = spu_switch_event_register(&spu_active);
 	if (register_ret) {
@@ -393,8 +551,6 @@ int spu_sync_start(void)
 		goto out;
 	}
 
-	for (k = 0; k < (MAX_NUMNODES * 8); k++)
-		last_guard_val[k] = 0;
 	pr_debug("spu_sync_start -- running.\n");
 out:
 	return ret;
@@ -446,13 +602,20 @@ void spu_sync_buffer(int spu_num, unsign
 		 * use. We need to discard samples taken during the time
 		 * period which an overlay occurs (i.e., guard value changes).
 		 */
-		if (grd_val && grd_val != last_guard_val[spu_num]) {
-			last_guard_val[spu_num] = grd_val;
+		if (grd_val && grd_val != spu_buff[spu_num].last_guard_val) {
+			spu_buff[spu_num].last_guard_val = grd_val;
 			/* Drop the rest of the samples. */
 			break;
 		}
 
-		add_event_entry(file_offset | spu_num_shifted);
+		/* We must ensure that the SPU context switch has been written
+		 * out before samples for the SPU. Otherwise, the SPU context
+		 * information is not available and the postprocessing of the
+		 * SPU PC will fail with no available anonymous map information.
+		 */
+		if (spu_buff[spu_num].ctx_sw_seen)
+			spu_buff_add((file_offset | spu_num_shifted),
+				     spu_num);
 	}
 	spin_unlock(&buffer_lock);
 out:
@@ -463,20 +626,41 @@ out:
 int spu_sync_stop(void)
 {
 	unsigned long flags = 0;
-	int ret = spu_switch_event_unregister(&spu_active);
-	if (ret) {
+	int ret;
+	int k;
+
+	ret = spu_switch_event_unregister(&spu_active);
+
+	if (ret)
 		printk(KERN_ERR "SPU_PROF: "
-		       "%s, line %d: spu_switch_event_unregister returned %d\n",
-		       __func__, __LINE__, ret);
-		goto out;
-	}
+		       "%s, line %d: spu_switch_event_unregister " \
+		       "returned %d\n",
+		       __func__, __LINE__, ret);
+
+	/* flush any remaining data in the per SPU buffers */
+	sync_spu_buff();
 
 	spin_lock_irqsave(&cache_lock, flags);
 	ret = release_cached_info(RELEASE_ALL);
 	spin_unlock_irqrestore(&cache_lock, flags);
-out:
+
+	/* remove scheduled work queue item rather then waiting
+	 * for every queued entry to execute. Then flush pending
+	 * system wide buffer to event buffer.
+	 */
+	cancel_delayed_work(&spu_work);
+
+	for (k = 0; k < num_spu_nodes; k++) {
+		spu_buff[k].ctx_sw_seen = 0;
+
+		/*
+		 * spu_sys_buff will be null if there was a problem
+		 * allocating the buffer. Only delete if it exists.
+		 */
+		kfree(spu_buff[k].buff);
+		spu_buff[k].buff = 0;
+	}
 	pr_debug("spu_sync_stop -- done.\n");
 	return ret;
 }
 
-
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -551,3 +551,27 @@ void sync_buffer(int cpu)
 
 	mutex_unlock(&buffer_mutex);
 }
+
+/* The function can be used to add a buffer worth of data directly to
+ * the kernel buffer. The buffer is assumed to be a circular buffer.
+ * Take the entries from index start and end at index end, wrapping
+ * at max_entries.
+ */
+void oprofile_put_buff(unsigned long *buf, unsigned int start,
+		       unsigned int stop, unsigned int max)
+{
+	int i;
+
+	i = start;
+
+	mutex_lock(&buffer_mutex);
+	while (i != stop) {
+		add_event_entry(buf[i++]);
+
+		if (i >= max)
+			i = 0;
+	}
+
+	mutex_unlock(&buffer_mutex);
+}
+
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -37,13 +37,26 @@ static int work_enabled;
 void free_cpu_buffers(void)
 {
 	int i;
-
+
 	for_each_online_cpu(i) {
 		vfree(per_cpu(cpu_buffer, i).buffer);
 		per_cpu(cpu_buffer, i).buffer = NULL;
 	}
 }
 
+unsigned long oprofile_get_cpu_buffer_size(void)
+{
+	return fs_cpu_buffer_size;
+}
+
+void oprofile_cpu_buffer_inc_smpl_lost(void)
+{
+	struct oprofile_cpu_buffer *cpu_buf
+		= &__get_cpu_var(cpu_buffer);
+
+	cpu_buf->sample_lost_overflow++;
+}
+
 int alloc_cpu_buffers(void)
 {
 	int i;
--- a/drivers/oprofile/event_buffer.h
+++ b/drivers/oprofile/event_buffer.h
@@ -17,6 +17,13 @@ int alloc_event_buffer(void);
 
 void free_event_buffer(void);
 
+/**
+ * Add data to the event buffer.
+ * The data passed is free-form, but typically consists of
+ * file offsets, dcookies, context information, and ESCAPE codes.
+ */
+void add_event_entry(unsigned long data);
+
 /* wake up the process sleeping on the event file */
 void wake_up_buffer_waiter(void);
 
--- a/include/linux/oprofile.h
+++ b/include/linux/oprofile.h
@@ -84,13 +84,6 @@ int oprofile_arch_init(struct oprofile_o
 void oprofile_arch_exit(void);
 
 /**
- * Add data to the event buffer.
- * The data passed is free-form, but typically consists of
- * file offsets, dcookies, context information, and ESCAPE codes.
- */
-void add_event_entry(unsigned long data);
-
-/**
  * Add a sample. This may be called from any context. Pass
  * smp_processor_id() as cpu.
  */
@@ -160,5 +153,14 @@ int oprofilefs_ulong_from_user(unsigned
 
 /** lock for read/write safety */
 extern spinlock_t oprofilefs_lock;
+
+/**
+ * Add the contents of a circular buffer to the event buffer.
+ */
+void oprofile_put_buff(unsigned long *buf, unsigned int start,
+		       unsigned int stop, unsigned int max);
+
+unsigned long oprofile_get_cpu_buffer_size(void);
+void oprofile_cpu_buffer_inc_smpl_lost(void);
 
 #endif /* OPROFILE_H */