]> git.ipfire.org Git - people/pmueller/ipfire-2.x.git/blame - src/patches/suse-2.6.27.25/patches.arch/ppc-pseries-migration_hang_fix.patch
Revert "Move xen patchset to new version's subdir."
[people/pmueller/ipfire-2.x.git] / src / patches / suse-2.6.27.25 / patches.arch / ppc-pseries-migration_hang_fix.patch
CommitLineData
00e5a55c
BS
1Subject: Fix partition migration hang under load
2From: Brian King <brking@linux.vnet.ibm.com>
3References: 470563 - LTC51153
4
5While testing partition migration with heavy CPU load using
6shared processors, it was observed that sometimes the migration
7would never complete and would appear to hang. Currently, the
8migration code assumes that if H_SUCCESS is returned from the H_JOIN
9then the migration is complete and the processor is waking up on
10the target system. If there was an outstanding PROD to the processor
11when the H_JOIN is called, however, it will return H_SUCCESS on the source
12system, causing the migration to hang, or in some scenarios causing
13the kernel to crash on the complete call waking the caller
14of rtas_percpu_suspend_me.
15
16Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
17Signed-off-by: Olaf Hering <olh@suse.de>
18---
19
20 arch/powerpc/kernel/rtas.c | 10 ++++++++--
21 1 file changed, 8 insertions(+), 2 deletions(-)
22
23--- a/arch/powerpc/kernel/rtas.c
24+++ b/arch/powerpc/kernel/rtas.c
25@@ -46,6 +46,7 @@ EXPORT_SYMBOL(rtas);
26
27 struct rtas_suspend_me_data {
28 atomic_t working; /* number of cpus accessing this struct */
29+ atomic_t done;
30 int token; /* ibm,suspend-me */
31 int error;
32 struct completion *complete; /* wait on this until working == 0 */
33@@ -663,7 +664,7 @@ static int ibm_suspend_me_token = RTAS_U
34 #ifdef CONFIG_PPC_PSERIES
35 static void rtas_percpu_suspend_me(void *info)
36 {
37- long rc;
38+ long rc = H_SUCCESS;
39 unsigned long msr_save;
40 int cpu;
41 struct rtas_suspend_me_data *data =
42@@ -675,7 +676,8 @@ static void rtas_percpu_suspend_me(void
43 msr_save = mfmsr();
44 mtmsr(msr_save & ~(MSR_EE));
45
46- rc = plpar_hcall_norets(H_JOIN);
47+ while (rc == H_SUCCESS && !atomic_read(&data->done))
48+ rc = plpar_hcall_norets(H_JOIN);
49
50 mtmsr(msr_save);
51
52@@ -698,6 +700,9 @@ static void rtas_percpu_suspend_me(void
53 smp_processor_id(), rc);
54 data->error = rc;
55 }
56+
57+ atomic_set(&data->done, 1);
58+
59 /* This cpu did the suspend or got an error; in either case,
60 * we need to prod all other other cpus out of join state.
61 * Extra prods are harmless.
62@@ -740,6 +745,7 @@ static int rtas_ibm_suspend_me(struct rt
63 }
64
65 atomic_set(&data.working, 0);
66+ atomic_set(&data.done, 0);
67 data.token = rtas_token("ibm,suspend-me");
68 data.error = 0;
69 data.complete = &done;