Subject: fix booting with memoryless nodes
From: haveblue@us.ibm.com
References: 443280 - LTC49675

I've reproduced this on 2.6.27.7.  I'm pretty sure it is caused by this
patch:

http://git.kernel.org/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=8f64e1f2d1e09267ac926e15090fd505c1c0cbcb

The problem is that Jon took a loop which was (in pseudocode):

	for_each_node(nid)
		NODE_DATA(nid) = careful_alloc(nid);
		setup_bootmem(nid);
		reserve_node_bootmem(nid);

and broke it up into:

	for_each_node(nid)
		NODE_DATA(nid) = careful_alloc(nid);
		setup_bootmem(nid);
	for_each_node(nid)
		reserve_node_bootmem(nid);

The issue comes in when careful_alloc() is called on a node with no
memory.  It falls back to using bootmem from a previously-initialized
node.  But with Jon's patch, the reserved regions have not yet been
marked in that node's bootmem at the time of the allocation, so it
gives back bogus memory (0xc000000000000000) and pukes later in boot.

The following patch collapses the loop back together.  It also breaks
the mark_reserved_regions_for_nid() code out into a function and adds
some comments.  I think a huge part of what introduced this bug is that
the for loop was too long and hard to read.
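
With the patch applied, the flow is roughly this (same pseudocode as
above, as a sketch only; the real loop in do_init_bootmem() below also
sets up sparsemem for each node):

	for_each_node(nid)
		NODE_DATA(nid) = careful_alloc(nid);
		setup_bootmem(nid);
		mark_reserved_regions_for_nid(nid);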

The actual bug fix here is the:

	+	if (end_pfn <= node->node_start_pfn ||
	+	    start_pfn >= node_end_pfn)
	+		continue;

Signed-off-by: Olaf Hering <olh@suse.de>

---
 arch/powerpc/mm/numa.c | 130 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 82 insertions(+), 48 deletions(-)

--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -19,6 +19,7 @@
 #include <linux/notifier.h>
 #include <linux/lmb.h>
 #include <linux/of.h>
+#include <linux/pfn.h>
 #include <asm/sparsemem.h>
 #include <asm/prom.h>
 #include <asm/system.h>
@@ -867,10 +868,75 @@ static struct notifier_block __cpuinitda
 	.priority = 1 /* Must run before sched domains notifier. */
 };
 
+static void mark_reserved_regions_for_nid(int nid)
+{
+	struct pglist_data *node = NODE_DATA(nid);
+	int i;
+
+	dbg("mark_reserved_regions_for_nid(%d) NODE_DATA: %p\n", nid, node);
+	for (i = 0; i < lmb.reserved.cnt; i++) {
+		unsigned long physbase = lmb.reserved.region[i].base;
+		unsigned long size = lmb.reserved.region[i].size;
+		unsigned long start_pfn = physbase >> PAGE_SHIFT;
+		unsigned long end_pfn = PFN_UP(physbase + size);
+		struct node_active_region node_ar;
+		unsigned long node_end_pfn = node->node_start_pfn +
+					     node->node_spanned_pages;
+
+		/*
+		 * Check to make sure that this lmb.reserved area is
+		 * within the bounds of the node that we care about.
+		 * Checking the nid of the start and end points is not
+		 * sufficient because the reserved area could span the
+		 * entire node.
+		 */
+		if (end_pfn <= node->node_start_pfn ||
+		    start_pfn >= node_end_pfn)
+			continue;
+
+		get_node_active_region(start_pfn, &node_ar);
+		while (start_pfn < end_pfn &&
+		       node_ar.start_pfn < node_ar.end_pfn) {
+			unsigned long reserve_size = size;
+			/*
+			 * if reserved region extends past active region
+			 * then trim size to active region
+			 */
+			if (end_pfn > node_ar.end_pfn)
+				reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
+					- physbase;
+			/*
+			 * Only worry about *this* node, others may not
+			 * yet have valid NODE_DATA().
+			 */
+			if (node_ar.nid == nid)
+				reserve_bootmem_node(NODE_DATA(node_ar.nid),
+					physbase, reserve_size,
+					BOOTMEM_DEFAULT);
+			/*
+			 * if reserved region is contained in the active region
+			 * then done.
+			 */
+			if (end_pfn <= node_ar.end_pfn)
+				break;
+
+			/*
+			 * reserved region extends past the active region
+			 * get next active region that contains this
+			 * reserved region
+			 */
+			start_pfn = node_ar.end_pfn;
+			physbase = start_pfn << PAGE_SHIFT;
+			size = size - reserve_size;
+			get_node_active_region(start_pfn, &node_ar);
+		}
+	}
+}
+
+
 void __init do_init_bootmem(void)
 {
 	int nid;
-	unsigned int i;
 
 	min_low_pfn = 0;
 	max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
@@ -890,9 +956,16 @@ void __init do_init_bootmem(void)
 		unsigned long bootmem_paddr;
 		unsigned long bootmap_pages;
 
+		dbg("node %d is online\n", nid);
 		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
 
-		/* Allocate the node structure node local if possible */
+		/*
+		 * Allocate the node structure node local if possible
+		 *
+		 * Be careful moving this around, as it relies on all
+		 * previous nodes' bootmem to be initialized and have
+		 * all reserved areas marked.
+		 */
 		NODE_DATA(nid) = careful_allocation(nid,
 					sizeof(struct pglist_data),
 					SMP_CACHE_BYTES, end_pfn);
@@ -924,53 +997,14 @@ void __init do_init_bootmem(void)
 				  start_pfn, end_pfn);
 
 		free_bootmem_with_active_regions(nid, end_pfn);
-	}
-
-	/* Mark reserved regions */
-	for (i = 0; i < lmb.reserved.cnt; i++) {
-		unsigned long physbase = lmb.reserved.region[i].base;
-		unsigned long size = lmb.reserved.region[i].size;
-		unsigned long start_pfn = physbase >> PAGE_SHIFT;
-		unsigned long end_pfn = ((physbase + size) >> PAGE_SHIFT);
-		struct node_active_region node_ar;
-
-		get_node_active_region(start_pfn, &node_ar);
-		while (start_pfn < end_pfn &&
-		       node_ar.start_pfn < node_ar.end_pfn) {
-			unsigned long reserve_size = size;
-			/*
-			 * if reserved region extends past active region
-			 * then trim size to active region
-			 */
-			if (end_pfn > node_ar.end_pfn)
-				reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
-					- (start_pfn << PAGE_SHIFT);
-			dbg("reserve_bootmem %lx %lx nid=%d\n", physbase,
-				reserve_size, node_ar.nid);
-			reserve_bootmem_node(NODE_DATA(node_ar.nid), physbase,
-				reserve_size, BOOTMEM_DEFAULT);
-			/*
-			 * if reserved region is contained in the active region
-			 * then done.
-			 */
-			if (end_pfn <= node_ar.end_pfn)
-				break;
-
-			/*
-			 * reserved region extends past the active region
-			 * get next active region that contains this
-			 * reserved region
-			 */
-			start_pfn = node_ar.end_pfn;
-			physbase = start_pfn << PAGE_SHIFT;
-			size = size - reserve_size;
-			get_node_active_region(start_pfn, &node_ar);
-		}
-
-	}
-
-	for_each_online_node(nid)
+		/*
+		 * Be very careful about moving this around.  Future
+		 * calls to careful_allocation() depend on this getting
+		 * done correctly.
+		 */
+		mark_reserved_regions_for_nid(nid);
 		sparse_memory_present_with_active_regions(nid);
+	}
 }
 
 void __init paging_init(void)