]> git.ipfire.org Git - people/teissler/ipfire-2.x.git/blob - src/patches/suse-2.6.27.25/patches.suse/ocfs2-throttle-back-local-alloc-when-low-on-disk-sp.patch
Updated xen patches taken from suse.
[people/teissler/ipfire-2.x.git] / src / patches / suse-2.6.27.25 / patches.suse / ocfs2-throttle-back-local-alloc-when-low-on-disk-sp.patch
1 From: Mark Fasheh <mfasheh@suse.com>
2 Subject: ocfs2: throttle back local alloc when low on disk space
3 Patch-mainline: 2.6.28
4
5 Ocfs2's local allocator disables itself for the duration of a mount point
6 when it has trouble allocating a large enough area from the primary bitmap.
7 That can cause performance problems, especially for disks which were only
8 temporarily full or fragmented. This patch allows for the allocator to
9 shrink its window first, before being disabled. Later, it can also be
10 re-enabled so that any performance drop is minimized.
11
12 To do this, we allow the value of osb->local_alloc_bits to be shrunk when
13 needed. The default value is recorded in a mostly read-only variable so that
14 we can re-initialize when required.
15
16 Locking had to be updated so that we could protect changes to
17 local_alloc_bits. Mostly this involves protecting various local alloc values
18 with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
19 is used when the local allocator has shrunk, but is not disabled. If the
20 available space dips below 1 megabyte, the local alloc file is disabled. In
21 either case, local alloc is re-enabled 30 seconds after the event, or when
22 an appropriate amount of bits is seen in the primary bitmap.
23
24 Signed-off-by: Mark Fasheh <mfasheh@suse.com>
25 ---
26 fs/ocfs2/localalloc.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++---
27 fs/ocfs2/localalloc.h | 4 +
28 fs/ocfs2/ocfs2.h | 23 +++++-
29 fs/ocfs2/suballoc.c | 31 ++++----
30 fs/ocfs2/suballoc.h | 1 +
31 fs/ocfs2/super.c | 4 +-
32 6 files changed, 230 insertions(+), 31 deletions(-)
33
34 Index: linux-2.6.27/fs/ocfs2/localalloc.c
35 ===================================================================
36 --- linux-2.6.27.orig/fs/ocfs2/localalloc.c
37 +++ linux-2.6.27/fs/ocfs2/localalloc.c
38 @@ -73,16 +73,51 @@ static int ocfs2_local_alloc_new_window(
39 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
40 struct inode *local_alloc_inode);
41
42 +static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
43 +{
44 + return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
45 + osb->local_alloc_state == OCFS2_LA_ENABLED);
46 +}
47 +
48 +void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
49 + unsigned int num_clusters)
50 +{
51 + spin_lock(&osb->osb_lock);
52 + if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
53 + osb->local_alloc_state == OCFS2_LA_THROTTLED)
54 + if (num_clusters >= osb->local_alloc_default_bits) {
55 + cancel_delayed_work(&osb->la_enable_wq);
56 + osb->local_alloc_state = OCFS2_LA_ENABLED;
57 + }
58 + spin_unlock(&osb->osb_lock);
59 +}
60 +
61 +void ocfs2_la_enable_worker(struct work_struct *work)
62 +{
63 + struct ocfs2_super *osb =
64 + container_of(work, struct ocfs2_super,
65 + la_enable_wq.work);
66 + spin_lock(&osb->osb_lock);
67 + osb->local_alloc_state = OCFS2_LA_ENABLED;
68 + spin_unlock(&osb->osb_lock);
69 +}
70 +
71 /*
72 * Tell us whether a given allocation should use the local alloc
73 * file. Otherwise, it has to go to the main bitmap.
74 + *
75 + * This function does semi-dirty reads of local alloc size and state!
76 + * This is ok however, as the values are re-checked once under mutex.
77 */
78 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
79 {
80 - int la_bits = osb->local_alloc_bits;
81 int ret = 0;
82 + int la_bits;
83 +
84 + spin_lock(&osb->osb_lock);
85 + la_bits = osb->local_alloc_bits;
86
87 - if (osb->local_alloc_state != OCFS2_LA_ENABLED)
88 + if (!ocfs2_la_state_enabled(osb))
89 goto bail;
90
91 /* la_bits should be at least twice the size (in clusters) of
92 @@ -96,6 +131,7 @@ int ocfs2_alloc_should_use_local(struct
93 bail:
94 mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
95 osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
96 + spin_unlock(&osb->osb_lock);
97 return ret;
98 }
99
100 @@ -208,6 +244,9 @@ void ocfs2_shutdown_local_alloc(struct o
101
102 mlog_entry_void();
103
104 + cancel_delayed_work(&osb->la_enable_wq);
105 + flush_workqueue(ocfs2_wq);
106 +
107 if (osb->local_alloc_state == OCFS2_LA_UNUSED)
108 goto out;
109
110 @@ -485,7 +524,7 @@ static int ocfs2_local_alloc_in_range(st
111 }
112
113 /*
114 - * make sure we've got at least bitswanted contiguous bits in the
115 + * make sure we've got at least bits_wanted contiguous bits in the
116 * local alloc. You lose them when you drop i_mutex.
117 *
118 * We will add ourselves to the transaction passed in, but may start
119 @@ -516,16 +555,18 @@ int ocfs2_reserve_local_alloc_bits(struc
120
121 mutex_lock(&local_alloc_inode->i_mutex);
122
123 - if (osb->local_alloc_state != OCFS2_LA_ENABLED) {
124 - status = -ENOSPC;
125 - goto bail;
126 - }
127 -
128 - if (bits_wanted > osb->local_alloc_bits) {
129 - mlog(0, "Asking for more than my max window size!\n");
130 + /*
131 + * We must double check state and allocator bits because
132 + * another process may have changed them while holding i_mutex.
133 + */
134 + spin_lock(&osb->osb_lock);
135 + if (!ocfs2_la_state_enabled(osb) ||
136 + (bits_wanted > osb->local_alloc_bits)) {
137 + spin_unlock(&osb->osb_lock);
138 status = -ENOSPC;
139 goto bail;
140 }
141 + spin_unlock(&osb->osb_lock);
142
143 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
144
145 @@ -553,6 +594,21 @@ int ocfs2_reserve_local_alloc_bits(struc
146 mlog_errno(status);
147 goto bail;
148 }
149 +
150 + /*
151 + * Under certain conditions, the window slide code
152 + * might have reduced the number of bits available or
153 + * disabled the local alloc entirely. Re-check
154 + * here and return -ENOSPC if necessary.
155 + */
156 + status = -ENOSPC;
157 + if (!ocfs2_la_state_enabled(osb))
158 + goto bail;
159 +
160 + free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
161 + le32_to_cpu(alloc->id1.bitmap1.i_used);
162 + if (bits_wanted > free_bits)
163 + goto bail;
164 }
165
166 if (ac->ac_max_block)
167 @@ -835,6 +891,85 @@ bail:
168 return status;
169 }
170
171 +enum ocfs2_la_event {
172 + OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */
173 + OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has
174 + * enough bits theoretically
175 + * free, but a contiguous
176 + * allocation could not be
177 + * found. */
178 + OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have
179 + * enough bits free to satisfy
180 + * our request. */
181 +};
182 +#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
183 +/*
184 + * Given an event, calculate the size of our next local alloc window.
185 + *
186 + * This should always be called under i_mutex of the local alloc inode
187 + * so that local alloc disabling doesn't race with processes trying to
188 + * use the allocator.
189 + *
190 + * Returns the state which the local alloc was left in. This value can
191 + * be ignored by some paths.
192 + */
193 +static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
194 + enum ocfs2_la_event event)
195 +{
196 + unsigned int bits;
197 + int state;
198 +
199 + spin_lock(&osb->osb_lock);
200 + if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
201 + WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
202 + goto out_unlock;
203 + }
204 +
205 + /*
206 + * ENOSPC and fragmentation are treated similarly for now.
207 + */
208 + if (event == OCFS2_LA_EVENT_ENOSPC ||
209 + event == OCFS2_LA_EVENT_FRAGMENTED) {
210 + /*
211 + * We ran out of contiguous space in the primary
212 + * bitmap. Drastically reduce the number of bits used
213 + * by local alloc until we have to disable it.
214 + */
215 + bits = osb->local_alloc_bits >> 1;
216 + if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
217 + /*
218 + * By setting state to THROTTLED, we'll keep
219 + * the number of local alloc bits used down
220 + * until an event occurs which would give us
221 + * reason to assume the bitmap situation might
222 + * have changed.
223 + */
224 + osb->local_alloc_state = OCFS2_LA_THROTTLED;
225 + osb->local_alloc_bits = bits;
226 + } else {
227 + osb->local_alloc_state = OCFS2_LA_DISABLED;
228 + }
229 + queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
230 + OCFS2_LA_ENABLE_INTERVAL);
231 + goto out_unlock;
232 + }
233 +
234 + /*
235 + * Don't increase the size of the local alloc window until we
236 + * know we might be able to fulfill the request. Otherwise, we
237 + * risk bouncing around the global bitmap during periods of
238 + * low space.
239 + */
240 + if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
241 + osb->local_alloc_bits = osb->local_alloc_default_bits;
242 +
243 +out_unlock:
244 + state = osb->local_alloc_state;
245 + spin_unlock(&osb->osb_lock);
246 +
247 + return state;
248 +}
249 +
250 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
251 struct ocfs2_alloc_context **ac,
252 struct inode **bitmap_inode,
253 @@ -849,12 +984,21 @@ static int ocfs2_local_alloc_reserve_for
254 goto bail;
255 }
256
257 +retry_enospc:
258 (*ac)->ac_bits_wanted = osb->local_alloc_bits;
259
260 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
261 + if (status == -ENOSPC) {
262 + if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
263 + OCFS2_LA_DISABLED)
264 + goto bail;
265 +
266 + ocfs2_free_ac_resource(*ac);
267 + memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
268 + goto retry_enospc;
269 + }
270 if (status < 0) {
271 - if (status != -ENOSPC)
272 - mlog_errno(status);
273 + mlog_errno(status);
274 goto bail;
275 }
276
277 @@ -907,6 +1051,34 @@ static int ocfs2_local_alloc_new_window(
278 * the more specific cluster api to claim bits. */
279 status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
280 &cluster_off, &cluster_count);
281 + if (status == -ENOSPC) {
282 +retry_enospc:
283 + /*
284 + * Note: We could also try syncing the journal here to
285 + * allow use of any free bits which the current
286 + * transaction can't give us access to. --Mark
287 + */
288 + if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
289 + OCFS2_LA_DISABLED)
290 + goto bail;
291 +
292 + status = ocfs2_claim_clusters(osb, handle, ac,
293 + osb->local_alloc_bits,
294 + &cluster_off,
295 + &cluster_count);
296 + if (status == -ENOSPC)
297 + goto retry_enospc;
298 + /*
299 + * We only shrunk the *minimum* number of bits in our
300 + * request - it's entirely possible that the allocator
301 + * might give us more than we asked for.
302 + */
303 + if (status == 0) {
304 + spin_lock(&osb->osb_lock);
305 + osb->local_alloc_bits = cluster_count;
306 + spin_unlock(&osb->osb_lock);
307 + }
308 + }
309 if (status < 0) {
310 if (status != -ENOSPC)
311 mlog_errno(status);
312 @@ -950,6 +1122,8 @@ static int ocfs2_local_alloc_slide_windo
313
314 mlog_entry_void();
315
316 + ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
317 +
318 /* This will lock the main bitmap for us. */
319 status = ocfs2_local_alloc_reserve_for_window(osb,
320 &ac,
321 Index: linux-2.6.27/fs/ocfs2/localalloc.h
322 ===================================================================
323 --- linux-2.6.27.orig/fs/ocfs2/localalloc.h
324 +++ linux-2.6.27/fs/ocfs2/localalloc.h
325 @@ -52,4 +52,8 @@ int ocfs2_claim_local_alloc_bits(struct
326 u32 *bit_off,
327 u32 *num_bits);
328
329 +void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
330 + unsigned int num_clusters);
331 +void ocfs2_la_enable_worker(struct work_struct *work);
332 +
333 #endif /* OCFS2_LOCALALLOC_H */
334 Index: linux-2.6.27/fs/ocfs2/ocfs2.h
335 ===================================================================
336 --- linux-2.6.27.orig/fs/ocfs2/ocfs2.h
337 +++ linux-2.6.27/fs/ocfs2/ocfs2.h
338 @@ -176,9 +176,13 @@ struct ocfs2_alloc_stats
339
340 enum ocfs2_local_alloc_state
341 {
342 - OCFS2_LA_UNUSED = 0,
343 - OCFS2_LA_ENABLED,
344 - OCFS2_LA_DISABLED
345 + OCFS2_LA_UNUSED = 0, /* Local alloc will never be used for
346 + * this mountpoint. */
347 + OCFS2_LA_ENABLED, /* Local alloc is in use. */
348 + OCFS2_LA_THROTTLED, /* Local alloc is in use, but number
349 + * of bits has been reduced. */
350 + OCFS2_LA_DISABLED /* Local alloc has temporarily been
351 + * disabled. */
352 };
353
354 enum ocfs2_mount_options
355 @@ -261,9 +265,20 @@ struct ocfs2_super
356 struct ocfs2_journal *journal;
357 unsigned long osb_commit_interval;
358
359 + struct delayed_work la_enable_wq;
360 +
361 + /*
362 + * Must hold local alloc i_mutex and osb->osb_lock to change
363 + * local_alloc_bits. Reads can be done under either lock.
364 + */
365 unsigned int local_alloc_bits;
366 - enum ocfs2_local_alloc_state local_alloc_state;
367 + unsigned int local_alloc_default_bits;
368 +
369 + enum ocfs2_local_alloc_state local_alloc_state; /* protected
370 + * by osb_lock */
371 +
372 struct buffer_head *local_alloc_bh;
373 +
374 u64 la_last_gd;
375
376 /* Next two fields are for local node slot recovery during
377 Index: linux-2.6.27/fs/ocfs2/suballoc.c
378 ===================================================================
379 --- linux-2.6.27.orig/fs/ocfs2/suballoc.c
380 +++ linux-2.6.27/fs/ocfs2/suballoc.c
381 @@ -117,7 +117,7 @@ static int ocfs2_reserve_clusters_with_l
382 u32 bits_wanted, u64 max_block,
383 struct ocfs2_alloc_context **ac);
384
385 -static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
386 +void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
387 {
388 struct inode *inode = ac->ac_inode;
389
390 @@ -709,21 +709,11 @@ static int ocfs2_reserve_clusters_with_l
391 status = ocfs2_reserve_local_alloc_bits(osb,
392 bits_wanted,
393 *ac);
394 - if (status == -ENOSPC) {
395 - /* reserve_local_bits will return enospc with
396 - * the local alloc inode still locked, so we
397 - * can change this safely here. */
398 - mlog(0, "Disabling local alloc\n");
399 - /* We set to OCFS2_LA_DISABLED so that umount
400 - * can clean up what's left of the local
401 - * allocation */
402 - osb->local_alloc_state = OCFS2_LA_DISABLED;
403 - } else if (status == -EFBIG) {
404 + if (status == -EFBIG) {
405 /* The local alloc window is outside ac_max_block.
406 - * use the main bitmap, but don't disable
407 - * local alloc. */
408 + * use the main bitmap. */
409 status = -ENOSPC;
410 - } else if (status < 0) {
411 + } else if ((status < 0) && (status != -ENOSPC)) {
412 mlog_errno(status);
413 goto bail;
414 }
415 @@ -1045,6 +1035,7 @@ static int ocfs2_cluster_group_search(st
416 int ret;
417 u64 blkoff;
418 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
419 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
420 u16 tmp_off, tmp_found;
421 unsigned int max_bits, gd_cluster_off;
422
423 @@ -1096,6 +1087,12 @@ static int ocfs2_cluster_group_search(st
424 *bit_off = tmp_off;
425 *bits_found = tmp_found;
426 search = 0; /* success */
427 + } else if (tmp_found) {
428 + /*
429 + * Don't show bits which we'll be returning
430 + * for allocation to the local alloc bitmap.
431 + */
432 + ocfs2_local_alloc_seen_free_bits(osb, tmp_found);
433 }
434 }
435
436 @@ -1902,9 +1899,15 @@ int ocfs2_free_clusters(handle_t *handle
437 status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
438 bg_start_bit, bg_blkno,
439 num_clusters);
440 - if (status < 0)
441 + if (status < 0) {
442 mlog_errno(status);
443 + goto out;
444 + }
445 +
446 + ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb),
447 + num_clusters);
448
449 +out:
450 mlog_exit(status);
451 return status;
452 }
453 Index: linux-2.6.27/fs/ocfs2/suballoc.h
454 ===================================================================
455 --- linux-2.6.27.orig/fs/ocfs2/suballoc.h
456 +++ linux-2.6.27/fs/ocfs2/suballoc.h
457 @@ -158,6 +158,7 @@ static inline int ocfs2_is_cluster_bitma
458 * apis above. */
459 int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
460 struct ocfs2_alloc_context *ac);
461 +void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
462
463 /* given a cluster offset, calculate which block group it belongs to
464 * and return that block offset. */
465 Index: linux-2.6.27/fs/ocfs2/super.c
466 ===================================================================
467 --- linux-2.6.27.orig/fs/ocfs2/super.c
468 +++ linux-2.6.27/fs/ocfs2/super.c
469 @@ -655,7 +655,8 @@ static int ocfs2_fill_super(struct super
470 osb->s_atime_quantum = parsed_options.atime_quantum;
471 osb->preferred_slot = parsed_options.slot;
472 osb->osb_commit_interval = parsed_options.commit_interval;
473 - osb->local_alloc_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
474 + osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
475 + osb->local_alloc_bits = osb->local_alloc_default_bits;
476
477 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
478 if (status)
479 @@ -1465,6 +1466,7 @@ static int ocfs2_initialize_super(struct
480
481 osb->local_alloc_state = OCFS2_LA_UNUSED;
482 osb->local_alloc_bh = NULL;
483 + INIT_DELAYED_WORK(&osb->la_enable_wq, ocfs2_la_enable_worker);
484
485 init_waitqueue_head(&osb->osb_mount_event);
486