1 From: Mark Fasheh <mfasheh@suse.com>
2 Subject: ocfs2: throttle back local alloc when low on disk space
5 Ocfs2's local allocator disables itself for the duration of a mount point
6 when it has trouble allocating a large enough area from the primary bitmap.
7 That can cause performance problems, especially for disks which were only
8 temporarily full or fragmented. This patch allows for the allocator to
9 shrink its window first, before being disabled. Later, it can also be
10 re-enabled so that any performance drop is minimized.
12 To do this, we allow the value of osb->local_alloc_bits to be shrunk when
13 needed. The default value is recorded in a mostly read-only variable so that
14 we can re-initialize when required.
16 Locking had to be updated so that we could protect changes to
17 local_alloc_bits. Mostly this involves protecting various local alloc values
18 with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
19 is used when the local allocator has shrunk, but is not disabled. If the
20 available space dips below 1 megabyte, the local alloc file is disabled. In
21 either case, local alloc is re-enabled 30 seconds after the event, or when
22 an appropriate amount of bits is seen in the primary bitmap.
24 Signed-off-by: Mark Fasheh <mfasheh@suse.com>
26 fs/ocfs2/localalloc.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++---
27 fs/ocfs2/localalloc.h | 4 +
28 fs/ocfs2/ocfs2.h | 23 +++++-
29 fs/ocfs2/suballoc.c | 31 ++++----
30 fs/ocfs2/suballoc.h | 1 +
31 fs/ocfs2/super.c | 4 +-
32 6 files changed, 230 insertions(+), 31 deletions(-)
34 Index: linux-2.6.27/fs/ocfs2/localalloc.c
35 ===================================================================
36 --- linux-2.6.27.orig/fs/ocfs2/localalloc.c
37 +++ linux-2.6.27/fs/ocfs2/localalloc.c
38 @@ -73,16 +73,51 @@ static int ocfs2_local_alloc_new_window(
39 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
40 struct inode *local_alloc_inode);
42 +static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
44 + return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
45 + osb->local_alloc_state == OCFS2_LA_ENABLED);
48 +void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
49 + unsigned int num_clusters)
51 + spin_lock(&osb->osb_lock);
52 + if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
53 + osb->local_alloc_state == OCFS2_LA_THROTTLED)
54 + if (num_clusters >= osb->local_alloc_default_bits) {
55 + cancel_delayed_work(&osb->la_enable_wq);
56 + osb->local_alloc_state = OCFS2_LA_ENABLED;
58 + spin_unlock(&osb->osb_lock);
61 +void ocfs2_la_enable_worker(struct work_struct *work)
63 + struct ocfs2_super *osb =
64 + container_of(work, struct ocfs2_super,
66 + spin_lock(&osb->osb_lock);
67 + osb->local_alloc_state = OCFS2_LA_ENABLED;
68 + spin_unlock(&osb->osb_lock);
72 * Tell us whether a given allocation should use the local alloc
73 * file. Otherwise, it has to go to the main bitmap.
75 + * This function does semi-dirty reads of local alloc size and state!
76 + * This is ok however, as the values are re-checked once under mutex.
78 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
80 - int la_bits = osb->local_alloc_bits;
84 + spin_lock(&osb->osb_lock);
85 + la_bits = osb->local_alloc_bits;
87 - if (osb->local_alloc_state != OCFS2_LA_ENABLED)
88 + if (!ocfs2_la_state_enabled(osb))
91 /* la_bits should be at least twice the size (in clusters) of
92 @@ -96,6 +131,7 @@ int ocfs2_alloc_should_use_local(struct
94 mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
95 osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
96 + spin_unlock(&osb->osb_lock);
100 @@ -208,6 +244,9 @@ void ocfs2_shutdown_local_alloc(struct o
104 + cancel_delayed_work(&osb->la_enable_wq);
105 + flush_workqueue(ocfs2_wq);
107 if (osb->local_alloc_state == OCFS2_LA_UNUSED)
110 @@ -485,7 +524,7 @@ static int ocfs2_local_alloc_in_range(st
114 - * make sure we've got at least bitswanted contiguous bits in the
115 + * make sure we've got at least bits_wanted contiguous bits in the
116 * local alloc. You lose them when you drop i_mutex.
118 * We will add ourselves to the transaction passed in, but may start
119 @@ -516,16 +555,18 @@ int ocfs2_reserve_local_alloc_bits(struc
121 mutex_lock(&local_alloc_inode->i_mutex);
123 - if (osb->local_alloc_state != OCFS2_LA_ENABLED) {
128 - if (bits_wanted > osb->local_alloc_bits) {
129 - mlog(0, "Asking for more than my max window size!\n");
131 + * We must double check state and allocator bits because
132 + * another process may have changed them while holding i_mutex.
134 + spin_lock(&osb->osb_lock);
135 + if (!ocfs2_la_state_enabled(osb) ||
136 + (bits_wanted > osb->local_alloc_bits)) {
137 + spin_unlock(&osb->osb_lock);
141 + spin_unlock(&osb->osb_lock);
143 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
145 @@ -553,6 +594,21 @@ int ocfs2_reserve_local_alloc_bits(struc
151 + * Under certain conditions, the window slide code
152 + * might have reduced the number of bits available or
153 + * disabled the local alloc entirely. Re-check
154 + * here and return -ENOSPC if necessary.
157 + if (!ocfs2_la_state_enabled(osb))
160 + free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
161 + le32_to_cpu(alloc->id1.bitmap1.i_used);
162 + if (bits_wanted > free_bits)
166 if (ac->ac_max_block)
167 @@ -835,6 +891,85 @@ bail:
171 +enum ocfs2_la_event {
172 + OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */
173 + OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has
174 + * enough bits theoretically
175 + * free, but a contiguous
176 + * allocation could not be
178 + OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have
179 + * enough bits free to satisfy
182 +#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
184 + * Given an event, calculate the size of our next local alloc window.
186 + * This should always be called under i_mutex of the local alloc inode
187 + * so that local alloc disabling doesn't race with processes trying to
188 + * use the allocator.
190 + * Returns the state which the local alloc was left in. This value can
191 + * be ignored by some paths.
193 +static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
194 + enum ocfs2_la_event event)
199 + spin_lock(&osb->osb_lock);
200 + if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
201 + WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
206 + * ENOSPC and fragmentation are treated similarly for now.
208 + if (event == OCFS2_LA_EVENT_ENOSPC ||
209 + event == OCFS2_LA_EVENT_FRAGMENTED) {
211 + * We ran out of contiguous space in the primary
212 + * bitmap. Drastically reduce the number of bits used
213 + * by local alloc until we have to disable it.
215 + bits = osb->local_alloc_bits >> 1;
216 + if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
218 + * By setting state to THROTTLED, we'll keep
219 + * the number of local alloc bits used down
220 + * until an event occurs which would give us
221 + * reason to assume the bitmap situation might
224 + osb->local_alloc_state = OCFS2_LA_THROTTLED;
225 + osb->local_alloc_bits = bits;
227 + osb->local_alloc_state = OCFS2_LA_DISABLED;
229 + queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
230 + OCFS2_LA_ENABLE_INTERVAL);
235 + * Don't increase the size of the local alloc window until we
236 + * know we might be able to fulfill the request. Otherwise, we
237 + * risk bouncing around the global bitmap during periods of
240 + if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
241 + osb->local_alloc_bits = osb->local_alloc_default_bits;
244 + state = osb->local_alloc_state;
245 + spin_unlock(&osb->osb_lock);
250 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
251 struct ocfs2_alloc_context **ac,
252 struct inode **bitmap_inode,
253 @@ -849,12 +984,21 @@ static int ocfs2_local_alloc_reserve_for
258 (*ac)->ac_bits_wanted = osb->local_alloc_bits;
260 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
261 + if (status == -ENOSPC) {
262 + if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
266 + ocfs2_free_ac_resource(*ac);
267 + memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
271 - if (status != -ENOSPC)
272 - mlog_errno(status);
273 + mlog_errno(status);
277 @@ -907,6 +1051,34 @@ static int ocfs2_local_alloc_new_window(
278 * the more specific cluster api to claim bits. */
279 status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
280 &cluster_off, &cluster_count);
281 + if (status == -ENOSPC) {
284 + * Note: We could also try syncing the journal here to
285 + * allow use of any free bits which the current
286 + * transaction can't give us access to. --Mark
288 + if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
292 + status = ocfs2_claim_clusters(osb, handle, ac,
293 + osb->local_alloc_bits,
296 + if (status == -ENOSPC)
299 + * We only shrunk the *minimum* number of bits in our
300 + * request - it's entirely possible that the allocator
301 + * might give us more than we asked for.
304 + spin_lock(&osb->osb_lock);
305 + osb->local_alloc_bits = cluster_count;
306 + spin_unlock(&osb->osb_lock);
310 if (status != -ENOSPC)
312 @@ -950,6 +1122,8 @@ static int ocfs2_local_alloc_slide_windo
316 + ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
318 /* This will lock the main bitmap for us. */
319 status = ocfs2_local_alloc_reserve_for_window(osb,
321 Index: linux-2.6.27/fs/ocfs2/localalloc.h
322 ===================================================================
323 --- linux-2.6.27.orig/fs/ocfs2/localalloc.h
324 +++ linux-2.6.27/fs/ocfs2/localalloc.h
325 @@ -52,4 +52,8 @@ int ocfs2_claim_local_alloc_bits(struct
329 +void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
330 + unsigned int num_clusters);
331 +void ocfs2_la_enable_worker(struct work_struct *work);
333 #endif /* OCFS2_LOCALALLOC_H */
334 Index: linux-2.6.27/fs/ocfs2/ocfs2.h
335 ===================================================================
336 --- linux-2.6.27.orig/fs/ocfs2/ocfs2.h
337 +++ linux-2.6.27/fs/ocfs2/ocfs2.h
338 @@ -176,9 +176,13 @@ struct ocfs2_alloc_stats
340 enum ocfs2_local_alloc_state
342 - OCFS2_LA_UNUSED = 0,
345 + OCFS2_LA_UNUSED = 0, /* Local alloc will never be used for
346 + * this mountpoint. */
347 + OCFS2_LA_ENABLED, /* Local alloc is in use. */
348 + OCFS2_LA_THROTTLED, /* Local alloc is in use, but number
349 + * of bits has been reduced. */
350 + OCFS2_LA_DISABLED /* Local alloc has temporarily been
354 enum ocfs2_mount_options
355 @@ -261,9 +265,20 @@ struct ocfs2_super
356 struct ocfs2_journal *journal;
357 unsigned long osb_commit_interval;
359 + struct delayed_work la_enable_wq;
362 + * Must hold local alloc i_mutex and osb->osb_lock to change
363 + * local_alloc_bits. Reads can be done under either lock.
365 unsigned int local_alloc_bits;
366 - enum ocfs2_local_alloc_state local_alloc_state;
367 + unsigned int local_alloc_default_bits;
369 + enum ocfs2_local_alloc_state local_alloc_state; /* protected
372 struct buffer_head *local_alloc_bh;
376 /* Next two fields are for local node slot recovery during
377 Index: linux-2.6.27/fs/ocfs2/suballoc.c
378 ===================================================================
379 --- linux-2.6.27.orig/fs/ocfs2/suballoc.c
380 +++ linux-2.6.27/fs/ocfs2/suballoc.c
381 @@ -117,7 +117,7 @@ static int ocfs2_reserve_clusters_with_l
382 u32 bits_wanted, u64 max_block,
383 struct ocfs2_alloc_context **ac);
385 -static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
386 +void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
388 struct inode *inode = ac->ac_inode;
390 @@ -709,21 +709,11 @@ static int ocfs2_reserve_clusters_with_l
391 status = ocfs2_reserve_local_alloc_bits(osb,
394 - if (status == -ENOSPC) {
395 - /* reserve_local_bits will return enospc with
396 - * the local alloc inode still locked, so we
397 - * can change this safely here. */
398 - mlog(0, "Disabling local alloc\n");
399 - /* We set to OCFS2_LA_DISABLED so that umount
400 - * can clean up what's left of the local
402 - osb->local_alloc_state = OCFS2_LA_DISABLED;
403 - } else if (status == -EFBIG) {
404 + if (status == -EFBIG) {
405 /* The local alloc window is outside ac_max_block.
406 - * use the main bitmap, but don't disable
408 + * use the main bitmap. */
410 - } else if (status < 0) {
411 + } else if ((status < 0) && (status != -ENOSPC)) {
415 @@ -1045,6 +1035,7 @@ static int ocfs2_cluster_group_search(st
418 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
419 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
420 u16 tmp_off, tmp_found;
421 unsigned int max_bits, gd_cluster_off;
423 @@ -1096,6 +1087,12 @@ static int ocfs2_cluster_group_search(st
425 *bits_found = tmp_found;
426 search = 0; /* success */
427 + } else if (tmp_found) {
429 + * Don't show bits which we'll be returning
430 + * for allocation to the local alloc bitmap.
432 + ocfs2_local_alloc_seen_free_bits(osb, tmp_found);
436 @@ -1902,9 +1899,15 @@ int ocfs2_free_clusters(handle_t *handle
437 status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
438 bg_start_bit, bg_blkno,
446 + ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb),
453 Index: linux-2.6.27/fs/ocfs2/suballoc.h
454 ===================================================================
455 --- linux-2.6.27.orig/fs/ocfs2/suballoc.h
456 +++ linux-2.6.27/fs/ocfs2/suballoc.h
457 @@ -158,6 +158,7 @@ static inline int ocfs2_is_cluster_bitma
459 int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
460 struct ocfs2_alloc_context *ac);
461 +void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
463 /* given a cluster offset, calculate which block group it belongs to
464 * and return that block offset. */
465 Index: linux-2.6.27/fs/ocfs2/super.c
466 ===================================================================
467 --- linux-2.6.27.orig/fs/ocfs2/super.c
468 +++ linux-2.6.27/fs/ocfs2/super.c
469 @@ -655,7 +655,8 @@ static int ocfs2_fill_super(struct super
470 osb->s_atime_quantum = parsed_options.atime_quantum;
471 osb->preferred_slot = parsed_options.slot;
472 osb->osb_commit_interval = parsed_options.commit_interval;
473 - osb->local_alloc_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
474 + osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
475 + osb->local_alloc_bits = osb->local_alloc_default_bits;
477 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
479 @@ -1465,6 +1466,7 @@ static int ocfs2_initialize_super(struct
481 osb->local_alloc_state = OCFS2_LA_UNUSED;
482 osb->local_alloc_bh = NULL;
483 + INIT_DELAYED_WORK(&osb->la_enable_wq, ocfs2_la_enable_worker);
485 init_waitqueue_head(&osb->osb_mount_event);