]> git.ipfire.org Git - people/teissler/ipfire-2.x.git/blame - src/patches/suse-2.6.27.31/patches.suse/ocfs2-throttle-back-local-alloc-when-low-on-disk-sp.patch
Move xen patchset to new version's subdir.
[people/teissler/ipfire-2.x.git] / src / patches / suse-2.6.27.31 / patches.suse / ocfs2-throttle-back-local-alloc-when-low-on-disk-sp.patch
CommitLineData
00e5a55c
BS
1From: Mark Fasheh <mfasheh@suse.com>
2Subject: ocfs2: throttle back local alloc when low on disk space
3Patch-mainline: 2.6.28
4
5Ocfs2's local allocator disables itself for the duration of a mount point
6when it has trouble allocating a large enough area from the primary bitmap.
7That can cause performance problems, especially for disks which were only
8temporarily full or fragmented. This patch allows for the allocator to
9shrink its window first, before being disabled. Later, it can also be
10re-enabled so that any performance drop is minimized.
11
12To do this, we allow the value of osb->local_alloc_bits to be shrunk when
13needed. The default value is recorded in a mostly read-only variable so that
14we can re-initialize when required.
15
16Locking had to be updated so that we could protect changes to
17local_alloc_bits. Mostly this involves protecting various local alloc values
18with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
19is used when the local allocator has shrunk, but is not disabled. If the
20available space dips below 1 megabyte, the local alloc file is disabled. In
21either case, local alloc is re-enabled 30 seconds after the event, or when
22an appropriate amount of bits is seen in the primary bitmap.
23
24Signed-off-by: Mark Fasheh <mfasheh@suse.com>
25---
26 fs/ocfs2/localalloc.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++---
27 fs/ocfs2/localalloc.h | 4 +
28 fs/ocfs2/ocfs2.h | 23 +++++-
29 fs/ocfs2/suballoc.c | 31 ++++----
30 fs/ocfs2/suballoc.h | 1 +
31 fs/ocfs2/super.c | 4 +-
32 6 files changed, 230 insertions(+), 31 deletions(-)
33
34Index: linux-2.6.27/fs/ocfs2/localalloc.c
35===================================================================
36--- linux-2.6.27.orig/fs/ocfs2/localalloc.c
37+++ linux-2.6.27/fs/ocfs2/localalloc.c
38@@ -73,16 +73,51 @@ static int ocfs2_local_alloc_new_window(
39 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
40 struct inode *local_alloc_inode);
41
42+static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
43+{
44+ return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
45+ osb->local_alloc_state == OCFS2_LA_ENABLED);
46+}
47+
48+void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
49+ unsigned int num_clusters)
50+{
51+ spin_lock(&osb->osb_lock);
52+ if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
53+ osb->local_alloc_state == OCFS2_LA_THROTTLED)
54+ if (num_clusters >= osb->local_alloc_default_bits) {
55+ cancel_delayed_work(&osb->la_enable_wq);
56+ osb->local_alloc_state = OCFS2_LA_ENABLED;
57+ }
58+ spin_unlock(&osb->osb_lock);
59+}
60+
61+void ocfs2_la_enable_worker(struct work_struct *work)
62+{
63+ struct ocfs2_super *osb =
64+ container_of(work, struct ocfs2_super,
65+ la_enable_wq.work);
66+ spin_lock(&osb->osb_lock);
67+ osb->local_alloc_state = OCFS2_LA_ENABLED;
68+ spin_unlock(&osb->osb_lock);
69+}
70+
71 /*
72 * Tell us whether a given allocation should use the local alloc
73 * file. Otherwise, it has to go to the main bitmap.
74+ *
75+ * This function does semi-dirty reads of local alloc size and state!
76+ * This is ok however, as the values are re-checked once under mutex.
77 */
78 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
79 {
80- int la_bits = osb->local_alloc_bits;
81 int ret = 0;
82+ int la_bits;
83+
84+ spin_lock(&osb->osb_lock);
85+ la_bits = osb->local_alloc_bits;
86
87- if (osb->local_alloc_state != OCFS2_LA_ENABLED)
88+ if (!ocfs2_la_state_enabled(osb))
89 goto bail;
90
91 /* la_bits should be at least twice the size (in clusters) of
92@@ -96,6 +131,7 @@ int ocfs2_alloc_should_use_local(struct
93 bail:
94 mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
95 osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
96+ spin_unlock(&osb->osb_lock);
97 return ret;
98 }
99
100@@ -208,6 +244,9 @@ void ocfs2_shutdown_local_alloc(struct o
101
102 mlog_entry_void();
103
104+ cancel_delayed_work(&osb->la_enable_wq);
105+ flush_workqueue(ocfs2_wq);
106+
107 if (osb->local_alloc_state == OCFS2_LA_UNUSED)
108 goto out;
109
110@@ -485,7 +524,7 @@ static int ocfs2_local_alloc_in_range(st
111 }
112
113 /*
114- * make sure we've got at least bitswanted contiguous bits in the
115+ * make sure we've got at least bits_wanted contiguous bits in the
116 * local alloc. You lose them when you drop i_mutex.
117 *
118 * We will add ourselves to the transaction passed in, but may start
119@@ -516,16 +555,18 @@ int ocfs2_reserve_local_alloc_bits(struc
120
121 mutex_lock(&local_alloc_inode->i_mutex);
122
123- if (osb->local_alloc_state != OCFS2_LA_ENABLED) {
124- status = -ENOSPC;
125- goto bail;
126- }
127-
128- if (bits_wanted > osb->local_alloc_bits) {
129- mlog(0, "Asking for more than my max window size!\n");
130+ /*
131+ * We must double check state and allocator bits because
132+ * another process may have changed them while holding i_mutex.
133+ */
134+ spin_lock(&osb->osb_lock);
135+ if (!ocfs2_la_state_enabled(osb) ||
136+ (bits_wanted > osb->local_alloc_bits)) {
137+ spin_unlock(&osb->osb_lock);
138 status = -ENOSPC;
139 goto bail;
140 }
141+ spin_unlock(&osb->osb_lock);
142
143 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
144
145@@ -553,6 +594,21 @@ int ocfs2_reserve_local_alloc_bits(struc
146 mlog_errno(status);
147 goto bail;
148 }
149+
150+ /*
151+ * Under certain conditions, the window slide code
152+ * might have reduced the number of bits available or
153+ * disabled the local alloc entirely. Re-check
154+ * here and return -ENOSPC if necessary.
155+ */
156+ status = -ENOSPC;
157+ if (!ocfs2_la_state_enabled(osb))
158+ goto bail;
159+
160+ free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
161+ le32_to_cpu(alloc->id1.bitmap1.i_used);
162+ if (bits_wanted > free_bits)
163+ goto bail;
164 }
165
166 if (ac->ac_max_block)
167@@ -835,6 +891,85 @@ bail:
168 return status;
169 }
170
171+enum ocfs2_la_event {
172+ OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */
173+ OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has
174+ * enough bits theoretically
175+ * free, but a contiguous
176+ * allocation could not be
177+ * found. */
178+ OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have
179+ * enough bits free to satisfy
180+ * our request. */
181+};
182+#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
183+/*
184+ * Given an event, calculate the size of our next local alloc window.
185+ *
186+ * This should always be called under i_mutex of the local alloc inode
187+ * so that local alloc disabling doesn't race with processes trying to
188+ * use the allocator.
189+ *
190+ * Returns the state which the local alloc was left in. This value can
191+ * be ignored by some paths.
192+ */
193+static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
194+ enum ocfs2_la_event event)
195+{
196+ unsigned int bits;
197+ int state;
198+
199+ spin_lock(&osb->osb_lock);
200+ if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
201+ WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
202+ goto out_unlock;
203+ }
204+
205+ /*
206+ * ENOSPC and fragmentation are treated similarly for now.
207+ */
208+ if (event == OCFS2_LA_EVENT_ENOSPC ||
209+ event == OCFS2_LA_EVENT_FRAGMENTED) {
210+ /*
211+ * We ran out of contiguous space in the primary
212+ * bitmap. Drastically reduce the number of bits used
213+ * by local alloc until we have to disable it.
214+ */
215+ bits = osb->local_alloc_bits >> 1;
216+ if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
217+ /*
218+ * By setting state to THROTTLED, we'll keep
219+ * the number of local alloc bits used down
220+ * until an event occurs which would give us
221+ * reason to assume the bitmap situation might
222+ * have changed.
223+ */
224+ osb->local_alloc_state = OCFS2_LA_THROTTLED;
225+ osb->local_alloc_bits = bits;
226+ } else {
227+ osb->local_alloc_state = OCFS2_LA_DISABLED;
228+ }
229+ queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
230+ OCFS2_LA_ENABLE_INTERVAL);
231+ goto out_unlock;
232+ }
233+
234+ /*
235+ * Don't increase the size of the local alloc window until we
236+ * know we might be able to fulfill the request. Otherwise, we
237+ * risk bouncing around the global bitmap during periods of
238+ * low space.
239+ */
240+ if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
241+ osb->local_alloc_bits = osb->local_alloc_default_bits;
242+
243+out_unlock:
244+ state = osb->local_alloc_state;
245+ spin_unlock(&osb->osb_lock);
246+
247+ return state;
248+}
249+
250 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
251 struct ocfs2_alloc_context **ac,
252 struct inode **bitmap_inode,
253@@ -849,12 +984,21 @@ static int ocfs2_local_alloc_reserve_for
254 goto bail;
255 }
256
257+retry_enospc:
258 (*ac)->ac_bits_wanted = osb->local_alloc_bits;
259
260 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
261+ if (status == -ENOSPC) {
262+ if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
263+ OCFS2_LA_DISABLED)
264+ goto bail;
265+
266+ ocfs2_free_ac_resource(*ac);
267+ memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
268+ goto retry_enospc;
269+ }
270 if (status < 0) {
271- if (status != -ENOSPC)
272- mlog_errno(status);
273+ mlog_errno(status);
274 goto bail;
275 }
276
277@@ -907,6 +1051,34 @@ static int ocfs2_local_alloc_new_window(
278 * the more specific cluster api to claim bits. */
279 status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
280 &cluster_off, &cluster_count);
281+ if (status == -ENOSPC) {
282+retry_enospc:
283+ /*
284+ * Note: We could also try syncing the journal here to
285+ * allow use of any free bits which the current
286+ * transaction can't give us access to. --Mark
287+ */
288+ if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
289+ OCFS2_LA_DISABLED)
290+ goto bail;
291+
292+ status = ocfs2_claim_clusters(osb, handle, ac,
293+ osb->local_alloc_bits,
294+ &cluster_off,
295+ &cluster_count);
296+ if (status == -ENOSPC)
297+ goto retry_enospc;
298+ /*
299+ * We only shrunk the *minimum* number of bits in our
300+ * request - it's entirely possible that the allocator
301+ * might give us more than we asked for.
302+ */
303+ if (status == 0) {
304+ spin_lock(&osb->osb_lock);
305+ osb->local_alloc_bits = cluster_count;
306+ spin_unlock(&osb->osb_lock);
307+ }
308+ }
309 if (status < 0) {
310 if (status != -ENOSPC)
311 mlog_errno(status);
312@@ -950,6 +1122,8 @@ static int ocfs2_local_alloc_slide_windo
313
314 mlog_entry_void();
315
316+ ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
317+
318 /* This will lock the main bitmap for us. */
319 status = ocfs2_local_alloc_reserve_for_window(osb,
320 &ac,
321Index: linux-2.6.27/fs/ocfs2/localalloc.h
322===================================================================
323--- linux-2.6.27.orig/fs/ocfs2/localalloc.h
324+++ linux-2.6.27/fs/ocfs2/localalloc.h
325@@ -52,4 +52,8 @@ int ocfs2_claim_local_alloc_bits(struct
326 u32 *bit_off,
327 u32 *num_bits);
328
329+void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
330+ unsigned int num_clusters);
331+void ocfs2_la_enable_worker(struct work_struct *work);
332+
333 #endif /* OCFS2_LOCALALLOC_H */
334Index: linux-2.6.27/fs/ocfs2/ocfs2.h
335===================================================================
336--- linux-2.6.27.orig/fs/ocfs2/ocfs2.h
337+++ linux-2.6.27/fs/ocfs2/ocfs2.h
338@@ -176,9 +176,13 @@ struct ocfs2_alloc_stats
339
340 enum ocfs2_local_alloc_state
341 {
342- OCFS2_LA_UNUSED = 0,
343- OCFS2_LA_ENABLED,
344- OCFS2_LA_DISABLED
345+ OCFS2_LA_UNUSED = 0, /* Local alloc will never be used for
346+ * this mountpoint. */
347+ OCFS2_LA_ENABLED, /* Local alloc is in use. */
348+ OCFS2_LA_THROTTLED, /* Local alloc is in use, but number
349+ * of bits has been reduced. */
350+ OCFS2_LA_DISABLED /* Local alloc has temporarily been
351+ * disabled. */
352 };
353
354 enum ocfs2_mount_options
355@@ -261,9 +265,20 @@ struct ocfs2_super
356 struct ocfs2_journal *journal;
357 unsigned long osb_commit_interval;
358
359+ struct delayed_work la_enable_wq;
360+
361+ /*
362+ * Must hold local alloc i_mutex and osb->osb_lock to change
363+ * local_alloc_bits. Reads can be done under either lock.
364+ */
365 unsigned int local_alloc_bits;
366- enum ocfs2_local_alloc_state local_alloc_state;
367+ unsigned int local_alloc_default_bits;
368+
369+ enum ocfs2_local_alloc_state local_alloc_state; /* protected
370+ * by osb_lock */
371+
372 struct buffer_head *local_alloc_bh;
373+
374 u64 la_last_gd;
375
376 /* Next two fields are for local node slot recovery during
377Index: linux-2.6.27/fs/ocfs2/suballoc.c
378===================================================================
379--- linux-2.6.27.orig/fs/ocfs2/suballoc.c
380+++ linux-2.6.27/fs/ocfs2/suballoc.c
381@@ -117,7 +117,7 @@ static int ocfs2_reserve_clusters_with_l
382 u32 bits_wanted, u64 max_block,
383 struct ocfs2_alloc_context **ac);
384
385-static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
386+void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
387 {
388 struct inode *inode = ac->ac_inode;
389
390@@ -709,21 +709,11 @@ static int ocfs2_reserve_clusters_with_l
391 status = ocfs2_reserve_local_alloc_bits(osb,
392 bits_wanted,
393 *ac);
394- if (status == -ENOSPC) {
395- /* reserve_local_bits will return enospc with
396- * the local alloc inode still locked, so we
397- * can change this safely here. */
398- mlog(0, "Disabling local alloc\n");
399- /* We set to OCFS2_LA_DISABLED so that umount
400- * can clean up what's left of the local
401- * allocation */
402- osb->local_alloc_state = OCFS2_LA_DISABLED;
403- } else if (status == -EFBIG) {
404+ if (status == -EFBIG) {
405 /* The local alloc window is outside ac_max_block.
406- * use the main bitmap, but don't disable
407- * local alloc. */
408+ * use the main bitmap. */
409 status = -ENOSPC;
410- } else if (status < 0) {
411+ } else if ((status < 0) && (status != -ENOSPC)) {
412 mlog_errno(status);
413 goto bail;
414 }
415@@ -1045,6 +1035,7 @@ static int ocfs2_cluster_group_search(st
416 int ret;
417 u64 blkoff;
418 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
419+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
420 u16 tmp_off, tmp_found;
421 unsigned int max_bits, gd_cluster_off;
422
423@@ -1096,6 +1087,12 @@ static int ocfs2_cluster_group_search(st
424 *bit_off = tmp_off;
425 *bits_found = tmp_found;
426 search = 0; /* success */
427+ } else if (tmp_found) {
428+ /*
429+ * Don't show bits which we'll be returning
430+ * for allocation to the local alloc bitmap.
431+ */
432+ ocfs2_local_alloc_seen_free_bits(osb, tmp_found);
433 }
434 }
435
436@@ -1902,9 +1899,15 @@ int ocfs2_free_clusters(handle_t *handle
437 status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
438 bg_start_bit, bg_blkno,
439 num_clusters);
440- if (status < 0)
441+ if (status < 0) {
442 mlog_errno(status);
443+ goto out;
444+ }
445+
446+ ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb),
447+ num_clusters);
448
449+out:
450 mlog_exit(status);
451 return status;
452 }
453Index: linux-2.6.27/fs/ocfs2/suballoc.h
454===================================================================
455--- linux-2.6.27.orig/fs/ocfs2/suballoc.h
456+++ linux-2.6.27/fs/ocfs2/suballoc.h
457@@ -158,6 +158,7 @@ static inline int ocfs2_is_cluster_bitma
458 * apis above. */
459 int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
460 struct ocfs2_alloc_context *ac);
461+void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
462
463 /* given a cluster offset, calculate which block group it belongs to
464 * and return that block offset. */
465Index: linux-2.6.27/fs/ocfs2/super.c
466===================================================================
467--- linux-2.6.27.orig/fs/ocfs2/super.c
468+++ linux-2.6.27/fs/ocfs2/super.c
469@@ -655,7 +655,8 @@ static int ocfs2_fill_super(struct super
470 osb->s_atime_quantum = parsed_options.atime_quantum;
471 osb->preferred_slot = parsed_options.slot;
472 osb->osb_commit_interval = parsed_options.commit_interval;
473- osb->local_alloc_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
474+ osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
475+ osb->local_alloc_bits = osb->local_alloc_default_bits;
476
477 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
478 if (status)
479@@ -1465,6 +1466,7 @@ static int ocfs2_initialize_super(struct
480
481 osb->local_alloc_state = OCFS2_LA_UNUSED;
482 osb->local_alloc_bh = NULL;
483+ INIT_DELAYED_WORK(&osb->la_enable_wq, ocfs2_la_enable_worker);
484
485 init_waitqueue_head(&osb->osb_mount_event);
486