]>
Commit | Line | Data |
---|---|---|
00e5a55c BS |
1 | From: Mark Fasheh <mfasheh@suse.com> |
2 | Subject: ocfs2: throttle back local alloc when low on disk space | |
3 | Patch-mainline: 2.6.28 | |
4 | ||
5 | Ocfs2's local allocator disables itself for the duration of a mount point | |
6 | when it has trouble allocating a large enough area from the primary bitmap. | |
7 | That can cause performance problems, especially for disks which were only | |
8 | temporarily full or fragmented. This patch allows for the allocator to | |
9 | shrink its window first, before being disabled. Later, it can also be | |
10 | re-enabled so that any performance drop is minimized. | |
11 | ||
12 | To do this, we allow the value of osb->local_alloc_bits to be shrunk when | |
13 | needed. The default value is recorded in a mostly read-only variable so that | |
14 | we can re-initialize when required. | |
15 | ||
16 | Locking had to be updated so that we could protect changes to | |
17 | local_alloc_bits. Mostly this involves protecting various local alloc values | |
18 | with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which | |
19 | is used when the local allocator has shrunk, but is not disabled. If the | |
20 | available space dips below 1 megabyte, the local alloc file is disabled. In | |
21 | either case, local alloc is re-enabled 30 seconds after the event, or when | |
22 | an appropriate amount of bits is seen in the primary bitmap. | |
23 | ||
24 | Signed-off-by: Mark Fasheh <mfasheh@suse.com> | |
25 | --- | |
26 | fs/ocfs2/localalloc.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++--- | |
27 | fs/ocfs2/localalloc.h | 4 + | |
28 | fs/ocfs2/ocfs2.h | 23 +++++- | |
29 | fs/ocfs2/suballoc.c | 31 ++++---- | |
30 | fs/ocfs2/suballoc.h | 1 + | |
31 | fs/ocfs2/super.c | 4 +- | |
32 | 6 files changed, 230 insertions(+), 31 deletions(-) | |
33 | ||
34 | Index: linux-2.6.27/fs/ocfs2/localalloc.c | |
35 | =================================================================== | |
36 | --- linux-2.6.27.orig/fs/ocfs2/localalloc.c | |
37 | +++ linux-2.6.27/fs/ocfs2/localalloc.c | |
38 | @@ -73,16 +73,51 @@ static int ocfs2_local_alloc_new_window( | |
39 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | |
40 | struct inode *local_alloc_inode); | |
41 | ||
42 | +static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) | |
43 | +{ | |
44 | + return (osb->local_alloc_state == OCFS2_LA_THROTTLED || | |
45 | + osb->local_alloc_state == OCFS2_LA_ENABLED); | |
46 | +} | |
47 | + | |
48 | +void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, | |
49 | + unsigned int num_clusters) | |
50 | +{ | |
51 | + spin_lock(&osb->osb_lock); | |
52 | + if (osb->local_alloc_state == OCFS2_LA_DISABLED || | |
53 | + osb->local_alloc_state == OCFS2_LA_THROTTLED) | |
54 | + if (num_clusters >= osb->local_alloc_default_bits) { | |
55 | + cancel_delayed_work(&osb->la_enable_wq); | |
56 | + osb->local_alloc_state = OCFS2_LA_ENABLED; | |
57 | + } | |
58 | + spin_unlock(&osb->osb_lock); | |
59 | +} | |
60 | + | |
61 | +void ocfs2_la_enable_worker(struct work_struct *work) | |
62 | +{ | |
63 | + struct ocfs2_super *osb = | |
64 | + container_of(work, struct ocfs2_super, | |
65 | + la_enable_wq.work); | |
66 | + spin_lock(&osb->osb_lock); | |
67 | + osb->local_alloc_state = OCFS2_LA_ENABLED; | |
68 | + spin_unlock(&osb->osb_lock); | |
69 | +} | |
70 | + | |
71 | /* | |
72 | * Tell us whether a given allocation should use the local alloc | |
73 | * file. Otherwise, it has to go to the main bitmap. | |
74 | + * | |
75 | + * This function does semi-dirty reads of local alloc size and state! | |
76 | + * This is ok however, as the values are re-checked once under mutex. | |
77 | */ | |
78 | int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) | |
79 | { | |
80 | - int la_bits = osb->local_alloc_bits; | |
81 | int ret = 0; | |
82 | + int la_bits; | |
83 | + | |
84 | + spin_lock(&osb->osb_lock); | |
85 | + la_bits = osb->local_alloc_bits; | |
86 | ||
87 | - if (osb->local_alloc_state != OCFS2_LA_ENABLED) | |
88 | + if (!ocfs2_la_state_enabled(osb)) | |
89 | goto bail; | |
90 | ||
91 | /* la_bits should be at least twice the size (in clusters) of | |
92 | @@ -96,6 +131,7 @@ int ocfs2_alloc_should_use_local(struct | |
93 | bail: | |
94 | mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", | |
95 | osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); | |
96 | + spin_unlock(&osb->osb_lock); | |
97 | return ret; | |
98 | } | |
99 | ||
100 | @@ -208,6 +244,9 @@ void ocfs2_shutdown_local_alloc(struct o | |
101 | ||
102 | mlog_entry_void(); | |
103 | ||
104 | + cancel_delayed_work(&osb->la_enable_wq); | |
105 | + flush_workqueue(ocfs2_wq); | |
106 | + | |
107 | if (osb->local_alloc_state == OCFS2_LA_UNUSED) | |
108 | goto out; | |
109 | ||
110 | @@ -485,7 +524,7 @@ static int ocfs2_local_alloc_in_range(st | |
111 | } | |
112 | ||
113 | /* | |
114 | - * make sure we've got at least bitswanted contiguous bits in the | |
115 | + * make sure we've got at least bits_wanted contiguous bits in the | |
116 | * local alloc. You lose them when you drop i_mutex. | |
117 | * | |
118 | * We will add ourselves to the transaction passed in, but may start | |
119 | @@ -516,16 +555,18 @@ int ocfs2_reserve_local_alloc_bits(struc | |
120 | ||
121 | mutex_lock(&local_alloc_inode->i_mutex); | |
122 | ||
123 | - if (osb->local_alloc_state != OCFS2_LA_ENABLED) { | |
124 | - status = -ENOSPC; | |
125 | - goto bail; | |
126 | - } | |
127 | - | |
128 | - if (bits_wanted > osb->local_alloc_bits) { | |
129 | - mlog(0, "Asking for more than my max window size!\n"); | |
130 | + /* | |
131 | + * We must double check state and allocator bits because | |
132 | + * another process may have changed them while holding i_mutex. | |
133 | + */ | |
134 | + spin_lock(&osb->osb_lock); | |
135 | + if (!ocfs2_la_state_enabled(osb) || | |
136 | + (bits_wanted > osb->local_alloc_bits)) { | |
137 | + spin_unlock(&osb->osb_lock); | |
138 | status = -ENOSPC; | |
139 | goto bail; | |
140 | } | |
141 | + spin_unlock(&osb->osb_lock); | |
142 | ||
143 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; | |
144 | ||
145 | @@ -553,6 +594,21 @@ int ocfs2_reserve_local_alloc_bits(struc | |
146 | mlog_errno(status); | |
147 | goto bail; | |
148 | } | |
149 | + | |
150 | + /* | |
151 | + * Under certain conditions, the window slide code | |
152 | + * might have reduced the number of bits available or | |
153 | + * disabled the local alloc entirely. Re-check | |
154 | + * here and return -ENOSPC if necessary. | |
155 | + */ | |
156 | + status = -ENOSPC; | |
157 | + if (!ocfs2_la_state_enabled(osb)) | |
158 | + goto bail; | |
159 | + | |
160 | + free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - | |
161 | + le32_to_cpu(alloc->id1.bitmap1.i_used); | |
162 | + if (bits_wanted > free_bits) | |
163 | + goto bail; | |
164 | } | |
165 | ||
166 | if (ac->ac_max_block) | |
167 | @@ -835,6 +891,85 @@ bail: | |
168 | return status; | |
169 | } | |
170 | ||
171 | +enum ocfs2_la_event { | |
172 | + OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */ | |
173 | + OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has | |
174 | + * enough bits theoretically | |
175 | + * free, but a contiguous | |
176 | + * allocation could not be | |
177 | + * found. */ | |
178 | + OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have | |
179 | + * enough bits free to satisfy | |
180 | + * our request. */ | |
181 | +}; | |
182 | +#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ) | |
183 | +/* | |
184 | + * Given an event, calculate the size of our next local alloc window. | |
185 | + * | |
186 | + * This should always be called under i_mutex of the local alloc inode | |
187 | + * so that local alloc disabling doesn't race with processes trying to | |
188 | + * use the allocator. | |
189 | + * | |
190 | + * Returns the state which the local alloc was left in. This value can | |
191 | + * be ignored by some paths. | |
192 | + */ | |
193 | +static int ocfs2_recalc_la_window(struct ocfs2_super *osb, | |
194 | + enum ocfs2_la_event event) | |
195 | +{ | |
196 | + unsigned int bits; | |
197 | + int state; | |
198 | + | |
199 | + spin_lock(&osb->osb_lock); | |
200 | + if (osb->local_alloc_state == OCFS2_LA_DISABLED) { | |
201 | + WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED); | |
202 | + goto out_unlock; | |
203 | + } | |
204 | + | |
205 | + /* | |
206 | + * ENOSPC and fragmentation are treated similarly for now. | |
207 | + */ | |
208 | + if (event == OCFS2_LA_EVENT_ENOSPC || | |
209 | + event == OCFS2_LA_EVENT_FRAGMENTED) { | |
210 | + /* | |
211 | + * We ran out of contiguous space in the primary | |
212 | + * bitmap. Drastically reduce the number of bits used | |
213 | + * by local alloc until we have to disable it. | |
214 | + */ | |
215 | + bits = osb->local_alloc_bits >> 1; | |
216 | + if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) { | |
217 | + /* | |
218 | + * By setting state to THROTTLED, we'll keep | |
219 | + * the number of local alloc bits used down | |
220 | + * until an event occurs which would give us | |
221 | + * reason to assume the bitmap situation might | |
222 | + * have changed. | |
223 | + */ | |
224 | + osb->local_alloc_state = OCFS2_LA_THROTTLED; | |
225 | + osb->local_alloc_bits = bits; | |
226 | + } else { | |
227 | + osb->local_alloc_state = OCFS2_LA_DISABLED; | |
228 | + } | |
229 | + queue_delayed_work(ocfs2_wq, &osb->la_enable_wq, | |
230 | + OCFS2_LA_ENABLE_INTERVAL); | |
231 | + goto out_unlock; | |
232 | + } | |
233 | + | |
234 | + /* | |
235 | + * Don't increase the size of the local alloc window until we | |
236 | + * know we might be able to fulfill the request. Otherwise, we | |
237 | + * risk bouncing around the global bitmap during periods of | |
238 | + * low space. | |
239 | + */ | |
240 | + if (osb->local_alloc_state != OCFS2_LA_THROTTLED) | |
241 | + osb->local_alloc_bits = osb->local_alloc_default_bits; | |
242 | + | |
243 | +out_unlock: | |
244 | + state = osb->local_alloc_state; | |
245 | + spin_unlock(&osb->osb_lock); | |
246 | + | |
247 | + return state; | |
248 | +} | |
249 | + | |
250 | static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, | |
251 | struct ocfs2_alloc_context **ac, | |
252 | struct inode **bitmap_inode, | |
253 | @@ -849,12 +984,21 @@ static int ocfs2_local_alloc_reserve_for | |
254 | goto bail; | |
255 | } | |
256 | ||
257 | +retry_enospc: | |
258 | (*ac)->ac_bits_wanted = osb->local_alloc_bits; | |
259 | ||
260 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); | |
261 | + if (status == -ENOSPC) { | |
262 | + if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == | |
263 | + OCFS2_LA_DISABLED) | |
264 | + goto bail; | |
265 | + | |
266 | + ocfs2_free_ac_resource(*ac); | |
267 | + memset(*ac, 0, sizeof(struct ocfs2_alloc_context)); | |
268 | + goto retry_enospc; | |
269 | + } | |
270 | if (status < 0) { | |
271 | - if (status != -ENOSPC) | |
272 | - mlog_errno(status); | |
273 | + mlog_errno(status); | |
274 | goto bail; | |
275 | } | |
276 | ||
277 | @@ -907,6 +1051,34 @@ static int ocfs2_local_alloc_new_window( | |
278 | * the more specific cluster api to claim bits. */ | |
279 | status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, | |
280 | &cluster_off, &cluster_count); | |
281 | + if (status == -ENOSPC) { | |
282 | +retry_enospc: | |
283 | + /* | |
284 | + * Note: We could also try syncing the journal here to | |
285 | + * allow use of any free bits which the current | |
286 | + * transaction can't give us access to. --Mark | |
287 | + */ | |
288 | + if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) == | |
289 | + OCFS2_LA_DISABLED) | |
290 | + goto bail; | |
291 | + | |
292 | + status = ocfs2_claim_clusters(osb, handle, ac, | |
293 | + osb->local_alloc_bits, | |
294 | + &cluster_off, | |
295 | + &cluster_count); | |
296 | + if (status == -ENOSPC) | |
297 | + goto retry_enospc; | |
298 | + /* | |
299 | + * We only shrunk the *minimum* number of bits in our | |
300 | + * request - it's entirely possible that the allocator | |
301 | + * might give us more than we asked for. | |
302 | + */ | |
303 | + if (status == 0) { | |
304 | + spin_lock(&osb->osb_lock); | |
305 | + osb->local_alloc_bits = cluster_count; | |
306 | + spin_unlock(&osb->osb_lock); | |
307 | + } | |
308 | + } | |
309 | if (status < 0) { | |
310 | if (status != -ENOSPC) | |
311 | mlog_errno(status); | |
312 | @@ -950,6 +1122,8 @@ static int ocfs2_local_alloc_slide_windo | |
313 | ||
314 | mlog_entry_void(); | |
315 | ||
316 | + ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); | |
317 | + | |
318 | /* This will lock the main bitmap for us. */ | |
319 | status = ocfs2_local_alloc_reserve_for_window(osb, | |
320 | &ac, | |
321 | Index: linux-2.6.27/fs/ocfs2/localalloc.h | |
322 | =================================================================== | |
323 | --- linux-2.6.27.orig/fs/ocfs2/localalloc.h | |
324 | +++ linux-2.6.27/fs/ocfs2/localalloc.h | |
325 | @@ -52,4 +52,8 @@ int ocfs2_claim_local_alloc_bits(struct | |
326 | u32 *bit_off, | |
327 | u32 *num_bits); | |
328 | ||
329 | +void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, | |
330 | + unsigned int num_clusters); | |
331 | +void ocfs2_la_enable_worker(struct work_struct *work); | |
332 | + | |
333 | #endif /* OCFS2_LOCALALLOC_H */ | |
334 | Index: linux-2.6.27/fs/ocfs2/ocfs2.h | |
335 | =================================================================== | |
336 | --- linux-2.6.27.orig/fs/ocfs2/ocfs2.h | |
337 | +++ linux-2.6.27/fs/ocfs2/ocfs2.h | |
338 | @@ -176,9 +176,13 @@ struct ocfs2_alloc_stats | |
339 | ||
340 | enum ocfs2_local_alloc_state | |
341 | { | |
342 | - OCFS2_LA_UNUSED = 0, | |
343 | - OCFS2_LA_ENABLED, | |
344 | - OCFS2_LA_DISABLED | |
345 | + OCFS2_LA_UNUSED = 0, /* Local alloc will never be used for | |
346 | + * this mountpoint. */ | |
347 | + OCFS2_LA_ENABLED, /* Local alloc is in use. */ | |
348 | + OCFS2_LA_THROTTLED, /* Local alloc is in use, but number | |
349 | + * of bits has been reduced. */ | |
350 | + OCFS2_LA_DISABLED /* Local alloc has temporarily been | |
351 | + * disabled. */ | |
352 | }; | |
353 | ||
354 | enum ocfs2_mount_options | |
355 | @@ -261,9 +265,20 @@ struct ocfs2_super | |
356 | struct ocfs2_journal *journal; | |
357 | unsigned long osb_commit_interval; | |
358 | ||
359 | + struct delayed_work la_enable_wq; | |
360 | + | |
361 | + /* | |
362 | + * Must hold local alloc i_mutex and osb->osb_lock to change | |
363 | + * local_alloc_bits. Reads can be done under either lock. | |
364 | + */ | |
365 | unsigned int local_alloc_bits; | |
366 | - enum ocfs2_local_alloc_state local_alloc_state; | |
367 | + unsigned int local_alloc_default_bits; | |
368 | + | |
369 | + enum ocfs2_local_alloc_state local_alloc_state; /* protected | |
370 | + * by osb_lock */ | |
371 | + | |
372 | struct buffer_head *local_alloc_bh; | |
373 | + | |
374 | u64 la_last_gd; | |
375 | ||
376 | /* Next two fields are for local node slot recovery during | |
377 | Index: linux-2.6.27/fs/ocfs2/suballoc.c | |
378 | =================================================================== | |
379 | --- linux-2.6.27.orig/fs/ocfs2/suballoc.c | |
380 | +++ linux-2.6.27/fs/ocfs2/suballoc.c | |
381 | @@ -117,7 +117,7 @@ static int ocfs2_reserve_clusters_with_l | |
382 | u32 bits_wanted, u64 max_block, | |
383 | struct ocfs2_alloc_context **ac); | |
384 | ||
385 | -static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) | |
386 | +void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) | |
387 | { | |
388 | struct inode *inode = ac->ac_inode; | |
389 | ||
390 | @@ -709,21 +709,11 @@ static int ocfs2_reserve_clusters_with_l | |
391 | status = ocfs2_reserve_local_alloc_bits(osb, | |
392 | bits_wanted, | |
393 | *ac); | |
394 | - if (status == -ENOSPC) { | |
395 | - /* reserve_local_bits will return enospc with | |
396 | - * the local alloc inode still locked, so we | |
397 | - * can change this safely here. */ | |
398 | - mlog(0, "Disabling local alloc\n"); | |
399 | - /* We set to OCFS2_LA_DISABLED so that umount | |
400 | - * can clean up what's left of the local | |
401 | - * allocation */ | |
402 | - osb->local_alloc_state = OCFS2_LA_DISABLED; | |
403 | - } else if (status == -EFBIG) { | |
404 | + if (status == -EFBIG) { | |
405 | /* The local alloc window is outside ac_max_block. | |
406 | - * use the main bitmap, but don't disable | |
407 | - * local alloc. */ | |
408 | + * use the main bitmap. */ | |
409 | status = -ENOSPC; | |
410 | - } else if (status < 0) { | |
411 | + } else if ((status < 0) && (status != -ENOSPC)) { | |
412 | mlog_errno(status); | |
413 | goto bail; | |
414 | } | |
415 | @@ -1045,6 +1035,7 @@ static int ocfs2_cluster_group_search(st | |
416 | int ret; | |
417 | u64 blkoff; | |
418 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; | |
419 | + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | |
420 | u16 tmp_off, tmp_found; | |
421 | unsigned int max_bits, gd_cluster_off; | |
422 | ||
423 | @@ -1096,6 +1087,12 @@ static int ocfs2_cluster_group_search(st | |
424 | *bit_off = tmp_off; | |
425 | *bits_found = tmp_found; | |
426 | search = 0; /* success */ | |
427 | + } else if (tmp_found) { | |
428 | + /* | |
429 | + * Don't show bits which we'll be returning | |
430 | + * for allocation to the local alloc bitmap. | |
431 | + */ | |
432 | + ocfs2_local_alloc_seen_free_bits(osb, tmp_found); | |
433 | } | |
434 | } | |
435 | ||
436 | @@ -1902,9 +1899,15 @@ int ocfs2_free_clusters(handle_t *handle | |
437 | status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, | |
438 | bg_start_bit, bg_blkno, | |
439 | num_clusters); | |
440 | - if (status < 0) | |
441 | + if (status < 0) { | |
442 | mlog_errno(status); | |
443 | + goto out; | |
444 | + } | |
445 | + | |
446 | + ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb), | |
447 | + num_clusters); | |
448 | ||
449 | +out: | |
450 | mlog_exit(status); | |
451 | return status; | |
452 | } | |
453 | Index: linux-2.6.27/fs/ocfs2/suballoc.h | |
454 | =================================================================== | |
455 | --- linux-2.6.27.orig/fs/ocfs2/suballoc.h | |
456 | +++ linux-2.6.27/fs/ocfs2/suballoc.h | |
457 | @@ -158,6 +158,7 @@ static inline int ocfs2_is_cluster_bitma | |
458 | * apis above. */ | |
459 | int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, | |
460 | struct ocfs2_alloc_context *ac); | |
461 | +void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac); | |
462 | ||
463 | /* given a cluster offset, calculate which block group it belongs to | |
464 | * and return that block offset. */ | |
465 | Index: linux-2.6.27/fs/ocfs2/super.c | |
466 | =================================================================== | |
467 | --- linux-2.6.27.orig/fs/ocfs2/super.c | |
468 | +++ linux-2.6.27/fs/ocfs2/super.c | |
469 | @@ -655,7 +655,8 @@ static int ocfs2_fill_super(struct super | |
470 | osb->s_atime_quantum = parsed_options.atime_quantum; | |
471 | osb->preferred_slot = parsed_options.slot; | |
472 | osb->osb_commit_interval = parsed_options.commit_interval; | |
473 | - osb->local_alloc_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); | |
474 | + osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); | |
475 | + osb->local_alloc_bits = osb->local_alloc_default_bits; | |
476 | ||
477 | status = ocfs2_verify_userspace_stack(osb, &parsed_options); | |
478 | if (status) | |
479 | @@ -1465,6 +1466,7 @@ static int ocfs2_initialize_super(struct | |
480 | ||
481 | osb->local_alloc_state = OCFS2_LA_UNUSED; | |
482 | osb->local_alloc_bh = NULL; | |
483 | + INIT_DELAYED_WORK(&osb->la_enable_wq, ocfs2_la_enable_worker); | |
484 | ||
485 | init_waitqueue_head(&osb->osb_mount_event); | |
486 |