From: Greg Kroah-Hartman <gregkh@suse.de>
Subject: revert ext4 changes in 2.6.27.19 and 2.6.27.20 and 2.6.27.25
Patch-mainline: no

As we are already taking a different version of ext4, revert the
changes that were made to ext4 in 2.6.27.19, 2.6.27.20 and 2.6.27.25.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

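A combined revert of this shape is normally generated mechanically from the
stable diffs rather than written by hand. The sketch below is illustrative
only (it assumes the kernel.org patch-2.6.27.18 and patch-2.6.27.25 diffs sit
in the working directory and that patchutils is installed); since it sits
above the first diff header, patch(1) treats it as commentary:

    # Both stable patches apply to the same 2.6.27 base, so interdiff
    # yields a patch taking a .25 tree back to a .18 tree; filterdiff
    # then keeps only the ext4 pieces being reverted here.
    interdiff patch-2.6.27.25 patch-2.6.27.18 | \
        filterdiff -i '*/fs/ext4/*' \
                   -i '*/Documentation/filesystems/ext4.txt' \
        > revert-ext4.diff
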
--- b/Documentation/filesystems/ext4.txt
+++ a/Documentation/filesystems/ext4.txt
@@ -73,7 +73,7 @@
* extent format more robust in face of on-disk corruption due to magics,
* internal redunancy in tree
* improved file allocation (multi-block alloc)
+* fix 32000 subdirectory limit
-* lift 32000 subdirectory limit imposed by i_links_count[1]
* nsec timestamps for mtime, atime, ctime, create time
* inode version field on disk (NFSv4, Lustre)
* reduced e2fsck time via uninit_bg feature
@@ -88,9 +88,6 @@
* efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force
the ordering)

-[1] Filesystems with a block size of 1k may see a limit imposed by the
-directory hash tree having a maximum depth of two.
-
2.2 Candidate features for future inclusion

* Online defrag (patches available but not well tested)
reverted:
--- b/fs/ext4/balloc.c
+++ a/fs/ext4/balloc.c
@@ -20,7 +20,6 @@
#include "ext4.h"
#include "ext4_jbd2.h"
#include "group.h"
-#include "mballoc.h"

/*
* balloc.c contains the blocks allocation and deallocation routines
@@ -319,41 +318,18 @@
block_group, bitmap_blk);
return NULL;
}
+ if (bh_uptodate_or_lock(bh))
-
- if (bitmap_uptodate(bh))
return bh;

- lock_buffer(bh);
- if (bitmap_uptodate(bh)) {
- unlock_buffer(bh);
- return bh;
- }
spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh, block_group, desc);
- set_bitmap_uptodate(bh);
set_buffer_uptodate(bh);
unlock_buffer(bh);
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
return bh;
}
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
- if (buffer_uptodate(bh)) {
- /*
- * if not uninit if bh is uptodate,
- * bitmap is also uptodate
- */
- set_bitmap_uptodate(bh);
- unlock_buffer(bh);
- return bh;
- }
- /*
- * submit the buffer_head for read. We can
- * safely mark the bitmap as uptodate now.
- * We do it here so the bitmap uptodate bit
- * get set with buffer lock held.
- */
- set_bitmap_uptodate(bh);
if (bh_submit_read(bh) < 0) {
put_bh(bh);
ext4_error(sb, __func__,
@@ -861,136 +837,6 @@
}

/**
- * ext4_add_groupblocks() -- Add given blocks to an existing group
- * @handle: handle to this transaction
- * @sb: super block
- * @block: start physcial block to add to the block group
- * @count: number of blocks to free
- *
- * This marks the blocks as free in the bitmap. We ask the
- * mballoc to reload the buddy after this by setting group
- * EXT4_GROUP_INFO_NEED_INIT_BIT flag
- */
-void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
- ext4_fsblk_t block, unsigned long count)
-{
- struct buffer_head *bitmap_bh = NULL;
- struct buffer_head *gd_bh;
- ext4_group_t block_group;
- ext4_grpblk_t bit;
- unsigned long i;
- struct ext4_group_desc *desc;
- struct ext4_super_block *es;
- struct ext4_sb_info *sbi;
- int err = 0, ret;
- ext4_grpblk_t blocks_freed;
- struct ext4_group_info *grp;
-
- sbi = EXT4_SB(sb);
- es = sbi->s_es;
- ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
-
- ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
- grp = ext4_get_group_info(sb, block_group);
- /*
- * Check to see if we are freeing blocks across a group
- * boundary.
- */
- if (bit + count > EXT4_BLOCKS_PER_GROUP(sb))
- goto error_return;
-
- bitmap_bh = ext4_read_block_bitmap(sb, block_group);
- if (!bitmap_bh)
- goto error_return;
- desc = ext4_get_group_desc(sb, block_group, &gd_bh);
- if (!desc)
- goto error_return;
-
- if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
- in_range(ext4_inode_bitmap(sb, desc), block, count) ||
- in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
- in_range(block + count - 1, ext4_inode_table(sb, desc),
- sbi->s_itb_per_group)) {
- ext4_error(sb, __func__,
- "Adding blocks in system zones - "
- "Block = %llu, count = %lu",
- block, count);
- goto error_return;
- }
-
- /*
- * We are about to add blocks to the bitmap,
- * so we need undo access.
- */
- BUFFER_TRACE(bitmap_bh, "getting undo access");
- err = ext4_journal_get_undo_access(handle, bitmap_bh);
- if (err)
- goto error_return;
-
- /*
- * We are about to modify some metadata. Call the journal APIs
- * to unshare ->b_data if a currently-committing transaction is
- * using it
- */
- BUFFER_TRACE(gd_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, gd_bh);
- if (err)
- goto error_return;
- /*
- * make sure we don't allow a parallel init on other groups in the
- * same buddy cache
- */
- down_write(&grp->alloc_sem);
- for (i = 0, blocks_freed = 0; i < count; i++) {
- BUFFER_TRACE(bitmap_bh, "clear bit");
- if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
- bit + i, bitmap_bh->b_data)) {
- ext4_error(sb, __func__,
- "bit already cleared for block %llu",
- (ext4_fsblk_t)(block + i));
- BUFFER_TRACE(bitmap_bh, "bit already cleared");
- } else {
- blocks_freed++;
- }
- }
- spin_lock(sb_bgl_lock(sbi, block_group));
- le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed);
- desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
- spin_unlock(sb_bgl_lock(sbi, block_group));
- percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
-
- if (sbi->s_log_groups_per_flex) {
- ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
- spin_lock(sb_bgl_lock(sbi, flex_group));
- sbi->s_flex_groups[flex_group].free_blocks += blocks_freed;
- spin_unlock(sb_bgl_lock(sbi, flex_group));
- }
- /*
- * request to reload the buddy with the
- * new bitmap information
- */
- set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
- ext4_mb_update_group_info(grp, blocks_freed);
- up_write(&grp->alloc_sem);
-
- /* We dirtied the bitmap block */
- BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
-
- /* And the group descriptor block */
- BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
- ret = ext4_journal_dirty_metadata(handle, gd_bh);
- if (!err)
- err = ret;
- sb->s_dirt = 1;
-
-error_return:
- brelse(bitmap_bh);
- ext4_std_error(sb, err);
- return;
-}
-
-/**
* ext4_free_blocks() -- Free given blocks and update quota
* @handle: handle for this transaction
* @inode: inode
reverted:
--- b/fs/ext4/ext4.h
+++ a/fs/ext4/ext4.h
@@ -19,7 +19,6 @@
#include <linux/types.h>
#include <linux/blkdev.h>
#include <linux/magic.h>
-#include <linux/jbd2.h>
#include "ext4_i.h"

/*
@@ -248,30 +247,6 @@
#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */

-/* Flags that should be inherited by new inodes from their parent. */
-#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
- EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
- EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
- EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
- EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
-
-/* Flags that are appropriate for regular files (all but dir-specific ones). */
-#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
-
-/* Flags that are appropriate for non-directories/regular files. */
-#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
-
-/* Mask out flags that are inappropriate for the given type of inode. */
-static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
-{
- if (S_ISDIR(mode))
- return flags;
- else if (S_ISREG(mode))
- return flags & EXT4_REG_FLMASK;
- else
- return flags & EXT4_OTHER_FLMASK;
-}
-
/*
* Inode dynamic state flags
*/
@@ -279,7 +254,6 @@
#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
-#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */

/* Used to pass group descriptor data when online resize is done */
struct ext4_new_group_input {
@@ -327,9 +301,7 @@
#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input)
#define EXT4_IOC_MIGRATE _IO('f', 9)
- /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
/* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
-#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)

/*
* ioctl commands in 32 bit emulation
@@ -887,7 +859,7 @@
{
unsigned len = le16_to_cpu(dlen);

+ if (len == EXT4_MAX_REC_LEN)
- if (len == EXT4_MAX_REC_LEN || len == 0)
return 1 << 16;
return len;
}
@@ -917,9 +889,6 @@
#define DX_HASH_LEGACY 0
#define DX_HASH_HALF_MD4 1
#define DX_HASH_TEA 2
-#define DX_HASH_LEGACY_UNSIGNED 3
-#define DX_HASH_HALF_MD4_UNSIGNED 4
-#define DX_HASH_TEA_UNSIGNED 5

#ifdef __KERNEL__

@@ -1019,11 +988,9 @@
ext4_fsblk_t nblocks);
extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
ext4_fsblk_t block, unsigned long count, int metadata);
+extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
+ ext4_fsblk_t block, unsigned long count,
-extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
- ext4_fsblk_t block, unsigned long count,
unsigned long *pdquot_freed_blocks);
-extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
- ext4_fsblk_t block, unsigned long count);
extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *);
extern void ext4_check_blocks_bitmap (struct super_block *);
extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
@@ -1071,13 +1038,12 @@
extern void exit_ext4_mballoc(void);
extern void ext4_mb_free_blocks(handle_t *, struct inode *,
unsigned long, unsigned long, int, unsigned long *);
+extern int ext4_mb_add_more_groupinfo(struct super_block *sb,
-extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
ext4_grpblk_t add);
+
+
-extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
-extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
- ext4_group_t, int);
/* inode.c */
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t blocknr);
@@ -1105,14 +1071,13 @@
extern void ext4_truncate (struct inode *);
extern void ext4_set_inode_flags(struct inode *);
extern void ext4_get_inode_flags(struct ext4_inode_info *);
-extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from);
+extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
-extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);

/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
@@ -1202,11 +1167,8 @@

static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
{
+ return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
+ le32_to_cpu(raw_inode->i_size_lo);
- if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
- return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
- le32_to_cpu(raw_inode->i_size_lo);
- else
- return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
}

static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
@@ -1282,23 +1244,6 @@
sector_t block, unsigned long max_blocks,
struct buffer_head *bh, int create,
int extend_disksize, int flag);
-/*
- * Add new method to test wether block and inode bitmaps are properly
- * initialized. With uninit_bg reading the block from disk is not enough
- * to mark the bitmap uptodate. We need to also zero-out the bitmap
- */
-#define BH_BITMAP_UPTODATE BH_JBDPrivateStart
-
-static inline int bitmap_uptodate(struct buffer_head *bh)
-{
- return (buffer_uptodate(bh) &&
- test_bit(BH_BITMAP_UPTODATE, &(bh)->b_state));
-}
-static inline void set_bitmap_uptodate(struct buffer_head *bh)
-{
- set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
-}
-
#endif /* __KERNEL__ */

#endif /* _EXT4_H */
reverted:
--- b/fs/ext4/ext4_sb.h
+++ a/fs/ext4/ext4_sb.h
@@ -56,7 +56,6 @@
u32 s_next_generation;
u32 s_hash_seed[4];
int s_def_hash_version;
- int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
struct percpu_counter s_freeblocks_counter;
struct percpu_counter s_freeinodes_counter;
struct percpu_counter s_dirs_counter;
@@ -103,8 +102,7 @@
struct list_head s_committed_transaction;
spinlock_t s_md_lock;
tid_t s_last_transaction;
+ unsigned short *s_mb_offsets, *s_mb_maxs;
- unsigned short *s_mb_offsets;
- unsigned int *s_mb_maxs;

/* tunables */
unsigned long s_stripe;
reverted:
--- b/fs/ext4/extents.c
+++ a/fs/ext4/extents.c
@@ -1118,8 +1118,7 @@
struct ext4_extent_idx *ix;
struct ext4_extent *ex;
ext4_fsblk_t block;
+ int depth, ee_len;
- int depth; /* Note, NOT eh_depth; depth from top of tree */
- int ee_len;

BUG_ON(path == NULL);
depth = path->p_depth;
@@ -1178,8 +1177,7 @@
if (bh == NULL)
return -EIO;
eh = ext_block_hdr(bh);
+ if (ext4_ext_check_header(inode, eh, depth)) {
- /* subtract from p_depth to get proper eh_depth */
- if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
put_bh(bh);
return -EIO;
}
@@ -1633,13 +1631,11 @@
{
struct ext4_ext_cache *cex;
BUG_ON(len == 0);
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
cex = &EXT4_I(inode)->i_cached_extent;
cex->ec_type = type;
cex->ec_block = block;
cex->ec_len = len;
cex->ec_start = start;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
}

/*
@@ -1696,17 +1692,12 @@
struct ext4_extent *ex)
{
struct ext4_ext_cache *cex;
- int ret = EXT4_EXT_CACHE_NO;

- /*
- * We borrow i_block_reservation_lock to protect i_cached_extent
- */
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
cex = &EXT4_I(inode)->i_cached_extent;

/* has cache valid data? */
if (cex->ec_type == EXT4_EXT_CACHE_NO)
+ return EXT4_EXT_CACHE_NO;
- goto errout;

BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
cex->ec_type != EXT4_EXT_CACHE_EXTENT);
@@ -1717,11 +1708,11 @@
ext_debug("%u cached by %u:%u:%llu\n",
block,
cex->ec_block, cex->ec_len, cex->ec_start);
+ return cex->ec_type;
- ret = cex->ec_type;
}
+
+ /* not in cache */
+ return EXT4_EXT_CACHE_NO;
-errout:
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- return ret;
}

/*
@@ -2677,8 +2668,6 @@
if (allocated > max_blocks)
allocated = max_blocks;
set_buffer_unwritten(bh_result);
- bh_result->b_bdev = inode->i_sb->s_bdev;
- bh_result->b_blocknr = newblock;
goto out2;
}

reverted:
--- b/fs/ext4/file.c
+++ a/fs/ext4/file.c
@@ -33,14 +33,9 @@
*/
static int ext4_release_file (struct inode * inode, struct file * filp)
{
- if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) {
- ext4_alloc_da_blocks(inode);
- EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE;
- }
/* if we are the last writer on the inode, drop the block reservation */
if ((filp->f_mode & FMODE_WRITE) &&
+ (atomic_read(&inode->i_writecount) == 1))
- (atomic_read(&inode->i_writecount) == 1) &&
- !EXT4_I(inode)->i_reserved_data_blocks)
{
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_reservation(inode);
reverted:
--- b/fs/ext4/hash.c
+++ a/fs/ext4/hash.c
@@ -35,71 +35,23 @@


/* The old legacy hash */
+static __u32 dx_hack_hash (const char *name, int len)
-static __u32 dx_hack_hash_unsigned(const char *name, int len)
{
+ __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
- __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
- const unsigned char *ucp = (const unsigned char *) name;
-
- while (len--) {
- hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373));
-
- if (hash & 0x80000000)
- hash -= 0x7fffffff;
- hash1 = hash0;
- hash0 = hash;
- }
- return hash0 << 1;
-}
-
-static __u32 dx_hack_hash_signed(const char *name, int len)
-{
- __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
- const signed char *scp = (const signed char *) name;
-
while (len--) {
+ __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
- hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373));

+ if (hash & 0x80000000) hash -= 0x7fffffff;
- if (hash & 0x80000000)
- hash -= 0x7fffffff;
hash1 = hash0;
hash0 = hash;
}
+ return (hash0 << 1);
- return hash0 << 1;
}

+static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
-static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
{
__u32 pad, val;
int i;
- const signed char *scp = (const signed char *) msg;
-
- pad = (__u32)len | ((__u32)len << 8);
- pad |= pad << 16;
-
- val = pad;
- if (len > num*4)
- len = num * 4;
- for (i = 0; i < len; i++) {
- if ((i % 4) == 0)
- val = pad;
- val = ((int) scp[i]) + (val << 8);
- if ((i % 4) == 3) {
- *buf++ = val;
- val = pad;
- num--;
- }
- }
- if (--num >= 0)
- *buf++ = val;
- while (--num >= 0)
- *buf++ = pad;
-}
-
-static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
-{
- __u32 pad, val;
- int i;
- const unsigned char *ucp = (const unsigned char *) msg;

pad = (__u32)len | ((__u32)len << 8);
pad |= pad << 16;
@@ -110,7 +62,7 @@
for (i=0; i < len; i++) {
if ((i % 4) == 0)
val = pad;
+ val = msg[i] + (val << 8);
- val = ((int) ucp[i]) + (val << 8);
if ((i % 4) == 3) {
*buf++ = val;
val = pad;
@@ -143,8 +95,6 @@
const char *p;
int i;
__u32 in[8], buf[4];
- void (*str2hashbuf)(const char *, int, __u32 *, int) =
- str2hashbuf_signed;

/* Initialize the default seed for the hash checksum functions */
buf[0] = 0x67452301;
@@ -163,18 +113,13 @@
}

switch (hinfo->hash_version) {
- case DX_HASH_LEGACY_UNSIGNED:
- hash = dx_hack_hash_unsigned(name, len);
- break;
case DX_HASH_LEGACY:
+ hash = dx_hack_hash(name, len);
- hash = dx_hack_hash_signed(name, len);
break;
- case DX_HASH_HALF_MD4_UNSIGNED:
- str2hashbuf = str2hashbuf_unsigned;
case DX_HASH_HALF_MD4:
p = name;
while (len > 0) {
+ str2hashbuf(p, len, in, 8);
- (*str2hashbuf)(p, len, in, 8);
half_md4_transform(buf, in);
len -= 32;
p += 32;
@@ -182,12 +127,10 @@
minor_hash = buf[2];
hash = buf[1];
break;
- case DX_HASH_TEA_UNSIGNED:
- str2hashbuf = str2hashbuf_unsigned;
case DX_HASH_TEA:
p = name;
while (len > 0) {
+ str2hashbuf(p, len, in, 4);
- (*str2hashbuf)(p, len, in, 4);
TEA_transform(buf, in);
len -= 16;
p += 16;
reverted:
--- b/fs/ext4/ialloc.c
+++ a/fs/ext4/ialloc.c
@@ -84,7 +84,7 @@
}

memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
+ mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
- mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
bh->b_data);

return EXT4_INODES_PER_GROUP(sb);
@@ -115,40 +115,18 @@
block_group, bitmap_blk);
return NULL;
}
+ if (bh_uptodate_or_lock(bh))
- if (bitmap_uptodate(bh))
return bh;

- lock_buffer(bh);
- if (bitmap_uptodate(bh)) {
- unlock_buffer(bh);
- return bh;
- }
spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
ext4_init_inode_bitmap(sb, bh, block_group, desc);
- set_bitmap_uptodate(bh);
set_buffer_uptodate(bh);
unlock_buffer(bh);
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
return bh;
}
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
- if (buffer_uptodate(bh)) {
- /*
- * if not uninit if bh is uptodate,
- * bitmap is also uptodate
- */
- set_bitmap_uptodate(bh);
- unlock_buffer(bh);
- return bh;
- }
- /*
- * submit the buffer_head for read. We can
- * safely mark the bitmap as uptodate now.
- * We do it here so the bitmap uptodate bit
- * get set with buffer lock held.
- */
- set_bitmap_uptodate(bh);
if (bh_submit_read(bh) < 0) {
put_bh(bh);
ext4_error(sb, __func__,
@@ -188,7 +166,7 @@
struct ext4_group_desc * gdp;
struct ext4_super_block * es;
struct ext4_sb_info *sbi;
+ int fatal = 0, err;
- int fatal = 0, err, cleared;
ext4_group_t flex_group;

if (atomic_read(&inode->i_count) > 1) {
@@ -242,12 +220,10 @@
goto error_return;

/* Ok, now we can actually update the inode bitmaps.. */
+ if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+ bit, bitmap_bh->b_data))
+ ext4_error (sb, "ext4_free_inode",
+ "bit already cleared for inode %lu", ino);
- spin_lock(sb_bgl_lock(sbi, block_group));
- cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
- spin_unlock(sb_bgl_lock(sbi, block_group));
- if (!cleared)
- ext4_error(sb, "ext4_free_inode",
- "bit already cleared for inode %lu", ino);
else {
gdp = ext4_get_group_desc (sb, block_group, &bh2);

@@ -591,77 +567,6 @@
}

/*
- * claim the inode from the inode bitmap. If the group
- * is uninit we need to take the groups's sb_bgl_lock
- * and clear the uninit flag. The inode bitmap update
- * and group desc uninit flag clear should be done
- * after holding sb_bgl_lock so that ext4_read_inode_bitmap
- * doesn't race with the ext4_claim_inode
- */
-static int ext4_claim_inode(struct super_block *sb,
- struct buffer_head *inode_bitmap_bh,
- unsigned long ino, ext4_group_t group, int mode)
-{
- int free = 0, retval = 0;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
-
- spin_lock(sb_bgl_lock(sbi, group));
- if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
- /* not a free inode */
- retval = 1;
- goto err_ret;
- }
- ino++;
- if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
- ino > EXT4_INODES_PER_GROUP(sb)) {
- spin_unlock(sb_bgl_lock(sbi, group));
- ext4_error(sb, __func__,
- "reserved inode or inode > inodes count - "
- "block_group = %lu, inode=%lu", group,
- ino + group * EXT4_INODES_PER_GROUP(sb));
- return 1;
- }
- /* If we didn't allocate from within the initialized part of the inode
- * table then we need to initialize up to this inode. */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
-
- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
- /* When marking the block group with
- * ~EXT4_BG_INODE_UNINIT we don't want to depend
- * on the value of bg_itable_unused even though
- * mke2fs could have initialized the same for us.
- * Instead we calculated the value below
- */
-
- free = 0;
- } else {
- free = EXT4_INODES_PER_GROUP(sb) -
- le16_to_cpu(gdp->bg_itable_unused);
- }
-
- /*
- * Check the relative inode number against the last used
- * relative inode number in this group. if it is greater
- * we need to update the bg_itable_unused count
- *
- */
- if (ino > free)
- gdp->bg_itable_unused =
- cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
- }
- le16_add_cpu(&gdp->bg_free_inodes_count, -1);
- if (S_ISDIR(mode)) {
- le16_add_cpu(&gdp->bg_used_dirs_count, 1);
- }
- gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
-err_ret:
- spin_unlock(sb_bgl_lock(sbi, group));
- return retval;
-}
-
-/*
* There are two policies for allocating an inode. If the new inode is
* a directory, then a forward search is made for a block group with both
* free space and a low directory-to-inode ratio; if that fails, then of
@@ -687,7 +592,6 @@
struct inode *ret;
ext4_group_t i;
int free = 0;
- static int once = 1;
ext4_group_t flex_group;

/* Cannot create files in a deleted directory */
@@ -705,15 +609,6 @@

if (sbi->s_log_groups_per_flex) {
ret2 = find_group_flex(sb, dir, &group);
- if (ret2 == -1) {
- ret2 = find_group_other(sb, dir, &group);
- if (ret2 == 0 && once) {
- once = 0;
- printk(KERN_NOTICE "ext4: find_group_flex "
- "failed, fallback succeeded dir %lu\n",
- dir->i_ino);
- }
- }
goto got_group;
}

@@ -754,12 +649,8 @@
if (err)
goto fail;

+ if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group),
+ ino, bitmap_bh->b_data)) {
- BUFFER_TRACE(bh2, "get_write_access");
- err = ext4_journal_get_write_access(handle, bh2);
- if (err)
- goto fail;
- if (!ext4_claim_inode(sb, bitmap_bh,
- ino, group, mode)) {
/* we won it */
BUFFER_TRACE(bitmap_bh,
"call ext4_journal_dirty_metadata");
@@ -767,13 +658,10 @@
bitmap_bh);
if (err)
goto fail;
- /* zero bit is inode number 1*/
- ino++;
goto got;
}
/* we lost it */
jbd2_journal_release_buffer(handle, bitmap_bh);
- jbd2_journal_release_buffer(handle, bh2);

if (++ino < EXT4_INODES_PER_GROUP(sb))
goto repeat_in_this_group;
@@ -793,6 +681,21 @@
goto out;

got:
+ ino++;
+ if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
+ ino > EXT4_INODES_PER_GROUP(sb)) {
+ ext4_error(sb, __func__,
+ "reserved inode or inode > inodes count - "
+ "block_group = %lu, inode=%lu", group,
+ ino + group * EXT4_INODES_PER_GROUP(sb));
+ err = -EIO;
+ goto fail;
+ }
+
+ BUFFER_TRACE(bh2, "get_write_access");
+ err = ext4_journal_get_write_access(handle, bh2);
+ if (err) goto fail;
+
/* We may have to initialize the block bitmap if it isn't already */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
@@ -827,10 +730,47 @@
if (err)
goto fail;
}
+
+ spin_lock(sb_bgl_lock(sbi, group));
+ /* If we didn't allocate from within the initialized part of the inode
+ * table then we need to initialize up to this inode. */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+
+ /* When marking the block group with
+ * ~EXT4_BG_INODE_UNINIT we don't want to depend
+ * on the value of bg_itable_unused even though
+ * mke2fs could have initialized the same for us.
+ * Instead we calculated the value below
+ */
+
+ free = 0;
+ } else {
+ free = EXT4_INODES_PER_GROUP(sb) -
+ le16_to_cpu(gdp->bg_itable_unused);
+ }
+
+ /*
+ * Check the relative inode number against the last used
+ * relative inode number in this group. if it is greater
+ * we need to update the bg_itable_unused count
+ *
+ */
+ if (ino > free)
+ gdp->bg_itable_unused =
+ cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
+ }
+
+ le16_add_cpu(&gdp->bg_free_inodes_count, -1);
+ if (S_ISDIR(mode)) {
+ le16_add_cpu(&gdp->bg_used_dirs_count, 1);
+ }
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+ spin_unlock(sb_bgl_lock(sbi, group));
+ BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
- BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
err = ext4_journal_dirty_metadata(handle, bh2);
+ if (err) goto fail;
- if (err)
- goto fail;

percpu_counter_dec(&sbi->s_freeinodes_counter);
if (S_ISDIR(mode))
@@ -866,12 +806,16 @@
ei->i_disksize = 0;

/*
+ * Don't inherit extent flag from directory. We set extent flag on
+ * newly created directory and file only if -o extent mount option is
+ * specified
- * Don't inherit extent flag from directory, amongst others. We set
- * extent flag on newly created directory and file only if -o extent
- * mount option is specified
*/
+ ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL);
+ if (S_ISLNK(mode))
+ ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
+ /* dirsync only applies to directories */
+ if (!S_ISDIR(mode))
+ ei->i_flags &= ~EXT4_DIRSYNC_FL;
- ei->i_flags =
- ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
ei->i_file_acl = 0;
ei->i_dtime = 0;
ei->i_block_alloc_info = NULL;
reverted:
--- b/fs/ext4/inode.c
+++ a/fs/ext4/inode.c
@@ -46,10 +46,8 @@
static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
+ return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode,
+ new_size);
- return jbd2_journal_begin_ordered_truncate(
- EXT4_SB(inode->i_sb)->s_journal,
- &EXT4_I(inode)->jinode,
- new_size);
}

static void ext4_invalidatepage(struct page *page, unsigned long offset);
@@ -353,9 +351,9 @@
final = ptrs;
} else {
ext4_warning(inode->i_sb, "ext4_block_to_path",
+ "block %lu > max",
- "block %lu > max in inode %lu",
i_block + direct_blocks +
+ indirect_blocks + double_blocks);
- indirect_blocks + double_blocks, inode->i_ino);
}
if (boundary)
*boundary = final - 1 - (i_block & (ptrs - 1));
@@ -1046,14 +1044,6 @@
EXT4_I(inode)->i_reserved_meta_blocks = mdb;
EXT4_I(inode)->i_allocated_meta_blocks = 0;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-
- /*
- * If we have done all the pending block allocations and if
- * there aren't any writers on the inode, we can discard the
- * inode's preallocations.
- */
- if (!total && (atomic_read(&inode->i_writecount) == 0))
- ext4_discard_reservation(inode);
}

/*
@@ -1085,7 +1075,6 @@
int retval;

clear_buffer_mapped(bh);
- clear_buffer_unwritten(bh);

/*
* Try to see if we can get the block without requesting
@@ -1116,18 +1105,6 @@
return retval;

/*
- * When we call get_blocks without the create flag, the
- * BH_Unwritten flag could have gotten set if the blocks
- * requested were part of a uninitialized extent. We need to
- * clear this flag now that we are committed to convert all or
- * part of the uninitialized extent to be an initialized
- * extent. This is because we need to avoid the combination
- * of BH_Unwritten and BH_Mapped flags being simultaneously
- * set on the buffer_head.
- */
- clear_buffer_unwritten(bh);
-
- /*
* New blocks allocate and/or writing to uninitialized extent
* will possibly result in updating i_data, so we take
* the write lock of i_data_sem, and call get_blocks()
@@ -1393,10 +1370,6 @@
goto out;
}

- /* We cannot recurse into the filesystem as the transaction is already
- * started */
- flags |= AOP_FLAG_NOFS;
-
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
ext4_journal_stop(handle);
@@ -1406,7 +1379,7 @@
*pagep = page;

ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+ ext4_get_block);
- ext4_get_block);

if (!ret && ext4_should_journal_data(inode)) {
ret = walk_page_buffers(handle, page_buffers(page),
@@ -1675,25 +1648,18 @@
*/
static int mpage_da_submit_io(struct mpage_da_data *mpd)
{
+ struct address_space *mapping = mpd->inode->i_mapping;
+ int ret = 0, err, nr_pages, i;
+ unsigned long index, end;
- long pages_skipped;
struct pagevec pvec;
- unsigned long index, end;
- int ret = 0, err, nr_pages, i;
- struct inode *inode = mpd->inode;
- struct address_space *mapping = inode->i_mapping;

BUG_ON(mpd->next_page <= mpd->first_page);
+ pagevec_init(&pvec, 0);
- /*
- * We need to start from the first_page to the next_page - 1
- * to make sure we also write the mapped dirty buffer_heads.
- * If we look at mpd->lbh.b_blocknr we would only be looking
- * at the currently mapped buffer_heads.
- */
index = mpd->first_page;
end = mpd->next_page - 1;

- pagevec_init(&pvec, 0);
while (index <= end) {
+ /* XXX: optimize tail */
nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
if (nr_pages == 0)
break;
@@ -1705,10 +1671,6 @@
break;
index++;

- BUG_ON(!PageLocked(page));
- BUG_ON(PageWriteback(page));
-
- pages_skipped = mpd->wbc->pages_skipped;
err = mapping->a_ops->writepage(page, mpd->wbc);
if (!err)
mpd->pages_written++;
@@ -2029,29 +1991,11 @@
bh = head;
do {
BUG_ON(buffer_locked(bh));
- /*
- * We need to try to allocate
- * unmapped blocks in the same page.
- * Otherwise we won't make progress
- * with the page in ext4_da_writepage
- */
if (buffer_dirty(bh) &&
(!buffer_mapped(bh) || buffer_delay(bh))) {
mpage_add_bh_to_extent(mpd, logical, bh);
if (mpd->io_done)
return MPAGE_DA_EXTENT_TAIL;
- } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
- /*
- * mapped dirty buffer. We need to update
- * the b_state because we look at
- * b_state in mpage_da_map_blocks. We don't
- * update b_size because if we find an
- * unmapped buffer_head later we need to
- * use the b_state flag of that buffer_head.
- */
- if (mpd->lbh.b_size == 0)
- mpd->lbh.b_state =
- bh->b_state & BH_FLAGS;
}
logical++;
} while ((bh = bh->b_this_page) != head);
@@ -2118,10 +2062,6 @@
struct buffer_head *bh_result, int create)
{
int ret = 0;
- sector_t invalid_block = ~((sector_t) 0xffff);
-
- if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
- invalid_block = ~0;

BUG_ON(create == 0);
BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
@@ -2143,18 +2083,11 @@
/* not enough space to reserve */
return ret;

+ map_bh(bh_result, inode->i_sb, 0);
- map_bh(bh_result, inode->i_sb, invalid_block);
set_buffer_new(bh_result);
set_buffer_delay(bh_result);
} else if (ret > 0) {
bh_result->b_size = (ret << inode->i_blkbits);
- /*
- * With sub-block writes into unwritten extents
- * we also need to mark the buffer as new so that
- * the unwritten parts of the buffer gets correctly zeroed.
- */
- if (buffer_unwritten(bh_result))
- set_buffer_new(bh_result);
ret = 0;
}

@@ -2365,20 +2298,6 @@
*/
if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
return 0;
-
- /*
- * If the filesystem has aborted, it is read-only, so return
- * right away instead of dumping stack traces later on that
- * will obscure the real source of the problem. We test
- * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because
- * the latter could be true if the filesystem is mounted
- * read-only, and in that case, ext4_da_writepages should
- * *never* be called, so if that ever happens, we would want
- * the stack trace.
- */
- if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT))
- return -EROFS;
-
/*
* Make sure nr_to_write is >= sbi->s_mb_stream_request
* This make sure small files blocks are allocated in
@@ -2417,7 +2336,7 @@
handle = ext4_journal_start(inode, needed_blocks);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
+ printk(KERN_EMERG "%s: jbd2_start: "
- printk(KERN_CRIT "%s: jbd2_start: "
"%ld pages, ino %lu; err %d\n", __func__,
wbc->nr_to_write, inode->i_ino, ret);
dump_stack();
@@ -2501,9 +2420,6 @@
ret = PTR_ERR(handle);
goto out;
}
- /* We cannot recurse into the filesystem as the transaction is already
- * started */
- flags |= AOP_FLAG_NOFS;

page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
@@ -2617,48 +2533,6 @@
return;
}

-/*
- * Force all delayed allocation blocks to be allocated for a given inode.
- */
-int ext4_alloc_da_blocks(struct inode *inode)
-{
- if (!EXT4_I(inode)->i_reserved_data_blocks &&
- !EXT4_I(inode)->i_reserved_meta_blocks)
- return 0;
-
- /*
- * We do something simple for now. The filemap_flush() will
- * also start triggering a write of the data blocks, which is
- * not strictly speaking necessary (and for users of
- * laptop_mode, not even desirable). However, to do otherwise
- * would require replicating code paths in:
- *
- * ext4_da_writepages() ->
- * write_cache_pages() ---> (via passed in callback function)
- * __mpage_da_writepage() -->
- * mpage_add_bh_to_extent()
- * mpage_da_map_blocks()
- *
- * The problem is that write_cache_pages(), located in
- * mm/page-writeback.c, marks pages clean in preparation for
- * doing I/O, which is not desirable if we're not planning on
- * doing I/O at all.
- *
- * We could call write_cache_pages(), and then redirty all of
- * the pages by calling redirty_page_for_writeback() but that
- * would be ugly in the extreme. So instead we would need to
- * replicate parts of the code in the above functions,
- * simplifying them becuase we wouldn't actually intend to
- * write out the pages, but rather only collect contiguous
- * logical block extents, call the multi-block allocator, and
- * then update the buffer heads with the block allocations.
- *
- * For now, though, we'll cheat by calling filemap_flush(),
- * which will map the blocks, and start the I/O, but not
- * actually wait for the I/O to complete.
- */
- return filemap_flush(inode->i_mapping);
-}

/*
* bmap() is special. It gets used by applications such as lilo and by
@@ -3668,9 +3542,6 @@
if (!ext4_can_truncate(inode))
return;

- if (inode->i_size == 0)
- ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
-
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
ext4_ext_truncate(inode);
return;
@@ -4088,9 +3959,11 @@
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
+ if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
+ cpu_to_le32(EXT4_OS_HURD)) {
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
ei->i_file_acl |=
((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
+ }
inode->i_size = ext4_isize(raw_inode);
ei->i_disksize = inode->i_size;
inode->i_generation = le32_to_cpu(raw_inode->i_generation);
@@ -4137,18 +4010,6 @@
(__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
}

- if (ei->i_file_acl &&
- ((ei->i_file_acl <
- (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
- EXT4_SB(sb)->s_gdb_count)) ||
- (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
- ext4_error(sb, __func__,
- "bad extended attribute block %llu in inode #%lu",
- ei->i_file_acl, inode->i_ino);
- ret = -EIO;
- goto bad_inode;
- }
-
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations;
@@ -4163,8 +4024,7 @@
inode->i_op = &ext4_symlink_inode_operations;
ext4_set_aops(inode);
}
+ } else {
- } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
- S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
inode->i_op = &ext4_special_inode_operations;
if (raw_inode->i_block[0])
init_special_inode(inode, inode->i_mode,
@@ -4172,13 +4032,6 @@
else
init_special_inode(inode, inode->i_mode,
new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
- } else {
- brelse(bh);
- ret = -EIO;
- ext4_error(inode->i_sb, __func__,
- "bogus i_mode (%o) for inode=%lu",
- inode->i_mode, inode->i_ino);
- goto bad_inode;
}
brelse (iloc.bh);
ext4_set_inode_flags(inode);
@@ -4956,9 +4809,8 @@
return !buffer_mapped(bh);
}

+int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
-int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- struct page *page = vmf->page;
loff_t size;
unsigned long len;
int ret = -EINVAL;
@@ -5009,8 +4861,6 @@
goto out_unlock;
ret = 0;
out_unlock:
- if (ret)
- ret = VM_FAULT_SIGBUS;
up_read(&inode->i_alloc_sem);
return ret;
}
reverted:
--- b/fs/ext4/ioctl.c
+++ a/fs/ext4/ioctl.c
@@ -49,7 +49,8 @@
if (err)
return err;

+ if (!S_ISDIR(inode->i_mode))
+ flags &= ~EXT4_DIRSYNC_FL;
- flags = ext4_mask_flags(inode->i_mode, flags);

err = -EPERM;
mutex_lock(&inode->i_mutex);
@@ -287,20 +288,6 @@
return err;
}

- case EXT4_IOC_ALLOC_DA_BLKS:
- {
- int err;
- if (!is_owner_or_cap(inode))
- return -EACCES;
-
- err = mnt_want_write(filp->f_path.mnt);
- if (err)
- return err;
- err = ext4_alloc_da_blocks(inode);
- mnt_drop_write(filp->f_path.mnt);
- return err;
- }
-
default:
return -ENOTTY;
}
reverted:
--- b/fs/ext4/mballoc.c
+++ a/fs/ext4/mballoc.c
@@ -100,7 +100,7 @@
* inode as:
*
* { page }
+ * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]...
- * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
*
*
* one block each for bitmap and buddy information. So for each group we
@@ -330,18 +330,6 @@
* object
*
*/
-static struct kmem_cache *ext4_pspace_cachep;
-static struct kmem_cache *ext4_ac_cachep;
-static struct kmem_cache *ext4_free_ext_cachep;
-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
- ext4_group_t group);
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group);
-static int ext4_mb_init_per_dev_proc(struct super_block *sb);
-static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
-static void ext4_mb_free_committed_blocks(struct super_block *);
-static void ext4_mb_poll_new_transaction(struct super_block *sb,
- handle_t *handle);

static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
@@ -730,7 +718,7 @@
* stored in the inode as
*
* { page }
+ * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]...
- * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
*
*
* one block each for bitmap and buddy information.
@@ -796,42 +784,20 @@
if (bh[i] == NULL)
goto out;

+ if (bh_uptodate_or_lock(bh[i]))
- if (bitmap_uptodate(bh[i]))
continue;

- lock_buffer(bh[i]);
- if (bitmap_uptodate(bh[i])) {
- unlock_buffer(bh[i]);
- continue;
- }
spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh[i],
first_group + i, desc);
- set_bitmap_uptodate(bh[i]);
set_buffer_uptodate(bh[i]);
unlock_buffer(bh[i]);
spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
continue;
}
spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
- if (buffer_uptodate(bh[i])) {
- /*
- * if not uninit if bh is uptodate,
- * bitmap is also uptodate
- */
- set_bitmap_uptodate(bh[i]);
- unlock_buffer(bh[i]);
- continue;
- }
get_bh(bh[i]);
- /*
- * submit the buffer_head for read. We can
- * safely mark the bitmap as uptodate now.
- * We do it here so the bitmap uptodate bit
- * get set with buffer lock held.
- */
- set_bitmap_uptodate(bh[i]);
bh[i]->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh[i]);
mb_debug("read bitmap for group %lu\n", first_group + i);
@@ -848,8 +814,6 @@

err = 0;
first_block = page->index * blocks_per_page;
- /* init the page */
- memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
for (i = 0; i < blocks_per_page; i++) {
int group;
struct ext4_group_info *grinfo;
@@ -876,6 +840,7 @@
BUG_ON(incore == NULL);
mb_debug("put buddy for group %u in page %lu/%x\n",
group, page->index, i * blocksize);
+ memset(data, 0xff, blocksize);
grinfo = ext4_get_group_info(sb, group);
grinfo->bb_fragments = 0;
memset(grinfo->bb_counters, 0,
@@ -883,9 +848,7 @@
/*
* incore got set to the group block bitmap below
*/
- ext4_lock_group(sb, group);
ext4_mb_generate_buddy(sb, data, incore, group);
- ext4_unlock_group(sb, group);
incore = NULL;
} else {
/* this is block of bitmap */
@@ -899,7 +862,6 @@

/* mark all preallocated blks used in in-core bitmap */
ext4_mb_generate_from_pa(sb, data, group);
- ext4_mb_generate_from_freelist(sb, data, group);
ext4_unlock_group(sb, group);

/* set incore so that the buddy information can be
@@ -924,20 +886,18 @@
ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
struct ext4_buddy *e4b)
{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct inode *inode = sbi->s_buddy_cache;
int blocks_per_page;
int block;
int pnum;
int poff;
struct page *page;
int ret;
- struct ext4_group_info *grp;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct inode *inode = sbi->s_buddy_cache;

mb_debug("load group %lu\n", group);

blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
- grp = ext4_get_group_info(sb, group);

e4b->bd_blkbits = sb->s_blocksize_bits;
e4b->bd_info = ext4_get_group_info(sb, group);
@@ -945,15 +905,6 @@
e4b->bd_group = group;
e4b->bd_buddy_page = NULL;
e4b->bd_bitmap_page = NULL;
- e4b->alloc_semp = &grp->alloc_sem;
-
- /* Take the read lock on the group alloc
- * sem. This would make sure a parallel
- * ext4_mb_init_group happening on other
- * groups mapped by the page is blocked
- * till we are done with allocation
- */
- down_read(e4b->alloc_semp);

/*
* the buddy cache inode stores the block bitmap
@@ -969,14 +920,6 @@
page = find_get_page(inode->i_mapping, pnum);
if (page == NULL || !PageUptodate(page)) {
if (page)
- /*
- * drop the page reference and try
- * to get the page with lock. If we
- * are not uptodate that implies
- * somebody just created the page but
- * is yet to initialize the same. So
- * wait for it to initialize.
- */
page_cache_release(page);
page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
if (page) {
@@ -1042,9 +985,6 @@
page_cache_release(e4b->bd_buddy_page);
e4b->bd_buddy = NULL;
e4b->bd_bitmap = NULL;
-
- /* Done with the buddy cache */
- up_read(e4b->alloc_semp);
return ret;
}

@@ -1054,9 +994,6 @@
page_cache_release(e4b->bd_bitmap_page);
if (e4b->bd_buddy_page)
page_cache_release(e4b->bd_buddy_page);
- /* Done with the buddy cache */
- if (e4b->alloc_semp)
- up_read(e4b->alloc_semp);
}


@@ -1094,10 +1031,7 @@
cur += 32;
continue;
}
+ mb_clear_bit_atomic(lock, cur, bm);
- if (lock)
- mb_clear_bit_atomic(lock, cur, bm);
- else
- mb_clear_bit(cur, bm);
cur++;
}
}
@@ -1115,10 +1049,7 @@
cur += 32;
continue;
}
+ mb_set_bit_atomic(lock, cur, bm);
- if (lock)
- mb_set_bit_atomic(lock, cur, bm);
- else
- mb_set_bit(cur, bm);
cur++;
}
}
@@ -1365,20 +1296,13 @@
ac->ac_tail = ret & 0xffff;
ac->ac_buddy = ret >> 16;

+ /* XXXXXXX: SUCH A HORRIBLE **CK */
+ /*FIXME!! Why ? */
- /*
- * take the page reference. We want the page to be pinned
1574 | - * so that we don't get a ext4_mb_init_cache_call for this | |
1575 | - * group until we update the bitmap. That would mean we | |
1576 | - * double allocate blocks. The reference is dropped | |
1577 | - * in ext4_mb_release_context | |
1578 | - */ | |
1579 | ac->ac_bitmap_page = e4b->bd_bitmap_page; | |
1580 | get_page(ac->ac_bitmap_page); | |
1581 | ac->ac_buddy_page = e4b->bd_buddy_page; | |
1582 | get_page(ac->ac_buddy_page); | |
1583 | + | |
1584 | - /* on allocation we use ac to track the held semaphore */ | |
1585 | - ac->alloc_semp = e4b->alloc_semp; | |
1586 | - e4b->alloc_semp = NULL; | |
1587 | /* store last allocated for subsequent stream allocation */ | |
1588 | if ((ac->ac_flags & EXT4_MB_HINT_DATA)) { | |
1589 | spin_lock(&sbi->s_md_lock); | |
1590 | @@ -1402,8 +1326,6 @@ | |
1591 | struct ext4_free_extent ex; | |
1592 | int max; | |
1593 | ||
1594 | - if (ac->ac_status == AC_STATUS_FOUND) | |
1595 | - return; | |
1596 | /* | |
1597 | * We don't want to scan for a whole year | |
1598 | */ | |
1599 | @@ -1450,7 +1372,7 @@ | |
1600 | struct ext4_free_extent *gex = &ac->ac_g_ex; | |
1601 | ||
1602 | BUG_ON(ex->fe_len <= 0); | |
1603 | + BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | |
1604 | - BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | |
1605 | BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | |
1606 | BUG_ON(ac->ac_status != AC_STATUS_CONTINUE); | |
1607 | ||
1608 | @@ -1770,173 +1692,6 @@ | |
1609 | return 0; | |
1610 | } | |
1611 | ||
1612 | -/* | |
1613 | - * lock the group_info alloc_sem of all the groups | |
1614 | - * belonging to the same buddy cache page. This | |
1615 | - * makes sure other parallel operations on the buddy | |
1616 | - * cache don't happen while holding the buddy cache | |
1617 | - * lock | |
1618 | - */ | |
1619 | -int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) | |
1620 | -{ | |
1621 | - int i; | |
1622 | - int block, pnum; | |
1623 | - int blocks_per_page; | |
1624 | - int groups_per_page; | |
1625 | - ext4_group_t first_group; | |
1626 | - struct ext4_group_info *grp; | |
1627 | - | |
1628 | - blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | |
1629 | - /* | |
1630 | - * the buddy cache inode stores the block bitmap | |
1631 | - * and buddy information in consecutive blocks. | |
1632 | - * So for each group we need two blocks. | |
1633 | - */ | |
1634 | - block = group * 2; | |
1635 | - pnum = block / blocks_per_page; | |
1636 | - first_group = pnum * blocks_per_page / 2; | |
1637 | - | |
1638 | - groups_per_page = blocks_per_page >> 1; | |
1639 | - if (groups_per_page == 0) | |
1640 | - groups_per_page = 1; | |
1641 | - /* read all groups the page covers into the cache */ | |
1642 | - for (i = 0; i < groups_per_page; i++) { | |
1643 | - | |
1644 | - if ((first_group + i) >= EXT4_SB(sb)->s_groups_count) | |
1645 | - break; | |
1646 | - grp = ext4_get_group_info(sb, first_group + i); | |
1647 | - /* take all groups write allocation | |
1648 | - * semaphore. This makes sure there is | |
1649 | - * no block allocation going on in any | |
1650 | - * of those groups | |
1651 | - */ | |
1652 | - down_write(&grp->alloc_sem); | |
1653 | - } | |
1654 | - return i; | |
1655 | -} | |
1656 | - | |
1657 | -void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | |
1658 | - ext4_group_t group, int locked_group) | |
1659 | -{ | |
1660 | - int i; | |
1661 | - int block, pnum; | |
1662 | - int blocks_per_page; | |
1663 | - ext4_group_t first_group; | |
1664 | - struct ext4_group_info *grp; | |
1665 | - | |
1666 | - blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | |
1667 | - /* | |
1668 | - * the buddy cache inode stores the block bitmap | |
1669 | - * and buddy information in consecutive blocks. | |
1670 | - * So for each group we need two blocks. | |
1671 | - */ | |
1672 | - block = group * 2; | |
1673 | - pnum = block / blocks_per_page; | |
1674 | - first_group = pnum * blocks_per_page / 2; | |
1675 | - /* release locks on all the groups */ | |
1676 | - for (i = 0; i < locked_group; i++) { | |
1677 | - | |
1678 | - grp = ext4_get_group_info(sb, first_group + i); | |
1679 | - /* take all groups write allocation | |
1680 | - * semaphore. This makes sure there is | |
1681 | - * no block allocation going on in any | |
1682 | - * of those groups | |
1683 | - */ | |
1684 | - up_write(&grp->alloc_sem); | |
1685 | - } | |
1686 | - | |
1687 | -} | |
1688 | - | |
1689 | -static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | |
1690 | -{ | |
1691 | - | |
1692 | - int ret; | |
1693 | - void *bitmap; | |
1694 | - int blocks_per_page; | |
1695 | - int block, pnum, poff; | |
1696 | - int num_grp_locked = 0; | |
1697 | - struct ext4_group_info *this_grp; | |
1698 | - struct ext4_sb_info *sbi = EXT4_SB(sb); | |
1699 | - struct inode *inode = sbi->s_buddy_cache; | |
1700 | - struct page *page = NULL, *bitmap_page = NULL; | |
1701 | - | |
1702 | - mb_debug("init group %lu\n", group); | |
1703 | - blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | |
1704 | - this_grp = ext4_get_group_info(sb, group); | |
1705 | - /* | |
1706 | - * This ensures we don't add group | |
1707 | - * to this buddy cache via resize | |
1708 | - */ | |
1709 | - num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | |
1710 | - if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | |
1711 | - /* | |
1712 | - * somebody initialized the group | |
1713 | - * return without doing anything | |
1714 | - */ | |
1715 | - ret = 0; | |
1716 | - goto err; | |
1717 | - } | |
1718 | - /* | |
1719 | - * the buddy cache inode stores the block bitmap | |
1720 | - * and buddy information in consecutive blocks. | |
1721 | - * So for each group we need two blocks. | |
1722 | - */ | |
1723 | - block = group * 2; | |
1724 | - pnum = block / blocks_per_page; | |
1725 | - poff = block % blocks_per_page; | |
1726 | - page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | |
1727 | - if (page) { | |
1728 | - BUG_ON(page->mapping != inode->i_mapping); | |
1729 | - ret = ext4_mb_init_cache(page, NULL); | |
1730 | - if (ret) { | |
1731 | - unlock_page(page); | |
1732 | - goto err; | |
1733 | - } | |
1734 | - unlock_page(page); | |
1735 | - } | |
1736 | - if (page == NULL || !PageUptodate(page)) { | |
1737 | - ret = -EIO; | |
1738 | - goto err; | |
1739 | - } | |
1740 | - mark_page_accessed(page); | |
1741 | - bitmap_page = page; | |
1742 | - bitmap = page_address(page) + (poff * sb->s_blocksize); | |
1743 | - | |
1744 | - /* init buddy cache */ | |
1745 | - block++; | |
1746 | - pnum = block / blocks_per_page; | |
1747 | - poff = block % blocks_per_page; | |
1748 | - page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | |
1749 | - if (page == bitmap_page) { | |
1750 | - /* | |
1751 | - * If both the bitmap and buddy are in | |
1752 | - * the same page we don't need to force | |
1753 | - * init the buddy | |
1754 | - */ | |
1755 | - unlock_page(page); | |
1756 | - } else if (page) { | |
1757 | - BUG_ON(page->mapping != inode->i_mapping); | |
1758 | - ret = ext4_mb_init_cache(page, bitmap); | |
1759 | - if (ret) { | |
1760 | - unlock_page(page); | |
1761 | - goto err; | |
1762 | - } | |
1763 | - unlock_page(page); | |
1764 | - } | |
1765 | - if (page == NULL || !PageUptodate(page)) { | |
1766 | - ret = -EIO; | |
1767 | - goto err; | |
1768 | - } | |
1769 | - mark_page_accessed(page); | |
1770 | -err: | |
1771 | - ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | |
1772 | - if (bitmap_page) | |
1773 | - page_cache_release(bitmap_page); | |
1774 | - if (page) | |
1775 | - page_cache_release(page); | |
1776 | - return ret; | |
1777 | -} | |
1778 | - | |
1779 | static noinline_for_stack int | |
1780 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |
1781 | { | |
1782 | @@ -2020,7 +1775,7 @@ | |
1783 | group = 0; | |
1784 | ||
1785 | /* quick check to skip empty groups */ | |
1786 | + grp = ext4_get_group_info(ac->ac_sb, group); | |
1787 | - grp = ext4_get_group_info(sb, group); | |
1788 | if (grp->bb_free == 0) | |
1789 | continue; | |
1790 | ||
1791 | @@ -2033,9 +1788,10 @@ | |
1792 | * we need full data about the group | |
1793 | * to make a good selection | |
1794 | */ | |
1795 | + err = ext4_mb_load_buddy(sb, group, &e4b); | |
1796 | - err = ext4_mb_init_group(sb, group); | |
1797 | if (err) | |
1798 | goto out; | |
1799 | + ext4_mb_release_desc(&e4b); | |
1800 | } | |
1801 | ||
1802 | /* | |
1803 | @@ -2543,8 +2299,6 @@ | |
1804 | } | |
1805 | ||
1806 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | |
1807 | - init_rwsem(&meta_group_info[i]->alloc_sem); | |
1808 | - meta_group_info[i]->bb_free_root.rb_node = NULL;; | |
1809 | ||
1810 | #ifdef DOUBLE_CHECK | |
1811 | { | |
1812 | @@ -2571,6 +2325,54 @@ | |
1813 | } /* ext4_mb_add_groupinfo */ | |
1814 | ||
1815 | /* | |
1816 | + * Add a group to the existing groups. | |
1817 | + * This function is used for online resize | |
1818 | + */ | |
1819 | +int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group, | |
1820 | + struct ext4_group_desc *desc) | |
1821 | +{ | |
1822 | + struct ext4_sb_info *sbi = EXT4_SB(sb); | |
1823 | + struct inode *inode = sbi->s_buddy_cache; | |
1824 | + int blocks_per_page; | |
1825 | + int block; | |
1826 | + int pnum; | |
1827 | + struct page *page; | |
1828 | + int err; | |
1829 | + | |
1830 | + /* Add group based on group descriptor*/ | |
1831 | + err = ext4_mb_add_groupinfo(sb, group, desc); | |
1832 | + if (err) | |
1833 | + return err; | |
1834 | + | |
1835 | + /* | |
1836 | + * Cache pages containing dynamic mb_alloc data (buddy and bitmap | |
1837 | + * data) are set not up to date so that they will be re-initialized | |
1838 | + * during the next call to ext4_mb_load_buddy | |
1839 | + */ | |
1840 | + | |
1841 | + /* Set buddy page as not up to date */ | |
1842 | + blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | |
1843 | + block = group * 2; | |
1844 | + pnum = block / blocks_per_page; | |
1845 | + page = find_get_page(inode->i_mapping, pnum); | |
1846 | + if (page != NULL) { | |
1847 | + ClearPageUptodate(page); | |
1848 | + page_cache_release(page); | |
1849 | + } | |
1850 | + | |
1851 | + /* Set bitmap page as not up to date */ | |
1852 | + block++; | |
1853 | + pnum = block / blocks_per_page; | |
1854 | + page = find_get_page(inode->i_mapping, pnum); | |
1855 | + if (page != NULL) { | |
1856 | + ClearPageUptodate(page); | |
1857 | + page_cache_release(page); | |
1858 | + } | |
1859 | + | |
1860 | + return 0; | |
1861 | +} | |
1862 | + | |
1863 | +/* | |
1864 | * Update an existing group. | |
1865 | * This function is used for online resize | |
1866 | */ | |
1867 | @@ -2693,12 +2495,10 @@ | |
1868 | clear_opt(sbi->s_mount_opt, MBALLOC); | |
1869 | return -ENOMEM; | |
1870 | } | |
1871 | - | |
1872 | - i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int); | |
1873 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | |
1874 | if (sbi->s_mb_maxs == NULL) { | |
1875 | clear_opt(sbi->s_mount_opt, MBALLOC); | |
1876 | + kfree(sbi->s_mb_maxs); | |
1877 | - kfree(sbi->s_mb_offsets); | |
1878 | return -ENOMEM; | |
1879 | } | |
1880 | ||
1881 | @@ -2858,11 +2658,13 @@ | |
1882 | static noinline_for_stack void | |
1883 | ext4_mb_free_committed_blocks(struct super_block *sb) | |
1884 | { | |
1885 | + struct ext4_sb_info *sbi = EXT4_SB(sb); | |
1886 | + int err; | |
1887 | + int i; | |
1888 | + int count = 0; | |
1889 | + int count2 = 0; | |
1890 | + struct ext4_free_metadata *md; | |
1891 | struct ext4_buddy e4b; | |
1892 | - struct ext4_group_info *db; | |
1893 | - struct ext4_sb_info *sbi = EXT4_SB(sb); | |
1894 | - int err, count = 0, count2 = 0; | |
1895 | - struct ext4_free_data *entry; | |
1896 | ||
1897 | if (list_empty(&sbi->s_committed_transaction)) | |
1898 | return; | |
1899 | @@ -2870,46 +2672,44 @@ | |
1900 | /* there are committed blocks to be freed yet */ | |
1901 | do { | |
1902 | /* get next array of blocks */ | |
1903 | + md = NULL; | |
1904 | - entry = NULL; | |
1905 | spin_lock(&sbi->s_md_lock); | |
1906 | if (!list_empty(&sbi->s_committed_transaction)) { | |
1907 | + md = list_entry(sbi->s_committed_transaction.next, | |
1908 | + struct ext4_free_metadata, list); | |
1909 | + list_del(&md->list); | |
1910 | - entry = list_entry(sbi->s_committed_transaction.next, | |
1911 | - struct ext4_free_data, list); | |
1912 | - list_del(&entry->list); | |
1913 | } | |
1914 | spin_unlock(&sbi->s_md_lock); | |
1915 | ||
1916 | + if (md == NULL) | |
1917 | - if (entry == NULL) | |
1918 | break; | |
1919 | ||
1920 | mb_debug("gonna free %u blocks in group %lu (0x%p):", | |
1921 | + md->num, md->group, md); | |
1922 | - entry->count, entry->group, entry); | |
1923 | ||
1924 | + err = ext4_mb_load_buddy(sb, md->group, &e4b); | |
1925 | - err = ext4_mb_load_buddy(sb, entry->group, &e4b); | |
1926 | /* we expect to find existing buddy because it's pinned */ | |
1927 | BUG_ON(err != 0); | |
1928 | ||
1929 | - db = e4b.bd_info; | |
1930 | /* there are blocks to put in buddy to make them really free */ | |
1931 | + count += md->num; | |
1932 | - count += entry->count; | |
1933 | count2++; | |
1934 | + ext4_lock_group(sb, md->group); | |
1935 | + for (i = 0; i < md->num; i++) { | |
1936 | + mb_debug(" %u", md->blocks[i]); | |
1937 | + mb_free_blocks(NULL, &e4b, md->blocks[i], 1); | |
1938 | - ext4_lock_group(sb, entry->group); | |
1939 | - /* Take it out of per group rb tree */ | |
1940 | - rb_erase(&entry->node, &(db->bb_free_root)); | |
1941 | - mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); | |
1942 | - | |
1943 | - if (!db->bb_free_root.rb_node) { | |
1944 | - /* No more items in the per group rb tree | |
1945 | - * balance refcounts from ext4_mb_free_metadata() | |
1946 | - */ | |
1947 | - page_cache_release(e4b.bd_buddy_page); | |
1948 | - page_cache_release(e4b.bd_bitmap_page); | |
1949 | } | |
1950 | + mb_debug("\n"); | |
1951 | + ext4_unlock_group(sb, md->group); | |
1952 | - ext4_unlock_group(sb, entry->group); | |
1953 | ||
1954 | + /* balance refcounts from ext4_mb_free_metadata() */ | |
1955 | + page_cache_release(e4b.bd_buddy_page); | |
1956 | + page_cache_release(e4b.bd_bitmap_page); | |
1957 | + | |
1958 | + kfree(md); | |
1959 | - kmem_cache_free(ext4_free_ext_cachep, entry); | |
1960 | ext4_mb_release_desc(&e4b); | |
1961 | + | |
1962 | + } while (md); | |
1963 | - } while (1); | |
1964 | ||
1965 | mb_debug("freed %u blocks in %u structures\n", count, count2); | |
1966 | } | |
1967 | @@ -3064,16 +2864,6 @@ | |
1968 | kmem_cache_destroy(ext4_pspace_cachep); | |
1969 | return -ENOMEM; | |
1970 | } | |
1971 | - | |
1972 | - ext4_free_ext_cachep = | |
1973 | - kmem_cache_create("ext4_free_block_extents", | |
1974 | - sizeof(struct ext4_free_data), | |
1975 | - 0, SLAB_RECLAIM_ACCOUNT, NULL); | |
1976 | - if (ext4_free_ext_cachep == NULL) { | |
1977 | - kmem_cache_destroy(ext4_pspace_cachep); | |
1978 | - kmem_cache_destroy(ext4_ac_cachep); | |
1979 | - return -ENOMEM; | |
1980 | - } | |
1981 | #ifdef CONFIG_PROC_FS | |
1982 | proc_root_ext4 = proc_mkdir("fs/ext4", NULL); | |
1983 | if (proc_root_ext4 == NULL) | |
1984 | @@ -3090,7 +2880,6 @@ | |
1985 | #ifdef CONFIG_PROC_FS | |
1986 | remove_proc_entry("fs/ext4", NULL); | |
1987 | #endif | |
1988 | - kmem_cache_destroy(ext4_free_ext_cachep); | |
1989 | } | |
1990 | ||
1991 | ||
1992 | @@ -3152,8 +2941,8 @@ | |
1993 | in_range(block + len - 1, ext4_inode_table(sb, gdp), | |
1994 | EXT4_SB(sb)->s_itb_per_group)) { | |
1995 | ext4_error(sb, __func__, | |
1996 | + "Allocating block in system zone - block = %llu", | |
1997 | + block); | |
1998 | - "Allocating block %llu in system zone of %lu group\n", | |
1999 | - block, ac->ac_b_ex.fe_group); | |
2000 | /* File system mounted not to panic on error | |
2001 | * Fix the bitmap and repeat the block allocation | |
2002 | * We leak some of the blocks here. | |
2003 | @@ -3175,9 +2964,10 @@ | |
2004 | } | |
2005 | } | |
2006 | #endif | |
2007 | + mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data, | |
2008 | + ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); | |
2009 | + | |
2010 | spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | |
2011 | - mb_set_bits(NULL, bitmap_bh->b_data, | |
2012 | - ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); | |
2013 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | |
2014 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | |
2015 | gdp->bg_free_blocks_count = | |
2016 | @@ -3400,7 +3190,7 @@ | |
2017 | } | |
2018 | BUG_ON(start + size <= ac->ac_o_ex.fe_logical && | |
2019 | start > ac->ac_o_ex.fe_logical); | |
2020 | + BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | |
2021 | - BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); | |
2022 | ||
2023 | /* now prepare goal request */ | |
2024 | ||
2025 | @@ -3610,37 +3400,10 @@ | |
2026 | ac->ac_criteria = 20; | |
2027 | return 1; | |
2028 | } | |
2029 | - | |
2030 | return 0; | |
2031 | } | |
2032 | ||
2033 | /* | |
2034 | - * the function goes through all blocks freed in the group | |
2035 | - * but not yet committed and marks them used in in-core bitmap. | |
2036 | - * buddy must be generated from this bitmap | |
2037 | - * Need to be called with ext4 group lock (ext4_lock_group) | |
2038 | - */ | |
2039 | -static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | |
2040 | - ext4_group_t group) | |
2041 | -{ | |
2042 | - struct rb_node *n; | |
2043 | - struct ext4_group_info *grp; | |
2044 | - struct ext4_free_data *entry; | |
2045 | - | |
2046 | - grp = ext4_get_group_info(sb, group); | |
2047 | - n = rb_first(&(grp->bb_free_root)); | |
2048 | - | |
2049 | - while (n) { | |
2050 | - entry = rb_entry(n, struct ext4_free_data, node); | |
2051 | - mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), | |
2052 | - bitmap, entry->start_blk, | |
2053 | - entry->count); | |
2054 | - n = rb_next(n); | |
2055 | - } | |
2056 | - return; | |
2057 | -} | |
2058 | - | |
2059 | -/* | |
2060 | * the function goes through all preallocation in this group and marks them | |
2061 | * used in in-core bitmap. buddy must be generated from this bitmap | |
2062 | * Need to be called with ext4 group lock (ext4_lock_group) | |
2063 | @@ -3698,7 +3461,6 @@ | |
2064 | struct super_block *sb, struct ext4_prealloc_space *pa) | |
2065 | { | |
2066 | unsigned long grp; | |
2067 | - ext4_fsblk_t grp_blk; | |
2068 | ||
2069 | if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) | |
2070 | return; | |
2071 | @@ -3713,12 +3475,8 @@ | |
2072 | pa->pa_deleted = 1; | |
2073 | spin_unlock(&pa->pa_lock); | |
2074 | ||
2075 | + /* -1 is to protect from crossing allocation group */ | |
2076 | + ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL); | |
2077 | - grp_blk = pa->pa_pstart; | |
2078 | - /* If linear, pa_pstart may be in the next group when pa is used up */ | |
2079 | - if (pa->pa_linear) | |
2080 | - grp_blk--; | |
2081 | - | |
2082 | - ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL); | |
2083 | ||
2084 | /* | |
2085 | * possible race: | |
2086 | @@ -3807,8 +3565,6 @@ | |
2087 | pa->pa_free = pa->pa_len; | |
2088 | atomic_set(&pa->pa_count, 1); | |
2089 | spin_lock_init(&pa->pa_lock); | |
2090 | - INIT_LIST_HEAD(&pa->pa_inode_list); | |
2091 | - INIT_LIST_HEAD(&pa->pa_group_list); | |
2092 | pa->pa_deleted = 0; | |
2093 | pa->pa_linear = 0; | |
2094 | ||
2095 | @@ -3867,7 +3623,6 @@ | |
2096 | atomic_set(&pa->pa_count, 1); | |
2097 | spin_lock_init(&pa->pa_lock); | |
2098 | INIT_LIST_HEAD(&pa->pa_inode_list); | |
2099 | - INIT_LIST_HEAD(&pa->pa_group_list); | |
2100 | pa->pa_deleted = 0; | |
2101 | pa->pa_linear = 1; | |
2102 | ||
2103 | @@ -4411,7 +4166,6 @@ | |
2104 | ac->ac_pa = NULL; | |
2105 | ac->ac_bitmap_page = NULL; | |
2106 | ac->ac_buddy_page = NULL; | |
2107 | - ac->alloc_semp = NULL; | |
2108 | ac->ac_lg = NULL; | |
2109 | ||
2110 | /* we have to define context: we'll we work with a file or | |
2111 | @@ -4532,7 +4286,7 @@ | |
2112 | pa_inode_list) { | |
2113 | spin_lock(&tmp_pa->pa_lock); | |
2114 | if (tmp_pa->pa_deleted) { | |
2115 | + spin_unlock(&pa->pa_lock); | |
2116 | - spin_unlock(&tmp_pa->pa_lock); | |
2117 | continue; | |
2118 | } | |
2119 | if (!added && pa->pa_free < tmp_pa->pa_free) { | |
2120 | @@ -4577,23 +4331,18 @@ | |
2121 | pa->pa_free -= ac->ac_b_ex.fe_len; | |
2122 | pa->pa_len -= ac->ac_b_ex.fe_len; | |
2123 | spin_unlock(&pa->pa_lock); | |
2124 | + /* | |
2125 | + * We want to add the pa to the right bucket. | |
2126 | + * Remove it from the list and while adding | |
2127 | + * make sure the list to which we are adding | |
2128 | + * doesn't grow big. | |
2129 | + */ | |
2130 | + if (likely(pa->pa_free)) { | |
2131 | + spin_lock(pa->pa_obj_lock); | |
2132 | + list_del_rcu(&pa->pa_inode_list); | |
2133 | + spin_unlock(pa->pa_obj_lock); | |
2134 | + ext4_mb_add_n_trim(ac); | |
2135 | + } | |
2136 | - } | |
2137 | - } | |
2138 | - if (ac->alloc_semp) | |
2139 | - up_read(ac->alloc_semp); | |
2140 | - if (pa) { | |
2141 | - /* | |
2142 | - * We want to add the pa to the right bucket. | |
2143 | - * Remove it from the list and while adding | |
2144 | - * make sure the list to which we are adding | |
2145 | - * doesn't grow big. We need to release | |
2146 | - * alloc_semp before calling ext4_mb_add_n_trim() | |
2147 | - */ | |
2148 | - if (pa->pa_linear && likely(pa->pa_free)) { | |
2149 | - spin_lock(pa->pa_obj_lock); | |
2150 | - list_del_rcu(&pa->pa_inode_list); | |
2151 | - spin_unlock(pa->pa_obj_lock); | |
2152 | - ext4_mb_add_n_trim(ac); | |
2153 | } | |
2154 | ext4_mb_put_pa(ac, ac->ac_sb, pa); | |
2155 | } | |
2156 | @@ -4700,14 +4449,10 @@ | |
2157 | ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) | |
2158 | ext4_mb_new_preallocation(ac); | |
2159 | } | |
2160 | + | |
2161 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | |
2162 | *errp = ext4_mb_mark_diskspace_used(ac, handle); | |
2163 | if (*errp == -EAGAIN) { | |
2164 | - /* | |
2165 | - * drop the reference that we took | |
2166 | - * in ext4_mb_use_best_found | |
2167 | - */ | |
2168 | - ext4_mb_release_context(ac); | |
2169 | ac->ac_b_ex.fe_group = 0; | |
2170 | ac->ac_b_ex.fe_start = 0; | |
2171 | ac->ac_b_ex.fe_len = 0; | |
2172 | @@ -4772,97 +4517,65 @@ | |
2173 | ext4_mb_free_committed_blocks(sb); | |
2174 | } | |
2175 | ||
2176 | -/* | |
2177 | - * We can merge two free data extents only if the physical blocks | |
2178 | - * are contiguous, AND the extents were freed by the same transaction, | |
2179 | - * AND the blocks are associated with the same group. | |
2180 | - */ | |
2181 | -static int can_merge(struct ext4_free_data *entry1, | |
2182 | - struct ext4_free_data *entry2) | |
2183 | -{ | |
2184 | - if ((entry1->t_tid == entry2->t_tid) && | |
2185 | - (entry1->group == entry2->group) && | |
2186 | - ((entry1->start_blk + entry1->count) == entry2->start_blk)) | |
2187 | - return 1; | |
2188 | - return 0; | |
2189 | -} | |
2190 | - | |
2191 | static noinline_for_stack int | |
2192 | ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |
2193 | + ext4_group_t group, ext4_grpblk_t block, int count) | |
2194 | - struct ext4_free_data *new_entry) | |
2195 | { | |
2196 | - ext4_grpblk_t block; | |
2197 | - struct ext4_free_data *entry; | |
2198 | struct ext4_group_info *db = e4b->bd_info; | |
2199 | struct super_block *sb = e4b->bd_sb; | |
2200 | struct ext4_sb_info *sbi = EXT4_SB(sb); | |
2201 | + struct ext4_free_metadata *md; | |
2202 | + int i; | |
2203 | - struct rb_node **n = &db->bb_free_root.rb_node, *node; | |
2204 | - struct rb_node *parent = NULL, *new_node; | |
2205 | ||
2206 | BUG_ON(e4b->bd_bitmap_page == NULL); | |
2207 | BUG_ON(e4b->bd_buddy_page == NULL); | |
2208 | ||
2209 | + ext4_lock_group(sb, group); | |
2210 | + for (i = 0; i < count; i++) { | |
2211 | + md = db->bb_md_cur; | |
2212 | + if (md && db->bb_tid != handle->h_transaction->t_tid) { | |
2213 | + db->bb_md_cur = NULL; | |
2214 | + md = NULL; | |
2215 | - new_node = &new_entry->node; | |
2216 | - block = new_entry->start_blk; | |
2217 | - | |
2218 | - if (!*n) { | |
2219 | - /* first free block extent. We need to | |
2220 | - protect buddy cache from being freed, | |
2221 | - * otherwise we'll refresh it from | |
2222 | - * on-disk bitmap and lose not-yet-available | |
2223 | - * blocks */ | |
2224 | - page_cache_get(e4b->bd_buddy_page); | |
2225 | - page_cache_get(e4b->bd_bitmap_page); | |
2226 | - } | |
2227 | - while (*n) { | |
2228 | - parent = *n; | |
2229 | - entry = rb_entry(parent, struct ext4_free_data, node); | |
2230 | - if (block < entry->start_blk) | |
2231 | - n = &(*n)->rb_left; | |
2232 | - else if (block >= (entry->start_blk + entry->count)) | |
2233 | - n = &(*n)->rb_right; | |
2234 | - else { | |
2235 | - ext4_error(sb, __func__, | |
2236 | - "Double free of blocks %d (%d %d)\n", | |
2237 | - block, entry->start_blk, entry->count); | |
2238 | - return 0; | |
2239 | } | |
2240 | - } | |
2241 | ||
2242 | + if (md == NULL) { | |
2243 | + ext4_unlock_group(sb, group); | |
2244 | + md = kmalloc(sizeof(*md), GFP_NOFS); | |
2245 | + if (md == NULL) | |
2246 | + return -ENOMEM; | |
2247 | + md->num = 0; | |
2248 | + md->group = group; | |
2249 | + | |
2250 | + ext4_lock_group(sb, group); | |
2251 | + if (db->bb_md_cur == NULL) { | |
2252 | + spin_lock(&sbi->s_md_lock); | |
2253 | + list_add(&md->list, &sbi->s_active_transaction); | |
2254 | + spin_unlock(&sbi->s_md_lock); | |
2255 | + /* protect buddy cache from being freed, | |
2256 | + * otherwise we'll refresh it from | |
2257 | + * on-disk bitmap and lose not-yet-available | |
2258 | + * blocks */ | |
2259 | + page_cache_get(e4b->bd_buddy_page); | |
2260 | + page_cache_get(e4b->bd_bitmap_page); | |
2261 | + db->bb_md_cur = md; | |
2262 | + db->bb_tid = handle->h_transaction->t_tid; | |
2263 | + mb_debug("new md 0x%p for group %lu\n", | |
2264 | + md, md->group); | |
2265 | + } else { | |
2266 | + kfree(md); | |
2267 | + md = db->bb_md_cur; | |
2268 | + } | |
2269 | - rb_link_node(new_node, parent, n); | |
2270 | - rb_insert_color(new_node, &db->bb_free_root); | |
2271 | - | |
2272 | - /* Now try to see the extent can be merged to left and right */ | |
2273 | - node = rb_prev(new_node); | |
2274 | - if (node) { | |
2275 | - entry = rb_entry(node, struct ext4_free_data, node); | |
2276 | - if (can_merge(entry, new_entry)) { | |
2277 | - new_entry->start_blk = entry->start_blk; | |
2278 | - new_entry->count += entry->count; | |
2279 | - rb_erase(node, &(db->bb_free_root)); | |
2280 | - spin_lock(&sbi->s_md_lock); | |
2281 | - list_del(&entry->list); | |
2282 | - spin_unlock(&sbi->s_md_lock); | |
2283 | - kmem_cache_free(ext4_free_ext_cachep, entry); | |
2284 | } | |
2285 | - } | |
2286 | ||
2287 | + BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS); | |
2288 | + md->blocks[md->num] = block + i; | |
2289 | + md->num++; | |
2290 | + if (md->num == EXT4_BB_MAX_BLOCKS) { | |
2291 | + /* no more space, put full container on a sb's list */ | |
2292 | + db->bb_md_cur = NULL; | |
2293 | - node = rb_next(new_node); | |
2294 | - if (node) { | |
2295 | - entry = rb_entry(node, struct ext4_free_data, node); | |
2296 | - if (can_merge(new_entry, entry)) { | |
2297 | - new_entry->count += entry->count; | |
2298 | - rb_erase(node, &(db->bb_free_root)); | |
2299 | - spin_lock(&sbi->s_md_lock); | |
2300 | - list_del(&entry->list); | |
2301 | - spin_unlock(&sbi->s_md_lock); | |
2302 | - kmem_cache_free(ext4_free_ext_cachep, entry); | |
2303 | } | |
2304 | } | |
2305 | + ext4_unlock_group(sb, group); | |
2306 | - /* Add the extent to active_transaction list */ | |
2307 | - spin_lock(&sbi->s_md_lock); | |
2308 | - list_add(&new_entry->list, &sbi->s_active_transaction); | |
2309 | - spin_unlock(&sbi->s_md_lock); | |
2310 | return 0; | |
2311 | } | |
2312 | ||
2313 | @@ -4962,6 +4675,11 @@ | |
2314 | err = ext4_journal_get_write_access(handle, gd_bh); | |
2315 | if (err) | |
2316 | goto error_return; | |
2317 | + | |
2318 | + err = ext4_mb_load_buddy(sb, block_group, &e4b); | |
2319 | + if (err) | |
2320 | + goto error_return; | |
2321 | + | |
2322 | #ifdef AGGRESSIVE_CHECK | |
2323 | { | |
2324 | int i; | |
2325 | @@ -4969,6 +4687,13 @@ | |
2326 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); | |
2327 | } | |
2328 | #endif | |
2329 | + mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, | |
2330 | + bit, count); | |
2331 | + | |
2332 | + /* We dirtied the bitmap block */ | |
2333 | + BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | |
2334 | + err = ext4_journal_dirty_metadata(handle, bitmap_bh); | |
2335 | + | |
2336 | if (ac) { | |
2337 | ac->ac_b_ex.fe_group = block_group; | |
2338 | ac->ac_b_ex.fe_start = bit; | |
2339 | @@ -4976,33 +4701,12 @@ | |
2340 | ext4_mb_store_history(ac); | |
2341 | } | |
2342 | ||
2343 | - err = ext4_mb_load_buddy(sb, block_group, &e4b); | |
2344 | - if (err) | |
2345 | - goto error_return; | |
2346 | if (metadata) { | |
2347 | + /* blocks being freed are metadata. these blocks shouldn't | |
2348 | + * be used until this transaction is committed */ | |
2349 | + ext4_mb_free_metadata(handle, &e4b, block_group, bit, count); | |
2350 | - struct ext4_free_data *new_entry; | |
2351 | - /* | |
2352 | - * blocks being freed are metadata. these blocks shouldn't | |
2353 | - * be used until this transaction is committed | |
2354 | - */ | |
2355 | - new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); | |
2356 | - new_entry->start_blk = bit; | |
2357 | - new_entry->group = block_group; | |
2358 | - new_entry->count = count; | |
2359 | - new_entry->t_tid = handle->h_transaction->t_tid; | |
2360 | - ext4_lock_group(sb, block_group); | |
2361 | - mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, | |
2362 | - bit, count); | |
2363 | - ext4_mb_free_metadata(handle, &e4b, new_entry); | |
2364 | - ext4_unlock_group(sb, block_group); | |
2365 | } else { | |
2366 | ext4_lock_group(sb, block_group); | |
2367 | - /* need to update group_info->bb_free and bitmap | |
2368 | - * with group lock held. generate_buddy look at | |
2369 | - * them with group lock_held | |
2370 | - */ | |
2371 | - mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, | |
2372 | - bit, count); | |
2373 | mb_free_blocks(inode, &e4b, bit, count); | |
2374 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); | |
2375 | ext4_unlock_group(sb, block_group); | |
2376 | @@ -5025,10 +4729,6 @@ | |
2377 | ||
2378 | *freed += count; | |
2379 | ||
2380 | - /* We dirtied the bitmap block */ | |
2381 | - BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | |
2382 | - err = ext4_journal_dirty_metadata(handle, bitmap_bh); | |
2383 | - | |
2384 | /* And the group descriptor block */ | |
2385 | BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); | |
2386 | ret = ext4_journal_dirty_metadata(handle, gd_bh); | |
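The mballoc.c hunks above keep recomputing where a group's blocks live in the buddy cache inode: each group owns two consecutive blocks, block 2*g for the bitmap and block 2*g + 1 for the buddy, so the page number and in-page offset fall out of plain division. A minimal sketch of that arithmetic, assuming the kernel APIs used in this tree (the helper name mb_buddy_cache_index is hypothetical):

	/*
	 * Sketch only: map (group, which block) to a page index and a
	 * block offset inside that page of the buddy cache inode.
	 */
	static void mb_buddy_cache_index(struct super_block *sb,
					 ext4_group_t group, int want_buddy,
					 int *pnum, int *poff)
	{
		int blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
		int block = group * 2 + !!want_buddy; /* 0 = bitmap, 1 = buddy */

		*pnum = block / blocks_per_page;  /* which cache page */
		*poff = block % blocks_per_page;  /* which block within it */
	}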
2387 | reverted: | |
2388 | --- b/fs/ext4/mballoc.h | |
2389 | +++ a/fs/ext4/mballoc.h | |
2390 | @@ -18,7 +18,6 @@ | |
2391 | #include <linux/pagemap.h> | |
2392 | #include <linux/seq_file.h> | |
2393 | #include <linux/version.h> | |
2394 | -#include <linux/mutex.h> | |
2395 | #include "ext4_jbd2.h" | |
2396 | #include "ext4.h" | |
2397 | #include "group.h" | |
2398 | @@ -97,27 +96,25 @@ | |
2399 | */ | |
2400 | #define MB_DEFAULT_GROUP_PREALLOC 512 | |
2401 | ||
2402 | +static struct kmem_cache *ext4_pspace_cachep; | |
2403 | +static struct kmem_cache *ext4_ac_cachep; | |
2404 | -struct ext4_free_data { | |
2405 | - /* this links the free block information from group_info */ | |
2406 | - struct rb_node node; | |
2407 | ||
2408 | +#ifdef EXT4_BB_MAX_BLOCKS | |
2409 | +#undef EXT4_BB_MAX_BLOCKS | |
2410 | +#endif | |
2411 | +#define EXT4_BB_MAX_BLOCKS 30 | |
2412 | - /* this links the free block information from ext4_sb_info */ | |
2413 | - struct list_head list; | |
2414 | ||
2415 | +struct ext4_free_metadata { | |
2416 | - /* group which free block extent belongs */ | |
2417 | ext4_group_t group; | |
2418 | + unsigned short num; | |
2419 | + ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; | |
2420 | + struct list_head list; | |
2421 | - | |
2422 | - /* free block extent */ | |
2423 | - ext4_grpblk_t start_blk; | |
2424 | - ext4_grpblk_t count; | |
2425 | - | |
2426 | - /* transaction which freed this extent */ | |
2427 | - tid_t t_tid; | |
2428 | }; | |
2429 | ||
2430 | struct ext4_group_info { | |
2431 | unsigned long bb_state; | |
2432 | + unsigned long bb_tid; | |
2433 | + struct ext4_free_metadata *bb_md_cur; | |
2434 | - struct rb_root bb_free_root; | |
2435 | unsigned short bb_first_free; | |
2436 | unsigned short bb_free; | |
2437 | unsigned short bb_fragments; | |
2438 | @@ -125,7 +122,6 @@ | |
2439 | #ifdef DOUBLE_CHECK | |
2440 | void *bb_bitmap; | |
2441 | #endif | |
2442 | - struct rw_semaphore alloc_sem; | |
2443 | unsigned short bb_counters[]; | |
2444 | }; | |
2445 | ||
2446 | @@ -213,11 +209,6 @@ | |
2447 | __u8 ac_op; /* operation, for history only */ | |
2448 | struct page *ac_bitmap_page; | |
2449 | struct page *ac_buddy_page; | |
2450 | - /* | |
2451 | - * pointer to the held semaphore upon successful | |
2452 | - * block allocation | |
2453 | - */ | |
2454 | - struct rw_semaphore *alloc_semp; | |
2455 | struct ext4_prealloc_space *ac_pa; | |
2456 | struct ext4_locality_group *ac_lg; | |
2457 | }; | |
2458 | @@ -251,7 +242,6 @@ | |
2459 | struct super_block *bd_sb; | |
2460 | __u16 bd_blkbits; | |
2461 | ext4_group_t bd_group; | |
2462 | - struct rw_semaphore *alloc_semp; | |
2463 | }; | |
2464 | #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) | |
2465 | #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) | |
2466 | @@ -261,6 +251,8 @@ | |
2467 | { | |
2468 | return; | |
2469 | } | |
2470 | +#else | |
2471 | +static void ext4_mb_store_history(struct ext4_allocation_context *ac); | |
2472 | #endif | |
2473 | ||
2474 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | |
2475 | @@ -268,6 +260,19 @@ | |
2476 | static struct proc_dir_entry *proc_root_ext4; | |
2477 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); | |
2478 | ||
2479 | +static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |
2480 | + ext4_group_t group); | |
2481 | +static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); | |
2482 | +static void ext4_mb_free_committed_blocks(struct super_block *); | |
2483 | +static void ext4_mb_return_to_preallocation(struct inode *inode, | |
2484 | + struct ext4_buddy *e4b, sector_t block, | |
2485 | + int count); | |
2486 | +static void ext4_mb_put_pa(struct ext4_allocation_context *, | |
2487 | + struct super_block *, struct ext4_prealloc_space *pa); | |
2488 | +static int ext4_mb_init_per_dev_proc(struct super_block *sb); | |
2489 | +static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); | |
2490 | + | |
2491 | + | |
2492 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | |
2493 | { | |
2494 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | |
2495 | @@ -292,7 +297,7 @@ | |
2496 | &(grinfo->bb_state)); | |
2497 | } | |
2498 | ||
2499 | +static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, | |
2500 | -static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, | |
2501 | struct ext4_free_extent *fex) | |
2502 | { | |
2503 | ext4_fsblk_t block; | |
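The header change above trades the per-extent rb-tree record (ext4_free_data) back for the fixed-size ext4_free_metadata container: up to EXT4_BB_MAX_BLOCKS freed block numbers per group and transaction, accumulated one at a time. A minimal sketch of that accumulation, assuming the structure as declared above (queue_freed_block is a hypothetical helper; locking and list handling are omitted):

	/* Sketch only: append one freed block; a full container tells
	 * the caller to park it on the transaction list and start a
	 * fresh one, as ext4_mb_free_metadata() does above. */
	static int queue_freed_block(struct ext4_free_metadata *md,
				     ext4_grpblk_t block)
	{
		if (md->num >= EXT4_BB_MAX_BLOCKS)
			return -ENOSPC;
		md->blocks[md->num++] = block;
		return 0;
	}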
2504 | reverted: | |
2505 | --- b/fs/ext4/migrate.c | |
2506 | +++ a/fs/ext4/migrate.c | |
2507 | @@ -480,7 +480,7 @@ | |
2508 | + 1); | |
2509 | if (IS_ERR(handle)) { | |
2510 | retval = PTR_ERR(handle); | |
2511 | + goto err_out; | |
2512 | - return retval; | |
2513 | } | |
2514 | tmp_inode = ext4_new_inode(handle, | |
2515 | inode->i_sb->s_root->d_inode, | |
2516 | @@ -488,7 +488,8 @@ | |
2517 | if (IS_ERR(tmp_inode)) { | |
2518 | retval = -ENOMEM; | |
2519 | ext4_journal_stop(handle); | |
2520 | + tmp_inode = NULL; | |
2521 | + goto err_out; | |
2522 | - return retval; | |
2523 | } | |
2524 | i_size_write(tmp_inode, i_size_read(inode)); | |
2525 | /* | |
2526 | @@ -616,7 +617,8 @@ | |
2527 | ||
2528 | ext4_journal_stop(handle); | |
2529 | ||
2530 | + if (tmp_inode) | |
2531 | + iput(tmp_inode); | |
2532 | - iput(tmp_inode); | |
2533 | ||
2534 | return retval; | |
2535 | } | |
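The migrate.c hunks restore a single err_out exit in which tmp_inode is NULL-checked before iput(), so every failure path releases state exactly once. A self-contained sketch of that single-exit idiom in plain C (do_work and the malloc() calls are illustrative, not from the patch):

	#include <stdlib.h>

	/* Sketch only: acquire in order, release at one label; the NULL
	 * checks make the label safe to reach from any failure point,
	 * just as the iput() guard above does. */
	static int do_work(void)
	{
		int ret = 0;
		char *a = NULL, *b = NULL;

		a = malloc(16);
		if (a == NULL) {
			ret = -1;
			goto err_out;
		}
		b = malloc(16);
		if (b == NULL) {
			ret = -1;
			goto err_out;
		}
		/* real work would happen here */
	err_out:
		free(b);	/* free(NULL) is a no-op */
		free(a);
		return ret;
	}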
2536 | reverted: | |
2537 | --- b/fs/ext4/namei.c | |
2538 | +++ a/fs/ext4/namei.c | |
2539 | @@ -371,8 +371,6 @@ | |
2540 | goto fail; | |
2541 | } | |
2542 | hinfo->hash_version = root->info.hash_version; | |
2543 | - if (hinfo->hash_version <= DX_HASH_TEA) | |
2544 | - hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; | |
2545 | hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; | |
2546 | if (dentry) | |
2547 | ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); | |
2548 | @@ -642,9 +640,6 @@ | |
2549 | dir = dir_file->f_path.dentry->d_inode; | |
2550 | if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { | |
2551 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; | |
2552 | - if (hinfo.hash_version <= DX_HASH_TEA) | |
2553 | - hinfo.hash_version += | |
2554 | - EXT4_SB(dir->i_sb)->s_hash_unsigned; | |
2555 | hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; | |
2556 | count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, | |
2557 | start_hash, start_minor_hash); | |
2558 | @@ -1055,16 +1050,8 @@ | |
2559 | return ERR_PTR(-EIO); | |
2560 | } | |
2561 | inode = ext4_iget(dir->i_sb, ino); | |
2562 | + if (IS_ERR(inode)) | |
2563 | + return ERR_CAST(inode); | |
2564 | - if (unlikely(IS_ERR(inode))) { | |
2565 | - if (PTR_ERR(inode) == -ESTALE) { | |
2566 | - ext4_error(dir->i_sb, __func__, | |
2567 | - "deleted inode referenced: %u", | |
2568 | - ino); | |
2569 | - return ERR_PTR(-EIO); | |
2570 | - } else { | |
2571 | - return ERR_CAST(inode); | |
2572 | - } | |
2573 | - } | |
2574 | } | |
2575 | return d_splice_alias(inode, dentry); | |
2576 | } | |
2577 | @@ -1390,7 +1377,7 @@ | |
2578 | struct fake_dirent *fde; | |
2579 | ||
2580 | blocksize = dir->i_sb->s_blocksize; | |
2581 | + dxtrace(printk("Creating index\n")); | |
2582 | - dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); | |
2583 | retval = ext4_journal_get_write_access(handle, bh); | |
2584 | if (retval) { | |
2585 | ext4_std_error(dir->i_sb, retval); | |
2586 | @@ -1399,20 +1386,6 @@ | |
2587 | } | |
2588 | root = (struct dx_root *) bh->b_data; | |
2589 | ||
2590 | - /* The 0th block becomes the root, move the dirents out */ | |
2591 | - fde = &root->dotdot; | |
2592 | - de = (struct ext4_dir_entry_2 *)((char *)fde + | |
2593 | - ext4_rec_len_from_disk(fde->rec_len)); | |
2594 | - if ((char *) de >= (((char *) root) + blocksize)) { | |
2595 | - ext4_error(dir->i_sb, __func__, | |
2596 | - "invalid rec_len for '..' in inode %lu", | |
2597 | - dir->i_ino); | |
2598 | - brelse(bh); | |
2599 | - return -EIO; | |
2600 | - } | |
2601 | - len = ((char *) root) + blocksize - (char *) de; | |
2602 | - | |
2603 | - /* Allocate new block for the 0th block's dirents */ | |
2604 | bh2 = ext4_append (handle, dir, &block, &retval); | |
2605 | if (!(bh2)) { | |
2606 | brelse(bh); | |
2607 | @@ -1421,6 +1394,11 @@ | |
2608 | EXT4_I(dir)->i_flags |= EXT4_INDEX_FL; | |
2609 | data1 = bh2->b_data; | |
2610 | ||
2611 | + /* The 0th block becomes the root, move the dirents out */ | |
2612 | + fde = &root->dotdot; | |
2613 | + de = (struct ext4_dir_entry_2 *)((char *)fde + | |
2614 | + ext4_rec_len_from_disk(fde->rec_len)); | |
2615 | + len = ((char *) root) + blocksize - (char *) de; | |
2616 | memcpy (data1, de, len); | |
2617 | de = (struct ext4_dir_entry_2 *) data1; | |
2618 | top = data1 + len; | |
2619 | @@ -1440,8 +1418,6 @@ | |
2620 | ||
2621 | /* Initialize as for dx_probe */ | |
2622 | hinfo.hash_version = root->info.hash_version; | |
2623 | - if (hinfo.hash_version <= DX_HASH_TEA) | |
2624 | - hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; | |
2625 | hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; | |
2626 | ext4fs_dirhash(name, namelen, &hinfo); | |
2627 | frame = frames; | |
2628 | @@ -2314,7 +2290,7 @@ | |
2629 | struct inode * old_inode, * new_inode; | |
2630 | struct buffer_head * old_bh, * new_bh, * dir_bh; | |
2631 | struct ext4_dir_entry_2 * old_de, * new_de; | |
2632 | + int retval; | |
2633 | - int retval, force_da_alloc = 0; | |
2634 | ||
2635 | old_bh = new_bh = dir_bh = NULL; | |
2636 | ||
2637 | @@ -2452,7 +2428,6 @@ | |
2638 | ext4_mark_inode_dirty(handle, new_inode); | |
2639 | if (!new_inode->i_nlink) | |
2640 | ext4_orphan_add(handle, new_inode); | |
2641 | - force_da_alloc = 1; | |
2642 | } | |
2643 | retval = 0; | |
2644 | ||
2645 | @@ -2461,8 +2436,6 @@ | |
2646 | brelse (old_bh); | |
2647 | brelse (new_bh); | |
2648 | ext4_journal_stop(handle); | |
2649 | - if (retval == 0 && force_da_alloc) | |
2650 | - ext4_alloc_da_blocks(old_inode); | |
2651 | return retval; | |
2652 | } | |
2653 | ||
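For reference, the dirhash workaround these namei.c hunks remove selected the unsigned variant of the legacy hashes by bumping any hash version at or below DX_HASH_TEA by s_hash_unsigned (0 or 3, chosen at mount time from the superblock flags, as the super.c hunk below also shows). In outline, as removed rather than reinvented:

	hinfo.hash_version = root->info.hash_version;
	if (hinfo.hash_version <= DX_HASH_TEA)
		hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
	hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;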
2654 | reverted: | |
2655 | --- b/fs/ext4/resize.c | |
2656 | +++ a/fs/ext4/resize.c | |
2657 | @@ -284,9 +284,11 @@ | |
2658 | if ((err = extend_or_restart_transaction(handle, 2, bh))) | |
2659 | goto exit_bh; | |
2660 | ||
2661 | + mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb), | |
2662 | + bh->b_data); | |
2663 | - mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data); | |
2664 | ext4_journal_dirty_metadata(handle, bh); | |
2665 | brelse(bh); | |
2666 | + | |
2667 | /* Mark unused entries in inode bitmap used */ | |
2668 | ext4_debug("clear inode bitmap %#04llx (+%llu)\n", | |
2669 | input->inode_bitmap, input->inode_bitmap - start); | |
2670 | @@ -295,7 +297,7 @@ | |
2671 | goto exit_journal; | |
2672 | } | |
2673 | ||
2674 | + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), | |
2675 | - mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | |
2676 | bh->b_data); | |
2677 | ext4_journal_dirty_metadata(handle, bh); | |
2678 | exit_bh: | |
2679 | @@ -745,7 +747,6 @@ | |
2680 | struct inode *inode = NULL; | |
2681 | handle_t *handle; | |
2682 | int gdb_off, gdb_num; | |
2683 | - int num_grp_locked = 0; | |
2684 | int err, err2; | |
2685 | ||
2686 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); | |
2687 | @@ -786,7 +787,6 @@ | |
2688 | } | |
2689 | } | |
2690 | ||
2691 | - | |
2692 | if ((err = verify_group_input(sb, input))) | |
2693 | goto exit_put; | |
2694 | ||
2695 | @@ -855,18 +855,15 @@ | |
2696 | * using the new disk blocks. | |
2697 | */ | |
2698 | ||
2699 | - num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group); | |
2700 | /* Update group descriptor block for new group */ | |
2701 | gdp = (struct ext4_group_desc *)((char *)primary->b_data + | |
2702 | gdb_off * EXT4_DESC_SIZE(sb)); | |
2703 | ||
2704 | - memset(gdp, 0, EXT4_DESC_SIZE(sb)); | |
2705 | ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ | |
2706 | ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ | |
2707 | ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ | |
2708 | gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); | |
2709 | gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); | |
2710 | - gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); | |
2711 | gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); | |
2712 | ||
2713 | /* | |
2714 | @@ -874,11 +871,9 @@ | |
2715 | * descriptor | |
2716 | */ | |
2717 | if (test_opt(sb, MBALLOC)) { | |
2718 | + err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); | |
2719 | + if (err) | |
2720 | - err = ext4_mb_add_groupinfo(sb, input->group, gdp); | |
2721 | - if (err) { | |
2722 | - ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked); | |
2723 | goto exit_journal; | |
2724 | - } | |
2725 | } | |
2726 | /* | |
2727 | * Make the new blocks and inodes valid next. We do this before | |
2728 | @@ -920,7 +915,6 @@ | |
2729 | ||
2730 | /* Update the global fs size fields */ | |
2731 | sbi->s_groups_count++; | |
2732 | - ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked); | |
2733 | ||
2734 | ext4_journal_dirty_metadata(handle, primary); | |
2735 | ||
2736 | @@ -982,7 +976,9 @@ | |
2737 | struct buffer_head * bh; | |
2738 | handle_t *handle; | |
2739 | int err; | |
2740 | + unsigned long freed_blocks; | |
2741 | ext4_group_t group; | |
2742 | + struct ext4_group_info *grp; | |
2743 | ||
2744 | /* We don't need to worry about locking wrt other resizers just | |
2745 | * yet: we're going to revalidate es->s_blocks_count after | |
2746 | @@ -1081,13 +1077,50 @@ | |
2747 | unlock_super(sb); | |
2748 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, | |
2749 | o_blocks_count + add); | |
2750 | + ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); | |
2751 | - /* We add the blocks to the bitmap and set the group need init bit */ | |
2752 | - ext4_add_groupblocks(handle, sb, o_blocks_count, add); | |
2753 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, | |
2754 | o_blocks_count + add); | |
2755 | if ((err = ext4_journal_stop(handle))) | |
2756 | goto exit_put; | |
2757 | ||
2758 | + /* | |
2759 | + * Mark mballoc pages as not up to date so that they will be updated | |
2760 | + * next time they are loaded by ext4_mb_load_buddy. | |
2761 | + */ | |
2762 | + if (test_opt(sb, MBALLOC)) { | |
2763 | + struct ext4_sb_info *sbi = EXT4_SB(sb); | |
2764 | + struct inode *inode = sbi->s_buddy_cache; | |
2765 | + int blocks_per_page; | |
2766 | + int block; | |
2767 | + int pnum; | |
2768 | + struct page *page; | |
2769 | + | |
2770 | + /* Set buddy page as not up to date */ | |
2771 | + blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | |
2772 | + block = group * 2; | |
2773 | + pnum = block / blocks_per_page; | |
2774 | + page = find_get_page(inode->i_mapping, pnum); | |
2775 | + if (page != NULL) { | |
2776 | + ClearPageUptodate(page); | |
2777 | + page_cache_release(page); | |
2778 | + } | |
2779 | + | |
2780 | + /* Set bitmap page as not up to date */ | |
2781 | + block++; | |
2782 | + pnum = block / blocks_per_page; | |
2783 | + page = find_get_page(inode->i_mapping, pnum); | |
2784 | + if (page != NULL) { | |
2785 | + ClearPageUptodate(page); | |
2786 | + page_cache_release(page); | |
2787 | + } | |
2788 | + | |
2789 | + /* Get the info on the last group */ | |
2790 | + grp = ext4_get_group_info(sb, group); | |
2791 | + | |
2792 | + /* Update free blocks in group info */ | |
2793 | + ext4_mb_update_group_info(grp, add); | |
2794 | + } | |
2795 | + | |
2796 | if (test_opt(sb, DEBUG)) | |
2797 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", | |
2798 | ext4_blocks_count(es)); | |
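The resize path above invalidates the cached buddy and bitmap pages of the last group so that ext4_mb_load_buddy() rebuilds them on next use; ext4_mb_add_more_groupinfo() does the same for a newly added group. That repeated pattern, factored into a minimal sketch (mb_invalidate_group_pages is a hypothetical helper):

	/* Sketch only: drop the uptodate bit on the two cached pages
	 * that may hold this group's bitmap and buddy data. */
	static void mb_invalidate_group_pages(struct super_block *sb,
					      ext4_group_t group)
	{
		struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
		int blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
		int block = group * 2;
		int i;

		for (i = 0; i < 2; i++, block++) {
			struct page *page;

			page = find_get_page(inode->i_mapping,
					     block / blocks_per_page);
			if (page != NULL) {
				ClearPageUptodate(page);
				page_cache_release(page);
			}
		}
	}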
2799 | reverted: | |
2800 | --- b/fs/ext4/super.c | |
2801 | +++ a/fs/ext4/super.c | |
2802 | @@ -1493,6 +1493,7 @@ | |
2803 | ext4_group_t flex_group_count; | |
2804 | ext4_group_t flex_group; | |
2805 | int groups_per_flex = 0; | |
2806 | + __u64 block_bitmap = 0; | |
2807 | int i; | |
2808 | ||
2809 | if (!sbi->s_es->s_log_groups_per_flex) { | |
2810 | @@ -1515,6 +1516,9 @@ | |
2811 | goto failed; | |
2812 | } | |
2813 | ||
2814 | + gdp = ext4_get_group_desc(sb, 1, &bh); | |
2815 | + block_bitmap = ext4_block_bitmap(sb, gdp) - 1; | |
2816 | + | |
2817 | for (i = 0; i < sbi->s_groups_count; i++) { | |
2818 | gdp = ext4_get_group_desc(sb, i, &bh); | |
2819 | ||
2820 | @@ -1916,8 +1920,8 @@ | |
2821 | struct inode *root; | |
2822 | int ret = -EINVAL; | |
2823 | int blocksize; | |
2824 | + int db_count; | |
2825 | + int i; | |
2826 | - unsigned int db_count; | |
2827 | - unsigned int i; | |
2828 | int needs_recovery; | |
2829 | __le32 features; | |
2830 | __u64 blocks_count; | |
2831 | @@ -2168,18 +2172,6 @@ | |
2832 | for (i = 0; i < 4; i++) | |
2833 | sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); | |
2834 | sbi->s_def_hash_version = es->s_def_hash_version; | |
2835 | - i = le32_to_cpu(es->s_flags); | |
2836 | - if (i & EXT2_FLAGS_UNSIGNED_HASH) | |
2837 | - sbi->s_hash_unsigned = 3; | |
2838 | - else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { | |
2839 | -#ifdef __CHAR_UNSIGNED__ | |
2840 | - es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); | |
2841 | - sbi->s_hash_unsigned = 3; | |
2842 | -#else | |
2843 | - es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); | |
2844 | -#endif | |
2845 | - sb->s_dirt = 1; | |
2846 | - } | |
2847 | ||
2848 | if (sbi->s_blocks_per_group > blocksize * 8) { | |
2849 | printk(KERN_ERR | |
2850 | @@ -2207,30 +2199,20 @@ | |
2851 | if (EXT4_BLOCKS_PER_GROUP(sb) == 0) | |
2852 | goto cantfind_ext4; | |
2853 | ||
2854 | + /* ensure blocks_count calculation below doesn't sign-extend */ | |
2855 | + if (ext4_blocks_count(es) + EXT4_BLOCKS_PER_GROUP(sb) < | |
2856 | + le32_to_cpu(es->s_first_data_block) + 1) { | |
2857 | + printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu, " | |
2858 | + "first data block %u, blocks per group %lu\n", | |
2859 | + ext4_blocks_count(es), | |
2860 | + le32_to_cpu(es->s_first_data_block), | |
2861 | + EXT4_BLOCKS_PER_GROUP(sb)); | |
2862 | - /* | |
2863 | - * It makes no sense for the first data block to be beyond the end | |
2864 | - * of the filesystem. | |
2865 | - */ | |
2866 | - if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { | |
2867 | - printk(KERN_WARNING "EXT4-fs: bad geometry: first data" | |
2868 | - "block %u is beyond end of filesystem (%llu)\n", | |
2869 | - le32_to_cpu(es->s_first_data_block), | |
2870 | - ext4_blocks_count(es)); | |
2871 | goto failed_mount; | |
2872 | } | |
2873 | blocks_count = (ext4_blocks_count(es) - | |
2874 | le32_to_cpu(es->s_first_data_block) + | |
2875 | EXT4_BLOCKS_PER_GROUP(sb) - 1); | |
2876 | do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); | |
2877 | - if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { | |
2878 | - printk(KERN_WARNING "EXT4-fs: groups count too large: %u " | |
2879 | - "(block count %llu, first data block %u, " | |
2880 | - "blocks per group %lu)\n", sbi->s_groups_count, | |
2881 | - ext4_blocks_count(es), | |
2882 | - le32_to_cpu(es->s_first_data_block), | |
2883 | - EXT4_BLOCKS_PER_GROUP(sb)); | |
2884 | - goto failed_mount; | |
2885 | - } | |
2886 | sbi->s_groups_count = blocks_count; | |
2887 | db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / | |
2888 | EXT4_DESC_PER_BLOCK(sb); | |
2889 | @@ -2950,14 +2932,14 @@ | |
2890 | ||
2891 | static int ext4_sync_fs(struct super_block *sb, int wait) | |
2892 | { | |
2893 | + int ret = 0; | |
2894 | - tid_t target; | |
2895 | ||
2896 | sb->s_dirt = 0; | |
2897 | + if (wait) | |
2898 | + ret = ext4_force_commit(sb); | |
2899 | + else | |
2900 | + jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL); | |
2901 | + return ret; | |
2902 | - if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { | |
2903 | - if (wait) | |
2904 | - jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); | |
2905 | - } | |
2906 | - return 0; | |
2907 | } | |
2908 | ||
2909 | /* | |
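The restored ext4_sync_fs() is simpler than the version it replaces: a synchronous sync forces a full commit and returns its status, while an asynchronous one merely kicks the journal and reports success. In outline (condensed from the + lines above, not a drop-in replacement):

	static int ext4_sync_fs_outline(struct super_block *sb, int wait)
	{
		sb->s_dirt = 0;
		if (wait)
			return ext4_force_commit(sb);
		jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
		return 0;
	}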
2910 | reverted: | |
2911 | --- b/fs/jbd2/commit.c | |
2912 | +++ a/fs/jbd2/commit.c | |
2913 | @@ -24,7 +24,6 @@ | |
2914 | #include <linux/crc32.h> | |
2915 | #include <linux/writeback.h> | |
2916 | #include <linux/backing-dev.h> | |
2917 | -#include <linux/bio.h> | |
2918 | ||
2919 | /* | |
2920 | * Default IO end handler for temporary BJ_IO buffer_heads. | |
2921 | @@ -171,34 +170,12 @@ | |
2922 | * This function along with journal_submit_commit_record | |
2923 | * allows to write the commit record asynchronously. | |
2924 | */ | |
2925 | +static int journal_wait_on_commit_record(struct buffer_head *bh) | |
2926 | -static int journal_wait_on_commit_record(journal_t *journal, | |
2927 | - struct buffer_head *bh) | |
2928 | { | |
2929 | int ret = 0; | |
2930 | ||
2931 | -retry: | |
2932 | clear_buffer_dirty(bh); | |
2933 | wait_on_buffer(bh); | |
2934 | - if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) { | |
2935 | - printk(KERN_WARNING | |
2936 | - "JBD2: wait_on_commit_record: sync failed on %s - " | |
2937 | - "disabling barriers\n", journal->j_devname); | |
2938 | - spin_lock(&journal->j_state_lock); | |
2939 | - journal->j_flags &= ~JBD2_BARRIER; | |
2940 | - spin_unlock(&journal->j_state_lock); | |
2941 | - | |
2942 | - lock_buffer(bh); | |
2943 | - clear_buffer_dirty(bh); | |
2944 | - set_buffer_uptodate(bh); | |
2945 | - bh->b_end_io = journal_end_buffer_io_sync; | |
2946 | - | |
2947 | - ret = submit_bh(WRITE_SYNC, bh); | |
2948 | - if (ret) { | |
2949 | - unlock_buffer(bh); | |
2950 | - return ret; | |
2951 | - } | |
2952 | - goto retry; | |
2953 | - } | |
2954 | ||
2955 | if (unlikely(!buffer_uptodate(bh))) | |
2956 | ret = -EIO; | |
2957 | @@ -818,7 +795,7 @@ | |
2958 | __jbd2_journal_abort_hard(journal); | |
2959 | } | |
2960 | if (!err && !is_journal_aborted(journal)) | |
2961 | + err = journal_wait_on_commit_record(cbh); | |
2962 | - err = journal_wait_on_commit_record(journal, cbh); | |
2963 | ||
2964 | if (err) | |
2965 | jbd2_journal_abort(journal, err); | |
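The commit.c hunk drops the barrier fallback: the removed code reacted to an -EOPNOTSUPP completion by clearing JBD2_BARRIER under j_state_lock and resubmitting the commit record synchronously without the barrier. Condensed from the removed lines (not a drop-in function):

	if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) {
		spin_lock(&journal->j_state_lock);
		journal->j_flags &= ~JBD2_BARRIER;	/* stop using barriers */
		spin_unlock(&journal->j_state_lock);

		lock_buffer(bh);
		clear_buffer_dirty(bh);
		set_buffer_uptodate(bh);
		bh->b_end_io = journal_end_buffer_io_sync;
		if (submit_bh(WRITE_SYNC, bh) == 0)
			goto retry;			/* wait on it again */
	}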
reverted:
--- b/fs/jbd2/journal.c
+++ a/fs/jbd2/journal.c
@@ -430,7 +430,7 @@
 }

 /*
+ * Called under j_state_lock.  Returns true if a transaction was started.
- * Called under j_state_lock.  Returns true if a transaction commit was started.
  */
 int __jbd2_log_start_commit(journal_t *journal, tid_t target)
 {
@@ -498,8 +498,7 @@

 /*
  * Start a commit of the current running transaction (if any). Returns true
+ * if a transaction was started, and fills its tid in at *ptid
- * if a transaction is going to be committed (or is currently already
- * committing), and fills its tid in at *ptid
  */
 int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
 {
@@ -509,19 +508,15 @@
 	if (journal->j_running_transaction) {
 		tid_t tid = journal->j_running_transaction->t_tid;

+		ret = __jbd2_log_start_commit(journal, tid);
+		if (ret && ptid)
-		__jbd2_log_start_commit(journal, tid);
-		/* There's a running transaction and we've just made sure
-		 * it's commit has been scheduled. */
-		if (ptid)
 			*ptid = tid;
+	} else if (journal->j_committing_transaction && ptid) {
-		ret = 1;
-	} else if (journal->j_committing_transaction) {
 		/*
 		 * If ext3_write_super() recently started a commit, then we
 		 * have to wait for completion of that transaction
 		 */
+		*ptid = journal->j_committing_transaction->t_tid;
-		if (ptid)
-			*ptid = journal->j_committing_transaction->t_tid;
 		ret = 1;
 	}
 	spin_unlock(&journal->j_state_lock);
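
Under the reverted semantics above, the return value means "a commit of
the running transaction was newly started" (or, with a non-NULL ptid,
"one is already committing"). A typical caller therefore pairs it with
jbd2_log_wait_commit(); an illustrative sketch modeled on the old
ext3/ext4 sync pattern (hypothetical helper name):

	static void sync_journal_sketch(journal_t *journal)
	{
		tid_t target;

		/* kick a commit; if one was started or is already in
		 * flight, wait until that tid reaches stable storage */
		if (jbd2_journal_start_commit(journal, &target))
			jbd2_log_wait_commit(journal, target);
	}

Note that with this version a second caller racing against one that has
already requested the commit can see a 0 return even though a commit is
pending; the mainline rework being reverted here existed to close that
window.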
reverted:
--- b/fs/jbd2/revoke.c
+++ a/fs/jbd2/revoke.c
@@ -55,25 +55,6 @@
 *   need do nothing.
 * RevokeValid set, Revoked set:
 *   buffer has been revoked.
- *
- * Locking rules:
- * We keep two hash tables of revoke records. One hashtable belongs to the
- * running transaction (is pointed to by journal->j_revoke), the other one
- * belongs to the committing transaction. Accesses to the second hash table
- * happen only from the kjournald and no other thread touches this table. Also
- * journal_switch_revoke_table() which switches which hashtable belongs to the
- * running and which to the committing transaction is called only from
- * kjournald. Therefore we need no locks when accessing the hashtable belonging
- * to the committing transaction.
- *
- * All users operating on the hash table belonging to the running transaction
- * have a handle to the transaction. Therefore they are safe from kjournald
- * switching hash tables under them. For operations on the lists of entries in
- * the hash table j_revoke_lock is used.
- *
- * Finally, also replay code uses the hash tables but at this moment noone else
- * can touch them (filesystem isn't mounted yet) and hence no locking is
- * needed.
 */

#ifndef __KERNEL__
@@ -420,6 +401,8 @@
 * the second time we would still have a pending revoke to cancel. So,
 * do not trust the Revoked bit on buffers unless RevokeValid is also
 * set.
+ *
+ * The caller must have the journal locked.
 */
 int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
 {
@@ -497,7 +480,10 @@
 /*
  * Write revoke records to the journal for all entries in the current
  * revoke hash, deleting the entries as we go.
+ *
+ * Called with the journal lock held.
  */
+
 void jbd2_journal_write_revoke_records(journal_t *journal,
 				       transaction_t *transaction)
 {
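
The comments restored above describe the revoke/cancel pairing: a freed
metadata block gets a revoke record so stale journal copies of it are
not replayed after a crash, and reusing the block as metadata in the
same transaction cancels the pending revoke. A hedged sketch of that
sequence (hypothetical helper; assumes a handle from a running jbd2
transaction):

	static int reuse_metadata_block_sketch(handle_t *handle,
					       struct buffer_head *bh)
	{
		int err;

		/* freeing the block: journal a revoke record for it */
		err = jbd2_journal_revoke(handle, bh->b_blocknr, bh);
		if (err)
			return err;

		/* reallocating it as fresh metadata: get_create_access
		 * cancels the pending revoke via
		 * jbd2_journal_cancel_revoke() */
		return jbd2_journal_get_create_access(handle, bh);
	}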
reverted:
--- b/fs/jbd2/transaction.c
+++ a/fs/jbd2/transaction.c
@@ -2049,46 +2049,26 @@
 }

 /*
+ * This function must be called when the inode is journaled in ordered
+ * mode, before truncation happens. It starts writeout of the truncated
+ * part in case it is in the committing transaction, so that we uphold
+ * the ordered-mode consistency guarantees.
- * File truncate and transaction commit interact with each other in a
- * non-trivial way. If a transaction writing data block A is
- * committing, we cannot discard the data by truncate until we have
- * written them. Otherwise if we crashed after the transaction with
- * write has committed but before the transaction with truncate has
- * committed, we could see stale data in block A. This function is a
- * helper to solve this problem. It starts writeout of the truncated
- * part in case it is in the committing transaction.
- *
- * Filesystem code must call this function when inode is journaled in
- * ordered mode before truncation happens and after the inode has been
- * placed on orphan list with the new inode size. The second condition
- * avoids the race that someone writes new data and we start
- * committing the transaction after this function has been called but
- * before a transaction for truncate is started (and furthermore it
- * allows us to optimize the case where the addition to orphan list
- * happens in the same transaction as write --- we don't have to write
- * any data in such case).
  */
+int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
-int jbd2_journal_begin_ordered_truncate(journal_t *journal,
-					struct jbd2_inode *jinode,
 					loff_t new_size)
 {
+	journal_t *journal;
+	transaction_t *commit_trans;
-	transaction_t *inode_trans, *commit_trans;
 	int ret = 0;

+	if (!inode->i_transaction && !inode->i_next_transaction)
-	/* This is a quick check to avoid locking if not necessary */
-	if (!jinode->i_transaction)
 		goto out;
+	journal = inode->i_transaction->t_journal;
-	/* Locks are here just to force reading of recent values, it is
-	 * enough that the transaction was not committing before we started
-	 * a transaction adding the inode to orphan list */
 	spin_lock(&journal->j_state_lock);
 	commit_trans = journal->j_committing_transaction;
 	spin_unlock(&journal->j_state_lock);
+	if (inode->i_transaction == commit_trans) {
+		ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
-	spin_lock(&journal->j_list_lock);
-	inode_trans = jinode->i_transaction;
-	spin_unlock(&journal->j_list_lock);
-	if (inode_trans == commit_trans) {
-		ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
 			new_size, LLONG_MAX);
 		if (ret)
 			jbd2_journal_abort(journal, ret);
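
On the reverted side the caller passes only the jbd2_inode and the
helper derives the journal from inode->i_transaction, which is why the
unlocked emptiness check must come first (a clean inode has no
transaction to dereference). The ext4 wrapper on this side looks
roughly like the following (modeled on the 2.6.27 caller, not verbatim
tree source):

	static int ext4_begin_ordered_truncate(struct inode *inode,
					       loff_t new_size)
	{
		return jbd2_journal_begin_ordered_truncate(
				&EXT4_I(inode)->jinode, new_size);
	}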
reverted:
--- b/include/linux/jbd2.h
+++ a/include/linux/jbd2.h
@@ -308,8 +308,7 @@
 		int val = (expr);				\
 		if (!val) {					\
 			printk(KERN_ERR				\
+			       "EXT3-fs unexpected failure: %s;\n",# expr); \
-			       "JBD2 unexpected failure: %s: %s;\n",	\
-			       __func__, #expr);		\
 			printk(KERN_ERR why "\n");		\
 		}						\
 		val;						\
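
The macro body patched here is __journal_expect(), reached through the
J_EXPECT/J_EXPECT_BH/J_EXPECT_JH wrappers; since the statement
expression yields the tested value, callers can branch on it instead of
oopsing. An illustrative use (hypothetical function, modeled on
existing jbd2 call sites):

	static int check_buffer_sketch(struct journal_head *jh)
	{
		/* on failure this logs the expression and the "why"
		 * string, then evaluates to 0 so we can bail out */
		if (!J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
				 "Possible IO failure.\n"))
			return -EIO;
		return 0;
	}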
@@ -330,7 +329,6 @@
 	BH_State,		/* Pins most journal_head state */
 	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
 	BH_Unshadow,		/* Dummy bit, for BJ_Shadow wakeup filtering */
-	BH_JBDPrivateStart,	/* First bit available for private use by FS */
 };

 BUFFER_FNS(JBD, jbd)
@@ -1075,8 +1073,7 @@
 extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
 extern int jbd2_journal_force_commit(journal_t *);
 extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
+extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size);
-extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
-				struct jbd2_inode *inode, loff_t new_size);
 extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
 extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);

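The three jbd2_inode prototypes above map onto an inode's lifetime:
init once when the in-core inode is allocated, file the inode into the
running transaction on each ordered-mode data write, and release it
when the inode is evicted. A hedged sketch (hypothetical myfs_* names,
modeled on how ext4 uses these hooks):

	struct myfs_inode_info {
		struct jbd2_inode jinode;
		struct inode vfs_inode;
	};

	static void myfs_alloc_inode_sketch(struct myfs_inode_info *ei)
	{
		/* once, at in-core inode allocation time */
		jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
	}

	static int myfs_write_data_sketch(handle_t *handle,
					  struct myfs_inode_info *ei)
	{
		/* per ordered-mode write: tie the inode to the running
		 * transaction so commit flushes its data first */
		return jbd2_journal_file_inode(handle, &ei->jinode);
	}

	static void myfs_clear_inode_sketch(journal_t *journal,
					    struct myfs_inode_info *ei)
	{
		/* at eviction: wait out any commit still using the inode */
		jbd2_journal_release_jbd_inode(journal, &ei->jinode);
	}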