]>
Commit | Line | Data |
---|---|---|
6a930a95 BS |
1 | From: Jeff Mahoney <jeffm@suse.com> |
2 | Subject: reiserfs: journaled xattrs | |
3 | ||
4 | Deadlocks are possible in the xattr code between the journal lock and the | |
5 | xattr sems. | |
6 | ||
7 | This patch implements journalling for xattr operations. The benefit is | |
8 | twofold: | |
9 | * It gets rid of the deadlock possibility by always ensuring that xattr | |
10 | write operations are initiated inside a transaction. | |
11 | * It corrects the problem where xattr backing files aren't treated any | |
12 | differently from normal files, despite the fact that they are metadata. | |
13 | ||
14 | I discussed the added journal load with Chris Mason, and we decided that | |
15 | since xattrs (versus other journal activity) are fairly rare, the introduction | |
16 | of larger transactions to support journaled xattrs wouldn't be too big a deal. | |
17 | ||
18 | Signed-off-by: Jeff Mahoney <jeffm@suse.com> | |
19 | ||
20 | -- | |
21 | fs/reiserfs/inode.c | 3 - | |
22 | fs/reiserfs/namei.c | 14 +---- | |
23 | fs/reiserfs/xattr.c | 39 +++++++++++---- | |
24 | fs/reiserfs/xattr_acl.c | 105 +++++++++++++++++++++++++++++++---------- | |
25 | include/linux/reiserfs_acl.h | 3 - | |
26 | include/linux/reiserfs_fs.h | 4 + | |
27 | include/linux/reiserfs_xattr.h | 40 ++++++++++++++- | |
28 | 7 files changed, 159 insertions(+), 49 deletions(-) | |
29 | ||
30 | --- a/fs/reiserfs/inode.c | |
31 | +++ b/fs/reiserfs/inode.c | |
32 | @@ -1919,9 +1919,8 @@ int reiserfs_new_inode(struct reiserfs_t | |
33 | goto out_inserted_sd; | |
34 | } | |
35 | ||
36 | - /* XXX CHECK THIS */ | |
37 | if (reiserfs_posixacl(inode->i_sb)) { | |
38 | - retval = reiserfs_inherit_default_acl(dir, dentry, inode); | |
39 | + retval = reiserfs_inherit_default_acl(th, dir, dentry, inode); | |
40 | if (retval) { | |
41 | err = retval; | |
42 | reiserfs_check_path(&path_to_key); | |
43 | --- a/fs/reiserfs/namei.c | |
44 | +++ b/fs/reiserfs/namei.c | |
45 | @@ -607,15 +607,13 @@ static int reiserfs_create(struct inode | |
46 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + | |
47 | REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); | |
48 | struct reiserfs_transaction_handle th; | |
49 | - int locked; | |
50 | ||
51 | if (!(inode = new_inode(dir->i_sb))) { | |
52 | return -ENOMEM; | |
53 | } | |
54 | new_inode_init(inode, dir, mode); | |
55 | ||
56 | - locked = reiserfs_cache_default_acl(dir); | |
57 | - | |
58 | + jbegin_count += reiserfs_cache_default_acl(dir); | |
59 | reiserfs_write_lock(dir->i_sb); | |
60 | ||
61 | retval = journal_begin(&th, dir->i_sb, jbegin_count); | |
62 | @@ -669,7 +667,6 @@ static int reiserfs_mknod(struct inode * | |
63 | JOURNAL_PER_BALANCE_CNT * 3 + | |
64 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + | |
65 | REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); | |
66 | - int locked; | |
67 | ||
68 | if (!new_valid_dev(rdev)) | |
69 | return -EINVAL; | |
70 | @@ -679,8 +676,7 @@ static int reiserfs_mknod(struct inode * | |
71 | } | |
72 | new_inode_init(inode, dir, mode); | |
73 | ||
74 | - locked = reiserfs_cache_default_acl(dir); | |
75 | - | |
76 | + jbegin_count += reiserfs_cache_default_acl(dir); | |
77 | reiserfs_write_lock(dir->i_sb); | |
78 | ||
79 | retval = journal_begin(&th, dir->i_sb, jbegin_count); | |
80 | @@ -737,7 +733,6 @@ static int reiserfs_mkdir(struct inode * | |
81 | JOURNAL_PER_BALANCE_CNT * 3 + | |
82 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + | |
83 | REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); | |
84 | - int locked; | |
85 | ||
86 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES | |
87 | /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ | |
88 | @@ -749,8 +744,7 @@ static int reiserfs_mkdir(struct inode * | |
89 | } | |
90 | new_inode_init(inode, dir, mode); | |
91 | ||
92 | - locked = reiserfs_cache_default_acl(dir); | |
93 | - | |
94 | + jbegin_count += reiserfs_cache_default_acl(dir); | |
95 | reiserfs_write_lock(dir->i_sb); | |
96 | ||
97 | retval = journal_begin(&th, dir->i_sb, jbegin_count); | |
98 | @@ -1037,8 +1031,6 @@ static int reiserfs_symlink(struct inode | |
99 | memcpy(name, symname, strlen(symname)); | |
100 | padd_item(name, item_len, strlen(symname)); | |
101 | ||
102 | - /* We would inherit the default ACL here, but symlinks don't get ACLs */ | |
103 | - | |
104 | retval = journal_begin(&th, parent_dir->i_sb, jbegin_count); | |
105 | if (retval) { | |
106 | drop_new_inode(inode); | |
107 | --- a/fs/reiserfs/xattr_acl.c | |
108 | +++ b/fs/reiserfs/xattr_acl.c | |
109 | @@ -10,15 +10,17 @@ | |
110 | #include <linux/reiserfs_acl.h> | |
111 | #include <asm/uaccess.h> | |
112 | ||
113 | -static int reiserfs_set_acl(struct inode *inode, int type, | |
114 | +static int reiserfs_set_acl(struct reiserfs_transaction_handle *th, | |
115 | + struct inode *inode, int type, | |
116 | struct posix_acl *acl); | |
117 | ||
118 | static int | |
119 | xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) | |
120 | { | |
121 | struct posix_acl *acl; | |
122 | - int error; | |
123 | - | |
124 | + int error, error2; | |
125 | + struct reiserfs_transaction_handle th; | |
126 | + size_t jcreate_blocks; | |
127 | if (!reiserfs_posixacl(inode->i_sb)) | |
128 | return -EOPNOTSUPP; | |
129 | if (!is_owner_or_cap(inode)) | |
130 | @@ -36,7 +38,21 @@ xattr_set_acl(struct inode *inode, int t | |
131 | } else | |
132 | acl = NULL; | |
133 | ||
134 | - error = reiserfs_set_acl(inode, type, acl); | |
135 | + /* Pessimism: We can't assume that anything from the xattr root up | |
136 | + * has been created. */ | |
137 | + | |
138 | + jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) + | |
139 | + reiserfs_xattr_nblocks(inode, size) * 2; | |
140 | + | |
141 | + reiserfs_write_lock(inode->i_sb); | |
142 | + error = journal_begin(&th, inode->i_sb, jcreate_blocks); | |
143 | + if (error == 0) { | |
144 | + error = reiserfs_set_acl(&th, inode, type, acl); | |
145 | + error2 = journal_end(&th, inode->i_sb, jcreate_blocks); | |
146 | + if (error2) | |
147 | + error = error2; | |
148 | + } | |
149 | + reiserfs_write_unlock(inode->i_sb); | |
150 | ||
151 | release_and_out: | |
152 | posix_acl_release(acl); | |
153 | @@ -266,7 +282,8 @@ struct posix_acl *reiserfs_get_acl(struc | |
154 | * BKL held [before 2.5.x] | |
155 | */ | |
156 | static int | |
157 | -reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) | |
158 | +reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, | |
159 | + int type, struct posix_acl *acl) | |
160 | { | |
161 | char *name; | |
162 | void *value = NULL; | |
163 | @@ -310,7 +327,7 @@ reiserfs_set_acl(struct inode *inode, in | |
164 | return (int)PTR_ERR(value); | |
165 | } | |
166 | ||
167 | - error = __reiserfs_xattr_set(inode, name, value, size, 0); | |
168 | + error = reiserfs_xattr_set_handle(th, inode, name, value, size, 0); | |
169 | ||
170 | /* | |
171 | * Ensure that the inode gets dirtied if we're only using | |
172 | @@ -337,7 +354,8 @@ reiserfs_set_acl(struct inode *inode, in | |
173 | /* dir->i_mutex: locked, | |
174 | * inode is new and not released into the wild yet */ | |
175 | int | |
176 | -reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry, | |
177 | +reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, | |
178 | + struct inode *dir, struct dentry *dentry, | |
179 | struct inode *inode) | |
180 | { | |
181 | struct posix_acl *acl; | |
182 | @@ -374,7 +392,8 @@ reiserfs_inherit_default_acl(struct inod | |
183 | ||
184 | /* Copy the default ACL to the default ACL of a new directory */ | |
185 | if (S_ISDIR(inode->i_mode)) { | |
186 | - err = reiserfs_set_acl(inode, ACL_TYPE_DEFAULT, acl); | |
187 | + err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT, | |
188 | + acl); | |
189 | if (err) | |
190 | goto cleanup; | |
191 | } | |
192 | @@ -395,9 +414,9 @@ reiserfs_inherit_default_acl(struct inod | |
193 | ||
194 | /* If we need an ACL.. */ | |
195 | if (need_acl > 0) { | |
196 | - err = | |
197 | - reiserfs_set_acl(inode, ACL_TYPE_ACCESS, | |
198 | - acl_copy); | |
199 | + err = reiserfs_set_acl(th, inode, | |
200 | + ACL_TYPE_ACCESS, | |
201 | + acl_copy); | |
202 | if (err) | |
203 | goto cleanup_copy; | |
204 | } | |
205 | @@ -415,21 +434,45 @@ reiserfs_inherit_default_acl(struct inod | |
206 | return err; | |
207 | } | |
208 | ||
209 | -/* Looks up and caches the result of the default ACL. | |
210 | - * We do this so that we don't need to carry the xattr_sem into | |
211 | - * reiserfs_new_inode if we don't need to */ | |
212 | +/* This is used to cache the default acl before a new object is created. | |
213 | + * The biggest reason for this is to get an idea of how many blocks will | |
214 | + * actually be required for the create operation if we must inherit an ACL. | |
215 | + * An ACL write can add up to 3 object creations and an additional file write | |
216 | + * so we'd prefer not to reserve that many blocks in the journal if we can. | |
217 | + * It also has the advantage of not loading the ACL with a transaction open. | |
218 | + * This may seem silly, but if the owner of the directory is doing the | |
219 | + * creation, the ACL may not be loaded since the permissions wouldn't require | |
220 | + * it. | |
221 | + * We return the number of blocks required for the transaction. | |
222 | + */ | |
223 | int reiserfs_cache_default_acl(struct inode *inode) | |
224 | { | |
225 | - int ret = 0; | |
226 | - if (reiserfs_posixacl(inode->i_sb) && !IS_PRIVATE(inode)) { | |
227 | - struct posix_acl *acl; | |
228 | - acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT); | |
229 | - ret = (acl && !IS_ERR(acl)); | |
230 | - if (ret) | |
231 | - posix_acl_release(acl); | |
232 | + struct posix_acl *acl; | |
233 | + int nblocks = 0; | |
234 | + | |
235 | + if (IS_PRIVATE(inode)) | |
236 | + return 0; | |
237 | + | |
238 | + acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT); | |
239 | + | |
240 | + if (acl && !IS_ERR(acl)) { | |
241 | + int size = reiserfs_acl_size(acl->a_count); | |
242 | + | |
243 | + /* Other xattrs can be created during inode creation. We don't | |
244 | + * want to claim too many blocks, so we check to see if we | |
245 | + * need to create the tree to the xattrs, and then we | |
246 | + * just want two files. */ | |
247 | + nblocks = reiserfs_xattr_jcreate_nblocks(inode); | |
248 | + nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); | |
249 | + | |
250 | + REISERFS_I(inode)->i_flags |= i_has_xattr_dir; | |
251 | + | |
252 | + /* We need to account for writes + bitmaps for two files */ | |
253 | + nblocks += reiserfs_xattr_nblocks(inode, size) * 4; | |
254 | + posix_acl_release(acl); | |
255 | } | |
256 | ||
257 | - return ret; | |
258 | + return nblocks; | |
259 | } | |
260 | ||
261 | int reiserfs_acl_chmod(struct inode *inode) | |
262 | @@ -455,8 +498,22 @@ int reiserfs_acl_chmod(struct inode *ino | |
263 | if (!clone) | |
264 | return -ENOMEM; | |
265 | error = posix_acl_chmod_masq(clone, inode->i_mode); | |
266 | - if (!error) | |
267 | - error = reiserfs_set_acl(inode, ACL_TYPE_ACCESS, clone); | |
268 | + if (!error) { | |
269 | + struct reiserfs_transaction_handle th; | |
270 | + size_t size = reiserfs_xattr_nblocks(inode, | |
271 | + reiserfs_acl_size(clone->a_count)); | |
272 | + reiserfs_write_lock(inode->i_sb); | |
273 | + error = journal_begin(&th, inode->i_sb, size * 2); | |
274 | + if (!error) { | |
275 | + int error2; | |
276 | + error = reiserfs_set_acl(&th, inode, ACL_TYPE_ACCESS, | |
277 | + clone); | |
278 | + error2 = journal_end(&th, inode->i_sb, size * 2); | |
279 | + if (error2) | |
280 | + error = error2; | |
281 | + } | |
282 | + reiserfs_write_unlock(inode->i_sb); | |
283 | + } | |
284 | posix_acl_release(clone); | |
285 | return error; | |
286 | } | |
287 | --- a/fs/reiserfs/xattr.c | |
288 | +++ b/fs/reiserfs/xattr.c | |
289 | @@ -632,8 +632,9 @@ out_dput: | |
290 | * inode->i_mutex: down | |
291 | */ | |
292 | int | |
293 | -__reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, | |
294 | - size_t buffer_size, int flags) | |
295 | +reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, | |
296 | + struct inode *inode, const char *name, | |
297 | + const void *buffer, size_t buffer_size, int flags) | |
298 | { | |
299 | int err = 0; | |
300 | struct dentry *dentry; | |
301 | @@ -723,14 +724,34 @@ out_unlock: | |
302 | return err; | |
303 | } | |
304 | ||
305 | -int | |
306 | -reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, | |
307 | - size_t buffer_size, int flags) | |
308 | +/* We need to start a transaction to maintain lock ordering */ | |
309 | +int reiserfs_xattr_set(struct inode *inode, const char *name, | |
310 | + const void *buffer, size_t buffer_size, int flags) | |
311 | { | |
312 | - int err = __reiserfs_xattr_set(inode, name, buffer, buffer_size, flags); | |
313 | - if (err == -ENODATA) | |
314 | - err = 0; | |
315 | - return err; | |
316 | + | |
317 | + struct reiserfs_transaction_handle th; | |
318 | + int error, error2; | |
319 | + size_t jbegin_count = reiserfs_xattr_nblocks(inode, buffer_size); | |
320 | + | |
321 | + if (!(flags & XATTR_REPLACE)) | |
322 | + jbegin_count += reiserfs_xattr_jcreate_nblocks(inode); | |
323 | + | |
324 | + reiserfs_write_lock(inode->i_sb); | |
325 | + error = journal_begin(&th, inode->i_sb, jbegin_count); | |
326 | + if (error) { | |
327 | + reiserfs_write_unlock(inode->i_sb); | |
328 | + return error; | |
329 | + } | |
330 | + | |
331 | + error = reiserfs_xattr_set_handle(&th, inode, name, | |
332 | + buffer, buffer_size, flags); | |
333 | + | |
334 | + error2 = journal_end(&th, inode->i_sb, jbegin_count); | |
335 | + if (error == 0) | |
336 | + error = error2; | |
337 | + reiserfs_write_unlock(inode->i_sb); | |
338 | + | |
339 | + return error; | |
340 | } | |
341 | ||
342 | /* | |
343 | --- a/include/linux/reiserfs_acl.h | |
344 | +++ b/include/linux/reiserfs_acl.h | |
345 | @@ -49,7 +49,8 @@ static inline int reiserfs_acl_count(siz | |
346 | #ifdef CONFIG_REISERFS_FS_POSIX_ACL | |
347 | struct posix_acl *reiserfs_get_acl(struct inode *inode, int type); | |
348 | int reiserfs_acl_chmod(struct inode *inode); | |
349 | -int reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry, | |
350 | +int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, | |
351 | + struct inode *dir, struct dentry *dentry, | |
352 | struct inode *inode); | |
353 | int reiserfs_cache_default_acl(struct inode *dir); | |
354 | extern struct xattr_handler reiserfs_posix_acl_default_handler; | |
355 | --- a/include/linux/reiserfs_fs.h | |
356 | +++ b/include/linux/reiserfs_fs.h | |
357 | @@ -1615,6 +1615,10 @@ struct reiserfs_journal_header { | |
358 | #define JOURNAL_MAX_COMMIT_AGE 30 | |
359 | #define JOURNAL_MAX_TRANS_AGE 30 | |
360 | #define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9) | |
361 | +#define JOURNAL_BLOCKS_PER_OBJECT(sb) (JOURNAL_PER_BALANCE_CNT * 3 + \ | |
362 | + 2 * (REISERFS_QUOTA_INIT_BLOCKS(sb) + \ | |
363 | + REISERFS_QUOTA_TRANS_BLOCKS(sb))) | |
364 | + | |
365 | #ifdef CONFIG_QUOTA | |
366 | /* We need to update data and inode (atime) */ | |
367 | #define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & (1<<REISERFS_QUOTA) ? 2 : 0) | |
368 | --- a/include/linux/reiserfs_xattr.h | |
369 | +++ b/include/linux/reiserfs_xattr.h | |
370 | @@ -46,14 +46,50 @@ int reiserfs_removexattr(struct dentry * | |
371 | int reiserfs_permission(struct inode *inode, int mask); | |
372 | ||
373 | int reiserfs_xattr_get(struct inode *, const char *, void *, size_t); | |
374 | -int __reiserfs_xattr_set(struct inode *, const char *, const void *, | |
375 | - size_t, int); | |
376 | int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int); | |
377 | +int reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *, | |
378 | + struct inode *, const char *, const void *, | |
379 | + size_t, int); | |
380 | ||
381 | extern struct xattr_handler reiserfs_xattr_user_handler; | |
382 | extern struct xattr_handler reiserfs_xattr_trusted_handler; | |
383 | extern struct xattr_handler reiserfs_xattr_security_handler; | |
384 | ||
385 | +#define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header)) | |
386 | +static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size) | |
387 | +{ | |
388 | + loff_t ret = 0; | |
389 | + if (reiserfs_file_data_log(inode)) { | |
390 | + ret = _ROUND_UP(xattr_size(size), inode->i_sb->s_blocksize); | |
391 | + ret >>= inode->i_sb->s_blocksize_bits; | |
392 | + } | |
393 | + return ret; | |
394 | +} | |
395 | + | |
396 | +/* We may have to create up to 3 objects: xattr root, xattr dir, xattr file. | |
397 | + * Let's try to be smart about it. | |
398 | + * xattr root: We cache it. If it's not cached, we may need to create it. | |
399 | + * xattr dir: If anything has been loaded for this inode, we can set a flag | |
400 | + * saying so. | |
401 | + * xattr file: Since we don't cache xattrs, we can't tell. We always include | |
402 | + * blocks for it. | |
403 | + * | |
404 | + * However, since root and dir can be created between calls - YOU MUST SAVE | |
405 | + * THIS VALUE. | |
406 | + */ | |
407 | +static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode) | |
408 | +{ | |
409 | + size_t nblocks = JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); | |
410 | + | |
411 | + if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) { | |
412 | + nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); | |
413 | + if (REISERFS_SB(inode->i_sb)->xattr_root == NULL) | |
414 | + nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); | |
415 | + } | |
416 | + | |
417 | + return nblocks; | |
418 | +} | |
419 | + | |
420 | static inline void reiserfs_init_xattr_rwsem(struct inode *inode) | |
421 | { | |
422 | init_rwsem(&REISERFS_I(inode)->i_xattr_sem); |