]>
Commit | Line | Data |
---|---|---|
2cb7cef9 BS |
1 | From: Tao Ma <tao.ma@oracle.com> |
2 | Subject: [PATCH 11/16] ocfs2: Add xattr bucket iteration for large numbers of EAs | |
3 | Patch-mainline: 2.6.28? | |
4 | References: FATE302067 | |
5 | ||
6 | Ocfs2 breaks up xattr index tree leaves into 4k regions, called buckets. | |
7 | Attributes are stored within a given bucket, depending on hash value. | |
8 | ||
9 | After a discussion with Mark, we decided that the per-bucket index | |
10 | (xe_entry[]) would only exist in the 1st block of a bucket. Likewise, | |
11 | name/value pairs will not straddle more than one block. This allows the | |
12 | majority of operations to work directly on the buffer heads in a leaf block. | |
13 | ||
14 | This patch adds code to iterate the buckets in an EA. A new abstraction of | |
15 | ocfs2_xattr_bucket is added. It records the bhs in this bucket and | |
16 | ocfs2_xattr_header. This keeps the code neat, improving readability. | |
17 | ||
18 | Signed-off-by: Tao Ma <tao.ma@oracle.com> | |
19 | Signed-off-by: Mark Fasheh <mfasheh@suse.com> | |
20 | --- | |
21 | fs/ocfs2/ocfs2_fs.h | 35 +++++++- | |
22 | fs/ocfs2/xattr.c | 255 ++++++++++++++++++++++++++++++++++++++++++++++++++- | |
23 | fs/ocfs2/xattr.h | 9 ++ | |
24 | 3 files changed, 293 insertions(+), 6 deletions(-) | |
25 | ||
26 | diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h | |
27 | index 98e1f8b..8d5e72f 100644 | |
28 | --- a/fs/ocfs2/ocfs2_fs.h | |
29 | +++ b/fs/ocfs2/ocfs2_fs.h | |
30 | @@ -755,8 +755,13 @@ struct ocfs2_xattr_header { | |
31 | __le16 xh_count; /* contains the count of how | |
32 | many records are in the | |
33 | local xattr storage. */ | |
34 | - __le16 xh_reserved1; | |
35 | - __le32 xh_reserved2; | |
36 | + __le16 xh_free_start; /* current offset for storing | |
37 | + xattr. */ | |
38 | + __le16 xh_name_value_len; /* total length of name/value | |
39 | + pairs in this bucket. */ | |
40 | + __le16 xh_num_buckets; /* bucket nums in one extent | |
41 | + record, only valid in the | |
42 | + first bucket. */ | |
43 | __le64 xh_csum; | |
44 | struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ | |
45 | }; | |
46 | @@ -793,6 +798,10 @@ struct ocfs2_xattr_tree_root { | |
47 | #define OCFS2_XATTR_SIZE(size) (((size) + OCFS2_XATTR_ROUND) & \ | |
48 | ~(OCFS2_XATTR_ROUND)) | |
49 | ||
50 | +#define OCFS2_XATTR_BUCKET_SIZE 4096 | |
51 | +#define OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET (OCFS2_XATTR_BUCKET_SIZE \ | |
52 | + / OCFS2_MIN_BLOCKSIZE) | |
53 | + | |
54 | /* | |
55 | * On disk structure for xattr block. | |
56 | */ | |
57 | @@ -963,6 +972,17 @@ static inline u64 ocfs2_backup_super_blkno(struct super_block *sb, int index) | |
58 | return 0; | |
59 | ||
60 | } | |
61 | + | |
62 | +static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb) | |
63 | +{ | |
64 | + int size; | |
65 | + | |
66 | + size = sb->s_blocksize - | |
67 | + offsetof(struct ocfs2_xattr_block, | |
68 | + xb_attrs.xb_root.xt_list.l_recs); | |
69 | + | |
70 | + return size / sizeof(struct ocfs2_extent_rec); | |
71 | +} | |
72 | #else | |
73 | static inline int ocfs2_fast_symlink_chars(int blocksize) | |
74 | { | |
75 | @@ -1046,6 +1066,17 @@ static inline uint64_t ocfs2_backup_super_blkno(int blocksize, int index) | |
76 | ||
77 | return 0; | |
78 | } | |
79 | + | |
80 | +static inline int ocfs2_xattr_recs_per_xb(int blocksize) | |
81 | +{ | |
82 | + int size; | |
83 | + | |
84 | + size = blocksize - | |
85 | + offsetof(struct ocfs2_xattr_block, | |
86 | + xb_attrs.xb_root.xt_list.l_recs); | |
87 | + | |
88 | + return size / sizeof(struct ocfs2_extent_rec); | |
89 | +} | |
90 | #endif /* __KERNEL__ */ | |
91 | ||
92 | ||
93 | diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c | |
94 | index 3685cc6..ed41c15 100644 | |
95 | --- a/fs/ocfs2/xattr.c | |
96 | +++ b/fs/ocfs2/xattr.c | |
97 | @@ -52,6 +52,7 @@ | |
98 | #include "suballoc.h" | |
99 | #include "uptodate.h" | |
100 | #include "buffer_head_io.h" | |
101 | +#include "super.h" | |
102 | #include "xattr.h" | |
103 | ||
104 | ||
105 | @@ -60,6 +61,11 @@ struct ocfs2_xattr_def_value_root { | |
106 | struct ocfs2_extent_rec er; | |
107 | }; | |
108 | ||
109 | +struct ocfs2_xattr_bucket { | |
110 | + struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; | |
111 | + struct ocfs2_xattr_header *xh; | |
112 | +}; | |
113 | + | |
114 | #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) | |
115 | #define OCFS2_XATTR_INLINE_SIZE 80 | |
116 | ||
117 | @@ -115,6 +121,11 @@ struct ocfs2_xattr_search { | |
118 | int not_found; | |
119 | }; | |
120 | ||
121 | +static int ocfs2_xattr_tree_list_index_block(struct inode *inode, | |
122 | + struct ocfs2_xattr_tree_root *xt, | |
123 | + char *buffer, | |
124 | + size_t buffer_size); | |
125 | + | |
126 | static inline struct xattr_handler *ocfs2_xattr_handler(int name_index) | |
127 | { | |
128 | struct xattr_handler *handler = NULL; | |
129 | @@ -499,7 +510,7 @@ static int ocfs2_xattr_block_list(struct inode *inode, | |
130 | size_t buffer_size) | |
131 | { | |
132 | struct buffer_head *blk_bh = NULL; | |
133 | - struct ocfs2_xattr_header *header = NULL; | |
134 | + struct ocfs2_xattr_block *xb; | |
135 | int ret = 0; | |
136 | ||
137 | if (!di->i_xattr_loc) | |
138 | @@ -519,10 +530,17 @@ static int ocfs2_xattr_block_list(struct inode *inode, | |
139 | goto cleanup; | |
140 | } | |
141 | ||
142 | - header = &((struct ocfs2_xattr_block *)blk_bh->b_data)-> | |
143 | - xb_attrs.xb_header; | |
144 | + xb = (struct ocfs2_xattr_block *)blk_bh->b_data; | |
145 | ||
146 | - ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); | |
147 | + if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { | |
148 | + struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; | |
149 | + ret = ocfs2_xattr_list_entries(inode, header, | |
150 | + buffer, buffer_size); | |
151 | + } else { | |
152 | + struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root; | |
153 | + ret = ocfs2_xattr_tree_list_index_block(inode, xt, | |
154 | + buffer, buffer_size); | |
155 | + } | |
156 | cleanup: | |
157 | brelse(blk_bh); | |
158 | ||
159 | @@ -1939,3 +1957,232 @@ cleanup: | |
160 | return ret; | |
161 | } | |
162 | ||
163 | +/* | |
164 | + * Find the xattr extent rec which may contain name_hash. | |
165 | + * e_cpos will be the first name hash of the xattr rec. | |
166 | + * el must be the ocfs2_xattr_block.xb_attrs.xb_root.xt_list. | |
167 | + */ | |
168 | +static int ocfs2_xattr_get_rec(struct inode *inode, | |
169 | + u32 name_hash, | |
170 | + u64 *p_blkno, | |
171 | + u32 *e_cpos, | |
172 | + u32 *num_clusters, | |
173 | + struct ocfs2_extent_list *el) | |
174 | +{ | |
175 | + int ret = 0, i; | |
176 | + struct buffer_head *eb_bh = NULL; | |
177 | + struct ocfs2_extent_block *eb; | |
178 | + struct ocfs2_extent_rec *rec = NULL; | |
179 | + u64 e_blkno = 0; | |
180 | + | |
181 | + if (el->l_tree_depth) { | |
182 | + ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh); | |
183 | + if (ret) { | |
184 | + mlog_errno(ret); | |
185 | + goto out; | |
186 | + } | |
187 | + | |
188 | + eb = (struct ocfs2_extent_block *) eb_bh->b_data; | |
189 | + el = &eb->h_list; | |
190 | + | |
191 | + if (el->l_tree_depth) { | |
192 | + ocfs2_error(inode->i_sb, | |
193 | + "Inode %lu has non zero tree depth in " | |
194 | + "xattr tree block %llu\n", inode->i_ino, | |
195 | + (unsigned long long)eb_bh->b_blocknr); | |
196 | + ret = -EROFS; | |
197 | + goto out; | |
198 | + } | |
199 | + } | |
200 | + | |
201 | + for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { | |
202 | + rec = &el->l_recs[i]; | |
203 | + | |
204 | + if (le32_to_cpu(rec->e_cpos) <= name_hash) { | |
205 | + e_blkno = le64_to_cpu(rec->e_blkno); | |
206 | + break; | |
207 | + } | |
208 | + } | |
209 | + | |
210 | + if (!e_blkno) { | |
211 | + ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | |
212 | + "record (%u, %u, 0) in xattr", inode->i_ino, | |
213 | + le32_to_cpu(rec->e_cpos), | |
214 | + ocfs2_rec_clusters(el, rec)); | |
215 | + ret = -EROFS; | |
216 | + goto out; | |
217 | + } | |
218 | + | |
219 | + *p_blkno = le64_to_cpu(rec->e_blkno); | |
220 | + *num_clusters = le16_to_cpu(rec->e_leaf_clusters); | |
221 | + if (e_cpos) | |
222 | + *e_cpos = le32_to_cpu(rec->e_cpos); | |
223 | +out: | |
224 | + brelse(eb_bh); | |
225 | + return ret; | |
226 | +} | |
227 | + | |
228 | +typedef int (xattr_bucket_func)(struct inode *inode, | |
229 | + struct ocfs2_xattr_bucket *bucket, | |
230 | + void *para); | |
231 | + | |
232 | +static int ocfs2_iterate_xattr_buckets(struct inode *inode, | |
233 | + u64 blkno, | |
234 | + u32 clusters, | |
235 | + xattr_bucket_func *func, | |
236 | + void *para) | |
237 | +{ | |
238 | + int i, j, ret = 0; | |
239 | + int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | |
240 | + u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); | |
241 | + u32 num_buckets = clusters * bpc; | |
242 | + struct ocfs2_xattr_bucket bucket; | |
243 | + | |
244 | + memset(&bucket, 0, sizeof(bucket)); | |
245 | + | |
246 | + mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n", | |
247 | + clusters, blkno); | |
248 | + | |
249 | + for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) { | |
250 | + ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), | |
251 | + blkno, blk_per_bucket, | |
252 | + bucket.bhs, OCFS2_BH_CACHED, inode); | |
253 | + if (ret) { | |
254 | + mlog_errno(ret); | |
255 | + goto out; | |
256 | + } | |
257 | + | |
258 | + bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data; | |
259 | + /* | |
260 | + * The real bucket num in this series of blocks is stored | |
261 | + * in the 1st bucket. | |
262 | + */ | |
263 | + if (i == 0) | |
264 | + num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets); | |
265 | + | |
266 | + mlog(0, "iterating xattr bucket %llu\n", blkno); | |
267 | + if (func) { | |
268 | + ret = func(inode, &bucket, para); | |
269 | + if (ret) { | |
270 | + mlog_errno(ret); | |
271 | + break; | |
272 | + } | |
273 | + } | |
274 | + | |
275 | + for (j = 0; j < blk_per_bucket; j++) | |
276 | + brelse(bucket.bhs[j]); | |
277 | + memset(&bucket, 0, sizeof(bucket)); | |
278 | + } | |
279 | + | |
280 | +out: | |
281 | + for (j = 0; j < blk_per_bucket; j++) | |
282 | + brelse(bucket.bhs[j]); | |
283 | + | |
284 | + return ret; | |
285 | +} | |
286 | + | |
287 | +struct ocfs2_xattr_tree_list { | |
288 | + char *buffer; | |
289 | + size_t buffer_size; | |
290 | +}; | |
291 | + | |
292 | +static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, | |
293 | + struct ocfs2_xattr_header *xh, | |
294 | + int index, | |
295 | + int *block_off, | |
296 | + int *new_offset) | |
297 | +{ | |
298 | + u16 name_offset; | |
299 | + | |
300 | + if (index < 0 || index >= le16_to_cpu(xh->xh_count)) | |
301 | + return -EINVAL; | |
302 | + | |
303 | + name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); | |
304 | + | |
305 | + *block_off = name_offset >> inode->i_sb->s_blocksize_bits; | |
306 | + *new_offset = name_offset % inode->i_sb->s_blocksize; | |
307 | + | |
308 | + return 0; | |
309 | +} | |
310 | + | |
311 | +static int ocfs2_list_xattr_bucket(struct inode *inode, | |
312 | + struct ocfs2_xattr_bucket *bucket, | |
313 | + void *para) | |
314 | +{ | |
315 | + int ret = 0; | |
316 | + struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; | |
317 | + size_t size; | |
318 | + int i, block_off, new_offset; | |
319 | + | |
320 | + for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) { | |
321 | + struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i]; | |
322 | + struct xattr_handler *handler = | |
323 | + ocfs2_xattr_handler(ocfs2_xattr_get_type(entry)); | |
324 | + | |
325 | + if (handler) { | |
326 | + ret = ocfs2_xattr_bucket_get_name_value(inode, | |
327 | + bucket->xh, | |
328 | + i, | |
329 | + &block_off, | |
330 | + &new_offset); | |
331 | + if (ret) | |
332 | + break; | |
333 | + size = handler->list(inode, xl->buffer, xl->buffer_size, | |
334 | + bucket->bhs[block_off]->b_data + | |
335 | + new_offset, | |
336 | + entry->xe_name_len); | |
337 | + if (xl->buffer) { | |
338 | + if (size > xl->buffer_size) | |
339 | + return -ERANGE; | |
340 | + xl->buffer += size; | |
341 | + } | |
342 | + xl->buffer_size -= size; | |
343 | + } | |
344 | + } | |
345 | + | |
346 | + return ret; | |
347 | +} | |
348 | + | |
349 | +static int ocfs2_xattr_tree_list_index_block(struct inode *inode, | |
350 | + struct ocfs2_xattr_tree_root *xt, | |
351 | + char *buffer, | |
352 | + size_t buffer_size) | |
353 | +{ | |
354 | + struct ocfs2_extent_list *el = &xt->xt_list; | |
355 | + int ret = 0; | |
356 | + u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; | |
357 | + u64 p_blkno = 0; | |
358 | + struct ocfs2_xattr_tree_list xl = { | |
359 | + .buffer = buffer, | |
360 | + .buffer_size = buffer_size, | |
361 | + }; | |
362 | + | |
363 | + if (le16_to_cpu(el->l_next_free_rec) == 0) | |
364 | + return 0; | |
365 | + | |
366 | + while (name_hash > 0) { | |
367 | + ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, | |
368 | + &e_cpos, &num_clusters, el); | |
369 | + if (ret) { | |
370 | + mlog_errno(ret); | |
371 | + goto out; | |
372 | + } | |
373 | + | |
374 | + ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters, | |
375 | + ocfs2_list_xattr_bucket, | |
376 | + &xl); | |
377 | + if (ret) { | |
378 | + mlog_errno(ret); | |
379 | + goto out; | |
380 | + } | |
381 | + | |
382 | + if (e_cpos == 0) | |
383 | + break; | |
384 | + | |
385 | + name_hash = e_cpos - 1; | |
386 | + } | |
387 | + | |
388 | + ret = buffer_size - xl.buffer_size; | |
389 | +out: | |
390 | + return ret; | |
391 | +} | |
392 | diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h | |
393 | index f565c64..a69c8aa 100644 | |
394 | --- a/fs/ocfs2/xattr.h | |
395 | +++ b/fs/ocfs2/xattr.h | |
396 | @@ -55,4 +55,13 @@ extern int ocfs2_xattr_set(struct inode *, int, const char *, const void *, | |
397 | extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh); | |
398 | extern struct xattr_handler *ocfs2_xattr_handlers[]; | |
399 | ||
400 | +static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) | |
401 | +{ | |
402 | + return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; | |
403 | +} | |
404 | + | |
405 | +static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb) | |
406 | +{ | |
407 | + return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); | |
408 | +} | |
409 | #endif /* OCFS2_XATTR_H */ | |
410 | -- | |
411 | 1.5.4.5 | |
412 |