]> git.ipfire.org Git - ipfire-2.x.git/blame - src/patches/suse-2.6.27.39/patches.suse/ocfs2-Add-xattr-bucket-iteration-for.patch
Imported linux-2.6.27.39 suse/xen patches.
[ipfire-2.x.git] / src / patches / suse-2.6.27.39 / patches.suse / ocfs2-Add-xattr-bucket-iteration-for.patch
CommitLineData
2cb7cef9
BS
1From: Tao Ma <tao.ma@oracle.com>
2Subject: [PATCH 11/16] ocfs2: Add xattr bucket iteration for large numbers of EAs
3Patch-mainline: 2.6.28?
4References: FATE302067
5
6Ocfs2 breaks up xattr index tree leaves into 4k regions, called buckets.
7Attributes are stored within a given bucket, depending on hash value.
8
9After a discussion with Mark, we decided that the per-bucket index
10(xe_entry[]) would only exist in the 1st block of a bucket. Likewise,
11name/value pairs will not straddle more than one block. This allows the
12majority of operations to work directly on the buffer heads in a leaf block.
13
14This patch adds code to iterate the buckets in an EA. A new abstraction of
15ocfs2_xattr_bucket is added. It records the bhs in this bucket and
16ocfs2_xattr_header. This keeps the code neat, improving readability.
17
18Signed-off-by: Tao Ma <tao.ma@oracle.com>
19Signed-off-by: Mark Fasheh <mfasheh@suse.com>
20---
21 fs/ocfs2/ocfs2_fs.h | 35 +++++++-
22 fs/ocfs2/xattr.c | 255 ++++++++++++++++++++++++++++++++++++++++++++++++++-
23 fs/ocfs2/xattr.h | 9 ++
24 3 files changed, 293 insertions(+), 6 deletions(-)
25
26diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
27index 98e1f8b..8d5e72f 100644
28--- a/fs/ocfs2/ocfs2_fs.h
29+++ b/fs/ocfs2/ocfs2_fs.h
30@@ -755,8 +755,13 @@ struct ocfs2_xattr_header {
31 __le16 xh_count; /* contains the count of how
32 many records are in the
33 local xattr storage. */
34- __le16 xh_reserved1;
35- __le32 xh_reserved2;
36+ __le16 xh_free_start; /* current offset for storing
37+ xattr. */
38+ __le16 xh_name_value_len; /* total length of name/value
39+ pairs in this bucket. */
40+ __le16 xh_num_buckets; /* number of buckets in one extent
41+ record, only valid in the
42+ first bucket. */
43 __le64 xh_csum;
44 struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */
45 };
46@@ -793,6 +798,10 @@ struct ocfs2_xattr_tree_root {
47 #define OCFS2_XATTR_SIZE(size) (((size) + OCFS2_XATTR_ROUND) & \
48 ~(OCFS2_XATTR_ROUND))
49
50+#define OCFS2_XATTR_BUCKET_SIZE 4096
51+#define OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET (OCFS2_XATTR_BUCKET_SIZE \
52+ / OCFS2_MIN_BLOCKSIZE)
53+
54 /*
55 * On disk structure for xattr block.
56 */
57@@ -963,6 +972,17 @@ static inline u64 ocfs2_backup_super_blkno(struct super_block *sb, int index)
58 return 0;
59
60 }
61+
62+static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb)
63+{
64+ int size;
65+
66+ size = sb->s_blocksize -
67+ offsetof(struct ocfs2_xattr_block,
68+ xb_attrs.xb_root.xt_list.l_recs);
69+
70+ return size / sizeof(struct ocfs2_extent_rec);
71+}
72 #else
73 static inline int ocfs2_fast_symlink_chars(int blocksize)
74 {
75@@ -1046,6 +1066,17 @@ static inline uint64_t ocfs2_backup_super_blkno(int blocksize, int index)
76
77 return 0;
78 }
79+
80+static inline int ocfs2_xattr_recs_per_xb(int blocksize)
81+{
82+ int size;
83+
84+ size = blocksize -
85+ offsetof(struct ocfs2_xattr_block,
86+ xb_attrs.xb_root.xt_list.l_recs);
87+
88+ return size / sizeof(struct ocfs2_extent_rec);
89+}
90 #endif /* __KERNEL__ */
91
92
93diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
94index 3685cc6..ed41c15 100644
95--- a/fs/ocfs2/xattr.c
96+++ b/fs/ocfs2/xattr.c
97@@ -52,6 +52,7 @@
98 #include "suballoc.h"
99 #include "uptodate.h"
100 #include "buffer_head_io.h"
101+#include "super.h"
102 #include "xattr.h"
103
104
105@@ -60,6 +61,11 @@ struct ocfs2_xattr_def_value_root {
106 struct ocfs2_extent_rec er;
107 };
108
109+struct ocfs2_xattr_bucket {
110+ struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
111+ struct ocfs2_xattr_header *xh;
112+};
113+
114 #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root))
115 #define OCFS2_XATTR_INLINE_SIZE 80
116
117@@ -115,6 +121,11 @@ struct ocfs2_xattr_search {
118 int not_found;
119 };
120
121+static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
122+ struct ocfs2_xattr_tree_root *xt,
123+ char *buffer,
124+ size_t buffer_size);
125+
126 static inline struct xattr_handler *ocfs2_xattr_handler(int name_index)
127 {
128 struct xattr_handler *handler = NULL;
129@@ -499,7 +510,7 @@ static int ocfs2_xattr_block_list(struct inode *inode,
130 size_t buffer_size)
131 {
132 struct buffer_head *blk_bh = NULL;
133- struct ocfs2_xattr_header *header = NULL;
134+ struct ocfs2_xattr_block *xb;
135 int ret = 0;
136
137 if (!di->i_xattr_loc)
138@@ -519,10 +530,17 @@ static int ocfs2_xattr_block_list(struct inode *inode,
139 goto cleanup;
140 }
141
142- header = &((struct ocfs2_xattr_block *)blk_bh->b_data)->
143- xb_attrs.xb_header;
144+ xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
145
146- ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
147+ if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
148+ struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
149+ ret = ocfs2_xattr_list_entries(inode, header,
150+ buffer, buffer_size);
151+ } else {
152+ struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
153+ ret = ocfs2_xattr_tree_list_index_block(inode, xt,
154+ buffer, buffer_size);
155+ }
156 cleanup:
157 brelse(blk_bh);
158
159@@ -1939,3 +1957,232 @@ cleanup:
160 return ret;
161 }
162
163+/*
164+ * Find the xattr extent rec which may contain name_hash.
165+ * e_cpos will be the first name hash of the xattr rec.
166+ * el must be the ocfs2_xattr_block.xb_attrs.xb_root.xt_list.
167+ */
168+static int ocfs2_xattr_get_rec(struct inode *inode,
169+ u32 name_hash,
170+ u64 *p_blkno,
171+ u32 *e_cpos,
172+ u32 *num_clusters,
173+ struct ocfs2_extent_list *el)
174+{
175+ int ret = 0, i;
176+ struct buffer_head *eb_bh = NULL;
177+ struct ocfs2_extent_block *eb;
178+ struct ocfs2_extent_rec *rec = NULL;
179+ u64 e_blkno = 0;
180+
181+ if (el->l_tree_depth) {
182+ ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
183+ if (ret) {
184+ mlog_errno(ret);
185+ goto out;
186+ }
187+
188+ eb = (struct ocfs2_extent_block *) eb_bh->b_data;
189+ el = &eb->h_list;
190+
191+ if (el->l_tree_depth) {
192+ ocfs2_error(inode->i_sb,
193+ "Inode %lu has non zero tree depth in "
194+ "xattr tree block %llu\n", inode->i_ino,
195+ (unsigned long long)eb_bh->b_blocknr);
196+ ret = -EROFS;
197+ goto out;
198+ }
199+ }
200+
201+ for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
202+ rec = &el->l_recs[i];
203+
204+ if (le32_to_cpu(rec->e_cpos) <= name_hash) {
205+ e_blkno = le64_to_cpu(rec->e_blkno);
206+ break;
207+ }
208+ }
209+
210+ if (!e_blkno) {
211+ ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
212+ "record (%u, %u, 0) in xattr", inode->i_ino,
213+ le32_to_cpu(rec->e_cpos),
214+ ocfs2_rec_clusters(el, rec));
215+ ret = -EROFS;
216+ goto out;
217+ }
218+
219+ *p_blkno = le64_to_cpu(rec->e_blkno);
220+ *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
221+ if (e_cpos)
222+ *e_cpos = le32_to_cpu(rec->e_cpos);
223+out:
224+ brelse(eb_bh);
225+ return ret;
226+}
227+
228+typedef int (xattr_bucket_func)(struct inode *inode,
229+ struct ocfs2_xattr_bucket *bucket,
230+ void *para);
231+
232+static int ocfs2_iterate_xattr_buckets(struct inode *inode,
233+ u64 blkno,
234+ u32 clusters,
235+ xattr_bucket_func *func,
236+ void *para)
237+{
238+ int i, j, ret = 0;
239+ int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
240+ u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
241+ u32 num_buckets = clusters * bpc;
242+ struct ocfs2_xattr_bucket bucket;
243+
244+ memset(&bucket, 0, sizeof(bucket));
245+
246+ mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
247+ clusters, blkno);
248+
249+ for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
250+ ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
251+ blkno, blk_per_bucket,
252+ bucket.bhs, OCFS2_BH_CACHED, inode);
253+ if (ret) {
254+ mlog_errno(ret);
255+ goto out;
256+ }
257+
258+ bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data;
259+ /*
260+ * The real bucket num in this series of blocks is stored
261+ * in the 1st bucket.
262+ */
263+ if (i == 0)
264+ num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets);
265+
266+ mlog(0, "iterating xattr bucket %llu\n", blkno);
267+ if (func) {
268+ ret = func(inode, &bucket, para);
269+ if (ret) {
270+ mlog_errno(ret);
271+ break;
272+ }
273+ }
274+
275+ for (j = 0; j < blk_per_bucket; j++)
276+ brelse(bucket.bhs[j]);
277+ memset(&bucket, 0, sizeof(bucket));
278+ }
279+
280+out:
281+ for (j = 0; j < blk_per_bucket; j++)
282+ brelse(bucket.bhs[j]);
283+
284+ return ret;
285+}
286+
287+struct ocfs2_xattr_tree_list {
288+ char *buffer;
289+ size_t buffer_size;
290+};
291+
292+static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
293+ struct ocfs2_xattr_header *xh,
294+ int index,
295+ int *block_off,
296+ int *new_offset)
297+{
298+ u16 name_offset;
299+
300+ if (index < 0 || index >= le16_to_cpu(xh->xh_count))
301+ return -EINVAL;
302+
303+ name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
304+
305+ *block_off = name_offset >> inode->i_sb->s_blocksize_bits;
306+ *new_offset = name_offset % inode->i_sb->s_blocksize;
307+
308+ return 0;
309+}
310+
311+static int ocfs2_list_xattr_bucket(struct inode *inode,
312+ struct ocfs2_xattr_bucket *bucket,
313+ void *para)
314+{
315+ int ret = 0;
316+ struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
317+ size_t size;
318+ int i, block_off, new_offset;
319+
320+ for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) {
321+ struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i];
322+ struct xattr_handler *handler =
323+ ocfs2_xattr_handler(ocfs2_xattr_get_type(entry));
324+
325+ if (handler) {
326+ ret = ocfs2_xattr_bucket_get_name_value(inode,
327+ bucket->xh,
328+ i,
329+ &block_off,
330+ &new_offset);
331+ if (ret)
332+ break;
333+ size = handler->list(inode, xl->buffer, xl->buffer_size,
334+ bucket->bhs[block_off]->b_data +
335+ new_offset,
336+ entry->xe_name_len);
337+ if (xl->buffer) {
338+ if (size > xl->buffer_size)
339+ return -ERANGE;
340+ xl->buffer += size;
341+ }
342+ xl->buffer_size -= size;
343+ }
344+ }
345+
346+ return ret;
347+}
348+
349+static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
350+ struct ocfs2_xattr_tree_root *xt,
351+ char *buffer,
352+ size_t buffer_size)
353+{
354+ struct ocfs2_extent_list *el = &xt->xt_list;
355+ int ret = 0;
356+ u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
357+ u64 p_blkno = 0;
358+ struct ocfs2_xattr_tree_list xl = {
359+ .buffer = buffer,
360+ .buffer_size = buffer_size,
361+ };
362+
363+ if (le16_to_cpu(el->l_next_free_rec) == 0)
364+ return 0;
365+
366+ while (name_hash > 0) {
367+ ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
368+ &e_cpos, &num_clusters, el);
369+ if (ret) {
370+ mlog_errno(ret);
371+ goto out;
372+ }
373+
374+ ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
375+ ocfs2_list_xattr_bucket,
376+ &xl);
377+ if (ret) {
378+ mlog_errno(ret);
379+ goto out;
380+ }
381+
382+ if (e_cpos == 0)
383+ break;
384+
385+ name_hash = e_cpos - 1;
386+ }
387+
388+ ret = buffer_size - xl.buffer_size;
389+out:
390+ return ret;
391+}
392diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
393index f565c64..a69c8aa 100644
394--- a/fs/ocfs2/xattr.h
395+++ b/fs/ocfs2/xattr.h
396@@ -55,4 +55,13 @@ extern int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
397 extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh);
398 extern struct xattr_handler *ocfs2_xattr_handlers[];
399
400+static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
401+{
402+ return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
403+}
404+
405+static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
406+{
407+ return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
408+}
409 #endif /* OCFS2_XATTR_H */
410--
4111.5.4.5
412