]>
Commit | Line | Data |
---|---|---|
2cb7cef9 BS |
1 | From: Tao Ma <tao.ma@oracle.com> |
2 | Date: Mon, 27 Oct 2008 06:06:24 +0800 | |
3 | Subject: ocfs2/xattr: Proper hash collision handle in bucket division | |
4 | ||
5 | In ocfs2/xattr, we must make sure the xattrs which have the same hash value | |
6 | exist in the same bucket so that the search schema can work. But in the old | |
7 | implementation, when we want to extend a bucket, we just move half number of | |
8 | xattrs to the new bucket. This works in most cases, but if we are lucky | |
9 | enough we will move 2 xattrs into 2 different buckets. This means that an | |
10 | xattr from the previous bucket cannot be found anymore. This patch fix this | |
11 | problem by finding the right position during extending the bucket and extend | |
12 | an empty bucket if needed. | |
13 | ||
14 | Signed-off-by: Tao Ma <tao.ma@oracle.com> | |
15 | Cc: Joel Becker <joel.becker@oracle.com> | |
16 | Signed-off-by: Mark Fasheh <mfasheh@suse.com> | |
17 | --- | |
18 | fs/ocfs2/xattr.c | 144 +++++++++++++++++++++++++++++++++++++++++++----------- | |
19 | 1 files changed, 115 insertions(+), 29 deletions(-) | |
20 | ||
21 | Index: linux-2.6.27-ocfs2/fs/ocfs2/xattr.c | |
22 | =================================================================== | |
23 | --- linux-2.6.27-ocfs2.orig/fs/ocfs2/xattr.c | |
24 | +++ linux-2.6.27-ocfs2/fs/ocfs2/xattr.c | |
25 | @@ -3110,25 +3110,73 @@ static int ocfs2_read_xattr_bucket(struc | |
26 | } | |
27 | ||
28 | /* | |
29 | - * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk). | |
30 | + * Find the suitable pos when we divide a bucket into 2. | |
31 | + * We have to make sure the xattrs with the same hash value exist | |
32 | + * in the same bucket. | |
33 | + * | |
34 | + * If this ocfs2_xattr_header covers more than one hash value, find a | |
35 | + * place where the hash value changes. Try to find the most even split. | |
36 | + * The most common case is that all entries have different hash values, | |
37 | + * and the first check we make will find a place to split. | |
38 | + */ | |
39 | +static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh) | |
40 | +{ | |
41 | + struct ocfs2_xattr_entry *entries = xh->xh_entries; | |
42 | + int count = le16_to_cpu(xh->xh_count); | |
43 | + int delta, middle = count / 2; | |
44 | + | |
45 | + /* | |
46 | + * We start at the middle. Each step gets farther away in both | |
47 | + * directions. We therefore hit the change in hash value | |
48 | + * nearest to the middle. Note that this loop does not execute for | |
49 | + * count < 2. | |
50 | + */ | |
51 | + for (delta = 0; delta < middle; delta++) { | |
52 | + /* Let's check delta earlier than middle */ | |
53 | + if (cmp_xe(&entries[middle - delta - 1], | |
54 | + &entries[middle - delta])) | |
55 | + return middle - delta; | |
56 | + | |
57 | + /* For even counts, don't walk off the end */ | |
58 | + if ((middle + delta + 1) == count) | |
59 | + continue; | |
60 | + | |
61 | + /* Now try delta past middle */ | |
62 | + if (cmp_xe(&entries[middle + delta], | |
63 | + &entries[middle + delta + 1])) | |
64 | + return middle + delta + 1; | |
65 | + } | |
66 | + | |
67 | + /* Every entry had the same hash */ | |
68 | + return count; | |
69 | +} | |
70 | + | |
71 | +/* | |
72 | + * Move some xattrs in old bucket(blk) to new bucket(new_blk). | |
73 | * first_hash will record the 1st hash of the new bucket. | |
74 | + * | |
75 | + * Normally half of the xattrs will be moved. But we have to make | |
76 | + * sure that the xattrs with the same hash value are stored in the | |
77 | + * same bucket. If all the xattrs in this bucket have the same hash | |
78 | + * value, the new bucket will be initialized as an empty one and the | |
79 | + * first_hash will be initialized as (hash_value+1). | |
80 | */ | |
81 | -static int ocfs2_half_xattr_bucket(struct inode *inode, | |
82 | - handle_t *handle, | |
83 | - u64 blk, | |
84 | - u64 new_blk, | |
85 | - u32 *first_hash, | |
86 | - int new_bucket_head) | |
87 | +static int ocfs2_divide_xattr_bucket(struct inode *inode, | |
88 | + handle_t *handle, | |
89 | + u64 blk, | |
90 | + u64 new_blk, | |
91 | + u32 *first_hash, | |
92 | + int new_bucket_head) | |
93 | { | |
94 | int ret, i; | |
95 | - u16 count, start, len, name_value_len, xe_len, name_offset; | |
96 | + int count, start, len, name_value_len = 0, xe_len, name_offset = 0; | |
97 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | |
98 | struct buffer_head **s_bhs, **t_bhs = NULL; | |
99 | struct ocfs2_xattr_header *xh; | |
100 | struct ocfs2_xattr_entry *xe; | |
101 | int blocksize = inode->i_sb->s_blocksize; | |
102 | ||
103 | - mlog(0, "move half of xattrs from bucket %llu to %llu\n", | |
104 | + mlog(0, "move some of xattrs from bucket %llu to %llu\n", | |
105 | blk, new_blk); | |
106 | ||
107 | s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); | |
108 | @@ -3171,14 +3219,35 @@ static int ocfs2_half_xattr_bucket(struc | |
109 | } | |
110 | } | |
111 | ||
112 | + xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; | |
113 | + count = le16_to_cpu(xh->xh_count); | |
114 | + start = ocfs2_xattr_find_divide_pos(xh); | |
115 | + | |
116 | + if (start == count) { | |
117 | + xe = &xh->xh_entries[start-1]; | |
118 | + | |
119 | + /* | |
120 | + * initialized a new empty bucket here. | |
121 | + * The hash value is set as one larger than | |
122 | + * that of the last entry in the previous bucket. | |
123 | + */ | |
124 | + for (i = 0; i < blk_per_bucket; i++) | |
125 | + memset(t_bhs[i]->b_data, 0, blocksize); | |
126 | + | |
127 | + xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; | |
128 | + xh->xh_free_start = cpu_to_le16(blocksize); | |
129 | + xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; | |
130 | + le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); | |
131 | + | |
132 | + goto set_num_buckets; | |
133 | + } | |
134 | + | |
135 | /* copy the whole bucket to the new first. */ | |
136 | for (i = 0; i < blk_per_bucket; i++) | |
137 | memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); | |
138 | ||
139 | /* update the new bucket. */ | |
140 | xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; | |
141 | - count = le16_to_cpu(xh->xh_count); | |
142 | - start = count / 2; | |
143 | ||
144 | /* | |
145 | * Calculate the total name/value len and xh_free_start for | |
146 | @@ -3235,6 +3304,7 @@ static int ocfs2_half_xattr_bucket(struc | |
147 | xh->xh_free_start = xe->xe_name_offset; | |
148 | } | |
149 | ||
150 | +set_num_buckets: | |
151 | /* set xh->xh_num_buckets for the new xh. */ | |
152 | if (new_bucket_head) | |
153 | xh->xh_num_buckets = cpu_to_le16(1); | |
154 | @@ -3252,9 +3322,13 @@ static int ocfs2_half_xattr_bucket(struc | |
155 | *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); | |
156 | ||
157 | /* | |
158 | - * Now only update the 1st block of the old bucket. | |
159 | - * Please note that the entry has been sorted already above. | |
160 | + * Now only update the 1st block of the old bucket. If we | |
161 | + * just added a new empty bucket, there is no need to modify | |
162 | + * it. | |
163 | */ | |
164 | + if (start == count) | |
165 | + goto out; | |
166 | + | |
167 | xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; | |
168 | memset(&xh->xh_entries[start], 0, | |
169 | sizeof(struct ocfs2_xattr_entry) * (count - start)); | |
170 | @@ -3439,15 +3513,15 @@ out: | |
171 | } | |
172 | ||
173 | /* | |
174 | - * Move half of the xattrs in this cluster to the new cluster. | |
175 | + * Move some xattrs in this cluster to the new cluster. | |
176 | * This function should only be called when bucket size == cluster size. | |
177 | * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. | |
178 | */ | |
179 | -static int ocfs2_half_xattr_cluster(struct inode *inode, | |
180 | - handle_t *handle, | |
181 | - u64 prev_blk, | |
182 | - u64 new_blk, | |
183 | - u32 *first_hash) | |
184 | +static int ocfs2_divide_xattr_cluster(struct inode *inode, | |
185 | + handle_t *handle, | |
186 | + u64 prev_blk, | |
187 | + u64 new_blk, | |
188 | + u32 *first_hash) | |
189 | { | |
190 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | |
191 | int ret, credits = 2 * blk_per_bucket; | |
192 | @@ -3461,8 +3535,8 @@ static int ocfs2_half_xattr_cluster(stru | |
193 | } | |
194 | ||
195 | /* Move half of the xattr in start_blk to the next bucket. */ | |
196 | - return ocfs2_half_xattr_bucket(inode, handle, prev_blk, | |
197 | - new_blk, first_hash, 1); | |
198 | + return ocfs2_divide_xattr_bucket(inode, handle, prev_blk, | |
199 | + new_blk, first_hash, 1); | |
200 | } | |
201 | ||
202 | /* | |
203 | @@ -3524,9 +3598,9 @@ static int ocfs2_adjust_xattr_cross_clus | |
204 | last_blk, new_blk, | |
205 | v_start); | |
206 | else { | |
207 | - ret = ocfs2_half_xattr_cluster(inode, handle, | |
208 | - last_blk, new_blk, | |
209 | - v_start); | |
210 | + ret = ocfs2_divide_xattr_cluster(inode, handle, | |
211 | + last_blk, new_blk, | |
212 | + v_start); | |
213 | ||
214 | if ((*header_bh)->b_blocknr == last_blk && extend) | |
215 | *extend = 0; | |
216 | @@ -3743,8 +3817,8 @@ static int ocfs2_extend_xattr_bucket(str | |
217 | } | |
218 | ||
219 | /* Move half of the xattr in start_blk to the next bucket. */ | |
220 | - ret = ocfs2_half_xattr_bucket(inode, handle, start_blk, | |
221 | - start_blk + blk_per_bucket, NULL, 0); | |
222 | + ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk, | |
223 | + start_blk + blk_per_bucket, NULL, 0); | |
224 | ||
225 | le16_add_cpu(&first_xh->xh_num_buckets, 1); | |
226 | ocfs2_journal_dirty(handle, first_bh); | |
227 | @@ -4435,11 +4509,21 @@ out: | |
228 | return ret; | |
229 | } | |
230 | ||
231 | -/* check whether the xattr bucket is filled up with the same hash value. */ | |
232 | +/* | |
233 | + * check whether the xattr bucket is filled up with the same hash value. | |
234 | + * If we want to insert the xattr with the same hash, return -ENOSPC. | |
235 | + * If we want to insert a xattr with different hash value, go ahead | |
236 | + * and ocfs2_divide_xattr_bucket will handle this. | |
237 | + */ | |
238 | static int ocfs2_check_xattr_bucket_collision(struct inode *inode, | |
239 | - struct ocfs2_xattr_bucket *bucket) | |
240 | + struct ocfs2_xattr_bucket *bucket, | |
241 | + const char *name) | |
242 | { | |
243 | struct ocfs2_xattr_header *xh = bucket->xh; | |
244 | + u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); | |
245 | + | |
246 | + if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) | |
247 | + return 0; | |
248 | ||
249 | if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == | |
250 | xh->xh_entries[0].xe_name_hash) { | |
251 | @@ -4562,7 +4646,9 @@ try_again: | |
252 | * one bucket's worth, so check it here whether we need to | |
253 | * add a new bucket for the insert. | |
254 | */ | |
255 | - ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket); | |
256 | + ret = ocfs2_check_xattr_bucket_collision(inode, | |
257 | + &xs->bucket, | |
258 | + xi->name); | |
259 | if (ret) { | |
260 | mlog_errno(ret); | |
261 | goto out; |