src/patches/suse-2.6.27.25/patches.fixes/ocfs2-xattr-Proper-hash-collision-handle-in-bucket.patch

   1 From: Tao Ma <tao.ma@oracle.com>
   2 Date: Mon, 27 Oct 2008 06:06:24 +0800
   3 Subject: ocfs2/xattr: Proper hash collision handle in bucket division
   4
   5 In ocfs2/xattr, we must make sure the xattrs which have the same hash value
   6 exist in the same bucket so that the search schema can work. But in the old
   7 implementation, when we want to extend a bucket, we just move half number of
   8 xattrs to the new bucket. This works in most cases, but if we are lucky
   9 enough we will move 2 xattrs into 2 different buckets. This means that an
  10 xattr from the previous bucket cannot be found anymore. This patch fix this
  11 problem by finding the right position during extending the bucket and extend
  12 an empty bucket if needed.
  13
  14 Signed-off-by: Tao Ma <tao.ma@oracle.com>
  15 Cc: Joel Becker <joel.becker@oracle.com>
  16 Signed-off-by: Mark Fasheh <mfasheh@suse.com>
  17 ---
  18  fs/ocfs2/xattr.c |  144 +++++++++++++++++++++++++++++++++++++++++++-----------
  19  1 files changed, 115 insertions(+), 29 deletions(-)
  20
  21 Index: linux-2.6.27-ocfs2/fs/ocfs2/xattr.c
  22 ===================================================================
  23 --- linux-2.6.27-ocfs2.orig/fs/ocfs2/xattr.c
  24 +++ linux-2.6.27-ocfs2/fs/ocfs2/xattr.c
  25 @@ -3110,25 +3110,73 @@ static int ocfs2_read_xattr_bucket(struc
  26  }
  27
  28  /*
  29 - * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk).
  30 + * Find the suitable pos when we divide a bucket into 2.
  31 + * We have to make sure the xattrs with the same hash value exist
  32 + * in the same bucket.
  33 + *
  34 + * If this ocfs2_xattr_header covers more than one hash value, find a
  35 + * place where the hash value changes.  Try to find the most even split.
  36 + * The most common case is that all entries have different hash values,
  37 + * and the first check we make will find a place to split.
  38 + */
  39 +static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
  40 +{
  41 +       struct ocfs2_xattr_entry *entries = xh->xh_entries;
  42 +       int count = le16_to_cpu(xh->xh_count);
  43 +       int delta, middle = count / 2;
  44 +
  45 +       /*
  46 +        * We start at the middle.  Each step gets farther away in both
  47 +        * directions.  We therefore hit the change in hash value
  48 +        * nearest to the middle.  Note that this loop does not execute for
  49 +        * count < 2.
  50 +        */
  51 +       for (delta = 0; delta < middle; delta++) {
  52 +               /* Let's check delta earlier than middle */
  53 +               if (cmp_xe(&entries[middle - delta - 1],
  54 +                          &entries[middle - delta]))
  55 +                       return middle - delta;
  56 +
  57 +               /* For even counts, don't walk off the end */
  58 +               if ((middle + delta + 1) == count)
  59 +                       continue;
  60 +
  61 +               /* Now try delta past middle */
  62 +               if (cmp_xe(&entries[middle + delta],
  63 +                          &entries[middle + delta + 1]))
  64 +                       return middle + delta + 1;
  65 +       }
  66 +
  67 +       /* Every entry had the same hash */
  68 +       return count;
  69 +}
  70 +
  71 +/*
  72 + * Move some xattrs in old bucket(blk) to new bucket(new_blk).
  73   * first_hash will record the 1st hash of the new bucket.
  74 + *
  75 + * Normally half of the xattrs will be moved.  But we have to make
  76 + * sure that the xattrs with the same hash value are stored in the
  77 + * same bucket. If all the xattrs in this bucket have the same hash
  78 + * value, the new bucket will be initialized as an empty one and the
  79 + * first_hash will be initialized as (hash_value+1).
  80   */
  81 -static int ocfs2_half_xattr_bucket(struct inode *inode,
  82 -                                  handle_t *handle,
  83 -                                  u64 blk,
  84 -                                  u64 new_blk,
  85 -                                  u32 *first_hash,
  86 -                                  int new_bucket_head)
  87 +static int ocfs2_divide_xattr_bucket(struct inode *inode,
  88 +                                   handle_t *handle,
  89 +                                   u64 blk,
  90 +                                   u64 new_blk,
  91 +                                   u32 *first_hash,
  92 +                                   int new_bucket_head)
  93  {
  94         int ret, i;
  95 -       u16 count, start, len, name_value_len, xe_len, name_offset;
  96 +       int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
  97         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
  98         struct buffer_head **s_bhs, **t_bhs = NULL;
  99         struct ocfs2_xattr_header *xh;
 100         struct ocfs2_xattr_entry *xe;
 101         int blocksize = inode->i_sb->s_blocksize;
 102
 103 -       mlog(0, "move half of xattrs from bucket %llu to %llu\n",
 104 +       mlog(0, "move some of xattrs from bucket %llu to %llu\n",
 105              blk, new_blk);
 106
 107         s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
 108 @@ -3171,14 +3219,35 @@ static int ocfs2_half_xattr_bucket(struc
 109                 }
 110         }
 111
 112 +       xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
 113 +       count = le16_to_cpu(xh->xh_count);
 114 +       start = ocfs2_xattr_find_divide_pos(xh);
 115 +
 116 +       if (start == count) {
 117 +               xe = &xh->xh_entries[start-1];
 118 +
 119 +               /*
 120 +                * initialized a new empty bucket here.
 121 +                * The hash value is set as one larger than
 122 +                * that of the last entry in the previous bucket.
 123 +                */
 124 +               for (i = 0; i < blk_per_bucket; i++)
 125 +                       memset(t_bhs[i]->b_data, 0, blocksize);
 126 +
 127 +               xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
 128 +               xh->xh_free_start = cpu_to_le16(blocksize);
 129 +               xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
 130 +               le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
 131 +
 132 +               goto set_num_buckets;
 133 +       }
 134 +
 135         /* copy the whole bucket to the new first. */
 136         for (i = 0; i < blk_per_bucket; i++)
 137                 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
 138
 139         /* update the new bucket. */
 140         xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
 141 -       count = le16_to_cpu(xh->xh_count);
 142 -       start = count / 2;
 143
 144         /*
 145          * Calculate the total name/value len and xh_free_start for
 146 @@ -3235,6 +3304,7 @@ static int ocfs2_half_xattr_bucket(struc
 147                         xh->xh_free_start = xe->xe_name_offset;
 148         }
 149
 150 +set_num_buckets:
 151         /* set xh->xh_num_buckets for the new xh. */
 152         if (new_bucket_head)
 153                 xh->xh_num_buckets = cpu_to_le16(1);
 154 @@ -3252,9 +3322,13 @@ static int ocfs2_half_xattr_bucket(struc
 155                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
 156
 157         /*
 158 -        * Now only update the 1st block of the old bucket.
 159 -        * Please note that the entry has been sorted already above.
 160 +        * Now only update the 1st block of the old bucket.  If we
 161 +        * just added a new empty bucket, there is no need to modify
 162 +        * it.
 163          */
 164 +       if (start == count)
 165 +               goto out;
 166 +
 167         xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
 168         memset(&xh->xh_entries[start], 0,
 169                sizeof(struct ocfs2_xattr_entry) * (count - start));
 170 @@ -3439,15 +3513,15 @@ out:
 171  }
 172
 173  /*
 174 - * Move half of the xattrs in this cluster to the new cluster.
 175 + * Move some xattrs in this cluster to the new cluster.
 176   * This function should only be called when bucket size == cluster size.
 177   * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
 178   */
 179 -static int ocfs2_half_xattr_cluster(struct inode *inode,
 180 -                                   handle_t *handle,
 181 -                                   u64 prev_blk,
 182 -                                   u64 new_blk,
 183 -                                   u32 *first_hash)
 184 +static int ocfs2_divide_xattr_cluster(struct inode *inode,
 185 +                                     handle_t *handle,
 186 +                                     u64 prev_blk,
 187 +                                     u64 new_blk,
 188 +                                     u32 *first_hash)
 189  {
 190         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
 191         int ret, credits = 2 * blk_per_bucket;
 192 @@ -3461,8 +3535,8 @@ static int ocfs2_half_xattr_cluster(stru
 193         }
 194
 195         /* Move half of the xattr in start_blk to the next bucket. */
 196 -       return  ocfs2_half_xattr_bucket(inode, handle, prev_blk,
 197 -                                       new_blk, first_hash, 1);
 198 +       return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
 199 +                                         new_blk, first_hash, 1);
 200  }
 201
 202  /*
 203 @@ -3524,9 +3598,9 @@ static int ocfs2_adjust_xattr_cross_clus
 204                                                      last_blk, new_blk,
 205                                                      v_start);
 206                 else {
 207 -                       ret = ocfs2_half_xattr_cluster(inode, handle,
 208 -                                                      last_blk, new_blk,
 209 -                                                      v_start);
 210 +                       ret = ocfs2_divide_xattr_cluster(inode, handle,
 211 +                                                        last_blk, new_blk,
 212 +                                                        v_start);
 213
 214                         if ((*header_bh)->b_blocknr == last_blk && extend)
 215                                 *extend = 0;
 216 @@ -3743,8 +3817,8 @@ static int ocfs2_extend_xattr_bucket(str
 217         }
 218
 219         /* Move half of the xattr in start_blk to the next bucket. */
 220 -       ret = ocfs2_half_xattr_bucket(inode, handle, start_blk,
 221 -                                     start_blk + blk_per_bucket, NULL, 0);
 222 +       ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk,
 223 +                                       start_blk + blk_per_bucket, NULL, 0);
 224
 225         le16_add_cpu(&first_xh->xh_num_buckets, 1);
 226         ocfs2_journal_dirty(handle, first_bh);
 227 @@ -4435,11 +4509,21 @@ out:
 228         return ret;
 229  }
 230
 231 -/* check whether the xattr bucket is filled up with the same hash value. */
 232 +/*
 233 + * check whether the xattr bucket is filled up with the same hash value.
 234 + * If we want to insert the xattr with the same hash, return -ENOSPC.
 235 + * If we want to insert a xattr with different hash value, go ahead
 236 + * and ocfs2_divide_xattr_bucket will handle this.
 237 + */
 238  static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
 239 -                                             struct ocfs2_xattr_bucket *bucket)
 240 +                                             struct ocfs2_xattr_bucket *bucket,
 241 +                                             const char *name)
 242  {
 243         struct ocfs2_xattr_header *xh = bucket->xh;
 244 +       u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
 245 +
 246 +       if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
 247 +               return 0;
 248
 249         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
 250             xh->xh_entries[0].xe_name_hash) {
 251 @@ -4562,7 +4646,9 @@ try_again:
 252                  * one bucket's worth, so check it here whether we need to
 253                  * add a new bucket for the insert.
 254                  */
 255 -               ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket);
 256 +               ret = ocfs2_check_xattr_bucket_collision(inode,
 257 +                                                        &xs->bucket,
 258 +                                                        xi->name);
 259                 if (ret) {
 260                         mlog_errno(ret);
 261                         goto out;