xfs_scrub: make name_entry a first class structure

author Darrick J. Wong <darrick.wong@oracle.com>

Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)

committer Eric Sandeen <sandeen@redhat.com>

Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)
author Darrick J. Wong <darrick.wong@oracle.com>
Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)
committer Eric Sandeen <sandeen@redhat.com>
Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)
diff --git a/scrub/unicrash.c b/scrub/unicrash.c

index 3538a6064785ec4f40f2efb858e0b9d2932a0424..51da32c2eb4f3245dc0c4e52c18426e89d880c82 100644 (file)
--- a/scrub/unicrash.c
+++ b/scrub/unicrash.c
@@ -61,9 +61,16 @@
  
  struct name_entry {
         struct name_entry       *next;
+
+       /* NFKC normalized name */
+       uint8_t                 *normstr;
+       size_t                  normstrlen;
+
         xfs_ino_t               ino;
-       size_t                  uninamelen;
-       uint8_t                 uniname[0];
+
+       /* Raw UTF8 name */
+       size_t                  namelen;
+       char                    name[0];
  };
  #define NAME_ENTRY_SZ(nl)      (sizeof(struct name_entry) + 1 + \
                                  (nl * sizeof(uint8_t)))
@@ -119,6 +126,120 @@ is_utf8_locale(void)
         return answer;
  }
  
+/*
+ * Generate normalized form of the name.
+ * If this fails, just forget everything; this is an advisory checker.
+ */
+static bool
+name_entry_compute_checknames(
+       struct unicrash         *uc,
+       struct name_entry       *entry)
+{
+       uint8_t                 *normstr;
+       size_t                  normstrlen;
+
+       normstrlen = (entry->namelen * 2) + 1;
+       normstr = calloc(normstrlen, sizeof(uint8_t));
+       if (!normstr)
+               return false;
+
+       if (!u8_normalize(UNINORM_NFKC, (const uint8_t *)entry->name,
+                       entry->namelen, normstr, &normstrlen));
+               goto out_normstr;
+
+       entry->normstr = normstr;
+       entry->normstrlen = normstrlen;
+       return true;
+out_normstr:
+       free(normstr);
+       return false;
+}
+
+/* Create a new name entry, returns false if we could not succeed. */
+static bool
+name_entry_create(
+       struct unicrash         *uc,
+       const char              *name,
+       xfs_ino_t               ino,
+       struct name_entry       **entry)
+{
+       struct name_entry       *new_entry;
+       size_t                  namelen = strlen(name);
+
+       /* Create new entry */
+       new_entry = calloc(NAME_ENTRY_SZ(namelen), 1);
+       if (!new_entry)
+               return false;
+       new_entry->next = NULL;
+       new_entry->ino = ino;
+       memcpy(new_entry->name, name, namelen);
+       new_entry->name[namelen] = 0;
+       new_entry->namelen = namelen;
+
+       /* Normalize name to find collisions. */
+       if (!name_entry_compute_checknames(uc, new_entry))
+               goto out;
+
+       *entry = new_entry;
+       return true;
+
+out:
+       free(new_entry);
+       return false;
+}
+
+/* Free a name entry */
+static void
+name_entry_free(
+       struct name_entry       *entry)
+{
+       free(entry->normstr);
+       free(entry);
+}
+
+/* Adapt the dirhash function from libxfs, avoid linking with libxfs. */
+
+#define rol32(x, y)            (((x) << (y)) | ((x) >> (32 - (y))))
+
+/*
+ * Implement a simple hash on a character string.
+ * Rotate the hash value by 7 bits, then XOR each character in.
+ * This is implemented with some source-level loop unrolling.
+ */
+static xfs_dahash_t
+name_entry_hash(
+       struct name_entry       *entry)
+{
+       uint8_t                 *name;
+       size_t                  namelen;
+       xfs_dahash_t            hash;
+
+       name = entry->normstr;
+       namelen = entry->normstrlen;
+
+       /*
+        * Do four characters at a time as long as we can.
+        */
+       for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
+               hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
+                      (name[3] << 0) ^ rol32(hash, 7 * 4);
+
+       /*
+        * Now do the rest of the characters.
+        */
+       switch (namelen) {
+       case 3:
+               return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
+                      rol32(hash, 7 * 3);
+       case 2:
+               return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
+       case 1:
+               return (name[0] << 0) ^ rol32(hash, 7 * 1);
+       default: /* case 0: */
+               return hash;
+       }
+}
+
  /* Initialize the collision detector. */
  static bool
  unicrash_init(
@@ -190,89 +311,28 @@ unicrash_free(
         for (i = 0; i < uc->nr_buckets; i++) {
                 for (ne = uc->buckets[i]; ne != NULL; ne = x) {
                         x = ne->next;
-                       free(ne);
+                       name_entry_free(ne);
                 }
         }
         free(uc);
  }
  
-/* Steal the dirhash function from libxfs, avoid linking with libxfs. */
-
-#define rol32(x, y)            (((x) << (y)) | ((x) >> (32 - (y))))
-
-/*
- * Implement a simple hash on a character string.
- * Rotate the hash value by 7 bits, then XOR each character in.
- * This is implemented with some source-level loop unrolling.
- */
-static xfs_dahash_t
-unicrash_hashname(
-       const uint8_t           *name,
-       size_t                  namelen)
-{
-       xfs_dahash_t            hash;
-
-       /*
-        * Do four characters at a time as long as we can.
-        */
-       for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
-               hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
-                      (name[3] << 0) ^ rol32(hash, 7 * 4);
-
-       /*
-        * Now do the rest of the characters.
-        */
-       switch (namelen) {
-       case 3:
-               return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
-                      rol32(hash, 7 * 3);
-       case 2:
-               return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
-       case 1:
-               return (name[0] << 0) ^ rol32(hash, 7 * 1);
-       default: /* case 0: */
-               return hash;
-       }
-}
-
-/*
- * Normalize a name according to Unicode NFKC normalization rules.
- * Returns true if the name was already normalized.
- */
-static bool
-unicrash_normalize(
-       const char              *in,
-       uint8_t                 *out,
-       size_t                  outlen)
-{
-       size_t                  inlen = strlen(in);
-
-       assert(inlen <= outlen);
-       if (!u8_normalize(UNINORM_NFKC, (const uint8_t *)in, inlen,
-                       out, &outlen)) {
-               /* Didn't normalize, just return the same buffer. */
-               memcpy(out, in, inlen + 1);
-               return true;
-       }
-       out[outlen] = 0;
-       return outlen == inlen ? memcmp(in, out, inlen) == 0 : false;
-}
-
  /* Complain about Unicode problems. */
  static void
  unicrash_complain(
         struct unicrash         *uc,
         const char              *descr,
         const char              *what,
+       struct name_entry       *entry,
         unsigned int            badflags,
-       const char              *name,
-       uint8_t                 *uniname)
+       struct name_entry       *dup_entry)
  {
         char                    *bad1 = NULL;
         char                    *bad2 = NULL;
  
-       bad1 = string_escape(name);
-       bad2 = string_escape((char *)uniname);
+       bad1 = string_escape(entry->name);
+       if (dup_entry)
+               bad2 = string_escape(dup_entry->name);
  
         /*
          * Two names that normalize to the same string will render
@@ -294,52 +354,39 @@ out:
  
  /*
   * Try to add a name -> ino entry to the collision detector.  The name
- * must be normalized according to Unicode NFKC normalization rules to
- * detect byte-unique names that map to the same sequence of Unicode
- * code points.
- *
- * This function returns true either if there was no previous mapping or
- * there was a mapping that matched exactly.  It returns false if
- * there is already a record with that name pointing to a different
- * inode.
+ * must be normalized according to Unicode NFKC rules to detect names that
+ * could be confused with each other.
   */
  static bool
  unicrash_add(
         struct unicrash         *uc,
-       uint8_t                 *uniname,
-       xfs_ino_t               ino,
-       unsigned int            *badflags)
+       struct name_entry       *new_entry,
+       unsigned int            *badflags,
+       struct name_entry       **existing_entry)
  {
-       struct name_entry       *ne;
-       struct name_entry       *x;
-       struct name_entry       **nep;
-       size_t                  uninamelen = u8_strlen(uniname);
+       struct name_entry       *entry;
         size_t                  bucket;
         xfs_dahash_t            hash;
  
-       /* Do we already know about that name? */
-       hash = unicrash_hashname(uniname, uninamelen);
+       /* Store name in hashtable. */
+       hash = name_entry_hash(new_entry);
         bucket = hash % uc->nr_buckets;
-       for (nep = &uc->buckets[bucket], ne = *nep; ne != NULL; ne = x) {
-               if (u8_strcmp(uniname, ne->uniname) == 0 &&
-                   (uc->compare_ino ? ino != ne->ino : true)) {
+       entry = uc->buckets[bucket];
+       new_entry->next = entry;
+       uc->buckets[bucket] = new_entry;
+
+       while (entry != NULL) {
+               /* Same normalization? */
+               if (new_entry->normstrlen == entry->normstrlen &&
+                   !u8_strcmp(new_entry->normstr, entry->normstr) &&
+                   (uc->compare_ino ? entry->ino != new_entry->ino : true)) {
                         *badflags |= UNICRASH_NOT_UNIQUE;
+                       *existing_entry = entry;
                         return true;
                 }
-               nep = &ne->next;
-               x = ne->next;
+               entry = entry->next;
         }
  
-       /* Remember that name. */
-       x = malloc(NAME_ENTRY_SZ(uninamelen));
-       if (!x)
-               return false;
-       x->next = NULL;
-       x->ino = ino;
-       x->uninamelen = uninamelen;
-       memcpy(x->uniname, uniname, uninamelen + 1);
-       *nep = x;
-
         return true;
  }
  
@@ -352,19 +399,22 @@ __unicrash_check_name(
         const char              *name,
         xfs_ino_t               ino)
  {
-       uint8_t                 uniname[(NAME_MAX * 2) + 1];
+       struct name_entry       *dup_entry = NULL;
+       struct name_entry       *new_entry;
         unsigned int            badflags = 0;
         bool                    moveon;
  
-       memset(uniname, 0, (NAME_MAX * 2) + 1);
-       unicrash_normalize(name, uniname, NAME_MAX * 2);
-       moveon = unicrash_add(uc, uniname, ino, &badflags);
+       /* If we can't create entry data, just skip it. */
+       if (!name_entry_create(uc, name, ino, &new_entry))
+               return true;
+
+       moveon = unicrash_add(uc, new_entry, &badflags, &dup_entry);
         if (!moveon)
                 return false;
  
         if (badflags)
-               unicrash_complain(uc, descr, namedescr, badflags, name,
-                               uniname);
+               unicrash_complain(uc, descr, namedescr, new_entry, badflags,
+                               dup_entry);
  
         return true;
  }
author	Darrick J. Wong <darrick.wong@oracle.com>
	Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)
committer	Eric Sandeen <sandeen@redhat.com>
	Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)