git.ipfire.org Git - thirdparty/xfsprogs-dev.git/commitdiff
xfs_scrub: make name_entry a first class structure
author: Darrick J. Wong <darrick.wong@oracle.com>
Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)
committer: Eric Sandeen <sandeen@redhat.com>
Thu, 12 Apr 2018 15:34:11 +0000 (10:34 -0500)
Instead of open-coding the construction and hashtable insertion of name
entries, make name_entry a first class object.  This means that we now
have name_entry_ prefix functions that take care of computing Unicode
normalized names as part of name_entry construction, and we pass around
the name_entries when we're looking for suspicious characters and
identically rendering names.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Acked-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
scrub/unicrash.c

index 3538a6064785ec4f40f2efb858e0b9d2932a0424..51da32c2eb4f3245dc0c4e52c18426e89d880c82 100644 (file)
 
 struct name_entry {
        struct name_entry       *next;
+
+       /* NFKC normalized name */
+       uint8_t                 *normstr;
+       size_t                  normstrlen;
+
        xfs_ino_t               ino;
-       size_t                  uninamelen;
-       uint8_t                 uniname[0];
+
+       /* Raw UTF8 name */
+       size_t                  namelen;
+       char                    name[0];
 };
 #define NAME_ENTRY_SZ(nl)      (sizeof(struct name_entry) + 1 + \
                                 (nl * sizeof(uint8_t)))
@@ -119,6 +126,120 @@ is_utf8_locale(void)
        return answer;
 }
 
+/*
+ * Compute the NFKC normalized form of entry->name and store it in the entry.
+ * If this fails, just forget everything; this is an advisory checker.
+ */
+static bool
+name_entry_compute_checknames(
+       struct unicrash         *uc,
+       struct name_entry       *entry)
+{
+       uint8_t                 *normstr;
+       size_t                  normstrlen;
+
+       normstrlen = (entry->namelen * 2) + 1;
+       normstr = calloc(normstrlen, sizeof(uint8_t));
+       if (!normstr)
+               return false;
+
+       if (!u8_normalize(UNINORM_NFKC, (const uint8_t *)entry->name,
+                       entry->namelen, normstr, &normstrlen))
+               goto out_normstr;
+
+       entry->normstr = normstr;
+       entry->normstrlen = normstrlen;
+       return true;
+out_normstr:
+       free(normstr);
+       return false;
+}
+
+/* Create a new name entry in *entry; returns false if we could not succeed. */
+static bool
+name_entry_create(
+       struct unicrash         *uc,
+       const char              *name,
+       xfs_ino_t               ino,
+       struct name_entry       **entry)
+{
+       struct name_entry       *new_entry;
+       size_t                  namelen = strlen(name);
+
+       /* Allocate a zeroed entry with room for the raw name and its NUL. */
+       new_entry = calloc(NAME_ENTRY_SZ(namelen), 1);
+       if (!new_entry)
+               return false;
+       new_entry->next = NULL;
+       new_entry->ino = ino;
+       memcpy(new_entry->name, name, namelen);
+       new_entry->name[namelen] = 0;
+       new_entry->namelen = namelen;
+
+       /* Compute the normalized name that is used to find collisions. */
+       if (!name_entry_compute_checknames(uc, new_entry))
+               goto out;
+
+       *entry = new_entry;
+       return true;
+
+out:
+       free(new_entry);
+       return false;
+}
+
+/* Free a name entry and its normalized name buffer. */
+static void
+name_entry_free(
+       struct name_entry       *entry)
+{
+       free(entry->normstr);
+       free(entry);
+}
+
+/* Adapt the dirhash function from libxfs, avoid linking with libxfs. */
+
+#define rol32(x, y)            (((x) << (y)) | ((x) >> (32 - (y))))
+
+/*
+ * Implement a simple hash on the normalized name of a name entry.
+ * Rotate the hash value by 7 bits, then XOR each character in.
+ * This is implemented with some source-level loop unrolling.
+ */
+static xfs_dahash_t
+name_entry_hash(
+       struct name_entry       *entry)
+{
+       uint8_t                 *name;
+       size_t                  namelen;
+       xfs_dahash_t            hash;
+
+       name = entry->normstr;
+       namelen = entry->normstrlen;
+
+       /*
+        * Do four characters at a time as long as we can.
+        */
+       for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
+               hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
+                      (name[3] << 0) ^ rol32(hash, 7 * 4);
+
+       /*
+        * Now do the remaining zero to three characters.
+        */
+       switch (namelen) {
+       case 3:
+               return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
+                      rol32(hash, 7 * 3);
+       case 2:
+               return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
+       case 1:
+               return (name[0] << 0) ^ rol32(hash, 7 * 1);
+       default: /* case 0: */
+               return hash;
+       }
+}
+
 /* Initialize the collision detector. */
 static bool
 unicrash_init(
@@ -190,89 +311,28 @@ unicrash_free(
        for (i = 0; i < uc->nr_buckets; i++) {
                for (ne = uc->buckets[i]; ne != NULL; ne = x) {
                        x = ne->next;
-                       free(ne);
+                       name_entry_free(ne);
                }
        }
        free(uc);
 }
 
-/* Steal the dirhash function from libxfs, avoid linking with libxfs. */
-
-#define rol32(x, y)            (((x) << (y)) | ((x) >> (32 - (y))))
-
-/*
- * Implement a simple hash on a character string.
- * Rotate the hash value by 7 bits, then XOR each character in.
- * This is implemented with some source-level loop unrolling.
- */
-static xfs_dahash_t
-unicrash_hashname(
-       const uint8_t           *name,
-       size_t                  namelen)
-{
-       xfs_dahash_t            hash;
-
-       /*
-        * Do four characters at a time as long as we can.
-        */
-       for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
-               hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
-                      (name[3] << 0) ^ rol32(hash, 7 * 4);
-
-       /*
-        * Now do the rest of the characters.
-        */
-       switch (namelen) {
-       case 3:
-               return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
-                      rol32(hash, 7 * 3);
-       case 2:
-               return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
-       case 1:
-               return (name[0] << 0) ^ rol32(hash, 7 * 1);
-       default: /* case 0: */
-               return hash;
-       }
-}
-
-/*
- * Normalize a name according to Unicode NFKC normalization rules.
- * Returns true if the name was already normalized.
- */
-static bool
-unicrash_normalize(
-       const char              *in,
-       uint8_t                 *out,
-       size_t                  outlen)
-{
-       size_t                  inlen = strlen(in);
-
-       assert(inlen <= outlen);
-       if (!u8_normalize(UNINORM_NFKC, (const uint8_t *)in, inlen,
-                       out, &outlen)) {
-               /* Didn't normalize, just return the same buffer. */
-               memcpy(out, in, inlen + 1);
-               return true;
-       }
-       out[outlen] = 0;
-       return outlen == inlen ? memcmp(in, out, inlen) == 0 : false;
-}
-
 /* Complain about Unicode problems. */
 static void
 unicrash_complain(
        struct unicrash         *uc,
        const char              *descr,
        const char              *what,
+       struct name_entry       *entry,
        unsigned int            badflags,
-       const char              *name,
-       uint8_t                 *uniname)
+       struct name_entry       *dup_entry)
 {
        char                    *bad1 = NULL;
        char                    *bad2 = NULL;
 
-       bad1 = string_escape(name);
-       bad2 = string_escape((char *)uniname);
+       bad1 = string_escape(entry->name);
+       if (dup_entry)
+               bad2 = string_escape(dup_entry->name);
 
        /*
         * Two names that normalize to the same string will render
@@ -294,52 +354,39 @@ out:
 
 /*
  * Try to add a name -> ino entry to the collision detector.  The name
- * must be normalized according to Unicode NFKC normalization rules to
- * detect byte-unique names that map to the same sequence of Unicode
- * code points.
- *
- * This function returns true either if there was no previous mapping or
- * there was a mapping that matched exactly.  It returns false if
- * there is already a record with that name pointing to a different
- * inode.
+ * must be normalized according to Unicode NFKC rules to detect names that
+ * could be confused with each other.
  */
 static bool
 unicrash_add(
        struct unicrash         *uc,
-       uint8_t                 *uniname,
-       xfs_ino_t               ino,
-       unsigned int            *badflags)
+       struct name_entry       *new_entry,
+       unsigned int            *badflags,
+       struct name_entry       **existing_entry)
 {
-       struct name_entry       *ne;
-       struct name_entry       *x;
-       struct name_entry       **nep;
-       size_t                  uninamelen = u8_strlen(uniname);
+       struct name_entry       *entry;
        size_t                  bucket;
        xfs_dahash_t            hash;
 
-       /* Do we already know about that name? */
-       hash = unicrash_hashname(uniname, uninamelen);
+       /* Store name in hashtable. */
+       hash = name_entry_hash(new_entry);
        bucket = hash % uc->nr_buckets;
-       for (nep = &uc->buckets[bucket], ne = *nep; ne != NULL; ne = x) {
-               if (u8_strcmp(uniname, ne->uniname) == 0 &&
-                   (uc->compare_ino ? ino != ne->ino : true)) {
+       entry = uc->buckets[bucket];
+       new_entry->next = entry;
+       uc->buckets[bucket] = new_entry;
+
+       while (entry != NULL) {
+               /* Same normalization? */
+               if (new_entry->normstrlen == entry->normstrlen &&
+                   !u8_strcmp(new_entry->normstr, entry->normstr) &&
+                   (uc->compare_ino ? entry->ino != new_entry->ino : true)) {
                        *badflags |= UNICRASH_NOT_UNIQUE;
+                       *existing_entry = entry;
                        return true;
                }
-               nep = &ne->next;
-               x = ne->next;
+               entry = entry->next;
        }
 
-       /* Remember that name. */
-       x = malloc(NAME_ENTRY_SZ(uninamelen));
-       if (!x)
-               return false;
-       x->next = NULL;
-       x->ino = ino;
-       x->uninamelen = uninamelen;
-       memcpy(x->uniname, uniname, uninamelen + 1);
-       *nep = x;
-
        return true;
 }
 
@@ -352,19 +399,22 @@ __unicrash_check_name(
        const char              *name,
        xfs_ino_t               ino)
 {
-       uint8_t                 uniname[(NAME_MAX * 2) + 1];
+       struct name_entry       *dup_entry = NULL;
+       struct name_entry       *new_entry;
        unsigned int            badflags = 0;
        bool                    moveon;
 
-       memset(uniname, 0, (NAME_MAX * 2) + 1);
-       unicrash_normalize(name, uniname, NAME_MAX * 2);
-       moveon = unicrash_add(uc, uniname, ino, &badflags);
+       /* If we can't create entry data, just skip it. */
+       if (!name_entry_create(uc, name, ino, &new_entry))
+               return true;
+
+       moveon = unicrash_add(uc, new_entry, &badflags, &dup_entry);
        if (!moveon)
                return false;
 
        if (badflags)
-               unicrash_complain(uc, descr, namedescr, badflags, name,
-                               uniname);
+               unicrash_complain(uc, descr, namedescr, new_entry, badflags,
+                               dup_entry);
 
        return true;
 }