scrub/unicrash.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * Copyright (C) 2018-2024 Oracle.  All Rights Reserved.
   4  * Author: Darrick J. Wong <djwong@kernel.org>
   5  */
   6 #include "xfs.h"
   7 #include <stdint.h>
   8 #include <stdlib.h>
   9 #include <dirent.h>
  10 #include <sys/types.h>
  11 #include <sys/statvfs.h>
  12 #include <strings.h>
  13 #include <unicode/uclean.h>
  14 #include <unicode/ustring.h>
  15 #include <unicode/unorm2.h>
  16 #include <unicode/uspoof.h>
  17 #include "libfrog/paths.h"
  18 #include "xfs_scrub.h"
  19 #include "common.h"
  20 #include "descr.h"
  21 #include "unicrash.h"
  22
  23 /*
  24  * Detect Unicode confusable names in directories and attributes.
  25  *
 * Record all the name->ino mappings in a directory/xattr, with a twist!  The
 * twist is to record the Unicode skeleton and normalized version of every
 * name we see so that we can check whether a name space (directory, extended
 * attribute set) contains names that include malicious characters or that
 * could be confused for one another.  Such entries are at best a sign of
 * Unicode mishandling, or at worst some sort of weird name substitution
 * attack if the entries do not point to the same inode.  Warn if we see
 * multiple dirents that do not all point to the same inode.
  34  *
  35  * For extended attributes we perform the same collision checks on the
  36  * attribute, though any collision is enough to trigger a warning.
  37  *
  38  * We avoid flagging these problems as errors because XFS treats names as a
  39  * sequence of arbitrary nonzero bytes.  While a Unicode collision is not
  40  * technically a filesystem corruption, we ought to say something if there's a
  41  * possibility for misleading a user.  Unquestionably bad things (direction
  42  * overrides, control characters, names that normalize to the same string)
  43  * produce warnings, whereas potentially confusable names produce
  44  * informational messages.
  45  *
  46  * The skeleton algorithm is detailed in section 4 ("Confusable Detection") of
  47  * the Unicode technical standard #39.  First we normalize the name, then we
  48  * substitute code points according to the confusable code point table, then
  49  * normalize again.
  50  *
  51  * We take the extra step of removing non-identifier code points such as
  52  * formatting characters, control characters, zero width characters, etc.
  53  * from the skeleton so that we can complain about names that are confusable
  54  * due to invisible control characters.
  55  *
  56  * In other words, skel = remove_invisible(nfd(remap_confusables(nfd(name)))).
  57  */
  58
  59 struct name_entry {
  60         struct name_entry       *next;
  61
  62         /* NFKC normalized name */
  63         UChar                   *normstr;
  64         size_t                  normstrlen;
  65
  66         /* Unicode skeletonized name */
  67         UChar                   *skelstr;
  68         size_t                  skelstrlen;
  69
  70         xfs_ino_t               ino;
  71
  72         /* Raw dirent name */
  73         size_t                  namelen;
  74         char                    name[0];
  75 };
  76 #define NAME_ENTRY_SZ(nl)       (sizeof(struct name_entry) + 1 + \
  77                                  (nl * sizeof(uint8_t)))
  78
  79 struct unicrash {
  80         struct scrub_ctx        *ctx;
  81         USpoofChecker           *spoof;
  82         const UNormalizer2      *normalizer;
  83         bool                    compare_ino;
  84         bool                    is_only_root_writeable;
  85         size_t                  nr_buckets;
  86         struct name_entry       *buckets[0];
  87 };
  88 #define UNICRASH_SZ(nr)         (sizeof(struct unicrash) + \
  89                                  (nr * sizeof(struct name_entry *)))
  90
  91 /* Things to complain about in Unicode naming. */
  92
  93 /*
  94  * Multiple names resolve to the same normalized string and therefore render
  95  * identically.
  96  */
  97 #define UNICRASH_NOT_UNIQUE     (1 << 0)
  98
  99 /* Name contains directional overrides. */
 100 #define UNICRASH_BIDI_OVERRIDE  (1 << 1)
 101
 102 /* Name mixes left-to-right and right-to-left characters. */
 103 #define UNICRASH_BIDI_MIXED     (1 << 2)
 104
 105 /* Control characters in name. */
 106 #define UNICRASH_CONTROL_CHAR   (1 << 3)
 107
 108 /* Invisible characters.  Only a problem if we have collisions. */
 109 #define UNICRASH_ZERO_WIDTH     (1 << 4)
 110
 111 /* Multiple names resolve to the same skeleton string. */
 112 #define UNICRASH_CONFUSABLE     (1 << 5)
 113
 114 /*
 115  * We only care about validating utf8 collisions if the underlying
 116  * system configuration says we're using utf8.  If the language
 117  * specifier string used to output messages has ".UTF-8" somewhere in
 118  * its name, then we conclude utf8 is in use.  Otherwise, no checking is
 119  * performed.
 120  *
 121  * Most modern Linux systems default to utf8, so the only time this
 122  * check will return false is if the administrator configured things
 123  * this way or if things are so messed up there is no locale data at
 124  * all.
 125  */
 126 #define UTF8_STR                ".UTF-8"
 127 #define UTF8_STRLEN             (sizeof(UTF8_STR) - 1)
 128 static bool
 129 is_utf8_locale(void)
 130 {
 131         const char              *msg_locale;
 132         static int              answer = -1;
 133
 134         if (answer != -1)
 135                 return answer;
 136
 137         msg_locale = setlocale(LC_MESSAGES, NULL);
 138         if (msg_locale == NULL)
 139                 return false;
 140
 141         if (strstr(msg_locale, UTF8_STR) != NULL)
 142                 answer = 1;
 143         else
 144                 answer = 0;
 145         return answer;
 146 }
 147
 148 /*
 149  * Generate normalized form and skeleton of the name.  If this fails, just
 150  * forget everything and return false; this is an advisory checker.
 151  */
 152 static bool
 153 name_entry_compute_checknames(
 154         struct unicrash         *uc,
 155         struct name_entry       *entry)
 156 {
 157         UChar                   *normstr;
 158         UChar                   *unistr;
 159         UChar                   *skelstr;
 160         int32_t                 normstrlen;
 161         int32_t                 unistrlen;
 162         int32_t                 skelstrlen;
 163         UChar32                 uchr;
 164         int32_t                 i, j;
 165
 166         UErrorCode              uerr = U_ZERO_ERROR;
 167
 168         /* Convert bytestr to unistr for normalization */
 169         u_strFromUTF8(NULL, 0, &unistrlen, entry->name, entry->namelen, &uerr);
 170         if (uerr != U_BUFFER_OVERFLOW_ERROR)
 171                 return false;
 172         uerr = U_ZERO_ERROR;
 173         unistr = calloc(unistrlen + 1, sizeof(UChar));
 174         if (!unistr)
 175                 return false;
 176         u_strFromUTF8(unistr, unistrlen, NULL, entry->name, entry->namelen,
 177                         &uerr);
 178         if (U_FAILURE(uerr))
 179                 goto out_unistr;
 180
 181         /* Normalize the string. */
 182         normstrlen = unorm2_normalize(uc->normalizer, unistr, unistrlen, NULL,
 183                         0, &uerr);
 184         if (uerr != U_BUFFER_OVERFLOW_ERROR)
 185                 goto out_unistr;
 186         uerr = U_ZERO_ERROR;
 187         normstr = calloc(normstrlen + 1, sizeof(UChar));
 188         if (!normstr)
 189                 goto out_unistr;
 190         unorm2_normalize(uc->normalizer, unistr, unistrlen, normstr, normstrlen,
 191                         &uerr);
 192         if (U_FAILURE(uerr))
 193                 goto out_normstr;
 194
 195         /* Compute skeleton. */
 196         skelstrlen = uspoof_getSkeleton(uc->spoof, 0, unistr, unistrlen, NULL,
 197                         0, &uerr);
 198         if (uerr != U_BUFFER_OVERFLOW_ERROR)
 199                 goto out_normstr;
 200         uerr = U_ZERO_ERROR;
 201         skelstr = calloc(skelstrlen + 1, sizeof(UChar));
 202         if (!skelstr)
 203                 goto out_normstr;
 204         uspoof_getSkeleton(uc->spoof, 0, unistr, unistrlen, skelstr, skelstrlen,
 205                         &uerr);
 206         if (U_FAILURE(uerr))
 207                 goto out_skelstr;
 208
 209         /* Remove control/formatting characters from skeleton. */
 210         for (i = 0, j = 0; i < skelstrlen; j = i) {
 211                 U16_NEXT_UNSAFE(skelstr, i, uchr);
 212                 if (!u_isIDIgnorable(uchr))
 213                         continue;
 214                 memmove(&skelstr[j], &skelstr[i],
 215                                 (skelstrlen - i + 1) * sizeof(UChar));
 216                 skelstrlen -= (i - j);
 217                 i = j;
 218         }
 219
 220         entry->skelstr = skelstr;
 221         entry->skelstrlen = skelstrlen;
 222         entry->normstr = normstr;
 223         entry->normstrlen = normstrlen;
 224         free(unistr);
 225         return true;
 226
 227 out_skelstr:
 228         free(skelstr);
 229 out_normstr:
 230         free(normstr);
 231 out_unistr:
 232         free(unistr);
 233         return false;
 234 }
 235
 236 /* Create a new name entry, returns false if we could not succeed. */
 237 static bool
 238 name_entry_create(
 239         struct unicrash         *uc,
 240         const char              *name,
 241         xfs_ino_t               ino,
 242         struct name_entry       **entry)
 243 {
 244         struct name_entry       *new_entry;
 245         size_t                  namelen = strlen(name);
 246
 247         /* Create new entry */
 248         new_entry = calloc(NAME_ENTRY_SZ(namelen), 1);
 249         if (!new_entry)
 250                 return false;
 251         new_entry->next = NULL;
 252         new_entry->ino = ino;
 253         memcpy(new_entry->name, name, namelen);
 254         new_entry->name[namelen] = 0;
 255         new_entry->namelen = namelen;
 256
 257         /* Normalize/skeletonize name to find collisions. */
 258         if (!name_entry_compute_checknames(uc, new_entry))
 259                 goto out;
 260
 261         *entry = new_entry;
 262         return true;
 263
 264 out:
 265         free(new_entry);
 266         return false;
 267 }
 268
 269 /* Free a name entry */
 270 static void
 271 name_entry_free(
 272         struct name_entry       *entry)
 273 {
 274         free(entry->normstr);
 275         free(entry->skelstr);
 276         free(entry);
 277 }
 278
 279 /* Adapt the dirhash function from libxfs, avoid linking with libxfs. */
 280
 281 #define rol32(x, y)             (((x) << (y)) | ((x) >> (32 - (y))))
 282
 283 /*
 284  * Implement a simple hash on a character string.
 285  * Rotate the hash value by 7 bits, then XOR each character in.
 286  * This is implemented with some source-level loop unrolling.
 287  */
 288 static xfs_dahash_t
 289 name_entry_hash(
 290         struct name_entry       *entry)
 291 {
 292         uint8_t                 *name;
 293         size_t                  namelen;
 294         xfs_dahash_t            hash;
 295
 296         name = (uint8_t *)entry->skelstr;
 297         namelen = entry->skelstrlen * sizeof(UChar);
 298
 299         /*
 300          * Do four characters at a time as long as we can.
 301          */
 302         for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
 303                 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
 304                        (name[3] << 0) ^ rol32(hash, 7 * 4);
 305
 306         /*
 307          * Now do the rest of the characters.
 308          */
 309         switch (namelen) {
 310         case 3:
 311                 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
 312                        rol32(hash, 7 * 3);
 313         case 2:
 314                 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
 315         case 1:
 316                 return (name[0] << 0) ^ rol32(hash, 7 * 1);
 317         default: /* case 0: */
 318                 return hash;
 319         }
 320 }
 321
 322 /*
 323  * Check a name for suspicious elements that have appeared in filename
 324  * spoofing attacks.  This includes names that mixed directions or contain
 325  * direction overrides control characters, both of which have appeared in
 326  * filename spoofing attacks.
 327  */
 328 static void
 329 name_entry_examine(
 330         struct name_entry       *entry,
 331         unsigned int            *badflags)
 332 {
 333         UChar32                 uchr;
 334         int32_t                 i;
 335         uint8_t                 mask = 0;
 336
 337         for (i = 0; i < entry->normstrlen;) {
 338                 U16_NEXT_UNSAFE(entry->normstr, i, uchr);
 339
 340                 /* zero width character sequences */
 341                 switch (uchr) {
 342                 case 0x200B:    /* zero width space */
 343                 case 0x200C:    /* zero width non-joiner */
 344                 case 0x200D:    /* zero width joiner */
 345                 case 0xFEFF:    /* zero width non breaking space */
 346                 case 0x2060:    /* word joiner */
 347                 case 0x2061:    /* function application */
 348                 case 0x2062:    /* invisible times (multiply) */
 349                 case 0x2063:    /* invisible separator (comma) */
 350                 case 0x2064:    /* invisible plus (addition) */
 351                         *badflags |= UNICRASH_ZERO_WIDTH;
 352                         break;
 353                 }
 354
 355                 /* control characters */
 356                 if (u_iscntrl(uchr))
 357                         *badflags |= UNICRASH_CONTROL_CHAR;
 358
 359                 switch (u_charDirection(uchr)) {
 360                 case U_LEFT_TO_RIGHT:
 361                         mask |= 0x01;
 362                         break;
 363                 case U_RIGHT_TO_LEFT:
 364                         mask |= 0x02;
 365                         break;
 366                 case U_RIGHT_TO_LEFT_OVERRIDE:
 367                         *badflags |= UNICRASH_BIDI_OVERRIDE;
 368                         break;
 369                 case U_LEFT_TO_RIGHT_OVERRIDE:
 370                         *badflags |= UNICRASH_BIDI_OVERRIDE;
 371                         break;
 372                 default:
 373                         break;
 374                 }
 375         }
 376
 377         /* mixing left-to-right and right-to-left chars */
 378         if (mask == 0x3)
 379                 *badflags |= UNICRASH_BIDI_MIXED;
 380 }
 381
 382 /* Initialize the collision detector. */
 383 static int
 384 unicrash_init(
 385         struct unicrash         **ucp,
 386         struct scrub_ctx        *ctx,
 387         bool                    compare_ino,
 388         size_t                  nr_buckets,
 389         bool                    is_only_root_writeable)
 390 {
 391         struct unicrash         *p;
 392         UErrorCode              uerr = U_ZERO_ERROR;
 393
 394         if (!is_utf8_locale()) {
 395                 *ucp = NULL;
 396                 return 0;
 397         }
 398
 399         if (nr_buckets > 65536)
 400                 nr_buckets = 65536;
 401         else if (nr_buckets < 16)
 402                 nr_buckets = 16;
 403
 404         p = calloc(1, UNICRASH_SZ(nr_buckets));
 405         if (!p)
 406                 return errno;
 407         p->ctx = ctx;
 408         p->nr_buckets = nr_buckets;
 409         p->compare_ino = compare_ino;
 410         p->normalizer = unorm2_getNFKCInstance(&uerr);
 411         if (U_FAILURE(uerr))
 412                 goto out_free;
 413         p->spoof = uspoof_open(&uerr);
 414         if (U_FAILURE(uerr))
 415                 goto out_free;
 416         uspoof_setChecks(p->spoof, USPOOF_ALL_CHECKS, &uerr);
 417         if (U_FAILURE(uerr))
 418                 goto out_spoof;
 419         p->is_only_root_writeable = is_only_root_writeable;
 420         *ucp = p;
 421
 422         return 0;
 423 out_spoof:
 424         uspoof_close(p->spoof);
 425 out_free:
 426         free(p);
 427         return ENOMEM;
 428 }
 429
 430 /*
 431  * Is this inode owned by root and not writable by others?  If so, skip
 432  * even the informational messages, because this was put in place by the
 433  * administrator.
 434  */
 435 static bool
 436 is_only_root_writable(
 437         struct xfs_bulkstat     *bstat)
 438 {
 439         if (bstat->bs_uid != 0 || bstat->bs_gid != 0)
 440                 return false;
 441         return !(bstat->bs_mode & S_IWOTH);
 442 }
 443
 444 /* Initialize the collision detector for a directory. */
 445 int
 446 unicrash_dir_init(
 447         struct unicrash         **ucp,
 448         struct scrub_ctx        *ctx,
 449         struct xfs_bulkstat     *bstat)
 450 {
 451         /*
 452          * Assume 64 bytes per dentry, clamp buckets between 16 and 64k.
 453          * Same general idea as dir_hash_init in xfs_repair.
 454          */
 455         return unicrash_init(ucp, ctx, true, bstat->bs_size / 64,
 456                         is_only_root_writable(bstat));
 457 }
 458
 459 /* Initialize the collision detector for an extended attribute. */
 460 int
 461 unicrash_xattr_init(
 462         struct unicrash         **ucp,
 463         struct scrub_ctx        *ctx,
 464         struct xfs_bulkstat     *bstat)
 465 {
 466         /* Assume 16 attributes per extent for lack of a better idea. */
 467         return unicrash_init(ucp, ctx, false, 16 * (1 + bstat->bs_aextents),
 468                         is_only_root_writable(bstat));
 469 }
 470
 471 /* Initialize the collision detector for a filesystem label. */
 472 int
 473 unicrash_fs_label_init(
 474         struct unicrash         **ucp,
 475         struct scrub_ctx        *ctx)
 476 {
 477         return unicrash_init(ucp, ctx, false, 16, true);
 478 }
 479
 480 /* Free the crash detector. */
 481 void
 482 unicrash_free(
 483         struct unicrash         *uc)
 484 {
 485         struct name_entry       *ne;
 486         struct name_entry       *x;
 487         size_t                  i;
 488
 489         if (!uc)
 490                 return;
 491
 492         uspoof_close(uc->spoof);
 493         for (i = 0; i < uc->nr_buckets; i++) {
 494                 for (ne = uc->buckets[i]; ne != NULL; ne = x) {
 495                         x = ne->next;
 496                         name_entry_free(ne);
 497                 }
 498         }
 499         free(uc);
 500 }
 501
 502 /* Complain about Unicode problems. */
 503 static void
 504 unicrash_complain(
 505         struct unicrash         *uc,
 506         struct descr            *dsc,
 507         const char              *what,
 508         struct name_entry       *entry,
 509         unsigned int            badflags,
 510         struct name_entry       *dup_entry)
 511 {
 512         char                    *bad1 = NULL;
 513         char                    *bad2 = NULL;
 514
 515         bad1 = string_escape(entry->name);
 516         if (dup_entry)
 517                 bad2 = string_escape(dup_entry->name);
 518
 519         /*
 520          * Most filechooser UIs do not look for bidirectional overrides when
 521          * they render names.  This can result in misleading name presentation
 522          * that makes "hig<rtl>gnp.sh" render like "highs.png".
 523          */
 524         if (badflags & UNICRASH_BIDI_OVERRIDE) {
 525                 str_warn(uc->ctx, descr_render(dsc),
 526 _("Unicode name \"%s\" in %s contains suspicious text direction overrides."),
 527                                 bad1, what);
 528                 goto out;
 529         }
 530
 531         /*
 532          * Two names that normalize to the same string will render
 533          * identically even though the filesystem considers them unique
 534          * names.  "cafe\xcc\x81" and "caf\xc3\xa9" have different byte
 535          * sequences, but they both appear as "café".
 536          */
 537         if (badflags & UNICRASH_NOT_UNIQUE) {
 538                 str_warn(uc->ctx, descr_render(dsc),
 539 _("Unicode name \"%s\" in %s renders identically to \"%s\"."),
 540                                 bad1, what, bad2);
 541                 goto out;
 542         }
 543
 544         /*
 545          * If a name contains invisible/nonprinting characters and can be
 546          * confused with another name as a result, we should complain.
 547          * "moo<zerowidthspace>cow" and "moocow" are misleading.
 548          */
 549         if ((badflags & UNICRASH_ZERO_WIDTH) &&
 550             (badflags & UNICRASH_CONFUSABLE)) {
 551                 str_warn(uc->ctx, descr_render(dsc),
 552 _("Unicode name \"%s\" in %s could be confused with '%s' due to invisible characters."),
 553                                 bad1, what, bad2);
 554                 goto out;
 555         }
 556
 557         /*
 558          * Unfiltered control characters can mess up your terminal and render
 559          * invisibly in filechooser UIs.
 560          */
 561         if (badflags & UNICRASH_CONTROL_CHAR) {
 562                 str_warn(uc->ctx, descr_render(dsc),
 563 _("Unicode name \"%s\" in %s contains control characters."),
 564                                 bad1, what);
 565                 goto out;
 566         }
 567
 568         /*
 569          * Skip the informational messages if the inode owning the name is
 570          * only writeable by root, because those files were put there by the
 571          * sysadmin.  Also skip names less than four letters long because
 572          * there's a much higher chance of collisions with short names.
 573          */
 574         if (!verbose && (uc->is_only_root_writeable || entry->namelen < 4))
 575                 goto out;
 576
 577         /*
 578          * It's not considered good practice (says Unicode) to mix LTR
 579          * characters with RTL characters.  The mere presence of different
 580          * bidirectional characters isn't enough to trip up software, so don't
 581          * warn about this too loudly.
 582          */
 583         if (badflags & UNICRASH_BIDI_MIXED) {
 584                 str_info(uc->ctx, descr_render(dsc),
 585 _("Unicode name \"%s\" in %s mixes bidirectional characters."),
 586                                 bad1, what);
 587                 goto out;
 588         }
 589
 590         /*
 591          * We'll note if two names could be confusable with each other, but
 592          * whether or not the user will actually confuse them is dependent
 593          * on the rendering system and the typefaces in use.  Maybe "foo.1"
 594          * and "moo.l" look the same, maybe they do not.
 595          */
 596         if (badflags & UNICRASH_CONFUSABLE) {
 597                 str_info(uc->ctx, descr_render(dsc),
 598 _("Unicode name \"%s\" in %s could be confused with \"%s\"."),
 599                                 bad1, what, bad2);
 600         }
 601
 602 out:
 603         free(bad1);
 604         free(bad2);
 605 }
 606
 607 /*
 608  * Try to add a name -> ino entry to the collision detector.  The name
 609  * must be skeletonized according to Unicode TR39 to detect names that
 610  * could be visually confused with each other.
 611  */
 612 static void
 613 unicrash_add(
 614         struct unicrash         *uc,
 615         struct name_entry       *new_entry,
 616         unsigned int            *badflags,
 617         struct name_entry       **existing_entry)
 618 {
 619         struct name_entry       *entry;
 620         size_t                  bucket;
 621         xfs_dahash_t            hash;
 622
 623         /* Store name in hashtable. */
 624         hash = name_entry_hash(new_entry);
 625         bucket = hash % uc->nr_buckets;
 626         entry = uc->buckets[bucket];
 627         new_entry->next = entry;
 628         uc->buckets[bucket] = new_entry;
 629
 630         while (entry != NULL) {
 631                 /*
 632                  * If we see the same byte sequence then someone's modifying
 633                  * the namespace while we're scanning it.  Update the existing
 634                  * entry's inode mapping and erase the new entry from existence.
 635                  */
 636                 if (new_entry->namelen == entry->namelen &&
 637                     !memcmp(new_entry->name, entry->name, entry->namelen)) {
 638                         entry->ino = new_entry->ino;
 639                         uc->buckets[bucket] = new_entry->next;
 640                         name_entry_free(new_entry);
 641                         *badflags = 0;
 642                         return;
 643                 }
 644
 645                 /* Same normalization? */
 646                 if (new_entry->normstrlen == entry->normstrlen &&
 647                     !u_strcmp(new_entry->normstr, entry->normstr) &&
 648                     (uc->compare_ino ? entry->ino != new_entry->ino : true)) {
 649                         *badflags |= UNICRASH_NOT_UNIQUE;
 650                         *existing_entry = entry;
 651                         return;
 652                 }
 653
 654                 /* Confusable? */
 655                 if (new_entry->skelstrlen == entry->skelstrlen &&
 656                     !u_strcmp(new_entry->skelstr, entry->skelstr) &&
 657                     (uc->compare_ino ? entry->ino != new_entry->ino : true)) {
 658                         *badflags |= UNICRASH_CONFUSABLE;
 659                         *existing_entry = entry;
 660                         return;
 661                 }
 662                 entry = entry->next;
 663         }
 664 }
 665
 666 /* Check a name for unicode normalization problems or collisions. */
 667 static int
 668 __unicrash_check_name(
 669         struct unicrash         *uc,
 670         struct descr            *dsc,
 671         const char              *namedescr,
 672         const char              *name,
 673         xfs_ino_t               ino)
 674 {
 675         struct name_entry       *dup_entry = NULL;
 676         struct name_entry       *new_entry = NULL;
 677         unsigned int            badflags = 0;
 678
 679         /* If we can't create entry data, just skip it. */
 680         if (!name_entry_create(uc, name, ino, &new_entry))
 681                 return 0;
 682
 683         name_entry_examine(new_entry, &badflags);
 684         unicrash_add(uc, new_entry, &badflags, &dup_entry);
 685         if (badflags)
 686                 unicrash_complain(uc, dsc, namedescr, new_entry, badflags,
 687                                 dup_entry);
 688
 689         return 0;
 690 }
 691
 692 /*
 693  * Check a directory entry for unicode normalization problems or collisions.
 694  * If errors occur, this function will log them and return nonzero.
 695  */
 696 int
 697 unicrash_check_dir_name(
 698         struct unicrash         *uc,
 699         struct descr            *dsc,
 700         struct dirent           *dentry)
 701 {
 702         if (!uc)
 703                 return 0;
 704         return __unicrash_check_name(uc, dsc, _("directory"),
 705                         dentry->d_name, dentry->d_ino);
 706 }
 707
 708 /*
 709  * Check an extended attribute name for unicode normalization problems
 710  * or collisions.  If errors occur, this function will log them and return
 711  * nonzero.
 712  */
 713 int
 714 unicrash_check_xattr_name(
 715         struct unicrash         *uc,
 716         struct descr            *dsc,
 717         const char              *attrname)
 718 {
 719         if (!uc)
 720                 return 0;
 721         return __unicrash_check_name(uc, dsc, _("extended attribute"),
 722                         attrname, 0);
 723 }
 724
 725 /*
 726  * Check the fs label for unicode normalization problems or misleading bits.
 727  * If errors occur, this function will log them and return nonzero.
 728  */
 729 int
 730 unicrash_check_fs_label(
 731         struct unicrash         *uc,
 732         struct descr            *dsc,
 733         const char              *label)
 734 {
 735         if (!uc)
 736                 return 0;
 737         return __unicrash_check_name(uc, dsc, _("filesystem label"),
 738                         label, 0);
 739 }
 740
 741 /* Load libicu and initialize it. */
 742 bool
 743 unicrash_load(void)
 744 {
 745         UErrorCode              uerr = U_ZERO_ERROR;
 746
 747         u_init(&uerr);
 748         return U_FAILURE(uerr);
 749 }
 750
 751 /* Unload libicu once we're done with it. */
 752 void
 753 unicrash_unload(void)
 754 {
 755         u_cleanup();
 756 }