]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - scrub/unicrash.c
xfs_scrub: adapt phase5 to deferred descriptions
[thirdparty/xfsprogs-dev.git] / scrub / unicrash.c
CommitLineData
959ef981 1// SPDX-License-Identifier: GPL-2.0+
4bbed4ec
DW
2/*
3 * Copyright (C) 2018 Oracle. All Rights Reserved.
4bbed4ec 4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
4bbed4ec 5 */
a440f877 6#include "xfs.h"
4bbed4ec 7#include <stdint.h>
4bbed4ec
DW
8#include <stdlib.h>
9#include <dirent.h>
10#include <sys/types.h>
4bbed4ec 11#include <sys/statvfs.h>
bff5d1a4
DW
12#include <strings.h>
13#include <unicode/ustring.h>
14#include <unicode/unorm2.h>
367db2f5 15#include <unicode/uspoof.h>
42b4c8e8 16#include "libfrog/paths.h"
4bbed4ec
DW
17#include "xfs_scrub.h"
18#include "common.h"
a3158a75 19#include "descr.h"
8872c2a5 20#include "unicrash.h"
4bbed4ec
DW
21
22/*
367db2f5 23 * Detect Unicode confusable names in directories and attributes.
4bbed4ec 24 *
367db2f5
DW
25 * Record all the name->ino mappings in a directory/xattr, with a twist! The
26 * twist is to record the Unicode skeleton and normalized version of every
27 * name we see so that we can check for a name space (directory, extended
28 * attribute set) containing names containing malicious characters or that
29 * could be confused for one another. These entries are at best a sign of
30 * Unicode mishandling, or some sort of weird name substitution attack if the
31 * entries do not point to the same inode. Warn if we see multiple dirents
32 * that do not all point to the same inode.
4bbed4ec
DW
33 *
34 * For extended attributes we perform the same collision checks on the
35 * attribute, though any collision is enough to trigger a warning.
36 *
367db2f5
DW
37 * We avoid flagging these problems as errors because XFS treats names as a
38 * sequence of arbitrary nonzero bytes. While a Unicode collision is not
39 * technically a filesystem corruption, we ought to say something if there's a
40 * possibility for misleading a user. Unquestionably bad things (direction
41 * overrides, control characters, names that normalize to the same string)
42 * produce warnings, whereas potentially confusable names produce
43 * informational messages.
4bbed4ec 44 *
367db2f5
DW
45 * The skeleton algorithm is detailed in section 4 ("Confusable Detection") of
46 * the Unicode technical standard #39. First we normalize the name, then we
47 * substitute code points according to the confusable code point table, then
48 * normalize again.
49 *
50 * We take the extra step of removing non-identifier code points such as
51 * formatting characters, control characters, zero width characters, etc.
52 * from the skeleton so that we can complain about names that are confusable
53 * due to invisible control characters.
54 *
55 * In other words, skel = remove_invisible(nfd(remap_confusables(nfd(name)))).
4bbed4ec
DW
56 */
57
58struct name_entry {
59 struct name_entry *next;
3029a02c
DW
60
61 /* NFKC normalized name */
bff5d1a4 62 UChar *normstr;
3029a02c
DW
63 size_t normstrlen;
64
367db2f5
DW
65 /* Unicode skeletonized name */
66 UChar *skelstr;
67 size_t skelstrlen;
68
4bbed4ec 69 xfs_ino_t ino;
3029a02c
DW
70
71 /* Raw UTF8 name */
72 size_t namelen;
73 char name[0];
4bbed4ec
DW
74};
75#define NAME_ENTRY_SZ(nl) (sizeof(struct name_entry) + 1 + \
76 (nl * sizeof(uint8_t)))
77
78struct unicrash {
79 struct scrub_ctx *ctx;
367db2f5 80 USpoofChecker *spoof;
bff5d1a4 81 const UNormalizer2 *normalizer;
4bbed4ec 82 bool compare_ino;
5fa5a860 83 bool is_only_root_writeable;
4bbed4ec
DW
84 size_t nr_buckets;
85 struct name_entry *buckets[0];
86};
87#define UNICRASH_SZ(nr) (sizeof(struct unicrash) + \
88 (nr * sizeof(struct name_entry *)))
89
95c2f78b
DW
/*
 * Problem flags accumulated for a single name.  Each flag occupies its own
 * bit so that several findings can be reported for the same name.
 */

/* Another name normalizes to the same string, so both render identically. */
#define UNICRASH_NOT_UNIQUE	0x01

/* The name contains a directional override character. */
#define UNICRASH_BIDI_OVERRIDE	0x02

/* The name contains both left-to-right and right-to-left characters. */
#define UNICRASH_BIDI_MIXED	0x04

/* The name contains control characters. */
#define UNICRASH_CONTROL_CHAR	0x08

/* The name contains invisible characters; only matters on a collision. */
#define UNICRASH_ZERO_WIDTH	0x10

/* Another name has the same skeleton string. */
#define UNICRASH_CONFUSABLE	0x20
112
4bbed4ec
DW
/*
 * Unicode collision checking is only worthwhile when the system is
 * configured for utf8.  We decide that by looking at the locale used for
 * messages: if its name contains ".UTF-8" we assume utf8 is in use,
 * otherwise no checking is performed.
 *
 * Most modern Linux systems default to utf8, so a negative answer means
 * either the administrator chose otherwise or there is no usable locale
 * data at all.
 *
 * The answer is computed once and remembered.  A failed locale query
 * returns false without being remembered, so it is retried next time.
 */
#define UTF8_STR		".UTF-8"
#define UTF8_STRLEN		(sizeof(UTF8_STR) - 1)
static bool
is_utf8_locale(void)
{
	static int		answer = -1;
	const char		*lc_messages;

	if (answer == -1) {
		lc_messages = setlocale(LC_MESSAGES, NULL);
		if (!lc_messages)
			return false;

		answer = strstr(lc_messages, UTF8_STR) ? 1 : 0;
	}

	return answer;
}
146
3029a02c 147/*
367db2f5 148 * Generate normalized form and skeleton of the name.
3029a02c
DW
149 * If this fails, just forget everything; this is an advisory checker.
150 */
151static bool
152name_entry_compute_checknames(
153 struct unicrash *uc,
154 struct name_entry *entry)
155{
bff5d1a4
DW
156 UChar *normstr;
157 UChar *unistr;
367db2f5 158 UChar *skelstr;
bff5d1a4
DW
159 int32_t normstrlen;
160 int32_t unistrlen;
367db2f5
DW
161 int32_t skelstrlen;
162 UChar32 uchr;
163 int32_t i, j;
164
bff5d1a4
DW
165 UErrorCode uerr = U_ZERO_ERROR;
166
167 /* Convert bytestr to unistr for normalization */
168 u_strFromUTF8(NULL, 0, &unistrlen, entry->name, entry->namelen, &uerr);
169 if (uerr != U_BUFFER_OVERFLOW_ERROR)
3029a02c 170 return false;
bff5d1a4
DW
171 uerr = U_ZERO_ERROR;
172 unistr = calloc(unistrlen + 1, sizeof(UChar));
173 if (!unistr)
174 return false;
175 u_strFromUTF8(unistr, unistrlen, NULL, entry->name, entry->namelen,
176 &uerr);
177 if (U_FAILURE(uerr))
178 goto out_unistr;
179
180 /* Normalize the string. */
181 normstrlen = unorm2_normalize(uc->normalizer, unistr, unistrlen, NULL,
182 0, &uerr);
183 if (uerr != U_BUFFER_OVERFLOW_ERROR)
184 goto out_unistr;
185 uerr = U_ZERO_ERROR;
186 normstr = calloc(normstrlen + 1, sizeof(UChar));
187 if (!normstr)
188 goto out_unistr;
189 unorm2_normalize(uc->normalizer, unistr, unistrlen, normstr, normstrlen,
190 &uerr);
191 if (U_FAILURE(uerr))
3029a02c
DW
192 goto out_normstr;
193
367db2f5
DW
194 /* Compute skeleton. */
195 skelstrlen = uspoof_getSkeleton(uc->spoof, 0, unistr, unistrlen, NULL,
196 0, &uerr);
197 if (uerr != U_BUFFER_OVERFLOW_ERROR)
198 goto out_normstr;
199 uerr = U_ZERO_ERROR;
200 skelstr = calloc(skelstrlen + 1, sizeof(UChar));
201 if (!skelstr)
202 goto out_normstr;
203 uspoof_getSkeleton(uc->spoof, 0, unistr, unistrlen, skelstr, skelstrlen,
204 &uerr);
205 if (U_FAILURE(uerr))
206 goto out_skelstr;
207
208 /* Remove control/formatting characters from skeleton. */
209 for (i = 0, j = 0; i < skelstrlen; j = i) {
210 U16_NEXT_UNSAFE(skelstr, i, uchr);
211 if (!u_isIDIgnorable(uchr))
212 continue;
213 memmove(&skelstr[j], &skelstr[i],
214 (skelstrlen - i + 1) * sizeof(UChar));
215 skelstrlen -= (i - j);
216 i = j;
217 }
218
219 entry->skelstr = skelstr;
220 entry->skelstrlen = skelstrlen;
3029a02c
DW
221 entry->normstr = normstr;
222 entry->normstrlen = normstrlen;
bff5d1a4 223 free(unistr);
3029a02c 224 return true;
bff5d1a4 225
367db2f5
DW
226out_skelstr:
227 free(skelstr);
3029a02c
DW
228out_normstr:
229 free(normstr);
bff5d1a4
DW
230out_unistr:
231 free(unistr);
3029a02c
DW
232 return false;
233}
234
235/* Create a new name entry, returns false if we could not succeed. */
236static bool
237name_entry_create(
238 struct unicrash *uc,
239 const char *name,
240 xfs_ino_t ino,
241 struct name_entry **entry)
242{
243 struct name_entry *new_entry;
244 size_t namelen = strlen(name);
245
246 /* Create new entry */
247 new_entry = calloc(NAME_ENTRY_SZ(namelen), 1);
248 if (!new_entry)
249 return false;
250 new_entry->next = NULL;
251 new_entry->ino = ino;
252 memcpy(new_entry->name, name, namelen);
253 new_entry->name[namelen] = 0;
254 new_entry->namelen = namelen;
255
367db2f5 256 /* Normalize/skeletonize name to find collisions. */
3029a02c
DW
257 if (!name_entry_compute_checknames(uc, new_entry))
258 goto out;
259
260 *entry = new_entry;
261 return true;
262
263out:
264 free(new_entry);
265 return false;
266}
267
268/* Free a name entry */
269static void
270name_entry_free(
271 struct name_entry *entry)
272{
273 free(entry->normstr);
367db2f5 274 free(entry->skelstr);
3029a02c
DW
275 free(entry);
276}
277
278/* Adapt the dirhash function from libxfs, avoid linking with libxfs. */
279
280#define rol32(x, y) (((x) << (y)) | ((x) >> (32 - (y))))
281
282/*
283 * Implement a simple hash on a character string.
284 * Rotate the hash value by 7 bits, then XOR each character in.
285 * This is implemented with some source-level loop unrolling.
286 */
287static xfs_dahash_t
288name_entry_hash(
289 struct name_entry *entry)
290{
291 uint8_t *name;
292 size_t namelen;
293 xfs_dahash_t hash;
294
367db2f5
DW
295 name = (uint8_t *)entry->skelstr;
296 namelen = entry->skelstrlen * sizeof(UChar);
3029a02c
DW
297
298 /*
299 * Do four characters at a time as long as we can.
300 */
301 for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
302 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
303 (name[3] << 0) ^ rol32(hash, 7 * 4);
304
305 /*
306 * Now do the rest of the characters.
307 */
308 switch (namelen) {
309 case 3:
310 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
311 rol32(hash, 7 * 3);
312 case 2:
313 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
314 case 1:
315 return (name[0] << 0) ^ rol32(hash, 7 * 1);
316 default: /* case 0: */
317 return hash;
318 }
319}
320
baa9ed8d
DW
321/*
322 * Check a name for suspicious elements that have appeared in filename
323 * spoofing attacks. This includes names that mixed directions or contain
324 * direction overrides control characters, both of which have appeared in
325 * filename spoofing attacks.
326 */
327static void
328name_entry_examine(
329 struct name_entry *entry,
330 unsigned int *badflags)
331{
332 UChar32 uchr;
333 int32_t i;
334 uint8_t mask = 0;
335
336 for (i = 0; i < entry->normstrlen;) {
337 U16_NEXT_UNSAFE(entry->normstr, i, uchr);
338
339 /* zero width character sequences */
340 switch (uchr) {
341 case 0x200B: /* zero width space */
342 case 0x200C: /* zero width non-joiner */
343 case 0x200D: /* zero width joiner */
344 case 0xFEFF: /* zero width non breaking space */
345 case 0x2060: /* word joiner */
346 case 0x2061: /* function application */
347 case 0x2062: /* invisible times (multiply) */
348 case 0x2063: /* invisible separator (comma) */
349 case 0x2064: /* invisible plus (addition) */
350 *badflags |= UNICRASH_ZERO_WIDTH;
351 break;
352 }
353
354 /* control characters */
355 if (u_iscntrl(uchr))
356 *badflags |= UNICRASH_CONTROL_CHAR;
357
358 switch (u_charDirection(uchr)) {
359 case U_LEFT_TO_RIGHT:
360 mask |= 0x01;
361 break;
362 case U_RIGHT_TO_LEFT:
363 mask |= 0x02;
364 break;
365 case U_RIGHT_TO_LEFT_OVERRIDE:
366 *badflags |= UNICRASH_BIDI_OVERRIDE;
367 break;
368 case U_LEFT_TO_RIGHT_OVERRIDE:
369 *badflags |= UNICRASH_BIDI_OVERRIDE;
370 break;
371 default:
372 break;
373 }
374 }
375
376 /* mixing left-to-right and right-to-left chars */
377 if (mask == 0x3)
378 *badflags |= UNICRASH_BIDI_MIXED;
379}
380
4bbed4ec
DW
381/* Initialize the collision detector. */
382static bool
383unicrash_init(
384 struct unicrash **ucp,
385 struct scrub_ctx *ctx,
386 bool compare_ino,
5fa5a860
DW
387 size_t nr_buckets,
388 bool is_only_root_writeable)
4bbed4ec
DW
389{
390 struct unicrash *p;
bff5d1a4 391 UErrorCode uerr = U_ZERO_ERROR;
4bbed4ec
DW
392
393 if (!is_utf8_locale()) {
394 *ucp = NULL;
395 return true;
396 }
397
398 if (nr_buckets > 65536)
399 nr_buckets = 65536;
400 else if (nr_buckets < 16)
401 nr_buckets = 16;
402
403 p = calloc(1, UNICRASH_SZ(nr_buckets));
404 if (!p)
405 return false;
406 p->ctx = ctx;
407 p->nr_buckets = nr_buckets;
408 p->compare_ino = compare_ino;
bff5d1a4
DW
409 p->normalizer = unorm2_getNFKCInstance(&uerr);
410 if (U_FAILURE(uerr))
411 goto out_free;
367db2f5
DW
412 p->spoof = uspoof_open(&uerr);
413 if (U_FAILURE(uerr))
414 goto out_free;
415 uspoof_setChecks(p->spoof, USPOOF_ALL_CHECKS, &uerr);
416 if (U_FAILURE(uerr))
417 goto out_spoof;
5fa5a860 418 p->is_only_root_writeable = is_only_root_writeable;
4bbed4ec
DW
419 *ucp = p;
420
421 return true;
367db2f5
DW
422out_spoof:
423 uspoof_close(p->spoof);
bff5d1a4
DW
424out_free:
425 free(p);
426 return false;
4bbed4ec
DW
427}
428
5fa5a860
DW
429/*
430 * Is this inode owned by root and not writable by others? If so, skip
431 * even the informational messages, because this was put in place by the
432 * administrator.
433 */
434static bool
435is_only_root_writable(
4cca629d 436 struct xfs_bulkstat *bstat)
5fa5a860
DW
437{
438 if (bstat->bs_uid != 0 || bstat->bs_gid != 0)
439 return false;
440 return !(bstat->bs_mode & S_IWOTH);
441}
442
4bbed4ec
DW
443/* Initialize the collision detector for a directory. */
444bool
445unicrash_dir_init(
446 struct unicrash **ucp,
447 struct scrub_ctx *ctx,
4cca629d 448 struct xfs_bulkstat *bstat)
4bbed4ec
DW
449{
450 /*
451 * Assume 64 bytes per dentry, clamp buckets between 16 and 64k.
452 * Same general idea as dir_hash_init in xfs_repair.
453 */
5fa5a860
DW
454 return unicrash_init(ucp, ctx, true, bstat->bs_size / 64,
455 is_only_root_writable(bstat));
4bbed4ec
DW
456}
457
458/* Initialize the collision detector for an extended attribute. */
459bool
460unicrash_xattr_init(
461 struct unicrash **ucp,
462 struct scrub_ctx *ctx,
4cca629d 463 struct xfs_bulkstat *bstat)
4bbed4ec
DW
464{
465 /* Assume 16 attributes per extent for lack of a better idea. */
5fa5a860
DW
466 return unicrash_init(ucp, ctx, false, 16 * (1 + bstat->bs_aextents),
467 is_only_root_writable(bstat));
4bbed4ec
DW
468}
469
3baa69cd
DW
/*
 * Set up a collision detector for a filesystem label: a 16 bucket table,
 * no inode comparison, and the label is treated as writable only by root.
 */
bool
unicrash_fs_label_init(
	struct unicrash		**ucp,
	struct scrub_ctx	*ctx)
{
	const size_t		nr_buckets = 16;

	return unicrash_init(ucp, ctx, false, nr_buckets, true);
}
478
4bbed4ec
DW
479/* Free the crash detector. */
480void
481unicrash_free(
482 struct unicrash *uc)
483{
484 struct name_entry *ne;
485 struct name_entry *x;
486 size_t i;
487
488 if (!uc)
489 return;
490
367db2f5 491 uspoof_close(uc->spoof);
4bbed4ec
DW
492 for (i = 0; i < uc->nr_buckets; i++) {
493 for (ne = uc->buckets[i]; ne != NULL; ne = x) {
494 x = ne->next;
3029a02c 495 name_entry_free(ne);
4bbed4ec
DW
496 }
497 }
498 free(uc);
499}
500
4bbed4ec
DW
501/* Complain about Unicode problems. */
502static void
503unicrash_complain(
504 struct unicrash *uc,
a3158a75 505 struct descr *dsc,
4bbed4ec 506 const char *what,
3029a02c 507 struct name_entry *entry,
95c2f78b 508 unsigned int badflags,
3029a02c 509 struct name_entry *dup_entry)
4bbed4ec
DW
510{
511 char *bad1 = NULL;
512 char *bad2 = NULL;
513
3029a02c
DW
514 bad1 = string_escape(entry->name);
515 if (dup_entry)
516 bad2 = string_escape(dup_entry->name);
4bbed4ec 517
baa9ed8d
DW
518 /*
519 * Most filechooser UIs do not look for bidirectional overrides when
520 * they render names. This can result in misleading name presentation
521 * that makes "hig<rtl>gnp.sh" render like "highs.png".
522 */
523 if (badflags & UNICRASH_BIDI_OVERRIDE) {
a3158a75 524 str_warn(uc->ctx, descr_render(dsc),
baa9ed8d
DW
525_("Unicode name \"%s\" in %s contains suspicious text direction overrides."),
526 bad1, what);
527 goto out;
528 }
529
95c2f78b
DW
530 /*
531 * Two names that normalize to the same string will render
532 * identically even though the filesystem considers them unique
533 * names. "cafe\xcc\x81" and "caf\xc3\xa9" have different byte
534 * sequences, but they both appear as "café".
535 */
536 if (badflags & UNICRASH_NOT_UNIQUE) {
a3158a75 537 str_warn(uc->ctx, descr_render(dsc),
95c2f78b
DW
538_("Unicode name \"%s\" in %s renders identically to \"%s\"."),
539 bad1, what, bad2);
540 goto out;
541 }
4bbed4ec 542
367db2f5
DW
543 /*
544 * If a name contains invisible/nonprinting characters and can be
545 * confused with another name as a result, we should complain.
546 * "moo<zerowidthspace>cow" and "moocow" are misleading.
547 */
548 if ((badflags & UNICRASH_ZERO_WIDTH) &&
549 (badflags & UNICRASH_CONFUSABLE)) {
a3158a75 550 str_warn(uc->ctx, descr_render(dsc),
367db2f5
DW
551_("Unicode name \"%s\" in %s could be confused with '%s' due to invisible characters."),
552 bad1, what, bad2);
553 goto out;
554 }
555
baa9ed8d
DW
556 /*
557 * Unfiltered control characters can mess up your terminal and render
558 * invisibly in filechooser UIs.
559 */
560 if (badflags & UNICRASH_CONTROL_CHAR) {
a3158a75 561 str_warn(uc->ctx, descr_render(dsc),
baa9ed8d
DW
562_("Unicode name \"%s\" in %s contains control characters."),
563 bad1, what);
564 goto out;
565 }
566
5fa5a860
DW
567 /*
568 * Skip the informational messages if the inode owning the name is
569 * only writeable by root, because those files were put there by the
570 * sysadmin. Also skip names less than four letters long because
571 * there's a much higher chance of collisions with short names.
572 */
573 if (!verbose && (uc->is_only_root_writeable || entry->namelen < 4))
574 goto out;
575
baa9ed8d
DW
576 /*
577 * It's not considered good practice (says Unicode) to mix LTR
578 * characters with RTL characters. The mere presence of different
579 * bidirectional characters isn't enough to trip up software, so don't
580 * warn about this too loudly.
581 */
582 if (badflags & UNICRASH_BIDI_MIXED) {
a3158a75 583 str_info(uc->ctx, descr_render(dsc),
baa9ed8d
DW
584_("Unicode name \"%s\" in %s mixes bidirectional characters."),
585 bad1, what);
586 goto out;
587 }
588
367db2f5
DW
589 /*
590 * We'll note if two names could be confusable with each other, but
591 * whether or not the user will actually confuse them is dependent
592 * on the rendering system and the typefaces in use. Maybe "foo.1"
593 * and "moo.l" look the same, maybe they do not.
594 */
595 if (badflags & UNICRASH_CONFUSABLE) {
a3158a75 596 str_info(uc->ctx, descr_render(dsc),
367db2f5
DW
597_("Unicode name \"%s\" in %s could be confused with \"%s\"."),
598 bad1, what, bad2);
599 }
600
95c2f78b 601out:
4bbed4ec
DW
602 free(bad1);
603 free(bad2);
604}
605
606/*
607 * Try to add a name -> ino entry to the collision detector. The name
367db2f5
DW
608 * must be skeletonized according to Unicode TR39 to detect names that
609 * could be visually confused with each other.
4bbed4ec
DW
610 */
611static bool
612unicrash_add(
613 struct unicrash *uc,
3029a02c
DW
614 struct name_entry *new_entry,
615 unsigned int *badflags,
616 struct name_entry **existing_entry)
4bbed4ec 617{
3029a02c 618 struct name_entry *entry;
4bbed4ec
DW
619 size_t bucket;
620 xfs_dahash_t hash;
621
3029a02c
DW
622 /* Store name in hashtable. */
623 hash = name_entry_hash(new_entry);
4bbed4ec 624 bucket = hash % uc->nr_buckets;
3029a02c
DW
625 entry = uc->buckets[bucket];
626 new_entry->next = entry;
627 uc->buckets[bucket] = new_entry;
628
629 while (entry != NULL) {
630 /* Same normalization? */
631 if (new_entry->normstrlen == entry->normstrlen &&
bff5d1a4 632 !u_strcmp(new_entry->normstr, entry->normstr) &&
3029a02c 633 (uc->compare_ino ? entry->ino != new_entry->ino : true)) {
95c2f78b 634 *badflags |= UNICRASH_NOT_UNIQUE;
3029a02c 635 *existing_entry = entry;
4bbed4ec
DW
636 return true;
637 }
367db2f5
DW
638
639 /* Confusable? */
640 if (new_entry->skelstrlen == entry->skelstrlen &&
641 !u_strcmp(new_entry->skelstr, entry->skelstr) &&
642 (uc->compare_ino ? entry->ino != new_entry->ino : true)) {
643 *badflags |= UNICRASH_CONFUSABLE;
644 *existing_entry = entry;
645 return true;
646 }
3029a02c 647 entry = entry->next;
4bbed4ec
DW
648 }
649
4bbed4ec
DW
650 return true;
651}
652
653/* Check a name for unicode normalization problems or collisions. */
654static bool
655__unicrash_check_name(
656 struct unicrash *uc,
a3158a75 657 struct descr *dsc,
4bbed4ec
DW
658 const char *namedescr,
659 const char *name,
660 xfs_ino_t ino)
661{
3029a02c
DW
662 struct name_entry *dup_entry = NULL;
663 struct name_entry *new_entry;
95c2f78b 664 unsigned int badflags = 0;
4bbed4ec 665 bool moveon;
4bbed4ec 666
3029a02c
DW
667 /* If we can't create entry data, just skip it. */
668 if (!name_entry_create(uc, name, ino, &new_entry))
669 return true;
670
baa9ed8d
DW
671 name_entry_examine(new_entry, &badflags);
672
3029a02c 673 moveon = unicrash_add(uc, new_entry, &badflags, &dup_entry);
4bbed4ec
DW
674 if (!moveon)
675 return false;
676
95c2f78b 677 if (badflags)
a3158a75 678 unicrash_complain(uc, dsc, namedescr, new_entry, badflags,
3029a02c 679 dup_entry);
4bbed4ec 680
4bbed4ec
DW
681 return true;
682}
683
/*
 * Check a directory entry for unicode normalization problems or
 * collisions.  Does nothing and returns true when there is no detector.
 */
bool
unicrash_check_dir_name(
	struct unicrash		*uc,
	struct descr		*dsc,
	struct dirent		*dentry)
{
	if (uc != NULL)
		return __unicrash_check_name(uc, dsc, _("directory"),
				dentry->d_name, dentry->d_ino);

	return true;
}
696
/*
 * Check an extended attribute name for unicode normalization problems or
 * collisions.  Attribute names are recorded with inode number zero.  Does
 * nothing and returns true when there is no detector.
 */
bool
unicrash_check_xattr_name(
	struct unicrash		*uc,
	struct descr		*dsc,
	const char		*attrname)
{
	if (uc != NULL)
		return __unicrash_check_name(uc, dsc, _("extended attribute"),
				attrname, 0);

	return true;
}
3baa69cd
DW
712
/*
 * Check the fs label for unicode normalization problems or misleading
 * bits.  The label is recorded with inode number zero.  Does nothing and
 * returns true when there is no detector.
 */
bool
unicrash_check_fs_label(
	struct unicrash		*uc,
	struct descr		*dsc,
	const char		*label)
{
	if (uc != NULL)
		return __unicrash_check_name(uc, dsc, _("filesystem label"),
				label, 0);

	return true;
}
726}