]> git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blame - scrub/unicrash.c
xfs_scrub: fix #include ordering to avoid build failure
[thirdparty/xfsprogs-dev.git] / scrub / unicrash.c
CommitLineData
4bbed4ec
DW
1/*
2 * Copyright (C) 2018 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
a440f877 20#include "xfs.h"
4bbed4ec 21#include <stdint.h>
4bbed4ec
DW
22#include <stdlib.h>
23#include <dirent.h>
24#include <sys/types.h>
4bbed4ec
DW
25#include <sys/statvfs.h>
26#include <unistr.h>
27#include <uninorm.h>
4bbed4ec
DW
28#include "path.h"
29#include "xfs_scrub.h"
30#include "common.h"
31
32/*
33 * Detect collisions of Unicode-normalized names.
34 *
35 * Record all the name->ino mappings in a directory/xattr, with a twist!
36 * The twist is that we perform unicode normalization on every name we
37 * see, so that we can warn about a directory containing more than one
38 * directory entry that normalizes to the same Unicode string. These
39 * entries are at best a sign of Unicode mishandling, or some sort of
40 * weird name substitution attack if the entries do not point to the
41 * same inode. Warn if we see multiple dirents that do not all point to
42 * the same inode.
43 *
44 * For extended attributes we perform the same collision checks on the
45 * attribute, though any collision is enough to trigger a warning.
46 *
47 * We flag these collisions as warnings and not errors because XFS
48 * treats names as a sequence of arbitrary nonzero bytes. While a
49 * Unicode collision is not technically a filesystem corruption, we
50 * ought to say something if there's a possibility for misleading a
51 * user.
52 *
53 * To normalize, we use Unicode NFKC. We use the composing
54 * normalization mode (e.g. "E WITH ACUTE" instead of "E" then "ACUTE")
55 * because that's what W3C (and in general Linux) uses. This enables us
56 * to detect multiple object names that normalize to the same name and
57 * could be confusing to users. Furthermore, we use the compatibility
58 * mode to detect names with compatible but different code points to
59 * strengthen those checks.
60 */
61
62struct name_entry {
63 struct name_entry *next;
64 xfs_ino_t ino;
65 size_t uninamelen;
66 uint8_t uniname[0];
67};
68#define NAME_ENTRY_SZ(nl) (sizeof(struct name_entry) + 1 + \
69 (nl * sizeof(uint8_t)))
70
/*
 * State of one collision detector.
 *
 * ctx:         scrub context handed to the message reporting helpers.
 * compare_ino: if true, a repeated name only counts as a collision when the
 *              inode numbers differ; if false, every repeated name does.
 * nr_buckets:  number of elements in buckets[].
 * buckets:     heads of the per-bucket name_entry chains, stored inline.
 */
struct unicrash {
	struct scrub_ctx	*ctx;
	bool			compare_ino;
	size_t			nr_buckets;
	struct name_entry	*buckets[];
};

/*
 * Bytes to allocate for a detector with nr buckets.  The argument is
 * parenthesized so that expressions such as "a + b" are sized correctly.
 */
#define UNICRASH_SZ(nr)		(sizeof(struct unicrash) + \
				 ((nr) * sizeof(struct name_entry *)))
79
/*
 * Unicode collision checking is only meaningful when the system is
 * configured for utf8.  We decide that by looking at the locale name
 * used for messages: if it contains ".UTF-8" anywhere, utf8 is assumed
 * to be in use; otherwise no checking is performed.
 *
 * Most modern Linux systems default to utf8, so a negative answer
 * normally means that the administrator configured things that way or
 * that there is no usable locale data at all.
 *
 * The answer is computed once and remembered.  A NULL locale name is
 * reported as "not utf8" without being remembered.
 */
#define UTF8_STR		".UTF-8"
#define UTF8_STRLEN		(sizeof(UTF8_STR) - 1)
static bool
is_utf8_locale(void)
{
	static int		cached = -1;	/* -1 means "not determined yet" */
	const char		*locale_name;

	if (cached == -1) {
		locale_name = setlocale(LC_MESSAGES, NULL);
		if (!locale_name)
			return false;

		cached = strstr(locale_name, UTF8_STR) ? 1 : 0;
	}

	return cached;
}
113
114/* Initialize the collision detector. */
115static bool
116unicrash_init(
117 struct unicrash **ucp,
118 struct scrub_ctx *ctx,
119 bool compare_ino,
120 size_t nr_buckets)
121{
122 struct unicrash *p;
123
124 if (!is_utf8_locale()) {
125 *ucp = NULL;
126 return true;
127 }
128
129 if (nr_buckets > 65536)
130 nr_buckets = 65536;
131 else if (nr_buckets < 16)
132 nr_buckets = 16;
133
134 p = calloc(1, UNICRASH_SZ(nr_buckets));
135 if (!p)
136 return false;
137 p->ctx = ctx;
138 p->nr_buckets = nr_buckets;
139 p->compare_ino = compare_ino;
140 *ucp = p;
141
142 return true;
143}
144
145/* Initialize the collision detector for a directory. */
146bool
147unicrash_dir_init(
148 struct unicrash **ucp,
149 struct scrub_ctx *ctx,
150 struct xfs_bstat *bstat)
151{
152 /*
153 * Assume 64 bytes per dentry, clamp buckets between 16 and 64k.
154 * Same general idea as dir_hash_init in xfs_repair.
155 */
156 return unicrash_init(ucp, ctx, true, bstat->bs_size / 64);
157}
158
159/* Initialize the collision detector for an extended attribute. */
160bool
161unicrash_xattr_init(
162 struct unicrash **ucp,
163 struct scrub_ctx *ctx,
164 struct xfs_bstat *bstat)
165{
166 /* Assume 16 attributes per extent for lack of a better idea. */
167 return unicrash_init(ucp, ctx, false, 16 * (1 + bstat->bs_aextents));
168}
169
170/* Free the crash detector. */
171void
172unicrash_free(
173 struct unicrash *uc)
174{
175 struct name_entry *ne;
176 struct name_entry *x;
177 size_t i;
178
179 if (!uc)
180 return;
181
182 for (i = 0; i < uc->nr_buckets; i++) {
183 for (ne = uc->buckets[i]; ne != NULL; ne = x) {
184 x = ne->next;
185 free(ne);
186 }
187 }
188 free(uc);
189}
190
191/* Steal the dirhash function from libxfs, avoid linking with libxfs. */
192
193#define rol32(x, y) (((x) << (y)) | ((x) >> (32 - (y))))
194
195/*
196 * Implement a simple hash on a character string.
197 * Rotate the hash value by 7 bits, then XOR each character in.
198 * This is implemented with some source-level loop unrolling.
199 */
200static xfs_dahash_t
201unicrash_hashname(
202 const uint8_t *name,
203 size_t namelen)
204{
205 xfs_dahash_t hash;
206
207 /*
208 * Do four characters at a time as long as we can.
209 */
210 for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
211 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
212 (name[3] << 0) ^ rol32(hash, 7 * 4);
213
214 /*
215 * Now do the rest of the characters.
216 */
217 switch (namelen) {
218 case 3:
219 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
220 rol32(hash, 7 * 3);
221 case 2:
222 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
223 case 1:
224 return (name[0] << 0) ^ rol32(hash, 7 * 1);
225 default: /* case 0: */
226 return hash;
227 }
228}
229
230/*
231 * Normalize a name according to Unicode NFKC normalization rules.
232 * Returns true if the name was already normalized.
233 */
234static bool
235unicrash_normalize(
236 const char *in,
237 uint8_t *out,
238 size_t outlen)
239{
240 size_t inlen = strlen(in);
241
242 assert(inlen <= outlen);
243 if (!u8_normalize(UNINORM_NFKC, (const uint8_t *)in, inlen,
244 out, &outlen)) {
245 /* Didn't normalize, just return the same buffer. */
246 memcpy(out, in, inlen + 1);
247 return true;
248 }
249 out[outlen] = 0;
250 return outlen == inlen ? memcmp(in, out, inlen) == 0 : false;
251}
252
253/* Complain about Unicode problems. */
254static void
255unicrash_complain(
256 struct unicrash *uc,
257 const char *descr,
258 const char *what,
259 bool normal,
260 bool unique,
261 const char *name,
262 uint8_t *uniname)
263{
264 char *bad1 = NULL;
265 char *bad2 = NULL;
266
267 bad1 = string_escape(name);
268 bad2 = string_escape((char *)uniname);
269
270 if (!normal && should_warn_about_name(uc->ctx))
271 str_info(uc->ctx, descr,
272_("Unicode name \"%s\" in %s should be normalized as \"%s\"."),
273 bad1, what, bad2);
274 if (!unique)
275 str_warn(uc->ctx, descr,
276_("Duplicate normalized Unicode name \"%s\" found in %s."),
277 bad1, what);
278
279 free(bad1);
280 free(bad2);
281}
282
283/*
284 * Try to add a name -> ino entry to the collision detector. The name
285 * must be normalized according to Unicode NFKC normalization rules to
286 * detect byte-unique names that map to the same sequence of Unicode
287 * code points.
288 *
289 * This function returns true either if there was no previous mapping or
290 * there was a mapping that matched exactly. It returns false if
291 * there is already a record with that name pointing to a different
292 * inode.
293 */
294static bool
295unicrash_add(
296 struct unicrash *uc,
297 uint8_t *uniname,
298 xfs_ino_t ino,
299 bool *unique)
300{
301 struct name_entry *ne;
302 struct name_entry *x;
303 struct name_entry **nep;
304 size_t uninamelen = u8_strlen(uniname);
305 size_t bucket;
306 xfs_dahash_t hash;
307
308 /* Do we already know about that name? */
309 hash = unicrash_hashname(uniname, uninamelen);
310 bucket = hash % uc->nr_buckets;
311 for (nep = &uc->buckets[bucket], ne = *nep; ne != NULL; ne = x) {
312 if (u8_strcmp(uniname, ne->uniname) == 0) {
313 *unique = uc->compare_ino ? ne->ino == ino : false;
314 return true;
315 }
316 nep = &ne->next;
317 x = ne->next;
318 }
319
320 /* Remember that name. */
321 x = malloc(NAME_ENTRY_SZ(uninamelen));
322 if (!x)
323 return false;
324 x->next = NULL;
325 x->ino = ino;
326 x->uninamelen = uninamelen;
327 memcpy(x->uniname, uniname, uninamelen + 1);
328 *nep = x;
329 *unique = true;
330
331 return true;
332}
333
334/* Check a name for unicode normalization problems or collisions. */
335static bool
336__unicrash_check_name(
337 struct unicrash *uc,
338 const char *descr,
339 const char *namedescr,
340 const char *name,
341 xfs_ino_t ino)
342{
343 uint8_t uniname[(NAME_MAX * 2) + 1];
344 bool moveon;
345 bool normal;
346 bool unique;
347
348 memset(uniname, 0, (NAME_MAX * 2) + 1);
349 normal = unicrash_normalize(name, uniname, NAME_MAX * 2);
350 moveon = unicrash_add(uc, uniname, ino, &unique);
351 if (!moveon)
352 return false;
353
354 if (normal && unique)
355 return true;
356
357 unicrash_complain(uc, descr, namedescr, normal, unique, name,
358 uniname);
359 return true;
360}
361
/*
 * Check a directory entry for Unicode normalization problems or
 * collisions, recording the entry's name and inode number.  A NULL
 * detector means checking is disabled, and the call succeeds trivially.
 */
bool
unicrash_check_dir_name(
	struct unicrash		*uc,
	const char		*descr,
	struct dirent		*dentry)
{
	if (uc == NULL)
		return true;

	return __unicrash_check_name(uc, descr, _("directory"),
			dentry->d_name, dentry->d_ino);
}
374
/*
 * Check an extended attribute name for Unicode normalization problems
 * or collisions.  The name is recorded with inode number zero.  A NULL
 * detector means checking is disabled, and the call succeeds trivially.
 */
bool
unicrash_check_xattr_name(
	struct unicrash		*uc,
	const char		*descr,
	const char		*attrname)
{
	if (uc == NULL)
		return true;

	return __unicrash_check_name(uc, descr, _("extended attribute"),
			attrname, 0);
}