]>
Commit | Line | Data |
---|---|---|
4bbed4ec DW |
1 | /* |
2 | * Copyright (C) 2018 Oracle. All Rights Reserved. | |
3 | * | |
4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version 2 | |
9 | * of the License, or (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it would be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write the Free Software Foundation, | |
18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
19 | */ | |
a440f877 | 20 | #include "xfs.h" |
4bbed4ec | 21 | #include <stdint.h> |
4bbed4ec DW |
22 | #include <stdlib.h> |
23 | #include <dirent.h> | |
24 | #include <sys/types.h> | |
4bbed4ec DW |
25 | #include <sys/statvfs.h> |
26 | #include <unistr.h> | |
27 | #include <uninorm.h> | |
4bbed4ec DW |
28 | #include "path.h" |
29 | #include "xfs_scrub.h" | |
30 | #include "common.h" | |
31 | ||
32 | /* | |
33 | * Detect collisions of Unicode-normalized names. | |
34 | * | |
35 | * Record all the name->ino mappings in a directory/xattr, with a twist! | |
36 | * The twist is that we perform unicode normalization on every name we | |
37 | * see, so that we can warn about a directory containing more than one | |
38 | * directory entry that normalizes to the same Unicode string. These | |
39 | * entries are at best a sign of Unicode mishandling, or some sort of | |
40 | * weird name substitution attack if the entries do not point to the | |
41 | * same inode. Warn if we see multiple dirents that do not all point to | |
42 | * the same inode. | |
43 | * | |
44 | * For extended attributes we perform the same collision checks on the | |
45 | * attribute, though any collision is enough to trigger a warning. | |
46 | * | |
47 | * We flag these collisions as warnings and not errors because XFS | |
48 | * treats names as a sequence of arbitrary nonzero bytes. While a | |
49 | * Unicode collision is not technically a filesystem corruption, we | |
50 | * ought to say something if there's a possibility for misleading a | |
51 | * user. | |
52 | * | |
53 | * To normalize, we use Unicode NFKC. We use the composing | |
54 | * normalization mode (e.g. "E WITH ACUTE" instead of "E" then "ACUTE") | |
55 | * because that's what W3C (and in general Linux) uses. This enables us | |
56 | * to detect multiple object names that normalize to the same name and | |
57 | * could be confusing to users. Furthermore, we use the compatibility | |
58 | * mode to detect names with compatible but different code points to | |
59 | * strengthen those checks. | |
60 | */ | |
61 | ||
62 | struct name_entry { | |
63 | struct name_entry *next; | |
64 | xfs_ino_t ino; | |
65 | size_t uninamelen; | |
66 | uint8_t uniname[0]; | |
67 | }; | |
68 | #define NAME_ENTRY_SZ(nl) (sizeof(struct name_entry) + 1 + \ | |
69 | (nl * sizeof(uint8_t))) | |
70 | ||
/*
 * Collision detector state: a hash table of name_entry chains, sized at
 * allocation time, plus the settings that control how a repeated name
 * is judged.
 */
struct unicrash {
	struct scrub_ctx	*ctx;		/* context used for reporting */
	bool			compare_ino;	/* same name + same ino is ok */
	size_t			nr_buckets;	/* length of buckets[] */
	struct name_entry	*buckets[];	/* heads of the hash chains */
};
/* Allocation size for a detector with nr hash buckets. */
#define UNICRASH_SZ(nr)		(sizeof(struct unicrash) + \
				 ((nr) * sizeof(struct name_entry *)))
79 | ||
/*
 * Unicode collision checking is only meaningful when the system is
 * configured for utf8.  We decide that by looking at the locale name
 * used for messages: if it contains ".UTF-8" anywhere, utf8 is assumed
 * to be in use; otherwise no checking is performed.
 *
 * Most modern Linux systems default to utf8, so this only returns false
 * when the administrator configured things that way, or when there is
 * no usable locale data at all.
 *
 * The verdict is computed once and remembered.  A NULL locale name is
 * reported as "not utf8" without being remembered.
 */
#define UTF8_STR		".UTF-8"
#define UTF8_STRLEN		(sizeof(UTF8_STR) - 1)
static bool
is_utf8_locale(void)
{
	static int		answer = -1;	/* -1: not determined yet */
	const char		*msg_locale;

	if (answer == -1) {
		msg_locale = setlocale(LC_MESSAGES, NULL);
		if (!msg_locale)
			return false;
		answer = strstr(msg_locale, UTF8_STR) ? 1 : 0;
	}

	return answer;
}
113 | ||
114 | /* Initialize the collision detector. */ | |
115 | static bool | |
116 | unicrash_init( | |
117 | struct unicrash **ucp, | |
118 | struct scrub_ctx *ctx, | |
119 | bool compare_ino, | |
120 | size_t nr_buckets) | |
121 | { | |
122 | struct unicrash *p; | |
123 | ||
124 | if (!is_utf8_locale()) { | |
125 | *ucp = NULL; | |
126 | return true; | |
127 | } | |
128 | ||
129 | if (nr_buckets > 65536) | |
130 | nr_buckets = 65536; | |
131 | else if (nr_buckets < 16) | |
132 | nr_buckets = 16; | |
133 | ||
134 | p = calloc(1, UNICRASH_SZ(nr_buckets)); | |
135 | if (!p) | |
136 | return false; | |
137 | p->ctx = ctx; | |
138 | p->nr_buckets = nr_buckets; | |
139 | p->compare_ino = compare_ino; | |
140 | *ucp = p; | |
141 | ||
142 | return true; | |
143 | } | |
144 | ||
145 | /* Initialize the collision detector for a directory. */ | |
146 | bool | |
147 | unicrash_dir_init( | |
148 | struct unicrash **ucp, | |
149 | struct scrub_ctx *ctx, | |
150 | struct xfs_bstat *bstat) | |
151 | { | |
152 | /* | |
153 | * Assume 64 bytes per dentry, clamp buckets between 16 and 64k. | |
154 | * Same general idea as dir_hash_init in xfs_repair. | |
155 | */ | |
156 | return unicrash_init(ucp, ctx, true, bstat->bs_size / 64); | |
157 | } | |
158 | ||
159 | /* Initialize the collision detector for an extended attribute. */ | |
160 | bool | |
161 | unicrash_xattr_init( | |
162 | struct unicrash **ucp, | |
163 | struct scrub_ctx *ctx, | |
164 | struct xfs_bstat *bstat) | |
165 | { | |
166 | /* Assume 16 attributes per extent for lack of a better idea. */ | |
167 | return unicrash_init(ucp, ctx, false, 16 * (1 + bstat->bs_aextents)); | |
168 | } | |
169 | ||
170 | /* Free the crash detector. */ | |
171 | void | |
172 | unicrash_free( | |
173 | struct unicrash *uc) | |
174 | { | |
175 | struct name_entry *ne; | |
176 | struct name_entry *x; | |
177 | size_t i; | |
178 | ||
179 | if (!uc) | |
180 | return; | |
181 | ||
182 | for (i = 0; i < uc->nr_buckets; i++) { | |
183 | for (ne = uc->buckets[i]; ne != NULL; ne = x) { | |
184 | x = ne->next; | |
185 | free(ne); | |
186 | } | |
187 | } | |
188 | free(uc); | |
189 | } | |
190 | ||
191 | /* Steal the dirhash function from libxfs, avoid linking with libxfs. */ | |
192 | ||
193 | #define rol32(x, y) (((x) << (y)) | ((x) >> (32 - (y)))) | |
194 | ||
195 | /* | |
196 | * Implement a simple hash on a character string. | |
197 | * Rotate the hash value by 7 bits, then XOR each character in. | |
198 | * This is implemented with some source-level loop unrolling. | |
199 | */ | |
200 | static xfs_dahash_t | |
201 | unicrash_hashname( | |
202 | const uint8_t *name, | |
203 | size_t namelen) | |
204 | { | |
205 | xfs_dahash_t hash; | |
206 | ||
207 | /* | |
208 | * Do four characters at a time as long as we can. | |
209 | */ | |
210 | for (hash = 0; namelen >= 4; namelen -= 4, name += 4) | |
211 | hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^ | |
212 | (name[3] << 0) ^ rol32(hash, 7 * 4); | |
213 | ||
214 | /* | |
215 | * Now do the rest of the characters. | |
216 | */ | |
217 | switch (namelen) { | |
218 | case 3: | |
219 | return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^ | |
220 | rol32(hash, 7 * 3); | |
221 | case 2: | |
222 | return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2); | |
223 | case 1: | |
224 | return (name[0] << 0) ^ rol32(hash, 7 * 1); | |
225 | default: /* case 0: */ | |
226 | return hash; | |
227 | } | |
228 | } | |
229 | ||
230 | /* | |
231 | * Normalize a name according to Unicode NFKC normalization rules. | |
232 | * Returns true if the name was already normalized. | |
233 | */ | |
234 | static bool | |
235 | unicrash_normalize( | |
236 | const char *in, | |
237 | uint8_t *out, | |
238 | size_t outlen) | |
239 | { | |
240 | size_t inlen = strlen(in); | |
241 | ||
242 | assert(inlen <= outlen); | |
243 | if (!u8_normalize(UNINORM_NFKC, (const uint8_t *)in, inlen, | |
244 | out, &outlen)) { | |
245 | /* Didn't normalize, just return the same buffer. */ | |
246 | memcpy(out, in, inlen + 1); | |
247 | return true; | |
248 | } | |
249 | out[outlen] = 0; | |
250 | return outlen == inlen ? memcmp(in, out, inlen) == 0 : false; | |
251 | } | |
252 | ||
253 | /* Complain about Unicode problems. */ | |
254 | static void | |
255 | unicrash_complain( | |
256 | struct unicrash *uc, | |
257 | const char *descr, | |
258 | const char *what, | |
259 | bool normal, | |
260 | bool unique, | |
261 | const char *name, | |
262 | uint8_t *uniname) | |
263 | { | |
264 | char *bad1 = NULL; | |
265 | char *bad2 = NULL; | |
266 | ||
267 | bad1 = string_escape(name); | |
268 | bad2 = string_escape((char *)uniname); | |
269 | ||
270 | if (!normal && should_warn_about_name(uc->ctx)) | |
271 | str_info(uc->ctx, descr, | |
272 | _("Unicode name \"%s\" in %s should be normalized as \"%s\"."), | |
273 | bad1, what, bad2); | |
274 | if (!unique) | |
275 | str_warn(uc->ctx, descr, | |
276 | _("Duplicate normalized Unicode name \"%s\" found in %s."), | |
277 | bad1, what); | |
278 | ||
279 | free(bad1); | |
280 | free(bad2); | |
281 | } | |
282 | ||
283 | /* | |
284 | * Try to add a name -> ino entry to the collision detector. The name | |
285 | * must be normalized according to Unicode NFKC normalization rules to | |
286 | * detect byte-unique names that map to the same sequence of Unicode | |
287 | * code points. | |
288 | * | |
289 | * This function returns true either if there was no previous mapping or | |
290 | * there was a mapping that matched exactly. It returns false if | |
291 | * there is already a record with that name pointing to a different | |
292 | * inode. | |
293 | */ | |
294 | static bool | |
295 | unicrash_add( | |
296 | struct unicrash *uc, | |
297 | uint8_t *uniname, | |
298 | xfs_ino_t ino, | |
299 | bool *unique) | |
300 | { | |
301 | struct name_entry *ne; | |
302 | struct name_entry *x; | |
303 | struct name_entry **nep; | |
304 | size_t uninamelen = u8_strlen(uniname); | |
305 | size_t bucket; | |
306 | xfs_dahash_t hash; | |
307 | ||
308 | /* Do we already know about that name? */ | |
309 | hash = unicrash_hashname(uniname, uninamelen); | |
310 | bucket = hash % uc->nr_buckets; | |
311 | for (nep = &uc->buckets[bucket], ne = *nep; ne != NULL; ne = x) { | |
312 | if (u8_strcmp(uniname, ne->uniname) == 0) { | |
313 | *unique = uc->compare_ino ? ne->ino == ino : false; | |
314 | return true; | |
315 | } | |
316 | nep = &ne->next; | |
317 | x = ne->next; | |
318 | } | |
319 | ||
320 | /* Remember that name. */ | |
321 | x = malloc(NAME_ENTRY_SZ(uninamelen)); | |
322 | if (!x) | |
323 | return false; | |
324 | x->next = NULL; | |
325 | x->ino = ino; | |
326 | x->uninamelen = uninamelen; | |
327 | memcpy(x->uniname, uniname, uninamelen + 1); | |
328 | *nep = x; | |
329 | *unique = true; | |
330 | ||
331 | return true; | |
332 | } | |
333 | ||
334 | /* Check a name for unicode normalization problems or collisions. */ | |
335 | static bool | |
336 | __unicrash_check_name( | |
337 | struct unicrash *uc, | |
338 | const char *descr, | |
339 | const char *namedescr, | |
340 | const char *name, | |
341 | xfs_ino_t ino) | |
342 | { | |
343 | uint8_t uniname[(NAME_MAX * 2) + 1]; | |
344 | bool moveon; | |
345 | bool normal; | |
346 | bool unique; | |
347 | ||
348 | memset(uniname, 0, (NAME_MAX * 2) + 1); | |
349 | normal = unicrash_normalize(name, uniname, NAME_MAX * 2); | |
350 | moveon = unicrash_add(uc, uniname, ino, &unique); | |
351 | if (!moveon) | |
352 | return false; | |
353 | ||
354 | if (normal && unique) | |
355 | return true; | |
356 | ||
357 | unicrash_complain(uc, descr, namedescr, normal, unique, name, | |
358 | uniname); | |
359 | return true; | |
360 | } | |
361 | ||
/*
 * Check a directory entry for unicode normalization problems or
 * collisions.  With no detector (uc is NULL) there is nothing to check
 * and true is returned.
 */
bool
unicrash_check_dir_name(
	struct unicrash		*uc,
	const char		*descr,
	struct dirent		*dentry)
{
	if (uc == NULL)
		return true;

	return __unicrash_check_name(uc, descr, _("directory"),
			dentry->d_name, dentry->d_ino);
}
374 | ||
/*
 * Check an extended attribute name for unicode normalization problems
 * or collisions.  With no detector (uc is NULL) there is nothing to
 * check and true is returned.  Attribute names are recorded with an
 * inode number of zero.
 */
bool
unicrash_check_xattr_name(
	struct unicrash		*uc,
	const char		*descr,
	const char		*attrname)
{
	if (uc == NULL)
		return true;

	return __unicrash_check_name(uc, descr, _("extended attribute"),
			attrname, 0);
}