]>
git.ipfire.org Git - thirdparty/xfsprogs-dev.git/blob - db/obfuscate.c
1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2007, 2011 SGI
10 static inline unsigned char
11 random_filename_char(void)
13 static unsigned char filename_alphabet
[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
14 "abcdefghijklmnopqrstuvwxyz"
17 return filename_alphabet
[random() % (sizeof filename_alphabet
- 1)];
20 #define rol32(x,y) (((x) << (y)) | ((x) >> (32 - (y))))
23 * Given a name and its hash value, massage the name in such a way
24 * that the result is another name of equal length which shares the
34 unsigned char *oldname
= NULL
;
37 xfs_dahash_t new_hash
;
39 unsigned char high_bit
;
41 bool is_ci_name
= is_dirent
&& xfs_has_asciici(mp
);
45 * Our obfuscation algorithm requires at least 5-character
46 * names, so don't bother if the name is too short. We
47 * work backward from a hash value to determine the last
48 * five bytes in a name required to produce a new name
55 oldname
= malloc(name_len
);
58 memcpy(oldname
, name
, name_len
);
66 * If we cannot generate a ci-compatible obfuscated name after 1000
67 * tries, don't bother obfuscating the name.
70 memcpy(name
, oldname
, name_len
);
75 * The beginning of the obfuscated name can be pretty much
76 * anything, so fill it in with random characters.
77 * Accumulate its new hash value as we go.
79 for (i
= 0; i
< name_len
- 5; i
++) {
80 *newp
= random_filename_char();
82 new_hash
= xfs_ascii_ci_xfrm(*newp
) ^
85 new_hash
= *newp
^ rol32(new_hash
, 7);
90 * Compute which five bytes need to be used at the end of
91 * the name so the hash of the obfuscated name is the same
92 * as the hash of the original. If any result in an invalid
93 * character, flip a bit and arrange for a corresponding bit
94 * in a neighboring byte to be flipped as well. For the
95 * last byte, the "neighbor" to change is the first byte
96 * we're computing here.
98 new_hash
= rol32(new_hash
, 3) ^ hash
;
102 for (shift
= 28; shift
>= 0; shift
-= 7) {
103 *newp
= (new_hash
>> shift
& 0x7f) ^ high_bit
;
104 if (is_invalid_char(*newp
)) {
111 * If ascii-ci is enabled, uppercase characters are converted
112 * to lowercase characters while computing the name hash. If
113 * any of the necessary correction bytes are uppercase, the
114 * hash of the new name will not match. Try again with a
117 if (is_ci_name
&& xfs_ascii_ci_need_xfrm(*newp
))
120 ASSERT(!is_invalid_char(*newp
));
125 * If we flipped a bit on the last byte, we need to fix up
126 * the matching bit in the first byte. The result will
127 * be a valid character, because we know that first byte
128 * has 0's in its upper four bits (it was produced by a
129 * 28-bit right-shift of a 32-bit unsigned value).
134 if (is_ci_name
&& xfs_ascii_ci_need_xfrm(*first
))
137 ASSERT(!is_invalid_char(*first
));
144 * Flip a bit in each of two bytes at the end of the given name.
145 * This is used in generating a series of alternate names to be used
146 * in the event a duplicate is found.
148 * The bits flipped are selected such that they both affect the same
149 * bit in the name's computed hash value, so flipping them both will
152 * The following diagram aims to show the portion of a computed
153 * hash that a given byte of a name affects.
155 * 31 28 24 21 14 8 7 3 0
156 * +-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-+
157 * hash: | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
158 * +-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-+
159 * last-4 ->| |<-- last-2 --->| |<--- last ---->|
160 * |<-- last-3 --->| |<-- last-1 --->| |<- last-4
161 * |<-- last-7 --->| |<-- last-5 --->|
162 * |<-- last-8 --->| |<-- last-6 --->|
165 * The last byte of the name directly affects the low-order byte of
166 * the hash. The next-to-last affects bits 7-14, the next one back
167 * affects bits 14-21, and so on. The effect wraps around when it
168 * goes beyond the top of the hash (as happens for byte last-4).
170 * Bits that are flipped together "overlap" on the hash value. As
171 * an example of overlap, the last two bytes both affect bit 7 in
172 * the hash. That pair of bytes (and their overlapping bits) can be
173 * used for this "flip bit" operation (it's the first pair tried,
176 * A table defines overlapping pairs--the bytes involved and bits
177 * within them--that can be used this way. The byte offset is
178 * relative to a starting point within the name, which will be set
179 * to affect the bytes at the end of the name. The function is
180 * called with a "bitseq" value which indicates which bit flip is
181 * desired, and this translates directly into selecting which entry
182 * in the bit_to_flip[] table to apply.
184 * The function returns 1 if the operation was successful. It
185 * returns 0 if the result produced a character that's not valid in
186 * a name (either '/' or a '\0'). Finally, it returns -1 if the bit
187 * sequence number is beyond what is supported for a name of this
192 * (Also see the discussion above find_alternate(), below.)
194 * In order to make this function work for any length name, the
195 * table is ordered by increasing byte offset, so that the earliest
196 * entries can apply to the shortest strings. This way all names
197 * are done consistently.
199 * When bit flips occur, they can convert printable characters
200 * into non-printable ones. In an effort to reduce the impact of
201 * this, the first bit flips are chosen to affect bytes the end of
202 * the name (and furthermore, toward the low bits of a byte). Those
203 * bytes are often non-printable anyway because of the way they are
204 * initially selected by obfuscate_name()). This is accomplished,
205 * using later table entries first.
207 * Each row in the table doubles the number of alternates that
208 * can be generated. A two-byte name is limited to using only
209 * the first row, so it's possible to generate two alternates
210 * (the original name, plus the alternate produced by flipping
211 * the one pair of bits). In a 5-byte name, the effect of the
212 * first byte overlaps the last by 4 its, and there are 8 bits
213 * to flip, allowing for 256 possible alternates.
215 * Short names (less than 5 bytes) are never even obfuscated, so for
216 * such names the relatively small number of alternates should never
217 * really be a problem.
219 * Long names (more than 6 bytes, say) are not likely to exhaust
220 * the number of available alternates. In fact, the table could
221 * probably have stopped at 8 entries, on the assumption that 256
222 * alternates should be enough for most any situation. The entries
223 * beyond those are present mostly for demonstration of how it could
224 * be populated with more entries, should it ever be necessary to do
235 unsigned char *p0
, *p1
;
236 unsigned char m0
, m1
;
238 int byte
; /* Offset from start within name */
239 unsigned char bit
; /* Bit within that byte */
240 } bit_to_flip
[][2] = { /* Sorted by second entry's byte */
241 { { 0, 0 }, { 1, 7 } }, /* Each row defines a pair */
242 { { 1, 0 }, { 2, 7 } }, /* of bytes and a bit within */
243 { { 2, 0 }, { 3, 7 } }, /* each byte. Each bit in */
244 { { 0, 4 }, { 4, 0 } }, /* a pair affects the same */
245 { { 0, 5 }, { 4, 1 } }, /* bit in the hash, so flipping */
246 { { 0, 6 }, { 4, 2 } }, /* both will change the name */
247 { { 0, 7 }, { 4, 3 } }, /* while preserving the hash. */
248 { { 3, 0 }, { 4, 7 } },
249 { { 0, 0 }, { 5, 3 } }, /* The first entry's byte offset */
250 { { 0, 1 }, { 5, 4 } }, /* must be less than the second. */
251 { { 0, 2 }, { 5, 5 } },
252 { { 0, 3 }, { 5, 6 } }, /* The table can be extended to */
253 { { 0, 4 }, { 5, 7 } }, /* an arbitrary number of entries */
254 { { 4, 0 }, { 5, 7 } }, /* but there's not much point. */
258 /* Find the first entry *not* usable for name of this length */
260 for (index
= 0; index
< ARRAY_SIZE(bit_to_flip
); index
++)
261 if (bit_to_flip
[index
][1].byte
>= name_len
)
265 * Back up to the last usable entry. If that number is
266 * smaller than the bit sequence number, inform the caller
267 * that nothing this large (or larger) will work.
269 if (bitseq
> --index
)
273 * We will be switching bits at the end of name, with a
274 * preference for affecting the last bytes first. Compute
275 * where in the name we'll start applying the changes.
277 offset
= name_len
- (bit_to_flip
[index
][1].byte
+ 1);
278 index
-= bitseq
; /* Use later table entries first */
280 p0
= name
+ offset
+ bit_to_flip
[index
][0].byte
;
281 p1
= name
+ offset
+ bit_to_flip
[index
][1].byte
;
282 m0
= 1 << bit_to_flip
[index
][0].bit
;
283 m1
= 1 << bit_to_flip
[index
][1].bit
;
285 /* Only change the bytes if it produces valid characters */
287 if (is_invalid_char(*p0
^ m0
) || is_invalid_char(*p1
^ m1
))
297 * This function generates a well-defined sequence of "alternate"
298 * names for a given name. An alternate is a name having the same
299 * length and same hash value as the original name. This is needed
300 * because the algorithm produces only one obfuscated name to use
301 * for a given original name, and it's possible that result matches
302 * a name already seen. This function checks for this, and if it
303 * occurs, finds another suitable obfuscated name to use.
305 * Each bit in the binary representation of the sequence number is
306 * used to select one possible "bit flip" operation to perform on
307 * the name. So for example:
308 * seq = 0: selects no bits to flip
309 * seq = 1: selects the 0th bit to flip
310 * seq = 2: selects the 1st bit to flip
311 * seq = 3: selects the 0th and 1st bit to flip
314 * The flip_bit() function takes care of the details of the bit
315 * flipping within the name. Note that the "1st bit" in this
316 * context is a bit sequence number; i.e. it doesn't necessarily
317 * mean bit 0x02 will be changed.
319 * If a valid name (one that contains no '/' or '\0' characters) is
320 * produced by this process for the given sequence number, this
321 * function returns 1. If the result is not valid, it returns 0.
322 * Returns -1 if the sequence number is beyond the the maximum for
323 * names of the given length.
328 * The number of alternates available for a given name is dependent
329 * on its length. A "bit flip" involves inverting two bits in
330 * a name--the two bits being selected such that their values
331 * affect the name's hash value in the same way. Alternates are
332 * thus generated by inverting the value of pairs of such
333 * "overlapping" bits in the original name. Each byte after the
334 * first in a name adds at least one bit of overlap to work with.
335 * (See comments above flip_bit() for more discussion on this.)
337 * So the number of alternates is dependent on the number of such
338 * overlapping bits in a name. If there are N bit overlaps, there
339 * 2^N alternates for that hash value.
341 * Here are the number of overlapping bits available for generating
342 * alternates for names of specific lengths:
343 * 1 0 (must have 2 bytes to have any overlap)
344 * 2 1 One bit overlaps--so 2 possible alternates
345 * 3 2 Two bits overlap--so 4 possible alternates
346 * 4 4 Three bits overlap, so 2^3 alternates
347 * 5 8 8 bits overlap (due to wrapping), 256 alternates
348 * 6 18 2^18 alternates
349 * 7 28 2^28 alternates
351 * It's clear that the number of alternates grows very quickly with
352 * the length of the name. But note that the set of alternates
353 * includes invalid names. And for certain (contrived) names, the
354 * number of valid names is a fairly small fraction of the total
355 * number of alternates.
357 * The main driver for this infrastructure for coming up with
358 * alternate names is really related to names 5 (or possibly 6)
359 * bytes in length. 5-byte obfuscated names contain no randomly-
360 * generated bytes in them, and the chance of an obfuscated name
361 * matching an already-seen name is too high to just ignore. This
362 * methodical selection of alternates ensures we don't produce
363 * duplicate names unless we have exhausted our options.
375 return 1; /* alternate 0 is the original name */
376 if (name_len
< 2) /* Must have 2 bytes to flip */
379 for (bitseq
= 0; bits
; bitseq
++) {
380 uint32_t mask
= 1 << bitseq
;
386 fb
= flip_bit(name_len
, name
, bitseq
);