flist.h fprint.h frag.h freesp.h hash.h help.h init.h inode.h input.h \
io.h logformat.h malloc.h metadump.h output.h print.h quit.h sb.h \
sig.h strvec.h text.h type.h write.h attrset.h symlink.h fsmap.h \
- fuzz.h
+ fuzz.h obfuscate.h
CFILES = $(HFILES:.h=.c) btdump.c btheight.c convert.c info.c namei.c \
timelimit.c
LSRCFILES = xfs_admin.sh xfs_ncheck.sh xfs_metadump.sh
#include "faddr.h"
#include "field.h"
#include "dir2.h"
+#include "obfuscate.h"
#define DEFAULT_MAX_EXT_SIZE XFS_MAX_BMBT_EXTLEN
return ent;
}
-#define is_invalid_char(c) ((c) == '/' || (c) == '\0')
-#define rol32(x,y) (((x) << (y)) | ((x) >> (32 - (y))))
-
-static inline unsigned char
-random_filename_char(void)
-{
- static unsigned char filename_alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "abcdefghijklmnopqrstuvwxyz"
- "0123456789-_";
-
- return filename_alphabet[random() % (sizeof filename_alphabet - 1)];
-}
-
#define ORPHANAGE "lost+found"
#define ORPHANAGE_LEN (sizeof (ORPHANAGE) - 1)
return slen == namelen && !memcmp(name, s, namelen);
}
-/*
- * Given a name and its hash value, massage the name in such a way
- * that the result is another name of equal length which shares the
- * same hash value.
- */
-static void
-obfuscate_name(
- xfs_dahash_t hash,
- size_t name_len,
- unsigned char *name,
- bool is_dirent)
-{
- unsigned char *oldname = NULL;
- unsigned char *newp;
- int i;
- xfs_dahash_t new_hash;
- unsigned char *first;
- unsigned char high_bit;
- int tries = 0;
- bool is_ci_name = is_dirent && xfs_has_asciici(mp);
- int shift;
-
- /*
- * Our obfuscation algorithm requires at least 5-character
- * names, so don't bother if the name is too short. We
- * work backward from a hash value to determine the last
- * five bytes in a name required to produce a new name
- * with the same hash.
- */
- if (name_len < 5)
- return;
-
- if (is_ci_name) {
- oldname = malloc(name_len);
- if (!oldname)
- return;
- memcpy(oldname, name, name_len);
- }
-
-again:
- newp = name;
- new_hash = 0;
-
- /*
- * If we cannot generate a ci-compatible obfuscated name after 1000
- * tries, don't bother obfuscating the name.
- */
- if (tries++ > 1000) {
- memcpy(name, oldname, name_len);
- goto out_free;
- }
-
- /*
- * The beginning of the obfuscated name can be pretty much
- * anything, so fill it in with random characters.
- * Accumulate its new hash value as we go.
- */
- for (i = 0; i < name_len - 5; i++) {
- *newp = random_filename_char();
- if (is_ci_name)
- new_hash = xfs_ascii_ci_xfrm(*newp) ^
- rol32(new_hash, 7);
- else
- new_hash = *newp ^ rol32(new_hash, 7);
- newp++;
- }
-
- /*
- * Compute which five bytes need to be used at the end of
- * the name so the hash of the obfuscated name is the same
- * as the hash of the original. If any result in an invalid
- * character, flip a bit and arrange for a corresponding bit
- * in a neighboring byte to be flipped as well. For the
- * last byte, the "neighbor" to change is the first byte
- * we're computing here.
- */
- new_hash = rol32(new_hash, 3) ^ hash;
-
- first = newp;
- high_bit = 0;
- for (shift = 28; shift >= 0; shift -= 7) {
- *newp = (new_hash >> shift & 0x7f) ^ high_bit;
- if (is_invalid_char(*newp)) {
- *newp ^= 1;
- high_bit = 0x80;
- } else
- high_bit = 0;
-
- /*
- * If ascii-ci is enabled, uppercase characters are converted
- * to lowercase characters while computing the name hash. If
- * any of the necessary correction bytes are uppercase, the
- * hash of the new name will not match. Try again with a
- * different prefix.
- */
- if (is_ci_name && xfs_ascii_ci_need_xfrm(*newp))
- goto again;
-
- ASSERT(!is_invalid_char(*newp));
- newp++;
- }
-
- /*
- * If we flipped a bit on the last byte, we need to fix up
- * the matching bit in the first byte. The result will
- * be a valid character, because we know that first byte
- * has 0's in its upper four bits (it was produced by a
- * 28-bit right-shift of a 32-bit unsigned value).
- */
- if (high_bit) {
- *first ^= 0x10;
-
- if (is_ci_name && xfs_ascii_ci_need_xfrm(*first))
- goto again;
-
- ASSERT(!is_invalid_char(*first));
- }
-
-out_free:
- free(oldname);
-}
-
-/*
- * Flip a bit in each of two bytes at the end of the given name.
- * This is used in generating a series of alternate names to be used
- * in the event a duplicate is found.
- *
- * The bits flipped are selected such that they both affect the same
- * bit in the name's computed hash value, so flipping them both will
- * preserve the hash.
- *
- * The following diagram aims to show the portion of a computed
- * hash that a given byte of a name affects.
- *
- * 31 28 24 21 14 8 7 3 0
- * +-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-+
- * hash: | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
- * +-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-+
- * last-4 ->| |<-- last-2 --->| |<--- last ---->|
- * |<-- last-3 --->| |<-- last-1 --->| |<- last-4
- * |<-- last-7 --->| |<-- last-5 --->|
- * |<-- last-8 --->| |<-- last-6 --->|
- * . . . and so on
- *
- * The last byte of the name directly affects the low-order byte of
- * the hash. The next-to-last affects bits 7-14, the next one back
- * affects bits 14-21, and so on. The effect wraps around when it
- * goes beyond the top of the hash (as happens for byte last-4).
- *
- * Bits that are flipped together "overlap" on the hash value. As
- * an example of overlap, the last two bytes both affect bit 7 in
- * the hash. That pair of bytes (and their overlapping bits) can be
- * used for this "flip bit" operation (it's the first pair tried,
- * actually).
- *
- * A table defines overlapping pairs--the bytes involved and bits
- * within them--that can be used this way. The byte offset is
- * relative to a starting point within the name, which will be set
- * to affect the bytes at the end of the name. The function is
- * called with a "bitseq" value which indicates which bit flip is
- * desired, and this translates directly into selecting which entry
- * in the bit_to_flip[] table to apply.
- *
- * The function returns 1 if the operation was successful. It
- * returns 0 if the result produced a character that's not valid in
- * a name (either '/' or a '\0'). Finally, it returns -1 if the bit
- * sequence number is beyond what is supported for a name of this
- * length.
- *
- * Discussion
- * ----------
- * (Also see the discussion above find_alternate(), below.)
- *
- * In order to make this function work for any length name, the
- * table is ordered by increasing byte offset, so that the earliest
- * entries can apply to the shortest strings. This way all names
- * are done consistently.
- *
- * When bit flips occur, they can convert printable characters
- * into non-printable ones. In an effort to reduce the impact of
- * this, the first bit flips are chosen to affect bytes the end of
- * the name (and furthermore, toward the low bits of a byte). Those
- * bytes are often non-printable anyway because of the way they are
- * initially selected by obfuscate_name()). This is accomplished,
- * using later table entries first.
- *
- * Each row in the table doubles the number of alternates that
- * can be generated. A two-byte name is limited to using only
- * the first row, so it's possible to generate two alternates
- * (the original name, plus the alternate produced by flipping
- * the one pair of bits). In a 5-byte name, the effect of the
- * first byte overlaps the last by 4 its, and there are 8 bits
- * to flip, allowing for 256 possible alternates.
- *
- * Short names (less than 5 bytes) are never even obfuscated, so for
- * such names the relatively small number of alternates should never
- * really be a problem.
- *
- * Long names (more than 6 bytes, say) are not likely to exhaust
- * the number of available alternates. In fact, the table could
- * probably have stopped at 8 entries, on the assumption that 256
- * alternates should be enough for most any situation. The entries
- * beyond those are present mostly for demonstration of how it could
- * be populated with more entries, should it ever be necessary to do
- * so.
- */
-static int
-flip_bit(
- size_t name_len,
- unsigned char *name,
- uint32_t bitseq)
-{
- int index;
- size_t offset;
- unsigned char *p0, *p1;
- unsigned char m0, m1;
- struct {
- int byte; /* Offset from start within name */
- unsigned char bit; /* Bit within that byte */
- } bit_to_flip[][2] = { /* Sorted by second entry's byte */
- { { 0, 0 }, { 1, 7 } }, /* Each row defines a pair */
- { { 1, 0 }, { 2, 7 } }, /* of bytes and a bit within */
- { { 2, 0 }, { 3, 7 } }, /* each byte. Each bit in */
- { { 0, 4 }, { 4, 0 } }, /* a pair affects the same */
- { { 0, 5 }, { 4, 1 } }, /* bit in the hash, so flipping */
- { { 0, 6 }, { 4, 2 } }, /* both will change the name */
- { { 0, 7 }, { 4, 3 } }, /* while preserving the hash. */
- { { 3, 0 }, { 4, 7 } },
- { { 0, 0 }, { 5, 3 } }, /* The first entry's byte offset */
- { { 0, 1 }, { 5, 4 } }, /* must be less than the second. */
- { { 0, 2 }, { 5, 5 } },
- { { 0, 3 }, { 5, 6 } }, /* The table can be extended to */
- { { 0, 4 }, { 5, 7 } }, /* an arbitrary number of entries */
- { { 4, 0 }, { 5, 7 } }, /* but there's not much point. */
- /* . . . */
- };
-
- /* Find the first entry *not* usable for name of this length */
-
- for (index = 0; index < ARRAY_SIZE(bit_to_flip); index++)
- if (bit_to_flip[index][1].byte >= name_len)
- break;
-
- /*
- * Back up to the last usable entry. If that number is
- * smaller than the bit sequence number, inform the caller
- * that nothing this large (or larger) will work.
- */
- if (bitseq > --index)
- return -1;
-
- /*
- * We will be switching bits at the end of name, with a
- * preference for affecting the last bytes first. Compute
- * where in the name we'll start applying the changes.
- */
- offset = name_len - (bit_to_flip[index][1].byte + 1);
- index -= bitseq; /* Use later table entries first */
-
- p0 = name + offset + bit_to_flip[index][0].byte;
- p1 = name + offset + bit_to_flip[index][1].byte;
- m0 = 1 << bit_to_flip[index][0].bit;
- m1 = 1 << bit_to_flip[index][1].bit;
-
- /* Only change the bytes if it produces valid characters */
-
- if (is_invalid_char(*p0 ^ m0) || is_invalid_char(*p1 ^ m1))
- return 0;
-
- *p0 ^= m0;
- *p1 ^= m1;
-
- return 1;
-}
-
-/*
- * This function generates a well-defined sequence of "alternate"
- * names for a given name. An alternate is a name having the same
- * length and same hash value as the original name. This is needed
- * because the algorithm produces only one obfuscated name to use
- * for a given original name, and it's possible that result matches
- * a name already seen. This function checks for this, and if it
- * occurs, finds another suitable obfuscated name to use.
- *
- * Each bit in the binary representation of the sequence number is
- * used to select one possible "bit flip" operation to perform on
- * the name. So for example:
- * seq = 0: selects no bits to flip
- * seq = 1: selects the 0th bit to flip
- * seq = 2: selects the 1st bit to flip
- * seq = 3: selects the 0th and 1st bit to flip
- * ... and so on.
- *
- * The flip_bit() function takes care of the details of the bit
- * flipping within the name. Note that the "1st bit" in this
- * context is a bit sequence number; i.e. it doesn't necessarily
- * mean bit 0x02 will be changed.
- *
- * If a valid name (one that contains no '/' or '\0' characters) is
- * produced by this process for the given sequence number, this
- * function returns 1. If the result is not valid, it returns 0.
- * Returns -1 if the sequence number is beyond the the maximum for
- * names of the given length.
- *
- *
- * Discussion
- * ----------
- * The number of alternates available for a given name is dependent
- * on its length. A "bit flip" involves inverting two bits in
- * a name--the two bits being selected such that their values
- * affect the name's hash value in the same way. Alternates are
- * thus generated by inverting the value of pairs of such
- * "overlapping" bits in the original name. Each byte after the
- * first in a name adds at least one bit of overlap to work with.
- * (See comments above flip_bit() for more discussion on this.)
- *
- * So the number of alternates is dependent on the number of such
- * overlapping bits in a name. If there are N bit overlaps, there
- * 2^N alternates for that hash value.
- *
- * Here are the number of overlapping bits available for generating
- * alternates for names of specific lengths:
- * 1 0 (must have 2 bytes to have any overlap)
- * 2 1 One bit overlaps--so 2 possible alternates
- * 3 2 Two bits overlap--so 4 possible alternates
- * 4 4 Three bits overlap, so 2^3 alternates
- * 5 8 8 bits overlap (due to wrapping), 256 alternates
- * 6 18 2^18 alternates
- * 7 28 2^28 alternates
- * ...
- * It's clear that the number of alternates grows very quickly with
- * the length of the name. But note that the set of alternates
- * includes invalid names. And for certain (contrived) names, the
- * number of valid names is a fairly small fraction of the total
- * number of alternates.
- *
- * The main driver for this infrastructure for coming up with
- * alternate names is really related to names 5 (or possibly 6)
- * bytes in length. 5-byte obfuscated names contain no randomly-
- * generated bytes in them, and the chance of an obfuscated name
- * matching an already-seen name is too high to just ignore. This
- * methodical selection of alternates ensures we don't produce
- * duplicate names unless we have exhausted our options.
- */
-static int
-find_alternate(
- size_t name_len,
- unsigned char *name,
- uint32_t seq)
-{
- uint32_t bitseq = 0;
- uint32_t bits = seq;
-
- if (!seq)
- return 1; /* alternate 0 is the original name */
- if (name_len < 2) /* Must have 2 bytes to flip */
- return -1;
-
- for (bitseq = 0; bits; bitseq++) {
- uint32_t mask = 1 << bitseq;
- int fb;
-
- if (!(bits & mask))
- continue;
-
- fb = flip_bit(name_len, name, bitseq);
- if (fb < 1)
- return fb ? -1 : 0;
- bits ^= mask;
- }
-
- return 1;
-}
-
/*
* Look up the given name in the name table. If it is already
* present, iterate through a well-defined sequence of alternate
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2007, 2011 SGI
+ * All Rights Reserved.
+ */
+#include "libxfs.h"
+#include "init.h"
+#include "obfuscate.h"
+
+static inline unsigned char
+random_filename_char(void)
+{
+ static unsigned char filename_alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789-_";
+
+ return filename_alphabet[random() % (sizeof filename_alphabet - 1)];
+}
+
+#define rol32(x,y) (((x) << (y)) | ((x) >> (32 - (y))))
+
+/*
+ * Given a name and its hash value, massage the name in such a way
+ * that the result is another name of equal length which shares the
+ * same hash value.
+ */
+void
+obfuscate_name(
+ xfs_dahash_t hash,
+ size_t name_len,
+ unsigned char *name,
+ bool is_dirent)
+{
+ unsigned char *oldname = NULL;
+ unsigned char *newp;
+ int i;
+ xfs_dahash_t new_hash;
+ unsigned char *first;
+ unsigned char high_bit;
+ int tries = 0;
+ bool is_ci_name = is_dirent && xfs_has_asciici(mp);
+ int shift;
+
+ /*
+ * Our obfuscation algorithm requires at least 5-character
+ * names, so don't bother if the name is too short. We
+ * work backward from a hash value to determine the last
+ * five bytes in a name required to produce a new name
+ * with the same hash.
+ */
+ if (name_len < 5)
+ return;
+
+ if (is_ci_name) {
+ oldname = malloc(name_len);
+ if (!oldname)
+ return;
+ memcpy(oldname, name, name_len);
+ }
+
+again:
+ newp = name;
+ new_hash = 0;
+
+ /*
+ * If we cannot generate a ci-compatible obfuscated name after 1000
+ * tries, don't bother obfuscating the name.
+ */
+ if (tries++ > 1000) {
+ memcpy(name, oldname, name_len);
+ goto out_free;
+ }
+
+ /*
+ * The beginning of the obfuscated name can be pretty much
+ * anything, so fill it in with random characters.
+ * Accumulate its new hash value as we go.
+ */
+ for (i = 0; i < name_len - 5; i++) {
+ *newp = random_filename_char();
+ if (is_ci_name)
+ new_hash = xfs_ascii_ci_xfrm(*newp) ^
+ rol32(new_hash, 7);
+ else
+ new_hash = *newp ^ rol32(new_hash, 7);
+ newp++;
+ }
+
+ /*
+ * Compute which five bytes need to be used at the end of
+ * the name so the hash of the obfuscated name is the same
+ * as the hash of the original. If any result in an invalid
+ * character, flip a bit and arrange for a corresponding bit
+ * in a neighboring byte to be flipped as well. For the
+ * last byte, the "neighbor" to change is the first byte
+ * we're computing here.
+ */
+ new_hash = rol32(new_hash, 3) ^ hash;
+
+ first = newp;
+ high_bit = 0;
+ for (shift = 28; shift >= 0; shift -= 7) {
+ *newp = (new_hash >> shift & 0x7f) ^ high_bit;
+ if (is_invalid_char(*newp)) {
+ *newp ^= 1;
+ high_bit = 0x80;
+ } else
+ high_bit = 0;
+
+ /*
+ * If ascii-ci is enabled, uppercase characters are converted
+ * to lowercase characters while computing the name hash. If
+ * any of the necessary correction bytes are uppercase, the
+ * hash of the new name will not match. Try again with a
+ * different prefix.
+ */
+ if (is_ci_name && xfs_ascii_ci_need_xfrm(*newp))
+ goto again;
+
+ ASSERT(!is_invalid_char(*newp));
+ newp++;
+ }
+
+ /*
+ * If we flipped a bit on the last byte, we need to fix up
+ * the matching bit in the first byte. The result will
+ * be a valid character, because we know that first byte
+ * has 0's in its upper four bits (it was produced by a
+ * 28-bit right-shift of a 32-bit unsigned value).
+ */
+ if (high_bit) {
+ *first ^= 0x10;
+
+ if (is_ci_name && xfs_ascii_ci_need_xfrm(*first))
+ goto again;
+
+ ASSERT(!is_invalid_char(*first));
+ }
+out_free:
+ free(oldname);
+}
+
+/*
+ * Flip a bit in each of two bytes at the end of the given name.
+ * This is used in generating a series of alternate names to be used
+ * in the event a duplicate is found.
+ *
+ * The bits flipped are selected such that they both affect the same
+ * bit in the name's computed hash value, so flipping them both will
+ * preserve the hash.
+ *
+ * The following diagram aims to show the portion of a computed
+ * hash that a given byte of a name affects.
+ *
+ * 31 28 24 21 14 8 7 3 0
+ * +-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-+
+ * hash: | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+ * +-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-|-+-+-+-+-+-+-+-+
+ * last-4 ->| |<-- last-2 --->| |<--- last ---->|
+ * |<-- last-3 --->| |<-- last-1 --->| |<- last-4
+ * |<-- last-7 --->| |<-- last-5 --->|
+ * |<-- last-8 --->| |<-- last-6 --->|
+ * . . . and so on
+ *
+ * The last byte of the name directly affects the low-order byte of
+ * the hash. The next-to-last affects bits 7-14, the next one back
+ * affects bits 14-21, and so on. The effect wraps around when it
+ * goes beyond the top of the hash (as happens for byte last-4).
+ *
+ * Bits that are flipped together "overlap" on the hash value. As
+ * an example of overlap, the last two bytes both affect bit 7 in
+ * the hash. That pair of bytes (and their overlapping bits) can be
+ * used for this "flip bit" operation (it's the first pair tried,
+ * actually).
+ *
+ * A table defines overlapping pairs--the bytes involved and bits
+ * within them--that can be used this way. The byte offset is
+ * relative to a starting point within the name, which will be set
+ * to affect the bytes at the end of the name. The function is
+ * called with a "bitseq" value which indicates which bit flip is
+ * desired, and this translates directly into selecting which entry
+ * in the bit_to_flip[] table to apply.
+ *
+ * The function returns 1 if the operation was successful. It
+ * returns 0 if the result produced a character that's not valid in
+ * a name (either '/' or a '\0'). Finally, it returns -1 if the bit
+ * sequence number is beyond what is supported for a name of this
+ * length.
+ *
+ * Discussion
+ * ----------
+ * (Also see the discussion above find_alternate(), below.)
+ *
+ * In order to make this function work for any length name, the
+ * table is ordered by increasing byte offset, so that the earliest
+ * entries can apply to the shortest strings. This way all names
+ * are done consistently.
+ *
+ * When bit flips occur, they can convert printable characters
+ * into non-printable ones. In an effort to reduce the impact of
+ * this, the first bit flips are chosen to affect bytes the end of
+ * the name (and furthermore, toward the low bits of a byte). Those
+ * bytes are often non-printable anyway because of the way they are
+ * initially selected by obfuscate_name()). This is accomplished,
+ * using later table entries first.
+ *
+ * Each row in the table doubles the number of alternates that
+ * can be generated. A two-byte name is limited to using only
+ * the first row, so it's possible to generate two alternates
+ * (the original name, plus the alternate produced by flipping
+ * the one pair of bits). In a 5-byte name, the effect of the
+ * first byte overlaps the last by 4 its, and there are 8 bits
+ * to flip, allowing for 256 possible alternates.
+ *
+ * Short names (less than 5 bytes) are never even obfuscated, so for
+ * such names the relatively small number of alternates should never
+ * really be a problem.
+ *
+ * Long names (more than 6 bytes, say) are not likely to exhaust
+ * the number of available alternates. In fact, the table could
+ * probably have stopped at 8 entries, on the assumption that 256
+ * alternates should be enough for most any situation. The entries
+ * beyond those are present mostly for demonstration of how it could
+ * be populated with more entries, should it ever be necessary to do
+ * so.
+ */
+static int
+flip_bit(
+ size_t name_len,
+ unsigned char *name,
+ uint32_t bitseq)
+{
+ int index;
+ size_t offset;
+ unsigned char *p0, *p1;
+ unsigned char m0, m1;
+ struct {
+ int byte; /* Offset from start within name */
+ unsigned char bit; /* Bit within that byte */
+ } bit_to_flip[][2] = { /* Sorted by second entry's byte */
+ { { 0, 0 }, { 1, 7 } }, /* Each row defines a pair */
+ { { 1, 0 }, { 2, 7 } }, /* of bytes and a bit within */
+ { { 2, 0 }, { 3, 7 } }, /* each byte. Each bit in */
+ { { 0, 4 }, { 4, 0 } }, /* a pair affects the same */
+ { { 0, 5 }, { 4, 1 } }, /* bit in the hash, so flipping */
+ { { 0, 6 }, { 4, 2 } }, /* both will change the name */
+ { { 0, 7 }, { 4, 3 } }, /* while preserving the hash. */
+ { { 3, 0 }, { 4, 7 } },
+ { { 0, 0 }, { 5, 3 } }, /* The first entry's byte offset */
+ { { 0, 1 }, { 5, 4 } }, /* must be less than the second. */
+ { { 0, 2 }, { 5, 5 } },
+ { { 0, 3 }, { 5, 6 } }, /* The table can be extended to */
+ { { 0, 4 }, { 5, 7 } }, /* an arbitrary number of entries */
+ { { 4, 0 }, { 5, 7 } }, /* but there's not much point. */
+ /* . . . */
+ };
+
+ /* Find the first entry *not* usable for name of this length */
+
+ for (index = 0; index < ARRAY_SIZE(bit_to_flip); index++)
+ if (bit_to_flip[index][1].byte >= name_len)
+ break;
+
+ /*
+ * Back up to the last usable entry. If that number is
+ * smaller than the bit sequence number, inform the caller
+ * that nothing this large (or larger) will work.
+ */
+ if (bitseq > --index)
+ return -1;
+
+ /*
+ * We will be switching bits at the end of name, with a
+ * preference for affecting the last bytes first. Compute
+ * where in the name we'll start applying the changes.
+ */
+ offset = name_len - (bit_to_flip[index][1].byte + 1);
+ index -= bitseq; /* Use later table entries first */
+
+ p0 = name + offset + bit_to_flip[index][0].byte;
+ p1 = name + offset + bit_to_flip[index][1].byte;
+ m0 = 1 << bit_to_flip[index][0].bit;
+ m1 = 1 << bit_to_flip[index][1].bit;
+
+ /* Only change the bytes if it produces valid characters */
+
+ if (is_invalid_char(*p0 ^ m0) || is_invalid_char(*p1 ^ m1))
+ return 0;
+
+ *p0 ^= m0;
+ *p1 ^= m1;
+
+ return 1;
+}
+
+/*
+ * This function generates a well-defined sequence of "alternate"
+ * names for a given name. An alternate is a name having the same
+ * length and same hash value as the original name. This is needed
+ * because the algorithm produces only one obfuscated name to use
+ * for a given original name, and it's possible that result matches
+ * a name already seen. This function checks for this, and if it
+ * occurs, finds another suitable obfuscated name to use.
+ *
+ * Each bit in the binary representation of the sequence number is
+ * used to select one possible "bit flip" operation to perform on
+ * the name. So for example:
+ * seq = 0: selects no bits to flip
+ * seq = 1: selects the 0th bit to flip
+ * seq = 2: selects the 1st bit to flip
+ * seq = 3: selects the 0th and 1st bit to flip
+ * ... and so on.
+ *
+ * The flip_bit() function takes care of the details of the bit
+ * flipping within the name. Note that the "1st bit" in this
+ * context is a bit sequence number; i.e. it doesn't necessarily
+ * mean bit 0x02 will be changed.
+ *
+ * If a valid name (one that contains no '/' or '\0' characters) is
+ * produced by this process for the given sequence number, this
+ * function returns 1. If the result is not valid, it returns 0.
+ * Returns -1 if the sequence number is beyond the the maximum for
+ * names of the given length.
+ *
+ *
+ * Discussion
+ * ----------
+ * The number of alternates available for a given name is dependent
+ * on its length. A "bit flip" involves inverting two bits in
+ * a name--the two bits being selected such that their values
+ * affect the name's hash value in the same way. Alternates are
+ * thus generated by inverting the value of pairs of such
+ * "overlapping" bits in the original name. Each byte after the
+ * first in a name adds at least one bit of overlap to work with.
+ * (See comments above flip_bit() for more discussion on this.)
+ *
+ * So the number of alternates is dependent on the number of such
+ * overlapping bits in a name. If there are N bit overlaps, there
+ * 2^N alternates for that hash value.
+ *
+ * Here are the number of overlapping bits available for generating
+ * alternates for names of specific lengths:
+ * 1 0 (must have 2 bytes to have any overlap)
+ * 2 1 One bit overlaps--so 2 possible alternates
+ * 3 2 Two bits overlap--so 4 possible alternates
+ * 4 4 Three bits overlap, so 2^3 alternates
+ * 5 8 8 bits overlap (due to wrapping), 256 alternates
+ * 6 18 2^18 alternates
+ * 7 28 2^28 alternates
+ * ...
+ * It's clear that the number of alternates grows very quickly with
+ * the length of the name. But note that the set of alternates
+ * includes invalid names. And for certain (contrived) names, the
+ * number of valid names is a fairly small fraction of the total
+ * number of alternates.
+ *
+ * The main driver for this infrastructure for coming up with
+ * alternate names is really related to names 5 (or possibly 6)
+ * bytes in length. 5-byte obfuscated names contain no randomly-
+ * generated bytes in them, and the chance of an obfuscated name
+ * matching an already-seen name is too high to just ignore. This
+ * methodical selection of alternates ensures we don't produce
+ * duplicate names unless we have exhausted our options.
+ */
+int
+find_alternate(
+ size_t name_len,
+ unsigned char *name,
+ uint32_t seq)
+{
+ uint32_t bitseq = 0;
+ uint32_t bits = seq;
+
+ if (!seq)
+ return 1; /* alternate 0 is the original name */
+ if (name_len < 2) /* Must have 2 bytes to flip */
+ return -1;
+
+ for (bitseq = 0; bits; bitseq++) {
+ uint32_t mask = 1 << bitseq;
+ int fb;
+
+ if (!(bits & mask))
+ continue;
+
+ fb = flip_bit(name_len, name, bitseq);
+ if (fb < 1)
+ return fb ? -1 : 0;
+ bits ^= mask;
+ }
+
+ return 1;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2007, 2011 SGI
+ * All Rights Reserved.
+ */
+#ifndef __DB_OBFUSCATE_H__
+#define __DB_OBFUSCATE_H__
+
+/* Routines to obfuscate directory filenames and xattr names. */
+
+#define is_invalid_char(c) ((c) == '/' || (c) == '\0')
+
+void obfuscate_name(xfs_dahash_t hash, size_t name_len, unsigned char *name,
+ bool is_dirent);
+int find_alternate(size_t name_len, unsigned char *name, uint32_t seq);
+
+#endif /* __DB_OBFUSCATE_H__ */