#include "ewah/ewok.h"
#include "fsmonitor.h"
#include "fsmonitor-ipc.h"
+#include "name-hash.h"
#include "run-command.h"
#include "strbuf.h"
#include "trace2.h"
return result;
}
-static void fsmonitor_refresh_callback(struct index_state *istate, char *name)
+/*
+ * Invalidate the FSM bit on this CE. This is like mark_fsmonitor_invalid()
+ * but we've already handled the untracked-cache, so let's not repeat that
+ * work. This also lets us have a different trace message so that we can
+ * see everything that was done as part of the refresh-callback.
+ */
+static void invalidate_ce_fsm(struct cache_entry *ce)
{
- int i, len = strlen(name);
- int pos = index_name_pos(istate, name, len);
+ if (ce->ce_flags & CE_FSMONITOR_VALID) {
+ trace_printf_key(&trace_fsmonitor,
+ "fsmonitor_refresh_callback INV: '%s'",
+ ce->name);
+ ce->ce_flags &= ~CE_FSMONITOR_VALID;
+ }
+}
+
+static size_t handle_path_with_trailing_slash(
+ struct index_state *istate, const char *name, int pos);
+
+/*
+ * Use the name-hash to do a case-insensitive cache-entry lookup with
+ * the pathname and invalidate the cache-entry.
+ *
+ * Returns the number of cache-entries that we invalidated.
+ */
+static size_t handle_using_name_hash_icase(
+ struct index_state *istate, const char *name)
+{
+ struct cache_entry *ce = NULL;
+
+ ce = index_file_exists(istate, name, strlen(name), 1);
+ if (!ce)
+ return 0;
+ /*
+ * A case-insensitive search in the name-hash using the
+ * observed pathname found a cache-entry, so the observed path
+ * is case-incorrect. Invalidate the cache-entry and use the
+ * correct spelling from the cache-entry to invalidate the
+ * untracked-cache. Since we now have sparse-directories in
+ * the index, the observed pathname may represent a regular
+ * file or a sparse-index directory.
+ *
+ * Note that we should not have seen FSEvents for a
+ * sparse-index directory, but we handle it just in case.
+ *
+ * Either way, we know that there are not any cache-entries for
+ * children inside the cone of the directory, so we don't need to
+ * do the usual scan.
+ */
trace_printf_key(&trace_fsmonitor,
- "fsmonitor_refresh_callback '%s' (pos %d)",
- name, pos);
+ "fsmonitor_refresh_callback MAP: '%s' '%s'",
+ name, ce->name);
- if (name[len - 1] == '/') {
- /*
- * The daemon can decorate directory events, such as
- * moves or renames, with a trailing slash if the OS
- * FS Event contains sufficient information, such as
- * MacOS.
- *
- * Use this to invalidate the entire cone under that
- * directory.
- *
- * We do not expect an exact match because the index
- * does not normally contain directory entries, so we
- * start at the insertion point and scan.
- */
- if (pos < 0)
- pos = -pos - 1;
+ /*
+ * NEEDSWORK: We used the name-hash to find the correct
+ * case-spelling of the pathname in the cache-entry[], so
+ * technically this is a tracked file or a sparse-directory.
+ * It should not have any entries in the untracked-cache, so
+ * we should not need to use the case-corrected spelling to
+ * invalidate the the untracked-cache. So we may not need to
+ * do this. For now, I'm going to be conservative and always
+ * do it; we can revisit this later.
+ */
+ untracked_cache_invalidate_trimmed_path(istate, ce->name, 0);
- /* Mark all entries for the folder invalid */
- for (i = pos; i < istate->cache_nr; i++) {
- if (!starts_with(istate->cache[i]->name, name))
- break;
- istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
- }
+ invalidate_ce_fsm(ce);
+ return 1;
+}
+
+/*
+ * Use the dir-name-hash to find the correct-case spelling of the
+ * directory. Use the canonical spelling to invalidate all of the
+ * cache-entries within the matching cone.
+ *
+ * Returns the number of cache-entries that we invalidated.
+ */
+static size_t handle_using_dir_name_hash_icase(
+ struct index_state *istate, const char *name)
+{
+ struct strbuf canonical_path = STRBUF_INIT;
+ int pos;
+ size_t len = strlen(name);
+ size_t nr_in_cone;
+
+ if (name[len - 1] == '/')
+ len--;
+
+ if (!index_dir_find(istate, name, len, &canonical_path))
+ return 0; /* name is untracked */
+ if (!memcmp(name, canonical_path.buf, canonical_path.len)) {
+ strbuf_release(&canonical_path);
/*
- * We need to remove the traling "/" from the path
- * for the untracked cache.
+ * NEEDSWORK: Our caller already tried an exact match
+ * and failed to find one. They called us to do an
+ * ICASE match, so we should never get an exact match,
+ * so we could promote this to a BUG() here if we
+ * wanted to. It doesn't hurt anything to just return
+ * 0 and go on because we should never get here. Or we
+ * could just get rid of the memcmp() and this "if"
+ * clause completely.
*/
- name[len - 1] = '\0';
- } else if (pos >= 0) {
+ BUG("handle_using_dir_name_hash_icase(%s) did not exact match",
+ name);
+ }
+
+ trace_printf_key(&trace_fsmonitor,
+ "fsmonitor_refresh_callback MAP: '%s' '%s'",
+ name, canonical_path.buf);
+
+ /*
+ * The dir-name-hash only tells us the corrected spelling of
+ * the prefix. We have to use this canonical path to do a
+ * lookup in the cache-entry array so that we repeat the
+ * original search using the case-corrected spelling.
+ */
+ strbuf_addch(&canonical_path, '/');
+ pos = index_name_pos(istate, canonical_path.buf,
+ canonical_path.len);
+ nr_in_cone = handle_path_with_trailing_slash(
+ istate, canonical_path.buf, pos);
+ strbuf_release(&canonical_path);
+ return nr_in_cone;
+}
+
+/*
+ * The daemon sent an observed pathname without a trailing slash.
+ * (This is the normal case.) We do not know if it is a tracked or
+ * untracked file, a sparse-directory, or a populated directory (on a
+ * platform such as Windows where FSEvents are not qualified).
+ *
+ * The pathname contains the observed case reported by the FS. We
+ * do not know it is case-correct or -incorrect.
+ *
+ * Assume it is case-correct and try an exact match.
+ *
+ * Return the number of cache-entries that we invalidated.
+ */
+static size_t handle_path_without_trailing_slash(
+ struct index_state *istate, const char *name, int pos)
+{
+ /*
+ * Mark the untracked cache dirty for this path (regardless of
+ * whether or not we find an exact match for it in the index).
+ * Since the path is unqualified (no trailing slash hint in the
+ * FSEvent), it may refer to a file or directory. So we should
+ * not assume one or the other and should always let the untracked
+ * cache decide what needs to invalidated.
+ */
+ untracked_cache_invalidate_trimmed_path(istate, name, 0);
+
+ if (pos >= 0) {
/*
- * We have an exact match for this path and can just
- * invalidate it.
+ * An exact match on a tracked file. We assume that we
+ * do not need to scan forward for a sparse-directory
+ * cache-entry with the same pathname, nor for a cone
+ * at that directory. (That is, assume no D/F conflicts.)
*/
- istate->cache[pos]->ce_flags &= ~CE_FSMONITOR_VALID;
+ invalidate_ce_fsm(istate->cache[pos]);
+ return 1;
} else {
+ size_t nr_in_cone;
+ struct strbuf work_path = STRBUF_INIT;
+
/*
- * The path is not a tracked file -or- it is a
- * directory event on a platform that cannot
- * distinguish between file and directory events in
- * the event handler, such as Windows.
- *
- * Scan as if it is a directory and invalidate the
- * cone under it. (But remember to ignore items
- * between "name" and "name/", such as "name-" and
- * "name.".
+ * The negative "pos" gives us the suggested insertion
+ * point for the pathname (without the trailing slash).
+ * We need to see if there is a directory with that
+ * prefix, but there can be lots of pathnames between
+ * "foo" and "foo/" like "foo-" or "foo-bar", so we
+ * don't want to do our own scan.
*/
+ strbuf_add(&work_path, name, strlen(name));
+ strbuf_addch(&work_path, '/');
+ pos = index_name_pos(istate, work_path.buf, work_path.len);
+ nr_in_cone = handle_path_with_trailing_slash(
+ istate, work_path.buf, pos);
+ strbuf_release(&work_path);
+ return nr_in_cone;
+ }
+}
+
+/*
+ * The daemon can decorate directory events, such as a move or rename,
+ * by adding a trailing slash to the observed name. Use this to
+ * explicitly invalidate the entire cone under that directory.
+ *
+ * The daemon can only reliably do that if the OS FSEvent contains
+ * sufficient information in the event.
+ *
+ * macOS FSEvents have enough information.
+ *
+ * Other platforms may or may not be able to do it (and it might
+ * depend on the type of event (for example, a daemon could lstat() an
+ * observed pathname after a rename, but not after a delete)).
+ *
+ * If we find an exact match in the index for a path with a trailing
+ * slash, it means that we matched a sparse-index directory in a
+ * cone-mode sparse-checkout (since that's the only time we have
+ * directories in the index). We should never see this in practice
+ * (because sparse directories should not be present and therefore
+ * not generating FS events). Either way, we can treat them in the
+ * same way and just invalidate the cache-entry and the untracked
+ * cache (and in this case, the forward cache-entry scan won't find
+ * anything and it doesn't hurt to let it run).
+ *
+ * Return the number of cache-entries that we invalidated. We will
+ * use this later to determine if we need to attempt a second
+ * case-insensitive search on case-insensitive file systems. That is,
+ * if the search using the observed-case in the FSEvent yields any
+ * results, we assume the prefix is case-correct. If there are no
+ * matches, we still don't know if the observed path is simply
+ * untracked or case-incorrect.
+ */
+static size_t handle_path_with_trailing_slash(
+ struct index_state *istate, const char *name, int pos)
+{
+ int i;
+ size_t nr_in_cone = 0;
+
+ /*
+ * Mark the untracked cache dirty for this directory path
+ * (regardless of whether or not we find an exact match for it
+ * in the index or find it to be proper prefix of one or more
+ * files in the index), since the FSEvent is hinting that
+ * there may be changes on or within the directory.
+ */
+ untracked_cache_invalidate_trimmed_path(istate, name, 0);
+
+ if (pos < 0)
pos = -pos - 1;
- for (i = pos; i < istate->cache_nr; i++) {
- if (!starts_with(istate->cache[i]->name, name))
- break;
- if ((unsigned char)istate->cache[i]->name[len] > '/')
- break;
- if (istate->cache[i]->name[len] == '/')
- istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
- }
+ /* Mark all entries for the folder invalid */
+ for (i = pos; i < istate->cache_nr; i++) {
+ if (!starts_with(istate->cache[i]->name, name))
+ break;
+ invalidate_ce_fsm(istate->cache[i]);
+ nr_in_cone++;
}
+ return nr_in_cone;
+}
+
+static void fsmonitor_refresh_callback(struct index_state *istate, char *name)
+{
+ int len = strlen(name);
+ int pos = index_name_pos(istate, name, len);
+ size_t nr_in_cone;
+
+ trace_printf_key(&trace_fsmonitor,
+ "fsmonitor_refresh_callback '%s' (pos %d)",
+ name, pos);
+
+ if (name[len - 1] == '/')
+ nr_in_cone = handle_path_with_trailing_slash(istate, name, pos);
+ else
+ nr_in_cone = handle_path_without_trailing_slash(istate, name, pos);
+
/*
- * Mark the untracked cache dirty even if it wasn't found in the index
- * as it could be a new untracked file.
+ * If we did not find an exact match for this pathname or any
+ * cache-entries with this directory prefix and we're on a
+ * case-insensitive file system, try again using the name-hash
+ * and dir-name-hash.
*/
- untracked_cache_invalidate_path(istate, name, 0);
+ if (!nr_in_cone && ignore_case) {
+ nr_in_cone = handle_using_name_hash_icase(istate, name);
+ if (!nr_in_cone)
+ nr_in_cone = handle_using_dir_name_hash_icase(
+ istate, name);
+ }
+
+ if (nr_in_cone)
+ trace_printf_key(&trace_fsmonitor,
+ "fsmonitor_refresh_callback CNT: %d",
+ (int)nr_in_cone);
}
/*
)
'
+# The FSMonitor daemon reports the OBSERVED pathname of modified files
+# and thus contains the OBSERVED spelling on case-insensitive file
+# systems. The daemon does not (and should not) load the .git/index
+# file and therefore does not know the expected case-spelling. Since
+# it is possible for the user to create files/subdirectories with the
+# incorrect case, a modified file event for a tracked will not have
+# the EXPECTED case. This can cause `index_name_pos()` to incorrectly
+# report that the file is untracked. This causes the client to fail to
+# mark the file as possibly dirty (keeping the CE_FSMONITOR_VALID bit
+# set) so that `git status` will avoid inspecting it and thus not
+# present in the status output.
+#
+# The setup is a little contrived.
+#
+test_expect_success CASE_INSENSITIVE_FS 'fsmonitor subdir case wrong on disk' '
+ test_when_finished "stop_daemon_delete_repo subdir_case_wrong" &&
+
+ git init subdir_case_wrong &&
+ (
+ cd subdir_case_wrong &&
+ echo x >AAA &&
+ echo x >BBB &&
+
+ mkdir dir1 &&
+ echo x >dir1/file1 &&
+ mkdir dir1/dir2 &&
+ echo x >dir1/dir2/file2 &&
+ mkdir dir1/dir2/dir3 &&
+ echo x >dir1/dir2/dir3/file3 &&
+
+ echo x >yyy &&
+ echo x >zzz &&
+ git add . &&
+ git commit -m "data" &&
+
+ # This will cause "dir1/" and everything under it
+ # to be deleted.
+ git sparse-checkout set --cone --sparse-index &&
+
+ # Create dir2 with the wrong case and then let Git
+ # repopulate dir3 -- it will not correct the spelling
+ # of dir2.
+ mkdir dir1 &&
+ mkdir dir1/DIR2 &&
+ git sparse-checkout add dir1/dir2/dir3
+ ) &&
+
+ start_daemon -C subdir_case_wrong --tf "$PWD/subdir_case_wrong.trace" &&
+
+ # Enable FSMonitor in the client. Run enough commands for
+ # the .git/index to sync up with the daemon with everything
+ # marked clean.
+ git -C subdir_case_wrong config core.fsmonitor true &&
+ git -C subdir_case_wrong update-index --fsmonitor &&
+ git -C subdir_case_wrong status &&
+
+ # Make some files dirty so that FSMonitor gets FSEvents for
+ # each of them.
+ echo xx >>subdir_case_wrong/AAA &&
+ echo xx >>subdir_case_wrong/dir1/DIR2/dir3/file3 &&
+ echo xx >>subdir_case_wrong/zzz &&
+
+ GIT_TRACE_FSMONITOR="$PWD/subdir_case_wrong.log" \
+ git -C subdir_case_wrong --no-optional-locks status --short \
+ >"$PWD/subdir_case_wrong.out" &&
+
+ # "git status" should have gotten file events for each of
+ # the 3 files.
+ #
+ # "dir2" should be in the observed case on disk.
+ grep "fsmonitor_refresh_callback" \
+ <"$PWD/subdir_case_wrong.log" \
+ >"$PWD/subdir_case_wrong.log1" &&
+
+ grep -q "AAA.*pos 0" "$PWD/subdir_case_wrong.log1" &&
+ grep -q "zzz.*pos 6" "$PWD/subdir_case_wrong.log1" &&
+
+ grep -q "dir1/DIR2/dir3/file3.*pos -3" "$PWD/subdir_case_wrong.log1" &&
+
+ # Verify that we get a mapping event to correct the case.
+ grep -q "MAP:.*dir1/DIR2/dir3/file3.*dir1/dir2/dir3/file3" \
+ "$PWD/subdir_case_wrong.log1" &&
+
+ # The refresh-callbacks should have caused "git status" to clear
+ # the CE_FSMONITOR_VALID bit on each of those files and caused
+ # the worktree scan to visit them and mark them as modified.
+ grep -q " M AAA" "$PWD/subdir_case_wrong.out" &&
+ grep -q " M zzz" "$PWD/subdir_case_wrong.out" &&
+ grep -q " M dir1/dir2/dir3/file3" "$PWD/subdir_case_wrong.out"
+'
+
+test_expect_success CASE_INSENSITIVE_FS 'fsmonitor file case wrong on disk' '
+ test_when_finished "stop_daemon_delete_repo file_case_wrong" &&
+
+ git init file_case_wrong &&
+ (
+ cd file_case_wrong &&
+ echo x >AAA &&
+ echo x >BBB &&
+
+ mkdir dir1 &&
+ mkdir dir1/dir2 &&
+ mkdir dir1/dir2/dir3 &&
+ echo x >dir1/dir2/dir3/FILE-3-B &&
+ echo x >dir1/dir2/dir3/XXXX-3-X &&
+ echo x >dir1/dir2/dir3/file-3-a &&
+ echo x >dir1/dir2/dir3/yyyy-3-y &&
+ mkdir dir1/dir2/dir4 &&
+ echo x >dir1/dir2/dir4/FILE-4-A &&
+ echo x >dir1/dir2/dir4/XXXX-4-X &&
+ echo x >dir1/dir2/dir4/file-4-b &&
+ echo x >dir1/dir2/dir4/yyyy-4-y &&
+
+ echo x >yyy &&
+ echo x >zzz &&
+ git add . &&
+ git commit -m "data"
+ ) &&
+
+ start_daemon -C file_case_wrong --tf "$PWD/file_case_wrong.trace" &&
+
+ # Enable FSMonitor in the client. Run enough commands for
+ # the .git/index to sync up with the daemon with everything
+ # marked clean.
+ git -C file_case_wrong config core.fsmonitor true &&
+ git -C file_case_wrong update-index --fsmonitor &&
+ git -C file_case_wrong status &&
+
+ # Make some files dirty so that FSMonitor gets FSEvents for
+ # each of them.
+ echo xx >>file_case_wrong/AAA &&
+ echo xx >>file_case_wrong/zzz &&
+
+ # Rename some files so that FSMonitor sees a create and delete
+ # FSEvent for each. (A simple "mv foo FOO" is not portable
+ # between macOS and Windows. It works on both platforms, but makes
+ # the test messy, since (1) one platform updates "ctime" on the
+ # moved file and one does not and (2) it causes a directory event
+ # on one platform and not on the other which causes additional
+ # scanning during "git status" which causes a "H" vs "h" discrepancy
+ # in "git ls-files -f".) So old-school it and move it out of the
+ # way and copy it to the case-incorrect name so that we get fresh
+ # "ctime" and "mtime" values.
+
+ mv file_case_wrong/dir1/dir2/dir3/file-3-a file_case_wrong/dir1/dir2/dir3/ORIG &&
+ cp file_case_wrong/dir1/dir2/dir3/ORIG file_case_wrong/dir1/dir2/dir3/FILE-3-A &&
+ rm file_case_wrong/dir1/dir2/dir3/ORIG &&
+ mv file_case_wrong/dir1/dir2/dir4/FILE-4-A file_case_wrong/dir1/dir2/dir4/ORIG &&
+ cp file_case_wrong/dir1/dir2/dir4/ORIG file_case_wrong/dir1/dir2/dir4/file-4-a &&
+ rm file_case_wrong/dir1/dir2/dir4/ORIG &&
+
+ # Run status enough times to fully sync.
+ #
+ # The first instance should get the create and delete FSEvents
+ # for each pair. Status should update the index with a new FSM
+ # token (so the next invocation will not see data for these
+ # events).
+
+ GIT_TRACE_FSMONITOR="$PWD/file_case_wrong-try1.log" \
+ git -C file_case_wrong status --short \
+ >"$PWD/file_case_wrong-try1.out" &&
+ grep -q "fsmonitor_refresh_callback.*FILE-3-A.*pos -3" "$PWD/file_case_wrong-try1.log" &&
+ grep -q "fsmonitor_refresh_callback.*file-3-a.*pos 4" "$PWD/file_case_wrong-try1.log" &&
+ grep -q "fsmonitor_refresh_callback.*FILE-4-A.*pos 6" "$PWD/file_case_wrong-try1.log" &&
+ grep -q "fsmonitor_refresh_callback.*file-4-a.*pos -9" "$PWD/file_case_wrong-try1.log" &&
+
+ # FSM refresh will have invalidated the FSM bit and cause a regular
+ # (real) scan of these tracked files, so they should have "H" status.
+ # (We will not see a "h" status until the next refresh (on the next
+ # command).)
+
+ git -C file_case_wrong ls-files -f >"$PWD/file_case_wrong-lsf1.out" &&
+ grep -q "H dir1/dir2/dir3/file-3-a" "$PWD/file_case_wrong-lsf1.out" &&
+ grep -q "H dir1/dir2/dir4/FILE-4-A" "$PWD/file_case_wrong-lsf1.out" &&
+
+
+ # Try the status again. We assume that the above status command
+ # advanced the token so that the next one will not see those events.
+
+ GIT_TRACE_FSMONITOR="$PWD/file_case_wrong-try2.log" \
+ git -C file_case_wrong status --short \
+ >"$PWD/file_case_wrong-try2.out" &&
+ ! grep -q "fsmonitor_refresh_callback.*FILE-3-A.*pos" "$PWD/file_case_wrong-try2.log" &&
+ ! grep -q "fsmonitor_refresh_callback.*file-3-a.*pos" "$PWD/file_case_wrong-try2.log" &&
+ ! grep -q "fsmonitor_refresh_callback.*FILE-4-A.*pos" "$PWD/file_case_wrong-try2.log" &&
+ ! grep -q "fsmonitor_refresh_callback.*file-4-a.*pos" "$PWD/file_case_wrong-try2.log" &&
+
+ # FSM refresh saw nothing, so it will mark all files as valid,
+ # so they should now have "h" status.
+
+ git -C file_case_wrong ls-files -f >"$PWD/file_case_wrong-lsf2.out" &&
+ grep -q "h dir1/dir2/dir3/file-3-a" "$PWD/file_case_wrong-lsf2.out" &&
+ grep -q "h dir1/dir2/dir4/FILE-4-A" "$PWD/file_case_wrong-lsf2.out" &&
+
+
+ # We now have files with clean content, but with case-incorrect
+ # file names. Modify them to see if status properly reports
+ # them.
+
+ echo xx >>file_case_wrong/dir1/dir2/dir3/FILE-3-A &&
+ echo xx >>file_case_wrong/dir1/dir2/dir4/file-4-a &&
+
+ GIT_TRACE_FSMONITOR="$PWD/file_case_wrong-try3.log" \
+ git -C file_case_wrong --no-optional-locks status --short \
+ >"$PWD/file_case_wrong-try3.out" &&
+
+ # Verify that we get a mapping event to correct the case.
+ grep -q "fsmonitor_refresh_callback MAP:.*dir1/dir2/dir3/FILE-3-A.*dir1/dir2/dir3/file-3-a" \
+ "$PWD/file_case_wrong-try3.log" &&
+ grep -q "fsmonitor_refresh_callback MAP:.*dir1/dir2/dir4/file-4-a.*dir1/dir2/dir4/FILE-4-A" \
+ "$PWD/file_case_wrong-try3.log" &&
+
+ # FSEvents are in observed case.
+ grep -q "fsmonitor_refresh_callback.*FILE-3-A.*pos -3" "$PWD/file_case_wrong-try3.log" &&
+ grep -q "fsmonitor_refresh_callback.*file-4-a.*pos -9" "$PWD/file_case_wrong-try3.log" &&
+
+ # The refresh-callbacks should have caused "git status" to clear
+ # the CE_FSMONITOR_VALID bit on each of those files and caused
+ # the worktree scan to visit them and mark them as modified.
+ grep -q " M dir1/dir2/dir3/file-3-a" "$PWD/file_case_wrong-try3.out" &&
+ grep -q " M dir1/dir2/dir4/FILE-4-A" "$PWD/file_case_wrong-try3.out"
+'
+
test_done