From: Joel Rosdahl Date: Wed, 27 Apr 2022 19:15:35 +0000 (+0200) Subject: refactor: Improve absolute path normalization functions X-Git-Tag: v4.6.1~33 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9a05332915d2808f4713437006b3f7c812d9fd74;p=thirdparty%2Fccache.git refactor: Improve absolute path normalization functions The Util::normalize_absolute_path function only works on the syntactic level, i.e. the result may not actually resolve to the same filesystem entry (nor to any file system entry for that matter). It was meant to be used for paths that don’t necessarily exist yet, such as a future directory in which to write debug files. It may fail in edge cases with symlinks in the path in combination with .. segments. If the caller wants to ensure that the resulting path actually makes sense, it needs to check if the resulting path points to the same file entry as the original. To improve on this, Util::normalize_absolute_path has now been renamed to Util::normalize_abstract_absolute_path and there is a new Util::normalize_concrete_absolute_path function which returns the original path if the normalized result doesn't resolve to the same file system entry as the original path. --- diff --git a/src/Config.cpp b/src/Config.cpp index a64e2529b..ea2c63233 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -839,7 +839,7 @@ Config::set_item(const std::string& key, m_base_dir = Util::expand_environment_variables(value); if (!m_base_dir.empty()) { // The empty string means "disable" verify_absolute_path(m_base_dir); - m_base_dir = Util::normalize_absolute_path(m_base_dir); + m_base_dir = Util::normalize_abstract_absolute_path(m_base_dir); } break; diff --git a/src/Util.cpp b/src/Util.cpp index 870a2ae18..abbf76d3e 100644 --- a/src/Util.cpp +++ b/src/Util.cpp @@ -636,14 +636,9 @@ get_apparent_cwd(const std::string& actual_cwd) auto pwd_stat = Stat::stat(pwd); auto cwd_stat = Stat::stat(actual_cwd); - if (!pwd_stat || !cwd_stat || !pwd_stat.same_inode_as(cwd_stat)) { - return actual_cwd; - } - std::string normalized_pwd = normalize_absolute_path(pwd); - return normalized_pwd == pwd - || Stat::stat(normalized_pwd).same_inode_as(pwd_stat) - ? normalized_pwd - : pwd; + return !pwd_stat || !cwd_stat || !pwd_stat.same_inode_as(cwd_stat) + ? actual_cwd + : normalize_concrete_absolute_path(pwd); #endif } @@ -898,7 +893,8 @@ make_relative_path(const std::string& base_dir, const auto real_path = Util::real_path(std::string(path)); const auto add_relpath_candidates = [&](auto path) { - const std::string normalized_path = Util::normalize_absolute_path(path); + const std::string normalized_path = + Util::normalize_abstract_absolute_path(path); relpath_candidates.push_back( Util::get_relative_path(actual_cwd, normalized_path)); if (apparent_cwd != actual_cwd) { @@ -946,7 +942,7 @@ matches_dir_prefix_or_file(string_view dir_prefix_or_file, string_view path) } std::string -normalize_absolute_path(string_view path) +normalize_abstract_absolute_path(string_view path) { if (!util::is_absolute_path(path)) { return std::string(path); @@ -956,7 +952,7 @@ normalize_absolute_path(string_view path) if (path.find("\\") != string_view::npos) { std::string new_path(path); std::replace(new_path.begin(), new_path.end(), '\\', '/'); - return normalize_absolute_path(new_path); + return normalize_abstract_absolute_path(new_path); } std::string drive(path.substr(0, 2)); @@ -1004,6 +1000,15 @@ normalize_absolute_path(string_view path) #endif } +std::string +normalize_concrete_absolute_path(const std::string& path) +{ + const auto normalized_path = normalize_abstract_absolute_path(path); + return Stat::stat(normalized_path).same_inode_as(Stat::stat(path)) + ? normalized_path + : path; +} + uint64_t parse_duration(const std::string& duration) { diff --git a/src/Util.hpp b/src/Util.hpp index 6325463bd..4ddffa346 100644 --- a/src/Util.hpp +++ b/src/Util.hpp @@ -276,10 +276,15 @@ bool matches_dir_prefix_or_file(nonstd::string_view dir_prefix_or_file, // // Normalization here means syntactically removing redundant slashes and // resolving "." and ".." parts. The algorithm does however *not* follow -// symlinks, so the result may not actually resolve to `path`. +// symlinks, so the result may not actually resolve to the same filesystem entry +// as `path` (nor to any existing file system entry for that matter). // // On Windows: Backslashes are replaced with forward slashes. -std::string normalize_absolute_path(nonstd::string_view path); +std::string normalize_abstract_absolute_path(nonstd::string_view path); + +// Like normalize_abstract_absolute_path, but returns `path` unchanged if the +// normalized result doesn't resolve to the same file system entry as `path`. +std::string normalize_concrete_absolute_path(const std::string& path); // Parse `duration`, an unsigned integer with d (days) or s (seconds) suffix, // into seconds. Throws `core::Error` on error. diff --git a/src/util/path.cpp b/src/util/path.cpp index b42d61e35..86d9d0202 100644 --- a/src/util/path.cpp +++ b/src/util/path.cpp @@ -82,7 +82,7 @@ to_absolute_path(nonstd::string_view path) if (util::is_absolute_path(path)) { return std::string(path); } else { - return Util::normalize_absolute_path( + return Util::normalize_abstract_absolute_path( FMT("{}/{}", Util::get_actual_cwd(), path)); } } diff --git a/unittest/test_Util.cpp b/unittest/test_Util.cpp index 4ea9903e5..72c617966 100644 --- a/unittest/test_Util.cpp +++ b/unittest/test_Util.cpp @@ -561,33 +561,53 @@ TEST_CASE("Util::matches_dir_prefix_or_file") #endif } -TEST_CASE("Util::normalize_absolute_path") +TEST_CASE("Util::normalize_abstract_absolute_path") { - CHECK(Util::normalize_absolute_path("") == ""); - CHECK(Util::normalize_absolute_path(".") == "."); - CHECK(Util::normalize_absolute_path("..") == ".."); - CHECK(Util::normalize_absolute_path("...") == "..."); - CHECK(Util::normalize_absolute_path("x/./") == "x/./"); + CHECK(Util::normalize_abstract_absolute_path("") == ""); + CHECK(Util::normalize_abstract_absolute_path(".") == "."); + CHECK(Util::normalize_abstract_absolute_path("..") == ".."); + CHECK(Util::normalize_abstract_absolute_path("...") == "..."); + CHECK(Util::normalize_abstract_absolute_path("x/./") == "x/./"); #ifdef _WIN32 - CHECK(Util::normalize_absolute_path("c:/") == "c:/"); - CHECK(Util::normalize_absolute_path("c:\\") == "c:/"); - CHECK(Util::normalize_absolute_path("c:/.") == "c:/"); - CHECK(Util::normalize_absolute_path("c:\\..") == "c:/"); - CHECK(Util::normalize_absolute_path("c:\\x/..") == "c:/"); - CHECK(Util::normalize_absolute_path("c:\\x/./y\\..\\\\z") == "c:/x/z"); + CHECK(Util::normalize_abstract_absolute_path("c:/") == "c:/"); + CHECK(Util::normalize_abstract_absolute_path("c:\\") == "c:/"); + CHECK(Util::normalize_abstract_absolute_path("c:/.") == "c:/"); + CHECK(Util::normalize_abstract_absolute_path("c:\\..") == "c:/"); + CHECK(Util::normalize_abstract_absolute_path("c:\\x/..") == "c:/"); + CHECK(Util::normalize_abstract_absolute_path("c:\\x/./y\\..\\\\z") + == "c:/x/z"); #else - CHECK(Util::normalize_absolute_path("/") == "/"); - CHECK(Util::normalize_absolute_path("/.") == "/"); - CHECK(Util::normalize_absolute_path("/..") == "/"); - CHECK(Util::normalize_absolute_path("/./") == "/"); - CHECK(Util::normalize_absolute_path("//") == "/"); - CHECK(Util::normalize_absolute_path("/../x") == "/x"); - CHECK(Util::normalize_absolute_path("/x/./y/z") == "/x/y/z"); - CHECK(Util::normalize_absolute_path("/x/../y/z/") == "/y/z"); - CHECK(Util::normalize_absolute_path("/x/.../y/z") == "/x/.../y/z"); - CHECK(Util::normalize_absolute_path("/x/yyy/../zz") == "/x/zz"); - CHECK(Util::normalize_absolute_path("//x/yyy///.././zz") == "/x/zz"); + CHECK(Util::normalize_abstract_absolute_path("/") == "/"); + CHECK(Util::normalize_abstract_absolute_path("/.") == "/"); + CHECK(Util::normalize_abstract_absolute_path("/..") == "/"); + CHECK(Util::normalize_abstract_absolute_path("/./") == "/"); + CHECK(Util::normalize_abstract_absolute_path("//") == "/"); + CHECK(Util::normalize_abstract_absolute_path("/../x") == "/x"); + CHECK(Util::normalize_abstract_absolute_path("/x/./y/z") == "/x/y/z"); + CHECK(Util::normalize_abstract_absolute_path("/x/../y/z/") == "/y/z"); + CHECK(Util::normalize_abstract_absolute_path("/x/.../y/z") == "/x/.../y/z"); + CHECK(Util::normalize_abstract_absolute_path("/x/yyy/../zz") == "/x/zz"); + CHECK(Util::normalize_abstract_absolute_path("//x/yyy///.././zz") == "/x/zz"); +#endif +} + +TEST_CASE("Util::normalize_concrete_absolute_path") +{ +#ifndef _WIN32 + TestContext test_context; + + Util::write_file("file", ""); + REQUIRE(Util::create_dir("dir1/dir2")); + REQUIRE(symlink("dir1/dir2", "symlink") == 0); + const auto cwd = Util::get_actual_cwd(); + + CHECK(Util::normalize_concrete_absolute_path(FMT("{}/file", cwd)) + == FMT("{}/file", cwd)); + CHECK(Util::normalize_concrete_absolute_path(FMT("{}/dir1/../file", cwd)) + == FMT("{}/file", cwd)); + CHECK(Util::normalize_concrete_absolute_path(FMT("{}/symlink/../file", cwd)) + == FMT("{}/symlink/../file", cwd)); #endif }