chore: Clean up code slightly

author Joel Rosdahl <joel@rosdahl.net>

Wed, 5 Oct 2022 19:34:17 +0000 (21:34 +0200)

committer Joel Rosdahl <joel@rosdahl.net>

Thu, 6 Oct 2022 16:31:47 +0000 (18:31 +0200)
author Joel Rosdahl <joel@rosdahl.net>
Wed, 5 Oct 2022 19:34:17 +0000 (21:34 +0200)
committer Joel Rosdahl <joel@rosdahl.net>
Thu, 6 Oct 2022 16:31:47 +0000 (18:31 +0200)
diff --git a/src/Depfile.cpp b/src/Depfile.cpp

index c5accfe171ec7fc0506ad44da0b25d87d4db652c..2d721a6c1609ea7690caa8407b0e0abb6c42d753 100644 (file)
--- a/src/Depfile.cpp
+++ b/src/Depfile.cpp
@@ -146,30 +146,30 @@ tokenize(std::string_view file_content)
  {
    // A dependency file uses Makefile syntax. This is not perfect parser but
    // should be enough for parsing a regular dependency file.
-  // enhancement:
-  // - space between target and colon
-  // - no space between colon and first pre-requisite
-  // the later is pretty complex because of the windows paths which are
+  //
+  // Note that this is pretty complex because of Windows paths that can be
    // identical to a target-colon-prerequisite without spaces (e.g. cat:/meow vs.
-  // c:/meow) here are the tests on windows gnu make 4.3 how it handles this:
-  //  + cat:/meow -> sees "cat" and "/meow"
-  //  + cat:\meow -> sees "cat" and "\meow"
-  //  + cat:\ meow -> sees "cat" and " meow"
-  //  + cat:c:/meow -> sees "cat" and "c:/meow"
-  //  + cat:c:\meow -> sees "cat" and "c:\meow"
-  //  + cat:c: -> target pattern contains no '%'.  Stop.
-  //  + cat:c:\ -> target pattern contains no '%'.  Stop.
-  //  + cat:c:/ -> sees "cat" and "c:/"
-  //  + cat:c:meow -> target pattern contains no '%'.  Stop.
-  //  + c:c:/meow -> sees "c" and "c:/meow"
-  //  + c:c:\meow -> sees "c" and "c:\meow"
-  //  + c:z:\meow -> sees "c" and "z:\meow"
-  //  + c:cd:\meow -> target pattern contains no '%'.  Stop.
-
-  // the logic for a windows path is:
-  //  - if there is a colon, if the previous token is 1 char long
-  //    and that the following char is a slash (fw or bw), then it is
-  //    a windows path
+  // c:/meow).
+  //
+  // Here are tests on Windows on how GNU Make 4.3 handles different scenarios:
+  //
+  //   cat:/meow   -> sees "cat" and "/meow"
+  //   cat:\meow   -> sees "cat" and "\meow"
+  //   cat:\ meow  -> sees "cat" and " meow"
+  //   cat:c:/meow -> sees "cat" and "c:/meow"
+  //   cat:c:\meow -> sees "cat" and "c:\meow"
+  //   cat:c:      -> target pattern contains no '%'.  Stop.
+  //   cat:c:\     -> target pattern contains no '%'.  Stop.
+  //   cat:c:/     -> sees "cat" and "c:/"
+  //   cat:c:meow  -> target pattern contains no '%'.  Stop.
+  //   c:c:/meow   -> sees "c" and "c:/meow"
+  //   c:c:\meow   -> sees "c" and "c:\meow"
+  //   c:z:\meow   -> sees "c" and "z:\meow"
+  //   c:cd:\meow  -> target pattern contains no '%'.  Stop.
+  //
+  // Thus, if there is a colon and the previous token is one character long and
+  // the following character is a slash (forward or backward), then it is
+  // interpreted as a Windows path.
  
    std::vector<std::string> result;
    const size_t length = file_content.size();
@@ -179,34 +179,32 @@ tokenize(std::string_view file_content)
    while (p < length) {
      char c = file_content[p];
  
-    if (c == ':') {
-      if (p + 1 < length && !is_blank(token) && token.length() == 1) {
-        const char next = file_content[p + 1];
-        if (next == '/' || next == '\\') {
-          // only in this case, this is not a separator and colon is
-          // added to token
-          token.push_back(c);
-          ++p;
-          continue;
-        }
+    if (c == ':' && p + 1 < length && !is_blank(token) && token.length() == 1) {
+      const char next = file_content[p + 1];
+      if (next == '/' || next == '\\') {
+        // It's a Windows path, so the colon is not a separator and instead
+        // added to the token.
+        token.push_back(c);
+        ++p;
+        continue;
        }
      }
+
      // Each token is separated by whitespace or a colon.
      if (isspace(c) || c == ':') {
-      // chomp all spaces before next char
+      // Chomp all spaces before next character.
        while (p < length && isspace(file_content[p])) {
          ++p;
        }
        if (!is_blank(token)) {
-        // if there were spaces between a token and the : sign, the :
-        // must be added to the same token to make sure it is seen as
-        // a target and not as a dependency (ccache requirement)
+        // If there were spaces between a token and the colon, add the colon the
+        // token to make sure it is seen as a target and not as a dependency.
          if (p < length) {
            const char next = file_content[p];
            if (next == ':') {
              token.push_back(next);
              ++p;
-            // chomp all spaces before next char
+            // Chomp all spaces before next character.
              while (p < length && isspace(file_content[p])) {
                ++p;
              }
diff --git a/src/Depfile.hpp b/src/Depfile.hpp

index d45bf539707fd1a5f4fe34158a8526d4769047e4..af73ff8bc0baea343205e27fb91fa99eb540d5d9 100644 (file)
--- a/src/Depfile.hpp
+++ b/src/Depfile.hpp
@@ -37,6 +37,8 @@ std::optional<std::string> rewrite_source_paths(const Context& ctx,
  
  void make_paths_relative_in_output_dep(const Context& ctx);
  
+// Tokenize `file_content` into a list of files, where the first token is the
+// target and ends with a colon.
  std::vector<std::string> tokenize(std::string_view file_content);
  
  } // namespace Depfile
diff --git a/src/util/file.cpp b/src/util/file.cpp

index 964d684611e35d26d3dffcc9664e2900eaf1fd38..5ddc05bbaa5f011aaf2e059292d58c091e38c261 100644 (file)
--- a/src/util/file.cpp
+++ b/src/util/file.cpp
@@ -165,18 +165,14 @@ read_file(const std::string& path, size_t size_hint)
  #ifdef _WIN32
    if constexpr (std::is_same<T, std::string>::value) {
      // Convert to UTF-8 if the content starts with a UTF-16 little-endian BOM.
-    //
-    // Note that this code assumes a little-endian machine, which is why it's
-    // #ifdef-ed to only run on Windows (which is always little-endian) where
-    // it's actually needed.
      if (has_utf16_le_bom(result)) {
        result.erase(0, 2); // Remove BOM.
-      if (result.empty())
+      if (result.empty()) {
          return result;
+      }
  
        std::wstring result_as_u16((result.size() / 2) + 1, '\0');
        result_as_u16 = reinterpret_cast<const wchar_t*>(result.c_str());
-
        const int size = WideCharToMultiByte(CP_UTF8,
                                             WC_ERR_INVALID_CHARS,
                                             result_as_u16.c_str(),
@@ -185,11 +181,12 @@ read_file(const std::string& path, size_t size_hint)
                                             0,
                                             nullptr,
                                             nullptr);
-      if (size <= 0)
+      if (size <= 0) {
          return nonstd::make_unexpected(
            FMT("Failed to convert {} from UTF-16LE to UTF-8: {}",
                path,
                Win32Util::error_message(GetLastError())));
+      }
  
        result = std::string(size, '\0');
        WideCharToMultiByte(CP_UTF8,
diff --git a/test/suites/depend.bash b/test/suites/depend.bash

index 99c090c4016749fadcd0411529446aba063d8e2a..88bf786de8c54d939f78c5996d460b58ac854f1a 100644 (file)
--- a/test/suites/depend.bash
+++ b/test/suites/depend.bash
@@ -90,8 +90,7 @@ EOF
  
  generate_reference_compiler_output() {
      local filename
-    if [[ $# -gt 0 ]]
-    then
+    if [[ $# -gt 0 ]]; then
          filename=$1
      else
          filename=test.c
@@ -204,7 +203,7 @@ EOF
      # dir2 has a change in header which affects object file
      # dir3 has a change in header which does not affect object file
      # dir4 has an additional include header which should change the dependency file
-    # dir5 has no changes, only a different base dir
+    # dir5 has no changes, only a different base directory
      TEST "Different sets of headers for the same source code"
  
      set_up_different_sets_of_headers_test
@@ -329,7 +328,8 @@ EOF
      generate_reference_compiler_output `pwd`/test.c
      CCACHE_DEPEND=1 CCACHE_BASEDIR=$BASEDIR5 $CCACHE_COMPILE $DEPFLAGS -c `pwd`/test.c
      expect_equal_object_files reference_test.o test.o
-    # from the ccache doc: One known issue is that absolute paths are not reproduced in dependency files
+    # From the manual: "One known issue is that absolute paths are not
+    # reproduced in dependency files":
      # expect_equal_content reference_test.d test.d
      expect_equal_content test.d $BASEDIR1/test.d
      expect_stat direct_cache_hit 7
diff --git a/unittest/test_Depfile.cpp b/unittest/test_Depfile.cpp

index e8ae88df4fc8fa2a5a7acfef608226cefb539bc5..7f636e017b7be2c5c3b61ea8a72054266f3f7b3c 100644 (file)
--- a/unittest/test_Depfile.cpp
+++ b/unittest/test_Depfile.cpp
@@ -78,13 +78,13 @@ TEST_CASE("Depfile::rewrite_source_paths")
  
  TEST_CASE("Depfile::tokenize")
  {
-  SUBCASE("Parse empty depfile")
+  SUBCASE("Empty")
    {
      std::vector<std::string> result = Depfile::tokenize("");
      CHECK(result.size() == 0);
    }
  
-  SUBCASE("Parse simple depfile")
+  SUBCASE("Simple")
    {
      std::vector<std::string> result =
        Depfile::tokenize("cat.o: meow meow purr");
@@ -95,7 +95,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[3] == "purr");
    }
  
-  SUBCASE("Parse depfile with a dollar sign followed by a dollar sign")
+  SUBCASE("Dollar sign followed by a dollar sign")
    {
      std::vector<std::string> result = Depfile::tokenize("cat.o: meow$$");
      REQUIRE(result.size() == 2);
@@ -103,7 +103,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "meow$");
    }
  
-  SUBCASE("Parse depfile with a dollar sign followed by an alphabet")
+  SUBCASE("Dollar sign followed by an alphabet")
    {
      std::vector<std::string> result = Depfile::tokenize("cat.o: meow$w");
      REQUIRE(result.size() == 2);
@@ -111,7 +111,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "meow$w");
    }
  
-  SUBCASE("Parse depfile with a backslash followed by a number sign or a colon")
+  SUBCASE("Backslash followed by a number sign or a colon")
    {
      std::vector<std::string> result =
        Depfile::tokenize("cat.o: meow\\# meow\\:");
@@ -121,7 +121,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[2] == "meow:");
    }
  
-  SUBCASE("Parse depfile with a backslash followed by an alphabet")
+  SUBCASE("Backslash followed by an alphabet")
    {
      std::vector<std::string> result =
        Depfile::tokenize("cat.o: meow\\w purr\\r");
@@ -131,7 +131,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[2] == "purr\\r");
    }
  
-  SUBCASE("Parse depfile with a backslash followed by a space or a tab")
+  SUBCASE("Backslash followed by a space or a tab")
    {
      std::vector<std::string> result =
        Depfile::tokenize("cat.o: meow\\ meow purr\\\tpurr");
@@ -141,7 +141,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[2] == "purr\tpurr");
    }
  
-  SUBCASE("Parse depfile with backslashes followed by a space or a tab")
+  SUBCASE("Backslashes followed by a space or a tab")
    {
      std::vector<std::string> result =
        Depfile::tokenize("cat.o: meow\\\\\\ meow purr\\\\ purr");
@@ -152,7 +152,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[3] == "purr");
    }
  
-  SUBCASE("Parse depfile with a backslash newline")
+  SUBCASE("Backslash newline")
    {
      std::vector<std::string> result =
        Depfile::tokenize("cat.o: meow\\\nmeow\\\n purr\\\n\tpurr");
@@ -164,12 +164,11 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[4] == "purr");
    }
  
-  SUBCASE("Parse depfile with a new line")
+  SUBCASE("Newlines")
    {
-    // This is invalid depfile because it has multiple lines without backslash,
-    // which is not valid in Makefile syntax.
-    // However, Depfile::tokenize is parsing it to each token, which is
-    // expected.
+    // This is an invalid dependency file since it has multiple lines without
+    // backslash, which is not valid Makefile syntax. However, the
+    // Depfile::tokenize's simplistic parser accepts them.
      std::vector<std::string> result =
        Depfile::tokenize("cat.o: meow\nmeow\npurr\n");
      REQUIRE(result.size() == 4);
@@ -179,7 +178,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[3] == "purr");
    }
  
-  SUBCASE("Parse depfile with a trailing dollar sign")
+  SUBCASE("Trailing dollar sign")
    {
      std::vector<std::string> result = Depfile::tokenize("cat.o: meow$");
      REQUIRE(result.size() == 2);
@@ -187,7 +186,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "meow$");
    }
  
-  SUBCASE("Parse depfile with a trailing backslash")
+  SUBCASE("Trailing backslash")
    {
      std::vector<std::string> result = Depfile::tokenize("cat.o: meow\\");
      REQUIRE(result.size() == 2);
@@ -195,7 +194,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "meow\\");
    }
  
-  SUBCASE("Parse depfile with a trailing backslash newline")
+  SUBCASE("Trailing backslash newline")
    {
      std::vector<std::string> result = Depfile::tokenize("cat.o: meow\\\n");
      REQUIRE(result.size() == 2);
@@ -203,23 +202,15 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "meow");
    }
  
-  SUBCASE("Parse depfile with a one space before colon")
+  SUBCASE("Space before the colon but not after")
    {
-    std::vector<std::string> result = Depfile::tokenize("cat.o : meow");
+    std::vector<std::string> result = Depfile::tokenize("cat.o :meow");
      REQUIRE(result.size() == 2);
      CHECK(result[0] == "cat.o:");
      CHECK(result[1] == "meow");
    }
  
-  SUBCASE("Parse depfile with a two spaces before colon")
-  {
-    std::vector<std::string> result = Depfile::tokenize("cat.o  : meow");
-    REQUIRE(result.size() == 2);
-    CHECK(result[0] == "cat.o:");
-    CHECK(result[1] == "meow");
-  }
-
-  SUBCASE("Parse depfile with a plenty of spaces before colon")
+  SUBCASE("Space around the colon")
    {
      std::vector<std::string> result = Depfile::tokenize("cat.o    :    meow");
      REQUIRE(result.size() == 2);
@@ -227,7 +218,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "meow");
    }
  
-  SUBCASE("Parse depfile with no space between colon and dependency")
+  SUBCASE("No space between colon and dependency")
    {
      std::vector<std::string> result = Depfile::tokenize("cat.o:meow");
      REQUIRE(result.size() == 2);
@@ -235,9 +226,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "meow");
    }
  
-  SUBCASE(
-    "Parse depfile with windows formatted filename (with backslashes in "
-    "target)")
+  SUBCASE("Windows filename (with backslashes in target)")
    {
      std::vector<std::string> result = Depfile::tokenize("e:\\cat.o: meow");
      REQUIRE(result.size() == 2);
@@ -245,9 +234,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "meow");
    }
  
-  SUBCASE(
-    "Parse depfile with windows formatted filename (with backslashes in "
-    "prerequisite)")
+  SUBCASE("Windows filename (with backslashes in prerequisite)")
    {
      std::vector<std::string> result =
        Depfile::tokenize("cat.o: c:\\meow\\purr");
@@ -256,8 +243,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "c:\\meow\\purr");
    }
  
-  SUBCASE(
-    "Parse depfile with windows formatted filename (with slashes in target)")
+  SUBCASE("Windows filename (with slashes in target)")
    {
      std::vector<std::string> result = Depfile::tokenize("e:/cat.o: meow");
      REQUIRE(result.size() == 2);
@@ -265,9 +251,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "meow");
    }
  
-  SUBCASE(
-    "Parse depfile with windows formatted filename (with slashes in "
-    "prerequisite)")
+  SUBCASE("Windows filename (with slashes in prerequisite)")
    {
      std::vector<std::string> result = Depfile::tokenize("cat.o: c:/meow/purr");
      REQUIRE(result.size() == 2);
@@ -275,9 +259,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "c:/meow/purr");
    }
  
-  SUBCASE(
-    "Parse depfile with windows formatted filename (with slashes and trailing "
-    "colon)")
+  SUBCASE("Windows filename (with slashes and trailing colon)")
    {
      std::vector<std::string> result = Depfile::tokenize("cat.o: c: /meow/purr");
      REQUIRE(result.size() == 3);
@@ -286,7 +268,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[2] == "/meow/purr");
    }
  
-  SUBCASE("Parse depfile with windows formatted filename (subtest1)")
+  SUBCASE("Windows filename: cat:/meow")
    {
      std::vector<std::string> result = Depfile::tokenize("cat:/meow");
      REQUIRE(result.size() == 2);
@@ -294,7 +276,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "/meow");
    }
  
-  SUBCASE("Parse depfile with windows formatted filename (subtest2)")
+  SUBCASE("Windows filename: cat:\\meow")
    {
      std::vector<std::string> result = Depfile::tokenize("cat:\\meow");
      REQUIRE(result.size() == 2);
@@ -302,7 +284,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "\\meow");
    }
  
-  SUBCASE("Parse depfile with windows formatted filename (subtest3)")
+  SUBCASE("Windows filename: cat:\\ meow")
    {
      std::vector<std::string> result = Depfile::tokenize("cat:\\ meow");
      REQUIRE(result.size() == 2);
@@ -310,7 +292,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == " meow");
    }
  
-  SUBCASE("Parse depfile with windows formatted filename (subtest4)")
+  SUBCASE("Windows filename: cat:c:/meow")
    {
      std::vector<std::string> result = Depfile::tokenize("cat:c:/meow");
      REQUIRE(result.size() == 2);
@@ -318,7 +300,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "c:/meow");
    }
  
-  SUBCASE("Parse depfile with windows formatted filename (subtest5)")
+  SUBCASE("Windows filename: cat:c:\\meow")
    {
      std::vector<std::string> result = Depfile::tokenize("cat:c:\\meow");
      REQUIRE(result.size() == 2);
@@ -326,7 +308,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "c:\\meow");
    }
  
-  SUBCASE("Parse depfile with windows formatted filename (subtest6)")
+  SUBCASE("Windows filename: cat:c:")
    {
      std::vector<std::string> result = Depfile::tokenize("cat:c:");
      REQUIRE(result.size() == 2);
@@ -334,7 +316,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "c:");
    }
  
-  SUBCASE("Parse depfile with windows formatted filename (subtest7)")
+  SUBCASE("Windows filename: cat:c:\\")
    {
      std::vector<std::string> result = Depfile::tokenize("cat:c:\\");
      REQUIRE(result.size() == 2);
@@ -342,7 +324,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "c:\\");
    }
  
-  SUBCASE("Parse depfile with windows formatted filename (subtest8)")
+  SUBCASE("Windows filename: cat:c:/")
    {
      std::vector<std::string> result = Depfile::tokenize("cat:c:/");
      REQUIRE(result.size() == 2);
@@ -350,7 +332,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "c:/");
    }
  
-  SUBCASE("Parse depfile with windows formatted filename (subtest9)")
+  SUBCASE("Windows filename: cat:c:meow")
    {
      std::vector<std::string> result = Depfile::tokenize("cat:c:meow");
      REQUIRE(result.size() == 3);
@@ -359,7 +341,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[2] == "meow");
    }
  
-  SUBCASE("Parse depfile with windows formatted filename (subtest10)")
+  SUBCASE("Windows filename: c:c:/meow")
    {
      std::vector<std::string> result = Depfile::tokenize("c:c:/meow");
      REQUIRE(result.size() == 2);
@@ -367,7 +349,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "c:/meow");
    }
  
-  SUBCASE("Parse depfile with windows formatted filename (subtest11)")
+  SUBCASE("Windows filename: c:c:\\meow")
    {
      std::vector<std::string> result = Depfile::tokenize("c:c:\\meow");
      REQUIRE(result.size() == 2);
@@ -375,7 +357,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "c:\\meow");
    }
  
-  SUBCASE("Parse depfile with windows formatted filename (subtest12)")
+  SUBCASE("Windows filename: c:z:\\meow")
    {
      std::vector<std::string> result = Depfile::tokenize("c:z:\\meow");
      REQUIRE(result.size() == 2);
@@ -383,7 +365,7 @@ TEST_CASE("Depfile::tokenize")
      CHECK(result[1] == "z:\\meow");
    }
  
-  SUBCASE("Parse depfile with windows formatted filename (subtest13)")
+  SUBCASE("Windows filename: c:cd:\\meow")
    {
      std::vector<std::string> result = Depfile::tokenize("c:cd:\\meow");
      REQUIRE(result.size() == 3);
author	Joel Rosdahl <joel@rosdahl.net>
	Wed, 5 Oct 2022 19:34:17 +0000 (21:34 +0200)
committer	Joel Rosdahl <joel@rosdahl.net>
	Thu, 6 Oct 2022 16:31:47 +0000 (18:31 +0200)
src/Depfile.cpp		patch \| blob \| blame \| history
src/Depfile.hpp		patch \| blob \| blame \| history
src/util/file.cpp		patch \| blob \| blame \| history
test/suites/depend.bash		patch \| blob \| blame \| history
unittest/test_Depfile.cpp		patch \| blob \| blame \| history