From: Joel Rosdahl Date: Thu, 25 Apr 2024 19:32:16 +0000 (+0200) Subject: refactor: Improve dependency file parsing X-Git-Tag: v4.10~40 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cb3e3f033a430b73c636f8bb58ee720ec84fc104;p=thirdparty%2Fccache.git refactor: Improve dependency file parsing - Use Depfile::tokenize to parse the dep file in rewrite_source_paths instead of an even more ad-hoc parser. - Add Depfile::untokenize to go from tokens back to text representation. --- diff --git a/src/ccache/Depfile.cpp b/src/ccache/Depfile.cpp index 441c0427..c6a6feea 100644 --- a/src/ccache/Depfile.cpp +++ b/src/ccache/Depfile.cpp @@ -34,12 +34,6 @@ namespace fs = util::filesystem; -static inline bool -is_blank(const std::string& s) -{ - return std::all_of(s.begin(), s.end(), [](char c) { return isspace(c); }); -} - namespace Depfile { std::string @@ -66,58 +60,31 @@ escape_filename(std::string_view filename) } std::optional -rewrite_source_paths(const Context& ctx, std::string_view file_content) +rewrite_source_paths(const Context& ctx, std::string_view content) { ASSERT(!ctx.config.base_dir().empty()); - // Fast path for the common case: - if (file_content.find(ctx.config.base_dir()) == std::string::npos) { - return std::nullopt; - } - - std::string adjusted_file_content; - adjusted_file_content.reserve(file_content.size()); - - bool content_rewritten = false; - bool seen_target_token = false; - - using util::Tokenizer; - for (const auto line : Tokenizer(file_content, - "\n", - Tokenizer::Mode::include_empty, - Tokenizer::IncludeDelimiter::yes)) { - const auto tokens = util::split_into_views(line, " \t"); - for (size_t i = 0; i < tokens.size(); ++i) { - DEBUG_ASSERT(!line.empty()); // line.empty() -> no tokens - DEBUG_ASSERT(!tokens[i].empty()); - - if (i > 0 || line[0] == ' ' || line[0] == '\t') { - adjusted_file_content.push_back(' '); - } - - const auto& token = tokens[i]; - bool token_rewritten = false; - if (seen_target_token && 
fs::path(token).is_absolute()) { - const auto new_path = Util::make_relative_path(ctx, token); - if (new_path != token) { - adjusted_file_content.append(new_path); - token_rewritten = true; - } - } - if (token_rewritten) { - content_rewritten = true; - } else { - adjusted_file_content.append(token.begin(), token.end()); - } - - if (tokens[i].back() == ':') { - seen_target_token = true; - } + bool rewritten = false; + bool first = true; + auto tokens = tokenize(content); + for (auto& token : tokens) { + if (first) { + // Don't rewrite object file path. + first = false; + continue; + } + if (token.empty() || token == ":") { + continue; + } + auto rel_path = Util::make_relative_path(ctx, token); + if (rel_path != token) { + rewritten = true; + token = std::move(rel_path); } } - if (content_rewritten) { - return adjusted_file_content; + if (rewritten) { + return untokenize(tokens); } else { return std::nullopt; } @@ -133,14 +100,12 @@ make_paths_relative_in_output_dep(const Context& ctx) } const std::string& output_dep = ctx.args_info.output_dep; - const auto file_content = util::read_file(output_dep); - if (!file_content) { - LOG("Failed to read dependency file {}: {}", - output_dep, - file_content.error()); + const auto content = util::read_file(output_dep); + if (!content) { + LOG("Failed to read dependency file {}: {}", output_dep, content.error()); return; } - const auto new_content = rewrite_source_paths(ctx, *file_content); + const auto new_content = rewrite_source_paths(ctx, *content); if (new_content) { util::write_file(output_dep, *new_content); } else { @@ -149,7 +114,7 @@ make_paths_relative_in_output_dep(const Context& ctx) } std::vector -tokenize(std::string_view file_content) +tokenize(std::string_view text) { // A dependency file uses Makefile syntax. This is not perfect parser but // should be enough for parsing a regular dependency file. 
@@ -178,94 +143,113 @@ tokenize(std::string_view file_content) // the following character is a slash (forward or backward), then it is // interpreted as a Windows path. - std::vector result; - const size_t length = file_content.size(); - std::string token; - size_t p = 0; + std::vector tokens; + const size_t length = text.size(); - while (p < length) { - char c = file_content[p]; + size_t i = 0; - if (c == ':' && p + 1 < length && !is_blank(token) && token.length() == 1) { - const char next = file_content[p + 1]; - if (next == '/' || next == '\\') { - // It's a Windows path, so the colon is not a separator and instead - // added to the token. - token.push_back(c); - ++p; - continue; - } + while (true) { + // Find start of next token. + while (i < length && text[i] != '\n' && isspace(text[i])) { + ++i; } - // Each token is separated by whitespace or a colon. - if (isspace(c) || c == ':') { - // Chomp all spaces before next character. - while (p < length && isspace(file_content[p])) { - ++p; + // Detect end of entry. + if (i == length || text[i] == '\n') { + if (!tokens.empty() && !tokens.back().empty()) { + tokens.emplace_back(""); } - if (!is_blank(token)) { - // If there were spaces between a token and the colon, add the colon the - // token to make sure it is seen as a target and not as a dependency. - if (p < length) { - const char next = file_content[p]; - if (next == ':') { - token.push_back(next); - ++p; - // Chomp all spaces before next character. - while (p < length && isspace(file_content[p])) { - ++p; - } - } - } - result.push_back(token); + if (i == length) { + // Reached the end. + break; } - token.clear(); + ++i; continue; } - switch (c) { - case '\\': - if (p + 1 < length) { - const char next = file_content[p + 1]; - switch (next) { - // A backspace followed by any of the below characters leaves the - // character as is. 
+ if (text[i] == ':') { + tokens.emplace_back(":"); + ++i; + continue; + } + + if (text[i] == '\\' && i + 1 < length && text[i + 1] == '\n') { + // Line continuation. + i += 2; + continue; + } + + // Parse token. + std::string token; + while (i < length) { + if (text[i] == ':' && token.length() == 1 && !isspace(token[0]) + && i + 1 < length && (text[i + 1] == '/' || text[i + 1] == '\\')) { + // It's a Windows path, so the colon is not a separator and instead + // added to the token. + token += text[i]; + ++i; + continue; + } + + if (text[i] == ':' || isspace(text[i]) + || (text[i] == '\\' && i + 1 < length && text[i + 1] == '\n')) { + // End of token. + break; + } + + if (i + 1 < length) { + switch (text[i]) { case '\\': - case '#': - case ':': - case ' ': - case '\t': - c = next; - ++p; + switch (text[i + 1]) { + // A backslash followed by any of the below characters leaves the + // character as is. + case '\\': + case '#': + case ':': + case ' ': + case '\t': + ++i; + break; + } + break; + case '$': + if (text[i + 1] == '$') { + // A dollar sign preceded by a dollar sign escapes the dollar sign. + ++i; + } break; - // Backslash followed by newline is interpreted like a space, so simply - // discard the backslash. - case '\n': - ++p; - continue; - } - } - break; - case '$': - if (p + 1 < length) { - const char next = file_content[p + 1]; - if (next == '$') { - // A dollar sign preceded by a dollar sign escapes the dollar sign. 
- c = next; - ++p; } } - break; + + token += text[i]; + ++i; } - token.push_back(c); - ++p; + tokens.push_back(token); } - if (!is_blank(token)) { - result.push_back(token); - } + return tokens; +} +std::string +untokenize(const std::vector& tokens) +{ + std::string result; + for (const auto& token : tokens) { + if (token.empty()) { + result += '\n'; + } else if (token == ":") { + result += ':'; + } else { + if (!result.empty() && result.back() != '\n') { + result += " \\\n "; + } + result += escape_filename(token); + } + } + if (!result.empty() && result.back() != '\n') { + result += '\n'; + } return result; } diff --git a/src/ccache/Depfile.hpp b/src/ccache/Depfile.hpp index 85393cd0..bb0ac6d5 100644 --- a/src/ccache/Depfile.hpp +++ b/src/ccache/Depfile.hpp @@ -34,8 +34,11 @@ std::optional rewrite_source_paths(const Context& ctx, void make_paths_relative_in_output_dep(const Context& ctx); -// Tokenize `file_content` into a list of files, where the first token is the -// target and ends with a colon. -std::vector tokenize(std::string_view file_content); +// Split `text` into tokens. A colon token delimits the target tokens from +// dependency tokens. An empty token marks the end of an entry. +std::vector tokenize(std::string_view text); + +// Return text from `tokens` that originate from `tokenize`. 
+std::string untokenize(const std::vector& tokens); } // namespace Depfile diff --git a/src/ccache/ccache.cpp b/src/ccache/ccache.cpp index 60bb154e..a6beb202 100644 --- a/src/ccache/ccache.cpp +++ b/src/ccache/ccache.cpp @@ -647,12 +647,18 @@ result_key_from_depfile(Context& ctx, Hash& hash) return tl::unexpected(Statistic::bad_input_file); } + bool seen_colon = false; for (std::string_view token : Depfile::tokenize(*file_content)) { - if (util::ends_with(token, ":")) { + if (token.empty()) { + seen_colon = false; continue; } - std::string path = Util::make_relative_path(ctx, token); - TRY(remember_include_file(ctx, path, hash, false, &hash)); + if (seen_colon) { + std::string path = Util::make_relative_path(ctx, token); + TRY(remember_include_file(ctx, path, hash, false, &hash)); + } else if (token == ":") { + seen_colon = true; + } } // Explicitly check the .gch/.pch/.pth file as it may not be mentioned in the diff --git a/unittest/test_Depfile.cpp b/unittest/test_Depfile.cpp index 3296750a..6e5d91a9 100644 --- a/unittest/test_Depfile.cpp +++ b/unittest/test_Depfile.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -31,6 +32,7 @@ namespace fs = util::filesystem; using TestUtil::TestContext; +using pstr = util::PathString; TEST_SUITE_BEGIN("Depfile"); @@ -40,6 +42,7 @@ TEST_CASE("Depfile::escape_filename") CHECK(Depfile::escape_filename("foo") == "foo"); CHECK(Depfile::escape_filename("foo\\bar") == "foo\\\\bar"); CHECK(Depfile::escape_filename("foo#bar") == "foo\\#bar"); + CHECK(Depfile::escape_filename("foo:bar") == "foo\\:bar"); CHECK(Depfile::escape_filename("foo bar") == "foo\\ bar"); CHECK(Depfile::escape_filename("foo\tbar") == "foo\\\tbar"); CHECK(Depfile::escape_filename("foo$bar") == "foo$$bar"); @@ -51,10 +54,14 @@ TEST_CASE("Depfile::rewrite_source_paths") const fs::path cwd = ctx.actual_cwd; - const auto content = - FMT("{0}/foo.o {0}/foo.o: bar.c {0}/bar.h \\\n\n {1}/fie.h {0}/fum.h\n", - cwd, - cwd.parent_path()); + const auto 
content = FMT( + "{0}/foo.o: bar.c \\\n" + " {0}/bar/bar.h {1}/fie.h \\\n" + "\n" + " {0}/bar/bar.h: \n" + " {1}/fie.h:\n", + Depfile::escape_filename(pstr(cwd).str()), + Depfile::escape_filename(pstr(cwd.parent_path()).str())); SUBCASE("Base directory not in dep file content") { @@ -74,10 +81,15 @@ TEST_CASE("Depfile::rewrite_source_paths") { ctx.config.set_base_dir(cwd.string()); const auto actual = Depfile::rewrite_source_paths(ctx, content); - const auto expected = - FMT("{0}/foo.o {0}/foo.o: bar.c ./bar.h \\\n\n {1}/fie.h ./fum.h\n", - cwd, - cwd.parent_path()); + const auto expected = FMT( + "{0}/foo.o: \\\n" + " bar.c \\\n" + " ./bar/bar.h \\\n" + " {1}/fie.h\n" + "./bar/bar.h:\n" + "{1}/fie.h:\n", + Depfile::escape_filename(pstr(cwd).str()), + Depfile::escape_filename(pstr(cwd.parent_path()).str())); REQUIRE(actual); CHECK(*actual == expected); } @@ -87,88 +99,98 @@ TEST_CASE("Depfile::tokenize") { SUBCASE("Empty") { - std::vector result = Depfile::tokenize(""); + auto result = Depfile::tokenize(""); CHECK(result.size() == 0); } SUBCASE("Simple") { - std::vector result = - Depfile::tokenize("cat.o: meow meow purr"); - REQUIRE(result.size() == 4); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "meow"); + auto result = Depfile::tokenize("cat.o: meow meow purr"); + REQUIRE(result.size() == 6); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); CHECK(result[2] == "meow"); - CHECK(result[3] == "purr"); + CHECK(result[3] == "meow"); + CHECK(result[4] == "purr"); + CHECK(result[5] == ""); } SUBCASE("Dollar sign followed by a dollar sign") { - std::vector result = Depfile::tokenize("cat.o: meow$$"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "meow$"); + auto result = Depfile::tokenize("cat.o: meow$$"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "meow$"); + CHECK(result[3] == ""); } SUBCASE("Dollar sign followed by an alphabet") { - std::vector 
result = Depfile::tokenize("cat.o: meow$w"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "meow$w"); + auto result = Depfile::tokenize("cat.o: meow$w"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "meow$w"); + CHECK(result[3] == ""); } SUBCASE("Backslash followed by a number sign or a colon") { - std::vector result = - Depfile::tokenize("cat.o: meow\\# meow\\:"); - REQUIRE(result.size() == 3); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "meow#"); - CHECK(result[2] == "meow:"); + auto result = Depfile::tokenize("cat.o: meow\\# meow\\:"); + REQUIRE(result.size() == 5); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "meow#"); + CHECK(result[3] == "meow:"); + CHECK(result[4] == ""); } SUBCASE("Backslash followed by an alphabet") { - std::vector result = - Depfile::tokenize("cat.o: meow\\w purr\\r"); - REQUIRE(result.size() == 3); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "meow\\w"); - CHECK(result[2] == "purr\\r"); + auto result = Depfile::tokenize("cat.o: meow\\w purr\\r"); + REQUIRE(result.size() == 5); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "meow\\w"); + CHECK(result[3] == "purr\\r"); + CHECK(result[4] == ""); } SUBCASE("Backslash followed by a space or a tab") { - std::vector result = - Depfile::tokenize("cat.o: meow\\ meow purr\\\tpurr"); - REQUIRE(result.size() == 3); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "meow meow"); - CHECK(result[2] == "purr\tpurr"); + auto result = Depfile::tokenize("cat.o: meow\\ meow purr\\\tpurr"); + REQUIRE(result.size() == 5); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "meow meow"); + CHECK(result[3] == "purr\tpurr"); + CHECK(result[4] == ""); } SUBCASE("Backslashes followed by a space or a tab") { - std::vector result = - Depfile::tokenize("cat.o: meow\\\\\\ meow purr\\\\ purr"); - 
REQUIRE(result.size() == 4); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "meow\\ meow"); - CHECK(result[2] == "purr\\"); - CHECK(result[3] == "purr"); + auto result = Depfile::tokenize("cat.o: meow\\\\\\ meow purr\\\\ purr"); + REQUIRE(result.size() == 6); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "meow\\ meow"); + CHECK(result[3] == "purr\\"); + CHECK(result[4] == "purr"); + CHECK(result[5] == ""); } SUBCASE("Backslash newline") { - std::vector result = - Depfile::tokenize("cat.o: meow\\\nmeow\\\n purr\\\n\tpurr"); - REQUIRE(result.size() == 5); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "meow"); + auto result = Depfile::tokenize("cat.o: meow\\\nmeow\\\n purr\\\n\tpurr"); + REQUIRE(result.size() == 7); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); CHECK(result[2] == "meow"); - CHECK(result[3] == "purr"); + CHECK(result[3] == "meow"); CHECK(result[4] == "purr"); + CHECK(result[5] == "purr"); + CHECK(result[6] == ""); } SUBCASE("Newlines") @@ -176,210 +198,290 @@ TEST_CASE("Depfile::tokenize") // This is an invalid dependency file since it has multiple lines without // backslash, which is not valid Makefile syntax. However, the // Depfile::tokenize's simplistic parser accepts them. 
- std::vector result = - Depfile::tokenize("cat.o: meow\nmeow\npurr\n"); - REQUIRE(result.size() == 4); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "meow"); + auto result = Depfile::tokenize("cat.o: meow\nmeow\npurr\n"); + REQUIRE(result.size() == 8); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); CHECK(result[2] == "meow"); - CHECK(result[3] == "purr"); + CHECK(result[3] == ""); + CHECK(result[4] == "meow"); + CHECK(result[5] == ""); + CHECK(result[6] == "purr"); + CHECK(result[7] == ""); + } + + SUBCASE("Multiple entries") + { + auto result = Depfile::tokenize( + "foo.o bar.o: a.h \\\n" + " b.h\\\n" + " c.h\n" + "a.h:\n" + " b.h:"); // Intentionally no newline + REQUIRE(result.size() == 13); + CHECK(result[0] == "foo.o"); + CHECK(result[1] == "bar.o"); + CHECK(result[2] == ":"); + CHECK(result[3] == "a.h"); + CHECK(result[4] == "b.h"); + CHECK(result[5] == "c.h"); + CHECK(result[6] == ""); + CHECK(result[7] == "a.h"); + CHECK(result[8] == ":"); + CHECK(result[9] == ""); + CHECK(result[10] == "b.h"); + CHECK(result[11] == ":"); + CHECK(result[12] == ""); } SUBCASE("Trailing dollar sign") { - std::vector result = Depfile::tokenize("cat.o: meow$"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "meow$"); + auto result = Depfile::tokenize("cat.o: meow$"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "meow$"); + CHECK(result[3] == ""); } SUBCASE("Trailing backslash") { - std::vector result = Depfile::tokenize("cat.o: meow\\"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "meow\\"); + auto result = Depfile::tokenize("cat.o: meow\\"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "meow\\"); + CHECK(result[3] == ""); } SUBCASE("Trailing backslash newline") { - std::vector result = Depfile::tokenize("cat.o: meow\\\n"); - REQUIRE(result.size() == 2); - 
CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "meow"); + auto result = Depfile::tokenize("cat.o: meow\\\n"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "meow"); + CHECK(result[3] == ""); } SUBCASE("Space before the colon but not after") { - std::vector result = Depfile::tokenize("cat.o :meow"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "meow"); + auto result = Depfile::tokenize("cat.o :meow"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "meow"); + CHECK(result[3] == ""); } SUBCASE("Space around the colon") { - std::vector result = Depfile::tokenize("cat.o : meow"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "meow"); + auto result = Depfile::tokenize("cat.o : meow"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "meow"); + CHECK(result[3] == ""); } SUBCASE("No space between colon and dependency") { - std::vector result = Depfile::tokenize("cat.o:meow"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "meow"); + auto result = Depfile::tokenize("cat.o:meow"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "meow"); + CHECK(result[3] == ""); } SUBCASE("Windows filename (with backslashes in target)") { - std::vector result = Depfile::tokenize("e:\\cat.o: meow"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "e:\\cat.o:"); - CHECK(result[1] == "meow"); + auto result = Depfile::tokenize("e:\\cat.o: meow"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "e:\\cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "meow"); + CHECK(result[3] == ""); } SUBCASE("Windows filename (with backslashes in prerequisite)") { - std::vector result = - Depfile::tokenize("cat.o: c:\\meow\\purr"); - 
REQUIRE(result.size() == 2); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "c:\\meow\\purr"); + auto result = Depfile::tokenize("cat.o: c:\\meow\\purr"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "c:\\meow\\purr"); + CHECK(result[3] == ""); } SUBCASE("Windows filename (with slashes in target)") { - std::vector result = Depfile::tokenize("e:/cat.o: meow"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "e:/cat.o:"); - CHECK(result[1] == "meow"); + auto result = Depfile::tokenize("e:/cat.o: meow"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "e:/cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "meow"); + CHECK(result[3] == ""); } SUBCASE("Windows filename (with slashes in prerequisite)") { - std::vector result = Depfile::tokenize("cat.o: c:/meow/purr"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "c:/meow/purr"); - } - - SUBCASE("Windows filename (with slashes and trailing colon)") - { - std::vector result = Depfile::tokenize("cat.o: c: /meow/purr"); - REQUIRE(result.size() == 3); - CHECK(result[0] == "cat.o:"); - CHECK(result[1] == "c:"); - CHECK(result[2] == "/meow/purr"); + auto result = Depfile::tokenize("cat.o: c:/meow/purr"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat.o"); + CHECK(result[1] == ":"); + CHECK(result[2] == "c:/meow/purr"); + CHECK(result[3] == ""); } SUBCASE("Windows filename: cat:/meow") { - std::vector result = Depfile::tokenize("cat:/meow"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat:"); - CHECK(result[1] == "/meow"); + auto result = Depfile::tokenize("cat:/meow"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat"); + CHECK(result[1] == ":"); + CHECK(result[2] == "/meow"); + CHECK(result[3] == ""); } SUBCASE("Windows filename: cat:\\meow") { - std::vector result = Depfile::tokenize("cat:\\meow"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat:"); - CHECK(result[1] 
== "\\meow"); + auto result = Depfile::tokenize("cat:\\meow"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat"); + CHECK(result[1] == ":"); + CHECK(result[2] == "\\meow"); + CHECK(result[3] == ""); } SUBCASE("Windows filename: cat:\\ meow") { - std::vector result = Depfile::tokenize("cat:\\ meow"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat:"); - CHECK(result[1] == " meow"); + auto result = Depfile::tokenize("cat:\\ meow"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat"); + CHECK(result[1] == ":"); + CHECK(result[2] == " meow"); + CHECK(result[3] == ""); } SUBCASE("Windows filename: cat:c:/meow") { - std::vector result = Depfile::tokenize("cat:c:/meow"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat:"); - CHECK(result[1] == "c:/meow"); + auto result = Depfile::tokenize("cat:c:/meow"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat"); + CHECK(result[1] == ":"); + CHECK(result[2] == "c:/meow"); + CHECK(result[3] == ""); } SUBCASE("Windows filename: cat:c:\\meow") { - std::vector result = Depfile::tokenize("cat:c:\\meow"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat:"); - CHECK(result[1] == "c:\\meow"); + auto result = Depfile::tokenize("cat:c:\\meow"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat"); + CHECK(result[1] == ":"); + CHECK(result[2] == "c:\\meow"); + CHECK(result[3] == ""); } + // Invalid pattern but tested for documentative purposes. SUBCASE("Windows filename: cat:c:") { - std::vector result = Depfile::tokenize("cat:c:"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat:"); - CHECK(result[1] == "c:"); + auto result = Depfile::tokenize("cat:c:"); + REQUIRE(result.size() == 5); + CHECK(result[0] == "cat"); + CHECK(result[1] == ":"); + CHECK(result[2] == "c"); + CHECK(result[3] == ":"); + CHECK(result[4] == ""); } + // Invalid pattern but tested for documentative purposes. 
SUBCASE("Windows filename: cat:c:\\") { - std::vector result = Depfile::tokenize("cat:c:\\"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat:"); - CHECK(result[1] == "c:\\"); + auto result = Depfile::tokenize("cat:c:\\"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat"); + CHECK(result[1] == ":"); + CHECK(result[2] == "c:\\"); + CHECK(result[3] == ""); } SUBCASE("Windows filename: cat:c:/") { - std::vector result = Depfile::tokenize("cat:c:/"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "cat:"); - CHECK(result[1] == "c:/"); + auto result = Depfile::tokenize("cat:c:/"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat"); + CHECK(result[1] == ":"); + CHECK(result[2] == "c:/"); + CHECK(result[3] == ""); } + // Invalid pattern but tested for documentative purposes. SUBCASE("Windows filename: cat:c:meow") { - std::vector result = Depfile::tokenize("cat:c:meow"); - REQUIRE(result.size() == 3); - CHECK(result[0] == "cat:"); - CHECK(result[1] == "c:"); - CHECK(result[2] == "meow"); + auto result = Depfile::tokenize("cat:c:meow"); + REQUIRE(result.size() == 6); + CHECK(result[0] == "cat"); + CHECK(result[1] == ":"); + CHECK(result[2] == "c"); + CHECK(result[3] == ":"); + CHECK(result[4] == "meow"); + CHECK(result[5] == ""); } SUBCASE("Windows filename: c:c:/meow") { - std::vector result = Depfile::tokenize("c:c:/meow"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "c:"); - CHECK(result[1] == "c:/meow"); + auto result = Depfile::tokenize("c:c:/meow"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "c"); + CHECK(result[1] == ":"); + CHECK(result[2] == "c:/meow"); + CHECK(result[3] == ""); } SUBCASE("Windows filename: c:c:\\meow") { - std::vector result = Depfile::tokenize("c:c:\\meow"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "c:"); - CHECK(result[1] == "c:\\meow"); + auto result = Depfile::tokenize("c:c:\\meow"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "c"); + CHECK(result[1] == ":"); + CHECK(result[2] 
== "c:\\meow"); + CHECK(result[3] == ""); } SUBCASE("Windows filename: c:z:\\meow") { - std::vector result = Depfile::tokenize("c:z:\\meow"); - REQUIRE(result.size() == 2); - CHECK(result[0] == "c:"); - CHECK(result[1] == "z:\\meow"); + auto result = Depfile::tokenize("c:z:\\meow"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "c"); + CHECK(result[1] == ":"); + CHECK(result[2] == "z:\\meow"); + CHECK(result[3] == ""); } + // Invalid pattern but tested for documentative purposes. SUBCASE("Windows filename: c:cd:\\meow") { - std::vector result = Depfile::tokenize("c:cd:\\meow"); - REQUIRE(result.size() == 3); - CHECK(result[0] == "c:"); - CHECK(result[1] == "cd:"); - CHECK(result[2] == "\\meow"); + auto result = Depfile::tokenize("c:cd:\\meow"); + REQUIRE(result.size() == 6); + CHECK(result[0] == "c"); + CHECK(result[1] == ":"); + CHECK(result[2] == "cd"); + CHECK(result[3] == ":"); + CHECK(result[4] == "\\meow"); + CHECK(result[5] == ""); } } +TEST_CASE("Depfile::untokenize") +{ + CHECK(Depfile::untokenize({}) == ""); + CHECK(Depfile::untokenize({"foo.o"}) == "foo.o\n"); + CHECK(Depfile::untokenize({"foo.o", ":"}) == "foo.o:\n"); + CHECK(Depfile::untokenize({"foo.o", ":", "bar.h"}) + == ("foo.o: \\\n" + " bar.h\n")); +} + TEST_SUITE_END();