// Return true if `c` is a whitespace character according to isspace.
//
// The value is converted to unsigned char first: passing a negative char
// (for instance a byte of a UTF-8 encoded path when char is signed) to isspace
// is undefined behavior.
static inline bool
is_space_char(char c)
{
  return isspace(static_cast<unsigned char>(c)) != 0;
}

// Return true if `s` is empty or consists solely of whitespace characters.
static inline bool
is_blank(const std::string& s)
{
  return std::all_of(s.begin(), s.end(), is_space_char);
}

// Split the content of a dependency file into tokens.
//
// A depfile is formatted with Makefile syntax. This is not a perfect parser,
// but it is enough for parsing a regular depfile:
//
// - Tokens are separated by runs of unescaped whitespace.
// - A backslash followed by a backslash, '#', ':', space or tab yields that
//   character literally (so an escaped space or tab does not end the token).
// - A backslash followed by a newline (LF or CRLF) is a line continuation; the
//   backslash is dropped and the newline acts as a token separator.
// - "$$" yields a single '$'.
// - Any other backslash or dollar sign, including a trailing one, is kept
//   as is.
//
// Tokens that are empty or contain only whitespace are not returned. Target
// tokens keep their trailing ':'; filtering them is left to the caller.
std::vector<std::string>
parse_depfile(string_view file_content)
{
  std::vector<std::string> result;

  const size_t length = file_content.size();
  std::string token;
  size_t p = 0;
  while (p < length) {
    // Each token is separated by (unescaped) whitespace.
    if (is_space_char(file_content[p])) {
      while (p < length && is_space_char(file_content[p])) {
        ++p;
      }
      // The token may be empty (leading or repeated separators) or consist of
      // nothing but escaped whitespace; neither is a useful path.
      if (!is_blank(token)) {
        result.push_back(token);
      }
      token.clear();
      continue;
    }

    char c = file_content[p];
    switch (c) {
    case '\\':
      if (p + 1 < length) {
        const char next = file_content[p + 1];
        switch (next) {
        // A backslash followed by one of these characters leaves the
        // character as is.
        case '\\':
        case '#':
        case ':':
        case ' ':
        case '\t':
          c = next;
          ++p;
          break;
        // For this parser, a backslash-newline can be treated as just a
        // space. Therefore simply skip the backslash; the newline is consumed
        // as a separator by the next iteration.
        case '\n':
          ++p;
          continue;
        // Same as above for depfiles with CRLF line endings. A backslash
        // followed by a lone carriage return is kept as is.
        case '\r':
          if (p + 2 < length && file_content[p + 2] == '\n') {
            ++p;
            continue;
          }
          break;
        default:
          break;
        }
      }
      break;
    case '$':
      // A dollar sign followed by a dollar sign is an escaped dollar sign.
      if (p + 1 < length && file_content[p + 1] == '$') {
        ++p;
      }
      break;
    default:
      break;
    }

    token.push_back(c);
    ++p;
  }
  if (!is_blank(token)) {
    result.push_back(token);
  }

  return result;
}
depfile with a dollar sign followed by an alphabet") + { + std::vector result = parse_depfile("cat.o: meow$w"); + REQUIRE(result.size() == 2); + CHECK(result[0] == "cat.o:"); + CHECK(result[1] == "meow$w"); + } + + SUBCASE("Parse depfile with a backslash followed by a number sign or a colon") + { + std::vector result = parse_depfile("cat.o: meow\\# meow\\:"); + REQUIRE(result.size() == 3); + CHECK(result[0] == "cat.o:"); + CHECK(result[1] == "meow#"); + CHECK(result[2] == "meow:"); + } + + SUBCASE("Parse depfile with a backslash followed by an alphabet") + { + std::vector result = parse_depfile("cat.o: meow\\w purr\\r"); + REQUIRE(result.size() == 3); + CHECK(result[0] == "cat.o:"); + CHECK(result[1] == "meow\\w"); + CHECK(result[2] == "purr\\r"); + } + + SUBCASE("Parse depfile with a backslash followed by a space or a tab") + { + std::vector result = + parse_depfile("cat.o: meow\\ meow purr\\\tpurr"); + REQUIRE(result.size() == 3); + CHECK(result[0] == "cat.o:"); + CHECK(result[1] == "meow meow"); + CHECK(result[2] == "purr\tpurr"); + } + + SUBCASE("Parse depfile with backslashes followed by a space or a tab") + { + std::vector result = + parse_depfile("cat.o: meow\\\\\\ meow purr\\\\ purr"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat.o:"); + CHECK(result[1] == "meow\\ meow"); + CHECK(result[2] == "purr\\"); + CHECK(result[3] == "purr"); + } + + SUBCASE("Parse depfile with a backslash newline") + { + std::vector result = + parse_depfile("cat.o: meow\\\nmeow\\\n purr\\\n\tpurr"); + REQUIRE(result.size() == 5); + CHECK(result[0] == "cat.o:"); + CHECK(result[1] == "meow"); + CHECK(result[2] == "meow"); + CHECK(result[3] == "purr"); + CHECK(result[4] == "purr"); + } + + SUBCASE("Parse depfile with a new line") + { + // This is invalid depfile because it has multiple lines without backslash, + // which is not valid in Makefile syntax. + // However, parse_depfile is parsing it to each token, which is expected. 
+ std::vector result = + parse_depfile("cat.o: meow\nmeow\npurr\n"); + REQUIRE(result.size() == 4); + CHECK(result[0] == "cat.o:"); + CHECK(result[1] == "meow"); + CHECK(result[2] == "meow"); + CHECK(result[3] == "purr"); + } + + SUBCASE("Parse depfile with a trailing dollar sign") + { + std::vector result = parse_depfile("cat.o: meow$"); + REQUIRE(result.size() == 2); + CHECK(result[0] == "cat.o:"); + CHECK(result[1] == "meow$"); + } + + SUBCASE("Parse depfile with a trailing backslash") + { + std::vector result = parse_depfile("cat.o: meow\\"); + REQUIRE(result.size() == 2); + CHECK(result[0] == "cat.o:"); + CHECK(result[1] == "meow\\"); + } + + SUBCASE("Parse depfile with a trailing backslash newline") + { + std::vector result = parse_depfile("cat.o: meow\\\n"); + REQUIRE(result.size() == 2); + CHECK(result[0] == "cat.o:"); + CHECK(result[1] == "meow"); + } +} + TEST_SUITE_END(); -- 2.47.3