From: Joel Rosdahl Date: Mon, 13 Jul 2020 11:39:45 +0000 (+0200) Subject: Remove dependency on std::regex X-Git-Tag: v4.0~332 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a6adb502923ce83e3eef11ae15a0717af340fb8e;p=thirdparty%2Fccache.git Remove dependency on std::regex std::regex is not available for GCC 4.8. It’s also a bit bloated. The reason for not using POSIX regex functionality is that it’s not available in MinGW. --- diff --git a/src/Util.cpp b/src/Util.cpp index ebbcc483a..9218aa93f 100644 --- a/src/Util.cpp +++ b/src/Util.cpp @@ -26,7 +26,6 @@ #include #include -#include #ifdef HAVE_LINUX_FS_H # include @@ -44,6 +43,40 @@ using nonstd::string_view; namespace { +// Search for the first match of the following regular expression: +// +// \x1b\[[\x30-\x3f]*[\x20-\x2f]*[Km] +// +// The primary reason for not using std::regex is that it's not available for +// GCC 4.8. It's also a bit bloated. The reason for not using POSIX regex +// functionality is that it's not available in MinGW. 
+string_view +find_first_ansi_csi_seq(string_view string) +{ + size_t pos = 0; + while (pos < string.length() && string[pos] != 0x1b) { + ++pos; + } + if (pos + 1 >= string.length() || string[pos + 1] != '[') { + return {}; + } + size_t start = pos; + pos += 2; + while (pos < string.length() + && (string[pos] >= 0x30 && string[pos] <= 0x3f)) { + ++pos; + } + while (pos < string.length() + && (string[pos] >= 0x20 && string[pos] <= 0x2f)) { + ++pos; + } + if (pos < string.length() && (string[pos] == 'K' || string[pos] == 'm')) { + return string.substr(start, pos + 1 - start); + } else { + return {}; + } +} + size_t path_max(const char* path) { @@ -221,29 +254,6 @@ dir_name(string_view path) } } -std::string -edit_ansi_csi_seqs(string_view string, const SubstringEditor& editor) -{ - static const std::regex csi_regex( - "\x1B\\[[\x30-\x3F]*[\x20-\x2F]*[\x40-\x7E]"); - std::string ret; - std::string substr; - ret.reserve(string.size()); - for (std::cregex_token_iterator itr( - string.begin(), string.end(), csi_regex, {-1, 0}); - itr != std::cregex_token_iterator{}; - ++itr) { - ret.append(itr->first, itr->second); - if (++itr == std::cregex_token_iterator{}) { - break; - } - substr.assign(itr->first, itr->second); - editor(itr->first - string.begin(), substr); - ret.append(substr); - } - return ret; -} - bool ends_with(string_view string, string_view suffix) { @@ -778,14 +788,25 @@ starts_with(string_view string, string_view prefix) } std::string -strip_ansi_csi_seqs(string_view string, string_view strip_actions) +strip_ansi_csi_seqs(string_view string) { - return edit_ansi_csi_seqs( - string, [=](string_view::size_type /*pos*/, std::string& substr) { - if (strip_actions.find(substr.back()) != string_view::npos) { - substr.clear(); - } - }); + size_t pos = 0; + std::string result; + + while (true) { + auto seq_span = find_first_ansi_csi_seq(string.substr(pos)); + auto data_start = string.data() + pos; + auto data_length = + seq_span.empty() ? 
string.length() - pos : seq_span.data() - data_start; + result.append(data_start, data_length); + if (seq_span.empty()) { + // Reached tail. + break; + } + pos += data_length + seq_span.length(); + } + + return result; } std::string diff --git a/src/Util.hpp b/src/Util.hpp index c6c2657a4..c455b2fb1 100644 --- a/src/Util.hpp +++ b/src/Util.hpp @@ -114,12 +114,6 @@ std::pair create_temp_fd(nonstd::string_view path_prefix); // Get directory name of path. nonstd::string_view dir_name(nonstd::string_view path); -// Returns a copy of string with any contained ANSI CSI sequences edited by the -// given SubstringEditor, which is invoked once for each ANSI CSI sequence -// encountered in string. The original string is not modified. -[[gnu::warn_unused_result]] std::string -edit_ansi_csi_seqs(nonstd::string_view string, const SubstringEditor& editor); - // Return true if suffix is a suffix of string. bool ends_with(nonstd::string_view string, nonstd::string_view suffix); @@ -321,8 +315,7 @@ bool starts_with(nonstd::string_view string, nonstd::string_view prefix); // Returns a copy of string with the specified ANSI CSI sequences removed. [[gnu::warn_unused_result]] std::string -strip_ansi_csi_seqs(nonstd::string_view string, - nonstd::string_view strip_actions = "Km"); +strip_ansi_csi_seqs(nonstd::string_view string); // Strip whitespace from left and right side of a string. 
[[gnu::warn_unused_result]] std::string diff --git a/unittest/test_Util.cpp b/unittest/test_Util.cpp index 678c8c69d..3421fcb37 100644 --- a/unittest/test_Util.cpp +++ b/unittest/test_Util.cpp @@ -130,58 +130,15 @@ TEST_CASE("Util::dir_name") CHECK(Util::dir_name("/foo/bar/f.txt") == "/foo/bar"); } -TEST_CASE("Util::{edit,strip}_ansi_csi_seqs") +TEST_CASE("Util::strip_ansi_csi_seqs") { - static constexpr auto input = - "Normal, " - "\x1B[K\x1B[1mbold\x1B[m, " - "\x1B[31mred\x1B[m, " - "\x1B[1;32mbold green\x1B[m.\n"; + const char input[] = + "Normal," + " \x1B[K\x1B[1mbold\x1B[m," + " \x1B[31mred\x1B[m," + " \x1B[1;32mbold green\x1B[m.\n"; - SUBCASE("Remove bold attributes") - { - CHECK(Util::edit_ansi_csi_seqs(input, - [](nonstd::string_view::size_type, - std::string& substr) - { - if (substr.size() > 3 && substr.back() == 'm') { - nonstd::string_view attrs = substr; - attrs.remove_prefix(2); // ESC [ - attrs.remove_suffix(1); // m - std::string edited; - edited.reserve(attrs.size()); - for (auto& attr : Util::split_into_views(attrs, ";")) { - if (attr != "1") { - if (!edited.empty()) { - edited += ';'; - } - edited.append(attr.begin(), attr.end()); - } - } - if (edited.empty()) { - substr.clear(); - } else { - substr.replace(2, attrs.size(), std::move(edited)); - } - } - }) == - "Normal, " - "\x1B[Kbold\x1B[m, " - "\x1B[31mred\x1B[m, " - "\x1B[32mbold green\x1B[m.\n"); - } - - SUBCASE("Strip SGR sequences only") - { - CHECK(Util::strip_ansi_csi_seqs(input, "m") - == "Normal, \x1B[Kbold, red, bold green.\n"); - } - - SUBCASE("Strip default set of CSI sequences") - { - CHECK(Util::strip_ansi_csi_seqs(input) - == "Normal, bold, red, bold green.\n"); - } + CHECK(Util::strip_ansi_csi_seqs(input) == "Normal, bold, red, bold green.\n"); } TEST_CASE("Util::ends_with")