From: Joel Rosdahl
Date: Tue, 11 Nov 2025 19:09:42 +0000 (+0100)
Subject: enhance: Add util::SimpleJsonParser
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=74e48c17ed4c23aac378c6f598d38d62be66001d;p=thirdparty%2Fccache.git

enhance: Add util::SimpleJsonParser
---

diff --git a/src/ccache/util/CMakeLists.txt b/src/ccache/util/CMakeLists.txt
index 7fe6c1fb..494457be 100644
--- a/src/ccache/util/CMakeLists.txt
+++ b/src/ccache/util/CMakeLists.txt
@@ -13,6 +13,7 @@ set(
   file.cpp
   filelock.cpp
   filesystem.cpp
+  json.cpp
   lockfile.cpp
   logging.cpp
   longlivedlockfilemanager.cpp
diff --git a/src/ccache/util/json.cpp b/src/ccache/util/json.cpp
new file mode 100644
index 00000000..676fad5d
--- /dev/null
+++ b/src/ccache/util/json.cpp
@@ -0,0 +1,377 @@
+// Copyright (C) 2025 Joel Rosdahl and other contributors
+//
+// See doc/authors.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "json.hpp"
+
+// NOTE(review): the two include targets below are reconstructed from the names
+// this file uses (FMT, util::is_space, util::is_digit, split_into_views);
+// confirm the header paths.
+#include <ccache/util/format.hpp>
+#include <ccache/util/string.hpp>
+
+#include <utility>
+
+namespace {
+
+// Cursor into the document being parsed: `doc` is the full text and `pos` is
+// the index of the next character to examine.
+struct ParseState
+{
+  std::string_view doc;
+  size_t pos;
+};
+
+// Advance state.pos past all characters that util::is_space accepts.
+void
+skip_whitespace(ParseState& state)
+{
+  while (state.pos < state.doc.size() && util::is_space(state.doc[state.pos])) {
+    ++state.pos;
+  }
+}
+
+// Parse the string literal starting at state.pos (which must be at the opening
+// '"') and return its contents with escape sequences decoded. On success
+// state.pos is left just after the closing '"'.
+//
+// Supported escapes: \" \\ \/ \b \f \n \r \t. \uXXXX escapes, unknown escapes
+// and strings without a closing '"' are reported as errors. All other bytes
+// (including multi-byte UTF-8 sequences) are copied through unchanged.
+tl::expected<std::string, std::string>
+parse_string(ParseState& state)
+{
+  if (state.pos >= state.doc.size() || state.doc[state.pos] != '"') {
+    return tl::unexpected("Expected string");
+  }
+  ++state.pos; // Skip opening '"'
+
+  std::string result;
+  while (state.pos < state.doc.size()) {
+    char ch = state.doc[state.pos];
+
+    if (ch == '"') {
+      ++state.pos; // Skip closing '"'
+      return result;
+    }
+
+    if (ch == '\\') {
+      ++state.pos;
+      if (state.pos >= state.doc.size()) {
+        return tl::unexpected("Unexpected end of string");
+      }
+
+      char escaped = state.doc[state.pos];
+      switch (escaped) {
+      case '"':
+      case '\\':
+      case '/':
+        result += escaped;
+        break;
+      case 'b':
+        result += '\b';
+        break;
+      case 'f':
+        result += '\f';
+        break;
+      case 'n':
+        result += '\n';
+        break;
+      case 'r':
+        result += '\r';
+        break;
+      case 't':
+        result += '\t';
+        break;
+      case 'u':
+        return tl::unexpected("\\uXXXX escape sequences are not supported");
+      default:
+        return tl::unexpected(FMT("Unknown escape sequence: \\{}", escaped));
+      }
+      ++state.pos;
+    } else {
+      result += ch;
+      ++state.pos;
+    }
+  }
+
+  return tl::unexpected("Unterminated string");
+}
+
+// Skip a number, true, false or null by advancing state.pos to the next
+// whitespace, ',', '}' or ']' (or to the end of the document). The skipped
+// token itself is not validated.
+void
+skip_primitive(ParseState& state)
+{
+  while (state.pos < state.doc.size()) {
+    char ch = state.doc[state.pos];
+    if (util::is_space(ch) || ch == ',' || ch == '}' || ch == ']') {
+      break;
+    }
+    ++state.pos;
+  }
+}
+
+// Skip a bracketed value that starts with `open` at state.pos and runs to the
+// matching `close`. Nesting of the same bracket kind is tracked with a depth
+// counter, and string literals are consumed with parse_string so that bracket
+// characters inside strings are not counted. Nothing else is validated.
+// `what` names the value kind ("array" or "object") in error messages.
+tl::expected<void, std::string>
+skip_bracketed(ParseState& state, char open, char close, std::string_view what)
+{
+  if (state.pos >= state.doc.size() || state.doc[state.pos] != open) {
+    return tl::unexpected(FMT("Expected {}", what));
+  }
+  ++state.pos; // Skip opening bracket
+
+  int depth = 1;
+  while (state.pos < state.doc.size() && depth > 0) {
+    const char ch = state.doc[state.pos];
+    if (ch == '"') {
+      auto str_result = parse_string(state); // Parse and discard
+      if (!str_result) {
+        return tl::unexpected(str_result.error());
+      }
+      continue;
+    }
+    if (ch == open) {
+      ++depth;
+    } else if (ch == close) {
+      --depth;
+    }
+    ++state.pos;
+  }
+
+  if (depth != 0) {
+    return tl::unexpected(FMT("Unterminated {}", what));
+  }
+  return {};
+}
+
+// Skip the array starting at state.pos (which must be at '[').
+tl::expected<void, std::string>
+skip_array(ParseState& state)
+{
+  return skip_bracketed(state, '[', ']', "array");
+}
+
+// Skip the object starting at state.pos (which must be at '{').
+tl::expected<void, std::string>
+skip_object(ParseState& state)
+{
+  return skip_bracketed(state, '{', '}', "object");
+}
+
+// Skip the value starting at state.pos: a string, object, array or primitive
+// (number, true, false, null). Fails at the end of the document, on a
+// character that cannot start a value, or if skipping the value itself fails.
+tl::expected<void, std::string>
+skip_value(ParseState& state)
+{
+  if (state.pos >= state.doc.size()) {
+    return tl::unexpected("Unexpected end of document");
+  }
+
+  char ch = state.doc[state.pos];
+
+  if (ch == '"') {
+    auto str_result = parse_string(state); // Parse and discard
+    if (!str_result) {
+      return tl::unexpected(str_result.error());
+    }
+  } else if (ch == '{') {
+    auto obj_result = skip_object(state);
+    if (!obj_result) {
+      return tl::unexpected(obj_result.error());
+    }
+  } else if (ch == '[') {
+    auto arr_result = skip_array(state);
+    if (!arr_result) {
+      return tl::unexpected(arr_result.error());
+    }
+  } else if (ch == 't' || ch == 'f' || ch == 'n' || ch == '-'
+             || util::is_digit(ch)) {
+    skip_primitive(state);
+  } else {
+    return tl::unexpected(FMT("Unexpected character: '{}'", ch));
+  }
+  return {};
+}
+
+// Scan the object starting at state.pos (which must be at '{') for `key`. On
+// success state.pos is at the first character of the value that belongs to
+// `key`. Only keys directly in this object are considered; the values of all
+// other keys are skipped, including nested objects and arrays.
+//
+// Note that the member separator is not enforced: a missing ',' between two
+// members is tolerated.
+tl::expected<void, std::string>
+navigate_to_key(ParseState& state, std::string_view key)
+{
+  if (state.pos >= state.doc.size() || state.doc[state.pos] != '{') {
+    return tl::unexpected("Expected object");
+  }
+  ++state.pos; // Skip '{'
+
+  while (true) {
+    skip_whitespace(state);
+
+    // The end of the object (or of the document) means the key is absent.
+    if (state.pos >= state.doc.size() || state.doc[state.pos] == '}') {
+      return tl::unexpected(FMT("Key '{}' not found", key));
+    }
+
+    if (state.doc[state.pos] != '"') {
+      return tl::unexpected("Expected string key");
+    }
+    auto current_key_result = parse_string(state);
+    if (!current_key_result) {
+      return tl::unexpected(current_key_result.error());
+    }
+
+    skip_whitespace(state);
+    if (state.pos >= state.doc.size() || state.doc[state.pos] != ':') {
+      return tl::unexpected("Expected ':' after key");
+    }
+    ++state.pos; // Skip ':'
+
+    skip_whitespace(state);
+
+    if (*current_key_result == key) {
+      return {}; // Found the key, state.pos is now at the value
+    }
+
+    auto skip_result = skip_value(state);
+    if (!skip_result) {
+      return tl::unexpected(skip_result.error());
+    }
+
+    skip_whitespace(state);
+    if (state.pos < state.doc.size() && state.doc[state.pos] == ',') {
+      ++state.pos; // Skip comma
+    }
+  }
+}
+
+// Parse the array of strings starting at state.pos (which must be at '[') and
+// return its elements. On success state.pos is left just after the closing
+// ']'. An element that is not a string literal is an error. A trailing ','
+// before the closing ']' is accepted.
+tl::expected<std::vector<std::string>, std::string>
+parse_string_array(ParseState& state)
+{
+  if (state.pos >= state.doc.size() || state.doc[state.pos] != '[') {
+    return tl::unexpected("Expected array");
+  }
+  ++state.pos; // Skip '['
+
+  std::vector<std::string> result;
+
+  while (true) {
+    skip_whitespace(state);
+
+    if (state.pos >= state.doc.size()) {
+      return tl::unexpected("Unterminated array");
+    }
+
+    if (state.doc[state.pos] == ']') {
+      ++state.pos; // Skip ']'
+      return result;
+    }
+
+    if (state.doc[state.pos] != '"') {
+      return tl::unexpected("Expected string in array");
+    }
+
+    auto str_result = parse_string(state);
+    if (!str_result) {
+      return tl::unexpected(str_result.error());
+    }
+    result.push_back(std::move(*str_result));
+
+    skip_whitespace(state);
+
+    if (state.pos >= state.doc.size()) {
+      return tl::unexpected("Unterminated array");
+    }
+
+    if (state.doc[state.pos] == ',') {
+      ++state.pos; // Skip comma
+    } else if (state.doc[state.pos] != ']') {
+      return tl::unexpected("Expected ',' or ']' in array");
+    }
+  }
+}
+
+} // namespace
+
+namespace util {
+
+SimpleJsonParser::SimpleJsonParser(std::string_view document)
+  : m_document(document)
+{
+}
+
+tl::expected<std::vector<std::string>, std::string>
+SimpleJsonParser::get_string_array(std::string_view filter) const
+{
+  if (filter.empty() || filter[0] != '.') {
+    return tl::unexpected("Invalid filter: must start with '.'");
+  }
+
+  // Parse filter path, e.g. ".Data.Includes" -> ["Data", "Includes"].
+  auto path = split_into_views(filter.substr(1), ".");
+  if (path.empty()) {
+    return tl::unexpected("Empty filter path");
+  }
+
+  ParseState state{m_document, 0};
+  skip_whitespace(state);
+
+  if (state.pos >= state.doc.size() || state.doc[state.pos] != '{') {
+    return tl::unexpected("Expected object at root");
+  }
+
+  // Navigate through nested objects.
+  for (size_t i = 0; i + 1 < path.size(); ++i) {
+    auto nav_result = navigate_to_key(state, path[i]);
+    if (!nav_result) {
+      return tl::unexpected(nav_result.error());
+    }
+    skip_whitespace(state);
+    if (state.pos >= state.doc.size() || state.doc[state.pos] != '{') {
+      return tl::unexpected(FMT("Expected object for key '{}'", path[i]));
+    }
+  }
+
+  // Navigate to the final key which should contain an array.
+  auto nav_result = navigate_to_key(state, path.back());
+  if (!nav_result) {
+    return tl::unexpected(nav_result.error());
+  }
+  skip_whitespace(state);
+
+  if (state.pos >= state.doc.size() || state.doc[state.pos] != '[') {
+    return tl::unexpected(FMT("Expected array for key '{}'", path.back()));
+  }
+
+  return parse_string_array(state);
+}
+
+} // namespace util
diff --git a/src/ccache/util/json.hpp b/src/ccache/util/json.hpp
new file mode 100644
index 00000000..4b7f9e5e
--- /dev/null
+++ b/src/ccache/util/json.hpp
@@ -0,0 +1,53 @@
+// Copyright (C) 2025 Joel Rosdahl and other contributors
+//
+// See doc/authors.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+#include <tl/expected.hpp>
+
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace util {
+
+// Simple JSON parser that is tailored for parsing MSVC's /sourceDependencies
+// files.
+//
+// Does not support \uXXXX escapes and lots of other things.
+//
+// The parser only stores a view of the document, so the document text must
+// stay alive and unmodified for as long as the parser is used.
+class SimpleJsonParser
+{
+public:
+  explicit SimpleJsonParser(std::string_view document);
+
+  // Extract array of strings from the document. `filter` is a jq-like filter
+  // (e.g. ".Data.Includes") that locates the string array to extract. The
+  // filter syntax currently only supports nested objects.
+  //
+  // Returns the array elements on success, otherwise an error message.
+  tl::expected<std::vector<std::string>, std::string>
+  get_string_array(std::string_view filter) const;
+
+private:
+  std::string_view m_document;
+};
+
+} // namespace util
diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt
index 98e9fdc8..84dbe706 100644
--- a/unittest/CMakeLists.txt
+++ b/unittest/CMakeLists.txt
@@ -28,6 +28,7 @@ set(
   test_util_exec.cpp
   test_util_expected.cpp
   test_util_file.cpp
+  test_util_json.cpp
   test_util_lockfile.cpp
   test_util_path.cpp
   test_util_string.cpp
diff --git a/unittest/test_util_json.cpp b/unittest/test_util_json.cpp
new file mode 100644
index 00000000..558484af
--- /dev/null
+++ b/unittest/test_util_json.cpp
@@ -0,0 +1,344 @@
+// Copyright (C) 2025 Joel Rosdahl and other contributors
+//
+// See doc/authors.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "testutil.hpp"
+
+#include <ccache/util/json.hpp> // NOTE(review): reconstructed, confirm path
+#include <doctest/doctest.h>    // NOTE(review): reconstructed, confirm path
+
+#include <string> // NOTE(review): reconstructed from std::string usage
+
+TEST_SUITE_BEGIN("json");
+
+TEST_CASE("SimpleJsonParser")
+{
+  SUBCASE("Parse MSVC /sourceDependencies file")
+  {
+    std::string json = R"({
+  "Version": "1.1",
+  "Data": {
+    "Source": "C:\\path\\to\\source.cpp",
+    "ProvidedModule": "",
+    "Includes": [
+      "C:\\path\\to\\header\"with\"quotes.h",
+      "C:\\path\\to\\header\\with\\backslashes.h",
+      "C:\\日本語\\header1.h"
+    ]
+  }
+})";
+
+    util::SimpleJsonParser parser(json);
+    auto includes = parser.get_string_array(".Data.Includes");
+    REQUIRE(includes);
+    REQUIRE(includes->size() == 3);
+    CHECK((*includes)[0] == "C:\\path\\to\\header\"with\"quotes.h");
+    CHECK((*includes)[1] == "C:\\path\\to\\header\\with\\backslashes.h");
+    CHECK((*includes)[2] == "C:\\日本語\\header1.h");
+  }
+
+  SUBCASE("Empty array")
+  {
+    std::string json = R"({
+  "Data": {
+    "Includes": []
+  }
+})";
+
+    util::SimpleJsonParser parser(json);
+    auto includes = parser.get_string_array(".Data.Includes");
+    REQUIRE(includes);
+    CHECK(includes->empty());
+  }
+
+  SUBCASE("Single element array")
+  {
+    std::string json = R"({
+  "Data": {
+    "Includes": ["single.h"]
+  }
+})";
+
+    util::SimpleJsonParser parser(json);
+    auto includes = parser.get_string_array(".Data.Includes");
+    REQUIRE(includes);
+    REQUIRE(includes->size() == 1);
+    CHECK((*includes)[0] == "single.h");
+  }
+
+  SUBCASE("Array with whitespace variations")
+  {
+    std::string json = R"({
+"Data":{"Includes":["a.h" , "b.h","c.h"]}
+})";
+
+    util::SimpleJsonParser parser(json);
+    auto includes = parser.get_string_array(".Data.Includes");
+    REQUIRE(includes);
+    REQUIRE(includes->size() == 3);
+    CHECK((*includes)[0] == "a.h");
+    CHECK((*includes)[1] == "b.h");
+    CHECK((*includes)[2] == "c.h");
+  }
+
+  SUBCASE("Escape sequences")
+  {
+    std::string json = R"({
+  "Data": {
+    "Includes": [
+      "path\\with\\backslashes",
+      "string\"with\"quotes",
+      "line1\nline2",
+      "tab\tseparated",
+      "carriage\rreturn",
+      "form\ffeed",
+      "back\bspace",
+      "forward/slash"
+    ]
+  }
+})";
+
+    util::SimpleJsonParser parser(json);
+    auto includes = parser.get_string_array(".Data.Includes");
+    REQUIRE(includes);
+    REQUIRE(includes->size() == 8);
+    CHECK((*includes)[0] == "path\\with\\backslashes");
+    CHECK((*includes)[1] == "string\"with\"quotes");
+    CHECK((*includes)[2] == "line1\nline2");
+    CHECK((*includes)[3] == "tab\tseparated");
+    CHECK((*includes)[4] == "carriage\rreturn");
+    CHECK((*includes)[5] == "form\ffeed");
+    CHECK((*includes)[6] == "back\bspace");
+    CHECK((*includes)[7] == "forward/slash");
+  }
+
+  SUBCASE("UTF-8 characters")
+  {
+    std::string json = R"({
+  "Data": {
+    "Includes": [
+      "日本語.h",
+      "中文.cpp",
+      "한글.hpp",
+      "emoji😀.c",
+      "Ελληνικά.h"
+    ]
+  }
+})";
+
+    util::SimpleJsonParser parser(json);
+    auto includes = parser.get_string_array(".Data.Includes");
+    REQUIRE(includes);
+    REQUIRE(includes->size() == 5);
+    CHECK((*includes)[0] == "日本語.h");
+    CHECK((*includes)[1] == "中文.cpp");
+    CHECK((*includes)[2] == "한글.hpp");
+    CHECK((*includes)[3] == "emoji😀.c");
+    CHECK((*includes)[4] == "Ελληνικά.h");
+  }
+
+  SUBCASE("Nested objects")
+  {
+    std::string json = R"({
+  "Level1": {
+    "Level2": {
+      "Level3": {
+        "Files": ["deep.h"]
+      }
+    }
+  }
+})";
+
+    util::SimpleJsonParser parser(json);
+    auto files = parser.get_string_array(".Level1.Level2.Level3.Files");
+    REQUIRE(files);
+    REQUIRE(files->size() == 1);
+    CHECK((*files)[0] == "deep.h");
+  }
+
+  SUBCASE("Object with multiple keys")
+  {
+    std::string json = R"({
+  "Version": "1.0",
+  "Data": {
+    "Source": "main.cpp",
+    "Includes": ["header.h"],
+    "Flags": ["-O2", "-Wall"]
+  }
+})";
+
+    util::SimpleJsonParser parser(json);
+    auto includes = parser.get_string_array(".Data.Includes");
+    REQUIRE(includes);
+    REQUIRE(includes->size() == 1);
+    CHECK((*includes)[0] == "header.h");
+
+    util::SimpleJsonParser parser2(json);
+    auto flags = parser2.get_string_array(".Data.Flags");
+    REQUIRE(flags);
+    REQUIRE(flags->size() == 2);
+    CHECK((*flags)[0] == "-O2");
+    CHECK((*flags)[1] == "-Wall");
+  }
+
+  SUBCASE("Skip non-target values")
+  {
+    std::string json = R"({
+  "Other": {
+    "NestedArray": [1, 2, 3],
+    "NestedObject": {"key": "value"}
+  },
+  "Data": {
+    "Includes": ["target.h"]
+  }
+})";
+
+    util::SimpleJsonParser parser(json);
+    auto includes = parser.get_string_array(".Data.Includes");
+    REQUIRE(includes);
+    REQUIRE(includes->size() == 1);
+    CHECK((*includes)[0] == "target.h");
+  }
+
+  SUBCASE("Empty strings in array")
+  {
+    std::string json = R"({
+  "Data": {
+    "Includes": ["", "file.h", ""]
+  }
+})";
+
+    util::SimpleJsonParser parser(json);
+    auto includes = parser.get_string_array(".Data.Includes");
+    REQUIRE(includes);
+    REQUIRE(includes->size() == 3);
+    CHECK((*includes)[0] == "");
+    CHECK((*includes)[1] == "file.h");
+    CHECK((*includes)[2] == "");
+  }
+
+  SUBCASE("Error: Invalid filter (no leading dot)")
+  {
+    std::string json = R"({"Data": {"Includes": []}})";
+    util::SimpleJsonParser parser(json);
+    auto result = parser.get_string_array("Data.Includes");
+    CHECK(!result);
+    CHECK(result.error() == "Invalid filter: must start with '.'");
+  }
+
+  SUBCASE("Error: Invalid filter (empty)")
+  {
+    std::string json = R"({"Data": {"Includes": []}})";
+    util::SimpleJsonParser parser(json);
+    auto result = parser.get_string_array("");
+    CHECK(!result);
+    CHECK(result.error() == "Invalid filter: must start with '.'");
+  }
+
+  SUBCASE("Error: Key not found")
+  {
+    std::string json = R"({"Data": {"Other": []}})";
+    util::SimpleJsonParser parser(json);
+    auto result = parser.get_string_array(".Data.Includes");
+    CHECK(!result);
+    CHECK(result.error().find("not found") != std::string::npos);
+  }
+
+  SUBCASE("Error: Not an array")
+  {
+    std::string json = R"({"Data": {"Includes": "not-an-array"}})";
+    util::SimpleJsonParser parser(json);
+    auto result = parser.get_string_array(".Data.Includes");
+    CHECK(!result);
+    CHECK(result.error().find("Expected array") != std::string::npos);
+  }
+
+  SUBCASE("Error: Not an object")
+  {
+    std::string json = R"({"Data": "not-an-object"})";
+    util::SimpleJsonParser parser(json);
+    auto result = parser.get_string_array(".Data.Includes");
+    CHECK(!result);
+    CHECK(result.error().find("Expected object") != std::string::npos);
+  }
+
+  SUBCASE("Error: Unterminated string")
+  {
+    std::string json = R"({"Data": {"Includes": ["unterminated]}})";
+    util::SimpleJsonParser parser(json);
+    auto result = parser.get_string_array(".Data.Includes");
+    CHECK(!result);
+    CHECK(result.error() == "Unterminated string");
+  }
+
+  SUBCASE("Error: Unterminated array")
+  {
+    std::string json = R"({"Data": {"Includes": ["file.h")";
+    util::SimpleJsonParser parser(json);
+    auto result = parser.get_string_array(".Data.Includes");
+    CHECK(!result);
+    CHECK(result.error() == "Unterminated array");
+  }
+
+  SUBCASE("Error: Invalid escape sequence")
+  {
+    std::string json = R"({"Data": {"Includes": ["invalid\xescape"]}})";
+    util::SimpleJsonParser parser(json);
+    auto result = parser.get_string_array(".Data.Includes");
+    CHECK(!result);
+    CHECK(result.error().find("Unknown escape sequence") != std::string::npos);
+  }
+
+  SUBCASE("Error: \\uXXXX escape sequence not supported")
+  {
+    std::string json = R"({"Data": {"Includes": ["unicode\u0041char"]}})";
+    util::SimpleJsonParser parser(json);
+    auto result = parser.get_string_array(".Data.Includes");
+    CHECK(!result);
+    CHECK(result.error() == "\\uXXXX escape sequences are not supported");
+  }
+
+  SUBCASE("Error: \\uXXXX in nested object")
+  {
+    std::string json =
+      R"({"Data": {"Key": "value\u1234", "Includes": ["file.h"]}})";
+    util::SimpleJsonParser parser(json);
+    auto result = parser.get_string_array(".Data.Includes");
+    CHECK(!result);
+    CHECK(result.error() == "\\uXXXX escape sequences are not supported");
+  }
+
+  SUBCASE("Error: Root is not an object")
+  {
+    std::string json = R"(["array", "at", "root"])";
+    util::SimpleJsonParser parser(json);
+    auto result = parser.get_string_array(".Data.Includes");
+    CHECK(!result);
+    CHECK(result.error() == "Expected object at root");
+  }
+
+  SUBCASE("Error: Non-string element in array")
+  {
+    std::string json = R"({"Data": {"Includes": ["file.h", 123]}})";
+    util::SimpleJsonParser parser(json);
+    auto result = parser.get_string_array(".Data.Includes");
+    CHECK(!result);
+    CHECK(result.error() == "Expected string in array");
+  }
+}
+
+TEST_SUITE_END();