From: David Malcolm Date: Tue, 20 Jan 2026 17:06:12 +0000 (-0500) Subject: sarif-replay: skip "sarif:/" embedded links [PR123056] X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=25910642d367097d19ea90fb2b54e2858ddd74b9;p=thirdparty%2Fgcc.git sarif-replay: skip "sarif:/" embedded links [PR123056] PR sarif-replay/123056 notes that when using sarif-replay to generate HTML from a .sarif file containing an embedded "sarif:/" link we get bogus output containing SGR codes. The links in question come from GCC's sarif output for cross-referencing event IDs within an execution path. These links are JSON pointers. I experimented with propertly supporting the JSON Pointer spec (RFC 6901) within GCC, and I have a partially working implementation which parses JSON pointers here, and, where appropriate, reconstructs the pertinent event ID. However, that feels too invasive to be pushing in stage 4. Hence for GCC 16, this patch simply skips the link part of "sarif:/" links in sarif-replay, avoiding corrupt output, deferring the more ambitious round-tripping fix to GCC 17. gcc/ChangeLog: PR sarif-replay/123056 * libsarifreplay.cc (struct embedded_link): Move decl earlier. (sarif_replayer::append_embeddded_link): New. (sarif_replayer::make_plain_text_within_result_message): Move the link-replay logic to the above, and skip the link part of intra-sarif links. gcc/testsuite/ChangeLog: PR sarif-replay/123056 * sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif: New test. * sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py: New test script. * sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py: New test script. Signed-off-by: David Malcolm --- diff --git a/gcc/libsarifreplay.cc b/gcc/libsarifreplay.cc index 01d3a975f62..20a0aec3c7f 100644 --- a/gcc/libsarifreplay.cc +++ b/gcc/libsarifreplay.cc @@ -292,6 +292,12 @@ public: libgdiagnostics::message_buffer m_label; }; +struct embedded_link +{ + std::string text; + std::string destination; +}; + using id_map = std::map; class sarif_replayer @@ -724,6 +730,10 @@ private: return sub; } + void + append_embeddded_link (libgdiagnostics::message_buffer &result, + const embedded_link &link); + /* The manager to replay the SARIF files to. */ libgdiagnostics::manager m_output_mgr; @@ -1499,12 +1509,6 @@ maybe_consume_placeholder (const char *&iter_src, unsigned *out_arg_idx) return false; } -struct embedded_link -{ - std::string text; - std::string destination; -}; - /* If ITER_SRC starts with an embedded link as per §3.11.6, advance ITER_SRC to immediately beyond the link, and return the link. @@ -1578,6 +1582,21 @@ maybe_consume_embedded_link (const char *&iter_src) return std::make_unique (std::move (result)); } +void +sarif_replayer::append_embeddded_link (libgdiagnostics::message_buffer &result, + const embedded_link &link) +{ + /* We can't yet decode intra-sarif links, so simply use their text. */ + if (!strncmp (link.destination.c_str (), "sarif:/", strlen ("sarif:/"))) + { + result += link.text.c_str (); + return; + } + result.begin_url (link.destination.c_str ()); + result += link.text.c_str (); + result.end_url (); +} + /* Lookup the plain text string within a result.message (§3.27.11), and substitute for any placeholders (§3.11.5) and handle any embedded links (§3.11.6). @@ -1662,13 +1681,7 @@ make_plain_text_within_result_message (const json::object *tool_component_obj, } } else if (auto link = maybe_consume_embedded_link (iter_src)) - { - result.begin_url (link->destination.c_str ()); - result += link->text.c_str (); - result.end_url (); - /* TODO: potentially could try to convert - intra-sarif links into event ids. */ - } + append_embeddded_link (result, *link); else { result += ch; diff --git a/gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif new file mode 100644 index 00000000000..cbf5f4af64b --- /dev/null +++ b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif @@ -0,0 +1,139 @@ +/* { dg-additional-options "-fdiagnostics-add-output=experimental-html:file=3.11.6-embedded-links-pr123056.sarif.html,javascript=no" } */ +/* { dg-additional-options "-fdiagnostics-add-output=sarif:file=3.11.6-embedded-links-pr123056.sarif.roundtrip.sarif" } */ + +{"$schema": "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json", + "version": "2.1.0", + "runs": [{"tool": {"driver": {"name": "GNU C23", + "fullName": "GNU C23 (GCC) version 16.0.1 20260114 (experimental) (x86_64-pc-linux-gnu)", + "version": "16.0.1 20260114 (experimental)", + "informationUri": "https://gcc.gnu.org/gcc-16/", + "rules": [{"id": "-Wanalyzer-malloc-leak", + "helpUri": "https://gcc.gnu.org/onlinedocs/gcc/Static-Analyzer-Options.html#index-Wanalyzer-malloc-leak"}]}}, + "taxonomies": [{"name": "CWE", + "version": "4.7", + "organization": "MITRE", + "shortDescription": {"text": "The MITRE Common Weakness Enumeration"}, + "taxa": [{"id": "401", + "helpUri": "https://cwe.mitre.org/data/definitions/401.html"}]}], + "invocations": [{"arguments": ["./cc1", + "-quiet", + "-iprefix", + "/home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc/../lib/gcc/x86_64-pc-linux-gnu/16.0.1/", + "-isystem", + "./include", + "-isystem", + "./include-fixed", + "pr123056.c", + "-quiet", + "-dumpbase", + "pr123056.c", + "-dumpbase-ext", + ".c", + "-mtune=generic", + "-march=x86-64", + "-fanalyzer", + "-fdiagnostics-add-output=sarif", + "-fdiagnostics-add-output=experimental-html", + "-o", + "pr123056.s"], + "workingDirectory": {"uri": "/home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc"}, + "startTimeUtc": "2026-01-16T17:43:19Z", + "executionSuccessful": true, + "toolExecutionNotifications": [], + "endTimeUtc": "2026-01-16T17:43:19Z"}], + "originalUriBaseIds": {"PWD": {"uri": "file:///home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc/"}}, + "artifacts": [{"location": {"uri": "pr123056.c", + "uriBaseId": "PWD"}, + "sourceLanguage": "c", + "contents": {"text": "void test (void)\n{\n void *p = __builtin_malloc (1024);\n}\n"}, + "roles": ["analysisTarget", + "tracedFile"]}], + "results": [{"ruleId": "-Wanalyzer-malloc-leak", + "taxa": [{"id": "401", + "toolComponent": {"name": "cwe"}}], + "properties": {"gcc/analyzer/saved_diagnostic/sm": "malloc", + "gcc/analyzer/saved_diagnostic/ploc": {"enode": 5}, + "gcc/analyzer/saved_diagnostic/var": "p_3", + "gcc/analyzer/saved_diagnostic/sval": "&HEAP_ALLOCATED_REGION(14)", + "gcc/analyzer/saved_diagnostic/state": "unchecked ({free})", + "gcc/analyzer/saved_diagnostic/idx": 0, + "gcc/analyzer/saved_diagnostic/duplicates": [{"properties": {"gcc/analyzer/saved_diagnostic/sm": "malloc", + "gcc/analyzer/saved_diagnostic/ploc": {"enode": 5}, + "gcc/analyzer/saved_diagnostic/var": "p_3", + "gcc/analyzer/saved_diagnostic/sval": "&HEAP_ALLOCATED_REGION(14)", + "gcc/analyzer/saved_diagnostic/state": "unchecked ({free})", + "gcc/analyzer/saved_diagnostic/idx": 1, + "gcc/analyzer/pending_diagnostic/kind": "malloc_leak"}}], + "gcc/analyzer/pending_diagnostic/kind": "malloc_leak"}, + "level": "warning", + "message": {"text": "leak of ‘p’"}, + "locations": [{"physicalLocation": {"artifactLocation": {"uri": "pr123056.c", + "uriBaseId": "PWD"}, + "region": {"startLine": 4, + "startColumn": 1, + "endColumn": 2}, + "contextRegion": {"startLine": 4, + "snippet": {"text": "}\n"}}}, + "logicalLocations": [{"index": 0, + "fullyQualifiedName": "test"}]}], + "codeFlows": [{"threadFlows": [{"id": "main", + "locations": [{"properties": {"gcc/analyzer/checker_event/emission_id": "(1)", + "gcc/analyzer/checker_event/kind": "state_change"}, + "location": {"physicalLocation": {"artifactLocation": {"uri": "pr123056.c", + "uriBaseId": "PWD"}, + "region": {"startLine": 3, + "startColumn": 13, + "endColumn": 36}, + "contextRegion": {"startLine": 3, + "snippet": {"text": " void *p = __builtin_malloc (1024);\n"}}}, + "logicalLocations": [{"index": 0, + "fullyQualifiedName": "test"}], + "message": {"text": "allocated here"}}, + "kinds": ["acquire", + "memory"], + "nestingLevel": 1, + "executionOrder": 1}, + {"properties": {"gcc/analyzer/checker_event/emission_id": "(2)", + "gcc/analyzer/checker_event/kind": "warning"}, + "location": {"physicalLocation": {"artifactLocation": {"uri": "pr123056.c", + "uriBaseId": "PWD"}, + "region": {"startLine": 4, + "startColumn": 1, + "endColumn": 2}, + "contextRegion": {"startLine": 4, + "snippet": {"text": "}\n"}}}, + "logicalLocations": [{"index": 0, + "fullyQualifiedName": "test"}], + "message": {"text": "‘p’ leaks here; was allocated at [(1)](sarif:/runs/0/results/0/codeFlows/0/threadFlows/0/locations/0)"}}, + "kinds": ["danger"], + "nestingLevel": 1, + "executionOrder": 2}]}]}]}], + "logicalLocations": [{"name": "test", + "fullyQualifiedName": "test", + "decoratedName": "test", + "kind": "function", + "index": 0}]}]} + +/* { dg-begin-multiline-output "" } +In function 'test': +pr123056.c:4:1: warning: leak of ‘p’ [-Wanalyzer-malloc-leak] + 4 | } + | ^ + 'test': events 1-2 + 3 | void *p = __builtin_malloc (1024); + | ^~~~~~~~~~~~~~~~~~~~~~~ + | | + | (1) allocated here + 4 | } + | ~ + | | + | (2) ‘p’ leaks here; was allocated at (1) + { dg-end-multiline-output "" } */ + +/* Use a Python script to verify various properties about the generated + .html file: + { dg-final { run-html-pytest 3.11.6-embedded-links-pr123056.sarif "2.1.0-valid/embedded-links-pr123056-check-html.py" } } */ + +/* Use a Python script to verify various properties about the *generated* + .sarif file: + { dg-final { run-sarif-pytest 3.11.6-embedded-links-pr123056.sarif.roundtrip "2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py" } } */ diff --git a/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py new file mode 100644 index 00000000000..1e8069f2177 --- /dev/null +++ b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py @@ -0,0 +1,25 @@ +from htmltest import * + +import pytest + +@pytest.fixture(scope='function', autouse=True) +def html_tree(): + return html_tree_from_env() + +def test_generated_html(html_tree): + root = html_tree.getroot () + assert root.tag == make_tag('html') + + head = root.find('xhtml:head', ns) + assert head is not None + + diag = get_diag_by_index(html_tree, 0) + + exec_path = diag.find("./xhtml:div[@id='execution-path']", ns) + assert exec_path is not None + + label = exec_path.find('xhtml:label', ns) + assert label.text == 'Execution path with 2 events' + + final_event = exec_path.find(".//xhtml:span[@id='gcc-diag-0-event-1']", ns) + assert final_event.text == '(2) ‘p’ leaks here; was allocated at (1)' diff --git a/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py new file mode 100644 index 00000000000..5e45e2a0c7f --- /dev/null +++ b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py @@ -0,0 +1,14 @@ +from sarif import * + +import pytest + +@pytest.fixture(scope='function', autouse=True) +def sarif(): + return sarif_from_env() + +def test_roundtrip_of_url_in_generated_sarif(sarif): + result = get_result_by_index(sarif, 0) + assert result['level'] == 'warning' + assert result['message']['text'] == "leak of ‘p’" + assert (result['codeFlows'][0]['threadFlows'][0]['locations'][1]['location']['message']['text'] + == "‘p’ leaks here; was allocated at (1)")