]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
sarif-replay: skip "sarif:/" embedded links [PR123056]
authorDavid Malcolm <dmalcolm@redhat.com>
Tue, 20 Jan 2026 17:06:12 +0000 (12:06 -0500)
committerDavid Malcolm <dmalcolm@redhat.com>
Tue, 20 Jan 2026 17:06:12 +0000 (12:06 -0500)
PR sarif-replay/123056 notes that when using sarif-replay to generate
HTML from a .sarif file containing an embedded "sarif:/" link we get
bogus output containing SGR codes.

The links in question come from GCC's sarif output for cross-referencing
event IDs within an execution path.

These links are JSON pointers.  I experimented with propertly supporting
the JSON Pointer spec (RFC 6901) within GCC, and I have a partially
working implementation which parses JSON pointers here, and, where
appropriate, reconstructs the pertinent event ID.

However, that feels too invasive to be pushing in stage 4.   Hence for
GCC 16, this patch simply skips the link part of "sarif:/" links in
sarif-replay, avoiding corrupt output, deferring the more ambitious
round-tripping fix to GCC 17.

gcc/ChangeLog:
PR sarif-replay/123056
* libsarifreplay.cc (struct embedded_link): Move decl earlier.
(sarif_replayer::append_embeddded_link): New.
(sarif_replayer::make_plain_text_within_result_message): Move the
link-replay logic to the above, and skip the link part of
intra-sarif links.

gcc/testsuite/ChangeLog:
PR sarif-replay/123056
* sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif: New test.
* sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py:
New test script.
* sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py:
New test script.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
gcc/libsarifreplay.cc
gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif [new file with mode: 0644]
gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py [new file with mode: 0644]
gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py [new file with mode: 0644]

index 01d3a975f62f05db5ffa4aed1a269286321b539f..20a0aec3c7fcb6127b8a6c194c1fb19b6823f435 100644 (file)
@@ -292,6 +292,12 @@ public:
   libgdiagnostics::message_buffer m_label;
 };
 
+struct embedded_link
+{
+  std::string text;
+  std::string destination;
+};
+
 using id_map = std::map<std::string, const json::string *>;
 
 class sarif_replayer
@@ -724,6 +730,10 @@ private:
     return sub;
   }
 
+  void
+  append_embeddded_link (libgdiagnostics::message_buffer &result,
+                        const embedded_link &link);
+
   /* The manager to replay the SARIF files to.  */
   libgdiagnostics::manager m_output_mgr;
 
@@ -1499,12 +1509,6 @@ maybe_consume_placeholder (const char *&iter_src, unsigned *out_arg_idx)
   return false;
 }
 
-struct embedded_link
-{
-  std::string text;
-  std::string destination;
-};
-
 /*  If ITER_SRC starts with an embedded link as per §3.11.6, advance ITER_SRC
     to immediately beyond the link, and return the link.
 
@@ -1578,6 +1582,21 @@ maybe_consume_embedded_link (const char *&iter_src)
   return std::make_unique<embedded_link> (std::move (result));
 }
 
+void
+sarif_replayer::append_embeddded_link (libgdiagnostics::message_buffer &result,
+                                      const embedded_link &link)
+{
+  /* We can't yet decode intra-sarif links, so simply use their text.  */
+  if (!strncmp (link.destination.c_str (), "sarif:/", strlen ("sarif:/")))
+    {
+      result += link.text.c_str ();
+      return;
+    }
+  result.begin_url (link.destination.c_str ());
+  result += link.text.c_str ();
+  result.end_url ();
+}
+
 /* Lookup the plain text string within a result.message (§3.27.11),
    and substitute for any placeholders (§3.11.5) and handle any
    embedded links (§3.11.6).
@@ -1662,13 +1681,7 @@ make_plain_text_within_result_message (const json::object *tool_component_obj,
            }
        }
       else if (auto link = maybe_consume_embedded_link (iter_src))
-       {
-         result.begin_url (link->destination.c_str ());
-         result += link->text.c_str ();
-         result.end_url ();
-         /* TODO: potentially could try to convert
-            intra-sarif links into event ids.  */
-       }
+       append_embeddded_link (result, *link);
       else
        {
          result += ch;
diff --git a/gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/3.11.6-embedded-links-pr123056.sarif
new file mode 100644 (file)
index 0000000..cbf5f4a
--- /dev/null
@@ -0,0 +1,139 @@
+/* { dg-additional-options "-fdiagnostics-add-output=experimental-html:file=3.11.6-embedded-links-pr123056.sarif.html,javascript=no" } */
+/* { dg-additional-options "-fdiagnostics-add-output=sarif:file=3.11.6-embedded-links-pr123056.sarif.roundtrip.sarif" } */
+
+{"$schema": "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json",
+ "version": "2.1.0",
+ "runs": [{"tool": {"driver": {"name": "GNU C23",
+                               "fullName": "GNU C23 (GCC) version 16.0.1 20260114 (experimental) (x86_64-pc-linux-gnu)",
+                               "version": "16.0.1 20260114 (experimental)",
+                               "informationUri": "https://gcc.gnu.org/gcc-16/",
+                               "rules": [{"id": "-Wanalyzer-malloc-leak",
+                                          "helpUri": "https://gcc.gnu.org/onlinedocs/gcc/Static-Analyzer-Options.html#index-Wanalyzer-malloc-leak"}]}},
+           "taxonomies": [{"name": "CWE",
+                           "version": "4.7",
+                           "organization": "MITRE",
+                           "shortDescription": {"text": "The MITRE Common Weakness Enumeration"},
+                           "taxa": [{"id": "401",
+                                     "helpUri": "https://cwe.mitre.org/data/definitions/401.html"}]}],
+           "invocations": [{"arguments": ["./cc1",
+                                          "-quiet",
+                                          "-iprefix",
+                                          "/home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc/../lib/gcc/x86_64-pc-linux-gnu/16.0.1/",
+                                          "-isystem",
+                                          "./include",
+                                          "-isystem",
+                                          "./include-fixed",
+                                          "pr123056.c",
+                                          "-quiet",
+                                          "-dumpbase",
+                                          "pr123056.c",
+                                          "-dumpbase-ext",
+                                          ".c",
+                                          "-mtune=generic",
+                                          "-march=x86-64",
+                                          "-fanalyzer",
+                                          "-fdiagnostics-add-output=sarif",
+                                          "-fdiagnostics-add-output=experimental-html",
+                                          "-o",
+                                          "pr123056.s"],
+                            "workingDirectory": {"uri": "/home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc"},
+                            "startTimeUtc": "2026-01-16T17:43:19Z",
+                            "executionSuccessful": true,
+                            "toolExecutionNotifications": [],
+                            "endTimeUtc": "2026-01-16T17:43:19Z"}],
+           "originalUriBaseIds": {"PWD": {"uri": "file:///home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc/"}},
+           "artifacts": [{"location": {"uri": "pr123056.c",
+                                       "uriBaseId": "PWD"},
+                          "sourceLanguage": "c",
+                          "contents": {"text": "void test (void)\n{\n  void *p = __builtin_malloc (1024);\n}\n"},
+                          "roles": ["analysisTarget",
+                                    "tracedFile"]}],
+           "results": [{"ruleId": "-Wanalyzer-malloc-leak",
+                        "taxa": [{"id": "401",
+                                  "toolComponent": {"name": "cwe"}}],
+                        "properties": {"gcc/analyzer/saved_diagnostic/sm": "malloc",
+                                       "gcc/analyzer/saved_diagnostic/ploc": {"enode": 5},
+                                       "gcc/analyzer/saved_diagnostic/var": "p_3",
+                                       "gcc/analyzer/saved_diagnostic/sval": "&HEAP_ALLOCATED_REGION(14)",
+                                       "gcc/analyzer/saved_diagnostic/state": "unchecked ({free})",
+                                       "gcc/analyzer/saved_diagnostic/idx": 0,
+                                       "gcc/analyzer/saved_diagnostic/duplicates": [{"properties": {"gcc/analyzer/saved_diagnostic/sm": "malloc",
+                                                                                                    "gcc/analyzer/saved_diagnostic/ploc": {"enode": 5},
+                                                                                                    "gcc/analyzer/saved_diagnostic/var": "p_3",
+                                                                                                    "gcc/analyzer/saved_diagnostic/sval": "&HEAP_ALLOCATED_REGION(14)",
+                                                                                                    "gcc/analyzer/saved_diagnostic/state": "unchecked ({free})",
+                                                                                                    "gcc/analyzer/saved_diagnostic/idx": 1,
+                                                                                                    "gcc/analyzer/pending_diagnostic/kind": "malloc_leak"}}],
+                                       "gcc/analyzer/pending_diagnostic/kind": "malloc_leak"},
+                        "level": "warning",
+                        "message": {"text": "leak of ‘p’"},
+                        "locations": [{"physicalLocation": {"artifactLocation": {"uri": "pr123056.c",
+                                                                                 "uriBaseId": "PWD"},
+                                                            "region": {"startLine": 4,
+                                                                       "startColumn": 1,
+                                                                       "endColumn": 2},
+                                                            "contextRegion": {"startLine": 4,
+                                                                              "snippet": {"text": "}\n"}}},
+                                       "logicalLocations": [{"index": 0,
+                                                             "fullyQualifiedName": "test"}]}],
+                        "codeFlows": [{"threadFlows": [{"id": "main",
+                                                        "locations": [{"properties": {"gcc/analyzer/checker_event/emission_id": "(1)",
+                                                                                      "gcc/analyzer/checker_event/kind": "state_change"},
+                                                                       "location": {"physicalLocation": {"artifactLocation": {"uri": "pr123056.c",
+                                                                                                                              "uriBaseId": "PWD"},
+                                                                                                         "region": {"startLine": 3,
+                                                                                                                    "startColumn": 13,
+                                                                                                                    "endColumn": 36},
+                                                                                                         "contextRegion": {"startLine": 3,
+                                                                                                                           "snippet": {"text": "  void *p = __builtin_malloc (1024);\n"}}},
+                                                                                    "logicalLocations": [{"index": 0,
+                                                                                                          "fullyQualifiedName": "test"}],
+                                                                                    "message": {"text": "allocated here"}},
+                                                                       "kinds": ["acquire",
+                                                                                 "memory"],
+                                                                       "nestingLevel": 1,
+                                                                       "executionOrder": 1},
+                                                                      {"properties": {"gcc/analyzer/checker_event/emission_id": "(2)",
+                                                                                      "gcc/analyzer/checker_event/kind": "warning"},
+                                                                       "location": {"physicalLocation": {"artifactLocation": {"uri": "pr123056.c",
+                                                                                                                              "uriBaseId": "PWD"},
+                                                                                                         "region": {"startLine": 4,
+                                                                                                                    "startColumn": 1,
+                                                                                                                    "endColumn": 2},
+                                                                                                         "contextRegion": {"startLine": 4,
+                                                                                                                           "snippet": {"text": "}\n"}}},
+                                                                                    "logicalLocations": [{"index": 0,
+                                                                                                          "fullyQualifiedName": "test"}],
+                                                                                    "message": {"text": "‘p’ leaks here; was allocated at [(1)](sarif:/runs/0/results/0/codeFlows/0/threadFlows/0/locations/0)"}},
+                                                                       "kinds": ["danger"],
+                                                                       "nestingLevel": 1,
+                                                                       "executionOrder": 2}]}]}]}],
+           "logicalLocations": [{"name": "test",
+                                 "fullyQualifiedName": "test",
+                                 "decoratedName": "test",
+                                 "kind": "function",
+                                 "index": 0}]}]}
+
+/* { dg-begin-multiline-output "" }
+In function 'test':
+pr123056.c:4:1: warning: leak of ‘p’ [-Wanalyzer-malloc-leak]
+    4 | }
+      | ^
+  'test': events 1-2
+    3 |   void *p = __builtin_malloc (1024);
+      |             ^~~~~~~~~~~~~~~~~~~~~~~
+      |             |
+      |             (1) allocated here
+    4 | }
+      | ~            
+      | |
+      | (2) ‘p’ leaks here; was allocated at (1)
+   { dg-end-multiline-output "" } */
+
+/* Use a Python script to verify various properties about the generated
+   .html file:
+   { dg-final { run-html-pytest 3.11.6-embedded-links-pr123056.sarif "2.1.0-valid/embedded-links-pr123056-check-html.py" } } */
+
+/* Use a Python script to verify various properties about the *generated*
+   .sarif file:
+   { dg-final { run-sarif-pytest 3.11.6-embedded-links-pr123056.sarif.roundtrip "2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py" } } */
diff --git a/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-html.py
new file mode 100644 (file)
index 0000000..1e8069f
--- /dev/null
@@ -0,0 +1,25 @@
+from htmltest import *
+
+import pytest
+
+@pytest.fixture(scope='function', autouse=True)
+def html_tree():
+    return html_tree_from_env()
+
+def test_generated_html(html_tree):
+    root = html_tree.getroot ()
+    assert root.tag == make_tag('html')
+
+    head = root.find('xhtml:head', ns)
+    assert head is not None
+
+    diag = get_diag_by_index(html_tree, 0)
+
+    exec_path = diag.find("./xhtml:div[@id='execution-path']", ns)
+    assert exec_path is not None
+
+    label = exec_path.find('xhtml:label', ns)
+    assert label.text == 'Execution path with 2 events'
+
+    final_event = exec_path.find(".//xhtml:span[@id='gcc-diag-0-event-1']", ns)
+    assert final_event.text == '(2) ‘p’ leaks here; was allocated at (1)'
diff --git a/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py b/gcc/testsuite/sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py
new file mode 100644 (file)
index 0000000..5e45e2a
--- /dev/null
@@ -0,0 +1,14 @@
+from sarif import *
+
+import pytest
+
+@pytest.fixture(scope='function', autouse=True)
+def sarif():
+    return sarif_from_env()
+
+def test_roundtrip_of_url_in_generated_sarif(sarif):
+    result = get_result_by_index(sarif, 0)
+    assert result['level'] == 'warning'
+    assert result['message']['text'] == "leak of ‘p’"
+    assert (result['codeFlows'][0]['threadFlows'][0]['locations'][1]['location']['message']['text']
+            == "‘p’ leaks here; was allocated at (1)")