]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
sarif output: introduce sarif_serialization_format
author David Malcolm <dmalcolm@redhat.com>
Wed, 30 Apr 2025 20:50:16 +0000 (16:50 -0400)
committer David Malcolm <dmalcolm@redhat.com>
Wed, 30 Apr 2025 20:50:16 +0000 (16:50 -0400)
The SARIF 2.1.0 spec says that although a "SARIF log file SHALL contain
a serialization of the SARIF object model into the JSON format ... in the
future, other serializations might be defined." (§3.1)

I've been experimenting with alternative serializations of SARIF (CBOR
and JSON5 for now).  To help with these experiments, this patch adds a
new param "serialization" to -fdiagnostics-add-output='s "sarif" scheme.

For now this must have value "json", but will be helpful for any
followup patches.

gcc/ChangeLog:
* diagnostic-format-sarif.cc
(sarif_serialization_format_json::write_to_file): New.
(sarif_builder::m_formatted): Replace field with...
(sarif_builder::m_serialization_format): ...this.
(sarif_builder::sarif_builder): Update for field change.
(sarif_builder::flush_to_file): Call m_serialization_format's
write_to_file vfunc.
(sarif_output_format::sarif_output_format): Replace param
"formatted" with "serialization_format".
(sarif_stream_output_format::sarif_stream_output_format): Likewise.
(sarif_file_output_format::sarif_file_output_format): Likewise.
(diagnostic_output_format_init_sarif_stderr): Make a
sarif_serialization_format_json and pass it to
diagnostic_output_format_init_sarif.
(diagnostic_output_format_open_sarif_file): Split out into...
(diagnostic_output_file::try_to_open): ...this, adding
"serialization_kind" param.
(diagnostic_output_format_init_sarif_file): Update for new param
to diagnostic_output_format_open_sarif_file.  Make a
sarif_serialization_format_json and pass it to
diagnostic_output_format_init_sarif.
(diagnostic_output_format_init_sarif_stream): Make a
sarif_serialization_format_json and pass it to
diagnostic_output_format_init_sarif.
(make_sarif_sink): Replace param "formatted" with "serialization".
(selftest::test_make_location_object): Update for changes to
sarif_builder ctor.
* diagnostic-format-sarif.h (enum class sarif_serialization_kind): New.
(diagnostic_output_format_open_sarif_file): Add param
"serialization_kind".
(class sarif_serialization_format): New.
(class sarif_serialization_format_json): New.
(make_sarif_sink): Replace param "formatted" with
"serialization_format".
* diagnostic-output-file.h (diagnostic_output_file::try_to_open):
New decl.
* diagnostic.h (enum diagnostics_output_format): Tweak comments.
* doc/invoke.texi (-fdiagnostics-add-output): Add "serialization"
param to sarif scheme.
* libgdiagnostics.cc (sarif_sink::sarif_sink): Update for change
to make_sarif_sink.
* opts-diagnostic.cc (sarif_scheme_handler::make_sink): Add
"serialization" param and pass it on to make_sarif_sink.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
gcc/diagnostic-format-sarif.cc
gcc/diagnostic-format-sarif.h
gcc/diagnostic-output-file.h
gcc/diagnostic.h
gcc/doc/invoke.texi
gcc/libgdiagnostics.cc
gcc/opts-diagnostic.cc

index f322991ab2eaebb0cc38282f55ac7109d5b97baa..bc6abdff5e4e04fed2c21864564dbfd02695e546 100644 (file)
@@ -634,6 +634,18 @@ private:
   std::vector<std::unique_ptr<sarif_result>> m_results;
 };
 
+/* Classes for abstracting away JSON vs other serialization formats.  */
+
+// class sarif_serialization_format_json : public sarif_serialization_format
+
+void
+sarif_serialization_format_json::write_to_file (FILE *outf,
+                                               const json::value &top)
+{
+  top.dump (outf, m_formatted);
+  fprintf (outf, "\n");
+}
+
 /* A class for managing SARIF output (for -fdiagnostics-format=sarif-stderr
    and -fdiagnostics-format=sarif-file).
 
@@ -687,7 +699,7 @@ public:
                 pretty_printer &printer,
                 const line_maps *line_maps,
                 const char *main_input_filename_,
-                bool formatted,
+                std::unique_ptr<sarif_serialization_format> serialization_format,
                 const sarif_generation_options &sarif_gen_opts);
   ~sarif_builder ();
 
@@ -891,7 +903,7 @@ private:
 
   int m_tabstop;
 
-  bool m_formatted;
+  std::unique_ptr<sarif_serialization_format> m_serialization_format;
   const sarif_generation_options m_sarif_gen_opts;
 
   unsigned m_next_result_idx;
@@ -1561,7 +1573,7 @@ sarif_builder::sarif_builder (diagnostic_context &context,
                              pretty_printer &printer,
                              const line_maps *line_maps,
                              const char *main_input_filename_,
-                             bool formatted,
+                             std::unique_ptr<sarif_serialization_format> serialization_format,
                              const sarif_generation_options &sarif_gen_opts)
 : m_context (context),
   m_printer (&printer),
@@ -1576,12 +1588,13 @@ sarif_builder::sarif_builder (diagnostic_context &context,
   m_rule_id_set (),
   m_rules_arr (new json::array ()),
   m_tabstop (context.m_tabstop),
-  m_formatted (formatted),
+  m_serialization_format (std::move (serialization_format)),
   m_sarif_gen_opts (sarif_gen_opts),
   m_next_result_idx (0),
   m_current_code_flow (nullptr)
 {
   gcc_assert (m_line_maps);
+  gcc_assert (m_serialization_format);
 
   /* Mark MAIN_INPUT_FILENAME_ as the artifact that the tool was
      instructed to scan.
@@ -1823,8 +1836,7 @@ void
 sarif_builder::flush_to_file (FILE *outf)
 {
   std::unique_ptr<sarif_log> top = flush_to_object ();
-  top->dump (outf, m_formatted);
-  fprintf (outf, "\n");
+  m_serialization_format->write_to_file (outf, *top);
 }
 
 /* Attempt to convert DIAG_KIND to a suitable value for the "level"
@@ -3534,11 +3546,11 @@ protected:
   sarif_output_format (diagnostic_context &context,
                       const line_maps *line_maps,
                       const char *main_input_filename_,
-                      bool formatted,
+                      std::unique_ptr<sarif_serialization_format> serialization_format,
                       const sarif_generation_options &sarif_gen_opts)
   : diagnostic_output_format (context),
     m_builder (context, *get_printer (), line_maps, main_input_filename_,
-              formatted, sarif_gen_opts),
+              std::move (serialization_format), sarif_gen_opts),
     m_buffer (nullptr)
   {}
 
@@ -3552,12 +3564,11 @@ public:
   sarif_stream_output_format (diagnostic_context &context,
                              const line_maps *line_maps,
                              const char *main_input_filename_,
-                             bool formatted,
+                             std::unique_ptr<sarif_serialization_format> serialization_format,
                              const sarif_generation_options &sarif_gen_opts,
                              FILE *stream)
   : sarif_output_format (context, line_maps, main_input_filename_,
-                        formatted,
-                        sarif_gen_opts),
+                        std::move (serialization_format), sarif_gen_opts),
     m_stream (stream)
   {
   }
@@ -3579,11 +3590,11 @@ public:
   sarif_file_output_format (diagnostic_context &context,
                            const line_maps *line_maps,
                            const char *main_input_filename_,
-                           bool formatted,
+                           std::unique_ptr<sarif_serialization_format> serialization_format,
                            const sarif_generation_options &sarif_gen_opts,
                            diagnostic_output_file output_file)
   : sarif_output_format (context, line_maps, main_input_filename_,
-                        formatted, sarif_gen_opts),
+                        std::move (serialization_format), sarif_gen_opts),
     m_output_file (std::move (output_file))
   {
     gcc_assert (m_output_file.get_open_file ());
@@ -3747,26 +3758,33 @@ diagnostic_output_format_init_sarif_stderr (diagnostic_context &context,
 {
   gcc_assert (line_maps);
   const sarif_generation_options sarif_gen_opts;
+  auto serialization
+    = std::make_unique<sarif_serialization_format_json> (formatted);
   diagnostic_output_format_init_sarif
     (context,
      std::make_unique<sarif_stream_output_format> (context,
                                                   line_maps,
                                                   main_input_filename_,
-                                                  formatted,
+                                                  std::move (serialization),
                                                   sarif_gen_opts,
                                                   stderr));
 }
 
-/* Attempt to open BASE_FILE_NAME.sarif for writing.
+/* Attempt to open "BASE_FILE_NAME""EXTENSION" for writing.
    Return a non-null diagnostic_output_file,
    or return a null diagnostic_output_file and complain to CONTEXT
    using LINE_MAPS.  */
 
 diagnostic_output_file
-diagnostic_output_format_open_sarif_file (diagnostic_context &context,
-                                         line_maps *line_maps,
-                                         const char *base_file_name)
+diagnostic_output_file::try_to_open (diagnostic_context &context,
+                                    line_maps *line_maps,
+                                    const char *base_file_name,
+                                    const char *extension,
+                                    bool is_binary)
 {
+  gcc_assert (extension);
+  gcc_assert (extension[0] == '.');
+
   if (!base_file_name)
     {
       rich_location richloc (line_maps, UNKNOWN_LOCATION);
@@ -3777,21 +3795,51 @@ diagnostic_output_format_open_sarif_file (diagnostic_context &context,
     }
 
   label_text filename = label_text::take (concat (base_file_name,
-                                                 ".sarif",
+                                                 extension,
                                                  nullptr));
-  FILE *outf = fopen (filename.get (), "w");
+  FILE *outf = fopen (filename.get (), is_binary ? "wb" : "w");
   if (!outf)
     {
       rich_location richloc (line_maps, UNKNOWN_LOCATION);
       context.emit_diagnostic_with_group
        (DK_ERROR, richloc, nullptr, 0,
-        "unable to open %qs for SARIF output: %m",
+        "unable to open %qs for diagnostic output: %m",
         filename.get ());
       return diagnostic_output_file ();
     }
   return diagnostic_output_file (outf, true, std::move (filename));
 }
 
+/* Attempt to open BASE_FILE_NAME.sarif for writing JSON.
+   Return a non-null diagnostic_output_file,
+   or return a null diagnostic_output_file and complain to CONTEXT
+   using LINE_MAPS.  */
+
+diagnostic_output_file
+diagnostic_output_format_open_sarif_file (diagnostic_context &context,
+                                         line_maps *line_maps,
+                                         const char *base_file_name,
+                                         enum sarif_serialization_kind serialization_kind)
+{
+  const char *suffix;
+  bool is_binary;
+  switch (serialization_kind)
+    {
+    default:
+      gcc_unreachable ();
+    case sarif_serialization_kind::json:
+      suffix = ".sarif";
+      is_binary = false;
+      break;
+    }
+
+  return diagnostic_output_file::try_to_open (context,
+                                             line_maps,
+                                             base_file_name,
+                                             suffix,
+                                             is_binary);
+}
+
 /* Populate CONTEXT in preparation for SARIF output to a file named
    BASE_FILE_NAME.sarif.  */
 
@@ -3807,7 +3855,10 @@ diagnostic_output_format_init_sarif_file (diagnostic_context &context,
   diagnostic_output_file output_file
     = diagnostic_output_format_open_sarif_file (context,
                                                line_maps,
-                                               base_file_name);
+                                               base_file_name,
+                                               sarif_serialization_kind::json);
+  auto serialization
+    = std::make_unique<sarif_serialization_format_json> (formatted);
 
   const sarif_generation_options sarif_gen_opts;
   diagnostic_output_format_init_sarif
@@ -3815,7 +3866,7 @@ diagnostic_output_format_init_sarif_file (diagnostic_context &context,
      std::make_unique<sarif_file_output_format> (context,
                                                 line_maps,
                                                 main_input_filename_,
-                                                formatted,
+                                                std::move (serialization),
                                                 sarif_gen_opts,
                                                 std::move (output_file)));
 }
@@ -3831,12 +3882,14 @@ diagnostic_output_format_init_sarif_stream (diagnostic_context &context,
 {
   gcc_assert (line_maps);
   const sarif_generation_options sarif_gen_opts;
+  auto serialization
+    = std::make_unique<sarif_serialization_format_json> (formatted);
   diagnostic_output_format_init_sarif
     (context,
      std::make_unique<sarif_stream_output_format> (context,
                                                   line_maps,
                                                   main_input_filename_,
-                                                  formatted,
+                                                  std::move (serialization),
                                                   sarif_gen_opts,
                                                   stream));
 }
@@ -3845,7 +3898,7 @@ std::unique_ptr<diagnostic_output_format>
 make_sarif_sink (diagnostic_context &context,
                 const line_maps &line_maps,
                 const char *main_input_filename_,
-                bool formatted,
+                std::unique_ptr<sarif_serialization_format> serialization,
                 const sarif_generation_options &sarif_gen_opts,
                 diagnostic_output_file output_file)
 {
@@ -3853,7 +3906,7 @@ make_sarif_sink (diagnostic_context &context,
     = std::make_unique<sarif_file_output_format> (context,
                                                  &line_maps,
                                                  main_input_filename_,
-                                                 formatted,
+                                                 std::move (serialization),
                                                  sarif_gen_opts,
                                                  std::move (output_file));
   sink->update_printer ();
@@ -3908,7 +3961,9 @@ private:
                            bool formatted,
                            const sarif_generation_options &sarif_gen_opts)
     : sarif_output_format (context, line_maps, main_input_filename_,
-                          formatted,  sarif_gen_opts)
+                          std::make_unique<sarif_serialization_format_json>
+                            (formatted),
+                          sarif_gen_opts)
     {
     }
     bool machine_readable_stderr_p () const final override
@@ -3940,8 +3995,10 @@ test_make_location_object (const sarif_generation_options &sarif_gen_opts,
 
   test_diagnostic_context dc;
   pretty_printer pp;
-  sarif_builder builder (dc, pp, line_table, "MAIN_INPUT_FILENAME",
-                        true, sarif_gen_opts);
+  sarif_builder builder
+    (dc, pp, line_table, "MAIN_INPUT_FILENAME",
+     std::make_unique<sarif_serialization_format_json> (true),
+     sarif_gen_opts);
 
   /* These "columns" are byte offsets, whereas later on the columns
      in the generated SARIF use sarif_builder::get_sarif_column and
index 524a0c7c6b5c097781ed99fadda9320e1d81b4d4..644625747cc8661c1d68fe35f6aa11131de3e4a2 100644 (file)
@@ -27,10 +27,20 @@ along with GCC; see the file COPYING3.  If not see
 
 class logical_location;
 
+/* Enum for choosing what format to serialize the generated SARIF into.  */
+
+enum class sarif_serialization_kind
+{
+   json,
+
+   num_values
+};
+
 extern diagnostic_output_file
 diagnostic_output_format_open_sarif_file (diagnostic_context &context,
                                          line_maps *line_maps,
-                                         const char *base_file_name);
+                                         const char *base_file_name,
+                                         enum sarif_serialization_kind serialization_kind);
 
 extern void
 diagnostic_output_format_init_sarif_stderr (diagnostic_context &context,
@@ -50,6 +60,34 @@ diagnostic_output_format_init_sarif_stream (diagnostic_context &context,
                                            bool formatted,
                                            FILE *stream);
 
+/* Abstract base class for handling JSON output vs other kinds of
+   serialization of the json tree.  */
+
+class sarif_serialization_format
+{
+public:
+  virtual ~sarif_serialization_format () {}
+  virtual void write_to_file (FILE *outf,
+                             const json::value &top) = 0;
+};
+
+/* Concrete subclass for serializing SARIF as JSON.  */
+
+class sarif_serialization_format_json : public sarif_serialization_format
+{
+public:
+  sarif_serialization_format_json (bool formatted)
+  : m_formatted (formatted)
+  {
+  }
+  void write_to_file (FILE *outf, const json::value &top) final override;
+
+private:
+  bool m_formatted;
+};
+
+/* Control of SARIF generation.  */
+
 enum class sarif_version
 {
   v2_1_0,
@@ -73,7 +111,7 @@ extern std::unique_ptr<diagnostic_output_format>
 make_sarif_sink (diagnostic_context &context,
                 const line_maps &line_maps,
                 const char *main_input_filename_,
-                bool formatted,
+                std::unique_ptr<sarif_serialization_format> serialization_format,
                 const sarif_generation_options &sarif_gen_opts,
                 diagnostic_output_file output_file);
 
index 2e877c998cc2db579258dd10a26d7354ff56be7e..a0b2e1bf4596956273609245421a146b71d27fac 100644 (file)
@@ -91,6 +91,13 @@ public:
   FILE *get_open_file () const { return m_outf; }
   const char *get_filename () const { return m_filename.get (); }
 
+  static diagnostic_output_file
+  try_to_open (diagnostic_context &context,
+              line_maps *line_maps,
+              const char *base_file_name,
+              const char *extension,
+              bool binary);
+
 private:
   FILE *m_outf;
   bool m_owned;
index 36f4a1c8f9f3f188a7a0f66658d832bfe7b57c76..5cde881c0743f6e0ce16ccc1bafbb67432542e8a 100644 (file)
@@ -72,10 +72,10 @@ enum diagnostics_output_format
   /* JSON-based output, to a file.  */
   DIAGNOSTICS_OUTPUT_FORMAT_JSON_FILE,
 
-  /* SARIF-based output, to stderr.  */
+  /* SARIF-based output, as JSON to stderr.  */
   DIAGNOSTICS_OUTPUT_FORMAT_SARIF_STDERR,
 
-  /* SARIF-based output, to a file.  */
+  /* SARIF-based output, to a JSON file.  */
   DIAGNOSTICS_OUTPUT_FORMAT_SARIF_FILE
 };
 
index 5f1c0b89c1cca53dcf5c3f65dab2496b239c7fbe..d1925c98c2f34b0699b35f31d9aebce19195e61d 100644 (file)
@@ -6053,6 +6053,11 @@ Specify the filename to write the SARIF output to, potentially with a
 leading absolute or relative path.  If not specified, it defaults to
 @file{@var{source}.sarif}.
 
+@item serialization=@r{[}json@r{]}
+Specify the serialization format to use when writing out the SARIF.
+Currently this can only be @code{json}, but is present as an
+extension point for experimenting with other serializations.
+
 @item version=@r{[}2.1@r{|}2.2-prerelease@r{]}
 Specify the version of SARIF to use for the output.  If not specified,
 defaults to 2.1.  @code{2.2-prerelease} uses an unofficial draft of the
index 49524cc922d4156de6ef9979dcf634fba1c53da3..c2eb9757d181ca0e6ef63df85dc70e93d6232669 100644 (file)
@@ -1076,10 +1076,11 @@ sarif_sink::sarif_sink (diagnostic_manager &mgr,
 {
   diagnostic_output_file output_file (dst_stream, false,
                                      label_text::borrow ("sarif_sink"));
+  auto serialization = std::make_unique<sarif_serialization_format_json> (true);
   auto inner_sink = make_sarif_sink (mgr.get_dc (),
                                     *mgr.get_line_table (),
                                     main_input_file->get_name (),
-                                    true,
+                                    std::move (serialization),
                                     sarif_gen_opts,
                                     std::move (output_file));
   mgr.get_dc ().add_sink (std::move (inner_sink));
index b51c8a8b42208413808a293ffe60b2590b8bd68c..1eec0103d3b6c94cdca67de6e20aea7feb0fe9d6 100644 (file)
@@ -434,6 +434,8 @@ sarif_scheme_handler::make_sink (const context &ctxt,
                                 const scheme_name_and_params &parsed_arg) const
 {
   label_text filename;
+  enum sarif_serialization_kind serialization_kind
+    = sarif_serialization_kind::json;
   enum sarif_version version = sarif_version::v2_1_0;
   for (auto& iter : parsed_arg.m_kvs)
     {
@@ -444,6 +446,20 @@ sarif_scheme_handler::make_sink (const context &ctxt,
          filename = label_text::take (xstrdup (value.c_str ()));
          continue;
        }
+      if (key == "serialization")
+       {
+         static const std::array<std::pair<const char *, enum sarif_serialization_kind>,
+                                 (size_t)sarif_serialization_kind::num_values> value_names
+           {{{"json", sarif_serialization_kind::json}}};
+
+         if (!parse_enum_value<enum sarif_serialization_kind>
+                (ctxt, unparsed_arg,
+                 key, value,
+                 value_names,
+                 serialization_kind))
+           return nullptr;
+         continue;
+       }
       if (key == "version")
        {
          static const std::array<std::pair<const char *, enum sarif_version>,
@@ -462,6 +478,7 @@ sarif_scheme_handler::make_sink (const context &ctxt,
       /* Key not found.  */
       auto_vec<const char *> known_keys;
       known_keys.safe_push ("file");
+      known_keys.safe_push ("serialization");
       known_keys.safe_push ("version");
       ctxt.report_unknown_key (unparsed_arg, key, get_scheme_name (),
                               known_keys);
@@ -479,7 +496,8 @@ sarif_scheme_handler::make_sink (const context &ctxt,
                              : ctxt.m_opts.x_main_input_basename);
       output_file = diagnostic_output_format_open_sarif_file (ctxt.m_dc,
                                                              line_table,
-                                                             basename);
+                                                             basename,
+                                                             serialization_kind);
     }
   if (!output_file)
     return nullptr;
@@ -487,10 +505,21 @@ sarif_scheme_handler::make_sink (const context &ctxt,
   sarif_generation_options sarif_gen_opts;
   sarif_gen_opts.m_version = version;
 
+  std::unique_ptr<sarif_serialization_format> serialization_obj;
+  switch (serialization_kind)
+    {
+    default:
+      gcc_unreachable ();
+    case sarif_serialization_kind::json:
+      serialization_obj
+       = std::make_unique<sarif_serialization_format_json> (true);
+      break;
+    }
+
   auto sink = make_sarif_sink (ctxt.m_dc,
                               *line_table,
                               ctxt.m_opts.x_main_input_filename,
-                              true,
+                              std::move (serialization_obj),
                               sarif_gen_opts,
                               std::move (output_file));
   return sink;