git.ipfire.org Git - thirdparty/linux.git/commitdiff
scripts/sbom: add JSON-LD serialization
author Luis Augenstein <luis.augenstein@tngtech.com>
Mon, 18 May 2026 06:20:55 +0000 (08:20 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 22 May 2026 11:14:41 +0000 (13:14 +0200)
Add infrastructure to serialize an SPDX graph as a JSON-LD
document. NamespaceMaps in the SPDX document are converted
to custom prefixes in the @context field of the JSON-LD output.

The SBOM tool uses NamespaceMaps solely to shorten SPDX IDs,
avoiding repetition of full namespace URIs by using short prefixes.

Assisted-by: Cursor:claude-sonnet-4-5
Assisted-by: OpenCode:GLM-4-7
Co-developed-by: Maximilian Huber <maximilian.huber@tngtech.com>
Signed-off-by: Maximilian Huber <maximilian.huber@tngtech.com>
Signed-off-by: Luis Augenstein <luis.augenstein@tngtech.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Makefile
scripts/sbom/sbom.py
scripts/sbom/sbom/config.py
scripts/sbom/sbom/spdx_graph/__init__.py [new file with mode: 0644]
scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py [new file with mode: 0644]
scripts/sbom/sbom/spdx_graph/spdx_graph_model.py [new file with mode: 0644]

index 4c6133af55496c95df814d11c3243267098696d0..2443d4c824548837272f8af0faf36b7079eb7711 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2212,7 +2212,8 @@ quiet_cmd_sbom = GEN     $(sbom_targets)
                      --src-tree $(abspath $(srctree)) \
                      --obj-tree $(abspath $(objtree)) \
                      --roots-file "$(tmp-target)" \
-                     --output-directory $(abspath $(objtree));
+                     --output-directory $(abspath $(objtree)) \
+                     --generate-spdx;
 PHONY += sbom
 sbom: $(notdir $(KBUILD_IMAGE)) include/generated/autoconf.h $(if $(CONFIG_MODULES),modules modules.order)
        $(call cmd,sbom)
index d700e4f294f767dd8c41dc01781e50cb9a6228f6..764175b9c8933e768c127256cd80dc00572a0d57 100644 (file)
@@ -6,13 +6,18 @@
 Compute software bill of materials in SPDX format describing a kernel build.
 """
 
+import json
 import logging
 import os
 import sys
 import time
+import uuid
 import sbom.sbom_logging as sbom_logging
 from sbom.config import get_config
 from sbom.path_utils import is_relative_to
+from sbom.spdx import JsonLdSpdxDocument, SpdxIdGenerator
+from sbom.spdx.core import CreationInfo, SpdxDocument
+from sbom.spdx_graph import SpdxIdGeneratorCollection, build_spdx_graphs
 from sbom.cmd_graph import CmdGraph
 
 
@@ -71,6 +76,57 @@ def main():
                 f.write("\n".join(str(file_path) for file_path in used_files))
             logging.debug(f"Successfully saved {used_files_path}")
 
+    if config.generate_spdx is False:
+        _exit_with_summary(config.write_output_on_error)
+        return
+
+    # Build SPDX Documents
+    logging.debug("Start generating SPDX graph based on cmd graph")
+    start_time = time.time()
+
+    # The real uuid will be generated based on the content of the SPDX graphs
+    # to ensure that the same SPDX document is always assigned the same uuid.
+    PLACEHOLDER_UUID = "00000000-0000-0000-0000-000000000000"
+    spdx_id_base_namespace = f"{config.spdxId_prefix}{PLACEHOLDER_UUID}/"
+    spdx_id_generators = SpdxIdGeneratorCollection(
+        base=SpdxIdGenerator(prefix="p", namespace=spdx_id_base_namespace),
+        source=SpdxIdGenerator(prefix="s", namespace=f"{spdx_id_base_namespace}source/"),
+        build=SpdxIdGenerator(prefix="b", namespace=f"{spdx_id_base_namespace}build/"),
+        output=SpdxIdGenerator(prefix="o", namespace=f"{spdx_id_base_namespace}output/"),
+    )
+
+    spdx_graphs = build_spdx_graphs(
+        cmd_graph,
+        spdx_id_generators,
+        config,
+    )
+    spdx_id_uuid = uuid.uuid5(
+        uuid.NAMESPACE_URL,
+        "".join(
+            json.dumps(element.to_dict()) for spdx_graph in spdx_graphs.values() for element in spdx_graph.to_list()
+        ),
+    )
+    logging.debug(f"Generated SPDX graph in {time.time() - start_time} seconds")
+
+    if not sbom_logging.has_errors() or config.write_output_on_error:
+        for kernel_sbom_kind, spdx_graph in spdx_graphs.items():
+            spdx_graph_objects = spdx_graph.to_list()
+            # Add warning and error summary to creation info comment
+            creation_info = next(element for element in spdx_graph_objects if isinstance(element, CreationInfo))
+            creation_info.comment = "\n".join([
+                sbom_logging.summarize_warnings(),
+                sbom_logging.summarize_errors(),
+            ]).strip()
+            # Replace Placeholder uuid with real uuid for spdxIds
+            spdx_document = next(element for element in spdx_graph_objects if isinstance(element, SpdxDocument))
+            for namespaceMap in spdx_document.namespaceMap:
+                namespaceMap.namespace = namespaceMap.namespace.replace(PLACEHOLDER_UUID, str(spdx_id_uuid))
+            # Serialize SPDX graph to JSON-LD
+            spdx_doc = JsonLdSpdxDocument(graph=spdx_graph_objects)
+            save_path = os.path.join(config.output_directory, config.spdx_file_names[kernel_sbom_kind])
+            spdx_doc.save(save_path, config.prettify_json)
+            logging.debug(f"Successfully saved {save_path}")
+
     _exit_with_summary(config.write_output_on_error)
 
 
index b8c1a2b404dfc0f9b675c2032de9bc758ad3bd02..98c7d939364d77038bd4d7124abfaf9c5908c3d1 100644 (file)
@@ -3,11 +3,18 @@
 
 import argparse
 from dataclasses import dataclass
+from enum import Enum
 import os
 from typing import Any
 from sbom.path_utils import PathStr
 
 
+class KernelSpdxDocumentKind(Enum):  # key type of KernelSbomConfig.spdx_file_names
+    SOURCE = "source"  # get_config() maps this to "sbom-source.spdx.json"
+    BUILD = "build"  # get_config() maps this to "sbom-build.spdx.json"
+    OUTPUT = "output"  # get_config() maps this to "sbom-output.spdx.json"
+
+
 @dataclass
 class KernelSbomConfig:
     src_tree: PathStr
@@ -19,6 +26,13 @@ class KernelSbomConfig:
     root_paths: list[PathStr]
     """List of paths to root outputs (relative to obj_tree) to base the SBOM on."""
 
+    generate_spdx: bool
+    """Whether to generate SPDX SBOM documents. If False, no SPDX files are created."""
+
+    spdx_file_names: dict[KernelSpdxDocumentKind, str]
+    """If `generate_spdx` is True, defines the file names for each SPDX SBOM kind
+    (source, build, output) to store on disk."""
+
     generate_used_files: bool
     """Whether to generate a flat list of all source files used in the build.
     If False, no used-files document is created."""
@@ -38,6 +52,12 @@ class KernelSbomConfig:
     write_output_on_error: bool
     """Whether to write output documents even if errors occur."""
 
+    spdxId_prefix: str
+    """Prefix to use for all SPDX element IDs."""
+
+    prettify_json: bool
+    """Whether to pretty-print generated SPDX JSON documents."""
+
 
 def _parse_cli_arguments(parser: argparse.ArgumentParser) -> dict[str, Any]:
     """
@@ -67,6 +87,15 @@ def _parse_cli_arguments(parser: argparse.ArgumentParser) -> dict[str, Any]:
         "--roots-file",
         help="Path to a file containing the root paths (one per line). Cannot be used together with --roots.",
     )
+    parser.add_argument(
+        "--generate-spdx",
+        action="store_true",
+        default=False,
+        help=(
+            "Whether to create sbom-source.spdx.json, sbom-build.spdx.json and "
+            "sbom-output.spdx.json documents (default: False)"
+        ),
+    )
     parser.add_argument(
         "--generate-used-files",
         action="store_true",
@@ -114,6 +143,20 @@ def _parse_cli_arguments(parser: argparse.ArgumentParser) -> dict[str, Any]:
         ),
     )
 
+    # SPDX specific options
+    spdx_group = parser.add_argument_group("SPDX options", "Options for customizing SPDX document generation")
+    spdx_group.add_argument(
+        "--spdxId-prefix",
+        default="urn:spdx.dev:",
+        help="The prefix to use for all spdxId properties. (default: urn:spdx.dev:)",
+    )
+    spdx_group.add_argument(
+        "--prettify-json",
+        action="store_true",
+        default=False,
+        help="Whether to pretty print the generated spdx.json documents (default: False)",
+    )
+
     args = vars(parser.parse_args())
     return args
 
@@ -144,6 +187,7 @@ def get_config() -> KernelSbomConfig:
         root_paths = args["roots"]
     _validate_path_arguments(parser, src_tree, obj_tree, root_paths)
 
+    generate_spdx = args["generate_spdx"]
     generate_used_files = args["generate_used_files"]
     output_directory = os.path.realpath(args["output_directory"])
     debug = args["debug"]
@@ -151,19 +195,31 @@ def get_config() -> KernelSbomConfig:
     fail_on_unknown_build_command = not args["do_not_fail_on_unknown_build_command"]
     write_output_on_error = args["write_output_on_error"]
 
+    spdxId_prefix = args["spdxId_prefix"]
+    prettify_json = args["prettify_json"]
+
     # Hardcoded config
+    spdx_file_names = {
+        KernelSpdxDocumentKind.SOURCE: "sbom-source.spdx.json",
+        KernelSpdxDocumentKind.BUILD: "sbom-build.spdx.json",
+        KernelSpdxDocumentKind.OUTPUT: "sbom-output.spdx.json",
+    }
     used_files_file_name = "sbom.used-files.txt"
 
     return KernelSbomConfig(
         src_tree=src_tree,
         obj_tree=obj_tree,
         root_paths=root_paths,
+        generate_spdx=generate_spdx,
+        spdx_file_names=spdx_file_names,
         generate_used_files=generate_used_files,
         used_files_file_name=used_files_file_name,
         output_directory=output_directory,
         debug=debug,
         fail_on_unknown_build_command=fail_on_unknown_build_command,
         write_output_on_error=write_output_on_error,
+        spdxId_prefix=spdxId_prefix,
+        prettify_json=prettify_json,
     )
 
 
diff --git a/scripts/sbom/sbom/spdx_graph/__init__.py b/scripts/sbom/sbom/spdx_graph/__init__.py
new file mode 100644 (file)
index 0000000..3557b1d
--- /dev/null
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from .build_spdx_graphs import build_spdx_graphs
+from .spdx_graph_model import SpdxIdGeneratorCollection
+
+__all__ = ["build_spdx_graphs", "SpdxIdGeneratorCollection"]
diff --git a/scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py b/scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py
new file mode 100644 (file)
index 0000000..bb3db4e
--- /dev/null
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+
+from typing import Protocol
+
+from sbom.config import KernelSpdxDocumentKind
+from sbom.cmd_graph import CmdGraph
+from sbom.path_utils import PathStr
+from sbom.spdx_graph.spdx_graph_model import SpdxGraph, SpdxIdGeneratorCollection
+
+
+class SpdxGraphConfig(Protocol):  # structural type of the `config` argument of build_spdx_graphs
+    obj_tree: PathStr  # attribute the config object must expose
+    src_tree: PathStr  # attribute the config object must expose
+
+
+def build_spdx_graphs(
+    cmd_graph: CmdGraph,
+    spdx_id_generators: SpdxIdGeneratorCollection,
+    config: SpdxGraphConfig,
+) -> dict[KernelSpdxDocumentKind, SpdxGraph]:
+    """
+    Builds SPDX graphs (output, source, and build) based on a cmd dependency graph.
+    If the source and object trees are identical, no dedicated source graph can be created.
+    In that case the source files are added to the build graph instead.
+
+    Args:
+        cmd_graph: The dependency graph of a kernel build.
+        spdx_id_generators: Collection of SPDX ID generators.
+        config: Configuration options.
+
+    Returns:
+        Dictionary of SPDX graphs
+    """
+    return {}  # NOTE(review): always an empty dict here; no argument is read and no graph is built in this block
diff --git a/scripts/sbom/sbom/spdx_graph/spdx_graph_model.py b/scripts/sbom/sbom/spdx_graph/spdx_graph_model.py
new file mode 100644 (file)
index 0000000..682194d
--- /dev/null
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass
+from sbom.spdx.core import CreationInfo, SoftwareAgent, SpdxDocument, SpdxObject
+from sbom.spdx.software import Sbom
+from sbom.spdx.spdxId import SpdxIdGenerator
+
+
+@dataclass
+class SpdxGraph:
+    """Represents the complete graph of a single SPDX document."""
+
+    spdx_document: SpdxDocument
+    agent: SoftwareAgent
+    creation_info: CreationInfo
+    sbom: Sbom
+
+    def to_list(self) -> list[SpdxObject]:  # flat list: the four fields in order, then the SBOM's elements
+        return [
+            self.spdx_document,
+            self.agent,
+            self.creation_info,
+            self.sbom,
+            *self.sbom.element,  # unpacked so each item of sbom.element becomes its own list entry
+        ]
+
+
+@dataclass
+class SpdxIdGeneratorCollection:
+    """Holds SPDX ID generators for different document types to ensure globally unique SPDX IDs."""
+
+    base: SpdxIdGenerator  # sbom.py main() creates this with prefix "p" on the base namespace
+    source: SpdxIdGenerator  # sbom.py main() creates this with prefix "s" and namespace "<base>source/"
+    build: SpdxIdGenerator  # sbom.py main() creates this with prefix "b" and namespace "<base>build/"
+    output: SpdxIdGenerator  # sbom.py main() creates this with prefix "o" and namespace "<base>output/"