git.ipfire.org Git - thirdparty/linux.git/commitdiff
scripts/sbom: add SPDX classes
author Luis Augenstein <luis.augenstein@tngtech.com>
Mon, 18 May 2026 06:20:54 +0000 (08:20 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 22 May 2026 11:14:41 +0000 (13:14 +0200)
Implement Python dataclasses to model the SPDX classes
required within an SPDX document. The class and property
names are consistent with the SPDX 3.0.1 specification.

Assisted-by: Cursor:claude-sonnet-4-5
Assisted-by: OpenCode:GLM-4-7
Co-developed-by: Maximilian Huber <maximilian.huber@tngtech.com>
Signed-off-by: Maximilian Huber <maximilian.huber@tngtech.com>
Signed-off-by: Luis Augenstein <luis.augenstein@tngtech.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
scripts/sbom/sbom/spdx/__init__.py [new file with mode: 0644]
scripts/sbom/sbom/spdx/build.py [new file with mode: 0644]
scripts/sbom/sbom/spdx/core.py [new file with mode: 0644]
scripts/sbom/sbom/spdx/serialization.py [new file with mode: 0644]
scripts/sbom/sbom/spdx/simplelicensing.py [new file with mode: 0644]
scripts/sbom/sbom/spdx/software.py [new file with mode: 0644]
scripts/sbom/sbom/spdx/spdxId.py [new file with mode: 0644]

diff --git a/scripts/sbom/sbom/spdx/__init__.py b/scripts/sbom/sbom/spdx/__init__.py
new file mode 100644 (file)
index 0000000..4097b59
--- /dev/null
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from .spdxId import SpdxId, SpdxIdGenerator
+from .serialization import JsonLdSpdxDocument
+
+__all__ = ["JsonLdSpdxDocument", "SpdxId", "SpdxIdGenerator"]
diff --git a/scripts/sbom/sbom/spdx/build.py b/scripts/sbom/sbom/spdx/build.py
new file mode 100644 (file)
index 0000000..a39ec9c
--- /dev/null
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass, field
+from sbom.spdx.core import DictionaryEntry, Element, Hash
+
+
+@dataclass(kw_only=True)
+class Build(Element):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Build/Classes/Build/"""
+
+    # Fixed type tag for this class; init=False keeps it out of __init__.
+    type: str = field(init=False, default="build_Build")
+    # Required fields: no default, and keyword-only because of kw_only=True.
+    build_buildType: str
+    build_buildId: str
+    # Optional lists; default_factory gives every instance its own empty list.
+    build_environment: list[DictionaryEntry] = field(default_factory=list)
+    build_configSourceUri: list[str] = field(default_factory=list)
+    build_configSourceDigest: list[Hash] = field(default_factory=list)
diff --git a/scripts/sbom/sbom/spdx/core.py b/scripts/sbom/sbom/spdx/core.py
new file mode 100644 (file)
index 0000000..7eb376a
--- /dev/null
@@ -0,0 +1,170 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass, field
+
+from typing import Any, Literal
+from sbom.spdx.spdxId import SpdxId
+
+# Version string used as the default for CreationInfo.specVersion.
+SPDX_SPEC_VERSION = "3.0.1"
+
+# Literal aliases restricting string-valued properties to the values listed
+# here. NOTE(review): presumably a subset of the SPDX vocabularies limited to
+# what this tool emits - confirm against the specification before extending.
+ExternalIdentifierType = Literal["email", "gitoid", "urlScheme"]
+HashAlgorithm = Literal["sha256", "sha512"]
+ProfileIdentifierType = Literal["core", "software", "build", "lite", "simpleLicensing"]
+RelationshipType = Literal[
+    "contains",
+    "generates",
+    "hasDeclaredLicense",
+    "hasInput",
+    "hasOutput",
+    "ancestorOf",
+    "hasDistributionArtifact",
+    "dependsOn",
+]
+RelationshipCompleteness = Literal["complete", "incomplete", "noAssertion"]
+
+
+@dataclass
+class SpdxObject:
+    """Base class for all SPDX model objects; provides dictionary serialization."""
+
+    def to_dict(self) -> dict[str, Any]:
+        """
+        Convert this object into a dictionary keyed by dataclass field name.
+
+        Fields whose value is None, an empty list or an empty string are omitted.
+        Element instances (directly or inside a list) are written as their spdxId
+        instead of being inlined; any other object providing to_dict() is
+        serialized recursively, and all remaining values are copied as they are.
+
+        Returns:
+            Dictionary representation of this object.
+        """
+
+        def _serialize(v: Any) -> Any:
+            # The decision is made per item so that a list mixing Elements with
+            # other values is handled consistently instead of depending on the
+            # type of its first entry.
+            if isinstance(v, Element):
+                return v.spdxId
+            return v.to_dict() if hasattr(v, "to_dict") else v
+
+        d: dict[str, Any] = {}
+        for field_name in self.__dataclass_fields__:
+            value = getattr(self, field_name)
+            if value is None or value == [] or value == "":
+                continue
+
+            if isinstance(value, list):
+                d[field_name] = [_serialize(v) for v in value]  # type: ignore
+            else:
+                d[field_name] = _serialize(value)
+        return d
+
+
+@dataclass(kw_only=True)
+class IntegrityMethod(SpdxObject):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/IntegrityMethod/"""
+
+    # Declares no fields of its own; it only serves as a common base class
+    # (Hash derives from it).
+
+
+@dataclass(kw_only=True)
+class Hash(IntegrityMethod):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Hash/"""
+
+    # Fixed type tag for this class; init=False keeps it out of __init__.
+    type: str = field(init=False, default="Hash")
+    # Both fields are required and keyword-only; algorithm is restricted to
+    # the HashAlgorithm literals.
+    hashValue: str
+    algorithm: HashAlgorithm
+
+
+@dataclass(kw_only=True)
+class Element(SpdxObject):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Element/"""
+
+    # Fixed type tag; subclasses override the default with their own tag.
+    type: str = field(init=False, default="Element")
+    # Required. SpdxObject.to_dict() writes this ID wherever an Element is
+    # referenced from another object.
+    spdxId: SpdxId
+    # String reference; the default matches the default of CreationInfo.id.
+    creationInfo: str = "_:creationinfo"
+    # Optional properties; None and empty lists are omitted when serialized.
+    name: str | None = None
+    verifiedUsing: list[Hash] = field(default_factory=list)
+    comment: str | None = None
+
+
+@dataclass(kw_only=True)
+class ExternalMap(SpdxObject):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/ExternalMap/"""
+
+    # Fixed type tag for this class; init=False keeps it out of __init__.
+    type: str = field(init=False, default="ExternalMap")
+    # Required, keyword-only.
+    externalSpdxId: SpdxId
+
+
+@dataclass(kw_only=True)
+class NamespaceMap(SpdxObject):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/NamespaceMap/"""
+
+    # Fixed type tag for this class; init=False keeps it out of __init__.
+    type: str = field(init=False, default="NamespaceMap")
+    # Both fields are required and keyword-only.
+    prefix: str
+    namespace: str
+
+
+@dataclass(kw_only=True)
+class ElementCollection(Element):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/ElementCollection/"""
+
+    # Fixed type tag; subclasses override the default with their own tag.
+    type: str = field(init=False, default="ElementCollection")
+    # Lists of Elements; SpdxObject.to_dict() writes them as lists of spdxId
+    # values and omits them entirely while they are empty.
+    element: list[Element] = field(default_factory=list)
+    rootElement: list[Element] = field(default_factory=list)
+    # Restricted to the ProfileIdentifierType literals.
+    profileConformance: list[ProfileIdentifierType] = field(default_factory=list)
+
+
+@dataclass(kw_only=True)
+class SpdxDocument(ElementCollection):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/SpdxDocument/"""
+
+    type: str = field(init=False, default="SpdxDocument")
+    import_: list[ExternalMap] = field(default_factory=list)  # trailing underscore: 'import' is a reserved keyword
+    namespaceMap: list[NamespaceMap] = field(default_factory=list)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize like the parent class, but emit the 'import_' field under the key 'import'."""
+        renamed: dict[str, Any] = {}
+        for key, entry in super().to_dict().items():
+            if key == "import_":
+                renamed["import"] = entry
+            else:
+                renamed[key] = entry
+        return renamed
+
+
+@dataclass(kw_only=True)
+class Agent(Element):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Agent/"""
+
+    # Adds no fields to Element; only the fixed type tag differs.
+    type: str = field(init=False, default="Agent")
+
+
+@dataclass(kw_only=True)
+class SoftwareAgent(Agent):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/SoftwareAgent/"""
+
+    # Adds no fields to Agent; only the fixed type tag differs.
+    type: str = field(init=False, default="SoftwareAgent")
+
+
+@dataclass(kw_only=True)
+class CreationInfo(SpdxObject):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/CreationInfo/"""
+
+    type: str = field(init=False, default="CreationInfo")
+    id: SpdxId = "_:creationinfo"
+    specVersion: str = SPDX_SPEC_VERSION
+    createdBy: list[Agent]
+    created: str
+    comment: str | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize like the parent class, but emit the 'id' field under the key '@id'."""
+        renamed: dict[str, Any] = {}
+        for key, entry in super().to_dict().items():
+            if key == "id":
+                renamed["@id"] = entry
+            else:
+                renamed[key] = entry
+        return renamed
+
+
+@dataclass(kw_only=True)
+class Relationship(Element):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Relationship/"""
+
+    type: str = field(init=False, default="Relationship")
+    relationshipType: RelationshipType
+    from_: Element  # underscore because 'from' is a reserved keyword
+    to: list[Element]
+    completeness: RelationshipCompleteness | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize like the parent class, but emit the 'from_' field under the key 'from'."""
+        renamed: dict[str, Any] = {}
+        for key, entry in super().to_dict().items():
+            if key == "from_":
+                renamed["from"] = entry
+            else:
+                renamed[key] = entry
+        return renamed
+
+
+@dataclass(kw_only=True)
+class Artifact(Element):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Artifact/"""
+
+    # Adds no fields to Element; only the fixed type tag differs.
+    type: str = field(init=False, default="Artifact")
+
+
+@dataclass(kw_only=True)
+class DictionaryEntry(SpdxObject):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/DictionaryEntry/"""
+
+    # Fixed type tag for this class; init=False keeps it out of __init__.
+    type: str = field(init=False, default="DictionaryEntry")
+    # Both fields are required and keyword-only. Note that SpdxObject.to_dict()
+    # drops a field whose value is the empty string.
+    key: str
+    value: str
diff --git a/scripts/sbom/sbom/spdx/serialization.py b/scripts/sbom/sbom/spdx/serialization.py
new file mode 100644 (file)
index 0000000..b4df7d3
--- /dev/null
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import json
+from typing import Any
+from sbom.path_utils import PathStr
+from sbom.spdx.core import SPDX_SPEC_VERSION, SpdxDocument, SpdxObject
+
+
+class JsonLdSpdxDocument:
+    """Represents an SPDX document in JSON-LD format for serialization."""
+
+    graph: list[SpdxObject]
+
+    def __init__(self, graph: list[SpdxObject]) -> None:
+        """
+        Initialize a JSON-LD SPDX document from a graph of SPDX objects.
+        The graph must contain a single SpdxDocument element.
+
+        Args:
+            graph: List of SPDX objects representing the complete SPDX document.
+        """
+        self.graph = graph
+
+    @property
+    def context(self) -> list[str | dict[str, str]]:
+        """
+        Build the JSON-LD @context for this document.
+
+        Returns:
+            List holding the SPDX context URL followed by a prefix-to-namespace
+            mapping taken from the namespaceMap of the first SpdxDocument in the graph.
+
+        Raises:
+            ValueError: If the graph contains no SpdxDocument element.
+        """
+        # A default is passed to next() so that a missing document surfaces as a
+        # descriptive ValueError instead of a bare StopIteration.
+        spdx_document = next((element for element in self.graph if isinstance(element, SpdxDocument)), None)
+        if spdx_document is None:
+            raise ValueError("graph must contain an SpdxDocument element")
+        return [
+            f"https://spdx.org/rdf/{SPDX_SPEC_VERSION}/spdx-context.jsonld",
+            {ns.prefix: ns.namespace for ns in spdx_document.namespaceMap},
+        ]
+
+    def to_dict(self) -> dict[str, Any]:
+        """
+        Convert the SPDX document to a dictionary representation suitable for JSON serialization.
+
+        Returns:
+            Dictionary with @context and @graph keys following JSON-LD format.
+
+        Raises:
+            ValueError: If the graph contains no SpdxDocument element.
+        """
+
+        def _item_to_dict(item: SpdxObject) -> dict[str, Any]:
+            d = item.to_dict()
+            # The namespace map is already expressed through @context, so it is
+            # removed from the serialized SpdxDocument itself.
+            if isinstance(item, SpdxDocument):
+                d.pop("namespaceMap", None)
+            return d
+
+        return {
+            "@context": self.context,
+            "@graph": [_item_to_dict(item) for item in self.graph],
+        }
+
+    def save(self, path: PathStr, prettify: bool) -> None:
+        """
+        Save the SPDX document to a JSON file.
+
+        Args:
+            path: File path where the document will be saved.
+            prettify: Whether to pretty-print the JSON with indentation.
+
+        Raises:
+            ValueError: If the graph contains no SpdxDocument element.
+        """
+        # Serialize before opening the file: opening in "w" mode truncates it, so
+        # a failing conversion would otherwise leave an empty file behind.
+        document = self.to_dict()
+        with open(path, "w", encoding="utf-8") as f:
+            if prettify:
+                json.dump(document, f, indent=2)
+            else:
+                json.dump(document, f, separators=(",", ":"))
diff --git a/scripts/sbom/sbom/spdx/simplelicensing.py b/scripts/sbom/sbom/spdx/simplelicensing.py
new file mode 100644 (file)
index 0000000..750ddd2
--- /dev/null
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass, field
+from sbom.spdx.core import Element
+
+
+@dataclass(kw_only=True)
+class AnyLicenseInfo(Element):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/SimpleLicensing/Classes/AnyLicenseInfo/"""
+
+    # Adds no fields to Element; only the fixed type tag differs.
+    type: str = field(init=False, default="simplelicensing_AnyLicenseInfo")
+
+
+@dataclass(kw_only=True)
+class LicenseExpression(AnyLicenseInfo):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/SimpleLicensing/Classes/LicenseExpression/"""
+
+    # Fixed type tag for this class; init=False keeps it out of __init__.
+    type: str = field(init=False, default="simplelicensing_LicenseExpression")
+    # Required, keyword-only.
+    simplelicensing_licenseExpression: str
diff --git a/scripts/sbom/sbom/spdx/software.py b/scripts/sbom/sbom/spdx/software.py
new file mode 100644 (file)
index 0000000..2f46de7
--- /dev/null
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass, field
+from typing import Literal
+from sbom.spdx.core import Artifact, ElementCollection, IntegrityMethod
+
+
+# Literal aliases restricting string-valued properties of the classes below to
+# the values listed here. NOTE(review): presumably a subset of the SPDX
+# vocabularies limited to what this tool emits - confirm before extending.
+SbomType = Literal["source", "build"]
+FileKindType = Literal["file", "directory"]
+SoftwarePurpose = Literal[
+    "source",
+    "archive",
+    "library",
+    "file",
+    "data",
+    "configuration",
+    "executable",
+    "module",
+    "application",
+    "documentation",
+    "other",
+]
+ContentIdentifierType = Literal["gitoid", "swhid"]
+
+
+@dataclass(kw_only=True)
+class Sbom(ElementCollection):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/Sbom/"""
+
+    # Fixed type tag for this class; init=False keeps it out of __init__.
+    type: str = field(init=False, default="software_Sbom")
+    # Optional; restricted to the SbomType literals, empty by default.
+    software_sbomType: list[SbomType] = field(default_factory=list)
+
+
+@dataclass(kw_only=True)
+class ContentIdentifier(IntegrityMethod):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/ContentIdentifier/"""
+
+    # Fixed type tag for this class; init=False keeps it out of __init__.
+    type: str = field(init=False, default="software_ContentIdentifier")
+    # Both fields are required and keyword-only; the type is restricted to the
+    # ContentIdentifierType literals.
+    software_contentIdentifierType: ContentIdentifierType
+    software_contentIdentifierValue: str
+
+
+@dataclass(kw_only=True)
+class SoftwareArtifact(Artifact):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/SoftwareArtifact/"""
+
+    # Fixed type tag; the subclasses Package and File override the default.
+    type: str = field(init=False, default="software_Artifact")
+    # All optional; None and empty lists are omitted when serialized.
+    software_primaryPurpose: SoftwarePurpose | None = None
+    software_copyrightText: str | None = None
+    software_contentIdentifier: list[ContentIdentifier] = field(default_factory=list)
+
+
+@dataclass(kw_only=True)
+class Package(SoftwareArtifact):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/Package/"""
+
+    # Fixed type tag for this class; init=False keeps it out of __init__.
+    type: str = field(init=False, default="software_Package")
+    # Re-declares Element.name without a default, making it required here; the
+    # type: ignore silences the checker about narrowing the inherited type.
+    name: str  # type: ignore
+    software_packageVersion: str | None = None
+
+
+@dataclass(kw_only=True)
+class File(SoftwareArtifact):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/File/"""
+
+    # Fixed type tag for this class; init=False keeps it out of __init__.
+    type: str = field(init=False, default="software_File")
+    # Re-declares Element.name without a default, making it required here; the
+    # type: ignore silences the checker about narrowing the inherited type.
+    name: str  # type: ignore
+    # Optional; restricted to the FileKindType literals.
+    software_fileKind: FileKindType | None = None
diff --git a/scripts/sbom/sbom/spdx/spdxId.py b/scripts/sbom/sbom/spdx/spdxId.py
new file mode 100644 (file)
index 0000000..589e85c
--- /dev/null
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from itertools import count
+from typing import Iterator
+
+SpdxId = str
+
+
+class SpdxIdGenerator:
+    """Generates SPDX IDs by appending a running number to a prefix or namespace."""
+
+    _namespace: str
+    _prefix: str | None = None
+    _counter: Iterator[int]
+
+    def __init__(self, namespace: str, prefix: str | None = None) -> None:
+        """
+        Set up the generator; numbering of generated IDs starts at 0.
+
+        Args:
+            namespace: Full namespace placed in front of the number when no prefix is set.
+            prefix: Optional short prefix; when given, IDs are built as "<prefix>:<number>"
+                instead of using the full namespace.
+        """
+        self._counter = count(0)
+        self._prefix = prefix
+        self._namespace = namespace
+
+    @property
+    def namespace(self) -> str:
+        return self._namespace
+
+    @property
+    def prefix(self) -> str | None:
+        return self._prefix
+
+    def generate(self) -> SpdxId:
+        """Return the next ID; an empty or missing prefix falls back to the namespace."""
+        if self._prefix:
+            base = f"{self._prefix}:"
+        else:
+            base = self._namespace
+        return f"{base}{next(self._counter)}"