]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
tools: Add script to detect unused symbols in libshared
authorDaan De Meyer <daan.j.demeyer@gmail.com>
Wed, 19 Nov 2025 09:30:01 +0000 (10:30 +0100)
committerDaan De Meyer <daan.j.demeyer@gmail.com>
Wed, 19 Nov 2025 12:14:15 +0000 (13:14 +0100)
Symbols exported by libshared can't get pruned by the linker, so
every unused exported symbol is effectively dead code we ship to users
for no good reason. Let's add a script to analyze how many such symbols
we have.

We also add a meson test to run the script on all of our binaries.
Since it detects unused symbols and still has a few false positives,
don't enable the test by default similar to the clang-tidy tests.

The script was 100% vibe coded by Github Copilot with Claude Sonnet 4.5
as the model.

Current results are (without the unused symbols list):

Analysis of libsystemd-shared-259.so
======================================================================
Total exported symbols: 4830
  (excluding public API symbols starting with 'sd_')
Used symbols: 4672
Unused symbols: 158
Usage rate: 96.7%

meson.build
tools/find-unused-library-symbols.py [new file with mode: 0755]

index 858d7926a7ae7ba4555ae43840b1453ae95e3bee..c12cc645607be16e422ed9a03e71a9f632c16b52 100644 (file)
@@ -15,7 +15,7 @@ project('systemd', 'c',
 
 add_test_setup(
         'default',
-        exclude_suites : ['clang-tidy', 'integration-tests'],
+        exclude_suites : ['clang-tidy', 'unused-symbols', 'integration-tests'],
         is_default : true,
 )
 
@@ -3000,6 +3000,19 @@ if meson.version().version_compare('>=1.4.0')
         endforeach
 endif
 
+symbol_analysis_exes = []
+foreach name, exe : executables_by_name
+        symbol_analysis_exes += exe
+endforeach
+
+find_unused_library_symbols = find_program('tools/find-unused-library-symbols.py')
+test(
+        'libshared-unused-symbols',
+        find_unused_library_symbols,
+        suite : 'unused-symbols',
+        args : [libshared, libcore] + nss_targets + pam_targets + symbol_analysis_exes,
+)
+
 run_target(
         'check-api-docs',
         depends : [man, libsystemd, libudev],
diff --git a/tools/find-unused-library-symbols.py b/tools/find-unused-library-symbols.py
new file mode 100755 (executable)
index 0000000..47f96df
--- /dev/null
@@ -0,0 +1,300 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+"""
+Find unused symbols in a shared library.
+
+This script analyzes a shared library and a list of executables that link
+against it to determine which publicly exported symbols from the library
+are not used by any of the executables or by the library itself internally.
+
+The script checks for symbol usage in three ways:
+1. Internal library references: Uses objdump -R to find relocations within
+   the library that reference its own exported symbols
+2. Executable dependencies: Uses nm to find undefined symbols in executables
+   that match the library's exported symbols
+3. Cross-references: Identifies symbols used across all provided binaries
+
+This comprehensive approach ensures that symbols used internally by the
+library are not incorrectly marked as unused.
+"""
+
+import argparse
+import subprocess
+import sys
+from pathlib import Path
+
+
+def get_exported_symbols(library_path):
+    """
+    Extract all exported (public) symbols from a shared library.
+
+    Public API symbols (those starting with 'sd_') are excluded from the analysis
+    since they cannot be removed or made private due to API compatibility requirements.
+
+    Returns a set of symbol names that are defined and exported by the library.
+    """
+    try:
+        result = subprocess.run(
+            ['nm', '--dynamic', '--defined-only', '--extern-only', library_path],
+            capture_output=True,
+            text=True,
+            check=True
+        )
+    except subprocess.CalledProcessError as e:
+        print(f"Error: Failed to run nm on {library_path}: {e}", file=sys.stderr)
+        sys.exit(1)
+    except FileNotFoundError:
+        print("Error: 'nm' command not found. Please install binutils.", file=sys.stderr)
+        sys.exit(1)
+
+    symbols = set()
+    for line in result.stdout.splitlines():
+        parts = line.split()
+        if len(parts) >= 3:
+            # Format: address type name
+            symbol_type = parts[1]
+            symbol_name = parts[2]
+            # Include text (T) and data (D, B, R) symbols
+            if symbol_type in ('T', 'D', 'B', 'R', 'W'):
+                # Strip version information (e.g., @@SD_SHARED or @SD_SHARED)
+                symbol_name = symbol_name.split('@')[0]
+                # Skip public API symbols (those starting with sd_)
+                if symbol_name.startswith('sd_'):
+                    continue
+                symbols.add(symbol_name)
+
+    return symbols
+
+
+def get_undefined_symbols(executable_path):
+    """
+    Extract all undefined symbols from an executable.
+
+    These are symbols that the executable expects to be provided by
+    shared libraries it links against.
+    """
+    try:
+        result = subprocess.run(
+            ['nm', '--dynamic', '--undefined-only', executable_path],
+            capture_output=True,
+            text=True,
+            check=True
+        )
+    except subprocess.CalledProcessError as e:
+        print(f"Warning: Failed to run nm on {executable_path}: {e}", file=sys.stderr)
+        return set()
+    except FileNotFoundError:
+        print("Error: 'nm' command not found. Please install binutils.", file=sys.stderr)
+        sys.exit(1)
+
+    symbols = set()
+    for line in result.stdout.splitlines():
+        parts = line.split()
+        if len(parts) >= 2:
+            # Format: type name (no address for undefined symbols)
+            symbol_name = parts[1]
+            # Strip version information (e.g., @SD_SHARED)
+            symbol_name = symbol_name.split('@')[0]
+            symbols.add(symbol_name)
+
+    return symbols
+
+
+def verify_executable_links_library(executable_path, library_name):
+    """
+    Verify that an executable actually links against the given library.
+
+    Returns True if the executable links against a library with the given name.
+    """
+    try:
+        result = subprocess.run(
+            ['ldd', executable_path],
+            capture_output=True,
+            text=True,
+            check=True
+        )
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        # If ldd fails or doesn't exist, we'll skip the verification
+        return True
+
+    # Check if library_name appears in the ldd output
+    for line in result.stdout.splitlines():
+        if library_name in line:
+            return True
+
+    return False
+
+
+def get_library_internal_references(library_path, exported_symbols):
+    """
+    Find which exported symbols are referenced internally within the library itself.
+
+    This uses objdump to look for relocations that reference the exported symbols.
+    """
+    try:
+        result = subprocess.run(
+            ['objdump', '-R', library_path],
+            capture_output=True,
+            text=True,
+            check=True
+        )
+    except subprocess.CalledProcessError as e:
+        print(f"Warning: Failed to run objdump on {library_path}: {e}", file=sys.stderr)
+        return set()
+    except FileNotFoundError:
+        print("Warning: 'objdump' command not found. Internal references won't be detected.",
+              file=sys.stderr)
+        return set()
+
+    internal_refs = set()
+    for line in result.stdout.splitlines():
+        parts = line.split()
+        if len(parts) >= 3:
+            # objdump -R format: offset type symbol
+            # The symbol is typically the last field
+            symbol_name = parts[-1]
+            # Strip version information
+            symbol_name = symbol_name.split('@')[0]
+            # Only include if it's one of our exported symbols
+            if symbol_name in exported_symbols:
+                internal_refs.add(symbol_name)
+
+    return internal_refs
+
+
+def find_unused_symbols(library_path, executable_paths, verify_linkage=True):
+    """
+    Find symbols exported by the library that are not used by any executable.
+
+    Args:
+        library_path: Path to the shared library
+        executable_paths: List of paths to executables
+        verify_linkage: Whether to verify executables link against the library
+
+    Returns:
+        Tuple of (unused_symbols, exported_symbols, used_symbols)
+    """
+    library_name = Path(library_path).name
+
+    # Get all exported symbols from the library (excluding public API symbols)
+    exported_symbols = get_exported_symbols(library_path)
+
+    if not exported_symbols:
+        print(f"Warning: No exported symbols found in {library_path}", file=sys.stderr)
+        return set(), set(), set()
+
+    # Collect all symbols used by the executables
+    used_symbols = set()
+
+    # First, check if the library references its own exported symbols internally
+    internal_refs = get_library_internal_references(library_path, exported_symbols)
+    used_symbols.update(internal_refs)
+
+    for exe_path in executable_paths:
+        # Optionally verify linkage
+        if verify_linkage and not verify_executable_links_library(exe_path, library_name):
+            print(f"Warning: {exe_path} does not appear to link against {library_name}",
+                  file=sys.stderr)
+
+        undefined_symbols = get_undefined_symbols(exe_path)
+        # Only count symbols that are actually exported by our library
+        used_symbols.update(undefined_symbols & exported_symbols)
+
+    # Find unused symbols
+    unused_symbols = exported_symbols - used_symbols
+
+    return unused_symbols, exported_symbols, used_symbols
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Find unused exported symbols in a shared library'
+    )
+    parser.add_argument(
+        'library',
+        help='Path to the shared library to analyze'
+    )
+    parser.add_argument(
+        'executables',
+        nargs='+',
+        help='Paths to executables that link against the library'
+    )
+    parser.add_argument(
+        '--no-verify-linkage',
+        action='store_true',
+        help='Skip verification that executables actually link against the library'
+    )
+    parser.add_argument(
+        '--show-used',
+        action='store_true',
+        help='Also show used symbols'
+    )
+    parser.add_argument(
+        '--stats-only',
+        action='store_true',
+        help='Only show statistics, not individual symbols'
+    )
+
+    args = parser.parse_args()
+
+    # Verify library exists
+    library_path = Path(args.library)
+    if not library_path.exists():
+        print(f"Error: Library not found: {library_path}", file=sys.stderr)
+        sys.exit(1)
+
+    # Verify executables exist
+    executable_paths = []
+    for exe in args.executables:
+        exe_path = Path(exe)
+        if not exe_path.exists():
+            print(f"Warning: Executable not found: {exe_path}", file=sys.stderr)
+        else:
+            executable_paths.append(str(exe_path))
+
+    if not executable_paths:
+        print("Error: No valid executables provided", file=sys.stderr)
+        sys.exit(1)
+
+    # Analyze symbols
+    unused, exported, used = find_unused_symbols(
+        str(library_path),
+        executable_paths,
+        verify_linkage=not args.no_verify_linkage
+    )
+
+    # Print results
+    print(f"Analysis of {library_path.name}")
+    print("=" * 70)
+    print(f"Total exported symbols: {len(exported)}")
+    print(f"  (excluding public API symbols starting with 'sd_')")
+    print(f"Used symbols: {len(used)}")
+    print(f"Unused symbols: {len(unused)}")
+    print(f"Usage rate: {len(used)/len(exported)*100:.1f}%" if exported else "N/A")
+    print()
+
+    if not args.stats_only:
+        if unused:
+            print("Unused symbols:")
+            print("-" * 70)
+            for symbol in sorted(unused):
+                print(f"  {symbol}")
+            print()
+        else:
+            print("All exported symbols are used!")
+            print()
+
+        if args.show_used and used:
+            print("Used symbols:")
+            print("-" * 70)
+            for symbol in sorted(used):
+                print(f"  {symbol}")
+            print()
+
+    # Exit with non-zero if there are unused symbols (useful for CI)
+    sys.exit(0 if not unused else 1)
+
+
+if __name__ == '__main__':
+    main()