dt-bindings: add DTS style checker

author Daniel Golle <daniel@makrotopia.org>

Wed, 27 May 2026 19:32:10 +0000 (20:32 +0100)

committer Rob Herring (Arm) <robh@kernel.org>

Wed, 10 Jun 2026 18:37:05 +0000 (13:37 -0500)
author Daniel Golle <daniel@makrotopia.org>
Wed, 27 May 2026 19:32:10 +0000 (20:32 +0100)
committer Rob Herring (Arm) <robh@kernel.org>
Wed, 10 Jun 2026 18:37:05 +0000 (13:37 -0500)
diff --git a/scripts/dtc/dt-check-style b/scripts/dtc/dt-check-style

new file mode 100755 (executable)

index 0000000..2d5723d
--- /dev/null
+++ b/scripts/dtc/dt-check-style
@@ -0,0 +1,1192 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Check DTS coding style on YAML binding examples and on
+# .dts/.dtsi/.dtso source files. Enforces rules from
+# Documentation/devicetree/bindings/dts-coding-style.rst.
+#
+# Two modes:
+#   --mode=relaxed (default)
+#     Only rules that produce zero warnings on the current tree.
+#     Suitable for dt_binding_check.
+#   --mode=strict
+#     All rules. Required for new submissions.
+#
+# Two input types (auto-detected by file extension):
+#   *.yaml             -- DT binding; check each example block
+#   *.dts/*.dtsi/*.dtso -- DTS source; whole file is one block
+#
+# Rules are declared in a registry (see RULES below); each rule is
+# tagged with the lowest mode that runs it. Promoting a rule from
+# 'strict' to 'relaxed' is a one-line change.
+
+import argparse
+import re
+import sys
+from enum import Enum, auto
+
+import ruamel.yaml
+
+
+# ---------------------------------------------------------------------------
+# Line classification
+# ---------------------------------------------------------------------------
+
+class LineType(Enum):
+    """Classification assigned to each source line by classify_lines()."""
+    BLANK = auto()           # nothing left after removing leading blanks
+    COMMENT = auto()         # // ... or /* ... */ on one line
+    COMMENT_START = auto()   # /* without closing */
+    COMMENT_BODY = auto()    # inside a multi-line comment
+    COMMENT_END = auto()     # closing */
+    PREPROCESSOR = auto()    # #include / #define / #ifdef / ...
+    NODE_OPEN = auto()       # something { (with optional label/name/addr)
+    NODE_CLOSE = auto()      # };
+    PROPERTY = auto()        # name = value; or name;
+    CONTINUATION = auto()    # continuation of a multi-line property
+
+
+# Keywords that mark a line starting with '#' as a C preprocessor
+# directive. Any other '#'-prefixed word is left to be classified as
+# DTS content (see is_preprocessor()).
+re_cpp_directive = re.compile(
+    r'^#\s*(include|define|undef|ifdef|ifndef|if|else|elif|endif|'
+    r'pragma|error|warning)\b')
+
+# label: name@addr {  -- label and addr optional; name can be "/"
+# Per the DT spec a node name may start with a digit (e.g. 1wire@...).
+# The address part is captured loosely (any non-space, non-brace run) so
+# malformed addresses (e.g. memory@0x1000) still reach
+# check_unit_address_format() instead of silently bypassing the check.
+# Groups: 1 = label, 2 = node name, 3 = unit address.
+re_node_header = re.compile(
+    r'^(?:([a-zA-Z_][a-zA-Z0-9_]*):\s*)?'
+    r'([a-zA-Z0-9][a-zA-Z0-9,._+-]*|/)'
+    r'(?:@([^\s{]+))?'
+    r'\s*\{$')
+
+# &label {  -- a node opened through a reference to its label.
+# Group 1 = the referenced label (without the '&').
+re_ref_node = re.compile(
+    r'^&([a-zA-Z_][a-zA-Z0-9_]*)\s*\{$')
+
+
+def is_preprocessor(stripped):
+    """Return True when the leading-stripped line begins with a C
+    preprocessor directive keyword, and False for any other
+    '#'-prefixed text (such lines are DTS properties)."""
+    return bool(re_cpp_directive.match(stripped))
+
+
+class DtsLine:
+    """One classified source line, plus the fields that the node and
+    property parsers fill in afterwards."""
+
+    __slots__ = ('lineno', 'raw', 'linetype', 'indent_str', 'stripped',
+                 'prop_name', 'continuations',
+                 'node_name', 'node_addr', 'label', 'ref_name', 'depth',
+                 'closures')
+
+    def __init__(self, lineno, raw, linetype, indent_str, stripped):
+        # Facts known at classification time. lineno is 1-based within
+        # the block; indent_str is the leading whitespace as-is.
+        self.lineno, self.raw = lineno, raw
+        self.linetype = linetype
+        self.indent_str, self.stripped = indent_str, stripped
+
+        # Set by parse_property_name() / the continuation grouping.
+        self.prop_name = None
+        self.continuations = []
+
+        # Set by parse_node_header() on NODE_OPEN lines.
+        self.label = self.node_name = self.node_addr = None
+        self.ref_name = None
+
+        # Structure bookkeeping: depth is filled in by classify_lines;
+        # closures is the count of '}' on a NODE_CLOSE line.
+        self.depth = 0
+        self.closures = 1
+
+
+def _split_code(text):
+    """Separate code from comments on one leading-stripped line.
+
+    Returns (code, opens_block). 'code' is the line with // and
+    /* */ comments dropped and string literals copied verbatim,
+    right-stripped so callers' endswith() tests see code only.
+    'opens_block' is True when a /* on this line has no closing */."""
+    kept = []
+    pos = 0
+    length = len(text)
+    while pos < length:
+        ch = text[pos]
+        pair = text[pos:pos + 2]
+        if ch == '"':
+            # Copy the whole literal; a backslash protects the next
+            # character so an escaped quote does not end the string.
+            end = pos + 1
+            while end < length:
+                cur = text[end]
+                if cur == '\\':
+                    end += 2
+                elif cur == '"':
+                    end += 1
+                    break
+                else:
+                    end += 1
+            kept.append(text[pos:end])
+            pos = end
+        elif pair == '//':
+            # Line comment: nothing after it is code.
+            break
+        elif pair == '/*':
+            close = text.find('*/', pos + 2)
+            if close < 0:
+                return (''.join(kept).rstrip(), True)
+            pos = close + 2
+        else:
+            kept.append(ch)
+            pos += 1
+    return (''.join(kept).rstrip(), False)
+
+
+# Code that consists solely of one or more '}' closures, each
+# optionally followed by ';' and blanks (e.g. "}", "};", "}; };").
+re_only_closures = re.compile(r'(?:\}\s*;?\s*)+$')
+
+
+def classify_lines(text):
+    """Return a list of DtsLine. Tracks { } depth and groups
+    continuation lines onto their leading PROPERTY line.
+
+    'text' is split on '\\n' and line numbers are 1-based. Each
+    DtsLine records the brace depth in effect for it: a NODE_OPEN
+    line carries the depth before its '{' is counted, a NODE_CLOSE
+    line the depth after its closures are subtracted (never below 0).
+    CONTINUATION lines that follow a PROPERTY are moved into that
+    property's 'continuations' list and do not appear in the
+    returned list themselves."""
+    out = []
+    in_block_comment = False
+    in_cpp_macro = False
+    # False while a property started on an earlier line has not yet
+    # reached its terminating ';'.
+    prev_complete = True
+    depth = 0
+
+    # Split preserving the indent string verbatim
+    re_lead = re.compile(r'^([ \t]*)(.*)$')
+
+    for i, raw in enumerate(text.split('\n'), start=1):
+        m = re_lead.match(raw)
+        indent_str = m.group(1)
+        stripped = m.group(2)
+
+        # Continuation of a multi-line C preprocessor directive: the
+        # previous PREPROCESSOR line ended with a '\\' line splice, so
+        # this line is part of the same macro. Treat it as
+        # PREPROCESSOR until the splice chain ends (no trailing '\\'
+        # or a blank line).
+        if in_cpp_macro:
+            dl = DtsLine(i, raw, LineType.PREPROCESSOR,
+                         indent_str, stripped)
+            dl.depth = depth
+            out.append(dl)
+            in_cpp_macro = (bool(stripped) and
+                            stripped.rstrip().endswith('\\'))
+            continue
+
+        # Blank lines are recorded with empty indent/stripped fields.
+        if not stripped:
+            dl = DtsLine(i, raw, LineType.BLANK, '', '')
+            dl.depth = depth
+            out.append(dl)
+            continue
+
+        # Inside a /* ... */ opened on an earlier line: the whole line
+        # is comment; the line holding '*/' ends the comment.
+        if in_block_comment:
+            ltype = (LineType.COMMENT_END if '*/' in stripped
+                     else LineType.COMMENT_BODY)
+            if ltype == LineType.COMMENT_END:
+                in_block_comment = False
+            dl = DtsLine(i, raw, ltype, indent_str, stripped)
+            dl.depth = depth
+            out.append(dl)
+            continue
+
+        if stripped.startswith('#') and is_preprocessor(stripped):
+            dl = DtsLine(i, raw, LineType.PREPROCESSOR,
+                         indent_str, stripped)
+            dl.depth = depth
+            out.append(dl)
+            prev_complete = True
+            in_cpp_macro = stripped.rstrip().endswith('\\')
+            continue
+
+        # Strip comments first so all later structural checks see code
+        # only. An unclosed /* sets in_block_comment for the next line.
+        code, opens_block = _split_code(stripped)
+        if opens_block:
+            in_block_comment = True
+
+        # Pure-comment line: nothing left after stripping. Classify as
+        # COMMENT_START (carries to next line) or COMMENT, and skip the
+        # structural classification entirely.
+        if not code:
+            ltype = LineType.COMMENT_START if opens_block else LineType.COMMENT
+            dl = DtsLine(i, raw, ltype, indent_str, stripped)
+            dl.depth = depth
+            out.append(dl)
+            continue
+
+        # An unfinished property is still open: this line continues
+        # it. Note 'stripped' on the DtsLine is the comment-free code.
+        if not prev_complete:
+            dl = DtsLine(i, raw, LineType.CONTINUATION, indent_str, code)
+            dl.depth = depth
+            out.append(dl)
+            prev_complete = (code.endswith(';') or
+                             code.endswith('{') or
+                             code.endswith('};'))
+            continue
+
+        # NODE_CLOSE: the canonical form is "}" or "};" alone. A line
+        # that is nothing but closures (e.g. "}; };") is still treated
+        # as NODE_CLOSE for depth tracking, but the multi-closure case
+        # is flagged separately by check_node_close_alone via
+        # dl.closures.
+        if re_only_closures.match(code):
+            closures = code.count('}')
+            depth = max(depth - closures, 0)
+            dl = DtsLine(i, raw, LineType.NODE_CLOSE, indent_str, code)
+            dl.depth = depth
+            dl.closures = closures
+            out.append(dl)
+            prev_complete = True
+            continue
+
+        # Any other code ending in '{' opens a node; the header is
+        # parsed for label/name/address or a &reference.
+        if code.endswith('{'):
+            dl = DtsLine(i, raw, LineType.NODE_OPEN, indent_str, code)
+            parse_node_header(dl)
+            dl.depth = depth
+            out.append(dl)
+            depth += 1
+            prev_complete = True
+            continue
+
+        # Property (or first line of a multi-line property).
+        dl = DtsLine(i, raw, LineType.PROPERTY, indent_str, code)
+        parse_property_name(dl)
+        dl.depth = depth
+        out.append(dl)
+        prev_complete = code.endswith(';')
+
+    # Group continuation lines onto their leading PROPERTY.
+    # Blank and comment lines in between do not break the link; any
+    # other line type does, and a CONTINUATION with no property to
+    # attach to stays in the returned list.
+    last_prop = None
+    grouped = []
+    for dl in out:
+        if dl.linetype == LineType.CONTINUATION and last_prop is not None:
+            last_prop.continuations.append(dl)
+            continue
+        if dl.linetype == LineType.PROPERTY:
+            last_prop = dl
+        elif dl.linetype != LineType.BLANK and \
+                dl.linetype not in (LineType.COMMENT, LineType.COMMENT_BODY,
+                                    LineType.COMMENT_END,
+                                    LineType.COMMENT_START):
+            last_prop = None
+        grouped.append(dl)
+    return grouped
+
+
+def parse_node_header(dl):
+    """Fill in the node fields of a NODE_OPEN line from dl.stripped.
+
+    A regular header sets label, node_name and node_addr (each None
+    when absent); a '&label {' header sets ref_name instead. A line
+    matching neither form is left untouched."""
+    header = re_node_header.match(dl.stripped)
+    if header is not None:
+        dl.label, dl.node_name, dl.node_addr = header.groups()
+        return
+    ref = re_ref_node.match(dl.stripped)
+    if ref is not None:
+        dl.ref_name = ref.group(1)
+
+
+def parse_property_name(dl):
+    """Set dl.prop_name to the name at the start of dl.stripped when
+    it is followed (after optional blanks) by '=' or ';'. Leave it
+    unchanged otherwise."""
+    found = re.match(r'^([a-zA-Z0-9#][a-zA-Z0-9,._+#-]*)\s*[=;]',
+                     dl.stripped)
+    if found is None:
+        return
+    dl.prop_name = found.group(1)
+
+
+def collect_labels_and_refs(text):
+    """Return (defined_labels, referenced_labels) found anywhere outside
+    comments and string literals. Labels named fake_intc* (injected
+    by dt-extract-example) are skipped.
+
+    Both results are sets of label names without the trailing ':' or
+    the leading '&'."""
+    # Remove strings and comments in ONE left-to-right pass. Doing it
+    # in separate passes is order-sensitive: stripping comments first
+    # lets a '//' or '/*' inside a string (e.g. "http://x") eat the
+    # rest of the line, hiding real labels and references after it.
+    # With a single alternation whichever construct starts first wins,
+    # so a comment marker inside a string and a quote inside a comment
+    # are both inert.
+    def _blank(m):
+        # Keep an empty literal in place of a string so the
+        # surrounding tokens stay separated; comments vanish.
+        return '""' if m.group(0).startswith('"') else ''
+
+    stripped = re.sub(
+        r'"(?:[^"\\]|\\.)*"|/\*[\s\S]*?\*/|//[^\n]*', _blank, text)
+    defined = set()
+    referenced = set()
+    # A label precedes a node header; the next non-space token may start
+    # with a letter (foo, &ref), a digit (1wire), or '/' (root node).
+    for m in re.finditer(
+            r'(?:^|[\s{])([a-zA-Z_][a-zA-Z0-9_]*):\s*[a-zA-Z0-9/&]',
+            stripped):
+        name = m.group(1)
+        if not name.startswith('fake_intc'):
+            defined.add(name)
+    for m in re.finditer(r'&([a-zA-Z_][a-zA-Z0-9_]*)', stripped):
+        referenced.add(m.group(1))
+    return defined, referenced
+
+
+# ---------------------------------------------------------------------------
+# Rule registry
+# ---------------------------------------------------------------------------
+
+class Ctx:
+    """Bundle of everything a rule check receives: the parsed lines,
+    the raw block text, the active mode and the indent kind."""
+
+    def __init__(self, lines, text, mode, indent_kind):
+        self.lines, self.text = lines, text
+        # mode is 'relaxed' or 'strict'; indent_kind is 'spaces' or
+        # 'tab'.
+        self.mode, self.indent_kind = mode, indent_kind
+
+
+class Rule:
+    """A registry entry: a named check function together with the
+    mode tag ('relaxed' or 'strict') and the input types it covers."""
+
+    __slots__ = ('name', 'mode', 'description', 'check', 'applies_to')
+
+    def __init__(self, name, mode, description, check,
+                 applies_to=('yaml', 'dts', 'dtsi', 'dtso')):
+        self.name, self.description = name, description
+        self.mode = mode
+        self.check = check
+        # Input types this rule covers; every type by default.
+        self.applies_to = applies_to
+
+
+# --- individual rule check functions --------------------------------------
+
+def check_trailing_whitespace(ctx):
+    """Yield (lineno, message) for every line in ctx.lines whose raw
+    text ends in whitespace."""
+    for line in ctx.lines:
+        raw = line.raw
+        if raw.rstrip() == raw:
+            continue
+        yield (line.lineno, 'trailing whitespace')
+
+
+def check_tab_in_dts(ctx):
+    """Flag literal tab characters when the input is space-indented.
+
+    Runs only when ctx.indent_kind is 'spaces'; for any other indent
+    kind nothing is reported. PREPROCESSOR and BLANK lines are
+    exempt, every other line containing a tab anywhere in its raw
+    text is reported.
+    """
+    if ctx.indent_kind != 'spaces':
+        return
+    for line in ctx.lines:
+        exempt = line.linetype in (LineType.PREPROCESSOR, LineType.BLANK)
+        if not exempt and '\t' in line.raw:
+            yield (line.lineno, 'tab character not allowed in DTS example')
+
+
+def check_mixed_indent_chars(ctx):
+    """Flag lines whose leading whitespace contains both a space and
+    a tab. Unindented and PREPROCESSOR lines are not examined."""
+    for line in ctx.lines:
+        indent = line.indent_str
+        if not indent or line.linetype == LineType.PREPROCESSOR:
+            continue
+        if ' ' in indent and '\t' in indent:
+            yield (line.lineno, 'mixed tabs and spaces in indent')
+
+
+def detect_indent_unit(ctx):
+    """Return the indent string of the first usable depth-1 line.
+
+    Canonical results are '  ' (2 spaces), '    ' (4 spaces) or
+    '\\t' (tab); any other indent found is returned as-is so the
+    indent-unit rules can report it. Returns None when no depth-1
+    line carries an indent. BLANK, PREPROCESSOR, COMMENT_BODY and
+    COMMENT_END lines are never used for detection."""
+    for line in ctx.lines:
+        if line.depth != 1:
+            continue
+        if line.linetype in (LineType.BLANK, LineType.PREPROCESSOR,
+                             LineType.COMMENT_BODY, LineType.COMMENT_END):
+            continue
+        if line.indent_str:
+            # Canonical or not, the first indent seen is the answer.
+            return line.indent_str
+    return None
+
+
+def check_indent_unit_relaxed(ctx):
+    """Report (on line 1) a detected indent unit that is neither 2
+    nor 4 spaces. Nothing is reported when no unit was detected."""
+    unit = detect_indent_unit(ctx)
+    if unit is not None and unit not in ('  ', '    '):
+        yield (1, 'indent unit must be 2 or 4 spaces, got %r' % unit)
+
+
+def check_indent_unit_dts(ctx):
+    """Report (on line 1) a detected indent unit other than a single
+    tab. Nothing is reported when no unit was detected."""
+    unit = detect_indent_unit(ctx)
+    if unit is not None and unit != '\t':
+        yield (1, 'indent unit must be 1 tab in DTS, got %r' % unit)
+
+
+def check_indent_unit_strict(ctx):
+    """Space-indented input: the detected unit must be exactly 4
+    spaces, reported on line 1 otherwise. For any other indent kind
+    this check reports nothing."""
+    unit = detect_indent_unit(ctx)
+    if unit is None or ctx.indent_kind != 'spaces':
+        return
+    if unit != '    ':
+        yield (1, 'indent unit must be 4 spaces in strict mode, '
+               'got %r' % unit)
+
+
+def check_indent_consistent(ctx):
+    """Every indented line must be indented by exactly the detected
+    unit repeated once per nesting level.
+
+    The check is skipped when no unit was detected or when the unit
+    is not canonical for the indent kind (2/4 spaces, or one tab);
+    the indent-unit rules report that case."""
+    unit = detect_indent_unit(ctx)
+    if unit is None:
+        return
+    canonical = ('  ', '    ') if ctx.indent_kind == 'spaces' else ('\t',)
+    if unit not in canonical:
+        return
+
+    for line in ctx.lines:
+        # Continuations align to '<', not to the indent unit; the
+        # other exempt types carry no structural indent.
+        if line.linetype in (LineType.BLANK, LineType.PREPROCESSOR,
+                             LineType.CONTINUATION,
+                             LineType.COMMENT_BODY, LineType.COMMENT_END):
+            continue
+        if not line.indent_str:
+            continue
+        # NODE_CLOSE lines store the post-decrement depth, which is
+        # the level their brace is expected to sit at.
+        if line.indent_str != unit * line.depth:
+            yield (line.lineno,
+                   'indent mismatch (expected depth %d * %r)' %
+                   (line.depth, unit))
+
+
+def check_blank_lines(ctx):
+    """Flag runs of blank lines, and blank lines that sit directly
+    after a node's opening line or directly before a closing line.
+    All 'consecutive' findings are produced first, then the
+    start/end-of-body findings."""
+    lines = ctx.lines
+    # Pass 1: a blank line immediately following another blank line.
+    for before, current in zip(lines, lines[1:]):
+        if current.linetype == LineType.BLANK and \
+                before.linetype == LineType.BLANK:
+            yield (current.lineno, 'consecutive blank lines')
+    # Pass 2: blank right after { or right before }
+    last = len(lines) - 1
+    for idx, line in enumerate(lines):
+        if line.linetype != LineType.BLANK:
+            continue
+        if idx > 0 and lines[idx - 1].linetype == LineType.NODE_OPEN:
+            yield (line.lineno, 'blank line at start of node body')
+        if idx < last and lines[idx + 1].linetype == LineType.NODE_CLOSE:
+            yield (line.lineno, 'blank line at end of node body')
+
+
+def _walk_bodies(lines):
+    """Yield, for each node body in the input, the list of NODE_OPEN
+    lines that are its immediate children.
+
+    A body is yielded when its closing brace is seen; bodies still
+    open at the end of input (including the top level) are yielded
+    last, innermost first. Reference nodes (&label { ... }) are
+    included in the lists like any other NODE_OPEN line; callers
+    that need to ignore them filter on ref_name / node_name.
+
+    A single NODE_CLOSE line may close several nodes ("}; };"); the
+    classifier records how many in dl.closures and one body is
+    popped per closure so the stack stays in step with the depth
+    tracking."""
+    body_stack = [[]]
+    for dl in lines:
+        if dl.linetype == LineType.NODE_OPEN:
+            body_stack[-1].append(dl)
+            body_stack.append([])
+        elif dl.linetype == LineType.NODE_CLOSE:
+            for _ in range(dl.closures):
+                if len(body_stack) <= 1:
+                    # Unbalanced; ignore to avoid crashing on
+                    # malformed input
+                    break
+                yield body_stack.pop()
+    while body_stack:
+        yield body_stack.pop()
+
+
+def _natural_sort_key(s):
+    """Build a comparison key in which runs of digits compare by
+    numeric value, so 'foo10' sorts after 'foo2'. The key is a tuple
+    of (kind, value) pairs: (0, int) for digit runs, (1, str) for
+    everything else."""
+    return tuple(
+        (0, int(chunk)) if chunk.isdigit() else (1, chunk)
+        for chunk in re.split(r'(\d+)', s))
+
+
+def check_child_address_order(ctx):
+    """Within each node body, children carrying a unit address
+    (foo@N) must appear in ascending address order.
+
+    Addresses are read as comma-separated hexadecimal cells and
+    compared as tuples; a child whose address does not parse is left
+    out of the comparison."""
+    for children in _walk_bodies(ctx.lines):
+        ordered = []
+        for child in children:
+            if child.node_addr is None:
+                continue
+            try:
+                cells = tuple(int(cell, 16)
+                              for cell in child.node_addr.split(','))
+            except ValueError:
+                continue
+            ordered.append((cells, child))
+        # Compare each addressed child with the one before it.
+        for (prev_cells, _), (cells, child) in zip(ordered, ordered[1:]):
+            if cells < prev_cells:
+                yield (child.lineno,
+                       'child node @%s out of address order' %
+                       child.node_addr)
+
+
+def check_child_name_order(ctx):
+    """Within each node body, children without a unit address must
+    appear in natural-sort order of their node names.
+
+    Children with an address, the root node ('/'), reference nodes
+    and headers whose name could not be parsed take no part in the
+    comparison."""
+    for children in _walk_bodies(ctx.lines):
+        named = [c for c in children
+                 if c.node_addr is None
+                 and c.node_name not in (None, '/')
+                 and c.ref_name is None]
+        keys = [_natural_sort_key(c.node_name) for c in named]
+        for idx in range(1, len(named)):
+            if keys[idx] < keys[idx - 1]:
+                child = named[idx]
+                yield (child.lineno,
+                       'child node %r out of name order' % child.node_name)
+
+
+def _property_bucket(name):
+    """Return (bucket, sub_key) placing a property in canonical order.
+
+    Buckets:
+       0 compatible
+       1 reg / reg-names
+       2 ranges
+       3 standard properties (no vendor comma in #-stripped name)
+       4 vendor-specific properties
+       5 status
+    The fixed-slot properties get an integer sub_key fixing their
+    position inside the bucket. For buckets 3 and 4 the sub_key is
+    None, meaning the within-bucket key comes from the pairing
+    rules."""
+    fixed_slots = {
+        'compatible': (0, 0),
+        'reg': (1, 0),
+        'reg-names': (1, 1),
+        'ranges': (2, 0),
+        'status': (5, 0),
+    }
+    slot = fixed_slots.get(name)
+    if slot is not None:
+        return slot
+    is_vendor = ',' in name.lstrip('#')
+    return (4 if is_vendor else 3, None)
+
+
+# Declarative pairing rules: each is a callable
+#   (name, all_names) -> anchor_name_or_None
+# If a rule returns an anchor, the property sorts immediately after the
+# anchor. Rules are tried in order; the first match wins. If none
+# matches, the within-bucket key falls back to natural sort by the
+# #-stripped name.
+
+def _pair_pinctrl_names(name, all_names):
+    """Anchor 'pinctrl-names' on the highest-numbered pinctrl-N
+    present in the same node. Returns None for any other property,
+    or when the node has no pinctrl-N at all."""
+    if name != 'pinctrl-names':
+        return None
+    numbered = [cand for cand in all_names
+                if re.match(r'^pinctrl-\d+$', cand)]
+    return max(numbered, key=_natural_sort_key) if numbered else None
+
+
+def _pair_x_names(name, all_names):
+    """Anchor a generic <x>-names property on the property it names.
+
+    The plural owner (<x>s, as in clocks/clock-names) is preferred;
+    failing that the singular <x> is used. Returns None when 'name'
+    does not end in '-names' or neither owner is present."""
+    suffix = '-names'
+    if not name.endswith(suffix):
+        return None
+    stem = name[:-len(suffix)]
+    for owner in (stem + 's', stem):
+        if owner in all_names:
+            return owner
+    return None
+
+
+PAIRING_RULES = (_pair_pinctrl_names, _pair_x_names)
+
+
+def _property_sort_key(name, all_names):
+    """Compute the ordering key of a property among its siblings.
+
+    The key is (bucket, within_key, tiebreak). Fixed-slot properties
+    use an empty within_key and their slot number as tiebreak. A
+    property that a pairing rule attaches to anchor X gets X's
+    natural-sort key with tiebreak 1, so it lands right after X;
+    everything else gets its own #-stripped natural-sort key with
+    tiebreak 0."""
+    bucket, fixed_sub = _property_bucket(name)
+    if fixed_sub is not None:
+        return (bucket, (), fixed_sub)
+
+    # First pairing rule that yields an anchor wins; later rules are
+    # not consulted.
+    answers = (rule(name, all_names) for rule in PAIRING_RULES)
+    anchor = next((a for a in answers if a is not None), None)
+    if anchor is None:
+        return (bucket, _natural_sort_key(name.lstrip('#')), 0)
+    return (bucket, _natural_sort_key(anchor.lstrip('#')), 1)
+
+
+def check_property_order(ctx):
+    """Properties within a node body must appear in canonical order:
+    compatible, reg(/reg-names), ranges, then the standard group, then
+    the vendor-specific group, then status. Inside the standard and
+    vendor groups, pairing rules apply (e.g. <x>-names follows <x>);
+    everything else falls back to natural sort by the #-stripped name.
+
+    Yields (lineno, message) for each property whose sort key is
+    lower than that of the property immediately before it in the
+    same body. Only properties with a parsed name that sit directly
+    in the body (not in child nodes) are compared."""
+    lines = ctx.lines
+    for i, dl in enumerate(lines):
+        if dl.linetype != LineType.NODE_OPEN:
+            continue
+        body_depth = dl.depth + 1
+        props = []
+        for j in range(i + 1, len(lines)):
+            d = lines[j]
+            # NODE_CLOSE lines carry the depth after their closures.
+            # Anything below body_depth ends this body; this includes
+            # a multi-closure line ("}; }; };") that also closes
+            # ancestors and so ends up lower than this node's own
+            # depth. An exact-equality test would miss that line and
+            # keep collecting properties from unrelated later nodes.
+            if d.linetype == LineType.NODE_CLOSE and d.depth < body_depth:
+                break
+            if d.linetype == LineType.PROPERTY and d.depth == body_depth \
+                    and d.prop_name is not None:
+                props.append(d)
+        if len(props) < 2:
+            continue
+        all_names = [p.prop_name for p in props]
+        keyed = [(p, _property_sort_key(p.prop_name, all_names))
+                 for p in props]
+        for (prev, prev_key), (p, key) in zip(keyed, keyed[1:]):
+            if key < prev_key:
+                yield (p.lineno,
+                       'property %r out of canonical order '
+                       '(should sort before %r)' %
+                       (p.prop_name, prev.prop_name))
+
+
+def _strip_strings_and_comments(text):
+    """Blank out string literals and drop /* */ and // comments from
+    a single line, so syntactic checks (whitespace, hex case, etc.)
+    don't false-positive on the contents of quoted strings or
+    comments.
+
+    Each string literal is replaced by an empty literal ("") and
+    each comment by nothing. An unclosed /* on the line is treated
+    as a comment running to end of line. Returns the resulting
+    text."""
+    # All constructs are matched in ONE left-to-right pass so that
+    # whichever starts first wins. Separate passes are order
+    # sensitive: blanking strings first lets a quote inside a comment
+    # pair up with a later quote and swallow the code between them.
+    def _blank(m):
+        return '""' if m.group(0).startswith('"') else ''
+
+    return re.sub(r'"(?:[^"\\]|\\.)*"|/\*.*?\*/|/\*.*$|//.*$',
+                  _blank, text)
+
+
+def check_required_blank_lines(ctx):
+    """A blank line must precede each child node and the 'status'
+    property within a node body, except when these are the first
+    substantive item in the body.
+
+    Each NODE_OPEN line starts its own scan over the lines that
+    follow it; comment, preprocessor and continuation lines are
+    neither substantive items nor separators. Yields
+    (lineno, message) for every offending child node or 'status'
+    property."""
+    lines = ctx.lines
+    for i, open_dl in enumerate(lines):
+        if open_dl.linetype != LineType.NODE_OPEN:
+            continue
+        body_depth = open_dl.depth + 1
+        prev_substantive = None   # last property/child seen in body
+        between_blanks = 0        # blank lines since prev_substantive
+        depth_inside = 0          # > 0 while inside a child's body
+        for j in range(i + 1, len(lines)):
+            d = lines[j]
+            # The close line of this node itself ends the scan.
+            if d.linetype == LineType.NODE_CLOSE and \
+                    d.depth == body_depth - 1 and depth_inside == 0:
+                break
+            # Track depth inside nested children so we only look at
+            # immediate-body items.
+            if d.linetype == LineType.NODE_OPEN and \
+                    d.depth >= body_depth and depth_inside > 0:
+                depth_inside += 1
+                continue
+            # NOTE(review): a NODE_CLOSE line is counted as a single
+            # closure here even when dl.closures > 1 ("}; };") --
+            # TODO confirm whether multi-closure lines need handling.
+            if d.linetype == LineType.NODE_CLOSE and depth_inside > 0:
+                depth_inside -= 1
+                continue
+            if depth_inside > 0:
+                continue
+            # Blank lines before the first substantive item are not
+            # counted.
+            if d.linetype == LineType.BLANK:
+                if prev_substantive is not None:
+                    between_blanks += 1
+                continue
+            if d.linetype in (LineType.COMMENT, LineType.COMMENT_START,
+                              LineType.COMMENT_BODY, LineType.COMMENT_END,
+                              LineType.PREPROCESSOR):
+                continue
+            if d.linetype == LineType.CONTINUATION:
+                continue
+
+            needs_blank = False
+            if d.linetype == LineType.NODE_OPEN:
+                needs_blank = True
+                depth_inside = 1   # entered the child body
+            elif d.linetype == LineType.PROPERTY and d.prop_name == 'status':
+                needs_blank = True
+
+            # Only complain when something substantive precedes the
+            # item with no blank line in between.
+            if needs_blank and prev_substantive is not None and \
+                    between_blanks == 0:
+                if d.linetype == LineType.NODE_OPEN:
+                    yield (d.lineno,
+                           'child node must be preceded by a blank line')
+                else:
+                    yield (d.lineno,
+                           '"status" must be preceded by a blank line')
+
+            prev_substantive = d
+            between_blanks = 0
+
+
+def check_hex_case(ctx):
+    """Flag hex literals (0x...) that use an uppercase 'X' prefix or
+    uppercase digits. Blank, comment and preprocessor lines are not
+    examined; strings and trailing comments on code lines are
+    stripped before matching."""
+    for line in ctx.lines:
+        if line.linetype in (LineType.BLANK, LineType.COMMENT,
+                             LineType.COMMENT_START, LineType.COMMENT_BODY,
+                             LineType.COMMENT_END, LineType.PREPROCESSOR):
+            continue
+        code = _strip_strings_and_comments(line.raw)
+        for lit in re.findall(r'\b0[xX][0-9a-fA-F]+\b', code):
+            if lit[1] == 'X' or any(ch.isupper() for ch in lit[2:]):
+                yield (line.lineno,
+                       'hex literal %r must be lowercase' % lit)
+
+
+def check_unit_address_format(ctx):
+    """Check the unit address of every NODE_OPEN line that has one.
+
+    Each comma-separated part must be lowercase hex, without a '0x'
+    prefix and without leading zeros; a lone '0' is accepted. Only
+    the first problem found in an address is reported."""
+    for line in ctx.lines:
+        if line.linetype != LineType.NODE_OPEN or line.node_addr is None:
+            continue
+        addr = line.node_addr
+        for cell in addr.split(','):
+            # Tests are ordered: prefix, validity, case, zeros.
+            if cell[:2] in ('0x', '0X'):
+                problem = 'unit address %r must not have a "0x" prefix'
+            elif not re.match(r'^[0-9a-fA-F]+$', cell):
+                problem = 'unit address %r is not valid hex'
+            elif any(c in 'ABCDEF' for c in cell):
+                problem = 'unit address %r must be lowercase hex'
+            elif len(cell) > 1 and cell.startswith('0'):
+                problem = 'unit address %r has leading zeros'
+            else:
+                continue
+            yield (line.lineno, problem % addr)
+            break
+
+
+def check_value_whitespace(ctx):
+    """A <...> cell list must have no whitespace directly after '<'
+    or directly before '>'. Continuation lines are joined onto the
+    property so a <...> split across lines is checked too; a '<' or
+    '>' at a line break is glued straight to the neighbouring value,
+    so the break itself is not counted as padding. Outside strings
+    and comments only.
+
+    Parenthesised expressions inside a cell list are collapsed to a
+    single placeholder before matching, so operators such as '<<',
+    '>>', '<' or '>' inside "(1 << 4)" are not mistaken for cell
+    list delimiters. At most one finding is yielded per property."""
+    for dl in ctx.lines:
+        if dl.linetype != LineType.PROPERTY:
+            continue
+        segs = [_strip_strings_and_comments(dl.raw).strip()]
+        for cont in dl.continuations:
+            segs.append(_strip_strings_and_comments(cont.stripped).strip())
+        text = ''
+        for s in segs:
+            if not s:
+                continue
+            if not text or text.endswith('<') or s.startswith('>'):
+                text += s
+            else:
+                text += ' ' + s
+        # Collapse (...) groups innermost-first until none remain;
+        # each becomes '0' so it still counts as a value next to the
+        # delimiters.
+        previous = None
+        while previous != text:
+            previous = text
+            text = re.sub(r'\([^()]*\)', '0', text)
+        for m in re.finditer(r'<([^<>]*)>', text):
+            content = m.group(1)
+            if content and content != content.strip():
+                yield (dl.lineno, 'extra whitespace inside <...>')
+                break
+
+
+def check_node_close_alone(ctx):
+    """The closing '};' of a node must sit on a line of its own.
+
+    Two situations are reported: a NODE_CLOSE line holding more than
+    one closure (dl.closures > 1, e.g. "}; };"), and any other code
+    line that still contains '};' once strings and comments are
+    stripped. Blank, comment and preprocessor lines are ignored."""
+    message = 'closing brace must be on its own line'
+    for line in ctx.lines:
+        kind = line.linetype
+        if kind == LineType.NODE_CLOSE:
+            if line.closures > 1:
+                yield (line.lineno, message)
+        elif kind not in (LineType.BLANK, LineType.COMMENT,
+                          LineType.COMMENT_START, LineType.COMMENT_BODY,
+                          LineType.COMMENT_END, LineType.PREPROCESSOR):
+            if '};' in _strip_strings_and_comments(line.raw):
+                yield (line.lineno, message)
+
+
+def _display_col(text):
+    """Return the visual width of text in columns.
+
+    Every character counts as one column except a tab, which moves
+    the column to the next multiple of 8. This is the rendering the
+    kernel-wide line length convention assumes."""
+    width = 0
+    for ch in text:
+        # A tab fills whatever remains of the current 8-column cell.
+        width += (8 - width % 8) if ch == '\t' else 1
+    return width
+
+
+def check_line_length(ctx):
+    """Report every non-blank line wider than 80 display columns.
+
+    The width comes from _display_col(), so a tab advances to the
+    next 8-column stop. Yields (lineno, message) tuples; the message
+    carries the measured width."""
+    for dl in ctx.lines:
+        if dl.linetype != LineType.BLANK:
+            width = _display_col(dl.raw)
+            if width > 80:
+                yield (dl.lineno,
+                       'line exceeds 80 columns (%d)' % width)
+
+
+def check_continuation_alignment(ctx):
+    """Check that the continuation lines of a multi-line property
+    start in the column of the value's opening character.
+
+    The reference column is the display column of the first '<' or
+    '"' found after the first '=' on the property's leading line.
+    Each continuation's indent is measured with _display_col() as
+    well, so tab-indented .dts files (tabs followed by spaces) are
+    compared correctly. Properties without continuations, without
+    '=' or without such an opening character are skipped.
+
+    Yields (lineno, message) tuples, one per misaligned continuation
+    line; the reported column is 1-based."""
+    opener = re.compile(r'[<"]')
+    for dl in ctx.lines:
+        if dl.linetype != LineType.PROPERTY or not dl.continuations:
+            continue
+        eq = dl.raw.find('=')
+        if eq < 0:
+            continue
+        # Search only to the right of the '='.
+        hit = opener.search(dl.raw, eq + 1)
+        if hit is None:
+            continue
+        want = _display_col(dl.raw[:hit.start()])
+        for cont in dl.continuations:
+            if _display_col(cont.indent_str) == want:
+                continue
+            yield (cont.lineno,
+                   'continuation should align to column %d '
+                   '(under "<" or \\")' % (want + 1))
+
+
+def check_unclosed_block_comment(ctx):
+    """Report a /* block comment that is still open when the block
+    ends.
+
+    The line number of the currently open comment is tracked: a
+    COMMENT_START line opens one, a COMMENT_END line closes it. A
+    COMMENT_BODY line seen while nothing is open means the comment
+    began as a tail on a code line (classified as PROPERTY or
+    similar), so that body line is taken as the opening position.
+
+    Yields at most one (lineno, message) tuple."""
+    pending = None
+    for dl in ctx.lines:
+        kind = dl.linetype
+        if kind == LineType.COMMENT_END:
+            pending = None
+        elif kind == LineType.COMMENT_START:
+            pending = dl.lineno
+        elif kind == LineType.COMMENT_BODY and pending is None:
+            # Opened by a /* tail on a code line: the first orphan
+            # body line is the best position available.
+            pending = dl.lineno
+    if pending is not None:
+        yield (pending, 'unclosed /* block comment')
+
+
+def check_unused_labels(ctx):
+    """Report labels that are defined but never referenced.
+
+    collect_labels_and_refs() supplies the defined and the referenced
+    names; their difference is reported in sorted order. The line
+    number is that of the first line in ctx.text containing the
+    label name followed by ':', or 1 if no such line is found.
+
+    Yields (lineno, message) tuples."""
+    defined, referenced = collect_labels_and_refs(ctx.text)
+    unused = defined - referenced
+    for label in sorted(unused):
+        pattern = r'(?m)^.*\b' + re.escape(label) + r'\s*:'
+        hit = re.search(pattern, ctx.text)
+        if hit:
+            # Newlines before the matching line give its 0-based index.
+            lineno = ctx.text.count('\n', 0, hit.start()) + 1
+        else:
+            lineno = 1
+        yield (lineno, 'label %r defined but never &-referenced' % label)
+
+
+# --- registry --------------------------------------------------------------
+
+# Every entry is Rule(name, mode, description, check[, applies_to]):
+#   name        -- tag printed in brackets with each finding and
+#                  listed by --list-rules
+#   mode        -- lowest mode that runs the rule: 'relaxed' rules
+#                  run in both modes, 'strict' rules only in strict
+#   description -- one-line summary shown by --list-rules
+#   check       -- generator called with the block context; yields
+#                  (lineno, message) tuples
+#   applies_to  -- input kinds ('yaml', 'dts', 'dtsi', 'dtso') the
+#                  rule runs on; when omitted the default of Rule
+#                  applies (defined with the Rule class)
+# Findings are sorted by line and rule name afterwards, so the order
+# of this list only affects the --list-rules output.
+RULES = [
+    # 'relaxed' is the default; rules in this group must produce zero
+    # output on a clean kernel tree (post the small prep-cleanup
+    # commit at the head of this series).
+    Rule('trailing-whitespace', 'relaxed',
+         'no trailing whitespace on any line',
+         check_trailing_whitespace),
+    Rule('tab-in-dts', 'relaxed',
+         'YAML examples may not contain tab characters',
+         check_tab_in_dts, applies_to=('yaml',)),
+    Rule('mixed-indent-chars', 'relaxed',
+         'indent must not mix tabs and spaces',
+         check_mixed_indent_chars),
+    Rule('unclosed-block-comment', 'relaxed',
+         'every /* block comment must close with */',
+         check_unclosed_block_comment),
+
+    # DTS files always use tabs; this is not negotiable per kernel
+    # coding style (.dts files are real source). Relaxed mode.
+    Rule('indent-unit-dts', 'relaxed',
+         'DTS files: 1 tab per nesting level',
+         check_indent_unit_dts,
+         applies_to=('dts', 'dtsi', 'dtso')),
+
+    # 'strict' rules are opt-in (e.g. for new submissions via
+    # checkpatch.pl in a follow-up series). They flag many existing
+    # files and can be promoted to relaxed once those are cleaned up.
+    # NOTE(review): 'indent-unit' uses check_indent_unit_relaxed but
+    # is registered as 'strict', like 'indent-unit-strict'; both run
+    # together in strict mode and neither runs in relaxed mode.
+    Rule('indent-unit', 'strict',
+         'YAML: 2 or 4 spaces per level',
+         check_indent_unit_relaxed, applies_to=('yaml',)),
+    Rule('indent-unit-strict', 'strict',
+         'YAML: must be 4 spaces per level',
+         check_indent_unit_strict, applies_to=('yaml',)),
+    Rule('indent-consistent', 'strict',
+         'every line indented at depth * unit',
+         check_indent_consistent),
+    Rule('blank-lines', 'strict',
+         'no consecutive blanks; no blanks at node body edges',
+         check_blank_lines),
+    Rule('child-address-order', 'strict',
+         'addressed siblings must be in ascending address order',
+         check_child_address_order),
+    Rule('child-name-order', 'strict',
+         'unaddressed siblings must be in natural-sort name order',
+         check_child_name_order),
+    Rule('property-order', 'strict',
+         'canonical bucket + pairing + natural-sort order of properties',
+         check_property_order),
+    Rule('required-blank-lines', 'strict',
+         'blank line before child nodes and before "status"',
+         check_required_blank_lines),
+    Rule('hex-case', 'strict',
+         'hex literals must be lowercase',
+         check_hex_case),
+    Rule('unit-address-format', 'strict',
+         'unit addresses must be lowercase hex without leading zeros',
+         check_unit_address_format),
+    Rule('value-whitespace', 'strict',
+         'no whitespace directly inside <...> brackets',
+         check_value_whitespace),
+    Rule('node-close-alone', 'strict',
+         'closing brace must be on its own line',
+         check_node_close_alone),
+    Rule('line-length', 'strict',
+         'lines must not exceed 80 columns',
+         check_line_length),
+    Rule('continuation-alignment', 'strict',
+         'multi-line property continuations align under "<" or "\\""',
+         check_continuation_alignment),
+    # Not applied to .dtsi/.dtso: labels there are meant to be used
+    # by whoever includes the file.
+    Rule('unused-labels', 'strict',
+         'every label must be &-referenced in the same example/file '
+         '(skipped for .dtsi/.dtso since labels there are exported)',
+         check_unused_labels, applies_to=('yaml', 'dts')),
+]
+
+
+def select_rules(mode, input_kind):
+    """Return the RULES entries to run for a mode and an input type,
+    in registry order.
+
+    A rule is kept when its own mode does not rank above the
+    requested one ('relaxed' ranks below 'strict') and input_kind is
+    listed in its applies_to."""
+    rank = {'relaxed': 0, 'strict': 1}
+    return [r for r in RULES
+            if rank[r.mode] <= rank[mode]
+            and input_kind in r.applies_to]
+
+
+# ---------------------------------------------------------------------------
+# Block runner
+# ---------------------------------------------------------------------------
+
+def check_block(text, mode, indent_kind, input_type):
+    """Classify one block of DTS text and run every rule selected for
+    (mode, input_type) on it.
+
+    Returns a list of (lineno, rule_name, message) tuples sorted by
+    line number, then by rule name. Line numbers are the ones the
+    rules yield, i.e. relative to the block."""
+    ctx = Ctx(classify_lines(text), text, mode, indent_kind)
+    findings = [(lineno, rule.name, msg)
+                for rule in select_rules(mode, input_type)
+                for lineno, msg in rule.check(ctx)]
+    findings.sort(key=lambda found: found[:2])
+    return findings
+
+
+# ---------------------------------------------------------------------------
+# Input drivers (YAML examples vs raw DTS)
+# ---------------------------------------------------------------------------
+
+def _yaml_loader():
+    """Return a new ruamel.yaml.YAML() loader with default settings.
+
+    NOTE(review): iter_yaml_examples() reads .lc position data from
+    the loaded 'examples' sequence; presumably that relies on the
+    default loader type -- confirm before passing typ=... here."""
+    return ruamel.yaml.YAML()
+
+
+def iter_yaml_examples(filepath):
+    """Yield (example_text, base_lineno_in_file, example_index) for
+    every string entry of the top-level 'examples' list of a YAML
+    binding.
+
+    base_lineno_in_file is the file line that line 1 of the example
+    text is reported at; it falls back to 1 when the loader gives no
+    position for the entry. example_index is the entry's position in
+    the list, counting non-string entries as well.
+
+    A file that cannot be opened or parsed is reported on stderr and
+    yields nothing. A document that is not a mapping, has no
+    'examples' key, or whose 'examples' value is not a sequence
+    yields nothing either."""
+    yaml = _yaml_loader()
+    try:
+        with open(filepath, encoding='utf-8') as f:
+            data = yaml.load(f)
+    except Exception as e:
+        # Boundary with arbitrary user files: report and move on, so
+        # one broken binding does not abort the whole run.
+        print('%s: error loading YAML: %s' % (filepath, e),
+              file=sys.stderr)
+        return
+    if not isinstance(data, dict):
+        return
+    examples = data.get('examples')
+    # Only a real sequence holds examples. A bare string or a mapping
+    # is iterable too, but walking it would hand out single
+    # characters or mapping keys as if they were example blocks.
+    if not isinstance(examples, (list, tuple)):
+        return
+    for i, ex in enumerate(examples):
+        if not isinstance(ex, str):
+            continue
+        try:
+            # Presumably lc.item(i)[0] is the 0-based line of the
+            # "- |" marker, making +2 the 1-based line of the first
+            # example line -- confirm against ruamel.yaml.
+            base = examples.lc.item(i)[0] + 2
+        except Exception:
+            base = 1
+        yield (str(ex), base, i)
+
+
+def iter_dts_file(filepath):
+    """Yield the whole file as one block: (text, 1, None).
+
+    The file is read as UTF-8. The tuple has the same shape as the
+    ones from iter_yaml_examples(): base line 1 and no example
+    index. If reading fails, the error is printed to stderr and
+    nothing is yielded."""
+    try:
+        with open(filepath, encoding='utf-8') as src:
+            whole = src.read()
+    except Exception as e:
+        print('%s: error reading: %s' % (filepath, e), file=sys.stderr)
+    else:
+        yield (whole, 1, None)
+
+
+# ---------------------------------------------------------------------------
+# Top-level processing
+# ---------------------------------------------------------------------------
+
+def input_kind(filepath):
+    """Map a path to its input type from the file extension, compared
+    case-insensitively: 'yaml' for .yaml/.yml, 'dts', 'dtsi' or
+    'dtso' for the matching DTS extension, None for anything else."""
+    name = filepath.lower()
+    by_suffix = (
+        (('.yaml', '.yml'), 'yaml'),
+        (('.dts',), 'dts'),
+        (('.dtsi',), 'dtsi'),
+        (('.dtso',), 'dtso'),
+    )
+    for suffixes, kind in by_suffix:
+        if name.endswith(suffixes):
+            return kind
+    return None
+
+
+# All input types that use tab indentation and follow DTS coding style.
+# collect_findings() reads any file of these kinds as one whole-file
+# block and checks it with indent_kind 'tab'.
+DTS_FAMILY = ('dts', 'dtsi', 'dtso')
+
+
+def collect_findings(filepath, mode):
+    """Check one file and return a (lines, count) pair.
+
+    lines holds the formatted output strings,
+    "<file>:<line>:[ example <n>] [<rule>] <message>", where <line>
+    is the block-relative line shifted by the block's base line and
+    the example tag appears for YAML examples only. count is the
+    number of findings.
+
+    A path with an unrecognised extension returns one "unknown file
+    type" line and a count of 0."""
+    kind = input_kind(filepath)
+    if kind != 'yaml' and kind not in DTS_FAMILY:
+        return (['%s: unknown file type, skipping' % filepath], 0)
+    if kind == 'yaml':
+        indent_kind, blocks = 'spaces', iter_yaml_examples(filepath)
+    else:
+        indent_kind, blocks = 'tab', iter_dts_file(filepath)
+
+    out = []
+    for text, base, idx in blocks:
+        # The tag is the same for every finding of one block.
+        ex_tag = '' if idx is None else ' example %d' % idx
+        for lineno, rule, msg in check_block(text, mode, indent_kind, kind):
+            out.append('%s:%d:%s [%s] %s' %
+                       (filepath, base + lineno - 1, ex_tag, rule, msg))
+    return (out, len(out))
+
+
+# Worker entry point for ProcessPoolExecutor.map(). Top-level so it is
+# picklable on every platform.
+def _worker(args):
+    """Unpack one (filepath, mode) work item and return the
+    (lines, count) pair of collect_findings() for it."""
+    filepath, mode = args
+    return collect_findings(filepath, mode)
+
+
+def main():
+    """Command-line entry point.
+
+    Parses the arguments, runs collect_findings() on every input
+    file -- in this process, or spread over a pool of worker
+    processes -- and prints every returned line to stderr in input
+    order.
+
+    Returns the process exit status: 0 when no finding was counted
+    (also after --list-rules), 1 otherwise. Calling the tool without
+    any file ends through ap.error()."""
+    import os
+    ap = argparse.ArgumentParser(
+        description='Check DTS coding style on YAML examples and '
+        '.dts/.dtsi/.dtso files.',
+        fromfile_prefix_chars='@')
+    ap.add_argument('--mode', choices=('relaxed', 'strict'),
+                    default='relaxed',
+                    help='which rule set to apply (default: relaxed)')
+    ap.add_argument('-j', '--jobs', type=int, default=0,
+                    metavar='N',
+                    help='run N workers in parallel (default: respect '
+                    'the make jobserver via $PARALLELISM, otherwise '
+                    'os.cpu_count(); use 1 to disable multiprocessing)')
+    ap.add_argument('--list-rules', action='store_true',
+                    help='print all rules with their mode and exit')
+    ap.add_argument('files', nargs='*', metavar='file',
+                    help='YAML binding files or .dts/.dtsi/.dtso files; '
+                    'use @argfile to read paths from a file')
+    args = ap.parse_args()
+
+    if args.list_rules:
+        for r in RULES:
+            applies = ','.join(r.applies_to)
+            print('%-22s %-7s [%s] %s' %
+                  (r.name, r.mode, applies, r.description))
+        return 0
+
+    if not args.files:
+        ap.error('no input files')
+
+    if args.jobs > 0:
+        jobs = args.jobs
+    else:
+        # When invoked under scripts/jobserver-exec, $PARALLELISM
+        # holds the slot count make has reserved for us; this lets
+        # `make -j N dt_binding_check` constrain our worker pool to N.
+        try:
+            jobs = int(os.environ['PARALLELISM'])
+        except (KeyError, ValueError):
+            jobs = 0
+        # Unset, unparsable or non-positive: use the CPU count. A
+        # value below 1 must never reach the pool set-up, which
+        # divides by the job count.
+        if jobs < 1:
+            jobs = os.cpu_count() or 1
+
+    def report(results):
+        """Print the lines of every (lines, count) pair to stderr and
+        return the sum of the counts."""
+        total = 0
+        for lines, n in results:
+            for line in lines:
+                print(line, file=sys.stderr)
+            total += n
+        return total
+
+    # Single-process path: keep import surface small for tests and
+    # easy debugging.
+    if jobs == 1 or len(args.files) == 1:
+        total = report(collect_findings(f, args.mode)
+                       for f in args.files)
+        return 1 if total else 0
+
+    # Multi-process path. ex.map preserves input order so output is
+    # deterministic across runs.
+    from concurrent.futures import ProcessPoolExecutor
+    work = [(f, args.mode) for f in args.files]
+    # About eight chunks per worker; work is never empty here.
+    chunk = max(1, len(work) // (jobs * 8))
+    with ProcessPoolExecutor(max_workers=jobs) as ex:
+        total = report(ex.map(_worker, work, chunksize=chunk))
+    return 1 if total else 0
+
+
+# Run only when executed as a script; main()'s return value becomes
+# the process exit status.
+if __name__ == '__main__':
+    sys.exit(main())
author	Daniel Golle <daniel@makrotopia.org>
	Wed, 27 May 2026 19:32:10 +0000 (20:32 +0100)
committer	Rob Herring (Arm) <robh@kernel.org>
	Wed, 10 Jun 2026 18:37:05 +0000 (13:37 -0500)