From: Tom Tromey <tromey@adacore.com>
Date: Tue, 27 May 2025 18:18:30 +0000 (-0600)
Subject: Fix regression with DW_AT_bit_offset handling
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=21b25b168dc6ed25a14c88fa43ae8128487cb557;p=thirdparty%2Fbinutils-gdb.git

Fix regression with DW_AT_bit_offset handling

Internal AdaCore testing using -gdwarf-4 found a spot where GCC will
emit a negative DW_AT_bit_offset.  However, my recent signed/unsigned
changes assumed that this value had to be positive.

I feel this bug somewhat invalidates my previous thinking about how
DWARF attributes should be handled.

In particular, both GCC and LLVM at understand that a negative bit
offset can be generated -- but for positive offsets they might use a
smaller "data" form, which is expected not to be sign-extended.  LLVM
has similar code but GCC does:

  if (bit_offset < 0)
    add_AT_int (die, DW_AT_bit_offset, bit_offset);
  else
    add_AT_unsigned (die, DW_AT_bit_offset, (unsigned HOST_WIDE_INT) bit_offset);

What this means is that this attribute is "signed but default
unsigned".

To fix this, I've added a new attribute::confused_constant method.
This should be used when a constant value might be signed, but where
narrow forms (e.g., DW_FORM_data1) should *not* cause sign extension.

I examined the GCC and LLVM DWARF writers to come up with the list of
attributes where this applies, namely DW_AT_bit_offset,
DW_AT_const_value and DW_AT_data_member_location (GCC only, but LLVM
always emits it as unsigned, so we're safe here).

This patch corrects the bug and imports the relevant test case.

Regression tested on x86-64 Fedora 41.

Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=32680
Bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118837
Approved-By: Simon Marchi <simon.marchi@efficios.com>
---

diff --git a/gdb/dwarf2/attribute.c b/gdb/dwarf2/attribute.c
index 8ff03531613..d2b53646617 100644
--- a/gdb/dwarf2/attribute.c
+++ b/gdb/dwarf2/attribute.c
@@ -216,6 +216,22 @@ attribute::signed_constant () const
 
 /* See attribute.h.  */
 
+std::optional<LONGEST>
+attribute::confused_constant () const
+{
+  if (form_is_strictly_signed ())
+    return u.snd;
+  else if (form_is_constant ())
+    return u.unsnd;
+
+  /* For DW_FORM_data16 see attribute::form_is_constant.  */
+  complaint (_("Attribute value is not a constant (%s)"),
+	     dwarf_form_name (form));
+  return {};
+}
+
+/* See attribute.h.  */
+
 bool
 attribute::form_is_unsigned () const
 {
diff --git a/gdb/dwarf2/attribute.h b/gdb/dwarf2/attribute.h
index 31ff018a80c..234de4e109f 100644
--- a/gdb/dwarf2/attribute.h
+++ b/gdb/dwarf2/attribute.h
@@ -123,6 +123,25 @@ struct attribute
      empty value is returned.  */
   std::optional<LONGEST> signed_constant () const;
 
+  /* Return a signed constant value.  However, for narrow forms like
+     DW_FORM_data1, sign extension is not done.
+
+     DWARF advises compilers to generally use DW_FORM_[su]data to
+     avoid ambiguity.  However, both GCC and LLVM ignore this for
+     certain attributes.  Furthermore in DWARF, whether a narrower
+     form causes sign-extension depends on the attribute -- for
+     attributes that can only assume non-negative values, sign
+     extension is not done.
+
+     Unfortunately, both compilers also emit certain attributes in a
+     "confused" way, using DW_FORM_sdata for signed values, and
+     possibly choosing a narrow form (e.g., DW_FORM_data1) otherwise
+     -- assuming that sign-extension will not be done.
+
+     This method should only be called when this "confused" treatment
+     is necessary.  */
+  std::optional<LONGEST> confused_constant () const;
+
   /* Return non-zero if ATTR's value falls in the 'constant' class, or
      zero otherwise.  When this function returns true, you can apply
      the constant_value method to it.
diff --git a/gdb/dwarf2/read.c b/gdb/dwarf2/read.c
index 22345657d36..583b6dbaab0 100644
--- a/gdb/dwarf2/read.c
+++ b/gdb/dwarf2/read.c
@@ -9936,7 +9936,7 @@ handle_member_location (struct die_info *die, struct dwarf2_cu *cu,
       if (attr2 != nullptr && attr2->form_is_constant ())
 	{
 	  has_bit_offset = true;
-	  bit_offset = attr2->unsigned_constant ().value_or (0);
+	  bit_offset = attr2->confused_constant ().value_or (0);
 	  attr2 = dwarf2_attr (die, DW_AT_byte_size, cu);
 	  if (attr2 != nullptr && attr2->form_is_constant ())
 	    {
@@ -9949,7 +9949,7 @@ handle_member_location (struct die_info *die, struct dwarf2_cu *cu,
 
       if (attr->form_is_constant ())
 	{
-	  LONGEST offset = attr->unsigned_constant ().value_or (0);
+	  LONGEST offset = attr->confused_constant ().value_or (0);
 
 	  /* Work around this GCC 11 bug, where it would erroneously use -1
 	     data member locations, instead of 0:
@@ -17208,29 +17208,6 @@ new_symbol (struct die_info *die, struct type *type, struct dwarf2_cu *cu,
   return (sym);
 }
 
-/* Given an attr with a DW_FORM_dataN value in host byte order,
-   zero-extend it as appropriate for the symbol's type.  The DWARF
-   standard (v4) is not entirely clear about the meaning of using
-   DW_FORM_dataN for a constant with a signed type, where the type is
-   wider than the data.  The conclusion of a discussion on the DWARF
-   list was that this is unspecified.  We choose to always zero-extend
-   because that is the interpretation long in use by GCC.  */
-
-static void
-dwarf2_const_value_data (const struct attribute *attr, LONGEST *value,
-			 int bits)
-{
-  LONGEST l = attr->constant_value (0);
-
-  if (bits < sizeof (*value) * 8)
-    {
-      l &= ((LONGEST) 1 << bits) - 1;
-      *value = l;
-    }
-  else
-    *value = l;
-}
-
 /* Read a constant value from an attribute.  Either set *VALUE, or if
    the value does not fit in *VALUE, set *BYTES - either already
    allocated on the objfile obstack, or newly allocated on OBSTACK,
@@ -17314,25 +17291,13 @@ dwarf2_const_value_attr (const struct attribute *attr, struct type *type,
 	 converted to host endianness, so we just need to sign- or
 	 zero-extend it as appropriate.  */
     case DW_FORM_data1:
-      dwarf2_const_value_data (attr, value, 8);
-      break;
     case DW_FORM_data2:
-      dwarf2_const_value_data (attr, value, 16);
-      break;
     case DW_FORM_data4:
-      dwarf2_const_value_data (attr, value, 32);
-      break;
     case DW_FORM_data8:
-      dwarf2_const_value_data (attr, value, 64);
-      break;
-
     case DW_FORM_sdata:
     case DW_FORM_implicit_const:
-      *value = attr->as_signed ();
-      break;
-
     case DW_FORM_udata:
-      *value = attr->as_unsigned ();
+      *value = attr->confused_constant ().value_or (0);
       break;
 
     default:
@@ -18524,39 +18489,19 @@ dwarf2_fetch_constant_bytes (sect_offset sect_off,
 	 symbol's value "represented as it would be on the target
 	 architecture."  By the time we get here, it's already been
 	 converted to host endianness, so we just need to sign- or
-	 zero-extend it as appropriate.  */
+	 zero-extend it as appropriate.
+
+	 Both GCC and LLVM agree that these are always signed, though.  */
     case DW_FORM_data1:
-      type = die_type (die, cu);
-      dwarf2_const_value_data (attr, &value, 8);
-      result = write_constant_as_bytes (obstack, byte_order, type, value, len);
-      break;
     case DW_FORM_data2:
-      type = die_type (die, cu);
-      dwarf2_const_value_data (attr, &value, 16);
-      result = write_constant_as_bytes (obstack, byte_order, type, value, len);
-      break;
     case DW_FORM_data4:
-      type = die_type (die, cu);
-      dwarf2_const_value_data (attr, &value, 32);
-      result = write_constant_as_bytes (obstack, byte_order, type, value, len);
-      break;
     case DW_FORM_data8:
-      type = die_type (die, cu);
-      dwarf2_const_value_data (attr, &value, 64);
-      result = write_constant_as_bytes (obstack, byte_order, type, value, len);
-      break;
-
     case DW_FORM_sdata:
     case DW_FORM_implicit_const:
-      type = die_type (die, cu);
-      result = write_constant_as_bytes (obstack, byte_order,
-					type, attr->as_signed (), len);
-      break;
-
     case DW_FORM_udata:
       type = die_type (die, cu);
-      result = write_constant_as_bytes (obstack, byte_order,
-					type, attr->as_unsigned (), len);
+      value = attr->confused_constant ().value_or (0);
+      result = write_constant_as_bytes (obstack, byte_order, type, value, len);
       break;
 
     default:
diff --git a/gdb/testsuite/gdb.ada/negative-bit-offset.exp b/gdb/testsuite/gdb.ada/negative-bit-offset.exp
new file mode 100644
index 00000000000..c5fcae12722
--- /dev/null
+++ b/gdb/testsuite/gdb.ada/negative-bit-offset.exp
@@ -0,0 +1,36 @@
+# Copyright 2025 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Test negative DW_AT_bit_offset.
+
+load_lib "ada.exp"
+
+require allow_ada_tests
+
+standard_ada_testfile prog
+
+# This particular output is only generated with -gdwarf-4.
+if {[gdb_compile_ada "${srcfile}" "${binfile}" executable \
+	 {debug additional_flags=-gdwarf-4}] != ""} {
+    return
+}
+
+clean_restart ${testfile}
+
+set bp_location [gdb_get_line_number "STOP" ${testdir}/prog.adb]
+runto "prog.adb:$bp_location"
+
+gdb_test "print xp" \
+    [string_to_regexp "(x => 21, y => (-1, -2, -3, -4, -5, -6, -7, -8, -9, -10))"]
diff --git a/gdb/testsuite/gdb.ada/negative-bit-offset/prog.adb b/gdb/testsuite/gdb.ada/negative-bit-offset/prog.adb
new file mode 100644
index 00000000000..e3c17758480
--- /dev/null
+++ b/gdb/testsuite/gdb.ada/negative-bit-offset/prog.adb
@@ -0,0 +1,36 @@
+--  Copyright 2025 Free Software Foundation, Inc.
+--
+--  This program is free software; you can redistribute it and/or modify
+--  it under the terms of the GNU General Public License as published by
+--  the Free Software Foundation; either version 3 of the License, or
+--  (at your option) any later version.
+--
+--  This program is distributed in the hope that it will be useful,
+--  but WITHOUT ANY WARRANTY; without even the implied warranty of
+--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+--  GNU General Public License for more details.
+--
+--  You should have received a copy of the GNU General Public License
+--  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+procedure Prog is
+
+   type Small is range -32 .. 31;
+   for Small'Size use 6;
+
+   type SomeArray is array (POSITIVE range <>) of Small;
+
+   type SomePackedArray is array (POSITIVE range <>) of Small;
+   pragma Pack (SomePackedArray);
+
+   type SomePackedRecord is record
+      X: Small;
+      Y: SomePackedArray (1 .. 10);
+   end record;
+   pragma Pack (SomePackedRecord);
+
+   XP: SomePackedRecord := (21, (-1, -2, -3, -4, -5, -6, -7, -8, -9, -10));
+
+begin
+   null;    -- STOP
+end;