From af68056244a6c84ee296db5560d8d53f4e829261 Mon Sep 17 00:00:00 2001 From: Andrew Burgess Date: Tue, 4 Nov 2025 09:59:09 +0000 Subject: [PATCH] gdb/python: fix gdb.Block repr output I noticed that when printing a gdb.Block object in Python, I would occasionally get corrupted, nonsensical output, like this: <gdb.Block <anonymous> {intintyinty_1inty_3inty_5... (-5 more symbols)}> The symbol list is missing commas; it should be: int, inty, inty_1, inty_3, inty_5, ... And the '-5 more symbols' is clearly not right. The problem is in python/py-block.c: we use this line to calculate the number of symbols in a block: const int len = mdict_size (block->multidict ()); Then we loop over the symbols in the block like this: for (struct symbol *symbol : block_iterator_range (block)) ... The problem here is that 'block_iterator_range (block)' can loop over more symbols than just those within 'block'. For global and static blocks, block_iterator_range() takes into account included CUs, and so can step through multiple global or static blocks. See block_iterator_step and find_iterator_compunit_symtab in block.c for more details. In contrast, 'mdict_size (block->multidict ())' only counts the symbols contained within 'block' itself. I could fix this by either fixing LEN, or by only iterating over the symbols within 'block'. I assume that printing a gdb.Block object is used mostly for debug purposes; the output isn't really user-friendly, so I cannot imagine a user script that is relying on printing a gdb.Block as a way to inform the user about blocks in their program. As such, I think it makes more sense if the symbols listed are restricted to those strictly held within the block. And so, instead of block_iterator_range, I've switched to iterating over the multidict symbols. Now the calculated LEN will match the number of symbols being printed, which fixes the output seen above. 
However, as we're now only printing symbols that are within the block being examined, the output above becomes: <gdb.Block <anonymous> {}> All the symbols that GDB previously tried to print are coming from an included CU. For testing, I've made use of an existing DWARF test that tests DW_AT_import. In the wild I saw this in an inferior that used multiple shared libraries that have their debug information stored in separate debug files, and then parts of that debug information were combined into a third separate file using the DWZ tool. I made a few attempts to craft a simpler reproducer, but failed. In the end it was easier to just use a DWARF assembler test to reproduce the issue. I have added some more typedef symbols to the DWARF test; I don't believe that this will impact the existing test, but it makes the corrupted output more obvious. Approved-By: Tom Tromey --- gdb/python/py-block.c | 8 ++- gdb/testsuite/gdb.dwarf2/imported-unit-c.exp | 60 ++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/gdb/python/py-block.c b/gdb/python/py-block.c index cf7b7b31518..d1a32b34b5a 100644 --- a/gdb/python/py-block.c +++ b/gdb/python/py-block.c @@ -470,7 +470,13 @@ blpy_repr (PyObject *self) unsigned int written_symbols = 0; const int len = mdict_size (block->multidict ()); static constexpr int SYMBOLS_TO_SHOW = 5; - for (struct symbol *symbol : block_iterator_range (block)) + + /* Don't use block_iterator_range here as that will find symbols through + included symtabs (for global and static blocks), while LEN only counts + symbols that are actually in BLOCK itself. As this is really only for + basic debug to allow blocks to be identified, we limit ourselves to + just printing the symbols that are actually in BLOCK. 
*/ + for (struct symbol *symbol : block->multidict_symbols ()) { if (written_symbols == SYMBOLS_TO_SHOW) { diff --git a/gdb/testsuite/gdb.dwarf2/imported-unit-c.exp b/gdb/testsuite/gdb.dwarf2/imported-unit-c.exp index 6bfc8c7ebd0..1210fff8471 100644 --- a/gdb/testsuite/gdb.dwarf2/imported-unit-c.exp +++ b/gdb/testsuite/gdb.dwarf2/imported-unit-c.exp @@ -1,4 +1,5 @@ load_lib dwarf.exp +load_lib gdb-python.exp # This test can only be run on targets which support DWARF-2 and use gas. require dwarf2_support @@ -34,6 +35,26 @@ Dwarf::assemble $asm_file { DW_AT_name inty DW_AT_type :$int_label } + DW_TAG_typedef { + DW_AT_name inty_1 + DW_AT_type :$int2_label + } + DW_TAG_typedef { + DW_AT_name inty_2 + DW_AT_type :$int2_label + } + DW_TAG_typedef { + DW_AT_name inty_3 + DW_AT_type :$int2_label + } + DW_TAG_typedef { + DW_AT_name inty_4 + DW_AT_type :$int2_label + } + DW_TAG_typedef { + DW_AT_name inty_5 + DW_AT_type :$int2_label + } } } @@ -52,6 +73,26 @@ Dwarf::assemble $asm_file { DW_AT_name inty DW_AT_type :$int2_label } + DW_TAG_typedef { + DW_AT_name inty_1 + DW_AT_type :$int2_label + } + DW_TAG_typedef { + DW_AT_name inty_2 + DW_AT_type :$int2_label + } + DW_TAG_typedef { + DW_AT_name inty_3 + DW_AT_type :$int2_label + } + DW_TAG_typedef { + DW_AT_name inty_4 + DW_AT_type :$int2_label + } + DW_TAG_typedef { + DW_AT_name inty_5 + DW_AT_type :$int2_label + } } } @@ -96,13 +137,32 @@ if { [prepare_for_testing "failed to prepare" ${testfile} \ return -1 } +# If Python testing is enabled then try printing the global and static +# blocks for the current frame, which is in function FUNC. 
+proc test_python_block_printing { func } { + if {![allow_python_tests]} { + return + } + + gdb_test "python print(gdb.selected_frame().block().static_block)" \ + [string_to_regexp " {}>"] \ + "print static block in $func" + gdb_test "python print(gdb.selected_frame().block().global_block)" \ + [string_to_regexp " {$func}>"] \ + "print global block in $func" +} + if {![runto_main]} { return -1 } gdb_test "ptype inty" "type = int" "ptype in main" +test_python_block_printing main + gdb_breakpoint "foo" gdb_continue_to_breakpoint "continue to breakpoint for foo" gdb_test "ptype inty" "type = unsigned int" "ptype in foo" + +test_python_block_printing foo -- 2.47.3