]> git.ipfire.org Git - thirdparty/binutils-gdb.git/blame - gdb/python/py-utils.c
Automatic date update in version.in
[thirdparty/binutils-gdb.git] / gdb / python / py-utils.c
CommitLineData
d57a3c85
TJB
1/* General utility routines for GDB/Python.
2
d01e8234 3 Copyright (C) 2008-2025 Free Software Foundation, Inc.
d57a3c85
TJB
4
5 This file is part of GDB.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19
ef0f16cc 20#include "top.h"
d57a3c85 21#include "charset.h"
595939de 22#include "value.h"
d57a3c85 23#include "python-internal.h"
d57a3c85 24
d57a3c85
TJB
25/* Converts a Python 8-bit string to a unicode string object. Assumes the
26 8-bit string is in the host charset. If an error occurs during conversion,
27 returns NULL with a python exception set.
28
29 As an added bonus, the functions accepts a unicode string and returns it
30 right away, so callers don't need to check which kind of string they've
256458bc 31 got. In Python 3, all strings are Unicode so this case is always the
9a27f2c6 32 one that applies.
d57a3c85
TJB
33
34 If the given object is not one of the mentioned string types, NULL is
35 returned, with the TypeError python exception set. */
833d985d 36gdbpy_ref<>
d57a3c85
TJB
37python_string_to_unicode (PyObject *obj)
38{
39 PyObject *unicode_str;
40
41 /* If obj is already a unicode string, just return it.
42 I wish life was always that simple... */
43 if (PyUnicode_Check (obj))
83390453
PM
44 {
45 unicode_str = obj;
46 Py_INCREF (obj);
47 }
d57a3c85
TJB
48 else
49 {
50 PyErr_SetString (PyExc_TypeError,
edae3fd6 51 _("Expected a string object."));
d57a3c85
TJB
52 unicode_str = NULL;
53 }
54
833d985d 55 return gdbpy_ref<> (unicode_str);
d57a3c85
TJB
56}
57
58/* Returns a newly allocated string with the contents of the given unicode
08c637de 59 string object converted to CHARSET. If an error occurs during the
075c55e0
TT
60 conversion, NULL will be returned and a python exception will be
61 set. */
9b972014 62static gdb::unique_xmalloc_ptr<char>
08c637de 63unicode_to_encoded_string (PyObject *unicode_str, const char *charset)
d57a3c85 64{
08c637de 65 /* Translate string to named charset. */
7780f186 66 gdbpy_ref<> string (PyUnicode_AsEncodedString (unicode_str, charset, NULL));
d57a3c85
TJB
67 if (string == NULL)
68 return NULL;
69
ea38e5df
TT
70 return gdb::unique_xmalloc_ptr<char>
71 (xstrdup (PyBytes_AsString (string.get ())));
08c637de
TJB
72}
73
fbb8f299
PM
74/* Returns a PyObject with the contents of the given unicode string
75 object converted to a named charset. If an error occurs during
76 the conversion, NULL will be returned and a python exception will
77 be set. */
833d985d 78static gdbpy_ref<>
fbb8f299
PM
79unicode_to_encoded_python_string (PyObject *unicode_str, const char *charset)
80{
fbb8f299 81 /* Translate string to named charset. */
833d985d 82 return gdbpy_ref<> (PyUnicode_AsEncodedString (unicode_str, charset, NULL));
fbb8f299
PM
83}
84
9b972014
TT
85/* Returns a newly allocated string with the contents of the given
86 unicode string object converted to the target's charset. If an
87 error occurs during the conversion, NULL will be returned and a
88 python exception will be set. */
89gdb::unique_xmalloc_ptr<char>
08c637de
TJB
90unicode_to_target_string (PyObject *unicode_str)
91{
1da5d0e6
TT
92 return (unicode_to_encoded_string
93 (unicode_str,
94 target_charset (gdbpy_enter::get_gdbarch ())));
d57a3c85
TJB
95}
96
fbb8f299
PM
97/* Returns a PyObject with the contents of the given unicode string
98 object converted to the target's charset. If an error occurs
99 during the conversion, NULL will be returned and a python exception
100 will be set. */
833d985d 101static gdbpy_ref<>
fbb8f299
PM
102unicode_to_target_python_string (PyObject *unicode_str)
103{
1da5d0e6
TT
104 return (unicode_to_encoded_python_string
105 (unicode_str,
106 target_charset (gdbpy_enter::get_gdbarch ())));
fbb8f299
PM
107}
108
d57a3c85 109/* Converts a python string (8-bit or unicode) to a target string in
9b972014
TT
110 the target's charset. Returns NULL on error, with a python
111 exception set. */
112gdb::unique_xmalloc_ptr<char>
d57a3c85
TJB
113python_string_to_target_string (PyObject *obj)
114{
833d985d 115 gdbpy_ref<> str = python_string_to_unicode (obj);
d57a3c85
TJB
116 if (str == NULL)
117 return NULL;
118
830a4934 119 return unicode_to_target_string (str.get ());
d57a3c85 120}
08c637de 121
fbb8f299
PM
122/* Converts a python string (8-bit or unicode) to a target string in the
123 target's charset. Returns NULL on error, with a python exception
9a27f2c6
PK
124 set.
125
126 In Python 3, the returned object is a "bytes" object (not a string). */
833d985d 127gdbpy_ref<>
fbb8f299
PM
128python_string_to_target_python_string (PyObject *obj)
129{
833d985d 130 gdbpy_ref<> str = python_string_to_unicode (obj);
fbb8f299 131 if (str == NULL)
833d985d 132 return str;
fbb8f299 133
830a4934 134 return unicode_to_target_python_string (str.get ());
fbb8f299
PM
135}
136
08c637de 137/* Converts a python string (8-bit or unicode) to a target string in
9b972014
TT
138 the host's charset. Returns NULL on error, with a python exception
139 set. */
140gdb::unique_xmalloc_ptr<char>
08c637de
TJB
141python_string_to_host_string (PyObject *obj)
142{
833d985d 143 gdbpy_ref<> str = python_string_to_unicode (obj);
08c637de
TJB
144 if (str == NULL)
145 return NULL;
146
830a4934 147 return unicode_to_encoded_string (str.get (), host_charset ());
08c637de
TJB
148}
149
4ae6cc19
DE
150/* Convert a host string to a python string. */
151
833d985d 152gdbpy_ref<>
4ae6cc19
DE
153host_string_to_python_string (const char *str)
154{
5aee4587
SM
155 return gdbpy_ref<> (PyUnicode_Decode (str, strlen (str), host_charset (),
156 NULL));
4ae6cc19
DE
157}
158
08c637de
TJB
159/* Return true if OBJ is a Python string or unicode object, false
160 otherwise. */
161
162int
163gdbpy_is_string (PyObject *obj)
164{
9a27f2c6 165 return PyUnicode_Check (obj);
08c637de 166}
07ca107c
DE
167
168/* Return the string representation of OBJ, i.e., str (obj).
07ca107c
DE
169 If the result is NULL a python error occurred, the caller must clear it. */
170
9b972014 171gdb::unique_xmalloc_ptr<char>
07ca107c
DE
172gdbpy_obj_to_string (PyObject *obj)
173{
7780f186 174 gdbpy_ref<> str_obj (PyObject_Str (obj));
07ca107c
DE
175
176 if (str_obj != NULL)
edae3fd6 177 return python_string_to_host_string (str_obj.get ());
07ca107c
DE
178
179 return NULL;
180}
181
5c329e6a 182/* See python-internal.h. */
07ca107c 183
9b972014 184gdb::unique_xmalloc_ptr<char>
5c329e6a 185gdbpy_err_fetch::to_string () const
07ca107c 186{
07ca107c
DE
187 /* There are a few cases to consider.
188 For example:
5c329e6a
TT
189 value is a string when PyErr_SetString is used.
190 value is not a string when raise "foo" is used, instead it is None
191 and type is "foo".
192 So the algorithm we use is to print `str (value)' if it's not
193 None, otherwise we print `str (type)'.
07ca107c
DE
194 Using str (aka PyObject_Str) will fetch the error message from
195 gdb.GdbError ("message"). */
196
b1abf8b1
TV
197 gdbpy_ref<> value = this->value ();
198 if (value.get () != nullptr && value.get () != Py_None)
199 return gdbpy_obj_to_string (value.get ());
07ca107c 200 else
b1abf8b1 201 return gdbpy_obj_to_string (this->type ().get ());
5c329e6a
TT
202}
203
204/* See python-internal.h. */
205
206gdb::unique_xmalloc_ptr<char>
207gdbpy_err_fetch::type_to_string () const
208{
b1abf8b1 209 return gdbpy_obj_to_string (this->type ().get ());
07ca107c 210}
595939de 211
621c8364 212/* Convert a GDB exception to the appropriate Python exception.
256458bc 213
56cc411c 214 This sets the Python error indicator. */
621c8364 215
56cc411c 216void
94aeb44b 217gdbpy_convert_exception (const struct gdb_exception &exception)
621c8364
TT
218{
219 PyObject *exc_class;
220
221 if (exception.reason == RETURN_QUIT)
222 exc_class = PyExc_KeyboardInterrupt;
b940a061
KB
223 else if (exception.reason == RETURN_FORCED_QUIT)
224 quit_force (NULL, 0);
621c8364
TT
225 else if (exception.error == MEMORY_ERROR)
226 exc_class = gdbpy_gdb_memory_error;
227 else
228 exc_class = gdbpy_gdb_error;
229
3d6e9d23 230 PyErr_Format (exc_class, "%s", exception.what ());
621c8364
TT
231}
232
595939de
PM
233/* Converts OBJ to a CORE_ADDR value.
234
b86af38a 235 Returns 0 on success or -1 on failure, with a Python exception set.
595939de
PM
236*/
237
238int
239get_addr_from_python (PyObject *obj, CORE_ADDR *addr)
240{
241 if (gdbpy_is_value_object (obj))
b86af38a 242 {
b86af38a 243
a70b8144 244 try
b86af38a
TT
245 {
246 *addr = value_as_address (value_object_to_value (obj));
247 }
230d2906 248 catch (const gdb_exception &except)
492d29ea 249 {
fa61a48d 250 return gdbpy_handle_gdb_exception (-1, except);
492d29ea 251 }
b86af38a 252 }
74aedc46 253 else
595939de 254 {
7780f186 255 gdbpy_ref<> num (PyNumber_Long (obj));
74aedc46
TT
256 gdb_py_ulongest val;
257
258 if (num == NULL)
b86af38a 259 return -1;
595939de 260
830a4934 261 val = gdb_py_long_as_ulongest (num.get ());
74aedc46 262 if (PyErr_Occurred ())
b86af38a 263 return -1;
595939de 264
74aedc46
TT
265 if (sizeof (val) > sizeof (CORE_ADDR) && ((CORE_ADDR) val) != val)
266 {
267 PyErr_SetString (PyExc_ValueError,
268 _("Overflow converting to address."));
b86af38a 269 return -1;
74aedc46 270 }
595939de 271
74aedc46 272 *addr = val;
595939de
PM
273 }
274
b86af38a 275 return 0;
595939de 276}
74aedc46
TT
277
278/* Convert a LONGEST to the appropriate Python object -- either an
279 integer object or a long object, depending on its value. */
280
12dfa12a 281gdbpy_ref<>
74aedc46
TT
282gdb_py_object_from_longest (LONGEST l)
283{
9a27f2c6 284 if (sizeof (l) > sizeof (long))
12dfa12a
TT
285 return gdbpy_ref<> (PyLong_FromLongLong (l));
286 return gdbpy_ref<> (PyLong_FromLong (l));
74aedc46
TT
287}
288
289/* Convert a ULONGEST to the appropriate Python object -- either an
290 integer object or a long object, depending on its value. */
291
12dfa12a 292gdbpy_ref<>
74aedc46
TT
293gdb_py_object_from_ulongest (ULONGEST l)
294{
9a27f2c6 295 if (sizeof (l) > sizeof (unsigned long))
12dfa12a
TT
296 return gdbpy_ref<> (PyLong_FromUnsignedLongLong (l));
297 return gdbpy_ref<> (PyLong_FromUnsignedLong (l));
74aedc46
TT
298}
299
5aee4587 300/* Like PyLong_AsLong, but returns 0 on failure, 1 on success, and puts
74aedc46
TT
301 the value into an out parameter. */
302
303int
304gdb_py_int_as_long (PyObject *obj, long *result)
305{
5aee4587 306 *result = PyLong_AsLong (obj);
74aedc46
TT
307 return ! (*result == -1 && PyErr_Occurred ());
308}
2e8265fd
TT
309
310\f
311
312/* Generic implementation of the __dict__ attribute for objects that
313 have a dictionary. The CLOSURE argument should be the type object.
314 This only handles positive values for tp_dictoffset. */
315
316PyObject *
317gdb_py_generic_dict (PyObject *self, void *closure)
318{
319 PyObject *result;
19ba03f4 320 PyTypeObject *type_obj = (PyTypeObject *) closure;
2e8265fd
TT
321 char *raw_ptr;
322
323 raw_ptr = (char *) self + type_obj->tp_dictoffset;
324 result = * (PyObject **) raw_ptr;
325
326 Py_INCREF (result);
327 return result;
328}
aa36459a
TT
329
330/* Like PyModule_AddObject, but does not steal a reference to
331 OBJECT. */
332
333int
334gdb_pymodule_addobject (PyObject *module, const char *name, PyObject *object)
335{
336 int result;
337
338 Py_INCREF (object);
6c28e44a 339 result = PyModule_AddObject (module, name, object);
aa36459a 340 if (result < 0)
1915daeb 341 Py_DECREF (object);
aa36459a
TT
342 return result;
343}
2b4ad2fe 344
740b42ce
AB
345/* See python-internal.h. */
346
347void
348gdbpy_error (const char *fmt, ...)
349{
350 va_list ap;
351 va_start (ap, fmt);
352 std::string str = string_vprintf (fmt, ap);
353 va_end (ap);
354
355 const char *msg = str.c_str ();
356 if (msg != nullptr && *msg != '\0')
357 error (_("Error occurred in Python: %s"), msg);
358 else
359 error (_("Error occurred in Python."));
360}
361
2b4ad2fe
TT
362/* Handle a Python exception when the special gdb.GdbError treatment
363 is desired. This should only be called when an exception is set.
364 If the exception is a gdb.GdbError, throw a gdb exception with the
365 exception text. For other exceptions, print the Python stack and
366 then throw a gdb exception. */
367
368void
369gdbpy_handle_exception ()
370{
5c329e6a
TT
371 gdbpy_err_fetch fetched_error;
372 gdb::unique_xmalloc_ptr<char> msg = fetched_error.to_string ();
2b4ad2fe
TT
373
374 if (msg == NULL)
375 {
376 /* An error occurred computing the string representation of the
377 error message. This is rare, but we should inform the user. */
6cb06a8c
TT
378 gdb_printf (_("An error occurred in Python "
379 "and then another occurred computing the "
380 "error message.\n"));
2b4ad2fe
TT
381 gdbpy_print_stack ();
382 }
383
384 /* Don't print the stack for gdb.GdbError exceptions.
385 It is generally used to flag user errors.
386
387 We also don't want to print "Error occurred in Python command"
388 for user errors. However, a missing message for gdb.GdbError
389 exceptions is arguably a bug, so we flag it as such. */
390
5c329e6a 391 if (fetched_error.type_matches (PyExc_KeyboardInterrupt))
bc543c90 392 throw_quit ("Quit");
dfba4847
TV
393 else if (fetched_error.type_matches (PyExc_SystemExit))
394 {
395 gdbpy_ref<> value = fetched_error.value ();
396 gdbpy_ref<> code (PyObject_GetAttrString (value.get (), "code"));
397 int exit_arg;
398
399 if (code.get () == Py_None)
400 {
401 /* CODE == None: exit status is 0. */
402 exit_arg = 0;
403 }
404 else if (code.get () != nullptr && PyLong_Check (code.get ()))
405 {
406 /* CODE == integer: exit status is aforementioned integer. */
407 exit_arg = PyLong_AsLong (code.get ());
408 }
409 else
410 {
411 if (code.get () == nullptr)
412 gdbpy_print_stack ();
413
414 /* Otherwise: exit status is 1, print code to stderr. */
415 if (msg != nullptr)
416 gdb_printf (gdb_stderr, "%s\n", msg.get ());
417 exit_arg = 1;
418 }
419
420 quit_force (&exit_arg, 0);
421 }
5c329e6a
TT
422 else if (! fetched_error.type_matches (gdbpy_gdberror_exc)
423 || msg == NULL || *msg == '\0')
2b4ad2fe 424 {
5c329e6a 425 fetched_error.restore ();
2b4ad2fe
TT
426 gdbpy_print_stack ();
427 if (msg != NULL && *msg != '\0')
428 error (_("Error occurred in Python: %s"), msg.get ());
429 else
430 error (_("Error occurred in Python."));
431 }
432 else
5c329e6a 433 error ("%s", msg.get ());
2b4ad2fe 434}
51e8dbe1
AB
435
436/* See python-internal.h. */
437
438gdb::unique_xmalloc_ptr<char>
439gdbpy_fix_doc_string_indentation (gdb::unique_xmalloc_ptr<char> doc)
440{
441 /* A structure used to track the white-space information on each line of
442 DOC. */
443 struct line_whitespace
444 {
445 /* Constructor. OFFSET is the offset from the start of DOC, WS_COUNT
446 is the number of whitespace characters starting at OFFSET. */
447 line_whitespace (size_t offset, int ws_count)
448 : m_offset (offset),
449 m_ws_count (ws_count)
450 { /* Nothing. */ }
451
452 /* The offset from the start of DOC. */
453 size_t offset () const
454 { return m_offset; }
455
456 /* The number of white-space characters at the start of this line. */
457 int ws () const
458 { return m_ws_count; }
459
460 private:
461 /* The offset from the start of DOC to the first character of this
462 line. */
463 size_t m_offset;
464
465 /* White space count on this line, the first character of this
466 whitespace is at OFFSET. */
467 int m_ws_count;
468 };
469
470 /* Count the number of white-space character starting at TXT. We
471 currently only count true single space characters, things like tabs,
472 newlines, etc are not counted. */
473 auto count_whitespace = [] (const char *txt) -> int
474 {
475 int count = 0;
476
477 while (*txt == ' ')
478 {
479 ++txt;
480 ++count;
481 }
482
483 return count;
484 };
485
486 /* In MIN_WHITESPACE we track the smallest number of whitespace
487 characters seen at the start of a line (that has actual content), this
488 is the number of characters that we can delete off all lines without
489 altering the relative indentation of all lines in DOC.
490
491 The first line often has no indentation, but instead starts immediates
492 after the 3-quotes marker within the Python doc string, so, if the
493 first line has zero white-space then we just ignore it, and don't set
494 MIN_WHITESPACE to zero.
495
496 Lines without any content should (ideally) have no white-space at
497 all, but if they do then they might have an artificially low number
498 (user left a single stray space at the start of an otherwise blank
499 line), we don't consider lines without content when updating the
500 MIN_WHITESPACE value. */
6b09f134 501 std::optional<int> min_whitespace;
51e8dbe1
AB
502
503 /* The index into WS_INFO at which the processing of DOC can be
504 considered "all done", that is, after this point there are no further
505 lines with useful content and we should just stop. */
6b09f134 506 std::optional<size_t> all_done_idx;
51e8dbe1
AB
507
508 /* White-space information for each line in DOC. */
509 std::vector<line_whitespace> ws_info;
510
511 /* Now look through DOC and collect the required information. */
512 const char *tmp = doc.get ();
513 while (*tmp != '\0')
514 {
515 /* Add an entry for the offset to the start of this line, and how
516 much white-space there is at the start of this line. */
517 size_t offset = tmp - doc.get ();
518 int ws_count = count_whitespace (tmp);
519 ws_info.emplace_back (offset, ws_count);
520
521 /* Skip over the white-space. */
522 tmp += ws_count;
523
524 /* Remember where the content of this line starts, and skip forward
525 to either the end of this line (newline) or the end of the DOC
526 string (null character), whichever comes first. */
527 const char *content_start = tmp;
528 while (*tmp != '\0' && *tmp != '\n')
529 ++tmp;
530
531 /* If this is not the first line, and if this line has some content,
532 then update MIN_WHITESPACE, this reflects the smallest number of
533 whitespace characters we can delete from all lines without
534 impacting the relative indentation of all the lines of DOC. */
535 if (offset > 0 && tmp > content_start)
536 {
537 if (!min_whitespace.has_value ())
538 min_whitespace = ws_count;
539 else
540 min_whitespace = std::min (*min_whitespace, ws_count);
541 }
542
543 /* Each time we encounter a line that has some content we update
544 ALL_DONE_IDX to be the index of the next line. If the last lines
545 of DOC don't contain any content then ALL_DONE_IDX will be left
546 pointing at an earlier line. When we rewrite DOC, when we reach
547 ALL_DONE_IDX then we can stop, the allows us to trim any blank
548 lines from the end of DOC. */
549 if (tmp > content_start)
550 all_done_idx = ws_info.size ();
551
552 /* If we reached a newline then skip forward to the start of the next
553 line. The other possibility at this point is that we're at the
554 very end of the DOC string (null terminator). */
555 if (*tmp == '\n')
556 ++tmp;
557 }
558
559 /* We found no lines with content, fail safe by just returning the
560 original documentation string. */
561 if (!all_done_idx.has_value () || !min_whitespace.has_value ())
562 return doc;
563
564 /* Setup DST and SRC, both pointing into the DOC string. We're going to
565 rewrite DOC in-place, as we only ever make DOC shorter (by removing
566 white-space), thus we know this will not overflow. */
567 char *dst = doc.get ();
568 char *src = doc.get ();
569
570 /* Array indices used with DST, SRC, and WS_INFO respectively. */
571 size_t dst_offset = 0;
572 size_t src_offset = 0;
573 size_t ws_info_offset = 0;
574
575 /* Now, walk over the source string, this is the original DOC. */
576 while (src[src_offset] != '\0')
577 {
578 /* If we are at the start of the next line (in WS_INFO), then we may
579 need to skip some white-space characters. */
580 if (src_offset == ws_info[ws_info_offset].offset ())
581 {
582 /* If a line has leading white-space then we need to skip over
583 some number of characters now. */
584 if (ws_info[ws_info_offset].ws () > 0)
585 {
586 /* If the line is entirely white-space then we skip all of
587 the white-space, the next character to copy will be the
588 newline or null character. Otherwise, we skip the just
589 some portion of the leading white-space. */
590 if (src[src_offset + ws_info[ws_info_offset].ws ()] == '\n'
591 || src[src_offset + ws_info[ws_info_offset].ws ()] == '\0')
592 src_offset += ws_info[ws_info_offset].ws ();
593 else
594 src_offset += std::min (*min_whitespace,
595 ws_info[ws_info_offset].ws ());
596
597 /* If we skipped white-space, and are now at the end of the
598 input, then we're done. */
599 if (src[src_offset] == '\0')
600 break;
601 }
602 if (ws_info_offset < (ws_info.size () - 1))
603 ++ws_info_offset;
604 if (ws_info_offset > *all_done_idx)
605 break;
606 }
607
608 /* Don't copy a newline to the start of the DST string, this would
609 result in a leading blank line. But in all other cases, copy the
610 next character into the destination string. */
611 if ((dst_offset > 0 || src[src_offset] != '\n'))
612 {
613 dst[dst_offset] = src[src_offset];
614 ++dst_offset;
615 }
616
617 /* Move to the next source character. */
618 ++src_offset;
619 }
620
621 /* Remove the trailing newline character(s), and ensure we have a null
622 terminator in place. */
623 while (dst_offset > 1 && dst[dst_offset - 1] == '\n')
624 --dst_offset;
625 dst[dst_offset] = '\0';
626
627 return doc;
628}
aef117b7
AB
629
630/* See python-internal.h. */
631
632PyObject *
633gdb_py_invalid_object_repr (PyObject *self)
634{
635 return PyUnicode_FromFormat ("<%s (invalid)>", Py_TYPE (self)->tp_name);
636}