]> git.ipfire.org Git - thirdparty/binutils-gdb.git/blob - gdb/testsuite/gdb.base/charset.exp
Update copyright year range in header of all files managed by GDB
[thirdparty/binutils-gdb.git] / gdb / testsuite / gdb.base / charset.exp
1 # This testcase is part of GDB, the GNU debugger.
2
3 # Copyright 2001-2024 Free Software Foundation, Inc.
4
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 # Please email any bugs, comments, and/or additions to this file to:
19 # bug-gdb@gnu.org
20
21 # Test GDB's character set support.
22
23
24 standard_testfile .c charset-malloc.c
25
26 if { [prepare_for_testing "failed to prepare" ${testfile} [list $srcfile $srcfile2]] } {
27 return -1
28 }
29
30 # Parse the output from a `show charset' command. Return the host
31 # and target charset as a two-element list.
32 proc parse_show_charset_output {testname} {
33 global gdb_prompt
34
35 gdb_expect {
36 -re "The host character set is \"(.*)\"\\.\[\r\n\]+The target character set is \"(.*)\"\\.\[\r\n\]+The target wide character set is \"(.*)\"\\.\[\r\n\]+$gdb_prompt $" {
37 set host_charset $expect_out(1,string)
38 set target_charset $expect_out(2,string)
39 set retlist [list $host_charset $target_charset]
40 pass $testname
41 }
42 -re "The host character set is \"(.*)\"\\.\[\r\n\]+$gdb_prompt $" {
43 set host_charset $expect_out(1,string)
44 set retlist [list $host_charset]
45 pass $testname
46 }
47 -re "The target character set is \"(.*)\"\\.\[\r\n\]+$gdb_prompt $" {
48 set target_charset $expect_out(1,string)
49 set retlist [list $target_charset]
50 pass $testname
51 }
52 -re ".*$gdb_prompt $" {
53 fail $testname
54 }
55 timeout {
56 fail "$testname (timeout)"
57 }
58 }
59
60 return $retlist
61 }
62
63
64 # Try the various `show charset' commands.
65
66 send_gdb "show charset\n"
67 set show_charset [parse_show_charset_output "show charset"]
68
69 send_gdb "show target-charset\n"
70 set show_target_charset \
71 [lindex [parse_show_charset_output "show target-charset"] 0]
72
73 if {[lsearch -exact $show_charset $show_target_charset] >= 0} {
74 pass "check `show target-charset' against `show charset'"
75 } else {
76 fail "check `show target-charset' against `show charset'"
77 }
78
79 send_gdb "show host-charset\n"
80 set show_host_charset \
81 [lindex [parse_show_charset_output "show host-charset"] 0]
82
83 if {[lsearch -exact $show_charset $show_host_charset] >= 0} {
84 pass "check `show host-charset' against `show charset'"
85 } else {
86 fail "check `show host-charset' against `show charset'"
87 }
88
89 # Try a malformed `set charset'.
90 gdb_test "set charset" \
91 "Requires an argument. Valid arguments are.*" \
92 "try malformed `set charset'"
93
94 # Try using `set host-charset' on an invalid character set.
95 gdb_test "set host-charset my_grandma_bonnie" \
96 "Undefined item: \"my_grandma_bonnie\"." \
97 "try `set host-charset' with invalid charset"
98
99 # Try using `set target-charset' on an invalid character set.
100 gdb_test "set target-charset my_grandma_bonnie" \
101 "Undefined item: \"my_grandma_bonnie\"." \
102 "try `set target-charset' with invalid charset"
103
104 # A Tcl array mapping the names of all the character sets we've seen
105 # to "1" if the character set can be used as a host character set, or
106 # "0" otherwise. We can use `array names charsets' just to get a list
107 # of all character sets.
108 array set charsets {}
109
110 proc all_charset_names {} {
111 global charsets
112 return [array names charsets]
113 }
114
115 proc valid_host_charset {charset} {
116 global charsets
117 return [expr {[info exists charsets($charset)] && $charsets($charset)}]
118 }
119
120 proc valid_target_charset {charset} {
121 global charsets
122 return [info exists charsets($charset)]
123 }
124
125 send_gdb "set host-charset\n"
126 gdb_expect {
127 -re "Requires an argument. Valid arguments are (.*)\\.\r\n$gdb_prompt $" {
128 set host_charset_list $expect_out(1,string)
129 regsub -all {, } $host_charset_list {,} host_charset_list
130 foreach host_charset [split $host_charset_list ","] {
131 set charsets($host_charset) 1
132 }
133 pass "capture valid host charsets"
134 }
135
136 -re ".*$gdb_prompt $" {
137 fail "capture valid host charsets"
138 }
139
140 timeout {
141 fail "(timeout) capture valid host charsets"
142 }
143 }
144
145 # If gdb was built with a phony iconv, it will only have two character
146 # sets: "auto" and the default. In this situation, this set of tests
147 # is pointless.
148 if {[llength [array names charsets]] < 3} {
149 untested "fewer than 3 charsets"
150 return -1
151 }
152
153 send_gdb "set target-charset\n"
154 gdb_expect {
155 -re "Requires an argument. Valid arguments are (.*)\\.\r\n$gdb_prompt $" {
156 set target_charset_list $expect_out(1,string)
157 regsub -all {, } $target_charset_list {,} target_charset_list
158 foreach target_charset [split $target_charset_list ","] {
159 if {! [info exists charsets($target_charset)]} {
160 set charsets($target_charset) 0
161 }
162 }
163 pass "capture valid target charsets"
164 }
165
166 -re ".*$gdb_prompt $" {
167 fail "capture valid target charsets"
168 }
169
170 timeout {
171 fail "(timeout) capture valid target charsets"
172 }
173 }
174
175 # We don't want to test all the charset names here, since that would
176 # be too many combinations. We we pick a subset.
177 set charset_subset {ASCII ISO-8859-1 EBCDIC-US IBM1047}
178 foreach_with_prefix host_charset $charset_subset {
179 if {[valid_host_charset $host_charset]} {
180
181 set testname "try `set host-charset $host_charset'"
182 send_gdb "set host-charset $host_charset\n"
183 gdb_expect {
184 -re "GDB doesn't know of any character set named.*\[\r\n]+${gdb_prompt} $" {
185 # How did it get into `charsets' then?
186 fail "$testname (didn't recognize name)"
187 }
188 -re "GDB can't use `.*' as its host character set\\.\[\r\n]+${gdb_prompt} $" {
189 # Well, then why does its `charsets' entry say it can?
190 fail $testname
191 }
192 -re "${gdb_prompt} $" {
193 pass $testname
194 }
195 timeout {
196 fail "$testname (timeout)"
197 }
198 }
199
200 # Check that the command actually had its intended effect:
201 # $host_charset should now be the host character set.
202 send_gdb "show charset\n"
203 set result [parse_show_charset_output "parse `show charset' after `set host-charset $host_charset'"]
204 if {! [string compare [lindex $result 0] $host_charset]} {
205 pass "check effect of `set host-charset $host_charset'"
206 } else {
207 fail "check effect of `set host-charset $host_charset'"
208 }
209
210 # Now try setting every possible target character set,
211 # given that host charset.
212 foreach target_charset $charset_subset {
213 if {![valid_target_charset $target_charset]} {
214 continue
215 }
216 set testname "try `set target-charset $target_charset'"
217 send_gdb "set target-charset $target_charset\n"
218 gdb_expect {
219 -re "GDB doesn't know of any character set named.*\[\r\n]+${gdb_prompt} $" {
220 fail "$testname (didn't recognize name)"
221 }
222 -re "GDB can't convert from the .* character set to .*\\.\[\r\n\]+${gdb_prompt} $" {
223 # This is a serious problem. GDB should be able to convert
224 # between any arbitrary pair of character sets.
225 fail "$testname (can't convert)"
226 }
227 -re "${gdb_prompt} $" {
228 pass $testname
229 }
230 timeout {
231 fail "$testname (timeout)"
232 }
233 }
234
235 # Check that the command actually had its intended effect:
236 # $target_charset should now be the target charset.
237 send_gdb "show charset\n"
238 set result [parse_show_charset_output "parse `show charset' after `set target-charset $target_charset'"]
239 if {! [string compare $result [list $host_charset $target_charset]]} {
240 pass "check effect of `set target-charset $target_charset'"
241 } else {
242 fail "check effect of `set target-charset $target_charset'"
243 }
244
245 # Test handling of characters in the host charset which
246 # can't be translated into the target charset. \xA2 is
247 # `cent' in ISO-8859-1, which has no equivalent in ASCII.
248 #
249 # On some systems, the pseudo-tty through which we
250 # communicate with GDB insists on stripping the high bit
251 # from input characters, meaning that `cent' turns into
252 # `"'. Since ISO-8859-1 and ASCII are identical in the
253 # lower 128 characters, it's tough to see how we can test
254 # this behavior on such systems, so we just xfail it.
255 #
256 # Note: the \x16 (Control-V) is an escape to allow \xA2 to
257 # get past readline.
258 if {! [string compare $host_charset iso-8859-1] && ! [string compare $target_charset ascii]} {
259
260 set testname "untranslatable character in character literal"
261 send_gdb "print '\x16\xA2'\n"
262 gdb_expect {
263 -re "There is no character corresponding to .* in the target character set .*\\.\[\r\n\]+$gdb_prompt $" {
264 pass $testname
265 }
266 -re " = 34 '\"'\[\r\n\]+$gdb_prompt $" {
267 xfail "$testname (DejaGNU's pseudo-tty strips eighth bit)"
268 }
269 -re "$gdb_prompt $" {
270 fail $testname
271 }
272 timeout {
273 fail "$testname (timeout)"
274 }
275 }
276
277 set testname "untranslatable character in string literal"
278 # If the PTTY zeros bit seven, then this turns into
279 # print """
280 # which gets us a syntax error. We don't care.
281 send_gdb "print \"\x16\xA2\"\n"
282 gdb_expect {
283 -re "There is no character corresponding to .* in the target character set .*\\.\[\r\n\]+$gdb_prompt $" {
284 pass $testname
285 }
286 -re "Unterminated string in expression.\[\r\n\]+$gdb_prompt $" {
287 xfail "$testname (DejaGNU's pseudo-tty strips eighth bit)"
288 }
289 -re "$gdb_prompt $" {
290 fail $testname
291 }
292 timeout {
293 fail "$testname (timeout)"
294 }
295 }
296
297 set testname "untranslatable characters in backslash escape"
298 send_gdb "print '\\\x16\xA2'\n"
299 gdb_expect {
300 -re "The escape sequence .* is equivalent to plain .*, which has no equivalent\[\r\n\]+in the .* character set\\.\[\r\n\]+$gdb_prompt $" {
301 pass $testname
302 }
303 -re " = 34 '\"'\[\r\n\]+$gdb_prompt $" {
304 xfail "$testname (DejaGNU's pseudo-tty strips eighth bit)"
305 }
306 -re "$gdb_prompt $" {
307 fail $testname
308 }
309 timeout {
310 fail "$testname (timeout)"
311 }
312 }
313 }
314 }
315 }
316 }
317
318
319 # Set the host character set to plain ASCII, and try actually printing
320 # some strings in various target character sets. We need to run the
321 # test program to the point at which the strings have been
322 # initialized.
323 gdb_test "break ${srcfile}:[gdb_get_line_number "all strings initialized"]" \
324 ".*Breakpoint.* at .*" \
325 "set breakpoint after all strings have been initialized"
326 gdb_run_cmd
327 gdb_test "" "Breakpoint.*all strings initialized.*" "run until all strings have been initialized"
328
329 # We only try the wide character tests on machines where the wchar_t
330 # typedef in the test case has the right size.
331 set wchar_size [get_sizeof wchar_t 99]
332 set wchar_ok 0
333 if {$wchar_size == 2} {
334 lappend charset_subset UTF-16
335 set wchar_ok 1
336 } elseif {$wchar_size == 4} {
337 lappend charset_subset UTF-32
338 set wchar_ok 1
339 }
340
341 gdb_test_no_output "set host-charset ASCII"
342 foreach target_charset $charset_subset {
343 if {![valid_target_charset $target_charset]} {
344 continue
345 }
346
347 if {$target_charset == "UTF-32" || $target_charset == "UTF-16"} {
348 set param target-wide-charset
349 set L L
350 } else {
351 set param target-charset
352 set L ""
353 }
354 gdb_test_no_output "set $param $target_charset"
355
356 # Try printing the null character. There seems to be a bug in
357 # gdb_test that requires us to use gdb_expect here.
358 send_gdb "print $L'\\0'\n"
359 gdb_expect {
360 -re "\\\$${decimal} = 0 $L'\\\\000'\[\r\n\]+$gdb_prompt $" {
361 pass "print the null character in ${target_charset}"
362 }
363 -re "$gdb_prompt $" {
364 fail "print the null character in ${target_charset}"
365 }
366 timeout {
367 fail "print the null character in ${target_charset} (timeout)"
368 }
369 }
370
371 # Compute the name of the variable in the test program that holds
372 # a string in $target_charset. The variable's name is the
373 # character set's name, in lower-case, with all non-identifier
374 # characters replaced with '_', with "_string" stuck on the end.
375 if {$target_charset == "UTF-16"} {
376 # We still use the utf_32_string variable -- but the size is
377 # correct for UTF-16.
378 set var_name utf_32_string
379 } else {
380 set var_name [string tolower "${target_charset}_string"]
381 regsub -all -- "\[^a-z0-9_\]" $var_name "_" var_name
382 }
383
384 # Compute a regexp matching the results we expect. This is static,
385 # but it's easier than writing it out.
386 regsub -all "." "abfnrtv" "(\\\\&|x)" escapes
387 set uppercase "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
388 set lowercase "abcdefghijklmnopqrstuvwxyz"
389 set digits "0123456789"
390 set octal_escape "\\\\\[0-9\]+"
391
392 send_gdb "print $var_name\n"
393 # ${escapes}${uppercase}${lowercase}${digits}${octal}${octal}
394 gdb_expect {
395 -re ".* = $L\"(\\\\a|x)(\\\\b|x)(\\\\f|x)(\\\\n|x)(\\\\r|x)(\\\\t|x)(\\\\v|x)${uppercase}${lowercase}${digits}(${octal_escape}|x)+\"\[\r\n\]+$gdb_prompt $" {
396 pass "print string in $target_charset"
397 }
398 -re "$gdb_prompt $" {
399 fail "print string in $target_charset"
400 }
401 timeout {
402 fail "print string in $target_charset (timeout)"
403 }
404 }
405
406 # Try entering a character literal, and see if it comes back unchanged.
407 gdb_test "print $L'A'" \
408 " = \[0-9-\]+ $L'A'" \
409 "parse character literal in ${target_charset}"
410
411 # Check that the character literal was encoded correctly.
412 gdb_test "print /d $L'A' == $var_name\[7\]" \
413 " = 1" \
414 "check value of parsed character literal in ${target_charset}"
415
416 # Try entering a string literal, and see if it comes back unchanged.
417 gdb_test "print $L\"abcdefABCDEF012345\"" \
418 " = $L\"abcdefABCDEF012345\"" \
419 "parse string literal in ${target_charset}"
420
421 # Check that the string literal was encoded correctly.
422 gdb_test "print /d $L\"q\"\[0\] == $var_name\[49\]" \
423 " = 1" \
424 "check value of parsed string literal in ${target_charset}"
425
426 # Test handling of characters in the target charset which
427 # can't be translated into the host charset.
428 if {! [string compare $target_charset iso-8859-1]} {
429 gdb_test "print iso_8859_1_string\[69\]" \
430 " = \[0-9-\]+ '\\\\242'" \
431 "print character with no equivalent in host character set"
432 gdb_test "print iso_8859_1_string + 70" \
433 " = ${hex} \"\\\\242.*\"" \
434 "print string with no equivalent in host character set"
435 }
436
437 # Make sure that we don't apply the ISO-8859-1 `print_literally'
438 # function to ASCII.
439 if {! [string compare $target_charset ascii]} {
440 gdb_test "print iso_8859_1_string\[69\]" \
441 " = \[0-9-\]+ '\\\\242'" \
442 "print ASCII unprintable character"
443 gdb_test "print iso_8859_1_string + 70" \
444 " = ${hex} \"\\\\242.*\"" \
445 "print ASCII unprintable string"
446 }
447
448 # Try printing characters with backslash escape equivalents.
449 set escapees {a b f n r t v}
450 for {set i 0} {$i < [llength $escapees]} {incr i} {
451 set escape [lindex $escapees $i]
452 send_gdb "print $var_name\[$i\]\n"
453 set have_escape 1
454 gdb_expect {
455 -re "= \[0-9-\]+ $L'\\\\${escape}'\[\r\n\]+$gdb_prompt $" {
456 pass "try printing '\\${escape}' in ${target_charset}"
457 }
458 -re "= \[0-9-\]+ 'x'\[\r\n\]+$gdb_prompt $" {
459 xfail "try printing '\\${escape}' in ${target_charset} (no such escape)"
460 set have_escape 0
461 }
462 -re "$gdb_prompt $" {
463 fail "try printing '\\${escape}' in ${target_charset}"
464 }
465 timeout {
466 fail "try printing '\\${escape}' in ${target_charset} (timeout)"
467 }
468 }
469
470 if {$have_escape} {
471
472 # Try parsing a backslash escape in a character literal.
473 gdb_test "print /d $L'\\${escape}' == $var_name\[$i\]" \
474 " = 1" \
475 "check value of '\\${escape}' in ${target_charset}"
476
477 # Try parsing a backslash escape in a string literal.
478 gdb_test "print /d $L\"\\${escape}\"\[0\] == $var_name\[$i\]" \
479 " = 1" \
480 "check value of \"\\${escape}\" in ${target_charset}"
481 }
482 }
483
484 # Try printing a character escape that doesn't exist. We should
485 # get the unescaped character, in the target character set.
486 gdb_test "print $L'\\q'" " = \[0-9-\]+ $L'q'" \
487 "print escape that doesn't exist in $target_charset"
488 gdb_test "print /d $L'\\q' == $var_name\[49\]" " = 1" \
489 "check value of escape that doesn't exist in $target_charset"
490 }
491
492 # Reset the target charset.
493 gdb_test_no_output "set target-charset UTF-8"
494
495 # \242 is not a valid UTF-8 character.
496 gdb_test "print \"\\242\"" " = \"\\\\242\"" \
497 "non-representable target character"
498
499 gdb_test "print '\\x'" "\\\\x escape without a following hex digit"
500 gdb_test "print '\\u'" "\\\\u escape without a following hex digit"
501 gdb_test "print '\\9'" " = \[0-9\]+ '9'"
502
503 # An octal escape can only be 3 digits.
504 gdb_test "print \"\\1011\"" " = \"A1\""
505
506 # The final digit does not need to be escaped here.
507 foreach val {0 1 2 3 4 5 6 7 8 9 a b c d e f} {
508 gdb_test "print \"\\0\" \"${val}\"" " = \"\\\\000${val}\""
509 }
510
511 # Tests for wide- or unicode- strings. L is the prefix letter to use,
512 # either "L" (for wide strings), "u" (for UTF-16), or "U" (for UTF-32).
513 # NAME is used in the test names and should be related to the prefix
514 # letter in some easy-to-undestand way.
515 proc test_wide_or_unicode {L name} {
516 gdb_test "print $L\"ab\" $L\"c\"" " = $L\"abc\"" \
517 "basic $name string concatenation"
518 gdb_test "print $L\"ab\" \"c\"" " = $L\"abc\"" \
519 "narrow and $name string concatenation"
520 gdb_test "print \"ab\" $L\"c\"" " = $L\"abc\"" \
521 "$name and narrow string concatenation"
522 gdb_test "print $L\"\\xe\" $L\"c\"" " = $L\"\\\\016c\"" \
523 "$name string concatenation with escape"
524 gdb_test "print $L\"\" \"abcdef\" \"g\"" \
525 "$L\"abcdefg\"" \
526 "concatenate three strings with empty $name string"
527 gdb_test "print $L\"\\xffef\" $L\"f\"" \
528 "$L\"\\\\xffef\\\\146\"" \
529 "test multi-char escape sequence case for $name"
530
531 gdb_test "print $L'a'" "= \[0-9\]+ $L'a'" \
532 "basic $name character"
533 }
534
535 if {$wchar_ok} {
536 test_wide_or_unicode L wide
537 }
538
539 set ucs2_ok [expr {[get_sizeof char16_t 99] == 2}]
540
541 if ![valid_host_charset "UTF-16"] {
542 verbose -log "Disabling UTF-16 tests."
543 set ucs2_ok 0
544 }
545
546 if {$ucs2_ok} {
547 test_wide_or_unicode u UTF-16
548 }
549
550 set ucs4_ok [expr {[get_sizeof char32_t 99] == 4}]
551 if {$ucs4_ok} {
552 test_wide_or_unicode U UTF-32
553 }
554
555 # Test an invalid string combination.
556 proc test_combination {L1 name1 L2 name2} {
557 gdb_test "print $L1\"abc\" $L2\"def\"" \
558 "Undefined string concatenation." \
559 "undefined concatenation of $name1 and $name2"
560 }
561
562 if {$wchar_ok && $ucs2_ok} {
563 test_combination L wide u UTF-16
564 }
565 if {$wchar_ok && $ucs4_ok} {
566 test_combination L wide U UTF-32
567 # Regression test for a typedef to a typedef.
568 gdb_test "print myvar" "= \[0-9\]+ L'A'" \
569 "typedef to wchar_t"
570 }
571 if {$ucs2_ok && $ucs4_ok} {
572 test_combination u UTF-16 U UTF-32
573 }
574
575 if {$ucs2_ok} {
576 set go 1
577 gdb_test_multiple "python print ('hello, world!')" \
578 "verify python support for charset tests" {
579 -re "not supported.*$gdb_prompt $" {
580 unsupported "python support is disabled"
581 set go 0
582 }
583 -re "$gdb_prompt $" {}
584 }
585
586 if {$go} {
587 gdb_test "print u\"abcdef\"" " = u\"abcdef\"" \
588 "set up for python printing of utf-16 string"
589
590 gdb_test "python print (gdb.history(0).string())" "abcdef" \
591 "extract utf-16 string using python"
592 }
593 }
594
595 # Regression test for a cleanup bug in the charset code.
596 gdb_test "print /d 'a' == 'a' || 'b' == 'b'" \
597 ".* = 1" \
598 "EVAL_SKIP cleanup handling regression test"
599
600
601 proc string_display { var_name set_prefix x_size x_type} {
602 with_test_prefix "set_prefix=$set_prefix" {
603 gdb_test_no_output "set ${var_name} = ${set_prefix}\"Test String\\0with zeroes\""\
604 "assign ${var_name} with prefix ${set_prefix}"
605 gdb_test "x /2${x_size}s ${var_name}" ".*\t${x_type}\"Test String\"\[\r\n\]+.*\t${x_type}\"with zeroes\"" \
606 "display String ${var_name} with x/${x_size}s"
607 }
608 }
609
610 if {$ucs2_ok} {
611 string_display String16 u h u
612 if {$wchar_size == 2} {
613 string_display String16 L h u
614 }
615 }
616
617 string_display String32 U w U
618 if {$wchar_size == 4} {
619 string_display String32 L w U
620 }
621
622
623 foreach name {short int long} {
624 # We're really just checking to make sure this doesn't give an
625 # error.
626 gdb_test "print ${name}_array = \"hi\"" \
627 " = {.*}" \
628 "assign string to $name array"
629 }
630
631
632 gdb_exit