]>
Commit | Line | Data |
---|---|---|
252b5132 | 1 | /* strings -- print the strings of printable characters in files |
d87bef3a | 2 | Copyright (C) 1993-2023 Free Software Foundation, Inc. |
252b5132 RH |
3 | |
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
32866df7 | 6 | the Free Software Foundation; either version 3, or (at your option) |
252b5132 RH |
7 | any later version. |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
b43b5d5f NC |
16 | Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA |
17 | 02110-1301, USA. */ | |
252b5132 RH |
18 | \f |
19 | /* Usage: strings [options] file... | |
20 | ||
21 | Options: | |
22 | --all | |
23 | -a | |
7fac9594 NC |
24 | - Scan each file in its entirety. |
25 | ||
26 | --data | |
27 | -d Scan only the initialized data section(s) of object files. | |
252b5132 RH |
28 | |
29 | --print-file-name | |
30 | -f Print the name of the file before each string. | |
31 | ||
32 | --bytes=min-len | |
33 | -n min-len | |
34 | -min-len Print graphic char sequences, MIN-LEN or more bytes long, | |
8fee99c3 NC |
35 | that are followed by a NUL or a non-displayable character. |
36 | Default is 4. | |
252b5132 RH |
37 | |
38 | --radix={o,x,d} | |
39 | -t {o,x,d} Print the offset within the file before each string, | |
40 | in octal/hex/decimal. | |
41 | ||
334ac421 EA |
42 | --include-all-whitespace |
43 | -w By default tab and space are the only whitespace included in graphic | |
44 | char sequences. This option considers all of isspace() valid. | |
45 | ||
252b5132 RH |
46 | -o Like -to. (Some other implementations have -o like -to, |
47 | others like -td. We chose one arbitrarily.) | |
48 | ||
8745eafa NC |
49 | --encoding={s,S,b,l,B,L} |
50 | -e {s,S,b,l,B,L} | |
51 | Select character encoding: 7-bit-character, 8-bit-character, | |
52 | bigendian 16-bit, littleendian 16-bit, bigendian 32-bit, | |
53 | littleendian 32-bit. | |
d132876a | 54 | |
252b5132 | 55 | --target=BFDNAME |
3bf31ec9 | 56 | -T {bfdname} |
252b5132 RH |
57 | Specify a non-default object file format. |
58 | ||
b3aa80b4 | 59 | --unicode={default|locale|invalid|hex|escape|highlight} |
584294c4 | 60 | -U {d|l|i|x|e|h} |
795588ae | 61 | Determine how to handle UTF-8 unicode characters. The default |
b3aa80b4 NC |
62 | is no special treatment. All other versions of this option |
63 | only apply if the encoding is valid and enabling the option | |
64 | implies --encoding=S. | |
65 | The 'locale' option displays the characters according to the | |
66 | current locale. The 'invalid' option treats them as | |
67 | non-string characters. The 'hex' option displays them as hex | |
68 | byte sequences. The 'escape' option displays them as escape | |
69 | sequences and the 'highlight' option displays them as | |
70 | coloured escape sequences. | |
71 | ||
55edd97b EA |
72 | --output-separator=sep_string |
73 | -s sep_string String used to separate parsed strings in output. | |
74 | Default is newline. | |
75 | ||
252b5132 RH |
76 | --help |
77 | -h Print the usage message on the standard output. | |
78 | ||
79 | --version | |
ffbe5983 | 80 | -V |
252b5132 RH |
81 | -v Print the program version number. |
82 | ||
83 | Written by Richard Stallman <rms@gnu.ai.mit.edu> | |
84 | and David MacKenzie <djm@gnu.ai.mit.edu>. */ | |
85 | ||
3db64b00 | 86 | #include "sysdep.h" |
252b5132 | 87 | #include "bfd.h" |
e9792343 | 88 | #include "getopt.h" |
252b5132 | 89 | #include "libiberty.h" |
3882b010 | 90 | #include "safe-ctype.h" |
3db64b00 | 91 | #include "bucomm.h" |
252b5132 | 92 | |
b3aa80b4 NC |
93 | #ifndef streq |
94 | #define streq(a,b) (strcmp ((a),(b)) == 0) | |
95 | #endif | |
96 | ||
97 | typedef enum unicode_display_type | |
98 | { | |
99 | unicode_default = 0, | |
100 | unicode_locale, | |
101 | unicode_escape, | |
102 | unicode_hex, | |
103 | unicode_highlight, | |
104 | unicode_invalid | |
105 | } unicode_display_type; | |
106 | ||
107 | static unicode_display_type unicode_display = unicode_default; | |
108 | ||
8745eafa NC |
109 | #define STRING_ISGRAPHIC(c) \ |
110 | ( (c) >= 0 \ | |
111 | && (c) <= 255 \ | |
334ac421 | 112 | && ((c) == '\t' || ISPRINT (c) || (encoding == 'S' && (c) > 127) \ |
535b785f | 113 | || (include_all_whitespace && ISSPACE (c))) \ |
334ac421 | 114 | ) |
252b5132 RH |
115 | |
116 | #ifndef errno | |
117 | extern int errno; | |
118 | #endif | |
119 | ||
120 | /* The BFD section flags that identify an initialized data section. */ | |
121 | #define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS) | |
122 | ||
123 | /* Radix for printing addresses (must be 8, 10 or 16). */ | |
124 | static int address_radix; | |
125 | ||
126 | /* Minimum length of sequence of graphic chars to trigger output. */ | |
795588ae | 127 | static unsigned int string_min; |
252b5132 | 128 | |
334ac421 | 129 | /* Whether or not we include all whitespace as a graphic char. */ |
015dc7e1 | 130 | static bool include_all_whitespace; |
334ac421 | 131 | |
b34976b6 | 132 | /* TRUE means print address within file for each string. */ |
015dc7e1 | 133 | static bool print_addresses; |
252b5132 | 134 | |
b34976b6 | 135 | /* TRUE means print filename for each string. */ |
015dc7e1 | 136 | static bool print_filenames; |
252b5132 | 137 | |
b34976b6 | 138 | /* TRUE means for object files scan only the data section. */ |
015dc7e1 | 139 | static bool datasection_only; |
252b5132 | 140 | |
252b5132 RH |
141 | /* The BFD object file format. */ |
142 | static char *target; | |
143 | ||
d132876a NC |
144 | /* The character encoding format. */ |
145 | static char encoding; | |
146 | static int encoding_bytes; | |
147 | ||
55edd97b EA |
148 | /* Output string used to separate parsed strings */ |
149 | static char *output_separator; | |
150 | ||
252b5132 RH |
151 | static struct option long_options[] = |
152 | { | |
153 | {"all", no_argument, NULL, 'a'}, | |
b3aa80b4 | 154 | {"bytes", required_argument, NULL, 'n'}, |
7fac9594 | 155 | {"data", no_argument, NULL, 'd'}, |
b3aa80b4 NC |
156 | {"encoding", required_argument, NULL, 'e'}, |
157 | {"help", no_argument, NULL, 'h'}, | |
158 | {"include-all-whitespace", no_argument, NULL, 'w'}, | |
159 | {"output-separator", required_argument, NULL, 's'}, | |
252b5132 | 160 | {"print-file-name", no_argument, NULL, 'f'}, |
252b5132 RH |
161 | {"radix", required_argument, NULL, 't'}, |
162 | {"target", required_argument, NULL, 'T'}, | |
b3aa80b4 | 163 | {"unicode", required_argument, NULL, 'U'}, |
252b5132 RH |
164 | {"version", no_argument, NULL, 'v'}, |
165 | {NULL, 0, NULL, 0} | |
166 | }; | |
167 | ||
015dc7e1 | 168 | static bool strings_file (char *); |
b3aa80b4 | 169 | static void print_strings (const char *, FILE *, file_ptr, int, char *); |
1e0f0b4d | 170 | static void usage (FILE *, int) ATTRIBUTE_NORETURN; |
252b5132 | 171 | \f |
2da42df6 | 172 | int main (int, char **); |
65de42c0 | 173 | |
3713e829 NC |
174 | static void |
175 | set_string_min (const char * arg) | |
176 | { | |
177 | char *s; | |
178 | unsigned long l = strtoul (arg, &s, 0); | |
179 | ||
180 | if (s != NULL && *s != 0) | |
181 | fatal (_("invalid integer argument %s"), arg); | |
182 | ||
183 | string_min = (unsigned int) l; | |
184 | ||
185 | if (l != (unsigned long) string_min) | |
186 | fatal (_("minimum string length is too big: %s"), arg); | |
187 | ||
188 | if (string_min < 1) | |
189 | fatal (_("minimum string length is too small: %s"), arg); | |
190 | ||
191 | /* PR 30595: Look for minimum string lengths that overflow an 'int'. */ | |
192 | if (string_min + 1 == 0) | |
193 | fatal (_("minimum string length %s is too big"), arg); | |
194 | ||
195 | /* FIXME: Should we warn for unreasonably large minimum | |
196 | string lengths, even if technically they will work ? */ | |
197 | } | |
198 | ||
252b5132 | 199 | int |
2da42df6 | 200 | main (int argc, char **argv) |
252b5132 RH |
201 | { |
202 | int optc; | |
203 | int exit_status = 0; | |
015dc7e1 | 204 | bool files_given = false; |
e36aef42 | 205 | int numeric_opt = 0; |
252b5132 | 206 | |
1c529ca6 | 207 | setlocale (LC_ALL, ""); |
252b5132 RH |
208 | bindtextdomain (PACKAGE, LOCALEDIR); |
209 | textdomain (PACKAGE); | |
210 | ||
211 | program_name = argv[0]; | |
212 | xmalloc_set_program_name (program_name); | |
86eafac0 | 213 | bfd_set_error_program_name (program_name); |
869b9d07 MM |
214 | |
215 | expandargv (&argc, &argv); | |
216 | ||
c904a764 | 217 | string_min = 4; |
015dc7e1 AM |
218 | include_all_whitespace = false; |
219 | print_addresses = false; | |
220 | print_filenames = false; | |
7fac9594 | 221 | if (DEFAULT_STRINGS_ALL) |
015dc7e1 | 222 | datasection_only = false; |
7fac9594 | 223 | else |
015dc7e1 | 224 | datasection_only = true; |
252b5132 | 225 | target = NULL; |
d132876a | 226 | encoding = 's'; |
55edd97b | 227 | output_separator = NULL; |
252b5132 | 228 | |
b3aa80b4 | 229 | while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:U:Vv0123456789", |
252b5132 RH |
230 | long_options, (int *) 0)) != EOF) |
231 | { | |
232 | switch (optc) | |
233 | { | |
234 | case 'a': | |
015dc7e1 | 235 | datasection_only = false; |
252b5132 RH |
236 | break; |
237 | ||
7fac9594 | 238 | case 'd': |
015dc7e1 | 239 | datasection_only = true; |
7fac9594 NC |
240 | break; |
241 | ||
252b5132 | 242 | case 'f': |
015dc7e1 | 243 | print_filenames = true; |
252b5132 RH |
244 | break; |
245 | ||
8b53311e | 246 | case 'H': |
252b5132 RH |
247 | case 'h': |
248 | usage (stdout, 0); | |
249 | ||
250 | case 'n': | |
3713e829 | 251 | set_string_min (optarg); |
252b5132 RH |
252 | break; |
253 | ||
334ac421 | 254 | case 'w': |
015dc7e1 | 255 | include_all_whitespace = true; |
334ac421 EA |
256 | break; |
257 | ||
252b5132 | 258 | case 'o': |
015dc7e1 | 259 | print_addresses = true; |
252b5132 RH |
260 | address_radix = 8; |
261 | break; | |
262 | ||
263 | case 't': | |
015dc7e1 | 264 | print_addresses = true; |
252b5132 RH |
265 | if (optarg[1] != '\0') |
266 | usage (stderr, 1); | |
267 | switch (optarg[0]) | |
268 | { | |
269 | case 'o': | |
270 | address_radix = 8; | |
271 | break; | |
272 | ||
273 | case 'd': | |
274 | address_radix = 10; | |
275 | break; | |
276 | ||
277 | case 'x': | |
278 | address_radix = 16; | |
279 | break; | |
280 | ||
281 | default: | |
282 | usage (stderr, 1); | |
283 | } | |
284 | break; | |
285 | ||
286 | case 'T': | |
287 | target = optarg; | |
288 | break; | |
289 | ||
d132876a NC |
290 | case 'e': |
291 | if (optarg[1] != '\0') | |
292 | usage (stderr, 1); | |
293 | encoding = optarg[0]; | |
294 | break; | |
295 | ||
55edd97b EA |
296 | case 's': |
297 | output_separator = optarg; | |
795588ae | 298 | break; |
55edd97b | 299 | |
b3aa80b4 NC |
300 | case 'U': |
301 | if (streq (optarg, "default") || streq (optarg, "d")) | |
302 | unicode_display = unicode_default; | |
303 | else if (streq (optarg, "locale") || streq (optarg, "l")) | |
304 | unicode_display = unicode_locale; | |
305 | else if (streq (optarg, "escape") || streq (optarg, "e")) | |
306 | unicode_display = unicode_escape; | |
307 | else if (streq (optarg, "invalid") || streq (optarg, "i")) | |
308 | unicode_display = unicode_invalid; | |
309 | else if (streq (optarg, "hex") || streq (optarg, "x")) | |
310 | unicode_display = unicode_hex; | |
311 | else if (streq (optarg, "highlight") || streq (optarg, "h")) | |
312 | unicode_display = unicode_highlight; | |
313 | else | |
314 | fatal (_("invalid argument to -U/--unicode: %s"), optarg); | |
315 | break; | |
316 | ||
8b53311e | 317 | case 'V': |
252b5132 RH |
318 | case 'v': |
319 | print_version ("strings"); | |
320 | break; | |
321 | ||
322 | case '?': | |
323 | usage (stderr, 1); | |
324 | ||
325 | default: | |
e36aef42 | 326 | numeric_opt = optind; |
252b5132 RH |
327 | break; |
328 | } | |
329 | } | |
330 | ||
b3aa80b4 NC |
331 | if (unicode_display != unicode_default) |
332 | encoding = 'S'; | |
333 | ||
e36aef42 | 334 | if (numeric_opt != 0) |
3713e829 | 335 | set_string_min (argv[numeric_opt - 1] + 1); |
252b5132 | 336 | |
d132876a NC |
337 | switch (encoding) |
338 | { | |
8745eafa | 339 | case 'S': |
d132876a NC |
340 | case 's': |
341 | encoding_bytes = 1; | |
342 | break; | |
343 | case 'b': | |
344 | case 'l': | |
345 | encoding_bytes = 2; | |
346 | break; | |
347 | case 'B': | |
348 | case 'L': | |
349 | encoding_bytes = 4; | |
350 | break; | |
351 | default: | |
352 | usage (stderr, 1); | |
353 | } | |
354 | ||
bf2dd8d7 AM |
355 | if (bfd_init () != BFD_INIT_MAGIC) |
356 | fatal (_("fatal error: libbfd ABI mismatch")); | |
252b5132 RH |
357 | set_default_bfd_target (); |
358 | ||
359 | if (optind >= argc) | |
360 | { | |
015dc7e1 | 361 | datasection_only = false; |
5af11cab | 362 | SET_BINARY (fileno (stdin)); |
b3aa80b4 | 363 | print_strings ("{standard input}", stdin, 0, 0, (char *) NULL); |
015dc7e1 | 364 | files_given = true; |
252b5132 RH |
365 | } |
366 | else | |
367 | { | |
368 | for (; optind < argc; ++optind) | |
369 | { | |
b3aa80b4 | 370 | if (streq (argv[optind], "-")) |
015dc7e1 | 371 | datasection_only = false; |
252b5132 RH |
372 | else |
373 | { | |
015dc7e1 | 374 | files_given = true; |
535b785f | 375 | exit_status |= !strings_file (argv[optind]); |
252b5132 RH |
376 | } |
377 | } | |
378 | } | |
379 | ||
b34976b6 | 380 | if (!files_given) |
252b5132 RH |
381 | usage (stderr, 1); |
382 | ||
383 | return (exit_status); | |
384 | } | |
385 | \f | |
19871f45 AM |
386 | /* Scan section SECT of the file ABFD, whose printable name is |
387 | FILENAME. If it contains initialized data set GOT_A_SECTION and | |
388 | print the strings in it. */ | |
252b5132 RH |
389 | |
390 | static void | |
19871f45 | 391 | strings_a_section (bfd *abfd, asection *sect, const char *filename, |
015dc7e1 | 392 | bool *got_a_section) |
252b5132 | 393 | { |
06803313 | 394 | bfd_size_type sectsize; |
19871f45 | 395 | bfd_byte *mem; |
3aade688 | 396 | |
06803313 NC |
397 | if ((sect->flags & DATA_FLAGS) != DATA_FLAGS) |
398 | return; | |
399 | ||
fd361982 | 400 | sectsize = bfd_section_size (sect); |
19871f45 | 401 | if (sectsize == 0) |
06803313 NC |
402 | return; |
403 | ||
19871f45 | 404 | if (!bfd_malloc_and_get_section (abfd, sect, &mem)) |
252b5132 | 405 | { |
19871f45 AM |
406 | non_fatal (_("%s: Reading section %s failed: %s"), |
407 | filename, sect->name, bfd_errmsg (bfd_get_error ())); | |
408 | return; | |
252b5132 | 409 | } |
06803313 | 410 | |
015dc7e1 | 411 | *got_a_section = true; |
b3aa80b4 | 412 | print_strings (filename, NULL, sect->filepos, sectsize, (char *) mem); |
06803313 | 413 | free (mem); |
252b5132 RH |
414 | } |
415 | ||
416 | /* Scan all of the sections in FILE, and print the strings | |
417 | in the initialized data section(s). | |
418 | ||
b34976b6 AM |
419 | Return TRUE if successful, |
420 | FALSE if not (such as if FILE is not an object file). */ | |
252b5132 | 421 | |
015dc7e1 | 422 | static bool |
2da42df6 | 423 | strings_object_file (const char *file) |
252b5132 | 424 | { |
06803313 | 425 | bfd *abfd; |
19871f45 | 426 | asection *s; |
015dc7e1 | 427 | bool got_a_section; |
06803313 NC |
428 | |
429 | abfd = bfd_openr (file, target); | |
252b5132 RH |
430 | |
431 | if (abfd == NULL) | |
8745eafa | 432 | /* Treat the file as a non-object file. */ |
015dc7e1 | 433 | return false; |
252b5132 RH |
434 | |
435 | /* This call is mainly for its side effect of reading in the sections. | |
436 | We follow the traditional behavior of `strings' in that we don't | |
437 | complain if we don't recognize a file to be an object file. */ | |
b34976b6 | 438 | if (!bfd_check_format (abfd, bfd_object)) |
252b5132 RH |
439 | { |
440 | bfd_close (abfd); | |
015dc7e1 | 441 | return false; |
252b5132 RH |
442 | } |
443 | ||
015dc7e1 | 444 | got_a_section = false; |
19871f45 AM |
445 | for (s = abfd->sections; s != NULL; s = s->next) |
446 | strings_a_section (abfd, s, file, &got_a_section); | |
252b5132 RH |
447 | |
448 | if (!bfd_close (abfd)) | |
449 | { | |
450 | bfd_nonfatal (file); | |
015dc7e1 | 451 | return false; |
252b5132 RH |
452 | } |
453 | ||
454 | return got_a_section; | |
455 | } | |
456 | ||
b34976b6 | 457 | /* Print the strings in FILE. Return TRUE if ok, FALSE if an error occurs. */ |
252b5132 | 458 | |
015dc7e1 | 459 | static bool |
2da42df6 | 460 | strings_file (char *file) |
252b5132 | 461 | { |
ee2fb9eb JK |
462 | struct stat st; |
463 | ||
464 | /* get_file_size does not support non-S_ISREG files. */ | |
fb5b5478 | 465 | |
ee2fb9eb | 466 | if (stat (file, &st) < 0) |
fb5b5478 JJ |
467 | { |
468 | if (errno == ENOENT) | |
469 | non_fatal (_("'%s': No such file"), file); | |
470 | else | |
471 | non_fatal (_("Warning: could not locate '%s'. reason: %s"), | |
472 | file, strerror (errno)); | |
015dc7e1 | 473 | return false; |
fb5b5478 | 474 | } |
0e158763 NC |
475 | else if (S_ISDIR (st.st_mode)) |
476 | { | |
477 | non_fatal (_("Warning: '%s' is a directory"), file); | |
015dc7e1 | 478 | return false; |
0e158763 | 479 | } |
f24ddbdd | 480 | |
252b5132 RH |
481 | /* If we weren't told to scan the whole file, |
482 | try to open it as an object file and only look at | |
483 | initialized data sections. If that fails, fall back to the | |
484 | whole file. */ | |
485 | if (!datasection_only || !strings_object_file (file)) | |
486 | { | |
487 | FILE *stream; | |
488 | ||
ee2fb9eb | 489 | stream = fopen (file, FOPEN_RB); |
252b5132 RH |
490 | if (stream == NULL) |
491 | { | |
492 | fprintf (stderr, "%s: ", program_name); | |
493 | perror (file); | |
015dc7e1 | 494 | return false; |
252b5132 RH |
495 | } |
496 | ||
b3aa80b4 | 497 | print_strings (file, stream, (file_ptr) 0, 0, (char *) NULL); |
252b5132 RH |
498 | |
499 | if (fclose (stream) == EOF) | |
500 | { | |
501 | fprintf (stderr, "%s: ", program_name); | |
502 | perror (file); | |
015dc7e1 | 503 | return false; |
252b5132 RH |
504 | } |
505 | } | |
506 | ||
015dc7e1 | 507 | return true; |
252b5132 RH |
508 | } |
509 | \f | |
d132876a NC |
510 | /* Read the next character, return EOF if none available. |
511 | Assume that STREAM is positioned so that the next byte read | |
512 | is at address ADDRESS in the file. | |
513 | ||
514 | If STREAM is NULL, do not read from it. | |
515 | The caller can supply a buffer of characters | |
516 | to be processed before the data in STREAM. | |
517 | MAGIC is the address of the buffer and | |
518 | MAGICCOUNT is how many characters are in it. */ | |
519 | ||
520 | static long | |
ee2fb9eb | 521 | get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic) |
d132876a NC |
522 | { |
523 | int c, i; | |
c54e2ec1 | 524 | long r = 0; |
d132876a NC |
525 | |
526 | for (i = 0; i < encoding_bytes; i++) | |
527 | { | |
528 | if (*magiccount) | |
529 | { | |
530 | (*magiccount)--; | |
531 | c = *(*magic)++; | |
532 | } | |
533 | else | |
534 | { | |
535 | if (stream == NULL) | |
536 | return EOF; | |
b7d4af3a JW |
537 | |
538 | /* Only use getc_unlocked if we found a declaration for it. | |
539 | Otherwise, libc is not thread safe by default, and we | |
540 | should not use it. */ | |
541 | ||
542 | #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED | |
cedd9a58 JJ |
543 | c = getc_unlocked (stream); |
544 | #else | |
d132876a | 545 | c = getc (stream); |
cedd9a58 | 546 | #endif |
d132876a NC |
547 | if (c == EOF) |
548 | return EOF; | |
549 | } | |
550 | ||
551 | (*address)++; | |
c54e2ec1 | 552 | r = (r << 8) | (c & 0xff); |
d132876a NC |
553 | } |
554 | ||
555 | switch (encoding) | |
556 | { | |
c54e2ec1 | 557 | default: |
d132876a NC |
558 | break; |
559 | case 'l': | |
c54e2ec1 | 560 | r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8); |
d132876a NC |
561 | break; |
562 | case 'L': | |
c54e2ec1 AM |
563 | r = (((r & 0xff) << 24) | ((r & 0xff00) << 8) |
564 | | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24)); | |
d132876a NC |
565 | break; |
566 | } | |
567 | ||
d132876a NC |
568 | return r; |
569 | } | |
7ca166c9 AM |
570 | |
571 | /* Throw away one byte of a (possibly) multi-byte char C, updating | |
572 | address and buffer to suit. */ | |
573 | ||
574 | static void | |
575 | unget_part_char (long c, file_ptr *address, int *magiccount, char **magic) | |
576 | { | |
577 | static char tmp[4]; | |
578 | ||
579 | if (encoding_bytes > 1) | |
580 | { | |
581 | *address -= encoding_bytes - 1; | |
582 | ||
583 | if (*magiccount == 0) | |
584 | { | |
585 | /* If no magic buffer exists, use temp buffer. */ | |
586 | switch (encoding) | |
587 | { | |
588 | default: | |
589 | break; | |
590 | case 'b': | |
591 | tmp[0] = c & 0xff; | |
592 | *magiccount = 1; | |
593 | break; | |
594 | case 'l': | |
595 | tmp[0] = (c >> 8) & 0xff; | |
596 | *magiccount = 1; | |
597 | break; | |
598 | case 'B': | |
599 | tmp[0] = (c >> 16) & 0xff; | |
600 | tmp[1] = (c >> 8) & 0xff; | |
601 | tmp[2] = c & 0xff; | |
602 | *magiccount = 3; | |
603 | break; | |
604 | case 'L': | |
605 | tmp[0] = (c >> 8) & 0xff; | |
606 | tmp[1] = (c >> 16) & 0xff; | |
607 | tmp[2] = (c >> 24) & 0xff; | |
608 | *magiccount = 3; | |
609 | break; | |
610 | } | |
611 | *magic = tmp; | |
612 | } | |
613 | else | |
614 | { | |
615 | /* If magic buffer exists, rewind. */ | |
616 | *magic -= encoding_bytes - 1; | |
617 | *magiccount += encoding_bytes - 1; | |
618 | } | |
619 | } | |
620 | } | |
b3aa80b4 NC |
621 | |
622 | static void | |
623 | print_filename_and_address (const char * filename, file_ptr address) | |
624 | { | |
625 | if (print_filenames) | |
626 | printf ("%s: ", filename); | |
627 | ||
628 | if (! print_addresses) | |
629 | return; | |
630 | ||
631 | switch (address_radix) | |
632 | { | |
633 | case 8: | |
634 | if (sizeof (address) > sizeof (long)) | |
635 | { | |
636 | #ifndef __MSVCRT__ | |
637 | printf ("%7llo ", (unsigned long long) address); | |
638 | #else | |
639 | printf ("%7I64o ", (unsigned long long) address); | |
640 | #endif | |
641 | } | |
642 | else | |
643 | printf ("%7lo ", (unsigned long) address); | |
644 | break; | |
645 | ||
646 | case 10: | |
647 | if (sizeof (address) > sizeof (long)) | |
648 | { | |
649 | #ifndef __MSVCRT__ | |
650 | printf ("%7llu ", (unsigned long long) address); | |
651 | #else | |
652 | printf ("%7I64d ", (unsigned long long) address); | |
653 | #endif | |
654 | } | |
655 | else | |
656 | printf ("%7ld ", (long) address); | |
657 | break; | |
658 | ||
659 | case 16: | |
660 | if (sizeof (address) > sizeof (long)) | |
661 | { | |
662 | #ifndef __MSVCRT__ | |
663 | printf ("%7llx ", (unsigned long long) address); | |
664 | #else | |
665 | printf ("%7I64x ", (unsigned long long) address); | |
666 | #endif | |
667 | } | |
668 | else | |
669 | printf ("%7lx ", (unsigned long) address); | |
670 | break; | |
671 | } | |
672 | } | |
673 | ||
/* Return non-zero if the bytes starting at BUFFER form a valid UTF-8 encoding.
   If the encoding is valid then returns the number of bytes it uses.

   BUFLEN is the number of bytes available at BUFFER.  Only multi-byte
   encodings are recognised: zero is returned when the first byte is
   below 0xc0, when a required continuation byte does not have the form
   10xxxxxx, or when the sequence would extend beyond BUFLEN.  Zero is
   also returned for a NULL or empty buffer.  */

static unsigned int
is_valid_utf8 (const unsigned char * buffer, unsigned long buflen)
{
  /* Do not look at the first byte unless there is one.  */
  if (buffer == NULL || buflen == 0)
    return 0;

  if (buffer[0] < 0xc0)
    return 0;

  if (buflen < 2)
    return 0;

  if ((buffer[1] & 0xc0) != 0x80)
    return 0;

  /* 110xxxxx introduces a two byte sequence.  */
  if ((buffer[0] & 0x20) == 0)
    return 2;

  if (buflen < 3)
    return 0;

  if ((buffer[2] & 0xc0) != 0x80)
    return 0;

  /* 1110xxxx introduces a three byte sequence.  */
  if ((buffer[0] & 0x10) == 0)
    return 3;

  if (buflen < 4)
    return 0;

  if ((buffer[3] & 0xc0) != 0x80)
    return 0;

  return 4;
}
709 | ||
710 | /* Display a UTF-8 encoded character in BUFFER according to the setting | |
711 | of unicode_display. The character is known to be valid. | |
712 | Returns the number of bytes consumed. */ | |
713 | ||
795588ae | 714 | static unsigned int |
b3aa80b4 NC |
715 | display_utf8_char (const unsigned char * buffer) |
716 | { | |
795588ae PS |
717 | unsigned int j; |
718 | unsigned int utf8_len; | |
b3aa80b4 NC |
719 | |
720 | switch (buffer[0] & 0x30) | |
721 | { | |
722 | case 0x00: | |
723 | case 0x10: | |
724 | utf8_len = 2; | |
725 | break; | |
726 | case 0x20: | |
727 | utf8_len = 3; | |
728 | break; | |
729 | default: | |
730 | utf8_len = 4; | |
731 | } | |
795588ae | 732 | |
b3aa80b4 NC |
733 | switch (unicode_display) |
734 | { | |
735 | default: | |
736 | fprintf (stderr, "ICE: unexpected unicode display type\n"); | |
737 | break; | |
738 | ||
739 | case unicode_escape: | |
740 | case unicode_highlight: | |
741 | if (unicode_display == unicode_highlight && isatty (1)) | |
742 | printf ("\x1B[31;47m"); /* Red. */ | |
743 | ||
744 | switch (utf8_len) | |
745 | { | |
746 | case 2: | |
747 | printf ("\\u%02x%02x", | |
795588ae | 748 | ((buffer[0] & 0x1c) >> 2), |
b3aa80b4 NC |
749 | ((buffer[0] & 0x03) << 6) | (buffer[1] & 0x3f)); |
750 | break; | |
751 | ||
752 | case 3: | |
753 | printf ("\\u%02x%02x", | |
754 | ((buffer[0] & 0x0f) << 4) | ((buffer[1] & 0x3c) >> 2), | |
755 | ((buffer[1] & 0x03) << 6) | ((buffer[2] & 0x3f))); | |
756 | break; | |
757 | ||
758 | case 4: | |
759 | printf ("\\u%02x%02x%02x", | |
760 | ((buffer[0] & 0x07) << 6) | ((buffer[1] & 0x3c) >> 2), | |
761 | ((buffer[1] & 0x03) << 6) | ((buffer[2] & 0x3c) >> 2), | |
762 | ((buffer[2] & 0x03) << 6) | ((buffer[3] & 0x3f))); | |
763 | break; | |
764 | default: | |
765 | /* URG. */ | |
766 | break; | |
767 | } | |
768 | ||
769 | if (unicode_display == unicode_highlight && isatty (1)) | |
770 | printf ("\033[0m"); /* Default colour. */ | |
771 | break; | |
772 | ||
773 | case unicode_hex: | |
774 | putchar ('<'); | |
775 | printf ("0x"); | |
776 | for (j = 0; j < utf8_len; j++) | |
777 | printf ("%02x", buffer [j]); | |
778 | putchar ('>'); | |
779 | break; | |
780 | ||
781 | case unicode_locale: | |
782 | printf ("%.1s", buffer); | |
783 | break; | |
784 | } | |
785 | ||
786 | return utf8_len; | |
787 | } | |
788 | ||
789 | /* Display strings in BUFFER. Treat any UTF-8 encoded characters encountered | |
790 | according to the setting of the unicode_display variable. The buffer | |
791 | contains BUFLEN bytes. | |
792 | ||
793 | Display the characters as if they started at ADDRESS and are contained in | |
794 | FILENAME. */ | |
795 | ||
796 | static void | |
797 | print_unicode_buffer (const char * filename, | |
798 | file_ptr address, | |
799 | const unsigned char * buffer, | |
800 | unsigned long buflen) | |
801 | { | |
802 | /* Paranoia checks... */ | |
803 | if (filename == NULL | |
804 | || buffer == NULL | |
805 | || unicode_display == unicode_default | |
806 | || encoding != 'S' | |
807 | || encoding_bytes != 1) | |
808 | { | |
809 | fprintf (stderr, "ICE: bad arguments to print_unicode_buffer\n"); | |
810 | return; | |
811 | } | |
812 | ||
813 | if (buflen == 0) | |
814 | return; | |
815 | ||
816 | /* We must only display strings that are at least string_min *characters* | |
817 | long. So we scan the buffer in two stages. First we locate the start | |
818 | of a potential string. Then we walk along it until we have found | |
819 | string_min characters. Then we go back to the start point and start | |
820 | displaying characters according to the unicode_display setting. */ | |
821 | ||
822 | unsigned long start_point = 0; | |
823 | unsigned long i = 0; | |
824 | unsigned int char_len = 1; | |
825 | unsigned int num_found = 0; | |
826 | ||
827 | for (i = 0; i < buflen; i += char_len) | |
828 | { | |
829 | int c = buffer[i]; | |
830 | ||
831 | char_len = 1; | |
832 | ||
833 | /* Find the first potential character of a string. */ | |
834 | if (! STRING_ISGRAPHIC (c)) | |
835 | { | |
836 | num_found = 0; | |
837 | continue; | |
838 | } | |
839 | ||
840 | if (c > 126) | |
841 | { | |
842 | if (c < 0xc0) | |
843 | { | |
844 | num_found = 0; | |
845 | continue; | |
846 | } | |
847 | ||
848 | if ((char_len = is_valid_utf8 (buffer + i, buflen - i)) == 0) | |
849 | { | |
850 | char_len = 1; | |
851 | num_found = 0; | |
852 | continue; | |
853 | } | |
854 | ||
855 | if (unicode_display == unicode_invalid) | |
856 | { | |
857 | /* We have found a valid UTF-8 character, but we treat it as non-graphic. */ | |
858 | num_found = 0; | |
859 | continue; | |
860 | } | |
861 | } | |
862 | ||
863 | if (num_found == 0) | |
864 | /* We have found a potential starting point for a string. */ | |
865 | start_point = i; | |
866 | ||
867 | ++ num_found; | |
868 | ||
869 | if (num_found >= string_min) | |
870 | break; | |
871 | } | |
872 | ||
873 | if (num_found < string_min) | |
874 | return; | |
875 | ||
876 | print_filename_and_address (filename, address + start_point); | |
795588ae | 877 | |
b3aa80b4 NC |
878 | /* We have found string_min characters. Display them and any |
879 | more that follow. */ | |
880 | for (i = start_point; i < buflen; i += char_len) | |
881 | { | |
882 | int c = buffer[i]; | |
883 | ||
884 | char_len = 1; | |
885 | ||
886 | if (! STRING_ISGRAPHIC (c)) | |
887 | break; | |
888 | else if (c < 127) | |
889 | putchar (c); | |
890 | else if (! is_valid_utf8 (buffer + i, buflen - i)) | |
891 | break; | |
892 | else if (unicode_display == unicode_invalid) | |
893 | break; | |
894 | else | |
895 | char_len = display_utf8_char (buffer + i); | |
896 | } | |
897 | ||
898 | if (output_separator) | |
899 | fputs (output_separator, stdout); | |
900 | else | |
901 | putchar ('\n'); | |
902 | ||
903 | /* FIXME: Using tail recursion here is lazy programming... */ | |
904 | print_unicode_buffer (filename, address + i, buffer + i, buflen - i); | |
905 | } | |
906 | ||
/* Return the next input byte for the unicode stream scanner.  Bytes
   that were put back are returned first, most recently stored first:
   the last of the *NUM_PUTBACK bytes in PUTBACK is removed and
   returned.  When no such bytes are pending, *NUM_READ is incremented
   and the result of reading one byte from STREAM is returned, which is
   EOF once the stream is exhausted.  */

static int
get_unicode_byte (FILE * stream,
                  unsigned char * putback,
                  unsigned int * num_putback,
                  unsigned int * num_read)
{
  if (* num_putback == 0)
    {
      /* Counted even when the read reports EOF.  */
      ++ * num_read;

#if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
      return getc_unlocked (stream);
#else
      return getc (stream);
#endif
    }

  -- * num_putback;
  return putback [* num_putback];
}
927 | ||
928 | /* Helper function for print_unicode_stream. */ | |
929 | ||
930 | static void | |
931 | print_unicode_stream_body (const char * filename, | |
932 | file_ptr address, | |
933 | FILE * stream, | |
934 | unsigned char * putback_buf, | |
795588ae | 935 | unsigned int num_putback, |
b3aa80b4 NC |
936 | unsigned char * print_buf) |
937 | { | |
938 | /* It would be nice if we could just read the stream into a buffer | |
939 | and then process if with print_unicode_buffer. But the input | |
940 | might be huge or it might time-locked (eg stdin). So instead | |
941 | we go one byte at a time... */ | |
942 | ||
943 | file_ptr start_point = 0; | |
795588ae PS |
944 | unsigned int num_read = 0; |
945 | unsigned int num_chars = 0; | |
946 | unsigned int num_print = 0; | |
a9a09f51 | 947 | int c = 0; |
b3aa80b4 NC |
948 | |
949 | /* Find a series of string_min characters. Put them into print_buf. */ | |
950 | do | |
951 | { | |
952 | if (num_chars >= string_min) | |
953 | break; | |
954 | ||
955 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
956 | if (c == EOF) | |
957 | break; | |
958 | ||
959 | if (! STRING_ISGRAPHIC (c)) | |
960 | { | |
961 | num_chars = num_print = 0; | |
962 | continue; | |
963 | } | |
964 | ||
965 | if (num_chars == 0) | |
966 | start_point = num_read - 1; | |
967 | ||
968 | if (c < 127) | |
969 | { | |
970 | print_buf[num_print] = c; | |
971 | num_chars ++; | |
972 | num_print ++; | |
973 | continue; | |
974 | } | |
975 | ||
976 | if (c < 0xc0) | |
977 | { | |
978 | num_chars = num_print = 0; | |
979 | continue; | |
980 | } | |
981 | ||
982 | /* We *might* have a UTF-8 sequence. Time to start peeking. */ | |
983 | char utf8[4]; | |
984 | ||
985 | utf8[0] = c; | |
986 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
987 | if (c == EOF) | |
988 | break; | |
989 | utf8[1] = c; | |
990 | ||
991 | if ((utf8[1] & 0xc0) != 0x80) | |
992 | { | |
993 | /* Invalid UTF-8. */ | |
994 | putback_buf[num_putback++] = utf8[1]; | |
995 | num_chars = num_print = 0; | |
996 | continue; | |
997 | } | |
998 | else if ((utf8[0] & 0x20) == 0) | |
999 | { | |
1000 | /* A valid 2-byte UTF-8 encoding. */ | |
1001 | if (unicode_display == unicode_invalid) | |
1002 | { | |
1003 | putback_buf[num_putback++] = utf8[1]; | |
1004 | num_chars = num_print = 0; | |
1005 | } | |
1006 | else | |
1007 | { | |
1008 | print_buf[num_print ++] = utf8[0]; | |
1009 | print_buf[num_print ++] = utf8[1]; | |
1010 | num_chars ++; | |
1011 | } | |
1012 | continue; | |
1013 | } | |
1014 | ||
1015 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
1016 | if (c == EOF) | |
1017 | break; | |
1018 | utf8[2] = c; | |
1019 | ||
1020 | if ((utf8[2] & 0xc0) != 0x80) | |
1021 | { | |
1022 | /* Invalid UTF-8. */ | |
1023 | putback_buf[num_putback++] = utf8[2]; | |
1024 | putback_buf[num_putback++] = utf8[1]; | |
1025 | num_chars = num_print = 0; | |
1026 | continue; | |
1027 | } | |
1028 | else if ((utf8[0] & 0x10) == 0) | |
1029 | { | |
1030 | /* A valid 3-byte UTF-8 encoding. */ | |
1031 | if (unicode_display == unicode_invalid) | |
1032 | { | |
1033 | putback_buf[num_putback++] = utf8[2]; | |
1034 | putback_buf[num_putback++] = utf8[1]; | |
1035 | num_chars = num_print = 0; | |
1036 | } | |
1037 | else | |
1038 | { | |
1039 | print_buf[num_print ++] = utf8[0]; | |
1040 | print_buf[num_print ++] = utf8[1]; | |
1041 | print_buf[num_print ++] = utf8[2]; | |
1042 | num_chars ++; | |
1043 | } | |
1044 | continue; | |
1045 | } | |
1046 | ||
1047 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
1048 | if (c == EOF) | |
1049 | break; | |
1050 | utf8[3] = c; | |
1051 | ||
1052 | if ((utf8[3] & 0xc0) != 0x80) | |
1053 | { | |
1054 | /* Invalid UTF-8. */ | |
1055 | putback_buf[num_putback++] = utf8[3]; | |
1056 | putback_buf[num_putback++] = utf8[2]; | |
1057 | putback_buf[num_putback++] = utf8[1]; | |
1058 | num_chars = num_print = 0; | |
1059 | } | |
1060 | /* We have a valid 4-byte UTF-8 encoding. */ | |
1061 | else if (unicode_display == unicode_invalid) | |
1062 | { | |
1063 | putback_buf[num_putback++] = utf8[3]; | |
1064 | putback_buf[num_putback++] = utf8[1]; | |
1065 | putback_buf[num_putback++] = utf8[2]; | |
1066 | num_chars = num_print = 0; | |
1067 | } | |
1068 | else | |
1069 | { | |
1070 | print_buf[num_print ++] = utf8[0]; | |
1071 | print_buf[num_print ++] = utf8[1]; | |
1072 | print_buf[num_print ++] = utf8[2]; | |
1073 | print_buf[num_print ++] = utf8[3]; | |
1074 | num_chars ++; | |
1075 | } | |
1076 | } | |
1077 | while (1); | |
1078 | ||
1079 | if (num_chars >= string_min) | |
1080 | { | |
1081 | /* We know that we have string_min valid characters in print_buf, | |
1082 | and there may be more to come in the stream. Start displaying | |
1083 | them. */ | |
1084 | ||
1085 | print_filename_and_address (filename, address + start_point); | |
1086 | ||
795588ae | 1087 | unsigned int i; |
b3aa80b4 NC |
1088 | for (i = 0; i < num_print;) |
1089 | { | |
1090 | if (print_buf[i] < 127) | |
1091 | putchar (print_buf[i++]); | |
1092 | else | |
1093 | i += display_utf8_char (print_buf + i); | |
1094 | } | |
1095 | ||
1096 | /* OK so now we have to start read unchecked bytes. */ | |
1097 | ||
795588ae | 1098 | /* Find a series of string_min characters. Put them into print_buf. */ |
b3aa80b4 NC |
1099 | do |
1100 | { | |
1101 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
1102 | if (c == EOF) | |
1103 | break; | |
1104 | ||
1105 | if (! STRING_ISGRAPHIC (c)) | |
1106 | break; | |
1107 | ||
1108 | if (c < 127) | |
1109 | { | |
1110 | putchar (c); | |
1111 | continue; | |
1112 | } | |
1113 | ||
1114 | if (c < 0xc0) | |
1115 | break; | |
1116 | ||
1117 | /* We *might* have a UTF-8 sequence. Time to start peeking. */ | |
1118 | unsigned char utf8[4]; | |
1119 | ||
1120 | utf8[0] = c; | |
1121 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
1122 | if (c == EOF) | |
1123 | break; | |
1124 | utf8[1] = c; | |
1125 | ||
1126 | if ((utf8[1] & 0xc0) != 0x80) | |
1127 | { | |
1128 | /* Invalid UTF-8. */ | |
1129 | putback_buf[num_putback++] = utf8[1]; | |
1130 | break; | |
1131 | } | |
1132 | else if ((utf8[0] & 0x20) == 0) | |
1133 | { | |
1134 | /* Valid 2-byte UTF-8. */ | |
1135 | if (unicode_display == unicode_invalid) | |
1136 | { | |
1137 | putback_buf[num_putback++] = utf8[1]; | |
1138 | break; | |
1139 | } | |
1140 | else | |
1141 | { | |
1142 | (void) display_utf8_char (utf8); | |
1143 | continue; | |
1144 | } | |
1145 | } | |
1146 | ||
1147 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
1148 | if (c == EOF) | |
1149 | break; | |
1150 | utf8[2] = c; | |
1151 | ||
1152 | if ((utf8[2] & 0xc0) != 0x80) | |
1153 | { | |
1154 | /* Invalid UTF-8. */ | |
1155 | putback_buf[num_putback++] = utf8[2]; | |
1156 | putback_buf[num_putback++] = utf8[1]; | |
1157 | break; | |
1158 | } | |
1159 | else if ((utf8[0] & 0x10) == 0) | |
1160 | { | |
1161 | /* Valid 3-byte UTF-8. */ | |
1162 | if (unicode_display == unicode_invalid) | |
1163 | { | |
1164 | putback_buf[num_putback++] = utf8[2]; | |
1165 | putback_buf[num_putback++] = utf8[1]; | |
1166 | break; | |
1167 | } | |
1168 | else | |
1169 | { | |
1170 | (void) display_utf8_char (utf8); | |
1171 | continue; | |
1172 | } | |
1173 | } | |
1174 | ||
1175 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
1176 | if (c == EOF) | |
1177 | break; | |
1178 | utf8[3] = c; | |
1179 | ||
1180 | if ((utf8[3] & 0xc0) != 0x80) | |
1181 | { | |
1182 | /* Invalid UTF-8. */ | |
1183 | putback_buf[num_putback++] = utf8[3]; | |
1184 | putback_buf[num_putback++] = utf8[2]; | |
1185 | putback_buf[num_putback++] = utf8[1]; | |
1186 | break; | |
1187 | } | |
1188 | else if (unicode_display == unicode_invalid) | |
1189 | { | |
1190 | putback_buf[num_putback++] = utf8[3]; | |
1191 | putback_buf[num_putback++] = utf8[2]; | |
1192 | putback_buf[num_putback++] = utf8[1]; | |
1193 | break; | |
1194 | } | |
1195 | else | |
1196 | /* A valid 4-byte UTF-8 encoding. */ | |
1197 | (void) display_utf8_char (utf8); | |
1198 | } | |
1199 | while (1); | |
1200 | ||
1201 | if (output_separator) | |
1202 | fputs (output_separator, stdout); | |
1203 | else | |
1204 | putchar ('\n'); | |
1205 | } | |
1206 | ||
1207 | if (c != EOF) | |
1208 | /* FIXME: Using tail recursion here is lazy, but it works. */ | |
1209 | print_unicode_stream_body (filename, address + num_read, stream, putback_buf, num_putback, print_buf); | |
1210 | } | |
1211 | ||
1212 | /* Display strings read in from STREAM. Treat any UTF-8 encoded characters | |
1213 | encountered according to the setting of the unicode_display variable. | |
1214 | The stream is positioned at ADDRESS and is attached to FILENAME. */ | |
1215 | ||
1216 | static void | |
1217 | print_unicode_stream (const char * filename, | |
1218 | file_ptr address, | |
1219 | FILE * stream) | |
1220 | { | |
1221 | /* Paranoia checks... */ | |
1222 | if (filename == NULL | |
1223 | || stream == NULL | |
1224 | || unicode_display == unicode_default | |
1225 | || encoding != 'S' | |
1226 | || encoding_bytes != 1) | |
1227 | { | |
1228 | fprintf (stderr, "ICE: bad arguments to print_unicode_stream\n"); | |
1229 | return; | |
1230 | } | |
1231 | ||
1232 | /* Allocate space for string_min 4-byte utf-8 characters. */ | |
3713e829 NC |
1233 | size_t amt = string_min; |
1234 | amt = (4 * amt) + 1; | |
1235 | unsigned char * print_buf = xmalloc (amt); | |
b3aa80b4 NC |
1236 | /* We should never have to put back more than 4 bytes. */ |
1237 | unsigned char putback_buf[5]; | |
795588ae | 1238 | unsigned int num_putback = 0; |
b3aa80b4 NC |
1239 | |
1240 | print_unicode_stream_body (filename, address, stream, putback_buf, num_putback, print_buf); | |
1241 | free (print_buf); | |
1242 | } | |
d132876a | 1243 | \f |
252b5132 RH |
1244 | /* Find the strings in file FILENAME, read from STREAM. |
1245 | Assume that STREAM is positioned so that the next byte read | |
1246 | is at address ADDRESS in the file. | |
252b5132 RH |
1247 | |
1248 | If STREAM is NULL, do not read from it. | |
1249 | The caller can supply a buffer of characters | |
1250 | to be processed before the data in STREAM. | |
1251 | MAGIC is the address of the buffer and | |
1252 | MAGICCOUNT is how many characters are in it. | |
1253 | Those characters come at address ADDRESS and the data in STREAM follow. */ | |
1254 | ||
1255 | static void | |
ee2fb9eb | 1256 | print_strings (const char *filename, FILE *stream, file_ptr address, |
b3aa80b4 | 1257 | int magiccount, char *magic) |
252b5132 | 1258 | { |
b3aa80b4 NC |
1259 | if (unicode_display != unicode_default) |
1260 | { | |
1261 | if (magic != NULL) | |
1262 | print_unicode_buffer (filename, address, | |
1263 | (const unsigned char *) magic, magiccount); | |
1264 | ||
1265 | if (stream != NULL) | |
1266 | print_unicode_stream (filename, address, stream); | |
1267 | return; | |
1268 | } | |
1269 | ||
d132876a | 1270 | char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1)); |
252b5132 RH |
1271 | |
1272 | while (1) | |
1273 | { | |
ee2fb9eb | 1274 | file_ptr start; |
795588ae | 1275 | unsigned int i; |
d132876a | 1276 | long c; |
252b5132 RH |
1277 | |
1278 | /* See if the next `string_min' chars are all graphic chars. */ | |
1279 | tryline: | |
252b5132 RH |
1280 | start = address; |
1281 | for (i = 0; i < string_min; i++) | |
1282 | { | |
d132876a NC |
1283 | c = get_char (stream, &address, &magiccount, &magic); |
1284 | if (c == EOF) | |
68187828 NC |
1285 | { |
1286 | free (buf); | |
1287 | return; | |
1288 | } | |
71f5e3f7 | 1289 | |
8745eafa | 1290 | if (! STRING_ISGRAPHIC (c)) |
71f5e3f7 | 1291 | { |
7ca166c9 AM |
1292 | /* Found a non-graphic. Try again starting with next byte. */ |
1293 | unget_part_char (c, &address, &magiccount, &magic); | |
71f5e3f7 NC |
1294 | goto tryline; |
1295 | } | |
252b5132 RH |
1296 | buf[i] = c; |
1297 | } | |
1298 | ||
1299 | /* We found a run of `string_min' graphic characters. Print up | |
e9f87780 | 1300 | to the next non-graphic character. */ |
b3aa80b4 | 1301 | print_filename_and_address (filename, start); |
252b5132 RH |
1302 | |
1303 | buf[i] = '\0'; | |
1304 | fputs (buf, stdout); | |
1305 | ||
1306 | while (1) | |
1307 | { | |
d132876a NC |
1308 | c = get_char (stream, &address, &magiccount, &magic); |
1309 | if (c == EOF) | |
1310 | break; | |
8745eafa | 1311 | if (! STRING_ISGRAPHIC (c)) |
dcd9adc5 | 1312 | { |
7ca166c9 | 1313 | unget_part_char (c, &address, &magiccount, &magic); |
dcd9adc5 NC |
1314 | break; |
1315 | } | |
252b5132 RH |
1316 | putchar (c); |
1317 | } | |
1318 | ||
55edd97b | 1319 | if (output_separator) |
7ca166c9 | 1320 | fputs (output_separator, stdout); |
55edd97b | 1321 | else |
7ca166c9 | 1322 | putchar ('\n'); |
252b5132 | 1323 | } |
68187828 | 1324 | free (buf); |
252b5132 RH |
1325 | } |
1326 | \f | |
252b5132 | 1327 | static void |
2da42df6 | 1328 | usage (FILE *stream, int status) |
252b5132 | 1329 | { |
8b53311e NC |
1330 | fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name); |
1331 | fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n")); | |
7fac9594 NC |
1332 | fprintf (stream, _(" The options are:\n")); |
1333 | ||
1334 | if (DEFAULT_STRINGS_ALL) | |
1335 | fprintf (stream, _("\ | |
1336 | -a - --all Scan the entire file, not just the data section [default]\n\ | |
1337 | -d --data Only scan the data sections in the file\n")); | |
1338 | else | |
1339 | fprintf (stream, _("\ | |
8b53311e | 1340 | -a - --all Scan the entire file, not just the data section\n\ |
7fac9594 NC |
1341 | -d --data Only scan the data sections in the file [default]\n")); |
1342 | ||
1343 | fprintf (stream, _("\ | |
8b53311e | 1344 | -f --print-file-name Print the name of the file before each string\n\ |
8fee99c3 NC |
1345 | -n <number> Locate & print any sequence of at least <number>\n\ |
1346 | --bytes=<number> displayable characters. (The default is 4).\n\ | |
d412a550 | 1347 | -t --radix={o,d,x} Print the location of the string in base 8, 10 or 16\n\ |
334ac421 | 1348 | -w --include-all-whitespace Include all whitespace as valid string characters\n\ |
8b53311e NC |
1349 | -o An alias for --radix=o\n\ |
1350 | -T --target=<BFDNAME> Specify the binary file format\n\ | |
8745eafa NC |
1351 | -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\ |
1352 | s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\ | |
b3aa80b4 | 1353 | --unicode={default|show|invalid|hex|escape|highlight}\n\ |
584294c4 | 1354 | -U {d|s|i|x|e|h} Specify how to treat UTF-8 encoded unicode characters\n\ |
55edd97b | 1355 | -s --output-separator=<string> String used to separate strings in output.\n\ |
07012eee | 1356 | @<file> Read options from <file>\n\ |
8b53311e | 1357 | -h --help Display this information\n\ |
ffbe5983 | 1358 | -v -V --version Print the program's version number\n")); |
252b5132 | 1359 | list_supported_targets (program_name, stream); |
92f01d61 | 1360 | if (REPORT_BUGS_TO[0] && status == 0) |
8ad3436c | 1361 | fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO); |
252b5132 RH |
1362 | exit (status); |
1363 | } |