]>
Commit | Line | Data |
---|---|---|
252b5132 | 1 | /* strings -- print the strings of printable characters in files |
a2c58332 | 2 | Copyright (C) 1993-2022 Free Software Foundation, Inc. |
252b5132 RH |
3 | |
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
32866df7 | 6 | the Free Software Foundation; either version 3, or (at your option) |
252b5132 RH |
7 | any later version. |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
b43b5d5f NC |
16 | Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA |
17 | 02110-1301, USA. */ | |
252b5132 RH |
18 | \f |
19 | /* Usage: strings [options] file... | |
20 | ||
21 | Options: | |
22 | --all | |
23 | -a | |
7fac9594 NC |
24 | - Scan each file in its entirety. |
25 | ||
26 | --data | |
27 | -d Scan only the initialized data section(s) of object files. | |
252b5132 RH |
28 | |
29 | --print-file-name | |
30 | -f Print the name of the file before each string. | |
31 | ||
32 | --bytes=min-len | |
33 | -n min-len | |
34 | -min-len Print graphic char sequences, MIN-LEN or more bytes long, | |
8fee99c3 NC |
35 | that are followed by a NUL or a non-displayable character. |
36 | Default is 4. | |
252b5132 RH |
37 | |
38 | --radix={o,x,d} | |
39 | -t {o,x,d} Print the offset within the file before each string, | |
40 | in octal/hex/decimal. | |
41 | ||
334ac421 EA |
42 | --include-all-whitespace |
43 | -w By default tab and space are the only whitespace included in graphic | |
44 | char sequences. This option considers all of isspace() valid. | |
45 | ||
252b5132 RH |
46 | -o Like -to. (Some other implementations have -o like -to, |
47 | others like -td. We chose one arbitrarily.) | |
48 | ||
8745eafa NC |
49 | --encoding={s,S,b,l,B,L} |
50 | -e {s,S,b,l,B,L} | |
51 | Select character encoding: 7-bit-character, 8-bit-character, | |
52 | bigendian 16-bit, littleendian 16-bit, bigendian 32-bit, | |
53 | littleendian 32-bit. | |
d132876a | 54 | |
252b5132 | 55 | --target=BFDNAME |
3bf31ec9 | 56 | -T {bfdname} |
252b5132 RH |
57 | Specify a non-default object file format. |
58 | ||
b3aa80b4 | 59 | --unicode={default|locale|invalid|hex|escape|highlight} |
584294c4 | 60 | -U {d|l|i|x|e|h} |
795588ae | 61 | Determine how to handle UTF-8 unicode characters. The default |
b3aa80b4 NC |
62 | is no special treatment. All other versions of this option |
63 | only apply if the encoding is valid and enabling the option | |
64 | implies --encoding=S. | |
65 | The 'locale' option displays the characters according to the | |
66 | current locale. The 'invalid' option treats them as | |
67 | non-string characters. The 'hex' option displays them as hex | |
68 | byte sequences. The 'escape' option displays them as escape | |
69 | sequences and the 'highlight' option displays them as | |
70 | coloured escape sequences. | |
71 | ||
55edd97b EA |
72 | --output-separator=sep_string |
73 | -s sep_string String used to separate parsed strings in output. | |
74 | Default is newline. | |
75 | ||
252b5132 RH |
76 | --help |
77 | -h Print the usage message on the standard output. | |
78 | ||
79 | --version | |
ffbe5983 | 80 | -V |
252b5132 RH |
81 | -v Print the program version number. |
82 | ||
83 | Written by Richard Stallman <rms@gnu.ai.mit.edu> | |
84 | and David MacKenzie <djm@gnu.ai.mit.edu>. */ | |
85 | ||
3db64b00 | 86 | #include "sysdep.h" |
252b5132 | 87 | #include "bfd.h" |
e9792343 | 88 | #include "getopt.h" |
252b5132 | 89 | #include "libiberty.h" |
3882b010 | 90 | #include "safe-ctype.h" |
3db64b00 | 91 | #include "bucomm.h" |
252b5132 | 92 | |
b3aa80b4 NC |
/* String equality helper; only defined here when nothing included
   earlier has already supplied one.  */
#ifndef streq
#define streq(a,b) (strcmp ((a),(b)) == 0)
#endif

/* The ways in which a UTF-8 sequence can be treated, as selected by
   the -U/--unicode option.  unicode_default means no special
   treatment.  */
typedef enum unicode_display_type
{
  unicode_default = 0,
  unicode_locale,
  unicode_escape,
  unicode_hex,
  unicode_highlight,
  unicode_invalid
} unicode_display_type;

/* The treatment currently in force.  */
static unicode_display_type unicode_display = unicode_default;
108 | ||
8745eafa NC |
/* Non-zero if C may be part of a displayed string: C must lie in
   0..255 and be a tab, a printable character, any value above 127
   when the encoding is 'S', or (when include_all_whitespace is set)
   any ISSPACE character.  C is evaluated more than once.  */
#define STRING_ISGRAPHIC(c) \
      (   (c) >= 0 \
       && (c) <= 255 \
       && ((c) == '\t' || ISPRINT (c) || (encoding == 'S' && (c) > 127) \
           || (include_all_whitespace && ISSPACE (c))) \
      )
252b5132 RH |
115 | |
116 | #ifndef errno | |
117 | extern int errno; | |
118 | #endif | |
119 | ||
120 | /* The BFD section flags that identify an initialized data section. */ | |
121 | #define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS) | |
122 | ||
123 | /* Radix for printing addresses (must be 8, 10 or 16). */ | |
124 | static int address_radix; | |
125 | ||
126 | /* Minimum length of sequence of graphic chars to trigger output. */ | |
795588ae | 127 | static unsigned int string_min; |
252b5132 | 128 | |
334ac421 | 129 | /* Whether or not we include all whitespace as a graphic char. */ |
015dc7e1 | 130 | static bool include_all_whitespace; |
334ac421 | 131 | |
b34976b6 | 132 | /* TRUE means print address within file for each string. */ |
015dc7e1 | 133 | static bool print_addresses; |
252b5132 | 134 | |
b34976b6 | 135 | /* TRUE means print filename for each string. */ |
015dc7e1 | 136 | static bool print_filenames; |
252b5132 | 137 | |
b34976b6 | 138 | /* TRUE means for object files scan only the data section. */ |
015dc7e1 | 139 | static bool datasection_only; |
252b5132 | 140 | |
252b5132 RH |
141 | /* The BFD object file format. */ |
142 | static char *target; | |
143 | ||
d132876a NC |
144 | /* The character encoding format. */ |
145 | static char encoding; | |
146 | static int encoding_bytes; | |
147 | ||
55edd97b EA |
148 | /* Output string used to separate parsed strings */ |
149 | static char *output_separator; | |
150 | ||
252b5132 RH |
151 | static struct option long_options[] = |
152 | { | |
153 | {"all", no_argument, NULL, 'a'}, | |
b3aa80b4 | 154 | {"bytes", required_argument, NULL, 'n'}, |
7fac9594 | 155 | {"data", no_argument, NULL, 'd'}, |
b3aa80b4 NC |
156 | {"encoding", required_argument, NULL, 'e'}, |
157 | {"help", no_argument, NULL, 'h'}, | |
158 | {"include-all-whitespace", no_argument, NULL, 'w'}, | |
159 | {"output-separator", required_argument, NULL, 's'}, | |
252b5132 | 160 | {"print-file-name", no_argument, NULL, 'f'}, |
252b5132 RH |
161 | {"radix", required_argument, NULL, 't'}, |
162 | {"target", required_argument, NULL, 'T'}, | |
b3aa80b4 | 163 | {"unicode", required_argument, NULL, 'U'}, |
252b5132 RH |
164 | {"version", no_argument, NULL, 'v'}, |
165 | {NULL, 0, NULL, 0} | |
166 | }; | |
167 | ||
015dc7e1 | 168 | static bool strings_file (char *); |
b3aa80b4 | 169 | static void print_strings (const char *, FILE *, file_ptr, int, char *); |
1e0f0b4d | 170 | static void usage (FILE *, int) ATTRIBUTE_NORETURN; |
252b5132 | 171 | \f |
2da42df6 | 172 | int main (int, char **); |
65de42c0 | 173 | |
252b5132 | 174 | int |
2da42df6 | 175 | main (int argc, char **argv) |
252b5132 RH |
176 | { |
177 | int optc; | |
178 | int exit_status = 0; | |
015dc7e1 | 179 | bool files_given = false; |
508e676d | 180 | char *s; |
e36aef42 | 181 | int numeric_opt = 0; |
252b5132 | 182 | |
1c529ca6 | 183 | setlocale (LC_ALL, ""); |
252b5132 RH |
184 | bindtextdomain (PACKAGE, LOCALEDIR); |
185 | textdomain (PACKAGE); | |
186 | ||
187 | program_name = argv[0]; | |
188 | xmalloc_set_program_name (program_name); | |
86eafac0 | 189 | bfd_set_error_program_name (program_name); |
869b9d07 MM |
190 | |
191 | expandargv (&argc, &argv); | |
192 | ||
c904a764 | 193 | string_min = 4; |
015dc7e1 AM |
194 | include_all_whitespace = false; |
195 | print_addresses = false; | |
196 | print_filenames = false; | |
7fac9594 | 197 | if (DEFAULT_STRINGS_ALL) |
015dc7e1 | 198 | datasection_only = false; |
7fac9594 | 199 | else |
015dc7e1 | 200 | datasection_only = true; |
252b5132 | 201 | target = NULL; |
d132876a | 202 | encoding = 's'; |
55edd97b | 203 | output_separator = NULL; |
252b5132 | 204 | |
b3aa80b4 | 205 | while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:U:Vv0123456789", |
252b5132 RH |
206 | long_options, (int *) 0)) != EOF) |
207 | { | |
208 | switch (optc) | |
209 | { | |
210 | case 'a': | |
015dc7e1 | 211 | datasection_only = false; |
252b5132 RH |
212 | break; |
213 | ||
7fac9594 | 214 | case 'd': |
015dc7e1 | 215 | datasection_only = true; |
7fac9594 NC |
216 | break; |
217 | ||
252b5132 | 218 | case 'f': |
015dc7e1 | 219 | print_filenames = true; |
252b5132 RH |
220 | break; |
221 | ||
8b53311e | 222 | case 'H': |
252b5132 RH |
223 | case 'h': |
224 | usage (stdout, 0); | |
225 | ||
226 | case 'n': | |
508e676d JK |
227 | string_min = (int) strtoul (optarg, &s, 0); |
228 | if (s != NULL && *s != 0) | |
229 | fatal (_("invalid integer argument %s"), optarg); | |
252b5132 RH |
230 | break; |
231 | ||
334ac421 | 232 | case 'w': |
015dc7e1 | 233 | include_all_whitespace = true; |
334ac421 EA |
234 | break; |
235 | ||
252b5132 | 236 | case 'o': |
015dc7e1 | 237 | print_addresses = true; |
252b5132 RH |
238 | address_radix = 8; |
239 | break; | |
240 | ||
241 | case 't': | |
015dc7e1 | 242 | print_addresses = true; |
252b5132 RH |
243 | if (optarg[1] != '\0') |
244 | usage (stderr, 1); | |
245 | switch (optarg[0]) | |
246 | { | |
247 | case 'o': | |
248 | address_radix = 8; | |
249 | break; | |
250 | ||
251 | case 'd': | |
252 | address_radix = 10; | |
253 | break; | |
254 | ||
255 | case 'x': | |
256 | address_radix = 16; | |
257 | break; | |
258 | ||
259 | default: | |
260 | usage (stderr, 1); | |
261 | } | |
262 | break; | |
263 | ||
264 | case 'T': | |
265 | target = optarg; | |
266 | break; | |
267 | ||
d132876a NC |
268 | case 'e': |
269 | if (optarg[1] != '\0') | |
270 | usage (stderr, 1); | |
271 | encoding = optarg[0]; | |
272 | break; | |
273 | ||
55edd97b EA |
274 | case 's': |
275 | output_separator = optarg; | |
795588ae | 276 | break; |
55edd97b | 277 | |
b3aa80b4 NC |
278 | case 'U': |
279 | if (streq (optarg, "default") || streq (optarg, "d")) | |
280 | unicode_display = unicode_default; | |
281 | else if (streq (optarg, "locale") || streq (optarg, "l")) | |
282 | unicode_display = unicode_locale; | |
283 | else if (streq (optarg, "escape") || streq (optarg, "e")) | |
284 | unicode_display = unicode_escape; | |
285 | else if (streq (optarg, "invalid") || streq (optarg, "i")) | |
286 | unicode_display = unicode_invalid; | |
287 | else if (streq (optarg, "hex") || streq (optarg, "x")) | |
288 | unicode_display = unicode_hex; | |
289 | else if (streq (optarg, "highlight") || streq (optarg, "h")) | |
290 | unicode_display = unicode_highlight; | |
291 | else | |
292 | fatal (_("invalid argument to -U/--unicode: %s"), optarg); | |
293 | break; | |
294 | ||
8b53311e | 295 | case 'V': |
252b5132 RH |
296 | case 'v': |
297 | print_version ("strings"); | |
298 | break; | |
299 | ||
300 | case '?': | |
301 | usage (stderr, 1); | |
302 | ||
303 | default: | |
e36aef42 | 304 | numeric_opt = optind; |
252b5132 RH |
305 | break; |
306 | } | |
307 | } | |
308 | ||
b3aa80b4 NC |
309 | if (unicode_display != unicode_default) |
310 | encoding = 'S'; | |
311 | ||
e36aef42 AM |
312 | if (numeric_opt != 0) |
313 | { | |
314 | string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0); | |
315 | if (s != NULL && *s != 0) | |
316 | fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1); | |
317 | } | |
c904a764 NC |
318 | if (string_min < 1) |
319 | fatal (_("invalid minimum string length %d"), string_min); | |
252b5132 | 320 | |
d132876a NC |
321 | switch (encoding) |
322 | { | |
8745eafa | 323 | case 'S': |
d132876a NC |
324 | case 's': |
325 | encoding_bytes = 1; | |
326 | break; | |
327 | case 'b': | |
328 | case 'l': | |
329 | encoding_bytes = 2; | |
330 | break; | |
331 | case 'B': | |
332 | case 'L': | |
333 | encoding_bytes = 4; | |
334 | break; | |
335 | default: | |
336 | usage (stderr, 1); | |
337 | } | |
338 | ||
bf2dd8d7 AM |
339 | if (bfd_init () != BFD_INIT_MAGIC) |
340 | fatal (_("fatal error: libbfd ABI mismatch")); | |
252b5132 RH |
341 | set_default_bfd_target (); |
342 | ||
343 | if (optind >= argc) | |
344 | { | |
015dc7e1 | 345 | datasection_only = false; |
5af11cab | 346 | SET_BINARY (fileno (stdin)); |
b3aa80b4 | 347 | print_strings ("{standard input}", stdin, 0, 0, (char *) NULL); |
015dc7e1 | 348 | files_given = true; |
252b5132 RH |
349 | } |
350 | else | |
351 | { | |
352 | for (; optind < argc; ++optind) | |
353 | { | |
b3aa80b4 | 354 | if (streq (argv[optind], "-")) |
015dc7e1 | 355 | datasection_only = false; |
252b5132 RH |
356 | else |
357 | { | |
015dc7e1 | 358 | files_given = true; |
535b785f | 359 | exit_status |= !strings_file (argv[optind]); |
252b5132 RH |
360 | } |
361 | } | |
362 | } | |
363 | ||
b34976b6 | 364 | if (!files_given) |
252b5132 RH |
365 | usage (stderr, 1); |
366 | ||
367 | return (exit_status); | |
368 | } | |
369 | \f | |
19871f45 AM |
370 | /* Scan section SECT of the file ABFD, whose printable name is |
371 | FILENAME. If it contains initialized data set GOT_A_SECTION and | |
372 | print the strings in it. */ | |
252b5132 RH |
373 | |
374 | static void | |
19871f45 | 375 | strings_a_section (bfd *abfd, asection *sect, const char *filename, |
015dc7e1 | 376 | bool *got_a_section) |
252b5132 | 377 | { |
06803313 | 378 | bfd_size_type sectsize; |
19871f45 | 379 | bfd_byte *mem; |
3aade688 | 380 | |
06803313 NC |
381 | if ((sect->flags & DATA_FLAGS) != DATA_FLAGS) |
382 | return; | |
383 | ||
fd361982 | 384 | sectsize = bfd_section_size (sect); |
19871f45 | 385 | if (sectsize == 0) |
06803313 NC |
386 | return; |
387 | ||
19871f45 | 388 | if (!bfd_malloc_and_get_section (abfd, sect, &mem)) |
252b5132 | 389 | { |
19871f45 AM |
390 | non_fatal (_("%s: Reading section %s failed: %s"), |
391 | filename, sect->name, bfd_errmsg (bfd_get_error ())); | |
392 | return; | |
252b5132 | 393 | } |
06803313 | 394 | |
015dc7e1 | 395 | *got_a_section = true; |
b3aa80b4 | 396 | print_strings (filename, NULL, sect->filepos, sectsize, (char *) mem); |
06803313 | 397 | free (mem); |
252b5132 RH |
398 | } |
399 | ||
400 | /* Scan all of the sections in FILE, and print the strings | |
401 | in the initialized data section(s). | |
402 | ||
b34976b6 AM |
403 | Return TRUE if successful, |
404 | FALSE if not (such as if FILE is not an object file). */ | |
252b5132 | 405 | |
015dc7e1 | 406 | static bool |
2da42df6 | 407 | strings_object_file (const char *file) |
252b5132 | 408 | { |
06803313 | 409 | bfd *abfd; |
19871f45 | 410 | asection *s; |
015dc7e1 | 411 | bool got_a_section; |
06803313 NC |
412 | |
413 | abfd = bfd_openr (file, target); | |
252b5132 RH |
414 | |
415 | if (abfd == NULL) | |
8745eafa | 416 | /* Treat the file as a non-object file. */ |
015dc7e1 | 417 | return false; |
252b5132 RH |
418 | |
419 | /* This call is mainly for its side effect of reading in the sections. | |
420 | We follow the traditional behavior of `strings' in that we don't | |
421 | complain if we don't recognize a file to be an object file. */ | |
b34976b6 | 422 | if (!bfd_check_format (abfd, bfd_object)) |
252b5132 RH |
423 | { |
424 | bfd_close (abfd); | |
015dc7e1 | 425 | return false; |
252b5132 RH |
426 | } |
427 | ||
015dc7e1 | 428 | got_a_section = false; |
19871f45 AM |
429 | for (s = abfd->sections; s != NULL; s = s->next) |
430 | strings_a_section (abfd, s, file, &got_a_section); | |
252b5132 RH |
431 | |
432 | if (!bfd_close (abfd)) | |
433 | { | |
434 | bfd_nonfatal (file); | |
015dc7e1 | 435 | return false; |
252b5132 RH |
436 | } |
437 | ||
438 | return got_a_section; | |
439 | } | |
440 | ||
b34976b6 | 441 | /* Print the strings in FILE. Return TRUE if ok, FALSE if an error occurs. */ |
252b5132 | 442 | |
015dc7e1 | 443 | static bool |
2da42df6 | 444 | strings_file (char *file) |
252b5132 | 445 | { |
ee2fb9eb JK |
446 | struct stat st; |
447 | ||
448 | /* get_file_size does not support non-S_ISREG files. */ | |
fb5b5478 | 449 | |
ee2fb9eb | 450 | if (stat (file, &st) < 0) |
fb5b5478 JJ |
451 | { |
452 | if (errno == ENOENT) | |
453 | non_fatal (_("'%s': No such file"), file); | |
454 | else | |
455 | non_fatal (_("Warning: could not locate '%s'. reason: %s"), | |
456 | file, strerror (errno)); | |
015dc7e1 | 457 | return false; |
fb5b5478 | 458 | } |
0e158763 NC |
459 | else if (S_ISDIR (st.st_mode)) |
460 | { | |
461 | non_fatal (_("Warning: '%s' is a directory"), file); | |
015dc7e1 | 462 | return false; |
0e158763 | 463 | } |
f24ddbdd | 464 | |
252b5132 RH |
465 | /* If we weren't told to scan the whole file, |
466 | try to open it as an object file and only look at | |
467 | initialized data sections. If that fails, fall back to the | |
468 | whole file. */ | |
469 | if (!datasection_only || !strings_object_file (file)) | |
470 | { | |
471 | FILE *stream; | |
472 | ||
ee2fb9eb | 473 | stream = fopen (file, FOPEN_RB); |
252b5132 RH |
474 | if (stream == NULL) |
475 | { | |
476 | fprintf (stderr, "%s: ", program_name); | |
477 | perror (file); | |
015dc7e1 | 478 | return false; |
252b5132 RH |
479 | } |
480 | ||
b3aa80b4 | 481 | print_strings (file, stream, (file_ptr) 0, 0, (char *) NULL); |
252b5132 RH |
482 | |
483 | if (fclose (stream) == EOF) | |
484 | { | |
485 | fprintf (stderr, "%s: ", program_name); | |
486 | perror (file); | |
015dc7e1 | 487 | return false; |
252b5132 RH |
488 | } |
489 | } | |
490 | ||
015dc7e1 | 491 | return true; |
252b5132 RH |
492 | } |
493 | \f | |
d132876a NC |
494 | /* Read the next character, return EOF if none available. |
495 | Assume that STREAM is positioned so that the next byte read | |
496 | is at address ADDRESS in the file. | |
497 | ||
498 | If STREAM is NULL, do not read from it. | |
499 | The caller can supply a buffer of characters | |
500 | to be processed before the data in STREAM. | |
501 | MAGIC is the address of the buffer and | |
502 | MAGICCOUNT is how many characters are in it. */ | |
503 | ||
504 | static long | |
ee2fb9eb | 505 | get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic) |
d132876a NC |
506 | { |
507 | int c, i; | |
c54e2ec1 | 508 | long r = 0; |
d132876a NC |
509 | |
510 | for (i = 0; i < encoding_bytes; i++) | |
511 | { | |
512 | if (*magiccount) | |
513 | { | |
514 | (*magiccount)--; | |
515 | c = *(*magic)++; | |
516 | } | |
517 | else | |
518 | { | |
519 | if (stream == NULL) | |
520 | return EOF; | |
b7d4af3a JW |
521 | |
522 | /* Only use getc_unlocked if we found a declaration for it. | |
523 | Otherwise, libc is not thread safe by default, and we | |
524 | should not use it. */ | |
525 | ||
526 | #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED | |
cedd9a58 JJ |
527 | c = getc_unlocked (stream); |
528 | #else | |
d132876a | 529 | c = getc (stream); |
cedd9a58 | 530 | #endif |
d132876a NC |
531 | if (c == EOF) |
532 | return EOF; | |
533 | } | |
534 | ||
535 | (*address)++; | |
c54e2ec1 | 536 | r = (r << 8) | (c & 0xff); |
d132876a NC |
537 | } |
538 | ||
539 | switch (encoding) | |
540 | { | |
c54e2ec1 | 541 | default: |
d132876a NC |
542 | break; |
543 | case 'l': | |
c54e2ec1 | 544 | r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8); |
d132876a NC |
545 | break; |
546 | case 'L': | |
c54e2ec1 AM |
547 | r = (((r & 0xff) << 24) | ((r & 0xff00) << 8) |
548 | | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24)); | |
d132876a NC |
549 | break; |
550 | } | |
551 | ||
d132876a NC |
552 | return r; |
553 | } | |
7ca166c9 AM |
554 | |
555 | /* Throw away one byte of a (possibly) multi-byte char C, updating | |
556 | address and buffer to suit. */ | |
557 | ||
558 | static void | |
559 | unget_part_char (long c, file_ptr *address, int *magiccount, char **magic) | |
560 | { | |
561 | static char tmp[4]; | |
562 | ||
563 | if (encoding_bytes > 1) | |
564 | { | |
565 | *address -= encoding_bytes - 1; | |
566 | ||
567 | if (*magiccount == 0) | |
568 | { | |
569 | /* If no magic buffer exists, use temp buffer. */ | |
570 | switch (encoding) | |
571 | { | |
572 | default: | |
573 | break; | |
574 | case 'b': | |
575 | tmp[0] = c & 0xff; | |
576 | *magiccount = 1; | |
577 | break; | |
578 | case 'l': | |
579 | tmp[0] = (c >> 8) & 0xff; | |
580 | *magiccount = 1; | |
581 | break; | |
582 | case 'B': | |
583 | tmp[0] = (c >> 16) & 0xff; | |
584 | tmp[1] = (c >> 8) & 0xff; | |
585 | tmp[2] = c & 0xff; | |
586 | *magiccount = 3; | |
587 | break; | |
588 | case 'L': | |
589 | tmp[0] = (c >> 8) & 0xff; | |
590 | tmp[1] = (c >> 16) & 0xff; | |
591 | tmp[2] = (c >> 24) & 0xff; | |
592 | *magiccount = 3; | |
593 | break; | |
594 | } | |
595 | *magic = tmp; | |
596 | } | |
597 | else | |
598 | { | |
599 | /* If magic buffer exists, rewind. */ | |
600 | *magic -= encoding_bytes - 1; | |
601 | *magiccount += encoding_bytes - 1; | |
602 | } | |
603 | } | |
604 | } | |
b3aa80b4 NC |
605 | |
606 | static void | |
607 | print_filename_and_address (const char * filename, file_ptr address) | |
608 | { | |
609 | if (print_filenames) | |
610 | printf ("%s: ", filename); | |
611 | ||
612 | if (! print_addresses) | |
613 | return; | |
614 | ||
615 | switch (address_radix) | |
616 | { | |
617 | case 8: | |
618 | if (sizeof (address) > sizeof (long)) | |
619 | { | |
620 | #ifndef __MSVCRT__ | |
621 | printf ("%7llo ", (unsigned long long) address); | |
622 | #else | |
623 | printf ("%7I64o ", (unsigned long long) address); | |
624 | #endif | |
625 | } | |
626 | else | |
627 | printf ("%7lo ", (unsigned long) address); | |
628 | break; | |
629 | ||
630 | case 10: | |
631 | if (sizeof (address) > sizeof (long)) | |
632 | { | |
633 | #ifndef __MSVCRT__ | |
634 | printf ("%7llu ", (unsigned long long) address); | |
635 | #else | |
636 | printf ("%7I64d ", (unsigned long long) address); | |
637 | #endif | |
638 | } | |
639 | else | |
640 | printf ("%7ld ", (long) address); | |
641 | break; | |
642 | ||
643 | case 16: | |
644 | if (sizeof (address) > sizeof (long)) | |
645 | { | |
646 | #ifndef __MSVCRT__ | |
647 | printf ("%7llx ", (unsigned long long) address); | |
648 | #else | |
649 | printf ("%7I64x ", (unsigned long long) address); | |
650 | #endif | |
651 | } | |
652 | else | |
653 | printf ("%7lx ", (unsigned long) address); | |
654 | break; | |
655 | } | |
656 | } | |
657 | ||
/* Check whether the bytes at BUFFER, of which BUFLEN are available,
   begin with a multi-byte UTF-8 sequence.

   Returns the length of the sequence (2, 3 or 4) if the lead byte is
   0xc0 or above and is followed, within BUFLEN, by the number of
   continuation bytes (10xxxxxx) that it calls for.  Returns 0
   otherwise, which includes plain ASCII, a stray continuation byte,
   a truncated sequence and an empty buffer.

   NOTE: only the framing is checked.  Overlong forms and lead bytes
   of 0xf8 and above are accepted (the latter as four byte
   sequences).  */

static unsigned int
is_valid_utf8 (const unsigned char * buffer, unsigned long buflen)
{
  /* Never look at BUFFER[0] unless it actually exists.  */
  if (buflen == 0)
    return 0;

  if (buffer[0] < 0xc0)
    return 0;

  if (buflen < 2)
    return 0;

  if ((buffer[1] & 0xc0) != 0x80)
    return 0;

  /* 110xxxxx: a two byte sequence.  */
  if ((buffer[0] & 0x20) == 0)
    return 2;

  if (buflen < 3)
    return 0;

  if ((buffer[2] & 0xc0) != 0x80)
    return 0;

  /* 1110xxxx: a three byte sequence.  */
  if ((buffer[0] & 0x10) == 0)
    return 3;

  if (buflen < 4)
    return 0;

  if ((buffer[3] & 0xc0) != 0x80)
    return 0;

  return 4;
}
693 | ||
694 | /* Display a UTF-8 encoded character in BUFFER according to the setting | |
695 | of unicode_display. The character is known to be valid. | |
696 | Returns the number of bytes consumed. */ | |
697 | ||
795588ae | 698 | static unsigned int |
b3aa80b4 NC |
699 | display_utf8_char (const unsigned char * buffer) |
700 | { | |
795588ae PS |
701 | unsigned int j; |
702 | unsigned int utf8_len; | |
b3aa80b4 NC |
703 | |
704 | switch (buffer[0] & 0x30) | |
705 | { | |
706 | case 0x00: | |
707 | case 0x10: | |
708 | utf8_len = 2; | |
709 | break; | |
710 | case 0x20: | |
711 | utf8_len = 3; | |
712 | break; | |
713 | default: | |
714 | utf8_len = 4; | |
715 | } | |
795588ae | 716 | |
b3aa80b4 NC |
717 | switch (unicode_display) |
718 | { | |
719 | default: | |
720 | fprintf (stderr, "ICE: unexpected unicode display type\n"); | |
721 | break; | |
722 | ||
723 | case unicode_escape: | |
724 | case unicode_highlight: | |
725 | if (unicode_display == unicode_highlight && isatty (1)) | |
726 | printf ("\x1B[31;47m"); /* Red. */ | |
727 | ||
728 | switch (utf8_len) | |
729 | { | |
730 | case 2: | |
731 | printf ("\\u%02x%02x", | |
795588ae | 732 | ((buffer[0] & 0x1c) >> 2), |
b3aa80b4 NC |
733 | ((buffer[0] & 0x03) << 6) | (buffer[1] & 0x3f)); |
734 | break; | |
735 | ||
736 | case 3: | |
737 | printf ("\\u%02x%02x", | |
738 | ((buffer[0] & 0x0f) << 4) | ((buffer[1] & 0x3c) >> 2), | |
739 | ((buffer[1] & 0x03) << 6) | ((buffer[2] & 0x3f))); | |
740 | break; | |
741 | ||
742 | case 4: | |
743 | printf ("\\u%02x%02x%02x", | |
744 | ((buffer[0] & 0x07) << 6) | ((buffer[1] & 0x3c) >> 2), | |
745 | ((buffer[1] & 0x03) << 6) | ((buffer[2] & 0x3c) >> 2), | |
746 | ((buffer[2] & 0x03) << 6) | ((buffer[3] & 0x3f))); | |
747 | break; | |
748 | default: | |
749 | /* URG. */ | |
750 | break; | |
751 | } | |
752 | ||
753 | if (unicode_display == unicode_highlight && isatty (1)) | |
754 | printf ("\033[0m"); /* Default colour. */ | |
755 | break; | |
756 | ||
757 | case unicode_hex: | |
758 | putchar ('<'); | |
759 | printf ("0x"); | |
760 | for (j = 0; j < utf8_len; j++) | |
761 | printf ("%02x", buffer [j]); | |
762 | putchar ('>'); | |
763 | break; | |
764 | ||
765 | case unicode_locale: | |
766 | printf ("%.1s", buffer); | |
767 | break; | |
768 | } | |
769 | ||
770 | return utf8_len; | |
771 | } | |
772 | ||
773 | /* Display strings in BUFFER. Treat any UTF-8 encoded characters encountered | |
774 | according to the setting of the unicode_display variable. The buffer | |
775 | contains BUFLEN bytes. | |
776 | ||
777 | Display the characters as if they started at ADDRESS and are contained in | |
778 | FILENAME. */ | |
779 | ||
780 | static void | |
781 | print_unicode_buffer (const char * filename, | |
782 | file_ptr address, | |
783 | const unsigned char * buffer, | |
784 | unsigned long buflen) | |
785 | { | |
786 | /* Paranoia checks... */ | |
787 | if (filename == NULL | |
788 | || buffer == NULL | |
789 | || unicode_display == unicode_default | |
790 | || encoding != 'S' | |
791 | || encoding_bytes != 1) | |
792 | { | |
793 | fprintf (stderr, "ICE: bad arguments to print_unicode_buffer\n"); | |
794 | return; | |
795 | } | |
796 | ||
797 | if (buflen == 0) | |
798 | return; | |
799 | ||
800 | /* We must only display strings that are at least string_min *characters* | |
801 | long. So we scan the buffer in two stages. First we locate the start | |
802 | of a potential string. Then we walk along it until we have found | |
803 | string_min characters. Then we go back to the start point and start | |
804 | displaying characters according to the unicode_display setting. */ | |
805 | ||
806 | unsigned long start_point = 0; | |
807 | unsigned long i = 0; | |
808 | unsigned int char_len = 1; | |
809 | unsigned int num_found = 0; | |
810 | ||
811 | for (i = 0; i < buflen; i += char_len) | |
812 | { | |
813 | int c = buffer[i]; | |
814 | ||
815 | char_len = 1; | |
816 | ||
817 | /* Find the first potential character of a string. */ | |
818 | if (! STRING_ISGRAPHIC (c)) | |
819 | { | |
820 | num_found = 0; | |
821 | continue; | |
822 | } | |
823 | ||
824 | if (c > 126) | |
825 | { | |
826 | if (c < 0xc0) | |
827 | { | |
828 | num_found = 0; | |
829 | continue; | |
830 | } | |
831 | ||
832 | if ((char_len = is_valid_utf8 (buffer + i, buflen - i)) == 0) | |
833 | { | |
834 | char_len = 1; | |
835 | num_found = 0; | |
836 | continue; | |
837 | } | |
838 | ||
839 | if (unicode_display == unicode_invalid) | |
840 | { | |
841 | /* We have found a valid UTF-8 character, but we treat it as non-graphic. */ | |
842 | num_found = 0; | |
843 | continue; | |
844 | } | |
845 | } | |
846 | ||
847 | if (num_found == 0) | |
848 | /* We have found a potential starting point for a string. */ | |
849 | start_point = i; | |
850 | ||
851 | ++ num_found; | |
852 | ||
853 | if (num_found >= string_min) | |
854 | break; | |
855 | } | |
856 | ||
857 | if (num_found < string_min) | |
858 | return; | |
859 | ||
860 | print_filename_and_address (filename, address + start_point); | |
795588ae | 861 | |
b3aa80b4 NC |
862 | /* We have found string_min characters. Display them and any |
863 | more that follow. */ | |
864 | for (i = start_point; i < buflen; i += char_len) | |
865 | { | |
866 | int c = buffer[i]; | |
867 | ||
868 | char_len = 1; | |
869 | ||
870 | if (! STRING_ISGRAPHIC (c)) | |
871 | break; | |
872 | else if (c < 127) | |
873 | putchar (c); | |
874 | else if (! is_valid_utf8 (buffer + i, buflen - i)) | |
875 | break; | |
876 | else if (unicode_display == unicode_invalid) | |
877 | break; | |
878 | else | |
879 | char_len = display_utf8_char (buffer + i); | |
880 | } | |
881 | ||
882 | if (output_separator) | |
883 | fputs (output_separator, stdout); | |
884 | else | |
885 | putchar ('\n'); | |
886 | ||
887 | /* FIXME: Using tail recursion here is lazy programming... */ | |
888 | print_unicode_buffer (filename, address + i, buffer + i, buflen - i); | |
889 | } | |
890 | ||
/* Return the next input byte for the unicode stream scanner.

   Bytes that were pushed back are returned first: PUTBACK is used as
   a stack holding *NUM_PUTBACK entries, and the most recently pushed
   entry is popped.  When the stack is empty a byte is read from
   STREAM instead and *NUM_READ is incremented; this happens even if
   the read yields EOF, which is then returned.  */

static int
get_unicode_byte (FILE *          stream,
                  unsigned char * putback,
                  unsigned int *  num_putback,
                  unsigned int *  num_read)
{
  if (* num_putback != 0)
    {
      -- * num_putback;
      return putback [* num_putback];
    }

  ++ * num_read;

#if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
  return getc_unlocked (stream);
#else
  return getc (stream);
#endif
}
911 | ||
912 | /* Helper function for print_unicode_stream. */ | |
913 | ||
914 | static void | |
915 | print_unicode_stream_body (const char * filename, | |
916 | file_ptr address, | |
917 | FILE * stream, | |
918 | unsigned char * putback_buf, | |
795588ae | 919 | unsigned int num_putback, |
b3aa80b4 NC |
920 | unsigned char * print_buf) |
921 | { | |
922 | /* It would be nice if we could just read the stream into a buffer | |
923 | and then process if with print_unicode_buffer. But the input | |
924 | might be huge or it might time-locked (eg stdin). So instead | |
925 | we go one byte at a time... */ | |
926 | ||
927 | file_ptr start_point = 0; | |
795588ae PS |
928 | unsigned int num_read = 0; |
929 | unsigned int num_chars = 0; | |
930 | unsigned int num_print = 0; | |
a9a09f51 | 931 | int c = 0; |
b3aa80b4 NC |
932 | |
933 | /* Find a series of string_min characters. Put them into print_buf. */ | |
934 | do | |
935 | { | |
936 | if (num_chars >= string_min) | |
937 | break; | |
938 | ||
939 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
940 | if (c == EOF) | |
941 | break; | |
942 | ||
943 | if (! STRING_ISGRAPHIC (c)) | |
944 | { | |
945 | num_chars = num_print = 0; | |
946 | continue; | |
947 | } | |
948 | ||
949 | if (num_chars == 0) | |
950 | start_point = num_read - 1; | |
951 | ||
952 | if (c < 127) | |
953 | { | |
954 | print_buf[num_print] = c; | |
955 | num_chars ++; | |
956 | num_print ++; | |
957 | continue; | |
958 | } | |
959 | ||
960 | if (c < 0xc0) | |
961 | { | |
962 | num_chars = num_print = 0; | |
963 | continue; | |
964 | } | |
965 | ||
966 | /* We *might* have a UTF-8 sequence. Time to start peeking. */ | |
967 | char utf8[4]; | |
968 | ||
969 | utf8[0] = c; | |
970 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
971 | if (c == EOF) | |
972 | break; | |
973 | utf8[1] = c; | |
974 | ||
975 | if ((utf8[1] & 0xc0) != 0x80) | |
976 | { | |
977 | /* Invalid UTF-8. */ | |
978 | putback_buf[num_putback++] = utf8[1]; | |
979 | num_chars = num_print = 0; | |
980 | continue; | |
981 | } | |
982 | else if ((utf8[0] & 0x20) == 0) | |
983 | { | |
984 | /* A valid 2-byte UTF-8 encoding. */ | |
985 | if (unicode_display == unicode_invalid) | |
986 | { | |
987 | putback_buf[num_putback++] = utf8[1]; | |
988 | num_chars = num_print = 0; | |
989 | } | |
990 | else | |
991 | { | |
992 | print_buf[num_print ++] = utf8[0]; | |
993 | print_buf[num_print ++] = utf8[1]; | |
994 | num_chars ++; | |
995 | } | |
996 | continue; | |
997 | } | |
998 | ||
999 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
1000 | if (c == EOF) | |
1001 | break; | |
1002 | utf8[2] = c; | |
1003 | ||
1004 | if ((utf8[2] & 0xc0) != 0x80) | |
1005 | { | |
1006 | /* Invalid UTF-8. */ | |
1007 | putback_buf[num_putback++] = utf8[2]; | |
1008 | putback_buf[num_putback++] = utf8[1]; | |
1009 | num_chars = num_print = 0; | |
1010 | continue; | |
1011 | } | |
1012 | else if ((utf8[0] & 0x10) == 0) | |
1013 | { | |
1014 | /* A valid 3-byte UTF-8 encoding. */ | |
1015 | if (unicode_display == unicode_invalid) | |
1016 | { | |
1017 | putback_buf[num_putback++] = utf8[2]; | |
1018 | putback_buf[num_putback++] = utf8[1]; | |
1019 | num_chars = num_print = 0; | |
1020 | } | |
1021 | else | |
1022 | { | |
1023 | print_buf[num_print ++] = utf8[0]; | |
1024 | print_buf[num_print ++] = utf8[1]; | |
1025 | print_buf[num_print ++] = utf8[2]; | |
1026 | num_chars ++; | |
1027 | } | |
1028 | continue; | |
1029 | } | |
1030 | ||
1031 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
1032 | if (c == EOF) | |
1033 | break; | |
1034 | utf8[3] = c; | |
1035 | ||
1036 | if ((utf8[3] & 0xc0) != 0x80) | |
1037 | { | |
1038 | /* Invalid UTF-8. */ | |
1039 | putback_buf[num_putback++] = utf8[3]; | |
1040 | putback_buf[num_putback++] = utf8[2]; | |
1041 | putback_buf[num_putback++] = utf8[1]; | |
1042 | num_chars = num_print = 0; | |
1043 | } | |
1044 | /* We have a valid 4-byte UTF-8 encoding. */ | |
1045 | else if (unicode_display == unicode_invalid) | |
1046 | { | |
1047 | putback_buf[num_putback++] = utf8[3]; | |
1048 | putback_buf[num_putback++] = utf8[1]; | |
1049 | putback_buf[num_putback++] = utf8[2]; | |
1050 | num_chars = num_print = 0; | |
1051 | } | |
1052 | else | |
1053 | { | |
1054 | print_buf[num_print ++] = utf8[0]; | |
1055 | print_buf[num_print ++] = utf8[1]; | |
1056 | print_buf[num_print ++] = utf8[2]; | |
1057 | print_buf[num_print ++] = utf8[3]; | |
1058 | num_chars ++; | |
1059 | } | |
1060 | } | |
1061 | while (1); | |
1062 | ||
1063 | if (num_chars >= string_min) | |
1064 | { | |
1065 | /* We know that we have string_min valid characters in print_buf, | |
1066 | and there may be more to come in the stream. Start displaying | |
1067 | them. */ | |
1068 | ||
1069 | print_filename_and_address (filename, address + start_point); | |
1070 | ||
795588ae | 1071 | unsigned int i; |
b3aa80b4 NC |
1072 | for (i = 0; i < num_print;) |
1073 | { | |
1074 | if (print_buf[i] < 127) | |
1075 | putchar (print_buf[i++]); | |
1076 | else | |
1077 | i += display_utf8_char (print_buf + i); | |
1078 | } | |
1079 | ||
1080 | /* OK so now we have to start read unchecked bytes. */ | |
1081 | ||
795588ae | 1082 | /* Find a series of string_min characters. Put them into print_buf. */ |
b3aa80b4 NC |
1083 | do |
1084 | { | |
1085 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
1086 | if (c == EOF) | |
1087 | break; | |
1088 | ||
1089 | if (! STRING_ISGRAPHIC (c)) | |
1090 | break; | |
1091 | ||
1092 | if (c < 127) | |
1093 | { | |
1094 | putchar (c); | |
1095 | continue; | |
1096 | } | |
1097 | ||
1098 | if (c < 0xc0) | |
1099 | break; | |
1100 | ||
1101 | /* We *might* have a UTF-8 sequence. Time to start peeking. */ | |
1102 | unsigned char utf8[4]; | |
1103 | ||
1104 | utf8[0] = c; | |
1105 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
1106 | if (c == EOF) | |
1107 | break; | |
1108 | utf8[1] = c; | |
1109 | ||
1110 | if ((utf8[1] & 0xc0) != 0x80) | |
1111 | { | |
1112 | /* Invalid UTF-8. */ | |
1113 | putback_buf[num_putback++] = utf8[1]; | |
1114 | break; | |
1115 | } | |
1116 | else if ((utf8[0] & 0x20) == 0) | |
1117 | { | |
1118 | /* Valid 2-byte UTF-8. */ | |
1119 | if (unicode_display == unicode_invalid) | |
1120 | { | |
1121 | putback_buf[num_putback++] = utf8[1]; | |
1122 | break; | |
1123 | } | |
1124 | else | |
1125 | { | |
1126 | (void) display_utf8_char (utf8); | |
1127 | continue; | |
1128 | } | |
1129 | } | |
1130 | ||
1131 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
1132 | if (c == EOF) | |
1133 | break; | |
1134 | utf8[2] = c; | |
1135 | ||
1136 | if ((utf8[2] & 0xc0) != 0x80) | |
1137 | { | |
1138 | /* Invalid UTF-8. */ | |
1139 | putback_buf[num_putback++] = utf8[2]; | |
1140 | putback_buf[num_putback++] = utf8[1]; | |
1141 | break; | |
1142 | } | |
1143 | else if ((utf8[0] & 0x10) == 0) | |
1144 | { | |
1145 | /* Valid 3-byte UTF-8. */ | |
1146 | if (unicode_display == unicode_invalid) | |
1147 | { | |
1148 | putback_buf[num_putback++] = utf8[2]; | |
1149 | putback_buf[num_putback++] = utf8[1]; | |
1150 | break; | |
1151 | } | |
1152 | else | |
1153 | { | |
1154 | (void) display_utf8_char (utf8); | |
1155 | continue; | |
1156 | } | |
1157 | } | |
1158 | ||
1159 | c = get_unicode_byte (stream, putback_buf, & num_putback, & num_read); | |
1160 | if (c == EOF) | |
1161 | break; | |
1162 | utf8[3] = c; | |
1163 | ||
1164 | if ((utf8[3] & 0xc0) != 0x80) | |
1165 | { | |
1166 | /* Invalid UTF-8. */ | |
1167 | putback_buf[num_putback++] = utf8[3]; | |
1168 | putback_buf[num_putback++] = utf8[2]; | |
1169 | putback_buf[num_putback++] = utf8[1]; | |
1170 | break; | |
1171 | } | |
1172 | else if (unicode_display == unicode_invalid) | |
1173 | { | |
1174 | putback_buf[num_putback++] = utf8[3]; | |
1175 | putback_buf[num_putback++] = utf8[2]; | |
1176 | putback_buf[num_putback++] = utf8[1]; | |
1177 | break; | |
1178 | } | |
1179 | else | |
1180 | /* A valid 4-byte UTF-8 encoding. */ | |
1181 | (void) display_utf8_char (utf8); | |
1182 | } | |
1183 | while (1); | |
1184 | ||
1185 | if (output_separator) | |
1186 | fputs (output_separator, stdout); | |
1187 | else | |
1188 | putchar ('\n'); | |
1189 | } | |
1190 | ||
1191 | if (c != EOF) | |
1192 | /* FIXME: Using tail recursion here is lazy, but it works. */ | |
1193 | print_unicode_stream_body (filename, address + num_read, stream, putback_buf, num_putback, print_buf); | |
1194 | } | |
1195 | ||
1196 | /* Display strings read in from STREAM. Treat any UTF-8 encoded characters | |
1197 | encountered according to the setting of the unicode_display variable. | |
1198 | The stream is positioned at ADDRESS and is attached to FILENAME. */ | |
1199 | ||
1200 | static void | |
1201 | print_unicode_stream (const char * filename, | |
1202 | file_ptr address, | |
1203 | FILE * stream) | |
1204 | { | |
1205 | /* Paranoia checks... */ | |
1206 | if (filename == NULL | |
1207 | || stream == NULL | |
1208 | || unicode_display == unicode_default | |
1209 | || encoding != 'S' | |
1210 | || encoding_bytes != 1) | |
1211 | { | |
1212 | fprintf (stderr, "ICE: bad arguments to print_unicode_stream\n"); | |
1213 | return; | |
1214 | } | |
1215 | ||
1216 | /* Allocate space for string_min 4-byte utf-8 characters. */ | |
1217 | unsigned char * print_buf = xmalloc ((4 * string_min) + 1); | |
1218 | /* We should never have to put back more than 4 bytes. */ | |
1219 | unsigned char putback_buf[5]; | |
795588ae | 1220 | unsigned int num_putback = 0; |
b3aa80b4 NC |
1221 | |
1222 | print_unicode_stream_body (filename, address, stream, putback_buf, num_putback, print_buf); | |
1223 | free (print_buf); | |
1224 | } | |
d132876a | 1225 | \f |
252b5132 RH |
1226 | /* Find the strings in file FILENAME, read from STREAM. |
1227 | Assume that STREAM is positioned so that the next byte read | |
1228 | is at address ADDRESS in the file. | |
252b5132 RH |
1229 | |
1230 | If STREAM is NULL, do not read from it. | |
1231 | The caller can supply a buffer of characters | |
1232 | to be processed before the data in STREAM. | |
1233 | MAGIC is the address of the buffer and | |
1234 | MAGICCOUNT is how many characters are in it. | |
1235 | Those characters come at address ADDRESS and the data in STREAM follow. */ | |
1236 | ||
1237 | static void | |
ee2fb9eb | 1238 | print_strings (const char *filename, FILE *stream, file_ptr address, |
b3aa80b4 | 1239 | int magiccount, char *magic) |
252b5132 | 1240 | { |
b3aa80b4 NC |
1241 | if (unicode_display != unicode_default) |
1242 | { | |
1243 | if (magic != NULL) | |
1244 | print_unicode_buffer (filename, address, | |
1245 | (const unsigned char *) magic, magiccount); | |
1246 | ||
1247 | if (stream != NULL) | |
1248 | print_unicode_stream (filename, address, stream); | |
1249 | return; | |
1250 | } | |
1251 | ||
d132876a | 1252 | char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1)); |
252b5132 RH |
1253 | |
1254 | while (1) | |
1255 | { | |
ee2fb9eb | 1256 | file_ptr start; |
795588ae | 1257 | unsigned int i; |
d132876a | 1258 | long c; |
252b5132 RH |
1259 | |
1260 | /* See if the next `string_min' chars are all graphic chars. */ | |
1261 | tryline: | |
252b5132 RH |
1262 | start = address; |
1263 | for (i = 0; i < string_min; i++) | |
1264 | { | |
d132876a NC |
1265 | c = get_char (stream, &address, &magiccount, &magic); |
1266 | if (c == EOF) | |
68187828 NC |
1267 | { |
1268 | free (buf); | |
1269 | return; | |
1270 | } | |
71f5e3f7 | 1271 | |
8745eafa | 1272 | if (! STRING_ISGRAPHIC (c)) |
71f5e3f7 | 1273 | { |
7ca166c9 AM |
1274 | /* Found a non-graphic. Try again starting with next byte. */ |
1275 | unget_part_char (c, &address, &magiccount, &magic); | |
71f5e3f7 NC |
1276 | goto tryline; |
1277 | } | |
252b5132 RH |
1278 | buf[i] = c; |
1279 | } | |
1280 | ||
1281 | /* We found a run of `string_min' graphic characters. Print up | |
e9f87780 | 1282 | to the next non-graphic character. */ |
b3aa80b4 | 1283 | print_filename_and_address (filename, start); |
252b5132 RH |
1284 | |
1285 | buf[i] = '\0'; | |
1286 | fputs (buf, stdout); | |
1287 | ||
1288 | while (1) | |
1289 | { | |
d132876a NC |
1290 | c = get_char (stream, &address, &magiccount, &magic); |
1291 | if (c == EOF) | |
1292 | break; | |
8745eafa | 1293 | if (! STRING_ISGRAPHIC (c)) |
dcd9adc5 | 1294 | { |
7ca166c9 | 1295 | unget_part_char (c, &address, &magiccount, &magic); |
dcd9adc5 NC |
1296 | break; |
1297 | } | |
252b5132 RH |
1298 | putchar (c); |
1299 | } | |
1300 | ||
55edd97b | 1301 | if (output_separator) |
7ca166c9 | 1302 | fputs (output_separator, stdout); |
55edd97b | 1303 | else |
7ca166c9 | 1304 | putchar ('\n'); |
252b5132 | 1305 | } |
68187828 | 1306 | free (buf); |
252b5132 RH |
1307 | } |
1308 | \f | |
252b5132 | 1309 | static void |
2da42df6 | 1310 | usage (FILE *stream, int status) |
252b5132 | 1311 | { |
8b53311e NC |
1312 | fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name); |
1313 | fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n")); | |
7fac9594 NC |
1314 | fprintf (stream, _(" The options are:\n")); |
1315 | ||
1316 | if (DEFAULT_STRINGS_ALL) | |
1317 | fprintf (stream, _("\ | |
1318 | -a - --all Scan the entire file, not just the data section [default]\n\ | |
1319 | -d --data Only scan the data sections in the file\n")); | |
1320 | else | |
1321 | fprintf (stream, _("\ | |
8b53311e | 1322 | -a - --all Scan the entire file, not just the data section\n\ |
7fac9594 NC |
1323 | -d --data Only scan the data sections in the file [default]\n")); |
1324 | ||
1325 | fprintf (stream, _("\ | |
8b53311e | 1326 | -f --print-file-name Print the name of the file before each string\n\ |
8fee99c3 NC |
1327 | -n <number> Locate & print any sequence of at least <number>\n\ |
1328 | --bytes=<number> displayable characters. (The default is 4).\n\ | |
d412a550 | 1329 | -t --radix={o,d,x} Print the location of the string in base 8, 10 or 16\n\ |
334ac421 | 1330 | -w --include-all-whitespace Include all whitespace as valid string characters\n\ |
8b53311e NC |
1331 | -o An alias for --radix=o\n\ |
1332 | -T --target=<BFDNAME> Specify the binary file format\n\ | |
8745eafa NC |
1333 | -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\ |
1334 | s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\ | |
b3aa80b4 | 1335 | --unicode={default|show|invalid|hex|escape|highlight}\n\ |
584294c4 | 1336 | -U {d|s|i|x|e|h} Specify how to treat UTF-8 encoded unicode characters\n\ |
55edd97b | 1337 | -s --output-separator=<string> String used to separate strings in output.\n\ |
07012eee | 1338 | @<file> Read options from <file>\n\ |
8b53311e | 1339 | -h --help Display this information\n\ |
ffbe5983 | 1340 | -v -V --version Print the program's version number\n")); |
252b5132 | 1341 | list_supported_targets (program_name, stream); |
92f01d61 | 1342 | if (REPORT_BUGS_TO[0] && status == 0) |
8ad3436c | 1343 | fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO); |
252b5132 RH |
1344 | exit (status); |
1345 | } |