1 /* strings -- print the strings of printable characters in files
2 Copyright (C) 1993-2023 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
19 /* Usage: strings [options] file...
24 - Scan each file in its entirety.
27 -d Scan only the initialized data section(s) of object files.
30 -f Print the name of the file before each string.
34 -min-len Print graphic char sequences, MIN-LEN or more bytes long,
35 that are followed by a NUL or a non-displayable character.
39 -t {o,x,d} Print the offset within the file before each string,
42 --include-all-whitespace
43 -w By default tab and space are the only whitespace included in graphic
44 char sequences. This option considers all of isspace() valid.
46 -o Like -to. (Some other implementations have -o like -to,
47 others like -td. We chose one arbitrarily.)
49 --encoding={s,S,b,l,B,L}
51 Select character encoding: 7-bit-character, 8-bit-character,
52 bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
57 Specify a non-default object file format.
59 --unicode={default|locale|invalid|hex|escape|highlight}
61 Determine how to handle UTF-8 unicode characters. The default
62 is no special treatment. All other versions of this option
63 only apply if the encoding is valid and enabling the option
65 The 'locale' option displays the characters according to the
66 current locale. The 'invalid' option treats them as
67 non-string characters. The 'hex' option displays them as hex
68 byte sequences. The 'escape' option displays them as escape
69 sequences and the 'highlight' option displays them as
70 coloured escape sequences.
72 --output-separator=sep_string
73 -s sep_string String used to separate parsed strings in output.
77 -h Print the usage message on the standard output.
81 -v Print the program version number.
83 Written by Richard Stallman <rms@gnu.ai.mit.edu>
84 and David MacKenzie <djm@gnu.ai.mit.edu>. */
89 #include "libiberty.h"
90 #include "safe-ctype.h"
/* Convenience predicate: non-zero when strings A and B compare equal
   according to strcmp.  */
94 #define streq(a,b) (strcmp ((a),(b)) == 0)
97 typedef enum unicode_display_type
105 } unicode_display_type
;
/* How UTF-8 encoded characters are treated.  Starts out as
   unicode_default and is changed by the -U/--unicode option.  */
107 static unicode_display_type unicode_display
= unicode_default
;
109 #define STRING_ISGRAPHIC(c) \
112 && ((c) == '\t' || ISPRINT (c) || (encoding == 'S' && (c) > 127) \
113 || (include_all_whitespace && ISSPACE (c))) \
120 /* The BFD section flags that identify an initialized data section. */
121 #define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)
123 /* Radix for printing addresses (must be 8, 10 or 16). */
124 static int address_radix
;
126 /* Minimum length of sequence of graphic chars to trigger output. */
127 static unsigned int string_min
;
129 /* Whether or not we include all whitespace as a graphic char. */
130 static bool include_all_whitespace
;
132 /* TRUE means print address within file for each string. */
133 static bool print_addresses
;
135 /* TRUE means print filename for each string. */
136 static bool print_filenames
;
138 /* TRUE means for object files scan only the data section. */
139 static bool datasection_only
;
141 /* The BFD object file format. */
144 /* The character encoding format. */
145 static char encoding
;
/* Number of bytes that make up one character in the selected encoding;
   get_char loops over this many bytes for each character it returns.  */
146 static int encoding_bytes
;
148 /* Output string used to separate parsed strings */
149 static char *output_separator
;
151 static struct option long_options
[] =
153 {"all", no_argument
, NULL
, 'a'},
154 {"bytes", required_argument
, NULL
, 'n'},
155 {"data", no_argument
, NULL
, 'd'},
156 {"encoding", required_argument
, NULL
, 'e'},
157 {"help", no_argument
, NULL
, 'h'},
158 {"include-all-whitespace", no_argument
, NULL
, 'w'},
159 {"output-separator", required_argument
, NULL
, 's'},
160 {"print-file-name", no_argument
, NULL
, 'f'},
161 {"radix", required_argument
, NULL
, 't'},
162 {"target", required_argument
, NULL
, 'T'},
163 {"unicode", required_argument
, NULL
, 'U'},
164 {"version", no_argument
, NULL
, 'v'},
168 static bool strings_file (char *);
169 static void print_strings (const char *, FILE *, file_ptr
, int, char *);
170 static void usage (FILE *, int) ATTRIBUTE_NORETURN
;
172 int main (int, char **);
175 main (int argc
, char **argv
)
179 bool files_given
= false;
183 setlocale (LC_ALL
, "");
184 bindtextdomain (PACKAGE
, LOCALEDIR
);
185 textdomain (PACKAGE
);
187 program_name
= argv
[0];
188 xmalloc_set_program_name (program_name
);
189 bfd_set_error_program_name (program_name
);
191 expandargv (&argc
, &argv
);
194 include_all_whitespace
= false;
195 print_addresses
= false;
196 print_filenames
= false;
197 if (DEFAULT_STRINGS_ALL
)
198 datasection_only
= false;
200 datasection_only
= true;
203 output_separator
= NULL
;
205 while ((optc
= getopt_long (argc
, argv
, "adfhHn:wot:e:T:s:U:Vv0123456789",
206 long_options
, (int *) 0)) != EOF
)
211 datasection_only
= false;
215 datasection_only
= true;
219 print_filenames
= true;
227 string_min
= (int) strtoul (optarg
, &s
, 0);
228 if (s
!= NULL
&& *s
!= 0)
229 fatal (_("invalid integer argument %s"), optarg
);
233 include_all_whitespace
= true;
237 print_addresses
= true;
242 print_addresses
= true;
243 if (optarg
[1] != '\0')
269 if (optarg
[1] != '\0')
271 encoding
= optarg
[0];
275 output_separator
= optarg
;
279 if (streq (optarg
, "default") || streq (optarg
, "d"))
280 unicode_display
= unicode_default
;
281 else if (streq (optarg
, "locale") || streq (optarg
, "l"))
282 unicode_display
= unicode_locale
;
283 else if (streq (optarg
, "escape") || streq (optarg
, "e"))
284 unicode_display
= unicode_escape
;
285 else if (streq (optarg
, "invalid") || streq (optarg
, "i"))
286 unicode_display
= unicode_invalid
;
287 else if (streq (optarg
, "hex") || streq (optarg
, "x"))
288 unicode_display
= unicode_hex
;
289 else if (streq (optarg
, "highlight") || streq (optarg
, "h"))
290 unicode_display
= unicode_highlight
;
292 fatal (_("invalid argument to -U/--unicode: %s"), optarg
);
297 print_version ("strings");
304 numeric_opt
= optind
;
309 if (unicode_display
!= unicode_default
)
312 if (numeric_opt
!= 0)
314 string_min
= (int) strtoul (argv
[numeric_opt
- 1] + 1, &s
, 0);
315 if (s
!= NULL
&& *s
!= 0)
316 fatal (_("invalid integer argument %s"), argv
[numeric_opt
- 1] + 1);
320 fatal (_("invalid minimum string length %d"), string_min
);
321 /* PR 30595: Look for excessive minimum string lengths.
322 The "(4 * string_min) + 1" is because this is the value
323 used to allocate space in print_unicode_stream(). */
324 else if (string_min
== -1U || ((4 * string_min
) + 1) == 0)
325 fatal (_("minimum string length %#x is too big"), string_min
);
345 if (bfd_init () != BFD_INIT_MAGIC
)
346 fatal (_("fatal error: libbfd ABI mismatch"));
347 set_default_bfd_target ();
351 datasection_only
= false;
352 SET_BINARY (fileno (stdin
));
353 print_strings ("{standard input}", stdin
, 0, 0, (char *) NULL
);
358 for (; optind
< argc
; ++optind
)
360 if (streq (argv
[optind
], "-"))
361 datasection_only
= false;
365 exit_status
|= !strings_file (argv
[optind
]);
373 return (exit_status
);
376 /* Scan section SECT of the file ABFD, whose printable name is
377 FILENAME. If it contains initialized data set GOT_A_SECTION and
378 print the strings in it. */
381 strings_a_section (bfd
*abfd
, asection
*sect
, const char *filename
,
384 bfd_size_type sectsize
;
387 if ((sect
->flags
& DATA_FLAGS
) != DATA_FLAGS
)
390 sectsize
= bfd_section_size (sect
);
394 if (!bfd_malloc_and_get_section (abfd
, sect
, &mem
))
396 non_fatal (_("%s: Reading section %s failed: %s"),
397 filename
, sect
->name
, bfd_errmsg (bfd_get_error ()));
401 *got_a_section
= true;
402 print_strings (filename
, NULL
, sect
->filepos
, sectsize
, (char *) mem
);
406 /* Scan all of the sections in FILE, and print the strings
407 in the initialized data section(s).
409 Return TRUE if successful,
410 FALSE if not (such as if FILE is not an object file). */
413 strings_object_file (const char *file
)
419 abfd
= bfd_openr (file
, target
);
422 /* Treat the file as a non-object file. */
425 /* This call is mainly for its side effect of reading in the sections.
426 We follow the traditional behavior of `strings' in that we don't
427 complain if we don't recognize a file to be an object file. */
428 if (!bfd_check_format (abfd
, bfd_object
))
434 got_a_section
= false;
435 for (s
= abfd
->sections
; s
!= NULL
; s
= s
->next
)
436 strings_a_section (abfd
, s
, file
, &got_a_section
);
438 if (!bfd_close (abfd
))
444 return got_a_section
;
447 /* Print the strings in FILE. Return TRUE if ok, FALSE if an error occurs. */
450 strings_file (char *file
)
454 /* get_file_size does not support non-S_ISREG files. */
456 if (stat (file
, &st
) < 0)
459 non_fatal (_("'%s': No such file"), file
);
461 non_fatal (_("Warning: could not locate '%s'. reason: %s"),
462 file
, strerror (errno
));
465 else if (S_ISDIR (st
.st_mode
))
467 non_fatal (_("Warning: '%s' is a directory"), file
);
471 /* If we weren't told to scan the whole file,
472 try to open it as an object file and only look at
473 initialized data sections. If that fails, fall back to the
475 if (!datasection_only
|| !strings_object_file (file
))
479 stream
= fopen (file
, FOPEN_RB
);
482 fprintf (stderr
, "%s: ", program_name
);
487 print_strings (file
, stream
, (file_ptr
) 0, 0, (char *) NULL
);
489 if (fclose (stream
) == EOF
)
491 fprintf (stderr
, "%s: ", program_name
);
500 /* Read the next character, return EOF if none available.
501 Assume that STREAM is positioned so that the next byte read
502 is at address ADDRESS in the file.
504 If STREAM is NULL, do not read from it.
505 The caller can supply a buffer of characters
506 to be processed before the data in STREAM.
507 MAGIC is the address of the buffer and
508 MAGICCOUNT is how many characters are in it. */
511 get_char (FILE *stream
, file_ptr
*address
, int *magiccount
, char **magic
)
516 for (i
= 0; i
< encoding_bytes
; i
++)
528 /* Only use getc_unlocked if we found a declaration for it.
529 Otherwise, libc is not thread safe by default, and we
530 should not use it. */
532 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
533 c
= getc_unlocked (stream
);
542 r
= (r
<< 8) | (c
& 0xff);
550 r
= ((r
& 0xff) << 8) | ((r
& 0xff00) >> 8);
553 r
= (((r
& 0xff) << 24) | ((r
& 0xff00) << 8)
554 | ((r
& 0xff0000) >> 8) | ((r
& 0xff000000) >> 24));
561 /* Throw away one byte of a (possibly) multi-byte char C, updating
562 address and buffer to suit. */
565 unget_part_char (long c
, file_ptr
*address
, int *magiccount
, char **magic
)
569 if (encoding_bytes
> 1)
571 *address
-= encoding_bytes
- 1;
573 if (*magiccount
== 0)
575 /* If no magic buffer exists, use temp buffer. */
585 tmp
[0] = (c
>> 8) & 0xff;
589 tmp
[0] = (c
>> 16) & 0xff;
590 tmp
[1] = (c
>> 8) & 0xff;
595 tmp
[0] = (c
>> 8) & 0xff;
596 tmp
[1] = (c
>> 16) & 0xff;
597 tmp
[2] = (c
>> 24) & 0xff;
605 /* If magic buffer exists, rewind. */
606 *magic
-= encoding_bytes
- 1;
607 *magiccount
+= encoding_bytes
- 1;
613 print_filename_and_address (const char * filename
, file_ptr address
)
616 printf ("%s: ", filename
);
618 if (! print_addresses
)
621 switch (address_radix
)
624 if (sizeof (address
) > sizeof (long))
627 printf ("%7llo ", (unsigned long long) address
);
629 printf ("%7I64o ", (unsigned long long) address
);
633 printf ("%7lo ", (unsigned long) address
);
637 if (sizeof (address
) > sizeof (long))
640 printf ("%7llu ", (unsigned long long) address
);
642 printf ("%7I64d ", (unsigned long long) address
);
646 printf ("%7ld ", (long) address
);
650 if (sizeof (address
) > sizeof (long))
653 printf ("%7llx ", (unsigned long long) address
);
655 printf ("%7I64x ", (unsigned long long) address
);
659 printf ("%7lx ", (unsigned long) address
);
664 /* Return non-zero if the bytes starting at BUFFER form a valid UTF-8 encoding.
665 If the encoding is valid then returns the number of bytes it uses. */
668 is_valid_utf8 (const unsigned char * buffer
, unsigned long buflen
)
670 if (buffer
[0] < 0xc0)
676 if ((buffer
[1] & 0xc0) != 0x80)
679 if ((buffer
[0] & 0x20) == 0)
685 if ((buffer
[2] & 0xc0) != 0x80)
688 if ((buffer
[0] & 0x10) == 0)
694 if ((buffer
[3] & 0xc0) != 0x80)
700 /* Display a UTF-8 encoded character in BUFFER according to the setting
701 of unicode_display. The character is known to be valid.
702 Returns the number of bytes consumed. */
705 display_utf8_char (const unsigned char * buffer
)
708 unsigned int utf8_len
;
710 switch (buffer
[0] & 0x30)
723 switch (unicode_display
)
726 fprintf (stderr
, "ICE: unexpected unicode display type\n");
730 case unicode_highlight
:
731 if (unicode_display
== unicode_highlight
&& isatty (1))
732 printf ("\x1B[31;47m"); /* Red. */
737 printf ("\\u%02x%02x",
738 ((buffer
[0] & 0x1c) >> 2),
739 ((buffer
[0] & 0x03) << 6) | (buffer
[1] & 0x3f));
743 printf ("\\u%02x%02x",
744 ((buffer
[0] & 0x0f) << 4) | ((buffer
[1] & 0x3c) >> 2),
745 ((buffer
[1] & 0x03) << 6) | ((buffer
[2] & 0x3f)));
749 printf ("\\u%02x%02x%02x",
750 ((buffer
[0] & 0x07) << 6) | ((buffer
[1] & 0x3c) >> 2),
751 ((buffer
[1] & 0x03) << 6) | ((buffer
[2] & 0x3c) >> 2),
752 ((buffer
[2] & 0x03) << 6) | ((buffer
[3] & 0x3f)));
759 if (unicode_display
== unicode_highlight
&& isatty (1))
760 printf ("\033[0m"); /* Default colour. */
766 for (j
= 0; j
< utf8_len
; j
++)
767 printf ("%02x", buffer
[j
]);
772 printf ("%.1s", buffer
);
779 /* Display strings in BUFFER. Treat any UTF-8 encoded characters encountered
780 according to the setting of the unicode_display variable. The buffer
781 contains BUFLEN bytes.
783 Display the characters as if they started at ADDRESS and are contained in
787 print_unicode_buffer (const char * filename
,
789 const unsigned char * buffer
,
790 unsigned long buflen
)
792 /* Paranoia checks... */
795 || unicode_display
== unicode_default
797 || encoding_bytes
!= 1)
799 fprintf (stderr
, "ICE: bad arguments to print_unicode_buffer\n");
806 /* We must only display strings that are at least string_min *characters*
807 long. So we scan the buffer in two stages. First we locate the start
808 of a potential string. Then we walk along it until we have found
809 string_min characters. Then we go back to the start point and start
810 displaying characters according to the unicode_display setting. */
812 unsigned long start_point
= 0;
814 unsigned int char_len
= 1;
815 unsigned int num_found
= 0;
817 for (i
= 0; i
< buflen
; i
+= char_len
)
823 /* Find the first potential character of a string. */
824 if (! STRING_ISGRAPHIC (c
))
838 if ((char_len
= is_valid_utf8 (buffer
+ i
, buflen
- i
)) == 0)
845 if (unicode_display
== unicode_invalid
)
847 /* We have found a valid UTF-8 character, but we treat it as non-graphic. */
854 /* We have found a potential starting point for a string. */
859 if (num_found
>= string_min
)
863 if (num_found
< string_min
)
866 print_filename_and_address (filename
, address
+ start_point
);
868 /* We have found string_min characters. Display them and any
870 for (i
= start_point
; i
< buflen
; i
+= char_len
)
876 if (! STRING_ISGRAPHIC (c
))
880 else if (! is_valid_utf8 (buffer
+ i
, buflen
- i
))
882 else if (unicode_display
== unicode_invalid
)
885 char_len
= display_utf8_char (buffer
+ i
);
888 if (output_separator
)
889 fputs (output_separator
, stdout
);
893 /* FIXME: Using tail recursion here is lazy programming... */
894 print_unicode_buffer (filename
, address
+ i
, buffer
+ i
, buflen
- i
);
898 get_unicode_byte (FILE * stream
,
899 unsigned char * putback
,
900 unsigned int * num_putback
,
901 unsigned int * num_read
)
903 if (* num_putback
> 0)
905 * num_putback
= * num_putback
- 1;
906 return putback
[* num_putback
];
909 * num_read
= * num_read
+ 1;
911 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
912 return getc_unlocked (stream
);
914 return getc (stream
);
918 /* Helper function for print_unicode_stream. */
921 print_unicode_stream_body (const char * filename
,
924 unsigned char * putback_buf
,
925 unsigned int num_putback
,
926 unsigned char * print_buf
)
928 /* It would be nice if we could just read the stream into a buffer
929 and then process it with print_unicode_buffer. But the input
930 might be huge or it might be time-locked (e.g. stdin). So instead
931 we go one byte at a time... */
933 file_ptr start_point
= 0;
934 unsigned int num_read
= 0;
935 unsigned int num_chars
= 0;
936 unsigned int num_print
= 0;
939 /* Find a series of string_min characters. Put them into print_buf. */
942 if (num_chars
>= string_min
)
945 c
= get_unicode_byte (stream
, putback_buf
, & num_putback
, & num_read
);
949 if (! STRING_ISGRAPHIC (c
))
951 num_chars
= num_print
= 0;
956 start_point
= num_read
- 1;
960 print_buf
[num_print
] = c
;
968 num_chars
= num_print
= 0;
972 /* We *might* have a UTF-8 sequence. Time to start peeking. */
976 c
= get_unicode_byte (stream
, putback_buf
, & num_putback
, & num_read
);
981 if ((utf8
[1] & 0xc0) != 0x80)
984 putback_buf
[num_putback
++] = utf8
[1];
985 num_chars
= num_print
= 0;
988 else if ((utf8
[0] & 0x20) == 0)
990 /* A valid 2-byte UTF-8 encoding. */
991 if (unicode_display
== unicode_invalid
)
993 putback_buf
[num_putback
++] = utf8
[1];
994 num_chars
= num_print
= 0;
998 print_buf
[num_print
++] = utf8
[0];
999 print_buf
[num_print
++] = utf8
[1];
1005 c
= get_unicode_byte (stream
, putback_buf
, & num_putback
, & num_read
);
1010 if ((utf8
[2] & 0xc0) != 0x80)
1012 /* Invalid UTF-8. */
1013 putback_buf
[num_putback
++] = utf8
[2];
1014 putback_buf
[num_putback
++] = utf8
[1];
1015 num_chars
= num_print
= 0;
1018 else if ((utf8
[0] & 0x10) == 0)
1020 /* A valid 3-byte UTF-8 encoding. */
1021 if (unicode_display
== unicode_invalid
)
1023 putback_buf
[num_putback
++] = utf8
[2];
1024 putback_buf
[num_putback
++] = utf8
[1];
1025 num_chars
= num_print
= 0;
1029 print_buf
[num_print
++] = utf8
[0];
1030 print_buf
[num_print
++] = utf8
[1];
1031 print_buf
[num_print
++] = utf8
[2];
1037 c
= get_unicode_byte (stream
, putback_buf
, & num_putback
, & num_read
);
1042 if ((utf8
[3] & 0xc0) != 0x80)
1044 /* Invalid UTF-8. */
1045 putback_buf
[num_putback
++] = utf8
[3];
1046 putback_buf
[num_putback
++] = utf8
[2];
1047 putback_buf
[num_putback
++] = utf8
[1];
1048 num_chars
= num_print
= 0;
1050 /* We have a valid 4-byte UTF-8 encoding. */
1051 else if (unicode_display
== unicode_invalid
)
1053 putback_buf
[num_putback
++] = utf8
[3];
1054 putback_buf
[num_putback
++] = utf8
[1];
1055 putback_buf
[num_putback
++] = utf8
[2];
1056 num_chars
= num_print
= 0;
1060 print_buf
[num_print
++] = utf8
[0];
1061 print_buf
[num_print
++] = utf8
[1];
1062 print_buf
[num_print
++] = utf8
[2];
1063 print_buf
[num_print
++] = utf8
[3];
1069 if (num_chars
>= string_min
)
1071 /* We know that we have string_min valid characters in print_buf,
1072 and there may be more to come in the stream. Start displaying
1075 print_filename_and_address (filename
, address
+ start_point
);
1078 for (i
= 0; i
< num_print
;)
1080 if (print_buf
[i
] < 127)
1081 putchar (print_buf
[i
++]);
1083 i
+= display_utf8_char (print_buf
+ i
);
1086 /* OK so now we have to start reading unchecked bytes. */
1088 /* Find a series of string_min characters. Put them into print_buf. */
1091 c
= get_unicode_byte (stream
, putback_buf
, & num_putback
, & num_read
);
1095 if (! STRING_ISGRAPHIC (c
))
1107 /* We *might* have a UTF-8 sequence. Time to start peeking. */
1108 unsigned char utf8
[4];
1111 c
= get_unicode_byte (stream
, putback_buf
, & num_putback
, & num_read
);
1116 if ((utf8
[1] & 0xc0) != 0x80)
1118 /* Invalid UTF-8. */
1119 putback_buf
[num_putback
++] = utf8
[1];
1122 else if ((utf8
[0] & 0x20) == 0)
1124 /* Valid 2-byte UTF-8. */
1125 if (unicode_display
== unicode_invalid
)
1127 putback_buf
[num_putback
++] = utf8
[1];
1132 (void) display_utf8_char (utf8
);
1137 c
= get_unicode_byte (stream
, putback_buf
, & num_putback
, & num_read
);
1142 if ((utf8
[2] & 0xc0) != 0x80)
1144 /* Invalid UTF-8. */
1145 putback_buf
[num_putback
++] = utf8
[2];
1146 putback_buf
[num_putback
++] = utf8
[1];
1149 else if ((utf8
[0] & 0x10) == 0)
1151 /* Valid 3-byte UTF-8. */
1152 if (unicode_display
== unicode_invalid
)
1154 putback_buf
[num_putback
++] = utf8
[2];
1155 putback_buf
[num_putback
++] = utf8
[1];
1160 (void) display_utf8_char (utf8
);
1165 c
= get_unicode_byte (stream
, putback_buf
, & num_putback
, & num_read
);
1170 if ((utf8
[3] & 0xc0) != 0x80)
1172 /* Invalid UTF-8. */
1173 putback_buf
[num_putback
++] = utf8
[3];
1174 putback_buf
[num_putback
++] = utf8
[2];
1175 putback_buf
[num_putback
++] = utf8
[1];
1178 else if (unicode_display
== unicode_invalid
)
1180 putback_buf
[num_putback
++] = utf8
[3];
1181 putback_buf
[num_putback
++] = utf8
[2];
1182 putback_buf
[num_putback
++] = utf8
[1];
1186 /* A valid 4-byte UTF-8 encoding. */
1187 (void) display_utf8_char (utf8
);
1191 if (output_separator
)
1192 fputs (output_separator
, stdout
);
1198 /* FIXME: Using tail recursion here is lazy, but it works. */
1199 print_unicode_stream_body (filename
, address
+ num_read
, stream
, putback_buf
, num_putback
, print_buf
);
1202 /* Display strings read in from STREAM. Treat any UTF-8 encoded characters
1203 encountered according to the setting of the unicode_display variable.
1204 The stream is positioned at ADDRESS and is attached to FILENAME. */
1207 print_unicode_stream (const char * filename
,
1211 /* Paranoia checks... */
1212 if (filename
== NULL
1214 || unicode_display
== unicode_default
1216 || encoding_bytes
!= 1)
1218 fprintf (stderr
, "ICE: bad arguments to print_unicode_stream\n");
1222 /* Allocate space for string_min 4-byte utf-8 characters. */
1223 unsigned char * print_buf
= xmalloc ((4 * string_min
) + 1);
1224 /* We should never have to put back more than 4 bytes. */
1225 unsigned char putback_buf
[5];
1226 unsigned int num_putback
= 0;
1228 print_unicode_stream_body (filename
, address
, stream
, putback_buf
, num_putback
, print_buf
);
1232 /* Find the strings in file FILENAME, read from STREAM.
1233 Assume that STREAM is positioned so that the next byte read
1234 is at address ADDRESS in the file.
1236 If STREAM is NULL, do not read from it.
1237 The caller can supply a buffer of characters
1238 to be processed before the data in STREAM.
1239 MAGIC is the address of the buffer and
1240 MAGICCOUNT is how many characters are in it.
1241 Those characters come at address ADDRESS and the data in STREAM follow. */
1244 print_strings (const char *filename
, FILE *stream
, file_ptr address
,
1245 int magiccount
, char *magic
)
1247 if (unicode_display
!= unicode_default
)
1250 print_unicode_buffer (filename
, address
,
1251 (const unsigned char *) magic
, magiccount
);
1254 print_unicode_stream (filename
, address
, stream
);
1258 char *buf
= (char *) xmalloc (sizeof (char) * (string_min
+ 1));
1266 /* See if the next `string_min' chars are all graphic chars. */
1269 for (i
= 0; i
< string_min
; i
++)
1271 c
= get_char (stream
, &address
, &magiccount
, &magic
);
1278 if (! STRING_ISGRAPHIC (c
))
1280 /* Found a non-graphic. Try again starting with next byte. */
1281 unget_part_char (c
, &address
, &magiccount
, &magic
);
1287 /* We found a run of `string_min' graphic characters. Print up
1288 to the next non-graphic character. */
1289 print_filename_and_address (filename
, start
);
1292 fputs (buf
, stdout
);
1296 c
= get_char (stream
, &address
, &magiccount
, &magic
);
1299 if (! STRING_ISGRAPHIC (c
))
1301 unget_part_char (c
, &address
, &magiccount
, &magic
);
1307 if (output_separator
)
1308 fputs (output_separator
, stdout
);
1316 usage (FILE *stream
, int status
)
1318 fprintf (stream
, _("Usage: %s [option(s)] [file(s)]\n"), program_name
);
1319 fprintf (stream
, _(" Display printable strings in [file(s)] (stdin by default)\n"));
1320 fprintf (stream
, _(" The options are:\n"));
1322 if (DEFAULT_STRINGS_ALL
)
1323 fprintf (stream
, _("\
1324 -a - --all Scan the entire file, not just the data section [default]\n\
1325 -d --data Only scan the data sections in the file\n"));
1327 fprintf (stream
, _("\
1328 -a - --all Scan the entire file, not just the data section\n\
1329 -d --data Only scan the data sections in the file [default]\n"));
1331 fprintf (stream
, _("\
1332 -f --print-file-name Print the name of the file before each string\n\
1333 -n <number> Locate & print any sequence of at least <number>\n\
1334 --bytes=<number> displayable characters. (The default is 4).\n\
1335 -t --radix={o,d,x} Print the location of the string in base 8, 10 or 16\n\
1336 -w --include-all-whitespace Include all whitespace as valid string characters\n\
1337 -o An alias for --radix=o\n\
1338 -T --target=<BFDNAME> Specify the binary file format\n\
1339 -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
1340 s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
1341 --unicode={default|show|invalid|hex|escape|highlight}\n\
1342 -U {d|s|i|x|e|h} Specify how to treat UTF-8 encoded unicode characters\n\
1343 -s --output-separator=<string> String used to separate strings in output.\n\
1344 @<file> Read options from <file>\n\
1345 -h --help Display this information\n\
1346 -v -V --version Print the program's version number\n"));
1347 list_supported_targets (program_name
, stream
);
1348 if (REPORT_BUGS_TO
[0] && status
== 0)
1349 fprintf (stream
, _("Report bugs to %s\n"), REPORT_BUGS_TO
);