]>
| Commit | Line | Data |
|---|---|---|
| ccc6cda3 JA |
1 | /* pathexp.c -- The shell interface to the globbing library. */ |
| 2 | ||
| b8c60bc9 | 3 | /* Copyright (C) 1995-2024 Free Software Foundation, Inc. |
| ccc6cda3 JA |
4 | |
| 5 | This file is part of GNU Bash, the Bourne Again SHell. | |
| 6 | ||
| 3185942a JA |
7 | Bash is free software: you can redistribute it and/or modify |
| 8 | it under the terms of the GNU General Public License as published by | |
| 9 | the Free Software Foundation, either version 3 of the License, or | |
| 10 | (at your option) any later version. | |
| ccc6cda3 | 11 | |
| 3185942a JA |
12 | Bash is distributed in the hope that it will be useful, |
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 15 | GNU General Public License for more details. | |
| ccc6cda3 | 16 | |
| 3185942a JA |
17 | You should have received a copy of the GNU General Public License |
| 18 | along with Bash. If not, see <http://www.gnu.org/licenses/>. | |
| 19 | */ | |
| ccc6cda3 JA |
20 | |
| 21 | #include "config.h" | |
| 22 | ||
| 23 | #include "bashtypes.h" | |
| 24 | #include <stdio.h> | |
| 25 | ||
| 26 | #if defined (HAVE_UNISTD_H) | |
| 27 | # include <unistd.h> | |
| 28 | #endif | |
| 29 | ||
| b8c60bc9 CR |
30 | #include "posixstat.h" |
| 31 | #include "stat-time.h" | |
| 32 | ||
| ccc6cda3 JA |
33 | #include "bashansi.h" |
| 34 | ||
| 35 | #include "shell.h" | |
| 36 | #include "pathexp.h" | |
| 37 | #include "flags.h" | |
| 38 | ||
| 7117c2d2 | 39 | #include "shmbutil.h" |
| 3185942a | 40 | #include "bashintl.h" |
| 7117c2d2 | 41 | |
| b8c60bc9 | 42 | |
| f73dda09 | 43 | #include <glob/strmatch.h> |
| b72432fd | 44 | |
| b8c60bc9 CR |
45 | static int glob_name_is_acceptable (const char *); |
| 46 | static void ignore_globbed_names (char **, sh_ignore_func_t *); | |
| 47 | static char *split_ignorespec (char *, int *); | |
| 48 | static void sh_sortglob (char **); | |
| ac50fbac | 49 | |
| 74091dd4 | 50 | #include <glob/glob.h> |
| ccc6cda3 JA |
51 | |
/* Control whether * matches .files in globbing.  shell_glob_filename and
   noquote_glob_filename set noglob_dot_filenames to the logical inverse of
   this value before calling the glob library, and setup_glob_ignore changes
   it when GLOBIGNORE is set or unset. */
int glob_dot_filenames;

/* Control whether the extended globbing features are enabled.  Consulted by
   unquoted_glob_pattern_p when it sees `+', `@' or `!' followed by an open
   paren. */
int extended_glob = EXTGLOB_DEFAULT;

/* Control enabling special handling of `**'.  When non-zero, GX_GLOBSTAR is
   passed to glob_filename. */
int glob_star = 0;
| 60 | ||
| 8868edaf CR |
61 | /* Return nonzero if STRING has any unquoted special globbing chars in it. |
| 62 | This is supposed to be called when pathname expansion is performed, so | |
| 63 | it implements the rules in Posix 2.13.3, specifically that an unquoted | |
| 64 | slash cannot appear in a bracket expression. */ | |
| ccc6cda3 | 65 | int |
| b8c60bc9 | 66 | unquoted_glob_pattern_p (char *string) |
| ccc6cda3 JA |
67 | { |
| 68 | register int c; | |
| 7117c2d2 | 69 | char *send; |
| b8c60bc9 | 70 | int open; |
| ccc6cda3 | 71 | |
| 7117c2d2 JA |
72 | DECLARE_MBSTATE; |
| 73 | ||
| b8c60bc9 | 74 | open = 0; |
| 7117c2d2 JA |
75 | send = string + strlen (string); |
| 76 | ||
| ccc6cda3 JA |
77 | while (c = *string++) |
| 78 | { | |
| 79 | switch (c) | |
| 80 | { | |
| 81 | case '?': | |
| 82 | case '*': | |
| 83 | return (1); | |
| 84 | ||
| 85 | case '[': | |
| 86 | open++; | |
| 87 | continue; | |
| 88 | ||
| 89 | case ']': | |
| 8868edaf | 90 | if (open) /* XXX - if --open == 0? */ |
| ccc6cda3 JA |
91 | return (1); |
| 92 | continue; | |
| 93 | ||
| 8868edaf CR |
94 | case '/': |
| 95 | if (open) | |
| 96 | open = 0; | |
| b8c60bc9 | 97 | continue; |
| 8868edaf | 98 | |
| cce855bc JA |
99 | case '+': |
| 100 | case '@': | |
| 101 | case '!': | |
| b8c60bc9 | 102 | if (extended_glob && *string == '(') /*)*/ |
| cce855bc JA |
103 | return (1); |
| 104 | continue; | |
| 105 | ||
| ccc6cda3 | 106 | case '\\': |
| b8c60bc9 | 107 | if (*string == CTLESC) |
| fcf6ae7d | 108 | { |
| fcf6ae7d | 109 | string++; |
| b8c60bc9 CR |
110 | /* If the CTLESC was quoting a CTLESC, skip it so that it's not |
| 111 | treated as a quoting character */ | |
| 112 | if (*string == CTLESC) | |
| 113 | string++; | |
| 8868edaf | 114 | } |
| b8c60bc9 CR |
115 | else |
| 116 | /*FALLTHROUGH*/ | |
| 117 | case CTLESC: | |
| ccc6cda3 JA |
118 | if (*string++ == '\0') |
| 119 | return (0); | |
| 120 | } | |
| 7117c2d2 JA |
121 | |
| 122 | /* Advance one fewer byte than an entire multibyte character to | |
| 123 | account for the auto-increment in the loop above. */ | |
| 124 | #ifdef HANDLE_MULTIBYTE | |
| 125 | string--; | |
| 126 | ADVANCE_CHAR_P (string, send - string); | |
| 127 | string++; | |
| 128 | #else | |
| 129 | ADVANCE_CHAR_P (string, send - string); | |
| 130 | #endif | |
| ccc6cda3 | 131 | } |
| fcf6ae7d | 132 | |
| 8868edaf | 133 | return (0); |
| ccc6cda3 JA |
134 | } |
| 135 | ||
| f1be666c JA |
/* Return 1 if C is a character that is `special' in a POSIX ERE and so has
   to be backslash-quoted to match itself; return 0 for every other value. */
static inline int
ere_char (int c)
{
  int special;

  switch (c)
    {
    case '.': case '[': case '\\':
    case '(': case ')':
    case '*': case '+': case '?':
    case '{': case '|':
    case '^': case '$':
      special = 1;
      break;
    default:
      special = 0;
      break;
    }

  return special;
}
| 161 | ||
/* Return non-zero if the character at S should be backslash-quoted because
   it can be special in a glob pattern.  This is only used to decide whether
   to backslash-quote a character.  When EXTENDED_GLOB is defined, `+' and
   `@' are special only when immediately followed by an open paren, and the
   paren and bar characters themselves are always special. */
int
glob_char_p (const char *s)
{
  int special;

  special = 0;
  switch (*s)
    {
#if defined (EXTENDED_GLOB)
    case '+':
    case '@':
      special = (s[1] == '(');  /*)*/
      break;
    case '(':
    case '|':
    case ')':
#endif
    case '!':
    case '^':
    case '-':
    case '.':
    case ':':
    case '=':
    case '*':
    case '[':
    case ']':
    case '?':
    case '\\':
      special = 1;
      break;
    default:
      break;
    }

  return special;
}
| 191 | ||
| b8c60bc9 CR |
/* Return 1 if the character at S needs a backslash when a glob pattern is
   being quoted: anything glob_char_p accepts, plus `%' and `#'. */
static inline int
glob_quote_char (const char *s)
{
  if (glob_char_p (s))
    return 1;

  return (*s == '%' || *s == '#');
}
| 197 | ||
| ccc6cda3 JA |
198 | /* PATHNAME can contain characters prefixed by CTLESC; this indicates |
| 199 | that the character is to be quoted. We quote it here in the style | |
| cce855bc | 200 | that the glob library recognizes. If flags includes QGLOB_CVTNULL, |
| ccc6cda3 JA |
201 | we change quoted null strings (pathname[0] == CTLNUL) into empty |
| 202 | strings (pathname[0] == 0). If this is called after quote removal | |
| cce855bc | 203 | is performed, (flags & QGLOB_CVTNULL) should be 0; if called when quote |
| ccc6cda3 | 204 | removal has not been done (for example, before attempting to match a |
| cce855bc | 205 | pattern while executing a case statement), flags should include |
| d233b485 CR |
206 | QGLOB_CVTNULL. If flags includes QGLOB_CTLESC, we need to remove CTLESC |
| 207 | quoting CTLESC or CTLNUL (as if dequote_string were called). If flags | |
| 208 | includes QGLOB_FILENAME, appropriate quoting to match a filename should be | |
| 209 | performed. QGLOB_REGEXP means we're quoting for a Posix ERE (for | |
| 210 | [[ string =~ pat ]]) and that requires some special handling. */ | |
| ccc6cda3 | 211 | char * |
| b8c60bc9 | 212 | quote_string_for_globbing (const char *pathname, int qflags) |
| ccc6cda3 JA |
213 | { |
| 214 | char *temp; | |
| cce855bc | 215 | register int i, j; |
| d233b485 | 216 | int cclass, collsym, equiv, c, last_was_backslash; |
| a0c0a00f | 217 | int savei, savej; |
| b8c60bc9 | 218 | unsigned char cc; |
| ccc6cda3 | 219 | |
| ac50fbac | 220 | temp = (char *)xmalloc (2 * strlen (pathname) + 1); |
| ccc6cda3 | 221 | |
| cce855bc | 222 | if ((qflags & QGLOB_CVTNULL) && QUOTED_NULL (pathname)) |
| ccc6cda3 JA |
223 | { |
| 224 | temp[0] = '\0'; | |
| 225 | return temp; | |
| 226 | } | |
| 227 | ||
| d233b485 | 228 | cclass = collsym = equiv = last_was_backslash = 0; |
| cce855bc | 229 | for (i = j = 0; pathname[i]; i++) |
| ccc6cda3 | 230 | { |
| ac50fbac CR |
231 | /* Fix for CTLESC at the end of the string? */ |
| 232 | if (pathname[i] == CTLESC && pathname[i+1] == '\0') | |
| 233 | { | |
| 234 | temp[j++] = pathname[i++]; | |
| 235 | break; | |
| 236 | } | |
| 237 | /* If we are parsing regexp, turn CTLESC CTLESC into CTLESC. It's not an | |
| 238 | ERE special character, so we should just be able to pass it through. */ | |
| d233b485 | 239 | else if ((qflags & (QGLOB_REGEXP|QGLOB_CTLESC)) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL)) |
| ac50fbac CR |
240 | { |
| 241 | i++; | |
| 242 | temp[j++] = pathname[i]; | |
| 243 | continue; | |
| 244 | } | |
| 245 | else if (pathname[i] == CTLESC) | |
| 28ef6c31 | 246 | { |
| 8868edaf | 247 | convert_to_backslash: |
| b8c60bc9 CR |
248 | cc = pathname[i+1]; |
| 249 | ||
| 28ef6c31 JA |
250 | if ((qflags & QGLOB_FILENAME) && pathname[i+1] == '/') |
| 251 | continue; | |
| b8c60bc9 | 252 | |
| ac50fbac | 253 | /* What to do if preceding char is backslash? */ |
| b8c60bc9 CR |
254 | |
| 255 | /* We don't have to backslash-quote non-special ERE characters if | |
| 256 | we're quoting a regexp. */ | |
| 257 | if (cc != CTLESC && (qflags & QGLOB_REGEXP) && ere_char (cc) == 0) | |
| 258 | continue; | |
| 259 | ||
| 260 | /* We don't have to backslash-quote non-special BRE characters if | |
| 261 | we're quoting a glob pattern. */ | |
| 262 | if (cc != CTLESC && (qflags & QGLOB_REGEXP) == 0 && glob_quote_char (pathname+i+1) == 0) | |
| f1be666c | 263 | continue; |
| b8c60bc9 CR |
264 | |
| 265 | /* If we're in a multibyte locale, don't bother quoting multibyte | |
| 266 | characters. It matters if we're going to convert NFD to NFC on | |
| 267 | macOS, and doesn't make a difference on other systems. */ | |
| 268 | if (cc != CTLESC && locale_utf8locale && UTF8_SINGLEBYTE (cc) == 0) | |
| 269 | continue; /* probably don't need to check for UTF-8 locale */ | |
| 270 | ||
| cce855bc | 271 | temp[j++] = '\\'; |
| 7117c2d2 JA |
272 | i++; |
| 273 | if (pathname[i] == '\0') | |
| 274 | break; | |
| 28ef6c31 | 275 | } |
| ac50fbac CR |
276 | else if ((qflags & QGLOB_REGEXP) && (i == 0 || pathname[i-1] != CTLESC) && pathname[i] == '[') /*]*/ |
| 277 | { | |
| ac50fbac | 278 | temp[j++] = pathname[i++]; /* open bracket */ |
| a0c0a00f CR |
279 | savej = j; |
| 280 | savei = i; | |
| ac50fbac | 281 | c = pathname[i++]; /* c == char after open bracket */ |
| d233b485 CR |
282 | if (c == '^') /* ignore pattern negation */ |
| 283 | { | |
| 284 | temp[j++] = c; | |
| 285 | c = pathname[i++]; | |
| 286 | } | |
| 287 | if (c == ']') /* ignore right bracket if first char */ | |
| 288 | { | |
| 289 | temp[j++] = c; | |
| 290 | c = pathname[i++]; | |
| 291 | } | |
| ac50fbac CR |
292 | do |
| 293 | { | |
| 294 | if (c == 0) | |
| 295 | goto endpat; | |
| 296 | else if (c == CTLESC) | |
| 297 | { | |
| 298 | /* skip c, check for EOS, let assignment at end of loop */ | |
| 299 | /* pathname[i] == backslash-escaped character */ | |
| 300 | if (pathname[i] == 0) | |
| 301 | goto endpat; | |
| 302 | temp[j++] = pathname[i++]; | |
| 303 | } | |
| 304 | else if (c == '[' && pathname[i] == ':') | |
| 305 | { | |
| 306 | temp[j++] = c; | |
| 307 | temp[j++] = pathname[i++]; | |
| 308 | cclass = 1; | |
| 309 | } | |
| 310 | else if (cclass && c == ':' && pathname[i] == ']') | |
| 311 | { | |
| 312 | temp[j++] = c; | |
| 313 | temp[j++] = pathname[i++]; | |
| 314 | cclass = 0; | |
| 315 | } | |
| 316 | else if (c == '[' && pathname[i] == '=') | |
| 317 | { | |
| 318 | temp[j++] = c; | |
| 319 | temp[j++] = pathname[i++]; | |
| 320 | if (pathname[i] == ']') | |
| 321 | temp[j++] = pathname[i++]; /* right brack can be in equiv */ | |
| 322 | equiv = 1; | |
| 323 | } | |
| 324 | else if (equiv && c == '=' && pathname[i] == ']') | |
| 325 | { | |
| 326 | temp[j++] = c; | |
| 327 | temp[j++] = pathname[i++]; | |
| 328 | equiv = 0; | |
| 329 | } | |
| 330 | else if (c == '[' && pathname[i] == '.') | |
| 331 | { | |
| 332 | temp[j++] = c; | |
| 333 | temp[j++] = pathname[i++]; | |
| 334 | if (pathname[i] == ']') | |
| 335 | temp[j++] = pathname[i++]; /* right brack can be in collsym */ | |
| 336 | collsym = 1; | |
| 337 | } | |
| 338 | else if (collsym && c == '.' && pathname[i] == ']') | |
| 339 | { | |
| 340 | temp[j++] = c; | |
| 341 | temp[j++] = pathname[i++]; | |
| 342 | collsym = 0; | |
| 343 | } | |
| 344 | else | |
| 345 | temp[j++] = c; | |
| 346 | } | |
| a0c0a00f CR |
347 | while (((c = pathname[i++]) != ']') && c != 0); |
| 348 | ||
| 349 | /* If we don't find the closing bracket before we hit the end of | |
| 350 | the string, rescan string without treating it as a bracket | |
| 351 | expression (has implications for backslash and special ERE | |
| 352 | chars) */ | |
| 353 | if (c == 0) | |
| 354 | { | |
| 355 | i = savei - 1; /* -1 for autoincrement above */ | |
| 356 | j = savej; | |
| 357 | continue; | |
| 358 | } | |
| 359 | ||
| ac50fbac CR |
360 | temp[j++] = c; /* closing right bracket */ |
| 361 | i--; /* increment will happen above in loop */ | |
| 362 | continue; /* skip double assignment below */ | |
| 363 | } | |
| 364 | else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP) == 0) | |
| 3185942a | 365 | { |
| ac50fbac | 366 | /* XXX - if not quoting regexp, use backslash as quote char. Should |
| 8868edaf | 367 | We just pass it through without treating it as special? That is |
| ac50fbac CR |
368 | what ksh93 seems to do. */ |
| 369 | ||
| 370 | /* If we want to pass through backslash unaltered, comment out these | |
| 371 | lines. */ | |
| 3185942a | 372 | temp[j++] = '\\'; |
| ac50fbac | 373 | |
| 3185942a JA |
374 | i++; |
| 375 | if (pathname[i] == '\0') | |
| 376 | break; | |
| d233b485 CR |
377 | /* If we are turning CTLESC CTLESC into CTLESC, we need to do that |
| 378 | even when the first CTLESC is preceded by a backslash. */ | |
| 379 | if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC && (pathname[i+1] == CTLESC || pathname[i+1] == CTLNUL)) | |
| 380 | i++; /* skip over the CTLESC */ | |
| 8868edaf CR |
381 | else if ((qflags & QGLOB_CTLESC) && pathname[i] == CTLESC) |
| 382 | /* A little more general: if there is an unquoted backslash in the | |
| 383 | pattern and we are handling quoted characters in the pattern, | |
| 384 | convert the CTLESC to backslash and add the next character on | |
| 385 | the theory that the backslash will quote the next character | |
| 386 | but it would be inconsistent not to replace the CTLESC with | |
| 387 | another backslash here. We can't tell at this point whether the | |
| 388 | CTLESC comes from a backslash or other form of quoting in the | |
| 389 | original pattern. */ | |
| 390 | goto convert_to_backslash; | |
| 3185942a | 391 | } |
| ac50fbac CR |
392 | else if (pathname[i] == '\\' && (qflags & QGLOB_REGEXP)) |
| 393 | last_was_backslash = 1; | |
| b8c60bc9 CR |
394 | #if 0 |
| 395 | /* TAG:bash-5.4 Takaaki Konno <re_c25@yahoo.co.jp> 6/23/2025 */ | |
| 396 | else if (pathname[i] == CTLNUL && (qflags & QGLOB_CVTNULL) | |
| 397 | && (qflags & QGLOB_CTLESC)) | |
| 398 | /* If we have an unescaped CTLNUL in the string, and QFLAGS says | |
| 399 | we want to remove those (QGLOB_CVTNULL) but the string is quoted | |
| 400 | (QGLOB_CVTNULL and QGLOB_CTLESC), we need to remove it. This can | |
| 401 | happen when the pattern contains a quoted null string adjacent | |
| 402 | to non-null characters, and it is not removed by quote removal. */ | |
| 403 | continue; | |
| 404 | #endif | |
| 405 | ||
| 7117c2d2 | 406 | temp[j++] = pathname[i]; |
| ccc6cda3 | 407 | } |
| ac50fbac | 408 | endpat: |
| cce855bc | 409 | temp[j] = '\0'; |
| ccc6cda3 JA |
410 | |
| 411 | return (temp); | |
| 412 | } | |
| 413 | ||
| 414 | char * | |
| b8c60bc9 | 415 | quote_globbing_chars (const char *string) |
| ccc6cda3 | 416 | { |
| 7117c2d2 | 417 | size_t slen; |
| a0c0a00f CR |
418 | char *temp, *t; |
| 419 | const char *s, *send; | |
| 7117c2d2 JA |
420 | DECLARE_MBSTATE; |
| 421 | ||
| 422 | slen = strlen (string); | |
| 423 | send = string + slen; | |
| ccc6cda3 | 424 | |
| 7117c2d2 | 425 | temp = (char *)xmalloc (slen * 2 + 1); |
| ccc6cda3 JA |
426 | for (t = temp, s = string; *s; ) |
| 427 | { | |
| 3185942a JA |
428 | if (glob_char_p (s)) |
| 429 | *t++ = '\\'; | |
| 7117c2d2 JA |
430 | |
| 431 | /* Copy a single (possibly multibyte) character from s to t, | |
| ac50fbac | 432 | incrementing both. */ |
| 7117c2d2 | 433 | COPY_CHAR_P (t, s, send); |
| ccc6cda3 JA |
434 | } |
| 435 | *t = '\0'; | |
| 436 | return temp; | |
| 437 | } | |
| 438 | ||
| b8c60bc9 CR |
439 | /* Call the glob library to do globbing on PATHNAME, honoring all the shell |
| 440 | variables that control globbing. */ | |
| ccc6cda3 | 441 | char ** |
| b8c60bc9 | 442 | shell_glob_filename (const char *pathname, int qflags) |
| ccc6cda3 | 443 | { |
| ccc6cda3 | 444 | char *temp, **results; |
| 8868edaf | 445 | int gflags, quoted_pattern; |
| ccc6cda3 JA |
446 | |
| 447 | noglob_dot_filenames = glob_dot_filenames == 0; | |
| 448 | ||
| 8868edaf | 449 | temp = quote_string_for_globbing (pathname, QGLOB_FILENAME|qflags); |
| d233b485 CR |
450 | gflags = glob_star ? GX_GLOBSTAR : 0; |
| 451 | results = glob_filename (temp, gflags); | |
| ccc6cda3 JA |
452 | free (temp); |
| 453 | ||
| 454 | if (results && ((GLOB_FAILED (results)) == 0)) | |
| 455 | { | |
| 456 | if (should_ignore_glob_matches ()) | |
| 457 | ignore_glob_matches (results); | |
| 458 | if (results && results[0]) | |
| b8c60bc9 | 459 | sh_sortglob (results); |
| ccc6cda3 JA |
460 | else |
| 461 | { | |
| 462 | FREE (results); | |
| 463 | results = (char **)&glob_error_return; | |
| 464 | } | |
| 465 | } | |
| 466 | ||
| 467 | return (results); | |
| ccc6cda3 JA |
468 | } |
| 469 | ||
| b8c60bc9 CR |
#if defined (READLINE) && defined (PROGRAMMABLE_COMPLETION)
/* Glob PATHNAME exactly as given, without running it through
   quote_string_for_globbing and without applying GLOBIGNORE.  Returns the
   sorted vector of matches, or a null pointer if glob_filename returned
   null or a value for which GLOB_FAILED is true. */
char **
noquote_glob_filename (char *pathname)
{
  char **matches;

  noglob_dot_filenames = (glob_dot_filenames == 0);

  matches = glob_filename (pathname, glob_star ? GX_GLOBSTAR : 0);
  if (matches == 0 || GLOB_FAILED (matches))
    return ((char **)NULL);

  if (matches[0])
    sh_sortglob (matches);

  return (matches);
}
#endif
| 491 | ||
| ccc6cda3 JA |
/* Stuff for GLOBIGNORE. */

/* Parsed state for the GLOBIGNORE variable, filled in by
   setup_ignore_patterns.  The per-field notes are inferred from how
   setup_ignore_patterns uses struct ignorevar -- NOTE(review): confirm the
   member order against the struct declaration in pathexp.h. */
static struct ignorevar globignore =
{
  "GLOBIGNORE",                 /* presumably varname */
  (struct ign *)0,              /* presumably ignores: no patterns parsed yet */
  0,                            /* presumably num_ignores */
  (char *)0,                    /* presumably last_ignoreval */
  (sh_iv_item_func_t *)0,       /* presumably item_func: no per-item hook */
};
| 502 | ||
| 503 | /* Set up to ignore some glob matches because the value of GLOBIGNORE | |
| 504 | has changed. If GLOBIGNORE is being unset, we also need to disable | |
| 505 | the globbing of filenames beginning with a `.'. */ | |
| 506 | void | |
| b8c60bc9 | 507 | setup_glob_ignore (const char *name) |
| ccc6cda3 JA |
508 | { |
| 509 | char *v; | |
| 510 | ||
| 511 | v = get_string_value (name); | |
| 512 | setup_ignore_patterns (&globignore); | |
| 513 | ||
| 514 | if (globignore.num_ignores) | |
| 515 | glob_dot_filenames = 1; | |
| 516 | else if (v == 0) | |
| 517 | glob_dot_filenames = 0; | |
| 518 | } | |
| 519 | ||
| 520 | int | |
| b8c60bc9 | 521 | should_ignore_glob_matches (void) |
| ccc6cda3 JA |
522 | { |
| 523 | return globignore.num_ignores; | |
| 524 | } | |
| 525 | ||
| 526 | /* Return 0 if NAME matches a pattern in the globignore.ignores list. */ | |
| 527 | static int | |
| b8c60bc9 | 528 | glob_name_is_acceptable (const char *name) |
| ccc6cda3 JA |
529 | { |
| 530 | struct ign *p; | |
| 8868edaf | 531 | char *n; |
| cce855bc | 532 | int flags; |
| ccc6cda3 | 533 | |
| 8868edaf CR |
534 | /* . and .. are never matched. We extend this to the terminal component of a |
| 535 | pathname. */ | |
| 536 | n = strrchr (name, '/'); | |
| 537 | if (n == 0 || n[1] == 0) | |
| 538 | n = (char *)name; | |
| 539 | else | |
| 540 | n++; | |
| 541 | ||
| 542 | if (n[0] == '.' && (n[1] == '\0' || (n[1] == '.' && n[2] == '\0'))) | |
| ccc6cda3 JA |
543 | return (0); |
| 544 | ||
| a0c0a00f | 545 | flags = FNM_PATHNAME | FNMATCH_EXTFLAG | FNMATCH_NOCASEGLOB; |
| ccc6cda3 JA |
546 | for (p = globignore.ignores; p->val; p++) |
| 547 | { | |
| f73dda09 | 548 | if (strmatch (p->val, (char *)name, flags) != FNM_NOMATCH) |
| 28ef6c31 | 549 | return (0); |
| ccc6cda3 JA |
550 | } |
| 551 | return (1); | |
| 552 | } | |
| 553 | ||
| 554 | /* Internal function to test whether filenames in NAMES should be | |
| 555 | ignored. NAME_FUNC is a pointer to a function to call with each | |
| 556 | name. It returns non-zero if the name is acceptable to the particular | |
| 557 | ignore function which called _ignore_names; zero if the name should | |
| 558 | be removed from NAMES. */ | |
| 559 | ||
| 560 | static void | |
| b8c60bc9 | 561 | ignore_globbed_names (char **names, sh_ignore_func_t *name_func) |
| ccc6cda3 JA |
562 | { |
| 563 | char **newnames; | |
| b8c60bc9 | 564 | size_t n, i; |
| ccc6cda3 JA |
565 | |
| 566 | for (i = 0; names[i]; i++) | |
| 567 | ; | |
| 7117c2d2 | 568 | newnames = strvec_create (i + 1); |
| ccc6cda3 JA |
569 | |
| 570 | for (n = i = 0; names[i]; i++) | |
| 571 | { | |
| 572 | if ((*name_func) (names[i])) | |
| 28ef6c31 | 573 | newnames[n++] = names[i]; |
| ccc6cda3 JA |
574 | else |
| 575 | free (names[i]); | |
| 576 | } | |
| 577 | ||
| 578 | newnames[n] = (char *)NULL; | |
| 579 | ||
| 580 | if (n == 0) | |
| 581 | { | |
| 582 | names[0] = (char *)NULL; | |
| 583 | free (newnames); | |
| 584 | return; | |
| 585 | } | |
| 586 | ||
| 587 | /* Copy the acceptable names from NEWNAMES back to NAMES and set the | |
| 588 | new array end. */ | |
| 589 | for (n = 0; newnames[n]; n++) | |
| 590 | names[n] = newnames[n]; | |
| 591 | names[n] = (char *)NULL; | |
| d166f048 | 592 | free (newnames); |
| ccc6cda3 JA |
593 | } |
| 594 | ||
| 595 | void | |
| b8c60bc9 | 596 | ignore_glob_matches (char **names) |
| ccc6cda3 JA |
597 | { |
| 598 | if (globignore.num_ignores == 0) | |
| 599 | return; | |
| 600 | ||
| 601 | ignore_globbed_names (names, glob_name_is_acceptable); | |
| 602 | } | |
| 603 | ||
| 495aee44 | 604 | static char * |
| b8c60bc9 | 605 | split_ignorespec (char *s, int *ip) |
| 495aee44 CR |
606 | { |
| 607 | char *t; | |
| 608 | int n, i; | |
| 609 | ||
| 610 | if (s == 0) | |
| 611 | return 0; | |
| 612 | ||
| 613 | i = *ip; | |
| 614 | if (s[i] == 0) | |
| 615 | return 0; | |
| 616 | ||
| a0c0a00f | 617 | n = skip_to_delim (s, i, ":", SD_NOJMP|SD_EXTGLOB|SD_GLOB); |
| 495aee44 CR |
618 | t = substring (s, i, n); |
| 619 | ||
| 620 | if (s[n] == ':') | |
| 621 | n++; | |
| 622 | *ip = n; | |
| 623 | return t; | |
| 624 | } | |
| 625 | ||
| ccc6cda3 | 626 | void |
| b8c60bc9 | 627 | setup_ignore_patterns (struct ignorevar *ivp) |
| ccc6cda3 JA |
628 | { |
| 629 | int numitems, maxitems, ptr; | |
| 630 | char *colon_bit, *this_ignoreval; | |
| 631 | struct ign *p; | |
| 632 | ||
| 633 | this_ignoreval = get_string_value (ivp->varname); | |
| 634 | ||
| 635 | /* If nothing has changed then just exit now. */ | |
| 636 | if ((this_ignoreval && ivp->last_ignoreval && STREQ (this_ignoreval, ivp->last_ignoreval)) || | |
| 637 | (!this_ignoreval && !ivp->last_ignoreval)) | |
| 638 | return; | |
| 639 | ||
| 640 | /* Oops. The ignore variable has changed. Re-parse it. */ | |
| 641 | ivp->num_ignores = 0; | |
| 642 | ||
| 643 | if (ivp->ignores) | |
| 644 | { | |
| 645 | for (p = ivp->ignores; p->val; p++) | |
| 646 | free(p->val); | |
| 647 | free (ivp->ignores); | |
| 648 | ivp->ignores = (struct ign *)NULL; | |
| 649 | } | |
| 650 | ||
| 651 | if (ivp->last_ignoreval) | |
| 652 | { | |
| 653 | free (ivp->last_ignoreval); | |
| 654 | ivp->last_ignoreval = (char *)NULL; | |
| 655 | } | |
| 656 | ||
| 657 | if (this_ignoreval == 0 || *this_ignoreval == '\0') | |
| 658 | return; | |
| 659 | ||
| 660 | ivp->last_ignoreval = savestring (this_ignoreval); | |
| 661 | ||
| 662 | numitems = maxitems = ptr = 0; | |
| 663 | ||
| 495aee44 | 664 | while (colon_bit = split_ignorespec (this_ignoreval, &ptr)) |
| ccc6cda3 JA |
665 | { |
| 666 | if (numitems + 1 >= maxitems) | |
| 667 | { | |
| 668 | maxitems += 10; | |
| 669 | ivp->ignores = (struct ign *)xrealloc (ivp->ignores, maxitems * sizeof (struct ign)); | |
| 670 | } | |
| 671 | ivp->ignores[numitems].val = colon_bit; | |
| 672 | ivp->ignores[numitems].len = strlen (colon_bit); | |
| 673 | ivp->ignores[numitems].flags = 0; | |
| 674 | if (ivp->item_func) | |
| 28ef6c31 | 675 | (*ivp->item_func) (&ivp->ignores[numitems]); |
| ccc6cda3 JA |
676 | numitems++; |
| 677 | } | |
| 678 | ivp->ignores[numitems].val = (char *)NULL; | |
| 679 | ivp->num_ignores = numitems; | |
| 680 | } | |
| b8c60bc9 CR |
681 | |
/* Functions to handle sorting glob results in different ways depending on
   the value of the GLOBSORT variable. */

/* The sort currently in effect: one of the SORT_* values, with SORT_REVERSE
   or'ed in by setup_globsort when the sort specification starts with `-'. */
static int glob_sorttype = SORT_NONE;

/* Maps the sort names accepted by setup_globsort to SORT_* values.  The
   list ends with an entry whose name is null. */
static STRING_INT_ALIST sorttypes[] = {
  { "name", SORT_NAME },
  { "size", SORT_SIZE },
  { "mtime", SORT_MTIME },
  { "atime", SORT_ATIME },
  { "ctime", SORT_CTIME },
  { "blocks", SORT_BLOCKS },
  { "numeric", SORT_NUMERIC },
  { "nosort", SORT_NOSORT },
  { (char *)NULL, -1 }
};
| 698 | ||
/* A subset of the fields in the posix stat struct -- the ones we need --
   normalized to using struct timespec.  BLOCKS holds a copy of st_blocks;
   it is an intmax_t rather than an int so that the block count of a very
   large file is not truncated when it is copied in and compared. */
struct globstat {
  off_t size;
  struct timespec mtime;
  struct timespec atime;
  struct timespec ctime;
  intmax_t blocks;
};
| 708 | ||
/* One element of the array that is sorted in place of the glob results:
   NAME is the pointer taken from the results vector (not a copy) and ST is
   the stat information gathered for it by globsort_buildarray. */
struct globsort_t {
  char *name;
  struct globstat st;
};
| 713 | ||
/* Stat information with every field set to -1; globsort_buildarray stores
   this for a name when stat() on it fails. */
static struct globstat glob_nullstat = { -1, { -1, -1 }, { -1, -1 }, { -1, -1 }, -1 };
| 715 | ||
| 716 | static inline int | |
| 717 | glob_findtype (char *t) | |
| 718 | { | |
| 719 | int type; | |
| 720 | ||
| 721 | type = find_string_in_alist (t, sorttypes, 0); | |
| 722 | return (type == -1 ? SORT_NONE : type); | |
| 723 | } | |
| 724 | ||
| 725 | void | |
| 726 | setup_globsort (const char *varname) | |
| 727 | { | |
| 728 | char *val; | |
| 729 | int r, t; | |
| 730 | ||
| 731 | glob_sorttype = SORT_NONE; | |
| 732 | val = get_string_value (varname); | |
| 733 | if (val == 0 || *val == 0) | |
| 734 | return; | |
| 735 | ||
| 736 | t = r = 0; | |
| 737 | while (*val && whitespace (*val)) | |
| 738 | val++; /* why not? */ | |
| 739 | if (*val == '+') | |
| 740 | val++; /* allow leading `+' but ignore it */ | |
| 741 | else if (*val == '-') | |
| 742 | { | |
| 743 | r = SORT_REVERSE; /* leading `-' reverses sort order */ | |
| 744 | val++; | |
| 745 | } | |
| 746 | ||
| 747 | if (*val == 0) | |
| 748 | { | |
| 749 | /* A bare `+' means the default sort by name in ascending order; a bare | |
| 750 | `-' means to sort by name in descending order. */ | |
| 751 | glob_sorttype = SORT_NAME | r; | |
| 752 | return; | |
| 753 | } | |
| 754 | ||
| 755 | t = glob_findtype (val); | |
| 756 | /* any other value is equivalent to the historical behavior */ | |
| 757 | glob_sorttype = (t == SORT_NONE) ? t : t | r; | |
| 758 | } | |
| 759 | ||
| 760 | static int | |
| 761 | globsort_namecmp (char **s1, char **s2) | |
| 762 | { | |
| 763 | return ((glob_sorttype < SORT_REVERSE) ? strvec_posixcmp (s1, s2) : strvec_posixcmp (s2, s1)); | |
| 764 | } | |
| 765 | ||
/* Generic transitive comparison of two numeric values for qsort: evaluates
   to -1 if A < B, 1 if A > B, and 0 if they are equal.  Both arguments are
   evaluated twice, so they must not have side effects. */
/* #define GENCMP(a,b) ((a) < (b) ? -1 : ((a) > (b) ? 1 : 0)) */
/* A clever idea from gnulib: subtract the two 0-or-1 comparison results
   instead of subtracting the values themselves. */
#define GENCMP(a,b) (((a) > (b)) - ((a) < (b)))
| 770 | ||
| 771 | static int | |
| 772 | globsort_sizecmp (struct globsort_t *g1, struct globsort_t *g2) | |
| 773 | { | |
| 774 | int x; | |
| 775 | ||
| 776 | x = (glob_sorttype < SORT_REVERSE) ? GENCMP(g1->st.size, g2->st.size) : GENCMP(g2->st.size, g1->st.size); | |
| 777 | return (x == 0) ? (globsort_namecmp (&g1->name, &g2->name)) : x; | |
| 778 | } | |
| 779 | ||
| 780 | static int | |
| 781 | globsort_timecmp (struct globsort_t *g1, struct globsort_t *g2) | |
| 782 | { | |
| 783 | int t, x; | |
| 784 | struct timespec t1, t2; | |
| 785 | ||
| 786 | t = (glob_sorttype < SORT_REVERSE) ? glob_sorttype : glob_sorttype - SORT_REVERSE; | |
| 787 | if (t == SORT_MTIME) | |
| 788 | { | |
| 789 | t1 = g1->st.mtime; | |
| 790 | t2 = g2->st.mtime; | |
| 791 | } | |
| 792 | else if (t == SORT_ATIME) | |
| 793 | { | |
| 794 | t1 = g1->st.atime; | |
| 795 | t2 = g2->st.atime; | |
| 796 | } | |
| 797 | else | |
| 798 | { | |
| 799 | t1 = g1->st.ctime; | |
| 800 | t2 = g2->st.ctime; | |
| 801 | } | |
| 802 | ||
| 803 | x = (glob_sorttype < SORT_REVERSE) ? timespec_cmp (t1, t2) : timespec_cmp (t2, t1); | |
| 804 | return (x == 0) ? (globsort_namecmp (&g1->name, &g2->name)) : x; | |
| 805 | } | |
| 806 | ||
| 807 | static int | |
| 808 | globsort_blockscmp (struct globsort_t *g1, struct globsort_t *g2) | |
| 809 | { | |
| 810 | int x; | |
| 811 | ||
| 812 | x = (glob_sorttype < SORT_REVERSE) ? GENCMP(g1->st.blocks, g2->st.blocks) : GENCMP(g2->st.blocks, g1->st.blocks); | |
| 813 | return (x == 0) ? (globsort_namecmp (&g1->name, &g2->name)) : x; | |
| 814 | } | |
| 815 | ||
/* If all_digits accepts STRING, convert it as a base-10 number with
   strtoimax and store the result in *VAL.  Returns the value returned by
   all_digits; when that is zero, *VAL is left untouched. */
static inline int
gs_checknum (char *string, intmax_t *val)
{
  int v;

  v = all_digits (string);
  if (v)
    *val = strtoimax (string, (char **)NULL, 10);
  return v;
}
| 827 | ||
| 828 | static int | |
| 829 | globsort_numericcmp (struct globsort_t *g1, struct globsort_t *g2) | |
| 830 | { | |
| 831 | intmax_t i1, i2; | |
| 832 | int v1, v2, x; | |
| 833 | ||
| 834 | /* like valid_number but doesn't allow leading/trailing whitespace or sign */ | |
| 835 | v1 = gs_checknum (g1->name, &i1); | |
| 836 | v2 = gs_checknum (g2->name, &i2); | |
| 837 | ||
| 838 | if (v1 && v2) /* both valid numbers */ | |
| 839 | /* Don't need to fall back to name comparison here */ | |
| 840 | return (glob_sorttype < SORT_REVERSE) ? GENCMP(i1, i2) : GENCMP(i2, i1); | |
| 841 | else if (v1 == 0 && v2 == 0) /* neither valid numbers */ | |
| 842 | return (globsort_namecmp (&g1->name, &g2->name)); | |
| 843 | else if (v1 != 0 && v2 == 0) | |
| 844 | return (glob_sorttype < SORT_REVERSE) ? -1 : 1; | |
| 845 | else | |
| 846 | return (glob_sorttype < SORT_REVERSE) ? 1 : -1; | |
| 847 | } | |
| 848 | ||
| 849 | #undef GENCMP | |
| 850 | ||
| 851 | static struct globsort_t * | |
| 852 | globsort_buildarray (char **array, size_t len) | |
| 853 | { | |
| 854 | struct globsort_t *ret; | |
| 855 | int i; | |
| 856 | struct stat st; | |
| 857 | ||
| 858 | ret = (struct globsort_t *)xmalloc (len * sizeof (struct globsort_t)); | |
| 859 | ||
| 860 | for (i = 0; i < len; i++) | |
| 861 | { | |
| 862 | ret[i].name = array[i]; | |
| 863 | if (stat (array[i], &st) != 0) | |
| 864 | ret[i].st = glob_nullstat; | |
| 865 | else | |
| 866 | { | |
| 867 | ret[i].st.size = st.st_size; | |
| 868 | ret[i].st.mtime = get_stat_mtime (&st); | |
| 869 | ret[i].st.atime = get_stat_atime (&st); | |
| 870 | ret[i].st.ctime = get_stat_ctime (&st); | |
| 871 | ret[i].st.blocks = st.st_blocks; | |
| 872 | } | |
| 873 | } | |
| 874 | ||
| 875 | return ret; | |
| 876 | } | |
| 877 | ||
| 878 | static inline void | |
| 879 | globsort_sortbyname (char **results) | |
| 880 | { | |
| 881 | qsort (results, strvec_len (results), sizeof (char *), (QSFUNC *)globsort_namecmp); | |
| 882 | } | |
| 883 | ||
| 884 | static void | |
| 885 | globsort_sortarray (struct globsort_t *garray, size_t len) | |
| 886 | { | |
| 887 | int t; | |
| 888 | QSFUNC *sortfunc; | |
| 889 | ||
| 890 | t = (glob_sorttype < SORT_REVERSE) ? glob_sorttype : glob_sorttype - SORT_REVERSE; | |
| 891 | ||
| 892 | switch (t) | |
| 893 | { | |
| 894 | case SORT_SIZE: | |
| 895 | sortfunc = (QSFUNC *)globsort_sizecmp; | |
| 896 | break; | |
| 897 | case SORT_ATIME: | |
| 898 | case SORT_MTIME: | |
| 899 | case SORT_CTIME: | |
| 900 | sortfunc = (QSFUNC *)globsort_timecmp; | |
| 901 | break; | |
| 902 | case SORT_BLOCKS: | |
| 903 | sortfunc = (QSFUNC *)globsort_blockscmp; | |
| 904 | break; | |
| 905 | case SORT_NUMERIC: | |
| 906 | sortfunc = (QSFUNC *)globsort_numericcmp; | |
| 907 | break; | |
| 908 | default: | |
| 909 | internal_error (_("invalid glob sort type")); | |
| 910 | break; | |
| 911 | } | |
| 912 | ||
| 913 | qsort (garray, len, sizeof (struct globsort_t), sortfunc); | |
| 914 | } | |
| 915 | ||
| 916 | static void | |
| 917 | sh_sortglob (char **results) | |
| 918 | { | |
| 919 | size_t rlen; | |
| 920 | struct globsort_t *garray; | |
| 921 | ||
| 922 | if (glob_sorttype == SORT_NOSORT || glob_sorttype == (SORT_NOSORT|SORT_REVERSE)) | |
| 923 | return; | |
| 924 | ||
| 925 | if (glob_sorttype == SORT_NONE || glob_sorttype == SORT_NAME) | |
| 926 | globsort_sortbyname (results); /* posix sort */ | |
| 927 | else if (glob_sorttype == (SORT_NAME|SORT_REVERSE)) | |
| 928 | globsort_sortbyname (results); /* posix sort reverse order */ | |
| 929 | else | |
| 930 | { | |
| 931 | int i; | |
| 932 | ||
| 933 | rlen = strvec_len (results); | |
| 934 | /* populate an array of name/statinfo, sort it appropriately, copy the | |
| 935 | names from the sorted array back to RESULTS, and free the array */ | |
| 936 | garray = globsort_buildarray (results, rlen); | |
| 937 | globsort_sortarray (garray, rlen); | |
| 938 | for (i = 0; i < rlen; i++) | |
| 939 | results[i] = garray[i].name; | |
| 940 | free (garray); | |
| 941 | } | |
| 942 | } |