posix/fnmatch_loop.c

   1 /* Copyright (C) 1991-2013 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    The GNU C Library is free software; you can redistribute it and/or
   5    modify it under the terms of the GNU Lesser General Public
   6    License as published by the Free Software Foundation; either
   7    version 2.1 of the License, or (at your option) any later version.
   8
   9    The GNU C Library is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12    Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public
  15    License along with the GNU C Library; if not, see
  16    <http://www.gnu.org/licenses/>.  */
  17
  18 #include <stdint.h>
  19
  20 struct STRUCT
  21 {
     /* Resume point handed back by a nested FCT call.  When FCT is given a
        non-NULL ENDS argument and reaches a further `*' in the pattern
        (one that is not taken as an extended-match group), it stores its
        current state here and returns 0.  The calling FCT then continues
        matching from that state.  That caller sets PATTERN to NULL
        beforehand, so a NULL PATTERN afterwards means that no resume point
        was recorded.  */
  22   const CHAR *pattern;   /* Points at the `*' where the nested match stopped.  */
  23   const CHAR *string;    /* Position reached in the string at that moment.  */
  24   int no_leading_period; /* Value of no_leading_period at that moment.  */
  25 };
  26
  27 /* Match STRING against the filename pattern PATTERN, returning zero if
  28    it matches, nonzero if not.  */
  29 static int FCT (const CHAR *pattern, const CHAR *string,
  30                 const CHAR *string_end, int no_leading_period, int flags,
  31                 struct STRUCT *ends, size_t alloca_used)
  32      internal_function;
     /* Helper that FCT calls when FNM_EXTMATCH is set and a `?' or `*' in the
        pattern is directly followed by `('.  FCT passes the character that
        precedes the parenthesis as OPT and a pointer to the `(' as PATTERN.
        A result of -1 makes FCT fall back to its ordinary handling of that
        character; any other result is returned by FCT unchanged.
        NOTE(review): the definition is outside this view; confirm the
        meaning of -1 against it.  */
  33 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  34                 const CHAR *string_end, int no_leading_period, int flags,
  35                 size_t alloca_used)
  36      internal_function;
     /* Helper that FCT calls with a pointer to a `(' while skipping the
        wildcards that follow a `*'.  FCT treats a return value different
        from the argument as the end of an extended pattern and continues
        from there; a return value equal to the argument means there is
        nothing to skip.
        NOTE(review): the definition is outside this view; confirm exactly
        where the returned pointer lands.  */
  37 static const CHAR *END (const CHAR *patternp) internal_function;
  38
  39 static int
  40 internal_function
  41 FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
  42      const CHAR *pattern;
  43      const CHAR *string;
  44      const CHAR *string_end;
  45      int no_leading_period;
  46      int flags;
  47      struct STRUCT *ends;
  48      size_t alloca_used;
  49 {
  50   const CHAR *p = pattern, *n = string;
  51   UCHAR c;
  52 #ifdef _LIBC
  53 # if WIDE_CHAR_VERSION
  54   const char *collseq = (const char *)
  55     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  56 # else
  57   const UCHAR *collseq = (const UCHAR *)
  58     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  59 # endif
  60 #endif
  61
  62   while ((c = *p++) != L('\0'))
  63     {
  64       int new_no_leading_period = 0;
  65       c = FOLD (c);
  66
  67       switch (c)
  68         {
  69         case L('?'):
  70           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  71             {
  72               int res = EXT (c, p, n, string_end, no_leading_period,
  73                              flags, alloca_used);
  74               if (res != -1)
  75                 return res;
  76             }
  77
  78           if (n == string_end)
  79             return FNM_NOMATCH;
  80           else if (*n == L('/') && (flags & FNM_FILE_NAME))
  81             return FNM_NOMATCH;
  82           else if (*n == L('.') && no_leading_period)
  83             return FNM_NOMATCH;
  84           break;
  85
  86         case L('\\'):
  87           if (!(flags & FNM_NOESCAPE))
  88             {
  89               c = *p++;
  90               if (c == L('\0'))
  91                 /* Trailing \ loses.  */
  92                 return FNM_NOMATCH;
  93               c = FOLD (c);
  94             }
  95           if (n == string_end || FOLD ((UCHAR) *n) != c)
  96             return FNM_NOMATCH;
  97           break;
  98
  99         case L('*'):
 100           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 101             {
 102               int res = EXT (c, p, n, string_end, no_leading_period,
 103                              flags, alloca_used);
 104               if (res != -1)
 105                 return res;
 106             }
 107           else if (ends != NULL)
 108             {
 109               ends->pattern = p - 1;
 110               ends->string = n;
 111               ends->no_leading_period = no_leading_period;
 112               return 0;
 113             }
 114
 115           if (n != string_end && *n == L('.') && no_leading_period)
 116             return FNM_NOMATCH;
 117
 118           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
 119             {
 120               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
 121                 {
 122                   const CHAR *endp = END (p);
 123                   if (endp != p)
 124                     {
 125                       /* This is a pattern.  Skip over it.  */
 126                       p = endp;
 127                       continue;
 128                     }
 129                 }
 130
 131               if (c == L('?'))
 132                 {
 133                   /* A ? needs to match one character.  */
 134                   if (n == string_end)
 135                     /* There isn't another character; no match.  */
 136                     return FNM_NOMATCH;
 137                   else if (*n == L('/')
 138                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 139                     /* A slash does not match a wildcard under
 140                        FNM_FILE_NAME.  */
 141                     return FNM_NOMATCH;
 142                   else
 143                     /* One character of the string is consumed in matching
 144                        this ? wildcard, so *??? won't match if there are
 145                        less than three characters.  */
 146                     ++n;
 147                 }
 148             }
 149
 150           if (c == L('\0'))
 151             /* The wildcard(s) is/are the last element of the pattern.
 152                If the name is a file name and contains another slash
 153                this means it cannot match, unless the FNM_LEADING_DIR
 154                flag is set.  */
 155             {
 156               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 157
 158               if (flags & FNM_FILE_NAME)
 159                 {
 160                   if (flags & FNM_LEADING_DIR)
 161                     result = 0;
 162                   else
 163                     {
 164                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
 165                         result = 0;
 166                     }
 167                 }
 168
 169               return result;
 170             }
 171           else
 172             {
 173               const CHAR *endp;
 174               struct STRUCT end;
 175
 176               end.pattern = NULL;
 177               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
 178                              string_end - n);
 179               if (endp == NULL)
 180                 endp = string_end;
 181
 182               if (c == L('[')
 183                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 184                       && (c == L('@') || c == L('+') || c == L('!'))
 185                       && *p == L('(')))
 186                 {
 187                   int flags2 = ((flags & FNM_FILE_NAME)
 188                                 ? flags : (flags & ~FNM_PERIOD));
 189
 190                   for (--p; n < endp; ++n, no_leading_period = 0)
 191                     if (FCT (p, n, string_end, no_leading_period, flags2,
 192                              &end, alloca_used) == 0)
 193                       goto found;
 194                 }
 195               else if (c == L('/') && (flags & FNM_FILE_NAME))
 196                 {
 197                   while (n < string_end && *n != L('/'))
 198                     ++n;
 199                   if (n < string_end && *n == L('/')
 200                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
 201                                NULL, alloca_used) == 0))
 202                     return 0;
 203                 }
 204               else
 205                 {
 206                   int flags2 = ((flags & FNM_FILE_NAME)
 207                                 ? flags : (flags & ~FNM_PERIOD));
 208
 209                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
 210                     c = *p;
 211                   c = FOLD (c);
 212                   for (--p; n < endp; ++n, no_leading_period = 0)
 213                     if (FOLD ((UCHAR) *n) == c
 214                         && (FCT (p, n, string_end, no_leading_period, flags2,
 215                                  &end, alloca_used) == 0))
 216                       {
 217                       found:
 218                         if (end.pattern == NULL)
 219                           return 0;
 220                         break;
 221                       }
 222                   if (end.pattern != NULL)
 223                     {
 224                       p = end.pattern;
 225                       n = end.string;
 226                       no_leading_period = end.no_leading_period;
 227                       continue;
 228                     }
 229                 }
 230             }
 231
 232           /* If we come here no match is possible with the wildcard.  */
 233           return FNM_NOMATCH;
 234
 235         case L('['):
 236           {
 237             /* Nonzero if the sense of the character class is inverted.  */
 238             const CHAR *p_init = p;
 239             const CHAR *n_init = n;
 240             int not;
 241             CHAR cold;
 242             UCHAR fn;
 243
 244             if (posixly_correct == 0)
 245               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 246
 247             if (n == string_end)
 248               return FNM_NOMATCH;
 249
 250             if (*n == L('.') && no_leading_period)
 251               return FNM_NOMATCH;
 252
 253             if (*n == L('/') && (flags & FNM_FILE_NAME))
 254               /* `/' cannot be matched.  */
 255               return FNM_NOMATCH;
 256
 257             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
 258             if (not)
 259               ++p;
 260
 261             fn = FOLD ((UCHAR) *n);
 262
 263             c = *p++;
 264             for (;;)
 265               {
 266                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 267                   {
 268                     if (*p == L('\0'))
 269                       return FNM_NOMATCH;
 270                     c = FOLD ((UCHAR) *p);
 271                     ++p;
 272
 273                     goto normal_bracket;
 274                   }
 275                 else if (c == L('[') && *p == L(':'))
 276                   {
 277                     /* Leave room for the null.  */
 278                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 279                     size_t c1 = 0;
 280 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 281                     wctype_t wt;
 282 #endif
 283                     const CHAR *startp = p;
 284
 285                     for (;;)
 286                       {
 287                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 288                           /* The name is too long and therefore the pattern
 289                              is ill-formed.  */
 290                           return FNM_NOMATCH;
 291
 292                         c = *++p;
 293                         if (c == L(':') && p[1] == L(']'))
 294                           {
 295                             p += 2;
 296                             break;
 297                           }
 298                         if (c < L('a') || c >= L('z'))
 299                           {
 300                             /* This cannot possibly be a character class name.
 301                                Match it as a normal range.  */
 302                             p = startp;
 303                             c = L('[');
 304                             goto normal_bracket;
 305                           }
 306                         str[c1++] = c;
 307                       }
 308                     str[c1] = L('\0');
 309
 310 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 311                     wt = IS_CHAR_CLASS (str);
 312                     if (wt == 0)
 313                       /* Invalid character class name.  */
 314                       return FNM_NOMATCH;
 315
 316 # if defined _LIBC && ! WIDE_CHAR_VERSION
 317                     /* The following code is glibc specific but does
 318                        there a good job in speeding up the code since
 319                        we can avoid the btowc() call.  */
 320                     if (_ISCTYPE ((UCHAR) *n, wt))
 321                       goto matched;
 322 # else
 323                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 324                       goto matched;
 325 # endif
 326 #else
 327                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
 328                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
 329                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
 330                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
 331                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
 332                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
 333                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
 334                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
 335                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
 336                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
 337                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
 338                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
 339                       goto matched;
 340 #endif
 341                     c = *p++;
 342                   }
 343 #ifdef _LIBC
 344                 else if (c == L('[') && *p == L('='))
 345                   {
 346                     UCHAR str[1];
 347                     uint32_t nrules =
 348                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 349                     const CHAR *startp = p;
 350
 351                     c = *++p;
 352                     if (c == L('\0'))
 353                       {
 354                         p = startp;
 355                         c = L('[');
 356                         goto normal_bracket;
 357                       }
 358                     str[0] = c;
 359
 360                     c = *++p;
 361                     if (c != L('=') || p[1] != L(']'))
 362                       {
 363                         p = startp;
 364                         c = L('[');
 365                         goto normal_bracket;
 366                       }
 367                     p += 2;
 368
 369                     if (nrules == 0)
 370                       {
 371                         if ((UCHAR) *n == str[0])
 372                           goto matched;
 373                       }
 374                     else
 375                       {
 376                         const int32_t *table;
 377 # if WIDE_CHAR_VERSION
 378                         const int32_t *weights;
 379                         const int32_t *extra;
 380 # else
 381                         const unsigned char *weights;
 382                         const unsigned char *extra;
 383 # endif
 384                         const int32_t *indirect;
 385                         int32_t idx;
 386                         const UCHAR *cp = (const UCHAR *) str;
 387
 388                         /* This #include defines a local function!  */
 389 # if WIDE_CHAR_VERSION
 390 #  include <locale/weightwc.h>
 391 # else
 392 #  include <locale/weight.h>
 393 # endif
 394
 395 # if WIDE_CHAR_VERSION
 396                         table = (const int32_t *)
 397                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 398                         weights = (const int32_t *)
 399                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 400                         extra = (const int32_t *)
 401                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 402                         indirect = (const int32_t *)
 403                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 404 # else
 405                         table = (const int32_t *)
 406                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 407                         weights = (const unsigned char *)
 408                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 409                         extra = (const unsigned char *)
 410                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 411                         indirect = (const int32_t *)
 412                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 413 # endif
 414
 415                         idx = findidx (&cp, 1);
 416                         if (idx != 0)
 417                           {
 418                             /* We found a table entry.  Now see whether the
 419                                character we are currently at has the same
 420                                equivalence class value.  */
 421                             int len = weights[idx & 0xffffff];
 422                             int32_t idx2;
 423                             const UCHAR *np = (const UCHAR *) n;
 424
 425                             idx2 = findidx (&np, string_end - n);
 426                             if (idx2 != 0
 427                                 && (idx >> 24) == (idx2 >> 24)
 428                                 && len == weights[idx2 & 0xffffff])
 429                               {
 430                                 int cnt = 0;
 431
 432                                 idx &= 0xffffff;
 433                                 idx2 &= 0xffffff;
 434
 435                                 while (cnt < len
 436                                        && (weights[idx + 1 + cnt]
 437                                            == weights[idx2 + 1 + cnt]))
 438                                   ++cnt;
 439
 440                                 if (cnt == len)
 441                                   goto matched;
 442                               }
 443                           }
 444                       }
 445
 446                     c = *p++;
 447                   }
 448 #endif
 449                 else if (c == L('\0'))
 450                   {
 451                     /* [ unterminated, treat as normal character.  */
 452                     p = p_init;
 453                     n = n_init;
 454                     c = L('[');
 455                     goto normal_match;
 456                   }
 457                 else
 458                   {
 459                     int is_range = 0;
 460
 461 #ifdef _LIBC
 462                     int is_seqval = 0;
 463
 464                     if (c == L('[') && *p == L('.'))
 465                       {
 466                         uint32_t nrules =
 467                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 468                         const CHAR *startp = p;
 469                         size_t c1 = 0;
 470
 471                         while (1)
 472                           {
 473                             c = *++p;
 474                             if (c == L('.') && p[1] == L(']'))
 475                               {
 476                                 p += 2;
 477                                 break;
 478                               }
 479                             if (c == '\0')
 480                               return FNM_NOMATCH;
 481                             ++c1;
 482                           }
 483
 484                         /* We have to handle the symbols differently in
 485                            ranges since then the collation sequence is
 486                            important.  */
 487                         is_range = *p == L('-') && p[1] != L('\0');
 488
 489                         if (nrules == 0)
 490                           {
 491                             /* There are no names defined in the collation
 492                                data.  Therefore we only accept the trivial
 493                                names consisting of the character itself.  */
 494                             if (c1 != 1)
 495                               return FNM_NOMATCH;
 496
 497                             if (!is_range && *n == startp[1])
 498                               goto matched;
 499
 500                             cold = startp[1];
 501                             c = *p++;
 502                           }
 503                         else
 504                           {
 505                             int32_t table_size;
 506                             const int32_t *symb_table;
 507 # ifdef WIDE_CHAR_VERSION
 508                             char str[c1];
 509                             unsigned int strcnt;
 510 # else
 511 #  define str (startp + 1)
 512 # endif
 513                             const unsigned char *extra;
 514                             int32_t idx;
 515                             int32_t elem;
 516                             int32_t second;
 517                             int32_t hash;
 518
 519 # ifdef WIDE_CHAR_VERSION
 520                             /* We have to convert the name to a single-byte
 521                                string.  This is possible since the names
 522                                consist of ASCII characters and the internal
 523                                representation is UCS4.  */
 524                             for (strcnt = 0; strcnt < c1; ++strcnt)
 525                               str[strcnt] = startp[1 + strcnt];
 526 #endif
 527
 528                             table_size =
 529                               _NL_CURRENT_WORD (LC_COLLATE,
 530                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 531                             symb_table = (const int32_t *)
 532                               _NL_CURRENT (LC_COLLATE,
 533                                            _NL_COLLATE_SYMB_TABLEMB);
 534                             extra = (const unsigned char *)
 535                               _NL_CURRENT (LC_COLLATE,
 536                                            _NL_COLLATE_SYMB_EXTRAMB);
 537
 538                             /* Locate the character in the hashing table.  */
 539                             hash = elem_hash (str, c1);
 540
 541                             idx = 0;
 542                             elem = hash % table_size;
 543                             if (symb_table[2 * elem] != 0)
 544                               {
 545                                 second = hash % (table_size - 2) + 1;
 546
 547                                 do
 548                                   {
 549                                     /* First compare the hashing value.  */
 550                                     if (symb_table[2 * elem] == hash
 551                                         && (c1
 552                                             == extra[symb_table[2 * elem + 1]])
 553                                         && memcmp (str,
 554                                                    &extra[symb_table[2 * elem
 555                                                                      + 1]
 556                                                           + 1], c1) == 0)
 557                                       {
 558                                         /* Yep, this is the entry.  */
 559                                         idx = symb_table[2 * elem + 1];
 560                                         idx += 1 + extra[idx];
 561                                         break;
 562                                       }
 563
 564                                     /* Next entry.  */
 565                                     elem += second;
 566                                   }
 567                                 while (symb_table[2 * elem] != 0);
 568                               }
 569
 570                             if (symb_table[2 * elem] != 0)
 571                               {
 572                                 /* Compare the byte sequence but only if
 573                                    this is not part of a range.  */
 574 # ifdef WIDE_CHAR_VERSION
 575                                 int32_t *wextra;
 576
 577                                 idx += 1 + extra[idx];
 578                                 /* Adjust for the alignment.  */
 579                                 idx = (idx + 3) & ~3;
 580
 581                                 wextra = (int32_t *) &extra[idx + 4];
 582 # endif
 583
 584                                 if (! is_range)
 585                                   {
 586 # ifdef WIDE_CHAR_VERSION
 587                                     for (c1 = 0;
 588                                          (int32_t) c1 < wextra[idx];
 589                                          ++c1)
 590                                       if (n[c1] != wextra[1 + c1])
 591                                         break;
 592
 593                                     if ((int32_t) c1 == wextra[idx])
 594                                       goto matched;
 595 # else
 596                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 597                                       if (n[c1] != extra[1 + c1])
 598                                         break;
 599
 600                                     if (c1 == extra[idx])
 601                                       goto matched;
 602 # endif
 603                                   }
 604
 605                                 /* Get the collation sequence value.  */
 606                                 is_seqval = 1;
 607 # ifdef WIDE_CHAR_VERSION
 608                                 cold = wextra[1 + wextra[idx]];
 609 # else
 610                                 /* Adjust for the alignment.  */
 611                                 idx += 1 + extra[idx];
 612                                 idx = (idx + 3) & ~4;
 613                                 cold = *((int32_t *) &extra[idx]);
 614 # endif
 615
 616                                 c = *p++;
 617                               }
 618                             else if (c1 == 1)
 619                               {
 620                                 /* No valid character.  Match it as a
 621                                    single byte.  */
 622                                 if (!is_range && *n == str[0])
 623                                   goto matched;
 624
 625                                 cold = str[0];
 626                                 c = *p++;
 627                               }
 628                             else
 629                               return FNM_NOMATCH;
 630                           }
 631                       }
 632                     else
 633 # undef str
 634 #endif
 635                       {
 636                         c = FOLD (c);
 637                       normal_bracket:
 638
 639                         /* We have to handle the symbols differently in
 640                            ranges since then the collation sequence is
 641                            important.  */
 642                         is_range = (*p == L('-') && p[1] != L('\0')
 643                                     && p[1] != L(']'));
 644
 645                         if (!is_range && c == fn)
 646                           goto matched;
 647
 648                         /* This is needed if we goto normal_bracket; from
 649                            outside of is_seqval's scope.  */
 650                         is_seqval = 0;
 651                         cold = c;
 652                         c = *p++;
 653                       }
 654
 655                     if (c == L('-') && *p != L(']'))
 656                       {
 657 #if _LIBC
 658                         /* We have to find the collation sequence
 659                            value for C.  Collation sequence is nothing
 660                            we can regularly access.  The sequence
 661                            value is defined by the order in which the
 662                            definitions of the collation values for the
 663                            various characters appear in the source
 664                            file.  A strange concept, nowhere
 665                            documented.  */
 666                         uint32_t fcollseq;
 667                         uint32_t lcollseq;
 668                         UCHAR cend = *p++;
 669
 670 # ifdef WIDE_CHAR_VERSION
 671                         /* Search in the `names' array for the characters.  */
 672                         fcollseq = __collseq_table_lookup (collseq, fn);
 673                         if (fcollseq == ~((uint32_t) 0))
 674                           /* XXX We don't know anything about the character
 675                              we are supposed to match.  This means we are
 676                              failing.  */
 677                           goto range_not_matched;
 678
 679                         if (is_seqval)
 680                           lcollseq = cold;
 681                         else
 682                           lcollseq = __collseq_table_lookup (collseq, cold);
 683 # else
 684                         fcollseq = collseq[fn];
 685                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 686 # endif
 687
 688                         is_seqval = 0;
 689                         if (cend == L('[') && *p == L('.'))
 690                           {
 691                             uint32_t nrules =
 692                               _NL_CURRENT_WORD (LC_COLLATE,
 693                                                 _NL_COLLATE_NRULES);
 694                             const CHAR *startp = p;
 695                             size_t c1 = 0;
 696
 697                             while (1)
 698                               {
 699                                 c = *++p;
 700                                 if (c == L('.') && p[1] == L(']'))
 701                                   {
 702                                     p += 2;
 703                                     break;
 704                                   }
 705                                 if (c == '\0')
 706                                   return FNM_NOMATCH;
 707                                 ++c1;
 708                               }
 709
 710                             if (nrules == 0)
 711                               {
 712                                 /* There are no names defined in the
 713                                    collation data.  Therefore we only
 714                                    accept the trivial names consisting
 715                                    of the character itself.  */
 716                                 if (c1 != 1)
 717                                   return FNM_NOMATCH;
 718
 719                                 cend = startp[1];
 720                               }
 721                             else
 722                               {
 723                                 int32_t table_size;
 724                                 const int32_t *symb_table;
 725 # ifdef WIDE_CHAR_VERSION
 726                                 char str[c1];
 727                                 unsigned int strcnt;
 728 # else
 729 #  define str (startp + 1)
 730 # endif
 731                                 const unsigned char *extra;
 732                                 int32_t idx;
 733                                 int32_t elem;
 734                                 int32_t second;
 735                                 int32_t hash;
 736
 737 # ifdef WIDE_CHAR_VERSION
 738                                 /* We have to convert the name to a single-byte
 739                                    string.  This is possible since the names
 740                                    consist of ASCII characters and the internal
 741                                    representation is UCS4.  */
 742                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 743                                   str[strcnt] = startp[1 + strcnt];
 744 # endif
 745
 746                                 table_size =
 747                                   _NL_CURRENT_WORD (LC_COLLATE,
 748                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 749                                 symb_table = (const int32_t *)
 750                                   _NL_CURRENT (LC_COLLATE,
 751                                                _NL_COLLATE_SYMB_TABLEMB);
 752                                 extra = (const unsigned char *)
 753                                   _NL_CURRENT (LC_COLLATE,
 754                                                _NL_COLLATE_SYMB_EXTRAMB);
 755
 756                                 /* Locate the character in the hashing
 757                                    table.  */
 758                                 hash = elem_hash (str, c1);
 759
 760                                 idx = 0;
 761                                 elem = hash % table_size;
 762                                 if (symb_table[2 * elem] != 0)
 763                                   {
 764                                     second = hash % (table_size - 2) + 1;
 765
 766                                     do
 767                                       {
 768                                         /* First compare the hashing value.  */
 769                                         if (symb_table[2 * elem] == hash
 770                                             && (c1
 771                                                 == extra[symb_table[2 * elem + 1]])
 772                                             && memcmp (str,
 773                                                        &extra[symb_table[2 * elem + 1]
 774                                                               + 1], c1) == 0)
 775                                           {
 776                                             /* Yep, this is the entry.  */
 777                                             idx = symb_table[2 * elem + 1];
 778                                             idx += 1 + extra[idx];
 779                                             break;
 780                                           }
 781
 782                                         /* Next entry.  */
 783                                         elem += second;
 784                                       }
 785                                     while (symb_table[2 * elem] != 0);
 786                                   }
 787
 788                                 if (symb_table[2 * elem] != 0)
 789                                   {
 790                                     /* Compare the byte sequence but only if
 791                                        this is not part of a range.  */
 792 # ifdef WIDE_CHAR_VERSION
 793                                     int32_t *wextra;
 794
 795                                     idx += 1 + extra[idx];
 796                                     /* Adjust for the alignment.  */
 797                                     idx = (idx + 3) & ~4;
 798
 799                                     wextra = (int32_t *) &extra[idx + 4];
 800 # endif
 801                                     /* Get the collation sequence value.  */
 802                                     is_seqval = 1;
 803 # ifdef WIDE_CHAR_VERSION
 804                                     cend = wextra[1 + wextra[idx]];
 805 # else
 806                                     /* Adjust for the alignment.  */
 807                                     idx += 1 + extra[idx];
 808                                     idx = (idx + 3) & ~4;
 809                                     cend = *((int32_t *) &extra[idx]);
 810 # endif
 811                                   }
 812                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 813                                   {
 814                                     cend = str[0];
 815                                     c = *p++;
 816                                   }
 817                                 else
 818                                   return FNM_NOMATCH;
 819                               }
 820 # undef str
 821                           }
 822                         else
 823                           {
 824                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 825                               cend = *p++;
 826                             if (cend == L('\0'))
 827                               return FNM_NOMATCH;
 828                             cend = FOLD (cend);
 829                           }
 830
 831                         /* XXX It is not entirely clear to me how to handle
 832                            characters which are not mentioned in the
 833                            collation specification.  */
 834                         if (
 835 # ifdef WIDE_CHAR_VERSION
 836                             lcollseq == 0xffffffff ||
 837 # endif
 838                             lcollseq <= fcollseq)
 839                           {
 840                             /* We have to look at the upper bound.  */
 841                             uint32_t hcollseq;
 842
 843                             if (is_seqval)
 844                               hcollseq = cend;
 845                             else
 846                               {
 847 # ifdef WIDE_CHAR_VERSION
 848                                 hcollseq =
 849                                   __collseq_table_lookup (collseq, cend);
 850                                 if (hcollseq == ~((uint32_t) 0))
 851                                   {
 852                                     /* Hum, no information about the upper
 853                                        bound.  The matching succeeds if the
 854                                        lower bound is matched exactly.  */
 855                                     if (lcollseq != fcollseq)
 856                                       goto range_not_matched;
 857
 858                                     goto matched;
 859                                   }
 860 # else
 861                                 hcollseq = collseq[cend];
 862 # endif
 863                               }
 864
 865                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 866                               goto matched;
 867                           }
 868 # ifdef WIDE_CHAR_VERSION
 869                       range_not_matched:
 870 # endif
 871 #else
 872                         /* We use a boring value comparison of the character
 873                            values.  This is better than comparing using
 874                            `strcoll' since the latter would have surprising
 875                            and sometimes fatal consequences.  */
 876                         UCHAR cend = *p++;
 877
 878                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 879                           cend = *p++;
 880                         if (cend == L('\0'))
 881                           return FNM_NOMATCH;
 882
 883                         /* It is a range.  */
 884                         if (cold <= fn && fn <= cend)
 885                           goto matched;
 886 #endif
 887
 888                         c = *p++;
 889                       }
 890                   }
 891
 892                 if (c == L(']'))
 893                   break;
 894               }
 895
 896             if (!not)
 897               return FNM_NOMATCH;
 898             break;
 899
 900           matched:
 901             /* Skip the rest of the [...] that already matched.  */
 902             do
 903               {
 904               ignore_next:
 905                 c = *p++;
 906
 907                 if (c == L('\0'))
 908                   /* [... (unterminated) loses.  */
 909                   return FNM_NOMATCH;
 910
 911                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 912                   {
 913                     if (*p == L('\0'))
 914                       return FNM_NOMATCH;
 915                     /* XXX 1003.2d11 is unclear if this is right.  */
 916                     ++p;
 917                   }
 918                 else if (c == L('[') && *p == L(':'))
 919                   {
 920                     int c1 = 0;
 921                     const CHAR *startp = p;
 922
 923                     while (1)
 924                       {
 925                         c = *++p;
 926                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 927                           return FNM_NOMATCH;
 928
 929                         if (*p == L(':') && p[1] == L(']'))
 930                           break;
 931
 932                         if (c < L('a') || c >= L('z'))
 933                           {
 934                             p = startp;
 935                             goto ignore_next;
 936                           }
 937                       }
 938                     p += 2;
 939                     c = *p++;
 940                   }
 941                 else if (c == L('[') && *p == L('='))
 942                   {
 943                     c = *++p;
 944                     if (c == L('\0'))
 945                       return FNM_NOMATCH;
 946                     c = *++p;
 947                     if (c != L('=') || p[1] != L(']'))
 948                       return FNM_NOMATCH;
 949                     p += 2;
 950                     c = *p++;
 951                   }
 952                 else if (c == L('[') && *p == L('.'))
 953                   {
 954                     ++p;
 955                     while (1)
 956                       {
 957                         c = *++p;
 958                         if (c == '\0')
 959                           return FNM_NOMATCH;
 960
 961                         if (*p == L('.') && p[1] == L(']'))
 962                           break;
 963                       }
 964                     p += 2;
 965                     c = *p++;
 966                   }
 967               }
 968             while (c != L(']'));
 969             if (not)
 970               return FNM_NOMATCH;
 971           }
 972           break;
 973
 974         case L('+'):
 975         case L('@'):
 976         case L('!'):
 977           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 978             {
 979               int res = EXT (c, p, n, string_end, no_leading_period, flags,
 980                              alloca_used);
 981               if (res != -1)
 982                 return res;
 983             }
 984           goto normal_match;
 985
 986         case L('/'):
 987           if (NO_LEADING_PERIOD (flags))
 988             {
 989               if (n == string_end || c != (UCHAR) *n)
 990                 return FNM_NOMATCH;
 991
 992               new_no_leading_period = 1;
 993               break;
 994             }
 995           /* FALLTHROUGH */
 996         default:
 997         normal_match:
 998           if (n == string_end || c != FOLD ((UCHAR) *n))
 999             return FNM_NOMATCH;
1000         }
1001
1002       no_leading_period = new_no_leading_period;
1003       ++n;
1004     }
1005
1006   if (n == string_end)
1007     return 0;
1008
1009   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
1010     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
1011     return 0;
1012
1013   return FNM_NOMATCH;
1014 }
1015
1016
1017 static const CHAR *
1018 internal_function
1019 END (const CHAR *pattern)
1020 {
1021   const CHAR *p = pattern;
1022
1023   while (1)
1024     if (*++p == L('\0'))
1025       /* This is an invalid pattern.  */
1026       return pattern;
1027     else if (*p == L('['))
1028       {
1029         /* Handle brackets special.  */
1030         if (posixly_correct == 0)
1031           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1032
1033         /* Skip the not sign.  We have to recognize it because of a possibly
1034            following ']'.  */
1035         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1036           ++p;
1037         /* A leading ']' is recognized as such.  */
1038         if (*p == L(']'))
1039           ++p;
1040         /* Skip over all characters of the list.  */
1041         while (*p != L(']'))
1042           if (*p++ == L('\0'))
1043             /* This is no valid pattern.  */
1044             return pattern;
1045       }
1046     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1047               || *p == L('!')) && p[1] == L('('))
1048       p = END (p + 1);
1049     else if (*p == L(')'))
1050       break;
1051
1052   return p + 1;
1053 }
1054
1055
1056 static int
1057 internal_function
1058 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1059      int no_leading_period, int flags, size_t alloca_used)
1060 {
1061   const CHAR *startp;
1062   int level;
1063   struct patternlist
1064   {
1065     struct patternlist *next;
1066     CHAR malloced;
1067     CHAR str[0];
1068   } *list = NULL;
1069   struct patternlist **lastp = &list;
1070   size_t pattern_len = STRLEN (pattern);
1071   int any_malloced = 0;
1072   const CHAR *p;
1073   const CHAR *rs;
1074   int retval = 0;
1075
1076   /* Parse the pattern.  Store the individual parts in the list.  */
1077   level = 0;
1078   for (startp = p = pattern + 1; level >= 0; ++p)
1079     if (*p == L('\0'))
1080       {
1081         /* This is an invalid pattern.  */
1082         retval = -1;
1083         goto out;
1084       }
1085     else if (*p == L('['))
1086       {
1087         /* Handle brackets special.  */
1088         if (posixly_correct == 0)
1089           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1090
1091         /* Skip the not sign.  We have to recognize it because of a possibly
1092            following ']'.  */
1093         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1094           ++p;
1095         /* A leading ']' is recognized as such.  */
1096         if (*p == L(']'))
1097           ++p;
1098         /* Skip over all characters of the list.  */
1099         while (*p != L(']'))
1100           if (*p++ == L('\0'))
1101             {
1102               /* This is no valid pattern.  */
1103               retval = -1;
1104               goto out;
1105             }
1106       }
1107     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1108               || *p == L('!')) && p[1] == L('('))
1109       /* Remember the nesting level.  */
1110       ++level;
1111     else if (*p == L(')'))
1112       {
1113         if (level-- == 0)
1114           {
1115             /* This means we found the end of the pattern.  */
1116 #define NEW_PATTERN \
1117             struct patternlist *newp;                                         \
1118             size_t slen = (opt == L('?') || opt == L('@')                     \
1119                            ? pattern_len : (p - startp + 1));                 \
1120             slen = sizeof (struct patternlist) + (slen * sizeof (CHAR));      \
1121             int malloced = ! __libc_use_alloca (alloca_used + slen);          \
1122             if (__builtin_expect (malloced, 0))                               \
1123               {                                                               \
1124                 newp = malloc (slen);                                         \
1125                 if (newp == NULL)                                             \
1126                   {                                                           \
1127                     retval = -2;                                              \
1128                     goto out;                                                 \
1129                   }                                                           \
1130                 any_malloced = 1;                                             \
1131               }                                                               \
1132             else                                                              \
1133               newp = alloca_account (slen, alloca_used);                      \
1134             newp->next = NULL;                                                \
1135             newp->malloced = malloced;                                        \
1136             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1137             *lastp = newp;                                                    \
1138             lastp = &newp->next
1139             NEW_PATTERN;
1140           }
1141       }
1142     else if (*p == L('|'))
1143       {
1144         if (level == 0)
1145           {
1146             NEW_PATTERN;
1147             startp = p + 1;
1148           }
1149       }
1150   assert (list != NULL);
1151   assert (p[-1] == L(')'));
1152 #undef NEW_PATTERN
1153
1154   switch (opt)
1155     {
1156     case L('*'):
1157       if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1158                alloca_used) == 0)
1159         goto success;
1160       /* FALLTHROUGH */
1161
1162     case L('+'):
1163       do
1164         {
1165           for (rs = string; rs <= string_end; ++rs)
1166             /* First match the prefix with the current pattern with the
1167                current pattern.  */
1168             if (FCT (list->str, string, rs, no_leading_period,
1169                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1170                      NULL, alloca_used) == 0
1171                 /* This was successful.  Now match the rest with the rest
1172                    of the pattern.  */
1173                 && (FCT (p, rs, string_end,
1174                          rs == string
1175                          ? no_leading_period
1176                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1177                          flags & FNM_FILE_NAME
1178                          ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
1179                     /* This didn't work.  Try the whole pattern.  */
1180                     || (rs != string
1181                         && FCT (pattern - 1, rs, string_end,
1182                                 rs == string
1183                                 ? no_leading_period
1184                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1185                                    ? 1 : 0),
1186                                 flags & FNM_FILE_NAME
1187                                 ? flags : flags & ~FNM_PERIOD, NULL,
1188                                 alloca_used) == 0)))
1189               /* It worked.  Signal success.  */
1190               goto success;
1191         }
1192       while ((list = list->next) != NULL);
1193
1194       /* None of the patterns lead to a match.  */
1195       retval = FNM_NOMATCH;
1196       break;
1197
1198     case L('?'):
1199       if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1200                alloca_used) == 0)
1201         goto success;
1202       /* FALLTHROUGH */
1203
1204     case L('@'):
1205       do
1206         /* I cannot believe it but `strcat' is actually acceptable
1207            here.  Match the entire string with the prefix from the
1208            pattern list and the rest of the pattern following the
1209            pattern list.  */
1210         if (FCT (STRCAT (list->str, p), string, string_end,
1211                  no_leading_period,
1212                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1213                  NULL, alloca_used) == 0)
1214           /* It worked.  Signal success.  */
1215           goto success;
1216       while ((list = list->next) != NULL);
1217
1218       /* None of the patterns lead to a match.  */
1219       retval = FNM_NOMATCH;
1220       break;
1221
1222     case L('!'):
1223       for (rs = string; rs <= string_end; ++rs)
1224         {
1225           struct patternlist *runp;
1226
1227           for (runp = list; runp != NULL; runp = runp->next)
1228             if (FCT (runp->str, string, rs,  no_leading_period,
1229                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1230                      NULL, alloca_used) == 0)
1231               break;
1232
1233           /* If none of the patterns matched see whether the rest does.  */
1234           if (runp == NULL
1235               && (FCT (p, rs, string_end,
1236                        rs == string
1237                        ? no_leading_period
1238                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1239                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1240                        NULL, alloca_used) == 0))
1241             /* This is successful.  */
1242             goto success;
1243         }
1244
1245       /* None of the patterns together with the rest of the pattern
1246          lead to a match.  */
1247       retval = FNM_NOMATCH;
1248       break;
1249
1250     default:
1251       assert (! "Invalid extended matching operator");
1252       retval = -1;
1253       break;
1254     }
1255
1256  success:
1257  out:
1258   if (any_malloced)
1259     while (list != NULL)
1260       {
1261         struct patternlist *old = list;
1262         list = list->next;
1263         if (old->malloced)
1264           free (old);
1265       }
1266
1267   return retval;
1268 }
1269
1270
/* Remove the helper macros this file relies on, presumably so that it
   can be included again with a different set of definitions.  */

/* Character types and literal helpers.  */
#undef CHAR
#undef UCHAR
#undef INT
#undef L
#undef FOLD
#undef BTOWC

/* Names of the functions and structure defined above.  */
#undef FCT
#undef EXT
#undef END
#undef STRUCT

/* String and memory primitives.  */
#undef MEMCHR
#undef MEMPCPY
#undef STRCAT
#undef STRCOLL
#undef STRLEN