posix/fnmatch_loop.c

   1 /* Copyright (C) 1991-1993, 1996-2000, 2001 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3
   4    This library is free software; you can redistribute it and/or
   5    modify it under the terms of the GNU Library General Public License as
   6    published by the Free Software Foundation; either version 2 of the
   7    License, or (at your option) any later version.
   8
   9    This library is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12    Library General Public License for more details.
  13
  14    You should have received a copy of the GNU Library General Public
  15    License along with this library; see the file COPYING.LIB.  If not,
  16    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  17    Boston, MA 02111-1307, USA.  */
  18
  19 /* Match STRING against the filename pattern PATTERN, returning zero if
  20    it matches, nonzero if not.
        STRING_END marks the end of STRING: the matcher compares its current
        position against STRING_END to detect that the string is exhausted.
        A nonzero NO_LEADING_PERIOD means that a `.' at the current position
        of STRING is rejected when the pattern element there is `?', `*' or
        a bracket expression.
        FLAGS holds the FNM_* option bits (FNM_FILE_NAME, FNM_NOESCAPE,
        FNM_PERIOD, FNM_LEADING_DIR and FNM_EXTMATCH are tested).
        NOTE(review): FCT, EXT, END, CHAR and INT are not defined in this
        part of the file; presumably the including file supplies them as
        macros -- confirm there.  */
  21 static int FCT (const CHAR *pattern, const CHAR *string,
  22                 const CHAR *string_end, int no_leading_period, int flags)
  23      internal_function;
     /* Helper for extended-match groups.  FCT calls it when FNM_EXTMATCH is
        set and a `?' or `*' in the pattern is directly followed by `(',
        passing that pattern character as OPT and PATTERN pointing at the
        `('.  A result of -1 makes FCT carry on with its ordinary handling
        of the wildcard; any other result is returned by FCT unchanged.  */
  24 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
  25                 const CHAR *string_end, int no_leading_period, int flags)
  26      internal_function;
     /* Called by FCT with PATTERNP pointing at a `(' while it skips over
        wildcards following a `*'.  If the result differs from PATTERNP the
        text is treated as an extended pattern group and FCT continues
        scanning from the returned position (presumably just past the end
        of the group -- confirm against the definition); if the result
        equals PATTERNP there is no group to skip.  */
  27 static const CHAR *END (const CHAR *patternp) internal_function;
  28
  29 static int
  30 internal_function
  31 FCT (pattern, string, string_end, no_leading_period, flags)
  32      const CHAR *pattern;
  33      const CHAR *string;
  34      const CHAR *string_end;
  35      int no_leading_period;
  36      int flags;
  37 {
  38   register const CHAR *p = pattern, *n = string;
  39   register UCHAR c;
  40 #ifdef _LIBC
  41 # if WIDE_CHAR_VERSION
  42   const char *collseq = (const char *)
  43     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  44 # else
  45   const UCHAR *collseq = (const UCHAR *)
  46     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  47 # endif
  48 #endif
  49
  50   while ((c = *p++) != L('\0'))
  51     {
  52       int new_no_leading_period = 0;
  53       c = FOLD (c);
  54
  55       switch (c)
  56         {
  57         case L('?'):
  58           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  59             {
  60               int res;
  61
  62               res = EXT (c, p, n, string_end, no_leading_period,
  63                          flags);
  64               if (res != -1)
  65                 return res;
  66             }
  67
  68           if (n == string_end)
  69             return FNM_NOMATCH;
  70           else if (*n == L('/') && (flags & FNM_FILE_NAME))
  71             return FNM_NOMATCH;
  72           else if (*n == L('.') && no_leading_period)
  73             return FNM_NOMATCH;
  74           break;
  75
  76         case L('\\'):
  77           if (!(flags & FNM_NOESCAPE))
  78             {
  79               c = *p++;
  80               if (c == L('\0'))
  81                 /* Trailing \ loses.  */
  82                 return FNM_NOMATCH;
  83               c = FOLD (c);
  84             }
  85           if (n == string_end || FOLD ((UCHAR) *n) != c)
  86             return FNM_NOMATCH;
  87           break;
  88
  89         case L('*'):
  90           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  91             {
  92               int res;
  93
  94               res = EXT (c, p, n, string_end, no_leading_period,
  95                          flags);
  96               if (res != -1)
  97                 return res;
  98             }
  99
 100           if (n != string_end && *n == L('.') && no_leading_period)
 101             return FNM_NOMATCH;
 102
 103           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
 104             {
 105               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
 106                 {
 107                   const CHAR *endp = END (p);
 108                   if (endp != p)
 109                     {
 110                       /* This is a pattern.  Skip over it.  */
 111                       p = endp;
 112                       continue;
 113                     }
 114                 }
 115
 116               if (c == L('?'))
 117                 {
 118                   /* A ? needs to match one character.  */
 119                   if (n == string_end)
 120                     /* There isn't another character; no match.  */
 121                     return FNM_NOMATCH;
 122                   else if (*n == L('/')
 123                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
 124                     /* A slash does not match a wildcard under
 125                        FNM_FILE_NAME.  */
 126                     return FNM_NOMATCH;
 127                   else
 128                     /* One character of the string is consumed in matching
 129                        this ? wildcard, so *??? won't match if there are
 130                        less than three characters.  */
 131                     ++n;
 132                 }
 133             }
 134
 135           if (c == L('\0'))
 136             /* The wildcard(s) is/are the last element of the pattern.
 137                If the name is a file name and contains another slash
 138                this means it cannot match, unless the FNM_LEADING_DIR
 139                flag is set.  */
 140             {
 141               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
 142
 143               if (flags & FNM_FILE_NAME)
 144                 {
 145                   if (flags & FNM_LEADING_DIR)
 146                     result = 0;
 147                   else
 148                     {
 149                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
 150                         result = 0;
 151                     }
 152                 }
 153
 154               return result;
 155             }
 156           else
 157             {
 158               const CHAR *endp;
 159
 160               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
 161                              string_end - n);
 162               if (endp == NULL)
 163                 endp = string_end;
 164
 165               if (c == L('[')
 166                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
 167                       && (c == L('@') || c == L('+') || c == L('!'))
 168                       && *p == L('(')))
 169                 {
 170                   int flags2 = ((flags & FNM_FILE_NAME)
 171                                 ? flags : (flags & ~FNM_PERIOD));
 172                   int no_leading_period2 = no_leading_period;
 173
 174                   for (--p; n < endp; ++n, no_leading_period2 = 0)
 175                     if (FCT (p, n, string_end, no_leading_period2, flags2)
 176                         == 0)
 177                       return 0;
 178                 }
 179               else if (c == L('/') && (flags & FNM_FILE_NAME))
 180                 {
 181                   while (n < string_end && *n != L('/'))
 182                     ++n;
 183                   if (n < string_end && *n == L('/')
 184                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
 185                           == 0))
 186                     return 0;
 187                 }
 188               else
 189                 {
 190                   int flags2 = ((flags & FNM_FILE_NAME)
 191                                 ? flags : (flags & ~FNM_PERIOD));
 192                   int no_leading_period2 = no_leading_period;
 193
 194                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
 195                     c = *p;
 196                   c = FOLD (c);
 197                   for (--p; n < endp; ++n, no_leading_period2 = 0)
 198                     if (FOLD ((UCHAR) *n) == c
 199                         && (FCT (p, n, string_end, no_leading_period2, flags2)
 200                             == 0))
 201                       return 0;
 202                 }
 203             }
 204
 205           /* If we come here no match is possible with the wildcard.  */
 206           return FNM_NOMATCH;
 207
 208         case L('['):
 209           {
 210             /* Nonzero if the sense of the character class is inverted.  */
 211             register int not;
 212             CHAR cold;
 213             UCHAR fn;
 214
 215             if (posixly_correct == 0)
 216               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 217
 218             if (n == string_end)
 219               return FNM_NOMATCH;
 220
 221             if (*n == L('.') && no_leading_period)
 222               return FNM_NOMATCH;
 223
 224             if (*n == L('/') && (flags & FNM_FILE_NAME))
 225               /* `/' cannot be matched.  */
 226               return FNM_NOMATCH;
 227
 228             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
 229             if (not)
 230               ++p;
 231
 232             fn = FOLD ((UCHAR) *n);
 233
 234             c = *p++;
 235             for (;;)
 236               {
 237                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 238                   {
 239                     if (*p == L('\0'))
 240                       return FNM_NOMATCH;
 241                     c = FOLD ((UCHAR) *p);
 242                     ++p;
 243
 244                     if (c == fn)
 245                       goto matched;
 246                   }
 247                 else if (c == L('[') && *p == L(':'))
 248                   {
 249                     /* Leave room for the null.  */
 250                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
 251                     size_t c1 = 0;
 252 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 253                     wctype_t wt;
 254 #endif
 255                     const CHAR *startp = p;
 256
 257                     for (;;)
 258                       {
 259                         if (c1 == CHAR_CLASS_MAX_LENGTH)
 260                           /* The name is too long and therefore the pattern
 261                              is ill-formed.  */
 262                           return FNM_NOMATCH;
 263
 264                         c = *++p;
 265                         if (c == L(':') && p[1] == L(']'))
 266                           {
 267                             p += 2;
 268                             break;
 269                           }
 270                         if (c < L('a') || c >= L('z'))
 271                           {
 272                             /* This cannot possibly be a character class name.
 273                                Match it as a normal range.  */
 274                             p = startp;
 275                             c = L('[');
 276                             goto normal_bracket;
 277                           }
 278                         str[c1++] = c;
 279                       }
 280                     str[c1] = L('\0');
 281
 282 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
 283                     wt = IS_CHAR_CLASS (str);
 284                     if (wt == 0)
 285                       /* Invalid character class name.  */
 286                       return FNM_NOMATCH;
 287
 288 # if defined _LIBC && ! WIDE_CHAR_VERSION
 289                     /* The following code is glibc specific but does
 290                        there a good job in speeding up the code since
 291                        we can avoid the btowc() call.  */
 292                     if (_ISCTYPE ((UCHAR) *n, wt))
 293                       goto matched;
 294 # else
 295                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
 296                       goto matched;
 297 # endif
 298 #else
 299                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
 300                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
 301                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
 302                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
 303                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
 304                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
 305                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
 306                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
 307                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
 308                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
 309                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
 310                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
 311                       goto matched;
 312 #endif
 313                     c = *p++;
 314                   }
 315 #ifdef _LIBC
 316                 else if (c == L('[') && *p == L('='))
 317                   {
 318                     UCHAR str[1];
 319                     uint32_t nrules =
 320                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 321                     const CHAR *startp = p;
 322
 323                     c = *++p;
 324                     if (c == L('\0'))
 325                       {
 326                         p = startp;
 327                         c = L('[');
 328                         goto normal_bracket;
 329                       }
 330                     str[0] = c;
 331
 332                     c = *++p;
 333                     if (c != L('=') || p[1] != L(']'))
 334                       {
 335                         p = startp;
 336                         c = L('[');
 337                         goto normal_bracket;
 338                       }
 339                     p += 2;
 340
 341                     if (nrules == 0)
 342                       {
 343                         if ((UCHAR) *n == str[0])
 344                           goto matched;
 345                       }
 346                     else
 347                       {
 348                         const int32_t *table;
 349 # if WIDE_CHAR_VERSION
 350                         const int32_t *weights;
 351                         const int32_t *extra;
 352 # else
 353                         const unsigned char *weights;
 354                         const unsigned char *extra;
 355 # endif
 356                         const int32_t *indirect;
 357                         int32_t idx;
 358                         const UCHAR *cp = (const UCHAR *) str;
 359
 360                         /* This #include defines a local function!  */
 361 # if WIDE_CHAR_VERSION
 362 #  include <locale/weightwc.h>
 363 # else
 364 #  include <locale/weight.h>
 365 # endif
 366
 367 # if WIDE_CHAR_VERSION
 368                         table = (const int32_t *)
 369                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
 370                         weights = (const int32_t *)
 371                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
 372                         extra = (const int32_t *)
 373                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
 374                         indirect = (const int32_t *)
 375                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
 376 # else
 377                         table = (const int32_t *)
 378                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
 379                         weights = (const unsigned char *)
 380                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
 381                         extra = (const unsigned char *)
 382                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 383                         indirect = (const int32_t *)
 384                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 385 # endif
 386
 387                         idx = findidx (&cp);
 388                         if (idx != 0)
 389                           {
 390                             /* We found a table entry.  Now see whether the
 391                                character we are currently at has the same
 392                                equivalance class value.  */
 393                             int len = weights[idx];
 394                             int32_t idx2;
 395                             const UCHAR *np = (const UCHAR *) n;
 396
 397                             idx2 = findidx (&np);
 398                             if (idx2 != 0 && len == weights[idx2])
 399                               {
 400                                 int cnt = 0;
 401
 402                                 while (cnt < len
 403                                        && (weights[idx + 1 + cnt]
 404                                            == weights[idx2 + 1 + cnt]))
 405                                   ++cnt;
 406
 407                                 if (cnt == len)
 408                                   goto matched;
 409                               }
 410                           }
 411                       }
 412
 413                     c = *p++;
 414                   }
 415 #endif
 416                 else if (c == L('\0'))
 417                   /* [ (unterminated) loses.  */
 418                   return FNM_NOMATCH;
 419                 else
 420                   {
 421                     int is_range = 0;
 422
 423 #ifdef _LIBC
 424                     int is_seqval = 0;
 425
 426                     if (c == L('[') && *p == L('.'))
 427                       {
 428                         uint32_t nrules =
 429                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
 430                         const CHAR *startp = p;
 431                         size_t c1 = 0;
 432
 433                         while (1)
 434                           {
 435                             c = *++p;
 436                             if (c == L('.') && p[1] == L(']'))
 437                               {
 438                                 p += 2;
 439                                 break;
 440                               }
 441                             if (c == '\0')
 442                               return FNM_NOMATCH;
 443                             ++c1;
 444                           }
 445
 446                         /* We have to handling the symbols differently in
 447                            ranges since then the collation sequence is
 448                            important.  */
 449                         is_range = *p == L('-') && p[1] != L('\0');
 450
 451                         if (nrules == 0)
 452                           {
 453                             /* There are no names defined in the collation
 454                                data.  Therefore we only accept the trivial
 455                                names consisting of the character itself.  */
 456                             if (c1 != 1)
 457                               return FNM_NOMATCH;
 458
 459                             if (!is_range && *n == startp[1])
 460                               goto matched;
 461
 462                             cold = startp[1];
 463                             c = *p++;
 464                           }
 465                         else
 466                           {
 467                             int32_t table_size;
 468                             const int32_t *symb_table;
 469 # ifdef WIDE_CHAR_VERSION
 470                             char str[c1];
 471                             unsigned int strcnt;
 472 # else
 473 #  define str (startp + 1)
 474 # endif
 475                             const unsigned char *extra;
 476                             int32_t idx;
 477                             int32_t elem;
 478                             int32_t second;
 479                             int32_t hash;
 480
 481 # ifdef WIDE_CHAR_VERSION
 482                             /* We have to convert the name to a single-byte
 483                                string.  This is possible since the names
 484                                consist of ASCII characters and the internal
 485                                representation is UCS4.  */
 486                             for (strcnt = 0; strcnt < c1; ++strcnt)
 487                               str[strcnt] = startp[1 + strcnt];
 488 #endif
 489
 490                             table_size =
 491                               _NL_CURRENT_WORD (LC_COLLATE,
 492                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
 493                             symb_table = (const int32_t *)
 494                               _NL_CURRENT (LC_COLLATE,
 495                                            _NL_COLLATE_SYMB_TABLEMB);
 496                             extra = (const unsigned char *)
 497                               _NL_CURRENT (LC_COLLATE,
 498                                            _NL_COLLATE_SYMB_EXTRAMB);
 499
 500                             /* Locate the character in the hashing table.  */
 501                             hash = elem_hash (str, c1);
 502
 503                             idx = 0;
 504                             elem = hash % table_size;
 505                             second = hash % (table_size - 2);
 506                             while (symb_table[2 * elem] != 0)
 507                               {
 508                                 /* First compare the hashing value.  */
 509                                 if (symb_table[2 * elem] == hash
 510                                     && c1 == extra[symb_table[2 * elem + 1]]
 511                                     && memcmp (str,
 512                                                &extra[symb_table[2 * elem + 1]
 513                                                      + 1], c1) == 0)
 514                                   {
 515                                     /* Yep, this is the entry.  */
 516                                     idx = symb_table[2 * elem + 1];
 517                                     idx += 1 + extra[idx];
 518                                     break;
 519                                   }
 520
 521                                 /* Next entry.  */
 522                                 elem += second;
 523                               }
 524
 525                             if (symb_table[2 * elem] != 0)
 526                               {
 527                                 /* Compare the byte sequence but only if
 528                                    this is not part of a range.  */
 529 # ifdef WIDE_CHAR_VERSION
 530                                 int32_t *wextra;
 531
 532                                 idx += 1 + extra[idx];
 533                                 /* Adjust for the alignment.  */
 534                                 idx = (idx + 3) & ~4;
 535
 536                                 wextra = (int32_t *) &extra[idx + 4];
 537 # endif
 538
 539                                 if (! is_range)
 540                                   {
 541 # ifdef WIDE_CHAR_VERSION
 542                                     for (c1 = 0; c1 < wextra[idx]; ++c1)
 543                                       if (n[c1] != wextra[1 + c1])
 544                                         break;
 545
 546                                     if (c1 == wextra[idx])
 547                                       goto matched;
 548 # else
 549                                     for (c1 = 0; c1 < extra[idx]; ++c1)
 550                                       if (n[c1] != extra[1 + c1])
 551                                         break;
 552
 553                                     if (c1 == extra[idx])
 554                                       goto matched;
 555 # endif
 556                                   }
 557
 558                                 /* Get the collation sequence value.  */
 559                                 is_seqval = 1;
 560 # ifdef WIDE_CHAR_VERSION
 561                                 cold = wextra[1 + wextra[idx]];
 562 # else
 563                                 /* Adjust for the alignment.  */
 564                                 idx += 1 + extra[idx];
 565                                 idx = (idx + 3) & ~4;
 566                                 cold = *((int32_t *) &extra[idx]);
 567 # endif
 568
 569                                 c = *p++;
 570                               }
 571                             else if (c1 == 1)
 572                               {
 573                                 /* No valid character.  Match it as a
 574                                    single byte.  */
 575                                 if (!is_range && *n == str[0])
 576                                   goto matched;
 577
 578                                 cold = str[0];
 579                                 c = *p++;
 580                               }
 581                             else
 582                               return FNM_NOMATCH;
 583                           }
 584                       }
 585                     else
 586 # undef str
 587 #endif
 588                       {
 589                         c = FOLD (c);
 590                       normal_bracket:
 591
 592                         /* We have to handling the symbols differently in
 593                            ranges since then the collation sequence is
 594                            important.  */
 595                         is_range = *p == L('-') && p[1] != L('\0');
 596
 597                         if (!is_range && c == fn)
 598                           goto matched;
 599
 600                         cold = c;
 601                         c = *p++;
 602                       }
 603
 604                     if (c == L('-') && *p != L(']'))
 605                       {
 606 #if _LIBC
 607                         /* We have to find the collation sequence
 608                            value for C.  Collation sequence is nothing
 609                            we can regularly access.  The sequence
 610                            value is defined by the order in which the
 611                            definitions of the collation values for the
 612                            various characters appear in the source
 613                            file.  A strange concept, nowhere
 614                            documented.  */
 615                         uint32_t fcollseq;
 616                         uint32_t lcollseq;
 617                         UCHAR cend = *p++;
 618
 619 # ifdef WIDE_CHAR_VERSION
 620                         /* Search in the `names' array for the characters.  */
 621                         fcollseq = collseq_table_lookup (collseq, fn);
 622                         if (fcollseq == ~((uint32_t) 0))
 623                           /* XXX We don't know anything about the character
 624                              we are supposed to match.  This means we are
 625                              failing.  */
 626                           goto range_not_matched;
 627
 628                         if (is_seqval)
 629                           lcollseq = cold;
 630                         else
 631                           lcollseq = collseq_table_lookup (collseq, cold);
 632 # else
 633                         fcollseq = collseq[fn];
 634                         lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
 635 # endif
 636
 637                         is_seqval = 0;
 638                         if (cend == L('[') && *p == L('.'))
 639                           {
 640                             uint32_t nrules =
 641                               _NL_CURRENT_WORD (LC_COLLATE,
 642                                                 _NL_COLLATE_NRULES);
 643                             const CHAR *startp = p;
 644                             size_t c1 = 0;
 645
 646                             while (1)
 647                               {
 648                                 c = *++p;
 649                                 if (c == L('.') && p[1] == L(']'))
 650                                   {
 651                                     p += 2;
 652                                     break;
 653                                   }
 654                                 if (c == '\0')
 655                                   return FNM_NOMATCH;
 656                                 ++c1;
 657                               }
 658
 659                             if (nrules == 0)
 660                               {
 661                                 /* There are no names defined in the
 662                                    collation data.  Therefore we only
 663                                    accept the trivial names consisting
 664                                    of the character itself.  */
 665                                 if (c1 != 1)
 666                                   return FNM_NOMATCH;
 667
 668                                 cend = startp[1];
 669                               }
 670                             else
 671                               {
 672                                 int32_t table_size;
 673                                 const int32_t *symb_table;
 674 # ifdef WIDE_CHAR_VERSION
 675                                 char str[c1];
 676                                 unsigned int strcnt;
 677 # else
 678 #  define str (startp + 1)
 679 # endif
 680                                 const unsigned char *extra;
 681                                 int32_t idx;
 682                                 int32_t elem;
 683                                 int32_t second;
 684                                 int32_t hash;
 685
 686 # ifdef WIDE_CHAR_VERSION
 687                                 /* We have to convert the name to a single-byte
 688                                    string.  This is possible since the names
 689                                    consist of ASCII characters and the internal
 690                                    representation is UCS4.  */
 691                                 for (strcnt = 0; strcnt < c1; ++strcnt)
 692                                   str[strcnt] = startp[1 + strcnt];
 693 # endif
 694
 695                                 table_size =
 696                                   _NL_CURRENT_WORD (LC_COLLATE,
 697                                                     _NL_COLLATE_SYMB_HASH_SIZEMB);
 698                                 symb_table = (const int32_t *)
 699                                   _NL_CURRENT (LC_COLLATE,
 700                                                _NL_COLLATE_SYMB_TABLEMB);
 701                                 extra = (const unsigned char *)
 702                                   _NL_CURRENT (LC_COLLATE,
 703                                                _NL_COLLATE_SYMB_EXTRAMB);
 704
 705                                 /* Locate the character in the hashing
 706                                    table.  */
 707                                 hash = elem_hash (str, c1);
 708
 709                                 idx = 0;
 710                                 elem = hash % table_size;
 711                                 second = hash % (table_size - 2);
 712                                 while (symb_table[2 * elem] != 0)
 713                                   {
 714                                 /* First compare the hashing value.  */
 715                                     if (symb_table[2 * elem] == hash
 716                                         && (c1
 717                                             == extra[symb_table[2 * elem + 1]])
 718                                         && memcmp (str,
 719                                                    &extra[symb_table[2 * elem + 1]
 720                                                          + 1], c1) == 0)
 721                                       {
 722                                         /* Yep, this is the entry.  */
 723                                         idx = symb_table[2 * elem + 1];
 724                                         idx += 1 + extra[idx];
 725                                         break;
 726                                       }
 727
 728                                     /* Next entry.  */
 729                                     elem += second;
 730                                   }
 731
 732                                 if (symb_table[2 * elem] != 0)
 733                                   {
 734                                     /* Compare the byte sequence but only if
 735                                        this is not part of a range.  */
 736 # ifdef WIDE_CHAR_VERSION
 737                                     int32_t *wextra;
 738
 739                                     idx += 1 + extra[idx];
 740                                     /* Adjust for the alignment.  */
 741                                     idx = (idx + 3) & ~4;
 742
 743                                     wextra = (int32_t *) &extra[idx + 4];
 744 # endif
 745                                     /* Get the collation sequence value.  */
 746                                     is_seqval = 1;
 747 # ifdef WIDE_CHAR_VERSION
 748                                     cend = wextra[1 + wextra[idx]];
 749 # else
 750                                     /* Adjust for the alignment.  */
 751                                     idx += 1 + extra[idx];
 752                                     idx = (idx + 3) & ~4;
 753                                     cend = *((int32_t *) &extra[idx]);
 754 # endif
 755                                   }
 756                                 else if (symb_table[2 * elem] != 0 && c1 == 1)
 757                                   {
 758                                     cend = str[0];
 759                                     c = *p++;
 760                                   }
 761                                 else
 762                                   return FNM_NOMATCH;
 763                               }
 764 # undef str
 765                           }
 766                         else
 767                           {
 768                             if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 769                               cend = *p++;
 770                             if (cend == L('\0'))
 771                               return FNM_NOMATCH;
 772                             cend = FOLD (cend);
 773                           }
 774
 775                         /* XXX It is not entirely clear to me how to handle
 776                            characters which are not mentioned in the
 777                            collation specification.  */
 778                         if (
 779 # ifdef WIDE_CHAR_VERSION
 780                             lcollseq == 0xffffffff ||
 781 # endif
 782                             lcollseq <= fcollseq)
 783                           {
 784                             /* We have to look at the upper bound.  */
 785                             uint32_t hcollseq;
 786
 787                             if (is_seqval)
 788                               hcollseq = cend;
 789                             else
 790                               {
 791 # ifdef WIDE_CHAR_VERSION
 792                                 hcollseq =
 793                                   collseq_table_lookup (collseq, cend);
 794                                 if (hcollseq == ~((uint32_t) 0))
 795                                   {
 796                                     /* Hum, no information about the upper
 797                                        bound.  The matching succeeds if the
 798                                        lower bound is matched exactly.  */
 799                                     if (lcollseq != fcollseq)
 800                                       goto range_not_matched;
 801
 802                                     goto matched;
 803                                   }
 804 # else
 805                                 hcollseq = collseq[cend];
 806 # endif
 807                               }
 808
 809                             if (lcollseq <= hcollseq && fcollseq <= hcollseq)
 810                               goto matched;
 811                           }
 812 # ifdef WIDE_CHAR_VERSION
 813                       range_not_matched:
 814 # endif
 815 #else
 816                         /* We use a boring value comparison of the character
 817                            values.  This is better than comparing using
 818                            `strcoll' since the latter would have surprising
 819                            and sometimes fatal consequences.  */
 820                         UCHAR cend = *p++;
 821
 822                         if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
 823                           cend = *p++;
 824                         if (cend == L('\0'))
 825                           return FNM_NOMATCH;
 826
 827                         /* It is a range.  */
 828                         if (cold <= fn && fn <= cend)
 829                           goto matched;
 830 #endif
 831
 832                         c = *p++;
 833                       }
 834                   }
 835
 836                 if (c == L(']'))
 837                   break;
 838               }
 839
 840             if (!not)
 841               return FNM_NOMATCH;
 842             break;
 843
 844           matched:
 845             /* Skip the rest of the [...] that already matched.  */
 846             do
 847               {
 848               ignore_next:
 849                 c = *p++;
 850
 851                 if (c == L('\0'))
 852                   /* [... (unterminated) loses.  */
 853                   return FNM_NOMATCH;
 854
 855                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
 856                   {
 857                     if (*p == L('\0'))
 858                       return FNM_NOMATCH;
 859                     /* XXX 1003.2d11 is unclear if this is right.  */
 860                     ++p;
 861                   }
 862                 else if (c == L('[') && *p == L(':'))
 863                   {
 864                     int c1 = 0;
 865                     const CHAR *startp = p;
 866
 867                     while (1)
 868                       {
 869                         c = *++p;
 870                         if (++c1 == CHAR_CLASS_MAX_LENGTH)
 871                           return FNM_NOMATCH;
 872
 873                         if (*p == L(':') && p[1] == L(']'))
 874                           break;
 875
 876                         if (c < L('a') || c >= L('z'))
 877                           {
 878                             p = startp;
 879                             goto ignore_next;
 880                           }
 881                       }
 882                     p += 2;
 883                     c = *p++;
 884                   }
 885                 else if (c == L('[') && *p == L('='))
 886                   {
 887                     c = *++p;
 888                     if (c == L('\0'))
 889                       return FNM_NOMATCH;
 890                     c = *++p;
 891                     if (c != L('=') || p[1] != L(']'))
 892                       return FNM_NOMATCH;
 893                     p += 2;
 894                     c = *p++;
 895                   }
 896                 else if (c == L('[') && *p == L('.'))
 897                   {
 898                     ++p;
 899                     while (1)
 900                       {
 901                         c = *++p;
 902                         if (c == '\0')
 903                           return FNM_NOMATCH;
 904
 905                         if (*p == L('.') && p[1] == L(']'))
 906                           break;
 907                       }
 908                     p += 2;
 909                     c = *p++;
 910                   }
 911               }
 912             while (c != L(']'));
 913             if (not)
 914               return FNM_NOMATCH;
 915           }
 916           break;
 917
 918         case L('+'):
 919         case L('@'):
 920         case L('!'):
 921           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
 922             {
 923               int res;
 924
 925               res = EXT (c, p, n, string_end, no_leading_period, flags);
 926               if (res != -1)
 927                 return res;
 928             }
 929           goto normal_match;
 930
 931         case L('/'):
 932           if (NO_LEADING_PERIOD (flags))
 933             {
 934               if (n == string_end || c != *n)
 935                 return FNM_NOMATCH;
 936
 937               new_no_leading_period = 1;
 938               break;
 939             }
 940           /* FALLTHROUGH */
 941         default:
 942         normal_match:
 943           if (n == string_end || c != FOLD ((UCHAR) *n))
 944             return FNM_NOMATCH;
 945         }
 946
 947       no_leading_period = new_no_leading_period;
 948       ++n;
 949     }
 950
 951   if (n == string_end)
 952     return 0;
 953
 954   if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
 955     /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz".  */
 956     return 0;
 957
 958   return FNM_NOMATCH;
 959 }
 960
 961
 962 static const CHAR *
 963 internal_function
 964 END (const CHAR *pattern)
 965 {
 966   const CHAR *p = pattern;
 967
 968   while (1)
 969     if (*++p == L('\0'))
 970       /* This is an invalid pattern.  */
 971       return pattern;
 972     else if (*p == L('['))
 973       {
 974         /* Handle brackets special.  */
 975         if (posixly_correct == 0)
 976           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
 977
 978         /* Skip the not sign.  We have to recognize it because of a possibly
 979            following ']'.  */
 980         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
 981           ++p;
 982         /* A leading ']' is recognized as such.  */
 983         if (*p == L(']'))
 984           ++p;
 985         /* Skip over all characters of the list.  */
 986         while (*p != L(']'))
 987           if (*p++ == L('\0'))
 988             /* This is no valid pattern.  */
 989             return pattern;
 990       }
 991     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
 992               || *p == L('!')) && p[1] == L('('))
 993       p = END (p + 1);
 994     else if (*p == L(')'))
 995       break;
 996
 997   return p + 1;
 998 }
 999
1000
1001 static int
1002 internal_function
1003 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1004      int no_leading_period, int flags)
1005 {
1006   const CHAR *startp;
1007   int level;
1008   struct patternlist
1009   {
1010     struct patternlist *next;
1011     CHAR str[0];
1012   } *list = NULL;
1013   struct patternlist **lastp = &list;
1014   size_t pattern_len = STRLEN (pattern);
1015   const CHAR *p;
1016   const CHAR *rs;
1017
1018   /* Parse the pattern.  Store the individual parts in the list.  */
1019   level = 0;
1020   for (startp = p = pattern + 1; level >= 0; ++p)
1021     if (*p == L('\0'))
1022       /* This is an invalid pattern.  */
1023       return -1;
1024     else if (*p == L('['))
1025       {
1026         /* Handle brackets special.  */
1027         if (posixly_correct == 0)
1028           posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1029
1030         /* Skip the not sign.  We have to recognize it because of a possibly
1031            following ']'.  */
1032         if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1033           ++p;
1034         /* A leading ']' is recognized as such.  */
1035         if (*p == L(']'))
1036           ++p;
1037         /* Skip over all characters of the list.  */
1038         while (*p != L(']'))
1039           if (*p++ == L('\0'))
1040             /* This is no valid pattern.  */
1041             return -1;
1042       }
1043     else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1044               || *p == L('!')) && p[1] == L('('))
1045       /* Remember the nesting level.  */
1046       ++level;
1047     else if (*p == L(')'))
1048       {
1049         if (level-- == 0)
1050           {
1051             /* This means we found the end of the pattern.  */
1052 #define NEW_PATTERN \
1053             struct patternlist *newp;                                         \
1054                                                                               \
1055             if (opt == L('?') || opt == L('@'))                               \
1056               newp = alloca (sizeof (struct patternlist)                      \
1057                              + (pattern_len * sizeof (CHAR)));                \
1058             else                                                              \
1059               newp = alloca (sizeof (struct patternlist)                      \
1060                              + ((p - startp + 1) * sizeof (CHAR)));           \
1061             *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0');    \
1062             newp->next = NULL;                                                \
1063             *lastp = newp;                                                    \
1064             lastp = &newp->next
1065             NEW_PATTERN;
1066           }
1067       }
1068     else if (*p == L('|'))
1069       {
1070         if (level == 0)
1071           {
1072             NEW_PATTERN;
1073             startp = p + 1;
1074           }
1075       }
1076   assert (list != NULL);
1077   assert (p[-1] == L(')'));
1078
1079   switch (opt)
1080     {
1081     case L('*'):
1082       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1083         return 0;
1084       /* FALLTHROUGH */
1085
1086     case L('+'):
1087       do
1088         {
1089           for (rs = string; rs <= string_end; ++rs)
1090             /* First match the prefix with the current pattern with the
1091                current pattern.  */
1092             if (FCT (list->str, string, rs, no_leading_period,
1093                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1094                 /* This was successful.  Now match the rest with the rest
1095                    of the pattern.  */
1096                 && (FCT (p, rs, string_end,
1097                          rs == string
1098                          ? no_leading_period
1099                          : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1100                          flags & FNM_FILE_NAME
1101                          ? flags : flags & ~FNM_PERIOD) == 0
1102                     /* This didn't work.  Try the whole pattern.  */
1103                     || (rs != string
1104                         && FCT (pattern - 1, rs, string_end,
1105                                 rs == string
1106                                 ? no_leading_period
1107                                 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1108                                    ? 1 : 0),
1109                                 flags & FNM_FILE_NAME
1110                                 ? flags : flags & ~FNM_PERIOD) == 0)))
1111               /* It worked.  Signal success.  */
1112               return 0;
1113         }
1114       while ((list = list->next) != NULL);
1115
1116       /* None of the patterns lead to a match.  */
1117       return FNM_NOMATCH;
1118
1119     case L('?'):
1120       if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1121         return 0;
1122       /* FALLTHROUGH */
1123
1124     case L('@'):
1125       do
1126         /* I cannot believe it but `strcat' is actually acceptable
1127            here.  Match the entire string with the prefix from the
1128            pattern list and the rest of the pattern following the
1129            pattern list.  */
1130         if (FCT (STRCAT (list->str, p), string, string_end,
1131                  no_leading_period,
1132                  flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1133           /* It worked.  Signal success.  */
1134           return 0;
1135       while ((list = list->next) != NULL);
1136
1137       /* None of the patterns lead to a match.  */
1138       return FNM_NOMATCH;
1139
1140     case L('!'):
1141       for (rs = string; rs <= string_end; ++rs)
1142         {
1143           struct patternlist *runp;
1144
1145           for (runp = list; runp != NULL; runp = runp->next)
1146             if (FCT (runp->str, string, rs,  no_leading_period,
1147                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1148               break;
1149
1150           /* If none of the patterns matched see whether the rest does.  */
1151           if (runp == NULL
1152               && (FCT (p, rs, string_end,
1153                        rs == string
1154                        ? no_leading_period
1155                        : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1156                        flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1157                   == 0))
1158             /* This is successful.  */
1159             return 0;
1160         }
1161
1162       /* None of the patterns together with the rest of the pattern
1163          lead to a match.  */
1164       return FNM_NOMATCH;
1165
1166     default:
1167       assert (! "Invalid extended matching operator");
1168       break;
1169     }
1170
1171   return -1;
1172 }
1173
1174
/* Drop the character-type specific helper macros again.  Presumably
   this allows the including file to provide a different set of
   definitions and include this file once more -- confirm against the
   including file.  */
#undef BTOWC
#undef CHAR
#undef END
#undef EXT
#undef FCT
#undef FOLD
#undef INT
#undef L
#undef MEMCHR
#undef MEMPCPY
#undef STRCAT
#undef STRCOLL
#undef STRLEN
#undef UCHAR