(linenum→info "unix/slp.c:2238")

glibc/2.7/posix/fnmatch_loop.c

    1: /* Copyright (C) 1991-1993,1996-2001,2003-2005,2007
    2:    Free Software Foundation, Inc.
    3:    This file is part of the GNU C Library.
    4: 
    5:    The GNU C Library is free software; you can redistribute it and/or
    6:    modify it under the terms of the GNU Lesser General Public
    7:    License as published by the Free Software Foundation; either
    8:    version 2.1 of the License, or (at your option) any later version.
    9: 
   10:    The GNU C Library is distributed in the hope that it will be useful,
   11:    but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13:    Lesser General Public License for more details.
   14: 
   15:    You should have received a copy of the GNU Lesser General Public
   16:    License along with the GNU C Library; if not, write to the Free
   17:    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   18:    02111-1307 USA.  */
   19: 
   20: struct STRUCT  /* Resume point that FCT stores through ENDS when it reaches a plain `*'.  */
   21: {
   22:   const CHAR *pattern;  /* Position in the pattern, pointing at the `*' itself.  */
   23:   const CHAR *string;  /* Position in the string at the time the `*' was reached.  */
   24:   int no_leading_period;  /* Value of no_leading_period at that point.  */
   25: };
   26: 
   27: /* Match STRING against the filename pattern PATTERN, returning zero if
   28:    it matches, nonzero if not.  */
   29: static int FCT (const CHAR *pattern, const CHAR *string,
   30:                 const CHAR *string_end, int no_leading_period, int flags,
   31:                 struct STRUCT *ends)  /* If ENDS is non-NULL, FCT fills *ENDS and returns 0 on reaching a plain `*'.  */
   32:      internal_function;
   33: static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
   34:                 const CHAR *string_end, int no_leading_period, int flags)
   35:      internal_function;  /* FCT calls EXT for `?(' and `*(' under FNM_EXTMATCH; a result of -1 makes FCT continue with its ordinary wildcard handling.  */
   36: static const CHAR *END (const CHAR *patternp) internal_function;  /* FCT treats a result different from PATTERNP as the end of an extended pattern to skip over.  */
   37: 
   38: static int
   39: internal_function
   40: FCT (pattern, string, string_end, no_leading_period, flags, ends)
   41:      const CHAR *pattern;
   42:      const CHAR *string;
   43:      const CHAR *string_end;
   44:      int no_leading_period;
   45:      int flags;
   46:      struct STRUCT *ends;
   47: {
   48:   register const CHAR *p = pattern, *n = string;
   49:   register UCHAR c;
   50: #ifdef _LIBC
   51: # if WIDE_CHAR_VERSION
   52:   const char *collseq = (const char *)
   53:     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
   54: # else
   55:   const UCHAR *collseq = (const UCHAR *)
   56:     _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
   57: # endif
   58: #endif
   59: 
   60:   while ((c = *p++) != L('\0'))
   61:     {
   62:       int new_no_leading_period = 0;
   63:       c = FOLD (c);
   64: 
   65:       switch (c)
   66:         {
   67:         case L('?'):
   68:           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
   69:             {
   70:               int res;
   71: 
   72:               res = EXT (c, p, n, string_end, no_leading_period,
   73:                          flags);
   74:               if (res != -1)
   75:                 return res;
   76:             }
   77: 
   78:           if (n == string_end)
   79:             return FNM_NOMATCH;
   80:           else if (*n == L('/') && (flags & FNM_FILE_NAME))
   81:             return FNM_NOMATCH;
   82:           else if (*n == L('.') && no_leading_period)
   83:             return FNM_NOMATCH;
   84:           break;
   85: 
   86:         case L('\\'):
   87:           if (!(flags & FNM_NOESCAPE))
   88:             {
   89:               c = *p++;
   90:               if (c == L('\0'))
   91:                 /* Trailing \ loses.  */
   92:                 return FNM_NOMATCH;
   93:               c = FOLD (c);
   94:             }
   95:           if (n == string_end || FOLD ((UCHAR) *n) != c)
   96:             return FNM_NOMATCH;
   97:           break;
   98: 
   99:         case L('*'):
  100:           if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
  101:             {
  102:               int res;
  103: 
  104:               res = EXT (c, p, n, string_end, no_leading_period,
  105:                          flags);
  106:               if (res != -1)
  107:                 return res;
  108:             }
  109:           else if (ends != NULL)
  110:             {
  111:               ends->pattern = p - 1;
  112:               ends->string = n;
  113:               ends->no_leading_period = no_leading_period;
  114:               return 0;
  115:             }
  116: 
  117:           if (n != string_end && *n == L('.') && no_leading_period)
  118:             return FNM_NOMATCH;
  119: 
  120:           for (c = *p++; c == L('?') || c == L('*'); c = *p++)
  121:             {
  122:               if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
  123:                 {
  124:                   const CHAR *endp = END (p);
  125:                   if (endp != p)
  126:                     {
  127:                       /* This is a pattern.  Skip over it.  */
  128:                       p = endp;
  129:                       continue;
  130:                     }
  131:                 }
  132: 
  133:               if (c == L('?'))
  134:                 {
  135:                   /* A ? needs to match one character.  */
  136:                   if (n == string_end)
  137:                     /* There isn't another character; no match.  */
  138:                     return FNM_NOMATCH;
  139:                   else if (*n == L('/')
  140:                            && __builtin_expect (flags & FNM_FILE_NAME, 0))
  141:                     /* A slash does not match a wildcard under
  142:                        FNM_FILE_NAME.  */
  143:                     return FNM_NOMATCH;
  144:                   else
  145:                     /* One character of the string is consumed in matching
  146:                        this ? wildcard, so *??? won't match if there are
  147:                        less than three characters.  */
  148:                     ++n;
  149:                 }
  150:             }
  151: 
  152:           if (c == L('\0'))
  153:             /* The wildcard(s) is/are the last element of the pattern.
  154:                If the name is a file name and contains another slash
  155:                this means it cannot match, unless the FNM_LEADING_DIR
  156:                flag is set.  */
  157:             {
  158:               int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
  159: 
  160:               if (flags & FNM_FILE_NAME)
  161:                 {
  162:                   if (flags & FNM_LEADING_DIR)
  163:                     result = 0;
  164:                   else
  165:                     {
  166:                       if (MEMCHR (n, L('/'), string_end - n) == NULL)
  167:                         result = 0;
  168:                     }
  169:                 }
  170: 
  171:               return result;
  172:             }
  173:           else
  174:             {
  175:               const CHAR *endp;
  176:               struct STRUCT end;
  177: 
  178:               end.pattern = NULL;
  179:               endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
  180:                              string_end - n);
  181:               if (endp == NULL)
  182:                 endp = string_end;
  183: 
  184:               if (c == L('[')
  185:                   || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
  186:                       && (c == L('@') || c == L('+') || c == L('!'))
  187:                       && *p == L('(')))
  188:                 {
  189:                   int flags2 = ((flags & FNM_FILE_NAME)
  190:                                 ? flags : (flags & ~FNM_PERIOD));
  191: 
  192:                   for (--p; n < endp; ++n, no_leading_period = 0)
  193:                     if (FCT (p, n, string_end, no_leading_period, flags2,
  194:                              &end) == 0)
  195:                       goto found;
  196:                 }
  197:               else if (c == L('/') && (flags & FNM_FILE_NAME))
  198:                 {
  199:                   while (n < string_end && *n != L('/'))
  200:                     ++n;
  201:                   if (n < string_end && *n == L('/')
  202:                       && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
  203:                                NULL) == 0))
  204:                     return 0;
  205:                 }
  206:               else
  207:                 {
  208:                   int flags2 = ((flags & FNM_FILE_NAME)
  209:                                 ? flags : (flags & ~FNM_PERIOD));
  210: 
  211:                   if (c == L('\\') && !(flags & FNM_NOESCAPE))
  212:                     c = *p;
  213:                   c = FOLD (c);
  214:                   for (--p; n < endp; ++n, no_leading_period = 0)
  215:                     if (FOLD ((UCHAR) *n) == c
  216:                         && (FCT (p, n, string_end, no_leading_period, flags2,
  217:                                  &end) == 0))
  218:                       {
  219:                       found:
  220:                         if (end.pattern == NULL)
  221:                           return 0;
  222:                         break;
  223:                       }
  224:                   if (end.pattern != NULL)
  225:                     {
  226:                       p = end.pattern;
  227:                       n = end.string;
  228:                       no_leading_period = end.no_leading_period;
  229:                       continue;
  230:                     }
  231:                 }
  232:             }
  233: 
  234:           /* If we come here no match is possible with the wildcard.  */
  235:           return FNM_NOMATCH;
  236: 
  237:         case L('['):
  238:           {
  239:             /* Nonzero if the sense of the character class is inverted.  */
  240:             register int not;
  241:             CHAR cold;
  242:             UCHAR fn;
  243: 
  244:             if (posixly_correct == 0)
  245:               posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
  246: 
  247:             if (n == string_end)
  248:               return FNM_NOMATCH;
  249: 
  250:             if (*n == L('.') && no_leading_period)
  251:               return FNM_NOMATCH;
  252: 
  253:             if (*n == L('/') && (flags & FNM_FILE_NAME))
  254:               /* `/' cannot be matched.  */
  255:               return FNM_NOMATCH;
  256: 
  257:             not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
  258:             if (not)
  259:               ++p;
  260: 
  261:             fn = FOLD ((UCHAR) *n);
  262: 
  263:             c = *p++;
  264:             for (;;)
  265:               {
  266:                 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
  267:                   {
  268:                     if (*p == L('\0'))
  269:                       return FNM_NOMATCH;
  270:                     c = FOLD ((UCHAR) *p);
  271:                     ++p;
  272: 
  273:                     goto normal_bracket;
  274:                   }
  275:                 else if (c == L('[') && *p == L(':'))
  276:                   {
  277:                     /* Leave room for the null.  */
  278:                     CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
  279:                     size_t c1 = 0;
  280: #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
  281:                     wctype_t wt;
  282: #endif
  283:                     const CHAR *startp = p;
  284: 
  285:                     for (;;)
  286:                       {
  287:                         if (c1 == CHAR_CLASS_MAX_LENGTH)
  288:                           /* The name is too long and therefore the pattern
  289:                              is ill-formed.  */
  290:                           return FNM_NOMATCH;
  291: 
  292:                         c = *++p;
  293:                         if (c == L(':') && p[1] == L(']'))
  294:                           {
  295:                             p += 2;
  296:                             break;
  297:                           }
  298:                         if (c < L('a') || c >= L('z'))
  299:                           {
  300:                             /* This cannot possibly be a character class name.
  301:                                Match it as a normal range.  */
  302:                             p = startp;
  303:                             c = L('[');
  304:                             goto normal_bracket;
  305:                           }
  306:                         str[c1++] = c;
  307:                       }
  308:                     str[c1] = L('\0');
  309: 
  310: #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
  311:                     wt = IS_CHAR_CLASS (str);
  312:                     if (wt == 0)
  313:                       /* Invalid character class name.  */
  314:                       return FNM_NOMATCH;
  315: 
  316: # if defined _LIBC && ! WIDE_CHAR_VERSION
  317:                     /* The following code is glibc specific but does
  318:                        there a good job in speeding up the code since
  319:                        we can avoid the btowc() call.  */
  320:                     if (_ISCTYPE ((UCHAR) *n, wt))
  321:                       goto matched;
  322: # else
  323:                     if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
  324:                       goto matched;
  325: # endif
  326: #else
  327:                     if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
  328:                         || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
  329:                         || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
  330:                         || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
  331:                         || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
  332:                         || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
  333:                         || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
  334:                         || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
  335:                         || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
  336:                         || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
  337:                         || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
  338:                         || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
  339:                       goto matched;
  340: #endif
  341:                     c = *p++;
  342:                   }
  343: #ifdef _LIBC
  344:                 else if (c == L('[') && *p == L('='))
  345:                   {
  346:                     UCHAR str[1];
  347:                     uint32_t nrules =
  348:                       _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
  349:                     const CHAR *startp = p;
  350: 
  351:                     c = *++p;
  352:                     if (c == L('\0'))
  353:                       {
  354:                         p = startp;
  355:                         c = L('[');
  356:                         goto normal_bracket;
  357:                       }
  358:                     str[0] = c;
  359: 
  360:                     c = *++p;
  361:                     if (c != L('=') || p[1] != L(']'))
  362:                       {
  363:                         p = startp;
  364:                         c = L('[');
  365:                         goto normal_bracket;
  366:                       }
  367:                     p += 2;
  368: 
  369:                     if (nrules == 0)
  370:                       {
  371:                         if ((UCHAR) *n == str[0])
  372:                           goto matched;
  373:                       }
  374:                     else
  375:                       {
  376:                         const int32_t *table;
  377: # if WIDE_CHAR_VERSION
  378:                         const int32_t *weights;
  379:                         const int32_t *extra;
  380: # else
  381:                         const unsigned char *weights;
  382:                         const unsigned char *extra;
  383: # endif
  384:                         const int32_t *indirect;
  385:                         int32_t idx;
  386:                         const UCHAR *cp = (const UCHAR *) str;
  387: 
  388:                         /* This #include defines a local function!  */
  389: # if WIDE_CHAR_VERSION
  390: #  include <locale/weightwc.h>
  391: # else
  392: #  include <locale/weight.h>
  393: # endif
  394: 
  395: # if WIDE_CHAR_VERSION
  396:                         table = (const int32_t *)
  397:                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
  398:                         weights = (const int32_t *)
  399:                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
  400:                         extra = (const int32_t *)
  401:                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
  402:                         indirect = (const int32_t *)
  403:                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
  404: # else
  405:                         table = (const int32_t *)
  406:                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
  407:                         weights = (const unsigned char *)
  408:                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
  409:                         extra = (const unsigned char *)
  410:                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
  411:                         indirect = (const int32_t *)
  412:                           _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
  413: # endif
  414: 
  415:                         idx = findidx (&cp);
  416:                         if (idx != 0)
  417:                           {
  418:                             /* We found a table entry.  Now see whether the
  419:                                character we are currently at has the same
  420:                                equivalence class value.  */
  421:                             int len = weights[idx & 0xffffff];
  422:                             int32_t idx2;
  423:                             const UCHAR *np = (const UCHAR *) n;
  424: 
  425:                             idx2 = findidx (&np);
  426:                             if (idx2 != 0
  427:                                 && (idx >> 24) == (idx2 >> 24)
  428:                                 && len == weights[idx2 & 0xffffff])
  429:                               {
  430:                                 int cnt = 0;
  431: 
  432:                                 idx &= 0xffffff;
  433:                                 idx2 &= 0xffffff;
  434: 
  435:                                 while (cnt < len
  436:                                        && (weights[idx + 1 + cnt]
  437:                                            == weights[idx2 + 1 + cnt]))
  438:                                   ++cnt;
  439: 
  440:                                 if (cnt == len)
  441:                                   goto matched;
  442:                               }
  443:                           }
  444:                       }
  445: 
  446:                     c = *p++;
  447:                   }
  448: #endif
  449:                 else if (c == L('\0'))
  450:                   /* [ (unterminated) loses.  */
  451:                   return FNM_NOMATCH;
  452:                 else
  453:                   {
  454:                     int is_range = 0;
  455: 
  456: #ifdef _LIBC
  457:                     int is_seqval = 0;
  458: 
  459:                     if (c == L('[') && *p == L('.'))
  460:                       {
  461:                         uint32_t nrules =
  462:                           _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
  463:                         const CHAR *startp = p;
  464:                         size_t c1 = 0;
  465: 
  466:                         while (1)
  467:                           {
  468:                             c = *++p;
  469:                             if (c == L('.') && p[1] == L(']'))
  470:                               {
  471:                                 p += 2;
  472:                                 break;
  473:                               }
  474:                             if (c == '\0')
  475:                               return FNM_NOMATCH;
  476:                             ++c1;
  477:                           }
  478: 
  479:                         /* We have to handle the symbols differently in
  480:                            ranges since then the collation sequence is
  481:                            important.  */
  482:                         is_range = *p == L('-') && p[1] != L('\0');
  483: 
  484:                         if (nrules == 0)
  485:                           {
  486:                             /* There are no names defined in the collation
  487:                                data.  Therefore we only accept the trivial
  488:                                names consisting of the character itself.  */
  489:                             if (c1 != 1)
  490:                               return FNM_NOMATCH;
  491: 
  492:                             if (!is_range && *n == startp[1])
  493:                               goto matched;
  494: 
  495:                             cold = startp[1];
  496:                             c = *p++;
  497:                           }
  498:                         else
  499:                           {
  500:                             int32_t table_size;
  501:                             const int32_t *symb_table;
  502: # ifdef WIDE_CHAR_VERSION
  503:                             char str[c1];
  504:                             unsigned int strcnt;
  505: # else
  506: #  define str (startp + 1)
  507: # endif
  508:                             const unsigned char *extra;
  509:                             int32_t idx;
  510:                             int32_t elem;
  511:                             int32_t second;
  512:                             int32_t hash;
  513: 
  514: # ifdef WIDE_CHAR_VERSION
  515:                             /* We have to convert the name to a single-byte
  516:                                string.  This is possible since the names
  517:                                consist of ASCII characters and the internal
  518:                                representation is UCS4.  */
  519:                             for (strcnt = 0; strcnt < c1; ++strcnt)
  520:                               str[strcnt] = startp[1 + strcnt];
  521: #endif
  522: 
  523:                             table_size =
  524:                               _NL_CURRENT_WORD (LC_COLLATE,
  525:                                                 _NL_COLLATE_SYMB_HASH_SIZEMB);
  526:                             symb_table = (const int32_t *)
  527:                               _NL_CURRENT (LC_COLLATE,
  528:                                            _NL_COLLATE_SYMB_TABLEMB);
  529:                             extra = (const unsigned char *)
  530:                               _NL_CURRENT (LC_COLLATE,
  531:                                            _NL_COLLATE_SYMB_EXTRAMB);
  532: 
  533:                             /* Locate the character in the hashing table.  */
  534:                             hash = elem_hash (str, c1);
  535: 
  536:                             idx = 0;
  537:                             elem = hash %