(linenum→info "unix/slp.c:2238")

bsd-games/2.17/quiz/rxp.c

    1: /*      $NetBSD: rxp.c,v 1.12 2004/01/27 20:30:30 jsm Exp $  */
    2: 
    3: /*-
    4:  * Copyright (c) 1991, 1993
    5:  *      The Regents of the University of California.  All rights reserved.
    6:  *
    7:  * This code is derived from software contributed to Berkeley by
    8:  * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
    9:  * Commodore Business Machines.
   10:  *
   11:  * Redistribution and use in source and binary forms, with or without
   12:  * modification, are permitted provided that the following conditions
   13:  * are met:
   14:  * 1. Redistributions of source code must retain the above copyright
   15:  *    notice, this list of conditions and the following disclaimer.
   16:  * 2. Redistributions in binary form must reproduce the above copyright
   17:  *    notice, this list of conditions and the following disclaimer in the
   18:  *    documentation and/or other materials provided with the distribution.
   19:  * 3. Neither the name of the University nor the names of its contributors
   20:  *    may be used to endorse or promote products derived from this software
   21:  *    without specific prior written permission.
   22:  *
   23:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   24:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   25:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   26:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   27:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   28:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   29:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   30:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   32:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   33:  * SUCH DAMAGE.
   34:  */
   35: 
   36: #include <sys/cdefs.h>
   37: #ifndef lint
   38: #if 0
   39: static char sccsid[] = "@(#)rxp.c       8.1 (Berkeley) 5/31/93";
   40: #else
   41: __RCSID("$NetBSD: rxp.c,v 1.12 2004/01/27 20:30:30 jsm Exp $");
   42: #endif
   43: #endif /* not lint */
   44: 
   45: /*
   46:  * regular expression parser
   47:  *
   48:  * external functions and return values are:
   49:  * rxp_compile(s)
   50:  *      TRUE success
   51:  *      FALSE        parse failure; error message will be in char rxperr[]
   52:  * metas are:
   53:  *      {...}        optional pattern, equialent to [...|]
   54:  *      |    alternate pattern
   55:  *      [...]        pattern delimiters
   56:  *
   57:  * rxp_match(s)
   58:  *      TRUE string s matches compiled pattern
   59:  *      FALSE        match failure or regexp error
   60:  *
   61:  * rxp_expand()
   62:  *      char *       reverse-engineered regular expression string
   63:  *      NULL regexp error
   64:  */
   65: 
   66: #include <stdio.h>
   67: #include <stdlib.h>
   68: #include <ctype.h>
   69: #include "quiz.h"
   70:                                         /* regexp tokens,  arg */
   71: #define LIT     (-1)                        /* literal character,        char */
   72: #define SOT     (-2)                        /* start text anchor,        - */
   73: #define EOT     (-3)                        /* end text anchor,  - */
   74: #define GRP_S   (-4)                      /* start alternate grp,    ptr_to_end */
   75: #define GRP_E   (-5)                      /* end group,              - */
   76: #define ALT_S   (-6)                      /* alternate starts,       ptr_to_next */
   77: #define ALT_E   (-7)                      /* alternate ends, - */
   78: #define END     (-8)                        /* end of regexp,    - */
   79: 
   80: typedef short Rxp_t;                    /* type for regexp tokens */
   81: 
   82: static Rxp_t rxpbuf[RXP_LINE_SZ];       /* compiled regular expression buffer */
   83: char rxperr[128];                       /* parser error message */
   84: 
   85: static int       rxp__compile(const char *, int);
   86: static char     *rxp__expand(int);
   87: static int       rxp__match(const char *, int, Rxp_t *, Rxp_t *, const char *);
   88: 
   89: int
   90: rxp_compile(s)
   91:         const char *   s;
   92: {
   93:         return (rxp__compile(s, TRUE));
   94: }
   95: 
   96: static int
   97: rxp__compile(s, first)
   98:         const char *s;
   99:         int first;
  100: {
  101:         static Rxp_t *rp;
  102:         static const char *sp;
  103:         Rxp_t *grp_ptr;
  104:         Rxp_t *alt_ptr;
  105:         int esc, err;
  106: 
  107:         esc = 0;
  108:         if (first) {
  109:                 rp = rxpbuf;
  110:                 sp = s;
  111:                 *rp++ = SOT;  /* auto-anchor: pat is really ^pat$ */
  112:                 *rp++ = GRP_S;        /* auto-group: ^pat$ is really ^[pat]$ */
  113:                 *rp++ = 0;
  114:         }
  115:         *rp++ = ALT_S;
  116:         alt_ptr = rp;
  117:         *rp++ = 0;
  118:         for (; *sp; ++sp) {
  119:                 if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
  120:                         (void)snprintf(rxperr, sizeof(rxperr),
  121:                             "regular expression too long %s", s);
  122:                         return (FALSE);
  123:                 }
  124:                 if (*sp == ':' && !esc)
  125:                         break;
  126:                 if (esc) {
  127:                         *rp++ = LIT;
  128:                         *rp++ = *sp;
  129:                         esc = 0;
  130:                 }
  131:                 else switch (*sp) {
  132:                 case '\\':
  133:                         esc = 1;
  134:                         break;
  135:                 case '{':
  136:                 case '[':
  137:                         *rp++ = GRP_S;
  138:                         grp_ptr = rp;
  139:                         *rp++ = 0;
  140:                         sp++;
  141:                         if ((err = rxp__compile(s, FALSE)) != TRUE)
  142:                                 return (err);
  143:                         *rp++ = GRP_E;
  144:                         *grp_ptr = rp - rxpbuf;
  145:                         break;
  146:                 case '}':
  147:                 case ']':
  148:                 case '|':
  149:                         *rp++ = ALT_E;
  150:                         *alt_ptr = rp - rxpbuf;
  151:                         if (*sp != ']') {
  152:                                 *rp++ = ALT_S;
  153:                                 alt_ptr = rp;
  154:                                 *rp++ = 0;
  155:                         }
  156:                         if (*sp != '|') {
  157:                                 if (*sp != ']') {
  158:                                         *rp++ = ALT_E;
  159:                                         *alt_ptr = rp - rxpbuf;
  160:                                 }
  161:                                 if (first) {
  162:                                         (void)snprintf(rxperr, sizeof(rxperr),
  163:                                             "unmatched alternator in regexp %s",
  164:                                              s);
  165:                                         return (FALSE);
  166:                                 }
  167:                                 return (TRUE);
  168:                         }
  169:                         break;
  170:                 default:
  171:                         *rp++ = LIT;
  172:                         *rp++ = *sp;
  173:                         esc = 0;
  174:                         break;
  175:                 }
  176:         }
  177:         if (!first) {
  178:                 (void)snprintf(rxperr, sizeof(rxperr),
  179:                     "unmatched alternator in regexp %s", s);
  180:                 return (FALSE);
  181:         }
  182:         *rp++ = ALT_E;
  183:         *alt_ptr = rp - rxpbuf;
  184:         *rp++ = GRP_E;
  185:         *(rxpbuf + 2) = rp - rxpbuf;
  186:         *rp++ = EOT;
  187:         *rp = END;
  188:         return (TRUE);
  189: }
  190: 
  191: /*
  192:  * match string against compiled regular expression
  193:  */
  194: int
  195: rxp_match(s)
  196:         const char *   s;
  197: {
  198:         return (rxp__match(s, TRUE, NULL, NULL, NULL));
  199: }
  200: 
  201: static int
  202: rxp__match(s, first, j_succ, j_fail, sp_fail)
  203:         const char *s;
  204:         int first;
  205:         Rxp_t *j_succ;         /* jump here on successful alt match */
  206:         Rxp_t *j_fail;         /* jump here on failed match */
  207:         const char *sp_fail;           /* reset sp to here on failed match */
  208: {
  209:         static Rxp_t *rp;
  210:         static const char *sp;
  211:         int ch;
  212:         Rxp_t *grp_end = NULL;
  213: 
  214:         if (first) {
  215:                 rp = rxpbuf;
  216:                 sp = s;
  217:         }
  218:         while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
  219:                 switch(*rp) {
  220:                 case LIT:
  221:                         rp++;
  222:                         ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
  223:                         if (ch != *sp++) {
  224:                                 rp = j_fail;
  225:                                 sp = sp_fail;
  226:                                 return (FALSE);
  227:                         }
  228:                         rp++;
  229:                         break;
  230:                 case SOT:
  231:                         if (sp != s)
  232:                                 return (FALSE);
  233:                         rp++;
  234:                         break;
  235:                 case EOT:
  236:                         if (*sp != 0)
  237:                                 return (FALSE);
  238:                         rp++;
  239:                         break;
  240:                 case GRP_S:
  241:                         rp++;
  242:                         grp_end = rxpbuf + *rp++;
  243:                         break;
  244:                 case ALT_S:
  245:                         rp++;
  246:                         rxp__match(sp, FALSE, grp_end, rxpbuf + *rp++, sp);
  247:                         break;
  248:                 case ALT_E:
  249:                         rp = j_succ;
  250:                         return (TRUE);
  251:                 case GRP_E:
  252:                         rp = j_fail;
  253:                         sp = sp_fail;
  254:                         return (FALSE);
  255:                 default:
  256:                         abort();
  257:                 }
  258:         return (*rp != END ? FALSE : TRUE);
  259: }
  260: 
  261: /*
  262:  * Reverse engineer the regular expression, by picking first of all alternates.
  263:  */
  264: char *
  265: rxp_expand()
  266: {
  267:         return (rxp__expand(TRUE));
  268: }
  269: 
  270: static char *
  271: rxp__expand(first)
  272:         int first;
  273: {
  274:         static char buf[RXP_LINE_SZ/2];
  275:         static Rxp_t *rp;
  276:         static char *bp;
  277:         Rxp_t *grp_ptr;
  278:         char *err;
  279: 
  280:         if (first) {
  281:                 rp = rxpbuf;
  282:                 bp = buf;
  283:         }
  284:         while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
  285:                 switch(*rp) {
  286:                 case LIT:
  287:                         rp++;
  288:                         *bp++ = *rp++;
  289:                         break;
  290:                 case GRP_S:
  291:                         rp++;
  292:                         grp_ptr = rxpbuf + *rp;
  293:                         rp++;
  294:                         if ((err = rxp__expand(FALSE)) == NULL)
  295:                                 return (err);
  296:                         rp = grp_ptr;
  297:                         break;
  298:                 case ALT_E:
  299:                         return (buf);
  300:                 case ALT_S:
  301:                         rp++;
  302:                         /* FALLTHROUGH */
  303:                 case SOT:
  304:                 case EOT:
  305:                 case GRP_E:
  306:                         rp++;
  307:                         break;
  308:                 default:
  309:                         return (NULL);
  310:                 }
  311:         if (first) {
  312:                 if (*rp != END)
  313:                         return (NULL);
  314:                 *bp = '\0';
  315:         }
  316:         return (buf);
  317: }
Syntax (Markdown)