(linenum→info "unix/slp.c:2238")

glibc/2.7/iconv/loop.c

    1: /* Conversion loop frame work.
    2:    Copyright (C) 1998-2002, 2003, 2005 Free Software Foundation, Inc.
    3:    This file is part of the GNU C Library.
    4:    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
    5: 
    6:    The GNU C Library is free software; you can redistribute it and/or
    7:    modify it under the terms of the GNU Lesser General Public
    8:    License as published by the Free Software Foundation; either
    9:    version 2.1 of the License, or (at your option) any later version.
   10: 
   11:    The GNU C Library is distributed in the hope that it will be useful,
   12:    but WITHOUT ANY WARRANTY; without even the implied warranty of
   13:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   14:    Lesser General Public License for more details.
   15: 
   16:    You should have received a copy of the GNU Lesser General Public
   17:    License along with the GNU C Library; if not, write to the Free
   18:    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   19:    02111-1307 USA.  */
   20: 
   21: /* This file provides a frame for the reader loop in all conversion modules.
   22:    The actual code must (of course) be provided in the actual module source
   23:    code but certain actions can be written down generically, with some
   24:    customization options which are these:
   25: 
   26:      MIN_NEEDED_INPUT   minimal number of input bytes needed for the next
   27:                         conversion.
   28:      MIN_NEEDED_OUTPUT  minimal number of bytes produced by the next round
   29:                         of conversion.
   30: 
   31:      MAX_NEEDED_INPUT   you guessed it, this is the maximal number of input
   32:                         bytes needed.  It defaults to MIN_NEEDED_INPUT.
   33:      MAX_NEEDED_OUTPUT  likewise for output bytes.
   34: 
   35:      LOOPFCT            name of the function created.  If not specified
   36:                         the name is `loop' but this prevents the use
   37:                         of multiple functions in the same file.
   38: 
   39:      BODY               this is supposed to expand to the body of the loop.
   40:                         The user must provide this.
   41: 
   42:      EXTRA_LOOP_DECLS   extra arguments passed from conversion loop call.
   43: 
   44:      INIT_PARAMS        code to define and initialize variables from params.
   45:      UPDATE_PARAMS      code to store result in params.
   46: 
   47:      ONEBYTE_BODY       body of the specialized conversion function for a
   48:                         single byte from the current character set to INTERNAL.
   49: */
   50: 
   51: #include <assert.h>
   52: #include <endian.h>
   53: #include <gconv.h>
   54: #include <stdint.h>
   55: #include <string.h>
   56: #include <wchar.h>
   57: #include <sys/param.h>          /* For MIN.  */
   58: #define __need_size_t
   59: #include <stddef.h>
   60: 
   61: 
   62: /* We have to provide support for machines which are not able to handle
   63:    unaligned memory accesses.  Some of the character encodings have
   64:    representations with a fixed width of 2 or 4 bytes.  But if we cannot
   65:    access unaligned memory we still have to read byte-wise.  */
   66: #undef FCTNAME2
   67: #if defined _STRING_ARCH_unaligned || !defined DEFINE_UNALIGNED
   68: /* We can handle unaligned memory access.  */
   69: # define get16(addr) *((__const uint16_t *) (addr))
   70: # define get32(addr) *((__const uint32_t *) (addr))
   71: 
   72: /* We need no special support for writing values either.  */
   73: # define put16(addr, val) *((uint16_t *) (addr)) = (val)
   74: # define put32(addr, val) *((uint32_t *) (addr)) = (val)
   75: 
   76: # define FCTNAME2(name) name
   77: #else
   78: /* Distinguish between big endian and little endian.  */
   79: # if __BYTE_ORDER == __LITTLE_ENDIAN
   80: #  define get16(addr) \
   81:      (((__const unsigned char *) (addr))[1] << 8                              \
   82:       | ((__const unsigned char *) (addr))[0])
   83: #  define get32(addr) \
   84:      (((((__const unsigned char *) (addr))[3] << 8                            \
   85:         | ((__const unsigned char *) (addr))[2]) << 8                        \
   86:        | ((__const unsigned char *) (addr))[1]) << 8                          \
   87:       | ((__const unsigned char *) (addr))[0])
   88: 
   89: #  define put16(addr, val) \
   90:      ({ uint16_t __val = (val);                                               \
   91:         ((unsigned char *) (addr))[0] = __val;                               \
   92:         ((unsigned char *) (addr))[1] = __val >> 8;                          \
   93:         (void) 0; })
   94: #  define put32(addr, val) \
   95:      ({ uint32_t __val = (val);                                               \
   96:         ((unsigned char *) (addr))[0] = __val;                               \
   97:         __val >>= 8;                                                         \
   98:         ((unsigned char *) (addr))[1] = __val;                               \
   99:         __val >>= 8;                                                         \
  100:         ((unsigned char *) (addr))[2] = __val;                               \
  101:         __val >>= 8;                                                         \
  102:         ((unsigned char *) (addr))[3] = __val;                               \
  103:         (void) 0; })
  104: # else
  105: #  define get16(addr) \
  106:      (((__const unsigned char *) (addr))[0] << 8                              \
  107:       | ((__const unsigned char *) (addr))[1])
  108: #  define get32(addr) \
  109:      (((((__const unsigned char *) (addr))[0] << 8                            \
  110:         | ((__const unsigned char *) (addr))[1]) << 8                        \
  111:        | ((__const unsigned char *) (addr))[2]) << 8                          \
  112:       | ((__const unsigned char *) (addr))[3])
  113: 
  114: #  define put16(addr, val) \
  115:      ({ uint16_t __val = (val);                                               \
  116:         ((unsigned char *) (addr))[1] = __val;                               \
  117:         ((unsigned char *) (addr))[0] = __val >> 8;                          \
  118:         (void) 0; })
  119: #  define put32(addr, val) \
  120:      ({ uint32_t __val = (val);                                               \
  121:         ((unsigned char *) (addr))[3] = __val;                               \
  122:         __val >>= 8;                                                         \
  123:         ((unsigned char *) (addr))[2] = __val;                               \
  124:         __val >>= 8;                                                         \
  125:         ((unsigned char *) (addr))[1] = __val;                               \
  126:         __val >>= 8;                                                         \
  127:         ((unsigned char *) (addr))[0] = __val;                               \
  128:         (void) 0; })
  129: # endif
  130: 
  131: # define FCTNAME2(name) name##_unaligned
  132: #endif
  133: #define FCTNAME(name) FCTNAME2(name)
  134: 
  135: 
  136: /* We need at least one byte for the next round.  */
  137: #ifndef MIN_NEEDED_INPUT
  138: # error "MIN_NEEDED_INPUT definition missing"
  139: #elif MIN_NEEDED_INPUT < 1
  140: # error "MIN_NEEDED_INPUT must be >= 1"
  141: #endif
  142: 
  143: /* Let's see how many bytes we consume at most.  */
  144: #ifndef MAX_NEEDED_INPUT
  145: # define MAX_NEEDED_INPUT       MIN_NEEDED_INPUT
  146: #endif
  147: 
  148: /* We produce at least one byte in the next round.  */
  149: #ifndef MIN_NEEDED_OUTPUT
  150: # error "MIN_NEEDED_OUTPUT definition missing"
  151: #elif MIN_NEEDED_OUTPUT < 1
  152: # error "MIN_NEEDED_OUTPUT must be >= 1"
  153: #endif
  154: 
  155: /* Let's see how many bytes we produce.  */
  156: #ifndef MAX_NEEDED_OUTPUT
  157: # define MAX_NEEDED_OUTPUT      MIN_NEEDED_OUTPUT
  158: #endif
  159: 
  160: /* Default name for the function.  */
  161: #ifndef LOOPFCT
  162: # define LOOPFCT                loop
  163: #endif
  164: 
  165: /* Make sure we have a loop body.  */
  166: #ifndef BODY
  167: # error "Definition of BODY missing for function" LOOPFCT
  168: #endif
  169: 
  170: 
  171: /* If no arguments have to be passed to the loop function define the macro
  172:    as empty.  */
  173: #ifndef EXTRA_LOOP_DECLS
  174: # define EXTRA_LOOP_DECLS
  175: #endif
  176: 
  177: 
  178: /* To make it easier for the writers of the modules, we define a macro
  179:    to test whether we have to ignore errors.  It expands to an expression
          using the variables `irreversible' and `flags', which must be in scope
          where it is used; the loop functions in this file only define `flags'
          when LOOP_NEED_FLAGS is defined.  */
  180: #define ignore_errors_p() \
  181:   (irreversible != NULL && (flags & __GCONV_IGNORE_ERRORS))
  182: 
  183: 
  184: /* Error handling for the FROM_LOOP direction, with ignoring of errors.
  185:    Note that we cannot use the do while (0) trick since `break' and
  186:    `continue' must reach certain points.  RESULT is set to
          __GCONV_ILLEGAL_INPUT.  If errors are not ignored the macro breaks out
          of the enclosing loop; otherwise INCR input bytes are skipped, the
          counter *IRREVERSIBLE is incremented and the loop continues.  */
  187: #define STANDARD_FROM_LOOP_ERR_HANDLER(Incr) \
  188:   {                                                                           \
  189:     result = __GCONV_ILLEGAL_INPUT;                                           \
  190:                                                                               \
  191:     if (! ignore_errors_p ())                                                 \
  192:       break;                                                                  \
  193:                                                                               \
  194:     /* We ignore the invalid input byte sequence.  */                         \
  195:     inptr += (Incr);                                                          \
  196:     ++*irreversible;                                                          \
  197:     /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \
  198:        that "iconv -c" must give the same exitcode as "iconv".  */            \
  199:     continue;                                                                 \
  200:   }
  201: 
  202: /* Error handling for the TO_LOOP direction, with use of transliteration/
  203:    transcription functions and ignoring of errors.  Note that we cannot use
  204:    the do while (0) trick since `break' and `continue' must reach certain
  205:    points.  INCR is the number of input bytes skipped when the character
          ends up being ignored.  */
  206: #define STANDARD_TO_LOOP_ERR_HANDLER(Incr) \
  207:   {                                                                           \
  208:     struct __gconv_trans_data *trans;                                         \
  209:                                                                               \
  210:     result = __GCONV_ILLEGAL_INPUT;                                           \
  211:                                                                               \
  212:     if (irreversible == NULL)                                                 \
  213:       /* This means we are in call from __gconv_transliterate.  In this       \
  214:          case we are not doing any error recovery ourselves.  */             \
  215:       break;                                                                  \
  216:                                                                               \
  217:     /* First try the transliteration methods.  */                             \
  218:     for (trans = step_data->__trans; trans != NULL; trans = trans->__next)    \
  219:       {                                                                       \
  220:         result = DL_CALL_FCT (trans->__trans_fct,                            \
  221:                               (step, step_data, trans->__data, *inptrp,            \
  222:                                &inptr, inend, &outptr, irreversible));             \
  223:         if (result != __GCONV_ILLEGAL_INPUT)                                 \
  224:           break;                                                             \
  225:       }                                                                       \
  226:     /* If any of them recognized the input continue with the loop.  */        \
  227:     if (result != __GCONV_ILLEGAL_INPUT)                                      \
  228:       continue;                                                               \
  229:                                                                               \
  230:     /* Next see whether we have to ignore the error.  If not, stop.  */       \
  231:     if (! ignore_errors_p ())                                                 \
  232:       break;                                                                  \
  233:                                                                               \
  234:     /* When we come here it means we ignore the character.  */                \
  235:     ++*irreversible;                                                          \
  236:     inptr += Incr;                                                            \
  237:     /* But we keep result == __GCONV_ILLEGAL_INPUT, because of the constraint \
  238:        that "iconv -c" must give the same exitcode as "iconv".  */            \
  239:     continue;                                                                 \
  240:   }
  241: 
  242: 
  243: /* Handling of Unicode 3.1 TAG characters.  Unicode recommends
  244:    "If language codes are not relevant to the particular processing
  245:     operation, then they should be ignored."  This macro is usually
  246:    called right before STANDARD_TO_LOOP_ERR_HANDLER (Incr).  When
          CHARACTER is a TAG character the macro advances INPTR by INCR and
          continues with the next loop iteration; otherwise it does nothing.  */
  247: #define UNICODE_TAG_HANDLER(Character, Incr) \
  248:   {                                                                           \
  249:     /* TAG characters are those in the range U+E0000..U+E007F.  */            \
  250:     if (((Character) >> 7) == (0xe0000 >> 7))                                 \
  251:       {                                                                       \
  252:         inptr += Incr;                                                       \
  253:         continue;                                                            \
  254:       }                                                                       \
  255:   }
  256: 
  257: 
  258: /* Generic conversion loop; returns the status, as defined in gconv.h.
          BODY is executed repeatedly on the input between *INPTRP and INEND,
          writing to the output between *OUTPTRP and OUTEND.  Before each round
          the loop stops with __GCONV_INCOMPLETE_INPUT if fewer than
          MIN_NEEDED_INPUT bytes of input remain, or with __GCONV_FULL_OUTPUT if
          fewer than MIN_NEEDED_OUTPUT bytes of output space remain.  When the
          input is used up RESULT still holds __GCONV_EMPTY_INPUT unless BODY
          stored a different status.  On return *INPTRP and *OUTPTRP are set to
          the positions reached.  */
  259: static inline int
  260: __attribute ((always_inline))
  261: FCTNAME (LOOPFCT) (struct __gconv_step *step,
  262:                    struct __gconv_step_data *step_data,
  263:                    const unsigned char **inptrp, const unsigned char *inend,
  264:                    unsigned char **outptrp, const unsigned char *outend,
  265:                    size_t *irreversible EXTRA_LOOP_DECLS)
  266: {
  267: #ifdef LOOP_NEED_STATE
  268:   mbstate_t *state = step_data->__statep;
  269: #endif
  270: #ifdef LOOP_NEED_FLAGS
  271:   int flags = step_data->__flags;
  272: #endif
  273: #ifdef LOOP_NEED_DATA
  274:   void *data = step->__data;
  275: #endif
         /* Status returned if the input runs out and BODY never changes it.  */
  276:   int result = __GCONV_EMPTY_INPUT;
  277:   const unsigned char *inptr = *inptrp;
  278:   unsigned char *outptr = *outptrp;
  279: 
  280: #ifdef INIT_PARAMS
  281:   INIT_PARAMS;
  282: #endif
  283: 
  284:   while (inptr != inend)
  285:     {
  286:       /* The `if' cases for MIN_NEEDED_OUTPUT ==/!= 1 are there to help
  287:          the compiler generate better code.  They will be optimized away
  288:          since MIN_NEEDED_OUTPUT is always a constant.  */
  289:       if (MIN_NEEDED_INPUT > 1
  290:           && __builtin_expect (inptr + MIN_NEEDED_INPUT > inend, 0))
  291:         {
  292:           /* We don't have enough input for another complete input
  293:              character.  */
  294:           result = __GCONV_INCOMPLETE_INPUT;
  295:           break;
  296:         }
  297:       if ((MIN_NEEDED_OUTPUT != 1
  298:            && __builtin_expect (outptr + MIN_NEEDED_OUTPUT > outend, 0))
  299:           || (MIN_NEEDED_OUTPUT == 1
  300:               && __builtin_expect (outptr >= outend, 0)))
  301:         {
  302:           /* Overflow in the output buffer.  */
  303:           result = __GCONV_FULL_OUTPUT;
  304:           break;
  305:         }
  306: 
  307:       /* Here comes the body the user provides.  It can stop with
  308:          RESULT set to __GCONV_INCOMPLETE_INPUT (if the input characters
  309:          vary in size), __GCONV_ILLEGAL_INPUT, or
  310:          __GCONV_FULL_OUTPUT (if the output characters vary in size).  */
  311:       BODY
  312:     }
  313: 
  314:   /* Update the pointers pointed to by the parameters.  */
  315:   *inptrp = inptr;
  316:   *outptrp = outptr;
  317: #ifdef UPDATE_PARAMS
  318:   UPDATE_PARAMS;
  319: #endif
  320: 
  321:   return result;
  322: }
  323: 
  324: 
  325: /* Include the file a second time to define the function to handle
  326:    unaligned access.  */
  327: #if !defined DEFINE_UNALIGNED && !defined _STRING_ARCH_unaligned \
  328:     && MIN_NEEDED_INPUT != 1 && MAX_NEEDED_INPUT % MIN_NEEDED_INPUT == 0 \
  329:     && MIN_NEEDED_OUTPUT != 1 && MAX_NEEDED_OUTPUT % MIN_NEEDED_OUTPUT == 0
  330: # undef get16
  331: # undef get32
  332: # undef put16
  333: # undef put32
  334: # undef unaligned
  335: 
  336: # define DEFINE_UNALIGNED
  337: # include "loop.c"
  338: # undef DEFINE_UNALIGNED
  339: #endif
  340: 
  341: 
  342: #if MAX_NEEDED_INPUT > 1
  343: # define SINGLE(fct) SINGLE2 (fct)
  344: # define SINGLE2(fct) fct##_single
       /* Convert one character part of which is held in the conversion state.
          The bytes from the state and bytes from the new input are gathered in
          BYTEBUF and BODY is run once on that buffer.  Returns
          __GCONV_INCOMPLETE_INPUT if the character still is not complete (the
          new input bytes are then stored too, directly or through STORE_REST,
          and *INPTRP is advanced past them), __GCONV_FULL_OUTPUT if there is
          no room for MIN_NEEDED_OUTPUT bytes of output, __GCONV_OK if BODY
          consumed a character, or else the status BODY left in RESULT.  */
  345: static inline int
  346: __attribute ((always_inline))
  347: SINGLE(LOOPFCT) (struct __gconv_step *step,
  348:                  struct __gconv_step_data *step_data,
  349:                  const unsigned char **inptrp, const unsigned char *inend,
  350:                  unsigned char **outptrp, unsigned char *outend,
  351:                  size_t *irreversible EXTRA_LOOP_DECLS)
  352: {
  353:   mbstate_t *state = step_data->__statep;
  354: #ifdef LOOP_NEED_FLAGS
  355:   int flags = step_data->__flags;
  356: #endif
  357: #ifdef LOOP_NEED_DATA
  358:   void *data = step->__data;
  359: #endif
  360:   int result = __GCONV_OK;
  361:   unsigned char bytebuf[MAX_NEEDED_INPUT];
  362:   const unsigned char *inptr = *inptrp;
  363:   unsigned char *outptr = *outptrp;
  364:   size_t inlen;
  365: 
  366: #ifdef INIT_PARAMS
  367:   INIT_PARAMS;
  368: #endif
  369: 
  370: #ifdef UNPACK_BYTES
         /* NOTE(review): presumably UNPACK_BYTES fills BYTEBUF and sets INLEN,
            since INLEN is read right after it -- confirm against the modules
            that define it.  */
  371:   UNPACK_BYTES
  372: #else
  373:   /* Add the bytes from the state to the input buffer.  */
  374:   for (inlen = 0; inlen < (size_t) (state->__count & 7); ++inlen)
  375:     bytebuf[inlen] = state->__value.__wchb[inlen];
  376: #endif
  377: 
  378:   /* Are there enough bytes in the input buffer?  */
  379:   if (__builtin_expect (inptr + (MIN_NEEDED_INPUT - inlen) > inend, 0))
  380:     {
  381:       *inptrp = inend;
  382: #ifdef STORE_REST
  383:       inptr = bytebuf;
  384:       inptrp = &inptr;
  385:       inend = &bytebuf[inlen];
  386: 
  387:       STORE_REST
  388: #else
  389:       /* We don't have enough input for another complete input
  390:          character.  Append what there is to the bytes already held in
                the state.  NOTE(review): unlike the incomplete-input path
                further down, state->__count is not updated here -- confirm
                that this is intended.  */
  391:       while (inptr < inend)
  392:         state->__value.__wchb[inlen++] = *inptr++;
  393: #endif
  394: 
  395:       return __GCONV_INCOMPLETE_INPUT;
  396:     }
  397: 
  398:   /* Enough space in output buffer.  */
  399:   if ((MIN_NEEDED_OUTPUT != 1 && outptr + MIN_NEEDED_OUTPUT > outend)
  400:       || (MIN_NEEDED_OUTPUT == 1 && outptr >= outend))
  401:     /* Overflow in the output buffer.  */
  402:     return __GCONV_FULL_OUTPUT;
  403: 
  404:   /*  Now add characters from the normal input buffer.  */
  405:   do
  406:     bytebuf[inlen++] = *inptr++;
  407:   while (inlen < MAX_NEEDED_INPUT && inptr < inend);
  408: 
         /* From here on BODY reads from BYTEBUF instead of the caller's
            buffer.  */
  409:   inptr = bytebuf;
  410:   inend = &bytebuf[inlen];
  411: 
         /* BODY runs exactly once; wrapping it in do ... while (0) gives any
            `break' or `continue' inside BODY a place to go.  */
  412:   do
  413:     {
  414:       BODY
  415:     }
  416:   while (0);
  417: 
  418:   /* Now we either have produced an output character and consumed all the
  419:      bytes from the state and at least one more, or the character is still
  420:      incomplete, or we have some other error (like illegal input character,
  421:      no space in output buffer).  */
  422:   if (__builtin_expect (inptr != bytebuf, 1))
  423:     {
  424:       /* We found a new character.  */
  425:       assert (inptr - bytebuf > (state->__count & 7));
  426: 
             /* Advance the caller's input pointer only by the bytes that came
                from the new input, not by those taken from the state.  */
  427:       *inptrp += inptr - bytebuf - (state->__count & 7);
  428:       *outptrp = outptr;
  429: 
  430:       result = __GCONV_OK;
  431: 
  432:       /* Clear the state buffer.  */
  433: #ifdef CLEAR_STATE
  434:       CLEAR_STATE;
  435: #else
  436:       state->__count &= ~7;
  437: #endif
  438:     }
  439:   else if (result == __GCONV_INCOMPLETE_INPUT)
  440:     {
  441:       /* This can only happen if we have less than MAX_NEEDED_INPUT bytes
  442:          available.  */
  443:       assert (inend != &bytebuf[MAX_NEEDED_INPUT]);
  444: 
  445:       *inptrp += inend - bytebuf - (state->__count & 7);
  446: #ifdef STORE_REST
  447:       inptrp = &inptr;
  448: 
  449:       STORE_REST
  450: #else
  451:       /* We don't have enough input for another complete input
  452:          character.  Record the number of collected bytes in the low
                three bits of state->__count and copy the bytes into the state.
                NOTE(review): the first assertion below masks with ~7 where the
                rest of this function uses 7 -- confirm which is meant.  */
  453:       assert (inend - inptr > (state->__count & ~7));
  454:       assert (inend - inptr <= 7);
  455:       state->__count = (state->__count & ~7) | (inend - inptr);
  456:       inlen = 0;
  457:       while (inptr < inend)
  458:         state->__value.__wchb[inlen++] = *inptr++;
  459: #endif
  460:     }
  461: 
  462:   return result;
  463: }
  464: # undef SINGLE
  465: # undef SINGLE2
  466: #endif
  467: 
  468: 
  469: #ifdef ONEBYTE_BODY
  470: /* Define the shortcut function for btowc.  The function body is whatever
          the including module supplied as ONEBYTE_BODY, and FROM_ONEBYTE is
          defined to the name of the resulting function.  */
  471: static wint_t
  472: gconv_btowc (struct __gconv_step *step, unsigned char c)
  473:   ONEBYTE_BODY
  474: # define FROM_ONEBYTE gconv_btowc
  475: #endif
  476: 
  477: 
  478: /* We remove the macro definitions so that we can include this file again
  479:    for the definition of another function.  */
  480: #undef MIN_NEEDED_INPUT
  481: #undef MAX_NEEDED_INPUT
  482: #undef MIN_NEEDED_OUTPUT
  483: #undef MAX_NEEDED_OUTPUT
  484: #undef LOOPFCT
  485: #undef BODY
  486: #undef LOOPFCT
  487: #undef EXTRA_LOOP_DECLS
  488: #undef INIT_PARAMS
  489: #undef UPDATE_PARAMS
  490: #undef ONEBYTE_BODY
  491: #undef UNPACK_BYTES
  492: #undef CLEAR_STATE
  493: #undef LOOP_NEED_STATE
  494: #undef LOOP_NEED_FLAGS
  495: #undef LOOP_NEED_DATA
  496: #undef get16
  497: #undef get32
  498: #undef put16
  499: #undef put32
  500: #undef unaligned
1
Syntax (Markdown)