(linenum→info "unix/slp.c:2238")

glibc/2.7/iconv/gconv_simple.c

    1: /* Simple transformations functions.
    2:    Copyright (C) 1997-2003, 2004, 2005, 2007 Free Software Foundation, Inc.
    3:    This file is part of the GNU C Library.
    4:    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
    5: 
    6:    The GNU C Library is free software; you can redistribute it and/or
    7:    modify it under the terms of the GNU Lesser General Public
    8:    License as published by the Free Software Foundation; either
    9:    version 2.1 of the License, or (at your option) any later version.
   10: 
   11:    The GNU C Library is distributed in the hope that it will be useful,
   12:    but WITHOUT ANY WARRANTY; without even the implied warranty of
   13:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   14:    Lesser General Public License for more details.
   15: 
   16:    You should have received a copy of the GNU Lesser General Public
   17:    License along with the GNU C Library; if not, write to the Free
   18:    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   19:    02111-1307 USA.  */
   20: 
   21: #include <byteswap.h>
   22: #include <dlfcn.h>
   23: #include <endian.h>
   24: #include <errno.h>
   25: #include <gconv.h>
   26: #include <stdint.h>
   27: #include <stdlib.h>
   28: #include <string.h>
   29: #include <wchar.h>
   30: #include <sys/param.h>
   31: #include <gconv_int.h>
   32: 
   33: #define BUILTIN_ALIAS(s1, s2) /* nothing */
   34: #define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
   35:                                MinF, MaxF, MinT, MaxT) \
   36:   extern int Fct (struct __gconv_step *, struct __gconv_step_data *,          \
   37:                   __const unsigned char **, __const unsigned char *,        \
   38:                   unsigned char **, size_t *, int, int);
   39: #include "gconv_builtin.h"
   40: 
   41: 
   42: #ifndef EILSEQ
   43: # define EILSEQ EINVAL
   44: #endif
   45: 
   46: 
   47: /* Specialized conversion function for a single byte to INTERNAL, recognizing
   48:    only ASCII characters.  */
   49: wint_t
   50: __gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
   51: {
   52:   if (c < 0x80)
   53:     return c;
   54:   else
   55:     return WEOF;
   56: }
   57: 
   58: 
   59: /* Transform from the internal, UCS4-like format, to UCS4.  The
   60:    only difference, if any, between the internal UCS4 format and the
   61:    real UCS4 format is the endianness.  Unicode/ISO 10646 says that,
   62:    unless some higher protocol specifies otherwise, the byte
   63:    order is big endian.  */
   64: #define DEFINE_INIT             0
   65: #define DEFINE_FINI             0
   66: #define MIN_NEEDED_FROM         4
   67: #define MIN_NEEDED_TO           4
   68: #define FROM_DIRECTION          1
   69: #define FROM_LOOP               internal_ucs4_loop
   70: #define TO_LOOP                 internal_ucs4_loop /* This is not used.  */
   71: #define FUNCTION_NAME           __gconv_transform_internal_ucs4
   72: 
   73: 
   74: static inline int
   75: __attribute ((always_inline))
   76: internal_ucs4_loop (struct __gconv_step *step,
   77:                     struct __gconv_step_data *step_data,
   78:                     const unsigned char **inptrp, const unsigned char *inend,
   79:                     unsigned char **outptrp, unsigned char *outend,
   80:                     size_t *irreversible)
   81: {
   82:   const unsigned char *inptr = *inptrp;
   83:   unsigned char *outptr = *outptrp;
   84:   size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
   85:   int result;
   86: 
   87: #if __BYTE_ORDER == __LITTLE_ENDIAN
   88:   /* Sigh, we have to do some real work.  */
   89:   size_t cnt;
   90:   uint32_t *outptr32 = (uint32_t *) outptr;
   91: 
   92:   for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
   93:     *outptr32++ = bswap_32 (*(const uint32_t *) inptr);
   94: 
   95:   *inptrp = inptr;
   96:   *outptrp = (unsigned char *) outptr32;
   97: #elif __BYTE_ORDER == __BIG_ENDIAN
   98:   /* Simply copy the data.  */
   99:   *inptrp = inptr + n_convert * 4;
  100:   *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
  101: #else
  102: # error "This endianess is not supported."
  103: #endif
  104: 
  105:   /* Determine the status.  */
  106:   if (*inptrp == inend)
  107:     result = __GCONV_EMPTY_INPUT;
  108:   else if (*outptrp + 4 > outend)
  109:     result = __GCONV_FULL_OUTPUT;
  110:   else
  111:     result = __GCONV_INCOMPLETE_INPUT;
  112: 
  113:   return result;
  114: }
  115: 
  116: #ifndef _STRING_ARCH_unaligned
  117: static inline int
  118: __attribute ((always_inline))
  119: internal_ucs4_loop_unaligned (struct __gconv_step *step,
  120:                               struct __gconv_step_data *step_data,
  121:                               const unsigned char **inptrp,
  122:                               const unsigned char *inend,
  123:                               unsigned char **outptrp, unsigned char *outend,
  124:                               size_t *irreversible)
  125: {
  126:   const unsigned char *inptr = *inptrp;
  127:   unsigned char *outptr = *outptrp;
  128:   size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  129:   int result;
  130: 
  131: # if __BYTE_ORDER == __LITTLE_ENDIAN
  132:   /* Sigh, we have to do some real work.  */
  133:   size_t cnt;
  134: 
  135:   for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
  136:     {
  137:       outptr[0] = inptr[3];
  138:       outptr[1] = inptr[2];
  139:       outptr[2] = inptr[1];
  140:       outptr[3] = inptr[0];
  141:     }
  142: 
  143:   *inptrp = inptr;
  144:   *outptrp = outptr;
  145: # elif __BYTE_ORDER == __BIG_ENDIAN
  146:   /* Simply copy the data.  */
  147:   *inptrp = inptr + n_convert * 4;
  148:   *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
  149: # else
  150: #  error "This endianess is not supported."
  151: # endif
  152: 
  153:   /* Determine the status.  */
  154:   if (*inptrp == inend)
  155:     result = __GCONV_EMPTY_INPUT;
  156:   else if (*outptrp + 4 > outend)
  157:     result = __GCONV_FULL_OUTPUT;
  158:   else
  159:     result = __GCONV_INCOMPLETE_INPUT;
  160: 
  161:   return result;
  162: }
  163: #endif
  164: 
  165: 
  166: static inline int
  167: __attribute ((always_inline))
  168: internal_ucs4_loop_single (struct __gconv_step *step,
  169:                            struct __gconv_step_data *step_data,
  170:                            const unsigned char **inptrp,
  171:                            const unsigned char *inend,
  172:                            unsigned char **outptrp, unsigned char *outend,
  173:                            size_t *irreversible)
  174: {
  175:   mbstate_t *state = step_data->__statep;
  176:   size_t cnt = state->__count & 7;
  177: 
  178:   while (*inptrp < inend && cnt < 4)
  179:     state->__value.__wchb[cnt++] = *(*inptrp)++;
  180: 
  181:   if (__builtin_expect (cnt < 4, 0))
  182:     {
  183:       /* Still not enough bytes.  Store the ones in the input buffer.  */
  184:       state->__count &= ~7;
  185:       state->__count |= cnt;
  186: 
  187:       return __GCONV_INCOMPLETE_INPUT;
  188:     }
  189: 
  190: #if __BYTE_ORDER == __LITTLE_ENDIAN
  191:   (*outptrp)[0] = state->__value.__wchb[3];
  192:   (*outptrp)[1] = state->__value.__wchb[2];
  193:   (*outptrp)[2] = state->__value.__wchb[1];
  194:   (*outptrp)[3] = state->__value.__wchb[0];
  195: 
  196: #elif __BYTE_ORDER == __BIG_ENDIAN
  197:   /* XXX unaligned */
  198:   (*outptrp)[0] = state->__value.__wchb[0];
  199:   (*outptrp)[1] = state->__value.__wchb[1];
  200:   (*outptrp)[2] = state->__value.__wchb[2];
  201:   (*outptrp)[3] = state->__value.__wchb[3];
  202: #else
  203: # error "This endianess is not supported."
  204: #endif
  205:   *outptrp += 4;
  206: 
  207:   /* Clear the state buffer.  */
  208:   state->__count &= ~7;
  209: 
  210:   return __GCONV_OK;
  211: }
  212: 
  213: #include <iconv/skeleton.c>
  214: 
  215: 
  216: /* Transform from UCS4 to the internal, UCS4-like format.  Unlike
  217:    for the other direction we have to check for correct values here.  */
  218: #define DEFINE_INIT             0
  219: #define DEFINE_FINI             0
  220: #define MIN_NEEDED_FROM         4
  221: #define MIN_NEEDED_TO           4
  222: #define FROM_DIRECTION          1
  223: #define FROM_LOOP               ucs4_internal_loop
  224: #define TO_LOOP                 ucs4_internal_loop /* This is not used.  */
  225: #define FUNCTION_NAME           __gconv_transform_ucs4_internal
  226: 
  227: 
  228: static inline int
  229: __attribute ((always_inline))
  230: ucs4_internal_loop (struct __gconv_step *step,
  231:                     struct __gconv_step_data *step_data,
  232:                     const unsigned char **inptrp, const unsigned char *inend,
  233:                     unsigned char **outptrp, unsigned char *outend,
  234:                     size_t *irreversible)
  235: {
  236:   int flags = step_data->__flags;
  237:   const unsigned char *inptr = *inptrp;
  238:   unsigned char *outptr = *outptrp;
  239:   size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  240:   int result;
  241:   size_t cnt;
  242: 
  243:   for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
  244:     {
  245:       uint32_t inval;
  246: 
  247: #if __BYTE_ORDER == __LITTLE_ENDIAN
  248:       inval = bswap_32 (*(const uint32_t *) inptr);
  249: #else
  250:       inval = *(const uint32_t *) inptr;
  251: #endif
  252: 
  253:       if (__builtin_expect (inval > 0x7fffffff, 0))
  254:         {
  255:           /* The value is too large.  We don't try transliteration here since
  256:              this is not an error because of the lack of possibilities to
  257:              represent the result.  This is a genuine bug in the input since
  258:              UCS4 does not allow such values.  */
  259:           if (irreversible == NULL)
  260:             /* We are transliterating, don't try to correct anything.  */
  261:             return __GCONV_ILLEGAL_INPUT;
  262: 
  263:           if (flags & __GCONV_IGNORE_ERRORS)
  264:             {
  265:               /* Just ignore this character.  */
  266:               ++*irreversible;
  267:               continue;
  268:             }
  269: 
  270:           *inptrp = inptr;
  271:           *outptrp = outptr;
  272:           return __GCONV_ILLEGAL_INPUT;
  273:         }
  274: 
  275:       *((uint32_t *) outptr) = inval;
  276:       outptr += sizeof (uint32_t);
  277:     }
  278: 
  279:   *inptrp = inptr;
  280:   *outptrp = outptr;
  281: 
  282:   /* Determine the status.  */
  283:   if (*inptrp == inend)
  284:     result = __GCONV_EMPTY_INPUT;
  285:   else if (*outptrp + 4 > outend)
  286:     result = __GCONV_FULL_OUTPUT;
  287:   else
  288:     result = __GCONV_INCOMPLETE_INPUT;
  289: 
  290:   return result;
  291: }
  292: 
  293: #ifndef _STRING_ARCH_unaligned
  294: static inline int
  295: __attribute ((always_inline))
  296: ucs4_internal_loop_unaligned (struct __gconv_step *step,
  297:                               struct __gconv_step_data *step_data,
  298:                               const unsigned char **inptrp,
  299:                               const unsigned char *inend,
  300:                               unsigned char **outptrp, unsigned char *outend,
  301:                               size_t *irreversible)
  302: {
  303:   int flags = step_data->__flags;
  304:   const unsigned char *inptr = *inptrp;
  305:   unsigned char *outptr = *outptrp;
  306:   size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  307:   int result;
  308:   size_t cnt;
  309: 
  310:   for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
  311:     {
  312:       if (__builtin_expect (inptr[0] > 0x80, 0))
  313:         {
  314:           /* The value is too large.  We don't try transliteration here since
  315:              this is not an error because of the lack of possibilities to
  316:              represent the result.  This is a genuine bug in the input since
  317:              UCS4 does not allow such values.  */
  318:           if (irreversible == NULL)
  319:             /* We are transliterating, don't try to correct anything.  */
  320:             return __GCONV_ILLEGAL_INPUT;
  321: 
  322:           if (flags & __GCONV_IGNORE_ERRORS)
  323:             {
  324:               /* Just ignore this character.  */
  325:               ++*irreversible;
  326:               continue;
  327:             }
  328: 
  329:           *inptrp = inptr;
  330:           *outptrp = outptr;
  331:           return __GCONV_ILLEGAL_INPUT;
  332:         }
  333: 
  334: # if __BYTE_ORDER == __LITTLE_ENDIAN
  335:       outptr[3] = inptr[0];
  336:       outptr[2] = inptr[1];
  337:       outptr[1] = inptr[2];
  338:       outptr[0] = inptr[3];
  339: # else
  340:       outptr[0] = inptr[0];
  341:       outptr[1] = inptr[1];
  342:       outptr[2] = inptr[2];
  343:       outptr[3] = inptr[3];
  344: # endif
  345:       outptr += 4;
  346:     }
  347: 
  348:   *inptrp = inptr;
  349:   *outptrp = outptr;
  350: 
  351:   /* Determine the status.  */
  352:   if (*inptrp == inend)
  353:     result = __GCONV_EMPTY_INPUT;
  354:   else if (*outptrp + 4 > outend)
  355:     result = __GCONV_FULL_OUTPUT;
  356:   else
  357:     result = __GCONV_INCOMPLETE_INPUT;
  358: 
  359:   return result;
  360: }
  361: #endif
  362: 
  363: 
  364: static inline int
  365: __attribute ((always_inline))
  366: ucs4_internal_loop_single (struct __gconv_step *step,
  367:                            struct __gconv_step_data *step_data,
  368:                            const unsigned char **inptrp,
  369:                            const unsigned char *inend,
  370:                            unsigned char **outptrp, unsigned char *outend,
  371:                            size_t *irreversible)
  372: {
  373:   mbstate_t *state = step_data->__statep;
  374:   int flags = step_data->__flags;
  375:   size_t cnt = state->__count & 7;
  376: 
  377:   while (*inptrp < inend && cnt < 4)
  378:     state->__value.__wchb[cnt++] = *(*inptrp)++;
  379: 
  380:   if (__builtin_expect (cnt < 4, 0))
  381:     {
  382:       /* Still not enough bytes.  Store the ones in the input buffer.  */
  383:       state->__count &= ~7;
  384:       state->__count |= cnt;
  385: 
  386:       return __GCONV_INCOMPLETE_INPUT;
  387:     }
  388: 
  389:   if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x80,
  390:                         0))
  391:     {
  392:       /* The value is too large.  We don't try transliteration here since
  393:          this is not an error because of the lack of possibilities to
  394:          represent the result.  This is a genuine bug in the input since
  395:          UCS4 does not allow such values.  */
  396:       if (!(flags & __GCONV_IGNORE_ERRORS))
  397:         {
  398:           *inptrp -= cnt - (state->__count & 7);
  399:           return __GCONV_ILLEGAL_INPUT;
  400:         }
  401:     }
  402:   else
  403:     {
  404: #if __BYTE_ORDER == __LITTLE_ENDIAN
  405:       (*outptrp)[0] = state->__value.__wchb[3];
  406:       (*outptrp)[1] = state->__value.__wchb[2];
  407:       (*outptrp)[2] = state->__value.__wchb[1];
  408:       (*outptrp)[3] = state->__value.__wchb[0];
  409: #elif __BYTE_ORDER == __BIG_ENDIAN
  410:       (*outptrp)[0] = state->__value.__wchb[0];
  411:       (*outptrp)[1] = state->__value.__wchb[1];
  412:       (*outptrp)[2] = state->__value.__wchb[2];
  413:       (*outptrp)[3] = state->__value.__wchb[3];
  414: #endif
  415: 
  416:       *outptrp += 4;
  417:     }
  418: 
  419:   /* Clear the state buffer.  */
  420:   state->__count &= ~7;
  421: 
  422:   return __GCONV_OK;
  423: }
  424: 
  425: #include <iconv/skeleton.c>
  426: 
  427: 
  428: /* Similarly for the little endian form.  */
  429: #define DEFINE_INIT             0
  430: #define DEFINE_FINI             0
  431: #define MIN_NEEDED_FROM         4
  432: #define MIN_NEEDED_TO           4
  433: #define FROM_DIRECTION          1
  434: #define FROM_LOOP               internal_ucs4le_loop
  435: #define TO_LOOP                 internal_ucs4le_loop /* This is not used.  */
  436: #define FUNCTION_NAME           __gconv_transform_internal_ucs4le
  437: 
  438: 
  439: static inline int
  440: __attribute ((always_inline))
  441: internal_ucs4le_loop (struct __gconv_step *step,
  442:                       struct __gconv_step_data *step_data,
  443:                       const unsigned char **inptrp, const unsigned char *inend,
  444:                       unsigned char **outptrp, unsigned char *outend,
  445:                       size_t *irreversible)
  446: {
  447:   const unsigned char *inptr = *inptrp;
  448:   unsigned char *outptr = *outptrp;
  449:   size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  450:   int result;
  451: 
  452: #if __BYTE_ORDER == __BIG_ENDIAN
  453:   /* Sigh, we have to do some real work.  */
  454:   size_t cnt;
  455:   uint32_t *outptr32 = (uint32_t *) outptr;
  456: 
  457:   for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4)
  458:     *outptr32++ = bswap_32 (*(const uint32_t *) inptr);
  459:   outptr = (unsigned char *) outptr32;
  460: 
  461:   *inptrp = inptr;
  462:   *outptrp = outptr;
  463: #elif __BYTE_ORDER == __LITTLE_ENDIAN
  464:   /* Simply copy the data.  */
  465:   *inptrp = inptr + n_convert * 4;
  466:   *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
  467: #else
  468: # error "This endianess is not supported."
  469: #endif
  470: 
  471:   /* Determine the status.  */
  472:   if (*inptrp == inend)
  473:     result = __GCONV_EMPTY_INPUT;
  474:   else if (*outptrp + 4 > outend)
  475:     result = __GCONV_FULL_OUTPUT;
  476:   else
  477:     result = __GCONV_INCOMPLETE_INPUT;
  478: 
  479:   return result;
  480: }
  481: 
  482: #ifndef _STRING_ARCH_unaligned
  483: static inline int
  484: __attribute ((always_inline))
  485: internal_ucs4le_loop_unaligned (struct __gconv_step *step,
  486:                                 struct __gconv_step_data *step_data,
  487:                                 const unsigned char **inptrp,
  488:                                 const unsigned char *inend,
  489:                                 unsigned char **outptrp, unsigned char *outend,
  490:                                 size_t *irreversible)
  491: {
  492:   const unsigned char *inptr = *inptrp;
  493:   unsigned char *outptr = *outptrp;
  494:   size_t n_convert = MIN (inend - inptr, outend - outptr) / 4;
  495:   int result;
  496: 
  497: # if __BYTE_ORDER == __BIG_ENDIAN
  498:   /* Sigh, we have to do some real work.  */
  499:   size_t cnt;
  500: 
  501:   for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4, outptr += 4)
  502:     {
  503:       outptr[0] = inptr[3];
  504:       outptr[1] = inptr[2];
  505:       outptr[2] = inptr[1];
  506:       outptr[3] = inptr[0];
  507:     }
  508: 
  509:   *inptrp = inptr;
  510:   *outptrp = outptr;
  511: # elif __BYTE_ORDER == __LITTLE_ENDIAN
  512:   /* Simply copy the data.  */
  513:   *inptrp = inptr + n_convert * 4;
  514:   *outptrp = __mempcpy (outptr, inptr, n_convert * 4);
  515: # else
  516: #  error "This endianess is not supported."
  517: # endif
  518: 
  519:   /* Determine the status.  */
  520:   if (*inptrp == inend)
  521:     result = __GCONV_EMPTY_INPUT;
  522:   else if (*inptrp + 4 > inend)
  523:     result = __GCONV_INCOMPLETE_INPUT;
  524:   else
  525:     {
  526:       assert (*outptrp + 4 > outend);
  527:       result = __GCONV_FULL_OUTPUT;
  528:     }
  529: 
  530:   return result;
  531: }
  532: #endif
  533: 
  534: 
  535: static inline int
  536: <