(linenum→info "unix/slp.c:2238")

glibc/2.7/iconvdata/euc-jp.c

    1: /* Mapping tables for EUC-JP handling.
    2:    Copyright (C) 1998, 1999, 2000-2002 Free Software Foundation, Inc.
    3:    This file is part of the GNU C Library.
    4:    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
    5: 
    6:    The GNU C Library is free software; you can redistribute it and/or
    7:    modify it under the terms of the GNU Lesser General Public
    8:    License as published by the Free Software Foundation; either
    9:    version 2.1 of the License, or (at your option) any later version.
   10: 
   11:    The GNU C Library is distributed in the hope that it will be useful,
   12:    but WITHOUT ANY WARRANTY; without even the implied warranty of
   13:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   14:    Lesser General Public License for more details.
   15: 
   16:    You should have received a copy of the GNU Lesser General Public
   17:    License along with the GNU C Library; if not, write to the Free
   18:    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   19:    02111-1307 USA.  */
   20: 
   21: #include <dlfcn.h>
   22: #include <stdint.h>
   23: #include <gconv.h>
   24: #include <jis0201.h>
   25: #include <jis0208.h>
   26: #include <jis0212.h>
   27: 
   28: /* Definitions used in the body of the `gconv' function.  */
   29: #define CHARSET_NAME            "EUC-JP//"
   30: #define FROM_LOOP               from_euc_jp /* name used for the EUC-JP -> UCS4 loop */
   31: #define TO_LOOP                 to_euc_jp /* name used for the UCS4 -> EUC-JP loop */
   32: #define DEFINE_INIT             1 /* presumably requests the default init function from iconv/skeleton.c -- confirm */
   33: #define DEFINE_FINI             1 /* presumably requests the default fini function from iconv/skeleton.c -- confirm */
   34: #define MIN_NEEDED_FROM         1 /* shortest EUC-JP character: a single byte */
   35: #define MAX_NEEDED_FROM         3 /* longest EUC-JP character: 0x8f prefix plus two bytes */
   36: #define MIN_NEEDED_TO           4 /* the UCS4 side is read and written in 4-byte units */
   37: 
   38: 
   39: /* First define the conversion function from EUC-JP to UCS4: the input is EUC-JP bytes, the output is 4-byte UCS4 values.  */
   40: #define MIN_NEEDED_INPUT        MIN_NEEDED_FROM
   41: #define MAX_NEEDED_INPUT        MAX_NEEDED_FROM
   42: #define MIN_NEEDED_OUTPUT       MIN_NEEDED_TO
   43: #define LOOPFCT                 FROM_LOOP /* presumably the name iconv/loop.c gives the function built from BODY -- confirm */
   44: #define BODY \
   45:   { /* Decode one EUC-JP character at inptr and store it as one 4-byte value at outptr.  */ \
   46:     uint32_t ch = *inptr; /* first (lead) byte of the character */            \
   47:                                                                               \
   48:     if (ch < 0x8e || (ch >= 0x90 && ch <= 0x9f))                              \
   49:       ++inptr; /* single-byte character: its value is stored unchanged below */ \
   50:     else if (ch == 0xff)                                                      \
   51:       {                                                                       \
   52:         /* 0xff is rejected as an illegal lead byte.  */                     \
   53:         STANDARD_FROM_LOOP_ERR_HANDLER (1);                                  \
   54:       }                                                                       \
   55:     else                                                                      \
   56:       {                                                                       \
   57:         /* Two or more byte character.  First test whether the next          \
   58:            byte is also available.  */                                       \
   59:         int ch2; /* second byte of the sequence */                           \
   60:                                                                               \
   61:         if (__builtin_expect (inptr + 1 >= inend, 0))                        \
   62:           {                                                                  \
   63:             /* The second byte is not available.  Report incomplete          \
   64:                input; inptr has not been advanced.  */                       \
   65:             result = __GCONV_INCOMPLETE_INPUT;                               \
   66:             break;                                                           \
   67:           }                                                                  \
   68:                                                                               \
   69:         ch2 = inptr[1];                                                              \
   70:                                                                               \
   71:         /* All second bytes of a multibyte character must be >= 0xa1. */      \
   72:         if (__builtin_expect (ch2 < 0xa1, 0))                                \
   73:           STANDARD_FROM_LOOP_ERR_HANDLER (1);                                \
   74:                                                                               \
   75:         if (ch == 0x8e)                                                              \
   76:           {                                                                  \
   77:             /* This is code set 2: half-width katakana.  */                  \
   78:             ch = jisx0201_to_ucs4 (ch2);                                     \
   79:             if (__builtin_expect (ch, 0) == __UNKNOWN_10646_CHAR)            \
   80:               STANDARD_FROM_LOOP_ERR_HANDLER (1);                            \
   81:                                                                               \
   82:             inptr += 2; /* the 0x8e lead byte plus one data byte */           \
   83:           }                                                                  \
   84:         else                                                                 \
   85:           {                                                                  \
   86:             const unsigned char *endp; /* read cursor handed to the JIS helper by address */ \
   87:                                                                               \
   88:             if (ch == 0x8f)                                                  \
   89:               {                                                                      \
   90:                 /* This is code set 3: JIS X 0212-1990.  */                 \
   91:                 endp = inptr + 1; /* start after the 0x8f prefix byte */     \
   92:                 /* presumably 0x80 is the offset the helper removes from each byte -- confirm in jis0212.h */ \
   93:                 ch = jisx0212_to_ucs4 (&endp, inend - endp, 0x80);          \
   94:               }                                                                      \
   95:             else                                                             \
   96:               {                                                                      \
   97:                 /* This is code set 1: JIS X 0208.  */                              \
   98:                 endp = inptr; /* no prefix byte: the lead byte is part of the code */ \
   99:                                                                               \
  100:                 ch = jisx0208_to_ucs4 (&endp, inend - inptr, 0x80);         \
  101:               }                                                                      \
  102:                                                                               \
  103:             if (__builtin_expect (ch, 1) == 0)                               \
  104:               {                                                                      \
  105:                 /* A zero result is treated as: not enough input available.  */ \
  106:                 result = __GCONV_INCOMPLETE_INPUT;                          \
  107:                 break;                                                              \
  108:               }                                                                      \
  109:             if (__builtin_expect (ch == __UNKNOWN_10646_CHAR, 0))            \
  110:               /* Illegal character.  */                                              \
  111:               STANDARD_FROM_LOOP_ERR_HANDLER (1);                            \
  112:                                                                               \
  113:             inptr = endp; /* presumably the helper advanced endp past the character -- confirm */ \
  114:           }                                                                  \
  115:       }                                                                       \
  116:                                                                               \
  117:     put32 (outptr, ch); /* store the converted value */                       \
  118:     outptr += 4;                                                              \
  119:   }
  120: #define ONEBYTE_BODY \
  121:   { /* Single-byte shortcut: yields c itself when c is a complete one-byte character, WEOF otherwise.  */ \
  122:     if (c < 0x8e || (c >= 0x90 && c <= 0x9f)) /* same range as the single-byte branch of BODY */ \
  123:       return c;                                                               \
  124:     else                                                                      \
  125:       return WEOF;                                                            \
  126:   }
  127: #define LOOP_NEED_FLAGS /* presumably makes the conversion flags available to the error-handler macros -- confirm in iconv/loop.c */
  128: #include <iconv/loop.c>
  129: 
  130: 
  131: /* Next, define the other direction: the input is 4-byte UCS4 values, the output is EUC-JP bytes.  */
  132: #define MIN_NEEDED_INPUT        MIN_NEEDED_TO /* NOTE(review): redefined without a visible #undef; presumably iconv/loop.c undefines it -- confirm */
  133: #define MIN_NEEDED_OUTPUT       MIN_NEEDED_FROM
  134: #define MAX_NEEDED_OUTPUT       MAX_NEEDED_FROM
  135: #define LOOPFCT                 TO_LOOP /* presumably the name iconv/loop.c gives the function built from BODY -- confirm */
  136: #define BODY \
  137:   { /* Encode the 4-byte value at inptr as one EUC-JP character (1 to 3 bytes) at outptr.  */ \
  138:     uint32_t ch = get32 (inptr);                                              \
  139:                                                                               \
  140:     if (ch < 0x8e || (ch >= 0x90 && ch <= 0x9f))                              \
  141:       /* It's plain ASCII or C1.  */                                          \
  142:       *outptr++ = ch;                                                         \
  143:     else if (ch == 0xa5)                                                      \
  144:       /* YEN sign => backslash  */                                            \
  145:       *outptr++ = 0x5c;                                                       \
  146:     else if (ch == 0x203e)                                                    \
  147:       /* overscore => asciitilde */                                           \
  148:       *outptr++ = 0x7e;                                                       \
  149:     else                                                                      \
  150:       {                                                                       \
  151:         /* Try the JIS character sets.  */                                   \
  152:         size_t found; /* result of the most recent JIS lookup */             \
  153:                                                                               \
  154:         /* See whether we have room for at least two bytes.  */              \
  155:         if (__builtin_expect (outptr + 1 >= outend, 0))                              \
  156:           {                                                                  \
  157:             result = __GCONV_FULL_OUTPUT;                                    \
  158:             break;                                                           \
  159:           }                                                                  \
  160:         /* The helper is pointed at the second slot so the first stays free for the shift byte.  */ \
  161:         found = ucs4_to_jisx0201 (ch, outptr + 1);                           \
  162:         if (found != __UNKNOWN_10646_CHAR)                                   \
  163:           {                                                                  \
  164:             /* Yes, it's a JIS 0201 character.  Store the shift byte.  */     \
  165:             *outptr = 0x8e;                                                  \
  166:             outptr += 2;                                                     \
  167:           }                                                                  \
  168:         else                                                                 \
  169:           {                                                                  \
  170:             /* No JIS 0201 character.  */                                    \
  171:             found = ucs4_to_jisx0208 (ch, outptr, 2);                        \
  172:             /* The room check above guarantees the two bytes passed here.  */ \
  173:             if (found != __UNKNOWN_10646_CHAR)                               \
  174:               {                                                                      \
  175:                 /* It's a JIS 0208 character, adjust it for EUC-JP.  */             \
  176:                 *outptr++ += 0x80;                                          \
  177:                 *outptr++ += 0x80;                                          \
  178:               }                                                                      \
  179:             else                                                             \
  180:               {                                                                      \
  181:                 /* No JIS 0208 character.  Try JIS 0212 behind a one-byte prefix slot.  */ \
  182:                 found = ucs4_to_jisx0212 (ch, outptr + 1,                   \
  183:                                           outend - outptr - 1);                  \
  184:                                                                             \
  185:                 if (__builtin_expect (found, 1) == 0)                       \
  186:                   {                                                         \
  187:                     /* We ran out of space.  */                                     \
  188:                     result = __GCONV_FULL_OUTPUT;                           \
  189:                     break;                                                  \
  190:                   }                                                         \
  191:                 else if (__builtin_expect (found, 0) != __UNKNOWN_10646_CHAR) \
  192:                   {                                                         \
  193:                     /* It's a JIS 0212 character, adjust it for EUC-JP.  */   \
  194:                     *outptr++ = 0x8f; /* prefix byte for this code set */    \
  195:                     *outptr++ += 0x80;                                              \
  196:                     *outptr++ += 0x80;                                              \
  197:                   }                                                         \
  198:                 else                                                        \
  199:                   {                                                         \
  200:                     UNICODE_TAG_HANDLER (ch, 4); /* presumably skips Unicode tag characters -- confirm */ \
  201:                                                                               \
  202:                     /* Illegal character.  */                               \
  203:                     STANDARD_TO_LOOP_ERR_HANDLER (4);                       \
  204:                   }                                                         \
  205:               }                                                                      \
  206:           }                                                                  \
  207:       }                                                                       \
  208:                                                                               \
  209:     inptr += 4; /* one 4-byte input value consumed */                         \
  210:   }
  211: #define LOOP_NEED_FLAGS /* presumably makes the conversion flags available to the error-handler macros -- confirm in iconv/loop.c */
  212: #include <iconv/loop.c>
  213: 
  214: 
  215: /* Now define the toplevel functions.  */
  216: #include <iconv/skeleton.c>
Syntax (Markdown)