(linenum→info "unix/slp.c:2238")

emacs/22.1/src/charset.c

    1: /* Basic multilingual character support.
    2:    Copyright (C) 2001, 2002, 2003, 2004, 2005,
    3:                  2006, 2007 Free Software Foundation, Inc.
    4:    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
    5:      2005, 2006, 2007
    6:      National Institute of Advanced Industrial Science and Technology (AIST)
    7:      Registration Number H14PRO021
    8: 
    9: This file is part of GNU Emacs.
   10: 
   11: GNU Emacs is free software; you can redistribute it and/or modify
   12: it under the terms of the GNU General Public License as published by
   13: the Free Software Foundation; either version 2, or (at your option)
   14: any later version.
   15: 
   16: GNU Emacs is distributed in the hope that it will be useful,
   17: but WITHOUT ANY WARRANTY; without even the implied warranty of
   18: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   19: GNU General Public License for more details.
   20: 
   21: You should have received a copy of the GNU General Public License
   22: along with GNU Emacs; see the file COPYING.  If not, write to
   23: the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   24: Boston, MA 02110-1301, USA.  */
   25: 
   26: /* First, see the documentation in `charset.h' to understand the code
   27:    in this file.  */
   28: 
   29: #ifdef emacs
   30: #include <config.h>
   31: #endif
   32: 
   33: #include <stdio.h>
   34: 
   35: #ifdef emacs
   36: 
   37: #include <sys/types.h>
   38: #include "lisp.h"
   39: #include "buffer.h"
   40: #include "charset.h"
   41: #include "composite.h"
   42: #include "coding.h"
   43: #include "disptab.h"
   44: 
   45: #else  /* not emacs */
   46: 
   47: #include "mulelib.h"
   48: 
   49: #endif /* emacs */
   50: 
   51: Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
   52: Lisp_Object Qunknown;
   53: 
   54: /* Declarations of special leading-codes.  */
   55: EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
   56: EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
   57: EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
   58: EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
   59: 
   60: /* Declarations of special charsets.  The values are set by
   61:    Fsetup_special_charsets.  */
   62: int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
   63: int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
   64: int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
   65: int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
   66: int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
   67: int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
   68: int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
   69: int charset_mule_unicode_0100_24ff;
   70: int charset_mule_unicode_2500_33ff;
   71: int charset_mule_unicode_e000_ffff;
   72: 
   73: Lisp_Object Qcharset_table;
   74: 
   75: /* A char-table containing information about each character set.  */
   76: Lisp_Object Vcharset_table;
   77: 
   78: /* A vector of charset symbols indexed by charset-id.  This is used
   79:    only for returning a charset symbol from C functions.  */
   80: Lisp_Object Vcharset_symbol_table;
   81: 
   82: /* A list of all charset symbols ever defined.  */
   83: Lisp_Object Vcharset_list;
   84: 
   85: /* A vector of all translation tables ever defined.
   86:    The ID of a translation table is used to index this vector.  */
   87: Lisp_Object Vtranslation_table_vector;
   88: 
   89: /* A char-table for characters which may invoke auto-filling.  */
   90: Lisp_Object Vauto_fill_chars;
   91: 
   92: Lisp_Object Qauto_fill_chars;
   93: 
   94: /* Tables used by the macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
   95: int bytes_by_char_head[256];
   96: int width_by_char_head[256];
   97: 
   98: /* Mapping table from an ISO2022 charset (specified by DIMENSION,
   99:    CHARS, and FINAL-CHAR) to an Emacs charset.  */
  100: int iso_charset_table[2][2][128];
  101: 
  102: /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
  103: unsigned char *_fetch_multibyte_char_p;
  104: int _fetch_multibyte_char_len;
  105: 
  106: /* Offset to add to a non-ASCII value when inserting it.  */
  107: EMACS_INT nonascii_insert_offset;
  108: 
  109: /* Translation table for converting non-ASCII unibyte characters
  110:    to multibyte codes, or nil.  */
  111: Lisp_Object Vnonascii_translation_table;
  112: 
  113: /* List of all possible generic characters.  */
  114: Lisp_Object Vgeneric_character_list;
  115: 
  116: ^L
  117: void
  118: invalid_character (c)
  119:      int c;
  120: {
         /* Report C in decimal, octal, and hexadecimal.  char_to_string
            relies on this call to signal the invalid-character error.  */
  121:   error ("Invalid character: %d, #o%o, #x%x", c, c, c);
  122: }
  123: 
  124: /* Parse the multibyte sequence starting at STR and fetch information
  125:    about the character there.  Set BYTES to the byte length the
  126:    character occupies (BYTES_BY_CHAR_HEAD of the first byte), and set
          CHARSET, C1, C2 to the proper values of the character.

          LENGTH is accepted but not used by this macro.  C2 is assigned
          only for 3-byte forms whose first byte is below
          LEADING_CODE_PRIVATE_11 and for 4-byte forms; otherwise it is
          left untouched.  STR and C1 are evaluated more than once, so
          arguments must not have side effects.  */
  127: 
  128: #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
  129:   do {                                                                       \
  130:     (c1) = *(str);                                                           \
  131:     (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
  132:     if ((bytes) == 1)                                                        \
  133:       (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
  134:     else if ((bytes) == 2)                                                   \
  135:       {                                                                      \
  136:         if ((c1) == LEADING_CODE_8_BIT_CONTROL)                                     \
  137:           (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;        \
  138:         else                                                                \
  139:           (charset) = (c1), (c1) = (str)[1] & 0x7F;                         \
  140:       }                                                                      \
  141:     else if ((bytes) == 3)                                                   \
  142:       {                                                                      \
  143:         if ((c1) < LEADING_CODE_PRIVATE_11)                                 \
  144:           (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
  145:         else                                                                \
  146:           (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                             \
  147:       }                                                                      \
  148:     else                                                                     \
  149:       (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
  150:   } while (0)
  151: 
  152: /* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
  153:    Note that this intentionally allows invalid components, such
  154:    as 0xA0 0xA0, because there exist many files that contain
  155:    such invalid byte sequences, especially in EUC-GB.

          Accepted ranges:
            CHARSET_ASCII           C1 in 0..0x7F
            CHARSET_8_BIT_CONTROL   C1 in 0x80..0x9F
            CHARSET_8_BIT_GRAPHIC   C1 in 0x80..0xFF
            any other charset       C1 in 0x20..0x7F, and also C2 in
                                    0x20..0x7F unless CHARSET_DIMENSION
                                    of the charset is 1
          C2 is examined only in that last case.  */
  156: #define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
  157:   ((charset) == CHARSET_ASCII                           \
  158:    ? ((c1) >= 0 && (c1) <= 0x7F)                        \
  159:    : ((charset) == CHARSET_8_BIT_CONTROL                \
  160:       ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
  161:       : ((charset) == CHARSET_8_BIT_GRAPHIC             \
  162:          ? ((c1) >= 0x80 && (c1) <= 0xFF)              \
  163:          : (CHARSET_DIMENSION (charset) == 1           \
  164:             ? ((c1) >= 0x20 && (c1) <= 0x7F)           \
  165:             : ((c1) >= 0x20 && (c1) <= 0x7F            \
  166:                && (c2) >= 0x20 && (c2) <= 0x7F)))))
  167: 
  168: /* Store the multi-byte form of the character C in STR.  The caller
  169:    should allocate at least a 4-byte area at STR in advance.  Return
  170:    the length of the multi-byte form.  If C is an invalid character
  171:    code, return -1; in that case some bytes may already have been
          written to STR.

          If C has any bit of CHAR_MODIFIER_MASK set, C without those bits
          must be a single-byte character (otherwise -1 is returned).
          Meta, Shift, and Control are folded into the character code
          where possible, and any modifier bits still left are dropped.  */
  172: 
  173: int
  174: char_to_string_1 (c, str)
  175:      int c;
  176:      unsigned char *str;
  177: {
  178:   unsigned char *p = str;
  179: 
  180:   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
  181:     {
  182:       /* Multibyte character can't have a modifier bit.  */
  183:       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
  184:         return -1;
  185: 
  186:       /* For Meta, Shift, and Control modifiers, we need special care.  */
  187:       if (c & CHAR_META)
  188:         {
  189:           /* Move the meta bit to the right place for a string.  */
  190:           c = (c & ~CHAR_META) | 0x80;
  191:         }
  192:       if (c & CHAR_SHIFT)
  193:         {
  194:           /* Shift modifier is valid only with [A-Za-z].  */
  195:           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
  196:             c &= ~CHAR_SHIFT;
  197:           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
  198:             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
  199:         }
  200:       if (c & CHAR_CTL)
  201:         {
  202:           /* Simulate the code in lread.c.  */
  203:           /* Allow `\C- ' and `\C-?'.  */
  204:           if (c == (CHAR_CTL | ' '))
  205:             c = 0;
  206:           else if (c == (CHAR_CTL | '?'))
  207:             c = 127;
  208:           /* ASCII control chars are made from letters (both cases),
  209:              as well as the non-letters within 0100...0137.  */
  210:           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
  211:             c &= (037 | (~0177 & ~CHAR_CTL));
  212:           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
  213:             c &= (037 | (~0177 & ~CHAR_CTL));
  214:         }
  215: 
  216:       /* If C still has any modifier bits, just ignore it.  */
  217:       c &= ~CHAR_MODIFIER_MASK;
  218:     }
  219: 
  220:   if (SINGLE_BYTE_CHAR_P (c))
  221:     {
  222:       if (ASCII_BYTE_P (c) || c >= 0xA0)
  223:         *p++ = c;
  224:       else
  225:         {
                 /* The remaining single-byte codes are stored as two bytes:
                    the 8-bit-control leading code followed by C + 0x20.  */
  226:           *p++ = LEADING_CODE_8_BIT_CONTROL;
  227:           *p++ = c + 0x20;
  228:         }
  229:     }
  230:   else if (CHAR_VALID_P (c, 0))
  231:     {
  232:       int charset, c1, c2;
  233: 
  234:       SPLIT_CHAR (c, charset, c1, c2);
  235: 
             /* A charset at or above LEADING_CODE_EXT_11 gets one of the
                LEADING_CODE_PRIVATE_* bytes in front of the charset byte.  */
  236:       if (charset >= LEADING_CODE_EXT_11)
  237:         *p++ = (charset < LEADING_CODE_EXT_12
  238:                 ? LEADING_CODE_PRIVATE_11
  239:                 : (charset < LEADING_CODE_EXT_21
  240:                    ? LEADING_CODE_PRIVATE_12
  241:                    : (charset < LEADING_CODE_EXT_22
  242:                       ? LEADING_CODE_PRIVATE_21
  243:                       : LEADING_CODE_PRIVATE_22)));
  244:       *p++ = charset;
             /* Reject position codes in 1..31.  Note that the leading
                code(s) above have already been stored in STR.  */
  245:       if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
  246:         return -1;
  247:       if (c1)
  248:         {
  249:           *p++ = c1 | 0x80;
  250:           if (c2 > 0)
  251:             *p++ = c2 | 0x80;
  252:         }
  253:     }
  254:   else
  255:     return -1;
  256: 
  257:   return (p - str);
  258: }
  259: 
  260: 
  261: /* Store the multi-byte form of the character C in STR.  The caller
  262:    should allocate at least a 4-byte area at STR in advance.  Return
  263:    the length of the multi-byte form.  If C is an invalid character
  264:    code, signal an error by calling invalid_character.
  265: 
  266:    Use the macro `CHAR_STRING (C, STR)' instead of calling this function
  267:    directly if C can be an ASCII character.  */
  268: 
  269: int
  270: char_to_string (c, str)
  271:      int c;
  272:      unsigned char *str;
  273: {
  274:   int len;
  275:   len = char_to_string_1 (c, str);
         /* char_to_string_1 returns -1 for an invalid character code.  */
  276:   if (len == -1)
  277:     invalid_character (c);
  278:   return len;
  279: }
  280: 
  281: 
  282: /* Return the non-ASCII character corresponding to the multi-byte form
  283:    at STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
  284:    length of the multibyte form in *ACTUAL_LEN.
  285: 
  286:    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
  287:    this function directly if you want to handle ASCII characters as
  288:    well.  */
  289: 
  290: int
  291: string_to_char (str, len, actual_len)
  292:      const unsigned char *str;
  293:      int len, *actual_len;
  294: {
  295:   int c, bytes, charset, c1, c2;
  296: 
         /* SPLIT_MULTIBYTE_SEQ does not consult LEN, and it assigns C2
            only for some 3-byte forms and for 4-byte forms.
            NOTE(review): MAKE_CHAR presumably ignores C2 when it was not
            assigned -- confirm against charset.h.  */
  297:   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
  298:   c = MAKE_CHAR (charset, c1, c2);
  299:   if (actual_len)
  300:     *actual_len = bytes;
  301:   return c;
  302: }
  303: 
  304: /* Return the length of the multi-byte form at string STR of length LEN.
  305:    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
  306: int
  307: multibyte_form_length (str, len)
  308:      const unsigned char *str;
  309:      int len;
  310: {
  311:   int bytes;
  312: 
         /* PARSE_MULTIBYTE_SEQ (not defined in this part of the file) is
            expected to store the length in BYTES.  */
  313:   PARSE_MULTIBYTE_SEQ (str, len, bytes);
  314:   return bytes;
  315: }
  316: 
  317: /* Check the multibyte form at string STR of length LEN, set the
  318:    variables pointed to by CHARSET, C1, and C2 to the charset and
  319:    position codes of the character at STR, and return 0.  If the
  320:    charset found is CHARSET_ASCII (there's no multibyte character),
  321:    return -1 without storing anything through CHARSET, C1, or C2.
          This should be used only in the macro SPLIT_STRING, which
          checks the range of STR in advance.

          When the sequence has no second position code, *C2 receives
          (unsigned char) -1, because CODE2 starts out as -1.  */
  322: 
  323: int
  324: split_string (str, len, charset, c1, c2)
  325:      const unsigned char *str;
  326:      unsigned char *c1, *c2;
  327:      int len, *charset;
  328: {
  329:   register int bytes, cs, code1, code2 = -1;
  330: 
  331:   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
  332:   if (cs == CHARSET_ASCII)
  333:     return -1;
  334:   *charset = cs;
  335:   *c1 = code1;
  336:   *c2 = code2;
  337:   return 0;
  338: }
  339: 
  340: /* Return 1 iff character C has a valid printable glyph.
  341:    Use the macro CHAR_PRINTABLE_P instead.

          ASCII bytes always yield 1; other single-byte characters and
          codes at or above MAX_CHAR yield 0.  For the rest, the charset
          must be defined and each position code must be in range:
          33..126 when CHARSET_CHARS is 94, and 32 or more otherwise.
          The second position code is checked only for charsets whose
          CHARSET_DIMENSION is 2.  */
  342: int
  343: char_printable_p (c)
  344:      int c;
  345: {
  346:   int charset, c1, c2;
  347: 
  348:   if (ASCII_BYTE_P (c))
  349:     return 1;
  350:   else if (SINGLE_BYTE_CHAR_P (c))
  351:     return 0;
  352:   else if (c >= MAX_CHAR)
  353:     return 0;
  354: 
  355:   SPLIT_CHAR (c, charset, c1, c2);
  356:   if (! CHARSET_DEFINED_P (charset))
  357:     return 0;
  358:   if (CHARSET_CHARS (charset) == 94
  359:       ? c1 <= 32 || c1 >= 127
  360:       : c1 < 32)
  361:     return 0;
  362:   if (CHARSET_DIMENSION (charset) == 2
  363:       && (CHARSET_CHARS (charset) == 94
  364:           ? c2 <= 32 || c2 >= 127
  365:           : c2 < 32))
  366:     return 0;
  367:   return 1;
  368: }
  369: 
  370: /* Translate character C by translation table TABLE.  If C
  371:    is negative, translate a character specified by CHARSET, C1, and C2
  372:    (C1 and C2 are code points of the character).  If no translation is
  373:    found in TABLE, return C.

          A TABLE that is not a char-table, or an entry that is not a
          natural number, counts as "no translation".  If the entry is a
          generic character, the result combines the entry's charset
          with the position codes of C, provided both charsets have the
          same dimension; otherwise C is returned.  */
  374: int
  375: translate_char (table, c, charset, c1, c2)
  376:      Lisp_Object table;
  377:      int c, charset, c1, c2;
  378: {
  379:   Lisp_Object ch;
  380:   int alt_charset, alt_c1, alt_c2, dimension;
  381: 
  382:   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
  383:   if (!CHAR_TABLE_P (table)
  384:       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
  385:     return c;
  386: 
  387:   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
  388:   dimension = CHARSET_DIMENSION (alt_charset);
  389:   if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
  390:     /* CH is not a generic character, just return it.  */
  391:     return XFASTINT (ch);
  392: 
  393:   /* Since CH is a generic character, we must return a specific
  394:      character which has the same position codes as C from CH.  */
  395:   if (charset < 0)
  396:     SPLIT_CHAR (c, charset, c1, c2);
  397:   if (dimension != CHARSET_DIMENSION (charset))
  398:     /* We can't make such a character because of dimension mismatch.  */
  399:     return c;
  400:   return MAKE_CHAR (alt_charset, c1, c2);
  401: }
  402: 
  403: /* Convert the unibyte character C to multibyte based on
  404:    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
  405:    convert C to a valid multibyte character, convert it based on
  406:    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.

          Only C in the range 0200..0377 is converted; any other value is
          returned unchanged.  When Vnonascii_translation_table is nil,
          values in 0200..0237 are returned unchanged as well.  */
  407: 
  408: int
  409: unibyte_char_to_multibyte (c)
  410:      int c;
  411: {
  412:   if (c < 0400 && c >= 0200)
  413:     {
  414:       int c_save = c;
  415: 
  416:       if (! NILP (Vnonascii_translation_table))
  417:         {
  418:           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
                 /* A multibyte result that is not a valid character falls
                    back to the default offset.  */
  419:           if (c >= 0400 && ! char_valid_p (c, 0))
  420:             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
  421:         }
  422:       else if (c >= 0240 && nonascii_insert_offset > 0)
  423:         {
  424:           c += nonascii_insert_offset;
  425:           if (c < 0400 || ! char_valid_p (c, 0))
  426:             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
  427:         }
  428:       else if (c >= 0240)
  429:         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
  430:     }
  431:   return c;
  432: }
  433: 
  434: 
  435: /* Convert the multibyte character C to a unibyte 8-bit character based
  436:    on Vnonascii_translation_table or nonascii_insert_offset.  If
  437:    REV_TBL is non-nil, it should be a reverse table of
  438:    Vnonascii_translation_table, i.e. what is given by:
  439:      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))

          A single-byte C is returned unchanged.  If the table lookup leaves
          a value of 256 or more, or the offset subtraction leaves a value
          outside 128..255, the result is (C & 0177) + 0200 instead.  */
  440: 
  441: int
  442: multibyte_char_to_unibyte (c, rev_tbl)
  443:      int c;
  444:      Lisp_Object rev_tbl;
  445: {
  446:   if (!SINGLE_BYTE_CHAR_P (c))
  447:     {
  448:       int c_save = c;
  449: 
             /* Without a usable REV_TBL, take the reverse table from extra
                slot 0 of Vnonascii_translation_table when that is a
                char-table.  */
  450:       if (! CHAR_TABLE_P (rev_tbl)
  451:           && CHAR_TABLE_P (Vnonascii_translation_table))
  452:         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
  453:                                           make_number (0));
  454:       if (CHAR_TABLE_P (rev_tbl))
  455:         {
  456:           Lisp_Object temp;
  457:           temp = Faref (rev_tbl, make_number (c));
  458:           if (INTEGERP (temp))
  459:             c = XINT (temp);
  460:           if (c >= 256)
  461:             c = (c_save & 0177) + 0200;
  462:         }
  463:       else
  464:         {
  465:           if (nonascii_insert_offset > 0)
  466:             c -= nonascii_insert_offset;
  467:           if (c < 128 || c >= 256)
  468:             c = (c_save & 0177) + 0200;
  469:         }
  470:     }
  471: 
  472:   return c;
  473: }
  474: 
  475: ^L
  476: /* Update the table Vcharset_table with the given arguments (see the
  477:    documentation of `define-charset' for the meaning of each argument).
  478:    Several other table contents are also updated.  The caller should
  479:    check the validity of CHARSET-ID and the remaining arguments in
  480:    advance.  */
  481: 
  482: void
  483: update_charset_table (charset_id, dimension, chars, width, direction,
  484:                       iso_final_char, iso_graphic_plane,
  485:                       short_name, long_name, description)
  486:      Lisp_Object charset_id, dimension, chars, width, direction;
  487:      Lisp_Object iso_final_char, iso_graphic_plane;
  488:      Lisp_Object short_name, long_name, description;
  489: {
  490:   int charset = XINT (charset_id);
  491:   int bytes;
  492:   unsigned char leading_code_base, leading_code_ext;
  493: 
  494:   if (NILP (CHARSET_TABLE_ENTRY (charset)))
  495:     CHARSET_TABLE_ENTRY (charset)
  496:       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
  497: 
  498:   if (NILP (long_name))
  499:     long_name = short_name;
  500:   if (NILP (description))
  501:     description = long_name;
  502: 
  503:   /* Get byte length of multibyte form, base leading-code, and
  504:      extended leading-code of the charset.  See the comment under the
  505:      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
  506:   bytes = XINT (dimension);
  507:   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
  508:     {
  509:       /* Official charset, it doesn't have an extended leading-code.  */
  510:       if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
  511:         bytes += 1; /* For a base leading-code.  */
  512:       leading_code_base = charset;
  513:       leading_code_ext = 0;
  514:     }
  515:   else
  516:     {
  517:       /* Private charset.  */
  518:       bytes += 2; /* For base and extended leading-codes.  */
  519:       leading_code_base
  520:         = (charset < LEADING_CODE_EXT_12
  521:            ? LEADING_CODE_PRIVATE_11
  522:            : (charset < LEADING_CODE_EXT_21
  523:               ? LEADING_CODE_PRIVATE_12
  524:               : (charset < LEADING_CODE_EXT_22
  525:                  ? LEADING_CODE_PRIVATE_21
  526:                  : LEADING_CODE_PRIVATE_22)));
  527:       leading_code_ext = charset;
  528:       if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
  529:         error ("Invalid dimension for the charset-ID %d", charset);
  530:     }
  531: 
  532:   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
  533:   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
  534:   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
  535:   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
  536:   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
  537:   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
  538:   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
  539:     = make_number (leading_code_base);
  540:   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
  541:     = make_number (leading_code_ext);
  542:   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
  543:   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
  544:     = iso_graphic_plane;
  545:   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
  546:   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
  547:   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
  548:   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
  549: 
  550:   {
  551:     /* If we have already defined a charset which has the same
  552:        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
  553:        DIRECTION, we must update the entry REVERSE-CHARSET of both
  554:        charsets.  If there's no such charset, the value of the entry
  555:        is set to nil.  */
  556:     int i;
  557: 
  558:     for (i = 0; i <= MAX_CHARSET; i++)
  559:       if (!NILP (CHARSET_TABLE_ENTRY (i)))
  560:         {
  561:           if (CHARSET_DIMENSION (i) == XINT (dimension)
  562:               && CHARSET_CHARS (i) == XINT (chars)
  563:               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
  564:               && CHARSET_DIRECTION (i) != XINT (direction))
  565:             {
  566:               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
  567:                 = make_number (i);
  568:               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
  569:               break;
  570:             }
  571:         }
  572:     if (i > MAX_CHARSET)
  573:       /* No such a charset.  */
  574:       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
  575:         = make_number (-1);
  576:   }
  577: 
  578:   if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
  579:       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
  580:     {
  581:       bytes_by_char_head[leading_code_base] = bytes;
  582:       width_by_char_head[leading_code_base] = XINT (width);
  583: 
  584:       /* Update table emacs_code_class.  */
  585:       emacs_code_class[charset] = (bytes == 2
  586: