(linenum→info "unix/slp.c:2238")

emacs/22.1/src/coding.h

    1: /* Header for coding system handler.
    2:    Copyright (C) 2001, 2002, 2003, 2004, 2005,
    3:                  2006, 2007  Free Software Foundation, Inc.
    4:    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
    5:      2005, 2006, 2007
    6:      National Institute of Advanced Industrial Science and Technology (AIST)
    7:      Registration Number H14PRO021
    8: 
    9: This file is part of GNU Emacs.
   10: 
   11: GNU Emacs is free software; you can redistribute it and/or modify
   12: it under the terms of the GNU General Public License as published by
   13: the Free Software Foundation; either version 2, or (at your option)
   14: any later version.
   15: 
   16: GNU Emacs is distributed in the hope that it will be useful,
   17: but WITHOUT ANY WARRANTY; without even the implied warranty of
   18: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   19: GNU General Public License for more details.
   20: 
   21: You should have received a copy of the GNU General Public License
   22: along with GNU Emacs; see the file COPYING.  If not, write to
   23: the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   24: Boston, MA 02110-1301, USA.  */
   25: 
   26: #ifndef EMACS_CODING_H
   27: #define EMACS_CODING_H
   28: 
   29: #include "ccl.h"
   30: 
   31: /*** EMACS' INTERNAL FORMAT (emacs-mule) section ***/
   32: 
   33: /* Every 1-byte code of Emacs' internal format is classified into one
   34:    of the following classes.  See also `charset.h'.  */
   35: enum emacs_code_class_type
   36:   {
   37:     EMACS_control_code,         /* Control codes in the range
   38:                                    0x00..0x1F and 0x7F except for the
   39:                                    following two codes.  */
   40:     EMACS_linefeed_code,        /* 0x0A (linefeed) to denote
   41:                                    end-of-line.  */
   42:     EMACS_carriage_return_code, /* 0x0D (carriage-return) to be used
   43:                                    in selective display mode.  */
   44:     EMACS_ascii_code,           /* ASCII characters.  */
   45:     EMACS_leading_code_2,       /* Base leading code of official
   46:                                    TYPE9N character.  */
   47:     EMACS_leading_code_3,       /* Base leading code of private TYPE9N
   48:                                    or official TYPE9Nx9N character.  */
   49:     EMACS_leading_code_4,       /* Base leading code of private
   50:                                    TYPE9Nx9N character.  */
   51:     EMACS_invalid_code          /* Invalid code, i.e. a base leading
   52:                                    code not yet assigned to any
   53:                                    charset, or a code of the range
   54:                                    0xA0..0xFF.  */
   55:   };
   56: 
   57: extern enum emacs_code_class_type emacs_code_class[256];
   58: 
   59: /*** ISO2022 section ***/
   60: 
   61: /* Macros to define code of control characters for ISO2022's functions.  */
   62:                         /* code */   /* function */
   63: #define ISO_CODE_LF     0x0A                /* line-feed */
   64: #define ISO_CODE_CR     0x0D                /* carriage-return */
   65: #define ISO_CODE_SO     0x0E                /* shift-out */
   66: #define ISO_CODE_SI     0x0F                /* shift-in */
   67: #define ISO_CODE_SS2_7  0x19             /* single-shift-2 for 7-bit code */
   68: #define ISO_CODE_ESC    0x1B               /* escape */
   69: #define ISO_CODE_SS2    0x8E               /* single-shift-2 */
   70: #define ISO_CODE_SS3    0x8F               /* single-shift-3 */
   71: #define ISO_CODE_CSI    0x9B               /* control-sequence-introduce */
   72: 
   73: /* Every 1-byte code of ISO2022 is classified into one of the
   74:    following classes.  */
   75: enum iso_code_class_type
   76:   {
   77:     ISO_control_0,              /* Control codes in the range
   78:                                    0x00..0x1F and 0x7F, except for the
   79:                                    following 5 codes.  */
   80:     ISO_carriage_return,        /* ISO_CODE_CR (0x0D) */
   81:     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
   82:     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
   83:     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
   84:     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
   85:     ISO_control_1,              /* Control codes in the range
   86:                                    0x80..0x9F, except for the
   87:                                    following 3 codes.  */
   88:     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
   89:     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
   90:     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
   91:     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
   92:     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
   93:     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
   94:     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
   95:   };
   96: 
   97: /** Each macro CODING_FLAG_ISO_XXX defines a flag bit of the `flags'
   98:   element in the structure `coding_system'.  This information is used
   99:   while encoding a text to ISO2022.  **/
  100: 
  101: /* If set, produce short-form designation sequence (e.g. ESC $ A)
  102:    instead of long-form sequence (e.g. ESC $ ( A).  */
  103: #define CODING_FLAG_ISO_SHORT_FORM      0x0001
  104: 
  105: /* If set, reset graphic planes and registers at end-of-line to the
  106:    initial state.  */
  107: #define CODING_FLAG_ISO_RESET_AT_EOL    0x0002
  108: 
  109: /* If set, reset graphic planes and registers before any control
  110:    characters to the initial state.  */
  111: #define CODING_FLAG_ISO_RESET_AT_CNTL   0x0004
  112: 
  113: /* If set, encode by 7-bit environment.  */
  114: #define CODING_FLAG_ISO_SEVEN_BITS      0x0008
  115: 
  116: /* If set, use locking-shift function.  */
  117: #define CODING_FLAG_ISO_LOCKING_SHIFT   0x0010
  118: 
  119: /* If set, use single-shift function.  Overwrite
  120:    CODING_FLAG_ISO_LOCKING_SHIFT.  */
  121: #define CODING_FLAG_ISO_SINGLE_SHIFT    0x0020
  122: 
  123: /* If set, designate JISX0201-Roman instead of ASCII.  */
  124: #define CODING_FLAG_ISO_USE_ROMAN       0x0040
  125: 
  126: /* If set, designate JISX0208-1978 instead of JISX0208-1983.  */
  127: #define CODING_FLAG_ISO_USE_OLDJIS      0x0080
  128: 
  129: /* If set, do not produce ISO6429's direction specifying sequence.  */
  130: #define CODING_FLAG_ISO_NO_DIRECTION    0x0100
  131: 
  132: /* If set, assume designation states are reset at beginning of line on
  133:    output.  */
  134: #define CODING_FLAG_ISO_INIT_AT_BOL     0x0200
  135: 
  136: /* If set, designation sequence should be placed at beginning of line
  137:    on output.  */
  138: #define CODING_FLAG_ISO_DESIGNATE_AT_BOL 0x0400
  139: 
  140: /* If set, do not encode unsafe characters on output.  */
  141: #define CODING_FLAG_ISO_SAFE            0x0800
  142: 
  143: /* If set, extra latin codes (128..159) are accepted as a valid code
  144:    on input.  */
  145: #define CODING_FLAG_ISO_LATIN_EXTRA     0x1000
  146: 
  147: /* If set, use designation escape sequence.  */
  148: #define CODING_FLAG_ISO_DESIGNATION     0x10000
  149: 
  150: /* A character to be produced on output if encoding of the original
  151:    character is inhibited by CODING_MODE_INHIBIT_UNENCODABLE_CHAR.
  152:    It must be an ASCII character.  */
  153: #define CODING_REPLACEMENT_CHARACTER '?'
  154: 
  155: /* Structure of the field `spec.iso2022' in the structure `coding_system'.  */
  156: struct iso2022_spec
  157: {
  158:   /* The current graphic register invoked to each graphic plane.  */
  159:   int current_invocation[2];
  160: 
  161:   /* The current charset designated to each graphic register.  */
  162:   int current_designation[4];
  163: 
  164:   /* A charset initially designated to each graphic register.  */
  165:   int initial_designation[4];
  166: 
  167:   /* If not -1, it is a graphic register specified in an invalid
  168:      designation sequence.  */
  169:   int last_invalid_designation_register;
  170: 
  171:   /* A graphic register to which each charset should be designated.  */
  172:   unsigned char requested_designation[MAX_CHARSET + 1];
  173: 
  174:   /* A revision number to be specified for each charset on encoding.
  175:      The value 255 means no revision number for the corresponding
  176:      charset.  */
  177:   unsigned char charset_revision_number[MAX_CHARSET + 1];
  178: 
  179:   /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
  180:      by single-shift while encoding.  */
  181:   int single_shifting;
  182: 
  183:   /* Set to 1 temporarily only when processing at beginning of line.  */
  184:   int bol;
  185: };
  186: 
  187: /* Macros to access each field in the structure `spec.iso2022'.  */
  188: #define CODING_SPEC_ISO_INVOCATION(coding, plane) \
  189:   (coding)->spec.iso2022.current_invocation[plane]
  190: #define CODING_SPEC_ISO_DESIGNATION(coding, reg) \
  191:   (coding)->spec.iso2022.current_designation[reg]
  192: #define CODING_SPEC_ISO_INITIAL_DESIGNATION(coding, reg) \
  193:   (coding)->spec.iso2022.initial_designation[reg]
  194: #define CODING_SPEC_ISO_REQUESTED_DESIGNATION(coding, charset) \
  195:   (coding)->spec.iso2022.requested_designation[charset]
  196: #define CODING_SPEC_ISO_REVISION_NUMBER(coding, charset) \
  197:   (coding)->spec.iso2022.charset_revision_number[charset]
  198: #define CODING_SPEC_ISO_SINGLE_SHIFTING(coding) \
  199:   (coding)->spec.iso2022.single_shifting
  200: #define CODING_SPEC_ISO_BOL(coding) \
  201:   (coding)->spec.iso2022.bol
  202: 
  203: /* A value which may appear in
  204:    coding->spec.iso2022.requested_designation indicating that the
  205:    corresponding charset does not request any graphic register to be
  206:    designated.  */
  207: #define CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION 4
  208: 
  209: /* Return a charset which is currently designated to the graphic plane
  210:    PLANE in the coding-system CODING.  */
  211: #define CODING_SPEC_ISO_PLANE_CHARSET(coding, plane)    \
  212:   ((CODING_SPEC_ISO_INVOCATION (coding, plane) < 0)     \
  213:    ? -1                                                 \
  214:    : CODING_SPEC_ISO_DESIGNATION (coding,               \
  215:                                   CODING_SPEC_ISO_INVOCATION (coding, plane)))
  216: 
  217: /*** BIG5 section ***/
  218: 
  219: /* Macros to denote each type of BIG5 coding system.  */
  220: #define CODING_FLAG_BIG5_HKU    0x00 /* BIG5-HKU is one of variants of
  221:                                         BIG5 developed by Hong Kong
  222:                                         University.  */
  223: #define CODING_FLAG_BIG5_ETEN   0x01 /* BIG5_ETen is one of variants
  224:                                         of BIG5 developed by the
  225:                                         company ETen in Taiwan.  */
  226: 
  227: /*** GENERAL section ***/
  228: 
  229: /* Types of coding system.  */
  230: enum coding_type
  231:   {
  232:     coding_type_no_conversion,  /* A coding system which requires no
  233:                                    conversion for reading and writing
  234:                                    including end-of-line format.  */
  235:     coding_type_emacs_mule,     /* A coding system used in Emacs'
  236:                                    buffer and string.  Requires no
  237:                                    conversion for reading and writing
  238:                                    except for end-of-line format.  */
  239:     coding_type_undecided,      /* A coding system which requires
  240:                                    automatic detection of a real
  241:                                    coding system.  */
  242:     coding_type_sjis,           /* SJIS coding system for Japanese.  */
  243:     coding_type_iso2022,        /* Any coding system of ISO2022
  244:                                    variants.  */
  245:     coding_type_big5,           /* BIG5 coding system for Chinese.  */
  246:     coding_type_ccl,            /* The coding system of which decoder
  247:                                    and encoder are written in CCL.  */
  248:     coding_type_raw_text        /* A coding system for a text
  249:                                    containing random 8-bit code which
  250:                                    does not require code conversion
  251:                                    except for end-of-line format. */
  252:   };
  253: 
  254: /* Formats of end-of-line.  */
  255: #define CODING_EOL_LF   0 /* Line-feed only, same as Emacs'
  256:                                    internal format.  */
  257: #define CODING_EOL_CRLF 1       /* Sequence of carriage-return and
  258:                                    line-feed.  */
  259: #define CODING_EOL_CR   2 /* Carriage-return only.  */
  260: #define CODING_EOL_UNDECIDED 3  /* This value is used to denote the
  261:                                    eol-type is not yet decided.  */
  262: #define CODING_EOL_INCONSISTENT 4 /* This value is used to denote the
  263:                                      eol-type is not consistent
  264:                                      through the file.  */
  265: 
  /* 1 iff CODING (a pointer to a struct having a `composing' member) is
     currently composing, i.e. its `composing' status is greater than
     COMPOSITION_NO.  The argument is parenthesized so that pointer
     expressions such as `p + 1' or `&cs' can be passed safely.  */
  #define COMPOSING_P(coding) \
    ((int) (coding)->composing > (int) COMPOSITION_NO)
  268: 
  269: #define COMPOSITION_DATA_SIZE 4080
  270: #define COMPOSITION_DATA_MAX_BUNCH_LENGTH (4 + MAX_COMPOSITION_COMPONENTS*2)
  271: 
  272: /* Data structure to hold information about compositions of text that
  273:    is being decoded or encoded.  ISO 2022 base code conversion routines
  274:    handle special ESC sequences for composition specification.  But,
  275:    they can't get/put such information directly from/to a buffer in
  276:    the deepest place.  So, they store or retrieve the information
  277:    through this structure.
  278: 
  279:    The decoder stores the information in this structure when it meets
  280:    ESC sequences for composition while decoding codes, then, after all
  281:    text codes are decoded, puts `composition' properties on the text
  282:    by referring to the structure.
  283: 
  284:    The encoder at first stores the information of a text to be
  285:    encoded, then, while encoding codes, generates ESC sequences for
  286:    composition at proper places by referring to the structure.  */
  287: 
  288: struct composition_data
  289: {
  290:   /* The character position of the first character to be encoded or
  291:      decoded.  START and END (see below) are relative to this
  292:      position.  */
  293:   int char_offset;
  294: 
  295:   /* The composition data.  These elements are repeated for each
  296:      composition:
  297:         LENGTH START END METHOD [ COMPONENT ... ]
  298:      where,
  299:         LENGTH is the number of elements for this composition.
  300: 
  301:         START and END are starting and ending character positions of
  302:         the composition relative to `char_offset'.
  303: 
  304:         METHOD is one of `enum composing_status' specifying the way of
  305:         composition.
  306: 
  307:         COMPONENT is a character or an encoded composition rule.  */
  308:   int data[COMPOSITION_DATA_SIZE];
  309: 
  310:   /* The number of elements in `data' currently used.  */
  311:   int used;
  312: 
  313:   /* Pointers to the previous and next structures.  When `data' is
  314:      filled up, another structure is allocated and linked in `next'.
  315:      The new structure has backward link to this structure in `prev'.
  316:      The number of chained structures depends on how many compositions
  317:      the text being encoded or decoded contains.  */
  318:   struct composition_data *prev, *next;
  319: };
  320: 
  321: /* Macros used for the member `result' of the struct
  322:    coding_system.  */
  323: #define CODING_FINISH_NORMAL            0
  324: #define CODING_FINISH_INSUFFICIENT_SRC  1
  325: #define CODING_FINISH_INSUFFICIENT_DST  2
  326: #define CODING_FINISH_INCONSISTENT_EOL  3
  327: #define CODING_FINISH_INSUFFICIENT_CMP  4
  328: #define CODING_FINISH_INTERRUPT         5
  329: 
  330: /* Macros used for the member `mode' of the struct coding_system.  */
  331: 
  332: /* If set, recover the original CR or LF of the already decoded text
  333:    when the decoding routine encounters an inconsistent eol format.  */
  334: #define CODING_MODE_INHIBIT_INCONSISTENT_EOL    0x01
  335: 
  336: /* If set, the decoding/encoding routines treat the current data as
  337:    the last block of the whole text to be converted, and do
  338:    appropriate finishing job.  */
  339: #define CODING_MODE_LAST_BLOCK                  0x02
  340: 
  341: /* If set, it means that the current source text is in a buffer which
  342:    enables selective display.  */
  343: #define CODING_MODE_SELECTIVE_DISPLAY           0x04
  344: 
  345: /* If set, replace unencodable characters by `?' on encoding.  */
  346: #define CODING_MODE_INHIBIT_UNENCODABLE_CHAR    0x08
  347: 
  348: /* This flag is used by the decoding/encoding routines on the fly.  If
  349:    set, it means that right-to-left text is being processed.  */
  350: #define CODING_MODE_DIRECTION                   0x10
  351: 
  352: struct coding_system
  353: {
  354:   /* Type of the coding system.  */
  355:   enum coding_type type;
  356: 
  357:   /* Type of end-of-line format (LF, CRLF, or CR) of the coding system.  */
  358:   int eol_type;
  359: 
  360:   /* Flag bits of the coding system.  The meaning of each bit is common
  361:      to all types of coding systems.  */
  362:   unsigned int common_flags;
  363: 
  364:   /* Flag bits of the coding system.  The meaning of each bit depends
  365:      on the type of the coding system.  */
  366:   unsigned int flags;
  367: 
  368:   /* Mode bits of the coding system.  See the comments of the macros
  369:      CODING_MODE_XXX.  */
  370:   unsigned int mode;
  371: 
  372:   /* The current status of composition handling.  */
  373:   int composing;
  374: 
  375:   /* 1 iff the next character is a composition rule.  */
  376:   int composition_rule_follows;
  377: 
  378:   /* Information of compositions are stored here on decoding and set
  379:      in advance on encoding.  */
  380:   struct composition_data *cmp_data;
  381: 
  382:   /* Index to cmp_data->data for the first element for the current
  383:      composition.  */
  384:   int cmp_data_start;
  385: 
  386:   /* Index to cmp_data->data for the current element for the current
  387:      composition.  */
  388:   int cmp_data_index;
  389: 
  390:   /* Detailed information specific to each type of coding system.  */
  391:   union spec
  392:     {
  393:       struct iso2022_spec iso2022;
  394:       struct ccl_spec ccl;      /* Defined in ccl.h.  */
  395:     } spec;
  396: 
  397:   /* Index number of coding category of the coding system.  */
  398:   int category_idx;
  399: 
  400:   /* The following two members specify how characters 128..159 are
  401:      represented in source and destination text respectively.  1 means
  402:      they are represented by 2-byte sequence, 0 means they are
  403:      represented by 1-byte as is (see the comment in charset.h).  */
  404:   unsigned src_multibyte : 1;
  405:   unsigned dst_multibyte : 1;
  406: 
  407:   /* How many heading bytes we can skip for decoding.  This is set to
  408:      -1 in setup_coding_system, and updated by detect_coding.  So,
  409:      when this is equal to the byte length of the text being
  410:      converted, we can skip the actual conversion process.  */
  411:   int heading_ascii;
  412: 
  413:   /* The following members are set by encoding/decoding routine.  */
  414:   int produced, produced_char, consumed, consumed_char;
  415: 
  416:   /* Number of error source data found in a decoding routine.  */
  417:   int errors;
  418: 
  419:   /* Finish status of code conversion.  It should be one of macros
  420:      CODING_FINISH_XXXX.  */
  421:   int result;
  422: 
  423:   /* If nonzero, suppress error notification.  */
  424:   int suppress_error;
  425: 
  426:   /* The following members are all Lisp symbols.  We don't have to
  427:      protect them from GC because the current garbage collection
  428:      doesn't relocate Lisp symbols.  But, when it is changed, we must
  429:      find a way to protect them.  */
  430: 
  431:   /* Backward pointer to the Lisp symbol of the coding system.  */
  432:   Lisp_Object symbol;
  433: 
  434:   /* Lisp function (symbol) to be called after decoding to do
  435:      additional conversion, or nil.  */
  436:   Lisp_Object post_read_conversion;
  437: 
  438:   /* Lisp function (symbol) to be called before encoding to do
  439:      additional conversion, or nil.  */
  440:   Lisp_Object pre_write_conversion;
  441: 
  442:   /* Character translation tables to look up, or nil.  */
  443:   Lisp_Object translation_table_for_decode;
  444:   Lisp_Object translation_table_for_encode;
  445: };
  446: 
  447: /* Mask bits for (struct coding_system *)->common_flags.  */
  448: #define CODING_REQUIRE_FLUSHING_MASK    0x01
  449: #define CODING_REQUIRE_DECODING_MASK    0x02
  450: #define CODING_REQUIRE_ENCODING_MASK    0x04
  451: #define CODING_REQUIRE_DETECTION_MASK   0x08
  452: #define CODING_ASCII_INCOMPATIBLE_MASK  0x10
  453: 
  454: /* Return 1 if the coding system CODING requires specific code to be
  455:    attached at the tail of converted text.  */
  456: #define CODING_REQUIRE_FLUSHING(coding) \
  457:   ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)
  458: 
  459: /* Return 1 if the coding system CODING requires code conversion on
  460:    decoding.  */
  461: #define CODING_REQUIRE_DECODING(coding) \
  462:   ((coding)->dst_multibyte              \
  463:    || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK)
  464: 
  465: /* Return 1 if the coding system CODING requires code conversion on
  466:    encoding.
  467:    The non-multibyte part of the condition is to support encoding of
  468:    unibyte strings/buffers generated by string-as-unibyte or
  469:    (set-buffer-multibyte nil) from multibyte strings/buffers.  */
  470: #define CODING_REQUIRE_ENCODING(coding) \
  471:   ((coding)->src_multibyte              \
  472:    || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK)
  473: 
  474: /* Return nonzero if the coding system CODING requires some kind of
  475:    code detection.  */
  476: #define CODING_REQUIRE_DETECTION(coding) \
  477:   ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)
  478: 
  479: /* Return 1 if the coding system CODING requires code conversion on
  480:    decoding or some kind of code detection.  */
  481: #define CODING_MAY_REQUIRE_DECODING(coding)     \
  482:   (CODING_REQUIRE_DECODING (coding)             \
  483:    || CODING_REQUIRE_DETECTION (coding))
  484: 
  485: /* Index for each coding category in `coding_category_table' */
  486: #define CODING_CATEGORY_IDX_EMACS_MULE  0
  487: #define CODING_CATEGORY_IDX_SJIS        1
  488: #define CODING_CATEGORY_IDX_ISO_7       2
  489: #define CODING_CATEGORY_IDX_ISO_7_TIGHT 3
  490: #define CODING_CATEGORY_IDX_ISO_8_1     4
  491: #define CODING_CATEGORY_IDX_ISO_8_2     5
  492: #define CODING_CATEGORY_IDX_ISO_7_ELSE  6
  493: #define CODING_CATEGORY_IDX_ISO_8_ELSE  7
  494: #define CODING_CATEGORY_IDX_CCL         8
  495: #define CODING_CATEGORY_IDX_BIG5        9
  496: #define CODING_CATEGORY_IDX_UTF_8       10
  497: #define CODING_CATEGORY_IDX_UTF_16_BE   11
  498: #define CODING_CATEGORY_IDX_UTF_16_LE   12
  499: #define CODING_CATEGORY_IDX_RAW_TEXT    13
  500: #define CODING_CATEGORY_IDX_BINARY      14
  501: #define CODING_CATEGORY_IDX_MAX         15
  502: 
  503: /* Definitions of flag bits returned by the function
  504:    detect_coding_mask ().  */
  505: #define CODING_CATEGORY_MASK_EMACS_MULE (1 << CODING_CATEGORY_IDX_EMACS_MULE)
  506: #define CODING_CATEGORY_MASK_SJIS       (1 << CODING_CATEGORY_IDX_SJIS)
  507: #define CODING_CATEGORY_MASK_ISO_7      (1 << CODING_CATEGORY_IDX_ISO_7)
  508: #define CODING_CATEGORY_MASK_ISO_7_TIGHT (1 << CODING_CATEGORY_IDX_ISO_7_TIGHT)
  509: #define CODING_CATEGORY_MASK_ISO_8_1    (1 << CODING_CATEGORY_IDX_ISO_8_1)
  510: #define CODING_CATEGORY_MASK_ISO_8_2    (1 << CODING_CATEGORY_IDX_ISO_8_2)
  511: #define CODING_CATEGORY_MASK_ISO_7_ELSE (1 << CODING_CATEGORY_IDX_ISO_7_ELSE)
  512: #define CODING_CATEGORY_MASK_ISO_8_ELSE (1 << CODING_CATEGORY_IDX_ISO_8_ELSE)
  513: #define CODING_CATEGORY_MASK_CCL        (1 << CODING_CATEGORY_IDX_CCL)
  514: #define CODING_CATEGORY_MASK_BIG5       (1 << CODING_CATEGORY_IDX_BIG5)
  515: #define CODING_CATEGORY_MASK_UTF_8      (1 << CODING_CATEGORY_IDX_UTF_8)
  516: #define CODING_CATEGORY_MASK_UTF_16_BE  (1 << CODING_CATEGORY_IDX_UTF_16_BE)
  517: #define CODING_CATEGORY_MASK_UTF_16_LE  (1 << CODING_CATEGORY_IDX_UTF_16_LE)
  518: #define CODING_CATEGORY_MASK_RAW_TEXT   (1 << CODING_CATEGORY_IDX_RAW_TEXT)
  519: #define CODING_CATEGORY_MASK_BINARY     (1 << CODING_CATEGORY_IDX_BINARY)
  520: 
  521: /* This value is returned if detect_coding_mask () find nothing other
  522:    than ASCII characters.  */
  523: #define CODING_CATEGORY_MASK_ANY        \
  524:   (  CODING_CATEGORY_MASK_EMACS_MULE    \
  525:    | CODING_CATEGORY_MASK_SJIS           \
  526:    | CODING_CATEGORY_MASK_ISO_7         \
  527:    | CODING_CATEGORY_MASK_ISO_7_TIGHT   \
  528:    | CODING_CATEGORY_MASK_ISO_8_1       \
  529:    | CODING_CATEGORY_MASK_ISO_8_2       \
  530:    | CODING_CATEGORY_MASK_ISO_7_ELSE    \
  531:    | CODING_CATEGORY_MASK_ISO_8_ELSE    \
  532:    | CODING_CATEGORY_MASK_CCL           \
  533:    | CODING_CATEGORY_MASK_BIG5          \
  534:    | CODING_CATEGORY_MASK_UTF_8         \
  535:    | CODING_CATEGORY_MASK_UTF_16_BE     \
  536:    | CODING_CATEGORY_MASK_UTF_16_LE)
  537: 
  538: #define CODING_CATEGORY_MASK_ISO_7BIT \
  539:   (CODING_CATEGORY_MASK_ISO_7 | CODING_CATEGORY_MASK_ISO_7_TIGHT)
  540: 
  541: #define CODING_CATEGORY_MASK_ISO_8BIT \
  542:   (CODING_CATEGORY_MASK_ISO_8_1 | CODING_CATEGORY_MASK_ISO_8_2)
  543: 
  544: #define CODING_CATEGORY_MASK_ISO_SHIFT \
  545:   (CODING_CATEGORY_MASK_ISO_7_ELSE | CODING_CATEGORY_MASK_ISO_8_ELSE)
  546: 
  547: #define CODING_CATEGORY_MASK_ISO        \
  548:   (  CODING_CATEGORY_MASK_ISO_7BIT      \
  549:    | CODING_CATEGORY_MASK_ISO_SHIFT     \
  550:    | CODING_CATEGORY_MASK_ISO_8BIT)
  551: 
  552: #define CODING_CATEGORY_MASK_UTF_16_BE_LE \
  553:    (CODING_CATEGORY_MASK_UTF_16_BE | CODING_CATEGORY_MASK_UTF_16_LE)
  554: 
  555: /* Macros to decode or encode a character of JISX0208 in SJIS.  S1 and
  556:    S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
  557:    system.  C1 and C2 are the 1st and 2nd position codes of Emacs'
  558:    internal format.  */
  559: 
  /* Decode the SJIS byte pair (S1, S2) of a JISX0208 character into the
     1st and 2nd position codes C1 and C2 of Emacs' internal format.
     C1 and C2 must be assignable lvalues.

     An S2 of 0x9F or above selects the second row packed into S1; an
     S1 of 0xE0 or above uses the larger row offsets (0x160/0x161
     instead of 0xE0/0xE1).  Below 0x9F, S2 is shifted by 0x20 or 0x1F
     depending on whether it is at or above 0x7F.

     Every argument is parenthesized so that expression arguments
     (e.g. `base + 8' or `b & 0xFF') are decoded correctly.  S1 and S2
     are still evaluated more than once, so they must not have side
     effects.  */
  #define DECODE_SJIS(s1, s2, c1, c2)                           \
    do {                                                        \
      if ((s2) >= 0x9F)                                         \
        {                                                       \
          (c1) = (s1) * 2 - ((s1) >= 0xE0 ? 0x160 : 0xE0);      \
          (c2) = (s2) - 0x7E;                                   \
        }                                                       \
      else                                                      \
        {                                                       \
          (c1) = (s1) * 2 - ((s1) >= 0xE0 ? 0x161 : 0xE1);      \
          (c2) = (s2) - ((s2) >= 0x7F ? 0x20 : 0x1F);           \
        }                                                       \
    } while (0)
  569: 
  570: #define ENCODE_SJIS(c1, c2, s1, s2)                     \
  571:   do {                                                  \
  572:     if (