
1: /* Header for coding system handler. 2: Copyright (C) 2001, 2002, 2003, 2004, 2005, 3: 2006, 2007 Free Software Foundation, Inc. 4: Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 5: 2005, 2006, 2007 6: National Institute of Advanced Industrial Science and Technology (AIST) 7: Registration Number H14PRO021 8: 9: This file is part of GNU Emacs. 10: 11: GNU Emacs is free software; you can redistribute it and/or modify 12: it under the terms of the GNU General Public License as published by 13: the Free Software Foundation; either version 2, or (at your option) 14: any later version. 15: 16: GNU Emacs is distributed in the hope that it will be useful, 17: but WITHOUT ANY WARRANTY; without even the implied warranty of 18: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19: GNU General Public License for more details. 20: 21: You should have received a copy of the GNU General Public License 22: along with GNU Emacs; see the file COPYING. If not, write to 23: the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 24: Boston, MA 02110-1301, USA. */ 25: 26: #ifndef EMACS_CODING_H 27: #define EMACS_CODING_H 28: 29: #include "ccl.h" 30: 31: /*** EMACS' INTERNAL FORMAT (emacs-mule) section ***/ 32: 33: /* All code (1-byte) of Emacs' internal format is classified into one 34: of the followings. See also `charset.h'. */ 35: enum emacs_code_class_type 36: { 37: EMACS_control_code, /* Control codes in the range 38: 0x00..0x1F and 0x7F except for the 39: following two codes. */ 40: EMACS_linefeed_code, /* 0x0A (linefeed) to denote 41: end-of-line. */ 42: EMACS_carriage_return_code, /* 0x0D (carriage-return) to be used 43: in selective display mode. */ 44: EMACS_ascii_code, /* ASCII characters. */ 45: EMACS_leading_code_2, /* Base leading code of official 46: TYPE9N character. */ 47: EMACS_leading_code_3, /* Base leading code of private TYPE9N 48: or official TYPE9Nx9N character. 
*/ 49: EMACS_leading_code_4, /* Base leading code of private 50: TYPE9Nx9N character. */ 51: EMACS_invalid_code /* Invalid code, i.e. a base leading 52: code not yet assigned to any 53: charset, or a code of the range 54: 0xA0..0xFF. */ 55: }; 56: 57: extern enum emacs_code_class_type emacs_code_class[256]; 58: 59: /*** ISO2022 section ***/ 60: 61: /* Macros to define code of control characters for ISO2022's functions. */ 62: /* code */ /* function */ 63: #define ISO_CODE_LF 0x0A /* line-feed */ 64: #define ISO_CODE_CR 0x0D /* carriage-return */ 65: #define ISO_CODE_SO 0x0E /* shift-out */ 66: #define ISO_CODE_SI 0x0F /* shift-in */ 67: #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */ 68: #define ISO_CODE_ESC 0x1B /* escape */ 69: #define ISO_CODE_SS2 0x8E /* single-shift-2 */ 70: #define ISO_CODE_SS3 0x8F /* single-shift-3 */ 71: #define ISO_CODE_CSI 0x9B /* control-sequence-introduce */ 72: 73: /* All code (1-byte) of ISO2022 is classified into one of the 74: followings. */ 75: enum iso_code_class_type 76: { 77: ISO_control_0, /* Control codes in the range 78: 0x00..0x1F and 0x7F, except for the 79: following 5 codes. */ 80: ISO_carriage_return, /* ISO_CODE_CR (0x0D) */ 81: ISO_shift_out, /* ISO_CODE_SO (0x0E) */ 82: ISO_shift_in, /* ISO_CODE_SI (0x0F) */ 83: ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ 84: ISO_escape, /* ISO_CODE_SO (0x1B) */ 85: ISO_control_1, /* Control codes in the range 86: 0x80..0x9F, except for the 87: following 3 codes. */ 88: ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */ 89: ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */ 90: ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */ 91: ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */ 92: ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */ 93: ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */ 94: ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. 
*/ 95: }; 96: 97: /** The macros CODING_FLAG_ISO_XXX defines a flag bit of the `flags' 98: element in the structure `coding_system'. This information is used 99: while encoding a text to ISO2022. **/ 100: 101: /* If set, produce short-form designation sequence (e.g. ESC $ A) 102: instead of long-form sequence (e.g. ESC $ ( A). */ 103: #define CODING_FLAG_ISO_SHORT_FORM 0x0001 104: 105: /* If set, reset graphic planes and registers at end-of-line to the 106: initial state. */ 107: #define CODING_FLAG_ISO_RESET_AT_EOL 0x0002 108: 109: /* If set, reset graphic planes and registers before any control 110: characters to the initial state. */ 111: #define CODING_FLAG_ISO_RESET_AT_CNTL 0x0004 112: 113: /* If set, encode by 7-bit environment. */ 114: #define CODING_FLAG_ISO_SEVEN_BITS 0x0008 115: 116: /* If set, use locking-shift function. */ 117: #define CODING_FLAG_ISO_LOCKING_SHIFT 0x0010 118: 119: /* If set, use single-shift function. Overwrite 120: CODING_FLAG_ISO_LOCKING_SHIFT. */ 121: #define CODING_FLAG_ISO_SINGLE_SHIFT 0x0020 122: 123: /* If set, designate JISX0201-Roman instead of ASCII. */ 124: #define CODING_FLAG_ISO_USE_ROMAN 0x0040 125: 126: /* If set, designate JISX0208-1978 instead of JISX0208-1983. */ 127: #define CODING_FLAG_ISO_USE_OLDJIS 0x0080 128: 129: /* If set, do not produce ISO6429's direction specifying sequence. */ 130: #define CODING_FLAG_ISO_NO_DIRECTION 0x0100 131: 132: /* If set, assume designation states are reset at beginning of line on 133: output. */ 134: #define CODING_FLAG_ISO_INIT_AT_BOL 0x0200 135: 136: /* If set, designation sequence should be placed at beginning of line 137: on output. */ 138: #define CODING_FLAG_ISO_DESIGNATE_AT_BOL 0x0400 139: 140: /* If set, do not encode unsafe characters on output. */ 141: #define CODING_FLAG_ISO_SAFE 0x0800 142: 143: /* If set, extra latin codes (128..159) are accepted as a valid code 144: on input. 
*/ 145: #define CODING_FLAG_ISO_LATIN_EXTRA 0x1000 146: 147: /* If set, use designation escape sequence. */ 148: #define CODING_FLAG_ISO_DESIGNATION 0x10000 149: 150: /* A character to be produced on output if encoding of the original 151: character is inhibitted by CODING_MODE_INHIBIT_UNENCODABLE_CHAR. 152: It must be an ASCII character. */ 153: #define CODING_REPLACEMENT_CHARACTER '?' 154: 155: /* Structure of the field `spec.iso2022' in the structure `coding_system'. */ 156: struct iso2022_spec 157: { 158: /* The current graphic register invoked to each graphic plane. */ 159: int current_invocation[2]; 160: 161: /* The current charset designated to each graphic register. */ 162: int current_designation[4]; 163: 164: /* A charset initially designated to each graphic register. */ 165: int initial_designation[4]; 166: 167: /* If not -1, it is a graphic register specified in an invalid 168: designation sequence. */ 169: int last_invalid_designation_register; 170: 171: /* A graphic register to which each charset should be designated. */ 172: unsigned char requested_designation[MAX_CHARSET + 1]; 173: 174: /* A revision number to be specified for each charset on encoding. 175: The value 255 means no revision number for the corresponding 176: charset. */ 177: unsigned char charset_revision_number[MAX_CHARSET + 1]; 178: 179: /* Set to 1 temporarily only when graphic register 2 or 3 is invoked 180: by single-shift while encoding. */ 181: int single_shifting; 182: 183: /* Set to 1 temporarily only when processing at beginning of line. */ 184: int bol; 185: }; 186: 187: /* Macros to access each field in the structure `spec.iso2022'. 
*/ 188: #define CODING_SPEC_ISO_INVOCATION(coding, plane) \ 189: (coding)->spec.iso2022.current_invocation[plane] 190: #define CODING_SPEC_ISO_DESIGNATION(coding, reg) \ 191: (coding)->spec.iso2022.current_designation[reg] 192: #define CODING_SPEC_ISO_INITIAL_DESIGNATION(coding, reg) \ 193: (coding)->spec.iso2022.initial_designation[reg] 194: #define CODING_SPEC_ISO_REQUESTED_DESIGNATION(coding, charset) \ 195: (coding)->spec.iso2022.requested_designation[charset] 196: #define CODING_SPEC_ISO_REVISION_NUMBER(coding, charset) \ 197: (coding)->spec.iso2022.charset_revision_number[charset] 198: #define CODING_SPEC_ISO_SINGLE_SHIFTING(coding) \ 199: (coding)->spec.iso2022.single_shifting 200: #define CODING_SPEC_ISO_BOL(coding) \ 201: (coding)->spec.iso2022.bol 202: 203: /* A value which may appear in 204: coding->spec.iso2022.requested_designation indicating that the 205: corresponding charset does not request any graphic register to be 206: designated. */ 207: #define CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION 4 208: 209: /* Return a charset which is currently designated to the graphic plane 210: PLANE in the coding-system CODING. */ 211: #define CODING_SPEC_ISO_PLANE_CHARSET(coding, plane) \ 212: ((CODING_SPEC_ISO_INVOCATION (coding, plane) < 0) \ 213: ? -1 \ 214: : CODING_SPEC_ISO_DESIGNATION (coding, \ 215: CODING_SPEC_ISO_INVOCATION (coding, plane))) 216: 217: /*** BIG5 section ***/ 218: 219: /* Macros to denote each type of BIG5 coding system. */ 220: #define CODING_FLAG_BIG5_HKU 0x00 /* BIG5-HKU is one of variants of 221: BIG5 developed by Hong Kong 222: University. */ 223: #define CODING_FLAG_BIG5_ETEN 0x01 /* BIG5_ETen is one of variants 224: of BIG5 developed by the 225: company ETen in Taiwan. */ 226: 227: /*** GENERAL section ***/ 228: 229: /* Types of coding system. */ 230: enum coding_type 231: { 232: coding_type_no_conversion, /* A coding system which requires no 233: conversion for reading and writing 234: including end-of-line format. 
*/ 235: coding_type_emacs_mule, /* A coding system used in Emacs' 236: buffer and string. Requires no 237: conversion for reading and writing 238: except for end-of-line format. */ 239: coding_type_undecided, /* A coding system which requires 240: automatic detection of a real 241: coding system. */ 242: coding_type_sjis, /* SJIS coding system for Japanese. */ 243: coding_type_iso2022, /* Any coding system of ISO2022 244: variants. */ 245: coding_type_big5, /* BIG5 coding system for Chinese. */ 246: coding_type_ccl, /* The coding system of which decoder 247: and encoder are written in CCL. */ 248: coding_type_raw_text /* A coding system for a text 249: containing random 8-bit code which 250: does not require code conversion 251: except for end-of-line format. */ 252: }; 253: 254: /* Formats of end-of-line. */ 255: #define CODING_EOL_LF 0 /* Line-feed only, same as Emacs' 256: internal format. */ 257: #define CODING_EOL_CRLF 1 /* Sequence of carriage-return and 258: line-feed. */ 259: #define CODING_EOL_CR 2 /* Carriage-return only. */ 260: #define CODING_EOL_UNDECIDED 3 /* This value is used to denote the 261: eol-type is not yet decided. */ 262: #define CODING_EOL_INCONSISTENT 4 /* This value is used to denote the 263: eol-type is not consistent 264: through the file. */ 265: 266: /* 1 iff composing. */ 267: #define COMPOSING_P(coding) ((int) coding->composing > (int) COMPOSITION_NO) 268: 269: #define COMPOSITION_DATA_SIZE 4080 270: #define COMPOSITION_DATA_MAX_BUNCH_LENGTH (4 + MAX_COMPOSITION_COMPONENTS*2) 271: 272: /* Data structure to hold information about compositions of text that 273: is being decoded or encode. ISO 2022 base code conversion routines 274: handle special ESC sequences for composition specification. But, 275: they can't get/put such information directly from/to a buffer in 276: the deepest place. So, they store or retrieve the information 277: through this structure. 
278: 279: The encoder stores the information in this structure when it meets 280: ESC sequences for composition while encoding codes, then, after all 281: text codes are encoded, puts `composition' properties on the text 282: by referring to the structure. 283: 284: The decoder at first stores the information of a text to be 285: decoded, then, while decoding codes, generates ESC sequences for 286: composition at proper places by referring to the structure. */ 287: 288: struct composition_data 289: { 290: /* The character position of the first character to be encoded or 291: decoded. START and END (see below) are relative to this 292: position. */ 293: int char_offset; 294: 295: /* The composition data. These elements are repeated for each 296: composition: 297: LENGTH START END METHOD [ COMPONENT ... ] 298: where, 299: LENGTH is the number of elements for this composition. 300: 301: START and END are starting and ending character positions of 302: the composition relative to `char_offset'. 303: 304: METHOD is one of `enum composing_status' specifying the way of 305: composition. 306: 307: COMPONENT is a character or an encoded composition rule. */ 308: int data[COMPOSITION_DATA_SIZE]; 309: 310: /* The number of elements in `data' currently used. */ 311: int used; 312: 313: /* Pointers to the previous and next structures. When `data' is 314: filled up, another structure is allocated and linked in `next'. 315: The new structure has backward link to this structure in `prev'. 316: The number of chained structures depends on how many compositions 317: the text being encoded or decoded contains. */ 318: struct composition_data *prev, *next; 319: }; 320: 321: /* Macros used for the member `result' of the struct 322: coding_system. 
*/ 323: #define CODING_FINISH_NORMAL 0 324: #define CODING_FINISH_INSUFFICIENT_SRC 1 325: #define CODING_FINISH_INSUFFICIENT_DST 2 326: #define CODING_FINISH_INCONSISTENT_EOL 3 327: #define CODING_FINISH_INSUFFICIENT_CMP 4 328: #define CODING_FINISH_INTERRUPT 5 329: 330: /* Macros used for the member `mode' of the struct coding_system. */ 331: 332: /* If set, recover the original CR or LF of the already decoded text 333: when the decoding routine encounters an inconsistent eol format. */ 334: #define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01 335: 336: /* If set, the decoding/encoding routines treat the current data as 337: the last block of the whole text to be converted, and do 338: appropriate finishing job. */ 339: #define CODING_MODE_LAST_BLOCK 0x02 340: 341: /* If set, it means that the current source text is in a buffer which 342: enables selective display. */ 343: #define CODING_MODE_SELECTIVE_DISPLAY 0x04 344: 345: /* If set, replace unencodabae characters by `?' on encoding. */ 346: #define CODING_MODE_INHIBIT_UNENCODABLE_CHAR 0x08 347: 348: /* This flag is used by the decoding/encoding routines on the fly. If 349: set, it means that right-to-left text is being processed. */ 350: #define CODING_MODE_DIRECTION 0x10 351: 352: struct coding_system 353: { 354: /* Type of the coding system. */ 355: enum coding_type type; 356: 357: /* Type of end-of-line format (LF, CRLF, or CR) of the coding system. */ 358: int eol_type; 359: 360: /* Flag bits of the coding system. The meaning of each bit is common 361: to all types of coding systems. */ 362: unsigned int common_flags; 363: 364: /* Flag bits of the coding system. The meaning of each bit depends 365: on the type of the coding system. */ 366: unsigned int flags; 367: 368: /* Mode bits of the coding system. See the comments of the macros 369: CODING_MODE_XXX. */ 370: unsigned int mode; 371: 372: /* The current status of composition handling. 
*/ 373: int composing; 374: 375: /* 1 iff the next character is a composition rule. */ 376: int composition_rule_follows; 377: 378: /* Information of compositions are stored here on decoding and set 379: in advance on encoding. */ 380: struct composition_data *cmp_data; 381: 382: /* Index to cmp_data->data for the first element for the current 383: composition. */ 384: int cmp_data_start; 385: 386: /* Index to cmp_data->data for the current element for the current 387: composition. */ 388: int cmp_data_index; 389: 390: /* Detailed information specific to each type of coding system. */ 391: union spec 392: { 393: struct iso2022_spec iso2022; 394: struct ccl_spec ccl; /* Defined in ccl.h. */ 395: } spec; 396: 397: /* Index number of coding category of the coding system. */ 398: int category_idx; 399: 400: /* The following two members specify how characters 128..159 are 401: represented in source and destination text respectively. 1 means 402: they are represented by 2-byte sequence, 0 means they are 403: represented by 1-byte as is (see the comment in charset.h). */ 404: unsigned src_multibyte : 1; 405: unsigned dst_multibyte : 1; 406: 407: /* How may heading bytes we can skip for decoding. This is set to 408: -1 in setup_coding_system, and updated by detect_coding. So, 409: when this is equal to the byte length of the text being 410: converted, we can skip the actual conversion process. */ 411: int heading_ascii; 412: 413: /* The following members are set by encoding/decoding routine. */ 414: int produced, produced_char, consumed, consumed_char; 415: 416: /* Number of error source data found in a decoding routine. */ 417: int errors; 418: 419: /* Finish status of code conversion. It should be one of macros 420: CODING_FINISH_XXXX. */ 421: int result; 422: 423: /* If nonzero, suppress error notification. */ 424: int suppress_error; 425: 426: /* The following members are all Lisp symbols. 
We don't have to 427: protect them from GC because the current garbage collection 428: doesn't relocate Lisp symbols. But, when it is changed, we must 429: find a way to protect them. */ 430: 431: /* Backward pointer to the Lisp symbol of the coding system. */ 432: Lisp_Object symbol; 433: 434: /* Lisp function (symbol) to be called after decoding to do 435: additional conversion, or nil. */ 436: Lisp_Object post_read_conversion; 437: 438: /* Lisp function (symbol) to be called before encoding to do 439: additional conversion, or nil. */ 440: Lisp_Object pre_write_conversion; 441: 442: /* Character translation tables to look up, or nil. */ 443: Lisp_Object translation_table_for_decode; 444: Lisp_Object translation_table_for_encode; 445: }; 446: 447: /* Mask bits for (struct coding_system *)->common_flags. */ 448: #define CODING_REQUIRE_FLUSHING_MASK 0x01 449: #define CODING_REQUIRE_DECODING_MASK 0x02 450: #define CODING_REQUIRE_ENCODING_MASK 0x04 451: #define CODING_REQUIRE_DETECTION_MASK 0x08 452: #define CODING_ASCII_INCOMPATIBLE_MASK 0x10 453: 454: /* Return 1 if the coding system CODING requires specific code to be 455: attached at the tail of converted text. */ 456: #define CODING_REQUIRE_FLUSHING(coding) \ 457: ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK) 458: 459: /* Return 1 if the coding system CODING requires code conversion on 460: decoding. */ 461: #define CODING_REQUIRE_DECODING(coding) \ 462: ((coding)->dst_multibyte \ 463: || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK) 464: 465: /* Return 1 if the coding system CODING requires code conversion on 466: encoding. 467: The non-multibyte part of the condition is to support encoding of 468: unibyte strings/buffers generated by string-as-unibyte or 469: (set-buffer-multibyte nil) from multibyte strings/buffers. 
*/ 470: #define CODING_REQUIRE_ENCODING(coding) \ 471: ((coding)->src_multibyte \ 472: || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK) 473: 474: /* Return 1 if the coding system CODING requires some kind of code 475: detection. */ 476: #define CODING_REQUIRE_DETECTION(coding) \ 477: ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK) 478: 479: /* Return 1 if the coding system CODING requires code conversion on 480: decoding or some kind of code detection. */ 481: #define CODING_MAY_REQUIRE_DECODING(coding) \ 482: (CODING_REQUIRE_DECODING (coding) \ 483: || CODING_REQUIRE_DETECTION (coding)) 484: 485: /* Index for each coding category in `coding_category_table' */ 486: #define CODING_CATEGORY_IDX_EMACS_MULE 0 487: #define CODING_CATEGORY_IDX_SJIS 1 488: #define CODING_CATEGORY_IDX_ISO_7 2 489: #define CODING_CATEGORY_IDX_ISO_7_TIGHT 3 490: #define CODING_CATEGORY_IDX_ISO_8_1 4 491: #define CODING_CATEGORY_IDX_ISO_8_2 5 492: #define CODING_CATEGORY_IDX_ISO_7_ELSE 6 493: #define CODING_CATEGORY_IDX_ISO_8_ELSE 7 494: #define CODING_CATEGORY_IDX_CCL 8 495: #define CODING_CATEGORY_IDX_BIG5 9 496: #define CODING_CATEGORY_IDX_UTF_8 10 497: #define CODING_CATEGORY_IDX_UTF_16_BE 11 498: #define CODING_CATEGORY_IDX_UTF_16_LE 12 499: #define CODING_CATEGORY_IDX_RAW_TEXT 13 500: #define CODING_CATEGORY_IDX_BINARY 14 501: #define CODING_CATEGORY_IDX_MAX 15 502: 503: /* Definitions of flag bits returned by the function 504: detect_coding_mask (). 
*/ 505: #define CODING_CATEGORY_MASK_EMACS_MULE (1 << CODING_CATEGORY_IDX_EMACS_MULE) 506: #define CODING_CATEGORY_MASK_SJIS (1 << CODING_CATEGORY_IDX_SJIS) 507: #define CODING_CATEGORY_MASK_ISO_7 (1 << CODING_CATEGORY_IDX_ISO_7) 508: #define CODING_CATEGORY_MASK_ISO_7_TIGHT (1 << CODING_CATEGORY_IDX_ISO_7_TIGHT) 509: #define CODING_CATEGORY_MASK_ISO_8_1 (1 << CODING_CATEGORY_IDX_ISO_8_1) 510: #define CODING_CATEGORY_MASK_ISO_8_2 (1 << CODING_CATEGORY_IDX_ISO_8_2) 511: #define CODING_CATEGORY_MASK_ISO_7_ELSE (1 << CODING_CATEGORY_IDX_ISO_7_ELSE) 512: #define CODING_CATEGORY_MASK_ISO_8_ELSE (1 << CODING_CATEGORY_IDX_ISO_8_ELSE) 513: #define CODING_CATEGORY_MASK_CCL (1 << CODING_CATEGORY_IDX_CCL) 514: #define CODING_CATEGORY_MASK_BIG5 (1 << CODING_CATEGORY_IDX_BIG5) 515: #define CODING_CATEGORY_MASK_UTF_8 (1 << CODING_CATEGORY_IDX_UTF_8) 516: #define CODING_CATEGORY_MASK_UTF_16_BE (1 << CODING_CATEGORY_IDX_UTF_16_BE) 517: #define CODING_CATEGORY_MASK_UTF_16_LE (1 << CODING_CATEGORY_IDX_UTF_16_LE) 518: #define CODING_CATEGORY_MASK_RAW_TEXT (1 << CODING_CATEGORY_IDX_RAW_TEXT) 519: #define CODING_CATEGORY_MASK_BINARY (1 << CODING_CATEGORY_IDX_BINARY) 520: 521: /* This value is returned if detect_coding_mask () find nothing other 522: than ASCII characters. 
*/
#define CODING_CATEGORY_MASK_ANY        \
  (  CODING_CATEGORY_MASK_EMACS_MULE    \
   | CODING_CATEGORY_MASK_SJIS          \
   | CODING_CATEGORY_MASK_ISO_7         \
   | CODING_CATEGORY_MASK_ISO_7_TIGHT   \
   | CODING_CATEGORY_MASK_ISO_8_1       \
   | CODING_CATEGORY_MASK_ISO_8_2       \
   | CODING_CATEGORY_MASK_ISO_7_ELSE    \
   | CODING_CATEGORY_MASK_ISO_8_ELSE    \
   | CODING_CATEGORY_MASK_CCL           \
   | CODING_CATEGORY_MASK_BIG5          \
   | CODING_CATEGORY_MASK_UTF_8         \
   | CODING_CATEGORY_MASK_UTF_16_BE     \
   | CODING_CATEGORY_MASK_UTF_16_LE)

#define CODING_CATEGORY_MASK_ISO_7BIT \
  (CODING_CATEGORY_MASK_ISO_7 | CODING_CATEGORY_MASK_ISO_7_TIGHT)

#define CODING_CATEGORY_MASK_ISO_8BIT \
  (CODING_CATEGORY_MASK_ISO_8_1 | CODING_CATEGORY_MASK_ISO_8_2)

#define CODING_CATEGORY_MASK_ISO_SHIFT \
  (CODING_CATEGORY_MASK_ISO_7_ELSE | CODING_CATEGORY_MASK_ISO_8_ELSE)

#define CODING_CATEGORY_MASK_ISO        \
  (  CODING_CATEGORY_MASK_ISO_7BIT      \
   | CODING_CATEGORY_MASK_ISO_SHIFT     \
   | CODING_CATEGORY_MASK_ISO_8BIT)

#define CODING_CATEGORY_MASK_UTF_16_BE_LE   \
   (CODING_CATEGORY_MASK_UTF_16_BE | CODING_CATEGORY_MASK_UTF_16_LE)

/* Macros to decode or encode a character of JISX0208 in SJIS.  S1 and
   S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
   system.  C1 and C2 are the 1st and 2nd position codes of Emacs'
   internal format.  */

/* DECODE_SJIS reads S1 and S2 and assigns the results to the lvalues
   C1 and C2.  Every argument is parenthesized so that expression
   arguments associate correctly, but S1 and S2 are still evaluated
   more than once, so they must not have side effects.  */
#define DECODE_SJIS(s1, s2, c1, c2)                             \
  do {                                                          \
    if ((s2) >= 0x9F)                                           \
      (c1) = (s1) * 2 - ((s1) >= 0xE0 ? 0x160 : 0xE0),          \
      (c2) = (s2) - 0x7E;                                       \
    else                                                        \
      (c1) = (s1) * 2 - (((s1) >= 0xE0) ? 0x161 : 0xE1),        \
      (c2) = (s2) - (((s2) >= 0x7F) ? 0x20 : 0x1F);             \
  } while (0)

#define ENCODE_SJIS(c1, c2, s1, s2) \ 571: do { \ 572: if (