
1: /* CCL (Code Conversion Language) interpreter. 2: Copyright (C) 2001, 2002, 2003, 2004, 2005, 3: 2006, 2007 Free Software Foundation, Inc. 4: Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 5: 2005, 2006, 2007 6: National Institute of Advanced Industrial Science and Technology (AIST) 7: Registration Number H14PRO021 8: 9: This file is part of GNU Emacs. 10: 11: GNU Emacs is free software; you can redistribute it and/or modify 12: it under the terms of the GNU General Public License as published by 13: the Free Software Foundation; either version 2, or (at your option) 14: any later version. 15: 16: GNU Emacs is distributed in the hope that it will be useful, 17: but WITHOUT ANY WARRANTY; without even the implied warranty of 18: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19: GNU General Public License for more details. 20: 21: You should have received a copy of the GNU General Public License 22: along with GNU Emacs; see the file COPYING. If not, write to 23: the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 24: Boston, MA 02110-1301, USA. */ 25: 26: #include <config.h> 27: 28: #include <stdio.h> 29: 30: #include "lisp.h" 31: #include "charset.h" 32: #include "ccl.h" 33: #include "coding.h" 34: 35: /* This contains all code conversion map available to CCL. */ 36: Lisp_Object Vcode_conversion_map_vector; 37: 38: /* Alist of fontname patterns vs corresponding CCL program. */ 39: Lisp_Object Vfont_ccl_encoder_alist; 40: 41: /* This symbol is a property which assocates with ccl program vector. 42: Ex: (get 'ccl-big5-encoder 'ccl-program) returns ccl program vector. */ 43: Lisp_Object Qccl_program; 44: 45: /* These symbols are properties which associate with code conversion 46: map and their ID respectively. */ 47: Lisp_Object Qcode_conversion_map; 48: Lisp_Object Qcode_conversion_map_id; 49: 50: /* Symbols of ccl program have this property, a value of the property 51: is an index for Vccl_protram_table. */ 52: Lisp_Object Qccl_program_idx; 53: 54: /* Table of registered CCL programs. Each element is a vector of 55: NAME, CCL_PROG, RESOLVEDP, and UPDATEDP, where NAME (symbol) is the 56: name of the program, CCL_PROG (vector) is the compiled code of the 57: program, RESOLVEDP (t or nil) is the flag to tell if symbols in 58: CCL_PROG is already resolved to index numbers or not, UPDATEDP (t 59: or nil) is the flat to tell if the CCL program is updated after it 60: was once used. */ 61: Lisp_Object Vccl_program_table; 62: 63: /* Vector of registered hash tables for translation. */ 64: Lisp_Object Vtranslation_hash_table_vector; 65: 66: /* Return a hash table of id number ID. */ 67: #define GET_HASH_TABLE(id) \ 68: (XHASH_TABLE (XCDR(XVECTOR(Vtranslation_hash_table_vector)->contents[(id)]))) 69: 70: /* CCL (Code Conversion Language) is a simple language which has 71: operations on one input buffer, one output buffer, and 7 registers. 72: The syntax of CCL is described in `ccl.el'. Emacs Lisp function 73: `ccl-compile' compiles a CCL program and produces a CCL code which 74: is a vector of integers. The structure of this vector is as 75: follows: The 1st element: buffer-magnification, a factor for the 76: size of output buffer compared with the size of input buffer. The 77: 2nd element: address of CCL code to be executed when encountered 78: with end of input stream. The 3rd and the remaining elements: CCL 79: codes. */ 80: 81: /* Header of CCL compiled code */ 82: #define CCL_HEADER_BUF_MAG 0 83: #define CCL_HEADER_EOF 1 84: #define CCL_HEADER_MAIN 2 85: 86: /* CCL code is a sequence of 28-bit non-negative integers (i.e. the 87: MSB is always 0), each contains CCL command and/or arguments in the 88: following format: 89: 90: |----------------- integer (28-bit) ------------------| 91: |------- 17-bit ------|- 3-bit --|- 3-bit --|- 5-bit -| 92: |--constant argument--|-register-|-register-|-command-| 93: ccccccccccccccccc RRR rrr XXXXX 94: or 95: |------- relative address -------|-register-|-command-| 96: cccccccccccccccccccc rrr XXXXX 97: or 98: |------------- constant or other args ----------------| 99: cccccccccccccccccccccccccccc 100: 101: where, `cc...c' is a non-negative integer indicating constant value 102: (the left most `c' is always 0) or an absolute jump address, `RRR' 103: and `rrr' are CCL register number, `XXXXX' is one of the following 104: CCL commands. */ 105: 106: /* CCL commands 107: 108: Each comment fields shows one or more lines for command syntax and 109: the following lines for semantics of the command. In semantics, IC 110: stands for Instruction Counter. */ 111: 112: #define CCL_SetRegister 0x00 /* Set register a register value: 113: 1:00000000000000000RRRrrrXXXXX 114: ------------------------------ 115: reg[rrr] = reg[RRR]; 116: */ 117: 118: #define CCL_SetShortConst 0x01 /* Set register a short constant value: 119: 1:CCCCCCCCCCCCCCCCCCCCrrrXXXXX 120: ------------------------------ 121: reg[rrr] = CCCCCCCCCCCCCCCCCCC; 122: */ 123: 124: #define CCL_SetConst 0x02 /* Set register a constant value: 125: 1:00000000000000000000rrrXXXXX 126: 2:CONSTANT 127: ------------------------------ 128: reg[rrr] = CONSTANT; 129: IC++; 130: */ 131: 132: #define CCL_SetArray 0x03 /* Set register an element of array: 133: 1:CCCCCCCCCCCCCCCCCRRRrrrXXXXX 134: 2:ELEMENT[0] 135: 3:ELEMENT[1] 136: ... 137: ------------------------------ 138: if (0 <= reg[RRR] < CC..C) 139: reg[rrr] = ELEMENT[reg[RRR]]; 140: IC += CC..C; 141: */ 142: 143: #define CCL_Jump 0x04 /* Jump: 144: 1:A--D--D--R--E--S--S-000XXXXX 145: ------------------------------ 146: IC += ADDRESS; 147: */ 148: 149: /* Note: If CC..C is greater than 0, the second code is omitted. */ 150: 151: #define CCL_JumpCond 0x05 /* Jump conditional: 152: 1:A--D--D--R--E--S--S-rrrXXXXX 153: ------------------------------ 154: if (!reg[rrr]) 155: IC += ADDRESS; 156: */ 157: 158: 159: #define CCL_WriteRegisterJump 0x06 /* Write register and jump: 160: 1:A--D--D--R--E--S--S-rrrXXXXX 161: ------------------------------ 162: write (reg[rrr]); 163: IC += ADDRESS; 164: */ 165: 166: #define CCL_WriteRegisterReadJump 0x07 /* Write register, read, and jump: 167: 1:A--D--D--R--E--S--S-rrrXXXXX 168: 2:A--D--D--R--E--S--S-rrrYYYYY 169: ----------------------------- 170: write (reg[rrr]); 171: IC++; 172: read (reg[rrr]); 173: IC += ADDRESS; 174: */ 175: /* Note: If read is suspended, the resumed execution starts from the 176: second code (YYYYY == CCL_ReadJump). */ 177: 178: #define CCL_WriteConstJump 0x08 /* Write constant and jump: 179: 1:A--D--D--R--E--S--S-000XXXXX 180: 2:CONST 181: ------------------------------ 182: write (CONST); 183: IC += ADDRESS; 184: */ 185: 186: #define CCL_WriteConstReadJump 0x09 /* Write constant, read, and jump: 187: 1:A--D--D--R--E--S--S-rrrXXXXX 188: 2:CONST 189: 3:A--D--D--R--E--S--S-rrrYYYYY 190: ----------------------------- 191: write (CONST); 192: IC += 2; 193: read (reg[rrr]); 194: IC += ADDRESS; 195: */ 196: /* Note: If read is suspended, the resumed execution starts from the 197: second code (YYYYY == CCL_ReadJump). */ 198: 199: #define CCL_WriteStringJump 0x0A /* Write string and jump: 200: 1:A--D--D--R--E--S--S-000XXXXX 201: 2:LENGTH 202: 3:0000STRIN[0]STRIN[1]STRIN[2] 203: ... 204: ------------------------------ 205: write_string (STRING, LENGTH); 206: IC += ADDRESS; 207: */ 208: 209: #define CCL_WriteArrayReadJump 0x0B /* Write an array element, read, and jump: 210: 1:A--D--D--R--E--S--S-rrrXXXXX 211: 2:LENGTH 212: 3:ELEMENET[0] 213: 4:ELEMENET[1] 214: ... 215: N:A--D--D--R--E--S--S-rrrYYYYY 216: ------------------------------ 217: if (0 <= reg[rrr] < LENGTH) 218: write (ELEMENT[reg[rrr]]); 219: IC += LENGTH + 2; (... pointing at N+1) 220: read (reg[rrr]); 221: IC += ADDRESS; 222: */ 223: /* Note: If read is suspended, the resumed execution starts from the 224: Nth code (YYYYY == CCL_ReadJump). */ 225: 226: #define CCL_ReadJump 0x0C /* Read and jump: 227: 1:A--D--D--R--E--S--S-rrrYYYYY 228: ----------------------------- 229: read (reg[rrr]); 230: IC += ADDRESS; 231: */ 232: 233: #define CCL_Branch 0x0D /* Jump by branch table: 234: 1:CCCCCCCCCCCCCCCCCCCCrrrXXXXX 235: 2:A--D--D--R--E-S-S[0]000XXXXX 236: 3:A--D--D--R--E-S-S[1]000XXXXX 237: ... 238: ------------------------------ 239: if (0 <= reg[rrr] < CC..C) 240: IC += ADDRESS[reg[rrr]]; 241: else 242: IC += ADDRESS[CC..C]; 243: */ 244: 245: #define CCL_ReadRegister 0x0E /* Read bytes into registers: 246: 1:CCCCCCCCCCCCCCCCCCCCrrrXXXXX 247: 2:CCCCCCCCCCCCCCCCCCCCrrrXXXXX 248: ... 249: ------------------------------ 250: while (CCC--) 251: read (reg[rrr]); 252: */ 253: 254: #define CCL_WriteExprConst 0x0F /* write result of expression: 255: 1:00000OPERATION000RRR000XXXXX 256: 2:CONSTANT 257: ------------------------------ 258: write (reg[RRR] OPERATION CONSTANT); 259: IC++; 260: */ 261: 262: /* Note: If the Nth read is suspended, the resumed execution starts 263: from the Nth code. */ 264: 265: #define CCL_ReadBranch 0x10 /* Read one byte into a register, 266: and jump by branch table: 267: 1:CCCCCCCCCCCCCCCCCCCCrrrXXXXX 268: 2:A--D--D--R--E-S-S[0]000XXXXX 269: 3:A--D--D--R--E-S-S[1]000XXXXX 270: ... 271: ------------------------------ 272: read (read[rrr]); 273: if (0 <= reg[rrr] < CC..C) 274: IC += ADDRESS[reg[rrr]]; 275: else 276: IC += ADDRESS[CC..C]; 277: */ 278: 279: #define CCL_WriteRegister 0x11 /* Write registers: 280: 1:CCCCCCCCCCCCCCCCCCCrrrXXXXX 281: 2:CCCCCCCCCCCCCCCCCCCrrrXXXXX 282: ... 283: ------------------------------ 284: while (CCC--) 285: write (reg[rrr]); 286: ... 287: */ 288: 289: /* Note: If the Nth write is suspended, the resumed execution 290: starts from the Nth code. */ 291: 292: #define CCL_WriteExprRegister 0x12 /* Write result of expression 293: 1:00000OPERATIONRrrRRR000XXXXX 294: ------------------------------ 295: write (reg[RRR] OPERATION reg[Rrr]); 296: */ 297: 298: #define CCL_Call 0x13 /* Call the CCL program whose ID is 299: CC..C or cc..c. 300: 1:CCCCCCCCCCCCCCCCCCCCFFFXXXXX 301: [2:00000000cccccccccccccccccccc] 302: ------------------------------ 303: if (FFF) 304: call (cc..c) 305: IC++; 306: else 307: call (CC..C) 308: */ 309: 310: #define CCL_WriteConstString 0x14 /* Write a constant or a string: 311: 1:CCCCCCCCCCCCCCCCCCCCrrrXXXXX 312: [2:0000STRIN[0]STRIN[1]STRIN[2]] 313: [...] 314: ----------------------------- 315: if (!rrr) 316: write (CC..C) 317: else 318: write_string (STRING, CC..C); 319: IC += (CC..C + 2) / 3; 320: */ 321: 322: #define CCL_WriteArray 0x15 /* Write an element of array: 323: 1:CCCCCCCCCCCCCCCCCCCCrrrXXXXX 324: 2:ELEMENT[0] 325: 3:ELEMENT[1] 326: ... 327: ------------------------------ 328: if (0 <= reg[rrr] < CC..C) 329: write (ELEMENT[reg[rrr]]); 330: IC += CC..C; 331: */ 332: 333: #define CCL_End 0x16 /* Terminate: 334: 1:00000000000000000000000XXXXX 335: ------------------------------ 336: terminate (); 337: */ 338: 339: /* The following two codes execute an assignment arithmetic/logical 340: operation. The form of the operation is like REG OP= OPERAND. */ 341: 342: #define CCL_ExprSelfConst 0x17 /* REG OP= constant: 343: 1:00000OPERATION000000rrrXXXXX 344: 2:CONSTANT 345: ------------------------------ 346: reg[rrr] OPERATION= CONSTANT; 347: */ 348: 349: #define CCL_ExprSelfReg 0x18 /* REG1 OP= REG2: 350: 1:00000OPERATION000RRRrrrXXXXX 351: ------------------------------ 352: reg[rrr] OPERATION= reg[RRR]; 353: */ 354: 355: /* The following codes execute an arithmetic/logical operation. The 356: form of the operation is like REG_X = REG_Y OP OPERAND2. */ 357: 358: #define CCL_SetExprConst 0x19 /* REG_X = REG_Y OP constant: 359: 1:00000OPERATION000RRRrrrXXXXX 360: 2:CONSTANT 361: ------------------------------ 362: reg[rrr] = reg[RRR] OPERATION CONSTANT; 363: IC++; 364: */ 365: 366: #define CCL_SetExprReg 0x1A /* REG1 = REG2 OP REG3: 367: 1:00000OPERATIONRrrRRRrrrXXXXX 368: ------------------------------ 369: reg[rrr] = reg[RRR] OPERATION reg[Rrr]; 370: */ 371: 372: #define CCL_JumpCondExprConst 0x1B /* Jump conditional according to 373: an operation on constant: 374: 1:A--D--D--R--E--S--S-rrrXXXXX 375: 2:OPERATION 376: 3:CONSTANT 377: ----------------------------- 378: reg[7] = reg[rrr] OPERATION CONSTANT; 379: if (!(reg[7])) 380: IC += ADDRESS; 381: else 382: IC += 2 383: */ 384: 385: #define CCL_JumpCondExprReg 0x1C /* Jump conditional according to 386: an operation on register: 387: 1:A--D--D--R--E--S--S-rrrXXXXX 388: 2:OPERATION 389: 3:RRR 390: ----------------------------- 391: reg[7] = reg[rrr] OPERATION reg[RRR]; 392: if (!reg[7]) 393: IC += ADDRESS; 394: else 395: IC += 2; 396: */ 397: 398: #define CCL_ReadJumpCondExprConst 0x1D /* Read and jump conditional according 399: to an operation on constant: 400: 1:A--D--D--R--E--S--S-rrrXXXXX 401: 2:OPERATION 402: 3:CONSTANT 403: ----------------------------- 404: read (reg[rrr]); 405: reg[7] = reg[rrr] OPERATION CONSTANT; 406: if (!reg[7]) 407: IC += ADDRESS; 408: else 409: IC += 2; 410: */ 411: 412: #define CCL_ReadJumpCondExprReg 0x1E /* Read and jump conditional according 413: to an operation on register: 414: 1:A--D--D--R--E--S--S-rrrXXXXX 415: 2:OPERATION 416: 3:RRR 417: ----------------------------- 418: read (reg[rrr]); 419: reg[7] = reg[rrr] OPERATION reg[RRR]; 420: if (!reg[7]) 421: IC += ADDRESS; 422: else 423: IC += 2; 424: */ 425: 426: #define CCL_Extension 0x1F /* Extended CCL code 427: 1:ExtendedCOMMNDRrrRRRrrrXXXXX 428: 2:ARGUEMENT 429: 3:... 430: ------------------------------ 431: extended_command (rrr,RRR,Rrr,ARGS) 432: */ 433: 434: /* 435: Here after, Extended CCL Instructions. 436: Bit length of extended command is 14. 437: Therefore, the instruction code range is 0..16384(0x3fff). 438: */ 439: 440: /* Read a multibyte characeter. 441: A code point is stored into reg[rrr]. A charset ID is stored into 442: reg[RRR]. */ 443: 444: #define CCL_ReadMultibyteChar2 0x00 /* Read Multibyte Character 445: 1:ExtendedCOMMNDRrrRRRrrrXXXXX */ 446: 447: /* Write a multibyte character. 448: Write a character whose code point is reg[rrr] and the charset ID 449: is reg[RRR]. */ 450: 451: #define CCL_WriteMultibyteChar2 0x01 /* Write Multibyte Character 452: 1:ExtendedCOMMNDRrrRRRrrrXXXXX */ 453: 454: /* Translate a character whose code point is reg[rrr] and the charset 455: ID is reg[RRR] by a translation table whose ID is reg[Rrr]. 456: 457: A translated character is set in reg[rrr] (code point) and reg[RRR] 458: (charset ID). */ 459: 460: #define CCL_TranslateCharacter 0x02 /* Translate a multibyte character 461: 1:ExtendedCOMMNDRrrRRRrrrXXXXX */ 462: 463: /* Translate a character whose code point is reg[rrr] and the charset 464: ID is reg[RRR] by a translation table whose ID is ARGUMENT. 465: 466: A translated character is set in reg[rrr] (code point) and reg[RRR] 467: (charset ID). */ 468: 469: #define CCL_TranslateCharacterConstTbl 0x03 /* Translate a multibyte character 470: 1:ExtendedCOMMNDRrrRRRrrrXXXXX 471: 2:ARGUMENT(Translation Table ID) 472: */ 473: 474: /* Iterate looking up MAPs for reg[rrr] starting from the Nth (N = 475: reg[RRR]) MAP until some value is found. 476: 477: Each MAP is a Lisp vector whose element is number, nil, t, or 478: lambda. 479: If the element is nil, ignore the map and proceed to the next map. 480: If the element is t or lambda, finish without changing reg[rrr]. 481: If the element is a number, set reg[rrr] to the number and finish. 482: 483: Detail of the map structure is descibed in the comment for 484: CCL_MapMultiple below. */ 485: 486: #define CCL_IterateMultipleMap 0x10 /* Iterate multiple maps 487: 1:ExtendedCOMMNDXXXRRRrrrXXXXX 488: 2:NUMBER of MAPs 489: 3:MAP-ID1 490: 4:MAP-ID2 491: ... 492: */ 493: 494: /* Map the code in reg[rrr] by MAPs starting from the Nth (N = 495: reg[RRR]) map. 496: 497: MAPs are supplied in the succeeding CCL codes as follows: 498: 499: When CCL program gives this nested structure of map to this command: 500: ((MAP-ID11 501: MAP-ID12 502: (MAP-ID121 MAP-ID122 MAP-ID123) 503: MAP-ID13) 504: (MAP-ID21 505: (MAP-ID211 (MAP-ID2111) MAP-ID212) 506: MAP-ID22)), 507: the compiled CCL codes has this sequence: 508: CCL_MapMultiple (CCL code of this command) 509: 16 (total number of MAPs and SEPARATORs) 510: -7 (1st SEPARATOR) 511: MAP-ID11 512: MAP-ID12 513: -3 (2nd SEPARATOR) 514: MAP-ID121 515: MAP-ID122 516: MAP-ID123 517: MAP-ID13 518: -7 (3rd SEPARATOR) 519: MAP-ID21 520: -4 (4th SEPARATOR) 521: MAP-ID211 522: -1 (5th SEPARATOR) 523: MAP_ID2111 524: MAP-ID212 525: MAP-ID22 526: 527: A value of each SEPARATOR follows this rule: 528: MAP-SET := SEPARATOR [(MAP-ID | MAP-SET)]+ 529: SEPARATOR := -(number of MAP-IDs and SEPARATORs in the MAP-SET) 530: 531: (*)....Nest level of MAP-SET must not be over than MAX_MAP_SET_LEVEL. 532: 533: When some map fails to map (i.e. it doesn't have a value for 534: reg[rrr]), the mapping is treated as identity. 535: 536: The mapping is iterated for all maps in each map set (set of maps 537: separated by SEPARATOR) except in the case that lambda is 538: encountered. More precisely, the mapping proceeds as below: 539: 540: At first, VAL0 is set to reg[rrr], and it is translated by the 541: first map to VAL1. Then, VAL1 is translated by the next map to 542: VAL2. This mapping is iterated until the last map is used. The 543: result of the mapping is the last value of VAL?. When the mapping 544: process reached to the end of the map set, it moves to the next 545: map set. If the next does not exit, the mapping process terminates, 546: and regard the last value as a result. 547: 548: But, when VALm is mapped to VALn and VALn is not a number, the 549: mapping proceed as below: 550: 551: If VALn is nil, the lastest map is ignored and the mapping of VALm 552: proceed to the next map. 553: 554: In VALn is t, VALm is reverted to reg[rrr] and the mapping of VALm 555: proceed to the next map. 556: 557: If VALn is lambda, move to the next map set like reaching to the 558: end of the current map set. 559: 560: If VALn is a symbol, call the CCL program refered by it. 561: Then, use reg[rrr] as a mapped value except for -1, -2 and -3. 562: Such special values are regarded as nil, t, and lambda respectively. 563: 564: Each map is a Lisp vector of the following format (a) or (b): 565: (a)......[STARTPOINT VAL1 VAL2 ...] 566: (b)......[t VAL STARTPOINT ENDPOINT], 567: where 568: STARTPOINT is an offset to be used for indexing a map, 569: ENDPOINT is a maximum index number of a map, 570: VAL and VALn is a number, nil, t, or lambda. 571: 572: Valid index range of a map of type (a) is: 573: STARTPOINT <= index < STARTPOINT + map_size - 1 574: Valid index range of a map of type (b) is: 575: STARTPOINT <= index < ENDPOINT */ 576: 577: #define CCL_MapMultiple 0x11 /* Mapping by multiple code conversion maps 578: 1:ExtendedCOMMNDXXXRRRrrrXXXXX 579: 2:N-2 580: 3:SEPARATOR_1 (< 0) 581: 4:MAP-ID_1 582: 5:MAP-ID_2 583: ... 584: M:SEPARATOR_x (< 0) 585: M+1:MAP-ID_y 586: ... 587: N:SEPARATOR_z (< 0) 588: */ 589: 590: #define MAX_MAP_SET_LEVEL 30 591: 592: typedef struct 593: { 594: int rest_length; 595: int orig_val; 596: } tr_stack; 597: 598: static tr_stack mapping_stack[MAX_MAP_SET_LEVEL]; 599: static tr_stack *mapping_stack_pointer; 600: 601: /* If this variable is non-zero, it indicates the stack_idx 602: of immediately called by CCL_MapMultiple. */ 603: static int stack_idx_of_map_multiple; 604: 605: #define PUSH_MAPPING_STACK(restlen, orig) \ 606: do \ 607: { \ 608: mapping_stack_pointer->rest_length = (restlen); \ 609: mapping_stack_pointer->orig_val = (orig); \ 610: mapping_stack_pointer++; \ 611: } \ 612: while (0) 613: 614: #define POP_MAPPING_STACK(restlen, orig) \ 615: do \ 616: { \ 617: mapping_stack_pointer--; \ 618: