
1: /* Composite sequence support. 2: Copyright (C) 2001, 2002, 2003, 2004, 2005, 3: 2006, 2007 Free Software Foundation, Inc. 4: Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 5: National Institute of Advanced Industrial Science and Technology (AIST) 6: Registration Number H14PRO021 7: 8: This file is part of GNU Emacs. 9: 10: GNU Emacs is free software; you can redistribute it and/or modify 11: it under the terms of the GNU General Public License as published by 12: the Free Software Foundation; either version 2, or (at your option) 13: any later version. 14: 15: GNU Emacs is distributed in the hope that it will be useful, 16: but WITHOUT ANY WARRANTY; without even the implied warranty of 17: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18: GNU General Public License for more details. 19: 20: You should have received a copy of the GNU General Public License 21: along with GNU Emacs; see the file COPYING. If not, write to 22: the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 23: Boston, MA 02110-1301, USA. */ 24: 25: #include <config.h> 26: #include "lisp.h" 27: #include "buffer.h" 28: #include "charset.h" 29: #include "intervals.h" 30: 31: /* Emacs uses special text property `composition' to support character 32: composition. A sequence of characters that have the same (i.e. eq) 33: `composition' property value is treated as a single composite 34: sequence (we call it just `composition' here after). Characters in 35: a composition are all composed somehow on the screen. 36: 37: The property value has this form when the composition is made: 38: ((LENGTH . COMPONENTS) . MODIFICATION-FUNC) 39: then turns to this form: 40: (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC)) 41: when the composition is registered in composition_hash_table and 42: composition_table. 
These rather peculiar structures were designed
   to make it easy to distinguish them quickly (we can do that by
   checking only the first element) and to extract LENGTH (from the
   former form) and COMPOSITION-ID (from the latter form).

   We register a composition when it is displayed, or when the width
   is required (for instance, to calculate columns).

   LENGTH -- Length of the composition.  This information is used to
        check the validity of the composition.

   COMPONENTS -- Character, string, vector, list, or nil.

        If it is nil, characters in the text are composed relatively
        according to their metrics in font glyphs.

        If it is a character or a string, the character or characters
        in the string are composed relatively.

        If it is a vector or list of integers, each element is a
        character or an encoded composition rule.  The characters are
        composed according to the rules.  (2N)th elements are
        characters to be composed and (2N+1)th elements are
        composition rules to tell how to compose (2N+2)th element with
        the previously composed 2N glyphs.

   COMPONENTS-VEC -- Vector of integers.  In relative composition, the
        elements are characters to be composed.  In rule-based
        composition, the elements are characters or encoded
        composition rules.

   MODIFICATION-FUNC -- If non-nil, it is a function to call when the
        composition becomes invalid after a modification in a buffer.
        If it is nil, a function in `composition-function-table' of the
        first character in the sequence is called.

   COMPOSITION-ID -- Identification number of the composition.  It is
        used as an index to composition_table for the composition.

   When Emacs has to display a composition or has to know its
   displaying width, the function get_composition_id is called.  It
   returns COMPOSITION-ID so that the caller can access the
   information about the composition through composition_table.
If a
   COMPOSITION-ID has not yet been assigned to the composition,
   get_composition_id checks the validity of the `composition' property,
   and, if valid, assigns a new ID, registers the information in
   composition_hash_table and composition_table, and changes the form
   of the property value.  If the property is invalid, it returns -1
   without changing the property value.

   We use two tables to keep information about compositions:
   composition_hash_table and composition_table.

   The former is a hash table in which keys are COMPONENTS-VECs and
   values are the corresponding COMPOSITION-IDs.  This hash table is
   weak, but as each key (COMPONENTS-VEC) is also kept as a value of the
   `composition' property, it won't be collected as garbage until all
   bits of text that have the same COMPONENTS-VEC are deleted.

   The latter is a table of pointers to `struct composition' indexed
   by COMPOSITION-ID.  This structure keeps the other information (see
   composite.h).

   In general, a text property holds information about individual
   characters.  But, a `composition' property holds information about
   a sequence of characters (in this sense, it is like the `intangible'
   property).  That means that we should not share the property value
   in adjacent compositions -- we can't distinguish them if they have the
   same property.  So, after any changes, we call
   `update_compositions' and change a property of one of adjacent
   compositions to a copy of it.  This function also runs a proper
   composition modification function to make a composition that becomes
   invalid by the change valid again.

   As the value of the `composition' property holds information about a
   specific range of text, the value becomes invalid if we change the
   text in the range.
We treat the `composition' property as always 119: rear-nonsticky (currently by setting default-text-properties to 120: (rear-nonsticky (composition))) and we never make properties of 121: adjacent compositions identical. Thus, any such changes make the 122: range just shorter. So, we can check the validity of the `composition' 123: property by comparing LENGTH information with the actual length of 124: the composition. 125: 126: */ 127: 128: 129: Lisp_Object Qcomposition; 130: 131: /* Table of pointers to the structure `composition' indexed by 132: COMPOSITION-ID. This structure is for storing information about 133: each composition except for COMPONENTS-VEC. */ 134: struct composition **composition_table; 135: 136: /* The current size of `composition_table'. */ 137: static int composition_table_size; 138: 139: /* Number of compositions currently made. */ 140: int n_compositions; 141: 142: /* Hash table for compositions. The key is COMPONENTS-VEC of 143: `composition' property. The value is the corresponding 144: COMPOSITION-ID. */ 145: Lisp_Object composition_hash_table; 146: 147: /* Function to call to adjust composition. */ 148: Lisp_Object Vcompose_chars_after_function; 149: 150: /* Char-table of patterns and functions to make a composition. */ 151: Lisp_Object Vcomposition_function_table; 152: Lisp_Object Qcomposition_function_table; 153: 154: /* Temporary variable used in macros COMPOSITION_XXX. */ 155: Lisp_Object composition_temp; 156: ^L 157: /* Return how many columns C will occupy on the screen. It always 158: returns 1 for control characters and 8-bit characters because those 159: are just ignored in a composition. */ 160: #define CHAR_WIDTH(c) \ 161: (SINGLE_BYTE_CHAR_P (c) ? 1 : CHARSET_WIDTH (CHAR_CHARSET (c))) 162: 163: /* Return COMPOSITION-ID of a composition at buffer position 164: CHARPOS/BYTEPOS and length NCHARS. The `composition' property of 165: the sequence is PROP. 
STRING, if non-nil, is a string that 166: contains the composition instead of the current buffer. 167: 168: If the composition is invalid, return -1. */ 169: 170: int 171: get_composition_id (charpos, bytepos, nchars, prop, string) 172: int charpos, bytepos, nchars; 173: Lisp_Object prop, string; 174: { 175: Lisp_Object id, length, components, key, *key_contents; 176: int glyph_len; 177: struct Lisp_Hash_Table *hash_table = XHASH_TABLE (composition_hash_table); 178: int hash_index; 179: unsigned hash_code; 180: struct composition *cmp; 181: int i, ch; 182: 183: /* PROP should be 184: Form-A: ((LENGTH . COMPONENTS) . MODIFICATION-FUNC) 185: or 186: Form-B: (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC)) 187: */ 188: if (nchars == 0 || !CONSP (prop)) 189: goto invalid_composition; 190: 191: id = XCAR (prop); 192: if (INTEGERP (id)) 193: { 194: /* PROP should be Form-B. */ 195: if (XINT (id) < 0 || XINT (id) >= n_compositions) 196: goto invalid_composition; 197: return XINT (id); 198: } 199: 200: /* PROP should be Form-A. 201: Thus, ID should be (LENGTH . COMPONENTS). */ 202: if (!CONSP (id)) 203: goto invalid_composition; 204: length = XCAR (id); 205: if (!INTEGERP (length) || XINT (length) != nchars) 206: goto invalid_composition; 207: 208: components = XCDR (id); 209: 210: /* Check if the same composition has already been registered or not 211: by consulting composition_hash_table. The key for this table is 212: COMPONENTS (converted to a vector COMPONENTS-VEC) or, if it is 213: nil, vector of characters in the composition range. 
*/ 214: if (INTEGERP (components)) 215: key = Fmake_vector (make_number (1), components); 216: else if (STRINGP (components) || CONSP (components)) 217: key = Fvconcat (1, &components); 218: else if (VECTORP (components)) 219: key = components; 220: else if (NILP (components)) 221: { 222: key = Fmake_vector (make_number (nchars), Qnil); 223: if (STRINGP (string)) 224: for (i = 0; i < nchars; i++) 225: { 226: FETCH_STRING_CHAR_ADVANCE (ch, string, charpos, bytepos); 227: XVECTOR (key)->contents[i] = make_number (ch); 228: } 229: else 230: for (i = 0; i < nchars; i++) 231: { 232: FETCH_CHAR_ADVANCE (ch, charpos, bytepos); 233: XVECTOR (key)->contents[i] = make_number (ch); 234: } 235: } 236: else 237: goto invalid_composition; 238: 239: hash_index = hash_lookup (hash_table, key, &hash_code); 240: if (hash_index >= 0) 241: { 242: /* We have already registered the same composition. Change PROP 243: from Form-A above to Form-B while replacing COMPONENTS with 244: COMPONENTS-VEC stored in the hash table. We can directly 245: modify the cons cell of PROP because it is not shared. */ 246: key = HASH_KEY (hash_table, hash_index); 247: id = HASH_VALUE (hash_table, hash_index); 248: XSETCAR (prop, id); 249: XSETCDR (prop, Fcons (make_number (nchars), Fcons (key, XCDR (prop)))); 250: return XINT (id); 251: } 252: 253: /* This composition is a new one. We must register it. */ 254: 255: /* Check if we have sufficient memory to store this information. 
*/ 256: if (composition_table_size == 0) 257: { 258: composition_table_size = 256; 259: composition_table 260: = (struct composition **) xmalloc (sizeof (composition_table[0]) 261: * composition_table_size); 262: } 263: else if (composition_table_size <= n_compositions) 264: { 265: composition_table_size += 256; 266: composition_table 267: = (struct composition **) xrealloc (composition_table, 268: sizeof (composition_table[0]) 269: * composition_table_size); 270: } 271: 272: key_contents = XVECTOR (key)->contents; 273: 274: /* Check if the contents of COMPONENTS are valid if COMPONENTS is a 275: vector or a list. It should be a sequence of: 276: char1 rule1 char2 rule2 char3 ... ruleN charN+1 */ 277: if (VECTORP (components) || CONSP (components)) 278: { 279: int len = XVECTOR (key)->size; 280: 281: /* The number of elements should be odd. */ 282: if ((len % 2) == 0) 283: goto invalid_composition; 284: /* All elements should be integers (character or encoded 285: composition rule). */ 286: for (i = 0; i < len; i++) 287: { 288: if (!INTEGERP (key_contents[i])) 289: goto invalid_composition; 290: } 291: } 292: 293: /* Change PROP from Form-A above to Form-B. We can directly modify 294: the cons cell of PROP because it is not shared. */ 295: XSETFASTINT (id, n_compositions); 296: XSETCAR (prop, id); 297: XSETCDR (prop, Fcons (make_number (nchars), Fcons (key, XCDR (prop)))); 298: 299: /* Register the composition in composition_hash_table. */ 300: hash_index = hash_put (hash_table, key, id, hash_code); 301: 302: /* Register the composition in composition_table. */ 303: cmp = (struct composition *) xmalloc (sizeof (struct composition)); 304: 305: cmp->method = (NILP (components) 306: ? COMPOSITION_RELATIVE 307: : ((INTEGERP (components) || STRINGP (components)) 308: ? COMPOSITION_WITH_ALTCHARS 309: : COMPOSITION_WITH_RULE_ALTCHARS)); 310: cmp->hash_index = hash_index; 311: glyph_len = (cmp->method == COMPOSITION_WITH_RULE_ALTCHARS 312: ? 
(XVECTOR (key)->size + 1) / 2 313: : XVECTOR (key)->size); 314: cmp->glyph_len = glyph_len; 315: cmp->offsets = (short *) xmalloc (sizeof (short) * glyph_len * 2); 316: cmp->font = NULL; 317: 318: /* Calculate the width of overall glyphs of the composition. */ 319: if (cmp->method != COMPOSITION_WITH_RULE_ALTCHARS) 320: { 321: /* Relative composition. */ 322: cmp->width = 0; 323: for (i = 0; i < glyph_len; i++) 324: { 325: int this_width; 326: ch = XINT (key_contents[i]); 327: this_width = CHAR_WIDTH (ch); 328: if (cmp->width < this_width) 329: cmp->width = this_width; 330: } 331: } 332: else 333: { 334: /* Rule-base composition. */ 335: float leftmost = 0.0, rightmost; 336: 337: ch = XINT (key_contents[0]); 338: rightmost = CHAR_WIDTH (ch); 339: 340: for (i = 1; i < glyph_len; i += 2) 341: { 342: int rule, gref, nref; 343: int this_width; 344: float this_left; 345: 346: rule = XINT (key_contents[i]); 347: ch = XINT (key_contents[i + 1]); 348: this_width = CHAR_WIDTH (ch); 349: 350: /* A composition rule is specified by an integer value 351: that encodes global and new reference points (GREF and 352: NREF). GREF and NREF are specified by numbers as 353: below: 354: 0---1---2 -- ascent 355: | | 356: | | 357: | | 358: 9--10--11 -- center 359: | | 360: ---3---4---5--- baseline 361: | | 362: 6---7---8 -- descent 363: */ 364: COMPOSITION_DECODE_RULE (rule, gref, nref); 365: this_left = (leftmost 366: + (gref % 3) * (rightmost - leftmost) / 2.0 367: - (nref % 3) * this_width / 2.0); 368: 369: if (this_left < leftmost) 370: leftmost = this_left; 371: if (this_left + this_width > rightmost) 372: rightmost = this_left + this_width; 373: } 374: 375: cmp->width = rightmost - leftmost; 376: if (cmp->width < (rightmost - leftmost)) 377: /* To get a ceiling integer value. 
*/ 378: cmp->width++; 379: } 380: 381: composition_table[n_compositions] = cmp; 382: 383: return n_compositions++; 384: 385: invalid_composition: 386: /* Would it be better to remove this `composition' property? */ 387: return -1; 388: } 389: 390: ^L 391: /* Find a composition at or nearest to position POS of OBJECT (buffer 392: or string). 393: 394: OBJECT defaults to the current buffer. If there's a composition at 395: POS, set *START and *END to the start and end of the sequence, 396: *PROP to the `composition' property, and return 1. 397: 398: If there's no composition at POS and LIMIT is negative, return 0. 399: 400: Otherwise, search for a composition forward (LIMIT > POS) or 401: backward (LIMIT < POS). In this case, LIMIT bounds the search. 402: 403: If a composition is found, set *START, *END, and *PROP as above, 404: and return 1, else return 0. 405: 406: This doesn't check the validity of composition. */ 407: 408: int 409: find_composition (pos, limit, start, end, prop, object) 410: int pos, limit, *start, *end; 411: Lisp_Object *prop, object; 412: { 413: Lisp_Object val; 414: 415: if (get_property_and_range (pos, Qcomposition, prop, start, end, object)) 416: return 1; 417: 418: if (limit < 0 || limit == pos) 419: return 0; 420: 421: if (limit > pos) /* search forward */ 422: { 423: val = Fnext_single_property_change (make_number (pos), Qcomposition, 424: object, make_number (limit)); 425: pos = XINT (val); 426: if (pos == limit) 427: return 0; 428: } 429: else /* search backward */ 430: { 431: if (get_property_and_range (pos - 1, Qcomposition, prop, start, end, 432: object)) 433: return 1; 434: val = Fprevious_single_property_change (make_number (pos), Qcomposition, 435: object, make_number (limit)); 436: pos = XINT (val); 437: if (pos == limit) 438: return 0; 439: pos--; 440: } 441: get_property_and_range (pos, Qcomposition, prop, start, end, object); 442: return 1; 443: } 444: 445: /* Run a proper function to adjust the composition sitting between 
446: FROM and TO with property PROP. */ 447: 448: static void 449: run_composition_function (from, to, prop) 450: int from, to; 451: Lisp_Object prop; 452: { 453: Lisp_Object func; 454: int start, end; 455: 456: func = COMPOSITION_MODIFICATION_FUNC (prop); 457: /* If an invalid composition precedes or follows, try to make them 458: valid too. */ 459: if (from > BEGV 460: && find_composition (from - 1, -1, &start, &end, &prop, Qnil) 461: && !COMPOSITION_VALID_P (start, end, prop)) 462: from = start; 463: if (to < ZV 464: && find_composition (to, -1, &start, &end, &prop, Qnil) 465: && !COMPOSITION_VALID_P (start, end, prop)) 466: to = end; 467: if (!NILP (Ffboundp (func))) 468: call2 (func, make_number (from), make_number (to)); 469: else if (!NILP (Ffboundp (Vcompose_chars_after_function))) 470: call3 (Vcompose_chars_after_function, 471: make_number (from), make_number (to), Qnil); 472: } 473: 474: /* Make invalid compositions adjacent to or inside FROM and TO valid. 475: CHECK_MASK is bitwise `or' of mask bits defined by macros 476: CHECK_XXX (see the comment in composite.h). 477: 478: This function is called when a buffer text is changed. If the 479: change is deletion, FROM == TO. Otherwise, FROM < TO. */ 480: 481: void 482: update_compositions (from, to, check_mask) 483: int from, to, check_mask; 484: { 485: Lisp_Object prop; 486: int start, end; 487: 488: if (inhibit_modification_hooks) 489: return; 490: 491: /* If FROM and TO are not in a valid range, do nothing. */ 492: if (! (BEGV <= from && from <= to && to <= ZV)) 493: return; 494: 495: if (check_mask & CHECK_HEAD) 496: { 497: /* FROM should be at composition boundary. But, insertion or 498: deletion will make two compositions adjacent and 499: indistinguishable when they have same (eq) property. To 500: avoid it, in such a case, we change the property of the 501: latter to the copy of it. 
*/ 502: if (from > BEGV 503: && find_composition (from - 1, -1, &start, &end, &prop, Qnil)) 504: { 505: if (from < end) 506: Fput_text_property (make_number (from), make_number (end), 507: Qcomposition, 508: Fcons (XCAR (prop), XCDR (prop)), Qnil); 509: run_composition_function (start, end, prop); 510: from = end; 511: } 512: else if (from < ZV 513: && find_composition (from, -1, &start, &from, &prop, Qnil)) 514: run_composition_function (start, from, prop); 515: } 516: 517: if (check_mask & CHECK_INSIDE) 518: { 519: /* In this case, we are sure that (check & CHECK_TAIL) is also 520: nonzero. Thus, here we should check only compositions before 521: (to - 1). */ 522: while (from < to - 1 523: && find_composition (from, to, &start, &from, &prop, Qnil) 524: && from < to - 1) 525: run_composition_function (start, from, prop); 526: } 527: 528: if (check_mask & CHECK_TAIL) 529: { 530: if (from < to 531: && find_composition (to - 1, -1, &start, &end, &prop, Qnil)) 532: { 533: /* TO should be also at composition boundary. But, 534: insertion or deletion will make two compositions adjacent 535: and indistinguishable when they have same (eq) property. 536: To avoid it, in such a case, we change the property of 537: the former to the copy of it. */ 538: if (to < end) 539: Fput_text_property (make_number (start), make_number (to), 540: Qcomposition, 541: Fcons (XCAR (prop), XCDR (prop)), Qnil); 542: run_composition_function (start, end, prop); 543: } 544: else if (to < ZV 545: && find_composition (to, -1, &start, &end, &prop, Qnil)) 546: run_composition_function (start, end, prop); 547: } 548: } 549: 550: 551: /* Modify composition property values in LIST destructively. LIST is 552: a list as returned from text_property_list. Change values to the 553: top-level copies of them so that none of them are `eq'. 
*/ 554: 555: void 556: make_composition_value_copy (list) 557: Lisp_Object list; 558: { 559: Lisp_Object plist, val; 560: 561: for (; CONSP (list); list = XCDR (list)) 562: { 563: plist = XCAR (XCDR (XCDR (XCAR (list)))); 564: while (CONSP (plist) && CONSP (XCDR (plist))) 565: { 566: if (EQ (XCAR (plist), Qcomposition) 567: && (val = XCAR (XCDR (plist)), CONSP (val))) 568: XSETCAR (XCDR (plist), Fcons (XCAR (val), XCDR (val))); 569: plist = XCDR (XCDR (plist)); 570: } 571: } 572: } 573: 574: 575: /* Make text in the region between START and END a composition that 576: has COMPONENTS and MODIFICATION-FUNC. 577: 578: If STRING is non-nil, then operate on characters contained between 579: indices START and END in STRING. */ 580: 581: void 582: compose_text (start, end, components, modification_func, string) 583: int start, end; 584: Lisp_Object components, modification_func, string; 585: { 586: Lisp_Object prop; 587: 588: prop = Fcons (