(linenum→info "unix/slp.c:2238")

emacs/22.1/src/composite.c

    1: /* Composite sequence support.
    2:    Copyright (C) 2001, 2002, 2003, 2004, 2005,
    3:                  2006, 2007 Free Software Foundation, Inc.
    4:    Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
    5:      National Institute of Advanced Industrial Science and Technology (AIST)
    6:      Registration Number H14PRO021
    7: 
    8: This file is part of GNU Emacs.
    9: 
   10: GNU Emacs is free software; you can redistribute it and/or modify
   11: it under the terms of the GNU General Public License as published by
   12: the Free Software Foundation; either version 2, or (at your option)
   13: any later version.
   14: 
   15: GNU Emacs is distributed in the hope that it will be useful,
   16: but WITHOUT ANY WARRANTY; without even the implied warranty of
   17: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18: GNU General Public License for more details.
   19: 
   20: You should have received a copy of the GNU General Public License
   21: along with GNU Emacs; see the file COPYING.  If not, write to
   22: the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   23: Boston, MA 02110-1301, USA.  */
   24: 
   25: #include <config.h>
   26: #include "lisp.h"
   27: #include "buffer.h"
   28: #include "charset.h"
   29: #include "intervals.h"
   30: 
   31: /* Emacs uses special text property `composition' to support character
   32:    composition.  A sequence of characters that have the same (i.e. eq)
   33:    `composition' property value is treated as a single composite
   34:    sequence (we simply call it a `composition' hereafter).  Characters in
   35:    a composition are all composed somehow on the screen.
   36: 
   37:    The property value has this form when the composition is made:
   38:         ((LENGTH . COMPONENTS) . MODIFICATION-FUNC)
   39:    then turns to this form:
   40:         (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC))
   41:    when the composition is registered in composition_hash_table and
   42:    composition_table.  These rather peculiar structures were designed
   43:    to make it easy to distinguish them quickly (we can do that by
   44:    checking only the first element) and to extract LENGTH (from the
   45:    former form) and COMPOSITION-ID (from the latter form).
   46: 
   47:    We register a composition when it is displayed, or when the width
   48:    is required (for instance, to calculate columns).
   49: 
   50:    LENGTH -- Length of the composition.  This information is used to
   51:         check the validity of the composition.
   52: 
   53:    COMPONENTS --  Character, string, vector, list, or nil.
   54: 
   55:         If it is nil, characters in the text are composed relatively
   56:         according to their metrics in font glyphs.
   57: 
   58:         If it is a character or a string, the character or characters
   59:         in the string are composed relatively.
   60: 
   61:         If it is a vector or list of integers, the element is a
   62:         character or an encoded composition rule.  The characters are
   63:         composed according to the rules.  (2N)th elements are
   64:         characters to be composed and (2N+1)th elements are
   65:         composition rules to tell how to compose (2N+2)th element with
   66:         the previously composed 2N glyphs.
   67: 
   68:    COMPONENTS-VEC -- Vector of integers.  In relative composition, the
   69:         elements are characters to be composed.  In rule-based
   70:         composition, the elements are characters or encoded
   71:         composition rules.
   72: 
   73:    MODIFICATION-FUNC -- If non nil, it is a function to call when the
   74:         composition gets invalid after a modification in a buffer.  If
   75:         it is nil, a function in `composition-function-table' of the
   76:         first character in the sequence is called.
   77: 
   78:    COMPOSITION-ID -- Identification number of the composition.  It is
   79:         used as an index to composition_table for the composition.
   80: 
   81:    When Emacs has to display a composition or has to know its
   82:    displaying width, the function get_composition_id is called.  It
   83:    returns COMPOSITION-ID so that the caller can access the
   84:    information about the composition through composition_table.  If a
   85:    COMPOSITION-ID has not yet been assigned to the composition,
   86:    get_composition_id checks the validity of `composition' property,
   87:    and, if valid, assigns a new ID, registers the information in
   88:    composition_hash_table and composition_table, and changes the form
   89:    of the property value.  If the property is invalid, return -1
   90:    without changing the property value.
   91: 
   92:    We use two tables to keep information about composition;
   93:    composition_hash_table and composition_table.
   94: 
   95:    The former is a hash table in which keys are COMPONENTS-VECs and
   96:    values are the corresponding COMPOSITION-IDs.  This hash table is
   97:    weak, but as each key (COMPONENTS-VEC) is also kept as a value of the
   98:    `composition' property, it won't be collected as garbage until all
   99:    bits of text that have the same COMPONENTS-VEC are deleted.
  100: 
  101:    The latter is a table of pointers to `struct composition' indexed
  102:    by COMPOSITION-ID.  This structure keeps the other information (see
  103:    composite.h).
  104: 
  105:    In general, a text property holds information about individual
  106:    characters.  But, a `composition' property holds information about
  107:    a sequence of characters (in this sense, it is like the `intangible'
  108:    property).  That means that we should not share the property value
  109:    in adjacent compositions -- we can't distinguish them if they have the
  110:    same property.  So, after any changes, we call
  111:    `update_compositions' and change a property of one of adjacent
  112:    compositions to a copy of it.  This function also runs a proper
  113:    composition modification function to make a composition that gets
  114:    invalid by the change valid again.
  115: 
  116:    As the value of the `composition' property holds information about a
  117:    specific range of text, the value gets invalid if we change the
  118:    text in the range.  We treat the `composition' property as always
  119:    rear-nonsticky (currently by setting default-text-properties to
  120:    (rear-nonsticky (composition))) and we never make properties of
  121:    adjacent compositions identical.  Thus, any such changes make the
  122:    range just shorter.  So, we can check the validity of the `composition'
  123:    property by comparing LENGTH information with the actual length of
  124:    the composition.
  125: 
  126: */
  127: 
  128: 
  129: Lisp_Object Qcomposition;	/* Key under which the composition property is stored.  */
  130: 
  131: /* Table of pointers to the structure `composition' indexed by
  132:    COMPOSITION-ID.  This structure is for storing information about
  133:    each composition except for COMPONENTS-VEC.  */
  134: struct composition **composition_table;
  135: 
  136: /* Number of slots allocated for `composition_table' (0 at first, then grown in steps of 256).  */
  137: static int composition_table_size;
  138: 
  139: /* Number of compositions registered so far; also the ID given to the next new one.  */
  140: int n_compositions;
  141: 
  142: /* Hash table for compositions.  The key is COMPONENTS-VEC of
  143:    `composition' property.  The value is the corresponding
  144:    COMPOSITION-ID.  */
  145: Lisp_Object composition_hash_table;
  146: 
  147: /* Function called to adjust a composition whose own MODIFICATION-FUNC is not fboundp.  */
  148: Lisp_Object Vcompose_chars_after_function;
  149: 
  150: /* Char-table of patterns and functions to make a composition.  */
  151: Lisp_Object Vcomposition_function_table;
  152: Lisp_Object Qcomposition_function_table;
  153: 
  154: /* Temporary variable used in macros COMPOSITION_XXX.  */
  155: Lisp_Object composition_temp;
  156: ^L
  157: /* Return how many columns C will occupy on the screen.  Any character
  158:    satisfying SINGLE_BYTE_CHAR_P counts as 1 column (control and 8-bit
  159:    characters are just ignored in a composition anyway).  */
  160: #define CHAR_WIDTH(c) \
  161:   (SINGLE_BYTE_CHAR_P (c) ? 1 : CHARSET_WIDTH (CHAR_CHARSET (c)))
  162: 
  163: /* Return COMPOSITION-ID of a composition at buffer position
  164:    CHARPOS/BYTEPOS and length NCHARS.  The `composition' property of
  165:    the sequence is PROP.  STRING, if non-nil, is a string that
  166:    contains the composition instead of the current buffer.
  167: 
  168:    If the composition is invalid, return -1.  */
  169: 
  170: int
  171: get_composition_id (charpos, bytepos, nchars, prop, string)
  172:      int charpos, bytepos, nchars;
  173:      Lisp_Object prop, string;
  174: {
  175:   Lisp_Object id, length, components, key, *key_contents;
  176:   int glyph_len;
  177:   struct Lisp_Hash_Table *hash_table = XHASH_TABLE (composition_hash_table);
  178:   int hash_index;
  179:   unsigned hash_code;
  180:   struct composition *cmp;
  181:   int i, ch;
  182: 
  183:   /* PROP should be
  184:         Form-A: ((LENGTH . COMPONENTS) . MODIFICATION-FUNC)
  185:      or
  186:         Form-B: (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC))
  187:   */
  188:   if (nchars == 0 || !CONSP (prop))
  189:     goto invalid_composition;
  190: 
  191:   id = XCAR (prop);
  192:   if (INTEGERP (id))
  193:     {
  194:       /* PROP should be Form-B.  */
  195:       if (XINT (id) < 0 || XINT (id) >= n_compositions)
  196:         goto invalid_composition;
  197:       return XINT (id);
  198:     }
  199: 
  200:   /* PROP should be Form-A.
  201:      Thus, ID should be (LENGTH . COMPONENTS).  */
  202:   if (!CONSP (id))
  203:     goto invalid_composition;
  204:   length = XCAR (id);
  205:   if (!INTEGERP (length) || XINT (length) != nchars)
  206:     goto invalid_composition;
  207: 
  208:   components = XCDR (id);
  209: 
  210:   /* Check if the same composition has already been registered or not
  211:      by consulting composition_hash_table.  The key for this table is
  212:      COMPONENTS (converted to a vector COMPONENTS-VEC) or, if it is
  213:      nil, vector of characters in the composition range.  */
  214:   if (INTEGERP (components))
  215:     key = Fmake_vector (make_number (1), components);
  216:   else if (STRINGP (components) || CONSP (components))
  217:     key = Fvconcat (1, &components);
  218:   else if (VECTORP (components))
  219:     key = components;
  220:   else if (NILP (components))
  221:     {
  222:       key = Fmake_vector (make_number (nchars), Qnil);
  223:       if (STRINGP (string))
  224:         for (i = 0; i < nchars; i++)
  225:           {
  226:             FETCH_STRING_CHAR_ADVANCE (ch, string, charpos, bytepos);
  227:             XVECTOR (key)->contents[i] = make_number (ch);
  228:           }
  229:       else
  230:         for (i = 0; i < nchars; i++)
  231:           {
  232:             FETCH_CHAR_ADVANCE (ch, charpos, bytepos);
  233:             XVECTOR (key)->contents[i] = make_number (ch);
  234:           }
  235:     }
  236:   else
  237:     goto invalid_composition;
  238: 
  239:   hash_index = hash_lookup (hash_table, key, &hash_code);
  240:   if (hash_index >= 0)
  241:     {
  242:       /* We have already registered the same composition.  Change PROP
  243:          from Form-A above to Form-B while replacing COMPONENTS with
  244:          COMPONENTS-VEC stored in the hash table.  We can directly
  245:          modify the cons cell of PROP because it is not shared.  */
  246:       key = HASH_KEY (hash_table, hash_index);
  247:       id = HASH_VALUE (hash_table, hash_index);
  248:       XSETCAR (prop, id);
  249:       XSETCDR (prop, Fcons (make_number (nchars), Fcons (key, XCDR (prop))));
  250:       return XINT (id);
  251:     }
  252: 
  253:   /* This composition is a new one.  We must register it.  */
  254: 
  255:   /* Check if we have sufficient memory to store this information.  */
  256:   if (composition_table_size == 0)
  257:     {
  258:       composition_table_size = 256;
  259:       composition_table
  260:         = (struct composition **) xmalloc (sizeof (composition_table[0])
  261:                                            * composition_table_size);
  262:     }
  263:   else if (composition_table_size <= n_compositions)
  264:     {
  265:       composition_table_size += 256;
  266:       composition_table
  267:         = (struct composition **) xrealloc (composition_table,
  268:                                             sizeof (composition_table[0])
  269:                                             * composition_table_size);
  270:     }
  271: 
  272:   key_contents = XVECTOR (key)->contents;
  273: 
  274:   /* Check if the contents of COMPONENTS are valid if COMPONENTS is a
  275:      vector or a list.  It should be a sequence of:
  276:         char1 rule1 char2 rule2 char3 ...    ruleN charN+1  */
  277:   if (VECTORP (components) || CONSP (components))
  278:     {
  279:       int len = XVECTOR (key)->size;
  280: 
  281:       /* The number of elements should be odd.  */
  282:       if ((len % 2) == 0)
  283:         goto invalid_composition;
  284:       /* All elements should be integers (character or encoded
  285:          composition rule).  */
  286:       for (i = 0; i < len; i++)
  287:         {
  288:           if (!INTEGERP (key_contents[i]))
  289:             goto invalid_composition;
  290:         }
  291:     }
  292: 
  293:   /* Change PROP from Form-A above to Form-B.  We can directly modify
  294:      the cons cell of PROP because it is not shared.  */
  295:   XSETFASTINT (id, n_compositions);
  296:   XSETCAR (prop, id);
  297:   XSETCDR (prop, Fcons (make_number (nchars), Fcons (key, XCDR (prop))));
  298: 
  299:   /* Register the composition in composition_hash_table.  */
  300:   hash_index = hash_put (hash_table, key, id, hash_code);
  301: 
  302:   /* Register the composition in composition_table.  */
  303:   cmp = (struct composition *) xmalloc (sizeof (struct composition));
  304: 
  305:   cmp->method = (NILP (components)
  306:                  ? COMPOSITION_RELATIVE
  307:                  : ((INTEGERP (components) || STRINGP (components))
  308:                     ? COMPOSITION_WITH_ALTCHARS
  309:                     : COMPOSITION_WITH_RULE_ALTCHARS));
  310:   cmp->hash_index = hash_index;
  311:   glyph_len = (cmp->method == COMPOSITION_WITH_RULE_ALTCHARS
  312:                ? (XVECTOR (key)->size + 1) / 2
  313:                : XVECTOR (key)->size);
  314:   cmp->glyph_len = glyph_len;
  315:   cmp->offsets = (short *) xmalloc (sizeof (short) * glyph_len * 2);
  316:   cmp->font = NULL;
  317: 
  318:   /* Calculate the width of overall glyphs of the composition.  */
  319:   if (cmp->method != COMPOSITION_WITH_RULE_ALTCHARS)
  320:     {
  321:       /* Relative composition.  */
  322:       cmp->width = 0;
  323:       for (i = 0; i < glyph_len; i++)
  324:         {
  325:           int this_width;
  326:           ch = XINT (key_contents[i]);
  327:           this_width = CHAR_WIDTH (ch);
  328:           if (cmp->width < this_width)
  329:             cmp->width = this_width;
  330:         }
  331:     }
  332:   else
  333:     {
  334:       /* Rule-base composition.  */
  335:       float leftmost = 0.0, rightmost;
  336: 
  337:       ch = XINT (key_contents[0]);
  338:       rightmost = CHAR_WIDTH (ch);
  339: 
  340:       for (i = 1; i < glyph_len; i += 2)
  341:         {
  342:           int rule, gref, nref;
  343:           int this_width;
  344:           float this_left;
  345: 
  346:           rule = XINT (key_contents[i]);
  347:           ch = XINT (key_contents[i + 1]);
  348:           this_width = CHAR_WIDTH (ch);
  349: 
  350:           /* A composition rule is specified by an integer value
  351:              that encodes global and new reference points (GREF and
  352:              NREF).  GREF and NREF are specified by numbers as
  353:              below:
  354:                 0---1---2 -- ascent
  355:                 |       |
  356:                 |       |
  357:                 |       |
  358:                 9--10--11 -- center
  359:                 |       |
  360:              ---3---4---5--- baseline
  361:                 |       |
  362:                 6---7---8 -- descent
  363:           */
  364:           COMPOSITION_DECODE_RULE (rule, gref, nref);
  365:           this_left = (leftmost
  366:                        + (gref % 3) * (rightmost - leftmost) / 2.0
  367:                        - (nref % 3) * this_width / 2.0);
  368: 
  369:           if (this_left < leftmost)
  370:             leftmost = this_left;
  371:           if (this_left + this_width > rightmost)
  372:             rightmost = this_left + this_width;
  373:         }
  374: 
  375:       cmp->width = rightmost - leftmost;
  376:       if (cmp->width < (rightmost - leftmost))
  377:         /* To get a ceiling integer value.  */
  378:         cmp->width++;
  379:     }
  380: 
  381:   composition_table[n_compositions] = cmp;
  382: 
  383:   return n_compositions++;
  384: 
  385:  invalid_composition:
  386:   /* Would it be better to remove this `composition' property?  */
  387:   return -1;
  388: }
  389: 
  390: ^L
  391: /* Find a composition at or nearest to position POS of OBJECT (buffer
  392:    or string).
  393: 
  394:    OBJECT defaults to the current buffer.  If there's a composition at
  395:    POS, set *START and *END to the start and end of the sequence,
  396:    *PROP to the `composition' property, and return 1.
  397: 
  398:    If there's no composition at POS and LIMIT is negative, return 0.
  399: 
  400:    Otherwise, search for a composition forward (LIMIT > POS) or
  401:    backward (LIMIT < POS).  In this case, LIMIT bounds the search.
  402: 
  403:    If a composition is found, set *START, *END, and *PROP as above,
  404:    and return 1, else return 0.
  405: 
  406:    This doesn't check the validity of composition.  */
  407: 
  408: int
  409: find_composition (pos, limit, start, end, prop, object)
  410:      int pos, limit, *start, *end;
  411:      Lisp_Object *prop, object;
  412: {
  413:   Lisp_Object val;
  414: 
  415:   if (get_property_and_range (pos, Qcomposition, prop, start, end, object))
  416:     return 1;
  417: 
  418:   if (limit < 0 || limit == pos)
  419:     return 0;
  420: 
  421:   if (limit > pos)              /* search forward */
  422:     {
  423:       val = Fnext_single_property_change (make_number (pos), Qcomposition,
  424:                                           object, make_number (limit));
  425:       pos = XINT (val);
  426:       if (pos == limit)
  427:         return 0;
  428:     }
  429:   else                          /* search backward */
  430:     {
  431:       if (get_property_and_range (pos - 1, Qcomposition, prop, start, end,
  432:                                   object))
  433:         return 1;
  434:       val = Fprevious_single_property_change (make_number (pos), Qcomposition,
  435:                                               object, make_number (limit));
  436:       pos = XINT (val);
  437:       if (pos == limit)
  438:         return 0;
  439:       pos--;
  440:     }
  441:   get_property_and_range (pos, Qcomposition, prop, start, end, object);
  442:   return 1;
  443: }
  444: 
  445: /* Run a proper function to adjust the composition sitting between
  446:    FROM and TO with property PROP.  */
  447: 
  448: static void
  449: run_composition_function (from, to, prop)
  450:      int from, to;
  451:      Lisp_Object prop;
  452: {
  453:   Lisp_Object func;
  454:   int start, end;
  455: 
  456:   func = COMPOSITION_MODIFICATION_FUNC (prop);
  457:   /* If an invalid composition precedes or follows, try to make them
  458:      valid too.  */
  459:   if (from > BEGV
  460:       && find_composition (from - 1, -1, &start, &end, &prop, Qnil)
  461:       && !COMPOSITION_VALID_P (start, end, prop))
  462:     from = start;
  463:   if (to < ZV
  464:       && find_composition (to, -1, &start, &end, &prop, Qnil)
  465:       && !COMPOSITION_VALID_P (start, end, prop))
  466:     to = end;
  467:   if (!NILP (Ffboundp (func)))
  468:     call2 (func, make_number (from), make_number (to));
  469:   else if (!NILP (Ffboundp (Vcompose_chars_after_function)))
  470:     call3 (Vcompose_chars_after_function,
  471:            make_number (from), make_number (to), Qnil);
  472: }
  473: 
  474: /* Make invalid compositions adjacent to or inside FROM and TO valid.
  475:    CHECK_MASK is bitwise `or' of mask bits defined by macros
  476:    CHECK_XXX (see the comment in composite.h).
  477: 
  478:    This function is called when a buffer text is changed.  If the
  479:    change is deletion, FROM == TO.  Otherwise, FROM < TO.  */
  480: 
  481: void
  482: update_compositions (from, to, check_mask)
  483:      int from, to, check_mask;
  484: {
  485:   Lisp_Object prop;
  486:   int start, end;
  487: 
  488:   if (inhibit_modification_hooks)
  489:     return;
  490: 
  491:   /* If FROM and TO are not in a valid range, do nothing.  */
  492:   if (! (BEGV <= from && from <= to && to <= ZV))
  493:     return;
  494: 
  495:   if (check_mask & CHECK_HEAD)
  496:     {
  497:       /* FROM should be at composition boundary.  But, insertion or
  498:          deletion will make two compositions adjacent and
  499:          indistinguishable when they have same (eq) property.  To
  500:          avoid it, in such a case, we change the property of the
  501:          latter to the copy of it.  */
  502:       if (from > BEGV
  503:           && find_composition (from - 1, -1, &start, &end, &prop, Qnil))
  504:         {
  505:           if (from < end)
  506:             Fput_text_property (make_number (from), make_number (end),
  507:                                 Qcomposition,
  508:                                 Fcons (XCAR (prop), XCDR (prop)), Qnil);
  509:           run_composition_function (start, end, prop);
  510:           from = end;
  511:         }
  512:       else if (from < ZV
  513:                && find_composition (from, -1, &start, &from, &prop, Qnil))
  514:         run_composition_function (start, from, prop);
  515:     }
  516: 
  517:   if (check_mask & CHECK_INSIDE)
  518:     {
  519:       /* In this case, we are sure that (check & CHECK_TAIL) is also
  520:          nonzero.  Thus, here we should check only compositions before
  521:          (to - 1).  */
  522:       while (from < to - 1
  523:              && find_composition (from, to, &start, &from, &prop, Qnil)
  524:              && from < to - 1)
  525:         run_composition_function (start, from, prop);
  526:     }
  527: 
  528:   if (check_mask & CHECK_TAIL)
  529:     {
  530:       if (from < to
  531:           && find_composition (to - 1, -1, &start, &end, &prop, Qnil))
  532:         {
  533:           /* TO should be also at composition boundary.  But,
  534:              insertion or deletion will make two compositions adjacent
  535:              and indistinguishable when they have same (eq) property.
  536:              To avoid it, in such a case, we change the property of
  537:              the former to the copy of it.  */
  538:           if (to < end)
  539:             Fput_text_property (make_number (start), make_number (to),
  540:                                 Qcomposition,
  541:                                 Fcons (XCAR (prop), XCDR (prop)), Qnil);
  542:           run_composition_function (start, end, prop);
  543:         }
  544:       else if (to < ZV
  545:                && find_composition (to, -1, &start, &end, &prop, Qnil))
  546:         run_composition_function (start, end, prop);
  547:     }
  548: }
  549: 
  550: 
  551: /* Modify composition property values in LIST destructively.  LIST is
  552:    a list as returned from text_property_list.  Change values to the
  553:    top-level copies of them so that none of them are `eq'.  */
  554: 
  555: void
  556: make_composition_value_copy (list)
  557:      Lisp_Object list;
  558: {
  559:   Lisp_Object plist, val;
  560: 
  561:   for (; CONSP (list); list = XCDR (list))
  562:     {
  563:       plist = XCAR (XCDR (XCDR (XCAR (list))));
  564:       while (CONSP (plist) && CONSP (XCDR (plist)))
  565:         {
  566:           if (EQ (XCAR (plist), Qcomposition)
  567:               && (val = XCAR (XCDR (plist)), CONSP (val)))
  568:             XSETCAR (XCDR (plist), Fcons (XCAR (val), XCDR (val)));
  569:           plist = XCDR (XCDR (plist));
  570:         }
  571:     }
  572: }
  573: 
  574: 
  575: /* Make text in the region between START and END a composition that
  576:    has COMPONENTS and MODIFICATION-FUNC.
  577: 
  578:    If STRING is non-nil, then operate on characters contained between
  579:    indices START and END in STRING.  */
  580: 
  581: void
  582: compose_text (start, end, components, modification_func, string)
  583:      int start, end;
  584:      Lisp_Object components, modification_func, string;
  585: {
  586:   Lisp_Object prop;
  587: 
  588:   prop = Fcons (