(linenum→info "unix/slp.c:2238")

glibc/2.7/iconv/gconv_trans.c

    1: /* Transliteration using the locale's data.
    2:    Copyright (C) 2000 Free Software Foundation, Inc.
    3:    This file is part of the GNU C Library.
    4:    Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.
    5: 
    6:    The GNU C Library is free software; you can redistribute it and/or
    7:    modify it under the terms of the GNU Lesser General Public
    8:    License as published by the Free Software Foundation; either
    9:    version 2.1 of the License, or (at your option) any later version.
   10: 
   11:    The GNU C Library is distributed in the hope that it will be useful,
   12:    but WITHOUT ANY WARRANTY; without even the implied warranty of
   13:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   14:    Lesser General Public License for more details.
   15: 
   16:    You should have received a copy of the GNU Lesser General Public
   17:    License along with the GNU C Library; if not, write to the Free
   18:    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   19:    02111-1307 USA.  */
   20: 
   21: #include <assert.h>
   22: #include <dlfcn.h>
   23: #include <search.h>
   24: #include <stdint.h>
   25: #include <string.h>
   26: #include <stdlib.h>
   27: 
   28: #include <bits/libc-lock.h>
   29: #include "gconv_int.h"
   30: #include "../locale/localeinfo.h"
   31: 
   32: 
   33: int
   34: __gconv_transliterate (struct __gconv_step *step,
   35:                        struct __gconv_step_data *step_data,
   36:                        void *trans_data __attribute__ ((unused)),
   37:                        const unsigned char *inbufstart,
   38:                        const unsigned char **inbufp,
   39:                        const unsigned char *inbufend,
   40:                        unsigned char **outbufstart, size_t *irreversible)
   41: {
   42:   /* Find out about the locale's transliteration.  */
   43:   uint_fast32_t size;
   44:   const uint32_t *from_idx;
   45:   const uint32_t *from_tbl;
   46:   const uint32_t *to_idx;
   47:   const uint32_t *to_tbl;
   48:   const uint32_t *winbuf;
   49:   const uint32_t *winbufend;
   50:   uint_fast32_t low;
   51:   uint_fast32_t high;
   52: 
   53:   /* The input buffer.  There are actually 4-byte values.  */
   54:   winbuf = (const uint32_t *) *inbufp;
   55:   winbufend = (const uint32_t *) inbufend;
   56: 
   57:   __gconv_fct fct = step->__fct;
   58: #ifdef PTR_DEMANGLE
   59:   if (step->__shlib_handle != NULL)
   60:     PTR_DEMANGLE (fct);
   61: #endif
   62: 
   63:   /* If there is no transliteration information in the locale don't do
   64:      anything and return the error.  */
   65:   size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_TAB_SIZE);
   66:   if (size == 0)
   67:     goto no_rules;
   68: 
   69:   /* Get the rest of the values.  */
   70:   from_idx =
   71:     (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
   72:   from_tbl =
   73:     (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
   74:   to_idx =
   75:     (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
   76:   to_tbl =
   77:     (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL);
   78: 
   79:   /* Test whether there is enough input.  */
   80:   if (winbuf + 1 > winbufend)
   81:     return (winbuf == winbufend
   82:             ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
   83: 
   84:   /* The array starting at FROM_IDX contains indeces to the string table
   85:      in FROM_TBL.  The indeces are sorted wrt to the strings.  I.e., we
   86:      are doing binary search.  */
   87:   low = 0;
   88:   high = size;
   89:   while (low < high)
   90:     {
   91:       uint_fast32_t med = (low + high) / 2;
   92:       uint32_t idx;
   93:       int cnt;
   94: 
   95:       /* Compare the string at this index with the string at the current
   96:          position in the input buffer.  */
   97:       idx = from_idx[med];
   98:       cnt = 0;
   99:       do
  100:         {
  101:           if (from_tbl[idx + cnt] != winbuf[cnt])
  102:             /* Does not match.  */
  103:             break;
  104:           ++cnt;
  105:         }
  106:       while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend);
  107: 
  108:       if (cnt > 0 && from_tbl[idx + cnt] == L'\0')
  109:         {
  110:           /* Found a matching input sequence.  Now try to convert the
  111:              possible replacements.  */
  112:           uint32_t idx2 = to_idx[med];
  113: 
  114:           do
  115:             {
  116:               /* Determine length of replacement.  */
  117:               uint_fast32_t len = 0;
  118:               int res;
  119:               const unsigned char *toinptr;
  120:               unsigned char *outptr;
  121: 
  122:               while (to_tbl[idx2 + len] != L'\0')
  123:                 ++len;
  124: 
  125:               /* Try this input text.  */
  126:               toinptr = (const unsigned char *) &to_tbl[idx2];
  127:               outptr = *outbufstart;
  128:               res = DL_CALL_FCT (fct,
  129:                                  (step, step_data, &toinptr,
  130:                                   (const unsigned char *) &to_tbl[idx2 + len],
  131:                                   &outptr, NULL, 0, 0));
  132:               if (res != __GCONV_ILLEGAL_INPUT)
  133:                 {
  134:                   /* If the conversion succeeds we have to increment the
  135:                      input buffer.  */
  136:                   if (res == __GCONV_EMPTY_INPUT)
  137:                     {
  138:                       *inbufp += cnt * sizeof (uint32_t);
  139:                       ++*irreversible;
  140:                       res = __GCONV_OK;
  141:                     }
  142:                   *outbufstart = outptr;
  143: 
  144:                   return res;
  145:                 }
  146: 
  147:               /* Next replacement.  */
  148:               idx2 += len + 1;
  149:             }
  150:           while (to_tbl[idx2] != L'\0');
  151: 
  152:           /* Nothing found, continue searching.  */
  153:         }
  154:       else if (cnt > 0)
  155:         /* This means that the input buffer contents matches a prefix of
  156:            an entry.  Since we cannot match it unless we get more input,
  157:            we will tell the caller about it.  */
  158:         return __GCONV_INCOMPLETE_INPUT;
  159: 
  160:       if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
  161:         low = med + 1;
  162:       else
  163:         high = med;
  164:     }
  165: 
  166:  no_rules:
  167:   /* Maybe the character is supposed to be ignored.  */
  168:   if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0)
  169:     {
  170:       int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN);
  171:       const uint32_t *ranges =
  172:         (const uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE);
  173:       const uint32_t wc = *(const uint32_t *) (*inbufp);
  174:       int i;
  175: 
  176:       /* Test whether there is enough input.  */
  177:       if (winbuf + 1 > winbufend)
  178:         return (winbuf == winbufend
  179:                 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
  180: 
  181:       for (i = 0; i < n; ranges += 3, ++i)
  182:         if (ranges[0] <= wc && wc <= ranges[1]
  183:             && (wc - ranges[0]) % ranges[2] == 0)
  184:           {
  185:             /* Matches the range.  Ignore it.  */
  186:             *inbufp += 4;
  187:             ++*irreversible;
  188:             return __GCONV_OK;
  189:           }
  190:         else if (wc < ranges[0])
  191:           /* There cannot be any other matching range since they are
  192:              sorted.  */
  193:           break;
  194:     }
  195: 
  196:   /* One last chance: use the default replacement.  */
  197:   if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN) != 0)
  198:     {
  199:       const uint32_t *default_missing = (const uint32_t *)
  200:         _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING);
  201:       const unsigned char *toinptr = (const unsigned char *) default_missing;
  202:       uint32_t len = _NL_CURRENT_WORD (LC_CTYPE,
  203:                                        _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN);
  204:       unsigned char *outptr;
  205:       int res;
  206: 
  207:       /* Test whether there is enough input.  */
  208:       if (winbuf + 1 > winbufend)
  209:         return (winbuf == winbufend
  210:                 ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
  211: 
  212:       outptr = *outbufstart;
  213:       res = DL_CALL_FCT (fct,
  214:                          (step, step_data, &toinptr,
  215:                           (const unsigned char *) (default_missing + len),
  216:                           &outptr, NULL, 0, 0));
  217: 
  218:       if (res != __GCONV_ILLEGAL_INPUT)
  219:         {
  220:           /* If the conversion succeeds we have to increment the
  221:              input buffer.  */
  222:           if (res == __GCONV_EMPTY_INPUT)
  223:             {
  224:               /* This worked but is not reversible.  */
  225:               ++*irreversible;
  226:               *inbufp += 4;
  227:               res = __GCONV_OK;
  228:             }
  229:           *outbufstart = outptr;
  230: 
  231:           return res;
  232:         }
  233:     }
  234: 
  235:   /* Haven't found a match.  */
  236:   return __GCONV_ILLEGAL_INPUT;
  237: }
  238: 
  239: 
  240: /* Structure to represent results of found (or not) transliteration
  241:    modules.  */
  242: struct known_trans
  243: {
  244:   /* This structure must remain the first member.  */
  245:   struct trans_struct info;
  246: 
  247:   char *fname;
  248:   void *handle;
  249:   int open_count;
  250: };
  251: 
  252: 
  253: /* Tree with results of previous calls to __gconv_translit_find.  */
  254: static void *search_tree;
  255: 
  256: /* We modify global data.   */
  257: __libc_lock_define_initialized (static, lock);
  258: 
  259: 
  260: /* Compare two transliteration entries.  */
  261: static int
  262: trans_compare (const void *p1, const void *p2)
  263: {
  264:   const struct known_trans *s1 = (const struct known_trans *) p1;
  265:   const struct known_trans *s2 = (const struct known_trans *) p2;
  266: 
  267:   return strcmp (s1->info.name, s2->info.name);
  268: }
  269: 
  270: 
  271: /* Open (maybe reopen) the module named in the struct.  Get the function
  272:    and data structure pointers we need.  */
  273: static int
  274: open_translit (struct known_trans *trans)
  275: {
  276:   __gconv_trans_query_fct queryfct;
  277: 
  278:   trans->handle = __libc_dlopen (trans->fname);
  279:   if (trans->handle == NULL)
  280:     /* Not available.  */
  281:     return 1;
  282: 
  283:   /* Find the required symbol.  */
  284:   queryfct = __libc_dlsym (trans->handle, "gconv_trans_context");
  285:   if (queryfct == NULL)
  286:     {
  287:       /* We cannot live with that.  */
  288:     close_and_out:
  289:       __libc_dlclose (trans->handle);
  290:       trans->handle = NULL;
  291:       return 1;
  292:     }
  293: 
  294:   /* Get the context.  */
  295:   if (queryfct (trans->info.name, &trans->info.csnames, &trans->info.ncsnames)
  296:       != 0)
  297:     goto close_and_out;
  298: 
  299:   /* Of course we also have to have the actual function.  */
  300:   trans->info.trans_fct = __libc_dlsym (trans->handle, "gconv_trans");
  301:   if (trans->info.trans_fct == NULL)
  302:     goto close_and_out;
  303: 
  304:   /* Now the optional functions.  */
  305:   trans->info.trans_init_fct =
  306:     __libc_dlsym (trans->handle, "gconv_trans_init");
  307:   trans->info.trans_context_fct =
  308:     __libc_dlsym (trans->handle, "gconv_trans_context");
  309:   trans->info.trans_end_fct =
  310:     __libc_dlsym (trans->handle, "gconv_trans_end");
  311: 
  312:   trans->open_count = 1;
  313: 
  314:   return 0;
  315: }
  316: 
  317: 
  318: int
  319: internal_function
  320: __gconv_translit_find (struct trans_struct *trans)
  321: {
  322:   struct known_trans **found;
  323:   const struct path_elem *runp;
  324:   int res = 1;
  325: 
  326:   /* We have to have a name.  */
  327:   assert (trans->name != NULL);
  328: 
  329:   /* Acquire the lock.  */
  330:   __libc_lock_lock (lock);
  331: 
  332:   /* See whether we know this module already.  */
  333:   found = __tfind (trans, &search_tree, trans_compare);
  334:   if (found != NULL)
  335:     {
  336:       /* Is this module available?  */
  337:       if ((*found)->handle != NULL)
  338:         {
  339:           /* Maybe we have to reopen the file.  */
  340:           if ((*found)->handle != (void *) -1)
  341:             /* The object is not unloaded.  */
  342:             res = 0;
  343:           else if (open_translit (*found) == 0)
  344:             {
  345:               /* Copy the data.  */
  346:               *trans = (*found)->info;
  347:               (*found)->open_count++;
  348:               res = 0;
  349:             }
  350:         }
  351:     }
  352:   else
  353:     {
  354:       size_t name_len = strlen (trans->name) + 1;
  355:       int need_so = 0;
  356:       struct known_trans *newp;
  357: 
  358:       /* We have to continue looking for the module.  */
  359:       if (__gconv_path_elem == NULL)
  360:         __gconv_get_path ();
  361: 
  362:       /* See whether we have to append .so.  */
  363:       if (name_len <= 4 || memcmp (&trans->name[name_len - 4], ".so", 3) != 0)
  364:         need_so = 1;
  365: 
  366:       /* Create a new entry.  */
  367:       newp = (struct known_trans *) malloc (sizeof (struct known_trans)
  368:                                             + (__gconv_max_path_elem_len
  369:                                                + name_len + 3)
  370:                                             + name_len);
  371:       if (newp != NULL)
  372:         {
  373:           char *cp;
  374: 
  375:           /* Clear the struct.  */
  376:           memset (newp, '\0', sizeof (struct known_trans));
  377: 
  378:           /* Store a copy of the module name.  */
  379:           newp->info.name = cp = (char *) (newp + 1);
  380:           cp = __mempcpy (cp, trans->name, name_len);
  381: 
  382:           newp->fname = cp;
  383: 
  384:           /* Search in all the directories.  */
  385:           for (runp = __gconv_path_elem; runp->name != NULL; ++runp)
  386:             {
  387:               cp = __mempcpy (__stpcpy ((char *) newp->fname, runp->name),
  388:                               trans->name, name_len);
  389:               if (need_so)
  390:                 memcpy (cp, ".so", sizeof (".so"));
  391: 
  392:               if (open_translit (newp) == 0)
  393:                 {
  394:                   /* We found a module.  */
  395:                   res = 0;
  396:                   break;
  397:                 }
  398:             }
  399: 
  400:           if (res)
  401:             newp->fname = NULL;
  402: 
  403:           /* In any case we'll add the entry to our search tree.  */
  404:           if (__tsearch (newp, &search_tree, trans_compare) == NULL)
  405:             {
  406:               /* Yickes, this should not happen.  Unload the object.  */
  407:               res = 1;
  408:               /* XXX unload here.  */
  409:             }
  410:         }
  411:     }
  412: 
  413:   __libc_lock_unlock (lock);
  414: 
  415:   return res;
  416: }
Syntax (Markdown)