(linenum→info "unix/slp.c:2238")

glibc/2.7/catgets/gencat.c

    1: /* Copyright (C) 1996-2005, 2006, 2007 Free Software Foundation, Inc.
    2:    This file is part of the GNU C Library.
    3:    Contributed by Ulrich Drepper <drepper@redhat.com>, 1996.
    4: 
    5:    This program is free software; you can redistribute it and/or modify
    6:    it under the terms of the GNU General Public License as published
    7:    by the Free Software Foundation; version 2 of the License, or
    8:    (at your option) any later version.
    9: 
   10:    This program is distributed in the hope that it will be useful,
   11:    but WITHOUT ANY WARRANTY; without even the implied warranty of
   12:    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   13:    GNU General Public License for more details.
   14: 
   15:    You should have received a copy of the GNU General Public License
   16:    along with this program; if not, write to the Free Software Foundation,
   17:    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
   18: 
   19: #ifdef HAVE_CONFIG_H
   20: # include "config.h"
   21: #endif
   22: 
   23: #include <argp.h>
   24: #include <assert.h>
   25: #include <ctype.h>
   26: #include <endian.h>
   27: #include <errno.h>
   28: #include <error.h>
   29: #include <fcntl.h>
   30: #include <iconv.h>
   31: #include <langinfo.h>
   32: #include <locale.h>
   33: #include <libintl.h>
   34: #include <limits.h>
   35: #include <nl_types.h>
   36: #include <obstack.h>
   37: #include <stdint.h>
   38: #include <stdio.h>
   39: #include <stdlib.h>
   40: #include <string.h>
   41: #include <unistd.h>
   42: #include <wchar.h>
   43: 
   44: #include "version.h"
   45: 
   46: #include "catgetsinfo.h"
   47: 
   48: /* Swap the four bytes of W end for end (meant for 32-bit unsigned values); W is evaluated four times.  */
   49: #define SWAPU32(w) \
   50:   (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
   51: /* Element of a singly linked list (via `next'); struct set_list points to such a list in `messages'.  */
   52: struct message_list
   53: {
   54:   int number;
   55:   const char *message;
   56:   /* NOTE(review): presumably definition site and symbolic name, as in struct set_list -- confirm.  */
   57:   const char *fname;
   58:   size_t line;
   59:   const char *symbol;
   60: 
   61:   struct message_list *next;
   62: };
   63: 
   64: /* One message set; the sets of a catalog are chained through `next'.  */
   65: struct set_list
   66: {
   67:   int number;			/* NOTE(review): presumably the number find_set looks up -- confirm.  */
   68:   int deleted;			/* Set to 1 by a $delset directive.  */
   69:   struct message_list *messages;
   70:   int last_message;
   71:   /* Filled in when a $set directive selects this set: input file name, starting line, symbolic name (NULL if numeric).  */
   72:   const char *fname;
   73:   size_t line;
   74:   const char *symbol;
   75: 
   76:   struct set_list *next;
   77: };
   78: 
   79: /* State accumulated by read_input_file across all input files.  */
   80: struct catalog
   81: {
   82:   struct set_list *all_sets;	/* Head of the list of all sets.  */
   83:   struct set_list *current_set;	/* Set chosen by the latest $set; starts as find_set (NL_SETD).  */
   84:   size_t total_messages;
   85:   wint_t quote_char;		/* Set by $quote; 0 means no quote character.  */
   86:   int last_set;			/* Highest set number seen or allocated so far.  */
   87: 
   88:   struct obstack mem_pool;	/* Obstack in which the input lines are assembled.  */
   89: };
   90: 
   91: 
   92: /* If non-zero force creation of new file, not using existing one.  */
   93: static int force_new;		/* Set to 1 by parse_opt for --new.  */
   94: 
   95: /* Name of output file.  */
   96: static const char *output_name;	/* From -o; otherwise chosen in main.  */
   97: 
   98: /* Name of generated C header file.  */
   99: static const char *header_name;	/* From -H; stays NULL if not given.  */
  100: 
  101: /* Name and version of program: argp calls this hook to print them.  */
  102: static void print_version (FILE *stream, struct argp_state *state);
  103: void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
  104: /* Key of the --new option in the table below; it is not a printable character, so there is no short form.  */
  105: #define OPT_NEW 1
  106: 
  107: /* Definitions of arguments for argp functions; the all-zero entry ends the table.  */
  108: static const struct argp_option options[] =
  109: {
  110:   { "header", 'H', N_("NAME"), 0,
  111:     N_("Create C header file NAME containing symbol definitions") },
  112:   { "new", OPT_NEW, NULL, 0,
  113:     N_("Do not use existing catalog, force new output file") },
  114:   { "output", 'o', N_("NAME"), 0, N_("Write output to file NAME") },
  115:   { NULL, 0, NULL, 0, NULL }
  116: };
  117: 
  118: /* Short description of program; argp prints the text after the \v following the option list.  */
  119: static const char doc[] = N_("Generate message catalog.\
  120: \vIf INPUT-FILE is -, input is read from standard input.  If OUTPUT-FILE\n\
  121: is -, output is written to standard output.\n");
  122: 
  123: /* Strings for arguments in help texts; the \n separates two alternative usage patterns.  */
  124: static const char args_doc[] = N_("\
  125: -o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]");
  126: 
  127: /* Prototype for option handler.  */
  128: static error_t parse_opt (int key, char *arg, struct argp_state *state);
  129: 
  130: /* Function to print some extra text in the help message.  */
  131: static char *more_help (int key, const char *text, void *input);
  132: 
  133: /* Data structure to communicate with argp functions; main passes it to argp_parse.  */
  134: static struct argp argp =
  135: {
  136:   options, parse_opt, args_doc, doc, NULL, more_help	/* options, parser, args_doc, doc, children, help_filter */
  137: };
  138: 
  139: 
  140: /* Wrapper functions with error checking for standard functions (only declared here, as extern).  */
  141: extern void *xmalloc (size_t n);
  142: extern void *xcalloc (size_t n, size_t s);
  143: extern void *xrealloc (void *o, size_t n);
  144: extern char *xstrdup (const char *);
  145: 
  146: /* Prototypes for local (static) functions.  */
  147: static void error_print (void);
  148: static struct catalog *read_input_file (struct catalog *current,
  149:                                         const char *fname);
  150: static void write_out (struct catalog *result, const char *output_name,
  151:                        const char *header_name);
  152: static struct set_list *find_set (struct catalog *current, int number);
  153: static void normalize_line (const char *fname, size_t line, iconv_t cd,
  154:                             wchar_t *string, wchar_t quote_char,
  155:                             wchar_t escape_char);
  156: static void read_old (struct catalog *catalog, const char *file_name);
  157: static int open_conversion (const char *codesetp, iconv_t *cd_towcp,
  158:                             iconv_t *cd_tombp, wchar_t *escape_charp);
  159: 
  160: /* Program entry: parse the options, read all input files into one catalog and write it out.  */
  161: int
  162: main (int argc, char *argv[])
  163: {
  164:   struct catalog *result;
  165:   int remaining;
  166: 
  167:   /* Install the hook that keeps error() from printing the program name.  */
  168:   error_print_progname = error_print;
  169: 
  170:   /* Set locale via LC_ALL.  */
  171:   setlocale (LC_ALL, "");
  172: 
  173:   /* Set the text message domain.  */
  174:   textdomain (PACKAGE);
  175: 
  176:   /* Initialize local variables.  */
  177:   result = NULL;
  178: 
  179:   /* Parse and process arguments; REMAINING gets the index of the first non-option argument.  */
  180:   argp_parse (&argp, argc, argv, 0, &remaining, NULL);
  181: 
  182:   /* Determine output file: if -o was not given, use the first remaining argument, or "-" if there is none.  */
  183:   if (output_name == NULL)
  184:     output_name = remaining < argc ? argv[remaining++] : "-";
  185: 
  186:   /* Process all input files, or standard input if none are named.  */
  187:   setlocale (LC_CTYPE, "C");
  188:   if (remaining < argc)
  189:     do
  190:       result = read_input_file (result, argv[remaining]);
  191:     while (++remaining < argc);
  192:   else
  193:     result = read_input_file (NULL, "-");
  194: 
  195:   /* Write out the result.  */
  196:   if (result != NULL)
  197:     write_out (result, output_name, header_name);
  198:   /* Exit status is 1 if error() reported anything, otherwise 0.  */
  199:   return error_message_count != 0;
  200: }
  201: 
  202: 
  203: /* Handle program arguments: argp parser callback that records -H, --new and -o; STATE is not used.  */
  204: static error_t
  205: parse_opt (int key, char *arg, struct argp_state *state)
  206: {
  207:   switch (key)
  208:     {
  209:     case 'H':
  210:       header_name = arg;
  211:       break;
  212:     case OPT_NEW:
  213:       force_new = 1;
  214:       break;
  215:     case 'o':
  216:       output_name = arg;
  217:       break;
  218:     default:
  219:       return ARGP_ERR_UNKNOWN;	/* Key not handled here.  */
  220:     }
  221:   return 0;
  222: }
  223: 
  224: /* Help filter named in `argp': adds the bug-reporting text for ARGP_KEY_HELP_EXTRA; INPUT is not used.  */
  225: static char *
  226: more_help (int key, const char *text, void *input)
  227: {
  228:   switch (key)
  229:     {
  230:     case ARGP_KEY_HELP_EXTRA:
  231:       /* We print some extra information; strdup returns a heap-allocated copy.  */
  232:       return strdup (gettext ("\
  233: For bug reporting instructions, please see:\n\
  234: <http://www.gnu.org/software/libc/bugs.html>.\n"));
  235:     default:
  236:       break;
  237:     }
  238:   return (char *) text;	/* Any other key: hand TEXT back unchanged.  */
  239: }
  240: 
  241: /* Print the version information to STREAM (installed as argp_program_version_hook); STATE is not used.  */
  242: static void
  243: print_version (FILE *stream, struct argp_state *state)
  244: {
  245:   fprintf (stream, "gencat (GNU %s) %s\n", PACKAGE, VERSION);
  246:   fprintf (stream, gettext ("\
  247: Copyright (C) %s Free Software Foundation, Inc.\n\
  248: This is free software; see the source for copying conditions.  There is NO\n\
  249: warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
  250: "), "2007");
  251:   fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
  252: }
  253: 
  254: 
  255: /* The address of this function is assigned to error_print_progname (in
  256:    main), the hook in the error functions.  The body is intentionally empty.  */
  257: static void
  258: error_print ()
  259: {
  260:   /* We don't want the program name to be printed in messages.  Emacs'
  261:      compile.el does not like this.  */
  262: }
  263: 
  264: 
  265: static struct catalog *
  266: read_input_file (struct catalog *current, const char *fname)
  267: {
  268:   FILE *fp;
  269:   char *buf;
  270:   size_t len;
  271:   size_t line_number;
  272:   wchar_t *wbuf;
  273:   size_t wbufsize;
  274:   iconv_t cd_towc = (iconv_t) -1;
  275:   iconv_t cd_tomb = (iconv_t) -1;
  276:   wchar_t escape_char = L'\\';
  277:   char *codeset = NULL;
  278: 
  279:   if (strcmp (fname, "-") == 0 || strcmp (fname, "/dev/stdin") == 0)
  280:     {
  281:       fp = stdin;
  282:       fname = gettext ("*standard input*");
  283:     }
  284:   else
  285:     fp = fopen (fname, "r");
  286:   if (fp == NULL)
  287:     {
  288:       error (0, errno, gettext ("cannot open input file `%s'"), fname);
  289:       return current;
  290:     }
  291: 
  292:   /* If we haven't seen anything yet, allocate result structure.  */
  293:   if (current == NULL)
  294:     {
  295:       current = (struct catalog *) xcalloc (1, sizeof (*current));
  296: 
  297: #define obstack_chunk_alloc malloc
  298: #define obstack_chunk_free free
  299:       obstack_init (&current->mem_pool);
  300: 
  301:       current->current_set = find_set (current, NL_SETD);
  302:     }
  303: 
  304:   buf = NULL;
  305:   len = 0;
  306:   line_number = 0;
  307: 
  308:   wbufsize = 1024;
  309:   wbuf = (wchar_t *) xmalloc (wbufsize);
  310: 
  311:   while (!feof (fp))
  312:     {
  313:       int continued;
  314:       int used;
  315:       size_t start_line = line_number + 1;
  316:       char *this_line;
  317: 
  318:       do
  319:         {
  320:           int act_len;
  321: 
  322:           act_len = getline (&buf, &len, fp);
  323:           if (act_len <= 0)
  324:             break;
  325:           ++line_number;
  326: 
  327:           /* Is the line continued?  */
  328:           continued = 0;
  329:           if (buf[act_len - 1] == '\n')
  330:             {
  331:               --act_len;
  332: 
  333:               /* There might be more than one backslash at the end of
  334:                  the line.  Only if there is an odd number of them is
  335:                  the line continued.  */
  336:               if (act_len > 0 && buf[act_len - 1] == '\\')
  337:                 {
  338:                   int temp_act_len = act_len;
  339: 
  340:                   do
  341:                     {
  342:                       --temp_act_len;
  343:                       continued = !continued;
  344:                     }
  345:                   while (temp_act_len > 0 && buf[temp_act_len - 1] == '\\');
  346: 
  347:                   if (continued)
  348:                     --act_len;
  349:                 }
  350:             }
  351: 
  352:           /* Append to currently selected line.  */
  353:           obstack_grow (&current->mem_pool, buf, act_len);
  354:         }
  355:       while (continued);
  356: 
  357:       obstack_1grow (&current->mem_pool, '\0');
  358:       this_line = (char *) obstack_finish (&current->mem_pool);
  359: 
  360:       used = 0;
  361:       if (this_line[0] == '$')
  362:         {
  363:           if (isblank (this_line[1]))
  364:             {
  365:               int cnt = 1;
  366:               while (isblank (this_line[cnt]))
  367:                 ++cnt;
  368:               if (strncmp (&this_line[cnt], "codeset=", 8) != 0)
  369:                 /* This is a comment line. Do nothing.  */;
  370:               else if (codeset != NULL)
  371:                 /* Ignore multiple codeset. */;
  372:               else
  373:                 {
  374:                   int start = cnt + 8;
  375:                   cnt = start;
  376:                   while (this_line[cnt] != '\0' && !isspace (this_line[cnt]))
  377:                     ++cnt;
  378:                   if (cnt != start)
  379:                     {
  380:                       int len = cnt - start;
  381:                       codeset = xmalloc (len + 1);
  382:                       *((char *) mempcpy (codeset, &this_line[start], len))
  383:                         = '\0';
  384:                     }
  385:                 }
  386:             }
  387:           else if (strncmp (&this_line[1], "set", 3) == 0)
  388:             {
  389:               int cnt = sizeof ("set");
  390:               int set_number;
  391:               const char *symbol = NULL;
  392:               while (isspace (this_line[cnt]))
  393:                 ++cnt;
  394: 
  395:               if (isdigit (this_line[cnt]))
  396:                 {
  397:                   set_number = atol (&this_line[cnt]);
  398: 
  399:                   /* If the given number for the character set is
  400:                      higher than any we used for symbolic set names
  401:                      avoid clashing by using only higher numbers for
  402:                      the following symbolic definitions.  */
  403:                   if (set_number > current->last_set)
  404:                     current->last_set = set_number;
  405:                 }
  406:               else
  407:                 {
  408:                   /* See whether it is a reasonable identifier.  */
  409:                   int start = cnt;
  410:                   while (isalnum (this_line[cnt]) || this_line[cnt] == '_')
  411:                     ++cnt;
  412: 
  413:                   if (cnt == start)
  414:                     {
  415:                       /* No correct character found.  */
  416:                       error_at_line (0, 0, fname, start_line,
  417:                                      gettext ("illegal set number"));
  418:                       set_number = 0;
  419:                     }
  420:                   else
  421:                     {
  422:                       /* We have found something that looks like a
  423:                          correct identifier.  */
  424:                       struct set_list *runp;
  425: 
  426:                       this_line[cnt] = '\0';
  427:                       used = 1;
  428:                       symbol = &this_line[start];
  429: 
  430:                       /* Test whether the identifier was already used.  */
  431:                       runp = current->all_sets;
  432:                       while (runp != 0)
  433:                         if (runp->symbol != NULL
  434:                             && strcmp (runp->symbol, symbol) == 0)
  435:                           break;
  436:                         else
  437:                           runp = runp->next;
  438: 
  439:                       if (runp != NULL)
  440:                         {
  441:                           /* We cannot allow duplicate identifiers for
  442:                              message sets.  */
  443:                           error_at_line (0, 0, fname, start_line,
  444:                                          gettext ("duplicate set definition"));
  445:                           error_at_line (0, 0, runp->fname, runp->line,
  446:                                          gettext ("\
  447: this is the first definition"));
  448:                           set_number = 0;
  449:                         }
  450:                       else
  451:                         /* Allocate next free message set for identifier.  */
  452:                         set_number = ++current->last_set;
  453:                     }
  454:                 }
  455: 
  456:               if (set_number != 0)
  457:                 {
  458:                   /* We found a legal set number.  */
  459:                   current->current_set = find_set (current, set_number);
  460:                   if (symbol != NULL)
  461:                       used = 1;
  462:                   current->current_set->symbol = symbol;
  463:                   current->current_set->fname = fname;
  464:                   current->current_set->line = start_line;
  465:                 }
  466:             }
  467:           else if (strncmp (&this_line[1], "delset", 6) == 0)
  468:             {
  469:               int cnt = sizeof ("delset");
  470:               size_t set_number;
  471:               while (isspace (this_line[cnt]))
  472:                 ++cnt;
  473: 
  474:               if (isdigit (this_line[cnt]))
  475:                 {
  476:                   size_t set_number = atol (&this_line[cnt]);
  477:                   struct set_list *set;
  478: 
  479:                   /* Mark the message set with the given number as
  480:                      deleted.  */
  481:                   set = find_set (current, set_number);
  482:                   set->deleted = 1;
  483:                 }
  484:               else
  485:                 {
  486:                   /* See whether it is a reasonable identifier.  */
  487:                   int start = cnt;
  488:                   while (isalnum (this_line[cnt]) || this_line[cnt] == '_')
  489:                     ++cnt;
  490: 
  491:                   if (cnt == start)
  492:                     {
  493:                       error_at_line (0, 0, fname, start_line,
  494:                                      gettext ("illegal set number"));
  495:                       set_number = 0;
  496:                     }
  497:                   else
  498:                     {
  499:                       const char *symbol;
  500:                       struct set_list *runp;
  501: 
  502:                       this_line[cnt] = '\0';
  503:                       used = 1;
  504:                       symbol = &this_line[start];
  505: 
  506:                       /* We have a symbolic set name.  This name must
  507:                          appear somewhere else in the catalogs read so
  508:                          far.  */
  509:                       set_number = 0;
  510:                       for (runp = current->all_sets; runp != NULL;
  511:                            runp = runp->next)
  512:                         {
  513:                           if (strcmp (runp->symbol, symbol) == 0)
  514:                             {
  515:                               runp->deleted = 1;
  516:                               break;
  517:                             }
  518:                         }
  519:                       if (runp == NULL)
  520:                         /* Name does not exist before.  */
  521:                         error_at_line (0, 0, fname, start_line,
  522:                                        gettext ("unknown set `%s'"), symbol);
  523:                     }
  524:                 }
  525:             }
  526:           else if (strncmp (&this_line[1], "quote", 5) == 0)
  527:             {
  528:               char buf[2];
  529:               char *bufptr;
  530:               size_t buflen;
  531:               char *wbufptr;
  532:               size_t wbuflen;
  533:               int cnt;
  534: 
  535:               cnt = sizeof ("quote");
  536:               while (isspace (this_line[cnt]))
  537:                 ++cnt;
  538: 
  539:               /* We need the conversion.  */
  540:               if (cd_towc == (iconv_t) -1
  541:                   && open_conversion (codeset, &cd_towc, &cd_tomb,
  542:                                       &escape_char) != 0)
  543:                 /* Something is wrong.  */
  544:                 goto out;
  545: 
  546:               /* Yes, the quote char can be '\0'; this means no quote
  547:                  char.  The function using the information works on
  548:                  wide characters so we have to convert it here.  */
  549:               buf[0] = this_line[cnt];
  550:               buf[1] = '\0';
  551:               bufptr = buf;
  552:               buflen = 2;
  553: 
  554:               wbufptr = (char *) wbuf;
  555:               wbuflen = wbufsize;
  556: 
  557:               /* Flush the state.  */
  558:               iconv (cd_towc, NULL, NULL, NULL, NULL);
  559: 
  560:               iconv (cd_towc, &bufptr, &buflen, &wbufptr, &wbuflen);
  561:               if (buflen != 0 || (wchar_t *) wbufptr != &wbuf[2])
  562:                 error_at_line (0, 0, fname, start_line,
  563:                                gettext ("invalid quote character"));
  564:               else
  565:                 /* Use the converted wide character.  */
  566:                 current->quote_char = wbuf[0];
  567:             }
  568:           else
  569:             {
  570:               int cnt;
  571:               cnt = 2;
  572:               while (this_line[cnt] != '\0' && !isspace (this_line[cnt]))
  573:                 ++cnt;
  574:               this_line[cnt] = '\0';
  575:               error_at_line (0, 0, fname, start_line,
  576:                              gettext ("unknown directive `%s': line ignored"),
  577:                              &this_line[1]);
  578:             }
  579:         }
  580:       else if (isalnum (this_line[0]) || this_line[0] == '_')
  581:         {