1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20: #include <argp.h>
21: #include <assert.h>
22: #include <ctype.h>
23: #include <errno.h>
24: #include <error.h>
25: #include <fcntl.h>
26: #include <iconv.h>
27: #include <langinfo.h>
28: #include <locale.h>
29: #include <search.h>
30: #include <stdbool.h>
31: #include <stdio.h>
32: #include <stdlib.h>
33: #include <string.h>
34: #include <unistd.h>
35: #include <libintl.h>
36: #ifdef _POSIX_MAPPED_FILES
37: # include <sys/mman.h>
38: #endif
39: #include <charmap.h>
40: #include <gconv_int.h>
41: #include "iconv_prog.h"
42: #include "iconvconfig.h"
43:
44:
45: #include "../version.h"
46:
47: #define PACKAGE _libc_intl_domainname
48:
49:
50:
51: static void print_version (FILE *stream, struct argp_state *state);
52: void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
53:
54: #define OPT_VERBOSE 1000
55: #define OPT_LIST 'l'
56:
57:
58: static const struct argp_option options[] =
59: {
60: { NULL, 0, NULL, 0, N_("Input/Output format specification:") },
61: { "from-code", 'f', "NAME", 0, N_("encoding of original text") },
62: { "to-code", 't', "NAME", 0, N_("encoding for output") },
63: { NULL, 0, NULL, 0, N_("Information:") },
64: { "list", 'l', NULL, 0, N_("list all known coded character sets") },
65: { NULL, 0, NULL, 0, N_("Output control:") },
66: { NULL, 'c', NULL, 0, N_("omit invalid characters from output") },
67: { "output", 'o', "FILE", 0, N_("output file") },
68: { "silent", 's', NULL, 0, N_("suppress warnings") },
69: { "verbose", OPT_VERBOSE, NULL, 0, N_("print progress information") },
70: { NULL, 0, NULL, 0, NULL }
71: };
72:
73:
74: static const char doc[] = N_("\
75: Convert encoding of given files from one encoding to another.");
76:
77:
78: static const char args_doc[] = N_("[FILE...]");
79:
80:
81: static error_t parse_opt (int key, char *arg, struct argp_state *state);
82:
83:
84: static char *more_help (int key, const char *text, void *input);
85:
86:
87: static struct argp argp =
88: {
89: options, parse_opt, args_doc, doc, NULL, more_help
90: };
91:
92:
93:
94:
95: static const char *from_code = "";
96: static const char *to_code = "";
97:
98:
99: static const char *output_file;
100:
101:
102: int verbose;
103:
104:
105: static int list;
106:
107:
108: int omit_invalid;
109:
110:
111: static int process_block (iconv_t cd, char *addr, size_t len, FILE *output);
112: static int process_fd (iconv_t cd, int fd, FILE *output);
113: static int process_file (iconv_t cd, FILE *input, FILE *output);
114: static void print_known_names (void) internal_function;
115:
116:
117: int
118: main (int argc, char *argv[])
119: {
120: int status = EXIT_SUCCESS;
121: int remaining;
122: FILE *output;
123: iconv_t cd;
124: const char *orig_to_code;
125: struct charmap_t *from_charmap = NULL;
126: struct charmap_t *to_charmap = NULL;
127:
128:
129: setlocale (LC_ALL, "");
130:
131:
132: textdomain (_libc_intl_domainname);
133:
134:
135: argp_parse (&argp, argc, argv, 0, &remaining, NULL);
136:
137:
138: if (list)
139: {
140: print_known_names ();
141: exit (EXIT_SUCCESS);
142: }
143:
144:
145:
146: orig_to_code = to_code;
147: if (omit_invalid)
148: {
149: const char *errhand = strchrnul (to_code, '/');
150: int nslash = 2;
151: char *newp;
152: char *cp;
153:
154: if (*errhand == '/')
155: {
156: --nslash;
157: errhand = strchrnul (errhand, '/');
158:
159: if (*errhand == '/')
160: {
161: --nslash;
162: errhand = strchr (errhand, '\0');
163: }
164: }
165:
166: newp = (char *) alloca (errhand - to_code + nslash + 7 + 1);
167: cp = mempcpy (newp, to_code, errhand - to_code);
168: while (nslash-- > 0)
169: *cp++ = '/';
170: if (cp[-1] != '/')
171: *cp++ = ',';
172: memcpy (cp, "IGNORE", sizeof ("IGNORE"));
173:
174: to_code = newp;
175: }
176:
177:
178:
179:
180:
181:
182:
183:
184: if (strchr (from_code, '/') != NULL)
185:
186:
187: from_charmap = charmap_read (from_code, 1, 0, 0, 0);
188:
189: if (strchr (orig_to_code, '/') != NULL)
190:
191:
192: to_charmap = charmap_read (orig_to_code, 1, 0, 0, 0);
193:
194:
195:
196: if (output_file != NULL && strcmp (output_file, "-") != 0)
197: {
198: output = fopen (output_file, "w");
199: if (output == NULL)
200: error (EXIT_FAILURE, errno, _("cannot open output file"));
201: }
202: else
203: output = stdout;
204:
205:
206:
207:
208:
209:
210: if (from_charmap != NULL || to_charmap != NULL)
211:
212: status = charmap_conversion (from_code, from_charmap, to_code, to_charmap,
213: argc, remaining, argv, output);
214: else
215: {
216:
217: cd = iconv_open (to_code, from_code);
218: if (cd == (iconv_t) -1)
219: {
220: if (errno == EINVAL)
221: {
222:
223:
224:
225:
226:
227: bool from_wrong =
228: (iconv_open ("UTF-8", from_code) == (iconv_t) -1
229: && errno == EINVAL);
230: bool to_wrong =
231: (iconv_open (to_code, "UTF-8") == (iconv_t) -1
232: && errno == EINVAL);
233: const char *from_pretty =
234: (from_code[0] ? from_code : nl_langinfo (CODESET));
235: const char *to_pretty =
236: (orig_to_code[0] ? orig_to_code : nl_langinfo (CODESET));
237:
238: if (from_wrong)
239: {
240: if (to_wrong)
241: error (0, 0,
242: _("\
243: conversions from `%s' and to `%s' are not supported"),
244: from_pretty, to_pretty);
245: else
246: error (0, 0,
247: _("conversion from `%s' is not supported"),
248: from_pretty);
249: }
250: else
251: {
252: if (to_wrong)
253: error (0, 0,
254: _("conversion to `%s' is not supported"),
255: to_pretty);
256: else
257: error (0, 0,
258: _("conversion from `%s' to `%s' is not supported"),
259: from_pretty, to_pretty);
260: }
261:
262: argp_help (&argp, stderr, ARGP_HELP_SEE,
263: program_invocation_short_name);
264: exit (1);
265: }
266: else
267: error (EXIT_FAILURE, errno,
268: _("failed to start conversion processing"));
269: }
270:
271:
272:
273:
274: if (remaining == argc)
275: {
276: if (process_file (cd, stdin, output) != 0)
277: status = EXIT_FAILURE;
278: }
279: else
280: do
281: {
282: #ifdef _POSIX_MAPPED_FILES
283: struct stat st;
284: char *addr;
285: #endif
286: int fd, ret;
287:
288: if (verbose)
289: fprintf (stderr, "%s:\n", argv[remaining]);
290: if (strcmp (argv[remaining], "-") == 0)
291: fd = 0;
292: else
293: {
294: fd = open (argv[remaining], O_RDONLY);
295:
296: if (fd == -1)
297: {
298: error (0, errno, _("cannot open input file `%s'"),
299: argv[remaining]);
300: status = EXIT_FAILURE;
301: continue;
302: }
303: }
304:
305: #ifdef _POSIX_MAPPED_FILES
306:
307:
308: if (fstat (fd, &st) == 0
309: && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE,
310: fd, 0)) != MAP_FAILED))
311: {
312:
313:
314: if (close (fd) != 0)
315: error (EXIT_FAILURE, errno,
316: _("error while closing input `%s'"),
317: argv[remaining]);
318:
319: ret = process_block (cd, addr, st.st_size, output);
320:
321:
322: munmap ((void *) addr, st.st_size);
323:
324: if (ret != 0)
325: {
326: status = EXIT_FAILURE;
327:
328: if (ret < 0)
329:
330:
331:
332: break;
333: }
334: }
335: else
336: #endif
337: {
338:
339: ret = process_fd (cd, fd, output);
340:
341:
342: close (fd);
343:
344: if (ret != 0)
345: {
346:
347: status = EXIT_FAILURE;
348:
349: if (ret < 0)
350:
351:
352:
353: break;
354: }
355: }
356: }
357: while (++remaining < argc);
358: }
359:
360:
361: if (fclose (output))
362: error (EXIT_FAILURE, errno, _("error while closing output file"));
363:
364: return status;
365: }
366:
367:
368:
369: static error_t
370: parse_opt (int key, char *arg, struct argp_state *state)
371: {
372: switch (key)
373: {
374: case 'f':
375: from_code = arg;
376: break;
377: case 't':
378: to_code = arg;
379: break;
380: case 'o':
381: output_file = arg;
382: break;
383: case 's':
384:
385:
386: break;
387: case 'c':
388:
389: omit_invalid = 1;
390: break;
391: case OPT_VERBOSE:
392: verbose = 1;
393: break;
394: case OPT_LIST:
395: list = 1;
396: break;
397: default:
398: return ARGP_ERR_UNKNOWN;
399: }
400: return 0;
401: }
402:
403:
/* Help filter for argp.  For ARGP_KEY_HELP_EXTRA a newly allocated copy
   of the (translated) bug reporting note is returned; for every other
   KEY the given TEXT is handed back unchanged.  INPUT is not used.  */
static char *
more_help (int key, const char *text, void *input)
{
  if (key != ARGP_KEY_HELP_EXTRA)
    return (char *) text;

  /* The result is duplicated, so the caller receives heap memory.  */
  return strdup (gettext ("For bug reporting instructions, please see:\n"
			  "<http://www.gnu.org/software/libc/bugs.html>.\n"));
}
419:
420:
421:
422: static void
423: print_version (FILE *stream, struct argp_state *state)
424: {
425: fprintf (stream, "iconv (GNU %s) %s\n", PACKAGE, VERSION);
426: fprintf (stream, gettext ("\
427: Copyright (C) %s Free Software Foundation, Inc.\n\
428: This is free software; see the source for copying conditions. There is NO\n\
429: warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
430: "), "2007");
431: fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
432: }
433:
434:
435: static int
436: process_block (iconv_t cd, char *addr, size_t len, FILE *output)
437: {
438: #define OUTBUF_SIZE 32768
439: const char *start = addr;
440: char outbuf[OUTBUF_SIZE];
441: char *outptr;
442: size_t outlen;
443: size_t n;
444: int ret = 0;
445:
446: while (len > 0)
447: {
448: outptr = outbuf;
449: outlen = OUTBUF_SIZE;
450: n = iconv (cd, &addr, &len, &outptr, &outlen);
451:
452: if (n == (size_t) -1 && omit_invalid && errno == EILSEQ)
453: {
454: ret = 1;
455: if (len == 0)
456: n = 0;
457: else
458: errno = E2BIG;
459: }
460:
461: if (outptr != outbuf)
462: {
463:
464: int errno_save = errno;
465:
466: if (fwrite (outbuf, 1, outptr - outbuf, output)
467: < (size_t) (outptr - outbuf)
468: || ferror (output))
469: {
470:
471: error (0, 0, _("\
472: conversion stopped due to problem in writing the output"));
473: return -1;
474: }
475:
476: errno = errno_save;
477: }
478:
479: if (n != (size_t) -1)
480: {
481:
482:
483: outptr = outbuf;
484: outlen = OUTBUF_SIZE;
485: n = iconv (cd, NULL, NULL, &outptr, &outlen);
486:
487: if (outptr != outbuf)
488: {
489:
490: int errno_save = errno;
491:
492: if (fwrite (outbuf, 1, outptr - outbuf, output)
493: < (size_t) (outptr - outbuf)
494: || ferror (output))
495: {
496:
497: error (0, 0, _("\
498: conversion stopped due to problem in writing the output"));
499: return -1;
500: }
501:
502: errno = errno_save;
503: }
504:
505: if (n != (size_t) -1)
506: break;
507:
508: if (omit_invalid && errno == EILSEQ)
509: {
510: ret = 1;
511: break;
512: }
513: }
514:
515: if (errno != E2BIG)
516: {
517:
518: switch (errno)
519: {
520: case EILSEQ:
521: if (! omit_invalid)
522: error (0, 0, _("illegal input sequence at position %ld"),
523: (long int) (addr - start));
524: break;
525: case EINVAL:
526: error (0, 0, _("\
527: incomplete character or shift sequence at end of buffer"));
528: break;
529: case EBADF:
530: error (0, 0, _("internal error (illegal descriptor)"));
531: break;
532: default:
533: error (0, 0, _("unknown iconv() error %d"), errno);
534: break;
535: }
536:
537: return -1;
538: }
539: }
540:
541: return ret;
542: }
543:
544:
545: static int
546: process_fd (iconv_t cd, int fd, FILE *output)
547: {
548:
549:
550:
551:
552:
553: static char *inbuf = NULL;
554: static size_t maxlen = 0;
555: char *inptr = NULL;
556: size_t actlen = 0;
557:
558: while (actlen < maxlen)
559: {
560: ssize_t n = read (fd, inptr, maxlen - actlen);
561:
562: if (n == 0)
563:
564: break;
565:
566: if (n == -1)
567: {
568:
569: error (0, errno, _("error while reading the input"));
570: return -1;
571: }
572:
573: inptr += n;
574: actlen += n;
575: }
576:
577: if (actlen == maxlen)
578: while (1)
579: {
580: ssize_t n;
581: char *new_inbuf;
582:
583:
584: new_inbuf = (char *) realloc (inbuf, maxlen + 32768);
585: