1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20: #include <assert.h>
21: #include <errno.h>
22: #include <error.h>
23: #include <fcntl.h>
24: #include <iconv.h>
25: #include <libintl.h>
26: #include <stdio.h>
27: #include <stdlib.h>
28: #include <unistd.h>
29: #include <sys/mman.h>
30: #include <sys/stat.h>
31:
32: #include "iconv_prog.h"
33:
34:
35:
/* Allocation wrappers defined outside this file.
   NOTE(review): no caller in this file checks the result for NULL, so these
   presumably terminate the program on allocation failure -- confirm against
   their definition.  */
extern void *xmalloc (size_t __n);
extern void *xcalloc (size_t __n, size_t __s);
38:
39:
/* One level of the byte-indexed conversion trie.  Every possible byte value
   owns one slot in VAL; the corresponding bit in TERM selects which union
   member of that slot is meaningful.  */
struct convtable
{
  /* "Terminal" flags.  The accessors in this file address the flag for byte
     IDX as bit (IDX % 8) of term[IDX / 8], so only the low eight bits of
     each element are ever touched.  */
  int term[256 / 8];
  union
  {
    /* Next trie level; consulted when the terminal bit is clear.  NULL
       means no known sequence continues with this byte.  */
    struct convtable *sub;
    /* Output sequence to emit; consulted when the terminal bit is set.  */
    struct charseq *out;
  } val[256];
};
49:
50:
51: static inline struct convtable *
52: allocate_table (void)
53: {
54: return (struct convtable *) xcalloc (1, sizeof (struct convtable));
55: }
56:
57:
58: static inline int
59: is_term (struct convtable *tbl, unsigned int idx)
60: {
61: return tbl->term[idx / 8] & (1 << (idx % 8));
62: }
63:
64:
65: static inline void
66: clear_term (struct convtable *tbl, unsigned int idx)
67: {
68: tbl->term[idx / 8] &= ~(1 << (idx % 8));
69: }
70:
71:
72: static inline void
73: set_term (struct convtable *tbl, unsigned int idx)
74: {
75: tbl->term[idx / 8] |= 1 << (idx % 8);
76: }
77:
78:
79:
/* Builders for the conversion table.  Which one is used depends on which of
   the two sides is described by a charmap and which by an encoding name;
   each returns the table, and the two iconv-based variants return NULL when
   iconv_open fails.  */
static struct convtable *use_from_charmap (struct charmap_t *from_charmap,
                                           const char *to_code);
static struct convtable *use_to_charmap (const char *from_code,
                                         struct charmap_t *to_charmap);
static struct convtable *use_both_charmaps (struct charmap_t *from_charmap,
                                            struct charmap_t *to_charmap);


/* Input processing.  All three return 0 on success and -1 on error;
   process_fd and process_file read the whole input into memory and then
   hand it to process_block.  */
static int process_block (struct convtable *tbl, char *addr, size_t len,
                          FILE *output);
static int process_fd (struct convtable *tbl, int fd, FILE *output);
static int process_file (struct convtable *tbl, FILE *input, FILE *output);
92:
93:
94: int
95: charmap_conversion (const char *from_code, struct charmap_t *from_charmap,
96: const char *to_code, struct charmap_t *to_charmap,
97: int argc, int remaining, char *argv[], FILE *output)
98: {
99: struct convtable *cvtbl;
100: int status = EXIT_SUCCESS;
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118: if (from_charmap != NULL)
119: {
120: if (to_charmap == NULL)
121: cvtbl = use_from_charmap (from_charmap, to_code);
122: else
123: cvtbl = use_both_charmaps (from_charmap, to_charmap);
124: }
125: else
126: {
127: assert (to_charmap != NULL);
128: cvtbl = use_to_charmap (from_code, to_charmap);
129: }
130:
131:
132: if (cvtbl == NULL)
133: return EXIT_FAILURE;
134:
135:
136: if (remaining == argc)
137: {
138: if (process_file (cvtbl, stdin, output) != 0)
139: status = EXIT_FAILURE;
140: }
141: else
142: do
143: {
144: struct stat st;
145: char *addr;
146: int fd;
147:
148: if (verbose)
149: printf ("%s:\n", argv[remaining]);
150: if (strcmp (argv[remaining], "-") == 0)
151: fd = 0;
152: else
153: {
154: fd = open (argv[remaining], O_RDONLY);
155:
156: if (fd == -1)
157: {
158: error (0, errno, _("cannot open input file `%s'"),
159: argv[remaining]);
160: status = EXIT_FAILURE;
161: continue;
162: }
163: }
164:
165: #ifdef _POSIX_MAPPED_FILES
166:
167:
168: if (fstat (fd, &st) == 0
169: && ((addr = mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE,
170: fd, 0)) != MAP_FAILED))
171: {
172:
173:
174: if (close (fd) != 0)
175: error (EXIT_FAILURE, errno,
176: _("error while closing input `%s'"), argv[remaining]);
177:
178: if (process_block (cvtbl, addr, st.st_size, output) < 0)
179: {
180:
181: status = EXIT_FAILURE;
182:
183:
184: munmap ((void *) addr, st.st_size);
185:
186:
187:
188:
189: break;
190: }
191:
192:
193: munmap ((void *) addr, st.st_size);
194: }
195: else
196: #endif
197: {
198:
199: if (process_fd (cvtbl, fd, output) != 0)
200: {
201:
202: status = EXIT_FAILURE;
203:
204:
205: close (fd);
206:
207:
208:
209:
210: break;
211: }
212:
213:
214: close (fd);
215: }
216: }
217: while (++remaining < argc);
218:
219:
220: return status;
221: }
222:
223:
224: static void
225: add_bytes (struct convtable *tbl, struct charseq *in, struct charseq *out)
226: {
227: int n = 0;
228: unsigned int byte;
229:
230: assert (in->nbytes > 0);
231:
232: byte = ((unsigned char *) in->bytes)[n];
233: while (n + 1 < in->nbytes)
234: {
235: if (is_term (tbl, byte) || tbl->val[byte].sub == NULL)
236: {
237:
238:
239: clear_term (tbl, byte);
240: tbl->val[byte].sub =
241: (struct convtable *) xcalloc (1, sizeof (struct convtable));
242: }
243:
244: tbl = tbl->val[byte].sub;
245:
246: byte = ((unsigned char *) in->bytes)[++n];
247: }
248:
249:
250:
251: if (! is_term (tbl, byte) && tbl->val[byte].sub == NULL)
252: {
253: set_term (tbl, byte);
254: tbl->val[byte].out = out;
255: }
256: }
257:
258:
259: static struct convtable *
260: use_from_charmap (struct charmap_t *from_charmap, const char *to_code)
261: {
262:
263:
264:
265: struct convtable *rettbl;
266: iconv_t cd;
267: void *ptr = NULL;
268: const void *key;
269: size_t keylen;
270: void *data;
271:
272: cd = iconv_open (to_code, "WCHAR_T");
273: if (cd == (iconv_t) -1)
274:
275: return NULL;
276:
277: rettbl = allocate_table ();
278:
279: while (iterate_table (&from_charmap->char_table, &ptr, &key, &keylen, &data)
280: >= 0)
281: {
282: struct charseq *in = (struct charseq *) data;
283:
284: if (in->ucs4 != UNINITIALIZED_CHAR_VALUE)
285: {
286:
287: wchar_t inbuf[1] = { in->ucs4 };
288: unsigned char outbuf[64];
289: char *inptr = (char *) inbuf;
290: size_t inlen = sizeof (inbuf);
291: char *outptr = (char *) outbuf;
292: size_t outlen = sizeof (outbuf);
293:
294: (void) iconv (cd, &inptr, &inlen, &outptr, &outlen);
295:
296: if (outptr != (char *) outbuf)
297: {
298:
299: struct charseq *newp;
300:
301: outlen = sizeof (outbuf) - outlen;
302: assert ((char *) outbuf + outlen == outptr);
303:
304: newp = (struct charseq *) xmalloc (sizeof (struct charseq)
305: + outlen);
306: newp->name = in->name;
307: newp->ucs4 = in->ucs4;
308: newp->nbytes = outlen;
309: memcpy (newp->bytes, outbuf, outlen);
310:
311: add_bytes (rettbl, in, newp);
312: }
313:
314:
315: (void) iconv (cd, NULL, NULL, NULL, NULL);
316: }
317: }
318:
319: iconv_close (cd);
320:
321: return rettbl;
322: }
323:
324:
325: static struct convtable *
326: use_to_charmap (const char *from_code, struct charmap_t *to_charmap)
327: {
328:
329:
330:
331: struct convtable *rettbl;
332: iconv_t cd;
333: void *ptr = NULL;
334: const void *key;
335: size_t keylen;
336: void *data;
337:
338:
339:
340:
341:
342: cd = iconv_open (from_code, "WCHAR_T");
343: if (cd == (iconv_t) -1)
344:
345: return NULL;
346:
347: rettbl = allocate_table ();
348:
349: while (iterate_table (&to_charmap->char_table, &ptr, &key, &keylen, &data)
350: >= 0)
351: {
352: struct charseq *out = (struct charseq *) data;
353:
354: if (out->ucs4 != UNINITIALIZED_CHAR_VALUE)
355: {
356:
357: wchar_t inbuf[1] = { out->ucs4 };
358: unsigned char outbuf[64];
359: char *inptr = (char *) inbuf;
360: size_t inlen = sizeof (inbuf);
361: char *outptr = (char *) outbuf;
362: size_t outlen = sizeof (outbuf);
363:
364: (void) iconv (cd, &inptr, &inlen, &outptr, &outlen);
365:
366: if (outptr != (char *) outbuf)
367: {
368:
369: union
370: {
371: struct charseq seq;
372: struct
373: {
374: const char *name;
375: uint32_t ucs4;
376: int nbytes;
377: unsigned char bytes[outlen];
378: } mem;
379: } new;
380:
381: outlen = sizeof (outbuf) - outlen;
382: assert ((char *) outbuf + outlen == outptr);
383:
384: new.mem.name = out->name;
385: new.mem.ucs4 = out->ucs4;
386: new.mem.nbytes = outlen;
387: memcpy (new.mem.bytes, outbuf, outlen);
388:
389: add_bytes (rettbl, &new.seq, out);
390: }
391:
392:
393: (void) iconv (cd, NULL, NULL, NULL, NULL);
394: }
395: }
396:
397: iconv_close (cd);
398:
399: return rettbl;
400: }
401:
402:
403: static struct convtable *
404: use_both_charmaps (struct charmap_t *from_charmap,
405: struct charmap_t *to_charmap)
406: {
407:
408:
409:
410: struct convtable *rettbl = allocate_table ();
411: void *ptr = NULL;
412: const void *key;
413: size_t keylen;
414: void *data;
415:
416: while (iterate_table (&from_charmap->char_table, &ptr, &key, &keylen, &data)
417: >= 0)
418: {
419: struct charseq *in = (struct charseq *) data;
420: struct charseq *out = charmap_find_value (to_charmap, key, keylen);
421:
422: if (out != NULL)
423: add_bytes (rettbl, in, out);
424: }
425:
426: return rettbl;
427: }
428:
429:
430: static int
431: process_block (struct convtable *tbl, char *addr, size_t len, FILE *output)
432: {
433: size_t n = 0;
434:
435: while (n < len)
436: {
437: struct convtable *cur = tbl;
438: unsigned char *curp = (unsigned char *) addr;
439: unsigned int byte = *curp;
440: int cnt;
441: struct charseq *out;
442:
443: while (! is_term (cur, byte))
444: if (cur->val[byte].sub == NULL)
445: {
446:
447:
448: if (! omit_invalid)
449: {
450: error (0, 0, _("illegal input sequence at position %Zd"), n);
451: return -1;
452: }
453:
454: n -= curp - (unsigned char *) addr;
455:
456: byte = *(curp = (unsigned char *) ++addr);
457: if (++n >= len)
458:
459: return 0;
460:
461: cur = tbl;
462: }
463: else
464: {
465: cur = cur->val[byte].sub;
466:
467: if (++n >= len)
468: {
469: error (0, 0, _("\
470: incomplete character or shift sequence at end of buffer"));
471: return -1;
472: }
473:
474: byte = *++curp;
475: }
476:
477:
478: out = cur->val[byte].out;
479: for (cnt = 0; cnt < out->nbytes; ++cnt)
480: fputc_unlocked (out->bytes[cnt], output);
481:
482: addr = (char *) curp + 1;
483: ++n;
484: }
485:
486: return 0;
487: }
488:
489:
/* Read everything available from FD into memory and convert it with
   process_block, writing to OUTPUT.

   The buffer is static: it is kept (and never shrunk or freed) across
   calls, and grows in steps of 32768 bytes whenever it is full.  Growth is
   triggered before the read that would otherwise have no room, so hitting
   end-of-file exactly at the buffer boundary costs one extra enlargement.

   Returns the result of process_block, or -1 after printing a diagnostic
   if reading or enlarging the buffer fails.  */
static int
process_fd (struct convtable *tbl, int fd, FILE *output)
{
  static char *inbuf = NULL;
  static size_t maxlen = 0;
  size_t actlen = 0;

  for (;;)
    {
      ssize_t got;

      if (actlen == maxlen)
        {
          /* Buffer is full (or was never allocated); enlarge it.  The
             old pointer is only replaced once realloc succeeded.  */
          char *grown = (char *) realloc (inbuf, maxlen + 32768);

          if (grown == NULL)
            {
              error (0, errno, _("unable to allocate buffer for input"));
              return -1;
            }

          inbuf = grown;
          maxlen += 32768;
        }

      got = read (fd, inbuf + actlen, maxlen - actlen);

      if (got == 0)
        /* End of input.  */
        break;

      if (got == -1)
        {
          error (0, errno, _("error while reading the input"));
          return -1;
        }

      actlen += got;
    }

  /* The whole input is in memory now.  */
  return process_block (tbl, inbuf, actlen, output);
}
567:
568:
/* Convert the contents of the stdio stream INPUT by handing its underlying
   file descriptor to process_fd.  Data already buffered inside INPUT is not
   taken into account.  Returns what process_fd returns.  */
static int
process_file (struct convtable *tbl, FILE *input, FILE *output)
{
  const int fd = fileno (input);

  return process_fd (tbl, fd, output);
}