1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36: #define LIBGAUCHE_BODY
37: #include "gauche.h"
38:
39: #include <ctype.h>
40: #include <string.h>
41:
42: void Scm_DStringDump(FILE *out, ScmDString *dstr);
43:
44: static void string_print(ScmObj obj, ScmPort *port, ScmWriteContext *ctx);
45: SCM_DEFINE_BUILTIN_CLASS(Scm_StringClass, string_print, NULL, NULL, NULL,
46: SCM_CLASS_SEQUENCE_CPL);
47:
48:
49:
50: static ScmString *make_str(int len, int siz, const char *p, int flags)
51: {
52: ScmString *s = SCM_NEW(ScmString);
53: SCM_SET_CLASS(s, SCM_CLASS_STRING);
54:
55: if (len < 0) flags |= SCM_STRING_INCOMPLETE;
56: if (flags & SCM_STRING_INCOMPLETE) len = siz;
57:
58: s->body = NULL;
59: s->initialBody.flags = flags & SCM_STRING_FLAG_MASK;
60: s->initialBody.length = len;
61: s->initialBody.size = siz;
62: s->initialBody.start = p;
63: return s;
64: }
65:
66: #if 0
67: static ScmStringBody *make_str_body(int len, int siz, const char *p, int flags)
68: {
69: ScmStringBody *b = SCM_NEW(ScmStringBody);
70: b->length = (len < 0)? siz : len;
71: b->size = siz;
72: b->start = p;
73: b->flags = flags;
74: return b;
75: }
76: #endif
77:
78: #define DUMP_LENGTH 50
79:
80:
81: #if SCM_DEBUG_HELPER
82: void Scm_StringDump(FILE *out, ScmObj str)
83: {
84: int i;
85: const ScmStringBody *b = SCM_STRING_BODY(str);
86: int s = SCM_STRING_BODY_SIZE(b);
87: const char *p = SCM_STRING_BODY_START(b);
88:
89: fprintf(out, "STR(len=%d,siz=%d) \"", SCM_STRING_BODY_LENGTH(b), s);
90: for (i=0; i < DUMP_LENGTH && s > 0;) {
91: int n = SCM_CHAR_NFOLLOWS(*p) + 1;
92: for (; n > 0 && s > 0; p++, n--, s--, i++) {
93: putc(*p, out);
94: }
95: }
96: if (s > 0) {
97: fputs("...\"\n", out);
98: } else {
99: fputs("\"\n", out);
100: }
101: }
102: #endif
103:
104:
105:
106:
107:
108:
109:
110:
111:
112: static inline int count_size_and_length(const char *str, int *psize, int *plen)
113: {
114: char c;
115: const char *p = str;
116: int size = 0, len = 0;
117: while ((c = *p++) != 0) {
118: int i = SCM_CHAR_NFOLLOWS(c);
119: len++;
120: size++;
121: while (i-- > 0) {
122: if (!*p++) { len = -1; goto eos; }
123: size++;
124: }
125: }
126: eos:
127: *psize = size;
128: *plen = len;
129: return len;
130: }
131:
132:
133: static inline int count_length(const char *str, int size)
134: {
135: int count = 0;
136:
137: while (size-- > 0) {
138: ScmChar ch;
139: unsigned char c = (unsigned char)*str;
140: int i = SCM_CHAR_NFOLLOWS(c);
141: if (i < 0 || i > size) return -1;
142: SCM_CHAR_GET(str, ch);
143: if (ch == SCM_CHAR_INVALID) return -1;
144: count++;
145: str += i+1;
146: size -= i;
147: }
148: return count;
149: }
150:
151:
152:
153:
154: int Scm_MBLen(const char *str, const char *stop)
155: {
156: int size = (stop == NULL)? (int)strlen(str) : (stop - str);
157: return count_length(str, size);
158: }
159:
160:
161:
162:
163:
164:
165: ScmObj Scm_MakeString(const char *str, int size, int len, int flags)
166: {
167: ScmString *s;
168:
169: flags &= ~SCM_STRING_TERMINATED;
170:
171: if (size < 0) {
172: count_size_and_length(str, &size, &len);
173: flags |= SCM_STRING_TERMINATED;
174: } else {
175: if (len < 0) len = count_length(str, size);
176: }
177:
178: if (flags & SCM_STRING_COPYING) {
179: char *nstr = SCM_NEW_ATOMIC2(char *, size + 1);
180: memcpy(nstr, str, size);
181: nstr[size] = '\0';
182: flags |= SCM_STRING_TERMINATED;
183: s = make_str(len, size, nstr, flags);
184: } else {
185: s = make_str(len, size, str, flags);
186: }
187: return SCM_OBJ(s);
188: }
189:
190: ScmObj Scm_MakeFillString(int len, ScmChar fill)
191: {
192: int size = SCM_CHAR_NBYTES(fill), i;
193: char *ptr = SCM_NEW_ATOMIC2(char *, size*len+1);
194: char *p;
195:
196: if (len < 0) Scm_Error("length out of range: %d", len);
197: for (i=0, p=ptr; i<len; i++, p+=size) {
198: SCM_CHAR_PUT(p, fill);
199: }
200: ptr[size*len] = '\0';
201: return SCM_OBJ(make_str(len, size*len, ptr, SCM_STRING_TERMINATED));
202: }
203:
204: ScmObj Scm_ListToString(ScmObj chars)
205: {
206: ScmObj cp;
207: int size = 0, len = 0;
208: ScmChar ch;
209: char *buf, *bufp;
210:
211: SCM_FOR_EACH(cp, chars) {
212: if (!SCM_CHARP(SCM_CAR(cp)))
213: Scm_Error("character required, but got %S", SCM_CAR(cp));
214: ch = SCM_CHAR_VALUE(SCM_CAR(cp));
215: size += SCM_CHAR_NBYTES(ch);
216: len++;
217: }
218: bufp = buf = SCM_NEW_ATOMIC2(char *, size+1);
219: SCM_FOR_EACH(cp, chars) {
220: ch = SCM_CHAR_VALUE(SCM_CAR(cp));
221: SCM_CHAR_PUT(bufp, ch);
222: bufp += SCM_CHAR_NBYTES(ch);
223: }
224: *bufp = '\0';
225: return Scm_MakeString(buf, size, len, 0);
226: }
227:
228:
229:
230: char *Scm_GetString(ScmString *str)
231: {
232: int size;
233: char *p;
234: const ScmStringBody *b = SCM_STRING_BODY(str);
235:
236: size = SCM_STRING_BODY_SIZE(b);
237: p = SCM_NEW_ATOMIC2(char *, size+1);
238: memcpy(p, SCM_STRING_BODY_START(b), size);
239: p[size] = '\0';
240: return p;
241: }
242:
243:
244: static const char *get_string_from_body(const ScmStringBody *b)
245: {
246: int size = SCM_STRING_BODY_SIZE(b);
247: if (SCM_STRING_BODY_HAS_FLAG(b, SCM_STRING_TERMINATED)) {
248:
249: return SCM_STRING_BODY_START(b);
250: } else {
251: char *p = SCM_NEW_ATOMIC2(char *, size+1);
252: memcpy(p, SCM_STRING_BODY_START(b), size);
253: p[size] = '\0';
254:
255:
256:
257:
258: ((ScmStringBody*)b)->start = p;
259: ((ScmStringBody*)b)->flags |= SCM_STRING_TERMINATED;
260: return p;
261: }
262: }
263:
264:
265:
266:
267: const char *Scm_GetStringConst(ScmString *str)
268: {
269: return get_string_from_body(SCM_STRING_BODY(str));
270: }
271:
272:
273:
274: const char *Scm_GetStringContent(ScmString *str,
275: unsigned int *psize,
276: unsigned int *plength,
277: unsigned int *pflags)
278: {
279: const ScmStringBody *b = SCM_STRING_BODY(str);
280: if (psize) *psize = SCM_STRING_BODY_SIZE(b);
281: if (plength) *plength = SCM_STRING_BODY_LENGTH(b);
282: if (pflags) *pflags = SCM_STRING_BODY_FLAGS(b);
283: return get_string_from_body(b);
284: }
285:
286:
287:
288:
289:
290:
291:
292:
293:
294:
295:
296:
297:
298:
299:
300: ScmObj Scm_CopyStringWithFlags(ScmString *x, int flags, int mask)
301: {
302: const ScmStringBody *b = SCM_STRING_BODY(x);
303: int size = SCM_STRING_BODY_SIZE(b);
304: int len = SCM_STRING_BODY_LENGTH(b);
305: const char *start = SCM_STRING_BODY_START(b);
306: int newflags = ((SCM_STRING_BODY_FLAGS(b) & ~mask)
307: | (flags & mask));
308:
309: return SCM_OBJ(make_str(len, size, start, newflags));
310: }
311:
312: ScmObj Scm_StringCompleteToIncomplete(ScmString *x)
313: {
314: return Scm_CopyStringWithFlags(x, SCM_STRING_INCOMPLETE,
315: SCM_STRING_INCOMPLETE);
316: }
317:
318: ScmObj Scm_StringIncompleteToComplete(ScmString *x,
319: int handling,
320: ScmChar substitute)
321: {
322: const ScmStringBody *b;
323: ScmObj r = SCM_FALSE;
324:
325: switch (handling) {
326: case SCM_ILLEGAL_CHAR_REJECT:
327: case SCM_ILLEGAL_CHAR_OMIT:
328: case SCM_ILLEGAL_CHAR_REPLACE:
329: break;
330: default:
331: Scm_Error("invalid 'handling' argument: %d", handling);
332: return SCM_UNDEFINED;
333: }
334:
335: b = SCM_STRING_BODY(x);
336: if (!SCM_STRING_BODY_INCOMPLETE_P(b)) {
337:
338: r = Scm_CopyString(x);
339: } else {
340: const char *s = SCM_STRING_BODY_START(b);
341: int siz = SCM_STRING_BODY_SIZE(b);
342: int len = count_length(s, siz);
343: if (len >= 0) {
344: r = Scm_MakeString(s, siz, len, 0);
345: } else if (handling == SCM_ILLEGAL_CHAR_REJECT) {
346: r = SCM_FALSE;
347: } else {
348: ScmDString ds;
349: const char *p = s;
350: ScmChar ch;
351:
352: Scm_DStringInit(&ds);
353:
354: while (p < s+siz) {
355: if (p + SCM_CHAR_NFOLLOWS(*p) >= s + siz) {
356: ch = SCM_CHAR_INVALID;
357: } else {
358: SCM_CHAR_GET(p, ch);
359: }
360:
361: if (ch != SCM_CHAR_INVALID) {
362: Scm_DStringPutc(&ds, ch);
363: p += SCM_CHAR_NBYTES(ch);
364: } else if (handling == SCM_ILLEGAL_CHAR_OMIT) {
365: p++;
366: } else {
367: Scm_DStringPutc(&ds, substitute);
368: p++;
369: }
370: }
371: r = Scm_DStringGet(&ds, 0);
372: }
373: }
374:
375: return r;
376: }
377:
378:
379:
380:
381:
382:
383: int Scm_StringEqual(ScmString *x, ScmString *y)
384: {
385: const ScmStringBody *xb = SCM_STRING_BODY(x);
386: const ScmStringBody *yb = SCM_STRING_BODY(y);
387: if ((SCM_STRING_BODY_FLAGS(xb)^SCM_STRING_BODY_FLAGS(yb))&SCM_STRING_INCOMPLETE) {
388: return FALSE;
389: }
390: if (SCM_STRING_BODY_SIZE(xb) != SCM_STRING_BODY_SIZE(yb)) {
391: return FALSE;
392: }
393: return (memcmp(SCM_STRING_BODY_START(xb),
394: SCM_STRING_BODY_START(yb),
395: SCM_STRING_BODY_SIZE(xb)) == 0? TRUE : FALSE);
396: }
397:
398: int Scm_StringCmp(ScmString *x, ScmString *y)
399: {
400: int sizx, sizy, siz, r;
401: const ScmStringBody *xb = SCM_STRING_BODY(x);
402: const ScmStringBody *yb = SCM_STRING_BODY(y);
403: if ((SCM_STRING_BODY_FLAGS(xb)^SCM_STRING_BODY_FLAGS(yb))&SCM_STRING_INCOMPLETE) {
404: Scm_Error("cannot compare incomplete vs complete string: %S, %S",
405: SCM_OBJ(x), SCM_OBJ(y));
406: }
407: sizx = SCM_STRING_BODY_SIZE(xb);
408: sizy = SCM_STRING_BODY_SIZE(yb);
409: siz = (sizx < sizy)? sizx : sizy;
410: r = memcmp(SCM_STRING_BODY_START(xb), SCM_STRING_BODY_START(yb), siz);
411: if (r == 0) return (sizx - sizy);
412: else return r;
413: }
414:
415:
416: static int sb_strcasecmp(const char *px, int sizx,
417: const char *py, int sizy)
418: {
419: char cx, cy;
420: for (; sizx > 0 && sizy > 0; sizx--, sizy--, px++, py++) {
421: cx = tolower(*px);
422: cy = tolower(*py);
423: if (cx == cy) continue;
424: return (cx - cy);
425: }
426: if (sizx > 0) return 1;
427: if (sizy > 0) return -1;
428: return 0;
429: }
430:
431:
432: static int mb_strcasecmp(const char *px, int lenx,
433: const char *py, int leny)
434: {
435: int cx, cy, ccx, ccy, ix, iy;
436: for (; lenx > 0 && leny > 0; lenx--, leny--, px+=ix, py+=iy) {
437: SCM_CHAR_GET(px, cx);
438: SCM_CHAR_GET(py, cy);
439: ccx = SCM_CHAR_UPCASE(cx);
440: ccy = SCM_CHAR_UPCASE(cy);
441: if (ccx != ccy) return (ccx - ccy);
442: ix = SCM_CHAR_NBYTES(cx);
443: iy = SCM_CHAR_NBYTES(cy);
444: }
445: if (lenx > 0) return 1;
446: if (leny > 0) return -1;
447: return 0;
448: }
449:
450: int Scm_StringCiCmp(ScmString *x, ScmString *y)
451: {
452: int sizx, lenx, sizy, leny;
453: const char *px, *py;
454: const ScmStringBody *xb = SCM_STRING_BODY(x);
455: const ScmStringBody *yb = SCM_STRING_BODY(y);
456:
457: if ((SCM_STRING_BODY_FLAGS(xb)^SCM_STRING_BODY_FLAGS(yb))&SCM_STRING_INCOMPLETE) {
458: Scm_Error("cannot compare incomplete strings in case-insensitive way: %S, %S",
459: SCM_OBJ(x), SCM_OBJ(y));
460: }
461: sizx = SCM_STRING_BODY_SIZE(xb); lenx = SCM_STRING_BODY_SIZE(xb);
462: sizy = SCM_STRING_BODY_SIZE(yb); leny = SCM_STRING_BODY_SIZE(yb);
463: px = SCM_STRING_BODY_START(xb);
464: py = SCM_STRING_BODY_START(yb);
465:
466: if (sizx == lenx && sizy == leny) {
467: return sb_strcasecmp(px, sizx, py, sizy);
468: } else {
469: return mb_strcasecmp(px, lenx, py, leny);
470: }
471: }
472:
473:
474:
475:
476:
477:
478: static const char *forward_pos(const char *current, int offset)
479: {
480: int n;
481:
482: while (offset--) {
483: n = SCM_CHAR_NFOLLOWS(*current);
484: current += n + 1;
485: }
486: return current;
487: }
488:
489:
490:
491:
492:
493:
494:
495:
496:
497: ScmChar Scm_StringRef(ScmString *str, int pos, int range_error)
498: {
499: const ScmStringBody *b = SCM_STRING_BODY(str);
500: int len = SCM_STRING_BODY_LENGTH(b);
501:
502:
503:
504: if (SCM_STRING_BODY_INCOMPLETE_P(b)) {
505: Scm_Error("incomplete string not allowed : %S", str);
506: }
507: if (pos < 0 || pos >= len) {
508: if (range_error) {
509: Scm_Error("argument out of range: %d", pos);
510: } else {
511: return SCM_CHAR_INVALID;
512: }
513: }
514: if (SCM_STRING_BODY_SINGLE_BYTE_P(b)) {
515: return (ScmChar)(((unsigned char *)SCM_STRING_BODY_START(b))[pos]);
516: } else {
517: const char *p = forward_pos(SCM_STRING_BODY_START(b), pos);
518: ScmChar c;
519: SCM_CHAR_GET(p, c);
520: return c;
521: }
522: }
523:
524:
525:
526:
527:
528: int Scm_StringByteRef(ScmString *str, int offset, int range_error)
529: {
530: const ScmStringBody *b = SCM_STRING_BODY(str);
531: if (offset < 0 || offset >= (int)SCM_STRING_BODY_SIZE(b)) {
532: if (range_error) {
533: Scm_Error("argument out of range: %d", offset);
534: } else {
535: return -1;
536: }
537: }
538: return (ScmByte)SCM_STRING_BODY_START(b)[offset];
539: }
540:
541:
542:
543:
544:
545:
546:
547: const char *Scm_StringPosition(ScmString *str, int offset)
548: {
549: const ScmStringBody *b = SCM_STRING_BODY(str);
550: if (offset < 0 || offset > (int)SCM_STRING_BODY_LENGTH(b)) {
551: Scm_Error("argument out of range: %d", offset);
552: }
553: if (SCM_STRING_BODY_INCOMPLETE_P(b)) {
554: return (SCM_STRING_BODY_START(b)+offset);
555: } else {
556: return (forward_pos(SCM_STRING_BODY_START(b), offset));
557: }
558: }
559:
560:
561:
562: