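/* Regression test for the GNU regex word-boundary operators
   (\b, \B, \< and \>).  Every table entry is searched in the
   de_DE.ISO-8859-1 locale, in the de_DE.UTF-8 locale, and again in UTF-8
   with the ASCII placeholder characters expanded to multibyte
   sequences.  */
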
/* glibc's <regex.h> declares re_set_syntax, re_compile_pattern, re_search
   and the RE_SYNTAX_* constants only when GNU extensions are enabled, so
   request them before the first header is included.  */
#ifndef _GNU_SOURCE
# define _GNU_SOURCE 1
#endif

#include <sys/types.h>
#include <mcheck.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>

/* Short aliases for the two regex syntaxes used in the test table.  */
#define BRE RE_SYNTAX_POSIX_BASIC
#define ERE RE_SYNTAX_POSIX_EXTENDED

32: static struct test_s
33: {
34: int syntax;
35: const char *pattern;
36: const char *string;
37: int start, res;
38: } tests[] = {
39: {BRE, "\\<A", "CBAA", 0, -1},
40: {BRE, "\\<A", "CBAA", 2, -1},
41: {BRE, "A\\>", "CAAB", 1, -1},
42: {BRE, "\\bA", "CBAA", 0, -1},
43: {BRE, "\\bA", "CBAA", 2, -1},
44: {BRE, "A\\b", "CAAB", 1, -1},
45: {BRE, "\\<A", "AA", 0, 0},
46: {BRE, "\\<A", "C-AA", 2, 2},
47: {BRE, "A\\>", "CAA-", 1, 2},
48: {BRE, "A\\>", "CAA", 1, 2},
49: {BRE, "\\bA", "AA", 0, 0},
50: {BRE, "\\bA", "C-AA", 2, 2},
51: {BRE, "A\\b", "CAA-", 1, 2},
52: {BRE, "A\\b", "CAA", 1, 2},
53: {BRE, "\\<[A]", "CBAA", 0, -1},
54: {BRE, "\\<[A]", "CBAA", 2, -1},
55: {BRE, "[A]\\>", "CAAB", 1, -1},
56: {BRE, "\\b[A]", "CBAA", 0, -1},
57: {BRE, "\\b[A]", "CBAA", 2, -1},
58: {BRE, "[A]\\b", "CAAB", 1, -1},
59: {BRE, "\\<[A]", "AA", 0, 0},
60: {BRE, "\\<[A]", "C-AA", 2, 2},
61: {BRE, "[A]\\>", "CAA-", 1, 2},
62: {BRE, "[A]\\>", "CAA", 1, 2},
63: {BRE, "\\b[A]", "AA", 0, 0},
64: {BRE, "\\b[A]", "C-AA", 2, 2},
65: {BRE, "[A]\\b", "CAA-", 1, 2},
66: {BRE, "[A]\\b", "CAA", 1, 2},
67: {ERE, "\\b(A|!|.B)", "A=AC", 0, 0},
68: {ERE, "\\b(A|!|.B)", "=AC", 0, 1},
69: {ERE, "\\b(A|!|.B)", "!AC", 0, 1},
70: {ERE, "\\b(A|!|.B)", "=AB", 0, 1},
71: {ERE, "\\b(A|!|.B)", "DA!C", 0, 2},
72: {ERE, "\\b(A|!|.B)", "=CB", 0, 1},
73: {ERE, "\\b(A|!|.B)", "!CB", 0, 1},
74: {ERE, "\\b(A|!|.B)", "D,B", 0, 1},
75: {ERE, "\\b(A|!|.B)", "!.C", 0, -1},
76: {ERE, "\\b(A|!|.B)", "BCB", 0, -1},
77: {ERE, "(A|\\b)(A|B|C)", "DAAD", 0, 1},
78: {ERE, "(A|\\b)(A|B|C)", "DABD", 0, 1},
79: {ERE, "(A|\\b)(A|B|C)", "AD", 0, 0},
80: {ERE, "(A|\\b)(A|B|C)", "C!", 0, 0},
81: {ERE, "(A|\\b)(A|B|C)", "D,B", 0, 2},
82: {ERE, "(A|\\b)(A|B|C)", "DA?A", 0, 3},
83: {ERE, "(A|\\b)(A|B|C)", "BBC", 0, 0},
84: {ERE, "(A|\\b)(A|B|C)", "DA", 0, -1},
85: {ERE, "(!|\\b)(!|=|~)", "A!=\\", 0, 1},
86: {ERE, "(!|\\b)(!|=|~)", "/!=A", 0, 1},
87: {ERE, "(!|\\b)(!|=|~)", "A=A", 0, 1},
88: {ERE, "(!|\\b)(!|=|~)", "==!=", 0, 2},
89: {ERE, "(!|\\b)(!|=|~)", "==C~", 0, 3},
90: {ERE, "(!|\\b)(!|=|~)", "=~=", 0, -1},
91: {ERE, "(!|\\b)(!|=|~)", "~!", 0, -1},
92: {ERE, "(!|\\b)(!|=|~)", "~=~", 0, -1},
93: {ERE, "(\\b|A.)[ABC]", "AC", 0, 0},
94: {ERE, "(\\b|A.)[ABC]", "=A", 0, 1},
95: {ERE, "(\\b|A.)[ABC]", "DACC", 0, 1},
96: {ERE, "(\\b|A.)[A~C]", "AC", 0, 0},
97: {ERE, "(\\b|A.)[A~C]", "=A", 0, 1},
98: {ERE, "(\\b|A.)[A~C]", "DACC", 0, 1},
99: {ERE, "(\\b|A.)[A~C]", "B!A=", 0, 2},
100: {ERE, "(\\b|A.)[A~C]", "B~C", 0, 1},
101: {ERE, ".\\b.", "AA~", 0, 1},
102: {ERE, ".\\b.", "=A=", 0, 0},
103: {ERE, ".\\b.", "==", 0, -1},
104: {ERE, ".\\b.", "ABA", 0, -1},
105: {ERE, "[^k]\\b[^k]", "AA~", 0, 1},
106: {ERE, "[^k]\\b[^k]", "=A=", 0, 0},
107: {ERE, "[^k]\\b[^k]", "Ak~kA~", 0, 4},
108: {ERE, "[^k]\\b[^k]", "==", 0, -1},
109: {ERE, "[^k]\\b[^k]", "ABA", 0, -1},
110: {ERE, "[^k]\\b[^k]", "Ak~", 0, -1},
111: {ERE, "[^k]\\b[^k]", "k=k", 0, -1},
112: {ERE, "[^C]\\b[^C]", "AA~", 0, 1},
113: {ERE, "[^C]\\b[^C]", "=A=", 0, 0},
114: {ERE, "[^C]\\b[^C]", "AC~CA~", 0, 4},
115: {ERE, "[^C]\\b[^C]", "==", 0, -1},
116: {ERE, "[^C]\\b[^C]", "ABA", 0, -1},
117: {ERE, "[^C]\\b[^C]", "AC~", 0, -1},
118: {ERE, "[^C]\\b[^C]", "C=C", 0, -1},
119: {ERE, "\\<(A|!|.B)", "A=AC", 0, 0},
120: {ERE, "\\<(A|!|.B)", "=AC", 0, 1},
121: {ERE, "\\<(A|!|.B)", "!AC", 0, 1},
122: {ERE, "\\<(A|!|.B)", "=AB", 0, 1},
123: {ERE, "\\<(A|!|.B)", "=CB", 0, 1},
124: {ERE, "\\<(A|!|.B)", "!CB", 0, 1},
125: {ERE, "\\<(A|!|.B)", "DA!C", 0, -1},
126: {ERE, "\\<(A|!|.B)", "D,B", 0, -1},
127: {ERE, "\\<(A|!|.B)", "!.C", 0, -1},
128: {ERE, "\\<(A|!|.B)", "BCB", 0, -1},
129: {ERE, "(A|\\<)(A|B|C)", "DAAD", 0, 1},
130: {ERE, "(A|\\<)(A|B|C)", "DABD", 0, 1},
131: {ERE, "(A|\\<)(A|B|C)", "AD", 0, 0},
132: {ERE, "(A|\\<)(A|B|C)", "C!", 0, 0},
133: {ERE, "(A|\\<)(A|B|C)", "D,B", 0, 2},
134: {ERE, "(A|\\<)(A|B|C)", "DA?A", 0, 3},
135: {ERE, "(A|\\<)(A|B|C)", "BBC", 0, 0},
136: {ERE, "(A|\\<)(A|B|C)", "DA", 0, -1},
137: {ERE, "(!|\\<)(!|=|~)", "A!=\\", 0, 1},
138: {ERE, "(!|\\<)(!|=|~)", "/!=A", 0, 1},
139: {ERE, "(!|\\<)(!|=|~)", "==!=", 0, 2},
140: {ERE, "(!|\\<)(!|=|~)", "==C~", 0, -1},
141: {ERE, "(!|\\<)(!|=|~)", "A=A", 0, -1},
142: {ERE, "(!|\\<)(!|=|~)", "=~=", 0, -1},
143: {ERE, "(!|\\<)(!|=|~)", "~!", 0, -1},
144: {ERE, "(!|\\<)(!|=|~)", "~=~", 0, -1},
145: {ERE, "(\\<|A.)[ABC]", "AC", 0, 0},
146: {ERE, "(\\<|A.)[ABC]", "=A", 0, 1},
147: {ERE, "(\\<|A.)[ABC]", "DACC", 0, 1},
148: {ERE, "(\\<|A.)[A~C]", "AC", 0, 0},
149: {ERE, "(\\<|A.)[A~C]", "=A", 0, 1},
150: {ERE, "(\\<|A.)[A~C]", "DACC", 0, 1},
151: {ERE, "(\\<|A.)[A~C]", "B!A=", 0, 2},
152: {ERE, "(\\<|A.)[A~C]", "B~C", 0, 2},
153: {ERE, ".\\<.", "=A=", 0, 0},
154: {ERE, ".\\<.", "AA~", 0, -1},
155: {ERE, ".\\<.", "==", 0, -1},
156: {ERE, ".\\<.", "ABA", 0, -1},
157: {ERE, "[^k]\\<[^k]", "=k=A=", 0, 2},
158: {ERE, "[^k]\\<[^k]", "kk~", 0, -1},
159: {ERE, "[^k]\\<[^k]", "==", 0, -1},
160: {ERE, "[^k]\\<[^k]", "ABA", 0, -1},
161: {ERE, "[^k]\\<[^k]", "=k=", 0, -1},
162: {ERE, "[^C]\\<[^C]", "=C=A=", 0, 2},
163: {ERE, "[^C]\\<[^C]", "CC~", 0, -1},
164: {ERE, "[^C]\\<[^C]", "==", 0, -1},
165: {ERE, "[^C]\\<[^C]", "ABA", 0, -1},
166: {ERE, "[^C]\\<[^C]", "=C=", 0, -1},
167: {ERE, ".\\B.", "ABA", 0, 0},
168: {ERE, ".\\B.", "=BDC", 0, 1},
169: {ERE, "[^k]\\B[^k]", "kkkABA", 0, 3},
170: {ERE, "[^k]\\B[^k]", "kBk", 0, -1},
171: {ERE, "[^C]\\B[^C]", "CCCABA", 0, 3},
172: {ERE, "[^C]\\B[^C]", "CBC", 0, -1},
173: {ERE, ".(\\b|\\B).", "=~AB", 0, 0},
174: {ERE, ".(\\b|\\B).", "A=C", 0, 0},
175: {ERE, ".(\\b|\\B).", "ABC", 0, 0},
176: {ERE, ".(\\b|\\B).", "=~\\!", 0, 0},
177: {ERE, "[^k](\\b|\\B)[^k]", "=~AB", 0, 0},
178: {ERE, "[^k](\\b|\\B)[^k]", "A=C", 0, 0},
179: {ERE, "[^k](\\b|\\B)[^k]", "ABC", 0, 0},
180: {ERE, "[^k](\\b|\\B)[^k]", "=~kBD", 0, 0},
181: {ERE, "[^k](\\b|\\B)[^k]", "=~\\!", 0, 0},
182: {ERE, "[^k](\\b|\\B)[^k]", "=~kB", 0, 0},
183: {ERE, "[^C](\\b|\\B)[^C]", "=~AB", 0, 0},
184: {ERE, "[^C](\\b|\\B)[^C]", "A=C", 0, 0},
185: {ERE, "[^C](\\b|\\B)[^C]", "ABC", 0, 0},
186: {ERE, "[^C](\\b|\\B)[^C]", "=~CBD", 0, 0},
187: {ERE, "[^C](\\b|\\B)[^C]", "=~\\!", 0, 0},
188: {ERE, "[^C](\\b|\\B)[^C]", "=~CB", 0, 0},
189: {ERE, "\\b([A]|[!]|.B)", "A=AC", 0, 0},
190: {ERE, "\\b([A]|[!]|.B)", "=AC", 0, 1},
191: {ERE, "\\b([A]|[!]|.B)", "!AC", 0, 1},
192: {ERE, "\\b([A]|[!]|.B)", "=AB", 0, 1},
193: {ERE, "\\b([A]|[!]|.B)", "DA!C", 0, 2},
194: {ERE, "\\b([A]|[!]|.B)", "=CB", 0, 1},
195: {ERE, "\\b([A]|[!]|.B)", "!CB", 0, 1},
196: {ERE, "\\b([A]|[!]|.B)", "D,B", 0, 1},
197: {ERE, "\\b([A]|[!]|.B)", "!.C", 0, -1},
198: {ERE, "\\b([A]|[!]|.B)", "BCB", 0, -1},
199: {ERE, "([A]|\\b)([A]|[B]|[C])", "DAAD", 0, 1},
200: {ERE, "([A]|\\b)([A]|[B]|[C])", "DABD", 0, 1},
201: {ERE, "([A]|\\b)([A]|[B]|[C])", "AD", 0, 0},
202: {ERE, "([A]|\\b)([A]|[B]|[C])", "C!", 0, 0},
203: {ERE, "([A]|\\b)([A]|[B]|[C])", "D,B", 0, 2},
204: {ERE, "([A]|\\b)([A]|[B]|[C])", "DA?A", 0, 3},
205: {ERE, "([A]|\\b)([A]|[B]|[C])", "BBC", 0, 0},
206: {ERE, "([A]|\\b)([A]|[B]|[C])", "DA", 0, -1},
207: {ERE, "([!]|\\b)([!]|[=]|[~])", "A!=\\", 0, 1},
208: {ERE, "([!]|\\b)([!]|[=]|[~])", "/!=A", 0, 1},
209: {ERE, "([!]|\\b)([!]|[=]|[~])", "A=A", 0, 1},
210: {ERE, "([!]|\\b)([!]|[=]|[~])", "==!=", 0, 2},
211: {ERE, "([!]|\\b)([!]|[=]|[~])", "==C~", 0, 3},
212: {ERE, "([!]|\\b)([!]|[=]|[~])", "=~=", 0, -1},
213: {ERE, "([!]|\\b)([!]|[=]|[~])", "~!", 0, -1},
214: {ERE, "([!]|\\b)([!]|[=]|[~])", "~=~", 0, -1},
215: {ERE, "\\<([A]|[!]|.B)", "A=AC", 0, 0},
216: {ERE, "\\<([A]|[!]|.B)", "=AC", 0, 1},
217: {ERE, "\\<([A]|[!]|.B)", "!AC", 0, 1},
218: {ERE, "\\<([A]|[!]|.B)", "=AB", 0, 1},
219: {ERE, "\\<([A]|[!]|.B)", "=CB", 0, 1},
220: {ERE, "\\<([A]|[!]|.B)", "!CB", 0, 1},
221: {ERE, "\\<([A]|[!]|.B)", "DA!C", 0, -1},
222: {ERE, "\\<([A]|[!]|.B)", "D,B", 0, -1},
223: {ERE, "\\<([A]|[!]|.B)", "!.C", 0, -1},
224: {ERE, "\\<([A]|[!]|.B)", "BCB", 0, -1},
225: {ERE, "([A]|\\<)([A]|[B]|[C])", "DAAD", 0, 1},
226: {ERE, "([A]|\\<)([A]|[B]|[C])", "DABD", 0, 1},
227: {ERE, "([A]|\\<)([A]|[B]|[C])", "AD", 0, 0},
228: {ERE, "([A]|\\<)([A]|[B]|[C])", "C!", 0, 0},
229: {ERE, "([A]|\\<)([A]|[B]|[C])", "D,B", 0, 2},
230: {ERE, "([A]|\\<)([A]|[B]|[C])", "DA?A", 0, 3},
231: {ERE, "([A]|\\<)([A]|[B]|[C])", "BBC", 0, 0},
232: {ERE, "([A]|\\<)([A]|[B]|[C])", "DA", 0, -1},
233: {ERE, "([!]|\\<)([!=]|[~])", "A!=\\", 0, 1},
234: {ERE, "([!]|\\<)([!=]|[~])", "/!=A", 0, 1},
235: {ERE, "([!]|\\<)([!=]|[~])", "==!=", 0, 2},
236: {ERE, "([!]|\\<)([!=]|[~])", "==C~", 0, -1},
237: {ERE, "([!]|\\<)([!=]|[~])", "A=A", 0, -1},
238: {ERE, "([!]|\\<)([!=]|[~])", "=~=", 0, -1},
239: {ERE, "([!]|\\<)([!=]|[~])", "~!", 0, -1},
240: {ERE, "([!]|\\<)([!=]|[~])", "~=~", 0, -1},
241: {ERE, "(\\<|[A].)[ABC]", "AC", 0, 0},
242: {ERE, "(\\<|[A].)[ABC]", "=A", 0, 1},
243: {ERE, "(\\<|[A].)[ABC]", "DACC", 0, 1},
244: {ERE, "(\\<|[A].)[A~C]", "AC", 0, 0},
245: {ERE, "(\\<|[A].)[A~C]", "=A", 0, 1},
246: {ERE, "(\\<|[A].)[A~C]", "DACC", 0, 1},
247: {ERE, "(\\<|[A].)[A~C]", "B!A=", 0, 2},
248: {ERE, "(\\<|[A].)[A~C]", "B~C", 0, 2},
249: {ERE, "^[^A]*\\bB", "==B", 0, 0},
250: {ERE, "^[^A]*\\bB", "CBD!=B", 0, 0},
251: {ERE, "[^A]*\\bB", "==B", 2, 2}
252: };
253:
254: int
255: do_one_test (const struct test_s *test, const char *fail)
256: {
257: int res;
258: const char *err;
259: struct re_pattern_buffer regbuf;
260:
261: re_set_syntax (test->syntax);
262: memset (®buf, '\0', sizeof (regbuf));
263: err = re_compile_pattern (test->pattern, strlen (test->pattern),
264: ®buf);
265: if (err != NULL)
266: {
267: printf ("%sre_compile_pattern \"%s\" failed: %s\n", fail, test->pattern,
268: err);
269: return 1;
270: }
271:
272: res = re_search (®buf, test->string, strlen (test->string),
273: test->start, strlen (test->string) - test->start, NULL);
274: if (res != test->res)
275: {
276: printf ("%sre_search \"%s\" \"%s\" failed: %d (expected %d)\n",
277: fail, test->pattern, test->string, res, test->res);
278: regfree (®buf);
279: return 1;
280: }
281:
282: if (test->res > 0 && test->start == 0)
283: {
284: res = re_search (®buf, test->string, strlen (test->string),
285: test->res, strlen (test->string) - test->res, NULL);
286: if (res != test->res)
287: {
288: printf ("%sre_search from expected \"%s\" \"%s\" failed: %d (expected %d)\n",
289: fail, test->pattern, test->string, res, test->res);
290: regfree (®buf);
291: return 1;
292: }
293: }
294:
295: regfree (®buf);
296: return 0;
297: }
298:
/* Store at P the multibyte UTF-8 sequence that stands in for the ASCII
   placeholder C and return a pointer just past the bytes written.  The
   longest sequence is four bytes.  If C is not one of the placeholders,
   nothing is written and P is returned unchanged.  */
static char *
replace (char *p, char c)
{
  switch (c)
    {
      /* A -> U+00C4 (A with diaeresis).  */
    case 'A': *p++ = '\xc3'; *p++ = '\x84'; break;
      /* B -> U+00D6 (O with diaeresis).  */
    case 'B': *p++ = '\xc3'; *p++ = '\x96'; break;
      /* C -> U+00DC (U with diaeresis).  */
    case 'C': *p++ = '\xc3'; *p++ = '\x9c'; break;
      /* D -> U+00E4 (a with diaeresis).  */
    case 'D': *p++ = '\xc3'; *p++ = '\xa4'; break;
      /* ! -> U+00D7 (multiplication sign).  */
    case '!': *p++ = '\xc3'; *p++ = '\x97'; break;
      /* = -> U+2014 (em dash), three bytes.  */
    case '=': *p++ = '\xe2'; *p++ = '\x80'; *p++ = '\x94'; break;
      /* ~ -> U+1D15E (musical symbol half note), four bytes.  */
    case '~': *p++ = '\xf0'; *p++ = '\x9d'; *p++ = '\x85'; *p++ = '\x9e';
      break;
    default:
      /* Not a placeholder: leave the output untouched.  */
      break;
    }
  return p;
}

323: int
324: do_mb_tests (const struct test_s *test)
325: {
326: int i, j;
327: struct test_s t;
328: const char *const chars = "ABCD!=~";
329: char repl[8], *p;
330: char pattern[strlen (test->pattern) * 4 + 1];
331: char string[strlen (test->string) * 4 + 1];
332: char fail[8 + sizeof ("UTF-8 ")];
333:
334: t = *test;
335: t.pattern = pattern;
336: t.string = string;
337: strcpy (fail, "UTF-8 ");
338: for (i = 1; i < 128; ++i)
339: {
340: p = repl;
341: for (j = 0; j < 7; ++j)
342: if (i & (1 << j))
343: {
344: if (!strchr (test->pattern, chars[j])
345: && !strchr (test->string, chars[j]))
346: break;
347: *p++ = chars[j];
348: }
349: if (j < 7)
350: continue;
351: *p = '\0';
352:
353: for (j = 0, p = pattern; test->pattern[j]; ++j)
354: if (strchr (repl, test->pattern[j]))
355: p = replace (p, test->pattern[j]);
356: else if (test->pattern[j] == '\\' && test->pattern[j + 1])
357: {
358: *p++ = test->pattern[j++];
359: *p++ = test->pattern[j];
360: }
361: else
362: *p++ = test->pattern[j];
363: *p = '\0';
364:
365: t.start = test->start;
366: t.res = test->res;
367:
368: for (j = 0, p = string; test->string[j]; ++j)
369: if (strchr (repl, test->string[j]))
370: {
371: char *d = replace (p, test->string[j]);
372: if (test->start > j)
373: t.start += d - p - 1;
374: if (test->res > j)
375: t.res += d - p - 1;
376: p = d;
377: }
378: else
379: *p++ = test->string[j];
380: *p = '\0';
381:
382: p = stpcpy (fail + strlen ("UTF-8 "), repl);
383: *p++ = ' ';
384: *p = '\0';
385:
386: if (do_one_test (&t, fail))
387: return 1;
388: }
389: return 0;
390: }
391:
392: int
393: main (void)
394: {
395: size_t i;
396: int ret = 0;
397:
398: mtrace ();
399:
400: for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
401: {
402: if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
403: {
404: puts ("setlocale de_DE.ISO-8859-1 failed");
405: ret = 1;
406: }
407: ret |= do_one_test (&tests[i], "");
408: if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
409: {
410: puts ("setlocale de_DE.UTF-8 failed");
411: ret = 1;
412: }
413: ret |= do_one_test (&tests[i], "UTF-8 ");
414: ret |= do_mb_tests (&tests[i]);
415: }
416:
417: return ret;
418: }