
1: ; 2: ; 3: ; this file contains a script of tests to run through regress.exe 4: ; 5: ; comments start with a semicolon and proceed to the end of the line 6: ; 7: ; changes to regular expression compile flags start with a "-" as the first 8: ; non-whitespace character and consist of a list of the printable names 9: ; of the flags, for example "match_default" 10: ; 11: ; Other lines contain a test to perform using the current flag status 12: ; the first token contains the expression to compile, the second the string 13: ; to match it against. If the second string is "!" then the expression should 14: ; not compile, that is the first string is an invalid regular expression. 15: ; This is then followed by a list of integers that specify what should match, 16: ; each pair represents the starting and ending positions of a subexpression 17: ; starting with the zeroth subexpression (the whole match). 18: ; A value of -1 indicates that the subexpression should not take part in the 19: ; match at all, if the first value is -1 then no part of the expression should 20: ; match the string. 21: ; 22: ; Tests taken from BOOST testsuite and adapted to glibc regex. 
23: ; 24: ; Boost Software License - Version 1.0 - August 17th, 2003 25: ; 26: ; Permission is hereby granted, free of charge, to any person or organization 27: ; obtaining a copy of the software and accompanying documentation covered by 28: ; this license (the "Software") to use, reproduce, display, distribute, 29: ; execute, and transmit the Software, and to prepare derivative works of the 30: ; Software, and to permit third-parties to whom the Software is furnished to 31: ; do so, all subject to the following: 32: ; 33: ; The copyright notices in the Software and this entire statement, including 34: ; the above license grant, this restriction and the following disclaimer, 35: ; must be included in all copies of the Software, in whole or in part, and 36: ; all derivative works of the Software, unless such copies or derivative 37: ; works are solely in the form of machine-executable object code generated by 38: ; a source language processor. 39: ; 40: ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 41: ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 42: ; FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 43: ; SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 44: ; FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 45: ; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 46: ; DEALINGS IN THE SOFTWARE. 47: ; 48: 49: - match_default normal REG_EXTENDED 50: 51: ; 52: ; try some really simple literals: 53: a a 0 1 54: Z Z 0 1 55: Z aaa -1 -1 56: Z xxxxZZxxx 4 5 57: 58: ; and some simple brackets: 59: (a) zzzaazz 3 4 3 4 60: () zzz 0 0 0 0 61: () "" 0 0 0 0 62: ( ! 63: ) ) 0 1 64: (aa ! 65: aa) baa)b 1 4 66: a b -1 -1 67: \(\) () 0 2 68: \(a\) (a) 0 3 69: \() () 0 2 70: (\) ! 
71: p(a)rameter ABCparameterXYZ 3 12 4 5 72: [pq](a)rameter ABCparameterXYZ 3 12 4 5 73: 74: ; now try escaped brackets: 75: - match_default bk_parens REG_BASIC 76: \(a\) zzzaazz 3 4 3 4 77: \(\) zzz 0 0 0 0 78: \(\) "" 0 0 0 0 79: \( ! 80: \) ! 81: \(aa ! 82: aa\) ! 83: () () 0 2 84: (a) (a) 0 3 85: (\) ! 86: \() ! 87: 88: ; now move on to "." wildcards 89: - match_default normal REG_EXTENDED REG_STARTEND 90: . a 0 1 91: . \n 0 1 92: . \r 0 1 93: . \0 0 1 94: 95: ; 96: ; now move on to the repetition ops, 97: ; starting with operator * 98: - match_default normal REG_EXTENDED 99: a* b 0 0 100: ab* a 0 1 101: ab* ab 0 2 102: ab* sssabbbbbbsss 3 10 103: ab*c* a 0 1 104: ab*c* abbb 0 4 105: ab*c* accc 0 4 106: ab*c* abbcc 0 5 107: *a ! 108: \<* ! 109: \>* ! 110: \n* \n\n 0 2 111: \** ** 0 2 112: \* * 0 1 113: 114: ; now try operator + 115: ab+ a -1 -1 116: ab+ ab 0 2 117: ab+ sssabbbbbbsss 3 10 118: ab+c+ a -1 -1 119: ab+c+ abbb -1 -1 120: ab+c+ accc -1 -1 121: ab+c+ abbcc 0 5 122: +a ! 123: \<+ ! 124: \>+ ! 125: \n+ \n\n 0 2 126: \+ + 0 1 127: \+ ++ 0 1 128: \++ ++ 0 2 129: 130: ; now try operator ? 131: - match_default normal REG_EXTENDED 132: a? b 0 0 133: ab? a 0 1 134: ab? ab 0 2 135: ab? sssabbbbbbsss 3 5 136: ab?c? a 0 1 137: ab?c? abbb 0 2 138: ab?c? accc 0 2 139: ab?c? abcc 0 3 140: ?a ! 141: \<? ! 142: \>? ! 143: \n? \n\n 0 1 144: \? ? 0 1 145: \? ?? 0 1 146: \?? ?? 0 1 147: 148: ; now try operator {} 149: - match_default normal REG_EXTENDED 150: a{2} a -1 -1 151: a{2} aa 0 2 152: a{2} aaa 0 2 153: a{2,} a -1 -1 154: a{2,} aa 0 2 155: a{2,} aaaaa 0 5 156: a{2,4} a -1 -1 157: a{2,4} aa 0 2 158: a{2,4} aaa 0 3 159: a{2,4} aaaa 0 4 160: a{2,4} aaaaa 0 4 161: a{} ! 162: a{2 !
163: a} a} 0 2 164: \{\} {} 0 2 165: 166: - match_default normal REG_BASIC 167: a\{2\} a -1 -1 168: a\{2\} aa 0 2 169: a\{2\} aaa 0 2 170: a\{2,\} a -1 -1 171: a\{2,\} aa 0 2 172: a\{2,\} aaaaa 0 5 173: a\{2,4\} a -1 -1 174: a\{2,4\} aa 0 2 175: a\{2,4\} aaa 0 3 176: a\{2,4\} aaaa 0 4 177: a\{2,4\} aaaaa 0 4 178: {} {} 0 2 179: 180: ; now test the alternation operator | 181: - match_default normal REG_EXTENDED 182: a|b a 0 1 183: a|b b 0 1 184: a(b|c) ab 0 2 1 2 185: a(b|c) ac 0 2 1 2 186: a(b|c) ad -1 -1 -1 -1 187: a\| a| 0 2 188: 189: ; now test the set operator [] 190: - match_default normal REG_EXTENDED 191: ; try some literals first 192: [abc] a 0 1 193: [abc] b 0 1 194: [abc] c 0 1 195: [abc] d -1 -1 196: [^bcd] a 0 1 197: [^bcd] b -1 -1 198: [^bcd] d -1 -1 199: [^bcd] e 0 1 200: a[b]c abc 0 3 201: a[ab]c abc 0 3 202: a[^ab]c adc 0 3 203: a[]b]c a]c 0 3 204: a[[b]c a[c 0 3 205: a[-b]c a-c 0 3 206: a[^]b]c adc 0 3 207: a[^-b]c adc 0 3 208: a[b-]c a-c 0 3 209: a[b ! 210: a[] ! 211: 212: ; then some ranges 213: [b-e] a -1 -1 214: [b-e] b 0 1 215: [b-e] e 0 1 216: [b-e] f -1 -1 217: [^b-e] a 0 1 218: [^b-e] b -1 -1 219: [^b-e] e -1 -1 220: [^b-e] f 0 1 221: a[1-3]c a2c 0 3 222: a[3-1]c ! 223: a[1-3-5]c ! 224: a[1- ! 225: 226: ; and some classes 227: a[[:alpha:]]c abc 0 3 228: a[[:unknown:]]c ! 229: a[[: ! 230: a[[:alpha ! 231: a[[:alpha:] ! 232: a[[:alpha,:] ! 233: a[[:]:]]b ! 234: a[[:-:]]b ! 235: a[[:alph:]] ! 236: a[[:alphabet:]] ! 237: [[:alnum:]]+ -%@a0X_- 3 6 238: [[:alpha:]]+ -%@aX_0- 3 5 239: [[:blank:]]+ "a \tb" 1 4 240: [[:cntrl:]]+ a\n\tb 1 3 241: [[:digit:]]+ a019b 1 4 242: [[:graph:]]+ " a%b " 1 4 243: [[:lower:]]+ AabC 1 3 244: ; This test fails with STLPort, disable for now as this is a corner case anyway... 
245: ;[[:print:]]+ "\na b\n" 1 4 246: [[:punct:]]+ " %-&\t" 1 4 247: [[:space:]]+ "a \n\t\rb" 1 5 248: [[:upper:]]+ aBCd 1 3 249: [[:xdigit:]]+ p0f3Cx 1 5 250: 251: ; now test flag settings: 252: - escape_in_lists REG_NO_POSIX_TEST 253: [\n] \n 0 1 254: - REG_NO_POSIX_TEST 255: 256: ; line anchors 257: - match_default normal REG_EXTENDED 258: ^ab ab 0 2 259: ^ab xxabxx -1 -1 260: ab$ ab 0 2 261: ab$ abxx -1 -1 262: - match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL 263: ^ab ab -1 -1 264: ^ab xxabxx -1 -1 265: ab$ ab -1 -1 266: ab$ abxx -1 -1 267: 268: ; back references 269: - match_default normal REG_PERL 270: a(b)\2c ! 271: a(b\1)c ! 272: a(b*)c\1d abbcbbd 0 7 1 3 273: a(b*)c\1d abbcbd -1 -1 274: a(b*)c\1d abbcbbbd -1 -1 275: ^(.)\1 abc -1 -1 276: a([bc])\1d abcdabbd 4 8 5 6 277: ; strictly speaking this is at best ambiguous, at worst wrong, this is what most 278: ; re implementations will match though. 279: a(([bc])\2)*d abbccd 0 6 3 5 3 4 280: 281: a(([bc])\2)*d abbcbd -1 -1 282: a((b)*\2)*d abbbd 0 5 1 4 2 3 283: ; perl only: 284: (ab*)[ab]*\1 ababaaa 0 7 0 1 285: (a)\1bcd aabcd 0 5 0 1 286: (a)\1bc*d aabcd 0 5 0 1 287: (a)\1bc*d aabd 0 4 0 1 288: (a)\1bc*d aabcccd 0 7 0 1 289: (a)\1bc*[ce]d aabcccd 0 7 0 1 290: ^(a)\1b(c)*cd$ aabcccd 0 7 0 1 4 5 291: 292: ; posix only: 293: - match_default extended REG_EXTENDED 294: (ab*)[ab]*\1 ababaaa 0 7 0 1 295: 296: ; 297: ; word operators: 298: \w a 0 1 299: \w z 0 1 300: \w A 0 1 301: \w Z 0 1 302: \w _ 0 1 303: \w } -1 -1 304: \w ` -1 -1 305: \w [ -1 -1 306: \w @ -1 -1 307: ; non-word: 308: \W a -1 -1 309: \W z -1 -1 310: \W A -1 -1 311: \W Z -1 -1 312: \W _ -1 -1 313: \W } 0 1 314: \W ` 0 1 315: \W [ 0 1 316: \W @ 0 1 317: ; word start: 318: \<abcd " abcd" 2 6 319: \<ab cab -1 -1 320: \<ab "\nab" 1 3 321: \<tag ::tag 2 5 322: ;word end: 323: abc\> abc 0 3 324: abc\> abcd -1 -1 325: abc\> abc\n 0 3 326: abc\> abc:: 0 3 327: ; word boundary: 328: \babcd " abcd" 2 6 329: \bab cab -1 -1
330: \bab "\nab" 1 3 331: \btag ::tag 2 5 332: abc\b abc 0 3 333: abc\b abcd -1 -1 334: abc\b abc\n 0 3 335: abc\b abc:: 0 3 336: ; within word: 337: \B ab 1 1 338: a\Bb ab 0 2 339: a\B ab 0 1 340: a\B a -1 -1 341: a\B "a " -1 -1 342: 343: ; 344: ; buffer operators: 345: \`abc abc 0 3 346: \`abc \nabc -1 -1 347: \`abc " abc" -1 -1 348: abc\' abc 0 3 349: abc\' abc\n -1 -1 350: abc\' "abc " -1 -1 351: 352: ; 353: ; now follows various complex expressions designed to try and bust the matcher: 354: a(((b)))c abc 0 3 1 2 1 2 1 2 355: a(b|(c))d abd 0 3 1 2 -1 -1 356: a(b|(c))d acd 0 3 1 2 1 2 357: a(b*|c)d abbd 0 4 1 3 358: ; just gotta have one DFA-buster, of course 359: a[ab]{20} aaaaabaaaabaaaabaaaab 0 21 360: ; and an inline expansion in case somebody gets tricky 361: a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab 0 21 362: ; and in case somebody just slips in an NFA... 363: a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights 0 31 21 24 24 31 364: ; one really big one 365: 1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b 1 71 366: ; fish for problems as brackets go past 8 367: [ab][cd][ef][gh][ij][kl][mn] xacegikmoq 1 8 368: [ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq 1 9 369: [ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy 1 10 370: [ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy 1 10 371: ; and as parenthesis go past 9: 372: (a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi 1 9 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 373: (a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij 1 10 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 374: (a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk 1 11 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 375: (a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl 1 12 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 376: (a)d|(b)c abc 1 3 -1 -1 1 2 377: 
_+((www)|(ftp)|(mailto)):_* "_wwwnocolon _mailto:" 12 20 13 19 -1 -1 -1 -1 13 19 378: 379: ; subtleties of matching 380: ;a(b)?c\1d acd 0 3 -1 -1 381: ; POSIX is about the following test: 382: a(b)?c\1d acd -1 -1 -1 -1 383: a(b?c)+d accd 0 4 2 3 384: (wee|week)(knights|night) weeknights 0 10 0 3 3 10 385: .* abc 0 3 386: a(b|(c))d abd 0 3 1 2 -1 -1 387: a(b|(c))d acd 0 3 1 2 1 2 388: a(b*|c|e)d abbd 0 4 1 3 389: a(b*|c|e)d acd 0 3 1 2 390: a(b*|c|e)d ad 0 2 1 1 391: a(b?)c abc 0 3 1 2 392: a(b?)c ac 0 2 1 1 393: a(b+)c abc 0 3 1 2 394: a(b+)c abbbc 0 5 1 4 395: a(b*)c ac 0 2 1 1 396: (a|ab)(bc([de]+)f|cde) abcdef 0 6 0 1 1 6 3 5 397: a([bc]?)c abc 0 3 1 2 398: a([bc]?)c ac 0 2 1 1 399: a([bc]+)c abc 0 3 1 2 400: a([bc]+)c abcc 0 4 1 3 401: a([bc]+)bc abcbc 0 5 1 3 402: a(bb+|b)b abb 0 3 1 2 403: a(bbb+|bb+|b)b abb 0 3 1 2 404: a(bbb+|bb+|b)b abbb 0 4 1 3 405: a(bbb+|bb+|b)bb abbb 0 4 1 2 406: (.*).* abcdef 0 6 0 6 407: (a*)* bc 0 0 0 0 408: xyx*xz xyxxxxyxxxz 5 11 409: 410: ; do we get the right subexpression when it is used more than once? 
411: a(b|c)*d ad 0 2 -1 -1 412: a(b|c)*d abcd 0 4 2 3 413: a(b|c)+d abd 0 3 1 2 414: a(b|c)+d abcd 0 4 2 3 415: a(b|c?)+d ad 0 2 1 1 416: a(b|c){0,0}d ad 0 2 -1 -1 417: a(b|c){0,1}d ad 0 2 -1 -1 418: a(b|c){0,1}d abd 0 3 1 2 419: a(b|c){0,2}d ad 0 2 -1 -1 420: a(b|c){0,2}d abcd 0 4 2 3 421: a(b|c){0,}d ad 0 2 -1 -1 422: a(b|c){0,}d abcd 0 4 2 3 423: a(b|c){1,1}d abd 0 3 1 2 424: a(b|c){1,2}d abd 0 3 1 2 425: a(b|c){1,2}d abcd 0 4 2 3 426: a(b|c){1,}d abd 0 3 1 2 427: a(b|c){1,}d abcd 0 4 2 3 428: a(b|c){2,2}d acbd 0 4 2 3 429: a(b|c){2,2}d abcd 0 4 2 3 430: a(b|c){2,4}d abcd 0 4 2 3 431: a(b|c){2,4}d abcbd 0 5 3 4 432: a(b|c){2,4}d abcbcd 0 6 4 5 433: a(b|c){2,}d abcd 0 4 2 3 434: a(b|c){2,}d abcbd 0 5 3 4 435: ; perl only: these conflict with the POSIX test below 436: ;a(b|c?)+d abcd 0 4 3 3 437: ;a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1 438: ;a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3 439: 440: ; posix only: 441: - match_default extended REG_EXTENDED REG_STARTEND 442: 443: a(b|c?)+d abcd 0 4 2 3 444: a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3 445: a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1 446: a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3 447: a(b|((c)*))+d ad 0 2 1 1 1 1 -1 -1 448: a(b|((c)*))*d abcd 0 4 2 3 2 3 2 3 449: a(b+|((c)*))*d abd 0 3 1 2 -1 -1 -1 -1 450: a(b+|((c)*))*d abcd 0 4 2 3 2 3 2 3 451: a(b|((c)*))*d ad 0 2 1 1 1 1 -1 -1 452: 453: - match_default normal REG_PERL 454: ; try to match C++ syntax elements: 455: ; line comment: 456: //[^\n]* "++i //here is a line comment\n" 4 28 457: ; block comment: 458: /\*([^*]|\*+[^*/])*\*+/ "/* here is a block comment */" 0 29 26 27 459: /\*([^*]|\*+[^*/])*\*+/ "/**/" 0 4 -1 -1 460: /\*([^*]|\*+[^*/])*\*+/ "/***/" 0 5 -1 -1 461: /\*([^*]|\*+[^*/])*\*+/ "/****/" 0 6 -1 -1 462: /\*([^*]|\*+[^*/])*\*+/ "/*****/" 0 7 -1 -1 463: /\*([^*]|\*+[^*/])*\*+/ "/*****/*/" 0 7 -1 -1 464: ; preprossor directives: 465: ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol" 0 19 -1 -1 466: ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define 
some_symbol(x) #x" 0 25 -1 -1 467: ; perl only: 468: ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 30 42 469: ; literals: 470: ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF 0 4 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1 471: ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 0 2 0 2 -1 -1 0 2 -1 -1 -1 -1 -1 -1 472: ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu 0 5 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1 473: ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL 0 5 0 4 0 4 -1 -1 4 5 -1 -1 -1 -1 474: ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 0 24 0 18 0 18 -1 -1 19 24 19 24 22 24 475: ; strings: 476: '([^\\']|\\.)*' '\\x3A' 0 6 4 5 477: '([^\\']|\\.)*' '\\'' 0 4 1 3 478: '([^\\']|\\.)*' '\\n' 0 4 1 3 479: 480: ; finally try some case insensitive matches: 481: - match_default normal REG_EXTENDED REG_ICASE 482: ; upper and lower have no meaning here so they fail, however these 483: ; may compile with other libraries... 484: ;[[:lower:]] ! 485: ;[[:upper:]] ! 486: 0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\} 0 72 487: 488: ; known and suspected bugs: 489: - match_default normal REG_EXTENDED 490: \( ( 0 1 491: \) ) 0 1 492: \$ $ 0 1 493: \^ ^ 0 1 494: \. . 0 1 495: \* * 0 1 496: \+ + 0 1 497: \? ? 
0 1 498: \[ [ 0 1 499: \] ] 0 1 500: \| | 0 1 501: \\ \\ 0 1 502: # # 0 1 503: \# # 0 1 504: a- a- 0 2 505: \- - 0 1 506: \{ { 0 1 507: \} } 0 1 508: 0 0 0 1 509: 1 1 0 1 510: 9 9 0 1 511: b b 0 1 512: B B 0 1 513: < < 0 1 514: > > 0 1 515: w w 0 1 516: W W 0 1 517: ` ` 0 1 518: ' ' 0 1 519: \n \n 0 1 520: , , 0 1 521: a a 0 1 522: f f 0 1 523: n n 0 1 524: r r 0 1 525: t t 0 1 526: v v 0 1 527: c c 0 1 528: x x 0 1 529: : : 0 1 530: (\.[[:alnum:]]+){2} "w.a.b " 1 5 3 5 531: 532: - match_default normal REG_EXTENDED REG_ICASE 533: a A 0 1 534: A a 0 1 535: [abc]+ abcABC 0 6 536: [ABC]+ abcABC 0 6 537: [a-z]+ abcABC 0 6 538: [A-Z]+ abzANZ 0 6 539: [a-Z]+ abzABZ 0 6 540: [A-z]+ abzABZ 0 6 541: [[:lower:]]+ abyzABYZ 0 8 542: [[:upper:]]+ abzABZ 0 6 543: [[:alpha:]]+ abyzABYZ 0 8 544: [[:alnum:]]+ 09abyzABYZ 0 10 545: 546: ; word start: 547: \<abcd " abcd" 2 6 548: \<ab cab -1 -1 549: \<ab "\nab" 1 3 550: \<tag ::tag 2 5 551: ;word end: 552: abc\> abc 0 3 553: abc\> abcd -1 -1 554: abc\> abc\n 0 3 555: abc\> abc:: 0 3 556: 557: ; collating elements and rewritten set code: 558: - match_default normal REG_EXTENDED REG_STARTEND 559: ;[[.zero.]] 0 0 1 560: ;[[.one.]] 1 0 1 561: ;[[.two.]] 2 0 1 562: ;[[.three.]] 3 0 1 563: [[.a.]] baa 1 2 564: ;[[.right-curly-bracket.]] } 0 1 565: ;[[.NUL.]] \0 0 1 566: [[:<:]z] ! 567: [a[:>:]] ! 568: [[=a=]] a 0 1 569: ;[[=right-curly-bracket=]] } 0 1 570: - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE 571: [[.A.]] A 0 1 572: [[.A.]] a 0 1 573: [[.A.]-b]+ AaBb 0 4 574: [A-[.b.]]+ AaBb 0 4 575: [[.a.]-B]+ AaBb 0 4 576: [a-[.B.]]+ AaBb 0 4 577: - match_default normal REG_EXTENDED REG_STARTEND 578: [[.a.]-c]+ abcd 0 3 579: [a-[.c.]]+ abcd 0 3 580: [[:alpha:]-a] ! 581: [a-[:alpha:]] ! 
582: 583: ; try multi-character ligatures: 584: ;[[.ae.]] ae 0 2 585: ;[[.ae.]] aE -1 -1 586: ;[[.AE.]] AE 0 2 587: ;[[.Ae.]] Ae 0 2 588: ;[[.ae.]-b] a -1 -1 589: ;[[.ae.]-b] b 0 1 590: ;[[.ae.]-b] ae 0 2 591: ;[a-[.ae.]] a 0 1 592: ;[a-[.ae.]] b -1 -1 593: ;[a-[.ae.]] ae 0 2 594: - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE 595: ;[[.ae.]] AE 0 2 596: ;[[.ae.]] Ae 0 2 597: ;[[.AE.]] Ae 0 2 598: ;[[.Ae.]] aE 0 2 599: ;[[.AE.]-B] a -1 -1 600: ;[[.Ae.]-b] b 0 1 601: ;[[.Ae.]-b] B 0 1 602: ;[[.ae.]-b] AE 0 2 603: 604: - match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST 605: \s+ "ab ab" 2 5 606: \S+ " abc " 2 5 607: 608: - match_default normal REG_EXTENDED REG_STARTEND 609: \`abc abc 0 3 610: \`abc aabc -1 -1 611: abc\' abc 0 3 612: abc\' abcd -1 -1 613: abc\' abc\n\n -1 -1 614: abc\' abc 0 3 615: 616: ; extended repeat checking to exercise new algorithms: 617: ab.*xy abxy_ 0 4 618: ab.*xy ab_xy_ 0 5 619: ab.*xy abxy 0 4 620: ab.*xy ab_xy 0 5 621: ab.* ab 0 2 622: ab.* ab__ 0 4 623: 624: ab.{2,5}xy ab__xy_ 0 6 625: ab.{2,5}xy ab____xy_ 0 8 626: ab.{2,5}xy ab_____xy_ 0 9 627: ab.{2,5}xy ab__xy 0 6 628: ab.{2,5}xy ab_____xy 0 9 629: ab.{2,5} ab__ 0 4 630: ab.{2,5} ab_______ 0 7 631: ab.{2,5}xy ab______xy -1 -1 632: ab.{2,5}xy ab_xy -1 -1 633: 634: ab.*?xy abxy_ 0 4 635: ab.*?xy ab_xy_ 0 5 636: ab.*?xy abxy 0 4 637: ab.*?xy ab_xy 0 5 638: ab.*? ab 0 2 639: ab.*? ab__ 0 4 640: 641: ab.{2,5}?xy ab__xy_ 0 6 642: ab.{2,5}?xy ab____xy_ 0 8 643: ab.{2,5}?xy ab_____xy_ 0 9 644: ab.{2,5}?xy ab__xy 0 6 645: ab.{2,5}?xy ab_____xy 0 9 646: ab.{2,5}? ab__ 0 4 647: ab.{2,5}?
ab_______ 0 7 648: ab.{2,5}?xy ab______xy -1 -1 649: ab.{2,5}xy ab_xy -1 -1 650: 651: ; again but with slower algorithm variant: 652: - match_default REG_EXTENDED 653: ; now again for single character repeats: 654: 655: ab_*xy abxy_ 0 4 656: ab_*xy ab_xy_ 0 5 657: ab_*xy abxy 0 4 658: ab_*xy ab_xy 0 5 659: ab_* ab 0 2 660: ab_* ab__ 0 4 661: 662: ab_{2,5}xy ab__xy_ 0 6 663: ab_{2,5}xy ab____xy_ 0 8 664: ab_{2,5}xy ab_____xy_ 0 9 665: ab_{2,5}xy ab__xy 0 6 666: ab_{2,5}xy ab_____xy 0 9 667: ab_{2,5} ab__ 0 4 668: ab_{2,5} ab_______ 0 7 669: ab_{2,5}xy ab______xy -1 -1 670: ab_{2,5}xy ab_xy -1 -1 671: 672: ab_*?xy abxy_ 0 4 673: ab_*?xy ab_xy_ 0 5 674: ab_*?xy abxy 0 4 675: ab_*?xy ab_xy 0 5 676: ab_*? ab 0 2 677: ab_*? ab__ 0 4 678: 679: ab_{2,5}?xy ab__xy_ 0 6 680: ab_{2,5}?xy ab____xy_ 0 8 681: ab_{2,5}?xy ab_____xy_ 0 9 682: ab_{2,5}?xy ab__xy 0 6 683: ab_{2,5}?xy ab_____xy 0 9 684: ab_{2,5}? ab__ 0 4 685: ab_{2,5}? ab_______ 0 7 686: ab_{2,5}?xy ab______xy -1 -1 687: ab_{2,5}xy ab_xy -1 -1 688: 689: ; and again for sets: 690: ab[_,;]*xy abxy_ 0 4 691: ab[_,;]*xy ab_xy_ 0 5 692: ab[_,;]*xy abxy 0 4 693: ab[_,;]*xy ab_xy 0 5 694: ab[_,;]* ab 0 2 695: ab[_,;]* ab__ 0 4 696: 697: ab[_,;]{2,5}xy ab__xy_ 0 6 698: ab[_,;]{2,5}xy ab____xy_ 0 8 699: ab[_,;]{2,5}xy ab_____xy_ 0 9 700: ab[_,;]{2,5}xy ab__xy 0 6 701: ab[_,;]{2,5}xy ab_____xy 0 9 702: ab[_,;]{2,5} ab__ 0 4 703: ab[_,;]{2,5} ab_______ 0 7 704: ab[_,;]{2,5}xy ab______xy -1 -1 705: ab[_,;]{2,5}xy ab_xy -1 -1 706: 707: ab[_,;]*?xy abxy_ 0 4 708: ab[_,;]*?xy ab_xy_ 0 5 709: ab[_,;]*?xy abxy 0 4 710: ab[_,;]*?xy ab_xy 0 5 711: ab[_,;]*? ab 0 2 712: ab[_,;]*? ab__ 0 4 713: 714: ab[_,;]{2,5}?xy ab__xy_ 0 6 715: ab[_,;]{2,5}?xy ab____xy_ 0 8 716: ab[_,;]{2,5}?xy ab_____xy_ 0 9 717: ab[_,;]{2,5}?xy ab__xy 0 6 718: ab[_,;]{2,5}?xy ab_____xy 0 9 719: ab[_,;]{2,5}? ab__ 0 4 720: ab[_,;]{2,5}? 
ab_______ 0 7 721: ab[_,;]{2,5}?xy ab______xy -1 -1 722: ab[_,;]{2,5}xy ab_xy -1 -1 723: 724: ; and again for tricky sets with digraphs: 725: ;ab[_[.ae.]]*xy abxy_ 0 4 726: ;ab[_[.ae.]]*xy ab_xy_ 0 5 727: ;ab[_[.ae.]]*xy abxy 0 4 728: ;ab[_[.ae.]]*xy ab_xy 0 5 729: ;ab[_[.ae.]]* ab 0 2 730: ;ab[_[.ae.]]* ab__ 0 4 731: 732: ;ab[_[.ae.]]{2,5}xy ab__xy_ 0 6 733: ;ab[_[.ae.]]{2,5}xy ab____xy_ 0 8 734: ;ab[_[.ae.]]{2,5}xy ab_____xy_ 0 9 735: ;ab[_[.ae.]]{2,5}xy ab__xy 0 6 736: ;ab[_[.ae.]]{2,5}xy ab_____xy 0 9 737: ;ab[_[.ae.]]{2,5} ab__ 0 4 738: ;ab[_[.ae.]]{2,5} ab_______ 0 7 739: ;ab[_[.ae.]]{2,5}xy ab______xy -1 -1 740: ;ab[_[.ae.]]{2,5}xy ab_xy -1 -1 741: 742: ;ab[_[.ae.]]*?xy abxy_ 0 4 743: ;ab[_[.ae.]]*?xy ab_xy_ 0 5 744: ;ab[_[.ae.]]*?xy abxy 0 4 745: ;ab[_[.ae.]]*?xy ab_xy 0 5 746: ;ab[_[.ae.]]*? ab 0 2 747: ;ab[_[.ae.]]*? ab__ 0 2 748: 749: ;ab[_[.ae.]]{2,5}?xy ab__xy_ 0 6 750: ;ab[_[.ae.]]{2,5}?xy ab____xy_ 0 8 751: ;ab[_[.ae.]]{2,5}?xy ab_____xy_ 0 9 752: ;ab[_[.ae.]]{2,5}?xy ab__xy 0 6 753: ;ab[_[.ae.]]{2,5}?xy ab_____xy 0 9 754: ;ab[_[.ae.]]{2,5}? ab__ 0 4 755: ;ab[_[.ae.]]{2,5}? 
ab_______ 0 4 756: ;ab[_[.ae.]]{2,5}?xy ab______xy -1 -1 757: ;ab[_[.ae.]]{2,5}xy ab_xy -1 -1 758: 759: ; new bugs detected in spring 2003: 760: - normal match_continuous REG_NO_POSIX_TEST 761: b abc 1 2 762: 763: () abc 0 0 0 0 764: ^() abc 0 0 0 0 765: ^()+ abc 0 0 0 0 766: ^(){1} abc 0 0 0 0 767: ^(){2} abc 0 0 0 0 768: ^((){2}) abc 0 0 0 0 0 0 769: () "" 0 0 0 0 770: ()\1 "" 0 0 0 0 771: ()\1 a 0 0 0 0 772: a()\1b ab 0 2 1 1 773: a()b\1 ab 0 2 1 1 774: 775: ; subtleties of matching with no sub-expressions marked 776: - normal match_nosubs REG_NO_POSIX_TEST 777: a(b?c)+d accd 0 4 778: (wee|week)(knights|night) weeknights 0 10 779: .* abc 0 3 780: a(b|(c))d abd 0 3 781: a(b|(c))d acd 0 3 782: a(b*|c|e)d abbd 0 4 783: a(b*|c|e)d acd 0 3 784: a(b*|c|e)d ad 0 2 785: a(b?)c abc 0 3 786: a(b?)c ac 0 2 787: a(b+)c abc 0 3 788: a(b+)c abbbc 0 5 789: a(b*)c ac 0 2 790: (a|ab)(bc([de]+)f|cde) abcdef 0 6 791: a([bc]?)c abc 0 3 792: a([bc]?)c ac 0 2 793: a([bc]+)c abc 0 3 794: a([bc]+)c abcc 0 4 795: a([bc]+)bc abcbc 0 5 796: a(bb+|b)b abb 0 3 797: a(bbb+|bb+|b)b abb 0 3 798: a(bbb+|bb+|b)b abbb 0 4 799: a(bbb+|bb+|b)bb abbb 0 4 800: (.*).* abcdef 0 6 801: (a*)* bc 0 0 802: 803: - normal nosubs REG_NO_POSIX_TEST 804: a(b?c)+d accd 0 4 805: (wee|week)(knights|night) weeknights 0 10 806: .* abc 0 3 807: a(b|(c))d abd 0 3 808: a(b|(c))d acd 0 3 809: a(b*|c|e)d abbd 0 4 810: a(b*|c|e)d acd 0 3 811: a(b*|c|e)d ad 0 2 812: a(b?)c abc 0 3 813: a(b?)c ac 0 2 814: a(b+)c abc 0 3 815: a(b+)c abbbc 0 5 816: a(b*)c ac 0 2 817: (a|ab)(bc([de]+)f|cde) abcdef 0 6 818: a([bc]?)c abc 0 3 819: a([bc]?)c ac 0 2 820: a([bc]+)c abc 0 3 821: a([bc]+)c abcc 0 4 822: a([bc]+)bc abcbc 0 5 823: a(bb+|b)b abb 0 3 824: a(bbb+|bb+|b)b abb 0 3 825: a(bbb+|bb+|b)b abbb 0 4 826: a(bbb+|bb+|b)bb abbb 0 4 827: (.*).* abcdef 0 6 828: (a*)* bc 0 0 829: