exec_test.go 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748
  1. // Copyright 2010 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package regexp
  5. import (
  6. "bufio"
  7. "compress/bzip2"
  8. "fmt"
  9. "internal/testenv"
  10. "io"
  11. "os"
  12. "path/filepath"
  13. "regexp/syntax"
  14. "strconv"
  15. "strings"
  16. "testing"
  17. "unicode/utf8"
  18. )
  19. // TestRE2Search tests this package's regexp API against test cases
  20. // considered during RE2's exhaustive tests, which run all possible
  21. // regexps over a given set of atoms and operators, up to a given
  22. // complexity, over all possible strings over a given alphabet,
  23. // up to a given size. Rather than try to link with RE2, we read a
  24. // log file containing the test cases and the expected matches.
  25. // The log file, re2-exhaustive.txt, is generated by running 'make log'
  26. // in the open source RE2 distribution https://github.com/google/re2/.
  27. //
  28. // The test file format is a sequence of stanzas like:
  29. //
  30. // strings
  31. // "abc"
  32. // "123x"
  33. // regexps
  34. // "[a-z]+"
  35. // 0-3;0-3
  36. // -;-
  37. // "([0-9])([0-9])([0-9])"
  38. // -;-
  39. // -;0-3 0-1 1-2 2-3
  40. //
  41. // The stanza begins by defining a set of strings, quoted
  42. // using Go double-quote syntax, one per line. Then the
  43. // regexps section gives a sequence of regexps to run on
  44. // the strings. In the block that follows a regexp, each line
  45. // gives the semicolon-separated match results of running
  46. // the regexp on the corresponding string.
  47. // Each match result is either a single -, meaning no match, or a
  48. // space-separated sequence of pairs giving the match and
  49. // submatch indices. An unmatched subexpression formats
  50. // its pair as a single - (not illustrated above). For now
  51. // each regexp run produces two match results, one for a
  52. // ``full match'' that restricts the regexp to matching the entire
  53. // string or nothing, and one for a ``partial match'' that gives
  54. // the leftmost first match found in the string.
  55. //
  56. // Lines beginning with # are comments. Lines beginning with
  57. // a capital letter are test names printed during RE2's test suite
  58. // and are echoed into t but otherwise ignored.
  59. //
  60. // At time of writing, re2-exhaustive.txt is 59 MB but compresses to 385 kB,
  61. // so we store re2-exhaustive.txt.bz2 in the repository and decompress it on the fly.
  62. //
  63. func TestRE2Search(t *testing.T) {
  64. testRE2(t, "testdata/re2-search.txt")
  65. }
  66. func testRE2(t *testing.T, file string) {
  67. f, err := os.Open(file)
  68. if err != nil {
  69. t.Fatal(err)
  70. }
  71. defer f.Close()
  72. var txt io.Reader
  73. if strings.HasSuffix(file, ".bz2") {
  74. z := bzip2.NewReader(f)
  75. txt = z
  76. file = file[:len(file)-len(".bz2")] // for error messages
  77. } else {
  78. txt = f
  79. }
  80. lineno := 0
  81. scanner := bufio.NewScanner(txt)
  82. var (
  83. str []string
  84. input []string
  85. inStrings bool
  86. re *Regexp
  87. refull *Regexp
  88. nfail int
  89. ncase int
  90. )
  91. for lineno := 1; scanner.Scan(); lineno++ {
  92. line := scanner.Text()
  93. switch {
  94. case line == "":
  95. t.Fatalf("%s:%d: unexpected blank line", file, lineno)
  96. case line[0] == '#':
  97. continue
  98. case 'A' <= line[0] && line[0] <= 'Z':
  99. // Test name.
  100. t.Logf("%s\n", line)
  101. continue
  102. case line == "strings":
  103. str = str[:0]
  104. inStrings = true
  105. case line == "regexps":
  106. inStrings = false
  107. case line[0] == '"':
  108. q, err := strconv.Unquote(line)
  109. if err != nil {
  110. // Fatal because we'll get out of sync.
  111. t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
  112. }
  113. if inStrings {
  114. str = append(str, q)
  115. continue
  116. }
  117. // Is a regexp.
  118. if len(input) != 0 {
  119. t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
  120. }
  121. re, err = tryCompile(q)
  122. if err != nil {
  123. if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
  124. // We don't and likely never will support \C; keep going.
  125. continue
  126. }
  127. t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
  128. if nfail++; nfail >= 100 {
  129. t.Fatalf("stopping after %d errors", nfail)
  130. }
  131. continue
  132. }
  133. full := `\A(?:` + q + `)\z`
  134. refull, err = tryCompile(full)
  135. if err != nil {
  136. // Fatal because q worked, so this should always work.
  137. t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
  138. }
  139. input = str
  140. case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
  141. // A sequence of match results.
  142. ncase++
  143. if re == nil {
  144. // Failed to compile: skip results.
  145. continue
  146. }
  147. if len(input) == 0 {
  148. t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
  149. }
  150. var text string
  151. text, input = input[0], input[1:]
  152. if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
  153. // RE2's \B considers every byte position,
  154. // so it sees 'not word boundary' in the
  155. // middle of UTF-8 sequences. This package
  156. // only considers the positions between runes,
  157. // so it disagrees. Skip those cases.
  158. continue
  159. }
  160. res := strings.Split(line, ";")
  161. if len(res) != len(run) {
  162. t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
  163. }
  164. for i := range res {
  165. have, suffix := run[i](re, refull, text)
  166. want := parseResult(t, file, lineno, res[i])
  167. if !same(have, want) {
  168. t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
  169. if nfail++; nfail >= 100 {
  170. t.Fatalf("stopping after %d errors", nfail)
  171. }
  172. continue
  173. }
  174. b, suffix := match[i](re, refull, text)
  175. if b != (want != nil) {
  176. t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
  177. if nfail++; nfail >= 100 {
  178. t.Fatalf("stopping after %d errors", nfail)
  179. }
  180. continue
  181. }
  182. }
  183. default:
  184. t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
  185. }
  186. }
  187. if err := scanner.Err(); err != nil {
  188. t.Fatalf("%s:%d: %v", file, lineno, err)
  189. }
  190. if len(input) != 0 {
  191. t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
  192. }
  193. t.Logf("%d cases tested", ncase)
  194. }
  195. var run = []func(*Regexp, *Regexp, string) ([]int, string){
  196. runFull,
  197. runPartial,
  198. runFullLongest,
  199. runPartialLongest,
  200. }
  201. func runFull(re, refull *Regexp, text string) ([]int, string) {
  202. refull.longest = false
  203. return refull.FindStringSubmatchIndex(text), "[full]"
  204. }
  205. func runPartial(re, refull *Regexp, text string) ([]int, string) {
  206. re.longest = false
  207. return re.FindStringSubmatchIndex(text), ""
  208. }
  209. func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
  210. refull.longest = true
  211. return refull.FindStringSubmatchIndex(text), "[full,longest]"
  212. }
  213. func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
  214. re.longest = true
  215. return re.FindStringSubmatchIndex(text), "[longest]"
  216. }
  217. var match = []func(*Regexp, *Regexp, string) (bool, string){
  218. matchFull,
  219. matchPartial,
  220. matchFullLongest,
  221. matchPartialLongest,
  222. }
  223. func matchFull(re, refull *Regexp, text string) (bool, string) {
  224. refull.longest = false
  225. return refull.MatchString(text), "[full]"
  226. }
  227. func matchPartial(re, refull *Regexp, text string) (bool, string) {
  228. re.longest = false
  229. return re.MatchString(text), ""
  230. }
  231. func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
  232. refull.longest = true
  233. return refull.MatchString(text), "[full,longest]"
  234. }
  235. func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
  236. re.longest = true
  237. return re.MatchString(text), "[longest]"
  238. }
  // isSingleBytes reports whether s consists solely of bytes below
  // utf8.RuneSelf. The empty string qualifies.
  func isSingleBytes(s string) bool {
  	// Every byte of a multi-byte or invalid encoding is >= utf8.RuneSelf,
  	// so scanning bytes gives the same answer as decoding runes.
  	for i := 0; i < len(s); i++ {
  		if s[i] >= utf8.RuneSelf {
  			return false
  		}
  	}
  	return true
  }
  247. func tryCompile(s string) (re *Regexp, err error) {
  248. // Protect against panic during Compile.
  249. defer func() {
  250. if r := recover(); r != nil {
  251. err = fmt.Errorf("panic: %v", r)
  252. }
  253. }()
  254. return Compile(s)
  255. }
  // parseResult converts one match result from the RE2 log into a flat
  // slice of index pairs.
  //
  // A result of exactly "-" means no match and yields nil. Otherwise res is
  // split on single spaces into pairs: "lo-hi" contributes lo, hi and a lone
  // "-" contributes -1, -1. A pair that does not parse, or that has lo > hi,
  // aborts the test through t.Fatalf; file and lineno appear in that message.
  func parseResult(t *testing.T, file string, lineno int, res string) []int {
  	if res == "-" {
  		return nil
  	}
  	pairs := strings.Split(res, " ")
  	out := make([]int, 0, 2*len(pairs))
  	for _, pair := range pairs {
  		if pair == "-" {
  			// Subexpression did not participate in the match.
  			out = append(out, -1, -1)
  			continue
  		}
  		loStr, hiStr, _ := strings.Cut(pair, "-")
  		lo, errLo := strconv.Atoi(loStr)
  		hi, errHi := strconv.Atoi(hiStr)
  		if errLo != nil || errHi != nil || lo > hi {
  			t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
  		}
  		out = append(out, lo, hi)
  	}
  	return out
  }
  // same reports whether x and y have the same length and hold equal
  // values at every index. A nil slice and an empty slice compare equal.
  func same(x, y []int) bool {
  	n := len(x)
  	if n != len(y) {
  		return false
  	}
  	for i := 0; i < n; i++ {
  		if x[i] != y[i] {
  			return false
  		}
  	}
  	return true
  }
  305. // TestFowler runs this package's regexp API against the
  306. // POSIX regular expression tests collected by Glenn Fowler
  307. // at http://www2.research.att.com/~astopen/testregex/testregex.html.
  308. func TestFowler(t *testing.T) {
  309. files, err := filepath.Glob("testdata/*.dat")
  310. if err != nil {
  311. t.Fatal(err)
  312. }
  313. for _, file := range files {
  314. t.Log(file)
  315. testFowler(t, file)
  316. }
  317. }
  318. var notab = MustCompilePOSIX(`[^\t]+`)
  319. func testFowler(t *testing.T, file string) {
  320. f, err := os.Open(file)
  321. if err != nil {
  322. t.Error(err)
  323. return
  324. }
  325. defer f.Close()
  326. b := bufio.NewReader(f)
  327. lineno := 0
  328. lastRegexp := ""
  329. Reading:
  330. for {
  331. lineno++
  332. line, err := b.ReadString('\n')
  333. if err != nil {
  334. if err != io.EOF {
  335. t.Errorf("%s:%d: %v", file, lineno, err)
  336. }
  337. break Reading
  338. }
  339. // http://www2.research.att.com/~astopen/man/man1/testregex.html
  340. //
  341. // INPUT FORMAT
  342. // Input lines may be blank, a comment beginning with #, or a test
  343. // specification. A specification is five fields separated by one
  344. // or more tabs. NULL denotes the empty string and NIL denotes the
  345. // 0 pointer.
  346. if line[0] == '#' || line[0] == '\n' {
  347. continue Reading
  348. }
  349. line = line[:len(line)-1]
  350. field := notab.FindAllString(line, -1)
  351. for i, f := range field {
  352. if f == "NULL" {
  353. field[i] = ""
  354. }
  355. if f == "NIL" {
  356. t.Logf("%s:%d: skip: %s", file, lineno, line)
  357. continue Reading
  358. }
  359. }
  360. if len(field) == 0 {
  361. continue Reading
  362. }
  363. // Field 1: the regex(3) flags to apply, one character per REG_feature
  364. // flag. The test is skipped if REG_feature is not supported by the
  365. // implementation. If the first character is not [BEASKLP] then the
  366. // specification is a global control line. One or more of [BEASKLP] may be
  367. // specified; the test will be repeated for each mode.
  368. //
  369. // B basic BRE (grep, ed, sed)
  370. // E REG_EXTENDED ERE (egrep)
  371. // A REG_AUGMENTED ARE (egrep with negation)
  372. // S REG_SHELL SRE (sh glob)
  373. // K REG_SHELL|REG_AUGMENTED KRE (ksh glob)
  374. // L REG_LITERAL LRE (fgrep)
  375. //
  376. // a REG_LEFT|REG_RIGHT implicit ^...$
  377. // b REG_NOTBOL lhs does not match ^
  378. // c REG_COMMENT ignore space and #...\n
  379. // d REG_SHELL_DOT explicit leading . match
  380. // e REG_NOTEOL rhs does not match $
  381. // f REG_MULTIPLE multiple \n separated patterns
  382. // g FNM_LEADING_DIR testfnmatch only -- match until /
  383. // h REG_MULTIREF multiple digit backref
  384. // i REG_ICASE ignore case
  385. // j REG_SPAN . matches \n
  386. // k REG_ESCAPE \ to escape [...] delimiter
  387. // l REG_LEFT implicit ^...
  388. // m REG_MINIMAL minimal match
  389. // n REG_NEWLINE explicit \n match
  390. // o REG_ENCLOSED (|&) magic inside [@|&](...)
  391. // p REG_SHELL_PATH explicit / match
  392. // q REG_DELIMITED delimited pattern
  393. // r REG_RIGHT implicit ...$
  394. // s REG_SHELL_ESCAPED \ not special
  395. // t REG_MUSTDELIM all delimiters must be specified
  396. // u standard unspecified behavior -- errors not counted
  397. // v REG_CLASS_ESCAPE \ special inside [...]
  398. // w REG_NOSUB no subexpression match array
  399. // x REG_LENIENT let some errors slide
  400. // y REG_LEFT regexec() implicit ^...
  401. // z REG_NULL NULL subexpressions ok
  402. // $ expand C \c escapes in fields 2 and 3
  403. // / field 2 is a regsubcomp() expression
  404. // = field 3 is a regdecomp() expression
  405. //
  406. // Field 1 control lines:
  407. //
  408. // C set LC_COLLATE and LC_CTYPE to locale in field 2
  409. //
  410. // ?test ... output field 5 if passed and != EXPECTED, silent otherwise
  411. // &test ... output field 5 if current and previous passed
  412. // |test ... output field 5 if current passed and previous failed
  413. // ; ... output field 2 if previous failed
  414. // {test ... skip if failed until }
  415. // } end of skip
  416. //
  417. // : comment comment copied as output NOTE
  418. // :comment:test :comment: ignored
  419. // N[OTE] comment comment copied as output NOTE
  420. // T[EST] comment comment
  421. //
  422. // number use number for nmatch (20 by default)
  423. flag := field[0]
  424. switch flag[0] {
  425. case '?', '&', '|', ';', '{', '}':
  426. // Ignore all the control operators.
  427. // Just run everything.
  428. flag = flag[1:]
  429. if flag == "" {
  430. continue Reading
  431. }
  432. case ':':
  433. var ok bool
  434. if _, flag, ok = strings.Cut(flag[1:], ":"); !ok {
  435. t.Logf("skip: %s", line)
  436. continue Reading
  437. }
  438. case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  439. t.Logf("skip: %s", line)
  440. continue Reading
  441. }
  442. // Can check field count now that we've handled the myriad comment formats.
  443. if len(field) < 4 {
  444. t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
  445. continue Reading
  446. }
  447. // Expand C escapes (a.k.a. Go escapes).
  448. if strings.Contains(flag, "$") {
  449. f := `"` + field[1] + `"`
  450. if field[1], err = strconv.Unquote(f); err != nil {
  451. t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
  452. }
  453. f = `"` + field[2] + `"`
  454. if field[2], err = strconv.Unquote(f); err != nil {
  455. t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
  456. }
  457. }
  458. // Field 2: the regular expression pattern; SAME uses the pattern from
  459. // the previous specification.
  460. //
  461. if field[1] == "SAME" {
  462. field[1] = lastRegexp
  463. }
  464. lastRegexp = field[1]
  465. // Field 3: the string to match.
  466. text := field[2]
  467. // Field 4: the test outcome...
  468. ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
  469. if !ok {
  470. t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
  471. continue Reading
  472. }
  473. // Field 5: optional comment appended to the report.
  474. Testing:
  475. // Run test once for each specified capital letter mode that we support.
  476. for _, c := range flag {
  477. pattern := field[1]
  478. syn := syntax.POSIX | syntax.ClassNL
  479. switch c {
  480. default:
  481. continue Testing
  482. case 'E':
  483. // extended regexp (what we support)
  484. case 'L':
  485. // literal
  486. pattern = QuoteMeta(pattern)
  487. }
  488. for _, c := range flag {
  489. switch c {
  490. case 'i':
  491. syn |= syntax.FoldCase
  492. }
  493. }
  494. re, err := compile(pattern, syn, true)
  495. if err != nil {
  496. if shouldCompile {
  497. t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
  498. }
  499. continue Testing
  500. }
  501. if !shouldCompile {
  502. t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
  503. continue Testing
  504. }
  505. match := re.MatchString(text)
  506. if match != shouldMatch {
  507. t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
  508. continue Testing
  509. }
  510. have := re.FindStringSubmatchIndex(text)
  511. if (len(have) > 0) != match {
  512. t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
  513. continue Testing
  514. }
  515. if len(have) > len(pos) {
  516. have = have[:len(pos)]
  517. }
  518. if !same(have, pos) {
  519. t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
  520. }
  521. }
  522. }
  523. }
  // parseFowlerResult decodes field 4 of a Fowler test specification.
  //
  // Field 4: the test outcome. This is either one of the posix error
  // codes (with REG_ omitted) or the match array, a list of (m,n)
  // entries with m and n being first and last+1 positions in the
  // field 3 string, or NULL if REG_NOSUB is in effect and success
  // is expected. BADPAT is acceptable in place of any regcomp(3)
  // error code. The match[] array is initialized to (-2,-2) before
  // each test. All array elements from 0 to nmatch-1 must be specified
  // in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
  // Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
  // matched (?{...}) expression, where x is the text enclosed by {...},
  // o is the expression ordinal counting from 1, and n is the length of
  // the unmatched portion of the subject string. If x starts with a
  // number then that is the return value of re_execf(), otherwise 0 is
  // returned.
  //
  // Results: ok reports whether s could be parsed; compiled and matched give
  // the expected compile and match outcomes; pos holds the expected match
  // positions, with -1 standing for a "?" endpoint. When a match array is
  // malformed the function returns ok=false with compiled=true.
  func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
  	switch {
  	case s == "":
  		// Match with no position information.
  		return true, true, true, nil
  	case s == "NOMATCH":
  		// Match failure.
  		return true, true, false, nil
  	case 'A' <= s[0] && s[0] <= 'Z':
  		// All the other error codes are compile errors.
  		return true, false, false, nil
  	}

  	var x []int
  	for s != "" {
  		// An even count means a new "(m," is starting; otherwise the
  		// closing "n)" of the current entry is expected.
  		var end byte = ')'
  		if len(x)%2 == 0 {
  			if s[0] != '(' {
  				return false, true, false, nil
  			}
  			s = s[1:]
  			end = ','
  		}
  		i := strings.IndexByte(s, end)
  		if i <= 0 {
  			// Terminator missing, or nothing in front of it.
  			return false, true, false, nil
  		}
  		v := -1
  		if tok := s[:i]; tok != "?" {
  			n, err := strconv.Atoi(tok)
  			if err != nil {
  				return false, true, false, nil
  			}
  			v = n
  		}
  		x = append(x, v)
  		s = s[i+1:]
  	}
  	if len(x)%2 != 0 {
  		return false, true, false, nil
  	}
  	return true, true, true, x
  }
  // text caches the buffer most recently generated by makeText.
  var text []byte

  // makeText returns n bytes of deterministic pseudo-random text made of
  // the characters 0x20 through 0x7E and newlines.
  //
  // When the cached buffer already holds at least n bytes, a prefix of it is
  // returned. Otherwise a new n-byte buffer is generated from a fixed seed
  // and replaces the cache. The returned slice shares storage with the cache.
  func makeText(n int) []byte {
  	if n <= len(text) {
  		return text[:n]
  	}
  	const (
  		first = 0x20 // lowest character produced
  		last  = 0x7E // highest character produced
  	)
  	text = make([]byte, n)
  	x := ^uint32(0)
  	for i := range text {
  		// Shift left and flip the low bit; when the top bit ends up
  		// set, mix in a fixed constant.
  		x = x<<1 ^ 1
  		if x&0x80000000 != 0 {
  			x ^= 0x88888eef
  		}
  		switch {
  		case x%31 == 0:
  			text[i] = '\n'
  		default:
  			text[i] = byte(x%(last+1-first) + first)
  		}
  	}
  	return text
  }
  620. func BenchmarkMatch(b *testing.B) {
  621. isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
  622. for _, data := range benchData {
  623. r := MustCompile(data.re)
  624. for _, size := range benchSizes {
  625. if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
  626. continue
  627. }
  628. t := makeText(size.n)
  629. b.Run(data.name+"/"+size.name, func(b *testing.B) {
  630. b.SetBytes(int64(size.n))
  631. for i := 0; i < b.N; i++ {
  632. if r.Match(t) {
  633. b.Fatal("match!")
  634. }
  635. }
  636. })
  637. }
  638. }
  639. }
  640. func BenchmarkMatch_onepass_regex(b *testing.B) {
  641. isRaceBuilder := strings.HasSuffix(testenv.Builder(), "-race")
  642. r := MustCompile(`(?s)\A.*\z`)
  643. if r.onepass == nil {
  644. b.Fatalf("want onepass regex, but %q is not onepass", r)
  645. }
  646. for _, size := range benchSizes {
  647. if (isRaceBuilder || testing.Short()) && size.n > 1<<10 {
  648. continue
  649. }
  650. t := makeText(size.n)
  651. b.Run(size.name, func(b *testing.B) {
  652. b.SetBytes(int64(size.n))
  653. b.ReportAllocs()
  654. for i := 0; i < b.N; i++ {
  655. if !r.Match(t) {
  656. b.Fatal("not match!")
  657. }
  658. }
  659. })
  660. }
  661. }
  // benchData lists the patterns exercised by BenchmarkMatch: name labels
  // the sub-benchmark and re is the regular expression source.
  var benchData = []struct{ name, re string }{
  	{name: "Easy0", re: "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
  	{name: "Easy0i", re: "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
  	{name: "Easy1", re: "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
  	{name: "Medium", re: "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
  	{name: "Hard", re: "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
  	{name: "Hard1", re: "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
  }
  // benchSizes lists the input lengths used by the match benchmarks: name
  // labels the sub-benchmark and n is the text length in bytes.
  var benchSizes = []struct {
  	name string
  	n    int
  }{
  	{name: "16", n: 16},
  	{name: "32", n: 32},
  	{name: "1K", n: 1 << 10},
  	{name: "32K", n: 32 << 10},
  	{name: "1M", n: 1 << 20},
  	{name: "32M", n: 32 << 20},
  }
  681. func TestLongest(t *testing.T) {
  682. re, err := Compile(`a(|b)`)
  683. if err != nil {
  684. t.Fatal(err)
  685. }
  686. if g, w := re.FindString("ab"), "a"; g != w {
  687. t.Errorf("first match was %q, want %q", g, w)
  688. }
  689. re.Longest()
  690. if g, w := re.FindString("ab"), "ab"; g != w {
  691. t.Errorf("longest match was %q, want %q", g, w)
  692. }
  693. }
  694. // TestProgramTooLongForBacktrack tests that a regex which is too long
  695. // for the backtracker still executes properly.
  696. func TestProgramTooLongForBacktrack(t *testing.T) {
  697. longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`)
  698. if !longRegex.MatchString("two") {
  699. t.Errorf("longRegex.MatchString(\"two\") was false, want true")
  700. }
  701. if longRegex.MatchString("xxx") {
  702. t.Errorf("longRegex.MatchString(\"xxx\") was true, want false")
  703. }
  704. }