// replace_test.go
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package strings_test
  5. import (
  6. "bytes"
  7. "fmt"
  8. . "strings"
  9. "testing"
  10. )
  11. var htmlEscaper = NewReplacer(
  12. "&", "&",
  13. "<", "&lt;",
  14. ">", "&gt;",
  15. `"`, "&quot;",
  16. "'", "&apos;",
  17. )
  18. var htmlUnescaper = NewReplacer(
  19. "&amp;", "&",
  20. "&lt;", "<",
  21. "&gt;", ">",
  22. "&quot;", `"`,
  23. "&apos;", "'",
  24. )
  25. // The http package's old HTML escaping function.
  26. func oldHTMLEscape(s string) string {
  27. s = Replace(s, "&", "&amp;", -1)
  28. s = Replace(s, "<", "&lt;", -1)
  29. s = Replace(s, ">", "&gt;", -1)
  30. s = Replace(s, `"`, "&quot;", -1)
  31. s = Replace(s, "'", "&apos;", -1)
  32. return s
  33. }
  34. var capitalLetters = NewReplacer("a", "A", "b", "B")
  35. // TestReplacer tests the replacer implementations.
  36. func TestReplacer(t *testing.T) {
  37. type testCase struct {
  38. r *Replacer
  39. in, out string
  40. }
  41. var testCases []testCase
  42. // str converts 0xff to "\xff". This isn't just string(b) since that converts to UTF-8.
  43. str := func(b byte) string {
  44. return string([]byte{b})
  45. }
  46. var s []string
  47. // inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
  48. s = nil
  49. for i := 0; i < 256; i++ {
  50. s = append(s, str(byte(i)), str(byte(i+1)))
  51. }
  52. inc := NewReplacer(s...)
  53. // Test cases with 1-byte old strings, 1-byte new strings.
  54. testCases = append(testCases,
  55. testCase{capitalLetters, "brad", "BrAd"},
  56. testCase{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
  57. testCase{capitalLetters, "", ""},
  58. testCase{inc, "brad", "csbe"},
  59. testCase{inc, "\x00\xff", "\x01\x00"},
  60. testCase{inc, "", ""},
  61. testCase{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
  62. )
  63. // repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
  64. s = nil
  65. for i := 0; i < 256; i++ {
  66. n := i + 1 - 'a'
  67. if n < 1 {
  68. n = 1
  69. }
  70. s = append(s, str(byte(i)), Repeat(str(byte(i)), n))
  71. }
  72. repeat := NewReplacer(s...)
  73. // Test cases with 1-byte old strings, variable length new strings.
  74. testCases = append(testCases,
  75. testCase{htmlEscaper, "No changes", "No changes"},
  76. testCase{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
  77. testCase{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
  78. testCase{htmlEscaper, "", ""},
  79. testCase{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
  80. testCase{repeat, "abba", "abbbba"},
  81. testCase{repeat, "", ""},
  82. testCase{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
  83. )
  84. // The remaining test cases have variable length old strings.
  85. testCases = append(testCases,
  86. testCase{htmlUnescaper, "&amp;amp;", "&amp;"},
  87. testCase{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
  88. testCase{htmlUnescaper, "", ""},
  89. testCase{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},
  90. testCase{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},
  91. testCase{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
  92. )
  93. // gen1 has multiple old strings of variable length. There is no
  94. // overall non-empty common prefix, but some pairwise common prefixes.
  95. gen1 := NewReplacer(
  96. "aaa", "3[aaa]",
  97. "aa", "2[aa]",
  98. "a", "1[a]",
  99. "i", "i",
  100. "longerst", "most long",
  101. "longer", "medium",
  102. "long", "short",
  103. "xx", "xx",
  104. "x", "X",
  105. "X", "Y",
  106. "Y", "Z",
  107. )
  108. testCases = append(testCases,
  109. testCase{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
  110. testCase{gen1, "long, longerst, longer", "short, most long, medium"},
  111. testCase{gen1, "xxxxx", "xxxxX"},
  112. testCase{gen1, "XiX", "YiY"},
  113. testCase{gen1, "", ""},
  114. )
  115. // gen2 has multiple old strings with no pairwise common prefix.
  116. gen2 := NewReplacer(
  117. "roses", "red",
  118. "violets", "blue",
  119. "sugar", "sweet",
  120. )
  121. testCases = append(testCases,
  122. testCase{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
  123. testCase{gen2, "", ""},
  124. )
  125. // gen3 has multiple old strings with an overall common prefix.
  126. gen3 := NewReplacer(
  127. "abracadabra", "poof",
  128. "abracadabrakazam", "splat",
  129. "abraham", "lincoln",
  130. "abrasion", "scrape",
  131. "abraham", "isaac",
  132. )
  133. testCases = append(testCases,
  134. testCase{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
  135. testCase{gen3, "abrasion abracad", "scrape abracad"},
  136. testCase{gen3, "abba abram abrasive", "abba abram abrasive"},
  137. testCase{gen3, "", ""},
  138. )
  139. // foo{1,2,3,4} have multiple old strings with an overall common prefix
  140. // and 1- or 2- byte extensions from the common prefix.
  141. foo1 := NewReplacer(
  142. "foo1", "A",
  143. "foo2", "B",
  144. "foo3", "C",
  145. )
  146. foo2 := NewReplacer(
  147. "foo1", "A",
  148. "foo2", "B",
  149. "foo31", "C",
  150. "foo32", "D",
  151. )
  152. foo3 := NewReplacer(
  153. "foo11", "A",
  154. "foo12", "B",
  155. "foo31", "C",
  156. "foo32", "D",
  157. )
  158. foo4 := NewReplacer(
  159. "foo12", "B",
  160. "foo32", "D",
  161. )
  162. testCases = append(testCases,
  163. testCase{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
  164. testCase{foo1, "", ""},
  165. testCase{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
  166. testCase{foo2, "", ""},
  167. testCase{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
  168. testCase{foo3, "", ""},
  169. testCase{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
  170. testCase{foo4, "", ""},
  171. )
  172. // genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
  173. allBytes := make([]byte, 256)
  174. for i := range allBytes {
  175. allBytes[i] = byte(i)
  176. }
  177. allString := string(allBytes)
  178. genAll := NewReplacer(
  179. allString, "[all]",
  180. "\xff", "[ff]",
  181. "\x00", "[00]",
  182. )
  183. testCases = append(testCases,
  184. testCase{genAll, allString, "[all]"},
  185. testCase{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
  186. testCase{genAll, "", ""},
  187. )
  188. // Test cases with empty old strings.
  189. blankToX1 := NewReplacer("", "X")
  190. blankToX2 := NewReplacer("", "X", "", "")
  191. blankHighPriority := NewReplacer("", "X", "o", "O")
  192. blankLowPriority := NewReplacer("o", "O", "", "X")
  193. blankNoOp1 := NewReplacer("", "")
  194. blankNoOp2 := NewReplacer("", "", "", "A")
  195. blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
  196. testCases = append(testCases,
  197. testCase{blankToX1, "foo", "XfXoXoX"},
  198. testCase{blankToX1, "", "X"},
  199. testCase{blankToX2, "foo", "XfXoXoX"},
  200. testCase{blankToX2, "", "X"},
  201. testCase{blankHighPriority, "oo", "XOXOX"},
  202. testCase{blankHighPriority, "ii", "XiXiX"},
  203. testCase{blankHighPriority, "oiio", "XOXiXiXOX"},
  204. testCase{blankHighPriority, "iooi", "XiXOXOXiX"},
  205. testCase{blankHighPriority, "", "X"},
  206. testCase{blankLowPriority, "oo", "OOX"},
  207. testCase{blankLowPriority, "ii", "XiXiX"},
  208. testCase{blankLowPriority, "oiio", "OXiXiOX"},
  209. testCase{blankLowPriority, "iooi", "XiOOXiX"},
  210. testCase{blankLowPriority, "", "X"},
  211. testCase{blankNoOp1, "foo", "foo"},
  212. testCase{blankNoOp1, "", ""},
  213. testCase{blankNoOp2, "foo", "foo"},
  214. testCase{blankNoOp2, "", ""},
  215. testCase{blankFoo, "foobarfoobaz", "XRXZX"},
  216. testCase{blankFoo, "foobar-foobaz", "XRX-XZX"},
  217. testCase{blankFoo, "", "X"},
  218. )
  219. // single string replacer
  220. abcMatcher := NewReplacer("abc", "[match]")
  221. testCases = append(testCases,
  222. testCase{abcMatcher, "", ""},
  223. testCase{abcMatcher, "ab", "ab"},
  224. testCase{abcMatcher, "abc", "[match]"},
  225. testCase{abcMatcher, "abcd", "[match]d"},
  226. testCase{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
  227. )
  228. // Issue 6659 cases (more single string replacer)
  229. noHello := NewReplacer("Hello", "")
  230. testCases = append(testCases,
  231. testCase{noHello, "Hello", ""},
  232. testCase{noHello, "Hellox", "x"},
  233. testCase{noHello, "xHello", "x"},
  234. testCase{noHello, "xHellox", "xx"},
  235. )
  236. // No-arg test cases.
  237. nop := NewReplacer()
  238. testCases = append(testCases,
  239. testCase{nop, "abc", "abc"},
  240. testCase{nop, "", ""},
  241. )
  242. // Run the test cases.
  243. for i, tc := range testCases {
  244. if s := tc.r.Replace(tc.in); s != tc.out {
  245. t.Errorf("%d. Replace(%q) = %q, want %q", i, tc.in, s, tc.out)
  246. }
  247. var buf bytes.Buffer
  248. n, err := tc.r.WriteString(&buf, tc.in)
  249. if err != nil {
  250. t.Errorf("%d. WriteString: %v", i, err)
  251. continue
  252. }
  253. got := buf.String()
  254. if got != tc.out {
  255. t.Errorf("%d. WriteString(%q) wrote %q, want %q", i, tc.in, got, tc.out)
  256. continue
  257. }
  258. if n != len(tc.out) {
  259. t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
  260. i, tc.in, n, len(tc.out), tc.out)
  261. }
  262. }
  263. }
  264. var algorithmTestCases = []struct {
  265. r *Replacer
  266. want string
  267. }{
  268. {capitalLetters, "*strings.byteReplacer"},
  269. {htmlEscaper, "*strings.byteStringReplacer"},
  270. {NewReplacer("12", "123"), "*strings.singleStringReplacer"},
  271. {NewReplacer("1", "12"), "*strings.byteStringReplacer"},
  272. {NewReplacer("", "X"), "*strings.genericReplacer"},
  273. {NewReplacer("a", "1", "b", "12", "cde", "123"), "*strings.genericReplacer"},
  274. }
  275. // TestPickAlgorithm tests that NewReplacer picks the correct algorithm.
  276. func TestPickAlgorithm(t *testing.T) {
  277. for i, tc := range algorithmTestCases {
  278. got := fmt.Sprintf("%T", tc.r.Replacer())
  279. if got != tc.want {
  280. t.Errorf("%d. algorithm = %s, want %s", i, got, tc.want)
  281. }
  282. }
  283. }
  284. type errWriter struct{}
  285. func (errWriter) Write(p []byte) (n int, err error) {
  286. return 0, fmt.Errorf("unwritable")
  287. }
  288. // TestWriteStringError tests that WriteString returns an error
  289. // received from the underlying io.Writer.
  290. func TestWriteStringError(t *testing.T) {
  291. for i, tc := range algorithmTestCases {
  292. n, err := tc.r.WriteString(errWriter{}, "abc")
  293. if n != 0 || err == nil || err.Error() != "unwritable" {
  294. t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", i, n, err)
  295. }
  296. }
  297. }
  298. // TestGenericTrieBuilding verifies the structure of the generated trie. There
  299. // is one node per line, and the key ending with the current line is in the
  300. // trie if it ends with a "+".
  301. func TestGenericTrieBuilding(t *testing.T) {
  302. testCases := []struct{ in, out string }{
  303. {"abc;abdef;abdefgh;xx;xy;z", `-
  304. a-
  305. .b-
  306. ..c+
  307. ..d-
  308. ...ef+
  309. .....gh+
  310. x-
  311. .x+
  312. .y+
  313. z+
  314. `},
  315. {"abracadabra;abracadabrakazam;abraham;abrasion", `-
  316. a-
  317. .bra-
  318. ....c-
  319. .....adabra+
  320. ...........kazam+
  321. ....h-
  322. .....am+
  323. ....s-
  324. .....ion+
  325. `},
  326. {"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
  327. X+
  328. Y+
  329. a+
  330. .a+
  331. ..a+
  332. i+
  333. l-
  334. .ong+
  335. ....er+
  336. ......st+
  337. x+
  338. .x+
  339. `},
  340. {"foo;;foo;foo1", `+
  341. f-
  342. .oo+
  343. ...1+
  344. `},
  345. }
  346. for _, tc := range testCases {
  347. keys := Split(tc.in, ";")
  348. args := make([]string, len(keys)*2)
  349. for i, key := range keys {
  350. args[i*2] = key
  351. }
  352. got := NewReplacer(args...).PrintTrie()
  353. // Remove tabs from tc.out
  354. wantbuf := make([]byte, 0, len(tc.out))
  355. for i := 0; i < len(tc.out); i++ {
  356. if tc.out[i] != '\t' {
  357. wantbuf = append(wantbuf, tc.out[i])
  358. }
  359. }
  360. want := string(wantbuf)
  361. if got != want {
  362. t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
  363. }
  364. }
  365. }
  366. func BenchmarkGenericNoMatch(b *testing.B) {
  367. str := Repeat("A", 100) + Repeat("B", 100)
  368. generic := NewReplacer("a", "A", "b", "B", "12", "123") // varying lengths forces generic
  369. for i := 0; i < b.N; i++ {
  370. generic.Replace(str)
  371. }
  372. }
  373. func BenchmarkGenericMatch1(b *testing.B) {
  374. str := Repeat("a", 100) + Repeat("b", 100)
  375. generic := NewReplacer("a", "A", "b", "B", "12", "123")
  376. for i := 0; i < b.N; i++ {
  377. generic.Replace(str)
  378. }
  379. }
  380. func BenchmarkGenericMatch2(b *testing.B) {
  381. str := Repeat("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!", 100)
  382. for i := 0; i < b.N; i++ {
  383. htmlUnescaper.Replace(str)
  384. }
  385. }
  386. func benchmarkSingleString(b *testing.B, pattern, text string) {
  387. r := NewReplacer(pattern, "[match]")
  388. b.SetBytes(int64(len(text)))
  389. b.ResetTimer()
  390. for i := 0; i < b.N; i++ {
  391. r.Replace(text)
  392. }
  393. }
  394. func BenchmarkSingleMaxSkipping(b *testing.B) {
  395. benchmarkSingleString(b, Repeat("b", 25), Repeat("a", 10000))
  396. }
  397. func BenchmarkSingleLongSuffixFail(b *testing.B) {
  398. benchmarkSingleString(b, "b"+Repeat("a", 500), Repeat("a", 1002))
  399. }
  400. func BenchmarkSingleMatch(b *testing.B) {
  401. benchmarkSingleString(b, "abcdef", Repeat("abcdefghijklmno", 1000))
  402. }
  403. func BenchmarkByteByteNoMatch(b *testing.B) {
  404. str := Repeat("A", 100) + Repeat("B", 100)
  405. for i := 0; i < b.N; i++ {
  406. capitalLetters.Replace(str)
  407. }
  408. }
  409. func BenchmarkByteByteMatch(b *testing.B) {
  410. str := Repeat("a", 100) + Repeat("b", 100)
  411. for i := 0; i < b.N; i++ {
  412. capitalLetters.Replace(str)
  413. }
  414. }
  415. func BenchmarkByteStringMatch(b *testing.B) {
  416. str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
  417. for i := 0; i < b.N; i++ {
  418. htmlEscaper.Replace(str)
  419. }
  420. }
  421. func BenchmarkHTMLEscapeNew(b *testing.B) {
  422. str := "I <3 to escape HTML & other text too."
  423. for i := 0; i < b.N; i++ {
  424. htmlEscaper.Replace(str)
  425. }
  426. }
  427. func BenchmarkHTMLEscapeOld(b *testing.B) {
  428. str := "I <3 to escape HTML & other text too."
  429. for i := 0; i < b.N; i++ {
  430. oldHTMLEscape(str)
  431. }
  432. }
  433. func BenchmarkByteStringReplacerWriteString(b *testing.B) {
  434. str := Repeat("I <3 to escape HTML & other text too.", 100)
  435. buf := new(bytes.Buffer)
  436. for i := 0; i < b.N; i++ {
  437. htmlEscaper.WriteString(buf, str)
  438. buf.Reset()
  439. }
  440. }
  441. func BenchmarkByteReplacerWriteString(b *testing.B) {
  442. str := Repeat("abcdefghijklmnopqrstuvwxyz", 100)
  443. buf := new(bytes.Buffer)
  444. for i := 0; i < b.N; i++ {
  445. capitalLetters.WriteString(buf, str)
  446. buf.Reset()
  447. }
  448. }
  449. // BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces.
  450. func BenchmarkByteByteReplaces(b *testing.B) {
  451. str := Repeat("a", 100) + Repeat("b", 100)
  452. for i := 0; i < b.N; i++ {
  453. Replace(Replace(str, "a", "A", -1), "b", "B", -1)
  454. }
  455. }
  456. // BenchmarkByteByteMap compares byteByteImpl against Map.
  457. func BenchmarkByteByteMap(b *testing.B) {
  458. str := Repeat("a", 100) + Repeat("b", 100)
  459. fn := func(r rune) rune {
  460. switch r {
  461. case 'a':
  462. return 'A'
  463. case 'b':
  464. return 'B'
  465. }
  466. return r
  467. }
  468. for i := 0; i < b.N; i++ {
  469. Map(fn, str)
  470. }
  471. }
  472. var mapdata = []struct{ name, data string }{
  473. {"ASCII", "a b c d e f g h i j k l m n o p q r s t u v w x y z"},
  474. {"Greek", "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω"},
  475. }
  476. func BenchmarkMap(b *testing.B) {
  477. mapidentity := func(r rune) rune {
  478. return r
  479. }
  480. b.Run("identity", func(b *testing.B) {
  481. for _, md := range mapdata {
  482. b.Run(md.name, func(b *testing.B) {
  483. for i := 0; i < b.N; i++ {
  484. Map(mapidentity, md.data)
  485. }
  486. })
  487. }
  488. })
  489. mapchange := func(r rune) rune {
  490. if 'a' <= r && r <= 'z' {
  491. return r + 'A' - 'a'
  492. }
  493. if 'α' <= r && r <= 'ω' {
  494. return r + 'Α' - 'α'
  495. }
  496. return r
  497. }
  498. b.Run("change", func(b *testing.B) {
  499. for _, md := range mapdata {
  500. b.Run(md.name, func(b *testing.B) {
  501. for i := 0; i < b.N; i++ {
  502. Map(mapchange, md.data)
  503. }
  504. })
  505. }
  506. })
  507. }