strings_test.go 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package strings_test
  5. import (
  6. "bytes"
  7. "fmt"
  8. "io"
  9. "math/rand"
  10. "reflect"
  11. "runtime"
  12. "strconv"
  13. . "strings"
  14. "testing"
  15. "unicode"
  16. "unicode/utf8"
  17. "unsafe"
  18. )
  19. func eq(a, b []string) bool {
  20. if len(a) != len(b) {
  21. return false
  22. }
  23. for i := 0; i < len(a); i++ {
  24. if a[i] != b[i] {
  25. return false
  26. }
  27. }
  28. return true
  29. }
  30. var abcd = "abcd"
  31. var faces = "☺☻☹"
  32. var commas = "1,2,3,4"
  33. var dots = "1....2....3....4"
  34. type IndexTest struct {
  35. s string
  36. sep string
  37. out int
  38. }
  39. var indexTests = []IndexTest{
  40. {"", "", 0},
  41. {"", "a", -1},
  42. {"", "foo", -1},
  43. {"fo", "foo", -1},
  44. {"foo", "foo", 0},
  45. {"oofofoofooo", "f", 2},
  46. {"oofofoofooo", "foo", 4},
  47. {"barfoobarfoo", "foo", 3},
  48. {"foo", "", 0},
  49. {"foo", "o", 1},
  50. {"abcABCabc", "A", 3},
  51. // cases with one byte strings - test special case in Index()
  52. {"", "a", -1},
  53. {"x", "a", -1},
  54. {"x", "x", 0},
  55. {"abc", "a", 0},
  56. {"abc", "b", 1},
  57. {"abc", "c", 2},
  58. {"abc", "x", -1},
  59. // test special cases in Index() for short strings
  60. {"", "ab", -1},
  61. {"bc", "ab", -1},
  62. {"ab", "ab", 0},
  63. {"xab", "ab", 1},
  64. {"xab"[:2], "ab", -1},
  65. {"", "abc", -1},
  66. {"xbc", "abc", -1},
  67. {"abc", "abc", 0},
  68. {"xabc", "abc", 1},
  69. {"xabc"[:3], "abc", -1},
  70. {"xabxc", "abc", -1},
  71. {"", "abcd", -1},
  72. {"xbcd", "abcd", -1},
  73. {"abcd", "abcd", 0},
  74. {"xabcd", "abcd", 1},
  75. {"xyabcd"[:5], "abcd", -1},
  76. {"xbcqq", "abcqq", -1},
  77. {"abcqq", "abcqq", 0},
  78. {"xabcqq", "abcqq", 1},
  79. {"xyabcqq"[:6], "abcqq", -1},
  80. {"xabxcqq", "abcqq", -1},
  81. {"xabcqxq", "abcqq", -1},
  82. {"", "01234567", -1},
  83. {"32145678", "01234567", -1},
  84. {"01234567", "01234567", 0},
  85. {"x01234567", "01234567", 1},
  86. {"x0123456x01234567", "01234567", 9},
  87. {"xx01234567"[:9], "01234567", -1},
  88. {"", "0123456789", -1},
  89. {"3214567844", "0123456789", -1},
  90. {"0123456789", "0123456789", 0},
  91. {"x0123456789", "0123456789", 1},
  92. {"x012345678x0123456789", "0123456789", 11},
  93. {"xyz0123456789"[:12], "0123456789", -1},
  94. {"x01234567x89", "0123456789", -1},
  95. {"", "0123456789012345", -1},
  96. {"3214567889012345", "0123456789012345", -1},
  97. {"0123456789012345", "0123456789012345", 0},
  98. {"x0123456789012345", "0123456789012345", 1},
  99. {"x012345678901234x0123456789012345", "0123456789012345", 17},
  100. {"", "01234567890123456789", -1},
  101. {"32145678890123456789", "01234567890123456789", -1},
  102. {"01234567890123456789", "01234567890123456789", 0},
  103. {"x01234567890123456789", "01234567890123456789", 1},
  104. {"x0123456789012345678x01234567890123456789", "01234567890123456789", 21},
  105. {"xyz01234567890123456789"[:22], "01234567890123456789", -1},
  106. {"", "0123456789012345678901234567890", -1},
  107. {"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1},
  108. {"0123456789012345678901234567890", "0123456789012345678901234567890", 0},
  109. {"x0123456789012345678901234567890", "0123456789012345678901234567890", 1},
  110. {"x012345678901234567890123456789x0123456789012345678901234567890", "0123456789012345678901234567890", 32},
  111. {"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1},
  112. {"", "01234567890123456789012345678901", -1},
  113. {"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1},
  114. {"01234567890123456789012345678901", "01234567890123456789012345678901", 0},
  115. {"x01234567890123456789012345678901", "01234567890123456789012345678901", 1},
  116. {"x0123456789012345678901234567890x01234567890123456789012345678901", "01234567890123456789012345678901", 33},
  117. {"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1},
  118. {"xxxxxx012345678901234567890123456789012345678901234567890123456789012", "012345678901234567890123456789012345678901234567890123456789012", 6},
  119. {"", "0123456789012345678901234567890123456789", -1},
  120. {"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456789", 2},
  121. {"xx012345678901234567890123456789012345678901234567890123456789012"[:41], "0123456789012345678901234567890123456789", -1},
  122. {"xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx", -1},
  123. {"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65},
  124. // test fallback to Rabin-Karp.
  125. {"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
  126. {"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
  127. }
  128. var lastIndexTests = []IndexTest{
  129. {"", "", 0},
  130. {"", "a", -1},
  131. {"", "foo", -1},
  132. {"fo", "foo", -1},
  133. {"foo", "foo", 0},
  134. {"foo", "f", 0},
  135. {"oofofoofooo", "f", 7},
  136. {"oofofoofooo", "foo", 7},
  137. {"barfoobarfoo", "foo", 9},
  138. {"foo", "", 3},
  139. {"foo", "o", 2},
  140. {"abcABCabc", "A", 3},
  141. {"abcABCabc", "a", 6},
  142. }
  143. var indexAnyTests = []IndexTest{
  144. {"", "", -1},
  145. {"", "a", -1},
  146. {"", "abc", -1},
  147. {"a", "", -1},
  148. {"a", "a", 0},
  149. {"\x80", "\xffb", 0},
  150. {"aaa", "a", 0},
  151. {"abc", "xyz", -1},
  152. {"abc", "xcz", 2},
  153. {"ab☺c", "x☺yz", 2},
  154. {"a☺b☻c☹d", "cx", len("a☺b☻")},
  155. {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
  156. {"aRegExp*", ".(|)*+?^$[]", 7},
  157. {dots + dots + dots, " ", -1},
  158. {"012abcba210", "\xffb", 4},
  159. {"012\x80bcb\x80210", "\xffb", 3},
  160. {"0123456\xcf\x80abc", "\xcfb\x80", 10},
  161. }
  162. var lastIndexAnyTests = []IndexTest{
  163. {"", "", -1},
  164. {"", "a", -1},
  165. {"", "abc", -1},
  166. {"a", "", -1},
  167. {"a", "a", 0},
  168. {"\x80", "\xffb", 0},
  169. {"aaa", "a", 2},
  170. {"abc", "xyz", -1},
  171. {"abc", "ab", 1},
  172. {"ab☺c", "x☺yz", 2},
  173. {"a☺b☻c☹d", "cx", len("a☺b☻")},
  174. {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
  175. {"a.RegExp*", ".(|)*+?^$[]", 8},
  176. {dots + dots + dots, " ", -1},
  177. {"012abcba210", "\xffb", 6},
  178. {"012\x80bcb\x80210", "\xffb", 7},
  179. {"0123456\xcf\x80abc", "\xcfb\x80", 10},
  180. }
  181. // Execute f on each test case. funcName should be the name of f; it's used
  182. // in failure reports.
  183. func runIndexTests(t *testing.T, f func(s, sep string) int, funcName string, testCases []IndexTest) {
  184. for _, test := range testCases {
  185. actual := f(test.s, test.sep)
  186. if actual != test.out {
  187. t.Errorf("%s(%q,%q) = %v; want %v", funcName, test.s, test.sep, actual, test.out)
  188. }
  189. }
  190. }
  191. func TestIndex(t *testing.T) { runIndexTests(t, Index, "Index", indexTests) }
  192. func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) }
  193. func TestIndexAny(t *testing.T) { runIndexTests(t, IndexAny, "IndexAny", indexAnyTests) }
  194. func TestLastIndexAny(t *testing.T) {
  195. runIndexTests(t, LastIndexAny, "LastIndexAny", lastIndexAnyTests)
  196. }
  197. func TestIndexByte(t *testing.T) {
  198. for _, tt := range indexTests {
  199. if len(tt.sep) != 1 {
  200. continue
  201. }
  202. pos := IndexByte(tt.s, tt.sep[0])
  203. if pos != tt.out {
  204. t.Errorf(`IndexByte(%q, %q) = %v; want %v`, tt.s, tt.sep[0], pos, tt.out)
  205. }
  206. }
  207. }
  208. func TestLastIndexByte(t *testing.T) {
  209. testCases := []IndexTest{
  210. {"", "q", -1},
  211. {"abcdef", "q", -1},
  212. {"abcdefabcdef", "a", len("abcdef")}, // something in the middle
  213. {"abcdefabcdef", "f", len("abcdefabcde")}, // last byte
  214. {"zabcdefabcdef", "z", 0}, // first byte
  215. {"a☺b☻c☹d", "b", len("a☺")}, // non-ascii
  216. }
  217. for _, test := range testCases {
  218. actual := LastIndexByte(test.s, test.sep[0])
  219. if actual != test.out {
  220. t.Errorf("LastIndexByte(%q,%c) = %v; want %v", test.s, test.sep[0], actual, test.out)
  221. }
  222. }
  223. }
  224. func simpleIndex(s, sep string) int {
  225. n := len(sep)
  226. for i := n; i <= len(s); i++ {
  227. if s[i-n:i] == sep {
  228. return i - n
  229. }
  230. }
  231. return -1
  232. }
  233. func TestIndexRandom(t *testing.T) {
  234. const chars = "abcdefghijklmnopqrstuvwxyz0123456789"
  235. for times := 0; times < 10; times++ {
  236. for strLen := 5 + rand.Intn(5); strLen < 140; strLen += 10 { // Arbitrary
  237. s1 := make([]byte, strLen)
  238. for i := range s1 {
  239. s1[i] = chars[rand.Intn(len(chars))]
  240. }
  241. s := string(s1)
  242. for i := 0; i < 50; i++ {
  243. begin := rand.Intn(len(s) + 1)
  244. end := begin + rand.Intn(len(s)+1-begin)
  245. sep := s[begin:end]
  246. if i%4 == 0 {
  247. pos := rand.Intn(len(sep) + 1)
  248. sep = sep[:pos] + "A" + sep[pos:]
  249. }
  250. want := simpleIndex(s, sep)
  251. res := Index(s, sep)
  252. if res != want {
  253. t.Errorf("Index(%s,%s) = %d; want %d", s, sep, res, want)
  254. }
  255. }
  256. }
  257. }
  258. }
  259. func TestIndexRune(t *testing.T) {
  260. tests := []struct {
  261. in string
  262. rune rune
  263. want int
  264. }{
  265. {"", 'a', -1},
  266. {"", '☺', -1},
  267. {"foo", '☹', -1},
  268. {"foo", 'o', 1},
  269. {"foo☺bar", '☺', 3},
  270. {"foo☺☻☹bar", '☹', 9},
  271. {"a A x", 'A', 2},
  272. {"some_text=some_value", '=', 9},
  273. {"☺a", 'a', 3},
  274. {"a☻☺b", '☺', 4},
  275. // RuneError should match any invalid UTF-8 byte sequence.
  276. {"�", '�', 0},
  277. {"\xff", '�', 0},
  278. {"☻x�", '�', len("☻x")},
  279. {"☻x\xe2\x98", '�', len("☻x")},
  280. {"☻x\xe2\x98�", '�', len("☻x")},
  281. {"☻x\xe2\x98x", '�', len("☻x")},
  282. // Invalid rune values should never match.
  283. {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", -1, -1},
  284. {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
  285. {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", utf8.MaxRune + 1, -1},
  286. }
  287. for _, tt := range tests {
  288. if got := IndexRune(tt.in, tt.rune); got != tt.want {
  289. t.Errorf("IndexRune(%q, %d) = %v; want %v", tt.in, tt.rune, got, tt.want)
  290. }
  291. }
  292. haystack := "test世界"
  293. allocs := testing.AllocsPerRun(1000, func() {
  294. if i := IndexRune(haystack, 's'); i != 2 {
  295. t.Fatalf("'s' at %d; want 2", i)
  296. }
  297. if i := IndexRune(haystack, '世'); i != 4 {
  298. t.Fatalf("'世' at %d; want 4", i)
  299. }
  300. })
  301. if runtime.Compiler == "gccgo" {
  302. t.Skip("skipping allocations test for gccgo until escape analysis is enabled")
  303. }
  304. if allocs != 0 && testing.CoverMode() == "" {
  305. t.Errorf("expected no allocations, got %f", allocs)
  306. }
  307. }
  308. const benchmarkString = "some_text=some☺value"
  309. func BenchmarkIndexRune(b *testing.B) {
  310. if got := IndexRune(benchmarkString, '☺'); got != 14 {
  311. b.Fatalf("wrong index: expected 14, got=%d", got)
  312. }
  313. for i := 0; i < b.N; i++ {
  314. IndexRune(benchmarkString, '☺')
  315. }
  316. }
  317. var benchmarkLongString = Repeat(" ", 100) + benchmarkString
  318. func BenchmarkIndexRuneLongString(b *testing.B) {
  319. if got := IndexRune(benchmarkLongString, '☺'); got != 114 {
  320. b.Fatalf("wrong index: expected 114, got=%d", got)
  321. }
  322. for i := 0; i < b.N; i++ {
  323. IndexRune(benchmarkLongString, '☺')
  324. }
  325. }
  326. func BenchmarkIndexRuneFastPath(b *testing.B) {
  327. if got := IndexRune(benchmarkString, 'v'); got != 17 {
  328. b.Fatalf("wrong index: expected 17, got=%d", got)
  329. }
  330. for i := 0; i < b.N; i++ {
  331. IndexRune(benchmarkString, 'v')
  332. }
  333. }
  334. func BenchmarkIndex(b *testing.B) {
  335. if got := Index(benchmarkString, "v"); got != 17 {
  336. b.Fatalf("wrong index: expected 17, got=%d", got)
  337. }
  338. for i := 0; i < b.N; i++ {
  339. Index(benchmarkString, "v")
  340. }
  341. }
  342. func BenchmarkLastIndex(b *testing.B) {
  343. if got := Index(benchmarkString, "v"); got != 17 {
  344. b.Fatalf("wrong index: expected 17, got=%d", got)
  345. }
  346. for i := 0; i < b.N; i++ {
  347. LastIndex(benchmarkString, "v")
  348. }
  349. }
  350. func BenchmarkIndexByte(b *testing.B) {
  351. if got := IndexByte(benchmarkString, 'v'); got != 17 {
  352. b.Fatalf("wrong index: expected 17, got=%d", got)
  353. }
  354. for i := 0; i < b.N; i++ {
  355. IndexByte(benchmarkString, 'v')
  356. }
  357. }
  358. type SplitTest struct {
  359. s string
  360. sep string
  361. n int
  362. a []string
  363. }
  364. var splittests = []SplitTest{
  365. {"", "", -1, []string{}},
  366. {abcd, "", 2, []string{"a", "bcd"}},
  367. {abcd, "", 4, []string{"a", "b", "c", "d"}},
  368. {abcd, "", -1, []string{"a", "b", "c", "d"}},
  369. {faces, "", -1, []string{"☺", "☻", "☹"}},
  370. {faces, "", 3, []string{"☺", "☻", "☹"}},
  371. {faces, "", 17, []string{"☺", "☻", "☹"}},
  372. {"☺�☹", "", -1, []string{"☺", "�", "☹"}},
  373. {abcd, "a", 0, nil},
  374. {abcd, "a", -1, []string{"", "bcd"}},
  375. {abcd, "z", -1, []string{"abcd"}},
  376. {commas, ",", -1, []string{"1", "2", "3", "4"}},
  377. {dots, "...", -1, []string{"1", ".2", ".3", ".4"}},
  378. {faces, "☹", -1, []string{"☺☻", ""}},
  379. {faces, "~", -1, []string{faces}},
  380. {"1 2 3 4", " ", 3, []string{"1", "2", "3 4"}},
  381. {"1 2", " ", 3, []string{"1", "2"}},
  382. }
  383. func TestSplit(t *testing.T) {
  384. for _, tt := range splittests {
  385. a := SplitN(tt.s, tt.sep, tt.n)
  386. if !eq(a, tt.a) {
  387. t.Errorf("Split(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, a, tt.a)
  388. continue
  389. }
  390. if tt.n == 0 {
  391. continue
  392. }
  393. s := Join(a, tt.sep)
  394. if s != tt.s {
  395. t.Errorf("Join(Split(%q, %q, %d), %q) = %q", tt.s, tt.sep, tt.n, tt.sep, s)
  396. }
  397. if tt.n < 0 {
  398. b := Split(tt.s, tt.sep)
  399. if !reflect.DeepEqual(a, b) {
  400. t.Errorf("Split disagrees with SplitN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
  401. }
  402. }
  403. }
  404. }
  405. var splitaftertests = []SplitTest{
  406. {abcd, "a", -1, []string{"a", "bcd"}},
  407. {abcd, "z", -1, []string{"abcd"}},
  408. {abcd, "", -1, []string{"a", "b", "c", "d"}},
  409. {commas, ",", -1, []string{"1,", "2,", "3,", "4"}},
  410. {dots, "...", -1, []string{"1...", ".2...", ".3...", ".4"}},
  411. {faces, "☹", -1, []string{"☺☻☹", ""}},
  412. {faces, "~", -1, []string{faces}},
  413. {faces, "", -1, []string{"☺", "☻", "☹"}},
  414. {"1 2 3 4", " ", 3, []string{"1 ", "2 ", "3 4"}},
  415. {"1 2 3", " ", 3, []string{"1 ", "2 ", "3"}},
  416. {"1 2", " ", 3, []string{"1 ", "2"}},
  417. {"123", "", 2, []string{"1", "23"}},
  418. {"123", "", 17, []string{"1", "2", "3"}},
  419. }
  420. func TestSplitAfter(t *testing.T) {
  421. for _, tt := range splitaftertests {
  422. a := SplitAfterN(tt.s, tt.sep, tt.n)
  423. if !eq(a, tt.a) {
  424. t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, a, tt.a)
  425. continue
  426. }
  427. s := Join(a, "")
  428. if s != tt.s {
  429. t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s)
  430. }
  431. if tt.n < 0 {
  432. b := SplitAfter(tt.s, tt.sep)
  433. if !reflect.DeepEqual(a, b) {
  434. t.Errorf("SplitAfter disagrees with SplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
  435. }
  436. }
  437. }
  438. }
  439. type FieldsTest struct {
  440. s string
  441. a []string
  442. }
  443. var fieldstests = []FieldsTest{
  444. {"", []string{}},
  445. {" ", []string{}},
  446. {" \t ", []string{}},
  447. {"\u2000", []string{}},
  448. {" abc ", []string{"abc"}},
  449. {"1 2 3 4", []string{"1", "2", "3", "4"}},
  450. {"1 2 3 4", []string{"1", "2", "3", "4"}},
  451. {"1\t\t2\t\t3\t4", []string{"1", "2", "3", "4"}},
  452. {"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}},
  453. {"\u2000\u2001\u2002", []string{}},
  454. {"\n™\t™\n", []string{"™", "™"}},
  455. {"\n\u20001™2\u2000 \u2001 ™", []string{"1™2", "™"}},
  456. {"\n1\uFFFD \uFFFD2\u20003\uFFFD4", []string{"1\uFFFD", "\uFFFD2", "3\uFFFD4"}},
  457. {"1\xFF\u2000\xFF2\xFF \xFF", []string{"1\xFF", "\xFF2\xFF", "\xFF"}},
  458. {faces, []string{faces}},
  459. }
  460. func TestFields(t *testing.T) {
  461. for _, tt := range fieldstests {
  462. a := Fields(tt.s)
  463. if !eq(a, tt.a) {
  464. t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a)
  465. continue
  466. }
  467. }
  468. }
  469. var FieldsFuncTests = []FieldsTest{
  470. {"", []string{}},
  471. {"XX", []string{}},
  472. {"XXhiXXX", []string{"hi"}},
  473. {"aXXbXXXcX", []string{"a", "b", "c"}},
  474. }
  475. func TestFieldsFunc(t *testing.T) {
  476. for _, tt := range fieldstests {
  477. a := FieldsFunc(tt.s, unicode.IsSpace)
  478. if !eq(a, tt.a) {
  479. t.Errorf("FieldsFunc(%q, unicode.IsSpace) = %v; want %v", tt.s, a, tt.a)
  480. continue
  481. }
  482. }
  483. pred := func(c rune) bool { return c == 'X' }
  484. for _, tt := range FieldsFuncTests {
  485. a := FieldsFunc(tt.s, pred)
  486. if !eq(a, tt.a) {
  487. t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a)
  488. }
  489. }
  490. }
  491. // Test case for any function which accepts and returns a single string.
  492. type StringTest struct {
  493. in, out string
  494. }
  495. // Execute f on each test case. funcName should be the name of f; it's used
  496. // in failure reports.
  497. func runStringTests(t *testing.T, f func(string) string, funcName string, testCases []StringTest) {
  498. for _, tc := range testCases {
  499. actual := f(tc.in)
  500. if actual != tc.out {
  501. t.Errorf("%s(%q) = %q; want %q", funcName, tc.in, actual, tc.out)
  502. }
  503. }
  504. }
  505. var upperTests = []StringTest{
  506. {"", ""},
  507. {"ONLYUPPER", "ONLYUPPER"},
  508. {"abc", "ABC"},
  509. {"AbC123", "ABC123"},
  510. {"azAZ09_", "AZAZ09_"},
  511. {"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"},
  512. {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"},
  513. {"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
  514. {"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune
  515. }
  516. var lowerTests = []StringTest{
  517. {"", ""},
  518. {"abc", "abc"},
  519. {"AbC123", "abc123"},
  520. {"azAZ09_", "azaz09_"},
  521. {"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"},
  522. {"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"},
  523. {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char
  524. {"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune
  525. }
  526. const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
  527. var trimSpaceTests = []StringTest{
  528. {"", ""},
  529. {"abc", "abc"},
  530. {space + "abc" + space, "abc"},
  531. {" ", ""},
  532. {" \t\r\n \t\t\r\r\n\n ", ""},
  533. {" \t\r\n x\t\t\r\r\n\n ", "x"},
  534. {" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", "x\t\t\r\r\ny"},
  535. {"1 \t\r\n2", "1 \t\r\n2"},
  536. {" x\x80", "x\x80"},
  537. {" x\xc0", "x\xc0"},
  538. {"x \xc0\xc0 ", "x \xc0\xc0"},
  539. {"x \xc0", "x \xc0"},
  540. {"x \xc0 ", "x \xc0"},
  541. {"x \xc0\xc0 ", "x \xc0\xc0"},
  542. {"x ☺\xc0\xc0 ", "x ☺\xc0\xc0"},
  543. {"x ☺ ", "x ☺"},
  544. }
  545. func tenRunes(ch rune) string {
  546. r := make([]rune, 10)
  547. for i := range r {
  548. r[i] = ch
  549. }
  550. return string(r)
  551. }
  552. // User-defined self-inverse mapping function
  553. func rot13(r rune) rune {
  554. step := rune(13)
  555. if r >= 'a' && r <= 'z' {
  556. return ((r - 'a' + step) % 26) + 'a'
  557. }
  558. if r >= 'A' && r <= 'Z' {
  559. return ((r - 'A' + step) % 26) + 'A'
  560. }
  561. return r
  562. }
  563. func TestMap(t *testing.T) {
  564. // Run a couple of awful growth/shrinkage tests
  565. a := tenRunes('a')
  566. // 1. Grow. This triggers two reallocations in Map.
  567. maxRune := func(rune) rune { return unicode.MaxRune }
  568. m := Map(maxRune, a)
  569. expect := tenRunes(unicode.MaxRune)
  570. if m != expect {
  571. t.Errorf("growing: expected %q got %q", expect, m)
  572. }
  573. // 2. Shrink
  574. minRune := func(rune) rune { return 'a' }
  575. m = Map(minRune, tenRunes(unicode.MaxRune))
  576. expect = a
  577. if m != expect {
  578. t.Errorf("shrinking: expected %q got %q", expect, m)
  579. }
  580. // 3. Rot13
  581. m = Map(rot13, "a to zed")
  582. expect = "n gb mrq"
  583. if m != expect {
  584. t.Errorf("rot13: expected %q got %q", expect, m)
  585. }
  586. // 4. Rot13^2
  587. m = Map(rot13, Map(rot13, "a to zed"))
  588. expect = "a to zed"
  589. if m != expect {
  590. t.Errorf("rot13: expected %q got %q", expect, m)
  591. }
  592. // 5. Drop
  593. dropNotLatin := func(r rune) rune {
  594. if unicode.Is(unicode.Latin, r) {
  595. return r
  596. }
  597. return -1
  598. }
  599. m = Map(dropNotLatin, "Hello, 세계")
  600. expect = "Hello"
  601. if m != expect {
  602. t.Errorf("drop: expected %q got %q", expect, m)
  603. }
  604. // 6. Identity
  605. identity := func(r rune) rune {
  606. return r
  607. }
  608. orig := "Input string that we expect not to be copied."
  609. m = Map(identity, orig)
  610. if (*reflect.StringHeader)(unsafe.Pointer(&orig)).Data !=
  611. (*reflect.StringHeader)(unsafe.Pointer(&m)).Data {
  612. t.Error("unexpected copy during identity map")
  613. }
  614. // 7. Handle invalid UTF-8 sequence
  615. replaceNotLatin := func(r rune) rune {
  616. if unicode.Is(unicode.Latin, r) {
  617. return r
  618. }
  619. return utf8.RuneError
  620. }
  621. m = Map(replaceNotLatin, "Hello\255World")
  622. expect = "Hello\uFFFDWorld"
  623. if m != expect {
  624. t.Errorf("replace invalid sequence: expected %q got %q", expect, m)
  625. }
  626. // 8. Check utf8.RuneSelf and utf8.MaxRune encoding
  627. encode := func(r rune) rune {
  628. switch r {
  629. case utf8.RuneSelf:
  630. return unicode.MaxRune
  631. case unicode.MaxRune:
  632. return utf8.RuneSelf
  633. }
  634. return r
  635. }
  636. s := string(rune(utf8.RuneSelf)) + string(utf8.MaxRune)
  637. r := string(utf8.MaxRune) + string(rune(utf8.RuneSelf)) // reverse of s
  638. m = Map(encode, s)
  639. if m != r {
  640. t.Errorf("encoding not handled correctly: expected %q got %q", r, m)
  641. }
  642. m = Map(encode, r)
  643. if m != s {
  644. t.Errorf("encoding not handled correctly: expected %q got %q", s, m)
  645. }
  646. // 9. Check mapping occurs in the front, middle and back
  647. trimSpaces := func(r rune) rune {
  648. if unicode.IsSpace(r) {
  649. return -1
  650. }
  651. return r
  652. }
  653. m = Map(trimSpaces, " abc 123 ")
  654. expect = "abc123"
  655. if m != expect {
  656. t.Errorf("trimSpaces: expected %q got %q", expect, m)
  657. }
  658. }
  659. func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
  660. func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) }
  661. var toValidUTF8Tests = []struct {
  662. in string
  663. repl string
  664. out string
  665. }{
  666. {"", "\uFFFD", ""},
  667. {"abc", "\uFFFD", "abc"},
  668. {"\uFDDD", "\uFFFD", "\uFDDD"},
  669. {"a\xffb", "\uFFFD", "a\uFFFDb"},
  670. {"a\xffb\uFFFD", "X", "aXb\uFFFD"},
  671. {"a☺\xffb☺\xC0\xAFc☺\xff", "", "a☺b☺c☺"},
  672. {"a☺\xffb☺\xC0\xAFc☺\xff", "日本語", "a☺日本語b☺日本語c☺日本語"},
  673. {"\xC0\xAF", "\uFFFD", "\uFFFD"},
  674. {"\xE0\x80\xAF", "\uFFFD", "\uFFFD"},
  675. {"\xed\xa0\x80", "abc", "abc"},
  676. {"\xed\xbf\xbf", "\uFFFD", "\uFFFD"},
  677. {"\xF0\x80\x80\xaf", "☺", "☺"},
  678. {"\xF8\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"},
  679. {"\xFC\x80\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"},
  680. }
  681. func TestToValidUTF8(t *testing.T) {
  682. for _, tc := range toValidUTF8Tests {
  683. got := ToValidUTF8(tc.in, tc.repl)
  684. if got != tc.out {
  685. t.Errorf("ToValidUTF8(%q, %q) = %q; want %q", tc.in, tc.repl, got, tc.out)
  686. }
  687. }
  688. }
  689. func BenchmarkToUpper(b *testing.B) {
  690. for _, tc := range upperTests {
  691. b.Run(tc.in, func(b *testing.B) {
  692. for i := 0; i < b.N; i++ {
  693. actual := ToUpper(tc.in)
  694. if actual != tc.out {
  695. b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out)
  696. }
  697. }
  698. })
  699. }
  700. }
  701. func BenchmarkToLower(b *testing.B) {
  702. for _, tc := range lowerTests {
  703. b.Run(tc.in, func(b *testing.B) {
  704. for i := 0; i < b.N; i++ {
  705. actual := ToLower(tc.in)
  706. if actual != tc.out {
  707. b.Errorf("ToLower(%q) = %q; want %q", tc.in, actual, tc.out)
  708. }
  709. }
  710. })
  711. }
  712. }
  713. func BenchmarkMapNoChanges(b *testing.B) {
  714. identity := func(r rune) rune {
  715. return r
  716. }
  717. for i := 0; i < b.N; i++ {
  718. Map(identity, "Some string that won't be modified.")
  719. }
  720. }
  721. func TestSpecialCase(t *testing.T) {
  722. lower := "abcçdefgğhıijklmnoöprsştuüvyz"
  723. upper := "ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ"
  724. u := ToUpperSpecial(unicode.TurkishCase, upper)
  725. if u != upper {
  726. t.Errorf("Upper(upper) is %s not %s", u, upper)
  727. }
  728. u = ToUpperSpecial(unicode.TurkishCase, lower)
  729. if u != upper {
  730. t.Errorf("Upper(lower) is %s not %s", u, upper)
  731. }
  732. l := ToLowerSpecial(unicode.TurkishCase, lower)
  733. if l != lower {
  734. t.Errorf("Lower(lower) is %s not %s", l, lower)
  735. }
  736. l = ToLowerSpecial(unicode.TurkishCase, upper)
  737. if l != lower {
  738. t.Errorf("Lower(upper) is %s not %s", l, lower)
  739. }
  740. }
  741. func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) }
  742. var trimTests = []struct {
  743. f string
  744. in, arg, out string
  745. }{
  746. {"Trim", "abba", "a", "bb"},
  747. {"Trim", "abba", "ab", ""},
  748. {"TrimLeft", "abba", "ab", ""},
  749. {"TrimRight", "abba", "ab", ""},
  750. {"TrimLeft", "abba", "a", "bba"},
  751. {"TrimLeft", "abba", "b", "abba"},
  752. {"TrimRight", "abba", "a", "abb"},
  753. {"TrimRight", "abba", "b", "abba"},
  754. {"Trim", "<tag>", "<>", "tag"},
  755. {"Trim", "* listitem", " *", "listitem"},
  756. {"Trim", `"quote"`, `"`, "quote"},
  757. {"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
  758. {"Trim", "\x80test\xff", "\xff", "test"},
  759. {"Trim", " Ġ ", " ", "Ġ"},
  760. {"Trim", " Ġİ0", "0 ", "Ġİ"},
  761. //empty string tests
  762. {"Trim", "abba", "", "abba"},
  763. {"Trim", "", "123", ""},
  764. {"Trim", "", "", ""},
  765. {"TrimLeft", "abba", "", "abba"},
  766. {"TrimLeft", "", "123", ""},
  767. {"TrimLeft", "", "", ""},
  768. {"TrimRight", "abba", "", "abba"},
  769. {"TrimRight", "", "123", ""},
  770. {"TrimRight", "", "", ""},
  771. {"TrimRight", "☺\xc0", "☺", "☺\xc0"},
  772. {"TrimPrefix", "aabb", "a", "abb"},
  773. {"TrimPrefix", "aabb", "b", "aabb"},
  774. {"TrimSuffix", "aabb", "a", "aabb"},
  775. {"TrimSuffix", "aabb", "b", "aab"},
  776. }
  777. func TestTrim(t *testing.T) {
  778. for _, tc := range trimTests {
  779. name := tc.f
  780. var f func(string, string) string
  781. switch name {
  782. case "Trim":
  783. f = Trim
  784. case "TrimLeft":
  785. f = TrimLeft
  786. case "TrimRight":
  787. f = TrimRight
  788. case "TrimPrefix":
  789. f = TrimPrefix
  790. case "TrimSuffix":
  791. f = TrimSuffix
  792. default:
  793. t.Errorf("Undefined trim function %s", name)
  794. }
  795. actual := f(tc.in, tc.arg)
  796. if actual != tc.out {
  797. t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
  798. }
  799. }
  800. }
  801. func BenchmarkTrim(b *testing.B) {
  802. b.ReportAllocs()
  803. for i := 0; i < b.N; i++ {
  804. for _, tc := range trimTests {
  805. name := tc.f
  806. var f func(string, string) string
  807. switch name {
  808. case "Trim":
  809. f = Trim
  810. case "TrimLeft":
  811. f = TrimLeft
  812. case "TrimRight":
  813. f = TrimRight
  814. case "TrimPrefix":
  815. f = TrimPrefix
  816. case "TrimSuffix":
  817. f = TrimSuffix
  818. default:
  819. b.Errorf("Undefined trim function %s", name)
  820. }
  821. actual := f(tc.in, tc.arg)
  822. if actual != tc.out {
  823. b.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
  824. }
  825. }
  826. }
  827. }
  828. func BenchmarkToValidUTF8(b *testing.B) {
  829. tests := []struct {
  830. name string
  831. input string
  832. }{
  833. {"Valid", "typical"},
  834. {"InvalidASCII", "foo\xffbar"},
  835. {"InvalidNonASCII", "日本語\xff日本語"},
  836. }
  837. replacement := "\uFFFD"
  838. b.ResetTimer()
  839. for _, test := range tests {
  840. b.Run(test.name, func(b *testing.B) {
  841. for i := 0; i < b.N; i++ {
  842. ToValidUTF8(test.input, replacement)
  843. }
  844. })
  845. }
  846. }
  847. type predicate struct {
  848. f func(rune) bool
  849. name string
  850. }
  851. var isSpace = predicate{unicode.IsSpace, "IsSpace"}
  852. var isDigit = predicate{unicode.IsDigit, "IsDigit"}
  853. var isUpper = predicate{unicode.IsUpper, "IsUpper"}
  854. var isValidRune = predicate{
  855. func(r rune) bool {
  856. return r != utf8.RuneError
  857. },
  858. "IsValidRune",
  859. }
  860. func not(p predicate) predicate {
  861. return predicate{
  862. func(r rune) bool {
  863. return !p.f(r)
  864. },
  865. "not " + p.name,
  866. }
  867. }
  868. var trimFuncTests = []struct {
  869. f predicate
  870. in string
  871. trimOut string
  872. leftOut string
  873. rightOut string
  874. }{
  875. {isSpace, space + " hello " + space,
  876. "hello",
  877. "hello " + space,
  878. space + " hello"},
  879. {isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51",
  880. "hello",
  881. "hello34\u0e50\u0e51",
  882. "\u0e50\u0e5212hello"},
  883. {isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F",
  884. "hello",
  885. "helloEF\u2C6F\u2C6FGH\u2C6F\u2C6F",
  886. "\u2C6F\u2C6F\u2C6F\u2C6FABCDhello"},
  887. {not(isSpace), "hello" + space + "hello",
  888. space,
  889. space + "hello",
  890. "hello" + space},
  891. {not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo",
  892. "\u0e50\u0e521234\u0e50\u0e51",
  893. "\u0e50\u0e521234\u0e50\u0e51helo",
  894. "hello\u0e50\u0e521234\u0e50\u0e51"},
  895. {isValidRune, "ab\xc0a\xc0cd",
  896. "\xc0a\xc0",
  897. "\xc0a\xc0cd",
  898. "ab\xc0a\xc0"},
  899. {not(isValidRune), "\xc0a\xc0",
  900. "a",
  901. "a\xc0",
  902. "\xc0a"},
  903. {isSpace, "",
  904. "",
  905. "",
  906. ""},
  907. {isSpace, " ",
  908. "",
  909. "",
  910. ""},
  911. }
  912. func TestTrimFunc(t *testing.T) {
  913. for _, tc := range trimFuncTests {
  914. trimmers := []struct {
  915. name string
  916. trim func(s string, f func(r rune) bool) string
  917. out string
  918. }{
  919. {"TrimFunc", TrimFunc, tc.trimOut},
  920. {"TrimLeftFunc", TrimLeftFunc, tc.leftOut},
  921. {"TrimRightFunc", TrimRightFunc, tc.rightOut},
  922. }
  923. for _, trimmer := range trimmers {
  924. actual := trimmer.trim(tc.in, tc.f.f)
  925. if actual != trimmer.out {
  926. t.Errorf("%s(%q, %q) = %q; want %q", trimmer.name, tc.in, tc.f.name, actual, trimmer.out)
  927. }
  928. }
  929. }
  930. }
  931. var indexFuncTests = []struct {
  932. in string
  933. f predicate
  934. first, last int
  935. }{
  936. {"", isValidRune, -1, -1},
  937. {"abc", isDigit, -1, -1},
  938. {"0123", isDigit, 0, 3},
  939. {"a1b", isDigit, 1, 1},
  940. {space, isSpace, 0, len(space) - 3}, // last rune in space is 3 bytes
  941. {"\u0e50\u0e5212hello34\u0e50\u0e51", isDigit, 0, 18},
  942. {"\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", isUpper, 0, 34},
  943. {"12\u0e50\u0e52hello34\u0e50\u0e51", not(isDigit), 8, 12},
  944. // tests of invalid UTF-8
  945. {"\x801", isDigit, 1, 1},
  946. {"\x80abc", isDigit, -1, -1},
  947. {"\xc0a\xc0", isValidRune, 1, 1},
  948. {"\xc0a\xc0", not(isValidRune), 0, 2},
  949. {"\xc0☺\xc0", not(isValidRune), 0, 4},
  950. {"\xc0☺\xc0\xc0", not(isValidRune), 0, 5},
  951. {"ab\xc0a\xc0cd", not(isValidRune), 2, 4},
  952. {"a\xe0\x80cd", not(isValidRune), 1, 2},
  953. {"\x80\x80\x80\x80", not(isValidRune), 0, 3},
  954. }
  955. func TestIndexFunc(t *testing.T) {
  956. for _, tc := range indexFuncTests {
  957. first := IndexFunc(tc.in, tc.f.f)
  958. if first != tc.first {
  959. t.Errorf("IndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, first, tc.first)
  960. }
  961. last := LastIndexFunc(tc.in, tc.f.f)
  962. if last != tc.last {
  963. t.Errorf("LastIndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, last, tc.last)
  964. }
  965. }
  966. }
  967. func equal(m string, s1, s2 string, t *testing.T) bool {
  968. if s1 == s2 {
  969. return true
  970. }
  971. e1 := Split(s1, "")
  972. e2 := Split(s2, "")
  973. for i, c1 := range e1 {
  974. if i >= len(e2) {
  975. break
  976. }
  977. r1, _ := utf8.DecodeRuneInString(c1)
  978. r2, _ := utf8.DecodeRuneInString(e2[i])
  979. if r1 != r2 {
  980. t.Errorf("%s diff at %d: U+%04X U+%04X", m, i, r1, r2)
  981. }
  982. }
  983. return false
  984. }
  985. func TestCaseConsistency(t *testing.T) {
  986. // Make a string of all the runes.
  987. numRunes := int(unicode.MaxRune + 1)
  988. if testing.Short() {
  989. numRunes = 1000
  990. }
  991. a := make([]rune, numRunes)
  992. for i := range a {
  993. a[i] = rune(i)
  994. }
  995. s := string(a)
  996. // convert the cases.
  997. upper := ToUpper(s)
  998. lower := ToLower(s)
  999. // Consistency checks
  1000. if n := utf8.RuneCountInString(upper); n != numRunes {
  1001. t.Error("rune count wrong in upper:", n)
  1002. }
  1003. if n := utf8.RuneCountInString(lower); n != numRunes {
  1004. t.Error("rune count wrong in lower:", n)
  1005. }
  1006. if !equal("ToUpper(upper)", ToUpper(upper), upper, t) {
  1007. t.Error("ToUpper(upper) consistency fail")
  1008. }
  1009. if !equal("ToLower(lower)", ToLower(lower), lower, t) {
  1010. t.Error("ToLower(lower) consistency fail")
  1011. }
  1012. /*
  1013. These fail because of non-one-to-oneness of the data, such as multiple
  1014. upper case 'I' mapping to 'i'. We comment them out but keep them for
  1015. interest.
  1016. For instance: CAPITAL LETTER I WITH DOT ABOVE:
  1017. unicode.ToUpper(unicode.ToLower('\u0130')) != '\u0130'
  1018. if !equal("ToUpper(lower)", ToUpper(lower), upper, t) {
  1019. t.Error("ToUpper(lower) consistency fail");
  1020. }
  1021. if !equal("ToLower(upper)", ToLower(upper), lower, t) {
  1022. t.Error("ToLower(upper) consistency fail");
  1023. }
  1024. */
  1025. }
  1026. var RepeatTests = []struct {
  1027. in, out string
  1028. count int
  1029. }{
  1030. {"", "", 0},
  1031. {"", "", 1},
  1032. {"", "", 2},
  1033. {"-", "", 0},
  1034. {"-", "-", 1},
  1035. {"-", "----------", 10},
  1036. {"abc ", "abc abc abc ", 3},
  1037. }
  1038. func TestRepeat(t *testing.T) {
  1039. for _, tt := range RepeatTests {
  1040. a := Repeat(tt.in, tt.count)
  1041. if !equal("Repeat(s)", a, tt.out, t) {
  1042. t.Errorf("Repeat(%v, %d) = %v; want %v", tt.in, tt.count, a, tt.out)
  1043. continue
  1044. }
  1045. }
  1046. }
  1047. func repeat(s string, count int) (err error) {
  1048. defer func() {
  1049. if r := recover(); r != nil {
  1050. switch v := r.(type) {
  1051. case error:
  1052. err = v
  1053. default:
  1054. err = fmt.Errorf("%s", v)
  1055. }
  1056. }
  1057. }()
  1058. Repeat(s, count)
  1059. return
  1060. }
  1061. // See Issue golang.org/issue/16237
  1062. func TestRepeatCatchesOverflow(t *testing.T) {
  1063. tests := [...]struct {
  1064. s string
  1065. count int
  1066. errStr string
  1067. }{
  1068. 0: {"--", -2147483647, "negative"},
  1069. 1: {"", int(^uint(0) >> 1), ""},
  1070. 2: {"-", 10, ""},
  1071. 3: {"gopher", 0, ""},
  1072. 4: {"-", -1, "negative"},
  1073. 5: {"--", -102, "negative"},
  1074. 6: {string(make([]byte, 255)), int((^uint(0))/255 + 1), "overflow"},
  1075. }
  1076. for i, tt := range tests {
  1077. err := repeat(tt.s, tt.count)
  1078. if tt.errStr == "" {
  1079. if err != nil {
  1080. t.Errorf("#%d panicked %v", i, err)
  1081. }
  1082. continue
  1083. }
  1084. if err == nil || !Contains(err.Error(), tt.errStr) {
  1085. t.Errorf("#%d expected %q got %q", i, tt.errStr, err)
  1086. }
  1087. }
  1088. }
  1089. func runesEqual(a, b []rune) bool {
  1090. if len(a) != len(b) {
  1091. return false
  1092. }
  1093. for i, r := range a {
  1094. if r != b[i] {
  1095. return false
  1096. }
  1097. }
  1098. return true
  1099. }
  1100. var RunesTests = []struct {
  1101. in string
  1102. out []rune
  1103. lossy bool
  1104. }{
  1105. {"", []rune{}, false},
  1106. {" ", []rune{32}, false},
  1107. {"ABC", []rune{65, 66, 67}, false},
  1108. {"abc", []rune{97, 98, 99}, false},
  1109. {"\u65e5\u672c\u8a9e", []rune{26085, 26412, 35486}, false},
  1110. {"ab\x80c", []rune{97, 98, 0xFFFD, 99}, true},
  1111. {"ab\xc0c", []rune{97, 98, 0xFFFD, 99}, true},
  1112. }
  1113. func TestRunes(t *testing.T) {
  1114. for _, tt := range RunesTests {
  1115. a := []rune(tt.in)
  1116. if !runesEqual(a, tt.out) {
  1117. t.Errorf("[]rune(%q) = %v; want %v", tt.in, a, tt.out)
  1118. continue
  1119. }
  1120. if !tt.lossy {
  1121. // can only test reassembly if we didn't lose information
  1122. s := string(a)
  1123. if s != tt.in {
  1124. t.Errorf("string([]rune(%q)) = %x; want %x", tt.in, s, tt.in)
  1125. }
  1126. }
  1127. }
  1128. }
  1129. func TestReadByte(t *testing.T) {
  1130. testStrings := []string{"", abcd, faces, commas}
  1131. for _, s := range testStrings {
  1132. reader := NewReader(s)
  1133. if e := reader.UnreadByte(); e == nil {
  1134. t.Errorf("Unreading %q at beginning: expected error", s)
  1135. }
  1136. var res bytes.Buffer
  1137. for {
  1138. b, e := reader.ReadByte()
  1139. if e == io.EOF {
  1140. break
  1141. }
  1142. if e != nil {
  1143. t.Errorf("Reading %q: %s", s, e)
  1144. break
  1145. }
  1146. res.WriteByte(b)
  1147. // unread and read again
  1148. e = reader.UnreadByte()
  1149. if e != nil {
  1150. t.Errorf("Unreading %q: %s", s, e)
  1151. break
  1152. }
  1153. b1, e := reader.ReadByte()
  1154. if e != nil {
  1155. t.Errorf("Reading %q after unreading: %s", s, e)
  1156. break
  1157. }
  1158. if b1 != b {
  1159. t.Errorf("Reading %q after unreading: want byte %q, got %q", s, b, b1)
  1160. break
  1161. }
  1162. }
  1163. if res.String() != s {
  1164. t.Errorf("Reader(%q).ReadByte() produced %q", s, res.String())
  1165. }
  1166. }
  1167. }
  1168. func TestReadRune(t *testing.T) {
  1169. testStrings := []string{"", abcd, faces, commas}
  1170. for _, s := range testStrings {
  1171. reader := NewReader(s)
  1172. if e := reader.UnreadRune(); e == nil {
  1173. t.Errorf("Unreading %q at beginning: expected error", s)
  1174. }
  1175. res := ""
  1176. for {
  1177. r, z, e := reader.ReadRune()
  1178. if e == io.EOF {
  1179. break
  1180. }
  1181. if e != nil {
  1182. t.Errorf("Reading %q: %s", s, e)
  1183. break
  1184. }
  1185. res += string(r)
  1186. // unread and read again
  1187. e = reader.UnreadRune()
  1188. if e != nil {
  1189. t.Errorf("Unreading %q: %s", s, e)
  1190. break
  1191. }
  1192. r1, z1, e := reader.ReadRune()
  1193. if e != nil {
  1194. t.Errorf("Reading %q after unreading: %s", s, e)
  1195. break
  1196. }
  1197. if r1 != r {
  1198. t.Errorf("Reading %q after unreading: want rune %q, got %q", s, r, r1)
  1199. break
  1200. }
  1201. if z1 != z {
  1202. t.Errorf("Reading %q after unreading: want size %d, got %d", s, z, z1)
  1203. break
  1204. }
  1205. }
  1206. if res != s {
  1207. t.Errorf("Reader(%q).ReadRune() produced %q", s, res)
  1208. }
  1209. }
  1210. }
  1211. var UnreadRuneErrorTests = []struct {
  1212. name string
  1213. f func(*Reader)
  1214. }{
  1215. {"Read", func(r *Reader) { r.Read([]byte{0}) }},
  1216. {"ReadByte", func(r *Reader) { r.ReadByte() }},
  1217. {"UnreadRune", func(r *Reader) { r.UnreadRune() }},
  1218. {"Seek", func(r *Reader) { r.Seek(0, io.SeekCurrent) }},
  1219. {"WriteTo", func(r *Reader) { r.WriteTo(&bytes.Buffer{}) }},
  1220. }
  1221. func TestUnreadRuneError(t *testing.T) {
  1222. for _, tt := range UnreadRuneErrorTests {
  1223. reader := NewReader("0123456789")
  1224. if _, _, err := reader.ReadRune(); err != nil {
  1225. // should not happen
  1226. t.Fatal(err)
  1227. }
  1228. tt.f(reader)
  1229. err := reader.UnreadRune()
  1230. if err == nil {
  1231. t.Errorf("Unreading after %s: expected error", tt.name)
  1232. }
  1233. }
  1234. }
  1235. var ReplaceTests = []struct {
  1236. in string
  1237. old, new string
  1238. n int
  1239. out string
  1240. }{
  1241. {"hello", "l", "L", 0, "hello"},
  1242. {"hello", "l", "L", -1, "heLLo"},
  1243. {"hello", "x", "X", -1, "hello"},
  1244. {"", "x", "X", -1, ""},
  1245. {"radar", "r", "<r>", -1, "<r>ada<r>"},
  1246. {"", "", "<>", -1, "<>"},
  1247. {"banana", "a", "<>", -1, "b<>n<>n<>"},
  1248. {"banana", "a", "<>", 1, "b<>nana"},
  1249. {"banana", "a", "<>", 1000, "b<>n<>n<>"},
  1250. {"banana", "an", "<>", -1, "b<><>a"},
  1251. {"banana", "ana", "<>", -1, "b<>na"},
  1252. {"banana", "", "<>", -1, "<>b<>a<>n<>a<>n<>a<>"},
  1253. {"banana", "", "<>", 10, "<>b<>a<>n<>a<>n<>a<>"},
  1254. {"banana", "", "<>", 6, "<>b<>a<>n<>a<>n<>a"},
  1255. {"banana", "", "<>", 5, "<>b<>a<>n<>a<>na"},
  1256. {"banana", "", "<>", 1, "<>banana"},
  1257. {"banana", "a", "a", -1, "banana"},
  1258. {"banana", "a", "a", 1, "banana"},
  1259. {"☺☻☹", "", "<>", -1, "<>☺<>☻<>☹<>"},
  1260. }
  1261. func TestReplace(t *testing.T) {
  1262. for _, tt := range ReplaceTests {
  1263. if s := Replace(tt.in, tt.old, tt.new, tt.n); s != tt.out {
  1264. t.Errorf("Replace(%q, %q, %q, %d) = %q, want %q", tt.in, tt.old, tt.new, tt.n, s, tt.out)
  1265. }
  1266. if tt.n == -1 {
  1267. s := ReplaceAll(tt.in, tt.old, tt.new)
  1268. if s != tt.out {
  1269. t.Errorf("ReplaceAll(%q, %q, %q) = %q, want %q", tt.in, tt.old, tt.new, s, tt.out)
  1270. }
  1271. }
  1272. }
  1273. }
  1274. var TitleTests = []struct {
  1275. in, out string
  1276. }{
  1277. {"", ""},
  1278. {"a", "A"},
  1279. {" aaa aaa aaa ", " Aaa Aaa Aaa "},
  1280. {" Aaa Aaa Aaa ", " Aaa Aaa Aaa "},
  1281. {"123a456", "123a456"},
  1282. {"double-blind", "Double-Blind"},
  1283. {"ÿøû", "Ÿøû"},
  1284. {"with_underscore", "With_underscore"},
  1285. {"unicode \xe2\x80\xa8 line separator", "Unicode \xe2\x80\xa8 Line Separator"},
  1286. }
  1287. func TestTitle(t *testing.T) {
  1288. for _, tt := range TitleTests {
  1289. if s := Title(tt.in); s != tt.out {
  1290. t.Errorf("Title(%q) = %q, want %q", tt.in, s, tt.out)
  1291. }
  1292. }
  1293. }
  1294. var ContainsTests = []struct {
  1295. str, substr string
  1296. expected bool
  1297. }{
  1298. {"abc", "bc", true},
  1299. {"abc", "bcd", false},
  1300. {"abc", "", true},
  1301. {"", "a", false},
  1302. // cases to cover code in runtime/asm_amd64.s:indexShortStr
  1303. // 2-byte needle
  1304. {"xxxxxx", "01", false},
  1305. {"01xxxx", "01", true},
  1306. {"xx01xx", "01", true},
  1307. {"xxxx01", "01", true},
  1308. {"01xxxxx"[1:], "01", false},
  1309. {"xxxxx01"[:6], "01", false},
  1310. // 3-byte needle
  1311. {"xxxxxxx", "012", false},
  1312. {"012xxxx", "012", true},
  1313. {"xx012xx", "012", true},
  1314. {"xxxx012", "012", true},
  1315. {"012xxxxx"[1:], "012", false},
  1316. {"xxxxx012"[:7], "012", false},
  1317. // 4-byte needle
  1318. {"xxxxxxxx", "0123", false},
  1319. {"0123xxxx", "0123", true},
  1320. {"xx0123xx", "0123", true},
  1321. {"xxxx0123", "0123", true},
  1322. {"0123xxxxx"[1:], "0123", false},
  1323. {"xxxxx0123"[:8], "0123", false},
  1324. // 5-7-byte needle
  1325. {"xxxxxxxxx", "01234", false},
  1326. {"01234xxxx", "01234", true},
  1327. {"xx01234xx", "01234", true},
  1328. {"xxxx01234", "01234", true},
  1329. {"01234xxxxx"[1:], "01234", false},
  1330. {"xxxxx01234"[:9], "01234", false},
  1331. // 8-byte needle
  1332. {"xxxxxxxxxxxx", "01234567", false},
  1333. {"01234567xxxx", "01234567", true},
  1334. {"xx01234567xx", "01234567", true},
  1335. {"xxxx01234567", "01234567", true},
  1336. {"01234567xxxxx"[1:], "01234567", false},
  1337. {"xxxxx01234567"[:12], "01234567", false},
  1338. // 9-15-byte needle
  1339. {"xxxxxxxxxxxxx", "012345678", false},
  1340. {"012345678xxxx", "012345678", true},
  1341. {"xx012345678xx", "012345678", true},
  1342. {"xxxx012345678", "012345678", true},
  1343. {"012345678xxxxx"[1:], "012345678", false},
  1344. {"xxxxx012345678"[:13], "012345678", false},
  1345. // 16-byte needle
  1346. {"xxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEF", false},
  1347. {"0123456789ABCDEFxxxx", "0123456789ABCDEF", true},
  1348. {"xx0123456789ABCDEFxx", "0123456789ABCDEF", true},
  1349. {"xxxx0123456789ABCDEF", "0123456789ABCDEF", true},
  1350. {"0123456789ABCDEFxxxxx"[1:], "0123456789ABCDEF", false},
  1351. {"xxxxx0123456789ABCDEF"[:20], "0123456789ABCDEF", false},
  1352. // 17-31-byte needle
  1353. {"xxxxxxxxxxxxxxxxxxxxx", "0123456789ABCDEFG", false},
  1354. {"0123456789ABCDEFGxxxx", "0123456789ABCDEFG", true},
  1355. {"xx0123456789ABCDEFGxx", "0123456789ABCDEFG", true},
  1356. {"xxxx0123456789ABCDEFG", "0123456789ABCDEFG", true},
  1357. {"0123456789ABCDEFGxxxxx"[1:], "0123456789ABCDEFG", false},
  1358. {"xxxxx0123456789ABCDEFG"[:21], "0123456789ABCDEFG", false},
  1359. // partial match cases
  1360. {"xx01x", "012", false}, // 3
  1361. {"xx0123x", "01234", false}, // 5-7
  1362. {"xx01234567x", "012345678", false}, // 9-15
  1363. {"xx0123456789ABCDEFx", "0123456789ABCDEFG", false}, // 17-31, issue 15679
  1364. }
  1365. func TestContains(t *testing.T) {
  1366. for _, ct := range ContainsTests {
  1367. if Contains(ct.str, ct.substr) != ct.expected {
  1368. t.Errorf("Contains(%s, %s) = %v, want %v",
  1369. ct.str, ct.substr, !ct.expected, ct.expected)
  1370. }
  1371. }
  1372. }
  1373. var ContainsAnyTests = []struct {
  1374. str, substr string
  1375. expected bool
  1376. }{
  1377. {"", "", false},
  1378. {"", "a", false},
  1379. {"", "abc", false},
  1380. {"a", "", false},
  1381. {"a", "a", true},
  1382. {"aaa", "a", true},
  1383. {"abc", "xyz", false},
  1384. {"abc", "xcz", true},
  1385. {"a☺b☻c☹d", "uvw☻xyz", true},
  1386. {"aRegExp*", ".(|)*+?^$[]", true},
  1387. {dots + dots + dots, " ", false},
  1388. }
  1389. func TestContainsAny(t *testing.T) {
  1390. for _, ct := range ContainsAnyTests {
  1391. if ContainsAny(ct.str, ct.substr) != ct.expected {
  1392. t.Errorf("ContainsAny(%s, %s) = %v, want %v",
  1393. ct.str, ct.substr, !ct.expected, ct.expected)
  1394. }
  1395. }
  1396. }
  1397. var ContainsRuneTests = []struct {
  1398. str string
  1399. r rune
  1400. expected bool
  1401. }{
  1402. {"", 'a', false},
  1403. {"a", 'a', true},
  1404. {"aaa", 'a', true},
  1405. {"abc", 'y', false},
  1406. {"abc", 'c', true},
  1407. {"a☺b☻c☹d", 'x', false},
  1408. {"a☺b☻c☹d", '☻', true},
  1409. {"aRegExp*", '*', true},
  1410. }
  1411. func TestContainsRune(t *testing.T) {
  1412. for _, ct := range ContainsRuneTests {
  1413. if ContainsRune(ct.str, ct.r) != ct.expected {
  1414. t.Errorf("ContainsRune(%q, %q) = %v, want %v",
  1415. ct.str, ct.r, !ct.expected, ct.expected)
  1416. }
  1417. }
  1418. }
  1419. var EqualFoldTests = []struct {
  1420. s, t string
  1421. out bool
  1422. }{
  1423. {"abc", "abc", true},
  1424. {"ABcd", "ABcd", true},
  1425. {"123abc", "123ABC", true},
  1426. {"αβδ", "ΑΒΔ", true},
  1427. {"abc", "xyz", false},
  1428. {"abc", "XYZ", false},
  1429. {"abcdefghijk", "abcdefghijX", false},
  1430. {"abcdefghijk", "abcdefghij\u212A", true},
  1431. {"abcdefghijK", "abcdefghij\u212A", true},
  1432. {"abcdefghijkz", "abcdefghij\u212Ay", false},
  1433. {"abcdefghijKz", "abcdefghij\u212Ay", false},
  1434. {"1", "2", false},
  1435. {"utf-8", "US-ASCII", false},
  1436. }
  1437. func TestEqualFold(t *testing.T) {
  1438. for _, tt := range EqualFoldTests {
  1439. if out := EqualFold(tt.s, tt.t); out != tt.out {
  1440. t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.s, tt.t, out, tt.out)
  1441. }
  1442. if out := EqualFold(tt.t, tt.s); out != tt.out {
  1443. t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.t, tt.s, out, tt.out)
  1444. }
  1445. }
  1446. }
  1447. func BenchmarkEqualFold(b *testing.B) {
  1448. for i := 0; i < b.N; i++ {
  1449. for _, tt := range EqualFoldTests {
  1450. if out := EqualFold(tt.s, tt.t); out != tt.out {
  1451. b.Fatal("wrong result")
  1452. }
  1453. }
  1454. }
  1455. }
  1456. var CountTests = []struct {
  1457. s, sep string
  1458. num int
  1459. }{
  1460. {"", "", 1},
  1461. {"", "notempty", 0},
  1462. {"notempty", "", 9},
  1463. {"smaller", "not smaller", 0},
  1464. {"12345678987654321", "6", 2},
  1465. {"611161116", "6", 3},
  1466. {"notequal", "NotEqual", 0},
  1467. {"equal", "equal", 1},
  1468. {"abc1231231123q", "123", 3},
  1469. {"11111", "11", 2},
  1470. }
  1471. func TestCount(t *testing.T) {
  1472. for _, tt := range CountTests {
  1473. if num := Count(tt.s, tt.sep); num != tt.num {
  1474. t.Errorf("Count(%q, %q) = %d, want %d", tt.s, tt.sep, num, tt.num)
  1475. }
  1476. }
  1477. }
  1478. var cutTests = []struct {
  1479. s, sep string
  1480. before, after string
  1481. found bool
  1482. }{
  1483. {"abc", "b", "a", "c", true},
  1484. {"abc", "a", "", "bc", true},
  1485. {"abc", "c", "ab", "", true},
  1486. {"abc", "abc", "", "", true},
  1487. {"abc", "", "", "abc", true},
  1488. {"abc", "d", "abc", "", false},
  1489. {"", "d", "", "", false},
  1490. {"", "", "", "", true},
  1491. }
  1492. func TestCut(t *testing.T) {
  1493. for _, tt := range cutTests {
  1494. if before, after, found := Cut(tt.s, tt.sep); before != tt.before || after != tt.after || found != tt.found {
  1495. t.Errorf("Cut(%q, %q) = %q, %q, %v, want %q, %q, %v", tt.s, tt.sep, before, after, found, tt.before, tt.after, tt.found)
  1496. }
  1497. }
  1498. }
  1499. func makeBenchInputHard() string {
  1500. tokens := [...]string{
  1501. "<a>", "<p>", "<b>", "<strong>",
  1502. "</a>", "</p>", "</b>", "</strong>",
  1503. "hello", "world",
  1504. }
  1505. x := make([]byte, 0, 1<<20)
  1506. for {
  1507. i := rand.Intn(len(tokens))
  1508. if len(x)+len(tokens[i]) >= 1<<20 {
  1509. break
  1510. }
  1511. x = append(x, tokens[i]...)
  1512. }
  1513. return string(x)
  1514. }
  1515. var benchInputHard = makeBenchInputHard()
  1516. func benchmarkIndexHard(b *testing.B, sep string) {
  1517. for i := 0; i < b.N; i++ {
  1518. Index(benchInputHard, sep)
  1519. }
  1520. }
  1521. func benchmarkLastIndexHard(b *testing.B, sep string) {
  1522. for i := 0; i < b.N; i++ {
  1523. LastIndex(benchInputHard, sep)
  1524. }
  1525. }
  1526. func benchmarkCountHard(b *testing.B, sep string) {
  1527. for i := 0; i < b.N; i++ {
  1528. Count(benchInputHard, sep)
  1529. }
  1530. }
  1531. func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, "<>") }
  1532. func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, "</pre>") }
  1533. func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, "<b>hello world</b>") }
  1534. func BenchmarkIndexHard4(b *testing.B) {
  1535. benchmarkIndexHard(b, "<pre><b>hello</b><strong>world</strong></pre>")
  1536. }
  1537. func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, "<>") }
  1538. func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, "</pre>") }
  1539. func BenchmarkLastIndexHard3(b *testing.B) { benchmarkLastIndexHard(b, "<b>hello world</b>") }
  1540. func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, "<>") }
  1541. func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, "</pre>") }
  1542. func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, "<b>hello world</b>") }
  1543. var benchInputTorture = Repeat("ABC", 1<<10) + "123" + Repeat("ABC", 1<<10)
  1544. var benchNeedleTorture = Repeat("ABC", 1<<10+1)
  1545. func BenchmarkIndexTorture(b *testing.B) {
  1546. for i := 0; i < b.N; i++ {
  1547. Index(benchInputTorture, benchNeedleTorture)
  1548. }
  1549. }
  1550. func BenchmarkCountTorture(b *testing.B) {
  1551. for i := 0; i < b.N; i++ {
  1552. Count(benchInputTorture, benchNeedleTorture)
  1553. }
  1554. }
  1555. func BenchmarkCountTortureOverlapping(b *testing.B) {
  1556. A := Repeat("ABC", 1<<20)
  1557. B := Repeat("ABC", 1<<10)
  1558. for i := 0; i < b.N; i++ {
  1559. Count(A, B)
  1560. }
  1561. }
  1562. func BenchmarkCountByte(b *testing.B) {
  1563. indexSizes := []int{10, 32, 4 << 10, 4 << 20, 64 << 20}
  1564. benchStr := Repeat(benchmarkString,
  1565. (indexSizes[len(indexSizes)-1]+len(benchmarkString)-1)/len(benchmarkString))
  1566. benchFunc := func(b *testing.B, benchStr string) {
  1567. b.SetBytes(int64(len(benchStr)))
  1568. for i := 0; i < b.N; i++ {
  1569. Count(benchStr, "=")
  1570. }
  1571. }
  1572. for _, size := range indexSizes {
  1573. b.Run(fmt.Sprintf("%d", size), func(b *testing.B) {
  1574. benchFunc(b, benchStr[:size])
  1575. })
  1576. }
  1577. }
  1578. var makeFieldsInput = func() string {
  1579. x := make([]byte, 1<<20)
  1580. // Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.
  1581. for i := range x {
  1582. switch rand.Intn(10) {
  1583. case 0:
  1584. x[i] = ' '
  1585. case 1:
  1586. if i > 0 && x[i-1] == 'x' {
  1587. copy(x[i-1:], "χ")
  1588. break
  1589. }
  1590. fallthrough
  1591. default:
  1592. x[i] = 'x'
  1593. }
  1594. }
  1595. return string(x)
  1596. }
  1597. var makeFieldsInputASCII = func() string {
  1598. x := make([]byte, 1<<20)
  1599. // Input is ~10% space, rest ASCII non-space.
  1600. for i := range x {
  1601. if rand.Intn(10) == 0 {
  1602. x[i] = ' '
  1603. } else {
  1604. x[i] = 'x'
  1605. }
  1606. }
  1607. return string(x)
  1608. }
  1609. var stringdata = []struct{ name, data string }{
  1610. {"ASCII", makeFieldsInputASCII()},
  1611. {"Mixed", makeFieldsInput()},
  1612. }
  1613. func BenchmarkFields(b *testing.B) {
  1614. for _, sd := range stringdata {
  1615. b.Run(sd.name, func(b *testing.B) {
  1616. for j := 1 << 4; j <= 1<<20; j <<= 4 {
  1617. b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
  1618. b.ReportAllocs()
  1619. b.SetBytes(int64(j))
  1620. data := sd.data[:j]
  1621. for i := 0; i < b.N; i++ {
  1622. Fields(data)
  1623. }
  1624. })
  1625. }
  1626. })
  1627. }
  1628. }
  1629. func BenchmarkFieldsFunc(b *testing.B) {
  1630. for _, sd := range stringdata {
  1631. b.Run(sd.name, func(b *testing.B) {
  1632. for j := 1 << 4; j <= 1<<20; j <<= 4 {
  1633. b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
  1634. b.ReportAllocs()
  1635. b.SetBytes(int64(j))
  1636. data := sd.data[:j]
  1637. for i := 0; i < b.N; i++ {
  1638. FieldsFunc(data, unicode.IsSpace)
  1639. }
  1640. })
  1641. }
  1642. })
  1643. }
  1644. }
  1645. func BenchmarkSplitEmptySeparator(b *testing.B) {
  1646. for i := 0; i < b.N; i++ {
  1647. Split(benchInputHard, "")
  1648. }
  1649. }
  1650. func BenchmarkSplitSingleByteSeparator(b *testing.B) {
  1651. for i := 0; i < b.N; i++ {
  1652. Split(benchInputHard, "/")
  1653. }
  1654. }
  1655. func BenchmarkSplitMultiByteSeparator(b *testing.B) {
  1656. for i := 0; i < b.N; i++ {
  1657. Split(benchInputHard, "hello")
  1658. }
  1659. }
  1660. func BenchmarkSplitNSingleByteSeparator(b *testing.B) {
  1661. for i := 0; i < b.N; i++ {
  1662. SplitN(benchInputHard, "/", 10)
  1663. }
  1664. }
  1665. func BenchmarkSplitNMultiByteSeparator(b *testing.B) {
  1666. for i := 0; i < b.N; i++ {
  1667. SplitN(benchInputHard, "hello", 10)
  1668. }
  1669. }
  1670. func BenchmarkRepeat(b *testing.B) {
  1671. s := "0123456789"
  1672. for _, n := range []int{5, 10} {
  1673. for _, c := range []int{1, 2, 6} {
  1674. b.Run(fmt.Sprintf("%dx%d", n, c), func(b *testing.B) {
  1675. for i := 0; i < b.N; i++ {
  1676. Repeat(s[:n], c)
  1677. }
  1678. })
  1679. }
  1680. }
  1681. }
  1682. func BenchmarkIndexAnyASCII(b *testing.B) {
  1683. x := Repeat("#", 2048) // Never matches set
  1684. cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"
  1685. for k := 1; k <= 2048; k <<= 4 {
  1686. for j := 1; j <= 64; j <<= 1 {
  1687. b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
  1688. for i := 0; i < b.N; i++ {
  1689. IndexAny(x[:k], cs[:j])
  1690. }
  1691. })
  1692. }
  1693. }
  1694. }
  // BenchmarkIndexAnyUTF8 measures IndexAny with a character set containing
  // multi-byte characters, over a grid of input lengths (1, 16, 256) and set
  // lengths (1, 2, 4, ..., 64). Sub-benchmarks are named
  // "<input length>:<set length>".
  func BenchmarkIndexAnyUTF8(b *testing.B) {
  	const (
  		maxInput = 2048
  		maxChars = 64
  	)
  	// The input holds only '#', which does not appear in the set, so no
  	// search ever finds a match.
  	haystack := Repeat("#", maxInput)
  	charset := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world."
  	for inputLen := 1; inputLen <= maxInput; inputLen <<= 4 {
  		for setLen := 1; setLen <= maxChars; setLen <<= 1 {
  			name := fmt.Sprintf("%d:%d", inputLen, setLen)
  			b.Run(name, func(b *testing.B) {
  				for iter := b.N; iter > 0; iter-- {
  					// setLen counts bytes, so a short prefix can end partway
  					// through a multi-byte character.
  					IndexAny(haystack[:inputLen], charset[:setLen])
  				}
  			})
  		}
  	}
  }
  // BenchmarkLastIndexAnyASCII measures LastIndexAny with an ASCII character
  // set over a grid of input lengths (1, 16, 256) and set lengths
  // (1, 2, 4, ..., 64). Sub-benchmarks are named "<input length>:<set length>".
  func BenchmarkLastIndexAnyASCII(b *testing.B) {
  	const (
  		maxInput = 2048
  		maxChars = 64
  	)
  	// The input holds only '#', which does not appear in the set, so no
  	// search ever finds a match.
  	haystack := Repeat("#", maxInput)
  	charset := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"
  	for inputLen := 1; inputLen <= maxInput; inputLen <<= 4 {
  		for setLen := 1; setLen <= maxChars; setLen <<= 1 {
  			name := fmt.Sprintf("%d:%d", inputLen, setLen)
  			b.Run(name, func(b *testing.B) {
  				for iter := b.N; iter > 0; iter-- {
  					LastIndexAny(haystack[:inputLen], charset[:setLen])
  				}
  			})
  		}
  	}
  }
  // BenchmarkLastIndexAnyUTF8 measures LastIndexAny with a character set
  // containing multi-byte characters, over a grid of input lengths (1, 16, 256)
  // and set lengths (1, 2, 4, ..., 64). Sub-benchmarks are named
  // "<input length>:<set length>".
  func BenchmarkLastIndexAnyUTF8(b *testing.B) {
  	const (
  		maxInput = 2048
  		maxChars = 64
  	)
  	// The input holds only '#', which does not appear in the set, so no
  	// search ever finds a match.
  	haystack := Repeat("#", maxInput)
  	charset := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world."
  	for inputLen := 1; inputLen <= maxInput; inputLen <<= 4 {
  		for setLen := 1; setLen <= maxChars; setLen <<= 1 {
  			name := fmt.Sprintf("%d:%d", inputLen, setLen)
  			b.Run(name, func(b *testing.B) {
  				for iter := b.N; iter > 0; iter-- {
  					// setLen counts bytes, so a short prefix can end partway
  					// through a multi-byte character.
  					LastIndexAny(haystack[:inputLen], charset[:setLen])
  				}
  			})
  		}
  	}
  }
  // BenchmarkTrimASCII measures Trim with an ASCII cutset over a grid of input
  // lengths (1, 16, 256, 4096) and cutset lengths (1, 2, 4, 8, 16).
  // Sub-benchmarks are named "<input length>:<cutset length>".
  func BenchmarkTrimASCII(b *testing.B) {
  	const (
  		hexDigits = "0123456789abcdef"
  		maxInput  = 4096
  	)
  	for inputLen := 1; inputLen <= maxInput; inputLen <<= 4 {
  		for setLen := 1; setLen <= len(hexDigits); setLen <<= 1 {
  			name := fmt.Sprintf("%d:%d", inputLen, setLen)
  			b.Run(name, func(b *testing.B) {
  				// The input is built by repeating the cutset itself, so every
  				// byte of it belongs to the cutset.
  				input := Repeat(hexDigits[:setLen], inputLen)
  				for iter := b.N; iter > 0; iter-- {
  					Trim(input[:inputLen], hexDigits[:setLen])
  				}
  			})
  		}
  	}
  }
  // BenchmarkTrimByte measures Trim with a one-byte cutset (a single space) on
  // a short phrase that has a space at each end.
  func BenchmarkTrimByte(b *testing.B) {
  	phrase := " the quick brown fox "
  	const cutset = " "
  	for iter := b.N; iter > 0; iter-- {
  		Trim(phrase, cutset)
  	}
  }
  // BenchmarkIndexPeriodic measures Index searching for "aa" in a 64 KiB text
  // where a single 'a' recurs every `period` bytes and every other byte is a
  // space. With period >= 2 the 'a's are never adjacent, so the key is never
  // found. Sub-benchmarks are named "IndexPeriodic<period>".
  func BenchmarkIndexPeriodic(b *testing.B) {
  	needle := "aa"
  	periods := [...]int{2, 4, 8, 16, 32, 64}
  	for _, period := range periods {
  		name := fmt.Sprintf("IndexPeriodic%d", period)
  		b.Run(name, func(b *testing.B) {
  			// One unit is an 'a' followed by period-1 spaces; repeating it
  			// 1<<16/period times yields 1<<16 bytes in total.
  			unit := "a" + Repeat(" ", period-1)
  			text := Repeat(unit, 1<<16/period)
  			for iter := b.N; iter > 0; iter-- {
  				Index(text, needle)
  			}
  		})
  	}
  }
  // BenchmarkJoin measures Join with the separator " and " on slices of 0 to 7
  // short strings, reporting allocations. Each sub-benchmark is named after
  // the number of elements joined.
  func BenchmarkJoin(b *testing.B) {
  	colors := []string{"red", "yellow", "pink", "green", "purple", "orange", "blue"}
  	for count := 0; count <= len(colors); count++ {
  		b.Run(strconv.Itoa(count), func(b *testing.B) {
  			b.ReportAllocs()
  			parts := colors[:count]
  			for iter := b.N; iter > 0; iter-- {
  				Join(parts, " and ")
  			}
  		})
  	}
  }
  // BenchmarkTrimSpace measures TrimSpace on four named inputs: one with
  // nothing to trim, one with ASCII spaces at both ends, one mixing ASCII and
  // non-ASCII whitespace, and one with only non-ASCII whitespace around
  // non-ASCII text. Each input runs as a sub-benchmark under its name.
  func BenchmarkTrimSpace(b *testing.B) {
  	type benchCase struct{ name, input string }
  	cases := []benchCase{
  		{name: "NoTrim", input: "typical"},
  		{name: "ASCII", input: " foo bar "},
  		{name: "SomeNonASCII", input: " \u2000\t\r\n x\t\t\r\r\ny\n \u3000 "},
  		{name: "JustNonASCII", input: "\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000"},
  	}
  	for idx := range cases {
  		tc := cases[idx]
  		b.Run(tc.name, func(b *testing.B) {
  			for iter := b.N; iter > 0; iter-- {
  				TrimSpace(tc.input)
  			}
  		})
  	}
  }
  // stringSink is a package-level variable that BenchmarkReplaceAll assigns
  // its result to, so the value is stored rather than dropped.
  // NOTE(review): presumably this keeps the compiler from optimizing the call
  // away — confirm.
  var stringSink string

  // BenchmarkReplaceAll measures ReplaceAll replacing every "a" in "banana"
  // with "<>", reporting allocations. The result is stored in stringSink.
  func BenchmarkReplaceAll(b *testing.B) {
  	const (
  		input       = "banana"
  		target      = "a"
  		replacement = "<>"
  	)
  	b.ReportAllocs()
  	for iter := b.N; iter > 0; iter-- {
  		stringSink = ReplaceAll(input, target, replacement)
  	}
  }