replace.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569
  1. // Copyright 2011 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package strings
  5. import (
  6. "io"
  7. "sync"
  8. )
  9. // Replacer replaces a list of strings with replacements.
  10. // It is safe for concurrent use by multiple goroutines.
  11. type Replacer struct {
  12. once sync.Once // guards buildOnce method
  13. r replacer
  14. oldnew []string
  15. }
  16. // replacer is the interface that a replacement algorithm needs to implement.
  17. type replacer interface {
  18. Replace(s string) string
  19. WriteString(w io.Writer, s string) (n int, err error)
  20. }
  21. // NewReplacer returns a new Replacer from a list of old, new string
  22. // pairs. Replacements are performed in the order they appear in the
  23. // target string, without overlapping matches. The old string
  24. // comparisons are done in argument order.
  25. //
  26. // NewReplacer panics if given an odd number of arguments.
  27. func NewReplacer(oldnew ...string) *Replacer {
  28. if len(oldnew)%2 == 1 {
  29. panic("strings.NewReplacer: odd argument count")
  30. }
  31. return &Replacer{oldnew: append([]string(nil), oldnew...)}
  32. }
  33. func (r *Replacer) buildOnce() {
  34. r.r = r.build()
  35. r.oldnew = nil
  36. }
  37. func (b *Replacer) build() replacer {
  38. oldnew := b.oldnew
  39. if len(oldnew) == 2 && len(oldnew[0]) > 1 {
  40. return makeSingleStringReplacer(oldnew[0], oldnew[1])
  41. }
  42. allNewBytes := true
  43. for i := 0; i < len(oldnew); i += 2 {
  44. if len(oldnew[i]) != 1 {
  45. return makeGenericReplacer(oldnew)
  46. }
  47. if len(oldnew[i+1]) != 1 {
  48. allNewBytes = false
  49. }
  50. }
  51. if allNewBytes {
  52. r := byteReplacer{}
  53. for i := range r {
  54. r[i] = byte(i)
  55. }
  56. // The first occurrence of old->new map takes precedence
  57. // over the others with the same old string.
  58. for i := len(oldnew) - 2; i >= 0; i -= 2 {
  59. o := oldnew[i][0]
  60. n := oldnew[i+1][0]
  61. r[o] = n
  62. }
  63. return &r
  64. }
  65. r := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)}
  66. // The first occurrence of old->new map takes precedence
  67. // over the others with the same old string.
  68. for i := len(oldnew) - 2; i >= 0; i -= 2 {
  69. o := oldnew[i][0]
  70. n := oldnew[i+1]
  71. // To avoid counting repetitions multiple times.
  72. if r.replacements[o] == nil {
  73. // We need to use string([]byte{o}) instead of string(o),
  74. // to avoid utf8 encoding of o.
  75. // E. g. byte(150) produces string of length 2.
  76. r.toReplace = append(r.toReplace, string([]byte{o}))
  77. }
  78. r.replacements[o] = []byte(n)
  79. }
  80. return &r
  81. }
  82. // Replace returns a copy of s with all replacements performed.
  83. func (r *Replacer) Replace(s string) string {
  84. r.once.Do(r.buildOnce)
  85. return r.r.Replace(s)
  86. }
  87. // WriteString writes s to w with all replacements performed.
  88. func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
  89. r.once.Do(r.buildOnce)
  90. return r.r.WriteString(w, s)
  91. }
  92. // trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
  93. // and values may be empty. For example, the trie containing keys "ax", "ay",
  94. // "bcbc", "x" and "xy" could have eight nodes:
  95. //
  96. // n0 -
  97. // n1 a-
  98. // n2 .x+
  99. // n3 .y+
  100. // n4 b-
  101. // n5 .cbc+
  102. // n6 x+
  103. // n7 .y+
  104. //
  105. // n0 is the root node, and its children are n1, n4 and n6; n1's children are
  106. // n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
  107. // with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
  108. // (marked with a trailing "+") are complete keys.
  109. type trieNode struct {
  110. // value is the value of the trie node's key/value pair. It is empty if
  111. // this node is not a complete key.
  112. value string
  113. // priority is the priority (higher is more important) of the trie node's
  114. // key/value pair; keys are not necessarily matched shortest- or longest-
  115. // first. Priority is positive if this node is a complete key, and zero
  116. // otherwise. In the example above, positive/zero priorities are marked
  117. // with a trailing "+" or "-".
  118. priority int
  119. // A trie node may have zero, one or more child nodes:
  120. // * if the remaining fields are zero, there are no children.
  121. // * if prefix and next are non-zero, there is one child in next.
  122. // * if table is non-zero, it defines all the children.
  123. //
  124. // Prefixes are preferred over tables when there is one child, but the
  125. // root node always uses a table for lookup efficiency.
  126. // prefix is the difference in keys between this trie node and the next.
  127. // In the example above, node n4 has prefix "cbc" and n4's next node is n5.
  128. // Node n5 has no children and so has zero prefix, next and table fields.
  129. prefix string
  130. next *trieNode
  131. // table is a lookup table indexed by the next byte in the key, after
  132. // remapping that byte through genericReplacer.mapping to create a dense
  133. // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
  134. // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
  135. // genericReplacer.tableSize will be 5. Node n0's table will be
  136. // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
  137. // 'a', 'b' and 'x'.
  138. table []*trieNode
  139. }
  140. func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
  141. if key == "" {
  142. if t.priority == 0 {
  143. t.value = val
  144. t.priority = priority
  145. }
  146. return
  147. }
  148. if t.prefix != "" {
  149. // Need to split the prefix among multiple nodes.
  150. var n int // length of the longest common prefix
  151. for ; n < len(t.prefix) && n < len(key); n++ {
  152. if t.prefix[n] != key[n] {
  153. break
  154. }
  155. }
  156. if n == len(t.prefix) {
  157. t.next.add(key[n:], val, priority, r)
  158. } else if n == 0 {
  159. // First byte differs, start a new lookup table here. Looking up
  160. // what is currently t.prefix[0] will lead to prefixNode, and
  161. // looking up key[0] will lead to keyNode.
  162. var prefixNode *trieNode
  163. if len(t.prefix) == 1 {
  164. prefixNode = t.next
  165. } else {
  166. prefixNode = &trieNode{
  167. prefix: t.prefix[1:],
  168. next: t.next,
  169. }
  170. }
  171. keyNode := new(trieNode)
  172. t.table = make([]*trieNode, r.tableSize)
  173. t.table[r.mapping[t.prefix[0]]] = prefixNode
  174. t.table[r.mapping[key[0]]] = keyNode
  175. t.prefix = ""
  176. t.next = nil
  177. keyNode.add(key[1:], val, priority, r)
  178. } else {
  179. // Insert new node after the common section of the prefix.
  180. next := &trieNode{
  181. prefix: t.prefix[n:],
  182. next: t.next,
  183. }
  184. t.prefix = t.prefix[:n]
  185. t.next = next
  186. next.add(key[n:], val, priority, r)
  187. }
  188. } else if t.table != nil {
  189. // Insert into existing table.
  190. m := r.mapping[key[0]]
  191. if t.table[m] == nil {
  192. t.table[m] = new(trieNode)
  193. }
  194. t.table[m].add(key[1:], val, priority, r)
  195. } else {
  196. t.prefix = key
  197. t.next = new(trieNode)
  198. t.next.add("", val, priority, r)
  199. }
  200. }
  201. func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
  202. // Iterate down the trie to the end, and grab the value and keylen with
  203. // the highest priority.
  204. bestPriority := 0
  205. node := &r.root
  206. n := 0
  207. for node != nil {
  208. if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
  209. bestPriority = node.priority
  210. val = node.value
  211. keylen = n
  212. found = true
  213. }
  214. if s == "" {
  215. break
  216. }
  217. if node.table != nil {
  218. index := r.mapping[s[0]]
  219. if int(index) == r.tableSize {
  220. break
  221. }
  222. node = node.table[index]
  223. s = s[1:]
  224. n++
  225. } else if node.prefix != "" && HasPrefix(s, node.prefix) {
  226. n += len(node.prefix)
  227. s = s[len(node.prefix):]
  228. node = node.next
  229. } else {
  230. break
  231. }
  232. }
  233. return
  234. }
  235. // genericReplacer is the fully generic algorithm.
  236. // It's used as a fallback when nothing faster can be used.
  237. type genericReplacer struct {
  238. root trieNode
  239. // tableSize is the size of a trie node's lookup table. It is the number
  240. // of unique key bytes.
  241. tableSize int
  242. // mapping maps from key bytes to a dense index for trieNode.table.
  243. mapping [256]byte
  244. }
  245. func makeGenericReplacer(oldnew []string) *genericReplacer {
  246. r := new(genericReplacer)
  247. // Find each byte used, then assign them each an index.
  248. for i := 0; i < len(oldnew); i += 2 {
  249. key := oldnew[i]
  250. for j := 0; j < len(key); j++ {
  251. r.mapping[key[j]] = 1
  252. }
  253. }
  254. for _, b := range r.mapping {
  255. r.tableSize += int(b)
  256. }
  257. var index byte
  258. for i, b := range r.mapping {
  259. if b == 0 {
  260. r.mapping[i] = byte(r.tableSize)
  261. } else {
  262. r.mapping[i] = index
  263. index++
  264. }
  265. }
  266. // Ensure root node uses a lookup table (for performance).
  267. r.root.table = make([]*trieNode, r.tableSize)
  268. for i := 0; i < len(oldnew); i += 2 {
  269. r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
  270. }
  271. return r
  272. }
  273. type appendSliceWriter []byte
  274. // Write writes to the buffer to satisfy io.Writer.
  275. func (w *appendSliceWriter) Write(p []byte) (int, error) {
  276. *w = append(*w, p...)
  277. return len(p), nil
  278. }
  279. // WriteString writes to the buffer without string->[]byte->string allocations.
  280. func (w *appendSliceWriter) WriteString(s string) (int, error) {
  281. *w = append(*w, s...)
  282. return len(s), nil
  283. }
  284. type stringWriter struct {
  285. w io.Writer
  286. }
  287. func (w stringWriter) WriteString(s string) (int, error) {
  288. return w.w.Write([]byte(s))
  289. }
  290. func getStringWriter(w io.Writer) io.StringWriter {
  291. sw, ok := w.(io.StringWriter)
  292. if !ok {
  293. sw = stringWriter{w}
  294. }
  295. return sw
  296. }
  297. func (r *genericReplacer) Replace(s string) string {
  298. buf := make(appendSliceWriter, 0, len(s))
  299. r.WriteString(&buf, s)
  300. return string(buf)
  301. }
  302. func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
  303. sw := getStringWriter(w)
  304. var last, wn int
  305. var prevMatchEmpty bool
  306. for i := 0; i <= len(s); {
  307. // Fast path: s[i] is not a prefix of any pattern.
  308. if i != len(s) && r.root.priority == 0 {
  309. index := int(r.mapping[s[i]])
  310. if index == r.tableSize || r.root.table[index] == nil {
  311. i++
  312. continue
  313. }
  314. }
  315. // Ignore the empty match iff the previous loop found the empty match.
  316. val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
  317. prevMatchEmpty = match && keylen == 0
  318. if match {
  319. wn, err = sw.WriteString(s[last:i])
  320. n += wn
  321. if err != nil {
  322. return
  323. }
  324. wn, err = sw.WriteString(val)
  325. n += wn
  326. if err != nil {
  327. return
  328. }
  329. i += keylen
  330. last = i
  331. continue
  332. }
  333. i++
  334. }
  335. if last != len(s) {
  336. wn, err = sw.WriteString(s[last:])
  337. n += wn
  338. }
  339. return
  340. }
  341. // singleStringReplacer is the implementation that's used when there is only
  342. // one string to replace (and that string has more than one byte).
  343. type singleStringReplacer struct {
  344. finder *stringFinder
  345. // value is the new string that replaces that pattern when it's found.
  346. value string
  347. }
  348. func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
  349. return &singleStringReplacer{finder: makeStringFinder(pattern), value: value}
  350. }
  351. func (r *singleStringReplacer) Replace(s string) string {
  352. var buf Builder
  353. i, matched := 0, false
  354. for {
  355. match := r.finder.next(s[i:])
  356. if match == -1 {
  357. break
  358. }
  359. matched = true
  360. buf.Grow(match + len(r.value))
  361. buf.WriteString(s[i : i+match])
  362. buf.WriteString(r.value)
  363. i += match + len(r.finder.pattern)
  364. }
  365. if !matched {
  366. return s
  367. }
  368. buf.WriteString(s[i:])
  369. return buf.String()
  370. }
  371. func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
  372. sw := getStringWriter(w)
  373. var i, wn int
  374. for {
  375. match := r.finder.next(s[i:])
  376. if match == -1 {
  377. break
  378. }
  379. wn, err = sw.WriteString(s[i : i+match])
  380. n += wn
  381. if err != nil {
  382. return
  383. }
  384. wn, err = sw.WriteString(r.value)
  385. n += wn
  386. if err != nil {
  387. return
  388. }
  389. i += match + len(r.finder.pattern)
  390. }
  391. wn, err = sw.WriteString(s[i:])
  392. n += wn
  393. return
  394. }
  395. // byteReplacer is the implementation that's used when all the "old"
  396. // and "new" values are single ASCII bytes.
  397. // The array contains replacement bytes indexed by old byte.
  398. type byteReplacer [256]byte
  399. func (r *byteReplacer) Replace(s string) string {
  400. var buf []byte // lazily allocated
  401. for i := 0; i < len(s); i++ {
  402. b := s[i]
  403. if r[b] != b {
  404. if buf == nil {
  405. buf = []byte(s)
  406. }
  407. buf[i] = r[b]
  408. }
  409. }
  410. if buf == nil {
  411. return s
  412. }
  413. return string(buf)
  414. }
  415. func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
  416. // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
  417. bufsize := 32 << 10
  418. if len(s) < bufsize {
  419. bufsize = len(s)
  420. }
  421. buf := make([]byte, bufsize)
  422. for len(s) > 0 {
  423. ncopy := copy(buf, s)
  424. s = s[ncopy:]
  425. for i, b := range buf[:ncopy] {
  426. buf[i] = r[b]
  427. }
  428. wn, err := w.Write(buf[:ncopy])
  429. n += wn
  430. if err != nil {
  431. return n, err
  432. }
  433. }
  434. return n, nil
  435. }
  436. // byteStringReplacer is the implementation that's used when all the
  437. // "old" values are single ASCII bytes but the "new" values vary in size.
  438. type byteStringReplacer struct {
  439. // replacements contains replacement byte slices indexed by old byte.
  440. // A nil []byte means that the old byte should not be replaced.
  441. replacements [256][]byte
  442. // toReplace keeps a list of bytes to replace. Depending on length of toReplace
  443. // and length of target string it may be faster to use Count, or a plain loop.
  444. // We store single byte as a string, because Count takes a string.
  445. toReplace []string
  446. }
  447. // countCutOff controls the ratio of a string length to a number of replacements
  448. // at which (*byteStringReplacer).Replace switches algorithms.
  449. // For strings with higher ration of length to replacements than that value,
  450. // we call Count, for each replacement from toReplace.
  451. // For strings, with a lower ratio we use simple loop, because of Count overhead.
  452. // countCutOff is an empirically determined overhead multiplier.
  453. // TODO(tocarip) revisit once we have register-based abi/mid-stack inlining.
  454. const countCutOff = 8
  455. func (r *byteStringReplacer) Replace(s string) string {
  456. newSize := len(s)
  457. anyChanges := false
  458. // Is it faster to use Count?
  459. if len(r.toReplace)*countCutOff <= len(s) {
  460. for _, x := range r.toReplace {
  461. if c := Count(s, x); c != 0 {
  462. // The -1 is because we are replacing 1 byte with len(replacements[b]) bytes.
  463. newSize += c * (len(r.replacements[x[0]]) - 1)
  464. anyChanges = true
  465. }
  466. }
  467. } else {
  468. for i := 0; i < len(s); i++ {
  469. b := s[i]
  470. if r.replacements[b] != nil {
  471. // See above for explanation of -1
  472. newSize += len(r.replacements[b]) - 1
  473. anyChanges = true
  474. }
  475. }
  476. }
  477. if !anyChanges {
  478. return s
  479. }
  480. buf := make([]byte, newSize)
  481. j := 0
  482. for i := 0; i < len(s); i++ {
  483. b := s[i]
  484. if r.replacements[b] != nil {
  485. j += copy(buf[j:], r.replacements[b])
  486. } else {
  487. buf[j] = b
  488. j++
  489. }
  490. }
  491. return string(buf)
  492. }
  493. func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
  494. sw := getStringWriter(w)
  495. last := 0
  496. for i := 0; i < len(s); i++ {
  497. b := s[i]
  498. if r.replacements[b] == nil {
  499. continue
  500. }
  501. if last != i {
  502. nw, err := sw.WriteString(s[last:i])
  503. n += nw
  504. if err != nil {
  505. return n, err
  506. }
  507. }
  508. last = i + 1
  509. nw, err := w.Write(r.replacements[b])
  510. n += nw
  511. if err != nil {
  512. return n, err
  513. }
  514. }
  515. if last != len(s) {
  516. var nw int
  517. nw, err = sw.WriteString(s[last:])
  518. n += nw
  519. }
  520. return
  521. }