bidi.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. //go:generate go run gen.go gen_trieval.go gen_ranges.go
  5. // Package bidi contains functionality for bidirectional text support.
  6. //
  7. // See https://www.unicode.org/reports/tr9.
  8. //
  9. // NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
  10. // and without notice.
  11. package bidi // import "golang.org/x/text/unicode/bidi"
  12. // TODO
  13. // - Transformer for reordering?
  14. // - Transformer (validator, really) for Bidi Rule.
import (
	"bytes"
	"fmt"
)
// This API tries to avoid dealing with embedding levels for now. Under the hood
// these will be computed, but the question is to what extent the user should
// know they exist. We should at some point allow the user to specify an
// embedding hierarchy, though.
// A Direction indicates the overall flow of text.
//
// LeftToRight is the zero value of the type.
type Direction int

const (
	// LeftToRight indicates the text contains no right-to-left characters and
	// that either there are some left-to-right characters or the option
	// DefaultDirection(LeftToRight) was passed.
	LeftToRight Direction = iota

	// RightToLeft indicates the text contains no left-to-right characters and
	// that either there are some right-to-left characters or the option
	// DefaultDirection(RightToLeft) was passed.
	RightToLeft

	// Mixed indicates text contains both left-to-right and right-to-left
	// characters.
	Mixed

	// Neutral means that text contains no left-to-right and right-to-left
	// characters and that no default direction has been set.
	Neutral
)
// options collects the settings that Option functions can modify.
type options struct {
	// defaultDirection is the value stored by the DefaultDirection option.
	// Its zero value is LeftToRight.
	defaultDirection Direction
}
// An Option is an option for Bidi processing. It is a function that modifies
// the options value it is given.
type Option func(*options)
  45. // ICU allows the user to define embedding levels. This may be used, for example,
  46. // to use hierarchical structure of markup languages to define embeddings.
  47. // The following option may be a way to expose this functionality in this API.
  48. // // LevelFunc sets a function that associates nesting levels with the given text.
  49. // // The levels function will be called with monotonically increasing values for p.
  50. // func LevelFunc(levels func(p int) int) Option {
  51. // panic("unimplemented")
  52. // }
  53. // DefaultDirection sets the default direction for a Paragraph. The direction is
  54. // overridden if the text contains directional characters.
  55. func DefaultDirection(d Direction) Option {
  56. return func(opts *options) {
  57. opts.defaultDirection = d
  58. }
  59. }
// A Paragraph holds a single Paragraph for Bidi processing.
type Paragraph struct {
	p          []byte        // input text as stored by SetBytes or SetString
	o          Ordering      // result of the most recent successful call to Order
	opts       []Option      // options passed to SetBytes or SetString; applied by Order
	types      []Class       // bidi class per rune, filled in by prepareInput
	pairTypes  []bracketType // per rune: opening bracket, closing bracket or none
	pairValues []rune        // per rune: the bracket rune itself, or 0 for non-brackets
	runes      []rune        // the input decoded into runes by prepareInput
	options    options       // settings produced by applying opts in Order
}
  71. // Initialize the p.pairTypes, p.pairValues and p.types from the input previously
  72. // set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph
  73. // separator (bidi class B).
  74. //
  75. // The function p.Order() needs these values to be set, so this preparation could be postponed.
  76. // But since the SetBytes and SetStrings functions return the length of the input up to the paragraph
  77. // separator, the whole input needs to be processed anyway and should not be done twice.
  78. //
  79. // The function has the same return values as SetBytes() / SetString()
  80. func (p *Paragraph) prepareInput() (n int, err error) {
  81. p.runes = bytes.Runes(p.p)
  82. bytecount := 0
  83. // clear slices from previous SetString or SetBytes
  84. p.pairTypes = nil
  85. p.pairValues = nil
  86. p.types = nil
  87. for _, r := range p.runes {
  88. props, i := LookupRune(r)
  89. bytecount += i
  90. cls := props.Class()
  91. if cls == B {
  92. return bytecount, nil
  93. }
  94. p.types = append(p.types, cls)
  95. if props.IsOpeningBracket() {
  96. p.pairTypes = append(p.pairTypes, bpOpen)
  97. p.pairValues = append(p.pairValues, r)
  98. } else if props.IsBracket() {
  99. // this must be a closing bracket,
  100. // since IsOpeningBracket is not true
  101. p.pairTypes = append(p.pairTypes, bpClose)
  102. p.pairValues = append(p.pairValues, r)
  103. } else {
  104. p.pairTypes = append(p.pairTypes, bpNone)
  105. p.pairValues = append(p.pairValues, 0)
  106. }
  107. }
  108. return bytecount, nil
  109. }
  110. // SetBytes configures p for the given paragraph text. It replaces text
  111. // previously set by SetBytes or SetString. If b contains a paragraph separator
  112. // it will only process the first paragraph and report the number of bytes
  113. // consumed from b including this separator. Error may be non-nil if options are
  114. // given.
  115. func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
  116. p.p = b
  117. p.opts = opts
  118. return p.prepareInput()
  119. }
  120. // SetString configures s for the given paragraph text. It replaces text
  121. // previously set by SetBytes or SetString. If s contains a paragraph separator
  122. // it will only process the first paragraph and report the number of bytes
  123. // consumed from s including this separator. Error may be non-nil if options are
  124. // given.
  125. func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
  126. p.p = []byte(s)
  127. p.opts = opts
  128. return p.prepareInput()
  129. }
  130. // IsLeftToRight reports whether the principle direction of rendering for this
  131. // paragraphs is left-to-right. If this returns false, the principle direction
  132. // of rendering is right-to-left.
  133. func (p *Paragraph) IsLeftToRight() bool {
  134. return p.Direction() == LeftToRight
  135. }
  136. // Direction returns the direction of the text of this paragraph.
  137. //
  138. // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
  139. func (p *Paragraph) Direction() Direction {
  140. return p.o.Direction()
  141. }
  142. // TODO: what happens if the position is > len(input)? This should return an error.
  143. // RunAt reports the Run at the given position of the input text.
  144. //
  145. // This method can be used for computing line breaks on paragraphs.
  146. func (p *Paragraph) RunAt(pos int) Run {
  147. c := 0
  148. runNumber := 0
  149. for i, r := range p.o.runes {
  150. c += len(r)
  151. if pos < c {
  152. runNumber = i
  153. }
  154. }
  155. return p.o.Run(runNumber)
  156. }
  157. func calculateOrdering(levels []level, runes []rune) Ordering {
  158. var curDir Direction
  159. prevDir := Neutral
  160. prevI := 0
  161. o := Ordering{}
  162. // lvl = 0,2,4,...: left to right
  163. // lvl = 1,3,5,...: right to left
  164. for i, lvl := range levels {
  165. if lvl%2 == 0 {
  166. curDir = LeftToRight
  167. } else {
  168. curDir = RightToLeft
  169. }
  170. if curDir != prevDir {
  171. if i > 0 {
  172. o.runes = append(o.runes, runes[prevI:i])
  173. o.directions = append(o.directions, prevDir)
  174. o.startpos = append(o.startpos, prevI)
  175. }
  176. prevI = i
  177. prevDir = curDir
  178. }
  179. }
  180. o.runes = append(o.runes, runes[prevI:])
  181. o.directions = append(o.directions, prevDir)
  182. o.startpos = append(o.startpos, prevI)
  183. return o
  184. }
  185. // Order computes the visual ordering of all the runs in a Paragraph.
  186. func (p *Paragraph) Order() (Ordering, error) {
  187. if len(p.types) == 0 {
  188. return Ordering{}, nil
  189. }
  190. for _, fn := range p.opts {
  191. fn(&p.options)
  192. }
  193. lvl := level(-1)
  194. if p.options.defaultDirection == RightToLeft {
  195. lvl = 1
  196. }
  197. para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl)
  198. if err != nil {
  199. return Ordering{}, err
  200. }
  201. levels := para.getLevels([]int{len(p.types)})
  202. p.o = calculateOrdering(levels, p.runes)
  203. return p.o, nil
  204. }
  205. // Line computes the visual ordering of runs for a single line starting and
  206. // ending at the given positions in the original text.
  207. func (p *Paragraph) Line(start, end int) (Ordering, error) {
  208. lineTypes := p.types[start:end]
  209. para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1)
  210. if err != nil {
  211. return Ordering{}, err
  212. }
  213. levels := para.getLevels([]int{len(lineTypes)})
  214. o := calculateOrdering(levels, p.runes[start:end])
  215. return o, nil
  216. }
// An Ordering holds the computed visual order of runs of a Paragraph. Calling
// SetBytes or SetString on the originating Paragraph invalidates an Ordering.
// The methods of an Ordering should only be called by one goroutine at a time.
//
// The three slices are parallel: entry i of each describes run i.
type Ordering struct {
	runes      [][]rune    // the runes of each run
	directions []Direction // the direction of each run
	startpos   []int       // index of each run's first rune within the ordered text
}
  225. // Direction reports the directionality of the runs.
  226. //
  227. // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
  228. func (o *Ordering) Direction() Direction {
  229. return o.directions[0]
  230. }
  231. // NumRuns returns the number of runs.
  232. func (o *Ordering) NumRuns() int {
  233. return len(o.runes)
  234. }
  235. // Run returns the ith run within the ordering.
  236. func (o *Ordering) Run(i int) Run {
  237. r := Run{
  238. runes: o.runes[i],
  239. direction: o.directions[i],
  240. startpos: o.startpos[i],
  241. }
  242. return r
  243. }
  244. // TODO: perhaps with options.
  245. // // Reorder creates a reader that reads the runes in visual order per character.
  246. // // Modifiers remain after the runes they modify.
  247. // func (l *Runs) Reorder() io.Reader {
  248. // panic("unimplemented")
  249. // }
// A Run is a continuous sequence of characters of a single direction.
type Run struct {
	runes     []rune    // the text of the run, in its original order
	direction Direction // the direction of the run
	startpos  int       // index of the run's first rune, as reported by Pos
}
  256. // String returns the text of the run in its original order.
  257. func (r *Run) String() string {
  258. return string(r.runes)
  259. }
  260. // Bytes returns the text of the run in its original order.
  261. func (r *Run) Bytes() []byte {
  262. return []byte(r.String())
  263. }
  264. // TODO: methods for
  265. // - Display order
  266. // - headers and footers
  267. // - bracket replacement.
  268. // Direction reports the direction of the run.
  269. func (r *Run) Direction() Direction {
  270. return r.direction
  271. }
  272. // Pos returns the position of the Run within the text passed to SetBytes or SetString of the
  273. // originating Paragraph value.
  274. func (r *Run) Pos() (start, end int) {
  275. return r.startpos, r.startpos + len(r.runes) - 1
  276. }
  277. // AppendReverse reverses the order of characters of in, appends them to out,
  278. // and returns the result. Modifiers will still follow the runes they modify.
  279. // Brackets are replaced with their counterparts.
  280. func AppendReverse(out, in []byte) []byte {
  281. ret := make([]byte, len(in)+len(out))
  282. copy(ret, out)
  283. inRunes := bytes.Runes(in)
  284. for i, r := range inRunes {
  285. prop, _ := LookupRune(r)
  286. if prop.IsBracket() {
  287. inRunes[i] = prop.reverseBracket(r)
  288. }
  289. }
  290. for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 {
  291. inRunes[i], inRunes[j] = inRunes[j], inRunes[i]
  292. }
  293. copy(ret[len(out):], string(inRunes))
  294. return ret
  295. }
  296. // ReverseString reverses the order of characters in s and returns a new string.
  297. // Modifiers will still follow the runes they modify. Brackets are replaced with
  298. // their counterparts.
  299. func ReverseString(s string) string {
  300. input := []rune(s)
  301. li := len(input)
  302. ret := make([]rune, li)
  303. for i, r := range input {
  304. prop, _ := LookupRune(r)
  305. if prop.IsBracket() {
  306. ret[li-i-1] = prop.reverseBracket(r)
  307. } else {
  308. ret[li-i-1] = r
  309. }
  310. }
  311. return string(ret)
  312. }