graphic.go 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. // Copyright 2011 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package unicode
  5. // Bit masks for each code point under U+0100, for fast lookup.
  6. const (
  7. pC = 1 << iota // a control character.
  8. pP // a punctuation character.
  9. pN // a numeral.
  10. pS // a symbolic character.
  11. pZ // a spacing character.
  12. pLu // an upper-case letter.
  13. pLl // a lower-case letter.
  14. pp // a printable character according to Go's definition.
  15. pg = pp | pZ // a graphical character according to the Unicode definition.
  16. pLo = pLl | pLu // a letter that is neither upper nor lower case.
  17. pLmask = pLo
  18. )
  19. // GraphicRanges defines the set of graphic characters according to Unicode.
  20. var GraphicRanges = []*RangeTable{
  21. L, M, N, P, S, Zs,
  22. }
  23. // PrintRanges defines the set of printable characters according to Go.
  24. // ASCII space, U+0020, is handled separately.
  25. var PrintRanges = []*RangeTable{
  26. L, M, N, P, S,
  27. }
  28. // IsGraphic reports whether the rune is defined as a Graphic by Unicode.
  29. // Such characters include letters, marks, numbers, punctuation, symbols, and
  30. // spaces, from categories L, M, N, P, S, Zs.
  31. func IsGraphic(r rune) bool {
  32. // We convert to uint32 to avoid the extra test for negative,
  33. // and in the index we convert to uint8 to avoid the range check.
  34. if uint32(r) <= MaxLatin1 {
  35. return properties[uint8(r)]&pg != 0
  36. }
  37. return In(r, GraphicRanges...)
  38. }
  39. // IsPrint reports whether the rune is defined as printable by Go. Such
  40. // characters include letters, marks, numbers, punctuation, symbols, and the
  41. // ASCII space character, from categories L, M, N, P, S and the ASCII space
  42. // character. This categorization is the same as IsGraphic except that the
  43. // only spacing character is ASCII space, U+0020.
  44. func IsPrint(r rune) bool {
  45. if uint32(r) <= MaxLatin1 {
  46. return properties[uint8(r)]&pp != 0
  47. }
  48. return In(r, PrintRanges...)
  49. }
  50. // IsOneOf reports whether the rune is a member of one of the ranges.
  51. // The function "In" provides a nicer signature and should be used in preference to IsOneOf.
  52. func IsOneOf(ranges []*RangeTable, r rune) bool {
  53. for _, inside := range ranges {
  54. if Is(inside, r) {
  55. return true
  56. }
  57. }
  58. return false
  59. }
  60. // In reports whether the rune is a member of one of the ranges.
  61. func In(r rune, ranges ...*RangeTable) bool {
  62. for _, inside := range ranges {
  63. if Is(inside, r) {
  64. return true
  65. }
  66. }
  67. return false
  68. }
  69. // IsControl reports whether the rune is a control character.
  70. // The C (Other) Unicode category includes more code points
  71. // such as surrogates; use Is(C, r) to test for them.
  72. func IsControl(r rune) bool {
  73. if uint32(r) <= MaxLatin1 {
  74. return properties[uint8(r)]&pC != 0
  75. }
  76. // All control characters are < MaxLatin1.
  77. return false
  78. }
  79. // IsLetter reports whether the rune is a letter (category L).
  80. func IsLetter(r rune) bool {
  81. if uint32(r) <= MaxLatin1 {
  82. return properties[uint8(r)]&(pLmask) != 0
  83. }
  84. return isExcludingLatin(Letter, r)
  85. }
  86. // IsMark reports whether the rune is a mark character (category M).
  87. func IsMark(r rune) bool {
  88. // There are no mark characters in Latin-1.
  89. return isExcludingLatin(Mark, r)
  90. }
  91. // IsNumber reports whether the rune is a number (category N).
  92. func IsNumber(r rune) bool {
  93. if uint32(r) <= MaxLatin1 {
  94. return properties[uint8(r)]&pN != 0
  95. }
  96. return isExcludingLatin(Number, r)
  97. }
  98. // IsPunct reports whether the rune is a Unicode punctuation character
  99. // (category P).
  100. func IsPunct(r rune) bool {
  101. if uint32(r) <= MaxLatin1 {
  102. return properties[uint8(r)]&pP != 0
  103. }
  104. return Is(Punct, r)
  105. }
  106. // IsSpace reports whether the rune is a space character as defined
  107. // by Unicode's White Space property; in the Latin-1 space
  108. // this is
  109. // '\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
  110. // Other definitions of spacing characters are set by category
  111. // Z and property Pattern_White_Space.
  112. func IsSpace(r rune) bool {
  113. // This property isn't the same as Z; special-case it.
  114. if uint32(r) <= MaxLatin1 {
  115. switch r {
  116. case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
  117. return true
  118. }
  119. return false
  120. }
  121. return isExcludingLatin(White_Space, r)
  122. }
  123. // IsSymbol reports whether the rune is a symbolic character.
  124. func IsSymbol(r rune) bool {
  125. if uint32(r) <= MaxLatin1 {
  126. return properties[uint8(r)]&pS != 0
  127. }
  128. return isExcludingLatin(Symbol, r)
  129. }