123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144 |
- // Copyright 2011 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package unicode
// Bit masks for each code point under U+0100, for fast lookup.
// Each constant in this group occupies its own bit.
const (
	pC  = 1 << iota // a control character.
	pP              // a punctuation character.
	pN              // a numeral.
	pS              // a symbolic character.
	pZ              // a spacing character.
	pLu             // an upper-case letter.
	pLl             // a lower-case letter.
	pp              // a printable character according to Go's definition.
)

// Masks derived by combining the single-bit flags above.
const (
	pg     = pp | pZ   // a graphical character according to the Unicode definition.
	pLo    = pLl | pLu // a letter that is neither upper nor lower case.
	pLmask = pLo       // covers both letter bits; IsLetter tests against it.
)
- // GraphicRanges defines the set of graphic characters according to Unicode.
- var GraphicRanges = []*RangeTable{
- L, M, N, P, S, Zs,
- }
- // PrintRanges defines the set of printable characters according to Go.
- // ASCII space, U+0020, is handled separately.
- var PrintRanges = []*RangeTable{
- L, M, N, P, S,
- }
- // IsGraphic reports whether the rune is defined as a Graphic by Unicode.
- // Such characters include letters, marks, numbers, punctuation, symbols, and
- // spaces, from categories L, M, N, P, S, Zs.
- func IsGraphic(r rune) bool {
- // We convert to uint32 to avoid the extra test for negative,
- // and in the index we convert to uint8 to avoid the range check.
- if uint32(r) <= MaxLatin1 {
- return properties[uint8(r)]&pg != 0
- }
- return In(r, GraphicRanges...)
- }
- // IsPrint reports whether the rune is defined as printable by Go. Such
- // characters include letters, marks, numbers, punctuation, symbols, and the
- // ASCII space character, from categories L, M, N, P, S and the ASCII space
- // character. This categorization is the same as IsGraphic except that the
- // only spacing character is ASCII space, U+0020.
- func IsPrint(r rune) bool {
- if uint32(r) <= MaxLatin1 {
- return properties[uint8(r)]&pp != 0
- }
- return In(r, PrintRanges...)
- }
- // IsOneOf reports whether the rune is a member of one of the ranges.
- // The function "In" provides a nicer signature and should be used in preference to IsOneOf.
- func IsOneOf(ranges []*RangeTable, r rune) bool {
- for _, inside := range ranges {
- if Is(inside, r) {
- return true
- }
- }
- return false
- }
- // In reports whether the rune is a member of one of the ranges.
- func In(r rune, ranges ...*RangeTable) bool {
- for _, inside := range ranges {
- if Is(inside, r) {
- return true
- }
- }
- return false
- }
- // IsControl reports whether the rune is a control character.
- // The C (Other) Unicode category includes more code points
- // such as surrogates; use Is(C, r) to test for them.
- func IsControl(r rune) bool {
- if uint32(r) <= MaxLatin1 {
- return properties[uint8(r)]&pC != 0
- }
- // All control characters are < MaxLatin1.
- return false
- }
- // IsLetter reports whether the rune is a letter (category L).
- func IsLetter(r rune) bool {
- if uint32(r) <= MaxLatin1 {
- return properties[uint8(r)]&(pLmask) != 0
- }
- return isExcludingLatin(Letter, r)
- }
- // IsMark reports whether the rune is a mark character (category M).
- func IsMark(r rune) bool {
- // There are no mark characters in Latin-1.
- return isExcludingLatin(Mark, r)
- }
- // IsNumber reports whether the rune is a number (category N).
- func IsNumber(r rune) bool {
- if uint32(r) <= MaxLatin1 {
- return properties[uint8(r)]&pN != 0
- }
- return isExcludingLatin(Number, r)
- }
- // IsPunct reports whether the rune is a Unicode punctuation character
- // (category P).
- func IsPunct(r rune) bool {
- if uint32(r) <= MaxLatin1 {
- return properties[uint8(r)]&pP != 0
- }
- return Is(Punct, r)
- }
- // IsSpace reports whether the rune is a space character as defined
- // by Unicode's White Space property; in the Latin-1 space
- // this is
- // '\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
- // Other definitions of spacing characters are set by category
- // Z and property Pattern_White_Space.
- func IsSpace(r rune) bool {
- // This property isn't the same as Z; special-case it.
- if uint32(r) <= MaxLatin1 {
- switch r {
- case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
- return true
- }
- return false
- }
- return isExcludingLatin(White_Space, r)
- }
- // IsSymbol reports whether the rune is a symbolic character.
- func IsSymbol(r rune) bool {
- if uint32(r) <= MaxLatin1 {
- return properties[uint8(r)]&pS != 0
- }
- return isExcludingLatin(Symbol, r)
- }
|