httplex.go 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package httpguts
  5. import (
  6. "net"
  7. "strings"
  8. "unicode/utf8"
  9. "golang.org/x/net/idna"
  10. )
  11. var isTokenTable = [127]bool{
  12. '!': true,
  13. '#': true,
  14. '$': true,
  15. '%': true,
  16. '&': true,
  17. '\'': true,
  18. '*': true,
  19. '+': true,
  20. '-': true,
  21. '.': true,
  22. '0': true,
  23. '1': true,
  24. '2': true,
  25. '3': true,
  26. '4': true,
  27. '5': true,
  28. '6': true,
  29. '7': true,
  30. '8': true,
  31. '9': true,
  32. 'A': true,
  33. 'B': true,
  34. 'C': true,
  35. 'D': true,
  36. 'E': true,
  37. 'F': true,
  38. 'G': true,
  39. 'H': true,
  40. 'I': true,
  41. 'J': true,
  42. 'K': true,
  43. 'L': true,
  44. 'M': true,
  45. 'N': true,
  46. 'O': true,
  47. 'P': true,
  48. 'Q': true,
  49. 'R': true,
  50. 'S': true,
  51. 'T': true,
  52. 'U': true,
  53. 'W': true,
  54. 'V': true,
  55. 'X': true,
  56. 'Y': true,
  57. 'Z': true,
  58. '^': true,
  59. '_': true,
  60. '`': true,
  61. 'a': true,
  62. 'b': true,
  63. 'c': true,
  64. 'd': true,
  65. 'e': true,
  66. 'f': true,
  67. 'g': true,
  68. 'h': true,
  69. 'i': true,
  70. 'j': true,
  71. 'k': true,
  72. 'l': true,
  73. 'm': true,
  74. 'n': true,
  75. 'o': true,
  76. 'p': true,
  77. 'q': true,
  78. 'r': true,
  79. 's': true,
  80. 't': true,
  81. 'u': true,
  82. 'v': true,
  83. 'w': true,
  84. 'x': true,
  85. 'y': true,
  86. 'z': true,
  87. '|': true,
  88. '~': true,
  89. }
  90. func IsTokenRune(r rune) bool {
  91. i := int(r)
  92. return i < len(isTokenTable) && isTokenTable[i]
  93. }
  94. func isNotToken(r rune) bool {
  95. return !IsTokenRune(r)
  96. }
  97. // HeaderValuesContainsToken reports whether any string in values
  98. // contains the provided token, ASCII case-insensitively.
  99. func HeaderValuesContainsToken(values []string, token string) bool {
  100. for _, v := range values {
  101. if headerValueContainsToken(v, token) {
  102. return true
  103. }
  104. }
  105. return false
  106. }
  107. // isOWS reports whether b is an optional whitespace byte, as defined
  108. // by RFC 7230 section 3.2.3.
  109. func isOWS(b byte) bool { return b == ' ' || b == '\t' }
  110. // trimOWS returns x with all optional whitespace removes from the
  111. // beginning and end.
  112. func trimOWS(x string) string {
  113. // TODO: consider using strings.Trim(x, " \t") instead,
  114. // if and when it's fast enough. See issue 10292.
  115. // But this ASCII-only code will probably always beat UTF-8
  116. // aware code.
  117. for len(x) > 0 && isOWS(x[0]) {
  118. x = x[1:]
  119. }
  120. for len(x) > 0 && isOWS(x[len(x)-1]) {
  121. x = x[:len(x)-1]
  122. }
  123. return x
  124. }
  125. // headerValueContainsToken reports whether v (assumed to be a
  126. // 0#element, in the ABNF extension described in RFC 7230 section 7)
  127. // contains token amongst its comma-separated tokens, ASCII
  128. // case-insensitively.
  129. func headerValueContainsToken(v string, token string) bool {
  130. for comma := strings.IndexByte(v, ','); comma != -1; comma = strings.IndexByte(v, ',') {
  131. if tokenEqual(trimOWS(v[:comma]), token) {
  132. return true
  133. }
  134. v = v[comma+1:]
  135. }
  136. return tokenEqual(trimOWS(v), token)
  137. }
  138. // lowerASCII returns the ASCII lowercase version of b.
  139. func lowerASCII(b byte) byte {
  140. if 'A' <= b && b <= 'Z' {
  141. return b + ('a' - 'A')
  142. }
  143. return b
  144. }
  145. // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
  146. func tokenEqual(t1, t2 string) bool {
  147. if len(t1) != len(t2) {
  148. return false
  149. }
  150. for i, b := range t1 {
  151. if b >= utf8.RuneSelf {
  152. // No UTF-8 or non-ASCII allowed in tokens.
  153. return false
  154. }
  155. if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
  156. return false
  157. }
  158. }
  159. return true
  160. }
  161. // isLWS reports whether b is linear white space, according
  162. // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
  163. // LWS = [CRLF] 1*( SP | HT )
  164. func isLWS(b byte) bool { return b == ' ' || b == '\t' }
  165. // isCTL reports whether b is a control byte, according
  166. // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
  167. // CTL = <any US-ASCII control character
  168. // (octets 0 - 31) and DEL (127)>
  169. func isCTL(b byte) bool {
  170. const del = 0x7f // a CTL
  171. return b < ' ' || b == del
  172. }
  173. // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
  174. // HTTP/2 imposes the additional restriction that uppercase ASCII
  175. // letters are not allowed.
  176. //
  177. // RFC 7230 says:
  178. // header-field = field-name ":" OWS field-value OWS
  179. // field-name = token
  180. // token = 1*tchar
  181. // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
  182. // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
  183. func ValidHeaderFieldName(v string) bool {
  184. if len(v) == 0 {
  185. return false
  186. }
  187. for _, r := range v {
  188. if !IsTokenRune(r) {
  189. return false
  190. }
  191. }
  192. return true
  193. }
  194. // ValidHostHeader reports whether h is a valid host header.
  195. func ValidHostHeader(h string) bool {
  196. // The latest spec is actually this:
  197. //
  198. // http://tools.ietf.org/html/rfc7230#section-5.4
  199. // Host = uri-host [ ":" port ]
  200. //
  201. // Where uri-host is:
  202. // http://tools.ietf.org/html/rfc3986#section-3.2.2
  203. //
  204. // But we're going to be much more lenient for now and just
  205. // search for any byte that's not a valid byte in any of those
  206. // expressions.
  207. for i := 0; i < len(h); i++ {
  208. if !validHostByte[h[i]] {
  209. return false
  210. }
  211. }
  212. return true
  213. }
  214. // See the validHostHeader comment.
  215. var validHostByte = [256]bool{
  216. '0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
  217. '8': true, '9': true,
  218. 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
  219. 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
  220. 'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
  221. 'y': true, 'z': true,
  222. 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
  223. 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
  224. 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
  225. 'Y': true, 'Z': true,
  226. '!': true, // sub-delims
  227. '$': true, // sub-delims
  228. '%': true, // pct-encoded (and used in IPv6 zones)
  229. '&': true, // sub-delims
  230. '(': true, // sub-delims
  231. ')': true, // sub-delims
  232. '*': true, // sub-delims
  233. '+': true, // sub-delims
  234. ',': true, // sub-delims
  235. '-': true, // unreserved
  236. '.': true, // unreserved
  237. ':': true, // IPv6address + Host expression's optional port
  238. ';': true, // sub-delims
  239. '=': true, // sub-delims
  240. '[': true,
  241. '\'': true, // sub-delims
  242. ']': true,
  243. '_': true, // unreserved
  244. '~': true, // unreserved
  245. }
  246. // ValidHeaderFieldValue reports whether v is a valid "field-value" according to
  247. // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
  248. //
  249. // message-header = field-name ":" [ field-value ]
  250. // field-value = *( field-content | LWS )
  251. // field-content = <the OCTETs making up the field-value
  252. // and consisting of either *TEXT or combinations
  253. // of token, separators, and quoted-string>
  254. //
  255. // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
  256. //
  257. // TEXT = <any OCTET except CTLs,
  258. // but including LWS>
  259. // LWS = [CRLF] 1*( SP | HT )
  260. // CTL = <any US-ASCII control character
  261. // (octets 0 - 31) and DEL (127)>
  262. //
  263. // RFC 7230 says:
  264. // field-value = *( field-content / obs-fold )
  265. // obj-fold = N/A to http2, and deprecated
  266. // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
  267. // field-vchar = VCHAR / obs-text
  268. // obs-text = %x80-FF
  269. // VCHAR = "any visible [USASCII] character"
  270. //
  271. // http2 further says: "Similarly, HTTP/2 allows header field values
  272. // that are not valid. While most of the values that can be encoded
  273. // will not alter header field parsing, carriage return (CR, ASCII
  274. // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
  275. // 0x0) might be exploited by an attacker if they are translated
  276. // verbatim. Any request or response that contains a character not
  277. // permitted in a header field value MUST be treated as malformed
  278. // (Section 8.1.2.6). Valid characters are defined by the
  279. // field-content ABNF rule in Section 3.2 of [RFC7230]."
  280. //
  281. // This function does not (yet?) properly handle the rejection of
  282. // strings that begin or end with SP or HTAB.
  283. func ValidHeaderFieldValue(v string) bool {
  284. for i := 0; i < len(v); i++ {
  285. b := v[i]
  286. if isCTL(b) && !isLWS(b) {
  287. return false
  288. }
  289. }
  290. return true
  291. }
  292. func isASCII(s string) bool {
  293. for i := 0; i < len(s); i++ {
  294. if s[i] >= utf8.RuneSelf {
  295. return false
  296. }
  297. }
  298. return true
  299. }
  300. // PunycodeHostPort returns the IDNA Punycode version
  301. // of the provided "host" or "host:port" string.
  302. func PunycodeHostPort(v string) (string, error) {
  303. if isASCII(v) {
  304. return v, nil
  305. }
  306. host, port, err := net.SplitHostPort(v)
  307. if err != nil {
  308. // The input 'v' argument was just a "host" argument,
  309. // without a port. This error should not be returned
  310. // to the caller.
  311. host = v
  312. port = ""
  313. }
  314. host, err = idna.ToASCII(host)
  315. if err != nil {
  316. // Non-UTF-8? Not representable in Punycode, in any
  317. // case.
  318. return "", err
  319. }
  320. if port == "" {
  321. return host, nil
  322. }
  323. return net.JoinHostPort(host, port), nil
  324. }