punycode.go 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
  4. package cookiejar
// This file implements the Punycode algorithm from RFC 3492.
  6. import (
  7. "fmt"
  8. "net/http/internal/ascii"
  9. "strings"
  10. "unicode/utf8"
  11. )
  12. // These parameter values are specified in section 5.
  13. //
  14. // All computation is done with int32s, so that overflow behavior is identical
  15. // regardless of whether int is 32-bit or 64-bit.
  16. const (
  17. base int32 = 36
  18. damp int32 = 700
  19. initialBias int32 = 72
  20. initialN int32 = 128
  21. skew int32 = 38
  22. tmax int32 = 26
  23. tmin int32 = 1
  24. )
  25. // encode encodes a string as specified in section 6.3 and prepends prefix to
  26. // the result.
  27. //
  28. // The "while h < length(input)" line in the specification becomes "for
  29. // remaining != 0" in the Go code, because len(s) in Go is in bytes, not runes.
  30. func encode(prefix, s string) (string, error) {
  31. output := make([]byte, len(prefix), len(prefix)+1+2*len(s))
  32. copy(output, prefix)
  33. delta, n, bias := int32(0), initialN, initialBias
  34. b, remaining := int32(0), int32(0)
  35. for _, r := range s {
  36. if r < utf8.RuneSelf {
  37. b++
  38. output = append(output, byte(r))
  39. } else {
  40. remaining++
  41. }
  42. }
  43. h := b
  44. if b > 0 {
  45. output = append(output, '-')
  46. }
  47. for remaining != 0 {
  48. m := int32(0x7fffffff)
  49. for _, r := range s {
  50. if m > r && r >= n {
  51. m = r
  52. }
  53. }
  54. delta += (m - n) * (h + 1)
  55. if delta < 0 {
  56. return "", fmt.Errorf("cookiejar: invalid label %q", s)
  57. }
  58. n = m
  59. for _, r := range s {
  60. if r < n {
  61. delta++
  62. if delta < 0 {
  63. return "", fmt.Errorf("cookiejar: invalid label %q", s)
  64. }
  65. continue
  66. }
  67. if r > n {
  68. continue
  69. }
  70. q := delta
  71. for k := base; ; k += base {
  72. t := k - bias
  73. if t < tmin {
  74. t = tmin
  75. } else if t > tmax {
  76. t = tmax
  77. }
  78. if q < t {
  79. break
  80. }
  81. output = append(output, encodeDigit(t+(q-t)%(base-t)))
  82. q = (q - t) / (base - t)
  83. }
  84. output = append(output, encodeDigit(q))
  85. bias = adapt(delta, h+1, h == b)
  86. delta = 0
  87. h++
  88. remaining--
  89. }
  90. delta++
  91. n++
  92. }
  93. return string(output), nil
  94. }
  95. func encodeDigit(digit int32) byte {
  96. switch {
  97. case 0 <= digit && digit < 26:
  98. return byte(digit + 'a')
  99. case 26 <= digit && digit < 36:
  100. return byte(digit + ('0' - 26))
  101. }
  102. panic("cookiejar: internal error in punycode encoding")
  103. }
  104. // adapt is the bias adaptation function specified in section 6.1.
  105. func adapt(delta, numPoints int32, firstTime bool) int32 {
  106. if firstTime {
  107. delta /= damp
  108. } else {
  109. delta /= 2
  110. }
  111. delta += delta / numPoints
  112. k := int32(0)
  113. for delta > ((base-tmin)*tmax)/2 {
  114. delta /= base - tmin
  115. k += base
  116. }
  117. return k + (base-tmin+1)*delta/(delta+skew)
  118. }
// Strictly speaking, the remaining code below deals with IDNA (RFC 5890 and
// friends) and not Punycode (RFC 3492) per se.
  121. // acePrefix is the ASCII Compatible Encoding prefix.
  122. const acePrefix = "xn--"
  123. // toASCII converts a domain or domain label to its ASCII form. For example,
  124. // toASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
  125. // toASCII("golang") is "golang".
  126. func toASCII(s string) (string, error) {
  127. if ascii.Is(s) {
  128. return s, nil
  129. }
  130. labels := strings.Split(s, ".")
  131. for i, label := range labels {
  132. if !ascii.Is(label) {
  133. a, err := encode(acePrefix, label)
  134. if err != nil {
  135. return "", err
  136. }
  137. labels[i] = a
  138. }
  139. }
  140. return strings.Join(labels, "."), nil
  141. }