module DomainName::Punycode

Constants

BASE
CUTOFF
DAMP
DECODE_DIGIT

Returns the numeric value of a basic code point (for use in representing integers) in the range 0 to BASE-1, or nil if cp does not represent a value.

DELIMITER
DOT
ENCODE_DIGIT

Returns the basic code point whose value (when used for representing integers) is d, which must be in the range 0 to BASE-1. The lowercase form is used unless flag is true, in which case the uppercase form is used. The behavior is undefined if flag is true and digit d has no uppercase form.

INITIAL_BIAS
INITIAL_N
LOBASE
MAXINT
PREFIX
RE_NONBASIC
SKEW
TMAX
TMIN

Public Instance Methods

decode(string) click to toggle source

Decode a string encoded in Punycode

# File lib/domain_name/punycode.rb, line 193
# Decode a Punycode-encoded string into its Unicode form.
#
# Everything before the last DELIMITER is copied verbatim as basic
# code points; everything after it is read as a sequence of
# generalized variable-length integers, each of which yields one
# code point and the position at which to insert it.
#
# Raises ArgumentError when the basic part contains a character
# matched by RE_NONBASIC, when the encoded part contains a character
# DECODE_DIGIT does not map, or when the input ends in the middle of
# an integer.  Raises BufferOverflowError when an intermediate value
# exceeds MAXINT.
def decode(string)
  delimiter_at = string.rindex(DELIMITER)

  if delimiter_at
    basic = string[0...delimiter_at]

    if basic.match(RE_NONBASIC)
      raise ArgumentError, "Illegal character is found in basic part: #{string.inspect}"
    end

    # The basic code points are emitted as they are.
    output = basic.unpack('U*')
    encoded = string[(delimiter_at + 1)..-1]
  else
    output = []
    encoded = string
  end

  # Decoder state
  code_point = INITIAL_N
  index = 0
  bias = INITIAL_BIAS

  digits = encoded.unpack('C*')
  total = digits.length
  pos = 0
  written = output.length

  while pos < total
    # Accumulate the variable-length integer directly into index and
    # recover the delta afterwards by subtracting the starting value;
    # that keeps the overflow checks simple.
    start_index = index
    weight = 1
    k = BASE

    loop do
      digit = DECODE_DIGIT[digits[pos]]
      unless digit
        raise ArgumentError, "Illegal character is found in non-basic part: #{string.inspect}"
      end

      pos += 1
      index += digit * weight
      raise BufferOverflowError if index > MAXINT

      threshold =
        if k <= bias
          TMIN
        elsif k - bias >= TMAX
          TMAX
        else
          k - bias
        end

      # A digit below the threshold terminates the integer.
      break if digit < threshold

      weight *= BASE - threshold
      raise BufferOverflowError if weight > MAXINT

      k += BASE
      unless pos < total
        raise ArgumentError, "Malformed input given: #{string.inspect}"
      end
    end

    # Bias adaptation
    delta = start_index.zero? ? index / DAMP : (index - start_index) >> 1
    delta += delta / (written + 1)
    bias = 0
    until delta <= CUTOFF
      delta /= LOBASE
      bias += BASE
    end
    bias += (LOBASE + 1) * delta / (delta + SKEW)

    # index should have wrapped from written+1 back to 0, bumping the
    # code point on every wrap; apply all the wraps at once.
    wraps, index = index.divmod(written + 1)
    code_point += wraps
    raise BufferOverflowError if code_point > MAXINT

    # Place the decoded code point at its position in the output.
    output.insert(index, code_point)

    written += 1
    index += 1
  end

  output.pack('U*')
end
decode_hostname(hostname) click to toggle source

Decode a hostname using IDN/Punycode algorithms

# File lib/domain_name/punycode.rb, line 275
# Decode a hostname using IDN/Punycode algorithms.
#
# Every label (a run of non-DOT characters at the start of the string
# or right after a DOT) that begins with PREFIX has the prefix
# stripped and the remainder passed through #decode.  All other
# labels and all DOTs are left untouched.
#
# The prefix is matched case-insensitively, so labels written as
# "XN--..." are decoded just like "xn--...".
#
# Any exception raised by #decode for a malformed label propagates
# to the caller.
def decode_hostname(hostname)
  hostname.gsub(/(\A|#{Regexp.quote(DOT)})#{Regexp.quote(PREFIX)}([^#{Regexp.quote(DOT)}]*)/io) {
    # Group 1 is the leading separator (empty at the start of the
    # string); group 2 is the label body without the prefix.
    Regexp.last_match(1) + decode(Regexp.last_match(2))
  }
end
encode(string) click to toggle source

Encode a string in Punycode

# File lib/domain_name/punycode.rb, line 100
# Encode a string in Punycode.
#
# The basic (ASCII) code points of +string+ are emitted first,
# followed by DELIMITER if there was at least one of them.  The
# remaining code points are then processed in increasing order, each
# one written as a generalized variable-length integer.
#
# Returns the encoded string.  Raises BufferOverflowError when the
# running delta exceeds MAXINT.
def encode(string)
  code_points = string.unpack('U*')
  output = ''

  # Encoder state
  n = INITIAL_N
  delta = 0
  bias = INITIAL_BIAS

  # Copy the basic code points straight to the output.
  code_points.each do |cp|
    output << cp.chr if cp < 0x80
  end

  # basic_count is the number of basic code points; handled is the
  # number of code points taken care of so far.
  basic_count = output.length
  handled = basic_count

  output << DELIMITER if basic_count > 0

  while handled < code_points.length
    # Everything below n is done; pick the smallest code point that
    # is still pending.
    smallest = code_points.reduce(MAXINT) { |lowest, cp|
      cp >= n && cp < lowest ? cp : lowest
    }

    # Advance delta far enough to move the decoder's <n,i> state to
    # <smallest,0>, watching for overflow.
    delta += (smallest - n) * (handled + 1)
    raise BufferOverflowError if delta > MAXINT
    n = smallest

    code_points.each do |cp|
      if cp < n
        delta += 1
        raise BufferOverflowError if delta > MAXINT
        next
      end

      next unless cp == n

      # Emit delta as a generalized variable-length integer.
      remaining = delta
      k = BASE
      loop do
        threshold =
          if k <= bias
            TMIN
          elsif k - bias >= TMAX
            TMAX
          else
            k - bias
          end
        break if remaining < threshold

        remaining, digit = (remaining - threshold).divmod(BASE - threshold)
        output << ENCODE_DIGIT[threshold + digit, false]
        k += BASE
      end

      output << ENCODE_DIGIT[remaining, false]

      # Bias adaptation
      delta = handled == basic_count ? delta / DAMP : delta >> 1
      delta += delta / (handled + 1)
      bias = 0
      until delta <= CUTOFF
        delta /= LOBASE
        bias += BASE
      end
      bias += (LOBASE + 1) * delta / (delta + SKEW)

      delta = 0
      handled += 1
    end

    delta += 1
    n += 1
  end

  output
end
encode_hostname(hostname) click to toggle source

Encode a hostname using IDN/Punycode algorithms

# File lib/domain_name/punycode.rb, line 180
# Encode a hostname using IDN/Punycode algorithms.
#
# A hostname with no character matched by RE_NONBASIC is returned
# as is.  Otherwise the hostname is split on DOT and every label that
# contains such a character is replaced with PREFIX followed by its
# #encode form; the other labels are kept unchanged.
#
# Empty labels, including the one produced by a trailing DOT, are
# preserved, so the number and position of DOTs in the result always
# match the input.
def encode_hostname(hostname)
  return hostname unless hostname.match(RE_NONBASIC)

  # A negative limit keeps trailing empty fields; without it a
  # trailing DOT would be lost when the labels are joined back.
  hostname.split(DOT, -1).map { |label|
    label.match(RE_NONBASIC) ? PREFIX + encode(label) : label
  }.join(DOT)
end