module SimpleIDN::Punycode
Constants
- ASCII_MAX
- BASE
- DAMP
- DELIMITER
- EMPTY
- INITIAL_BIAS
- INITIAL_N
- MAXINT
- SKEW
- TMAX
- TMIN
Public Instance Methods
adapt(delta, numpoints, firsttime)
click to toggle source
Bias adaptation function
# File lib/simpleidn.rb, line 44 def adapt(delta, numpoints, firsttime) delta = firsttime ? (delta / DAMP) : (delta >> 1) delta += (delta / numpoints) k = 0 while delta > (((BASE - TMIN) * TMAX) / 2) delta /= BASE - TMIN k += BASE end k + (BASE - TMIN + 1) * delta / (delta + SKEW) end
decode(input)
click to toggle source
Main decode
# File lib/simpleidn.rb, line 57 def decode(input) input_encoding = input.encoding input = input.encode(Encoding::UTF_8).codepoints.to_a output = [] # Initialize the state: n = INITIAL_N i = 0 bias = INITIAL_BIAS # Handle the basic code points: Let basic be the number of input code # points before the last delimiter, or 0 if there is none, then # copy the first basic code points to the output. basic = input.rindex(DELIMITER) || 0 input[0, basic].each do |char| raise(ConversionError, "Illegal input >= 0x80") if char > ASCII_MAX output << char end # Main decoding loop: Start just after the last delimiter if any # basic code points were copied; start at the beginning otherwise. ic = basic > 0 ? basic + 1 : 0 while ic < input.length # ic is the index of the next character to be consumed, # Decode a generalized variable-length integer into delta, # which gets added to i. The overflow checking is easier # if we increase i as we go, then subtract off its starting # value at the end to obtain delta. oldi = i w = 1 k = BASE loop do raise(ConversionError, "punycode_bad_input(1)") if ic >= input.length digit = decode_digit(input[ic]) ic += 1 raise(ConversionError, "punycode_bad_input(2)") if digit >= BASE raise(ConversionError, "punycode_overflow(1)") if digit > (MAXINT - i) / w i += digit * w t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias break if digit < t raise(ConversionError, "punycode_overflow(2)") if w > MAXINT / (BASE - t) w *= BASE - t k += BASE end out = output.length + 1 bias = adapt(i - oldi, out, oldi == 0) # i was supposed to wrap around from out to 0, # incrementing n each time, so we'll fix that now: raise(ConversionError, "punycode_overflow(3)") if (i / out) > MAXINT - n n += (i / out) i %= out # Insert n at position i of the output: output.insert(i, n) i += 1 end output.collect {|c| c.chr(Encoding::UTF_8)}.join(EMPTY).encode(input_encoding) end
decode_digit(cp)
click to toggle source
decode_digit
(cp) returns the numeric value of a basic code point (for use in representing integers) in the range 0 to base-1, or base if cp is does not represent a value.
# File lib/simpleidn.rb, line 30 def decode_digit(cp) cp - 48 < 10 ? cp - 22 : cp - 65 < 26 ? cp - 65 : cp - 97 < 26 ? cp - 97 : BASE end
encode(input)
click to toggle source
Main encode function
# File lib/simpleidn.rb, line 129 def encode(input) input_encoding = input.encoding input = input.encode(Encoding::UTF_8).codepoints.to_a output = [] # Initialize the state: n = INITIAL_N delta = 0 bias = INITIAL_BIAS # Handle the basic code points: output = input.select { |char| char <= ASCII_MAX } h = b = output.length # h is the number of code points that have been handled, b is the # number of basic code points output << DELIMITER if b > 0 # Main encoding loop: while h < input.length # All non-basic code points < n have been # handled already. Find the next larger one: m = MAXINT input.each do |char| m = char if char >= n && char < m end # Increase delta enough to advance the decoder's # <n,i> state to <m,0>, but guard against overflow: raise(ConversionError, "punycode_overflow (1)") if m - n > ((MAXINT - delta) / (h + 1)).floor delta += (m - n) * (h + 1) n = m input.each_with_index do |char, _| if char < n delta += 1 raise(ConversionError, "punycode_overflow(2)") if delta > MAXINT end next unless char == n # Represent delta as a generalized variable-length integer: q = delta k = BASE loop do t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias break if q < t output << encode_digit(t + (q - t) % (BASE - t)) q = ((q - t) / (BASE - t)).floor k += BASE end output << encode_digit(q) bias = adapt(delta, h + 1, h == b) delta = 0 h += 1 end delta += 1 n += 1 end output.collect {|c| c.chr(Encoding::UTF_8)}.join(EMPTY).encode(input_encoding) end
encode_digit(d)
click to toggle source
encode_digit
(d) returns the basic code point whose value (when used for representing integers) is d, which needs to be in the range 0 to base-1.
# File lib/simpleidn.rb, line 37 def encode_digit(d) d + 22 + 75 * (d < 26 ? 1 : 0) # 0..25 map to ASCII a..z # 26..35 map to ASCII 0..9 end