module FaradayJSON::Encoding
Character encoding helper functions
Public Instance Methods
bin_to_hex(data)
click to toggle source
Helper function for testing
# File lib/faraday_json/encoding.rb, line 211
# Helper function for testing: renders the bytes of +data+ as a lowercase
# hexadecimal string.
#
# data - any object. If it responds to +each_byte+ (e.g. a String), every
#        byte is rendered as exactly two hex digits.
#
# Returns the hex String, or +data+ itself when it has no +each_byte+.
def bin_to_hex(data)
  return data unless data.respond_to?(:each_byte)

  # Zero-pad each byte to two digits; without padding, bytes below 0x10
  # collapse to one digit and different byte sequences yield the same output.
  data.each_byte.map { |b| '%02x' % b }.join
end
get_bom(enc)
click to toggle source
Given a (canonical) encoding, returns a BOM as an array of byte values. If the given encoding does not have a BOM, an empty array is returned.
# File lib/faraday_json/encoding.rb, line 192
# Given an encoding, returns its byte order mark (BOM) as an array of byte
# values. If the given encoding does not have a BOM, an empty array is
# returned.
#
# enc - the encoding name, with or without a hyphen after "utf"
#       (e.g. 'utf-16le' or 'utf16le'). The value is converted with +to_s+
#       and downcased first, so +nil+, Encoding objects and upper-case names
#       are accepted as well.
#
# Returns a new Array of Integers on every call.
def get_bom(enc)
  # Only the start of the name is matched, so suffixed names are recognised too.
  case enc.to_s.downcase
  when /\Autf-?16be/ then [0xfe, 0xff]
  when /\Autf-?16le/ then [0xff, 0xfe]
  when /\Autf-?8/    then [0xef, 0xbb, 0xbf]
  when /\Autf-?32be/ then [0x00, 0x00, 0xfe, 0xff]
  when /\Autf-?32le/ then [0xff, 0xfe, 0x00, 0x00]
  else []
  end
end
get_canonical_encoding(enc)
click to toggle source
Returns a canonical version of an encoding.
# File lib/faraday_json/encoding.rb, line 178
# Returns a canonical (lower-case) version of an encoding name.
#
# enc - an encoding name, or an ::Encoding instance where ::Encoding exists.
#
# When ::Encoding.find is available, the name is resolved through it and the
# resulting encoding's name is returned downcased; errors raised by
# ::Encoding.find are not rescued here. Otherwise the given value is simply
# downcased.
def get_canonical_encoding(enc)
  # Without ::Encoding.find there is nothing to look up; just normalise case.
  return enc.downcase unless defined?(::Encoding) && ::Encoding.respond_to?(:find)

  # Ruby 1.9.2 doesn't like passing an Encoding to find(), so only look up
  # values that are not already Encoding instances.
  enc = ::Encoding.find(enc) unless enc.is_a?(::Encoding)
  enc.to_s.downcase
end
get_dominant_encoding(str, charset, opts = {})
click to toggle source
Given a String (which, depending on the Ruby version, may carry an internal encoding) and a charset from a content-type header (which may be nil), determines the dominant encoding. The charset, if given, overrides the String's internal encoding.
# File lib/faraday_json/encoding.rb, line 155
# Determines the dominant encoding for +str+.
#
# str     - the data; its +encoding+ is consulted if it responds to that.
# charset - charset from a content-type header; may be nil or empty. When
#           present it overrides the internal encoding of +str+.
# opts    - options Hash; 'default_encoding' (String key, or Symbol key
#           :default_encoding) supplies the fallback used when neither a
#           charset nor an internal encoding is available.
#
# Returns the charset, the encoding of +str+, or the configured default, in
# that order of preference.
# Raises RuntimeError if none of the three is available.
def get_dominant_encoding(str, charset, opts = {})
  # A charset that is present and non-empty always wins. Values without
  # #empty? (e.g. Encoding objects) count as present.
  charset_missing = charset.nil? || (charset.respond_to?(:empty?) && charset.empty?)
  return charset unless charset_missing

  # Fall back to the encoding the data itself carries, if it has one.
  enc = str.encoding if str.respond_to?(:encoding)
  return enc unless enc.nil?

  # Last resort: a caller-configured default.
  default_encoding = opts.fetch('default_encoding', nil) || opts.fetch(:default_encoding, nil)

  # FIXME: still a bare RuntimeError; the exception class is left as-is so
  # existing rescue clauses keep working.
  raise "No charset provided, don't know what to do!" if default_encoding.nil?

  default_encoding
end
strip_bom(data, charset, opts = {})
click to toggle source
Helper function; strips a leading BOM (UTF-8, UTF-16 or UTF-32) that matches the data's dominant encoding.
# File lib/faraday_json/encoding.rb, line 111
# Helper function; removes a leading byte order mark from +data+.
#
# data    - the value to process; anything that is not a String is returned
#           unchanged.
# charset - charset that, when given, takes precedence over the String's own
#           encoding.
# opts    - options passed through to get_dominant_encoding.
#
# Returns +data+ itself when its leading bytes do not match the BOM of the
# resolved encoding; otherwise a new String without those bytes, tagged with
# the canonical encoding where +force_encoding+ is available.
def strip_bom(data, charset, opts = {})
  # Non-String values pass straight through.
  return data unless data.is_a? String

  # Resolve the effective encoding and make its name canonical (if possible).
  canonical = get_canonical_encoding(get_dominant_encoding(data, charset, opts))

  # Byte values a BOM would have for that encoding.
  marker = get_bom(canonical)

  # Work on a byte array; the String itself cannot be compared bytewise here.
  bytes = data.each_byte.to_a

  # Leading bytes differ from the marker: no BOM, return the input as-is.
  # An empty marker matches trivially, so such data is still rebuilt below.
  return data unless bytes.first(marker.length) == marker

  # Drop the marker bytes and rebuild the String from the remainder.
  stripped = bytes[marker.length..-1].pack('c*')
  stripped.force_encoding(canonical) if stripped.respond_to? :force_encoding
  stripped
end
to_utf8(data, charset, opts = {})
click to toggle source
# File lib/faraday_json/encoding.rb, line 81
# Recursively transcodes every String found in +data+ to UTF-8.
#
# data    - a Hash, Array, String or any other value. Hash keys and values
#           and Array elements are processed recursively.
# charset - the input charset handed to transcode for each String.
# opts    - options handed to transcode.
#
# Returns a new Hash or Array for Hash/Array input, the transcoded String
# for String input, and +data+ itself for everything else.
def to_utf8(data, charset, opts = {})
  case data
  when Hash
    transcoded = {}
    data.each do |key, value|
      transcoded[to_utf8(key, charset, opts)] = to_utf8(value, charset, opts)
    end
    transcoded
  when Array
    data.map { |value| to_utf8(value, charset, opts) }
  when String
    # '//IGNORE' is passed through as part of the output charset name.
    transcode(data, charset, 'UTF-8//IGNORE', opts)
  else
    data
  end
end
transcode(data, input_charset, output_charset, opts = {})
click to toggle source
# File lib/faraday_json/encoding.rb, line 18
# Converts +data+ from +input_charset+ to +output_charset+ using iconv.
#
# data           - the data to convert.
# input_charset  - charset of +data+; when nil or empty, the
#                  'default_input_charset' option is used ('us-ascii' if
#                  that is not set either).
# output_charset - charset to convert to; when nil or empty, the
#                  'default_output_charset' option is used
#                  ('UTF-8//IGNORE' if that is not set either).
# opts           - options Hash holding the defaults named above.
#
# Returns the result of Iconv.conv.
def transcode(data, input_charset, output_charset, opts = {})
  # In Ruby 1.8 the given charsets have to be trusted as-is; there is little
  # alternative.

  # Without an input charset we can't do better than US-ASCII.
  source_charset = input_charset
  if source_charset.nil? || source_charset.empty?
    source_charset = opts.fetch('default_input_charset', 'us-ascii')
  end

  # The output, on the other hand, should default to UTF-8.
  target_charset = output_charset
  if target_charset.nil? || target_charset.empty?
    target_charset = opts.fetch('default_output_charset', 'UTF-8//IGNORE')
  end

  # iconv is loaded only once a conversion is actually requested.
  require 'iconv'
  ::Iconv.conv(target_charset, source_charset, data)
end