summaryrefslogtreecommitdiff
path: root/ext/psych/lib/psych/scalar_scanner.rb
blob: d565a336e899b5bb1104f402db9210f521d0d765 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# frozen_string_literal: true
require 'strscan'

module Psych
  ###
  # Scan scalars for built in types
  class ScalarScanner
    # Taken from http://yaml.org/type/timestamp.html
    TIME = /^-?\d{4}-\d{1,2}-\d{1,2}(?:[Tt]|\s+)\d{1,2}:\d\d:\d\d(?:\.\d*)?(?:\s*(?:Z|[-+]\d{1,2}:?(?:\d\d)?))?$/

    # Taken from http://yaml.org/type/float.html
    FLOAT = /^(?:[-+]?([0-9][0-9_,]*)?\.[0-9]*([eE][-+][0-9]+)?(?# base 10)
              |[-+]?\.(inf|Inf|INF)(?# infinity)
              |\.(nan|NaN|NAN)(?# not a number))$/x

    # Taken from http://yaml.org/type/int.html
    INTEGER = /^(?:[-+]?0b[0-1_,]+          (?# base 2)
                  |[-+]?0[0-7_,]+           (?# base 8)
                  |[-+]?(?:0|[1-9][0-9_,]*) (?# base 10)
                  |[-+]?0x[0-9a-fA-F_,]+    (?# base 16))$/x

    # The loader object this scanner calls +load+, +date+ and +symbolize+ on.
    attr_reader :class_loader

    # Create a new scanner.
    #
    # +class_loader+ is stored as-is; the scanner calls +load('Time')+,
    # +date+ and +symbolize+ on it while tokenizing.
    def initialize class_loader
      @symbol_cache = {}
      @class_loader = class_loader
    end

    # Tokenize +string+ returning the Ruby object
    #
    # Returns +nil+ for the empty string, '~' and 'null'; +true+ / +false+
    # for yes/true/on and no/false/off (case-insensitive); a Time, a Date,
    # a Float, an Integer or a Symbol when +string+ has the matching shape;
    # and +string+ itself otherwise.  Symbols are memoized per scanner.
    #
    # A scalar that has the shape of a time, date, float or integer but is
    # rejected by the underlying conversion (ArgumentError) is returned
    # unchanged as a String instead of raising.
    def tokenize string
      return nil if string.empty?
      return @symbol_cache[string] if @symbol_cache.key?(string)

      # Check for a String type, being careful not to get caught by hash keys, hex values, and
      # special floats (e.g., -.inf).
      if string.match?(/^[^\d\.:-]?[A-Za-z_\s!@#\$%\^&\*\(\)\{\}\<\>\|\/\\~;=]+/) || string.match?(/\n/)
        # None of the special words below is longer than five characters.
        return string if string.length > 5

        if string.match?(/^[^ytonf~]/i)
          string
        elsif string == '~' || string.match?(/^null$/i)
          nil
        elsif string.match?(/^(yes|true|on)$/i)
          true
        elsif string.match?(/^(no|false|off)$/i)
          false
        else
          string
        end
      elsif string.match?(TIME)
        begin
          parse_time string
        rescue ArgumentError
          string
        end
      elsif string.match?(/^\d{4}-(?:1[012]|0\d|\d)-(?:[12]\d|3[01]|0\d|\d)$/)
        # Loaded lazily so 'date' is only required when a date is seen.
        require 'date'
        begin
          class_loader.date.strptime(string, '%Y-%m-%d')
        rescue ArgumentError
          string
        end
      elsif string.match?(/^\+?\.inf$/i)
        # An explicit '+' is accepted too: FLOAT matches "+.inf", and
        # Float() cannot parse it, so it has to be handled here.
        Float::INFINITY
      elsif string.match?(/^-\.inf$/i)
        -Float::INFINITY
      elsif string.match?(/^\.nan$/i)
        Float::NAN
      elsif string.match?(/^:./)
        if string =~ /^:(["'])(.*)\1/
          @symbol_cache[string] = class_loader.symbolize($2.sub(/^:/, ''))
        else
          @symbol_cache[string] = class_loader.symbolize(string.sub(/^:/, ''))
        end
      elsif string.match?(/^[-+]?[0-9][0-9_]*(:[0-5]?[0-9]){1,2}$/)
        parse_base60(string) { |part| part.to_i }
      elsif string.match?(/^[-+]?[0-9][0-9_]*(:[0-5]?[0-9]){1,2}\.[0-9_]*$/)
        parse_base60(string) { |part| part.to_f }
      elsif string.match?(FLOAT)
        if string.match?(/\A[-+]?\.\Z/)
          # A lone (optionally signed) dot is not a number.
          string
        else
          begin
            # Drop digit separators and a dot that has no fraction digits.
            Float(string.gsub(/[,_]|\.([Ee]|$)/, '\1'))
          rescue ArgumentError
            # FLOAT is looser than Float() (e.g. ".e+5"); keep the text.
            string
          end
        end
      elsif string.match?(INTEGER)
        begin
          parse_int string
        rescue ArgumentError
          # INTEGER is looser than Integer() (e.g. "0x_"); keep the text.
          string
        end
      else
        string
      end
    end

    ###
    # Parse and return an int from +string+
    #
    # Commas and underscores are removed before conversion.  Raises
    # ArgumentError when the remaining text is not accepted by Integer().
    def parse_int string
      Integer(string.delete(',_'))
    end

    ###
    # Parse and return a Time from +string+
    #
    # A trailing 'Z' yields a UTC time.  With no zone the fields are read as
    # UTC and the result is built with +at+.  With a numeric zone the result
    # carries that UTC offset.  Raises ArgumentError when a component is
    # rejected by Rational() or by the Time constructors.
    def parse_time string
      klass = class_loader.load 'Time'

      # TIME accepts 'T', 't' or any run of whitespace (tabs included)
      # between date and time, so split on the same set.
      date, time = *(string.split(/[tT]|\s+/, 2))
      (yy, m, dd) = date.match(/^(-?\d{4})-(\d{1,2})-(\d{1,2})/).captures.map { |x| x.to_i }
      md = time.match(/(\d+:\d+:\d+)(?:\.(\d*))?\s*(Z|[-+]\d+(:\d\d)?)?/)

      (hh, mm, ss) = md[1].split(':').map { |x| x.to_i }
      # Fractional seconds expressed in microseconds, kept exact as a Rational.
      us = (md[2] ? Rational("0.#{md[2]}") : 0) * 1000000

      time = klass.utc(yy, m, dd, hh, mm, ss, us)

      return time if 'Z' == md[3]
      return klass.at(time.to_i, us) unless md[3]

      # Base 10 is forced so that "08" / "09" are not read as octal.
      hours, minutes = md[3].match(/^([+\-]?\d{1,2})\:?(\d{1,2})?$/)[1..-1].compact.map { |digit| Integer(digit, 10) }

      # Take the sign from the text, not from the hour value: "-00:30"
      # parses to an hour of 0, which would otherwise lose the minus.
      offset = hours.abs * 3600 + (minutes || 0) * 60
      offset = -offset if md[3].start_with?('-')

      klass.new(yy, m, dd, hh, mm, ss+us/(1_000_000r), offset)
    end

    private

    ###
    # Sum the colon separated components of the base 60 scalar +string+.
    #
    # Each component is converted by the given block and weighted by
    # 60 ** (2 - position), so a two component value is scaled as if a
    # third component of zero followed it.  A leading sign applies to the
    # whole value rather than only to the first component.
    def parse_base60 string
      unsigned = string.sub(/\A[-+]/, '')

      magnitude = 0
      unsigned.split(':').each_with_index do |part, index|
        magnitude += yield(part) * (60 ** (2 - index))
      end

      string.start_with?('-') ? -magnitude : magnitude
    end
  end
end