1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
|
# frozen_string_literal: true
require 'strscan'
module Psych
  ###
  # Scan scalars for built in types.
  #
  # A scanner takes the text of a scalar and returns the Ruby object that
  # text denotes: nil, true/false, Integer, Float, Symbol, Date, Time, or
  # the original String when no other form matches.
  #
  # Strings and symbols that have already been resolved are remembered in
  # per-instance caches so repeated scalars skip the regexp cascade.
  class ScalarScanner
    # Taken from http://yaml.org/type/timestamp.html
    TIME = /^-?\d{4}-\d{1,2}-\d{1,2}(?:[Tt]|\s+)\d{1,2}:\d\d:\d\d(?:\.\d*)?(?:\s*(?:Z|[-+]\d{1,2}:?(?:\d\d)?))?$/

    # Taken from http://yaml.org/type/float.html
    FLOAT = /^(?:[-+]?([0-9][0-9_,]*)?\.[0-9]*([eE][-+][0-9]+)?(?# base 10)
              |[-+]?\.(inf|Inf|INF)(?# infinity)
              |\.(nan|NaN|NAN)(?# not a number))$/x

    # Taken from http://yaml.org/type/int.html
    INTEGER = /^(?:[-+]?0b[0-1_]+ (?# base 2)
                |[-+]?0[0-7_]+ (?# base 8)
                |[-+]?(?:0|[1-9][0-9_]*) (?# base 10)
                |[-+]?0x[0-9a-fA-F_]+ (?# base 16))$/x

    # The object used to resolve classes (+load+, +date+) and to build
    # symbols (+symbolize+).
    attr_reader :class_loader

    # Create a new scanner.
    #
    # +class_loader+ must respond to +load+, +date+ and +symbolize+; those
    # are the only messages this class sends to it.
    def initialize class_loader
      @string_cache = {}
      @symbol_cache = {}
      @class_loader = class_loader
    end

    # Tokenize +string+ returning the Ruby object.
    #
    # Returns nil for the empty string, "~" and "null"; true/false for the
    # yes/no/on/off/true/false words; Float::INFINITY, -Float::INFINITY or
    # Float::NAN for the special floats; a Symbol for text starting with
    # ":"; a Time or Date for timestamps; an Integer or Float for numbers
    # (including base 60 "a:b:c" forms); and +string+ itself otherwise.
    #
    # Text that looks like a timestamp, date or float but cannot actually be
    # converted is returned unchanged rather than raising.
    def tokenize string
      return nil if string.empty?
      return string if @string_cache.key?(string)
      return @symbol_cache[string] if @symbol_cache.key?(string)

      # Check for a String type, being careful not to get caught by hash keys, hex values, and
      # special floats (e.g., -.inf).
      if string.match?(/^[^\d\.:-]?[A-Za-z_\s!@#\$%\^&\*\(\)\{\}\<\>\|\/\\~;=]+/) || string.match?(/\n/)
        # Every keyword handled below is at most five characters long, so
        # anything longer must be a plain string.
        if string.length > 5
          @string_cache[string] = true
          return string
        end

        if string.match?(/^[^ytonf~]/i)
          # Cannot be the start of yes/true/on/no/off/false/null/~.
          @string_cache[string] = true
          string
        elsif string == '~' || string.match?(/^null$/i)
          nil
        elsif string.match?(/^(yes|true|on)$/i)
          true
        elsif string.match?(/^(no|false|off)$/i)
          false
        else
          @string_cache[string] = true
          string
        end
      elsif string.match?(TIME)
        begin
          parse_time string
        rescue ArgumentError
          string
        end
      elsif string.match?(/^\d{4}-(?:1[012]|0\d|\d)-(?:[12]\d|3[01]|0\d|\d)$/)
        # Loaded lazily so 'date' is only required when a date is seen.
        require 'date'
        begin
          class_loader.date.strptime(string, '%Y-%m-%d')
        rescue ArgumentError
          string
        end
      elsif string.match?(/^\+?\.inf$/i)
        # FLOAT accepts an explicit "+" sign on infinity, so it has to be
        # recognised here; otherwise "+.inf" reaches Float() and raises.
        Float::INFINITY
      elsif string.match?(/^-\.inf$/i)
        -Float::INFINITY
      elsif string.match?(/^\.nan$/i)
        Float::NAN
      elsif string.match?(/^:./)
        quoted = string.match(/^:(["'])(.*)\1/)
        name = quoted ? quoted[2] : string
        @symbol_cache[string] = class_loader.symbolize(name.sub(/^:/, ''))
      elsif string.match?(/^[-+]?[0-9][0-9_]*(:[0-5]?[0-9]){1,2}$/)
        # Base 60 integer. The first field is weighted by 60**2, the second
        # by 60 and the third by 1, regardless of how many fields there are.
        string.split(':').each_with_index.inject(0) do |total, (field, position)|
          total + (field.to_i * 60 ** (position - 2).abs)
        end
      elsif string.match?(/^[-+]?[0-9][0-9_]*(:[0-5]?[0-9]){1,2}\.[0-9_]*$/)
        # Base 60 float; same weighting as the integer form above.
        string.split(':').each_with_index.inject(0) do |total, (field, position)|
          total + (field.to_f * 60 ** (position - 2).abs)
        end
      elsif string.match?(FLOAT)
        if string.match?(/\A[-+]?\.\Z/)
          # A lone (optionally signed) dot is not a number.
          @string_cache[string] = true
          string
        else
          begin
            # Drop digit separators and a dot that has no fraction digits
            # after it ("1." and "1.e+3"), which Float() would reject.
            Float(string.gsub(/[,_]|\.([Ee]|$)/, '\1'))
          rescue ArgumentError
            # FLOAT also matches text with no mantissa digits at all, such
            # as ".e+3"; treat that as a plain string instead of raising.
            @string_cache[string] = true
            string
          end
        end
      else
        int = parse_int string.gsub(/[,_]/, '')
        return int if int

        @string_cache[string] = true
        string
      end
    end

    ###
    # Parse and return an int from +string+.
    #
    # Returns nil when +string+ does not match INTEGER.
    def parse_int string
      return unless INTEGER === string

      Integer(string)
    end

    ###
    # Parse and return a Time from +string+.
    #
    # +string+ is expected to have the shape described by TIME. A "Z" suffix
    # yields a UTC time, no suffix yields a time built with +at+ from the
    # UTC epoch seconds, and a numeric suffix yields a time carrying that
    # UTC offset.
    def parse_time string
      klass = class_loader.load 'Time'

      date_part, time_part = *(string.split(/[ tT]/, 2))
      (year, month, day) = date_part.match(/^(-?\d{4})-(\d{1,2})-(\d{1,2})/).captures.map { |x| x.to_i }
      md = time_part.match(/(\d+:\d+:\d+)(?:\.(\d*))?\s*(Z|[-+]\d+(:\d\d)?)?/)

      (hour, minute, second) = md[1].split(':').map { |x| x.to_i }
      # The fractional part is kept as an exact Rational number of microseconds.
      us = (md[2] ? Rational("0.#{md[2]}") : 0) * 1000000

      utc_time = klass.utc(year, month, day, hour, minute, second, us)

      return utc_time if 'Z' == md[3]
      return klass.at(utc_time.to_i, us) unless md[3]

      tz = md[3].match(/^([+\-]?\d{1,2})\:?(\d{1,2})?$/)[1..-1].compact.map { |digit| Integer(digit, 10) }
      offset = tz.first * 3600
      offset_minutes = (tz[1] || 0) * 60

      # The minutes carry the same sign as the hours.
      if offset < 0
        offset -= offset_minutes
      else
        offset += offset_minutes
      end

      klass.new(year, month, day, hour, minute, second + us / (1_000_000r), offset)
    end
  end
end
|