diff options
author | nahi <nahi@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2003-09-24 15:18:44 +0000 |
---|---|---|
committer | nahi <nahi@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2003-09-24 15:18:44 +0000 |
commit | db9445103c082a306ba085f7677da02ea94b8841 (patch) | |
tree | a311d59f031ae5def87f68be71ed1f58abadc031 /lib/xsd | |
parent | 8c2fb77787d1f20b4c19c9c52552856c339b86e9 (diff) |
* lib/soap/* (29 files): SOAP4R added.
* lib/wsdl/* (42 files): WSDL4R added.
* lib/xsd/* (12 files): XSD4R added.
* test/soap/* (16 files): added.
* test/wsdl/* (2 files): added.
* test/xsd/* (3 files): added.
* sample/soap/* (27 files): added.
* sample/wsdl/* (13 files): added.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4591 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/xsd')
-rw-r--r-- | lib/xsd/charset.rb | 175 | ||||
-rw-r--r-- | lib/xsd/datatypes.rb | 1094 | ||||
-rw-r--r-- | lib/xsd/datatypes1999.rb | 31 | ||||
-rw-r--r-- | lib/xsd/iconvcharset.rb | 44 | ||||
-rw-r--r-- | lib/xsd/namedelements.rb | 86 | ||||
-rw-r--r-- | lib/xsd/ns.rb | 141 | ||||
-rw-r--r-- | lib/xsd/qname.rb | 77 | ||||
-rw-r--r-- | lib/xsd/xmlparser.rb | 72 | ||||
-rw-r--r-- | lib/xsd/xmlparser/parser.rb | 107 | ||||
-rw-r--r-- | lib/xsd/xmlparser/rexmlparser.rb | 65 | ||||
-rw-r--r-- | lib/xsd/xmlparser/xmlparser.rb | 61 | ||||
-rw-r--r-- | lib/xsd/xmlparser/xmlscanner.rb | 158 |
12 files changed, 2111 insertions, 0 deletions
diff --git a/lib/xsd/charset.rb b/lib/xsd/charset.rb new file mode 100644 index 0000000000..6dda959155 --- /dev/null +++ b/lib/xsd/charset.rb @@ -0,0 +1,175 @@ +=begin +XSD4R - Charset handling library. +Copyright (C) 2001, 2003 NAKAMURA, Hiroshi. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PRATICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 675 Mass +Ave, Cambridge, MA 02139, USA. +=end + + +module XSD + + +module Charset + @encoding = $KCODE + + class XSDError < StandardError; end + class CharsetError < XSDError; end + class UnknownCharsetError < CharsetError; end + class CharsetConversionError < CharsetError; end + +public + + ### + ## Maps + # + EncodingConvertMap = {} + def Charset.init + begin + require 'xsd/iconvcharset' + @encoding = 'UTF8' + EncodingConvertMap[['UTF8', 'EUC' ]] = Proc.new { |str| IconvCharset.safe_iconv("euc-jp", "utf-8", str) } + EncodingConvertMap[['EUC' , 'UTF8']] = Proc.new { |str| IconvCharset.safe_iconv("utf-8", "euc-jp", str) } + EncodingConvertMap[['EUC' , 'SJIS']] = Proc.new { |str| IconvCharset.safe_iconv("shift-jis", "euc-jp", str) } + if /(mswin|bccwin|mingw|cygwin)/ =~ RUBY_PLATFORM + EncodingConvertMap[['UTF8', 'SJIS']] = Proc.new { |str| IconvCharset.safe_iconv("cp932", "utf-8", str) } + EncodingConvertMap[['SJIS', 'UTF8']] = Proc.new { |str| IconvCharset.safe_iconv("utf-8", "cp932", str) } + EncodingConvertMap[['SJIS', 'EUC' ]] = Proc.new { |str| IconvCharset.safe_iconv("euc-jp", "cp932", str) } + else + EncodingConvertMap[['UTF8', 'SJIS']] = Proc.new { |str| IconvCharset.safe_iconv("shift-jis", "utf-8", str) } + EncodingConvertMap[['SJIS', 'UTF8']] = Proc.new { |str| IconvCharset.safe_iconv("utf-8", "shift-jis", str) } + EncodingConvertMap[['SJIS', 'EUC' ]] = Proc.new { |str| IconvCharset.safe_iconv("euc-jp", "shift-jis", str) } + end + rescue LoadError + begin + require 'nkf' + EncodingConvertMap[['EUC' , 'SJIS']] = Proc.new { |str| NKF.nkf('-sXm0', str) } + EncodingConvertMap[['SJIS', 'EUC' ]] = Proc.new { |str| NKF.nkf('-eXm0', str) } + rescue LoadError + end + + begin + require 'uconv' + @encoding = 'UTF8' + EncodingConvertMap[['UTF8', 'EUC' ]] = Uconv.method(:u8toeuc) + EncodingConvertMap[['UTF8', 'SJIS']] = Uconv.method(:u8tosjis) + EncodingConvertMap[['EUC' , 'UTF8']] = Uconv.method(:euctou8) + EncodingConvertMap[['SJIS', 'UTF8']] = Uconv.method(:sjistou8) + rescue LoadError + end + end + end + self.init + + CharsetMap = { + 'NONE' => 'us-ascii', + 'EUC' => 'euc-jp', + 'SJIS' => 'shift_jis', + 'UTF8' => 'utf-8', + } + + + ### + ## handlers + # + def Charset.encoding + @encoding + end + + def Charset.encoding_label + charset_label(@encoding) + end + + def Charset.encoding_to_xml(str, charset) + encoding_conv(str, @encoding, charset_str(charset)) + end + + def Charset.encoding_from_xml(str, charset) + encoding_conv(str, charset_str(charset), @encoding) + end + + def Charset.encoding_conv(str, enc_from, enc_to) + if enc_from == enc_to or enc_from == 'NONE' or enc_to == 'NONE' + str + elsif converter = EncodingConvertMap[[enc_from, enc_to]] + converter.call(str) + else + raise CharsetConversionError.new( + "Converter not found: #{ enc_from } -> #{ enc_to }") + end + end + + def Charset.charset_label(encoding) + CharsetMap[encoding.upcase] + end + + def Charset.charset_str(label) + CharsetMap.index(label.downcase) + end + + # Original regexps: http://www.din.or.jp/~ohzaki/perl.htm + # ascii_euc = '[\x00-\x7F]' + ascii_euc = '[\x9\xa\xd\x20-\x7F]' # XML 1.0 restricted. + twobytes_euc = '(?:[\x8E\xA1-\xFE][\xA1-\xFE])' + threebytes_euc = '(?:\x8F[\xA1-\xFE][\xA1-\xFE])' + character_euc = "(?:#{ ascii_euc }|#{ twobytes_euc }|#{ threebytes_euc })" + EUCRegexp = Regexp.new("\\A#{ character_euc }*\\z", nil, "NONE") + + # onebyte_sjis = '[\x00-\x7F\xA1-\xDF]' + onebyte_sjis = '[\x9\xa\xd\x20-\x7F\xA1-\xDF]' # XML 1.0 restricted. + twobytes_sjis = '(?:[\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC])' + character_sjis = "(?:#{ onebyte_sjis }|#{ twobytes_sjis })" + SJISRegexp = Regexp.new("\\A#{ character_sjis }*\\z", nil, "NONE") + + # 0xxxxxxx + #ascii_utf8 = '[\0-\x7F]' + ascii_utf8 = '[\x9\xA\xD\x20-\x7F]' # XML 1.0 restricted. + # 110yyyyy 10xxxxxx + twobytes_utf8 = '(?:[\xC0-\xDF][\x80-\xBF])' + # 1110zzzz 10yyyyyy 10xxxxxx + threebytes_utf8 = '(?:[\xE0-\xEF][\x80-\xBF][\x80-\xBF])' + # 11110uuu 10uuuzzz 10yyyyyy 10xxxxxx + fourbytes_utf8 = '(?:[\xF0-\xF7][\x80-\xBF][\x80-\xBF][\x80-\xBF])' + character_utf8 = "(?:#{ ascii_utf8 }|#{ twobytes_utf8 }|#{ threebytes_utf8 }|#{ fourbytes_utf8 })" + UTF8Regexp = Regexp.new("\\A#{ character_utf8 }*\\z", nil, "NONE") + + def Charset.is_utf8(str) + UTF8Regexp =~ str + end + + def Charset.is_euc(str) + EUCRegexp =~ str + end + + def Charset.is_sjis(str) + SJISRegexp =~ str + end + + def Charset.is_ces(str, code = $KCODE) + case code + when 'NONE' + true + when 'UTF8' + is_utf8(str) + when 'EUC' + is_euc(str) + when 'SJIS' + is_sjis(str) + else + raise UnknownCharsetError.new("Unknown charset: #{ code }") + end + end +end + + +end diff --git a/lib/xsd/datatypes.rb b/lib/xsd/datatypes.rb new file mode 100644 index 0000000000..03e42d21d2 --- /dev/null +++ b/lib/xsd/datatypes.rb @@ -0,0 +1,1094 @@ +=begin +XSD4R - XML Schema Datatype implementation. +Copyright (C) 2000, 2001, 2002, 2003 NAKAMURA, Hiroshi. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PRATICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 675 Mass +Ave, Cambridge, MA 02139, USA. +=end + + +require 'xsd/qname' +require 'xsd/charset' +require 'uri' + + +### +## XMLSchamaDatatypes general definitions. +# +module XSD + + +Namespace = 'http://www.w3.org/2001/XMLSchema' +InstanceNamespace = 'http://www.w3.org/2001/XMLSchema-instance' + +AttrType = 'type' +NilValue = 'true' + +AnyTypeLiteral = 'anyType' +AnySimpleTypeLiteral = 'anySimpleType' +NilLiteral = 'nil' +StringLiteral = 'string' +BooleanLiteral = 'boolean' +DecimalLiteral = 'decimal' +FloatLiteral = 'float' +DoubleLiteral = 'double' +DurationLiteral = 'duration' +DateTimeLiteral = 'dateTime' +TimeLiteral = 'time' +DateLiteral = 'date' +GYearMonthLiteral = 'gYearMonth' +GYearLiteral = 'gYear' +GMonthDayLiteral = 'gMonthDay' +GDayLiteral = 'gDay' +GMonthLiteral = 'gMonth' +HexBinaryLiteral = 'hexBinary' +Base64BinaryLiteral = 'base64Binary' +AnyURILiteral = 'anyURI' +QNameLiteral = 'QName' + +NormalizedStringLiteral = 'normalizedString' +IntegerLiteral = 'integer' +LongLiteral = 'long' +IntLiteral = 'int' +ShortLiteral = 'short' + +AttrTypeName = QName.new(InstanceNamespace, AttrType) +AttrNilName = QName.new(InstanceNamespace, NilLiteral) + +AnyTypeName = QName.new(Namespace, AnyTypeLiteral) +AnySimpleTypeName = QName.new(Namespace, AnySimpleTypeLiteral) + +class Error < StandardError; end +class ValueSpaceError < Error; end + + +### +## The base class of all datatypes with Namespace. +# +class NSDBase + @@types = [] + + attr_accessor :type + + def self.inherited(klass) + @@types << klass + end + + def self.types + @@types + end + + def initialize + @type = nil + end +end + + +### +## The base class of XSD datatypes. +# +class XSDAnySimpleType < NSDBase + include XSD + Type = QName.new(Namespace, AnySimpleTypeLiteral) + + # @data represents canonical space (ex. Integer: 123). + attr_reader :data + # @is_nil represents this data is nil or not. + attr_accessor :is_nil + + def initialize(value = nil) + super() + @type = Type + @data = nil + @is_nil = true + set(value) if value + end + + # set accepts a string which follows lexical space (ex. String: "+123"), or + # an object which follows canonical space (ex. Integer: 123). + def set(value) + if value.nil? + @is_nil = true + @data = nil + else + @is_nil = false + _set(value) + end + end + + # to_s creates a string which follows lexical space (ex. String: "123"). + def to_s() + if @is_nil + "" + else + _to_s + end + end + + def trim(data) + data.sub(/\A\s*(\S*)\s*\z/, '\1') + end + +private + + def _set(value) + @data = value + end + + def _to_s + @data.to_s + end +end + +class XSDNil < XSDAnySimpleType + Type = QName.new(Namespace, NilLiteral) + Value = 'true' + + def initialize(value = nil) + super() + @type = Type + set(value) + end + +private + + def _set(value) + @data = value + end +end + + +### +## Primitive datatypes. +# +class XSDString < XSDAnySimpleType + Type = QName.new(Namespace, StringLiteral) + + def initialize(value = nil) + super() + @type = Type + @encoding = nil + set(value) if value + end + +private + + def _set(value) + unless XSD::Charset.is_ces(value, XSD::Charset.encoding) + raise ValueSpaceError.new("#{ type }: cannot accept '#{ value }'.") + end + @data = value + end +end + +class XSDBoolean < XSDAnySimpleType + Type = QName.new(Namespace, BooleanLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) + end + +private + + def _set(value) + if value.is_a?(String) + str = trim(value) + if str == 'true' || str == '1' + @data = true + elsif str == 'false' || str == '0' + @data = false + else + raise ValueSpaceError.new("#{ type }: cannot accept '#{ str }'.") + end + else + @data = value ? true : false + end + end +end + +class XSDDecimal < XSDAnySimpleType + Type = QName.new(Namespace, DecimalLiteral) + + def initialize(value = nil) + super() + @type = Type + @sign = '' + @number = '' + @point = 0 + set(value) if value + end + + def nonzero? + (@number != '0') + end + +private + + def _set(d) + if d.is_a?(String) + # Integer("00012") => 10 in Ruby. + d.sub!(/^([+\-]?)0*(?=\d)/, "\\1") + end + set_str(d) + end + + def set_str(str) + /^([+\-]?)(\d*)(?:\.(\d*)?)?$/ =~ trim(str.to_s) + unless Regexp.last_match + raise ValueSpaceError.new("#{ type }: cannot accept '#{ str }'.") + end + + @sign = $1 || '+' + int_part = $2 + frac_part = $3 + + int_part = '0' if int_part.empty? + frac_part = frac_part ? frac_part.sub(/0+$/, '') : '' + @point = - frac_part.size + @number = int_part + frac_part + + # normalize + if @sign == '+' + @sign = '' + elsif @sign == '-' + if @number == '0' + @sign = '' + end + end + + @data = _to_s + end + + # 0.0 -> 0; right? + def _to_s + str = @number.dup + if @point.nonzero? + str[@number.size + @point, 0] = '.' + end + @sign + str + end +end + +class XSDFloat < XSDAnySimpleType + Type = QName.new(Namespace, FloatLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def _set(value) + # "NaN".to_f => 0 in some environment. libc? + if value.is_a?(Float) + @data = narrow32bit(value) + return + end + + str = trim(value.to_s) + if str == 'NaN' + @data = 0.0/0.0 + elsif str == 'INF' + @data = 1.0/0.0 + elsif str == '-INF' + @data = -1.0/0.0 + else + if /^[+\-\.\deE]+$/ !~ str + raise ValueSpaceError.new("#{ type }: cannot accept '#{ str }'.") + end + # Float("-1.4E") might fail on some system. + str << '0' if /e$/i =~ str + begin + @data = narrow32bit(Float(str)) + rescue ArgumentError + raise ValueSpaceError.new("#{ type }: cannot accept '#{ str }'.") + end + end + end + + # Do I have to convert 0.0 -> 0 and -0.0 -> -0 ? + def _to_s + if @data.nan? + 'NaN' + elsif @data.infinite? == 1 + 'INF' + elsif @data.infinite? == -1 + '-INF' + else + sprintf("%.10g", @data) + end + end + + # Convert to single-precision 32-bit floating point value. + def narrow32bit(f) + if f.nan? || f.infinite? + f + else + packed = [f].pack("f") + (/\A\0*\z/ =~ packed)? 0.0 : f + end + end +end + +# Ruby's Float is double-precision 64-bit floating point value. +class XSDDouble < XSDAnySimpleType + Type = QName.new(Namespace, DoubleLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def _set(value) + # "NaN".to_f => 0 in some environment. libc? + if value.is_a?(Float) + @data = value + return + end + + str = trim(value.to_s) + if str == 'NaN' + @data = 0.0/0.0 + elsif str == 'INF' + @data = 1.0/0.0 + elsif str == '-INF' + @data = -1.0/0.0 + else + begin + @data = Float(str) + rescue ArgumentError + # '1.4e' cannot be parsed on some architecture. + if /e\z/i =~ str + begin + @data = Float(str + '0') + rescue ArgumentError + raise ValueSpaceError.new("#{ type }: cannot accept '#{ str }'.") + end + else + raise ValueSpaceError.new("#{ type }: cannot accept '#{ str }'.") + end + end + end + end + + # Do I have to convert 0.0 -> 0 and -0.0 -> -0 ? + def _to_s + if @data.nan? + 'NaN' + elsif @data.infinite? == 1 + 'INF' + elsif @data.infinite? == -1 + '-INF' + else + sprintf("%.16g", @data) + end + end +end + +class XSDDuration < XSDAnySimpleType + Type = QName.new(Namespace, DurationLiteral) + + attr_accessor :sign + attr_accessor :year + attr_accessor :month + attr_accessor :day + attr_accessor :hour + attr_accessor :min + attr_accessor :sec + + def initialize(value = nil) + super() + @type = Type + @sign = nil + @year = nil + @month = nil + @day = nil + @hour = nil + @min = nil + @sec = nil + set(value) if value + end + +private + + def _set(value) + /^([+\-]?)P(?:(\d+)Y)?(?:(\d+)M)?(?:(\d+)D)?(T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+(?:\.\d+)?)S)?)?$/ =~ trim(value.to_s) + unless Regexp.last_match + raise ValueSpaceError.new("#{ type }: cannot accept '#{ value }'.") + end + + if ($5 and ((!$2 and !$3 and !$4) or (!$6 and !$7 and !$8))) + # Should we allow 'PT5S' here? + raise ValueSpaceError.new("#{ type }: cannot accept '#{ value }'.") + end + + @sign = $1 + @year = $2.to_i + @month = $3.to_i + @day = $4.to_i + @hour = $6.to_i + @min = $7.to_i + @sec = $8 ? XSDDecimal.new($8) : 0 + @data = _to_s + end + + def _to_s + str = '' + str << @sign if @sign + str << 'P' + l = '' + l << "#{ @year }Y" if @year.nonzero? + l << "#{ @month }M" if @month.nonzero? + l << "#{ @day }D" if @day.nonzero? + r = '' + r << "#{ @hour }H" if @hour.nonzero? + r << "#{ @min }M" if @min.nonzero? + r << "#{ @sec }S" if @sec.nonzero? + str << l + if l.empty? + str << "0D" + end + unless r.empty? + str << "T" << r + end + str + end +end + + +require 'rational' +require 'date' +unless Object.const_defined?('DateTime') + raise LoadError.new('XSD4R requires date2/3.2 or later to be installed. You can download it from http://www.funaba.org/en/ruby.html#date2') +end + +module XSDDateTimeImpl + SecInDay = 86400 # 24 * 60 * 60 + + def to_time + begin + if @data.of * SecInDay == Time.now.utc_offset + d = @data + usec = (d.sec_fraction * SecInDay * 1000000).to_f + Time.local(d.year, d.month, d.mday, d.hour, d.min, d.sec, usec) + else + d = @data.newof + usec = (d.sec_fraction * SecInDay * 1000000).to_f + Time.gm(d.year, d.month, d.mday, d.hour, d.min, d.sec, usec) + end + rescue ArgumentError + nil + end + end + + def tz2of(str) + /^(?:Z|(?:([+\-])(\d\d):(\d\d))?)$/ =~ str + sign = $1 + hour = $2.to_i + min = $3.to_i + + of = case sign + when '+' + of = +(hour.to_r * 60 + min) / 1440 # 24 * 60 + when '-' + of = -(hour.to_r * 60 + min) / 1440 # 24 * 60 + else + 0 + end + of + end + + def of2tz(offset) + diffmin = offset * 24 * 60 + if diffmin.zero? + 'Z' + else + ((diffmin < 0) ? '-' : '+') << format('%02d:%02d', + (diffmin.abs / 60.0).to_i, (diffmin.abs % 60.0).to_i) + end + end + + def _set(t) + if (t.is_a?(Date)) + @data = t + elsif (t.is_a?(Time)) + sec, min, hour, mday, month, year = t.to_a[0..5] + diffday = t.usec.to_r / 1000000 / SecInDay + of = t.utc_offset.to_r / SecInDay + @data = DateTime.civil(year, month, mday, hour, min, sec, of) + @data += diffday + else + set_str(t) + end + end + + def add_tz(s) + s + of2tz(@data.offset) + end +end + +class XSDDateTime < XSDAnySimpleType + include XSDDateTimeImpl + Type = QName.new(Namespace, DateTimeLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def set_str(t) + /^([+\-]?\d\d\d\d\d*)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d(?:\.(\d*))?)(Z|(?:[+\-]\d\d:\d\d)?)?$/ =~ trim(t.to_s) + unless Regexp.last_match + raise ValueSpaceError.new("#{ type }: cannot accept '#{ t }'.") + end + if $1 == '0000' + raise ValueSpaceError.new("#{ type }: cannot accept '#{ t }'.") + end + + year = $1.to_i + if year < 0 + year += 1 + end + mon = $2.to_i + mday = $3.to_i + hour = $4.to_i + min = $5.to_i + sec = $6.to_i + secfrac = $7 + zonestr = $8 + + @data = DateTime.civil(year, mon, mday, hour, min, sec, tz2of(zonestr)) + + if secfrac + diffday = secfrac.to_i.to_r / (10 ** secfrac.size) / SecInDay + # jd = @data.jd + # day_fraction = @data.day_fraction + diffday + # @data = DateTime.new0(DateTime.jd_to_rjd(jd, day_fraction, + # @data.offset), @data.offset) + # + # Thanks to Funaba-san, above code can be simply written as below. + @data += diffday + # FYI: new0 and jd_to_rjd are not necessary to use if you don't have + # exceptional reason. + end + end + + def _to_s + year = (@data.year > 0) ? @data.year : @data.year - 1 + s = format('%.4d-%02d-%02dT%02d:%02d:%02d', + year, @data.mon, @data.mday, @data.hour, @data.min, @data.sec) + if @data.sec_fraction.nonzero? + fr = @data.sec_fraction * SecInDay + shiftsize = fr.denominator.to_s.size + fr_s = (fr * (10 ** shiftsize)).to_i.to_s + s << '.' << '0' * (shiftsize - fr_s.size) << fr_s.sub(/0+$/, '') + end + add_tz(s) + end +end + +class XSDTime < XSDAnySimpleType + include XSDDateTimeImpl + Type = QName.new(Namespace, TimeLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def set_str(t) + /^(\d\d):(\d\d):(\d\d(?:\.(\d*))?)(Z|(?:([+\-])(\d\d):(\d\d))?)?$/ =~ trim(t.to_s) + unless Regexp.last_match + raise ValueSpaceError.new("#{ type }: cannot accept '#{ t }'.") + end + + hour = $1.to_i + min = $2.to_i + sec = $3.to_i + secfrac = $4 + zonestr = $5 + + @data = DateTime.civil(1, 1, 1, hour, min, sec, tz2of(zonestr)) + + if secfrac + @data += secfrac.to_i.to_r / (10 ** secfrac.size) / SecInDay + end + end + + def _to_s + s = format('%02d:%02d:%02d', @data.hour, @data.min, @data.sec) + if @data.sec_fraction.nonzero? + fr = @data.sec_fraction * SecInDay + shiftsize = fr.denominator.to_s.size + fr_s = (fr * (10 ** shiftsize)).to_i.to_s + s << '.' << '0' * (shiftsize - fr_s.size) << fr_s.sub(/0+$/, '') + end + add_tz(s) + end +end + +class XSDDate < XSDAnySimpleType + include XSDDateTimeImpl + Type = QName.new(Namespace, DateLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def set_str(t) + /^([+\-]?\d\d\d\d\d*)-(\d\d)-(\d\d)(Z|(?:([+\-])(\d\d):(\d\d))?)?$/ =~ trim(t.to_s) + unless Regexp.last_match + raise ValueSpaceError.new("#{ type }: cannot accept '#{ t }'.") + end + + year = $1.to_i + if year < 0 + year += 1 + end + mon = $2.to_i + mday = $3.to_i + zonestr = $4 + + @data = DateTime.civil(year, mon, mday, 0, 0, 0, tz2of(zonestr)) + end + + def _to_s + year = (@data.year > 0) ? @data.year : @data.year - 1 + s = format('%.4d-%02d-%02d', year, @data.mon, @data.mday) + add_tz(s) + end +end + +class XSDGYearMonth < XSDAnySimpleType + include XSDDateTimeImpl + Type = QName.new(Namespace, GYearMonthLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def set_str(t) + /^([+\-]?\d\d\d\d\d*)-(\d\d)(Z|(?:([+\-])(\d\d):(\d\d))?)?$/ =~ trim(t.to_s) + unless Regexp.last_match + raise ValueSpaceError.new("#{ type }: cannot accept '#{ t }'.") + end + + year = $1.to_i + if year < 0 + year += 1 + end + mon = $2.to_i + zonestr = $3 + + @data = DateTime.civil(year, mon, 1, 0, 0, 0, tz2of(zonestr)) + end + + def _to_s + year = (@data.year > 0) ? @data.year : @data.year - 1 + s = format('%.4d-%02d', year, @data.mon) + add_tz(s) + end +end + +class XSDGYear < XSDAnySimpleType + include XSDDateTimeImpl + Type = QName.new(Namespace, GYearLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def set_str(t) + /^([+\-]?\d\d\d\d\d*)(Z|(?:([+\-])(\d\d):(\d\d))?)?$/ =~ trim(t.to_s) + unless Regexp.last_match + raise ValueSpaceError.new("#{ type }: cannot accept '#{ t }'.") + end + + year = $1.to_i + if year < 0 + year += 1 + end + zonestr = $2 + + @data = DateTime.civil(year, 1, 1, 0, 0, 0, tz2of(zonestr)) + end + + def _to_s + year = (@data.year > 0) ? @data.year : @data.year - 1 + s = format('%.4d', year) + add_tz(s) + end +end + +class XSDGMonthDay < XSDAnySimpleType + include XSDDateTimeImpl + Type = QName.new(Namespace, GMonthDayLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def set_str(t) + /^(\d\d)-(\d\d)(Z|(?:[+\-]\d\d:\d\d)?)?$/ =~ trim(t.to_s) + unless Regexp.last_match + raise ValueSpaceError.new("#{ type }: cannot accept '#{ t }'.") + end + + mon = $1.to_i + mday = $2.to_i + zonestr = $3 + + @data = DateTime.civil(1, mon, mday, 0, 0, 0, tz2of(zonestr)) + end + + def _to_s + s = format('%02d-%02d', @data.mon, @data.mday) + add_tz(s) + end +end + +class XSDGDay < XSDAnySimpleType + include XSDDateTimeImpl + Type = QName.new(Namespace, GDayLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def set_str(t) + /^(\d\d)(Z|(?:[+\-]\d\d:\d\d)?)?$/ =~ trim(t.to_s) + unless Regexp.last_match + raise ValueSpaceError.new("#{ type }: cannot accept '#{ t }'.") + end + + mday = $1.to_i + zonestr = $2 + + @data = DateTime.civil(1, 1, mday, 0, 0, 0, tz2of(zonestr)) + end + + def _to_s + s = format('%02d', @data.mday) + add_tz(s) + end +end + +class XSDGMonth < XSDAnySimpleType + include XSDDateTimeImpl + Type = QName.new(Namespace, GMonthLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def set_str(t) + /^(\d\d)(Z|(?:[+\-]\d\d:\d\d)?)?$/ =~ trim(t.to_s) + unless Regexp.last_match + raise ValueSpaceError.new("#{ type }: cannot accept '#{ t }'.") + end + + mon = $1.to_i + zonestr = $2 + + @data = DateTime.civil(1, mon, 1, 0, 0, 0, tz2of(zonestr)) + end + + def _to_s + s = format('%02d', @data.mon) + add_tz(s) + end +end + +class XSDHexBinary < XSDAnySimpleType + Type = QName.new(Namespace, HexBinaryLiteral) + + # String in Ruby could be a binary. + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + + def set_encoded(value) + if /^[0-9a-fA-F]*$/ !~ value + raise ValueSpaceError.new("#{ type }: cannot accept '#{ value }'.") + end + @data = trim(String.new(value)) + @is_nil = false + end + + def string + [@data].pack("H*") + end + +private + + def _set(value) + @data = value.unpack("H*")[0] + @data.tr!('a-f', 'A-F') + end +end + +class XSDBase64Binary < XSDAnySimpleType + Type = QName.new(Namespace, Base64BinaryLiteral) + + # String in Ruby could be a binary. + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + + def set_encoded(value) + if /^[A-Za-z0-9+\/=]*$/ !~ value + raise ValueSpaceError.new("#{ type }: cannot accept '#{ value }'.") + end + @data = trim(String.new(value)) + @is_nil = false + end + + def string + @data.unpack("m")[0] + end + +private + + def _set(value) + @data = trim([value].pack("m")) + end +end + +class XSDAnyURI < XSDAnySimpleType + Type = QName.new(Namespace, AnyURILiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def _set(value) + begin + @data = URI.parse(trim(value.to_s)) + rescue URI::InvalidURIError + raise ValueSpaceError.new("#{ type }: cannot accept '#{ value }'.") + end + end +end + +class XSDQName < XSDAnySimpleType + Type = QName.new(Namespace, QNameLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def _set(value) + /^(?:([^:]+):)?([^:]+)$/ =~ trim(value.to_s) + unless Regexp.last_match + raise ValueSpaceError.new("#{ type }: cannot accept '#{ value }'.") + end + + @prefix = $1 + @localpart = $2 + @data = _to_s + end + + def _to_s + if @prefix + "#{ @prefix }:#{ @localpart }" + else + "#{ @localpart }" + end + end +end + + +### +## Derived types +# +class XSDNormalizedString < XSDString + Type = QName.new(Namespace, NormalizedStringLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def _set(value) + if /[\t\r\n]/ =~ value + raise ValueSpaceError.new("#{ type }: cannot accept '#{ value }'.") + end + super + end +end + +class XSDInteger < XSDDecimal + Type = QName.new(Namespace, IntegerLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def set_str(str) + begin + @data = Integer(str) + rescue ArgumentError + raise ValueSpaceError.new("#{ type }: cannot accept '#{ str }'.") + end + end + + def _to_s() + @data.to_s + end +end + +class XSDLong < XSDInteger + Type = QName.new(Namespace, LongLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def set_str(str) + begin + @data = Integer(str) + rescue ArgumentError + raise ValueSpaceError.new("#{ type }: cannot accept '#{ str }'.") + end + unless validate(@data) + raise ValueSpaceError.new("#{ type }: cannot accept '#{ str }'.") + end + end + + MaxInclusive = +9223372036854775807 + MinInclusive = -9223372036854775808 + def validate(v) + ((MinInclusive <= v) && (v <= MaxInclusive)) + end +end + +class XSDInt < XSDLong + Type = QName.new(Namespace, IntLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def set_str(str) + begin + @data = Integer(str) + rescue ArgumentError + raise ValueSpaceError.new("#{ type }: cannot accept '#{ str }'.") + end + unless validate(@data) + raise ValueSpaceError.new("#{ type }: cannot accept '#{ str }'.") + end + end + + MaxInclusive = +2147483647 + MinInclusive = -2147483648 + def validate(v) + ((MinInclusive <= v) && (v <= MaxInclusive)) + end +end + +class XSDShort < XSDInt + Type = QName.new(Namespace, ShortLiteral) + + def initialize(value = nil) + super() + @type = Type + set(value) if value + end + +private + + def set_str(str) + begin + @data = Integer(str) + rescue ArgumentError + raise ValueSpaceError.new("#{ type }: cannot accept '#{ str }'.") + end + unless validate(@data) + raise ValueSpaceError.new("#{ type }: cannot accept '#{ str }'.") + end + end + + MaxInclusive = +32767 + MinInclusive = -32768 + def validate(v) + ((MinInclusive <= v) && (v <= MaxInclusive)) + end +end + + +end diff --git a/lib/xsd/datatypes1999.rb b/lib/xsd/datatypes1999.rb new file mode 100644 index 0000000000..6b6b6be20a --- /dev/null +++ b/lib/xsd/datatypes1999.rb @@ -0,0 +1,31 @@ +=begin +XSD4R - XML Schema Datatype 1999 support +Copyright (C) 2001, 2003 NAKAMURA, Hiroshi. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PRATICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 675 Mass +Ave, Cambridge, MA 02139, USA. +=end + + +require 'xsd/datatypes' + + +module XSD + Namespace.replace('http://www.w3.org/1999/XMLSchema') + InstanceNamespace.replace('http://www.w3.org/1999/XMLSchema-instance') + AnyTypeLiteral.replace('ur-type') + AnySimpleTypeLiteral.replace('ur-type') + NilLiteral.replace('null') + NilValue.replace('1') + DateTimeLiteral.replace('timeInstant') +end diff --git a/lib/xsd/iconvcharset.rb b/lib/xsd/iconvcharset.rb new file mode 100644 index 0000000000..f607b7db45 --- /dev/null +++ b/lib/xsd/iconvcharset.rb @@ -0,0 +1,44 @@ +=begin +XSD4R - Charset handling with iconv. +Copyright (C) 2003 NAKAMURA, Hiroshi. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PRATICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 675 Mass +Ave, Cambridge, MA 02139, USA. +=end + + +require 'iconv' + + +module XSD + + +class IconvCharset + def self.safe_iconv(to, from, str) + iconv = Iconv.new(to, from) + out = "" + begin + out << iconv.iconv(str) + rescue Iconv::IllegalSequence => e + out << e.success + ch, str = e.failed.split(//, 2) + out << '?' + STDERR.puts("Failed to convert #{ch}") + retry + end + return out + end +end + + +end diff --git a/lib/xsd/namedelements.rb b/lib/xsd/namedelements.rb new file mode 100644 index 0000000000..76f958a9e5 --- /dev/null +++ b/lib/xsd/namedelements.rb @@ -0,0 +1,86 @@ +=begin +XSD4R - WSDL named element collection. +Copyright (C) 2002, 2003 NAKAMURA, Hiroshi. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PRATICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 675 Mass +Ave, Cambridge, MA 02139, USA. +=end + + +module XSD + + +class NamedElements + include Enumerable + + def initialize + @elements = [] + @cache = {} + end + + def dup + o = NamedElements.new + o.elements = @elements.dup + o + end + + def size + @elements.size + end + + def [](idx) + if idx.is_a?(Numeric) + @elements[idx] + else + @cache[idx] ||= @elements.find { |item| item.name == idx } + end + end + + def find_name(name) + @elements.find { |item| item.name.name == name } + end + + def each + @elements.each do |element| + yield(element) + end + end + + def <<(rhs) + @elements << rhs + self + end + + def +(rhs) + o = NamedElements.new + o.elements = @elements + rhs.elements + o + end + + def concat(rhs) + @elements.concat(rhs.elements) + self + end + +protected + + def elements=(rhs) + @elements = rhs + end + + def elements + @elements + end +end + +end diff --git a/lib/xsd/ns.rb b/lib/xsd/ns.rb new file mode 100644 index 0000000000..0767b2c30d --- /dev/null +++ b/lib/xsd/ns.rb @@ -0,0 +1,141 @@ +=begin +XSD4R - XML Schema Namespace library +Copyright (C) 2000, 2001, 2002, 2003 NAKAMURA, Hiroshi. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PRATICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 675 Mass +Ave, Cambridge, MA 02139, USA. +=end + + +require 'xsd/datatypes' + + +module XSD + + +class NS + class Assigner + def initialize + @count = 0 + end + + def assign(ns) + @count += 1 + "n#{ @count }" + end + end + + attr_reader :default_namespace + + class FormatError < Error; end + +public + + def initialize(tag2ns = {}) + @tag2ns = tag2ns + @assigner = nil + @ns2tag = {} + @tag2ns.each do |tag, ns| + @ns2tag[ns] = tag + end + @default_namespace = nil + end + + def assign(ns, tag = nil) + if (tag == '') + @default_namespace = ns + tag + else + @assigner ||= Assigner.new + tag ||= @assigner.assign(ns) + @ns2tag[ns] = tag + @tag2ns[tag] = ns + tag + end + end + + def assigned?(ns) + @ns2tag.key?(ns) + end + + def assigned_tag?(tag) + @tag2ns.key?(tag) + end + + def clone_ns + cloned = NS.new(@tag2ns.dup) + cloned.assigner = @assigner + cloned.assign(@default_namespace, '') if @default_namespace + cloned + end + + def name(name) + if (name.namespace == @default_namespace) + name.name + elsif @ns2tag.key?(name.namespace) + @ns2tag[name.namespace] + ':' << name.name + else + raise FormatError.new('Namespace: ' << name.namespace << ' not defined yet.') + end + end + + def compare(ns, name, rhs) + if (ns == @default_namespace) + return true if (name == rhs) + end + @tag2ns.each do |assigned_tag, assigned_ns| + if assigned_ns == ns && "#{ assigned_tag }:#{ name }" == rhs + return true + end + end + false + end + + # $1 and $2 are necessary. + ParseRegexp = Regexp.new('^([^:]+)(?::(.+))?$') + + def parse(elem) + ns = nil + name = nil + ParseRegexp =~ elem + if $2 + ns = @tag2ns[$1] + name = $2 + if !ns + raise FormatError.new('Unknown namespace qualifier: ' << $1) + end + elsif $1 + ns = @default_namespace + name = $1 + end + if !name + raise FormatError.new("Illegal element format: #{ elem }") + end + XSD::QName.new(ns, name) + end + + def each_ns + @ns2tag.each do |ns, tag| + yield(ns, tag) + end + end + +protected + + def assigner=(assigner) + @assigner = assigner + end +end + + +end diff --git a/lib/xsd/qname.rb b/lib/xsd/qname.rb new file mode 100644 index 0000000000..150a837c1d --- /dev/null +++ b/lib/xsd/qname.rb @@ -0,0 +1,77 @@ +=begin +XSD4R - XML QName definition. +Copyright (C) 2002, 2003 NAKAMURA, Hiroshi. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PRATICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 675 Mass +Ave, Cambridge, MA 02139, USA. +=end + + +module XSD + + +class QName + attr_accessor :namespace + attr_accessor :name + + def initialize(namespace = nil, name = nil) + @namespace = namespace + @name = name + end + + def dup_name(name) + self.class.new(@namespace, name) + end + + def match(rhs) + unless self.class === rhs + return false + end + if rhs.namespace and (rhs.namespace != @namespace) + return false + end + if rhs.name and (rhs.name != @name) + return false + end + true + end + + def ==(rhs) + (self.class === rhs && @namespace == rhs.namespace && @name == rhs.name) + end + + def ===(rhs) + (self == rhs) + end + + def eql?(rhs) + (self == rhs) + end + + def hash + @namespace.hash ^ @name.hash + end + + def to_s + "{#{ namespace }}#{ name }" + end + + NormalizedNameRegexp = /^\{([^}]*)\}(.*)$/ + def parse(str) + NormalizedNameRegexp =~ str + self.new($1, $2) + end +end + + +end diff --git a/lib/xsd/xmlparser.rb b/lib/xsd/xmlparser.rb new file mode 100644 index 0000000000..7d6d389261 --- /dev/null +++ b/lib/xsd/xmlparser.rb @@ -0,0 +1,72 @@ +=begin +XSD4R - XML Instance parser library. +Copyright (C) 2002, 2003 NAKAMURA, Hiroshi. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PRATICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 675 Mass +Ave, Cambridge, MA 02139, USA. +=end + + +require 'xsd/xmlparser/parser' + + +module XSD + + +module XMLParser + def create_parser(host, opt) + XSD::XMLParser::Parser.create_parser(host, opt) + end + module_function :create_parser + + # $1 is necessary. + NSParseRegexp = Regexp.new('^xmlns:?(.*)$') + + def filter_ns(ns, attrs) + return attrs if attrs.nil? or attrs.empty? + newattrs = {} + attrs.each do |key, value| + if (NSParseRegexp =~ key) + # '' means 'default namespace'. + tag = $1 || '' + ns.assign(value, tag) + else + newattrs[key] = value + end + end + newattrs + end + module_function :filter_ns +end + + +end + + +# Try to load XML processor. +loaded = false +[ + 'xsd/xmlparser/xmlscanner', + 'xsd/xmlparser/xmlparser', + 'xsd/xmlparser/rexmlparser', +].each do |lib| + begin + require lib + loaded = true + break + rescue LoadError + end +end +unless loaded + raise RuntimeError.new("XML processor module not found.") +end diff --git a/lib/xsd/xmlparser/parser.rb b/lib/xsd/xmlparser/parser.rb new file mode 100644 index 0000000000..0c7fd48084 --- /dev/null +++ b/lib/xsd/xmlparser/parser.rb @@ -0,0 +1,107 @@ +=begin +XSD4R - XML Instance parser library. +Copyright (C) 2002, 2003 NAKAMURA, Hiroshi. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PRATICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 675 Mass +Ave, Cambridge, MA 02139, USA. +=end + + +require 'xsd/qname' +require 'xsd/ns' +require 'xsd/charset' + + +module XSD +module XMLParser + + +class Parser + class ParseError < Error; end + class FormatDecodeError < ParseError; end + class UnknownElementError < FormatDecodeError; end + class UnknownAttributeError < FormatDecodeError; end + class UnexpectedElementError < FormatDecodeError; end + class ElementConstraintError < FormatDecodeError; end + + @@parser_factory = nil + + def self.factory + @@parser_factory + end + + def self.create_parser(host, opt = {}) + @@parser_factory.new(host, opt) + end + + def self.add_factory(factory) + if $DEBUG + puts "Set #{ factory } as XML processor." + end + @@parser_factory = factory + end + +public + + attr_accessor :charset + + def initialize(host, opt = {}) + @host = host + @charset = opt[:charset] || 'us-ascii' + end + + def parse(string_or_readable) + @textbuf = '' + prologue + do_parse(string_or_readable) + epilogue + end + +private + + def do_parse(string_or_readable) + raise NotImplementError.new( + 'Method do_parse must be defined in derived class.') + end + + def start_element(name, attrs) + @host.start_element(name, attrs) + end + + def characters(text) + @host.characters(text) + end + + def end_element(name) + @host.end_element(name) + end + + def prologue + end + + def epilogue + end + + def xmldecl_encoding=(charset) + if @charset.nil? + @charset = charset + else + # Definition in a stream (like HTTP) has a priority. + p "encoding definition: #{ charset } is ignored." if $DEBUG + end + end +end + + +end +end diff --git a/lib/xsd/xmlparser/rexmlparser.rb b/lib/xsd/xmlparser/rexmlparser.rb new file mode 100644 index 0000000000..2500d432d8 --- /dev/null +++ b/lib/xsd/xmlparser/rexmlparser.rb @@ -0,0 +1,65 @@ +=begin +XSD4R - REXMLParser XML parser library. +Copyright (C) 2002, 2003 NAKAMURA, Hiroshi. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PRATICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 675 Mass +Ave, Cambridge, MA 02139, USA. +=end + + +require 'xsd/xmlparser' +require 'rexml/streamlistener' +require 'rexml/document' + + +module XSD +module XMLParser + + +class REXMLParser < XSD::XMLParser::Parser + include REXML::StreamListener + + def do_parse(string_or_readable) + source = nil + source = REXML::SourceFactory.create_from(string_or_readable) + source.encoding = charset if charset + # Listener passes a String in utf-8. + @charset = 'utf-8' + REXML::Document.parse_stream(source, self) + end + + def epilogue + end + + def tag_start(name, attrs) + start_element(name, attrs) + end + + def tag_end(name) + end_element(name) + end + + def text(text) + characters(text) + end + + def xmldecl(version, encoding, standalone) + # Version should be checked. + end + + add_factory(self) +end + + +end +end diff --git a/lib/xsd/xmlparser/xmlparser.rb b/lib/xsd/xmlparser/xmlparser.rb new file mode 100644 index 0000000000..f555b99b26 --- /dev/null +++ b/lib/xsd/xmlparser/xmlparser.rb @@ -0,0 +1,61 @@ +=begin +XSD4R - XMLParser XML parser library. +Copyright (C) 2001, 2003 NAKAMURA, Hiroshi. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PRATICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 675 Mass +Ave, Cambridge, MA 02139, USA. +=end + + +require 'xsd/xmlparser' +require 'xml/parser' + + +module XSD +module XMLParser + + +class XMLParser < XSD::XMLParser::Parser + class Listener < XML::Parser + begin + require 'xml/encoding-ja' + include XML::Encoding_ja + rescue LoadError + # uconv may not be installed. + end + end + + def do_parse(string_or_readable) + # XMLParser passes a String in utf-8. + @charset = 'utf-8' + @parser = Listener.new + @parser.parse(string_or_readable) do |type, name, data| + case type + when XML::Parser::START_ELEM + start_element(name, data) + when XML::Parser::END_ELEM + end_element(name) + when XML::Parser::CDATA + characters(data) + else + raise FormatDecodeError.new("Unexpected XML: #{ type }/#{ name }/#{ data }.") + end + end + end + + add_factory(self) +end + + +end +end diff --git a/lib/xsd/xmlparser/xmlscanner.rb b/lib/xsd/xmlparser/xmlscanner.rb new file mode 100644 index 0000000000..c10e275b9e --- /dev/null +++ b/lib/xsd/xmlparser/xmlscanner.rb @@ -0,0 +1,158 @@ +=begin +XSD4R - XMLScan XML parser library. +Copyright (C) 2002, 2003 NAKAMURA, Hiroshi. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PRATICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 675 Mass +Ave, Cambridge, MA 02139, USA. +=end + + +require 'xsd/xmlparser' +require 'xmlscan/scanner' + + +module XSD +module XMLParser + + +class XMLScanner < XSD::XMLParser::Parser + include XMLScan::Visitor + + def do_parse(string_or_readable) + @attrs = {} + @curattr = nil + @scanner = XMLScan::XMLScanner.new(self) + @scanner.kcode = ::XSD::Charset.charset_str(charset) if charset + @scanner.parse(string_or_readable) + end + + def scanner_kcode=(charset) + @scanner.kcode = ::XSD::Charset.charset_str(charset) if charset + self.xmldecl_encoding = charset + end + + ENTITY_REF_MAP = { + 'lt' => '<', + 'gt' => '>', + 'amp' => '&', + 'quot' => '"', + 'apos' => '\'' + } + + def parse_error(msg) + raise ParseError.new(msg) + end + + def wellformed_error(msg) + raise NotWellFormedError.new(msg) + end + + def valid_error(msg) + raise NotValidError.new(msg) + end + + def warning(msg) + p msg if $DEBUG + end + + # def on_xmldecl; end + + def on_xmldecl_version(str) + # 1.0 expected. + end + + def on_xmldecl_encoding(str) + self.scanner_kcode = str + end + + # def on_xmldecl_standalone(str); end + + # def on_xmldecl_other(name, value); end + + # def on_xmldecl_end; end + + # def on_doctype(root, pubid, sysid); end + + # def on_prolog_space(str); end + + # def on_comment(str); end + + # def on_pi(target, pi); end + + def on_chardata(str) + characters(str) + end + + # def on_cdata(str); end + + def on_etag(name) + end_element(name) + end + + def on_entityref(ref) + characters(ENTITY_REF_MAP[ref]) + end + + def on_charref(code) + characters([code].pack('U')) + end + + def on_charref_hex(code) + on_charref(code) + end + + # def on_start_document; end + + # def on_end_document; end + + def on_stag(name) + @attrs = {} + end + + def on_attribute(name) + @attrs[name] = @curattr = '' + end + + def on_attr_value(str) + @curattr << str + end + + def on_attr_entityref(ref) + @curattr << ENTITY_REF_MAP[ref] + end + + def on_attr_charref(code) + @curattr << [code].pack('U') + end + + def on_attr_charref_hex(code) + on_attr_charref(code) + end + + # def on_attribute_end(name); end + + def on_stag_end_empty(name) + on_stag_end(name) + on_etag(name) + end + + def on_stag_end(name) + start_element(name, @attrs) + end + + add_factory(self) +end + + +end +end |