* lib/xsd/charset.rb: forgot to include this file's changes in the previous commit.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4615 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
author: nahi <nahi@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2003-09-28 09:33:59 +0000
committer: nahi <nahi@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2003-09-28 09:33:59 +0000
commit: eb8ee5e401cf49a31d4dc5ba2c8e74f379bb2408 (patch)
tree: 74edbf3a969aa75179115b876623f3cdaa24e803
parent: d57fc5a48922bbdcc6ab8dc0c2e02fe796c70afc (diff)
1 file changed, 11 insertions, 8 deletions
diff --git a/lib/xsd/charset.rb b/lib/xsd/charset.rb
index 6dda959155..88d4f99043 100644
--- a/lib/xsd/charset.rb
+++ b/lib/xsd/charset.rb
@@ -117,12 +117,13 @@ public
     CharsetMap.index(label.downcase)
   end
 
-  # Original regexps: http://www.din.or.jp/~ohzaki/perl.htm
-  # ascii_euc = '[\x00-\x7F]'
-  ascii_euc = '[\x9\xa\xd\x20-\x7F]'	# XML 1.0 restricted.
+  # us_ascii = '[\x00-\x7F]'
+  us_ascii = '[\x9\xa\xd\x20-\x7F]'	# XML 1.0 restricted.
+  USASCIIRegexp = Regexp.new("\\A#{ us_ascii }*\\z", nil, "NONE")
+
   twobytes_euc = '(?:[\x8E\xA1-\xFE][\xA1-\xFE])'
   threebytes_euc = '(?:\x8F[\xA1-\xFE][\xA1-\xFE])'
-  character_euc = "(?:#{ ascii_euc }|#{ twobytes_euc }|#{ threebytes_euc })"
+  character_euc = "(?:#{ us_ascii }|#{ twobytes_euc }|#{ threebytes_euc })"
   EUCRegexp = Regexp.new("\\A#{ character_euc }*\\z", nil, "NONE")
 
   # onebyte_sjis = '[\x00-\x7F\xA1-\xDF]'
@@ -132,17 +133,19 @@ public
   SJISRegexp = Regexp.new("\\A#{ character_sjis }*\\z", nil, "NONE")
 
   # 0xxxxxxx
-  #ascii_utf8 = '[\0-\x7F]'
-  ascii_utf8 = '[\x9\xA\xD\x20-\x7F]'	# XML 1.0 restricted.
   # 110yyyyy 10xxxxxx
   twobytes_utf8 = '(?:[\xC0-\xDF][\x80-\xBF])'
   # 1110zzzz 10yyyyyy 10xxxxxx
   threebytes_utf8 = '(?:[\xE0-\xEF][\x80-\xBF][\x80-\xBF])'
   # 11110uuu 10uuuzzz 10yyyyyy 10xxxxxx
   fourbytes_utf8 = '(?:[\xF0-\xF7][\x80-\xBF][\x80-\xBF][\x80-\xBF])'
-  character_utf8 = "(?:#{ ascii_utf8 }|#{ twobytes_utf8 }|#{ threebytes_utf8 }|#{ fourbytes_utf8 })"
+  character_utf8 = "(?:#{ us_ascii }|#{ twobytes_utf8 }|#{ threebytes_utf8 }|#{ fourbytes_utf8 })"
   UTF8Regexp = Regexp.new("\\A#{ character_utf8 }*\\z", nil, "NONE")
 
+  def Charset.is_us_ascii(str)
+    USASCIIRegexp =~ str
+  end
+
   def Charset.is_utf8(str)
     UTF8Regexp =~ str
   end
@@ -158,7 +161,7 @@ public
   def Charset.is_ces(str, code = $KCODE)
     case code
     when 'NONE'
-      true
+      is_us_ascii(str)
     when 'UTF8'
       is_utf8(str)
     when 'EUC'
author	nahi <nahi@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2003-09-28 09:33:59 +0000
committer	nahi <nahi@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2003-09-28 09:33:59 +0000
commit	eb8ee5e401cf49a31d4dc5ba2c8e74f379bb2408 (patch)
tree	74edbf3a969aa75179115b876623f3cdaa24e803
parent	d57fc5a48922bbdcc6ab8dc0c2e02fe796c70afc (diff)