2 files changed, 13 insertions, 39 deletions
diff --git a/test/prism/encoding/encodings_test.rb b/test/prism/encoding/encodings_test.rb
index 4ad2b465cc..b008fc3fa1 100644
--- a/test/prism/encoding/encodings_test.rb
+++ b/test/prism/encoding/encodings_test.rb
@@ -56,21 +56,11 @@ module Prism
 
     # Check that we can properly parse every codepoint in the given encoding.
     def assert_encoding(encoding, name, range)
-      # I'm not entirely sure, but I believe these codepoints are incorrect in
-      # their parsing in CRuby. They all report as matching `[[:lower:]]` but
-      # then they are parsed as constants. This is because CRuby determines if
-      # an identifier is a constant or not by case folding it down to lowercase
-      # and checking if there is a difference. And even though they report
-      # themselves as lowercase, their case fold is different. I have reported
-      # this bug upstream.
+      unicode = false
+
       case encoding
       when Encoding::UTF_8, Encoding::UTF_8_MAC, Encoding::UTF8_DoCoMo, Encoding::UTF8_KDDI, Encoding::UTF8_SoftBank, Encoding::CESU_8
-        range = range.to_a - [
-          0x01c5, 0x01c8, 0x01cb, 0x01f2, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b,
-          0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b,
-          0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab,
-          0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fbc, 0x1fcc, 0x1ffc,
-        ]
+        unicode = true
       when Encoding::Windows_1253
         range = range.to_a - [0xb5]
       end
@@ -79,7 +69,7 @@ module Prism
         character = codepoint.chr(encoding)
 
         if character.match?(/[[:alpha:]]/)
-          if character.match?(/[[:upper:]]/)
+          if character.match?(/[[:upper:]]/) || (unicode && character.match?(Regexp.new("\\p{Lt}".encode(encoding))))
             assert_encoding_constant(name, character)
           else
             assert_encoding_identifier(name, character)
diff --git a/test/prism/encoding/regular_expression_encoding_test.rb b/test/prism/encoding/regular_expression_encoding_test.rb
index e2daae1d7f..fdff1e3281 100644
--- a/test/prism/encoding/regular_expression_encoding_test.rb
+++ b/test/prism/encoding/regular_expression_encoding_test.rb
@@ -2,6 +2,7 @@
 
 return unless defined?(RubyVM::InstructionSequence)
 return if RubyVM::InstructionSequence.compile("").to_a[4][:parser] == :prism
+return if RUBY_VERSION < "3.2"
 
 require_relative "../test_helper"
 
@@ -21,7 +22,7 @@ module Prism
 
       ["n", "u", "e", "s"].each do |modifier|
         define_method(:"test_regular_expression_encoding_modifiers_/#{modifier}_#{encoding.name}") do
-          regexp_sources = ["abc", "garçon", "\\x80", "gar\\xC3\\xA7on", "gar\\u{E7}on", "abc\\u{FFFFFF}", "\\x80\\u{80}" ]
+          regexp_sources = ["abc", "garçon", "\\x80", "gar\\xC3\\xA7on", "gar\\u{E7}on", "abc\\u{FFFFFF}", "\\x80\\u{80}", "\\p{L}" ]
 
           assert_regular_expression_encoding_flags(
             encoding,
@@ -35,17 +36,15 @@ module Prism
 
     def assert_regular_expression_encoding_flags(encoding, regexps)
       regexps.each do |regexp|
-        regexp_modifier_used = regexp.end_with?("/u") || regexp.end_with?("/e") || regexp.end_with?("/s") || regexp.end_with?("/n")
         source = "# encoding: #{encoding.name}\n#{regexp}"
 
-        encoding_errors = ["invalid multibyte char", "escaped non ASCII character in UTF-8 regexp", "differs from source encoding"]
-        skipped_errors = ["invalid multibyte escape", "incompatible character encoding", "UTF-8 character in non UTF-8 regexp", "invalid Unicode range", "invalid Unicode list"]
-
-        # TODO (nirvdrum 21-Feb-2024): Prism currently does not handle Regexp validation unless modifiers are used. So, skip processing those errors for now: https://github.com/ruby/prism/issues/2104
-        unless regexp_modifier_used
-          skipped_errors += encoding_errors
-          encoding_errors.clear
-        end
+        encoding_errors = [
+          "invalid multibyte char", "escaped non ASCII character in UTF-8 regexp",
+          "differs from source encoding", "incompatible character encoding",
+          "invalid multibyte escape", "UTF-8 character in non UTF-8 regexp",
+          "invalid Unicode range", "non escaped non ASCII character",
+          "invalid character property name", "invalid Unicode list",
+        ]
 
         expected =
           begin
@@ -53,8 +52,6 @@ module Prism
           rescue SyntaxError => error
             if encoding_errors.find { |e| error.message.include?(e) }
               error.message.split("\n").map { |m| m[/: (.+?)$/, 1] }
-            elsif skipped_errors.find { |e| error.message.include?(e) }
-              next
             else
               raise
             end
@@ -111,19 +108,6 @@ module Prism
             end
           end
 
-        # TODO (nirvdrum 22-Feb-2024): Remove this workaround once Prism better maps CRuby's error messages.
-        # This class of error message is tricky. The part not being compared is a representation of the regexp.
-        # Depending on the source encoding and any encoding modifiers being used, CRuby alters how the regexp is represented.
-        # Sometimes it's an MBC string. Other times it uses hexadecimal character escapes. And in other cases it uses
-        # the long-form Unicode escape sequences. This short-circuit checks that the error message is mostly correct.
-        if expected.is_a?(Array) && actual.is_a?(Array)
-          if expected.last.start_with?("/.../n has a non escaped non ASCII character in non ASCII-8BIT script:") &&
-              actual.last.start_with?("/.../n has a non escaped non ASCII character in non ASCII-8BIT script:")
-            expected.pop
-            actual.pop
-          end
-        end
-
         assert_equal expected, actual
       end
     end