From b4f8623441a8be53b643fed826ba44e933cafd7e Mon Sep 17 00:00:00 2001 From: nagachika Date: Thu, 18 Jan 2024 11:50:31 +0900 Subject: merge revision(s) b3d612804946e841e47d14e09b6839224a79c1a4: [Backport #20150] Fix memory leak in grapheme clusters [Bug #20150] String#grapheme_cluters and String#each_grapheme_cluster leaks memory because if the string is not UTF-8, then the created regex will not be freed. For example: str = "hello world".encode(Encoding::UTF_32LE) 10.times do 1_000.times do str.grapheme_clusters end puts `ps -o rss= -p #{$$}` end Before: 26000 42256 59008 75792 92528 109232 125936 142672 159392 176160 After: 9264 9504 9808 10000 10128 10224 10352 10544 10704 10896 --- string.c | 98 +++++++++++++++++++++++++++++++----------------- test/ruby/test_string.rb | 11 ++++++ 2 files changed, 75 insertions(+), 34 deletions(-) --- test/ruby/test_string.rb | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'test/ruby') diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index 7c685407d6..069813e681 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -1074,6 +1074,17 @@ CODE assert_equal("C", res[2]) end + def test_grapheme_clusters_memory_leak + assert_no_memory_leak([], "", "#{<<~"begin;"}\n#{<<~'end;'}", "[Bug #todo]", rss: true) + begin; + str = "hello world".encode(Encoding::UTF_32LE) + + 10_000.times do + str.grapheme_clusters + end + end; + end + def test_each_line verbose, $VERBOSE = $VERBOSE, nil -- cgit v1.2.3