diff options
Diffstat (limited to 'io_buffer.c')
| -rw-r--r-- | io_buffer.c | 240 |
1 files changed, 191 insertions, 49 deletions
diff --git a/io_buffer.c b/io_buffer.c index 0534999319..f1afc3d3ba 100644 --- a/io_buffer.c +++ b/io_buffer.c @@ -273,6 +273,35 @@ io_buffer_free(struct rb_io_buffer *buffer) } static void +rb_io_buffer_type_mark(void *_buffer) +{ + struct rb_io_buffer *buffer = _buffer; + if (buffer->source != Qnil) { + if (RB_TYPE_P(buffer->source, T_STRING)) { + // The `source` String has to be pinned, because the `base` may point to the embedded String content, + // which can be otherwise moved by GC compaction. + rb_gc_mark(buffer->source); + } else { + rb_gc_mark_movable(buffer->source); + } + } +} + +static void +rb_io_buffer_type_compact(void *_buffer) +{ + struct rb_io_buffer *buffer = _buffer; + if (buffer->source != Qnil) { + if (RB_TYPE_P(buffer->source, T_STRING)) { + // The `source` String has to be pinned, because the `base` may point to the embedded String content, + // which can be otherwise moved by GC compaction. + } else { + buffer->source = rb_gc_location(buffer->source); + } + } +} + +static void rb_io_buffer_type_free(void *_buffer) { struct rb_io_buffer *buffer = _buffer; @@ -293,20 +322,16 @@ rb_io_buffer_type_size(const void *_buffer) return total; } -RUBY_REFERENCES(io_buffer_refs) = { - RUBY_REF_EDGE(struct rb_io_buffer, source), - RUBY_REF_END -}; - static const rb_data_type_t rb_io_buffer_type = { .wrap_struct_name = "IO::Buffer", .function = { - .dmark = RUBY_REFS_LIST_PTR(io_buffer_refs), + .dmark = rb_io_buffer_type_mark, .dfree = rb_io_buffer_type_free, .dsize = rb_io_buffer_type_size, + .dcompact = rb_io_buffer_type_compact, }, .data = NULL, - .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE | RUBY_TYPED_DECL_MARKING, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE, }; static inline enum rb_io_buffer_flags @@ -454,6 +479,8 @@ io_buffer_extract_offset_length(VALUE self, int argc, VALUE argv[], size_t *offs VALUE rb_io_buffer_type_allocate(VALUE self) { + io_buffer_experimental(); + struct rb_io_buffer *buffer = NULL; VALUE instance = TypedData_Make_Struct(self, struct rb_io_buffer, &rb_io_buffer_type, buffer); @@ -496,7 +523,9 @@ io_buffer_for_yield_instance(VALUE _arguments) arguments->instance = io_buffer_for_make_instance(arguments->klass, arguments->string, arguments->flags); - rb_str_locktmp(arguments->string); + if (!RB_OBJ_FROZEN(arguments->string)) { + rb_str_locktmp(arguments->string); + } return rb_yield(arguments->instance); } @@ -510,7 +539,9 @@ io_buffer_for_yield_instance_ensure(VALUE _arguments) rb_io_buffer_free(arguments->instance); } - rb_str_unlocktmp(arguments->string); + if (!RB_OBJ_FROZEN(arguments->string)) { + rb_str_unlocktmp(arguments->string); + } return Qnil; } @@ -540,7 +571,7 @@ io_buffer_for_yield_instance_ensure(VALUE _arguments) * buffer.get_string(0, 1) * # => "t" * string - * # => "best" + * # => "test" * * buffer.resize(100) * # in `resize': Cannot resize external buffer! (IO::Buffer::AccessError) @@ -620,8 +651,6 @@ rb_io_buffer_new(void *base, size_t size, enum rb_io_buffer_flags flags) VALUE rb_io_buffer_map(VALUE io, size_t size, rb_off_t offset, enum rb_io_buffer_flags flags) { - io_buffer_experimental(); - VALUE instance = rb_io_buffer_type_allocate(rb_cIOBuffer); struct rb_io_buffer *buffer = NULL; @@ -638,18 +667,25 @@ rb_io_buffer_map(VALUE io, size_t size, rb_off_t offset, enum rb_io_buffer_flags * call-seq: IO::Buffer.map(file, [size, [offset, [flags]]]) -> io_buffer * * Create an IO::Buffer for reading from +file+ by memory-mapping the file. - * +file_io+ should be a +File+ instance, opened for reading. + * +file+ should be a +File+ instance, opened for reading or reading and writing. * * Optional +size+ and +offset+ of mapping can be specified. + * Trying to map an empty file or specify +size+ of 0 will raise an error. + * Valid values for +offset+ are system-dependent. + * + * By default, the buffer is writable and expects the file to be writable. + * It is also shared, so several processes can use the same mapping. * - * By default, the buffer would be immutable (read only); to create a writable - * mapping, you need to open a file in read-write mode, and explicitly pass - * +flags+ argument without IO::Buffer::IMMUTABLE. + * You can pass IO::Buffer::READONLY in +flags+ argument to make a read-only buffer; + * this allows to work with files opened only for reading. + * Specifying IO::Buffer::PRIVATE in +flags+ creates a private mapping, + * which will not impact other processes or the underlying file. + * It also allows updating a buffer created from a read-only file. * * File.write('test.txt', 'test') * * buffer = IO::Buffer.map(File.open('test.txt'), nil, 0, IO::Buffer::READONLY) - * # => #<IO::Buffer 0x00000001014a0000+4 MAPPED READONLY> + * # => #<IO::Buffer 0x00000001014a0000+4 EXTERNAL MAPPED FILE SHARED READONLY> * * buffer.readonly? # => true * @@ -657,7 +693,7 @@ rb_io_buffer_map(VALUE io, size_t size, rb_off_t offset, enum rb_io_buffer_flags * # => "test" * * buffer.set_string('b', 0) - * # `set_string': Buffer is not writable! (IO::Buffer::AccessError) + * # 'IO::Buffer#set_string': Buffer is not writable! (IO::Buffer::AccessError) * * # create read/write mapping: length 4 bytes, offset 0, flags 0 * buffer = IO::Buffer.map(File.open('test.txt', 'r+'), 4, 0) @@ -679,31 +715,48 @@ io_buffer_map(int argc, VALUE *argv, VALUE klass) // We might like to handle a string path? VALUE io = argv[0]; + rb_off_t file_size = rb_file_size(io); + // Compiler can confirm that we handled file_size <= 0 case: + if (UNLIKELY(file_size <= 0)) { + rb_raise(rb_eArgError, "Invalid negative or zero file size!"); + } + // Here, we assume that file_size is positive: + else if (UNLIKELY((uintmax_t)file_size > SIZE_MAX)) { + rb_raise(rb_eArgError, "File larger than address space!"); + } + size_t size; if (argc >= 2 && !RB_NIL_P(argv[1])) { size = io_buffer_extract_size(argv[1]); - } - else { - rb_off_t file_size = rb_file_size(io); - - // Compiler can confirm that we handled file_size < 0 case: - if (file_size < 0) { - rb_raise(rb_eArgError, "Invalid negative file size!"); + if (UNLIKELY(size == 0)) { + rb_raise(rb_eArgError, "Size can't be zero!"); } - // Here, we assume that file_size is positive: - else if ((uintmax_t)file_size > SIZE_MAX) { - rb_raise(rb_eArgError, "File larger than address space!"); - } - else { - // This conversion should be safe: - size = (size_t)file_size; + if (UNLIKELY(size > (size_t)file_size)) { + rb_raise(rb_eArgError, "Size can't be larger than file size!"); } } + else { + // This conversion should be safe: + size = (size_t)file_size; + } // This is the file offset, not the buffer offset: rb_off_t offset = 0; if (argc >= 3) { offset = NUM2OFFT(argv[2]); + if (UNLIKELY(offset < 0)) { + rb_raise(rb_eArgError, "Offset can't be negative!"); + } + if (UNLIKELY(offset >= file_size)) { + rb_raise(rb_eArgError, "Offset too large!"); + } + if (RB_NIL_P(argv[1])) { + // Decrease size if it's set from the actual file size: + size = (size_t)(file_size - offset); + } + else if (UNLIKELY((size_t)(file_size - offset) < size)) { + rb_raise(rb_eArgError, "Offset too large!"); + } } enum rb_io_buffer_flags flags = 0; @@ -752,8 +805,6 @@ io_flags_for_size(size_t size) VALUE rb_io_buffer_initialize(int argc, VALUE *argv, VALUE self) { - io_buffer_experimental(); - rb_check_arity(argc, 0, 2); struct rb_io_buffer *buffer = NULL; @@ -1473,13 +1524,19 @@ VALUE rb_io_buffer_free_locked(VALUE self) return self; } +static bool +size_sum_is_bigger_than(size_t a, size_t b, size_t x) +{ + struct rbimpl_size_overflow_tag size = rbimpl_size_add_overflow(a, b); + return size.overflowed || size.result > x; +} + // Validate that access to the buffer is within bounds, assuming you want to // access length bytes from the specified offset. static inline void io_buffer_validate_range(struct rb_io_buffer *buffer, size_t offset, size_t length) { - // We assume here that offset + length won't overflow: - if (offset + length > buffer->size) { + if (size_sum_is_bigger_than(offset, length, buffer->size)) { rb_raise(rb_eArgError, "Specified offset+length is bigger than the buffer size!"); } } @@ -1789,9 +1846,9 @@ rb_io_buffer_compare(VALUE self, VALUE other) } static void -io_buffer_validate_type(size_t size, size_t offset) +io_buffer_validate_type(size_t size, size_t offset, size_t extend) { - if (offset > size) { + if (size_sum_is_bigger_than(offset, extend, size)) { rb_raise(rb_eArgError, "Type extends beyond end of buffer! (offset=%"PRIdSIZE" > size=%"PRIdSIZE")", offset, size); } } @@ -1811,6 +1868,9 @@ io_buffer_validate_type(size_t size, size_t offset) // :u64, :U64 | unsigned 64-bit integer. // :s64, :S64 | signed 64-bit integer. // +// :u128, :U128 | unsigned 128-bit integer. +// :s128, :S128 | signed 128-bit integer. +// // :f32, :F32 | 32-bit floating point number. // :f64, :F64 | 64-bit floating point number. @@ -1842,13 +1902,53 @@ ruby_swapf64(double value) return swap.value; } +// Structures and conversion functions are now in numeric.h/numeric.c +// Unified swap function for 128-bit integers (works with both signed and unsigned) +// Since both rb_uint128_t and rb_int128_t have the same memory layout, +// we can use a union to make the swap function work with both types +static inline rb_uint128_t +ruby_swap128_uint(rb_uint128_t x) +{ + rb_uint128_t result; +#ifdef HAVE_UINT128_T +#if __has_builtin(__builtin_bswap128) + result.value = __builtin_bswap128(x.value); +#else + // Manual byte swap for 128-bit integers + uint64_t low = (uint64_t)x.value; + uint64_t high = (uint64_t)(x.value >> 64); + low = ruby_swap64(low); + high = ruby_swap64(high); + result.value = ((uint128_t)low << 64) | high; +#endif +#else + // Fallback swap function using two 64-bit integers + // For big-endian data on little-endian host (or vice versa): + // 1. Swap bytes within each 64-bit part + // 2. Swap the order of the parts (since big-endian stores high first, little-endian stores low first) + result.parts.low = ruby_swap64(x.parts.high); + result.parts.high = ruby_swap64(x.parts.low); +#endif + return result; +} + +static inline rb_int128_t +ruby_swap128_int(rb_int128_t x) +{ + union uint128_int128_conversion conversion = { + .int128 = x + }; + conversion.uint128 = ruby_swap128_uint(conversion.uint128); + return conversion.int128; +} + #define IO_BUFFER_DECLARE_TYPE(name, type, endian, wrap, unwrap, swap) \ static ID RB_IO_BUFFER_DATA_TYPE_##name; \ \ static VALUE \ io_buffer_read_##name(const void* base, size_t size, size_t *offset) \ { \ - io_buffer_validate_type(size, *offset + sizeof(type)); \ + io_buffer_validate_type(size, *offset, sizeof(type)); \ type value; \ memcpy(&value, (char*)base + *offset, sizeof(type)); \ if (endian != RB_IO_BUFFER_HOST_ENDIAN) value = swap(value); \ @@ -1859,7 +1959,7 @@ io_buffer_read_##name(const void* base, size_t size, size_t *offset) \ static void \ io_buffer_write_##name(const void* base, size_t size, size_t *offset, VALUE _value) \ { \ - io_buffer_validate_type(size, *offset + sizeof(type)); \ + io_buffer_validate_type(size, *offset, sizeof(type)); \ type value = unwrap(_value); \ if (endian != RB_IO_BUFFER_HOST_ENDIAN) value = swap(value); \ memcpy((char*)base + *offset, &value, sizeof(type)); \ @@ -1888,6 +1988,11 @@ IO_BUFFER_DECLARE_TYPE(U64, uint64_t, RB_IO_BUFFER_BIG_ENDIAN, RB_ULL2NUM, RB_NU IO_BUFFER_DECLARE_TYPE(s64, int64_t, RB_IO_BUFFER_LITTLE_ENDIAN, RB_LL2NUM, RB_NUM2LL, ruby_swap64) IO_BUFFER_DECLARE_TYPE(S64, int64_t, RB_IO_BUFFER_BIG_ENDIAN, RB_LL2NUM, RB_NUM2LL, ruby_swap64) +IO_BUFFER_DECLARE_TYPE(u128, rb_uint128_t, RB_IO_BUFFER_LITTLE_ENDIAN, rb_uint128_to_numeric, rb_numeric_to_uint128, ruby_swap128_uint) +IO_BUFFER_DECLARE_TYPE(U128, rb_uint128_t, RB_IO_BUFFER_BIG_ENDIAN, rb_uint128_to_numeric, rb_numeric_to_uint128, ruby_swap128_uint) +IO_BUFFER_DECLARE_TYPE(s128, rb_int128_t, RB_IO_BUFFER_LITTLE_ENDIAN, rb_int128_to_numeric, rb_numeric_to_int128, ruby_swap128_int) +IO_BUFFER_DECLARE_TYPE(S128, rb_int128_t, RB_IO_BUFFER_BIG_ENDIAN, rb_int128_to_numeric, rb_numeric_to_int128, ruby_swap128_int) + IO_BUFFER_DECLARE_TYPE(f32, float, RB_IO_BUFFER_LITTLE_ENDIAN, DBL2NUM, NUM2DBL, ruby_swapf32) IO_BUFFER_DECLARE_TYPE(F32, float, RB_IO_BUFFER_BIG_ENDIAN, DBL2NUM, NUM2DBL, ruby_swapf32) IO_BUFFER_DECLARE_TYPE(f64, double, RB_IO_BUFFER_LITTLE_ENDIAN, DBL2NUM, NUM2DBL, ruby_swapf64) @@ -1912,6 +2017,10 @@ io_buffer_buffer_type_size(ID buffer_type) IO_BUFFER_DATA_TYPE_SIZE(U64) IO_BUFFER_DATA_TYPE_SIZE(s64) IO_BUFFER_DATA_TYPE_SIZE(S64) + IO_BUFFER_DATA_TYPE_SIZE(u128) + IO_BUFFER_DATA_TYPE_SIZE(U128) + IO_BUFFER_DATA_TYPE_SIZE(s128) + IO_BUFFER_DATA_TYPE_SIZE(S128) IO_BUFFER_DATA_TYPE_SIZE(f32) IO_BUFFER_DATA_TYPE_SIZE(F32) IO_BUFFER_DATA_TYPE_SIZE(f64) @@ -1968,6 +2077,11 @@ rb_io_buffer_get_value(const void* base, size_t size, ID buffer_type, size_t *of IO_BUFFER_GET_VALUE(s64) IO_BUFFER_GET_VALUE(S64) + IO_BUFFER_GET_VALUE(u128) + IO_BUFFER_GET_VALUE(U128) + IO_BUFFER_GET_VALUE(s128) + IO_BUFFER_GET_VALUE(S128) + IO_BUFFER_GET_VALUE(f32) IO_BUFFER_GET_VALUE(F32) IO_BUFFER_GET_VALUE(f64) @@ -1997,6 +2111,10 @@ rb_io_buffer_get_value(const void* base, size_t size, ID buffer_type, size_t *of * * +:U64+: unsigned integer, 8 bytes, big-endian * * +:s64+: signed integer, 8 bytes, little-endian * * +:S64+: signed integer, 8 bytes, big-endian + * * +:u128+: unsigned integer, 16 bytes, little-endian + * * +:U128+: unsigned integer, 16 bytes, big-endian + * * +:s128+: signed integer, 16 bytes, little-endian + * * +:S128+: signed integer, 16 bytes, big-endian * * +:f32+: float, 4 bytes, little-endian * * +:F32+: float, 4 bytes, big-endian * * +:f64+: double, 8 bytes, little-endian @@ -2178,7 +2296,7 @@ io_buffer_values(int argc, VALUE *argv, VALUE self) /* * call-seq: - * each_byte([offset, [count]]) {|offset, byte| ...} -> self + * each_byte([offset, [count]]) {|byte| ...} -> self * each_byte([offset, [count]]) -> enumerator * * Iterates over the buffer, yielding each byte starting from +offset+. @@ -2202,7 +2320,7 @@ io_buffer_each_byte(int argc, VALUE *argv, VALUE self) rb_io_buffer_get_bytes_for_reading(self, &base, &size); size_t offset, count; - io_buffer_extract_offset_count(RB_IO_BUFFER_DATA_TYPE_U8, size, argc-1, argv+1, &offset, &count); + io_buffer_extract_offset_count(RB_IO_BUFFER_DATA_TYPE_U8, size, argc, argv, &offset, &count); for (size_t i = 0; i < count; i++) { unsigned char *value = (unsigned char *)base + i + offset; @@ -2234,6 +2352,11 @@ rb_io_buffer_set_value(const void* base, size_t size, ID buffer_type, size_t *of IO_BUFFER_SET_VALUE(s64); IO_BUFFER_SET_VALUE(S64); + IO_BUFFER_SET_VALUE(u128); + IO_BUFFER_SET_VALUE(U128); + IO_BUFFER_SET_VALUE(s128); + IO_BUFFER_SET_VALUE(S128); + IO_BUFFER_SET_VALUE(f32); IO_BUFFER_SET_VALUE(F32); IO_BUFFER_SET_VALUE(f64); @@ -2364,7 +2487,7 @@ io_buffer_memmove(struct rb_io_buffer *buffer, size_t offset, const void *source io_buffer_validate_range(buffer, offset, length); - if (source_offset + length > source_size) { + if (size_sum_is_bigger_than(source_offset, length, source_size)) { rb_raise(rb_eArgError, "The computed source range exceeds the size of the source buffer!"); } @@ -2450,7 +2573,9 @@ rb_io_buffer_initialize_copy(VALUE self, VALUE source) io_buffer_initialize(self, buffer, NULL, source_size, io_flags_for_size(source_size), Qnil); - return io_buffer_copy_from(buffer, source_base, source_size, 0, NULL); + VALUE result = io_buffer_copy_from(buffer, source_base, source_size, 0, NULL); + RB_GC_GUARD(source); + return result; } /* @@ -2535,7 +2660,9 @@ io_buffer_copy(int argc, VALUE *argv, VALUE self) rb_io_buffer_get_bytes_for_reading(source, &source_base, &source_size); - return io_buffer_copy_from(buffer, source_base, source_size, argc-1, argv+1); + VALUE result = io_buffer_copy_from(buffer, source_base, source_size, argc-1, argv+1); + RB_GC_GUARD(source); + return result; } /* @@ -2613,7 +2740,9 @@ io_buffer_set_string(int argc, VALUE *argv, VALUE self) const void *source_base = RSTRING_PTR(string); size_t source_size = RSTRING_LEN(string); - return io_buffer_copy_from(buffer, source_base, source_size, argc-1, argv+1); + VALUE result = io_buffer_copy_from(buffer, source_base, source_size, argc-1, argv+1); + RB_GC_GUARD(string); + return result; } void @@ -2733,7 +2862,6 @@ io_buffer_blocking_region_ensure(VALUE _argument) static VALUE io_buffer_blocking_region(VALUE io, struct rb_io_buffer *buffer, rb_blocking_function_t *function, void *data) { - io = rb_io_get_io(io); struct rb_io *ioptr; RB_IO_POINTER(io, ioptr); @@ -2798,6 +2926,8 @@ io_buffer_read_internal(void *_argument) VALUE rb_io_buffer_read(VALUE self, VALUE io, size_t length, size_t offset) { + io = rb_io_get_io(io); + VALUE scheduler = rb_fiber_scheduler_current(); if (scheduler != Qnil) { VALUE result = rb_fiber_scheduler_io_read(scheduler, io, self, length, offset); @@ -2915,6 +3045,8 @@ io_buffer_pread_internal(void *_argument) VALUE rb_io_buffer_pread(VALUE self, VALUE io, rb_off_t from, size_t length, size_t offset) { + io = rb_io_get_io(io); + VALUE scheduler = rb_fiber_scheduler_current(); if (scheduler != Qnil) { VALUE result = rb_fiber_scheduler_io_pread(scheduler, io, from, self, length, offset); @@ -3035,6 +3167,8 @@ io_buffer_write_internal(void *_argument) VALUE rb_io_buffer_write(VALUE self, VALUE io, size_t length, size_t offset) { + io = rb_io_get_write_io(rb_io_get_io(io)); + VALUE scheduler = rb_fiber_scheduler_current(); if (scheduler != Qnil) { VALUE result = rb_fiber_scheduler_io_write(scheduler, io, self, length, offset); @@ -3099,6 +3233,7 @@ io_buffer_write(int argc, VALUE *argv, VALUE self) return rb_io_buffer_write(self, io, length, offset); } + struct io_buffer_pwrite_internal_argument { // The file descriptor to write to: int descriptor; @@ -3144,6 +3279,8 @@ io_buffer_pwrite_internal(void *_argument) VALUE rb_io_buffer_pwrite(VALUE self, VALUE io, rb_off_t from, size_t length, size_t offset) { + io = rb_io_get_write_io(rb_io_get_io(io)); + VALUE scheduler = rb_fiber_scheduler_current(); if (scheduler != Qnil) { VALUE result = rb_fiber_scheduler_io_pwrite(scheduler, io, from, self, length, offset); @@ -3647,9 +3784,9 @@ io_buffer_not_inplace(VALUE self) * * File.write('test.txt', 'test data') * # => 9 - * buffer = IO::Buffer.map(File.open('test.txt')) + * buffer = IO::Buffer.map(File.open('test.txt'), nil, 0, IO::Buffer::READONLY) * # => - * # #<IO::Buffer 0x00007f3f0768c000+9 MAPPED IMMUTABLE> + * # #<IO::Buffer 0x00007f3f0768c000+9 EXTERNAL MAPPED FILE SHARED READONLY> * # ... * buffer.get_string(5, 2) # read 2 bytes, starting from offset 5 * # => "da" @@ -3798,6 +3935,11 @@ Init_IO_Buffer(void) IO_BUFFER_DEFINE_DATA_TYPE(s64); IO_BUFFER_DEFINE_DATA_TYPE(S64); + IO_BUFFER_DEFINE_DATA_TYPE(u128); + IO_BUFFER_DEFINE_DATA_TYPE(U128); + IO_BUFFER_DEFINE_DATA_TYPE(s128); + IO_BUFFER_DEFINE_DATA_TYPE(S128); + IO_BUFFER_DEFINE_DATA_TYPE(f32); IO_BUFFER_DEFINE_DATA_TYPE(F32); IO_BUFFER_DEFINE_DATA_TYPE(f64); |
