diff options
Diffstat (limited to 'ext/strscan')
| -rw-r--r-- | ext/strscan/depend | 1 | ||||
| -rw-r--r-- | ext/strscan/extconf.rb | 8 | ||||
| -rw-r--r-- | ext/strscan/lib/strscan.rb | 20 | ||||
| -rw-r--r-- | ext/strscan/lib/strscan/strscan.rb | 44 | ||||
| -rw-r--r-- | ext/strscan/strscan.c | 422 | ||||
| -rw-r--r-- | ext/strscan/strscan.gemspec | 12 |
6 files changed, 326 insertions, 181 deletions
diff --git a/ext/strscan/depend b/ext/strscan/depend index 8dbae206d4..b40a025230 100644 --- a/ext/strscan/depend +++ b/ext/strscan/depend @@ -138,6 +138,7 @@ strscan.o: $(hdrdir)/ruby/internal/intern/re.h strscan.o: $(hdrdir)/ruby/internal/intern/ruby.h strscan.o: $(hdrdir)/ruby/internal/intern/select.h strscan.o: $(hdrdir)/ruby/internal/intern/select/largesize.h +strscan.o: $(hdrdir)/ruby/internal/intern/set.h strscan.o: $(hdrdir)/ruby/internal/intern/signal.h strscan.o: $(hdrdir)/ruby/internal/intern/sprintf.h strscan.o: $(hdrdir)/ruby/internal/intern/string.h diff --git a/ext/strscan/extconf.rb b/ext/strscan/extconf.rb index bd65606a4e..4e8d851fdb 100644 --- a/ext/strscan/extconf.rb +++ b/ext/strscan/extconf.rb @@ -2,8 +2,12 @@ require 'mkmf' if RUBY_ENGINE == 'ruby' $INCFLAGS << " -I$(top_srcdir)" if $extmk - have_func("onig_region_memsize", "ruby.h") - have_func("rb_reg_onig_match", "ruby.h") + have_func("onig_region_memsize(NULL)") + have_func("rb_reg_onig_match", "ruby/re.h") + have_func("rb_deprecate_constant") + have_func("rb_int_parse_cstr", "ruby.h") # RUBY_VERSION >= 2.5 + have_func("rb_gc_location", "ruby.h") # RUBY_VERSION >= 2.7 + have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3 create_makefile 'strscan' else File.write('Makefile', dummy_makefile("").join) diff --git a/ext/strscan/lib/strscan.rb b/ext/strscan/lib/strscan.rb new file mode 100644 index 0000000000..4e8910d141 --- /dev/null +++ b/ext/strscan/lib/strscan.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +case RUBY_ENGINE +when 'ruby' + require 'strscan.so' + require_relative 'strscan/strscan' +when 'jruby' + require 'strscan.jar' + JRuby::Util.load_ext('org.jruby.ext.strscan.StringScannerLibrary') + require_relative 'strscan/strscan' +when 'truffleruby' + if RUBY_ENGINE_VERSION.to_i >= 34 + require 'strscan/truffleruby' + else + $LOAD_PATH.delete __dir__ + require 'strscan' + end +else + raise NotImplementedError, "Unknown Ruby: #{RUBY_ENGINE}" +end diff --git a/ext/strscan/lib/strscan/strscan.rb b/ext/strscan/lib/strscan/strscan.rb index 46acc7ea82..5e262f4007 100644 --- a/ext/strscan/lib/strscan/strscan.rb +++ b/ext/strscan/lib/strscan/strscan.rb @@ -1,17 +1,47 @@ # frozen_string_literal: true class StringScanner + unless method_defined?(:integer_at) # For JRuby + def integer_at(specifier, *to_i_args) + self[specifier]&.to_i(*to_i_args) + end + end + + # :markup: markdown + # # call-seq: - # scan_integer(base: 10) + # scan_integer(base: 10) -> integer or nil + # + # Returns an integer scanned from `self`, + # beginning at the current position; + # returns `nil` if no such integer was available. + # + # When `base` is `10` (the default), + # equivalent to calling #scan with argument +pattern+ + # as `'[+-]?\d+'`: + # + # ```ruby + # scanner = StringScanner.new('Form 27B/6') + # scanner.scan_integer # => nil # No integer at position 0. + # scanner.pos = 5 + # scanner.scan_integer # => 27 + # scanner.matched # => "27" + # scanner.pos # => 7 + # ``` # - # If `base` isn't provided or is `10`, then it is equivalent to calling `#scan` with a `[+-]?\d+` pattern, - # and returns an Integer or nil. + # When `base` is `16` (the only other value allowed), + # equivalent to calling #scan with argument `pattern` + # as `'[+-]?(0x)?[0-9a-fA-F]+'`: # - # If `base` is `16`, then it is equivalent to calling `#scan` with a `[+-]?(0x)?[0-9a-fA-F]+` pattern, - # and returns an Integer or nil. + # ```ruby + # scanner.pos = 5 + # scanner.scan_integer(base: 16) # => 635 + # scanner.matched # => "27B" + # scanner.pos # => 8 + # ``` # - # The scanned string must be encoded with an ASCII compatible encoding, otherwise - # Encoding::CompatibilityError will be raised. + # Raises Encoding::CompatibilityError if `self` does not have + # an ASCII compatible encoding. def scan_integer(base: 10) case base when 10 diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index ee5af9a139..dede57218b 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -22,7 +22,15 @@ extern size_t onig_region_memsize(const struct re_registers *regs); #include <stdbool.h> -#define STRSCAN_VERSION "3.1.2" +#define STRSCAN_VERSION "3.1.9.dev" + + +#ifdef HAVE_RB_DEPRECATE_CONSTANT +/* In ruby 3.0, defined but exposed in external headers */ +extern void rb_deprecate_constant(VALUE mod, const char *name); +#else +# define rb_deprecate_constant(mod, name) ((void)0) +#endif /* ======================================================================= Data Type Definitions @@ -30,7 +38,6 @@ extern size_t onig_region_memsize(const struct re_registers *regs); static VALUE StringScanner; static VALUE ScanError; -static ID id_byteslice; static int usascii_encindex, utf8_encindex, binary_encindex; @@ -58,8 +65,13 @@ struct strscanner }; #define MATCHED_P(s) ((s)->flags & FLAG_MATCHED) -#define MATCHED(s) (s)->flags |= FLAG_MATCHED -#define CLEAR_MATCH_STATUS(s) (s)->flags &= ~FLAG_MATCHED +#define MATCHED(s) ((s)->flags |= FLAG_MATCHED) +#define CLEAR_MATCHED(s) ((s)->flags &= ~FLAG_MATCHED) +#define CLEAR_NAMED_CAPTURES(s) ((s)->regex = Qnil) +#define CLEAR_MATCH_STATUS(s) do {\ + CLEAR_MATCHED(s);\ + CLEAR_NAMED_CAPTURES(s);\ +} while (0) #define S_PBEG(s) (RSTRING_PTR((s)->str)) #define S_LEN(s) (RSTRING_LEN((s)->str)) @@ -92,7 +104,6 @@ static VALUE strscan_init_copy _((VALUE vself, VALUE vorig)); static VALUE strscan_s_mustc _((VALUE self)); static VALUE strscan_terminate _((VALUE self)); -static VALUE strscan_clear _((VALUE self)); static VALUE strscan_get_string _((VALUE self)); static VALUE strscan_set_string _((VALUE self, VALUE str)); static VALUE strscan_concat _((VALUE self, VALUE str)); @@ -114,14 +125,11 @@ static VALUE strscan_search_full _((VALUE self, VALUE re, static void adjust_registers_to_matched _((struct strscanner *p)); static VALUE strscan_getch _((VALUE self)); static VALUE strscan_get_byte _((VALUE self)); -static VALUE strscan_getbyte _((VALUE self)); static VALUE strscan_peek _((VALUE self, VALUE len)); -static VALUE strscan_peep _((VALUE self, VALUE len)); static VALUE strscan_scan_base10_integer _((VALUE self)); static VALUE strscan_unscan _((VALUE self)); static VALUE strscan_bol_p _((VALUE self)); static VALUE strscan_eos_p _((VALUE self)); -static VALUE strscan_empty_p _((VALUE self)); static VALUE strscan_rest_p _((VALUE self)); static VALUE strscan_matched_p _((VALUE self)); static VALUE strscan_matched _((VALUE self)); @@ -174,12 +182,35 @@ extract_beg_len(struct strscanner *p, long beg_i, long len) Constructor ======================================================================= */ +#ifdef RUBY_TYPED_EMBEDDABLE +# define HAVE_RUBY_TYPED_EMBEDDABLE 1 +#else +# ifdef HAVE_CONST_RUBY_TYPED_EMBEDDABLE +# define RUBY_TYPED_EMBEDDABLE RUBY_TYPED_EMBEDDABLE +# define HAVE_RUBY_TYPED_EMBEDDABLE 1 +# else +# define RUBY_TYPED_EMBEDDABLE 0 +# endif +#endif + +#ifdef HAVE_RB_GC_LOCATION +static void +strscan_compact(void *ptr) +{ + struct strscanner *p = ptr; + p->str = rb_gc_location(p->str); + p->regex = rb_gc_location(p->regex); +} +#else +#define rb_gc_mark_movable rb_gc_mark +#endif + static void strscan_mark(void *ptr) { struct strscanner *p = ptr; - rb_gc_mark(p->str); - rb_gc_mark(p->regex); + rb_gc_mark_movable(p->str); + rb_gc_mark_movable(p->regex); } static void @@ -187,24 +218,37 @@ strscan_free(void *ptr) { struct strscanner *p = ptr; onig_region_free(&(p->regs), 0); +#ifndef HAVE_RUBY_TYPED_EMBEDDABLE ruby_xfree(p); +#endif } static size_t strscan_memsize(const void *ptr) { - const struct strscanner *p = ptr; - size_t size = sizeof(*p) - sizeof(p->regs); + size_t size = 0; +#ifndef HAVE_RUBY_TYPED_EMBEDDABLE + size += sizeof(struct strscanner); +#endif + #ifdef HAVE_ONIG_REGION_MEMSIZE - size += onig_region_memsize(&p->regs); + const struct strscanner *p = ptr; + size += onig_region_memsize(&p->regs) - sizeof(p->regs); #endif return size; } static const rb_data_type_t strscanner_type = { - "StringScanner", - {strscan_mark, strscan_free, strscan_memsize}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + .wrap_struct_name = "StringScanner", + .function = { + .dmark = strscan_mark, + .dfree = strscan_free, + .dsize = strscan_memsize, +#ifdef HAVE_RB_GC_LOCATION + .dcompact = strscan_compact, +#endif + }, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE }; static VALUE @@ -216,7 +260,6 @@ strscan_s_allocate(VALUE klass) CLEAR_MATCH_STATUS(p); onig_region_init(&(p->regs)); p->str = Qnil; - p->regex = Qnil; return obj; } @@ -269,7 +312,7 @@ strscan_initialize(int argc, VALUE *argv, VALUE self) p->fixed_anchor_p = false; } StringValue(str); - p->str = str; + RB_OBJ_WRITE(self, &p->str, str); return self; } @@ -299,7 +342,7 @@ strscan_init_copy(VALUE vself, VALUE vorig) orig = check_strscan(vorig); if (self != orig) { self->flags = orig->flags; - self->str = orig->str; + RB_OBJ_WRITE(vself, &self->str, orig->str); self->prev = orig->prev; self->curr = orig->curr; if (rb_reg_region_copy(&self->regs, &orig->regs)) @@ -367,6 +410,9 @@ strscan_reset(VALUE self) /* * :markup: markdown + * :call-seq: + * terminate -> self + * * :include: strscan/link_refs.txt * :include: strscan/methods/terminate.md */ @@ -382,21 +428,6 @@ strscan_terminate(VALUE self) } /* - * call-seq: - * clear -> self - * - * This method is obsolete; use the equivalent method StringScanner#terminate. - */ - - /* :nodoc: */ -static VALUE -strscan_clear(VALUE self) -{ - rb_warning("StringScanner#clear is obsolete; use #terminate instead"); - return strscan_terminate(self); -} - -/* * :markup: markdown * :include: strscan/link_refs.txt * @@ -463,7 +494,7 @@ strscan_set_string(VALUE self, VALUE str) struct strscanner *p = check_strscan(self); StringValue(str); - p->str = str; + RB_OBJ_WRITE(self, &p->str, str); p->curr = 0; CLEAR_MATCH_STATUS(p); return str; @@ -511,6 +542,9 @@ strscan_concat(VALUE self, VALUE str) /* * :markup: markdown + * :call-seq: + * pos -> byte_position + * * :include: strscan/link_refs.txt * :include: strscan/methods/get_pos.md */ @@ -520,11 +554,14 @@ strscan_get_pos(VALUE self) struct strscanner *p; GET_SCANNER(self, p); - return INT2FIX(p->curr); + return LONG2NUM(p->curr); } /* * :markup: markdown + * :call-seq: + * charpos -> character_position + * * :include: strscan/link_refs.txt * :include: strscan/methods/get_charpos.md */ @@ -540,6 +577,10 @@ strscan_get_charpos(VALUE self) /* * :markup: markdown + * :call-seq: + * pos = n -> n + * pointer = n -> n + * * :include: strscan/link_refs.txt * :include: strscan/methods/set_pos.md */ @@ -550,7 +591,7 @@ strscan_set_pos(VALUE self, VALUE v) long i; GET_SCANNER(self, p); - i = NUM2INT(v); + i = NUM2LONG(v); if (i < 0) i += S_LEN(p); if (i < 0) rb_raise(rb_eRangeError, "index out of range"); if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range"); @@ -571,19 +612,20 @@ match_target(struct strscanner *p) } static inline void -set_registers(struct strscanner *p, size_t length) +set_registers(struct strscanner *p, size_t pos, size_t length) { const int at = 0; OnigRegion *regs = &(p->regs); onig_region_clear(regs); if (onig_region_set(regs, at, 0, 0)) return; if (p->fixed_anchor_p) { - regs->beg[at] = p->curr; - regs->end[at] = p->curr + length; + regs->beg[at] = pos + p->curr; + regs->end[at] = pos + p->curr + length; } else { - regs->end[at] = length; + regs->beg[at] = pos; + regs->end[at] = pos + length; } } @@ -707,7 +749,7 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly if (RB_TYPE_P(pattern, T_REGEXP)) { OnigPosition ret; - p->regex = pattern; + RB_OBJ_WRITE(self, &p->regex, pattern); ret = rb_reg_onig_match(p->regex, p->str, headonly ? strscan_match : strscan_search, @@ -731,7 +773,7 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) { return Qnil; } - set_registers(p, RSTRING_LEN(pattern)); + set_registers(p, 0, RSTRING_LEN(pattern)); } else { rb_encoding *enc = rb_enc_check(p->str, pattern); @@ -740,7 +782,7 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly if (pos == -1) { return Qnil; } - set_registers(p, RSTRING_LEN(pattern) + pos); + set_registers(p, pos, RSTRING_LEN(pattern)); } } @@ -763,6 +805,9 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly /* * :markup: markdown + * :call-seq: + * scan(pattern) -> substring or nil + * * :include: strscan/link_refs.txt * :include: strscan/methods/scan.md */ @@ -777,7 +822,7 @@ strscan_scan(VALUE self, VALUE re) * :include: strscan/link_refs.txt * * call-seq: - * match?(pattern) -> updated_position or nil + * match?(pattern) -> match_size or nil * * Attempts to [match][17] the given `pattern` * at the beginning of the [target substring][3]; @@ -836,6 +881,9 @@ strscan_match_p(VALUE self, VALUE re) /* * :markup: markdown + * call-seq: + * skip(pattern) -> match_size or nil + * * :include: strscan/link_refs.txt * :include: strscan/methods/skip.md */ @@ -908,7 +956,7 @@ strscan_check(VALUE self, VALUE re) /* * call-seq: - * scan_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or nil + * scan_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or length or nil * * Equivalent to one of the following: * @@ -933,6 +981,9 @@ strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f) /* * :markup: markdown + * :call-seq: + * scan_until(pattern) -> substring or nil + * * :include: strscan/link_refs.txt * :include: strscan/methods/scan_until.md */ @@ -1007,6 +1058,9 @@ strscan_exist_p(VALUE self, VALUE re) /* * :markup: markdown + * :call-seq: + * skip_until(pattern) -> matched_substring_size or nil + * * :include: strscan/link_refs.txt * :include: strscan/methods/skip_until.md */ @@ -1118,6 +1172,9 @@ adjust_registers_to_matched(struct strscanner *p) /* * :markup: markdown + * :call-seq: + * getch -> character or nil + * * :include: strscan/link_refs.txt * :include: strscan/methods/getch.md */ @@ -1145,7 +1202,7 @@ strscan_getch(VALUE self) /* * call-seq: - * scan_byte -> integer_byte + * scan_byte -> integer_byte or nil * * Scans one byte and returns it as an integer. * This method is not multibyte character sensitive. @@ -1191,6 +1248,9 @@ strscan_peek_byte(VALUE self) /* * :markup: markdown + * :call-seq: + * get_byte -> byte_as_character or nil + * * :include: strscan/link_refs.txt * :include: strscan/methods/get_byte.md */ @@ -1214,22 +1274,6 @@ strscan_get_byte(VALUE self) } /* - * call-seq: - * getbyte - * - * Equivalent to #get_byte. - * This method is obsolete; use #get_byte instead. - */ - - /* :nodoc: */ -static VALUE -strscan_getbyte(VALUE self) -{ - rb_warning("StringScanner#getbyte is obsolete; use #get_byte instead"); - return strscan_get_byte(self); -} - -/* * :markup: markdown * :include: strscan/link_refs.txt * @@ -1264,22 +1308,6 @@ strscan_peek(VALUE self, VALUE vlen) return extract_beg_len(p, p->curr, len); } -/* - * call-seq: - * peep - * - * Equivalent to #peek. - * This method is obsolete; use #peek instead. - */ - - /* :nodoc: */ -static VALUE -strscan_peep(VALUE self, VALUE vlen) -{ - rb_warning("StringScanner#peep is obsolete; use #peek instead"); - return strscan_peek(self, vlen); -} - static VALUE strscan_parse_integer(struct strscanner *p, int base, long len) { @@ -1292,20 +1320,25 @@ strscan_parse_integer(struct strscanner *p, int base, long len) integer = rb_cstr2inum(buffer, base); RB_ALLOCV_END(buffer_v); p->curr += len; + + MATCHED(p); + adjust_registers_to_matched(p); + return integer; } static inline bool -strscan_ascii_compat_fastpath(VALUE str) { +strscan_ascii_compat_fastpath(VALUE str) +{ int encindex = ENCODING_GET_INLINED(str); - // The overwhelming majority of strings are in one of these 3 encodings. + /* The overwhelming majority of strings are in one of these 3 encodings. */ return encindex == utf8_encindex || encindex == binary_encindex || encindex == usascii_encindex; } static inline void strscan_must_ascii_compat(VALUE str) { - // The overwhelming majority of strings are in one of these 3 encodings. + /* The overwhelming majority of strings are in one of these 3 encodings. */ if (RB_LIKELY(strscan_ascii_compat_fastpath(str))) { return; } @@ -1313,11 +1346,12 @@ strscan_must_ascii_compat(VALUE str) rb_must_asciicompat(str); } +/* :nodoc: */ static VALUE strscan_scan_base10_integer(VALUE self) { char *ptr; - long len = 0; + long len = 0, remaining_len; struct strscanner *p; GET_SCANNER(self, p); @@ -1327,7 +1361,7 @@ strscan_scan_base10_integer(VALUE self) ptr = CURPTR(p); - long remaining_len = S_RESTLEN(p); + remaining_len = S_RESTLEN(p); if (remaining_len <= 0) { return Qnil; @@ -1341,7 +1375,6 @@ strscan_scan_base10_integer(VALUE self) return Qnil; } - MATCHED(p); p->prev = p->curr; while (len < remaining_len && rb_isdigit(ptr[len])) { @@ -1351,11 +1384,12 @@ strscan_scan_base10_integer(VALUE self) return strscan_parse_integer(p, 10, len); } +/* :nodoc: */ static VALUE strscan_scan_base16_integer(VALUE self) { char *ptr; - long len = 0; + long len = 0, remaining_len; struct strscanner *p; GET_SCANNER(self, p); @@ -1365,7 +1399,7 @@ strscan_scan_base16_integer(VALUE self) ptr = CURPTR(p); - long remaining_len = S_RESTLEN(p); + remaining_len = S_RESTLEN(p); if (remaining_len <= 0) { return Qnil; @@ -1375,7 +1409,7 @@ strscan_scan_base16_integer(VALUE self) len++; } - if ((remaining_len >= (len + 2)) && ptr[len] == '0' && ptr[len + 1] == 'x') { + if ((remaining_len >= (len + 3)) && ptr[len] == '0' && ptr[len + 1] == 'x' && rb_isxdigit(ptr[len + 2])) { len += 2; } @@ -1383,7 +1417,6 @@ strscan_scan_base16_integer(VALUE self) return Qnil; } - MATCHED(p); p->prev = p->curr; while (len < remaining_len && rb_isxdigit(ptr[len])) { @@ -1519,26 +1552,9 @@ strscan_eos_p(VALUE self) /* * call-seq: - * empty? - * - * Equivalent to #eos?. - * This method is obsolete, use #eos? instead. - */ - - /* :nodoc: */ -static VALUE -strscan_empty_p(VALUE self) -{ - rb_warning("StringScanner#empty? is obsolete; use #eos? instead"); - return strscan_eos_p(self); -} - -/* - * call-seq: * rest? * * Returns true if and only if there is more data in the string. See #eos?. - * This method is obsolete; use #eos? instead. * * s = StringScanner.new('test string') * # These two are opposites @@ -1660,19 +1676,49 @@ strscan_matched_size(VALUE self) static int name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end, rb_encoding *enc) { - int num; - - num = onig_name_to_backref_number(RREGEXP_PTR(regexp), - (const unsigned char* )name, (const unsigned char* )name_end, regs); - if (num >= 1) { - return num; - } - else { - rb_enc_raise(enc, rb_eIndexError, "undefined group name reference: %.*s", - rb_long2int(name_end - name), name); + if (RTEST(regexp)) { + int num = onig_name_to_backref_number(RREGEXP_PTR(regexp), + (const unsigned char* )name, + (const unsigned char* )name_end, + regs); + if (num >= 1) { + return num; + } } + rb_enc_raise(enc, rb_eIndexError, "undefined group name reference: %.*s", + rb_long2int(name_end - name), name); +} - UNREACHABLE; +/* + * Resolve capture group index from Integer, Symbol, or String. + * Returns the resolved register index, or -1 if unmatched/out of range. + * For Symbol/String specifiers, raises IndexError if the named group + * does not exist. + */ +static long +resolve_capture_index(struct strscanner *p, VALUE specifier) +{ + const char *name; + long i; + if (! MATCHED_P(p)) return -1; + switch (TYPE(specifier)) { + case T_SYMBOL: + specifier = rb_sym2str(specifier); + /* fall through */ + case T_STRING: + RSTRING_GETMEM(specifier, name, i); + i = name_to_backref_number(&(p->regs), p->regex, name, name + i, + rb_enc_get(specifier)); + break; + default: + i = NUM2LONG(specifier); + } + if (i < 0) + i += p->regs.num_regs; + if (i < 0) return -1; + if (i >= p->regs.num_regs) return -1; + if (p->regs.beg[i] == -1) return -1; + return i; } /* @@ -1749,31 +1795,12 @@ name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name static VALUE strscan_aref(VALUE self, VALUE idx) { - const char *name; struct strscanner *p; long i; GET_SCANNER(self, p); - if (! MATCHED_P(p)) return Qnil; - - switch (TYPE(idx)) { - case T_SYMBOL: - idx = rb_sym2str(idx); - /* fall through */ - case T_STRING: - if (!RTEST(p->regex)) return Qnil; - RSTRING_GETMEM(idx, name, i); - i = name_to_backref_number(&(p->regs), p->regex, name, name + i, rb_enc_get(idx)); - break; - default: - i = NUM2LONG(idx); - } - - if (i < 0) - i += p->regs.num_regs; - if (i < 0) return Qnil; - if (i >= p->regs.num_regs) return Qnil; - if (p->regs.beg[i] == -1) return Qnil; + i = resolve_capture_index(p, idx); + if (i < 0) return Qnil; return extract_range(p, adjust_register_position(p, p->regs.beg[i]), @@ -1782,6 +1809,81 @@ strscan_aref(VALUE self, VALUE idx) /* * :markup: markdown + * + * call-seq: + * integer_at(specifier, base=10) -> integer or nil + * + * Returns the captured substring at the given `specifier` as an Integer, + * following the behavior of `String#to_i(base)`. + * + * `specifier` can be an Integer (positive, negative, or zero), a Symbol, + * or a String for named capture groups. + * + * Returns `nil` if: + * - No match has been performed or the last match failed + * - The `specifier` is an Integer and is out of range + * - The group at `specifier` did not participate in the match + * + * Raises IndexError if `specifier` is a Symbol or String that does not + * correspond to a named capture group, consistent with + * `StringScanner#[]`. + * + * This is semantically equivalent to `self[specifier]&.to_i(base)` + * but avoids the allocation of a temporary String when possible. + * + * ```rb + * scanner = StringScanner.new("2024-06-15") + * scanner.scan(/(\d{4})-(\d{2})-(\d{2})/) + * scanner.integer_at(1) # => 2024 + * scanner.integer_at(1, 16) # => 8228 + * ``` + */ +static VALUE +strscan_integer_at(int argc, VALUE *argv, VALUE self) +{ + struct strscanner *p; + long i; + long beg, end, len; + const char *ptr; + VALUE rb_specifier; + VALUE rb_base; + int base = 10; + + GET_SCANNER(self, p); + rb_scan_args(argc, argv, "11", &rb_specifier, &rb_base); + if (argc > 1) + base = NUM2INT(rb_base); + i = resolve_capture_index(p, rb_specifier); + if (i < 0) + return Qnil; + + beg = adjust_register_position(p, p->regs.beg[i]); + end = adjust_register_position(p, p->regs.end[i]); + len = end - beg; + ptr = S_PBEG(p) + beg; +#ifdef HAVE_RB_INT_PARSE_CSTR + { + /* + * Ruby 2.5 or later export the rb_int_parse_cstr() symbol but + * prototype definition isn't provided. Ruby 4.1 or later + * provide prototype definition. + */ +# ifndef RB_INT_PARSE_DEFAULT + VALUE rb_int_parse_cstr(const char *str, ssize_t len, char **endp, + size_t *ndigits, int base, int flags); +# define RB_INT_PARSE_DEFAULT 0x07 +# endif + char *endp; + return rb_int_parse_cstr(ptr, len, &endp, NULL, base, + RB_INT_PARSE_DEFAULT); + } +#else + return rb_str_to_inum(rb_str_new(ptr, len), base, 0); +#endif +} + +/* + * :markup: markdown * :include: strscan/link_refs.txt * * call-seq: @@ -2049,22 +2151,6 @@ strscan_rest_size(VALUE self) return INT2FIX(i); } -/* - * call-seq: - * restsize - * - * <tt>s.restsize</tt> is equivalent to <tt>s.rest_size</tt>. - * This method is obsolete; use #rest_size instead. - */ - - /* :nodoc: */ -static VALUE -strscan_restsize(VALUE self) -{ - rb_warning("StringScanner#restsize is obsolete; use #rest_size instead"); - return strscan_rest_size(self); -} - #define INSPECT_LENGTH 5 /* @@ -2207,7 +2293,10 @@ named_captures_iter(const OnigUChar *name, VALUE value = RUBY_Qnil; int i; for (i = 0; i < back_num; i++) { - value = strscan_aref(data->self, INT2NUM(back_refs[i])); + VALUE v = strscan_aref(data->self, INT2NUM(back_refs[i])); + if (!RB_NIL_P(v)) { + value = v; + } } rb_hash_aset(data->captures, key, value); return 0; @@ -2220,8 +2309,8 @@ named_captures_iter(const OnigUChar *name, * call-seq: * named_captures -> hash * - * Returns the array of captured match values at indexes (1..) - * if the most recent match attempt succeeded, or nil otherwise; + * Returns a hash of named captures for the most recent regexp match, + * or an empty hash if there are no named captures; * see [Captured Match Values][13]: * * ```rb @@ -2261,6 +2350,13 @@ strscan_named_captures(VALUE self) ======================================================================= */ /* + * Document-class: StringScanner::Error + * + * The error class for StringScanner. + * See StringScanner#unscan. + */ + +/* * Document-class: StringScanner * * :markup: markdown @@ -2280,8 +2376,6 @@ Init_strscan(void) ID id_scanerr = rb_intern("ScanError"); VALUE tmp; - id_byteslice = rb_intern("byteslice"); - usascii_encindex = rb_usascii_encindex(); utf8_encindex = rb_utf8_encindex(); binary_encindex = rb_ascii8bit_encindex(); @@ -2290,6 +2384,7 @@ Init_strscan(void) ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError); if (!rb_const_defined(rb_cObject, id_scanerr)) { rb_const_set(rb_cObject, id_scanerr, ScanError); + rb_deprecate_constant(rb_cObject, "ScanError"); } tmp = rb_str_new2(STRSCAN_VERSION); rb_obj_freeze(tmp); @@ -2297,6 +2392,7 @@ Init_strscan(void) tmp = rb_str_new2("$Id$"); rb_obj_freeze(tmp); rb_const_set(StringScanner, rb_intern("Id"), tmp); + rb_deprecate_constant(StringScanner, "Id"); rb_define_alloc_func(StringScanner, strscan_s_allocate); rb_define_private_method(StringScanner, "initialize", strscan_initialize, -1); @@ -2304,7 +2400,6 @@ Init_strscan(void) rb_define_singleton_method(StringScanner, "must_C_version", strscan_s_mustc, 0); rb_define_method(StringScanner, "reset", strscan_reset, 0); rb_define_method(StringScanner, "terminate", strscan_terminate, 0); - rb_define_method(StringScanner, "clear", strscan_clear, 0); rb_define_method(StringScanner, "string", strscan_get_string, 0); rb_define_method(StringScanner, "string=", strscan_set_string, 1); rb_define_method(StringScanner, "concat", strscan_concat, 1); @@ -2329,11 +2424,9 @@ Init_strscan(void) rb_define_method(StringScanner, "getch", strscan_getch, 0); rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0); - rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0); rb_define_method(StringScanner, "scan_byte", strscan_scan_byte, 0); rb_define_method(StringScanner, "peek", strscan_peek, 1); rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0); - rb_define_method(StringScanner, "peep", strscan_peep, 1); rb_define_private_method(StringScanner, "scan_base10_integer", strscan_scan_base10_integer, 0); rb_define_private_method(StringScanner, "scan_base16_integer", strscan_scan_base16_integer, 0); @@ -2343,13 +2436,13 @@ Init_strscan(void) rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0); rb_alias(StringScanner, rb_intern("bol?"), rb_intern("beginning_of_line?")); rb_define_method(StringScanner, "eos?", strscan_eos_p, 0); - rb_define_method(StringScanner, "empty?", strscan_empty_p, 0); rb_define_method(StringScanner, "rest?", strscan_rest_p, 0); rb_define_method(StringScanner, "matched?", strscan_matched_p, 0); rb_define_method(StringScanner, "matched", strscan_matched, 0); rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0); rb_define_method(StringScanner, "[]", strscan_aref, 1); + rb_define_method(StringScanner, "integer_at", strscan_integer_at, -1); rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0); rb_define_method(StringScanner, "post_match", strscan_post_match, 0); rb_define_method(StringScanner, "size", strscan_size, 0); @@ -2358,13 +2451,10 @@ Init_strscan(void) rb_define_method(StringScanner, "rest", strscan_rest, 0); rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0); - rb_define_method(StringScanner, "restsize", strscan_restsize, 0); rb_define_method(StringScanner, "inspect", strscan_inspect, 0); rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0); rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0); - - rb_require("strscan/strscan"); } diff --git a/ext/strscan/strscan.gemspec b/ext/strscan/strscan.gemspec index 47180bb8d8..a51285fa7e 100644 --- a/ext/strscan/strscan.gemspec +++ b/ext/strscan/strscan.gemspec @@ -16,18 +16,18 @@ Gem::Specification.new do |s| s.summary = "Provides lexical scanning operations on a String." s.description = "Provides lexical scanning operations on a String." - files = [ - "COPYING", - "LICENSE.txt", - "lib/strscan/strscan.rb" + files = %w[ + COPYING + LICENSE.txt + lib/strscan.rb + lib/strscan/strscan.rb + lib/strscan/truffleruby.rb ] s.require_paths = %w{lib} if RUBY_ENGINE == "jruby" files << "lib/strscan.jar" - files << "ext/jruby/lib/strscan.rb" - s.require_paths += %w{ext/jruby/lib} s.platform = "java" else files << "ext/strscan/extconf.rb" |
