diff options
Diffstat (limited to 'ext/strscan')
| -rw-r--r-- | ext/strscan/depend | 178 | ||||
| -rw-r--r-- | ext/strscan/extconf.rb | 14 | ||||
| -rw-r--r-- | ext/strscan/lib/strscan.rb | 20 | ||||
| -rw-r--r-- | ext/strscan/lib/strscan/strscan.rb | 55 | ||||
| -rw-r--r-- | ext/strscan/strscan.c | 2314 | ||||
| -rw-r--r-- | ext/strscan/strscan.gemspec | 49 |
6 files changed, 2041 insertions, 589 deletions
diff --git a/ext/strscan/depend b/ext/strscan/depend index 9199574c3f..b40a025230 100644 --- a/ext/strscan/depend +++ b/ext/strscan/depend @@ -1 +1,177 @@ -strscan.o: strscan.c $(hdrdir)/ruby.h $(topdir)/config.h $(hdrdir)/defines.h +# AUTOGENERATED DEPENDENCIES START +strscan.o: $(RUBY_EXTCONF_H) +strscan.o: $(arch_hdrdir)/ruby/config.h +strscan.o: $(hdrdir)/ruby/assert.h +strscan.o: $(hdrdir)/ruby/backward.h +strscan.o: $(hdrdir)/ruby/backward/2/assume.h +strscan.o: $(hdrdir)/ruby/backward/2/attributes.h +strscan.o: $(hdrdir)/ruby/backward/2/bool.h +strscan.o: $(hdrdir)/ruby/backward/2/inttypes.h +strscan.o: $(hdrdir)/ruby/backward/2/limits.h +strscan.o: $(hdrdir)/ruby/backward/2/long_long.h +strscan.o: $(hdrdir)/ruby/backward/2/stdalign.h +strscan.o: $(hdrdir)/ruby/backward/2/stdarg.h +strscan.o: $(hdrdir)/ruby/defines.h +strscan.o: $(hdrdir)/ruby/encoding.h +strscan.o: $(hdrdir)/ruby/intern.h +strscan.o: $(hdrdir)/ruby/internal/abi.h +strscan.o: $(hdrdir)/ruby/internal/anyargs.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic/char.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic/double.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic/fixnum.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic/gid_t.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic/int.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic/intptr_t.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic/long.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic/long_long.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic/mode_t.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic/off_t.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic/pid_t.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic/short.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic/size_t.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic/st_data_t.h +strscan.o: $(hdrdir)/ruby/internal/arithmetic/uid_t.h +strscan.o: $(hdrdir)/ruby/internal/assume.h +strscan.o: $(hdrdir)/ruby/internal/attr/alloc_size.h +strscan.o: 
$(hdrdir)/ruby/internal/attr/artificial.h +strscan.o: $(hdrdir)/ruby/internal/attr/cold.h +strscan.o: $(hdrdir)/ruby/internal/attr/const.h +strscan.o: $(hdrdir)/ruby/internal/attr/constexpr.h +strscan.o: $(hdrdir)/ruby/internal/attr/deprecated.h +strscan.o: $(hdrdir)/ruby/internal/attr/diagnose_if.h +strscan.o: $(hdrdir)/ruby/internal/attr/enum_extensibility.h +strscan.o: $(hdrdir)/ruby/internal/attr/error.h +strscan.o: $(hdrdir)/ruby/internal/attr/flag_enum.h +strscan.o: $(hdrdir)/ruby/internal/attr/forceinline.h +strscan.o: $(hdrdir)/ruby/internal/attr/format.h +strscan.o: $(hdrdir)/ruby/internal/attr/maybe_unused.h +strscan.o: $(hdrdir)/ruby/internal/attr/noalias.h +strscan.o: $(hdrdir)/ruby/internal/attr/nodiscard.h +strscan.o: $(hdrdir)/ruby/internal/attr/noexcept.h +strscan.o: $(hdrdir)/ruby/internal/attr/noinline.h +strscan.o: $(hdrdir)/ruby/internal/attr/nonnull.h +strscan.o: $(hdrdir)/ruby/internal/attr/noreturn.h +strscan.o: $(hdrdir)/ruby/internal/attr/packed_struct.h +strscan.o: $(hdrdir)/ruby/internal/attr/pure.h +strscan.o: $(hdrdir)/ruby/internal/attr/restrict.h +strscan.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h +strscan.o: $(hdrdir)/ruby/internal/attr/warning.h +strscan.o: $(hdrdir)/ruby/internal/attr/weakref.h +strscan.o: $(hdrdir)/ruby/internal/cast.h +strscan.o: $(hdrdir)/ruby/internal/compiler_is.h +strscan.o: $(hdrdir)/ruby/internal/compiler_is/apple.h +strscan.o: $(hdrdir)/ruby/internal/compiler_is/clang.h +strscan.o: $(hdrdir)/ruby/internal/compiler_is/gcc.h +strscan.o: $(hdrdir)/ruby/internal/compiler_is/intel.h +strscan.o: $(hdrdir)/ruby/internal/compiler_is/msvc.h +strscan.o: $(hdrdir)/ruby/internal/compiler_is/sunpro.h +strscan.o: $(hdrdir)/ruby/internal/compiler_since.h +strscan.o: $(hdrdir)/ruby/internal/config.h +strscan.o: $(hdrdir)/ruby/internal/constant_p.h +strscan.o: $(hdrdir)/ruby/internal/core.h +strscan.o: $(hdrdir)/ruby/internal/core/rarray.h +strscan.o: $(hdrdir)/ruby/internal/core/rbasic.h +strscan.o: 
$(hdrdir)/ruby/internal/core/rbignum.h +strscan.o: $(hdrdir)/ruby/internal/core/rclass.h +strscan.o: $(hdrdir)/ruby/internal/core/rdata.h +strscan.o: $(hdrdir)/ruby/internal/core/rfile.h +strscan.o: $(hdrdir)/ruby/internal/core/rhash.h +strscan.o: $(hdrdir)/ruby/internal/core/rmatch.h +strscan.o: $(hdrdir)/ruby/internal/core/robject.h +strscan.o: $(hdrdir)/ruby/internal/core/rregexp.h +strscan.o: $(hdrdir)/ruby/internal/core/rstring.h +strscan.o: $(hdrdir)/ruby/internal/core/rstruct.h +strscan.o: $(hdrdir)/ruby/internal/core/rtypeddata.h +strscan.o: $(hdrdir)/ruby/internal/ctype.h +strscan.o: $(hdrdir)/ruby/internal/dllexport.h +strscan.o: $(hdrdir)/ruby/internal/dosish.h +strscan.o: $(hdrdir)/ruby/internal/encoding/coderange.h +strscan.o: $(hdrdir)/ruby/internal/encoding/ctype.h +strscan.o: $(hdrdir)/ruby/internal/encoding/encoding.h +strscan.o: $(hdrdir)/ruby/internal/encoding/pathname.h +strscan.o: $(hdrdir)/ruby/internal/encoding/re.h +strscan.o: $(hdrdir)/ruby/internal/encoding/sprintf.h +strscan.o: $(hdrdir)/ruby/internal/encoding/string.h +strscan.o: $(hdrdir)/ruby/internal/encoding/symbol.h +strscan.o: $(hdrdir)/ruby/internal/encoding/transcode.h +strscan.o: $(hdrdir)/ruby/internal/error.h +strscan.o: $(hdrdir)/ruby/internal/eval.h +strscan.o: $(hdrdir)/ruby/internal/event.h +strscan.o: $(hdrdir)/ruby/internal/fl_type.h +strscan.o: $(hdrdir)/ruby/internal/gc.h +strscan.o: $(hdrdir)/ruby/internal/glob.h +strscan.o: $(hdrdir)/ruby/internal/globals.h +strscan.o: $(hdrdir)/ruby/internal/has/attribute.h +strscan.o: $(hdrdir)/ruby/internal/has/builtin.h +strscan.o: $(hdrdir)/ruby/internal/has/c_attribute.h +strscan.o: $(hdrdir)/ruby/internal/has/cpp_attribute.h +strscan.o: $(hdrdir)/ruby/internal/has/declspec_attribute.h +strscan.o: $(hdrdir)/ruby/internal/has/extension.h +strscan.o: $(hdrdir)/ruby/internal/has/feature.h +strscan.o: $(hdrdir)/ruby/internal/has/warning.h +strscan.o: $(hdrdir)/ruby/internal/intern/array.h +strscan.o: 
$(hdrdir)/ruby/internal/intern/bignum.h +strscan.o: $(hdrdir)/ruby/internal/intern/class.h +strscan.o: $(hdrdir)/ruby/internal/intern/compar.h +strscan.o: $(hdrdir)/ruby/internal/intern/complex.h +strscan.o: $(hdrdir)/ruby/internal/intern/cont.h +strscan.o: $(hdrdir)/ruby/internal/intern/dir.h +strscan.o: $(hdrdir)/ruby/internal/intern/enum.h +strscan.o: $(hdrdir)/ruby/internal/intern/enumerator.h +strscan.o: $(hdrdir)/ruby/internal/intern/error.h +strscan.o: $(hdrdir)/ruby/internal/intern/eval.h +strscan.o: $(hdrdir)/ruby/internal/intern/file.h +strscan.o: $(hdrdir)/ruby/internal/intern/hash.h +strscan.o: $(hdrdir)/ruby/internal/intern/io.h +strscan.o: $(hdrdir)/ruby/internal/intern/load.h +strscan.o: $(hdrdir)/ruby/internal/intern/marshal.h +strscan.o: $(hdrdir)/ruby/internal/intern/numeric.h +strscan.o: $(hdrdir)/ruby/internal/intern/object.h +strscan.o: $(hdrdir)/ruby/internal/intern/parse.h +strscan.o: $(hdrdir)/ruby/internal/intern/proc.h +strscan.o: $(hdrdir)/ruby/internal/intern/process.h +strscan.o: $(hdrdir)/ruby/internal/intern/random.h +strscan.o: $(hdrdir)/ruby/internal/intern/range.h +strscan.o: $(hdrdir)/ruby/internal/intern/rational.h +strscan.o: $(hdrdir)/ruby/internal/intern/re.h +strscan.o: $(hdrdir)/ruby/internal/intern/ruby.h +strscan.o: $(hdrdir)/ruby/internal/intern/select.h +strscan.o: $(hdrdir)/ruby/internal/intern/select/largesize.h +strscan.o: $(hdrdir)/ruby/internal/intern/set.h +strscan.o: $(hdrdir)/ruby/internal/intern/signal.h +strscan.o: $(hdrdir)/ruby/internal/intern/sprintf.h +strscan.o: $(hdrdir)/ruby/internal/intern/string.h +strscan.o: $(hdrdir)/ruby/internal/intern/struct.h +strscan.o: $(hdrdir)/ruby/internal/intern/thread.h +strscan.o: $(hdrdir)/ruby/internal/intern/time.h +strscan.o: $(hdrdir)/ruby/internal/intern/variable.h +strscan.o: $(hdrdir)/ruby/internal/intern/vm.h +strscan.o: $(hdrdir)/ruby/internal/interpreter.h +strscan.o: $(hdrdir)/ruby/internal/iterator.h +strscan.o: $(hdrdir)/ruby/internal/memory.h +strscan.o: 
$(hdrdir)/ruby/internal/method.h +strscan.o: $(hdrdir)/ruby/internal/module.h +strscan.o: $(hdrdir)/ruby/internal/newobj.h +strscan.o: $(hdrdir)/ruby/internal/scan_args.h +strscan.o: $(hdrdir)/ruby/internal/special_consts.h +strscan.o: $(hdrdir)/ruby/internal/static_assert.h +strscan.o: $(hdrdir)/ruby/internal/stdalign.h +strscan.o: $(hdrdir)/ruby/internal/stdbool.h +strscan.o: $(hdrdir)/ruby/internal/stdckdint.h +strscan.o: $(hdrdir)/ruby/internal/symbol.h +strscan.o: $(hdrdir)/ruby/internal/value.h +strscan.o: $(hdrdir)/ruby/internal/value_type.h +strscan.o: $(hdrdir)/ruby/internal/variable.h +strscan.o: $(hdrdir)/ruby/internal/warning_push.h +strscan.o: $(hdrdir)/ruby/internal/xmalloc.h +strscan.o: $(hdrdir)/ruby/missing.h +strscan.o: $(hdrdir)/ruby/onigmo.h +strscan.o: $(hdrdir)/ruby/oniguruma.h +strscan.o: $(hdrdir)/ruby/re.h +strscan.o: $(hdrdir)/ruby/regex.h +strscan.o: $(hdrdir)/ruby/ruby.h +strscan.o: $(hdrdir)/ruby/st.h +strscan.o: $(hdrdir)/ruby/subst.h +strscan.o: strscan.c +# AUTOGENERATED DEPENDENCIES END diff --git a/ext/strscan/extconf.rb b/ext/strscan/extconf.rb index 0d21966fc2..4e8d851fdb 100644 --- a/ext/strscan/extconf.rb +++ b/ext/strscan/extconf.rb @@ -1,2 +1,14 @@ +# frozen_string_literal: true require 'mkmf' -create_makefile 'strscan' +if RUBY_ENGINE == 'ruby' + $INCFLAGS << " -I$(top_srcdir)" if $extmk + have_func("onig_region_memsize(NULL)") + have_func("rb_reg_onig_match", "ruby/re.h") + have_func("rb_deprecate_constant") + have_func("rb_int_parse_cstr", "ruby.h") # RUBY_VERSION >= 2.5 + have_func("rb_gc_location", "ruby.h") # RUBY_VERSION >= 2.7 + have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3 + create_makefile 'strscan' +else + File.write('Makefile', dummy_makefile("").join) +end diff --git a/ext/strscan/lib/strscan.rb b/ext/strscan/lib/strscan.rb new file mode 100644 index 0000000000..4e8910d141 --- /dev/null +++ b/ext/strscan/lib/strscan.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +case RUBY_ENGINE +when 
'ruby' + require 'strscan.so' + require_relative 'strscan/strscan' +when 'jruby' + require 'strscan.jar' + JRuby::Util.load_ext('org.jruby.ext.strscan.StringScannerLibrary') + require_relative 'strscan/strscan' +when 'truffleruby' + if RUBY_ENGINE_VERSION.to_i >= 34 + require 'strscan/truffleruby' + else + $LOAD_PATH.delete __dir__ + require 'strscan' + end +else + raise NotImplementedError, "Unknown Ruby: #{RUBY_ENGINE}" +end diff --git a/ext/strscan/lib/strscan/strscan.rb b/ext/strscan/lib/strscan/strscan.rb new file mode 100644 index 0000000000..5e262f4007 --- /dev/null +++ b/ext/strscan/lib/strscan/strscan.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +class StringScanner + unless method_defined?(:integer_at) # For JRuby + def integer_at(specifier, *to_i_args) + self[specifier]&.to_i(*to_i_args) + end + end + + # :markup: markdown + # + # call-seq: + # scan_integer(base: 10) -> integer or nil + # + # Returns an integer scanned from `self`, + # beginning at the current position; + # returns `nil` if no such integer was available. + # + # When `base` is `10` (the default), + # equivalent to calling #scan with argument `pattern` + # as `'[+-]?\d+'`: + # + # ```ruby + # scanner = StringScanner.new('Form 27B/6') + # scanner.scan_integer # => nil # No integer at position 0. + # scanner.pos = 5 + # scanner.scan_integer # => 27 + # scanner.matched # => "27" + # scanner.pos # => 7 + # ``` + # + # When `base` is `16` (the only other value allowed), + # equivalent to calling #scan with argument `pattern` + # as `'[+-]?(0x)?[0-9a-fA-F]+'`: + # + # ```ruby + # scanner.pos = 5 + # scanner.scan_integer(base: 16) # => 635 + # scanner.matched # => "27B" + # scanner.pos # => 8 + # ``` + # + # Raises Encoding::CompatibilityError if `self` does not have + # an ASCII compatible encoding. 
+ def scan_integer(base: 10) + case base + when 10 + scan_base10_integer + when 16 + scan_base16_integer + else + raise ArgumentError, "Unsupported integer base: #{base.inspect}, expected 10 or 16" + end + end +end diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index b5ee20282c..dede57218b 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -4,14 +4,33 @@ Copyright (c) 1999-2006 Minero Aoki This program is free software. - You can distribute/modify this program under the terms of - the Ruby License. For details, see the file COPYING. + You can redistribute this program under the terms of the Ruby's or 2-clause + BSD License. For details, see the COPYING and LICENSE.txt files. */ -#include "ruby.h" -#include "re.h" +#include "ruby/ruby.h" +#include "ruby/re.h" +#include "ruby/encoding.h" -#define STRSCAN_VERSION "0.7.0" +#ifdef RUBY_EXTCONF_H +# include RUBY_EXTCONF_H +#endif + +#ifdef HAVE_ONIG_REGION_MEMSIZE +extern size_t onig_region_memsize(const struct re_registers *regs); +#endif + +#include <stdbool.h> + +#define STRSCAN_VERSION "3.1.9.dev" + + +#ifdef HAVE_RB_DEPRECATE_CONSTANT +/* In ruby 3.0, defined but not exposed in external headers */ +extern void rb_deprecate_constant(VALUE mod, const char *name); +#else +# define rb_deprecate_constant(mod, name) ((void)0) +#endif /* ======================================================================= Data Type Definitions @@ -20,6 +39,8 @@ static VALUE StringScanner; static VALUE ScanError; +static int usascii_encindex, utf8_encindex, binary_encindex; + struct strscanner { /* multi-purpose flags */ @@ -28,50 +49,61 @@ struct strscanner /* the string to scan */ VALUE str; - + /* scan pointers */ long prev; /* legal only when MATCHED_P(s) */ long curr; /* always legal */ /* the regexp register; legal only when MATCHED_P(s) */ struct re_registers regs; + + /* regexp used for last scan */ + VALUE regex; + + /* anchor mode */ + bool fixed_anchor_p; }; #define MATCHED_P(s) ((s)->flags & 
FLAG_MATCHED) -#define MATCHED(s) (s)->flags |= FLAG_MATCHED -#define CLEAR_MATCH_STATUS(s) (s)->flags &= ~FLAG_MATCHED +#define MATCHED(s) ((s)->flags |= FLAG_MATCHED) +#define CLEAR_MATCHED(s) ((s)->flags &= ~FLAG_MATCHED) +#define CLEAR_NAMED_CAPTURES(s) ((s)->regex = Qnil) +#define CLEAR_MATCH_STATUS(s) do {\ + CLEAR_MATCHED(s);\ + CLEAR_NAMED_CAPTURES(s);\ +} while (0) -#define S_PBEG(s) (RSTRING((s)->str)->ptr) -#define S_LEN(s) (RSTRING((s)->str)->len) +#define S_PBEG(s) (RSTRING_PTR((s)->str)) +#define S_LEN(s) (RSTRING_LEN((s)->str)) #define S_PEND(s) (S_PBEG(s) + S_LEN(s)) #define CURPTR(s) (S_PBEG(s) + (s)->curr) #define S_RESTLEN(s) (S_LEN(s) - (s)->curr) -#define EOS_P(s) ((s)->curr >= RSTRING(p->str)->len) +#define EOS_P(s) ((s)->curr >= RSTRING_LEN(p->str)) #define GET_SCANNER(obj,var) do {\ - Data_Get_Struct(obj, struct strscanner, var);\ - if (NIL_P(var->str)) rb_raise(rb_eArgError, "uninitialized StringScanner object");\ + (var) = check_strscan(obj);\ + if (NIL_P((var)->str)) rb_raise(rb_eArgError, "uninitialized StringScanner object");\ } while (0) /* ======================================================================= Function Prototypes ======================================================================= */ -static VALUE infect _((VALUE str, struct strscanner *p)); +static inline long minl _((const long n, const long x)); static VALUE extract_range _((struct strscanner *p, long beg_i, long end_i)); static VALUE extract_beg_len _((struct strscanner *p, long beg_i, long len)); -static void check_strscan _((VALUE obj)); -static void strscan_mark _((struct strscanner *p)); -static void strscan_free _((struct strscanner *p)); +static struct strscanner *check_strscan _((VALUE obj)); +static void strscan_mark _((void *p)); +static void strscan_free _((void *p)); +static size_t strscan_memsize _((const void *p)); static VALUE strscan_s_allocate _((VALUE klass)); static VALUE strscan_initialize _((int argc, VALUE *argv, VALUE self)); static VALUE 
strscan_init_copy _((VALUE vself, VALUE vorig)); static VALUE strscan_s_mustc _((VALUE self)); static VALUE strscan_terminate _((VALUE self)); -static VALUE strscan_clear _((VALUE self)); static VALUE strscan_get_string _((VALUE self)); static VALUE strscan_set_string _((VALUE self, VALUE str)); static VALUE strscan_concat _((VALUE self, VALUE str)); @@ -93,13 +125,11 @@ static VALUE strscan_search_full _((VALUE self, VALUE re, static void adjust_registers_to_matched _((struct strscanner *p)); static VALUE strscan_getch _((VALUE self)); static VALUE strscan_get_byte _((VALUE self)); -static VALUE strscan_getbyte _((VALUE self)); static VALUE strscan_peek _((VALUE self, VALUE len)); -static VALUE strscan_peep _((VALUE self, VALUE len)); +static VALUE strscan_scan_base10_integer _((VALUE self)); static VALUE strscan_unscan _((VALUE self)); static VALUE strscan_bol_p _((VALUE self)); static VALUE strscan_eos_p _((VALUE self)); -static VALUE strscan_empty_p _((VALUE self)); static VALUE strscan_rest_p _((VALUE self)); static VALUE strscan_matched_p _((VALUE self)); static VALUE strscan_matched _((VALUE self)); @@ -119,112 +149,207 @@ static VALUE inspect2 _((struct strscanner *p)); ======================================================================= */ static VALUE -infect(VALUE str, struct strscanner *p) +str_new(struct strscanner *p, const char *ptr, long len) { - OBJ_INFECT(str, p->str); + VALUE str = rb_str_new(ptr, len); + rb_enc_copy(str, p->str); return str; } +static inline long +minl(const long x, const long y) +{ + return (x < y) ? 
x : y; +} + static VALUE extract_range(struct strscanner *p, long beg_i, long end_i) { if (beg_i > S_LEN(p)) return Qnil; - if (end_i > S_LEN(p)) - end_i = S_LEN(p); - return infect(rb_str_new(S_PBEG(p) + beg_i, end_i - beg_i), p); + end_i = minl(end_i, S_LEN(p)); + return str_new(p, S_PBEG(p) + beg_i, end_i - beg_i); } static VALUE extract_beg_len(struct strscanner *p, long beg_i, long len) { if (beg_i > S_LEN(p)) return Qnil; - if (beg_i + len > S_LEN(p)) - len = S_LEN(p) - beg_i; - return infect(rb_str_new(S_PBEG(p) + beg_i, len), p); + len = minl(len, S_LEN(p) - beg_i); + return str_new(p, S_PBEG(p) + beg_i, len); } /* ======================================================================= Constructor ======================================================================= */ +#ifdef RUBY_TYPED_EMBEDDABLE +# define HAVE_RUBY_TYPED_EMBEDDABLE 1 +#else +# ifdef HAVE_CONST_RUBY_TYPED_EMBEDDABLE +# define RUBY_TYPED_EMBEDDABLE RUBY_TYPED_EMBEDDABLE +# define HAVE_RUBY_TYPED_EMBEDDABLE 1 +# else +# define RUBY_TYPED_EMBEDDABLE 0 +# endif +#endif + +#ifdef HAVE_RB_GC_LOCATION +static void +strscan_compact(void *ptr) +{ + struct strscanner *p = ptr; + p->str = rb_gc_location(p->str); + p->regex = rb_gc_location(p->regex); +} +#else +#define rb_gc_mark_movable rb_gc_mark +#endif + static void -strscan_mark(struct strscanner *p) +strscan_mark(void *ptr) { - rb_gc_mark(p->str); + struct strscanner *p = ptr; + rb_gc_mark_movable(p->str); + rb_gc_mark_movable(p->regex); } static void -strscan_free(struct strscanner *p) +strscan_free(void *ptr) +{ + struct strscanner *p = ptr; + onig_region_free(&(p->regs), 0); +#ifndef HAVE_RUBY_TYPED_EMBEDDABLE + ruby_xfree(p); +#endif +} + +static size_t +strscan_memsize(const void *ptr) { - re_free_registers(&(p->regs)); - free(p); + size_t size = 0; +#ifndef HAVE_RUBY_TYPED_EMBEDDABLE + size += sizeof(struct strscanner); +#endif + +#ifdef HAVE_ONIG_REGION_MEMSIZE + const struct strscanner *p = ptr; + size += 
onig_region_memsize(&p->regs) - sizeof(p->regs); +#endif + return size; } +static const rb_data_type_t strscanner_type = { + .wrap_struct_name = "StringScanner", + .function = { + .dmark = strscan_mark, + .dfree = strscan_free, + .dsize = strscan_memsize, +#ifdef HAVE_RB_GC_LOCATION + .dcompact = strscan_compact, +#endif + }, + .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE +}; + static VALUE strscan_s_allocate(VALUE klass) { struct strscanner *p; - - p = ALLOC(struct strscanner); - MEMZERO(p, struct strscanner, 1); + VALUE obj = TypedData_Make_Struct(klass, struct strscanner, &strscanner_type, p); + CLEAR_MATCH_STATUS(p); - MEMZERO(&(p->regs), struct re_registers, 1); + onig_region_init(&(p->regs)); p->str = Qnil; - return Data_Wrap_Struct(klass, strscan_mark, strscan_free, p); + return obj; } /* - * call-seq: StringScanner.new(string, dup = false) + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * StringScanner.new(string, fixed_anchor: false) -> string_scanner + * + * Returns a new `StringScanner` object whose [stored string][1] + * is the given `string`; + * sets the [fixed-anchor property][10]: + * + * ```rb + * scanner = StringScanner.new('foobarbaz') + * scanner.string # => "foobarbaz" + * scanner.fixed_anchor? # => false + * put_situation(scanner) + * # Situation: + * # pos: 0 + * # charpos: 0 + * # rest: "foobarbaz" + * # rest_size: 9 + * ``` * - * Creates a new StringScanner object to scan over the given +string+. - * +dup+ argument is obsolete and not used now. 
*/ static VALUE strscan_initialize(int argc, VALUE *argv, VALUE self) { struct strscanner *p; - VALUE str, need_dup; - - Data_Get_Struct(self, struct strscanner, p); - rb_scan_args(argc, argv, "11", &str, &need_dup); + VALUE str, options; + + p = check_strscan(self); + rb_scan_args(argc, argv, "11", &str, &options); + options = rb_check_hash_type(options); + if (!NIL_P(options)) { + VALUE fixed_anchor; + ID keyword_ids[1]; + keyword_ids[0] = rb_intern("fixed_anchor"); + rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor); + if (fixed_anchor == Qundef) { + p->fixed_anchor_p = false; + } + else { + p->fixed_anchor_p = RTEST(fixed_anchor); + } + } + else { + p->fixed_anchor_p = false; + } StringValue(str); - p->str = str; + RB_OBJ_WRITE(self, &p->str, str); return self; } -static void +static struct strscanner * check_strscan(VALUE obj) { - if (TYPE(obj) != T_DATA || RDATA(obj)->dmark != (RUBY_DATA_FUNC)strscan_mark) { - rb_raise(rb_eTypeError, - "wrong argument type %s (expected StringScanner)", - rb_obj_classname(obj)); - } + return rb_check_typeddata(obj, &strscanner_type); } /* + * :markup: markdown + * :include: strscan/link_refs.txt + * * call-seq: - * dup - * clone + * dup -> shallow_copy * - * Duplicates a StringScanner object. + * Returns a shallow copy of `self`; + * the [stored string][1] in the copy is the same string as in `self`. 
*/ static VALUE strscan_init_copy(VALUE vself, VALUE vorig) { struct strscanner *self, *orig; - Data_Get_Struct(vself, struct strscanner, self); - check_strscan(vorig); - Data_Get_Struct(vorig, struct strscanner, orig); + self = check_strscan(vself); + orig = check_strscan(vorig); if (self != orig) { - self->flags = orig->flags; - self->str = orig->str; - self->prev = orig->prev; - self->curr = orig->curr; - re_copy_registers(&self->regs, &orig->regs); + self->flags = orig->flags; + RB_OBJ_WRITE(vself, &self->str, orig->str); + self->prev = orig->prev; + self->curr = orig->curr; + if (rb_reg_region_copy(&self->regs, &orig->regs)) + rb_memerror(); + RB_GC_GUARD(vorig); } + return vself; } @@ -233,10 +358,13 @@ strscan_init_copy(VALUE vself, VALUE vorig) ======================================================================= */ /* - * call-seq: StringScanner.must_C_version + * call-seq: + * StringScanner.must_C_version -> self * - * This method is defined for backward compatibility. + * Returns +self+; defined for backward compatibility. */ + + /* :nodoc: */ static VALUE strscan_s_mustc(VALUE self) { @@ -244,7 +372,30 @@ strscan_s_mustc(VALUE self) } /* - * Reset the scan pointer (index 0) and clear matching data. 
+ * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * reset -> self + * + * Sets both [byte position][2] and [character position][7] to zero, + * and clears [match values][9]; + * returns `self`: + * + * ```rb + * scanner = StringScanner.new('foobarbaz') + * scanner.exist?(/bar/) # => 6 + * scanner.reset # => #<StringScanner 0/9 @ "fooba..."> + * put_situation(scanner) + * # Situation: + * # pos: 0 + * # charpos: 0 + * # rest: "foobarbaz" + * # rest_size: 9 + * # => nil + * match_values_cleared?(scanner) # => true + * ``` + * */ static VALUE strscan_reset(VALUE self) @@ -258,11 +409,12 @@ strscan_reset(VALUE self) } /* - * call-seq: - * terminate - * clear + * :markup: markdown + * :call-seq: + * terminate -> self * - * Set the scan pointer to the end of the string and clear matching data. + * :include: strscan/link_refs.txt + * :include: strscan/methods/terminate.md */ static VALUE strscan_terminate(VALUE self) @@ -276,18 +428,21 @@ strscan_terminate(VALUE self) } /* - * Equivalent to #terminate. - * This method is obsolete; use #terminate instead. - */ -static VALUE -strscan_clear(VALUE self) -{ - rb_warning("StringScanner#clear is obsolete; use #terminate instead"); - return strscan_terminate(self); -} - -/* - * Returns the string being scanned. + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * string -> stored_string + * + * Returns the [stored string][1]: + * + * ```rb + * scanner = StringScanner.new('foobar') + * scanner.string # => "foobar" + * scanner.concat('baz') + * scanner.string # => "foobarbaz" + * ``` + * */ static VALUE strscan_get_string(VALUE self) @@ -299,38 +454,80 @@ strscan_get_string(VALUE self) } /* - * call-seq: string=(str) + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * string = other_string -> other_string + * + * Replaces the [stored string][1] with the given `other_string`: + * + * - Sets both [positions][11] to zero. + * - Clears [match values][9]. 
+ * - Returns `other_string`. + * + * ```rb + * scanner = StringScanner.new('foobar') + * scanner.scan(/foo/) + * put_situation(scanner) + * # Situation: + * # pos: 3 + * # charpos: 3 + * # rest: "bar" + * # rest_size: 3 + * match_values_cleared?(scanner) # => false + * + * scanner.string = 'baz' # => "baz" + * put_situation(scanner) + * # Situation: + * # pos: 0 + * # charpos: 0 + * # rest: "baz" + * # rest_size: 3 + * match_values_cleared?(scanner) # => true + * ``` * - * Changes the string being scanned to +str+ and resets the scanner. - * Returns +str+. */ static VALUE strscan_set_string(VALUE self, VALUE str) { - struct strscanner *p; + struct strscanner *p = check_strscan(self); - Data_Get_Struct(self, struct strscanner, p); StringValue(str); - p->str = rb_str_dup(str); - rb_obj_freeze(p->str); + RB_OBJ_WRITE(self, &p->str, str); p->curr = 0; CLEAR_MATCH_STATUS(p); return str; } /* - * call-seq: - * concat(str) - * <<(str) + * :markup: markdown + * :include: strscan/link_refs.txt * - * Appends +str+ to the string being scanned. - * This method does not affect scan pointer. + * call-seq: + * concat(more_string) -> self + * + * - Appends the given `more_string` + * to the [stored string][1]. + * - Returns `self`. + * - Does not affect the [positions][11] + * or [match values][9]. + * + * + * ```rb + * scanner = StringScanner.new('foo') + * scanner.string # => "foo" + * scanner.terminate + * scanner.concat('barbaz') # => #<StringScanner 3/9 "foo" @ "barba..."> + * scanner.string # => "foobarbaz" + * put_situation(scanner) + * # Situation: + * # pos: 3 + * # charpos: 3 + * # rest: "barbaz" + * # rest_size: 6 + * ``` * - * s = StringScanner.new("Fri Dec 12 1975 14:39") - * s.scan(/Fri /) - * s << " +1000 GMT" - * s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT" - * s.scan(/Dec/) # -> "Dec" */ static VALUE strscan_concat(VALUE self, VALUE str) @@ -344,18 +541,12 @@ strscan_concat(VALUE self, VALUE str) } /* - * Returns the position of the scan pointer. 
In the 'reset' position, this - * value is zero. In the 'terminated' position (i.e. the string is exhausted), - * this value is the length of the string. - * - * In short, it's a 0-based index into the string. + * :markup: markdown + * :call-seq: + * pos -> byte_position * - * s = StringScanner.new('test string') - * s.pos # -> 0 - * s.scan_until /str/ # -> "test str" - * s.pos # -> 8 - * s.terminate # -> #<StringScanner fin> - * s.pos # -> 11 + * :include: strscan/link_refs.txt + * :include: strscan/methods/get_pos.md */ static VALUE strscan_get_pos(VALUE self) @@ -363,17 +554,35 @@ strscan_get_pos(VALUE self) struct strscanner *p; GET_SCANNER(self, p); - return INT2FIX(p->curr); + return LONG2NUM(p->curr); } /* - * call-seq: pos=(n) + * :markup: markdown + * :call-seq: + * charpos -> character_position * - * Modify the scan pointer. + * :include: strscan/link_refs.txt + * :include: strscan/methods/get_charpos.md + */ +static VALUE +strscan_get_charpos(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + + return LONG2NUM(rb_enc_strlen(S_PBEG(p), CURPTR(p), rb_enc_get(p->str))); +} + +/* + * :markup: markdown + * :call-seq: + * pos = n -> n + * pointer = n -> n * - * s = StringScanner.new('test string') - * s.pos = 7 # -> 7 - * s.rest # -> "ring" + * :include: strscan/link_refs.txt + * :include: strscan/methods/set_pos.md */ static VALUE strscan_set_pos(VALUE self, VALUE v) @@ -382,76 +591,225 @@ strscan_set_pos(VALUE self, VALUE v) long i; GET_SCANNER(self, p); - i = NUM2INT(v); + i = NUM2LONG(v); if (i < 0) i += S_LEN(p); if (i < 0) rb_raise(rb_eRangeError, "index out of range"); if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range"); p->curr = i; - return INT2NUM(i); + return LONG2NUM(i); +} + +static inline UChar * +match_target(struct strscanner *p) +{ + if (p->fixed_anchor_p) { + return (UChar *)S_PBEG(p); + } + else + { + return (UChar *)CURPTR(p); + } +} + +static inline void +set_registers(struct strscanner *p, size_t pos, size_t 
length) +{ + const int at = 0; + OnigRegion *regs = &(p->regs); + onig_region_clear(regs); + if (onig_region_set(regs, at, 0, 0)) return; + if (p->fixed_anchor_p) { + regs->beg[at] = pos + p->curr; + regs->end[at] = pos + p->curr + length; + } + else + { + regs->beg[at] = pos; + regs->end[at] = pos + length; + } +} + +static inline void +succ(struct strscanner *p) +{ + if (p->fixed_anchor_p) { + p->curr = p->regs.end[0]; + } + else + { + p->curr += p->regs.end[0]; + } +} + +static inline long +last_match_length(struct strscanner *p) +{ + if (p->fixed_anchor_p) { + return p->regs.end[0] - p->prev; + } + else + { + return p->regs.end[0]; + } +} + +static inline long +adjust_register_position(struct strscanner *p, long position) +{ + if (p->fixed_anchor_p) { + return position; + } + else { + return p->prev + position; + } +} + +/* rb_reg_onig_match is available in Ruby 3.3 and later. */ +#ifndef HAVE_RB_REG_ONIG_MATCH +static OnigPosition +rb_reg_onig_match(VALUE re, VALUE str, + OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args), + void *args, struct re_registers *regs) +{ + OnigPosition result; + regex_t *reg = rb_reg_prepare_re(re, str); + + bool tmpreg = reg != RREGEXP_PTR(re); + if (!tmpreg) RREGEXP(re)->usecnt++; + + result = match(reg, str, regs, args); + + if (!tmpreg) RREGEXP(re)->usecnt--; + if (tmpreg) { + if (RREGEXP(re)->usecnt) { + onig_free(reg); + } + else { + onig_free(RREGEXP_PTR(re)); + RREGEXP_PTR(re) = reg; + } + } + + if (result < 0) { + if (result != ONIG_MISMATCH) { + rb_raise(ScanError, "regexp buffer overflow"); + } + } + + return result; +} +#endif + +static OnigPosition +strscan_match(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr) +{ + struct strscanner *p = (struct strscanner *)args_ptr; + + return onig_match(reg, + match_target(p), + (UChar* )(CURPTR(p) + S_RESTLEN(p)), + (UChar* )CURPTR(p), + regs, + ONIG_OPTION_NONE); +} + +static OnigPosition +strscan_search(regex_t *reg, VALUE 
str, struct re_registers *regs, void *args_ptr) +{ + struct strscanner *p = (struct strscanner *)args_ptr; + + return onig_search(reg, + match_target(p), + (UChar *)(CURPTR(p) + S_RESTLEN(p)), + (UChar *)CURPTR(p), + (UChar *)(CURPTR(p) + S_RESTLEN(p)), + regs, + ONIG_OPTION_NONE); +} + +static void +strscan_enc_check(VALUE str1, VALUE str2) +{ + if (RB_ENCODING_GET(str1) != RB_ENCODING_GET(str2)) { + rb_enc_check(str1, str2); + } } static VALUE -strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly) +strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly) { struct strscanner *p; - int ret; - Check_Type(regex, T_REGEXP); GET_SCANNER(self, p); CLEAR_MATCH_STATUS(p); if (S_RESTLEN(p) < 0) { return Qnil; } - rb_kcode_set_option(regex); - if (headonly) { - ret = re_match(RREGEXP(regex)->ptr, - CURPTR(p), S_RESTLEN(p), - 0, - &(p->regs)); + + if (RB_TYPE_P(pattern, T_REGEXP)) { + OnigPosition ret; + RB_OBJ_WRITE(self, &p->regex, pattern); + ret = rb_reg_onig_match(p->regex, + p->str, + headonly ? 
strscan_match : strscan_search, + (void *)p, + &(p->regs)); + + if (ret == ONIG_MISMATCH) { + return Qnil; + } } else { - ret = re_search(RREGEXP(regex)->ptr, - CURPTR(p), S_RESTLEN(p), - 0, - S_RESTLEN(p), - &(p->regs)); - } - rb_kcode_reset_option(); - - if (ret == -2) rb_raise(ScanError, "regexp buffer overflow"); - if (ret < 0) { - /* not matched */ - return Qnil; + StringValue(pattern); + if (S_RESTLEN(p) < RSTRING_LEN(pattern)) { + strscan_enc_check(p->str, pattern); + return Qnil; + } + + if (headonly) { + strscan_enc_check(p->str, pattern); + + if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) { + return Qnil; + } + set_registers(p, 0, RSTRING_LEN(pattern)); + } + else { + rb_encoding *enc = rb_enc_check(p->str, pattern); + long pos = rb_memsearch(RSTRING_PTR(pattern), RSTRING_LEN(pattern), + CURPTR(p), S_RESTLEN(p), enc); + if (pos == -1) { + return Qnil; + } + set_registers(p, pos, RSTRING_LEN(pattern)); + } } MATCHED(p); p->prev = p->curr; + if (succptr) { - p->curr += p->regs.end[0]; - } - if (getstr) { - return extract_beg_len(p, p->prev, p->regs.end[0]); + succ(p); } - else { - return INT2FIX(p->regs.end[0]); + { + const long length = last_match_length(p); + if (getstr) { + return extract_beg_len(p, p->prev, length); + } + else { + return INT2FIX(length); + } } } /* - * call-seq: scan(pattern) => String - * - * Tries to match with +pattern+ at the current position. If there's a match, - * the scanner advances the "scan pointer" and returns the matched string. - * Otherwise, the scanner returns +nil+. 
- * - * s = StringScanner.new('test string') - * p s.scan(/\w+/) # -> "test" - * p s.scan(/\w+/) # -> nil - * p s.scan(/\s+/) # -> " " - * p s.scan(/\w+/) # -> "string" - * p s.scan(/./) # -> nil + * :markup: markdown + * :call-seq: + * scan(pattern) -> substring or nil * + * :include: strscan/link_refs.txt + * :include: strscan/methods/scan.md */ static VALUE strscan_scan(VALUE self, VALUE re) @@ -460,15 +818,60 @@ strscan_scan(VALUE self, VALUE re) } /* - * call-seq: match?(pattern) + * :markup: markdown + * :include: strscan/link_refs.txt * - * Tests whether the given +pattern+ is matched from the current scan pointer. - * Returns the length of the match, or +nil+. The scan pointer is not advanced. + * call-seq: + * match?(pattern) -> match_size or nil + * + * Attempts to [match][17] the given `pattern` + * at the beginning of the [target substring][3]; + * does not modify the [positions][11]. + * + * If the match succeeds: + * + * - Sets [match values][9]. + * - Returns the size in bytes of the matched substring. + * + * + * ```rb + * scanner = StringScanner.new('foobarbaz') + * scanner.pos = 3 + * scanner.match?(/bar/) # => 3 + * put_match_values(scanner) + * # Basic match values: + * # matched?: true + * # matched_size: 3 + * # pre_match: "foo" + * # matched : "bar" + * # post_match: "baz" + * # Captured match values: + * # size: 1 + * # captures: [] + * # named_captures: {} + * # values_at: ["bar", nil] + * # []: + * # [0]: "bar" + * # [1]: nil + * put_situation(scanner) + * # Situation: + * # pos: 3 + * # charpos: 3 + * # rest: "barbaz" + * # rest_size: 6 + * ``` + * + * If the match fails: + * + * - Clears match values. + * - Returns `nil`. + * - Does not increment positions. 
+ * + * ```rb + * scanner.match?(/nope/) # => nil + * match_values_cleared?(scanner) # => true + * ``` * - * s = StringScanner.new('test string') - * p s.match?(/\w+/) # -> 4 - * p s.match?(/\w+/) # -> 4 - * p s.match?(/\s+/) # -> nil */ static VALUE strscan_match_p(VALUE self, VALUE re) @@ -477,21 +880,12 @@ strscan_match_p(VALUE self, VALUE re) } /* - * call-seq: skip(pattern) - * - * Attempts to skip over the given +pattern+ beginning with the scan pointer. - * If it matches, the scan pointer is advanced to the end of the match, and the - * length of the match is returned. Otherwise, +nil+ is returned. - * - * It's similar to #scan, but without returning the matched string. - * - * s = StringScanner.new('test string') - * p s.skip(/\w+/) # -> 4 - * p s.skip(/\w+/) # -> nil - * p s.skip(/\s+/) # -> 1 - * p s.skip(/\w+/) # -> 6 - * p s.skip(/./) # -> nil + * :markup: markdown + * call-seq: + * skip(pattern) -> match_size or nil * + * :include: strscan/link_refs.txt + * :include: strscan/methods/skip.md */ static VALUE strscan_skip(VALUE self, VALUE re) @@ -500,19 +894,59 @@ strscan_skip(VALUE self, VALUE re) } /* - * call-seq: check(pattern) - * - * This returns the value that #scan would return, without advancing the scan - * pointer. The match register is affected, though. + * :markup: markdown + * :include: strscan/link_refs.txt * - * s = StringScanner.new("Fri Dec 12 1975 14:39") - * s.check /Fri/ # -> "Fri" - * s.pos # -> 0 - * s.matched # -> "Fri" - * s.check /12/ # -> nil - * s.matched # -> nil + * call-seq: + * check(pattern) -> matched_substring or nil + * + * Attempts to [match][17] the given `pattern` + * at the beginning of the [target substring][3]; + * does not modify the [positions][11]. + * + * If the match succeeds: + * + * - Returns the matched substring. + * - Sets all [match values][9]. 
+ * + * ```rb + * scanner = StringScanner.new('foobarbaz') + * scanner.pos = 3 + * scanner.check('bar') # => "bar" + * put_match_values(scanner) + * # Basic match values: + * # matched?: true + * # matched_size: 3 + * # pre_match: "foo" + * # matched : "bar" + * # post_match: "baz" + * # Captured match values: + * # size: 1 + * # captures: [] + * # named_captures: {} + * # values_at: ["bar", nil] + * # []: + * # [0]: "bar" + * # [1]: nil + * # => 0..1 + * put_situation(scanner) + * # Situation: + * # pos: 3 + * # charpos: 3 + * # rest: "barbaz" + * # rest_size: 6 + * ``` + * + * If the match fails: + * + * - Returns `nil`. + * - Clears all [match values][9]. + * + * ```rb + * scanner.check(/nope/) # => nil + * match_values_cleared?(scanner) # => true + * ``` * - * Mnemonic: it "checks" to see whether a #scan will return a value. */ static VALUE strscan_check(VALUE self, VALUE re) @@ -521,33 +955,37 @@ strscan_check(VALUE self, VALUE re) } /* - * call-seq: scan_full(pattern, return_string_p, advance_pointer_p) + * call-seq: + * scan_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or length or nil + * + * Equivalent to one of the following: + * + * - +advance_pointer_p+ +true+: * - * Tests whether the given +pattern+ is matched from the current scan pointer. - * Returns the matched string if +return_string_p+ is true. - * Advances the scan pointer if +advance_pointer_p+ is true. - * The match register is affected. + * - +return_string_p+ +true+: StringScanner#scan(pattern). + * - +return_string_p+ +false+: StringScanner#skip(pattern). + * + * - +advance_pointer_p+ +false+: + * + * - +return_string_p+ +true+: StringScanner#check(pattern). + * - +return_string_p+ +false+: StringScanner#match?(pattern). * - * "full" means "#scan with full parameters". 
*/ + + /* :nodoc: */ static VALUE strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f) { return strscan_do_scan(self, re, RTEST(s), RTEST(f), 1); } - /* - * call-seq: scan_until(pattern) + * :markup: markdown + * :call-seq: + * scan_until(pattern) -> substring or nil * - * Scans the string _until_ the +pattern+ is matched. Returns the substring up - * to and including the end of the match, advancing the scan pointer to that - * location. If there is no match, +nil+ is returned. - * - * s = StringScanner.new("Fri Dec 12 1975 14:39") - * s.scan_until(/1/) # -> "Fri Dec 1" - * s.pre_match # -> "Fri Dec " - * s.scan_until(/XYZ/) # -> nil + * :include: strscan/link_refs.txt + * :include: strscan/methods/scan_until.md */ static VALUE strscan_scan_until(VALUE self, VALUE re) @@ -556,17 +994,61 @@ strscan_scan_until(VALUE self, VALUE re) } /* - * call-seq: exist?(pattern) + * :markup: markdown + * :include: strscan/link_refs.txt * - * Looks _ahead_ to see if the +pattern+ exists _anywhere_ in the string, - * without advancing the scan pointer. This predicates whether a #scan_until - * will return a value. + * call-seq: + * exist?(pattern) -> byte_offset or nil + * + * Attempts to [match][17] the given `pattern` + * anywhere (at any [position][2]) + * in the [target substring][3]; + * does not modify the [positions][11]. + * + * If the match succeeds: + * + * - Returns a byte offset: + * the distance in bytes between the current [position][2] + * and the end of the matched substring. + * - Sets all [match values][9]. 
+ * + * ```rb + * scanner = StringScanner.new('foobarbazbatbam') + * scanner.pos = 6 + * scanner.exist?(/bat/) # => 6 + * put_match_values(scanner) + * # Basic match values: + * # matched?: true + * # matched_size: 3 + * # pre_match: "foobarbaz" + * # matched : "bat" + * # post_match: "bam" + * # Captured match values: + * # size: 1 + * # captures: [] + * # named_captures: {} + * # values_at: ["bat", nil] + * # []: + * # [0]: "bat" + * # [1]: nil + * put_situation(scanner) + * # Situation: + * # pos: 6 + * # charpos: 6 + * # rest: "bazbatbam" + * # rest_size: 9 + * ``` + * + * If the match fails: + * + * - Returns `nil`. + * - Clears all [match values][9]. + * + * ```rb + * scanner.exist?(/nope/) # => nil + * match_values_cleared?(scanner) # => true + * ``` * - * s = StringScanner.new('test string') - * s.exist? /s/ # -> 3 - * s.scan /test/ # -> "test" - * s.exist? /s/ # -> 6 - * s.exist? /e/ # -> nil */ static VALUE strscan_exist_p(VALUE self, VALUE re) @@ -575,20 +1057,12 @@ strscan_exist_p(VALUE self, VALUE re) } /* - * call-seq: skip_until(pattern) - * - * Advances the scan pointer until +pattern+ is matched and consumed. Returns - * the number of bytes advanced, or +nil+ if no match was found. + * :markup: markdown + * :call-seq: + * skip_until(pattern) -> matched_substring_size or nil * - * Look ahead to match +pattern+, and advance the scan pointer to the _end_ - * of the match. Return the number of characters advanced, or +nil+ if the - * match was unsuccessful. - * - * It's similar to #scan_until, but without returning the intervening string. 
- * - * s = StringScanner.new("Fri Dec 12 1975 14:39") - * s.skip_until /12/ # -> 10 - * s # + * :include: strscan/link_refs.txt + * :include: strscan/methods/skip_until.md */ static VALUE strscan_skip_until(VALUE self, VALUE re) @@ -597,17 +1071,61 @@ strscan_skip_until(VALUE self, VALUE re) } /* - * call-seq: check_until(pattern) + * :markup: markdown + * :include: strscan/link_refs.txt * - * This returns the value that #scan_until would return, without advancing the - * scan pointer. The match register is affected, though. - * - * s = StringScanner.new("Fri Dec 12 1975 14:39") - * s.check_until /12/ # -> "Fri Dec 12" - * s.pos # -> 0 - * s.matched # -> 12 + * call-seq: + * check_until(pattern) -> substring or nil + * + * Attempts to [match][17] the given `pattern` + * anywhere (at any [position][2]) + * in the [target substring][3]; + * does not modify the [positions][11]. + * + * If the match succeeds: + * + * - Sets all [match values][9]. + * - Returns the matched substring, + * which extends from the current [position][2] + * to the end of the matched substring. + * + * ```rb + * scanner = StringScanner.new('foobarbazbatbam') + * scanner.pos = 6 + * scanner.check_until(/bat/) # => "bazbat" + * put_match_values(scanner) + * # Basic match values: + * # matched?: true + * # matched_size: 3 + * # pre_match: "foobarbaz" + * # matched : "bat" + * # post_match: "bam" + * # Captured match values: + * # size: 1 + * # captures: [] + * # named_captures: {} + * # values_at: ["bat", nil] + * # []: + * # [0]: "bat" + * # [1]: nil + * put_situation(scanner) + * # Situation: + * # pos: 6 + * # charpos: 6 + * # rest: "bazbatbam" + * # rest_size: 9 + * ``` + * + * If the match fails: + * + * - Clears all [match values][9]. + * - Returns `nil`. + * + * ```rb + * scanner.check_until(/nope/) # => nil + * match_values_cleared?(scanner) # => true + * ``` * - * Mnemonic: it "checks" to see whether a #scan_until will return a value. 
*/ static VALUE strscan_check_until(VALUE self, VALUE re) @@ -616,48 +1134,49 @@ strscan_check_until(VALUE self, VALUE re) } /* - * call-seq: search_full(pattern, return_string_p, advance_pointer_p) + * call-seq: + * search_full(pattern, advance_pointer_p, return_string_p) -> matched_substring or position_delta or nil + * + * Equivalent to one of the following: + * + * - +advance_pointer_p+ +true+: + * + * - +return_string_p+ +true+: StringScanner#scan_until(pattern). + * - +return_string_p+ +false+: StringScanner#skip_until(pattern). + * + * - +advance_pointer_p+ +false+: + * + * - +return_string_p+ +true+: StringScanner#check_until(pattern). + * - +return_string_p+ +false+: StringScanner#exist?(pattern). * - * Scans the string _until_ the +pattern+ is matched. - * Returns the matched string if +return_string_p+ is true, otherwise - * returns the number of bytes advanced. - * Advances the scan pointer if +advance_pointer_p+, otherwise not. - * This method does affect the match register. */ + + /* :nodoc: */ static VALUE strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f) { return strscan_do_scan(self, re, RTEST(s), RTEST(f), 0); } -/* DANGEROUS; need to synchronize with regex.c */ static void adjust_registers_to_matched(struct strscanner *p) { - if (p->regs.allocated == 0) { - p->regs.beg = ALLOC_N(int, RE_NREGS); - p->regs.end = ALLOC_N(int, RE_NREGS); - p->regs.allocated = RE_NREGS; + onig_region_clear(&(p->regs)); + if (p->fixed_anchor_p) { + onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr); + } + else { + onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev)); } - p->regs.num_regs = 1; - p->regs.beg[0] = 0; - p->regs.end[0] = p->curr - p->prev; } /* - * Scans one character and returns it. - * This method is multi-byte character sensitive. - * See also #get_byte. 
+ * :markup: markdown + * :call-seq: + * getch -> character or nil * - * s = StringScanner.new('ab') - * s.getch # => "a" - * s.getch # => "b" - * s.getch # => nil - * - * $KCODE = 'EUC' - * s = StringScanner.new("\244\242") - * s.getch # => "\244\242" # Japanese hira-kana "A" in EUC-JP - * s.getch # => nil + * :include: strscan/link_refs.txt + * :include: strscan/methods/getch.md */ static VALUE strscan_getch(VALUE self) @@ -669,71 +1188,108 @@ strscan_getch(VALUE self) CLEAR_MATCH_STATUS(p); if (EOS_P(p)) return Qnil; - len = mbclen(*CURPTR(p)); - if (p->curr + len > S_LEN(p)) { - len = S_LEN(p) - p->curr; - } + + len = rb_enc_mbclen(CURPTR(p), S_PEND(p), rb_enc_get(p->str)); + len = minl(len, S_RESTLEN(p)); p->prev = p->curr; p->curr += len; MATCHED(p); adjust_registers_to_matched(p); - return extract_range(p, p->prev + p->regs.beg[0], - p->prev + p->regs.end[0]); + return extract_range(p, + adjust_register_position(p, p->regs.beg[0]), + adjust_register_position(p, p->regs.end[0])); } /* - * Scans one byte and returns it. - * This method is NOT multi-byte character sensitive. - * See also #getch. + * call-seq: + * scan_byte -> integer_byte or nil + * + * Scans one byte and returns it as an integer. + * This method is not multibyte character sensitive. + * See also: #getch. * - * s = StringScanner.new('ab') - * s.get_byte # => "a" - * s.get_byte # => "b" - * s.get_byte # => nil - * - * s = StringScanner.new("\244\242") - * s.get_byte # => "\244" - * s.get_byte # => "\242" - * s.get_byte # => nil */ static VALUE -strscan_get_byte(VALUE self) +strscan_scan_byte(VALUE self) { struct strscanner *p; + VALUE byte; GET_SCANNER(self, p); CLEAR_MATCH_STATUS(p); - if (EOS_P(p)) { + if (EOS_P(p)) return Qnil; - } + + byte = INT2FIX((unsigned char)*CURPTR(p)); p->prev = p->curr; p->curr++; MATCHED(p); adjust_registers_to_matched(p); - return extract_range(p, p->prev + p->regs.beg[0], - p->prev + p->regs.end[0]); + return byte; } /* - * Equivalent to #get_byte. 
- * This method is obsolete; use #get_byte instead. + * Peeks at the current byte and returns it as an integer. + * + * s = StringScanner.new('ab') + * s.peek_byte # => 97 */ static VALUE -strscan_getbyte(VALUE self) +strscan_peek_byte(VALUE self) { - rb_warning("StringScanner#getbyte is obsolete; use #get_byte instead"); - return strscan_get_byte(self); + struct strscanner *p; + + GET_SCANNER(self, p); + if (EOS_P(p)) + return Qnil; + + return INT2FIX((unsigned char)*CURPTR(p)); } /* - * call-seq: peek(len) + * :markup: markdown + * :call-seq: + * get_byte -> byte_as_character or nil * - * Extracts a string corresponding to <tt>string[pos,len]</tt>, without - * advancing the scan pointer. + * :include: strscan/link_refs.txt + * :include: strscan/methods/get_byte.md + */ +static VALUE +strscan_get_byte(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + CLEAR_MATCH_STATUS(p); + if (EOS_P(p)) + return Qnil; + + p->prev = p->curr; + p->curr++; + MATCHED(p); + adjust_registers_to_matched(p); + return extract_range(p, + adjust_register_position(p, p->regs.beg[0]), + adjust_register_position(p, p->regs.end[0])); +} + +/* + * :markup: markdown + * :include: strscan/link_refs.txt * - * s = StringScanner.new('test string') - * s.peek(7) # => "test st" - * s.peek(7) # => "test st" + * call-seq: + * peek(length) -> substring + * + * Returns the substring `string[pos, length]`; + * does not update [match values][9] or [positions][11]: + * + * ```rb + * scanner = StringScanner.new('foobarbaz') + * scanner.pos = 3 + * scanner.peek(3) # => "bar" + * scanner.terminate + * scanner.peek(3) # => "" + * ``` * */ static VALUE @@ -743,37 +1299,170 @@ strscan_peek(VALUE self, VALUE vlen) long len; GET_SCANNER(self, p); + len = NUM2LONG(vlen); - if (EOS_P(p)) { - return infect(rb_str_new("", 0), p); + if (EOS_P(p)) + return str_new(p, "", 0); + + len = minl(len, S_RESTLEN(p)); + return extract_beg_len(p, p->curr, len); +} + +static VALUE +strscan_parse_integer(struct 
strscanner *p, int base, long len) +{ + VALUE buffer_v, integer; + + char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1); + + MEMCPY(buffer, CURPTR(p), char, len); + buffer[len] = '\0'; + integer = rb_cstr2inum(buffer, base); + RB_ALLOCV_END(buffer_v); + p->curr += len; + + MATCHED(p); + adjust_registers_to_matched(p); + + return integer; +} + +static inline bool +strscan_ascii_compat_fastpath(VALUE str) +{ + int encindex = ENCODING_GET_INLINED(str); + /* The overwhelming majority of strings are in one of these 3 encodings. */ + return encindex == utf8_encindex || encindex == binary_encindex || encindex == usascii_encindex; +} + +static inline void +strscan_must_ascii_compat(VALUE str) +{ + /* The overwhelming majority of strings are in one of these 3 encodings. */ + if (RB_LIKELY(strscan_ascii_compat_fastpath(str))) { + return; } - if (p->curr + len > S_LEN(p)) { - len = S_LEN(p) - p->curr; + + rb_must_asciicompat(str); +} + +/* :nodoc: */ +static VALUE +strscan_scan_base10_integer(VALUE self) +{ + char *ptr; + long len = 0, remaining_len; + struct strscanner *p; + + GET_SCANNER(self, p); + CLEAR_MATCH_STATUS(p); + + strscan_must_ascii_compat(p->str); + + ptr = CURPTR(p); + + remaining_len = S_RESTLEN(p); + + if (remaining_len <= 0) { + return Qnil; } - return extract_beg_len(p, p->curr, len); + + if (ptr[len] == '-' || ptr[len] == '+') { + len++; + } + + if (!rb_isdigit(ptr[len])) { + return Qnil; + } + + p->prev = p->curr; + + while (len < remaining_len && rb_isdigit(ptr[len])) { + len++; + } + + return strscan_parse_integer(p, 10, len); } -/* - * Equivalent to #peek. - * This method is obsolete; use #peek instead. 
- */ +/* :nodoc: */ static VALUE -strscan_peep(VALUE self, VALUE vlen) +strscan_scan_base16_integer(VALUE self) { - rb_warning("StringScanner#peep is obsolete; use #peek instead"); - return strscan_peek(self, vlen); + char *ptr; + long len = 0, remaining_len; + struct strscanner *p; + + GET_SCANNER(self, p); + CLEAR_MATCH_STATUS(p); + + strscan_must_ascii_compat(p->str); + + ptr = CURPTR(p); + + remaining_len = S_RESTLEN(p); + + if (remaining_len <= 0) { + return Qnil; + } + + if (ptr[len] == '-' || ptr[len] == '+') { + len++; + } + + if ((remaining_len >= (len + 3)) && ptr[len] == '0' && ptr[len + 1] == 'x' && rb_isxdigit(ptr[len + 2])) { + len += 2; + } + + if (len >= remaining_len || !rb_isxdigit(ptr[len])) { + return Qnil; + } + + p->prev = p->curr; + + while (len < remaining_len && rb_isxdigit(ptr[len])) { + len++; + } + + return strscan_parse_integer(p, 16, len); } /* - * Set the scan pointer to the previous position. Only one previous position is - * remembered, and it changes with each scanning operation. + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * unscan -> self + * + * Sets the [position][2] to its value previous to the recent successful + * [match][17] attempt: + * + * ```rb + * scanner = StringScanner.new('foobarbaz') + * scanner.scan(/foo/) + * put_situation(scanner) + * # Situation: + * # pos: 3 + * # charpos: 3 + * # rest: "barbaz" + * # rest_size: 6 + * scanner.unscan + * # => #<StringScanner 0/9 @ "fooba..."> + * put_situation(scanner) + * # Situation: + * # pos: 0 + * # charpos: 0 + * # rest: "foobarbaz" + * # rest_size: 9 + * ``` + * + * Raises an exception if match values are clear: + * + * ```rb + * scanner.scan(/nope/) # => nil + * match_values_cleared?(scanner) # => true + * scanner.unscan # Raises StringScanner::Error. 
+ * ``` * - * s = StringScanner.new('test string') - * s.scan(/\w+/) # => "test" - * s.unscan - * s.scan(/../) # => "te" - * s.scan(/\d/) # => nil - * s.unscan # ScanError: unscan failed: previous match had failed */ static VALUE strscan_unscan(VALUE self) @@ -781,25 +1470,45 @@ strscan_unscan(VALUE self) struct strscanner *p; GET_SCANNER(self, p); - if (! MATCHED_P(p)) { - rb_raise(ScanError, "unscan failed: previous match had failed"); - } + if (! MATCHED_P(p)) + rb_raise(ScanError, "unscan failed: previous match record not exist"); p->curr = p->prev; CLEAR_MATCH_STATUS(p); return self; } /* - * Returns +true+ iff the scan pointer is at the beginning of the line. - * - * s = StringScanner.new("test\ntest\n") - * s.bol? # => true - * s.scan(/te/) - * s.bol? # => false - * s.scan(/st\n/) - * s.bol? # => true - * s.terminate - * s.bol? # => true + * + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * beginning_of_line? -> true or false + * + * Returns whether the [position][2] is at the beginning of a line; + * that is, at the beginning of the [stored string][1] + * or immediately after a newline: + * + * scanner = StringScanner.new(MULTILINE_TEXT) + * scanner.string + * # => "Go placidly amid the noise and haste,\nand remember what peace there may be in silence.\n" + * scanner.pos # => 0 + * scanner.beginning_of_line? # => true + * + * scanner.scan_until(/,/) # => "Go placidly amid the noise and haste," + * scanner.beginning_of_line? # => false + * + * scanner.scan(/\n/) # => "\n" + * scanner.beginning_of_line? # => true + * + * scanner.terminate + * scanner.beginning_of_line? # => true + * + * scanner.concat('x') + * scanner.terminate + * scanner.beginning_of_line? # => false + * + * StringScanner#bol? is an alias for StringScanner#beginning_of_line?. */ static VALUE strscan_bol_p(VALUE self) @@ -813,14 +1522,24 @@ strscan_bol_p(VALUE self) } /* - * Returns +true+ if the scan pointer is at the end of the string. 
+ * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * eos? -> true or false + * + * Returns whether the [position][2] + * is at the end of the [stored string][1]: + * + * ```rb + * scanner = StringScanner.new('foobarbaz') + * scanner.eos? # => false + * scanner.pos = 3 + * scanner.eos? # => false + * scanner.terminate + * scanner.eos? # => true + * ``` * - * s = StringScanner.new('test string') - * p s.eos? # => false - * s.scan(/test/) - * p s.eos? # => false - * s.terminate - * p s.eos? # => true */ static VALUE strscan_eos_p(VALUE self) @@ -832,24 +1551,18 @@ strscan_eos_p(VALUE self) } /* - * Equivalent to #eos?. - * This method is obsolete, use #eos? instead. - */ -static VALUE -strscan_empty_p(VALUE self) -{ - rb_warning("StringScanner#empty? is obsolete; use #eos? instead"); - return strscan_eos_p(self); -} - -/* - * Returns true iff there is more data in the string. See #eos?. - * This method is obsolete; use #eos? instead. + * call-seq: + * rest? + * + * Returns true if and only if there is more data in the string. See #eos?. * * s = StringScanner.new('test string') - * s.eos? # These two - * s.rest? # are opposites. + * # These two are opposites + * s.eos? # => false + * s.rest? # => true */ + + /* :nodoc: */ static VALUE strscan_rest_p(VALUE self) { @@ -860,13 +1573,26 @@ strscan_rest_p(VALUE self) } /* - * Returns +true+ iff the last match was successful. + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * matched? -> true or false + * + * Returns `true` if the most recent [match attempt][17] was successful, + * `false` otherwise; + * see [Basic Matched Values][18]: + * + * ```rb + * scanner = StringScanner.new('foobarbaz') + * scanner.matched? # => false + * scanner.pos = 3 + * scanner.exist?(/baz/) # => 6 + * scanner.matched? # => true + * scanner.exist?(/nope/) # => nil + * scanner.matched? # => false + * ``` * - * s = StringScanner.new('test string') - * s.match?(/\w+/) # => 4 - * s.matched? 
# => true - * s.match?(/\d+/) # => nil - * s.matched? # => false */ static VALUE strscan_matched_p(VALUE self) @@ -878,11 +1604,27 @@ strscan_matched_p(VALUE self) } /* - * Returns the last matched string. - * - * s = StringScanner.new('test string') - * s.match?(/\w+/) # -> 4 - * s.matched # -> "test" + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * matched -> matched_substring or nil + * + * Returns the matched substring from the most recent [match][17] attempt + * if it was successful, + * or `nil` otherwise; + * see [Basic Matched Values][18]: + * + * ```rb + * scanner = StringScanner.new('foobarbaz') + * scanner.matched # => nil + * scanner.pos = 3 + * scanner.match?(/bar/) # => 3 + * scanner.matched # => "bar" + * scanner.match?(/nope/) # => nil + * scanner.matched # => nil + * ``` + * */ static VALUE strscan_matched(VALUE self) @@ -891,20 +1633,35 @@ strscan_matched(VALUE self) GET_SCANNER(self, p); if (! MATCHED_P(p)) return Qnil; - - return extract_range(p, p->prev + p->regs.beg[0], - p->prev + p->regs.end[0]); + return extract_range(p, + adjust_register_position(p, p->regs.beg[0]), + adjust_register_position(p, p->regs.end[0])); } /* - * Returns the size of the most recent match (see #matched), or +nil+ if there - * was no recent match. 
+ * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * matched_size -> substring_size or nil + * + * Returns the size (in bytes) of the matched substring + * from the most recent [match attempt][17] if it was successful, + * or `nil` otherwise; + * see [Basic Matched Values][18]: + * + * ```rb + * scanner = StringScanner.new('foobarbaz') + * scanner.matched_size # => nil + * + * scanner.pos = 3 + * scanner.exist?(/baz/) # => 6 + * scanner.matched_size # => 3 + * + * scanner.exist?(/nope/) # => nil + * scanner.matched_size # => nil + * ``` * - * s = StringScanner.new('test string') - * s.check /\w+/ # -> "test" - * s.matched_size # -> 4 - * s.check /\d+/ # -> nil - * s.matched_size # -> nil */ static VALUE strscan_matched_size(VALUE self) @@ -913,34 +1670,127 @@ strscan_matched_size(VALUE self) GET_SCANNER(self, p); if (! MATCHED_P(p)) return Qnil; + return LONG2NUM(p->regs.end[0] - p->regs.beg[0]); +} - return INT2NUM(p->regs.end[0] - p->regs.beg[0]); +static int +name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end, rb_encoding *enc) +{ + if (RTEST(regexp)) { + int num = onig_name_to_backref_number(RREGEXP_PTR(regexp), + (const unsigned char* )name, + (const unsigned char* )name_end, + regs); + if (num >= 1) { + return num; + } + } + rb_enc_raise(enc, rb_eIndexError, "undefined group name reference: %.*s", + rb_long2int(name_end - name), name); } /* - * Equivalent to #matched_size. - * This method is obsolete; use #matched_size instead. + * Resolve capture group index from Integer, Symbol, or String. + * Returns the resolved register index, or -1 if unmatched/out of range. + * For Symbol/String specifiers, raises IndexError if the named group + * does not exist. 
*/ -static VALUE -strscan_matchedsize(VALUE self) +static long +resolve_capture_index(struct strscanner *p, VALUE specifier) { - rb_warning("StringScanner#matchedsize is obsolete; use #matched_size instead"); - return strscan_matched_size(self); + const char *name; + long i; + if (! MATCHED_P(p)) return -1; + switch (TYPE(specifier)) { + case T_SYMBOL: + specifier = rb_sym2str(specifier); + /* fall through */ + case T_STRING: + RSTRING_GETMEM(specifier, name, i); + i = name_to_backref_number(&(p->regs), p->regex, name, name + i, + rb_enc_get(specifier)); + break; + default: + i = NUM2LONG(specifier); + } + if (i < 0) + i += p->regs.num_regs; + if (i < 0) return -1; + if (i >= p->regs.num_regs) return -1; + if (p->regs.beg[i] == -1) return -1; + return i; } /* - * call-seq: [](n) - * - * Return the n-th subgroup in the most recent match. - * - * s = StringScanner.new("Fri Dec 12 1975 14:39") - * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 " - * s[0] # -> "Fri Dec 12 " - * s[1] # -> "Fri" - * s[2] # -> "Dec" - * s[3] # -> "12" - * s.post_match # -> "1975 14:39" - * s.pre_match # -> "" + * + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * [](specifier) -> substring or nil + * + * Returns a captured substring or `nil`; + * see [Captured Match Values][13]. + * + * When there are captures: + * + * ```rb + * scanner = StringScanner.new('Fri Dec 12 1975 14:39') + * scanner.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /) + * ``` + * + * - `specifier` zero: returns the entire matched substring: + * + * ```rb + * scanner[0] # => "Fri Dec 12 " + * scanner.pre_match # => "" + * scanner.post_match # => "1975 14:39" + * ``` + * + * - `specifier` positive integer. returns the `n`th capture, or `nil` if out of range: + * + * ```rb + * scanner[1] # => "Fri" + * scanner[2] # => "Dec" + * scanner[3] # => "12" + * scanner[4] # => nil + * ``` + * + * - `specifier` negative integer. 
counts backward from the last subgroup: + * + * ```rb + * scanner[-1] # => "12" + * scanner[-4] # => "Fri Dec 12 " + * scanner[-5] # => nil + * ``` + * + * - `specifier` symbol or string. returns the named subgroup, or raises IndexError if there is no such group: + * + * ```rb + * scanner[:wday] # => "Fri" + * scanner['wday'] # => "Fri" + * scanner[:month] # => "Dec" + * scanner[:day] # => "12" + * scanner[:nope] # Raises IndexError. + * ``` + * + * When there are no captures, only `[0]` returns non-`nil`: + * + * ```rb + * scanner = StringScanner.new('foobarbaz') + * scanner.exist?(/bar/) + * scanner[0] # => "bar" + * scanner[1] # => nil + * ``` + * + * For a failed match, even `[0]` returns `nil`: + * + * ```rb + * scanner.scan(/nope/) # => nil + * scanner[0] # => nil + * scanner[1] # => nil + * ``` + * */ static VALUE strscan_aref(VALUE self, VALUE idx) @@ -949,27 +1799,240 @@ strscan_aref(VALUE self, VALUE idx) long i; GET_SCANNER(self, p); - if (! MATCHED_P(p)) return Qnil; - - i = NUM2LONG(idx); + i = resolve_capture_index(p, idx); + if (i < 0) return Qnil; + + return extract_range(p, + adjust_register_position(p, p->regs.beg[i]), + adjust_register_position(p, p->regs.end[i])); +} + +/* + * :markup: markdown + * + * call-seq: + * integer_at(specifier, base=10) -> integer or nil + * + * Returns the captured substring at the given `specifier` as an Integer, + * following the behavior of `String#to_i(base)`. + * + * `specifier` can be an Integer (positive, negative, or zero), a Symbol, + * or a String for named capture groups. + * + * Returns `nil` if: + * - No match has been performed or the last match failed + * - The `specifier` is an Integer and is out of range + * - The group at `specifier` did not participate in the match + * + * Raises IndexError if `specifier` is a Symbol or String that does not + * correspond to a named capture group, consistent with + * `StringScanner#[]`. 
+ * + * This is semantically equivalent to `self[specifier]&.to_i(base)` + * but avoids the allocation of a temporary String when possible. + * + * ```rb + * scanner = StringScanner.new("2024-06-15") + * scanner.scan(/(\d{4})-(\d{2})-(\d{2})/) + * scanner.integer_at(1) # => 2024 + * scanner.integer_at(1, 16) # => 8228 + * ``` + */ +static VALUE +strscan_integer_at(int argc, VALUE *argv, VALUE self) +{ + struct strscanner *p; + long i; + long beg, end, len; + const char *ptr; + VALUE rb_specifier; + VALUE rb_base; + int base = 10; + + GET_SCANNER(self, p); + rb_scan_args(argc, argv, "11", &rb_specifier, &rb_base); + if (argc > 1) + base = NUM2INT(rb_base); + i = resolve_capture_index(p, rb_specifier); if (i < 0) - i += p->regs.num_regs; - if (i < 0) return Qnil; - if (i >= p->regs.num_regs) return Qnil; - if (p->regs.beg[i] == -1) return Qnil; + return Qnil; - return extract_range(p, p->prev + p->regs.beg[i], - p->prev + p->regs.end[i]); + beg = adjust_register_position(p, p->regs.beg[i]); + end = adjust_register_position(p, p->regs.end[i]); + len = end - beg; + ptr = S_PBEG(p) + beg; +#ifdef HAVE_RB_INT_PARSE_CSTR + { + /* + * Ruby 2.5 or later export the rb_int_parse_cstr() symbol but + * prototype definition isn't provided. Ruby 4.1 or later + * provide prototype definition. + */ +# ifndef RB_INT_PARSE_DEFAULT + VALUE rb_int_parse_cstr(const char *str, ssize_t len, char **endp, + size_t *ndigits, int base, int flags); +# define RB_INT_PARSE_DEFAULT 0x07 +# endif + char *endp; + return rb_int_parse_cstr(ptr, len, &endp, NULL, base, + RB_INT_PARSE_DEFAULT); + } +#else + return rb_str_to_inum(rb_str_new(ptr, len), base, 0); +#endif } /* - * Return the <i><b>pre</b>-match</i> (in the regular expression sense) of the last scan. 
+ * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * size -> captures_count + * + * Returns the count of captures if the most recent match attempt succeeded, `nil` otherwise; + * see [Captured Match Values][13]: + * + * ```rb + * scanner = StringScanner.new('Fri Dec 12 1975 14:39') + * scanner.size # => nil + * + * pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) / + * scanner.match?(pattern) + * scanner.values_at(*0..scanner.size) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil] + * scanner.size # => 4 + * + * scanner.match?(/nope/) # => nil + * scanner.size # => nil + * ``` + * + */ +static VALUE +strscan_size(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + if (! MATCHED_P(p)) return Qnil; + return INT2FIX(p->regs.num_regs); +} + +/* + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * captures -> substring_array or nil + * + * Returns the array of [captured match values][13] at indexes `(1..)` + * if the most recent match attempt succeeded, or `nil` otherwise: + * + * ```rb + * scanner = StringScanner.new('Fri Dec 12 1975 14:39') + * scanner.captures # => nil + * + * scanner.exist?(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /) + * scanner.captures # => ["Fri", "Dec", "12"] + * scanner.values_at(*0..4) # => ["Fri Dec 12 ", "Fri", "Dec", "12", nil] + * + * scanner.exist?(/Fri/) + * scanner.captures # => [] + * + * scanner.scan(/nope/) + * scanner.captures # => nil + * ``` + * + */ +static VALUE +strscan_captures(VALUE self) +{ + struct strscanner *p; + int i, num_regs; + VALUE new_ary; + + GET_SCANNER(self, p); + if (! 
MATCHED_P(p)) return Qnil; + + num_regs = p->regs.num_regs; + new_ary = rb_ary_new2(num_regs); + + for (i = 1; i < num_regs; i++) { + VALUE str; + if (p->regs.beg[i] == -1) + str = Qnil; + else + str = extract_range(p, + adjust_register_position(p, p->regs.beg[i]), + adjust_register_position(p, p->regs.end[i])); + rb_ary_push(new_ary, str); + } + + return new_ary; +} + +/* + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * values_at(*specifiers) -> array_of_captures or nil + * + * Returns an array of captured substrings if the most recent match attempt succeeded, or `nil` otherwise. + * + * For each `specifier`, the returned substring is `[specifier]`; + * see #[]. + * + * ```rb + * scanner = StringScanner.new('Fri Dec 12 1975 14:39') + * pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) / + * scanner.match?(pattern) + * scanner.values_at(*0..3) # => ["Fri Dec 12 ", "Fri", "Dec", "12"] + * scanner.values_at(*%i[wday month day]) # => ["Fri", "Dec", "12"] + * ``` + * + */ + +static VALUE +strscan_values_at(int argc, VALUE *argv, VALUE self) +{ + struct strscanner *p; + long i; + VALUE new_ary; + + GET_SCANNER(self, p); + if (! MATCHED_P(p)) return Qnil; + + new_ary = rb_ary_new2(argc); + for (i = 0; i<argc; i++) { + rb_ary_push(new_ary, strscan_aref(self, argv[i])); + } + + return new_ary; +} + +/* + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * pre_match -> substring + * + * Returns the substring that precedes the matched substring + * from the most recent match attempt if it was successful, + * or `nil` otherwise; + * see [Basic Match Values][18]: + * + * ```rb + * scanner = StringScanner.new('foobarbaz') + * scanner.pre_match # => nil + * + * scanner.pos = 3 + * scanner.exist?(/baz/) # => 6 + * scanner.pre_match # => "foobar" # Substring of entire string, not just target string. 
+ * + * scanner.exist?(/nope/) # => nil + * scanner.pre_match # => nil + * ``` * - * s = StringScanner.new('test string') - * s.scan(/\w+/) # -> "test" - * s.scan(/\s+/) # -> " " - * s.pre_match # -> "test" - * s.post_match # -> "string" */ static VALUE strscan_pre_match(VALUE self) @@ -978,18 +2041,35 @@ strscan_pre_match(VALUE self) GET_SCANNER(self, p); if (! MATCHED_P(p)) return Qnil; - - return extract_range(p, 0, p->prev + p->regs.beg[0]); + return extract_range(p, + 0, + adjust_register_position(p, p->regs.beg[0])); } /* - * Return the <i><b>post</b>-match</i> (in the regular expression sense) of the last scan. + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * post_match -> substring + * + * Returns the substring that follows the matched substring + * from the most recent match attempt if it was successful, + * or `nil` otherwise; + * see [Basic Match Values][18]: + * + * ```rb + * scanner = StringScanner.new('foobarbaz') + * scanner.post_match # => nil + * + * scanner.pos = 3 + * scanner.match?(/bar/) # => 3 + * scanner.post_match # => "baz" + * + * scanner.match?(/nope/) # => nil + * scanner.post_match # => nil + * ``` * - * s = StringScanner.new('test string') - * s.scan(/\w+/) # -> "test" - * s.scan(/\s+/) # -> " " - * s.pre_match # -> "test" - * s.post_match # -> "string" */ static VALUE strscan_post_match(VALUE self) @@ -998,13 +2078,30 @@ strscan_post_match(VALUE self) GET_SCANNER(self, p); if (! MATCHED_P(p)) return Qnil; - - return extract_range(p, p->prev + p->regs.end[0], S_LEN(p)); + return extract_range(p, + adjust_register_position(p, p->regs.end[0]), + S_LEN(p)); } /* - * Returns the "rest" of the string (i.e. everything after the scan pointer). - * If there is no more data (eos? = true), it returns <tt>""</tt>. 
+ * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * rest -> target_substring + * + * Returns the 'rest' of the [stored string][1] (all after the current [position][2]), + * which is the [target substring][3]: + * + * ```rb + * scanner = StringScanner.new('foobarbaz') + * scanner.rest # => "foobarbaz" + * scanner.pos = 3 + * scanner.rest # => "barbaz" + * scanner.terminate + * scanner.rest # => "" + * ``` + * */ static VALUE strscan_rest(VALUE self) @@ -1013,13 +2110,32 @@ strscan_rest(VALUE self) GET_SCANNER(self, p); if (EOS_P(p)) { - return infect(rb_str_new("", 0), p); + return str_new(p, "", 0); } return extract_range(p, p->curr, S_LEN(p)); } /* - * <tt>s.rest_size</tt> is equivalent to <tt>s.rest.size</tt>. + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * rest_size -> integer + * + * Returns the size (in bytes) of the #rest of the [stored string][1]: + * + * ```rb + * scanner = StringScanner.new('foobarbaz') + * scanner.rest # => "foobarbaz" + * scanner.rest_size # => 9 + * scanner.pos = 3 + * scanner.rest # => "barbaz" + * scanner.rest_size # => 6 + * scanner.terminate + * scanner.rest # => "" + * scanner.rest_size # => 0 + * ``` + * */ static VALUE strscan_rest_size(VALUE self) @@ -1031,110 +2147,202 @@ strscan_rest_size(VALUE self) if (EOS_P(p)) { return INT2FIX(0); } - - i = S_LEN(p) - p->curr; + i = S_RESTLEN(p); return INT2FIX(i); } -/* - * <tt>s.restsize</tt> is equivalent to <tt>s.rest_size</tt>. - * This method is obsolete; use #rest_size instead. 
- */ -static VALUE -strscan_restsize(VALUE self) -{ - rb_warning("StringScanner#restsize is obsolete; use #rest_size instead"); - return strscan_rest_size(self); -} - #define INSPECT_LENGTH 5 -#define BUFSIZE 256 /* - * Returns a string that represents the StringScanner object, showing: - * - the current position - * - the size of the string - * - the characters surrounding the scan pointer - * - * s = StringScanner.new("Fri Dec 12 1975 14:39") - * s.inspect # -> '#<StringScanner 0/21 @ "Fri D...">' - * s.scan_until /12/ # -> "Fri Dec 12" - * s.inspect # -> '#<StringScanner 10/21 "...ec 12" @ " 1975...">' + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * inspect -> string + * + * Returns a string representation of `self` that may show: + * + * 1. The current [position][2]. + * 2. The size (in bytes) of the [stored string][1]. + * 3. The substring preceding the current position. + * 4. The substring following the current position (which is also the [target substring][3]). 
+ * + * ```rb + * scanner = StringScanner.new("Fri Dec 12 1975 14:39") + * scanner.pos = 11 + * scanner.inspect # => "#<StringScanner 11/21 \"...c 12 \" @ \"1975 ...\">" + * ``` + * + * If at beginning-of-string, item 3 above (preceding substring) is omitted: + * + * ```rb + * scanner.reset + * scanner.inspect # => "#<StringScanner 0/21 @ \"Fri D...\">" + * ``` + * + * If at end-of-string, all items above are omitted: + * + * ```rb + * scanner.terminate + * scanner.inspect # => "#<StringScanner fin>" + * ``` + * */ static VALUE strscan_inspect(VALUE self) { struct strscanner *p; - char buf[BUFSIZE]; - long len; VALUE a, b; - Data_Get_Struct(self, struct strscanner, p); + p = check_strscan(self); if (NIL_P(p->str)) { - len = snprintf(buf, BUFSIZE, "#<%s (uninitialized)>", - rb_class2name(CLASS_OF(self))); - return infect(rb_str_new(buf, len), p); + a = rb_sprintf("#<%"PRIsVALUE" (uninitialized)>", rb_obj_class(self)); + return a; } if (EOS_P(p)) { - len = snprintf(buf, BUFSIZE, "#<%s fin>", - rb_class2name(CLASS_OF(self))); - return infect(rb_str_new(buf, len), p); + a = rb_sprintf("#<%"PRIsVALUE" fin>", rb_obj_class(self)); + return a; } if (p->curr == 0) { - b = inspect2(p); - len = snprintf(buf, BUFSIZE, "#<%s %ld/%ld @ %s>", - rb_class2name(CLASS_OF(self)), - p->curr, S_LEN(p), - RSTRING(b)->ptr); - return infect(rb_str_new(buf, len), p); + b = inspect2(p); + a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld @ %"PRIsVALUE">", + rb_obj_class(self), + p->curr, S_LEN(p), + b); + return a; } a = inspect1(p); b = inspect2(p); - len = snprintf(buf, BUFSIZE, "#<%s %ld/%ld %s @ %s>", - rb_class2name(CLASS_OF(self)), - p->curr, S_LEN(p), - RSTRING(a)->ptr, - RSTRING(b)->ptr); - return infect(rb_str_new(buf, len), p); + a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld %"PRIsVALUE" @ %"PRIsVALUE">", + rb_obj_class(self), + p->curr, S_LEN(p), + a, b); + return a; } static VALUE inspect1(struct strscanner *p) { - char buf[BUFSIZE]; - char *bp = buf; + VALUE str; long len; if (p->curr == 0) 
return rb_str_new2(""); if (p->curr > INSPECT_LENGTH) { - strcpy(bp, "..."); bp += 3; - len = INSPECT_LENGTH; + str = rb_str_new_cstr("..."); + len = INSPECT_LENGTH; } else { - len = p->curr; + str = rb_str_new(0, 0); + len = p->curr; } - memcpy(bp, CURPTR(p) - len, len); bp += len; - return rb_str_dump(rb_str_new(buf, bp - buf)); + rb_str_cat(str, CURPTR(p) - len, len); + return rb_str_dump(str); } static VALUE inspect2(struct strscanner *p) { - char buf[BUFSIZE]; - char *bp = buf; + VALUE str; long len; if (EOS_P(p)) return rb_str_new2(""); - len = S_LEN(p) - p->curr; + len = S_RESTLEN(p); if (len > INSPECT_LENGTH) { - len = INSPECT_LENGTH; - memcpy(bp, CURPTR(p), len); bp += len; - strcpy(bp, "..."); bp += 3; + str = rb_str_new(CURPTR(p), INSPECT_LENGTH); + rb_str_cat2(str, "..."); } else { - memcpy(bp, CURPTR(p), len); bp += len; + str = rb_str_new(CURPTR(p), len); + } + return rb_str_dump(str); +} + +/* + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * fixed_anchor? -> true or false + * + * Returns whether the [fixed-anchor property][10] is set. + */ +static VALUE +strscan_fixed_anchor_p(VALUE self) +{ + struct strscanner *p; + p = check_strscan(self); + return p->fixed_anchor_p ? 
Qtrue : Qfalse; +} + +typedef struct { + VALUE self; + VALUE captures; +} named_captures_data; + +static int +named_captures_iter(const OnigUChar *name, + const OnigUChar *name_end, + int back_num, + int *back_refs, + OnigRegex regex, + void *arg) +{ + named_captures_data *data = arg; + + VALUE key = rb_str_new((const char *)name, name_end - name); + VALUE value = RUBY_Qnil; + int i; + for (i = 0; i < back_num; i++) { + VALUE v = strscan_aref(data->self, INT2NUM(back_refs[i])); + if (!RB_NIL_P(v)) { + value = v; + } } - return rb_str_dump(rb_str_new(buf, bp - buf)); + rb_hash_aset(data->captures, key, value); + return 0; +} + +/* + * :markup: markdown + * :include: strscan/link_refs.txt + * + * call-seq: + * named_captures -> hash + * + * Returns a hash of named captures for the most recent regexp match, + * or an empty hash if there are no named captures; + * see [Captured Match Values][13]: + * + * ```rb + * scanner = StringScanner.new('Fri Dec 12 1975 14:39') + * scanner.named_captures # => {} + * + * pattern = /(?<wday>\w+) (?<month>\w+) (?<day>\d+) / + * scanner.match?(pattern) + * scanner.named_captures # => {"wday"=>"Fri", "month"=>"Dec", "day"=>"12"} + * + * scanner.string = 'nope' + * scanner.match?(pattern) + * scanner.named_captures # => {"wday"=>nil, "month"=>nil, "day"=>nil} + * + * scanner.match?(/nosuch/) + * scanner.named_captures # => {} + * ``` + * + */ +static VALUE +strscan_named_captures(VALUE self) +{ + struct strscanner *p; + named_captures_data data; + GET_SCANNER(self, p); + data.self = self; + data.captures = rb_hash_new(); + if (!RB_NIL_P(p->regex)) { + onig_foreach_name(RREGEXP_PTR(p->regex), named_captures_iter, &data); + } + + return data.captures; } /* ======================================================================= @@ -1142,118 +2350,41 @@ inspect2(struct strscanner *p) ======================================================================= */ /* + * Document-class: StringScanner::Error + * + * The error class for 
StringScanner. + * See StringScanner#unscan. + */ + +/* * Document-class: StringScanner - * - * StringScanner provides for lexical scanning operations on a String. Here is - * an example of its usage: - * - * s = StringScanner.new('This is an example string') - * s.eos? # -> false - * - * p s.scan(/\w+/) # -> "This" - * p s.scan(/\w+/) # -> nil - * p s.scan(/\s+/) # -> " " - * p s.scan(/\s+/) # -> nil - * p s.scan(/\w+/) # -> "is" - * s.eos? # -> false - * - * p s.scan(/\s+/) # -> " " - * p s.scan(/\w+/) # -> "an" - * p s.scan(/\s+/) # -> " " - * p s.scan(/\w+/) # -> "example" - * p s.scan(/\s+/) # -> " " - * p s.scan(/\w+/) # -> "string" - * s.eos? # -> true - * - * p s.scan(/\s+/) # -> nil - * p s.scan(/\w+/) # -> nil - * - * Scanning a string means remembering the position of a <i>scan pointer</i>, - * which is just an index. The point of scanning is to move forward a bit at - * a time, so matches are sought after the scan pointer; usually immediately - * after it. - * - * Given the string "test string", here are the pertinent scan pointer - * positions: - * - * t e s t s t r i n g - * 0 1 2 ... 1 - * 0 - * - * When you #scan for a pattern (a regular expression), the match must occur - * at the character after the scan pointer. If you use #scan_until, then the - * match can occur anywhere after the scan pointer. In both cases, the scan - * pointer moves <i>just beyond</i> the last character of the match, ready to - * scan again from the next character onwards. This is demonstrated by the - * example above. - * - * == Method Categories - * - * There are other methods besides the plain scanners. You can look ahead in - * the string without actually scanning. You can access the most recent match. - * You can modify the string being scanned, reset or terminate the scanner, - * find out or change the position of the scan pointer, skip ahead, and so on. 
- * - * === Advancing the Scan Pointer - * - * - #getch - * - #get_byte - * - #scan - * - #scan_until - * - #skip - * - #skip_until - * - * === Looking Ahead - * - * - #check - * - #check_until - * - #exist? - * - #match? - * - #peek - * - * === Finding Where we Are - * - * - #beginning_of_line? (#bol?) - * - #eos? - * - #rest? - * - #rest_size - * - #pos - * - * === Setting Where we Are - * - * - #reset - * - #terminate - * - #pos= - * - * === Match Data - * - * - #matched - * - #matched? - * - #matched_size - * - [] - * - #pre_match - * - #post_match - * - * === Miscellaneous - * - * - << - * - #concat - * - #string - * - #string= - * - #unscan - * - * There are aliases to several of the methods. + * + * :markup: markdown + * + * :include: strscan/link_refs.txt + * :include: strscan/strscan.md + * */ void Init_strscan(void) { +#ifdef HAVE_RB_EXT_RACTOR_SAFE + rb_ext_ractor_safe(true); +#endif + +#undef rb_intern ID id_scanerr = rb_intern("ScanError"); VALUE tmp; + usascii_encindex = rb_usascii_encindex(); + utf8_encindex = rb_utf8_encindex(); + binary_encindex = rb_ascii8bit_encindex(); + StringScanner = rb_define_class("StringScanner", rb_cObject); ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError); if (!rb_const_defined(rb_cObject, id_scanerr)) { rb_const_set(rb_cObject, id_scanerr, ScanError); + rb_deprecate_constant(rb_cObject, "ScanError"); } tmp = rb_str_new2(STRSCAN_VERSION); rb_obj_freeze(tmp); @@ -1261,20 +2392,21 @@ Init_strscan(void) tmp = rb_str_new2("$Id$"); rb_obj_freeze(tmp); rb_const_set(StringScanner, rb_intern("Id"), tmp); - + rb_deprecate_constant(StringScanner, "Id"); + rb_define_alloc_func(StringScanner, strscan_s_allocate); rb_define_private_method(StringScanner, "initialize", strscan_initialize, -1); rb_define_private_method(StringScanner, "initialize_copy", strscan_init_copy, 1); rb_define_singleton_method(StringScanner, "must_C_version", strscan_s_mustc, 0); rb_define_method(StringScanner, "reset", strscan_reset, 
0); rb_define_method(StringScanner, "terminate", strscan_terminate, 0); - rb_define_method(StringScanner, "clear", strscan_clear, 0); rb_define_method(StringScanner, "string", strscan_get_string, 0); rb_define_method(StringScanner, "string=", strscan_set_string, 1); rb_define_method(StringScanner, "concat", strscan_concat, 1); rb_define_method(StringScanner, "<<", strscan_concat, 1); rb_define_method(StringScanner, "pos", strscan_get_pos, 0); rb_define_method(StringScanner, "pos=", strscan_set_pos, 1); + rb_define_method(StringScanner, "charpos", strscan_get_charpos, 0); rb_define_method(StringScanner, "pointer", strscan_get_pos, 0); rb_define_method(StringScanner, "pointer=", strscan_set_pos, 1); @@ -1292,29 +2424,37 @@ Init_strscan(void) rb_define_method(StringScanner, "getch", strscan_getch, 0); rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0); - rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0); + rb_define_method(StringScanner, "scan_byte", strscan_scan_byte, 0); rb_define_method(StringScanner, "peek", strscan_peek, 1); - rb_define_method(StringScanner, "peep", strscan_peep, 1); + rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0); + + rb_define_private_method(StringScanner, "scan_base10_integer", strscan_scan_base10_integer, 0); + rb_define_private_method(StringScanner, "scan_base16_integer", strscan_scan_base16_integer, 0); rb_define_method(StringScanner, "unscan", strscan_unscan, 0); rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0); rb_alias(StringScanner, rb_intern("bol?"), rb_intern("beginning_of_line?")); rb_define_method(StringScanner, "eos?", strscan_eos_p, 0); - rb_define_method(StringScanner, "empty?", strscan_empty_p, 0); rb_define_method(StringScanner, "rest?", strscan_rest_p, 0); rb_define_method(StringScanner, "matched?", strscan_matched_p, 0); rb_define_method(StringScanner, "matched", strscan_matched, 0); rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0); 
- rb_define_method(StringScanner, "matchedsize", strscan_matchedsize, 0); rb_define_method(StringScanner, "[]", strscan_aref, 1); + rb_define_method(StringScanner, "integer_at", strscan_integer_at, -1); rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0); rb_define_method(StringScanner, "post_match", strscan_post_match, 0); + rb_define_method(StringScanner, "size", strscan_size, 0); + rb_define_method(StringScanner, "captures", strscan_captures, 0); + rb_define_method(StringScanner, "values_at", strscan_values_at, -1); rb_define_method(StringScanner, "rest", strscan_rest, 0); rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0); - rb_define_method(StringScanner, "restsize", strscan_restsize, 0); rb_define_method(StringScanner, "inspect", strscan_inspect, 0); + + rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0); + + rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0); } diff --git a/ext/strscan/strscan.gemspec b/ext/strscan/strscan.gemspec new file mode 100644 index 0000000000..a51285fa7e --- /dev/null +++ b/ext/strscan/strscan.gemspec @@ -0,0 +1,49 @@ +# frozen_string_literal: true +# +source_version = ["", "ext/strscan/"].find do |dir| + begin + break File.open(File.join(__dir__, "#{dir}strscan.c")) {|f| + f.gets("\n#define STRSCAN_VERSION ") + f.gets[/\s*"(.+)"/, 1] + } + rescue Errno::ENOENT + end +end + +Gem::Specification.new do |s| + s.name = "strscan" + s.version = source_version + s.summary = "Provides lexical scanning operations on a String." + s.description = "Provides lexical scanning operations on a String." 
+ + files = %w[ + COPYING + LICENSE.txt + lib/strscan.rb + lib/strscan/strscan.rb + lib/strscan/truffleruby.rb + ] + + s.require_paths = %w{lib} + + if RUBY_ENGINE == "jruby" + files << "lib/strscan.jar" + s.platform = "java" + else + files << "ext/strscan/extconf.rb" + files << "ext/strscan/strscan.c" + s.rdoc_options << "-idoc" + s.extra_rdoc_files = [ + ".rdoc_options", + *Dir.glob("doc/strscan/**/*") + ] + s.extensions = %w{ext/strscan/extconf.rb} + end + s.files = files + s.required_ruby_version = ">= 2.4.0" + + s.authors = ["Minero Aoki", "Sutou Kouhei", "Charles Oliver Nutter"] + s.email = [nil, "kou@cozmixng.org", "headius@headius.com"] + s.homepage = "https://github.com/ruby/strscan" + s.licenses = ["Ruby", "BSD-2-Clause"] +end |
