diff options
Diffstat (limited to 'ext/strscan')
-rw-r--r-- | ext/strscan/depend | 4 | ||||
-rw-r--r-- | ext/strscan/extconf.rb | 11 | ||||
-rw-r--r-- | ext/strscan/strscan.c | 251 | ||||
-rw-r--r-- | ext/strscan/strscan.gemspec | 23 |
4 files changed, 225 insertions, 64 deletions
diff --git a/ext/strscan/depend b/ext/strscan/depend index 8fe3cb23d9..8dbae206d4 100644 --- a/ext/strscan/depend +++ b/ext/strscan/depend @@ -52,6 +52,7 @@ strscan.o: $(hdrdir)/ruby/internal/attr/noexcept.h strscan.o: $(hdrdir)/ruby/internal/attr/noinline.h strscan.o: $(hdrdir)/ruby/internal/attr/nonnull.h strscan.o: $(hdrdir)/ruby/internal/attr/noreturn.h +strscan.o: $(hdrdir)/ruby/internal/attr/packed_struct.h strscan.o: $(hdrdir)/ruby/internal/attr/pure.h strscan.o: $(hdrdir)/ruby/internal/attr/restrict.h strscan.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h @@ -121,7 +122,6 @@ strscan.o: $(hdrdir)/ruby/internal/intern/enumerator.h strscan.o: $(hdrdir)/ruby/internal/intern/error.h strscan.o: $(hdrdir)/ruby/internal/intern/eval.h strscan.o: $(hdrdir)/ruby/internal/intern/file.h -strscan.o: $(hdrdir)/ruby/internal/intern/gc.h strscan.o: $(hdrdir)/ruby/internal/intern/hash.h strscan.o: $(hdrdir)/ruby/internal/intern/io.h strscan.o: $(hdrdir)/ruby/internal/intern/load.h @@ -152,12 +152,12 @@ strscan.o: $(hdrdir)/ruby/internal/memory.h strscan.o: $(hdrdir)/ruby/internal/method.h strscan.o: $(hdrdir)/ruby/internal/module.h strscan.o: $(hdrdir)/ruby/internal/newobj.h -strscan.o: $(hdrdir)/ruby/internal/rgengc.h strscan.o: $(hdrdir)/ruby/internal/scan_args.h strscan.o: $(hdrdir)/ruby/internal/special_consts.h strscan.o: $(hdrdir)/ruby/internal/static_assert.h strscan.o: $(hdrdir)/ruby/internal/stdalign.h strscan.o: $(hdrdir)/ruby/internal/stdbool.h +strscan.o: $(hdrdir)/ruby/internal/stdckdint.h strscan.o: $(hdrdir)/ruby/internal/symbol.h strscan.o: $(hdrdir)/ruby/internal/value.h strscan.o: $(hdrdir)/ruby/internal/value_type.h diff --git a/ext/strscan/extconf.rb b/ext/strscan/extconf.rb index f0ecbf85d8..bd65606a4e 100644 --- a/ext/strscan/extconf.rb +++ b/ext/strscan/extconf.rb @@ -1,5 +1,10 @@ # frozen_string_literal: true require 'mkmf' -$INCFLAGS << " -I$(top_srcdir)" if $extmk -have_func("onig_region_memsize", "ruby.h") -create_makefile 'strscan' +if RUBY_ENGINE == 'ruby' + $INCFLAGS << " -I$(top_srcdir)" if $extmk + have_func("onig_region_memsize", "ruby.h") + have_func("rb_reg_onig_match", "ruby.h") + create_makefile 'strscan' +else + File.write('Makefile', dummy_makefile("").join) +end diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index e1426380b4..70a3ce5260 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -22,7 +22,7 @@ extern size_t onig_region_memsize(const struct re_registers *regs); #include <stdbool.h> -#define STRSCAN_VERSION "3.0.1" +#define STRSCAN_VERSION "3.1.1" /* ======================================================================= Data Type Definitions @@ -435,11 +435,11 @@ strscan_get_pos(VALUE self) * * In short, it's a 0-based index into the string. * - * s = StringScanner.new("abcädeföghi") - * s.charpos # -> 0 - * s.scan_until(/ä/) # -> "abcä" - * s.pos # -> 5 - * s.charpos # -> 4 + * s = StringScanner.new("abc\u00e4def\u00f6ghi") + * s.charpos # -> 0 + * s.scan_until(/\u00e4/) # -> "abc\u00E4" + * s.pos # -> 5 + * s.charpos # -> 4 */ static VALUE strscan_get_charpos(VALUE self) @@ -539,6 +539,68 @@ adjust_register_position(struct strscanner *p, long position) } } +/* rb_reg_onig_match is available in Ruby 3.3 and later. */ +#ifndef HAVE_RB_REG_ONIG_MATCH +static OnigPosition +rb_reg_onig_match(VALUE re, VALUE str, + OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args), + void *args, struct re_registers *regs) +{ + regex_t *reg = rb_reg_prepare_re(re, str); + + bool tmpreg = reg != RREGEXP_PTR(re); + if (!tmpreg) RREGEXP(re)->usecnt++; + + OnigPosition result = match(reg, str, regs, args); + + if (!tmpreg) RREGEXP(re)->usecnt--; + if (tmpreg) { + if (RREGEXP(re)->usecnt) { + onig_free(reg); + } + else { + onig_free(RREGEXP_PTR(re)); + RREGEXP_PTR(re) = reg; + } + } + + if (result < 0) { + if (result != ONIG_MISMATCH) { + rb_raise(ScanError, "regexp buffer overflow"); + } + } + + return result; +} +#endif + +static OnigPosition +strscan_match(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr) +{ + struct strscanner *p = (struct strscanner *)args_ptr; + + return onig_match(reg, + match_target(p), + (UChar* )(CURPTR(p) + S_RESTLEN(p)), + (UChar* )CURPTR(p), + regs, + ONIG_OPTION_NONE); +} + +static OnigPosition +strscan_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr) +{ + struct strscanner *p = (struct strscanner *)args_ptr; + + return onig_search(reg, + match_target(p), + (UChar *)(CURPTR(p) + S_RESTLEN(p)), + (UChar *)CURPTR(p), + (UChar *)(CURPTR(p) + S_RESTLEN(p)), + regs, + ONIG_OPTION_NONE); +} + static VALUE strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly) { @@ -560,47 +622,14 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly } if (RB_TYPE_P(pattern, T_REGEXP)) { - regex_t *rb_reg_prepare_re(VALUE re, VALUE str); - regex_t *re; - long ret; - int tmpreg; - p->regex = pattern; - re = rb_reg_prepare_re(pattern, p->str); - tmpreg = re != RREGEXP_PTR(pattern); - if (!tmpreg) RREGEXP(pattern)->usecnt++; - - if (headonly) { - ret = onig_match(re, - match_target(p), - (UChar* )(CURPTR(p) + S_RESTLEN(p)), - (UChar* )CURPTR(p), - &(p->regs), - ONIG_OPTION_NONE); - } - else { - ret = onig_search(re, - match_target(p), - (UChar* )(CURPTR(p) + S_RESTLEN(p)), - (UChar* )CURPTR(p), - (UChar* )(CURPTR(p) + S_RESTLEN(p)), - &(p->regs), - ONIG_OPTION_NONE); - } - if (!tmpreg) RREGEXP(pattern)->usecnt--; - if (tmpreg) { - if (RREGEXP(pattern)->usecnt) { - onig_free(re); - } - else { - onig_free(RREGEXP_PTR(pattern)); - RREGEXP_PTR(pattern) = re; - } - } + OnigPosition ret = rb_reg_onig_match(pattern, + p->str, + headonly ? strscan_match : strscan_search, + (void *)p, + &(p->regs)); - if (ret == -2) rb_raise(ScanError, "regexp buffer overflow"); - if (ret < 0) { - /* not matched */ + if (ret == ONIG_MISMATCH) { return Qnil; } } @@ -874,6 +903,57 @@ strscan_getch(VALUE self) } /* + * Scans one byte and returns it as an integer. + * This method is not multibyte character sensitive. + * See also: #getch. + * + * s = StringScanner.new('ab') + * s.scan_byte # => 97 + * s.scan_byte # => 98 + * s.scan_byte # => nil + * + * s = StringScanner.new("\244\242".force_encoding("euc-jp")) + * s.scan_byte # => 0xA4 + * s.scan_byte # => 0xA2 + * s.scan_byte # => nil + */ +static VALUE +strscan_scan_byte(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + CLEAR_MATCH_STATUS(p); + if (EOS_P(p)) + return Qnil; + + VALUE byte = INT2FIX((unsigned char)*CURPTR(p)); + p->prev = p->curr; + p->curr++; + MATCHED(p); + adjust_registers_to_matched(p); + return byte; +} + +/* + * Peeks at the current byte and returns it as an integer. + * + * s = StringScanner.new('ab') + * s.peek_byte # => 97 + */ +static VALUE +strscan_peek_byte(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + if (EOS_P(p)) + return Qnil; + + return INT2FIX((unsigned char)*CURPTR(p)); +} + +/* * Scans one byte and returns it. * This method is not multibyte character sensitive. * See also: #getch. @@ -1038,8 +1118,9 @@ strscan_empty_p(VALUE self) * This method is obsolete; use #eos? instead. * * s = StringScanner.new('test string') - * s.eos? # These two - * s.rest? # are opposites. + * # These two are opposites + * s.eos? # => false + * s.rest? # => true */ static VALUE strscan_rest_p(VALUE self) @@ -1213,10 +1294,10 @@ strscan_size(VALUE self) * If nothing was priorly matched, it returns nil. * * s = StringScanner.new("Fri Dec 12 1975 14:39") - * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 " - * s.captures # -> ["Fri", "Dec", "12"] - * s.scan(/(\w+) (\w+) (\d+) /) # -> nil - * s.captures # -> nil + * s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> "Fri Dec 12 " + * s.captures # -> ["Fri", "Dec", "12", nil] + * s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> nil + * s.captures # -> nil */ static VALUE strscan_captures(VALUE self) @@ -1232,9 +1313,13 @@ strscan_captures(VALUE self) new_ary = rb_ary_new2(num_regs); for (i = 1; i < num_regs; i++) { - VALUE str = extract_range(p, - adjust_register_position(p, p->regs.beg[i]), - adjust_register_position(p, p->regs.end[i])); + VALUE str; + if (p->regs.beg[i] == -1) + str = Qnil; + else + str = extract_range(p, + adjust_register_position(p, p->regs.beg[i]), + adjust_register_position(p, p->regs.end[i])); rb_ary_push(new_ary, str); } @@ -1458,6 +1543,56 @@ strscan_fixed_anchor_p(VALUE self) return p->fixed_anchor_p ? Qtrue : Qfalse; } +typedef struct { + VALUE self; + VALUE captures; +} named_captures_data; + +static int +named_captures_iter(const OnigUChar *name, + const OnigUChar *name_end, + int back_num, + int *back_refs, + OnigRegex regex, + void *arg) +{ + named_captures_data *data = arg; + + VALUE key = rb_str_new((const char *)name, name_end - name); + VALUE value = RUBY_Qnil; + int i; + for (i = 0; i < back_num; i++) { + value = strscan_aref(data->self, INT2NUM(back_refs[i])); + } + rb_hash_aset(data->captures, key, value); + return 0; +} + +/* + * call-seq: + * scanner.named_captures -> hash + * + * Returns a hash of string variables matching the regular expression. + * + * scan = StringScanner.new('foobarbaz') + * scan.match?(/(?<f>foo)(?<r>bar)(?<z>baz)/) + * scan.named_captures # -> {"f"=>"foo", "r"=>"bar", "z"=>"baz"} + */ +static VALUE +strscan_named_captures(VALUE self) +{ + struct strscanner *p; + GET_SCANNER(self, p); + named_captures_data data; + data.self = self; + data.captures = rb_hash_new(); + if (!RB_NIL_P(p->regex)) { + onig_foreach_name(RREGEXP_PTR(p->regex), named_captures_iter, &data); + } + + return data.captures; +} + /* ======================================================================= Ruby Interface ======================================================================= */ @@ -1468,6 +1603,8 @@ strscan_fixed_anchor_p(VALUE self) * StringScanner provides for lexical scanning operations on a String. Here is * an example of its usage: * + * require 'strscan' + * * s = StringScanner.new('This is an example string') * s.eos? # -> false * @@ -1519,6 +1656,7 @@ strscan_fixed_anchor_p(VALUE self) * * - #getch * - #get_byte + * - #scan_byte * - #scan * - #scan_until * - #skip @@ -1531,6 +1669,7 @@ strscan_fixed_anchor_p(VALUE self) * - #exist? * - #match? * - #peek + * - #peek_byte * * === Finding Where we Are * @@ -1622,7 +1761,9 @@ Init_strscan(void) rb_define_method(StringScanner, "getch", strscan_getch, 0); rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0); rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0); + rb_define_method(StringScanner, "scan_byte", strscan_scan_byte, 0); rb_define_method(StringScanner, "peek", strscan_peek, 1); + rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0); rb_define_method(StringScanner, "peep", strscan_peep, 1); rb_define_method(StringScanner, "unscan", strscan_unscan, 0); @@ -1650,4 +1791,6 @@ Init_strscan(void) rb_define_method(StringScanner, "inspect", strscan_inspect, 0); rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0); + + rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0); } diff --git a/ext/strscan/strscan.gemspec b/ext/strscan/strscan.gemspec index 5d8119ea4c..8a61c7abe6 100644 --- a/ext/strscan/strscan.gemspec +++ b/ext/strscan/strscan.gemspec @@ -16,13 +16,26 @@ Gem::Specification.new do |s| s.summary = "Provides lexical scanning operations on a String." s.description = "Provides lexical scanning operations on a String." - s.require_path = %w{lib} - s.files = %w{ext/strscan/extconf.rb ext/strscan/strscan.c} - s.extensions = %w{ext/strscan/extconf.rb} + files = [ + "COPYING", + "LICENSE.txt", + ] + if RUBY_ENGINE == "jruby" + s.require_paths = %w{ext/jruby/lib lib} + files << "ext/jruby/lib/strscan.rb" + files << "lib/strscan.jar" + s.platform = "java" + else + s.require_paths = %w{lib} + files << "ext/strscan/extconf.rb" + files << "ext/strscan/strscan.c" + s.extensions = %w{ext/strscan/extconf.rb} + end + s.files = files s.required_ruby_version = ">= 2.4.0" - s.authors = ["Minero Aoki", "Sutou Kouhei"] - s.email = [nil, "kou@cozmixng.org"] + s.authors = ["Minero Aoki", "Sutou Kouhei", "Charles Oliver Nutter"] + s.email = [nil, "kou@cozmixng.org", "headius@headius.com"] s.homepage = "https://github.com/ruby/strscan" s.licenses = ["Ruby", "BSD-2-Clause"] end |