summaryrefslogtreecommitdiff
path: root/ext/strscan
diff options
context:
space:
mode:
Diffstat (limited to 'ext/strscan')
-rw-r--r--ext/strscan/depend4
-rw-r--r--ext/strscan/extconf.rb11
-rw-r--r--ext/strscan/strscan.c251
-rw-r--r--ext/strscan/strscan.gemspec23
4 files changed, 225 insertions, 64 deletions
diff --git a/ext/strscan/depend b/ext/strscan/depend
index 8fe3cb23d9..8dbae206d4 100644
--- a/ext/strscan/depend
+++ b/ext/strscan/depend
@@ -52,6 +52,7 @@ strscan.o: $(hdrdir)/ruby/internal/attr/noexcept.h
strscan.o: $(hdrdir)/ruby/internal/attr/noinline.h
strscan.o: $(hdrdir)/ruby/internal/attr/nonnull.h
strscan.o: $(hdrdir)/ruby/internal/attr/noreturn.h
+strscan.o: $(hdrdir)/ruby/internal/attr/packed_struct.h
strscan.o: $(hdrdir)/ruby/internal/attr/pure.h
strscan.o: $(hdrdir)/ruby/internal/attr/restrict.h
strscan.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h
@@ -121,7 +122,6 @@ strscan.o: $(hdrdir)/ruby/internal/intern/enumerator.h
strscan.o: $(hdrdir)/ruby/internal/intern/error.h
strscan.o: $(hdrdir)/ruby/internal/intern/eval.h
strscan.o: $(hdrdir)/ruby/internal/intern/file.h
-strscan.o: $(hdrdir)/ruby/internal/intern/gc.h
strscan.o: $(hdrdir)/ruby/internal/intern/hash.h
strscan.o: $(hdrdir)/ruby/internal/intern/io.h
strscan.o: $(hdrdir)/ruby/internal/intern/load.h
@@ -152,12 +152,12 @@ strscan.o: $(hdrdir)/ruby/internal/memory.h
strscan.o: $(hdrdir)/ruby/internal/method.h
strscan.o: $(hdrdir)/ruby/internal/module.h
strscan.o: $(hdrdir)/ruby/internal/newobj.h
-strscan.o: $(hdrdir)/ruby/internal/rgengc.h
strscan.o: $(hdrdir)/ruby/internal/scan_args.h
strscan.o: $(hdrdir)/ruby/internal/special_consts.h
strscan.o: $(hdrdir)/ruby/internal/static_assert.h
strscan.o: $(hdrdir)/ruby/internal/stdalign.h
strscan.o: $(hdrdir)/ruby/internal/stdbool.h
+strscan.o: $(hdrdir)/ruby/internal/stdckdint.h
strscan.o: $(hdrdir)/ruby/internal/symbol.h
strscan.o: $(hdrdir)/ruby/internal/value.h
strscan.o: $(hdrdir)/ruby/internal/value_type.h
diff --git a/ext/strscan/extconf.rb b/ext/strscan/extconf.rb
index f0ecbf85d8..bd65606a4e 100644
--- a/ext/strscan/extconf.rb
+++ b/ext/strscan/extconf.rb
@@ -1,5 +1,10 @@
# frozen_string_literal: true
require 'mkmf'
-$INCFLAGS << " -I$(top_srcdir)" if $extmk
-have_func("onig_region_memsize", "ruby.h")
-create_makefile 'strscan'
+# Generate a real extension Makefile only when running on CRuby
+# (RUBY_ENGINE == 'ruby'); every other engine takes the else branch below.
+if RUBY_ENGINE == 'ruby'
+ $INCFLAGS << " -I$(top_srcdir)" if $extmk
+ have_func("onig_region_memsize", "ruby.h")
+ # Probe for rb_reg_onig_match so the C source can tell whether ruby.h
+ # already declares it.
+ have_func("rb_reg_onig_match", "ruby.h")
+ create_makefile 'strscan'
+else
+ # Write the output of mkmf's dummy_makefile as the Makefile instead of
+ # calling create_makefile.
+ File.write('Makefile', dummy_makefile("").join)
+end
diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c
index e1426380b4..70a3ce5260 100644
--- a/ext/strscan/strscan.c
+++ b/ext/strscan/strscan.c
@@ -22,7 +22,7 @@ extern size_t onig_region_memsize(const struct re_registers *regs);
#include <stdbool.h>
-#define STRSCAN_VERSION "3.0.1"
+#define STRSCAN_VERSION "3.1.1"
/* =======================================================================
Data Type Definitions
@@ -435,11 +435,11 @@ strscan_get_pos(VALUE self)
*
* In short, it's a 0-based index into the string.
*
- * s = StringScanner.new("abcädeföghi")
- * s.charpos # -> 0
- * s.scan_until(/ä/) # -> "abcä"
- * s.pos # -> 5
- * s.charpos # -> 4
+ * s = StringScanner.new("abc\u00e4def\u00f6ghi")
+ * s.charpos # -> 0
+ * s.scan_until(/\u00e4/) # -> "abc\u00E4"
+ * s.pos # -> 5
+ * s.charpos # -> 4
*/
static VALUE
strscan_get_charpos(VALUE self)
@@ -539,6 +539,68 @@ adjust_register_position(struct strscanner *p, long position)
}
}
+/* rb_reg_onig_match is available in Ruby 3.3 and later. */
+#ifndef HAVE_RB_REG_ONIG_MATCH
+/*
+ * Local stand-in for rb_reg_onig_match.
+ *
+ * Obtains a regex_t for `re` through rb_reg_prepare_re(re, str), calls
+ * `match` with that regex_t (forwarding `str`, `regs` and `args`), and
+ * returns whatever the callback returned.
+ *
+ * Raises ScanError when the callback's result is negative and is not
+ * ONIG_MISMATCH.
+ */
+static OnigPosition
+rb_reg_onig_match(VALUE re, VALUE str,
+ OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
+ void *args, struct re_registers *regs)
+{
+ regex_t *reg = rb_reg_prepare_re(re, str);
+
+ /* True when rb_reg_prepare_re handed back a regex_t other than the one
+ * currently stored in `re`. */
+ bool tmpreg = reg != RREGEXP_PTR(re);
+ /* While the stored regex_t is in use by the callback, keep its use
+ * count raised. */
+ if (!tmpreg) RREGEXP(re)->usecnt++;
+
+ OnigPosition result = match(reg, str, regs, args);
+
+ if (!tmpreg) RREGEXP(re)->usecnt--;
+ if (tmpreg) {
+ if (RREGEXP(re)->usecnt) {
+ /* The stored regex_t still has a non-zero use count: drop the
+ * temporary one and leave `re` untouched. */
+ onig_free(reg);
+ }
+ else {
+ /* Use count is zero: free the stored regex_t and store the
+ * temporary one in `re` in its place. */
+ onig_free(RREGEXP_PTR(re));
+ RREGEXP_PTR(re) = reg;
+ }
+ }
+
+ /* ONIG_MISMATCH is passed back to the caller; every other negative
+ * result is reported as a ScanError with the message below. */
+ if (result < 0) {
+ if (result != ONIG_MISMATCH) {
+ rb_raise(ScanError, "regexp buffer overflow");
+ }
+ }
+
+ return result;
+}
+#endif
+
+/*
+ * Callback for rb_reg_onig_match: runs onig_match at the scan pointer.
+ *
+ * reg      - compiled regex to run.
+ * str      - unused; the subject comes from the scanner in args_ptr.
+ * regs     - registers filled in by onig_match.
+ * args_ptr - the struct strscanner being scanned.
+ *
+ * Returns the value returned by onig_match.
+ */
+static OnigPosition
+strscan_match(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr)
+{
+    struct strscanner *scanner = args_ptr;
+    UChar *subject_end = (UChar *)(CURPTR(scanner) + S_RESTLEN(scanner));
+    UChar *match_at = (UChar *)CURPTR(scanner);
+
+    return onig_match(reg, match_target(scanner), subject_end, match_at,
+                      regs, ONIG_OPTION_NONE);
+}
+
+/*
+ * Callback for rb_reg_onig_match: runs onig_search from the scan pointer
+ * up to the end of the remaining input.
+ *
+ * reg      - compiled regex to run.
+ * str      - unused; the subject comes from the scanner in args_ptr.
+ * regs     - registers filled in by onig_search.
+ * args_ptr - the struct strscanner being scanned.
+ *
+ * Returns the value returned by onig_search.
+ */
+static OnigPosition
+strscan_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr)
+{
+    struct strscanner *scanner = args_ptr;
+    UChar *subject_end = (UChar *)(CURPTR(scanner) + S_RESTLEN(scanner));
+    UChar *search_start = (UChar *)CURPTR(scanner);
+    /* The search range ends at the same address as the subject. */
+    UChar *search_range = (UChar *)(CURPTR(scanner) + S_RESTLEN(scanner));
+
+    return onig_search(reg, match_target(scanner), subject_end, search_start,
+                       search_range, regs, ONIG_OPTION_NONE);
+}
+
static VALUE
strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
{
@@ -560,47 +622,14 @@ strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly
}
if (RB_TYPE_P(pattern, T_REGEXP)) {
- regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
- regex_t *re;
- long ret;
- int tmpreg;
-
p->regex = pattern;
- re = rb_reg_prepare_re(pattern, p->str);
- tmpreg = re != RREGEXP_PTR(pattern);
- if (!tmpreg) RREGEXP(pattern)->usecnt++;
-
- if (headonly) {
- ret = onig_match(re,
- match_target(p),
- (UChar* )(CURPTR(p) + S_RESTLEN(p)),
- (UChar* )CURPTR(p),
- &(p->regs),
- ONIG_OPTION_NONE);
- }
- else {
- ret = onig_search(re,
- match_target(p),
- (UChar* )(CURPTR(p) + S_RESTLEN(p)),
- (UChar* )CURPTR(p),
- (UChar* )(CURPTR(p) + S_RESTLEN(p)),
- &(p->regs),
- ONIG_OPTION_NONE);
- }
- if (!tmpreg) RREGEXP(pattern)->usecnt--;
- if (tmpreg) {
- if (RREGEXP(pattern)->usecnt) {
- onig_free(re);
- }
- else {
- onig_free(RREGEXP_PTR(pattern));
- RREGEXP_PTR(pattern) = re;
- }
- }
+ OnigPosition ret = rb_reg_onig_match(pattern,
+ p->str,
+ headonly ? strscan_match : strscan_search,
+ (void *)p,
+ &(p->regs));
- if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
- if (ret < 0) {
- /* not matched */
+ if (ret == ONIG_MISMATCH) {
return Qnil;
}
}
@@ -874,6 +903,57 @@ strscan_getch(VALUE self)
}
/*
+ * Scans one byte and returns it as an integer.
+ * This method is not multibyte character sensitive.
+ * See also: #getch.
+ *
+ * s = StringScanner.new('ab')
+ * s.scan_byte # => 97
+ * s.scan_byte # => 98
+ * s.scan_byte # => nil
+ *
+ * s = StringScanner.new("\244\242".force_encoding("euc-jp"))
+ * s.scan_byte # => 0xA4
+ * s.scan_byte # => 0xA2
+ * s.scan_byte # => nil
+ */
+static VALUE
+strscan_scan_byte(VALUE self)
+{
+    struct strscanner *scanner;
+    VALUE result = Qnil;
+
+    GET_SCANNER(self, scanner);
+    CLEAR_MATCH_STATUS(scanner);
+    if (!EOS_P(scanner)) {
+        /* Read the byte before moving the scan pointer past it. */
+        result = INT2FIX((unsigned char)*CURPTR(scanner));
+        scanner->prev = scanner->curr;
+        scanner->curr += 1;
+        MATCHED(scanner);
+        adjust_registers_to_matched(scanner);
+    }
+    return result;
+}
+
+/*
+ * Returns the byte at the scan pointer as an integer without moving the
+ * scan pointer, or +nil+ when the scanner is at the end of the string.
+ *
+ *   s = StringScanner.new('ab')
+ *   s.peek_byte # => 97
+ */
+static VALUE
+strscan_peek_byte(VALUE self)
+{
+    struct strscanner *scanner;
+
+    GET_SCANNER(self, scanner);
+    return EOS_P(scanner) ? Qnil : INT2FIX((unsigned char)*CURPTR(scanner));
+}
+
+/*
* Scans one byte and returns it.
* This method is not multibyte character sensitive.
* See also: #getch.
@@ -1038,8 +1118,9 @@ strscan_empty_p(VALUE self)
* This method is obsolete; use #eos? instead.
*
* s = StringScanner.new('test string')
- * s.eos? # These two
- * s.rest? # are opposites.
+ * # These two are opposites
+ * s.eos? # => false
+ * s.rest? # => true
*/
static VALUE
strscan_rest_p(VALUE self)
@@ -1213,10 +1294,10 @@ strscan_size(VALUE self)
* If nothing was priorly matched, it returns nil.
*
* s = StringScanner.new("Fri Dec 12 1975 14:39")
- * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
- * s.captures # -> ["Fri", "Dec", "12"]
- * s.scan(/(\w+) (\w+) (\d+) /) # -> nil
- * s.captures # -> nil
+ * s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> "Fri Dec 12 "
+ * s.captures # -> ["Fri", "Dec", "12", nil]
+ * s.scan(/(\w+) (\w+) (\d+) (1980)?/) # -> nil
+ * s.captures # -> nil
*/
static VALUE
strscan_captures(VALUE self)
@@ -1232,9 +1313,13 @@ strscan_captures(VALUE self)
new_ary = rb_ary_new2(num_regs);
for (i = 1; i < num_regs; i++) {
- VALUE str = extract_range(p,
- adjust_register_position(p, p->regs.beg[i]),
- adjust_register_position(p, p->regs.end[i]));
+ VALUE str;
+ if (p->regs.beg[i] == -1)
+ str = Qnil;
+ else
+ str = extract_range(p,
+ adjust_register_position(p, p->regs.beg[i]),
+ adjust_register_position(p, p->regs.end[i]));
rb_ary_push(new_ary, str);
}
@@ -1458,6 +1543,56 @@ strscan_fixed_anchor_p(VALUE self)
return p->fixed_anchor_p ? Qtrue : Qfalse;
}
+/* Argument bundle handed to named_captures_iter via onig_foreach_name. */
+typedef struct {
+ VALUE self; /* receiver passed to strscan_aref for each group lookup */
+ VALUE captures; /* hash that receives one entry per group name */
+} named_captures_data;
+
+/*
+ * onig_foreach_name callback: stores one named group in the result hash.
+ *
+ * name, name_end      - bounds of the group name; copied into a new String
+ *                       that becomes the hash key.
+ * back_num, back_refs - the group numbers registered under this name.
+ * regex               - unused.
+ * arg                 - pointer to a named_captures_data.
+ *
+ * When several groups share the name, the stored value is the last group in
+ * back_refs whose strscan_aref lookup is not nil. This keeps a later group
+ * that did not take part in the match from overwriting an earlier group that
+ * did, which is how MatchData#named_captures resolves duplicate names. The
+ * value is nil only when every lookup is nil.
+ *
+ * Always returns 0.
+ */
+static int
+named_captures_iter(const OnigUChar *name,
+                    const OnigUChar *name_end,
+                    int back_num,
+                    int *back_refs,
+                    OnigRegex regex,
+                    void *arg)
+{
+    named_captures_data *data = arg;
+
+    VALUE key = rb_str_new((const char *)name, name_end - name);
+    VALUE value = RUBY_Qnil;
+    int i;
+    for (i = 0; i < back_num; i++) {
+        VALUE group = strscan_aref(data->self, INT2NUM(back_refs[i]));
+        if (!RB_NIL_P(group)) {
+            value = group;
+        }
+    }
+    rb_hash_aset(data->captures, key, value);
+    return 0;
+}
+
+/*
+ * call-seq:
+ * scanner.named_captures -> hash
+ *
+ * Returns a hash that maps the name of each named capture group in the
+ * last regular expression used to the value captured for that name.
+ *
+ * scan = StringScanner.new('foobarbaz')
+ * scan.match?(/(?<f>foo)(?<r>bar)(?<z>baz)/)
+ * scan.named_captures # -> {"f"=>"foo", "r"=>"bar", "z"=>"baz"}
+ */
+static VALUE
+strscan_named_captures(VALUE self)
+{
+    struct strscanner *scanner;
+    named_captures_data iter_arg;
+
+    GET_SCANNER(self, scanner);
+    iter_arg.self = self;
+    iter_arg.captures = rb_hash_new();
+
+    /* With no regex recorded on the scanner the hash is returned empty. */
+    if (RB_NIL_P(scanner->regex)) {
+        return iter_arg.captures;
+    }
+
+    onig_foreach_name(RREGEXP_PTR(scanner->regex), named_captures_iter, &iter_arg);
+    return iter_arg.captures;
+}
+
/* =======================================================================
Ruby Interface
======================================================================= */
@@ -1468,6 +1603,8 @@ strscan_fixed_anchor_p(VALUE self)
* StringScanner provides for lexical scanning operations on a String. Here is
* an example of its usage:
*
+ * require 'strscan'
+ *
* s = StringScanner.new('This is an example string')
* s.eos? # -> false
*
@@ -1519,6 +1656,7 @@ strscan_fixed_anchor_p(VALUE self)
*
* - #getch
* - #get_byte
+ * - #scan_byte
* - #scan
* - #scan_until
* - #skip
@@ -1531,6 +1669,7 @@ strscan_fixed_anchor_p(VALUE self)
* - #exist?
* - #match?
* - #peek
+ * - #peek_byte
*
* === Finding Where we Are
*
@@ -1622,7 +1761,9 @@ Init_strscan(void)
rb_define_method(StringScanner, "getch", strscan_getch, 0);
rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0);
rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0);
+ rb_define_method(StringScanner, "scan_byte", strscan_scan_byte, 0);
rb_define_method(StringScanner, "peek", strscan_peek, 1);
+ rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
rb_define_method(StringScanner, "peep", strscan_peep, 1);
rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
@@ -1650,4 +1791,6 @@ Init_strscan(void)
rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
+
+ rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0);
}
diff --git a/ext/strscan/strscan.gemspec b/ext/strscan/strscan.gemspec
index 5d8119ea4c..8a61c7abe6 100644
--- a/ext/strscan/strscan.gemspec
+++ b/ext/strscan/strscan.gemspec
@@ -16,13 +16,26 @@ Gem::Specification.new do |s|
s.summary = "Provides lexical scanning operations on a String."
s.description = "Provides lexical scanning operations on a String."
- s.require_path = %w{lib}
- s.files = %w{ext/strscan/extconf.rb ext/strscan/strscan.c}
- s.extensions = %w{ext/strscan/extconf.rb}
+ files = [
+ "COPYING",
+ "LICENSE.txt",
+ ]
+ if RUBY_ENGINE == "jruby"
+ s.require_paths = %w{ext/jruby/lib lib}
+ files << "ext/jruby/lib/strscan.rb"
+ files << "lib/strscan.jar"
+ s.platform = "java"
+ else
+ s.require_paths = %w{lib}
+ files << "ext/strscan/extconf.rb"
+ files << "ext/strscan/strscan.c"
+ s.extensions = %w{ext/strscan/extconf.rb}
+ end
+ s.files = files
s.required_ruby_version = ">= 2.4.0"
- s.authors = ["Minero Aoki", "Sutou Kouhei"]
- s.email = [nil, "kou@cozmixng.org"]
+ s.authors = ["Minero Aoki", "Sutou Kouhei", "Charles Oliver Nutter"]
+ s.email = [nil, "kou@cozmixng.org", "headius@headius.com"]
s.homepage = "https://github.com/ruby/strscan"
s.licenses = ["Ruby", "BSD-2-Clause"]
end