summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
authorSutou Kouhei <kou@cozmixng.org>2019-10-14 12:40:50 +0900
committerGitHub <noreply@github.com>2019-10-14 12:40:50 +0900
commit95c420c4a65ca2e7f3edf27134ad33691959296c (patch)
tree844e207d891996ecf7c60950d8a1e652be6d1938 /ext
parent6fa3492362dc91cfec7eb4fd55918791da5a34fb (diff)
Import StringScanner 1.0.3 (#2553)
Notes
Notes: Merged-By: kou <kou@clear-code.com>
Diffstat (limited to 'ext')
-rw-r--r--ext/strscan/extconf.rb3
-rw-r--r--ext/strscan/strscan.c288
-rw-r--r--ext/strscan/strscan.gemspec9
3 files changed, 233 insertions, 67 deletions
diff --git a/ext/strscan/extconf.rb b/ext/strscan/extconf.rb
index 714fa99fae..f0ecbf85d8 100644
--- a/ext/strscan/extconf.rb
+++ b/ext/strscan/extconf.rb
@@ -1,4 +1,5 @@
# frozen_string_literal: true
require 'mkmf'
-$INCFLAGS << " -I$(top_srcdir)"
+$INCFLAGS << " -I$(top_srcdir)" if $extmk
+have_func("onig_region_memsize", "ruby.h")
create_makefile 'strscan'
diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c
index 77a36fe323..99d6992601 100644
--- a/ext/strscan/strscan.c
+++ b/ext/strscan/strscan.c
@@ -11,9 +11,18 @@
#include "ruby/ruby.h"
#include "ruby/re.h"
#include "ruby/encoding.h"
-#include "regint.h"
-#define STRSCAN_VERSION "0.7.0"
+#ifdef RUBY_EXTCONF_H
+# include RUBY_EXTCONF_H
+#endif
+
+#ifdef HAVE_ONIG_REGION_MEMSIZE
+extern size_t onig_region_memsize(const struct re_registers *regs);
+#endif
+
+#include <stdbool.h>
+
+#define STRSCAN_VERSION "1.0.3"
/* =======================================================================
Data Type Definitions
@@ -41,6 +50,9 @@ struct strscanner
/* regexp used for last scan */
VALUE regex;
+
+ /* anchor mode */
+ bool fixed_anchor_p;
};
#define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
@@ -186,7 +198,11 @@ static size_t
strscan_memsize(const void *ptr)
{
const struct strscanner *p = ptr;
- return sizeof(*p) - sizeof(p->regs) + onig_region_memsize(&p->regs);
+ size_t size = sizeof(*p) - sizeof(p->regs);
+#ifdef HAVE_ONIG_REGION_MEMSIZE
+ size += onig_region_memsize(&p->regs);
+#endif
+ return size;
}
static const rb_data_type_t strscanner_type = {
@@ -208,19 +224,41 @@ strscan_s_allocate(VALUE klass)
}
/*
- * call-seq: StringScanner.new(string, dup = false)
+ * call-seq:
+ * StringScanner.new(string, fixed_anchor: false)
+ * StringScanner.new(string, dup = false)
*
* Creates a new StringScanner object to scan over the given +string+.
+ *
+ * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
+ * the string. Otherwise, +\A+ always matches the current position.
+ *
* +dup+ argument is obsolete and not used now.
*/
static VALUE
strscan_initialize(int argc, VALUE *argv, VALUE self)
{
struct strscanner *p;
- VALUE str, need_dup;
+ VALUE str, options;
p = check_strscan(self);
- rb_scan_args(argc, argv, "11", &str, &need_dup);
+ rb_scan_args(argc, argv, "11", &str, &options);
+ options = rb_check_hash_type(options);
+ if (!NIL_P(options)) {
+ VALUE fixed_anchor;
+ ID keyword_ids[1];
+ keyword_ids[0] = rb_intern("fixed_anchor");
+ rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
+ if (fixed_anchor == Qundef) {
+ p->fixed_anchor_p = false;
+ }
+ else {
+ p->fixed_anchor_p = RTEST(fixed_anchor);
+ }
+ }
+ else {
+ p->fixed_anchor_p = false;
+ }
StringValue(str);
p->str = str;
@@ -294,7 +332,7 @@ strscan_reset(VALUE self)
* terminate
* clear
*
- * Set the scan pointer to the end of the string and clear matching data.
+ * Sets the scan pointer to the end of the string and clear matching data.
*/
static VALUE
strscan_terminate(VALUE self)
@@ -425,7 +463,7 @@ strscan_get_charpos(VALUE self)
/*
* call-seq: pos=(n)
*
- * Set the byte position of the scan pointer.
+ * Sets the byte position of the scan pointer.
*
* s = StringScanner.new('test string')
* s.pos = 7 # -> 7
@@ -446,16 +484,79 @@ strscan_set_pos(VALUE self, VALUE v)
return INT2NUM(i);
}
+static inline UChar *
+match_target(struct strscanner *p)
+{
+ if (p->fixed_anchor_p) {
+ return (UChar *)S_PBEG(p);
+ }
+ else
+ {
+ return (UChar *)CURPTR(p);
+ }
+}
+
+static inline void
+set_registers(struct strscanner *p, size_t length)
+{
+ onig_region_clear(&(p->regs));
+ if (p->fixed_anchor_p) {
+ onig_region_set(&(p->regs), 0, p->curr, p->curr + length);
+ }
+ else
+ {
+ onig_region_set(&(p->regs), 0, 0, length);
+ }
+}
+
+static inline void
+succ(struct strscanner *p)
+{
+ if (p->fixed_anchor_p) {
+ p->curr = p->regs.end[0];
+ }
+ else
+ {
+ p->curr += p->regs.end[0];
+ }
+}
+
+static inline long
+last_match_length(struct strscanner *p)
+{
+ if (p->fixed_anchor_p) {
+ return p->regs.end[0] - p->prev;
+ }
+ else
+ {
+ return p->regs.end[0];
+ }
+}
+
+static inline long
+adjust_register_position(struct strscanner *p, long position)
+{
+ if (p->fixed_anchor_p) {
+ return position;
+ }
+ else {
+ return p->prev + position;
+ }
+}
+
static VALUE
-strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
+strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
{
- regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
struct strscanner *p;
- regex_t *re;
- long ret;
- int tmpreg;
- Check_Type(regex, T_REGEXP);
+ if (headonly) {
+ if (!RB_TYPE_P(pattern, T_REGEXP)) {
+ StringValue(pattern);
+ }
+ }
+ else {
+ Check_Type(pattern, T_REGEXP);
+ }
GET_SCANNER(self, p);
CLEAR_MATCH_STATUS(p);
@@ -463,49 +564,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
return Qnil;
}
- p->regex = regex;
- re = rb_reg_prepare_re(regex, p->str);
- tmpreg = re != RREGEXP_PTR(regex);
- if (!tmpreg) RREGEXP(regex)->usecnt++;
+ if (RB_TYPE_P(pattern, T_REGEXP)) {
+ regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
+ regex_t *re;
+ long ret;
+ int tmpreg;
+
+ p->regex = pattern;
+ re = rb_reg_prepare_re(pattern, p->str);
+ tmpreg = re != RREGEXP_PTR(pattern);
+ if (!tmpreg) RREGEXP(pattern)->usecnt++;
+
+ if (headonly) {
+ ret = onig_match(re,
+ match_target(p),
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
+ (UChar* )CURPTR(p),
+ &(p->regs),
+ ONIG_OPTION_NONE);
+ }
+ else {
+ ret = onig_search(re,
+ match_target(p),
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
+ (UChar* )CURPTR(p),
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
+ &(p->regs),
+ ONIG_OPTION_NONE);
+ }
+ if (!tmpreg) RREGEXP(pattern)->usecnt--;
+ if (tmpreg) {
+ if (RREGEXP(pattern)->usecnt) {
+ onig_free(re);
+ }
+ else {
+ onig_free(RREGEXP_PTR(pattern));
+ RREGEXP_PTR(pattern) = re;
+ }
+ }
- if (headonly) {
- ret = onig_match(re, (UChar* )CURPTR(p),
- (UChar* )(CURPTR(p) + S_RESTLEN(p)),
- (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE);
+ if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
+ if (ret < 0) {
+ /* not matched */
+ return Qnil;
+ }
}
else {
- ret = onig_search(re,
- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
- &(p->regs), ONIG_OPTION_NONE);
- }
- if (!tmpreg) RREGEXP(regex)->usecnt--;
- if (tmpreg) {
- if (RREGEXP(regex)->usecnt) {
- onig_free(re);
+ rb_enc_check(p->str, pattern);
+ if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
+ return Qnil;
}
- else {
- onig_free(RREGEXP_PTR(regex));
- RREGEXP_PTR(regex) = re;
+ if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
+ return Qnil;
}
- }
-
- if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
- if (ret < 0) {
- /* not matched */
- return Qnil;
+ set_registers(p, RSTRING_LEN(pattern));
}
MATCHED(p);
p->prev = p->curr;
+
if (succptr) {
- p->curr += p->regs.end[0];
- }
- if (getstr) {
- return extract_beg_len(p, p->prev, p->regs.end[0]);
+ succ(p);
}
- else {
- return INT2FIX(p->regs.end[0]);
+ {
+ const long length = last_match_length(p);
+ if (getstr) {
+ return extract_beg_len(p, p->prev, length);
+ }
+ else {
+ return INT2FIX(length);
+ }
}
}
@@ -520,7 +648,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
* p s.scan(/\w+/) # -> "test"
* p s.scan(/\w+/) # -> nil
* p s.scan(/\s+/) # -> " "
- * p s.scan(/\w+/) # -> "string"
+ * p s.scan("str") # -> "str"
+ * p s.scan(/\w+/) # -> "ing"
* p s.scan(/./) # -> nil
*
*/
@@ -539,6 +668,7 @@ strscan_scan(VALUE self, VALUE re)
* s = StringScanner.new('test string')
* p s.match?(/\w+/) # -> 4
* p s.match?(/\w+/) # -> 4
+ * p s.match?("test") # -> 4
* p s.match?(/\s+/) # -> nil
*/
static VALUE
@@ -560,7 +690,8 @@ strscan_match_p(VALUE self, VALUE re)
* p s.skip(/\w+/) # -> 4
* p s.skip(/\w+/) # -> nil
* p s.skip(/\s+/) # -> 1
- * p s.skip(/\w+/) # -> 6
+ * p s.skip("st") # -> 2
+ * p s.skip(/\w+/) # -> 4
* p s.skip(/./) # -> nil
*
*/
@@ -704,7 +835,12 @@ static void
adjust_registers_to_matched(struct strscanner *p)
{
onig_region_clear(&(p->regs));
- onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
+ if (p->fixed_anchor_p) {
+ onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
+ }
+ else {
+ onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
+ }
}
/*
@@ -738,8 +874,9 @@ strscan_getch(VALUE self)
p->curr += len;
MATCHED(p);
adjust_registers_to_matched(p);
- return extract_range(p, p->prev + p->regs.beg[0],
- p->prev + p->regs.end[0]);
+ return extract_range(p,
+ adjust_register_position(p, p->regs.beg[0]),
+ adjust_register_position(p, p->regs.end[0]));
}
/*
@@ -772,8 +909,9 @@ strscan_get_byte(VALUE self)
p->curr++;
MATCHED(p);
adjust_registers_to_matched(p);
- return extract_range(p, p->prev + p->regs.beg[0],
- p->prev + p->regs.end[0]);
+ return extract_range(p,
+ adjust_register_position(p, p->regs.beg[0]),
+ adjust_register_position(p, p->regs.end[0]));
}
/*
@@ -826,7 +964,7 @@ strscan_peep(VALUE self, VALUE vlen)
}
/*
- * Set the scan pointer to the previous position. Only one previous position is
+ * Sets the scan pointer to the previous position. Only one previous position is
* remembered, and it changes with each scanning operation.
*
* s = StringScanner.new('test string')
@@ -951,8 +1089,9 @@ strscan_matched(VALUE self)
GET_SCANNER(self, p);
if (! MATCHED_P(p)) return Qnil;
- return extract_range(p, p->prev + p->regs.beg[0],
- p->prev + p->regs.end[0]);
+ return extract_range(p,
+ adjust_register_position(p, p->regs.beg[0]),
+ adjust_register_position(p, p->regs.end[0]));
}
/*
@@ -1048,8 +1187,9 @@ strscan_aref(VALUE self, VALUE idx)
if (i >= p->regs.num_regs) return Qnil;
if (p->regs.beg[i] == -1) return Qnil;
- return extract_range(p, p->prev + p->regs.beg[i],
- p->prev + p->regs.end[i]);
+ return extract_range(p,
+ adjust_register_position(p, p->regs.beg[i]),
+ adjust_register_position(p, p->regs.end[i]));
}
/*
@@ -1098,8 +1238,9 @@ strscan_captures(VALUE self)
new_ary = rb_ary_new2(num_regs);
for (i = 1; i < num_regs; i++) {
- VALUE str = extract_range(p, p->prev + p->regs.beg[i],
- p->prev + p->regs.end[i]);
+ VALUE str = extract_range(p,
+ adjust_register_position(p, p->regs.beg[i]),
+ adjust_register_position(p, p->regs.end[i]));
rb_ary_push(new_ary, str);
}
@@ -1154,7 +1295,9 @@ strscan_pre_match(VALUE self)
GET_SCANNER(self, p);
if (! MATCHED_P(p)) return Qnil;
- return extract_range(p, 0, p->prev + p->regs.beg[0]);
+ return extract_range(p,
+ 0,
+ adjust_register_position(p, p->regs.beg[0]));
}
/*
@@ -1173,7 +1316,9 @@ strscan_post_match(VALUE self)
GET_SCANNER(self, p);
if (! MATCHED_P(p)) return Qnil;
- return extract_range(p, p->prev + p->regs.end[0], S_LEN(p));
+ return extract_range(p,
+ adjust_register_position(p, p->regs.end[0]),
+ S_LEN(p));
}
/*
@@ -1302,6 +1447,23 @@ inspect2(struct strscanner *p)
return rb_str_dump(str);
}
+/*
+ * call-seq:
+ * scanner.fixed_anchor? -> true or false
+ *
+ * Whether +scanner+ uses fixed anchor mode or not.
+ *
+ * If fixed anchor mode is used, +\A+ always matches the beginning of
+ * the string. Otherwise, +\A+ always matches the current position.
+ */
+static VALUE
+strscan_fixed_anchor_p(VALUE self)
+{
+ struct strscanner *p;
+ p = check_strscan(self);
+ return p->fixed_anchor_p ? Qtrue : Qfalse;
+}
+
/* =======================================================================
Ruby Interface
======================================================================= */
@@ -1488,4 +1650,6 @@ Init_strscan(void)
rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
+
+ rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
}
diff --git a/ext/strscan/strscan.gemspec b/ext/strscan/strscan.gemspec
index eefe8fbf2c..4759c6c860 100644
--- a/ext/strscan/strscan.gemspec
+++ b/ext/strscan/strscan.gemspec
@@ -1,19 +1,20 @@
# frozen_string_literal: true
Gem::Specification.new do |s|
s.name = "strscan"
- s.version = '1.0.0'
+ s.version = '1.0.3'
s.summary = "Provides lexical scanning operations on a String."
s.description = "Provides lexical scanning operations on a String."
s.require_path = %w{lib}
- s.files = %w{ext/strscan/extconf.rb ext/strscan/strscan.c ext/strscan/regenc.h ext/strscan/regint.h}
+ s.files = %w{ext/strscan/extconf.rb ext/strscan/strscan.c}
s.extensions = %w{ext/strscan/extconf.rb}
s.required_ruby_version = ">= 2.4.0"
- s.authors = ["Minero Aoki"]
- s.email = [nil]
+ s.authors = ["Minero Aoki", "Sutou Kouhei"]
+ s.email = [nil, "kou@cozmixng.org"]
s.homepage = "https://github.com/ruby/strscan"
s.license = "BSD-2-Clause"
s.add_development_dependency "rake-compiler"
+ s.add_development_dependency "benchmark-driver"
end