summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kevin Newton <kddnewton@gmail.com> 2023-08-03 13:25:38 -0400
committer: Takashi Kokubun <takashikkbn@gmail.com> 2023-08-16 17:47:32 -0700
commit: 0004565a919fde84def202432ef04cee0f7b689e (patch)
tree: ef0b16256ef59aa1ed2791cffc4fdaa5e5bf8b4c
parent: 1ad0d198760d0f3295da86f66d099f28c9b50df7 (diff)
[ruby/yarp] Use yp_memchr in regexp parsing
https://github.com/ruby/yarp/commit/08081dd24f
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/8226
-rw-r--r-- yarp/extension.c | 2
-rw-r--r-- yarp/regexp.c | 15
-rw-r--r-- yarp/regexp.h | 4
-rw-r--r-- yarp/yarp.c | 2
4 files changed, 15 insertions, 8 deletions
diff --git a/yarp/extension.c b/yarp/extension.c
index d4ce3625d8..4f2065a81b 100644
--- a/yarp/extension.c
+++ b/yarp/extension.c
@@ -379,7 +379,7 @@ named_captures(VALUE self, VALUE source) {
yp_string_list_t string_list;
yp_string_list_init(&string_list);
- if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list)) {
+ if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &yp_encoding_utf_8)) {
yp_string_list_free(&string_list);
return Qnil;
}
diff --git a/yarp/regexp.c b/yarp/regexp.c
index 4855859442..40d85c5227 100644
--- a/yarp/regexp.c
+++ b/yarp/regexp.c
@@ -6,16 +6,20 @@ typedef struct {
const char *cursor;
const char *end;
yp_string_list_t *named_captures;
+ bool encoding_changed;
+ yp_encoding_t *encoding;
} yp_regexp_parser_t;
// This initializes a new parser with the given source.
static void
-yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char *end, yp_string_list_t *named_captures) {
+yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char *end, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
*parser = (yp_regexp_parser_t) {
.start = start,
.cursor = start,
.end = end,
- .named_captures = named_captures
+ .named_captures = named_captures,
+ .encoding_changed = encoding_changed,
+ .encoding = encoding
};
}
@@ -60,7 +64,8 @@ yp_regexp_char_find(yp_regexp_parser_t *parser, char value) {
if (yp_regexp_char_is_eof(parser)) {
return false;
}
- const char *end = (const char *) memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor));
+
+ const char *end = (const char *) yp_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
if (end == NULL) {
return false;
}
@@ -542,8 +547,8 @@ yp_regexp_parse_pattern(yp_regexp_parser_t *parser) {
// Parse a regular expression and extract the names of all of the named capture
// groups.
YP_EXPORTED_FUNCTION bool
-yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures) {
+yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
yp_regexp_parser_t parser;
- yp_regexp_parser_init(&parser, source, source + size, named_captures);
+ yp_regexp_parser_init(&parser, source, source + size, named_captures, encoding_changed, encoding);
return yp_regexp_parse_pattern(&parser);
}
diff --git a/yarp/regexp.h b/yarp/regexp.h
index cf624db6b8..5a2f13047e 100644
--- a/yarp/regexp.h
+++ b/yarp/regexp.h
@@ -3,6 +3,8 @@
#include "yarp/defines.h"
#include "yarp/parser.h"
+#include "yarp/enc/yp_encoding.h"
+#include "yarp/util/yp_memchr.h"
#include "yarp/util/yp_string_list.h"
#include "yarp/util/yp_string.h"
@@ -12,6 +14,6 @@
// Parse a regular expression and extract the names of all of the named capture
// groups.
-YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures);
+YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding);
#endif
diff --git a/yarp/yarp.c b/yarp/yarp.c
index dd27c172f4..a8ff6c3859 100644
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@@ -12535,7 +12535,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
- if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures)) {
+ if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding)) {
for (size_t index = 0; index < named_captures.length; index++) {
yp_string_t *name = &named_captures.strings[index];
assert(name->type == YP_STRING_SHARED);