summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas E. Enebo <tom.enebo@gmail.com>2023-07-27 14:46:19 -0400
committerTakashi Kokubun <takashikkbn@gmail.com>2023-08-16 17:47:32 -0700
commit76c77b5eca3f02db894819823d3a1ab55455754d (patch)
tree1719a0bbe3cbe627e240a73d9e7ef479d45810de
parent0f8091947debc467b58a2b04c150dc7a38026ed3 (diff)
[ruby/yarp] WIP - Introduce contextually parsing programs vs evals
This is more or less the code I used in my JRuby proof of concept to parse evals. Evals depend on the variable scopes of their parent, so they produce a different syntax tree than a top-level program. Questions: 1. How does MRI currently compile evals? I cannot find anything. 2. This passes in a char * of data. It does not encode the variables we pass in because the system calling this already knows. Is this adequate, though? 3. Can I get guidance on how best to test this? https://github.com/ruby/yarp/commit/f441b6fd2c
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/8226
-rw-r--r--yarp/extension.c10
-rw-r--r--yarp/yarp.c42
-rw-r--r--yarp/yarp.h4
3 files changed, 43 insertions, 13 deletions
diff --git a/yarp/extension.c b/yarp/extension.c
index 4e801b3b05..7ede50bb0f 100644
--- a/yarp/extension.c
+++ b/yarp/extension.c
@@ -194,7 +194,7 @@ dump_input(input_t *input, const char *filepath) {
yp_parser_t parser;
yp_parser_init(&parser, input->source, input->size, filepath);
- yp_node_t *node = yp_parse(&parser);
+ yp_node_t *node = yp_parse(&parser, false);
yp_serialize(&parser, node, &buffer);
VALUE result = rb_str_new(buffer.value, buffer.length);
@@ -378,7 +378,7 @@ lex_input(input_t *input, const char *filepath) {
};
parser.lex_callback = &lex_callback;
- yp_node_t *node = yp_parse(&parser);
+ yp_node_t *node = yp_parse(&parser, false);
// Here we need to update the source range to have the correct newline
// offsets. We do it here because we've already created the object and given
@@ -439,7 +439,7 @@ parse_input(input_t *input, const char *filepath) {
yp_parser_t parser;
yp_parser_init(&parser, input->source, input->size, filepath);
- yp_node_t *node = yp_parse(&parser);
+ yp_node_t *node = yp_parse(&parser, false);
rb_encoding *encoding = rb_enc_find(parser.encoding.name);
VALUE source = yp_source_new(&parser);
@@ -582,7 +582,7 @@ memsize(VALUE self, VALUE string) {
size_t length = RSTRING_LEN(string);
yp_parser_init(&parser, RSTRING_PTR(string), length, NULL);
- yp_node_t *node = yp_parse(&parser);
+ yp_node_t *node = yp_parse(&parser, false);
yp_memsize_t memsize;
yp_node_memsize(node, &memsize);
@@ -608,7 +608,7 @@ profile_file(VALUE self, VALUE filepath) {
yp_parser_t parser;
yp_parser_init(&parser, input.source, input.size, checked);
- yp_node_t *node = yp_parse(&parser);
+ yp_node_t *node = yp_parse(&parser, false);
yp_node_destroy(&parser, node);
yp_parser_free(&parser);
diff --git a/yarp/yarp.c b/yarp/yarp.c
index 2ae3b3ea28..50871beedf 100644
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@@ -12735,8 +12735,8 @@ parse_expression(yp_parser_t *parser, yp_binding_power_t binding_power, const ch
}
static yp_node_t *
-parse_program(yp_parser_t *parser) {
- yp_parser_scope_push(parser, true);
+parse_program(yp_parser_t *parser, bool eval) {
+ yp_parser_scope_push(parser, !eval);
parser_lex(parser);
yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_MAIN);
@@ -12756,6 +12756,34 @@ parse_program(yp_parser_t *parser) {
return (yp_node_t *) yp_program_node_create(parser, &locals, statements);
}
+// Push the parent scopes of an eval onto the parser. `data` is assumed valid because it comes
+// from a trusted source (Ruby impl internals). Format: [num_scopes, (num_vars1, (var_char1*, 0)*)*]
+static void
+yp_populate_eval_scopes(yp_parser_t *parser, const char *data) {
+    const char *p = data;
+    // Counts are single bytes; go through unsigned char so values >= 0x80 do not sign-extend.
+    size_t number_of_scopes = (size_t) (unsigned char) *p;
+    p++;
+    for (size_t scope_index = 0; scope_index < number_of_scopes; scope_index++) {
+        size_t number_of_variables = (size_t) (unsigned char) *p++;
+
+        yp_parser_scope_push(parser, scope_index == 0); // only the first scope gets `true`
+
+        for (size_t variable_index = 0; variable_index < number_of_variables; variable_index++) {
+            // Each variable name extends up to (not including) its NUL terminator.
+            const char *eos = strchr(p, 0);
+            yp_token_t lvar = (yp_token_t) {
+                .type = YP_TOKEN_IDENTIFIER,
+                .start = p,
+                .end = eos
+            };
+            yp_parser_local_add_token(parser, &lvar);
+
+            p = eos + 1; // step past the terminator to the next name or count byte
+        }
+    }
+}
+
/******************************************************************************/
/* External functions */
/******************************************************************************/
@@ -12892,8 +12920,8 @@ yp_parser_free(yp_parser_t *parser) {
// Parse the Ruby source associated with the given parser and return the tree.
YP_EXPORTED_FUNCTION yp_node_t *
-yp_parse(yp_parser_t *parser) {
- return parse_program(parser);
+yp_parse(yp_parser_t *parser, bool eval) {
+ return parse_program(parser, eval);
}
YP_EXPORTED_FUNCTION void
@@ -12910,11 +12938,13 @@ yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
// Parse and serialize the AST represented by the given source to the given
// buffer.
YP_EXPORTED_FUNCTION void
-yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer) {
+yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *parent_scopes) {
+ bool eval = parent_scopes != NULL;
yp_parser_t parser;
yp_parser_init(&parser, source, size, NULL);
+ if (eval) yp_populate_eval_scopes(&parser, parent_scopes);
- yp_node_t *node = yp_parse(&parser);
+ yp_node_t *node = yp_parse(&parser, eval);
yp_serialize(&parser, node, buffer);
yp_node_destroy(&parser, node);
diff --git a/yarp/yarp.h b/yarp/yarp.h
index 4bbffdbb10..492038a6e2 100644
--- a/yarp/yarp.h
+++ b/yarp/yarp.h
@@ -51,7 +51,7 @@ YP_EXPORTED_FUNCTION void yp_parser_register_encoding_decode_callback(yp_parser_
YP_EXPORTED_FUNCTION void yp_parser_free(yp_parser_t *parser);
// Parse the Ruby source associated with the given parser and return the tree.
-YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser);
+YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser, bool eval);
// Pretty-prints the AST represented by the given node to the given buffer.
YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
@@ -61,7 +61,7 @@ YP_EXPORTED_FUNCTION void yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_
// Parse and serialize the AST represented by the given source to the given
// buffer.
-YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer);
+YP_EXPORTED_FUNCTION void yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer, const char *parent_scopes);
// Returns a string representation of the given token type.
YP_EXPORTED_FUNCTION const char * yp_token_type_to_str(yp_token_type_t token_type);