summaryrefslogtreecommitdiff
path: root/yarp
diff options
context:
space:
mode:
authorKevin Newton <kddnewton@gmail.com>2023-09-06 12:43:36 -0400
committergit <svn-admin@ruby-lang.org>2023-09-07 14:01:21 +0000
commit194584f20277a63164789aa83ae3841ef6e6eb8c (patch)
tree803d88a237fd2ee871b924f2f90a649c77f9127c /yarp
parent0adca625ee34ced92da68ba144de32f44e7300cd (diff)
[ruby/yarp] Introduce owned constants
Before this commit, constants in the constant pool were assumed to be slices of the source string. This works in _almost_ all cases. There are times, however, when a string needs to be synthesized. This can occur when passing in locals that need to be scoped through eval, or when generating method names like `foo=`. After this commit, there is a single-bit `owned` boolean on constants in the pool that indicates whether the contents are allocated memory owned by the pool rather than a slice of the source string. Owned contents are freed by the constant pool when the constant pool itself is freed. When serializing, the most significant bit in the location of the contents of the constant indicates whether or not it is owned. When it is owned, the entry holds 4 bytes for the buffer offset and 4 bytes for the length, instead of 4 bytes for the source offset and 4 bytes for the length. The contents of the owned constants are embedded into the buffer after the constant pool itself. https://github.com/ruby/yarp/commit/461c047365
Diffstat (limited to 'yarp')
-rw-r--r--yarp/templates/ext/yarp/api_node.c.erb11
-rw-r--r--yarp/templates/lib/yarp/serialize.rb.erb9
-rw-r--r--yarp/templates/src/serialize.c.erb27
-rw-r--r--yarp/util/yp_constant_pool.c49
-rw-r--r--yarp/util/yp_constant_pool.h15
-rw-r--r--yarp/yarp.c47
6 files changed, 132 insertions, 26 deletions
diff --git a/yarp/templates/ext/yarp/api_node.c.erb b/yarp/templates/ext/yarp/api_node.c.erb
index 0d075112c8..a9f5115d7f 100644
--- a/yarp/templates/ext/yarp/api_node.c.erb
+++ b/yarp/templates/ext/yarp/api_node.c.erb
@@ -144,30 +144,41 @@ yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding) {
// <%= field.name %>
<%- case field -%>
<%- when YARP::NodeField, YARP::OptionalNodeField -%>
+#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = rb_ary_pop(value_stack);
<%- when YARP::NodeListField -%>
+#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = rb_ary_new_capa(cast-><%= field.name %>.size);
for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
rb_ary_push(argv[<%= index %>], rb_ary_pop(value_stack));
}
<%- when YARP::StringField -%>
+#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = yp_string_new(&cast-><%= field.name %>, encoding);
<%- when YARP::ConstantField -%>
+#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+ assert(cast-><%= field.name %> != 0);
argv[<%= index %>] = rb_id2sym(constants[cast-><%= field.name %> - 1]);
<%- when YARP::OptionalConstantField -%>
argv[<%= index %>] = cast-><%= field.name %> == 0 ? Qnil : rb_id2sym(constants[cast-><%= field.name %> - 1]);
<%- when YARP::ConstantListField -%>
+#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = rb_ary_new_capa(cast-><%= field.name %>.size);
for (size_t index = 0; index < cast-><%= field.name %>.size; index++) {
+ assert(cast-><%= field.name %>.ids[index] != 0);
rb_ary_push(argv[<%= index %>], rb_id2sym(constants[cast-><%= field.name %>.ids[index] - 1]));
}
<%- when YARP::LocationField -%>
+#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = yp_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source);
<%- when YARP::OptionalLocationField -%>
+#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : yp_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source);
<%- when YARP::UInt32Field -%>
+#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = ULONG2NUM(cast-><%= field.name %>);
<%- when YARP::FlagsField -%>
+#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
argv[<%= index %>] = ULONG2NUM(node->flags >> <%= YARP::COMMON_FLAGS %>);
<%- else -%>
<%- raise -%>
diff --git a/yarp/templates/lib/yarp/serialize.rb.erb b/yarp/templates/lib/yarp/serialize.rb.erb
index c8d7f422cd..3f5329e3a9 100644
--- a/yarp/templates/lib/yarp/serialize.rb.erb
+++ b/yarp/templates/lib/yarp/serialize.rb.erb
@@ -163,11 +163,16 @@ module YARP
unless constant
offset = constant_pool_offset + index * 8
-
start = serialized.unpack1("L", offset: offset)
length = serialized.unpack1("L", offset: offset + 4)
- constant = input.byteslice(start, length).to_sym
+ constant =
+ if start.nobits?(1 << 31)
+ input.byteslice(start, length).to_sym
+ else
+ serialized.byteslice(start & ((1 << 31) - 1), length).to_sym
+ end
+
constant_pool[index] = constant
end
diff --git a/yarp/templates/src/serialize.c.erb b/yarp/templates/src/serialize.c.erb
index 8e0b0905dc..b60bce2113 100644
--- a/yarp/templates/src/serialize.c.erb
+++ b/yarp/templates/src/serialize.c.erb
@@ -206,12 +206,31 @@ yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer)
// If we find a constant at this index, serialize it at the correct
// index in the buffer.
if (constant->id != 0) {
- size_t buffer_offset = offset + ((constant->id - 1) * 8);
+ size_t buffer_offset = offset + ((((size_t) constant->id) - 1) * 8);
+
+ if (constant->owned) {
+     // Since this is an owned constant, we are going to write its
+     // contents into the buffer after the constant pool. So
+     // effectively in place of the source offset, we have a buffer
+     // offset. We will set the most significant bit to indicate that
+     // this is a buffer offset.
+     uint32_t content_offset = yp_sizet_to_u32(buffer->length);
+
+     // The mask is built from an unsigned operand: shifting a signed
+     // 32-bit 1 left by 31 overflows int, which is undefined behavior.
+     uint32_t owned_mask = ((uint32_t) 1) << 31;
+
+     assert(content_offset < owned_mask);
+     content_offset |= owned_mask;
+
+     // The offset slot is written before the contents are appended.
+     memcpy(buffer->value + buffer_offset, &content_offset, 4);
+     yp_buffer_append_bytes(buffer, constant->start, constant->length);
+ } else {
+     // Since this is a shared constant, we are going to write its
+     // source offset directly into the buffer.
+     uint32_t source_offset = yp_ptrdifft_to_u32(constant->start - parser->start);
+     memcpy(buffer->value + buffer_offset, &source_offset, 4);
+ }
- uint32_t source_offset = yp_ptrdifft_to_u32(constant->start - parser->start);
+ // Now we can write the length of the constant into the buffer.
uint32_t constant_length = yp_sizet_to_u32(constant->length);
-
- memcpy(buffer->value + buffer_offset, &source_offset, 4);
memcpy(buffer->value + buffer_offset + 4, &constant_length, 4);
}
}
diff --git a/yarp/util/yp_constant_pool.c b/yarp/util/yp_constant_pool.c
index 3ad241a9d1..8be96138a1 100644
--- a/yarp/util/yp_constant_pool.c
+++ b/yarp/util/yp_constant_pool.c
@@ -106,12 +106,11 @@ yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity) {
return true;
}
-// Insert a constant into a constant pool. Returns the id of the constant, or 0
-// if any potential calls to resize fail.
-yp_constant_id_t
+// Insert a constant into a constant pool and return its index in the pool.
+static size_t
yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
if (pool->size >= (pool->capacity / 4 * 3)) {
- if (!yp_constant_pool_resize(pool)) return 0;
+ if (!yp_constant_pool_resize(pool)) return pool->capacity;
}
size_t hash = yp_constant_pool_hash(start, length);
@@ -123,25 +122,59 @@ yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t l
// same as the content we are trying to insert. If it is, then we can
// return the id of the existing constant.
if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
- return pool->constants[index].id;
+ return index;
}
index = (index + 1) % pool->capacity;
}
- yp_constant_id_t id = (yp_constant_id_t)++pool->size;
+ // The id of a constant is stored in a 31-bit field, so the number of
+ // constants has to fit in 31 bits. The bound is spelled as a literal
+ // rather than `1 << 31` because that shift overflows a signed 32-bit int
+ // (undefined behavior), and converting the result to size_t typically
+ // sign-extends it into a bound this check could never reach.
+ pool->size++;
+ assert(pool->size <= 0x7FFFFFFF);
+
pool->constants[index] = (yp_constant_t) {
- .id = id,
+ .id = (unsigned int) (pool->size & 0x7FFFFFFF),
.start = start,
.length = length,
.hash = hash
};
- return id;
+ return index;
+}
+
+// Register a constant whose bytes are borrowed by the pool (this function does
+// not mark the entry as owned). Yields the id of the constant, or 0 when the
+// underlying insert reports that the pool could not be resized.
+yp_constant_id_t
+yp_constant_pool_insert_shared(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
+    size_t slot = yp_constant_pool_insert(pool, start, length);
+
+    // The underlying insert signals failure by handing back the capacity of
+    // the pool in place of a slot index.
+    if (slot == pool->capacity) {
+        return 0;
+    }
+
+    return (yp_constant_id_t) pool->constants[slot].id;
+}
+
+// Insert a constant into a constant pool from memory that is now owned by the
+// constant pool. Returns the id of the constant, or 0 if any potential calls to
+// resize fail.
+//
+// When an equal constant is already present, the existing entry is kept as it
+// is and the redundant buffer passed in is freed immediately. The existing
+// entry must not be flagged as owned in that case: it may be a slice of the
+// source, and flagging it would make yp_constant_pool_free call free() on
+// memory that was never allocated for the pool.
+//
+// NOTE(review): when 0 is returned the buffer has not been stored and is not
+// freed here; confirm that callers release it on failure.
+yp_constant_id_t
+yp_constant_pool_insert_owned(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
+    size_t index = yp_constant_pool_insert(pool, start, length);
+    if (index == pool->capacity) return 0;
+
+    yp_constant_t *constant = &pool->constants[index];
+
+    if (constant->start != start) {
+        // The slot holds different memory with the same contents, so this
+        // constant was already in the pool. Drop the duplicate buffer.
+        free((void *) start);
+    } else {
+        // The slot refers to the buffer we were given, so the pool is now
+        // responsible for freeing it.
+        constant->owned = true;
+    }
+
+    return ((yp_constant_id_t) constant->id);
}
// Free the memory associated with a constant pool.
void
yp_constant_pool_free(yp_constant_pool_t *pool) {
+ // For each constant in the current constant pool, free the contents if the
+ // contents are owned. The index is a size_t because the rest of this file
+ // compares the capacity against size_t slot indices; a 32-bit counter could
+ // wrap before reaching a larger capacity.
+ for (size_t index = 0; index < pool->capacity; index++) {
+     yp_constant_t *constant = &pool->constants[index];
+
+     // If an id is set on this constant, then we know we have content here.
+     if (constant->id != 0 && constant->owned) {
+         free((void *) constant->start);
+     }
+ }
+
free(pool->constants);
}
diff --git a/yarp/util/yp_constant_pool.h b/yarp/util/yp_constant_pool.h
index 1ac23cf88b..ecd3ff619e 100644
--- a/yarp/util/yp_constant_pool.h
+++ b/yarp/util/yp_constant_pool.h
@@ -8,6 +8,7 @@
#include "yarp/defines.h"
+#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
@@ -39,7 +40,8 @@ size_t yp_constant_id_list_memsize(yp_constant_id_list_t *list);
void yp_constant_id_list_free(yp_constant_id_list_t *list);
typedef struct {
- yp_constant_id_t id;
+ unsigned int id: 31;
+ bool owned: 1;
const uint8_t *start;
size_t length;
size_t hash;
@@ -57,9 +59,14 @@ typedef struct {
// Initialize a new constant pool with a given capacity.
bool yp_constant_pool_init(yp_constant_pool_t *pool, size_t capacity);
-// Insert a constant into a constant pool. Returns the id of the constant, or 0
-// if any potential calls to resize fail.
-yp_constant_id_t yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length);
+// Insert a constant into a constant pool that is a slice of a source string.
+// Returns the id of the constant, or 0 if any potential calls to resize fail.
+yp_constant_id_t yp_constant_pool_insert_shared(yp_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+// Insert a constant into a constant pool from memory that is now owned by the
+// constant pool. Returns the id of the constant, or 0 if any potential calls to
+// resize fail.
+yp_constant_id_t yp_constant_pool_insert_owned(yp_constant_pool_t *pool, const uint8_t *start, size_t length);
// Free the memory associated with a constant pool.
void yp_constant_pool_free(yp_constant_pool_t *pool);
diff --git a/yarp/yarp.c b/yarp/yarp.c
index 6f232712cd..2292807125 100644
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@@ -428,7 +428,13 @@ debug_lex_state_set(yp_parser_t *parser, yp_lex_state_t state, char const * call
// Retrieve the constant pool id for the given location.
static inline yp_constant_id_t
yp_parser_constant_id_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
- return yp_constant_pool_insert(&parser->constant_pool, start, (size_t) (end - start));
+ return yp_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
+}
+
+// Hand the buffer of `length` bytes at `start` to the constant pool of the
+// parser through yp_constant_pool_insert_owned and return the id it reports.
+static inline yp_constant_id_t
+yp_parser_constant_id_owned(yp_parser_t *parser, const uint8_t *start, size_t length) {
+    yp_constant_pool_t *pool = &parser->constant_pool;
+    yp_constant_id_t constant_id = yp_constant_pool_insert_owned(pool, start, length);
+    return constant_id;
}
// Retrieve the constant pool id for the given token.
@@ -4610,15 +4616,19 @@ yp_parser_local_depth(yp_parser_t *parser, yp_token_t *token) {
return -1;
}
-// Add a local variable from a location to the current scope.
-static yp_constant_id_t
-yp_parser_local_add_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
- yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, start, end);
-
+// Add a constant id to the local table of the current scope.
+static inline void
+yp_parser_local_add(yp_parser_t *parser, yp_constant_id_t constant_id) {
if (!yp_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
yp_constant_id_list_append(&parser->current_scope->locals, constant_id);
}
+}
+// Add a local variable from a location to the current scope.
+static yp_constant_id_t
+yp_parser_local_add_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+ yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, start, end);
+ if (constant_id != 0) yp_parser_local_add(parser, constant_id);
return constant_id;
}
@@ -4628,6 +4638,13 @@ yp_parser_local_add_token(yp_parser_t *parser, yp_token_t *token) {
yp_parser_local_add_location(parser, token->start, token->end);
}
+// Add a local variable from an owned string to the current scope. The buffer
+// at `start` is handed to the constant pool; if the pool reports failure by
+// returning an id of 0, the buffer is freed here instead so it does not leak.
+static inline void
+yp_parser_local_add_owned(yp_parser_t *parser, const uint8_t *start, size_t length) {
+    yp_constant_id_t constant_id = yp_parser_constant_id_owned(parser, start, length);
+
+    if (constant_id == 0) {
+        // An id of 0 is only produced when the pool could not be resized,
+        // which happens before the buffer is stored anywhere. Nothing else
+        // refers to it, so release it now.
+        free((void *) start);
+        return;
+    }
+
+    yp_parser_local_add(parser, constant_id);
+}
+
// Add a parameter name to the current scope and check whether the name of the
// parameter is unique or not.
static void
@@ -4644,7 +4661,9 @@ yp_parser_parameter_name_check(yp_parser_t *parser, yp_token_t *name) {
}
}
-// Pop the current scope off the scope stack.
+// Pop the current scope off the scope stack. Note that we specifically do not
+// free the associated constant list because we assume that we have already
+// transferred ownership of the list to the AST somewhere.
static void
yp_parser_scope_pop(yp_parser_t *parser) {
yp_scope_t *scope = parser->current_scope;
@@ -13757,7 +13776,10 @@ yp_parser_metadata(yp_parser_t *parser, const char *metadata) {
uint32_t local_size = yp_metadata_read_u32(metadata);
metadata += 4;
- yp_parser_local_add_location(parser, (const uint8_t *) metadata, (const uint8_t *) (metadata + local_size));
+ // Copy the name out of the metadata so that the constant pool can own
+ // the memory. If the allocation fails, skip this local rather than
+ // copying through a null pointer; the metadata cursor is still advanced
+ // past the name afterwards.
+ uint8_t *constant = malloc(local_size);
+ if (constant != NULL) {
+     memcpy(constant, metadata, local_size);
+     yp_parser_local_add_owned(parser, constant, (size_t) local_size);
+ }
metadata += local_size;
}
}
@@ -13896,6 +13918,15 @@ yp_parser_free(yp_parser_t *parser) {
yp_constant_pool_free(&parser->constant_pool);
yp_newline_list_free(&parser->newline_list);
+ while (parser->current_scope != NULL) {
+ // Normally, popping the scope doesn't free the locals since it is
+ // assumed that ownership has transferred to the AST. However if we have
+ // scopes while we're freeing the parser, it's likely they came from
+ // eval scopes and we need to free them explicitly here.
+ yp_constant_id_list_free(&parser->current_scope->locals);
+ yp_parser_scope_pop(parser);
+ }
+
while (parser->lex_modes.index >= YP_LEX_STACK_SIZE) {
lex_mode_pop(parser);
}