summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Newton <kddnewton@gmail.com>2024-04-05 11:54:49 -0400
committergit <svn-admin@ruby-lang.org>2024-04-05 19:23:57 +0000
commit358aeb103b308957bda1b03b760f08848b454cef (patch)
treec229f75846f1984e9cf5f1b71d496d6922dc1f42
parent1f84e1099e76f9a8fb4c40ac3e35e0c5626d3b88 (diff)
[ruby/prism] Switch locals to use a hash
https://github.com/ruby/prism/commit/f38946021e
-rw-r--r--prism/parser.h24
-rw-r--r--prism/prism.c200
-rw-r--r--prism/util/pm_constant_pool.c24
-rw-r--r--prism/util/pm_constant_pool.h17
4 files changed, 248 insertions, 17 deletions
diff --git a/prism/parser.h b/prism/parser.h
index 889a187149..56d6fe8357 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -510,6 +510,28 @@ static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMEN
static const pm_shareable_constant_value_t PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY = 0x4;
/**
+ * This tracks an individual local variable in a certain lexical context, as
+ * well as the number of times is it read.
+ */
+typedef struct {
+ pm_constant_id_t name;
+ uint32_t index;
+ uint32_t reads;
+ uint32_t hash;
+} pm_local_t;
+
+/**
+ * This is a set of local variables in a certain lexical context (method, class,
+ * module, etc.). We need to track how many times these variables are read in
+ * order to warn if they only get written.
+ */
+typedef struct pm_locals {
+ uint32_t size;
+ uint32_t capacity;
+ pm_local_t *locals;
+} pm_locals_t;
+
+/**
* This struct represents a node in a linked list of scopes. Some scopes can see
* into their parent scopes, while others cannot.
*/
@@ -518,7 +540,7 @@ typedef struct pm_scope {
struct pm_scope *previous;
/** The IDs of the locals in the given scope. */
- pm_constant_id_list_t locals;
+ pm_locals_t locals;
/**
* This is a bitfield that indicates the parameters that are being used in
diff --git a/prism/prism.c b/prism/prism.c
index 74b738a416..efad4bf65b 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -673,6 +673,168 @@ pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id
PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
/******************************************************************************/
+/* Local variable-related functions */
+/******************************************************************************/
+
+static void
+pm_locals_free(pm_locals_t *locals) {
+ if (locals->capacity > 0) {
+ xfree(locals->locals);
+ }
+}
+
+/**
+ * Use the mid-square method to hash the given constant id.
+ */
+static uint32_t
+pm_locals_hash(pm_constant_id_t name) {
+ uint64_t square = (uint64_t) name * (uint64_t) name;
+
+ uint32_t num_digits = (uint32_t) floor(log10(square) + 1);
+ uint32_t start = num_digits / 2;
+ uint32_t end = start + 1;
+
+ return (uint32_t) (((uint64_t) ((square / pow(10, start))) % (uint64_t) pow(10, end)));
+}
+
+static void
+pm_locals_rehash(pm_locals_t *locals) {
+ uint32_t next_capacity = locals->capacity == 0 ? 8 : (locals->capacity * 2);
+ assert(next_capacity > locals->capacity);
+
+ pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
+ if (next_locals == NULL) abort();
+
+ uint32_t mask = next_capacity - 1;
+ for (uint32_t index = 0; index < locals->capacity; index++) {
+ pm_local_t *local = &locals->locals[index];
+
+ if (local->name != PM_CONSTANT_ID_UNSET) {
+ uint32_t hash = local->hash;
+
+ while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
+ next_locals[hash & mask] = *local;
+ }
+ }
+
+ pm_locals_free(locals);
+ locals->locals = next_locals;
+ locals->capacity = next_capacity;
+}
+
+/**
+ * Add a new local to the set of locals. This will automatically rehash the
+ * locals if the size is greater than 3/4 of the capacity.
+ *
+ * Returns true if the local was added, and false if the local already exists.
+ */
+static bool
+pm_locals_write(pm_locals_t *locals, pm_constant_id_t name) {
+ if (locals->size >= (locals->capacity / 4 * 3)) {
+ pm_locals_rehash(locals);
+ }
+
+ uint32_t mask = locals->capacity - 1;
+ uint32_t hash = pm_locals_hash(name);
+ uint32_t initial_hash = hash;
+
+ do {
+ pm_local_t *local = &locals->locals[hash & mask];
+
+ if (local->name == PM_CONSTANT_ID_UNSET) {
+ *local = (pm_local_t) {
+ .name = name,
+ .index = locals->size++,
+ .reads = 0,
+ .hash = hash
+ };
+ return true;
+ } else if (local->name == name) {
+ return false;
+ } else {
+ hash++;
+ }
+ } while ((hash & mask) != initial_hash);
+
+ assert(false && "unreachable");
+}
+
+/**
+ * Finds the index of a local variable in the locals set. If it is not found,
+ * this returns UINT32_MAX.
+ */
+static uint32_t
+pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
+ if (locals->capacity == 0) return UINT32_MAX;
+
+ uint32_t mask = locals->capacity - 1;
+ uint32_t hash = pm_locals_hash(name);
+ uint32_t initial_hash = hash & mask;
+
+ do {
+ pm_local_t *local = &locals->locals[hash & mask];
+
+ if (local->name == PM_CONSTANT_ID_UNSET) {
+ return UINT32_MAX;
+ } else if (local->name == name) {
+ return hash & mask;
+ } else {
+ hash++;
+ }
+ } while ((hash & mask) != initial_hash);
+
+ return UINT32_MAX;
+}
+
+/**
+ * Called when a variable is read in a certain lexical context. Tracks the read
+ * by adding to the reads count.
+ */
+// static void
+// pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
+// uint32_t index = pm_locals_find(locals, name);
+// assert(index != UINT32_MAX);
+
+// pm_local_t *local = &locals->locals[index];
+// assert(local->reads < UINT32_MAX);
+
+// local->reads++;
+// }
+
+/**
+ * Called when a variable read is transformed into a variable write, because a
+ * write operator is found after the variable name.
+ */
+// static void
+// pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
+// uint32_t index = pm_locals_find(locals, name);
+// assert(index != UINT32_MAX);
+
+// pm_local_t *local = &locals->locals[index];
+// assert(local->reads > 0);
+
+// local->reads--;
+// }
+
+/**
+ * Write out the locals into the given list of constant ids in the correct
+ * order. This is used to set the list of locals on the nodes in the tree once
+ * we're sure no additional locals will be added to the set.
+ */
+static void
+pm_locals_order(pm_locals_t *locals, pm_constant_id_list_t *list) {
+ pm_constant_id_list_init_capacity(list, locals->size);
+
+ for (uint32_t index = 0; index < locals->capacity; index++) {
+ pm_local_t *local = &locals->locals[index];
+
+ if (local->name != PM_CONSTANT_ID_UNSET) {
+ pm_constant_id_list_insert(list, (size_t) local->index, local->name);
+ }
+ }
+}
+
+/******************************************************************************/
/* Node-related functions */
/******************************************************************************/
@@ -7045,7 +7207,7 @@ pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant
int depth = 0;
while (scope != NULL) {
- if (pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
+ if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
if (scope->closed) break;
scope = scope->previous;
@@ -7070,9 +7232,7 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
*/
static inline void
pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id) {
- if (!pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
- pm_constant_id_list_append(&parser->current_scope->locals, constant_id);
- }
+ pm_locals_write(&parser->current_scope->locals, constant_id);
}
/**
@@ -7175,7 +7335,7 @@ pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
// whether it's already in the current scope.
pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
- if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
+ if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
// Add an error if the parameter doesn't start with _ and has been seen before
if ((name->start < name->end) && (*name->start != '_')) {
pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
@@ -7186,14 +7346,13 @@ pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
}
/**
- * Pop the current scope off the scope stack. Note that we specifically do not
- * free the associated constant list because we assume that we have already
- * transferred ownership of the list to the AST somewhere.
+ * Pop the current scope off the scope stack.
*/
static void
pm_parser_scope_pop(pm_parser_t *parser) {
pm_scope_t *scope = parser->current_scope;
parser->current_scope = scope->previous;
+ pm_locals_free(&scope->locals);
xfree(scope);
}
@@ -13849,7 +14008,8 @@ parse_block(pm_parser_t *parser) {
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
}
- pm_constant_id_list_t locals = parser->current_scope->locals;
+ pm_constant_id_list_t locals;
+ pm_locals_order(&parser->current_scope->locals, &locals);
pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
pm_parser_scope_pop(parser);
@@ -17173,7 +17333,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
- pm_constant_id_list_t locals = parser->current_scope->locals;
+
+ pm_constant_id_list_t locals;
+ pm_locals_order(&parser->current_scope->locals, &locals);
pm_parser_scope_pop(parser);
pm_do_loop_stack_pop(parser);
@@ -17234,7 +17396,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
}
- pm_constant_id_list_t locals = parser->current_scope->locals;
+ pm_constant_id_list_t locals;
+ pm_locals_order(&parser->current_scope->locals, &locals);
pm_parser_scope_pop(parser);
pm_do_loop_stack_pop(parser);
@@ -17509,7 +17672,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
end_keyword = parser->previous;
}
- pm_constant_id_list_t locals = parser->current_scope->locals;
+ pm_constant_id_list_t locals;
+ pm_locals_order(&parser->current_scope->locals, &locals);
pm_parser_scope_pop(parser);
pm_parser_current_param_name_restore(parser, saved_param_name);
@@ -17774,7 +17938,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
statements = (pm_node_t *) parse_rescues_implicit_begin(parser, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE);
}
- pm_constant_id_list_t locals = parser->current_scope->locals;
+ pm_constant_id_list_t locals;
+ pm_locals_order(&parser->current_scope->locals, &locals);
+
pm_parser_scope_pop(parser);
pm_parser_current_param_name_restore(parser, saved_param_name);
@@ -18541,7 +18707,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
}
- pm_constant_id_list_t locals = parser->current_scope->locals;
+ pm_constant_id_list_t locals;
+ pm_locals_order(&parser->current_scope->locals, &locals);
pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
pm_parser_scope_pop(parser);
@@ -19815,7 +19982,9 @@ parse_program(pm_parser_t *parser) {
if (!statements) {
statements = pm_statements_node_create(parser);
}
- pm_constant_id_list_t locals = parser->current_scope->locals;
+
+ pm_constant_id_list_t locals;
+ pm_locals_order(&parser->current_scope->locals, &locals);
pm_parser_scope_pop(parser);
// If this is an empty file, then we're still going to parse all of the
@@ -20140,7 +20309,6 @@ pm_parser_free(pm_parser_t *parser) {
// assumed that ownership has transferred to the AST. However if we have
// scopes while we're freeing the parser, it's likely they came from
// eval scopes and we need to free them explicitly here.
- pm_constant_id_list_free(&parser->current_scope->locals);
pm_parser_scope_pop(parser);
}
diff --git a/prism/util/pm_constant_pool.c b/prism/util/pm_constant_pool.c
index 741a036cf7..fbd235d607 100644
--- a/prism/util/pm_constant_pool.c
+++ b/prism/util/pm_constant_pool.c
@@ -11,6 +11,18 @@ pm_constant_id_list_init(pm_constant_id_list_t *list) {
}
/**
+ * Initialize a list of constant ids with a given capacity.
+ */
+void
+pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity) {
+ list->ids = xmalloc(sizeof(pm_constant_id_t) * capacity);
+ if (list->ids == NULL) abort();
+
+ list->size = 0;
+ list->capacity = capacity;
+}
+
+/**
* Append a constant id to a list of constant ids. Returns false if any
* potential reallocations fail.
*/
@@ -27,6 +39,18 @@ pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id) {
}
/**
+ * Insert a constant id into a list of constant ids at the specified index.
+ */
+void
+pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id) {
+ assert(index < list->capacity);
+ assert(list->ids[index] == PM_CONSTANT_ID_UNSET);
+
+ list->ids[index] = id;
+ list->size++;
+}
+
+/**
* Checks if the current constant id list includes the given constant id.
*/
bool
diff --git a/prism/util/pm_constant_pool.h b/prism/util/pm_constant_pool.h
index 19e406396e..0fe16858a0 100644
--- a/prism/util/pm_constant_pool.h
+++ b/prism/util/pm_constant_pool.h
@@ -52,6 +52,14 @@ typedef struct {
void pm_constant_id_list_init(pm_constant_id_list_t *list);
/**
+ * Initialize a list of constant ids with a given capacity.
+ *
+ * @param list The list to initialize.
+ * @param capacity The initial capacity of the list.
+ */
+void pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity);
+
+/**
* Append a constant id to a list of constant ids. Returns false if any
* potential reallocations fail.
*
@@ -62,6 +70,15 @@ void pm_constant_id_list_init(pm_constant_id_list_t *list);
bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id);
/**
+ * Insert a constant id into a list of constant ids at the specified index.
+ *
+ * @param list The list to insert into.
+ * @param index The index at which to insert.
+ * @param id The id to insert.
+ */
+void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id);
+
+/**
* Checks if the current constant id list includes the given constant id.
*
* @param list The list to check.