summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kevin Newton <kddnewton@gmail.com> 2024-02-16 18:29:19 -0500
committer git <svn-admin@ruby-lang.org> 2024-02-17 02:05:12 +0000
commit b56b8ec797408865ccc79b67e9860d8382f986dd (patch)
tree a0200cba639f40974ac7a4a1e74942a94ef3b9bf
parent 075b6ac8aeb1217b04e067eeb10bd06897a4359d (diff)
[ruby/prism] Provide the ability to dump AST to JSON from C
https://github.com/ruby/prism/commit/d3a149efc5
-rw-r--r-- prism/node.h 1
-rw-r--r-- prism/prism.h 11
-rw-r--r-- prism/templates/src/node.c.erb 96
-rw-r--r-- prism/templates/src/prettyprint.c.erb 41
-rw-r--r-- prism/util/pm_buffer.c 70
-rw-r--r-- prism/util/pm_buffer.h 19
6 files changed, 180 insertions, 58 deletions
diff --git a/prism/node.h b/prism/node.h
index 3e15d18552..8d1b6a599a 100644
--- a/prism/node.h
+++ b/prism/node.h
@@ -8,6 +8,7 @@
#include "prism/defines.h"
#include "prism/parser.h"
+#include "prism/util/pm_buffer.h"
/**
* Append a new node onto the end of the node list.
diff --git a/prism/prism.h b/prism/prism.h
index afd57483fd..ffc722e90c 100644
--- a/prism/prism.h
+++ b/prism/prism.h
@@ -188,14 +188,13 @@ const char * pm_token_type_human(pm_token_type_t token_type);
PRISM_EXPORTED_FUNCTION void pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize);
/**
- * Visit each of the nodes in this subtree using the given visitor callback.
+ * Dump JSON to the given buffer.
*
- * @param node The node to visit.
- * @param visitor The visitor callback to use. It should return `true` if the
- * visitor should continue visiting nodes, and `false` if it should stop.
- * @param data The optional data to pass to the visitor.
+ * @param buffer The buffer to serialize to.
+ * @param parser The parser that parsed the node.
+ * @param node The node to serialize.
*/
-PRISM_EXPORTED_FUNCTION void pm_node_visit(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data);
+PRISM_EXPORTED_FUNCTION void pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node);
/**
* @mainpage
diff --git a/prism/templates/src/node.c.erb b/prism/templates/src/node.c.erb
index a7135d598e..095a2b171e 100644
--- a/prism/templates/src/node.c.erb
+++ b/prism/templates/src/node.c.erb
@@ -161,42 +161,110 @@ pm_node_type_to_str(pm_node_type_t node_type)
return "";
}
+static void
+pm_dump_json_constant(pm_buffer_t *buffer, const pm_parser_t *parser, pm_constant_id_t constant_id) {
+ const pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, constant_id);
+ pm_buffer_append_byte(buffer, '"');
+ pm_buffer_append_source(buffer, constant->start, constant->length, PM_BUFFER_ESCAPING_JSON);
+ pm_buffer_append_byte(buffer, '"');
+}
+
+static void
+pm_dump_json_location(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_location_t *location) {
+ uint32_t start = (uint32_t) (location->start - parser->start);
+ uint32_t end = (uint32_t) (location->end - parser->start);
+ pm_buffer_append_format(buffer, "{\"start\":%" PRIu32 ",\"end\":%" PRIu32 "}", start, end);
+}
+
/**
- * Visit each of the nodes in this subtree using the given visitor callback.
+ * Dump JSON to the given buffer.
*/
PRISM_EXPORTED_FUNCTION void
-pm_node_visit(const pm_node_t *node, bool (*visitor)(const pm_node_t *node, void *data), void *data) {
- if (!visitor(node, data)) return;
-
+pm_dump_json(pm_buffer_t *buffer, const pm_parser_t *parser, const pm_node_t *node) {
switch (PM_NODE_TYPE(node)) {
<%- nodes.each do |node| -%>
- <%- if (fields = node.fields.select { |field| field.is_a?(Prism::NodeField) || field.is_a?(Prism::OptionalNodeField) || field.is_a?(Prism::NodeListField) }).any? -%>
case <%= node.type %>: {
+ pm_buffer_append_string(buffer, "{\"type\":\"<%= node.name %>\",\"location\":", <%= node.name.bytesize + 22 %>);
+
const pm_<%= node.human %>_t *cast = (const pm_<%= node.human %>_t *) node;
- <%- fields.each do |field| -%>
+ pm_dump_json_location(buffer, parser, &cast->base.location);
+ <%- node.fields.each_with_index do |field, index| -%>
- // Visit the <%= field.name %> field
+ // Dump the <%= field.name %> field
+ pm_buffer_append_byte(buffer, ',');
+ pm_buffer_append_string(buffer, "\"<%= field.name %>\":", <%= field.name.bytesize + 3 %>);
<%- case field -%>
<%- when Prism::NodeField -%>
- pm_node_visit((const pm_node_t *) cast-><%= field.name %>, visitor, data);
+ pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
<%- when Prism::OptionalNodeField -%>
if (cast-><%= field.name %> != NULL) {
- pm_node_visit((const pm_node_t *) cast-><%= field.name %>, visitor, data);
+ pm_dump_json(buffer, parser, (const pm_node_t *) cast-><%= field.name %>);
+ } else {
+ pm_buffer_append_string(buffer, "null", 4);
}
<%- when Prism::NodeListField -%>
const pm_node_list_t *<%= field.name %> = &cast-><%= field.name %>;
+ pm_buffer_append_byte(buffer, '[');
+
+ for (size_t index = 0; index < <%= field.name %>->size; index++) {
+ if (index != 0) pm_buffer_append_byte(buffer, ',');
+ pm_dump_json(buffer, parser, <%= field.name %>->nodes[index]);
+ }
+ pm_buffer_append_byte(buffer, ']');
+ <%- when Prism::StringField -%>
+ const pm_string_t *<%= field.name %> = &cast-><%= field.name %>;
+ pm_buffer_append_byte(buffer, '"');
+ pm_buffer_append_source(buffer, pm_string_source(<%= field.name %>), pm_string_length(<%= field.name %>), PM_BUFFER_ESCAPING_JSON);
+ pm_buffer_append_byte(buffer, '"');
+ <%- when Prism::ConstantField -%>
+ pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
+ <%- when Prism::OptionalConstantField -%>
+ if (cast-><%= field.name %> != PM_CONSTANT_ID_UNSET) {
+ pm_dump_json_constant(buffer, parser, cast-><%= field.name %>);
+ } else {
+ pm_buffer_append_string(buffer, "null", 4);
+ }
+ <%- when Prism::ConstantListField -%>
+ const pm_constant_id_list_t *<%= field.name %> = &cast-><%= field.name %>;
+ pm_buffer_append_byte(buffer, '[');
+
for (size_t index = 0; index < <%= field.name %>->size; index++) {
- pm_node_visit(<%= field.name %>->nodes[index], visitor, data);
+ if (index != 0) pm_buffer_append_byte(buffer, ',');
+ pm_dump_json_constant(buffer, parser, <%= field.name %>->ids[index]);
+ }
+ pm_buffer_append_byte(buffer, ']');
+ <%- when Prism::LocationField -%>
+ pm_dump_json_location(buffer, parser, &cast-><%= field.name %>);
+ <%- when Prism::OptionalLocationField -%>
+ if (cast-><%= field.name %>.start != NULL) {
+ pm_dump_json_location(buffer, parser, &cast-><%= field.name %>);
+ } else {
+ pm_buffer_append_string(buffer, "null", 4);
+ }
+ <%- when Prism::UInt8Field -%>
+ pm_buffer_append_format(buffer, "%" PRIu8, cast-><%= field.name %>);
+ <%- when Prism::UInt32Field -%>
+ pm_buffer_append_format(buffer, "%" PRIu32, cast-><%= field.name %>);
+ <%- when Prism::FlagsField -%>
+ size_t flags = 0;
+ pm_buffer_append_byte(buffer, '[');
+ <%- found = flags.find { |flag| flag.name == field.kind }.tap { |found| raise "Expected to find #{field.kind}" unless found } -%>
+ <%- found.values.each_with_index do |value, index| -%>
+ if (PM_NODE_FLAG_P(cast, PM_<%= found.human.upcase %>_<%= value.name %>)) {
+ if (flags != 0) pm_buffer_append_byte(buffer, ',');
+ pm_buffer_append_string(buffer, "\"<%= value.name %>\"", <%= value.name.bytesize + 2 %>);
+ flags++;
}
<%- end -%>
+ pm_buffer_append_byte(buffer, ']');
+ <%- else -%>
+ <%- raise %>
+ <%- end -%>
<%- end -%>
+ pm_buffer_append_byte(buffer, '}');
break;
}
- <%- else -%>
- case <%= node.type %>:
- break;
- <%- end -%>
<%- end -%>
case PM_SCOPE_NODE:
break;
diff --git a/prism/templates/src/prettyprint.c.erb b/prism/templates/src/prettyprint.c.erb
index 2fcce5f689..7da92ffb77 100644
--- a/prism/templates/src/prettyprint.c.erb
+++ b/prism/templates/src/prettyprint.c.erb
@@ -1,41 +1,6 @@
<%# encoding: ASCII -%>
#include "prism/prettyprint.h"
-static void
-prettyprint_source(pm_buffer_t *output_buffer, const uint8_t *source, size_t length) {
- for (size_t index = 0; index < length; index++) {
- const uint8_t byte = source[index];
-
- if ((byte <= 0x06) || (byte >= 0x0E && byte <= 0x1F) || (byte >= 0x7F)) {
- pm_buffer_append_format(output_buffer, "\\x%02X", byte);
- } else {
- switch (byte) {
- case '\a': pm_buffer_append_string(output_buffer, "\\a", 2); break;
- case '\b': pm_buffer_append_string(output_buffer, "\\b", 2); break;
- case '\t': pm_buffer_append_string(output_buffer, "\\t", 2); break;
- case '\n': pm_buffer_append_string(output_buffer, "\\n", 2); break;
- case '\v': pm_buffer_append_string(output_buffer, "\\v", 2); break;
- case '\f': pm_buffer_append_string(output_buffer, "\\f", 2); break;
- case '\r': pm_buffer_append_string(output_buffer, "\\r", 2); break;
- case '"': pm_buffer_append_string(output_buffer, "\\\"", 2); break;
- case '#': {
- if (index + 1 < length) {
- const uint8_t next_byte = source[index + 1];
- if (next_byte == '{' || next_byte == '@' || next_byte == '$') {
- pm_buffer_append_byte(output_buffer, '\\');
- }
- }
-
- pm_buffer_append_byte(output_buffer, '#');
- break;
- }
- case '\\': pm_buffer_append_string(output_buffer, "\\\\", 2); break;
- default: pm_buffer_append_byte(output_buffer, byte); break;
- }
- }
- }
-}
-
static inline void
prettyprint_location(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm_location_t *location) {
pm_line_column_t start = pm_newline_list_line_column(&parser->newline_list, location->start, parser->start_line);
@@ -93,7 +58,7 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
}
<%- when Prism::StringField -%>
pm_buffer_append_string(output_buffer, " \"", 2);
- prettyprint_source(output_buffer, pm_string_source(&cast-><%= field.name %>), pm_string_length(&cast-><%= field.name %>));
+ pm_buffer_append_source(output_buffer, pm_string_source(&cast-><%= field.name %>), pm_string_length(&cast-><%= field.name %>), PM_BUFFER_ESCAPING_RUBY);
pm_buffer_append_string(output_buffer, "\"\n", 2);
<%- when Prism::NodeListField -%>
pm_buffer_append_format(output_buffer, " (length: %lu)\n", (unsigned long) (cast-><%= field.name %>.size));
@@ -139,7 +104,7 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
pm_buffer_append_byte(output_buffer, ' ');
prettyprint_location(output_buffer, parser, location);
pm_buffer_append_string(output_buffer, " = \"", 4);
- prettyprint_source(output_buffer, location->start, (size_t) (location->end - location->start));
+ pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), PM_BUFFER_ESCAPING_RUBY);
pm_buffer_append_string(output_buffer, "\"\n", 2);
<%- when Prism::OptionalLocationField -%>
pm_location_t *location = &cast-><%= field.name %>;
@@ -149,7 +114,7 @@ prettyprint_node(pm_buffer_t *output_buffer, const pm_parser_t *parser, const pm
pm_buffer_append_byte(output_buffer, ' ');
prettyprint_location(output_buffer, parser, location);
pm_buffer_append_string(output_buffer, " = \"", 4);
- prettyprint_source(output_buffer, location->start, (size_t) (location->end - location->start));
+ pm_buffer_append_source(output_buffer, location->start, (size_t) (location->end - location->start), PM_BUFFER_ESCAPING_RUBY);
pm_buffer_append_string(output_buffer, "\"\n", 2);
}
<%- when Prism::UInt8Field -%>
diff --git a/prism/util/pm_buffer.c b/prism/util/pm_buffer.c
index 129bed52cd..2cf7d9eea8 100644
--- a/prism/util/pm_buffer.c
+++ b/prism/util/pm_buffer.c
@@ -161,6 +161,76 @@ pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
}
/**
+ * Append a slice of source code to the buffer.
+ */
+void
+pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping) {
+ for (size_t index = 0; index < length; index++) {
+ const uint8_t byte = source[index];
+
+ if ((byte <= 0x06) || (byte >= 0x0E && byte <= 0x1F) || (byte >= 0x7F)) {
+ if (escaping == PM_BUFFER_ESCAPING_RUBY) {
+ pm_buffer_append_format(buffer, "\\x%02X", byte);
+ } else {
+ pm_buffer_append_format(buffer, "\\u%04X", byte);
+ }
+ } else {
+ switch (byte) {
+ case '\a':
+ if (escaping == PM_BUFFER_ESCAPING_RUBY) {
+ pm_buffer_append_string(buffer, "\\a", 2);
+ } else {
+ pm_buffer_append_format(buffer, "\\u%04X", byte);
+ }
+ break;
+ case '\b':
+ pm_buffer_append_string(buffer, "\\b", 2);
+ break;
+ case '\t':
+ pm_buffer_append_string(buffer, "\\t", 2);
+ break;
+ case '\n':
+ pm_buffer_append_string(buffer, "\\n", 2);
+ break;
+ case '\v':
+ if (escaping == PM_BUFFER_ESCAPING_RUBY) {
+ pm_buffer_append_string(buffer, "\\v", 2);
+ } else {
+ pm_buffer_append_format(buffer, "\\u%04X", byte);
+ }
+ break;
+ case '\f':
+ pm_buffer_append_string(buffer, "\\f", 2);
+ break;
+ case '\r':
+ pm_buffer_append_string(buffer, "\\r", 2);
+ break;
+ case '"':
+ pm_buffer_append_string(buffer, "\\\"", 2);
+ break;
+ case '#': {
+ if (escaping == PM_BUFFER_ESCAPING_RUBY && index + 1 < length) {
+ const uint8_t next_byte = source[index + 1];
+ if (next_byte == '{' || next_byte == '@' || next_byte == '$') {
+ pm_buffer_append_byte(buffer, '\\');
+ }
+ }
+
+ pm_buffer_append_byte(buffer, '#');
+ break;
+ }
+ case '\\':
+ pm_buffer_append_string(buffer, "\\\\", 2);
+ break;
+ default:
+ pm_buffer_append_byte(buffer, byte);
+ break;
+ }
+ }
+ }
+}
+
+/**
* Prepend the given string to the buffer.
*/
void
diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h
index 513633baa1..a798dd1d17 100644
--- a/prism/util/pm_buffer.h
+++ b/prism/util/pm_buffer.h
@@ -130,6 +130,25 @@ void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
/**
+ * The different types of escaping that can be performed by the buffer when
+ * appending a slice of Ruby source code.
+ */
+typedef enum {
+ PM_BUFFER_ESCAPING_RUBY,
+ PM_BUFFER_ESCAPING_JSON
+} pm_buffer_escaping_t;
+
+/**
+ * Append a slice of source code to the buffer.
+ *
+ * @param buffer The buffer to append to.
+ * @param source The source code to append.
+ * @param length The length of the source code to append.
+ * @param escaping The type of escaping to perform.
+ */
+void pm_buffer_append_source(pm_buffer_t *buffer, const uint8_t *source, size_t length, pm_buffer_escaping_t escaping);
+
+/**
* Prepend the given string to the buffer.
*
* @param buffer The buffer to prepend to.