summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kevin Newton <kddnewton@gmail.com> 2023-08-24 20:40:37 -0400
committer git <svn-admin@ruby-lang.org> 2023-08-25 12:38:35 +0000
commit 90103f5d18df54f21af235e7e034db0dc473cfd0 (patch)
tree 89e1a99ecd14909d4460650b2b92713b3669318b
parent fe8f6dfed15333618f79ecf58944214f08c4f020 (diff)
[ruby/yarp] Add the ability to serialize shared strings
https://github.com/ruby/yarp/commit/8d18c7ae29
-rw-r--r-- yarp/templates/java/org/yarp/Loader.java.erb 21
-rw-r--r-- yarp/templates/lib/yarp/serialize.rb.erb 21
-rw-r--r-- yarp/templates/src/serialize.c.erb 81
3 files changed, 82 insertions, 41 deletions
diff --git a/yarp/templates/java/org/yarp/Loader.java.erb b/yarp/templates/java/org/yarp/Loader.java.erb
index 454f5f3075..312e232182 100644
--- a/yarp/templates/java/org/yarp/Loader.java.erb
+++ b/yarp/templates/java/org/yarp/Loader.java.erb
@@ -95,13 +95,28 @@ public class Loader {
return new ParseResult(node, comments, errors, warnings);
}
- private byte[] loadString() {
+ private byte[] loadEmbeddedString() {
int length = loadVarInt();
byte[] string = new byte[length];
buffer.get(string);
return string;
}
+    // Reads one serialized string from the buffer. The string starts with a
+    // one-byte tag that selects how the rest is encoded:
+    //   1 - shared: a varint start offset and a varint length; the bytes are
+    //       copied out of source.bytes
+    //   2 - embedded: handled by loadEmbeddedString()
+    // Any other tag throws an Error naming the tag that was actually read.
+    private byte[] loadString() {
+        // Read the tag exactly once so the error below reports the offending
+        // byte instead of consuming (and reporting) the byte after it.
+        byte type = buffer.get();
+        switch (type) {
+            case 1:
+                int start = loadVarInt();
+                int length = loadVarInt();
+                byte[] string = new byte[length];
+                System.arraycopy(source.bytes, start, string, 0, length);
+                return string;
+            case 2:
+                return loadEmbeddedString();
+            default:
+                throw new Error("Expected 1 or 2 but was " + type);
+        }
+    }
+
private ParseResult.Comment[] loadComments() {
int count = loadVarInt();
ParseResult.Comment[] comments = new ParseResult.Comment[count];
@@ -123,7 +138,7 @@ public class Loader {
// error messages only contain ASCII characters
for (int i = 0; i < count; i++) {
- byte[] bytes = loadString();
+ byte[] bytes = loadEmbeddedString();
String message = new String(bytes, StandardCharsets.US_ASCII);
Nodes.Location location = loadLocation();
@@ -140,7 +155,7 @@ public class Loader {
// warning messages only contain ASCII characters
for (int i = 0; i < count; i++) {
- byte[] bytes = loadString();
+ byte[] bytes = loadEmbeddedString();
String message = new String(bytes, StandardCharsets.US_ASCII);
Nodes.Location location = loadLocation();
diff --git a/yarp/templates/lib/yarp/serialize.rb.erb b/yarp/templates/lib/yarp/serialize.rb.erb
index 65cd52e238..8ee072c0b1 100644
--- a/yarp/templates/lib/yarp/serialize.rb.erb
+++ b/yarp/templates/lib/yarp/serialize.rb.erb
@@ -54,8 +54,8 @@ module YARP
end
comments = load_varint.times.map { Comment.new(Comment::TYPES.fetch(load_varint), load_location) }
- errors = load_varint.times.map { ParseError.new(load_string, load_location) }
- warnings = load_varint.times.map { ParseWarning.new(load_string, load_location) }
+ errors = load_varint.times.map { ParseError.new(load_embedded_string, load_location) }
+ warnings = load_varint.times.map { ParseWarning.new(load_embedded_string, load_location) }
raise "Expected to consume all bytes while deserializing" unless @io.eof?
@@ -70,8 +70,8 @@ module YARP
@input = input.force_encoding(@encoding).freeze
comments = load_varint.times.map { Comment.new(Comment::TYPES.fetch(io.getbyte), load_location) }
- errors = load_varint.times.map { ParseError.new(load_string, load_location) }
- warnings = load_varint.times.map { ParseWarning.new(load_string, load_location) }
+ errors = load_varint.times.map { ParseError.new(load_embedded_string, load_location) }
+ warnings = load_varint.times.map { ParseWarning.new(load_embedded_string, load_location) }
@constant_pool_offset = io.read(4).unpack1("L")
@constant_pool = Array.new(load_varint, nil)
@@ -110,10 +110,21 @@ module YARP
end
end
- def load_string
+ def load_embedded_string
io.read(load_varint).force_encoding(encoding)
end
+ def load_string
+ case io.getbyte
+ when 1
+ input.byteslice(load_varint, load_varint).force_encoding(encoding)
+ when 2
+ load_embedded_string
+ else
+ raise
+ end
+ end
+
def load_location
Location.new(source, load_varint, load_varint)
end
diff --git a/yarp/templates/src/serialize.c.erb b/yarp/templates/src/serialize.c.erb
index 497e287b35..9b49540566 100644
--- a/yarp/templates/src/serialize.c.erb
+++ b/yarp/templates/src/serialize.c.erb
@@ -15,7 +15,7 @@ yp_sizet_to_u32(size_t value) {
}
static void
-serialize_location(yp_parser_t *parser, yp_location_t *location, yp_buffer_t *buffer) {
+yp_serialize_location(yp_parser_t *parser, yp_location_t *location, yp_buffer_t *buffer) {
assert(location->start);
assert(location->end);
assert(location->start <= location->end);
@@ -24,13 +24,36 @@ serialize_location(yp_parser_t *parser, yp_location_t *location, yp_buffer_t *bu
yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(location->end - location->start));
}
+static void
+yp_serialize_string(yp_parser_t *parser, yp_string_t *string, yp_buffer_t *buffer) { // Appends `string` to `buffer` as a one-byte tag followed by a payload chosen by string->type.
+ switch (string->type) {
+ case YP_STRING_SHARED: { // Tag 1: only a reference to the string is written, not its contents.
+ yp_buffer_append_u8(buffer, 1);
+ yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(yp_string_source(string) - parser->start)); // distance of the string's source pointer from parser->start
+ yp_buffer_append_u32(buffer, yp_sizet_to_u32(yp_string_length(string))); // the string's length
+ break;
+ }
+ case YP_STRING_OWNED:
+ case YP_STRING_CONSTANT: { // Tag 2: the length is written, followed by the string's contents.
+ uint32_t length = yp_sizet_to_u32(yp_string_length(string));
+ yp_buffer_append_u8(buffer, 2);
+ yp_buffer_append_u32(buffer, length);
+ yp_buffer_append_str(buffer, yp_string_source(string), length);
+ break;
+ }
+ case YP_STRING_MAPPED: // Not supported. NOTE(review): if assert() is compiled out (NDEBUG), this case appends nothing to buffer, not even a tag.
+ assert(false && "Cannot serialize mapped strings.");
+ break;
+ }
+}
+
void
yp_serialize_node(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
yp_buffer_append_u8(buffer, (uint8_t) YP_NODE_TYPE(node));
size_t offset = buffer->length;
- serialize_location(parser, &node->location, buffer);
+ yp_serialize_location(parser, &node->location, buffer);
switch (YP_NODE_TYPE(node)) {
// We do not need to serialize a ScopeNode ever as
@@ -56,9 +79,7 @@ yp_serialize_node(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
yp_serialize_node(parser, (yp_node_t *)((yp_<%= node.human %>_t *)node)-><%= param.name %>, buffer);
}
<%- when StringParam -%>
- uint32_t <%= param.name %>_length = yp_sizet_to_u32(yp_string_length(&((yp_<%= node.human %>_t *)node)-><%= param.name %>));
- yp_buffer_append_u32(buffer, <%= param.name %>_length);
- yp_buffer_append_str(buffer, yp_string_source(&((yp_<%= node.human %>_t *)node)-><%= param.name %>), <%= param.name %>_length);
+ yp_serialize_string(parser, &((yp_<%= node.human %>_t *)node)-><%= param.name %>, buffer);
<%- when NodeListParam -%>
uint32_t <%= param.name %>_size = yp_sizet_to_u32(((yp_<%= node.human %>_t *)node)-><%= param.name %>.size);
yp_buffer_append_u32(buffer, <%= param.name %>_size);
@@ -69,7 +90,7 @@ yp_serialize_node(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
uint32_t <%= param.name %>_size = yp_sizet_to_u32(((yp_<%= node.human %>_t *)node)-><%= param.name %>.size);
yp_buffer_append_u32(buffer, <%= param.name %>_size);
for (uint32_t index = 0; index < <%= param.name %>_size; index++) {
- serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= param.name %>.locations[index], buffer);
+ yp_serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= param.name %>.locations[index], buffer);
}
<%- when ConstantParam -%>
yp_buffer_append_u32(buffer, yp_sizet_to_u32(((yp_<%= node.human %>_t *)node)-><%= param.name %>));
@@ -80,13 +101,13 @@ yp_serialize_node(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
yp_buffer_append_u32(buffer, yp_sizet_to_u32(((yp_<%= node.human %>_t *)node)-><%= param.name %>.ids[index]));
}
<%- when LocationParam -%>
- serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= param.name %>, buffer);
+ yp_serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= param.name %>, buffer);
<%- when OptionalLocationParam -%>
if (((yp_<%= node.human %>_t *)node)-><%= param.name %>.start == NULL) {
yp_buffer_append_u8(buffer, 0);
} else {
yp_buffer_append_u8(buffer, 1);
- serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= param.name %>, buffer);
+ yp_serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= param.name %>, buffer);
}
<%- when UInt32Param -%>
yp_buffer_append_u32(buffer, ((yp_<%= node.human %>_t *)node)-><%= param.name %>);
@@ -107,7 +128,8 @@ yp_serialize_node(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
}
}
-void yp_serialize_comment(yp_parser_t *parser, yp_comment_t *comment, yp_buffer_t *buffer) {
+static void
+yp_serialize_comment(yp_parser_t *parser, yp_comment_t *comment, yp_buffer_t *buffer) {
// serialize type
yp_buffer_append_u8(buffer, (uint8_t) comment->type);
@@ -116,16 +138,18 @@ void yp_serialize_comment(yp_parser_t *parser, yp_comment_t *comment, yp_buffer_
yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(comment->end - comment->start));
}
-void yp_serialize_comment_list(yp_parser_t *parser, yp_list_t list, yp_buffer_t *buffer) {
- yp_buffer_append_u32(buffer, yp_sizet_to_u32(yp_list_size(&list)));
+static void
+yp_serialize_comment_list(yp_parser_t *parser, yp_list_t *list, yp_buffer_t *buffer) {
+ yp_buffer_append_u32(buffer, yp_sizet_to_u32(yp_list_size(list)));
yp_comment_t *comment;
- for (comment = (yp_comment_t *) list.head; comment != NULL; comment = (yp_comment_t *) comment->node.next) {
+ for (comment = (yp_comment_t *) list->head; comment != NULL; comment = (yp_comment_t *) comment->node.next) {
yp_serialize_comment(parser, comment, buffer);
}
}
-void yp_serialize_diagnostic(yp_parser_t *parser, yp_diagnostic_t *diagnostic, yp_buffer_t *buffer) {
+static void
+yp_serialize_diagnostic(yp_parser_t *parser, yp_diagnostic_t *diagnostic, yp_buffer_t *buffer) {
// serialize message
size_t message_length = strlen(diagnostic->message);
yp_buffer_append_u32(buffer, yp_sizet_to_u32(message_length));
@@ -136,11 +160,12 @@ void yp_serialize_diagnostic(yp_parser_t *parser, yp_diagnostic_t *diagnostic, y
yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(diagnostic->end - diagnostic->start));
}
-void yp_serialize_diagnostic_list(yp_parser_t *parser, yp_list_t list, yp_buffer_t *buffer) {
- yp_buffer_append_u32(buffer, yp_sizet_to_u32(yp_list_size(&list)));
+static void
+yp_serialize_diagnostic_list(yp_parser_t *parser, yp_list_t *list, yp_buffer_t *buffer) {
+ yp_buffer_append_u32(buffer, yp_sizet_to_u32(yp_list_size(list)));
yp_diagnostic_t *diagnostic;
- for (diagnostic = (yp_diagnostic_t *) list.head; diagnostic != NULL; diagnostic = (yp_diagnostic_t *) diagnostic->node.next) {
+ for (diagnostic = (yp_diagnostic_t *) list->head; diagnostic != NULL; diagnostic = (yp_diagnostic_t *) diagnostic->node.next) {
yp_serialize_diagnostic(parser, diagnostic, buffer);
}
}
@@ -153,14 +178,9 @@ yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer)
yp_buffer_append_u32(buffer, yp_sizet_to_u32(encoding_length));
yp_buffer_append_str(buffer, parser->encoding.name, encoding_length);
- // Serialize the comments
- yp_serialize_comment_list(parser, parser->comment_list, buffer);
-
- // Serialize the errors
- yp_serialize_diagnostic_list(parser, parser->error_list, buffer);
-
- // Serialize the warnings
- yp_serialize_diagnostic_list(parser, parser->warning_list, buffer);
+ yp_serialize_comment_list(parser, &parser->comment_list, buffer);
+ yp_serialize_diagnostic_list(parser, &parser->error_list, buffer);
+ yp_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
// Here we're going to leave space for the offset of the constant pool in
// the buffer.
@@ -224,16 +244,11 @@ yp_lex_serialize(const char *source, size_t size, const char *filepath, yp_buffe
yp_node_t *node = yp_parse(&parser);
// Append 0 to mark end of tokens
- yp_buffer_append_u32(buffer, 0);
-
- // Serialize the comments
- yp_serialize_comment_list(&parser, parser.comment_list, buffer);
-
- // Serialize the errors
- yp_serialize_diagnostic_list(&parser, parser.error_list, buffer);
+ yp_buffer_append_u8(buffer, 0);
- // Serialize the warnings
- yp_serialize_diagnostic_list(&parser, parser.warning_list, buffer);
+ yp_serialize_comment_list(&parser, &parser.comment_list, buffer);
+ yp_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
+ yp_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);
yp_node_destroy(&parser, node);
yp_parser_free(&parser);