diff options
| author | Kevin Newton <kddnewton@gmail.com> | 2023-08-24 20:40:37 -0400 |
|---|---|---|
| committer | git <svn-admin@ruby-lang.org> | 2023-08-25 12:38:35 +0000 |
| commit | 90103f5d18df54f21af235e7e034db0dc473cfd0 (patch) | |
| tree | 89e1a99ecd14909d4460650b2b92713b3669318b | |
| parent | fe8f6dfed15333618f79ecf58944214f08c4f020 (diff) | |
[ruby/yarp] Add the ability to serialize shared strings
https://github.com/ruby/yarp/commit/8d18c7ae29
| -rw-r--r-- | yarp/templates/java/org/yarp/Loader.java.erb | 21 | ||||
| -rw-r--r-- | yarp/templates/lib/yarp/serialize.rb.erb | 21 | ||||
| -rw-r--r-- | yarp/templates/src/serialize.c.erb | 81 |
3 files changed, 82 insertions, 41 deletions
diff --git a/yarp/templates/java/org/yarp/Loader.java.erb b/yarp/templates/java/org/yarp/Loader.java.erb index 454f5f3075..312e232182 100644 --- a/yarp/templates/java/org/yarp/Loader.java.erb +++ b/yarp/templates/java/org/yarp/Loader.java.erb @@ -95,13 +95,28 @@ public class Loader { return new ParseResult(node, comments, errors, warnings); } - private byte[] loadString() { + private byte[] loadEmbeddedString() { int length = loadVarInt(); byte[] string = new byte[length]; buffer.get(string); return string; } + private byte[] loadString() { + switch (buffer.get()) { + case 1: + int start = loadVarInt(); + int length = loadVarInt(); + byte[] string = new byte[length]; + System.arraycopy(source.bytes, start, string, 0, length); + return string; + case 2: + return loadEmbeddedString(); + default: + throw new Error("Expected 0 or 1 but was " + buffer.get()); + } + } + private ParseResult.Comment[] loadComments() { int count = loadVarInt(); ParseResult.Comment[] comments = new ParseResult.Comment[count]; @@ -123,7 +138,7 @@ public class Loader { // error messages only contain ASCII characters for (int i = 0; i < count; i++) { - byte[] bytes = loadString(); + byte[] bytes = loadEmbeddedString(); String message = new String(bytes, StandardCharsets.US_ASCII); Nodes.Location location = loadLocation(); @@ -140,7 +155,7 @@ public class Loader { // warning messages only contain ASCII characters for (int i = 0; i < count; i++) { - byte[] bytes = loadString(); + byte[] bytes = loadEmbeddedString(); String message = new String(bytes, StandardCharsets.US_ASCII); Nodes.Location location = loadLocation(); diff --git a/yarp/templates/lib/yarp/serialize.rb.erb b/yarp/templates/lib/yarp/serialize.rb.erb index 65cd52e238..8ee072c0b1 100644 --- a/yarp/templates/lib/yarp/serialize.rb.erb +++ b/yarp/templates/lib/yarp/serialize.rb.erb @@ -54,8 +54,8 @@ module YARP end comments = load_varint.times.map { Comment.new(Comment::TYPES.fetch(load_varint), load_location) } - errors = load_varint.times.map { ParseError.new(load_string, load_location) } - warnings = load_varint.times.map { ParseWarning.new(load_string, load_location) } + errors = load_varint.times.map { ParseError.new(load_embedded_string, load_location) } + warnings = load_varint.times.map { ParseWarning.new(load_embedded_string, load_location) } raise "Expected to consume all bytes while deserializing" unless @io.eof? @@ -70,8 +70,8 @@ module YARP @input = input.force_encoding(@encoding).freeze comments = load_varint.times.map { Comment.new(Comment::TYPES.fetch(io.getbyte), load_location) } - errors = load_varint.times.map { ParseError.new(load_string, load_location) } - warnings = load_varint.times.map { ParseWarning.new(load_string, load_location) } + errors = load_varint.times.map { ParseError.new(load_embedded_string, load_location) } + warnings = load_varint.times.map { ParseWarning.new(load_embedded_string, load_location) } @constant_pool_offset = io.read(4).unpack1("L") @constant_pool = Array.new(load_varint, nil) @@ -110,10 +110,21 @@ module YARP end end - def load_string + def load_embedded_string io.read(load_varint).force_encoding(encoding) end + def load_string + case io.getbyte + when 1 + input.byteslice(load_varint, load_varint).force_encoding(encoding) + when 2 + load_embedded_string + else + raise + end + end + def load_location Location.new(source, load_varint, load_varint) end diff --git a/yarp/templates/src/serialize.c.erb b/yarp/templates/src/serialize.c.erb index 497e287b35..9b49540566 100644 --- a/yarp/templates/src/serialize.c.erb +++ b/yarp/templates/src/serialize.c.erb @@ -15,7 +15,7 @@ yp_sizet_to_u32(size_t value) { } static void -serialize_location(yp_parser_t *parser, yp_location_t *location, yp_buffer_t *buffer) { +yp_serialize_location(yp_parser_t *parser, yp_location_t *location, yp_buffer_t *buffer) { assert(location->start); assert(location->end); assert(location->start <= location->end); @@ -24,13 +24,36 @@ serialize_location(yp_parser_t *parser, yp_location_t *location, yp_buffer_t *bu yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(location->end - location->start)); } +static void +yp_serialize_string(yp_parser_t *parser, yp_string_t *string, yp_buffer_t *buffer) { + switch (string->type) { + case YP_STRING_SHARED: { + yp_buffer_append_u8(buffer, 1); + yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(yp_string_source(string) - parser->start)); + yp_buffer_append_u32(buffer, yp_sizet_to_u32(yp_string_length(string))); + break; + } + case YP_STRING_OWNED: + case YP_STRING_CONSTANT: { + uint32_t length = yp_sizet_to_u32(yp_string_length(string)); + yp_buffer_append_u8(buffer, 2); + yp_buffer_append_u32(buffer, length); + yp_buffer_append_str(buffer, yp_string_source(string), length); + break; + } + case YP_STRING_MAPPED: + assert(false && "Cannot serialize mapped strings."); + break; + } +} + void yp_serialize_node(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) { yp_buffer_append_u8(buffer, (uint8_t) YP_NODE_TYPE(node)); size_t offset = buffer->length; - serialize_location(parser, &node->location, buffer); + yp_serialize_location(parser, &node->location, buffer); switch (YP_NODE_TYPE(node)) { // We do not need to serialize a ScopeNode ever as @@ -56,9 +79,7 @@ yp_serialize_node(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) { yp_serialize_node(parser, (yp_node_t *)((yp_<%= node.human %>_t *)node)-><%= param.name %>, buffer); } <%- when StringParam -%> - uint32_t <%= param.name %>_length = yp_sizet_to_u32(yp_string_length(&((yp_<%= node.human %>_t *)node)-><%= param.name %>)); - yp_buffer_append_u32(buffer, <%= param.name %>_length); - yp_buffer_append_str(buffer, yp_string_source(&((yp_<%= node.human %>_t *)node)-><%= param.name %>), <%= param.name %>_length); + yp_serialize_string(parser, &((yp_<%= node.human %>_t *)node)-><%= param.name %>, buffer); <%- when NodeListParam -%> uint32_t <%= param.name %>_size = yp_sizet_to_u32(((yp_<%= node.human %>_t *)node)-><%= param.name %>.size); yp_buffer_append_u32(buffer, <%= param.name %>_size); @@ -69,7 +90,7 @@ yp_serialize_node(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) { uint32_t <%= param.name %>_size = yp_sizet_to_u32(((yp_<%= node.human %>_t *)node)-><%= param.name %>.size); yp_buffer_append_u32(buffer, <%= param.name %>_size); for (uint32_t index = 0; index < <%= param.name %>_size; index++) { - serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= param.name %>.locations[index], buffer); + yp_serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= param.name %>.locations[index], buffer); } <%- when ConstantParam -%> yp_buffer_append_u32(buffer, yp_sizet_to_u32(((yp_<%= node.human %>_t *)node)-><%= param.name %>)); @@ -80,13 +101,13 @@ yp_serialize_node(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) { yp_buffer_append_u32(buffer, yp_sizet_to_u32(((yp_<%= node.human %>_t *)node)-><%= param.name %>.ids[index])); } <%- when LocationParam -%> - serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= param.name %>, buffer); + yp_serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= param.name %>, buffer); <%- when OptionalLocationParam -%> if (((yp_<%= node.human %>_t *)node)-><%= param.name %>.start == NULL) { yp_buffer_append_u8(buffer, 0); } else { yp_buffer_append_u8(buffer, 1); - serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= param.name %>, buffer); + yp_serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= param.name %>, buffer); } <%- when UInt32Param -%> yp_buffer_append_u32(buffer, ((yp_<%= node.human %>_t *)node)-><%= param.name %>); @@ -107,7 +128,8 @@ yp_serialize_node(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) { } } -void yp_serialize_comment(yp_parser_t *parser, yp_comment_t *comment, yp_buffer_t *buffer) { +static void +yp_serialize_comment(yp_parser_t *parser, yp_comment_t *comment, yp_buffer_t *buffer) { // serialize type yp_buffer_append_u8(buffer, (uint8_t) comment->type); @@ -116,16 +138,18 @@ void yp_serialize_comment(yp_parser_t *parser, yp_comment_t *comment, yp_buffer_ yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(comment->end - comment->start)); } -void yp_serialize_comment_list(yp_parser_t *parser, yp_list_t list, yp_buffer_t *buffer) { - yp_buffer_append_u32(buffer, yp_sizet_to_u32(yp_list_size(&list))); +static void +yp_serialize_comment_list(yp_parser_t *parser, yp_list_t *list, yp_buffer_t *buffer) { + yp_buffer_append_u32(buffer, yp_sizet_to_u32(yp_list_size(list))); yp_comment_t *comment; - for (comment = (yp_comment_t *) list.head; comment != NULL; comment = (yp_comment_t *) comment->node.next) { + for (comment = (yp_comment_t *) list->head; comment != NULL; comment = (yp_comment_t *) comment->node.next) { yp_serialize_comment(parser, comment, buffer); } } -void yp_serialize_diagnostic(yp_parser_t *parser, yp_diagnostic_t *diagnostic, yp_buffer_t *buffer) { +static void +yp_serialize_diagnostic(yp_parser_t *parser, yp_diagnostic_t *diagnostic, yp_buffer_t *buffer) { // serialize message size_t message_length = strlen(diagnostic->message); yp_buffer_append_u32(buffer, yp_sizet_to_u32(message_length)); @@ -136,11 +160,12 @@ void yp_serialize_diagnostic(yp_parser_t *parser, yp_diagnostic_t *diagnostic, y yp_buffer_append_u32(buffer, yp_ptrdifft_to_u32(diagnostic->end - diagnostic->start)); } -void yp_serialize_diagnostic_list(yp_parser_t *parser, yp_list_t list, yp_buffer_t *buffer) { - yp_buffer_append_u32(buffer, yp_sizet_to_u32(yp_list_size(&list))); +static void +yp_serialize_diagnostic_list(yp_parser_t *parser, yp_list_t *list, yp_buffer_t *buffer) { + yp_buffer_append_u32(buffer, yp_sizet_to_u32(yp_list_size(list))); yp_diagnostic_t *diagnostic; - for (diagnostic = (yp_diagnostic_t *) list.head; diagnostic != NULL; diagnostic = (yp_diagnostic_t *) diagnostic->node.next) { + for (diagnostic = (yp_diagnostic_t *) list->head; diagnostic != NULL; diagnostic = (yp_diagnostic_t *) diagnostic->node.next) { yp_serialize_diagnostic(parser, diagnostic, buffer); } } @@ -153,14 +178,9 @@ yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) yp_buffer_append_u32(buffer, yp_sizet_to_u32(encoding_length)); yp_buffer_append_str(buffer, parser->encoding.name, encoding_length); - // Serialize the comments - yp_serialize_comment_list(parser, parser->comment_list, buffer); - - // Serialize the errors - yp_serialize_diagnostic_list(parser, parser->error_list, buffer); - - // Serialize the warnings - yp_serialize_diagnostic_list(parser, parser->warning_list, buffer); + yp_serialize_comment_list(parser, &parser->comment_list, buffer); + yp_serialize_diagnostic_list(parser, &parser->error_list, buffer); + yp_serialize_diagnostic_list(parser, &parser->warning_list, buffer); // Here we're going to leave space for the offset of the constant pool in // the buffer. @@ -224,16 +244,11 @@ yp_lex_serialize(const char *source, size_t size, const char *filepath, yp_buffe yp_node_t *node = yp_parse(&parser); // Append 0 to mark end of tokens - yp_buffer_append_u32(buffer, 0); - - // Serialize the comments - yp_serialize_comment_list(&parser, parser.comment_list, buffer); - - // Serialize the errors - yp_serialize_diagnostic_list(&parser, parser.error_list, buffer); + yp_buffer_append_u8(buffer, 0); - // Serialize the warnings - yp_serialize_diagnostic_list(&parser, parser.warning_list, buffer); + yp_serialize_comment_list(&parser, &parser.comment_list, buffer); + yp_serialize_diagnostic_list(&parser, &parser.error_list, buffer); + yp_serialize_diagnostic_list(&parser, &parser.warning_list, buffer); yp_node_destroy(&parser, node); yp_parser_free(&parser); |
