summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kevin Newton <kddnewton@gmail.com> 2023-10-11 10:12:00 -0400
committer: Kevin Newton <kddnewton@gmail.com> 2023-10-13 15:31:30 -0400
commit: 973ecf684884d4eaea970dcad18ee468b39d87c4 (patch)
tree: b105be66d05251e2b2d7b1091280079291647332
parent: af8484bc39bd5c4668fdafa528179b9a2fdebd23 (diff)
[ruby/prism] Extract out a couple more token buffer functions
https://github.com/ruby/prism/commit/341e027d23
-rw-r--r-- prism/prism.c 45
1 files changed, 33 insertions, 12 deletions
diff --git a/prism/prism.c b/prism/prism.c
index c99be9b3bc..942057bba6 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -6662,6 +6662,21 @@ typedef struct {
const uint8_t *cursor;
} pm_token_buffer_t;
+// Push the given byte into the token buffer.
+static inline void
+pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
+ pm_buffer_append_u8(&token_buffer->buffer, byte);
+}
+
+// When we're about to return from lexing the current token and we know for sure
+// that we have found an escape sequence, this function is called to copy the
+// contents of the token buffer into the current string on the parser so that it
+// can be attached to the correct node.
+static inline void
+pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
+ pm_string_owned_init(&parser->current_string, (uint8_t *) token_buffer->buffer.value, token_buffer->buffer.length);
+}
+
// When we're about to return from lexing the current token, we need to flush
// all of the content that we have pushed into the buffer into the current
// string. If we haven't pushed anything into the buffer, this means that we
@@ -6675,21 +6690,28 @@ pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
} else {
pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
- pm_string_owned_init(&parser->current_string, (uint8_t *) token_buffer->buffer.value, token_buffer->buffer.length);
+ pm_token_buffer_copy(parser, token_buffer);
}
}
// When we've found an escape sequence, we need to copy everything up to this
// point into the buffer because we're about to provide a string that has
// different content than a direct slice of the source.
+//
+// It is expected that the parser's current token end will be pointing at one
+// byte past the backslash that starts the escape sequence.
static void
pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
+ const uint8_t *start;
if (token_buffer->cursor == NULL) {
pm_buffer_init_capacity(&token_buffer->buffer, 16);
- pm_buffer_append_bytes(&token_buffer->buffer, parser->current.start, (size_t) (parser->current.end - parser->current.start));
+ start = parser->current.start;
} else {
- pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
+ start = token_buffer->cursor;
}
+
+ const uint8_t *end = parser->current.end - 1;
+ pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
}
// This is a convenience macro that will set the current token type, call the
@@ -7949,9 +7971,8 @@ parser_lex(pm_parser_t *parser) {
// literally. In this case we'll skip past the next character
// and find the next breakpoint.
if (*breakpoint == '\\') {
- parser->current.end = breakpoint;
+ parser->current.end = breakpoint + 1;
pm_token_buffer_escape(parser, &token_buffer);
- parser->current.end++;
// If we've hit the end of the file, then break out of the
// loop by setting the breakpoint to NULL.
@@ -7967,25 +7988,25 @@ parser_lex(pm_parser_t *parser) {
case '\t':
case '\v':
case '\\':
- pm_buffer_append_u8(&token_buffer.buffer, peeked);
+ pm_token_buffer_push(&token_buffer, peeked);
parser->current.end++;
break;
case '\r':
parser->current.end++;
if (peek(parser) != '\n') {
- pm_buffer_append_u8(&token_buffer.buffer, '\r');
+ pm_token_buffer_push(&token_buffer, '\r');
break;
}
/* fallthrough */
case '\n':
- pm_buffer_append_u8(&token_buffer.buffer, '\n');
+ pm_token_buffer_push(&token_buffer, '\n');
if (parser->heredoc_end) {
// ... if we are on the same line as a heredoc,
// flush the heredoc and continue parsing after
// heredoc_end.
parser_flush_heredoc_end(parser);
- pm_string_owned_init(&parser->current_string, (uint8_t *) token_buffer.buffer.value, token_buffer.buffer.length);
+ pm_token_buffer_copy(parser, &token_buffer);
LEX(PM_TOKEN_STRING_CONTENT);
} else {
// ... else track the newline.
@@ -7996,13 +8017,13 @@ parser_lex(pm_parser_t *parser) {
break;
default:
if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
- pm_buffer_append_u8(&token_buffer.buffer, peeked);
+ pm_token_buffer_push(&token_buffer, peeked);
parser->current.end++;
} else if (lex_mode->as.list.interpolation) {
escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
} else {
- pm_buffer_append_u8(&token_buffer.buffer, '\\');
- pm_buffer_append_u8(&token_buffer.buffer, peeked);
+ pm_token_buffer_push(&token_buffer, '\\');
+ pm_token_buffer_push(&token_buffer, peeked);
parser->current.end++;
}