[PRISM] Move error formatting into Ruby

author: Kevin Newton <kddnewton@gmail.com> 2024-05-24 12:12:56 -0400
committer: Kevin Newton <kddnewton@gmail.com> 2024-05-24 12:58:44 -0400
commit: ba336027beea3b76798a053cb3e7fd06ae1e4741 (patch)
tree: b14b7f8c1b6af845b3926fb6692587e861cffc73
parent: 47f0965269dae95c678b4276861cc389d277ce33 (diff)
1 files changed, 363 insertions, 1 deletions
diff --git a/prism_compile.c b/prism_compile.c
index da7865fc8c..dfe34c24e1 100644
--- a/prism_compile.c
+++ b/prism_compile.c
@@ -9159,6 +9159,368 @@ pm_parse_result_free(pm_parse_result_t *result)
     pm_options_free(&result->options);
 }
 
+/** An error that is going to be formatted into the output. */
+typedef struct {
+    /** A pointer to the diagnostic that was generated during parsing. */
+    pm_diagnostic_t *error;
+
+    /** The start line of the diagnostic message. */
+    int32_t line;
+
+    /** The column start of the diagnostic message. */
+    uint32_t column_start;
+
+    /** The column end of the diagnostic message. */
+    uint32_t column_end;
+} pm_parse_error_t;
+
+/** The format that will be used to format the errors into the output. */
+typedef struct {
+    /** The prefix that will be used for line numbers. */
+    const char *number_prefix;
+
+    /** The prefix that will be used for blank lines. */
+    const char *blank_prefix;
+
+    /** The divider that will be used between sections of source code. */
+    const char *divider;
+
+    /** The length of the blank prefix. */
+    size_t blank_prefix_length;
+
+    /** The length of the divider. */
+    size_t divider_length;
+} pm_parse_error_format_t;
+
+#define PM_COLOR_GRAY "\033[38;5;102m"
+#define PM_COLOR_RED "\033[1;31m"
+#define PM_COLOR_RESET "\033[m"
+#define PM_ERROR_TRUNCATE 30
+
+static inline pm_parse_error_t *
+pm_parse_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
+    pm_parse_error_t *errors = xcalloc(error_list->size, sizeof(pm_parse_error_t));
+    if (errors == NULL) return NULL;
+
+    int32_t start_line = parser->start_line;
+    for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
+        pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line);
+        pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line);
+
+        // We're going to insert this error into the array in sorted order. We
+        // do this by finding the first error that has a line number greater
+        // than the current error and then inserting the current error before
+        // that one.
+        size_t index = 0;
+        while (
+            (index < error_list->size) &&
+            (errors[index].error != NULL) &&
+            (
+                (errors[index].line < start.line) ||
+                ((errors[index].line == start.line) && (errors[index].column_start < start.column))
+            )
+        ) index++;
+
+        // Now we're going to shift all of the errors after this one down one
+        // index to make room for the new error.
+        if (index + 1 < error_list->size) {
+            memmove(&errors[index + 1], &errors[index], sizeof(pm_parse_error_t) * (error_list->size - index - 1));
+        }
+
+        // Finally, we'll insert the error into the array.
+        uint32_t column_end;
+        if (start.line == end.line) {
+            column_end = end.column;
+        } else {
+            column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1);
+        }
+
+        // Ensure we have at least one column of error.
+        if (start.column == column_end) column_end++;
+
+        errors[index] = (pm_parse_error_t) {
+            .error = error,
+            .line = start.line,
+            .column_start = start.column,
+            .column_end = column_end
+        };
+    }
+
+    return errors;
+}
+
+static inline void
+pm_parse_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, uint32_t column_start, uint32_t column_end, pm_buffer_t *buffer) {
+    int32_t line_delta = line - parser->start_line;
+    assert(line_delta >= 0);
+
+    size_t index = (size_t) line_delta;
+    assert(index < newline_list->size);
+
+    const uint8_t *start = &parser->start[newline_list->offsets[index]];
+    const uint8_t *end;
+
+    if (index >= newline_list->size - 1) {
+        end = parser->end;
+    } else {
+        end = &parser->start[newline_list->offsets[index + 1]];
+    }
+
+    pm_buffer_append_format(buffer, number_prefix, line);
+
+    // Here we determine if we should truncate the end of the line.
+    bool truncate_end = false;
+    if ((column_end != 0) && ((end - (start + column_end)) >= PM_ERROR_TRUNCATE)) {
+        end = start + column_end + PM_ERROR_TRUNCATE;
+        truncate_end = true;
+    }
+
+    // Here we determine if we should truncate the start of the line.
+    if (column_start >= PM_ERROR_TRUNCATE) {
+        pm_buffer_append_string(buffer, "... ", 4);
+        start += column_start;
+    }
+
+    pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
+
+    if (truncate_end) {
+        pm_buffer_append_string(buffer, " ...\n", 5);
+    } else if (end == parser->end && end[-1] != '\n') {
+        pm_buffer_append_string(buffer, "\n", 1);
+    }
+}
+
+/**
+ * Format the errors on the parser into the given buffer.
+ */
+static void
+pm_parse_errors_format(const pm_parser_t *parser, const pm_list_t *error_list, pm_buffer_t *buffer, bool colorize, bool inline_messages) {
+    assert(error_list->size != 0);
+
+    // First, we're going to sort all of the errors by line number using an
+    // insertion sort into a newly allocated array.
+    const int32_t start_line = parser->start_line;
+    const pm_newline_list_t *newline_list = &parser->newline_list;
+
+    pm_parse_error_t *errors = pm_parse_errors_format_sort(parser, error_list, newline_list);
+    if (errors == NULL) return;
+
+    // Now we're going to determine how we're going to format line numbers and
+    // blank lines based on the maximum number of digits in the line numbers
+    // that are going to be displaid.
+    pm_parse_error_format_t error_format;
+    int32_t first_line_number = errors[0].line;
+    int32_t last_line_number = errors[error_list->size - 1].line;
+
+    // If we have a maximum line number that is negative, then we're going to
+    // use the absolute value for comparison but multiple by 10 to additionally
+    // have a column for the negative sign.
+    if (first_line_number < 0) first_line_number = (-first_line_number) * 10;
+    if (last_line_number < 0) last_line_number = (-last_line_number) * 10;
+    int32_t max_line_number = first_line_number > last_line_number ? first_line_number : last_line_number;
+
+    if (max_line_number < 10) {
+        if (colorize) {
+            error_format = (pm_parse_error_format_t) {
+                .number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET,
+                .blank_prefix = PM_COLOR_GRAY "  | " PM_COLOR_RESET,
+                .divider = PM_COLOR_GRAY "  ~~~~~" PM_COLOR_RESET "\n"
+            };
+        } else {
+            error_format = (pm_parse_error_format_t) {
+                .number_prefix = "%1" PRIi32 " | ",
+                .blank_prefix = "  | ",
+                .divider = "  ~~~~~\n"
+            };
+        }
+    } else if (max_line_number < 100) {
+        if (colorize) {
+            error_format = (pm_parse_error_format_t) {
+                .number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET,
+                .blank_prefix = PM_COLOR_GRAY "   | " PM_COLOR_RESET,
+                .divider = PM_COLOR_GRAY "  ~~~~~~" PM_COLOR_RESET "\n"
+            };
+        } else {
+            error_format = (pm_parse_error_format_t) {
+                .number_prefix = "%2" PRIi32 " | ",
+                .blank_prefix = "   | ",
+                .divider = "  ~~~~~~\n"
+            };
+        }
+    } else if (max_line_number < 1000) {
+        if (colorize) {
+            error_format = (pm_parse_error_format_t) {
+                .number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET,
+                .blank_prefix = PM_COLOR_GRAY "    | " PM_COLOR_RESET,
+                .divider = PM_COLOR_GRAY "  ~~~~~~~" PM_COLOR_RESET "\n"
+            };
+        } else {
+            error_format = (pm_parse_error_format_t) {
+                .number_prefix = "%3" PRIi32 " | ",
+                .blank_prefix = "    | ",
+                .divider = "  ~~~~~~~\n"
+            };
+        }
+    } else if (max_line_number < 10000) {
+        if (colorize) {
+            error_format = (pm_parse_error_format_t) {
+                .number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET,
+                .blank_prefix = PM_COLOR_GRAY "     | " PM_COLOR_RESET,
+                .divider = PM_COLOR_GRAY "  ~~~~~~~~" PM_COLOR_RESET "\n"
+            };
+        } else {
+            error_format = (pm_parse_error_format_t) {
+                .number_prefix = "%4" PRIi32 " | ",
+                .blank_prefix = "     | ",
+                .divider = "  ~~~~~~~~\n"
+            };
+        }
+    } else {
+        if (colorize) {
+            error_format = (pm_parse_error_format_t) {
+                .number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET,
+                .blank_prefix = PM_COLOR_GRAY "      | " PM_COLOR_RESET,
+                .divider = PM_COLOR_GRAY "  ~~~~~~~~" PM_COLOR_RESET "\n"
+            };
+        } else {
+            error_format = (pm_parse_error_format_t) {
+                .number_prefix = "%5" PRIi32 " | ",
+                .blank_prefix = "      | ",
+                .divider = "  ~~~~~~~~\n"
+            };
+        }
+    }
+
+    error_format.blank_prefix_length = strlen(error_format.blank_prefix);
+    error_format.divider_length = strlen(error_format.divider);
+
+    // Now we're going to iterate through every error in our error list and
+    // display it. While we're iterating, we will display some padding lines of
+    // the source before the error to give some context. We'll be careful not to
+    // display the same line twice in case the errors are close enough in the
+    // source.
+    int32_t last_line = parser->start_line - 1;
+    uint32_t last_column_start = 0;
+    const pm_encoding_t *encoding = parser->encoding;
+
+    for (size_t index = 0; index < error_list->size; index++) {
+        pm_parse_error_t *error = &errors[index];
+
+        // Here we determine how many lines of padding of the source to display,
+        // based on the difference from the last line that was displaid.
+        if (error->line - last_line > 1) {
+            if (error->line - last_line > 2) {
+                if ((index != 0) && (error->line - last_line > 3)) {
+                    pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
+                }
+
+                pm_buffer_append_string(buffer, "  ", 2);
+                pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, 0, 0, buffer);
+            }
+
+            pm_buffer_append_string(buffer, "  ", 2);
+            pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, 0, 0, buffer);
+        }
+
+        // If this is the first error or we're on a new line, then we'll display
+        // the line that has the error in it.
+        if ((index == 0) || (error->line != last_line)) {
+            if (colorize) {
+                pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 12);
+            } else {
+                pm_buffer_append_string(buffer, "> ", 2);
+            }
+
+            last_column_start = error->column_start;
+
+            // Find the maximum column end of all the errors on this line.
+            uint32_t column_end = error->column_end;
+            for (size_t next_index = index + 1; next_index < error_list->size; next_index++) {
+                if (errors[next_index].line != error->line) break;
+                if (errors[next_index].column_end > column_end) column_end = errors[next_index].column_end;
+            }
+
+            pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, error->column_start, column_end, buffer);
+        }
+
+        const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]];
+        if (start == parser->end) pm_buffer_append_byte(buffer, '\n');
+
+        // Now we'll display the actual error message. We'll do this by first
+        // putting the prefix to the line, then a bunch of blank spaces
+        // depending on the column, then as many carets as we need to display
+        // the width of the error, then the error message itself.
+        //
+        // Note that this doesn't take into account the width of the actual
+        // character when displaid in the terminal. For some east-asian
+        // languages or emoji, this means it can be thrown off pretty badly. We
+        // will need to solve this eventually.
+        pm_buffer_append_string(buffer, "  ", 2);
+        pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
+
+        size_t column = 0;
+        if (last_column_start >= PM_ERROR_TRUNCATE) {
+            pm_buffer_append_string(buffer, "    ", 4);
+            column = last_column_start;
+        }
+
+        while (column < error->column_start) {
+            pm_buffer_append_byte(buffer, ' ');
+
+            size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
+            column += (char_width == 0 ? 1 : char_width);
+        }
+
+        if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RED, 7);
+        pm_buffer_append_byte(buffer, '^');
+
+        size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
+        column += (char_width == 0 ? 1 : char_width);
+
+        while (column < error->column_end) {
+            pm_buffer_append_byte(buffer, '~');
+
+            size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
+            column += (char_width == 0 ? 1 : char_width);
+        }
+
+        if (colorize) pm_buffer_append_string(buffer, PM_COLOR_RESET, 3);
+
+        if (inline_messages) {
+            pm_buffer_append_byte(buffer, ' ');
+            assert(error->error != NULL);
+
+            const char *message = error->error->message;
+            pm_buffer_append_string(buffer, message, strlen(message));
+        }
+
+        pm_buffer_append_byte(buffer, '\n');
+
+        // Here we determine how many lines of padding to display after the
+        // error, depending on where the next error is in source.
+        last_line = error->line;
+        int32_t next_line = (index == error_list->size - 1) ? (((int32_t) newline_list->size) + parser->start_line) : errors[index + 1].line;
+
+        if (next_line - last_line > 1) {
+            pm_buffer_append_string(buffer, "  ", 2);
+            pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, 0, 0, buffer);
+        }
+
+        if (next_line - last_line > 1) {
+            pm_buffer_append_string(buffer, "  ", 2);
+            pm_parse_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, 0, 0, buffer);
+        }
+    }
+
+    // Finally, we'll free the array of errors that we allocated.
+    xfree(errors);
+}
+
+#undef PM_ERROR_TRUNCATE
+#undef PM_COLOR_GRAY
+#undef PM_COLOR_RED
+#undef PM_COLOR_RESET
+
 /**
  * Check if the given source slice is valid UTF-8. The location represents the
  * location of the error, but the slice of the source will include the content
@@ -9230,7 +9592,7 @@ pm_parse_process_error(const pm_parse_result_t *result)
                 pm_list_node_t *list_node = (pm_list_node_t *) error;
                 pm_list_t error_list = { .size = 1, .head = list_node, .tail = list_node };
 
-                pm_parser_errors_format(parser, &error_list, &buffer, rb_stderr_tty_p(), false);
+                pm_parse_errors_format(parser, &error_list, &buffer, rb_stderr_tty_p(), false);
             }
 
             VALUE value = rb_exc_new(rb_eArgError, pm_buffer_value(&buffer), pm_buffer_length(&buffer));
author	Kevin Newton <kddnewton@gmail.com>	2024-05-24 12:12:56 -0400
committer	Kevin Newton <kddnewton@gmail.com>	2024-05-24 12:58:44 -0400
commit	ba336027beea3b76798a053cb3e7fd06ae1e4741 (patch)
tree	b14b7f8c1b6af845b3926fb6692587e861cffc73
parent	47f0965269dae95c678b4276861cc389d277ce33 (diff)