[ruby/prism] Even more C file documentation

https://github.com/ruby/prism/commit/9c648ce615
author: Kevin Newton <kddnewton@gmail.com> 2023-10-31 12:54:54 -0400
committer: Kevin Newton <kddnewton@gmail.com> 2023-11-01 13:10:29 -0400
commit: 17923cc876513707b4bedcd4437b229feb455099 (patch)
tree: a4b5ef4421268c1d52937cc86e4135d9b1157486
parent: 6b3b530cc1266aeaecb68a01e8511a794ea456ea (diff)
7 files changed, 617 insertions, 265 deletions
diff --git a/prism/enc/pm_unicode.c b/prism/enc/pm_unicode.c
index d021894c1e..e471d03b6b 100644
--- a/prism/enc/pm_unicode.c
+++ b/prism/enc/pm_unicode.c
@@ -2183,7 +2183,7 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
  * codepoint is in the list.
  */
 static bool
-pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, size_t size, const pm_unicode_codepoint_t codepoints[size]) {
+pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) {
     size_t start = 0;
     size_t end = size;
 
@@ -2300,7 +2300,7 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
     if (codepoint <= 0xFF) {
         return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_ALPHABETIC_BIT) ? width : 0;
     } else {
-        return pm_unicode_codepoint_match(codepoint, UNICODE_ALPHA_CODEPOINTS_LENGTH, unicode_alpha_codepoints) ? width : 0;
+        return pm_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
     }
 }
 
@@ -2320,7 +2320,7 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
     if (codepoint <= 0xFF) {
         return (pm_encoding_unicode_table[(uint8_t) codepoint] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
     } else {
-        return pm_unicode_codepoint_match(codepoint, UNICODE_ALNUM_CODEPOINTS_LENGTH, unicode_alnum_codepoints) ? width : 0;
+        return pm_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
     }
 }
 
@@ -2340,7 +2340,7 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
     if (codepoint <= 0xFF) {
         return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
     } else {
-        return pm_unicode_codepoint_match(codepoint, UNICODE_ISUPPER_CODEPOINTS_LENGTH, unicode_isupper_codepoints) ? true : false;
+        return pm_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
     }
 }
 
diff --git a/prism/parser.h b/prism/parser.h
index 01b047ccdf..92a8ce589d 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -12,9 +12,11 @@
 
 #include <stdbool.h>
 
-// This enum provides various bits that represent different kinds of states that
-// the lexer can track. This is used to determine which kind of token to return
-// based on the context of the parser.
+/**
+ * This enum provides various bits that represent different kinds of states that
+ * the lexer can track. This is used to determine which kind of token to return
+ * based on the context of the parser.
+ */
 typedef enum {
     PM_LEX_STATE_BIT_BEG,
     PM_LEX_STATE_BIT_END,
@@ -31,8 +33,10 @@ typedef enum {
     PM_LEX_STATE_BIT_FITEM
 } pm_lex_state_bit_t;
 
-// This enum combines the various bits from the above enum into individual
-// values that represent the various states of the lexer.
+/**
+ * This enum combines the various bits from the above enum into individual
+ * values that represent the various states of the lexer.
+ */
 typedef enum {
     PM_LEX_STATE_NONE = 0,
     PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
@@ -53,6 +57,9 @@ typedef enum {
     PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
 } pm_lex_state_t;
 
+/**
+ * The type of quote that a heredoc uses.
+ */
 typedef enum {
     PM_HEREDOC_QUOTE_NONE,
     PM_HEREDOC_QUOTE_SINGLE = '\'',
@@ -60,183 +67,287 @@ typedef enum {
     PM_HEREDOC_QUOTE_BACKTICK = '`',
 } pm_heredoc_quote_t;
 
+/**
+ * The type of indentation that a heredoc uses.
+ */
 typedef enum {
     PM_HEREDOC_INDENT_NONE,
     PM_HEREDOC_INDENT_DASH,
     PM_HEREDOC_INDENT_TILDE,
 } pm_heredoc_indent_t;
 
-// When lexing Ruby source, the lexer has a small amount of state to tell which
-// kind of token it is currently lexing. For example, when we find the start of
-// a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
-// that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
-// are found as part of a string.
+/**
+ * When lexing Ruby source, the lexer has a small amount of state to tell which
+ * kind of token it is currently lexing. For example, when we find the start of
+ * a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
+ * that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
+ * are found as part of a string.
+ */
 typedef struct pm_lex_mode {
     enum {
-        // This state is used when any given token is being lexed.
+        /** This state is used when any given token is being lexed. */
         PM_LEX_DEFAULT,
 
-        // This state is used when we're lexing as normal but inside an embedded
-        // expression of a string.
+        /**
+         * This state is used when we're lexing as normal but inside an embedded
+         * expression of a string.
+         */
         PM_LEX_EMBEXPR,
 
-        // This state is used when we're lexing a variable that is embedded
-        // directly inside of a string with the # shorthand.
+        /**
+         * This state is used when we're lexing a variable that is embedded
+         * directly inside of a string with the # shorthand.
+         */
         PM_LEX_EMBVAR,
 
-        // This state is used when you are inside the content of a heredoc.
+        /** This state is used when you are inside the content of a heredoc. */
         PM_LEX_HEREDOC,
 
-        // This state is used when we are lexing a list of tokens, as in a %w
-        // word list literal or a %i symbol list literal.
+        /**
+         * This state is used when we are lexing a list of tokens, as in a %w
+         * word list literal or a %i symbol list literal.
+         */
         PM_LEX_LIST,
 
-        // This state is used when a regular expression has been begun and we
-        // are looking for the terminator.
+        /**
+         * This state is used when a regular expression has been begun and we
+         * are looking for the terminator.
+         */
         PM_LEX_REGEXP,
 
-        // This state is used when we are lexing a string or a string-like
-        // token, as in string content with either quote or an xstring.
+        /**
+         * This state is used when we are lexing a string or a string-like
+         * token, as in string content with either quote or an xstring.
+         */
         PM_LEX_STRING
     } mode;
 
     union {
         struct {
-            // This keeps track of the nesting level of the list.
+            /** This keeps track of the nesting level of the list. */
             size_t nesting;
 
-            // Whether or not interpolation is allowed in this list.
+            /** Whether or not interpolation is allowed in this list. */
             bool interpolation;
 
-            // When lexing a list, it takes into account balancing the
-            // terminator if the terminator is one of (), [], {}, or <>.
+            /**
+             * When lexing a list, it takes into account balancing the
+             * terminator if the terminator is one of (), [], {}, or <>.
+             */
             uint8_t incrementor;
 
-            // This is the terminator of the list literal.
+            /** This is the terminator of the list literal. */
             uint8_t terminator;
 
-            // This is the character set that should be used to delimit the
-            // tokens within the list.
+            /**
+             * This is the character set that should be used to delimit the
+             * tokens within the list.
+             */
             uint8_t breakpoints[11];
         } list;
 
         struct {
-            // This keeps track of the nesting level of the regular expression.
+            /**
+             * This keeps track of the nesting level of the regular expression.
+             */
             size_t nesting;
 
-            // When lexing a regular expression, it takes into account balancing
-            // the terminator if the terminator is one of (), [], {}, or <>.
+            /**
+             * When lexing a regular expression, it takes into account balancing
+             * the terminator if the terminator is one of (), [], {}, or <>.
+             */
             uint8_t incrementor;
 
-            // This is the terminator of the regular expression.
+            /** This is the terminator of the regular expression. */
             uint8_t terminator;
 
-            // This is the character set that should be used to delimit the
-            // tokens within the regular expression.
+            /**
+             * This is the character set that should be used to delimit the
+             * tokens within the regular expression.
+             */
             uint8_t breakpoints[6];
         } regexp;
 
         struct {
-            // This keeps track of the nesting level of the string.
+            /** This keeps track of the nesting level of the string. */
             size_t nesting;
 
-            // Whether or not interpolation is allowed in this string.
+            /** Whether or not interpolation is allowed in this string. */
             bool interpolation;
 
-            // Whether or not at the end of the string we should allow a :,
-            // which would indicate this was a dynamic symbol instead of a
-            // string.
+            /**
+             * Whether or not at the end of the string we should allow a :,
+             * which would indicate this was a dynamic symbol instead of a
+             * string.
+             */
             bool label_allowed;
 
-            // When lexing a string, it takes into account balancing the
-            // terminator if the terminator is one of (), [], {}, or <>.
+            /**
+             * When lexing a string, it takes into account balancing the
+             * terminator if the terminator is one of (), [], {}, or <>.
+             */
             uint8_t incrementor;
 
-            // This is the terminator of the string. It is typically either a
-            // single or double quote.
+            /**
+             * This is the terminator of the string. It is typically either a
+             * single or double quote.
+             */
             uint8_t terminator;
 
-            // This is the character set that should be used to delimit the
-            // tokens within the string.
+            /**
+             * This is the character set that should be used to delimit the
+             * tokens within the string.
+             */
             uint8_t breakpoints[6];
         } string;
 
         struct {
-            // These pointers point to the beginning and end of the heredoc
-            // identifier.
+            /** A pointer to the start of the heredoc identifier. */
             const uint8_t *ident_start;
+
+            /** The length of the heredoc identifier. */
             size_t ident_length;
 
+            /** The type of quote that the heredoc uses. */
             pm_heredoc_quote_t quote;
+
+            /** The type of indentation that the heredoc uses. */
             pm_heredoc_indent_t indent;
 
-            // This is the pointer to the character where lexing should resume
-            // once the heredoc has been completely processed.
+            /**
+             * This is the pointer to the character where lexing should resume
+             * once the heredoc has been completely processed.
+             */
             const uint8_t *next_start;
 
-            // This is used to track the amount of common whitespace on each
-            // line so that we know how much to dedent each line in the case of
-            // a tilde heredoc.
+            /**
+             * This is used to track the amount of common whitespace on each
+             * line so that we know how much to dedent each line in the case of
+             * a tilde heredoc.
+             */
             size_t common_whitespace;
         } heredoc;
     } as;
 
-    // The previous lex state so that it knows how to pop.
+    /** The previous lex state so that it knows how to pop. */
     struct pm_lex_mode *prev;
 } pm_lex_mode_t;
 
-// We pre-allocate a certain number of lex states in order to avoid having to
-// call malloc too many times while parsing. You really shouldn't need more than
-// this because you only really nest deeply when doing string interpolation.
+/**
+ * We pre-allocate a certain number of lex states in order to avoid having to
+ * call malloc too many times while parsing. You really shouldn't need more than
+ * this because you only really nest deeply when doing string interpolation.
+ */
 #define PM_LEX_STACK_SIZE 4
 
 // A forward declaration since our error handler struct accepts a parser for
 // each of its function calls.
 typedef struct pm_parser pm_parser_t;
 
-// While parsing, we keep track of a stack of contexts. This is helpful for
-// error recovery so that we can pop back to a previous context when we hit a
-// token that is understood by a parent context but not by the current context.
+/**
+ * While parsing, we keep track of a stack of contexts. This is helpful for
+ * error recovery so that we can pop back to a previous context when we hit a
+ * token that is understood by a parent context but not by the current context.
+ */
 typedef enum {
-    PM_CONTEXT_BEGIN,          // a begin statement
-    PM_CONTEXT_BLOCK_BRACES,   // expressions in block arguments using braces
-    PM_CONTEXT_BLOCK_KEYWORDS, // expressions in block arguments using do..end
-    PM_CONTEXT_CASE_WHEN,      // a case when statements
-    PM_CONTEXT_CASE_IN,        // a case in statements
-    PM_CONTEXT_CLASS,          // a class declaration
-    PM_CONTEXT_DEF,            // a method definition
-    PM_CONTEXT_DEF_PARAMS,     // a method definition's parameters
-    PM_CONTEXT_DEFAULT_PARAMS, // a method definition's default parameter
-    PM_CONTEXT_ELSE,           // an else clause
-    PM_CONTEXT_ELSIF,          // an elsif clause
-    PM_CONTEXT_EMBEXPR,        // an interpolated expression
-    PM_CONTEXT_ENSURE,         // an ensure statement
-    PM_CONTEXT_FOR,            // a for loop
-    PM_CONTEXT_FOR_INDEX,      // a for loop's index
-    PM_CONTEXT_IF,             // an if statement
-    PM_CONTEXT_LAMBDA_BRACES,  // a lambda expression with braces
-    PM_CONTEXT_LAMBDA_DO_END,  // a lambda expression with do..end
-    PM_CONTEXT_MAIN,           // the top level context
-    PM_CONTEXT_MODULE,         // a module declaration
-    PM_CONTEXT_PARENS,         // a parenthesized expression
-    PM_CONTEXT_POSTEXE,        // an END block
-    PM_CONTEXT_PREDICATE,      // a predicate inside an if/elsif/unless statement
-    PM_CONTEXT_PREEXE,         // a BEGIN block
-    PM_CONTEXT_RESCUE_ELSE,    // a rescue else statement
-    PM_CONTEXT_RESCUE,         // a rescue statement
-    PM_CONTEXT_SCLASS,         // a singleton class definition
-    PM_CONTEXT_UNLESS,         // an unless statement
-    PM_CONTEXT_UNTIL,          // an until statement
-    PM_CONTEXT_WHILE,          // a while statement
+    /** a begin statement */
+    PM_CONTEXT_BEGIN,
+
+    /** expressions in block arguments using braces */
+    PM_CONTEXT_BLOCK_BRACES,
+
+    /** expressions in block arguments using do..end */
+    PM_CONTEXT_BLOCK_KEYWORDS,
+
+    /** a case when statement */
+    PM_CONTEXT_CASE_WHEN,
+
+    /** a case in statement */
+    PM_CONTEXT_CASE_IN,
+
+    /** a class declaration */
+    PM_CONTEXT_CLASS,
+
+    /** a method definition */
+    PM_CONTEXT_DEF,
+
+    /** a method definition's parameters */
+    PM_CONTEXT_DEF_PARAMS,
+
+    /** a method definition's default parameter */
+    PM_CONTEXT_DEFAULT_PARAMS,
+
+    /** an else clause */
+    PM_CONTEXT_ELSE,
+
+    /** an elsif clause */
+    PM_CONTEXT_ELSIF,
+
+    /** an interpolated expression */
+    PM_CONTEXT_EMBEXPR,
+
+    /** an ensure statement */
+    PM_CONTEXT_ENSURE,
+
+    /** a for loop */
+    PM_CONTEXT_FOR,
+
+    /** a for loop's index */
+    PM_CONTEXT_FOR_INDEX,
+
+    /** an if statement */
+    PM_CONTEXT_IF,
+
+    /** a lambda expression with braces */
+    PM_CONTEXT_LAMBDA_BRACES,
+
+    /** a lambda expression with do..end */
+    PM_CONTEXT_LAMBDA_DO_END,
+
+    /** the top level context */
+    PM_CONTEXT_MAIN,
+
+    /** a module declaration */
+    PM_CONTEXT_MODULE,
+
+    /** a parenthesized expression */
+    PM_CONTEXT_PARENS,
+
+    /** an END block */
+    PM_CONTEXT_POSTEXE,
+
+    /** a predicate inside an if/elsif/unless statement */
+    PM_CONTEXT_PREDICATE,
+
+    /** a BEGIN block */
+    PM_CONTEXT_PREEXE,
+
+    /** a rescue else statement */
+    PM_CONTEXT_RESCUE_ELSE,
+
+    /** a rescue statement */
+    PM_CONTEXT_RESCUE,
+
+    /** a singleton class definition */
+    PM_CONTEXT_SCLASS,
+
+    /** an unless statement */
+    PM_CONTEXT_UNLESS,
+
+    /** an until statement */
+    PM_CONTEXT_UNTIL,
+
+    /** a while statement */
+    PM_CONTEXT_WHILE,
 } pm_context_t;
 
-// This is a node in a linked list of contexts.
+/** This is a node in a linked list of contexts. */
 typedef struct pm_context_node {
     pm_context_t context;
     struct pm_context_node *prev;
 } pm_context_node_t;
 
-// This is the type of a comment that we've found while parsing.
+/** This is the type of a comment that we've found while parsing. */
 typedef enum {
     PM_COMMENT_INLINE,
     PM_COMMENT_EMBDOC,
@@ -269,185 +380,270 @@ typedef struct {
     uint32_t value_length;
 } pm_magic_comment_t;
 
-// When the encoding that is being used to parse the source is changed by prism,
-// we provide the ability here to call out to a user-defined function.
+/**
+ * When the encoding that is being used to parse the source is changed by prism,
+ * we provide the ability here to call out to a user-defined function.
+ */
 typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
 
-// When an encoding is encountered that isn't understood by prism, we provide
-// the ability here to call out to a user-defined function to get an encoding
-// struct. If the function returns something that isn't NULL, we set that to
-// our encoding and use it to parse identifiers.
+/**
+ * When an encoding is encountered that isn't understood by prism, we provide
+ * the ability here to call out to a user-defined function to get an encoding
+ * struct. If the function returns something that isn't NULL, we set that to
+ * our encoding and use it to parse identifiers.
+ */
 typedef pm_encoding_t *(*pm_encoding_decode_callback_t)(pm_parser_t *parser, const uint8_t *name, size_t width);
 
-// When you are lexing through a file, the lexer needs all of the information
-// that the parser additionally provides (for example, the local table). So if
-// you want to properly lex Ruby, you need to actually lex it in the context of
-// the parser. In order to provide this functionality, we optionally allow a
-// struct to be attached to the parser that calls back out to a user-provided
-// callback when each token is lexed.
+/**
+ * When you are lexing through a file, the lexer needs all of the information
+ * that the parser additionally provides (for example, the local table). So if
+ * you want to properly lex Ruby, you need to actually lex it in the context of
+ * the parser. In order to provide this functionality, we optionally allow a
+ * struct to be attached to the parser that calls back out to a user-provided
+ * callback when each token is lexed.
+ */
 typedef struct {
-    // This opaque pointer is used to provide whatever information the user
-    // deemed necessary to the callback. In our case we use it to pass the array
-    // that the tokens get appended into.
+    /**
+     * This opaque pointer is used to provide whatever information the user
+     * deemed necessary to the callback. In our case we use it to pass the array
+     * that the tokens get appended into.
+     */
     void *data;
 
-    // This is the callback that is called when a token is lexed. It is passed
-    // the opaque data pointer, the parser, and the token that was lexed.
+    /**
+     * This is the callback that is called when a token is lexed. It is passed
+     * the opaque data pointer, the parser, and the token that was lexed.
+     */
     void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
 } pm_lex_callback_t;
 
-// This struct represents a node in a linked list of scopes. Some scopes can see
-// into their parent scopes, while others cannot.
+/**
+ * This struct represents a node in a linked list of scopes. Some scopes can see
+ * into their parent scopes, while others cannot.
+ */
 typedef struct pm_scope {
-    // The IDs of the locals in the given scope.
+    /** The IDs of the locals in the given scope. */
     pm_constant_id_list_t locals;
 
-    // A pointer to the previous scope in the linked list.
+    /** A pointer to the previous scope in the linked list. */
     struct pm_scope *previous;
 
-    // A boolean indicating whether or not this scope can see into its parent.
-    // If closed is true, then the scope cannot see into its parent.
+    /**
+     * A boolean indicating whether or not this scope can see into its parent.
+     * If closed is true, then the scope cannot see into its parent.
+     */
     bool closed;
 
-    // A boolean indicating whether or not this scope has explicit parameters.
-    // This is necessary to determine whether or not numbered parameters are
-    // allowed.
+    /**
+     * A boolean indicating whether or not this scope has explicit parameters.
+     * This is necessary to determine whether or not numbered parameters are
+     * allowed.
+     */
     bool explicit_params;
 
-    // A boolean indicating whether or not this scope has numbered parameters.
-    // This is necessary to determine if child blocks are allowed to use
-    // numbered parameters.
+    /**
+     * A boolean indicating whether or not this scope has numbered parameters.
+     * This is necessary to determine if child blocks are allowed to use
+     * numbered parameters.
+     */
     bool numbered_params;
 
-    // A transparent scope is a scope that cannot have locals set on itself.
-    // When a local is set on this scope, it will instead be set on the parent
-    // scope's local table.
+    /**
+     * A transparent scope is a scope that cannot have locals set on itself.
+     * When a local is set on this scope, it will instead be set on the parent
+     * scope's local table.
+     */
     bool transparent;
 } pm_scope_t;
 
-// This struct represents the overall parser. It contains a reference to the
-// source file, as well as pointers that indicate where in the source it's
-// currently parsing. It also contains the most recent and current token that
-// it's considering.
+/**
+ * This struct represents the overall parser. It contains a reference to the
+ * source file, as well as pointers that indicate where in the source it's
+ * currently parsing. It also contains the most recent and current token that
+ * it's considering.
+ */
 struct pm_parser {
-    pm_lex_state_t lex_state; // the current state of the lexer
-    int enclosure_nesting;    // tracks the current nesting of (), [], and {}
+    /** The current state of the lexer. */
+    pm_lex_state_t lex_state;
 
-    // Used to temporarily track the nesting of enclosures to determine if a {
-    // is the beginning of a lambda following the parameters of a lambda.
+    /** Tracks the current nesting of (), [], and {}. */
+    int enclosure_nesting;
+
+    /**
+     * Used to temporarily track the nesting of enclosures to determine if a {
+     * is the beginning of a lambda following the parameters of a lambda.
+     */
     int lambda_enclosure_nesting;
 
-    // Used to track the nesting of braces to ensure we get the correct value
-    // when we are interpolating blocks with braces.
+    /**
+     * Used to track the nesting of braces to ensure we get the correct value
+     * when we are interpolating blocks with braces.
+     */
     int brace_nesting;
 
-    // the stack used to determine if a do keyword belongs to the predicate of a
-    // while, until, or for loop
+    /**
+     * The stack used to determine if a do keyword belongs to the predicate of a
+     * while, until, or for loop.
+     */
     pm_state_stack_t do_loop_stack;
 
-    // the stack used to determine if a do keyword belongs to the beginning of a
-    // block
+    /**
+     * The stack used to determine if a do keyword belongs to the beginning of a
+     * block.
+     */
     pm_state_stack_t accepts_block_stack;
 
     struct {
-        pm_lex_mode_t *current;                 // the current mode of the lexer
-        pm_lex_mode_t stack[PM_LEX_STACK_SIZE]; // the stack of lexer modes
-        size_t index;                           // the current index into the lexer mode stack
+        /** The current mode of the lexer. */
+        pm_lex_mode_t *current;
+
+        /** The stack of lexer modes. */
+        pm_lex_mode_t stack[PM_LEX_STACK_SIZE];
+
+        /** The current index into the lexer mode stack. */
+        size_t index;
     } lex_modes;
 
-    const uint8_t *start;   // the pointer to the start of the source
-    const uint8_t *end;     // the pointer to the end of the source
-    pm_token_t previous; // the previous token we were considering
-    pm_token_t current;  // the current token we're considering
+    /** The pointer to the start of the source. */
+    const uint8_t *start;
+
+    /** The pointer to the end of the source. */
+    const uint8_t *end;
+
+    /** The previous token we were considering. */
+    pm_token_t previous;
+
+    /** The current token we're considering. */
+    pm_token_t current;
 
-    // This is a special field set on the parser when we need the parser to jump
-    // to a specific location when lexing the next token, as opposed to just
-    // using the end of the previous token. Normally this is NULL.
+    /**
+     * This is a special field set on the parser when we need the parser to jump
+     * to a specific location when lexing the next token, as opposed to just
+     * using the end of the previous token. Normally this is NULL.
+     */
     const uint8_t *next_start;
 
-    // This field indicates the end of a heredoc whose identifier was found on
-    // the current line. If another heredoc is found on the same line, then this
-    // will be moved forward to the end of that heredoc. If no heredocs are
-    // found on a line then this is NULL.
+    /**
+     * This field indicates the end of a heredoc whose identifier was found on
+     * the current line. If another heredoc is found on the same line, then this
+     * will be moved forward to the end of that heredoc. If no heredocs are
+     * found on a line then this is NULL.
+     */
     const uint8_t *heredoc_end;
 
-    pm_list_t comment_list;             // the list of comments that have been found while parsing
-    pm_list_t magic_comment_list;       // the list of magic comments that have been found while parsing.
-    pm_list_t warning_list;             // the list of warnings that have been found while parsing
-    pm_list_t error_list;               // the list of errors that have been found while parsing
-    pm_scope_t *current_scope;          // the current local scope
+    /** The list of comments that have been found while parsing. */
+    pm_list_t comment_list;
+
+    /** The list of magic comments that have been found while parsing. */
+    pm_list_t magic_comment_list;
+
+    /** The list of warnings that have been found while parsing. */
+    pm_list_t warning_list;
+
+    /** The list of errors that have been found while parsing. */
+    pm_list_t error_list;
+
+    /** The current local scope. */
+    pm_scope_t *current_scope;
 
-    pm_context_node_t *current_context; // the current parsing context
+    /** The current parsing context. */
+    pm_context_node_t *current_context;
 
-    // The encoding functions for the current file is attached to the parser as
-    // it's parsing so that it can change with a magic comment.
+    /**
+     * The encoding functions for the current file are attached to the parser as
+     * it's parsing so that it can change with a magic comment.
+     */
     pm_encoding_t encoding;
 
-    // When the encoding that is being used to parse the source is changed by
-    // prism, we provide the ability here to call out to a user-defined
-    // function.
+    /**
+     * When the encoding that is being used to parse the source is changed by
+     * prism, we provide the ability here to call out to a user-defined
+     * function.
+     */
     pm_encoding_changed_callback_t encoding_changed_callback;
 
-    // When an encoding is encountered that isn't understood by prism, we
-    // provide the ability here to call out to a user-defined function to get an
-    // encoding struct. If the function returns something that isn't NULL, we
-    // set that to our encoding and use it to parse identifiers.
+    /**
+     * When an encoding is encountered that isn't understood by prism, we
+     * provide the ability here to call out to a user-defined function to get an
+     * encoding struct. If the function returns something that isn't NULL, we
+     * set that to our encoding and use it to parse identifiers.
+     */
     pm_encoding_decode_callback_t encoding_decode_callback;
 
-    // This pointer indicates where a comment must start if it is to be
-    // considered an encoding comment.
+    /**
+     * This pointer indicates where a comment must start if it is to be
+     * considered an encoding comment.
+     */
     const uint8_t *encoding_comment_start;
 
-    // This is an optional callback that can be attached to the parser that will
-    // be called whenever a new token is lexed by the parser.
+    /**
+     * This is an optional callback that can be attached to the parser that will
+     * be called whenever a new token is lexed by the parser.
+     */
     pm_lex_callback_t *lex_callback;
 
-    // This is the path of the file being parsed
-    // We use the filepath when constructing SourceFileNodes
+    /**
+     * This is the path of the file being parsed. We use the filepath when
+     * constructing SourceFileNodes.
+     */
     pm_string_t filepath_string;
 
-    // This constant pool keeps all of the constants defined throughout the file
-    // so that we can reference them later.
+    /**
+     * This constant pool keeps all of the constants defined throughout the file
+     * so that we can reference them later.
+     */
     pm_constant_pool_t constant_pool;
 
-    // This is the list of newline offsets in the source file.
+    /** This is the list of newline offsets in the source file. */
     pm_newline_list_t newline_list;
 
-    // We want to add a flag to integer nodes that indicates their base. We only
-    // want to parse these once, but we don't have space on the token itself to
-    // communicate this information. So we store it here and pass it through
-    // when we find tokens that we need it for.
+    /**
+     * We want to add a flag to integer nodes that indicates their base. We only
+     * want to parse these once, but we don't have space on the token itself to
+     * communicate this information. So we store it here and pass it through
+     * when we find tokens that we need it for.
+     */
     pm_node_flags_t integer_base;
 
-    // This string is used to pass information from the lexer to the parser. It
-    // is particularly necessary because of escape sequences.
+    /**
+     * This string is used to pass information from the lexer to the parser. It
+     * is particularly necessary because of escape sequences.
+     */
     pm_string_t current_string;
 
-    // Whether or not we're at the beginning of a command
+    /** Whether or not we're at the beginning of a command. */
     bool command_start;
 
-    // Whether or not we're currently recovering from a syntax error
+    /** Whether or not we're currently recovering from a syntax error. */
     bool recovering;
 
-    // Whether or not the encoding has been changed by a magic comment. We use
-    // this to provide a fast path for the lexer instead of going through the
-    // function pointer.
+    /**
+     * Whether or not the encoding has been changed by a magic comment. We use
+     * this to provide a fast path for the lexer instead of going through the
+     * function pointer.
+     */
     bool encoding_changed;
 
-    // This flag indicates that we are currently parsing a pattern matching
-    // expression and impacts that calculation of newlines.
+    /**
+     * This flag indicates that we are currently parsing a pattern matching
+     * expression and impacts the calculation of newlines.
+     */
     bool pattern_matching_newlines;
 
-    // This flag indicates that we are currently parsing a keyword argument.
+    /** This flag indicates that we are currently parsing a keyword argument. */
     bool in_keyword_arg;
 
-    // Whether or not the parser has seen a token that has semantic meaning
-    // (i.e., a token that is not a comment or whitespace).
+    /**
+     * Whether or not the parser has seen a token that has semantic meaning
+     * (i.e., a token that is not a comment or whitespace).
+     */
     bool semantic_token_seen;
 
-    // Whether or not we have found a frozen_string_literal magic comment with
-    // a true value.
+    /**
+     * Whether or not we have found a frozen_string_literal magic comment with
+     * a true value.
+     */
     bool frozen_string_literal;
 };
 
-#endif // PRISM_PARSER_H
+#endif
diff --git a/prism/prism.c b/prism/prism.c
index 0b7494c5eb..05dad03a43 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -15599,29 +15599,31 @@ pm_metadata_read_u32(const char *ptr) {
     }
 }
 
-// Process any additional metadata being passed into a call to the parser via
-// the pm_parse_serialize function. Since the source of these calls will be from
-// Ruby implementation internals we assume it is from a trusted source.
-//
-// Currently, this is only passing in variable scoping surrounding an eval, but
-// eventually it will be extended to hold any additional metadata.  This data
-// is serialized to reduce the calling complexity for a foreign function call
-// vs a foreign runtime making a bindable in-memory version of a C structure.
-//
-// metadata is assumed to be a valid pointer pointing to well-formed data. The
-// format is described below:
-//
-// ```text
-// [
-//   filepath_size: uint32_t,
-//   filepath: char*,
-//   scopes_count: uint32_t,
-//   [
-//     locals_count: uint32_t,
-//     [local_size: uint32_t, local: char*]*
-//   ]*
-// ]
-// ```
+/**
+ * Process any additional metadata being passed into a call to the parser via
+ * the pm_parse_serialize function. Since the source of these calls will be from
+ * Ruby implementation internals we assume it is from a trusted source.
+ *
+ * Currently, this is only passing in variable scoping surrounding an eval, but
+ * eventually it will be extended to hold any additional metadata. This data
+ * is serialized to reduce the calling complexity for a foreign function call
+ * vs a foreign runtime making a bindable in-memory version of a C structure.
+ *
+ * metadata is assumed to be a valid pointer pointing to well-formed data. The
+ * format is described below:
+ *
+ * ```text
+ * [
+ *   filepath_size: uint32_t,
+ *   filepath: char*,
+ *   scopes_count: uint32_t,
+ *   [
+ *     locals_count: uint32_t,
+ *     [local_size: uint32_t, local: char*]*
+ *   ]*
+ * ]
+ * ```
+ */
 void
 pm_parser_metadata(pm_parser_t *parser, const char *metadata) {
     uint32_t filepath_size = pm_metadata_read_u32(metadata);
diff --git a/prism/prism.h b/prism/prism.h
index 46bfae0fe0..c68e9cbdf7 100644
--- a/prism/prism.h
+++ b/prism/prism.h
@@ -29,54 +29,156 @@
 #include <strings.h>
 #endif
 
-void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
-
-void pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer);
-
-void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer);
-
-void pm_parser_metadata(pm_parser_t *parser, const char *metadata);
-
-// The prism version and the serialization format.
+/**
+ * The prism version and the serialization format.
+ *
+ * @return The prism version as a constant string.
+ */
 PRISM_EXPORTED_FUNCTION const char * pm_version(void);
 
-// Initialize a parser with the given start and end pointers.
+/**
+ * Initialize a parser with the given source pointer and size.
+ *
+ * @param parser The parser to initialize.
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param filepath The optional filepath to pass to the parser.
+ */
 PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const char *filepath);
 
-// Register a callback that will be called whenever prism changes the encoding it
-// is using to parse based on the magic comment.
+/**
+ * Register a callback that will be called whenever prism changes the encoding
+ * it is using to parse based on the magic comment.
+ *
+ * @param parser The parser to register the callback with.
+ * @param callback The callback to register.
+ */
 PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback);
 
-// Register a callback that will be called when prism encounters a magic comment
-// with an encoding referenced that it doesn't understand. The callback should
-// return NULL if it also doesn't understand the encoding or it should return a
-// pointer to a pm_encoding_t struct that contains the functions necessary to
-// parse identifiers.
+/**
+ * Register a callback that will be called when prism encounters a magic comment
+ * with an encoding referenced that it doesn't understand. The callback should
+ * return NULL if it also doesn't understand the encoding or it should return a
+ * pointer to a pm_encoding_t struct that contains the functions necessary to
+ * parse identifiers.
+ *
+ * @param parser The parser to register the callback with.
+ * @param callback The callback to register.
+ */
 PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_decode_callback(pm_parser_t *parser, pm_encoding_decode_callback_t callback);
 
-// Free any memory associated with the given parser.
+/**
+ * Free any memory associated with the given parser.
+ *
+ * @param parser The parser to free.
+ */
 PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
 
-// Parse the Ruby source associated with the given parser and return the tree.
+/**
+ * Parse the Ruby source associated with the given parser and return the tree.
+ *
+ * @param parser The parser to use.
+ * @return The AST representing the Ruby source.
+ */
 PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
 
-// Serialize the AST represented by the given node to the given buffer.
+/**
+ * Serialize the given list of comments to the given buffer.
+ *
+ * @param parser The parser to serialize.
+ * @param list The list of comments to serialize.
+ * @param buffer The buffer to serialize to.
+ */
+void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer);
+
+/**
+ * Serialize the name of the encoding to the buffer.
+ *
+ * @param encoding The encoding to serialize.
+ * @param buffer The buffer to serialize to.
+ */
+void pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer);
+
+/**
+ * Serialize the encoding, metadata, nodes, and constant pool.
+ *
+ * @param parser The parser to serialize.
+ * @param node The node to serialize.
+ * @param buffer The buffer to serialize to.
+ */
+void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
+
+/**
+ * Serialize the AST represented by the given node to the given buffer.
+ *
+ * @param parser The parser to serialize.
+ * @param node The node to serialize.
+ * @param buffer The buffer to serialize to.
+ */
 PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
 
-// Parse the given source to the AST and serialize the AST to the given buffer.
+/**
+ * Process any additional metadata being passed into a call to the parser via
+ * the pm_parse_serialize function. Since the source of these calls will be from
+ * Ruby implementation internals we assume it is from a trusted source.
+ *
+ * Currently, this is only passing in variable scoping surrounding an eval, but
+ * eventually it will be extended to hold any additional metadata. This data
+ * is serialized to reduce the calling complexity for a foreign function call
+ * vs a foreign runtime making a bindable in-memory version of a C structure.
+ *
+ * @param parser The parser to process the metadata for.
+ * @param metadata The metadata to process.
+ */
+void pm_parser_metadata(pm_parser_t *parser, const char *metadata);
+
+/**
+ * Parse the given source to the AST and serialize the AST to the given buffer.
+ *
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param buffer The buffer to serialize to.
+ * @param metadata The optional metadata to pass to the parser.
+ */
 PRISM_EXPORTED_FUNCTION void pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata);
 
-// Parse and serialize the comments in the given source to the given buffer.
+/**
+ * Parse and serialize the comments in the given source to the given buffer.
+ *
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param buffer The buffer to serialize to.
+ * @param metadata The optional metadata to pass to the parser.
+ */
 PRISM_EXPORTED_FUNCTION void pm_parse_serialize_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata);
 
-// Lex the given source and serialize to the given buffer.
+/**
+ * Lex the given source and serialize to the given buffer.
+ *
+ * @param source The source to lex.
+ * @param size The size of the source.
+ * @param filepath The optional filepath to pass to the lexer.
+ * @param buffer The buffer to serialize to.
+ */
 PRISM_EXPORTED_FUNCTION void pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer);
 
-// Parse and serialize both the AST and the tokens represented by the given
-// source to the given buffer.
+/**
+ * Parse and serialize both the AST and the tokens represented by the given
+ * source to the given buffer.
+ *
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param buffer The buffer to serialize to.
+ * @param metadata The optional metadata to pass to the parser.
+ */
 PRISM_EXPORTED_FUNCTION void pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata);
 
-// Returns a string representation of the given token type.
+/**
+ * Returns a string representation of the given token type.
+ *
+ * @param token_type The token type to convert to a string.
+ * @return A string representation of the given token type.
+ */
 PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type);
 
 #endif
diff --git a/prism/templates/include/prism/ast.h.erb b/prism/templates/include/prism/ast.h.erb
index 75b10c9807..48ad64d699 100644
--- a/prism/templates/include/prism/ast.h.erb
+++ b/prism/templates/include/prism/ast.h.erb
@@ -9,41 +9,70 @@
 #include <stddef.h>
 #include <stdint.h>
 
-// This enum represents every type of token in the Ruby source.
+/**
+ * This enum represents every type of token in the Ruby source.
+ */
 typedef enum pm_token_type {
 <%- tokens.each do |token| -%>
     <%= token.declaration %>
 <%- end -%>
-    PM_TOKEN_MAXIMUM, // the maximum token value
+
+    /** The maximum token value. */
+    PM_TOKEN_MAXIMUM,
 } pm_token_type_t;
 
-// This struct represents a token in the Ruby source. We use it to track both
-// type and location information.
+/**
+ * This struct represents a token in the Ruby source. We use it to track both
+ * type and location information.
+ */
 typedef struct {
+    /** The type of the token. */
     pm_token_type_t type;
+
+    /** A pointer to the start location of the token in the source. */
     const uint8_t *start;
+
+    /** A pointer to the end location of the token in the source. */
     const uint8_t *end;
 } pm_token_t;
 
-// This represents a range of bytes in the source string to which a node or
-// token corresponds.
+/**
+ * This represents a range of bytes in the source string to which a node or
+ * token corresponds.
+ */
 typedef struct {
+    /** A pointer to the start location of the range in the source. */
     const uint8_t *start;
+
+    /** A pointer to the end location of the range in the source. */
     const uint8_t *end;
 } pm_location_t;
 
 struct pm_node;
 
+/**
+ * A list of nodes in the source, most often used for lists of children.
+ */
 typedef struct pm_node_list {
-    struct pm_node **nodes;
+    /** The number of nodes in the list. */
     size_t size;
+
+    /** The capacity of the list that has been allocated. */
     size_t capacity;
+
+    /** The nodes in the list. */
+    struct pm_node **nodes;
 } pm_node_list_t;
 
+/**
+ * This enum represents every type of node in the Ruby syntax tree.
+ */
 enum pm_node_type {
 <%- nodes.each_with_index do |node, index| -%>
     <%= node.type %> = <%= index + 1 %>,
 <%- end -%>
+
+    /** A special kind of node used for compilation. */
     PM_SCOPE_NODE
 };
 
@@ -66,15 +95,22 @@ static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS
  * embedded into every node type.
  */
 typedef struct pm_node {
-    // This represents the type of the node. It somewhat maps to the nodes that
-    // existed in the original grammar and ripper, but it's not a 1:1 mapping.
+    /**
+     * This represents the type of the node. It somewhat maps to the nodes that
+     * existed in the original grammar and ripper, but it's not a 1:1 mapping.
+     */
     pm_node_type_t type;
 
-    // This represents any flags on the node
+    /**
+     * This represents any flags on the node. Some are common to all nodes, and
+     * some are specific to the type of node.
+     */
     pm_node_flags_t flags;
 
-    // This is the location of the node in the source. It's a range of bytes
-    // containing a start and an end.
+    /**
+     * This is the location of the node in the source. It's a range of bytes
+     * containing a start and an end.
+     */
     pm_location_t location;
 } pm_node_t;
 <%- nodes.each do |node| -%>
@@ -124,4 +160,4 @@ typedef enum pm_<%= flag.human %> {
 
 #define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::SERIALIZE_ONLY_SEMANTICS_FIELDS %>
 
-#endif // PRISM_AST_H
+#endif
diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb
index d46284d3b2..2f75509492 100644
--- a/prism/templates/src/serialize.c.erb
+++ b/prism/templates/src/serialize.c.erb
@@ -47,7 +47,7 @@ pm_serialize_string(pm_parser_t *parser, pm_string_t *string, pm_buffer_t *buffe
     }
 }
 
-void
+static void
 pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
     pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node));
 
@@ -136,6 +136,9 @@ pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *bu
     pm_buffer_append_varint(buffer, pm_ptrdifft_to_u32(comment->end - comment->start));
 }
 
+/**
+ * Serialize the given list of comments to the given buffer.
+ */
 void
 pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
     pm_buffer_append_varint(buffer, pm_sizet_to_u32(pm_list_size(list)));
@@ -189,6 +192,9 @@ pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *
     }
 }
 
+/**
+ * Serialize the name of the encoding to the buffer.
+ */
 void
 pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
     size_t encoding_length = strlen(encoding->name);
@@ -197,6 +203,9 @@ pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
 }
 
 #line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+/**
+ * Serialize the encoding, metadata, nodes, and constant pool.
+ */
 void
 pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
     pm_serialize_encoding(&parser->encoding, buffer);
@@ -274,6 +283,9 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) {
     pm_buffer_append_varint(buffer, parser->lex_state);
 }
 
+/**
+ * Lex the given source and serialize to the given buffer.
+ */
 PRISM_EXPORTED_FUNCTION void
 pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer) {
     pm_parser_t parser;
@@ -300,8 +312,10 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu
     pm_parser_free(&parser);
 }
 
-// Parse and serialize both the AST and the tokens represented by the given
-// source to the given buffer.
+/**
+ * Parse and serialize both the AST and the tokens represented by the given
+ * source to the given buffer.
+ */
 PRISM_EXPORTED_FUNCTION void
 pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata) {
     pm_parser_t parser;
diff --git a/prism/templates/src/token_type.c.erb b/prism/templates/src/token_type.c.erb
index 98be081732..d3c1c3f1b8 100644
--- a/prism/templates/src/token_type.c.erb
+++ b/prism/templates/src/token_type.c.erb
@@ -2,7 +2,9 @@
 
 #include "prism/ast.h"
 
-// Returns a string representation of the given token type.
+/**
+ * Returns a string representation of the given token type.
+ */
 PRISM_EXPORTED_FUNCTION const char *
 pm_token_type_to_str(pm_token_type_t token_type)
 {
author	Kevin Newton <kddnewton@gmail.com>	2023-10-31 12:54:54 -0400
committer	Kevin Newton <kddnewton@gmail.com>	2023-11-01 13:10:29 -0400
commit	17923cc876513707b4bedcd4437b229feb455099 (patch)
tree	a4b5ef4421268c1d52937cc86e4135d9b1157486
parent	6b3b530cc1266aeaecb68a01e8511a794ea456ea (diff)