summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Newton <kddnewton@gmail.com>2023-10-31 12:54:54 -0400
committerKevin Newton <kddnewton@gmail.com>2023-11-01 13:10:29 -0400
commit17923cc876513707b4bedcd4437b229feb455099 (patch)
treea4b5ef4421268c1d52937cc86e4135d9b1157486
parent6b3b530cc1266aeaecb68a01e8511a794ea456ea (diff)
[ruby/prism] Even more C file documentation
https://github.com/ruby/prism/commit/9c648ce615
-rw-r--r--prism/enc/pm_unicode.c8
-rw-r--r--prism/parser.h586
-rw-r--r--prism/prism.c48
-rw-r--r--prism/prism.h154
-rw-r--r--prism/templates/include/prism/ast.h.erb62
-rw-r--r--prism/templates/src/serialize.c.erb20
-rw-r--r--prism/templates/src/token_type.c.erb4
7 files changed, 617 insertions, 265 deletions
diff --git a/prism/enc/pm_unicode.c b/prism/enc/pm_unicode.c
index d021894c1e..e471d03b6b 100644
--- a/prism/enc/pm_unicode.c
+++ b/prism/enc/pm_unicode.c
@@ -2183,7 +2183,7 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
* codepoint is in the list.
*/
static bool
-pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, size_t size, const pm_unicode_codepoint_t codepoints[size]) {
+pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) {
size_t start = 0;
size_t end = size;
@@ -2300,7 +2300,7 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
if (codepoint <= 0xFF) {
return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_ALPHABETIC_BIT) ? width : 0;
} else {
- return pm_unicode_codepoint_match(codepoint, UNICODE_ALPHA_CODEPOINTS_LENGTH, unicode_alpha_codepoints) ? width : 0;
+ return pm_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
}
}
@@ -2320,7 +2320,7 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
if (codepoint <= 0xFF) {
return (pm_encoding_unicode_table[(uint8_t) codepoint] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
} else {
- return pm_unicode_codepoint_match(codepoint, UNICODE_ALNUM_CODEPOINTS_LENGTH, unicode_alnum_codepoints) ? width : 0;
+ return pm_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
}
}
@@ -2340,7 +2340,7 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
if (codepoint <= 0xFF) {
return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
} else {
- return pm_unicode_codepoint_match(codepoint, UNICODE_ISUPPER_CODEPOINTS_LENGTH, unicode_isupper_codepoints) ? true : false;
+ return pm_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
}
}
diff --git a/prism/parser.h b/prism/parser.h
index 01b047ccdf..92a8ce589d 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -12,9 +12,11 @@
#include <stdbool.h>
-// This enum provides various bits that represent different kinds of states that
-// the lexer can track. This is used to determine which kind of token to return
-// based on the context of the parser.
+/**
+ * This enum provides various bits that represent different kinds of states that
+ * the lexer can track. This is used to determine which kind of token to return
+ * based on the context of the parser.
+ */
typedef enum {
PM_LEX_STATE_BIT_BEG,
PM_LEX_STATE_BIT_END,
@@ -31,8 +33,10 @@ typedef enum {
PM_LEX_STATE_BIT_FITEM
} pm_lex_state_bit_t;
-// This enum combines the various bits from the above enum into individual
-// values that represent the various states of the lexer.
+/**
+ * This enum combines the various bits from the above enum into individual
+ * values that represent the various states of the lexer.
+ */
typedef enum {
PM_LEX_STATE_NONE = 0,
PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG),
@@ -53,6 +57,9 @@ typedef enum {
PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN
} pm_lex_state_t;
+/**
+ * The type of quote that a heredoc uses.
+ */
typedef enum {
PM_HEREDOC_QUOTE_NONE,
PM_HEREDOC_QUOTE_SINGLE = '\'',
@@ -60,183 +67,287 @@ typedef enum {
PM_HEREDOC_QUOTE_BACKTICK = '`',
} pm_heredoc_quote_t;
+/**
+ * The type of indentation that a heredoc uses.
+ */
typedef enum {
PM_HEREDOC_INDENT_NONE,
PM_HEREDOC_INDENT_DASH,
PM_HEREDOC_INDENT_TILDE,
} pm_heredoc_indent_t;
-// When lexing Ruby source, the lexer has a small amount of state to tell which
-// kind of token it is currently lexing. For example, when we find the start of
-// a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
-// that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
-// are found as part of a string.
+/**
+ * When lexing Ruby source, the lexer has a small amount of state to tell which
+ * kind of token it is currently lexing. For example, when we find the start of
+ * a string, the first token that we return is a TOKEN_STRING_BEGIN token. After
+ * that the lexer is now in the PM_LEX_STRING mode, and will return tokens that
+ * are found as part of a string.
+ */
typedef struct pm_lex_mode {
enum {
- // This state is used when any given token is being lexed.
+ /** This state is used when any given token is being lexed. */
PM_LEX_DEFAULT,
- // This state is used when we're lexing as normal but inside an embedded
- // expression of a string.
+ /**
+ * This state is used when we're lexing as normal but inside an embedded
+ * expression of a string.
+ */
PM_LEX_EMBEXPR,
- // This state is used when we're lexing a variable that is embedded
- // directly inside of a string with the # shorthand.
+ /**
+ * This state is used when we're lexing a variable that is embedded
+ * directly inside of a string with the # shorthand.
+ */
PM_LEX_EMBVAR,
- // This state is used when you are inside the content of a heredoc.
+ /** This state is used when you are inside the content of a heredoc. */
PM_LEX_HEREDOC,
- // This state is used when we are lexing a list of tokens, as in a %w
- // word list literal or a %i symbol list literal.
+ /**
+ * This state is used when we are lexing a list of tokens, as in a %w
+ * word list literal or a %i symbol list literal.
+ */
PM_LEX_LIST,
- // This state is used when a regular expression has been begun and we
- // are looking for the terminator.
+ /**
+ * This state is used when a regular expression has been begun and we
+ * are looking for the terminator.
+ */
PM_LEX_REGEXP,
- // This state is used when we are lexing a string or a string-like
- // token, as in string content with either quote or an xstring.
+ /**
+ * This state is used when we are lexing a string or a string-like
+ * token, as in string content with either quote or an xstring.
+ */
PM_LEX_STRING
} mode;
union {
struct {
- // This keeps track of the nesting level of the list.
+ /** This keeps track of the nesting level of the list. */
size_t nesting;
- // Whether or not interpolation is allowed in this list.
+ /** Whether or not interpolation is allowed in this list. */
bool interpolation;
- // When lexing a list, it takes into account balancing the
- // terminator if the terminator is one of (), [], {}, or <>.
+ /**
+ * When lexing a list, it takes into account balancing the
+ * terminator if the terminator is one of (), [], {}, or <>.
+ */
uint8_t incrementor;
- // This is the terminator of the list literal.
+ /** This is the terminator of the list literal. */
uint8_t terminator;
- // This is the character set that should be used to delimit the
- // tokens within the list.
+ /**
+ * This is the character set that should be used to delimit the
+ * tokens within the list.
+ */
uint8_t breakpoints[11];
} list;
struct {
- // This keeps track of the nesting level of the regular expression.
+ /**
+ * This keeps track of the nesting level of the regular expression.
+ */
size_t nesting;
- // When lexing a regular expression, it takes into account balancing
- // the terminator if the terminator is one of (), [], {}, or <>.
+ /**
+ * When lexing a regular expression, it takes into account balancing
+ * the terminator if the terminator is one of (), [], {}, or <>.
+ */
uint8_t incrementor;
- // This is the terminator of the regular expression.
+ /** This is the terminator of the regular expression. */
uint8_t terminator;
- // This is the character set that should be used to delimit the
- // tokens within the regular expression.
+ /**
+ * This is the character set that should be used to delimit the
+ * tokens within the regular expression.
+ */
uint8_t breakpoints[6];
} regexp;
struct {
- // This keeps track of the nesting level of the string.
+ /** This keeps track of the nesting level of the string. */
size_t nesting;
- // Whether or not interpolation is allowed in this string.
+ /** Whether or not interpolation is allowed in this string. */
bool interpolation;
- // Whether or not at the end of the string we should allow a :,
- // which would indicate this was a dynamic symbol instead of a
- // string.
+ /**
+ * Whether or not at the end of the string we should allow a :,
+ * which would indicate this was a dynamic symbol instead of a
+ * string.
+ */
bool label_allowed;
- // When lexing a string, it takes into account balancing the
- // terminator if the terminator is one of (), [], {}, or <>.
+ /**
+ * When lexing a string, it takes into account balancing the
+ * terminator if the terminator is one of (), [], {}, or <>.
+ */
uint8_t incrementor;
- // This is the terminator of the string. It is typically either a
- // single or double quote.
+ /**
+ * This is the terminator of the string. It is typically either a
+ * single or double quote.
+ */
uint8_t terminator;
- // This is the character set that should be used to delimit the
- // tokens within the string.
+ /**
+ * This is the character set that should be used to delimit the
+ * tokens within the string.
+ */
uint8_t breakpoints[6];
} string;
struct {
- // These pointers point to the beginning and end of the heredoc
- // identifier.
+ /** A pointer to the start of the heredoc identifier. */
const uint8_t *ident_start;
+
+ /** The length of the heredoc identifier. */
size_t ident_length;
+ /** The type of quote that the heredoc uses. */
pm_heredoc_quote_t quote;
+
+ /** The type of indentation that the heredoc uses. */
pm_heredoc_indent_t indent;
- // This is the pointer to the character where lexing should resume
- // once the heredoc has been completely processed.
+ /**
+ * This is the pointer to the character where lexing should resume
+ * once the heredoc has been completely processed.
+ */
const uint8_t *next_start;
- // This is used to track the amount of common whitespace on each
- // line so that we know how much to dedent each line in the case of
- // a tilde heredoc.
+ /**
+ * This is used to track the amount of common whitespace on each
+ * line so that we know how much to dedent each line in the case of
+ * a tilde heredoc.
+ */
size_t common_whitespace;
} heredoc;
} as;
- // The previous lex state so that it knows how to pop.
+ /** The previous lex state so that it knows how to pop. */
struct pm_lex_mode *prev;
} pm_lex_mode_t;
-// We pre-allocate a certain number of lex states in order to avoid having to
-// call malloc too many times while parsing. You really shouldn't need more than
-// this because you only really nest deeply when doing string interpolation.
+/**
+ * We pre-allocate a certain number of lex states in order to avoid having to
+ * call malloc too many times while parsing. You really shouldn't need more than
+ * this because you only really nest deeply when doing string interpolation.
+ */
#define PM_LEX_STACK_SIZE 4
// A forward declaration since our error handler struct accepts a parser for
// each of its function calls.
typedef struct pm_parser pm_parser_t;
-// While parsing, we keep track of a stack of contexts. This is helpful for
-// error recovery so that we can pop back to a previous context when we hit a
-// token that is understood by a parent context but not by the current context.
+/**
+ * While parsing, we keep track of a stack of contexts. This is helpful for
+ * error recovery so that we can pop back to a previous context when we hit a
+ * token that is understood by a parent context but not by the current context.
+ */
typedef enum {
- PM_CONTEXT_BEGIN, // a begin statement
- PM_CONTEXT_BLOCK_BRACES, // expressions in block arguments using braces
- PM_CONTEXT_BLOCK_KEYWORDS, // expressions in block arguments using do..end
- PM_CONTEXT_CASE_WHEN, // a case when statements
- PM_CONTEXT_CASE_IN, // a case in statements
- PM_CONTEXT_CLASS, // a class declaration
- PM_CONTEXT_DEF, // a method definition
- PM_CONTEXT_DEF_PARAMS, // a method definition's parameters
- PM_CONTEXT_DEFAULT_PARAMS, // a method definition's default parameter
- PM_CONTEXT_ELSE, // an else clause
- PM_CONTEXT_ELSIF, // an elsif clause
- PM_CONTEXT_EMBEXPR, // an interpolated expression
- PM_CONTEXT_ENSURE, // an ensure statement
- PM_CONTEXT_FOR, // a for loop
- PM_CONTEXT_FOR_INDEX, // a for loop's index
- PM_CONTEXT_IF, // an if statement
- PM_CONTEXT_LAMBDA_BRACES, // a lambda expression with braces
- PM_CONTEXT_LAMBDA_DO_END, // a lambda expression with do..end
- PM_CONTEXT_MAIN, // the top level context
- PM_CONTEXT_MODULE, // a module declaration
- PM_CONTEXT_PARENS, // a parenthesized expression
- PM_CONTEXT_POSTEXE, // an END block
- PM_CONTEXT_PREDICATE, // a predicate inside an if/elsif/unless statement
- PM_CONTEXT_PREEXE, // a BEGIN block
- PM_CONTEXT_RESCUE_ELSE, // a rescue else statement
- PM_CONTEXT_RESCUE, // a rescue statement
- PM_CONTEXT_SCLASS, // a singleton class definition
- PM_CONTEXT_UNLESS, // an unless statement
- PM_CONTEXT_UNTIL, // an until statement
- PM_CONTEXT_WHILE, // a while statement
+ /** a begin statement */
+ PM_CONTEXT_BEGIN,
+
+ /** expressions in block arguments using braces */
+ PM_CONTEXT_BLOCK_BRACES,
+
+ /** expressions in block arguments using do..end */
+ PM_CONTEXT_BLOCK_KEYWORDS,
+
+ /** a case when statements */
+ PM_CONTEXT_CASE_WHEN,
+
+ /** a case in statements */
+ PM_CONTEXT_CASE_IN,
+
+ /** a class declaration */
+ PM_CONTEXT_CLASS,
+
+ /** a method definition */
+ PM_CONTEXT_DEF,
+
+ /** a method definition's parameters */
+ PM_CONTEXT_DEF_PARAMS,
+
+ /** a method definition's default parameter */
+ PM_CONTEXT_DEFAULT_PARAMS,
+
+ /** an else clause */
+ PM_CONTEXT_ELSE,
+
+ /** an elsif clause */
+ PM_CONTEXT_ELSIF,
+
+ /** an interpolated expression */
+ PM_CONTEXT_EMBEXPR,
+
+ /** an ensure statement */
+ PM_CONTEXT_ENSURE,
+
+ /** a for loop */
+ PM_CONTEXT_FOR,
+
+ /** a for loop's index */
+ PM_CONTEXT_FOR_INDEX,
+
+ /** an if statement */
+ PM_CONTEXT_IF,
+
+ /** a lambda expression with braces */
+ PM_CONTEXT_LAMBDA_BRACES,
+
+ /** a lambda expression with do..end */
+ PM_CONTEXT_LAMBDA_DO_END,
+
+ /** the top level context */
+ PM_CONTEXT_MAIN,
+
+ /** a module declaration */
+ PM_CONTEXT_MODULE,
+
+ /** a parenthesized expression */
+ PM_CONTEXT_PARENS,
+
+ /** an END block */
+ PM_CONTEXT_POSTEXE,
+
+ /** a predicate inside an if/elsif/unless statement */
+ PM_CONTEXT_PREDICATE,
+
+ /** a BEGIN block */
+ PM_CONTEXT_PREEXE,
+
+ /** a rescue else statement */
+ PM_CONTEXT_RESCUE_ELSE,
+
+ /** a rescue statement */
+ PM_CONTEXT_RESCUE,
+
+ /** a singleton class definition */
+ PM_CONTEXT_SCLASS,
+
+ /** an unless statement */
+ PM_CONTEXT_UNLESS,
+
+ /** an until statement */
+ PM_CONTEXT_UNTIL,
+
+ /** a while statement */
+ PM_CONTEXT_WHILE,
} pm_context_t;
-// This is a node in a linked list of contexts.
+/** This is a node in a linked list of contexts. */
typedef struct pm_context_node {
pm_context_t context;
struct pm_context_node *prev;
} pm_context_node_t;
-// This is the type of a comment that we've found while parsing.
+/** This is the type of a comment that we've found while parsing. */
typedef enum {
PM_COMMENT_INLINE,
PM_COMMENT_EMBDOC,
@@ -269,185 +380,270 @@ typedef struct {
uint32_t value_length;
} pm_magic_comment_t;
-// When the encoding that is being used to parse the source is changed by prism,
-// we provide the ability here to call out to a user-defined function.
+/**
+ * When the encoding that is being used to parse the source is changed by prism,
+ * we provide the ability here to call out to a user-defined function.
+ */
typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
-// When an encoding is encountered that isn't understood by prism, we provide
-// the ability here to call out to a user-defined function to get an encoding
-// struct. If the function returns something that isn't NULL, we set that to
-// our encoding and use it to parse identifiers.
+/**
+ * When an encoding is encountered that isn't understood by prism, we provide
+ * the ability here to call out to a user-defined function to get an encoding
+ * struct. If the function returns something that isn't NULL, we set that to
+ * our encoding and use it to parse identifiers.
+ */
typedef pm_encoding_t *(*pm_encoding_decode_callback_t)(pm_parser_t *parser, const uint8_t *name, size_t width);
-// When you are lexing through a file, the lexer needs all of the information
-// that the parser additionally provides (for example, the local table). So if
-// you want to properly lex Ruby, you need to actually lex it in the context of
-// the parser. In order to provide this functionality, we optionally allow a
-// struct to be attached to the parser that calls back out to a user-provided
-// callback when each token is lexed.
+/**
+ * When you are lexing through a file, the lexer needs all of the information
+ * that the parser additionally provides (for example, the local table). So if
+ * you want to properly lex Ruby, you need to actually lex it in the context of
+ * the parser. In order to provide this functionality, we optionally allow a
+ * struct to be attached to the parser that calls back out to a user-provided
+ * callback when each token is lexed.
+ */
typedef struct {
- // This opaque pointer is used to provide whatever information the user
- // deemed necessary to the callback. In our case we use it to pass the array
- // that the tokens get appended into.
+ /**
+ * This opaque pointer is used to provide whatever information the user
+ * deemed necessary to the callback. In our case we use it to pass the array
+ * that the tokens get appended into.
+ */
void *data;
- // This is the callback that is called when a token is lexed. It is passed
- // the opaque data pointer, the parser, and the token that was lexed.
+ /**
+ * This is the callback that is called when a token is lexed. It is passed
+ * the opaque data pointer, the parser, and the token that was lexed.
+ */
void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token);
} pm_lex_callback_t;
-// This struct represents a node in a linked list of scopes. Some scopes can see
-// into their parent scopes, while others cannot.
+/**
+ * This struct represents a node in a linked list of scopes. Some scopes can see
+ * into their parent scopes, while others cannot.
+ */
typedef struct pm_scope {
- // The IDs of the locals in the given scope.
+ /** The IDs of the locals in the given scope. */
pm_constant_id_list_t locals;
- // A pointer to the previous scope in the linked list.
+ /** A pointer to the previous scope in the linked list. */
struct pm_scope *previous;
- // A boolean indicating whether or not this scope can see into its parent.
- // If closed is true, then the scope cannot see into its parent.
+ /**
+ * A boolean indicating whether or not this scope can see into its parent.
+ * If closed is true, then the scope cannot see into its parent.
+ */
bool closed;
- // A boolean indicating whether or not this scope has explicit parameters.
- // This is necessary to determine whether or not numbered parameters are
- // allowed.
+ /**
+ * A boolean indicating whether or not this scope has explicit parameters.
+ * This is necessary to determine whether or not numbered parameters are
+ * allowed.
+ */
bool explicit_params;
- // A boolean indicating whether or not this scope has numbered parameters.
- // This is necessary to determine if child blocks are allowed to use
- // numbered parameters.
+ /**
+ * A boolean indicating whether or not this scope has numbered parameters.
+ * This is necessary to determine if child blocks are allowed to use
+ * numbered parameters.
+ */
bool numbered_params;
- // A transparent scope is a scope that cannot have locals set on itself.
- // When a local is set on this scope, it will instead be set on the parent
- // scope's local table.
+ /**
+ * A transparent scope is a scope that cannot have locals set on itself.
+ * When a local is set on this scope, it will instead be set on the parent
+ * scope's local table.
+ */
bool transparent;
} pm_scope_t;
-// This struct represents the overall parser. It contains a reference to the
-// source file, as well as pointers that indicate where in the source it's
-// currently parsing. It also contains the most recent and current token that
-// it's considering.
+/**
+ * This struct represents the overall parser. It contains a reference to the
+ * source file, as well as pointers that indicate where in the source it's
+ * currently parsing. It also contains the most recent and current token that
+ * it's considering.
+ */
struct pm_parser {
- pm_lex_state_t lex_state; // the current state of the lexer
- int enclosure_nesting; // tracks the current nesting of (), [], and {}
+ /** The current state of the lexer. */
+ pm_lex_state_t lex_state;
- // Used to temporarily track the nesting of enclosures to determine if a {
- // is the beginning of a lambda following the parameters of a lambda.
+ /** Tracks the current nesting of (), [], and {}. */
+ int enclosure_nesting;
+
+ /**
+ * Used to temporarily track the nesting of enclosures to determine if a {
+ * is the beginning of a lambda following the parameters of a lambda.
+ */
int lambda_enclosure_nesting;
- // Used to track the nesting of braces to ensure we get the correct value
- // when we are interpolating blocks with braces.
+ /**
+ * Used to track the nesting of braces to ensure we get the correct value
+ * when we are interpolating blocks with braces.
+ */
int brace_nesting;
- // the stack used to determine if a do keyword belongs to the predicate of a
- // while, until, or for loop
+ /**
+ * The stack used to determine if a do keyword belongs to the predicate of a
+ * while, until, or for loop.
+ */
pm_state_stack_t do_loop_stack;
- // the stack used to determine if a do keyword belongs to the beginning of a
- // block
+ /**
+ * The stack used to determine if a do keyword belongs to the beginning of a
+ * block.
+ */
pm_state_stack_t accepts_block_stack;
struct {
- pm_lex_mode_t *current; // the current mode of the lexer
- pm_lex_mode_t stack[PM_LEX_STACK_SIZE]; // the stack of lexer modes
- size_t index; // the current index into the lexer mode stack
+ /** The current mode of the lexer. */
+ pm_lex_mode_t *current;
+
+ /** The stack of lexer modes. */
+ pm_lex_mode_t stack[PM_LEX_STACK_SIZE];
+
+ /** The current index into the lexer mode stack. */
+ size_t index;
} lex_modes;
- const uint8_t *start; // the pointer to the start of the source
- const uint8_t *end; // the pointer to the end of the source
- pm_token_t previous; // the previous token we were considering
- pm_token_t current; // the current token we're considering
+ /** The pointer to the start of the source. */
+ const uint8_t *start;
+
+ /** The pointer to the end of the source. */
+ const uint8_t *end;
+
+ /** The previous token we were considering. */
+ pm_token_t previous;
+
+ /** The current token we're considering. */
+ pm_token_t current;
- // This is a special field set on the parser when we need the parser to jump
- // to a specific location when lexing the next token, as opposed to just
- // using the end of the previous token. Normally this is NULL.
+ /**
+ * This is a special field set on the parser when we need the parser to jump
+ * to a specific location when lexing the next token, as opposed to just
+ * using the end of the previous token. Normally this is NULL.
+ */
const uint8_t *next_start;
- // This field indicates the end of a heredoc whose identifier was found on
- // the current line. If another heredoc is found on the same line, then this
- // will be moved forward to the end of that heredoc. If no heredocs are
- // found on a line then this is NULL.
+ /**
+ * This field indicates the end of a heredoc whose identifier was found on
+ * the current line. If another heredoc is found on the same line, then this
+ * will be moved forward to the end of that heredoc. If no heredocs are
+ * found on a line then this is NULL.
+ */
const uint8_t *heredoc_end;
- pm_list_t comment_list; // the list of comments that have been found while parsing
- pm_list_t magic_comment_list; // the list of magic comments that have been found while parsing.
- pm_list_t warning_list; // the list of warnings that have been found while parsing
- pm_list_t error_list; // the list of errors that have been found while parsing
- pm_scope_t *current_scope; // the current local scope
+ /** The list of comments that have been found while parsing. */
+ pm_list_t comment_list;
+
+ /** The list of magic comments that have been found while parsing. */
+ pm_list_t magic_comment_list;
+
+ /** The list of warnings that have been found while parsing. */
+ pm_list_t warning_list;
+
+ /** The list of errors that have been found while parsing. */
+ pm_list_t error_list;
+
+ /** The current local scope. */
+ pm_scope_t *current_scope;
- pm_context_node_t *current_context; // the current parsing context
+ /** The current parsing context. */
+ pm_context_node_t *current_context;
- // The encoding functions for the current file is attached to the parser as
- // it's parsing so that it can change with a magic comment.
+ /**
+ * The encoding functions for the current file are attached to the parser as
+ * it's parsing so that it can change with a magic comment.
+ */
pm_encoding_t encoding;
- // When the encoding that is being used to parse the source is changed by
- // prism, we provide the ability here to call out to a user-defined
- // function.
+ /**
+ * When the encoding that is being used to parse the source is changed by
+ * prism, we provide the ability here to call out to a user-defined
+ * function.
+ */
pm_encoding_changed_callback_t encoding_changed_callback;
- // When an encoding is encountered that isn't understood by prism, we
- // provide the ability here to call out to a user-defined function to get an
- // encoding struct. If the function returns something that isn't NULL, we
- // set that to our encoding and use it to parse identifiers.
+ /**
+ * When an encoding is encountered that isn't understood by prism, we
+ * provide the ability here to call out to a user-defined function to get an
+ * encoding struct. If the function returns something that isn't NULL, we
+ * set that to our encoding and use it to parse identifiers.
+ */
pm_encoding_decode_callback_t encoding_decode_callback;
- // This pointer indicates where a comment must start if it is to be
- // considered an encoding comment.
+ /**
+ * This pointer indicates where a comment must start if it is to be
+ * considered an encoding comment.
+ */
const uint8_t *encoding_comment_start;
- // This is an optional callback that can be attached to the parser that will
- // be called whenever a new token is lexed by the parser.
+ /**
+ * This is an optional callback that can be attached to the parser that will
+ * be called whenever a new token is lexed by the parser.
+ */
pm_lex_callback_t *lex_callback;
- // This is the path of the file being parsed
- // We use the filepath when constructing SourceFileNodes
+ /**
+ * This is the path of the file being parsed. We use the filepath when
+ * constructing SourceFileNodes.
+ */
pm_string_t filepath_string;
- // This constant pool keeps all of the constants defined throughout the file
- // so that we can reference them later.
+ /**
+ * This constant pool keeps all of the constants defined throughout the file
+ * so that we can reference them later.
+ */
pm_constant_pool_t constant_pool;
- // This is the list of newline offsets in the source file.
+ /** This is the list of newline offsets in the source file. */
pm_newline_list_t newline_list;
- // We want to add a flag to integer nodes that indicates their base. We only
- // want to parse these once, but we don't have space on the token itself to
- // communicate this information. So we store it here and pass it through
- // when we find tokens that we need it for.
+ /**
+ * We want to add a flag to integer nodes that indicates their base. We only
+ * want to parse these once, but we don't have space on the token itself to
+ * communicate this information. So we store it here and pass it through
+ * when we find tokens that we need it for.
+ */
pm_node_flags_t integer_base;
- // This string is used to pass information from the lexer to the parser. It
- // is particularly necessary because of escape sequences.
+ /**
+ * This string is used to pass information from the lexer to the parser. It
+ * is particularly necessary because of escape sequences.
+ */
pm_string_t current_string;
- // Whether or not we're at the beginning of a command
+ /** Whether or not we're at the beginning of a command. */
bool command_start;
- // Whether or not we're currently recovering from a syntax error
+ /** Whether or not we're currently recovering from a syntax error. */
bool recovering;
- // Whether or not the encoding has been changed by a magic comment. We use
- // this to provide a fast path for the lexer instead of going through the
- // function pointer.
+ /**
+ * Whether or not the encoding has been changed by a magic comment. We use
+ * this to provide a fast path for the lexer instead of going through the
+ * function pointer.
+ */
bool encoding_changed;
- // This flag indicates that we are currently parsing a pattern matching
- // expression and impacts that calculation of newlines.
+ /**
+ * This flag indicates that we are currently parsing a pattern matching
+ * expression and impacts the calculation of newlines.
+ */
bool pattern_matching_newlines;
- // This flag indicates that we are currently parsing a keyword argument.
+ /** This flag indicates that we are currently parsing a keyword argument. */
bool in_keyword_arg;
- // Whether or not the parser has seen a token that has semantic meaning
- // (i.e., a token that is not a comment or whitespace).
+ /**
+ * Whether or not the parser has seen a token that has semantic meaning
+ * (i.e., a token that is not a comment or whitespace).
+ */
bool semantic_token_seen;
- // Whether or not we have found a frozen_string_literal magic comment with
- // a true value.
+ /**
+ * Whether or not we have found a frozen_string_literal magic comment with
+ * a true value.
+ */
bool frozen_string_literal;
};
-#endif // PRISM_PARSER_H
+#endif
diff --git a/prism/prism.c b/prism/prism.c
index 0b7494c5eb..05dad03a43 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -15599,29 +15599,31 @@ pm_metadata_read_u32(const char *ptr) {
}
}
-// Process any additional metadata being passed into a call to the parser via
-// the pm_parse_serialize function. Since the source of these calls will be from
-// Ruby implementation internals we assume it is from a trusted source.
-//
-// Currently, this is only passing in variable scoping surrounding an eval, but
-// eventually it will be extended to hold any additional metadata. This data
-// is serialized to reduce the calling complexity for a foreign function call
-// vs a foreign runtime making a bindable in-memory version of a C structure.
-//
-// metadata is assumed to be a valid pointer pointing to well-formed data. The
-// format is described below:
-//
-// ```text
-// [
-// filepath_size: uint32_t,
-// filepath: char*,
-// scopes_count: uint32_t,
-// [
-// locals_count: uint32_t,
-// [local_size: uint32_t, local: char*]*
-// ]*
-// ]
-// ```
+/**
+ * Process any additional metadata being passed into a call to the parser via
+ * the pm_parse_serialize function. Since the source of these calls will be from
+ * Ruby implementation internals we assume it is from a trusted source.
+ *
+ * Currently, this is only passing in variable scoping surrounding an eval, but
+ * eventually it will be extended to hold any additional metadata. This data
+ * is serialized to reduce the calling complexity for a foreign function call
+ * vs a foreign runtime making a bindable in-memory version of a C structure.
+ *
+ * metadata is assumed to be a valid pointer pointing to well-formed data. The
+ * format is described below:
+ *
+ * ```text
+ * [
+ * filepath_size: uint32_t,
+ * filepath: char*,
+ * scopes_count: uint32_t,
+ * [
+ * locals_count: uint32_t,
+ * [local_size: uint32_t, local: char*]*
+ * ]*
+ * ]
+ * ```
+ */
void
pm_parser_metadata(pm_parser_t *parser, const char *metadata) {
uint32_t filepath_size = pm_metadata_read_u32(metadata);
diff --git a/prism/prism.h b/prism/prism.h
index 46bfae0fe0..c68e9cbdf7 100644
--- a/prism/prism.h
+++ b/prism/prism.h
@@ -29,54 +29,156 @@
#include <strings.h>
#endif
-void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
-
-void pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer);
-
-void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer);
-
-void pm_parser_metadata(pm_parser_t *parser, const char *metadata);
-
-// The prism version and the serialization format.
+/**
+ * The prism version and the serialization format.
+ *
+ * @return The prism version as a constant string.
+ */
PRISM_EXPORTED_FUNCTION const char * pm_version(void);
-// Initialize a parser with the given start and end pointers.
+/**
+ * Initialize a parser with the given source pointer and size.
+ *
+ * @param parser The parser to initialize.
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param filepath The optional filepath to pass to the parser.
+ */
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const char *filepath);
-// Register a callback that will be called whenever prism changes the encoding it
-// is using to parse based on the magic comment.
+/**
+ * Register a callback that will be called whenever prism changes the encoding
+ * it is using to parse based on the magic comment.
+ *
+ * @param parser The parser to register the callback with.
+ * @param callback The callback to register.
+ */
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback);
-// Register a callback that will be called when prism encounters a magic comment
-// with an encoding referenced that it doesn't understand. The callback should
-// return NULL if it also doesn't understand the encoding or it should return a
-// pointer to a pm_encoding_t struct that contains the functions necessary to
-// parse identifiers.
+/**
+ * Register a callback that will be called when prism encounters a magic comment
+ * with an encoding referenced that it doesn't understand. The callback should
+ * return NULL if it also doesn't understand the encoding or it should return a
+ * pointer to a pm_encoding_t struct that contains the functions necessary to
+ * parse identifiers.
+ *
+ * @param parser The parser to register the callback with.
+ * @param callback The callback to register.
+ */
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_decode_callback(pm_parser_t *parser, pm_encoding_decode_callback_t callback);
-// Free any memory associated with the given parser.
+/**
+ * Free any memory associated with the given parser.
+ *
+ * @param parser The parser to free.
+ */
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
-// Parse the Ruby source associated with the given parser and return the tree.
+/**
+ * Parse the Ruby source associated with the given parser and return the tree.
+ *
+ * @param parser The parser to use.
+ * @return The AST representing the Ruby source.
+ */
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
-// Serialize the AST represented by the given node to the given buffer.
+/**
+ * Serialize the given list of comments to the given buffer.
+ *
+ * @param parser The parser to serialize.
+ * @param list The list of comments to serialize.
+ * @param buffer The buffer to serialize to.
+ */
+void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer);
+
+/**
+ * Serialize the name of the encoding to the buffer.
+ *
+ * @param encoding The encoding to serialize.
+ * @param buffer The buffer to serialize to.
+ */
+void pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer);
+
+/**
+ * Serialize the encoding, metadata, nodes, and constant pool.
+ *
+ * @param parser The parser to serialize.
+ * @param node The node to serialize.
+ * @param buffer The buffer to serialize to.
+ */
+void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
+
+/**
+ * Serialize the AST represented by the given node to the given buffer.
+ *
+ * @param parser The parser to serialize.
+ * @param node The node to serialize.
+ * @param buffer The buffer to serialize to.
+ */
PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);
-// Parse the given source to the AST and serialize the AST to the given buffer.
+/**
+ * Process any additional metadata being passed into a call to the parser via
+ * the pm_parse_serialize function. Since the source of these calls will be from
+ * Ruby implementation internals we assume it is from a trusted source.
+ *
+ * Currently, this is only passing in variable scoping surrounding an eval, but
+ * eventually it will be extended to hold any additional metadata. This data
+ * is serialized to reduce the calling complexity for a foreign function call
+ * vs a foreign runtime making a bindable in-memory version of a C structure.
+ *
+ * @param parser The parser to process the metadata for.
+ * @param metadata The metadata to process.
+ */
+void pm_parser_metadata(pm_parser_t *parser, const char *metadata);
+
+/**
+ * Parse the given source to the AST and serialize the AST to the given buffer.
+ *
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param buffer The buffer to serialize to.
+ * @param metadata The optional metadata to pass to the parser.
+ */
PRISM_EXPORTED_FUNCTION void pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata);
-// Parse and serialize the comments in the given source to the given buffer.
+/**
+ * Parse and serialize the comments in the given source to the given buffer.
+ *
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param buffer The buffer to serialize to.
+ * @param metadata The optional metadata to pass to the parser.
+ */
PRISM_EXPORTED_FUNCTION void pm_parse_serialize_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata);
-// Lex the given source and serialize to the given buffer.
+/**
+ * Lex the given source and serialize to the given buffer.
+ *
+ * @param source The source to lex.
+ * @param size The size of the source.
+ * @param filepath The optional filepath to pass to the lexer.
+ * @param buffer The buffer to serialize to.
+ */
PRISM_EXPORTED_FUNCTION void pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer);
-// Parse and serialize both the AST and the tokens represented by the given
-// source to the given buffer.
+/**
+ * Parse and serialize both the AST and the tokens represented by the given
+ * source to the given buffer.
+ *
+ * @param source The source to parse.
+ * @param size The size of the source.
+ * @param buffer The buffer to serialize to.
+ * @param metadata The optional metadata to pass to the parser.
+ */
PRISM_EXPORTED_FUNCTION void pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata);
-// Returns a string representation of the given token type.
+/**
+ * Returns a string representation of the given token type.
+ *
+ * @param token_type The token type to convert to a string.
+ * @return A string representation of the given token type.
+ */
PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type);
#endif
diff --git a/prism/templates/include/prism/ast.h.erb b/prism/templates/include/prism/ast.h.erb
index 75b10c9807..48ad64d699 100644
--- a/prism/templates/include/prism/ast.h.erb
+++ b/prism/templates/include/prism/ast.h.erb
@@ -9,41 +9,70 @@
#include <stddef.h>
#include <stdint.h>
-// This enum represents every type of token in the Ruby source.
+/**
+ * This enum represents every type of token in the Ruby source.
+ */
typedef enum pm_token_type {
<%- tokens.each do |token| -%>
<%= token.declaration %>
<%- end -%>
- PM_TOKEN_MAXIMUM, // the maximum token value
+
+ /** The maximum token value. */
+ PM_TOKEN_MAXIMUM,
} pm_token_type_t;
-// This struct represents a token in the Ruby source. We use it to track both
-// type and location information.
+/**
+ * This struct represents a token in the Ruby source. We use it to track both
+ * type and location information.
+ */
typedef struct {
+ /** The type of the token. */
pm_token_type_t type;
+
+ /** A pointer to the start location of the token in the source. */
const uint8_t *start;
+
+ /** A pointer to the end location of the token in the source. */
const uint8_t *end;
} pm_token_t;
-// This represents a range of bytes in the source string to which a node or
-// token corresponds.
+/**
+ * This represents a range of bytes in the source string to which a node or
+ * token corresponds.
+ */
typedef struct {
+ /** A pointer to the start location of the range in the source. */
const uint8_t *start;
+
+ /** A pointer to the end location of the range in the source. */
const uint8_t *end;
} pm_location_t;
struct pm_node;
+/**
+ * A list of nodes in the source, most often used for lists of children.
+ */
typedef struct pm_node_list {
- struct pm_node **nodes;
+ /** The number of nodes in the list. */
size_t size;
+
+ /** The capacity of the list that has been allocated. */
size_t capacity;
+
+ /** The nodes in the list. */
+ struct pm_node **nodes;
} pm_node_list_t;
+/**
+ * This enum represents every type of node in the Ruby syntax tree.
+ */
enum pm_node_type {
<%- nodes.each_with_index do |node, index| -%>
<%= node.type %> = <%= index + 1 %>,
<%- end -%>
+
+ /** A special kind of node used for compilation. */
PM_SCOPE_NODE
};
@@ -66,15 +95,22 @@ static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS
* embedded into every node type.
*/
typedef struct pm_node {
- // This represents the type of the node. It somewhat maps to the nodes that
- // existed in the original grammar and ripper, but it's not a 1:1 mapping.
+ /**
+ * This represents the type of the node. It somewhat maps to the nodes that
+ * existed in the original grammar and ripper, but it's not a 1:1 mapping.
+ */
pm_node_type_t type;
- // This represents any flags on the node
+ /**
+ * This represents any flags on the node. Some are common to all nodes, and
+ * some are specific to the type of node.
+ */
pm_node_flags_t flags;
- // This is the location of the node in the source. It's a range of bytes
- // containing a start and an end.
+ /**
+ * This is the location of the node in the source. It's a range of bytes
+ * containing a start and an end.
+ */
pm_location_t location;
} pm_node_t;
<%- nodes.each do |node| -%>
@@ -124,4 +160,4 @@ typedef enum pm_<%= flag.human %> {
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::SERIALIZE_ONLY_SEMANTICS_FIELDS %>
-#endif // PRISM_AST_H
+#endif
diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb
index d46284d3b2..2f75509492 100644
--- a/prism/templates/src/serialize.c.erb
+++ b/prism/templates/src/serialize.c.erb
@@ -47,7 +47,7 @@ pm_serialize_string(pm_parser_t *parser, pm_string_t *string, pm_buffer_t *buffe
}
}
-void
+static void
pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node));
@@ -136,6 +136,9 @@ pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *bu
pm_buffer_append_varint(buffer, pm_ptrdifft_to_u32(comment->end - comment->start));
}
+/**
+ * Serialize the given list of comments to the given buffer.
+ */
void
pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
pm_buffer_append_varint(buffer, pm_sizet_to_u32(pm_list_size(list)));
@@ -189,6 +192,9 @@ pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *
}
}
+/**
+ * Serialize the name of the encoding to the buffer.
+ */
void
pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
size_t encoding_length = strlen(encoding->name);
@@ -197,6 +203,9 @@ pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
}
#line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>"
+/**
+ * Serialize the encoding, metadata, nodes, and constant pool.
+ */
void
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
pm_serialize_encoding(&parser->encoding, buffer);
@@ -274,6 +283,9 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) {
pm_buffer_append_varint(buffer, parser->lex_state);
}
+/**
+ * Lex the given source and serialize to the given buffer.
+ */
PRISM_EXPORTED_FUNCTION void
pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer) {
pm_parser_t parser;
@@ -300,8 +312,10 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu
pm_parser_free(&parser);
}
-// Parse and serialize both the AST and the tokens represented by the given
-// source to the given buffer.
+/**
+ * Parse and serialize both the AST and the tokens represented by the given
+ * source to the given buffer.
+ */
PRISM_EXPORTED_FUNCTION void
pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata) {
pm_parser_t parser;
diff --git a/prism/templates/src/token_type.c.erb b/prism/templates/src/token_type.c.erb
index 98be081732..d3c1c3f1b8 100644
--- a/prism/templates/src/token_type.c.erb
+++ b/prism/templates/src/token_type.c.erb
@@ -2,7 +2,9 @@
#include "prism/ast.h"
-// Returns a string representation of the given token type.
+/**
+ * Returns a string representation of the given token type.
+ */
PRISM_EXPORTED_FUNCTION const char *
pm_token_type_to_str(pm_token_type_t token_type)
{