diff options
| author | Kevin Newton <kddnewton@gmail.com> | 2023-10-31 12:54:54 -0400 |
|---|---|---|
| committer | Kevin Newton <kddnewton@gmail.com> | 2023-11-01 13:10:29 -0400 |
| commit | 17923cc876513707b4bedcd4437b229feb455099 (patch) | |
| tree | a4b5ef4421268c1d52937cc86e4135d9b1157486 | |
| parent | 6b3b530cc1266aeaecb68a01e8511a794ea456ea (diff) | |
[ruby/prism] Even more C file documentation
https://github.com/ruby/prism/commit/9c648ce615
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | prism/enc/pm_unicode.c | 8 |
| -rw-r--r-- | prism/parser.h | 586 |
| -rw-r--r-- | prism/prism.c | 48 |
| -rw-r--r-- | prism/prism.h | 154 |
| -rw-r--r-- | prism/templates/include/prism/ast.h.erb | 62 |
| -rw-r--r-- | prism/templates/src/serialize.c.erb | 20 |
| -rw-r--r-- | prism/templates/src/token_type.c.erb | 4 |
7 files changed, 617 insertions, 265 deletions
diff --git a/prism/enc/pm_unicode.c b/prism/enc/pm_unicode.c index d021894c1e..e471d03b6b 100644 --- a/prism/enc/pm_unicode.c +++ b/prism/enc/pm_unicode.c @@ -2183,7 +2183,7 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C * codepoint is in the list. */ static bool -pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, size_t size, const pm_unicode_codepoint_t codepoints[size]) { +pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) { size_t start = 0; size_t end = size; @@ -2300,7 +2300,7 @@ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) { if (codepoint <= 0xFF) { return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_ALPHABETIC_BIT) ? width : 0; } else { - return pm_unicode_codepoint_match(codepoint, UNICODE_ALPHA_CODEPOINTS_LENGTH, unicode_alpha_codepoints) ? width : 0; + return pm_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0; } } @@ -2320,7 +2320,7 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) { if (codepoint <= 0xFF) { return (pm_encoding_unicode_table[(uint8_t) codepoint] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? width : 0; } else { - return pm_unicode_codepoint_match(codepoint, UNICODE_ALNUM_CODEPOINTS_LENGTH, unicode_alnum_codepoints) ? width : 0; + return pm_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0; } } @@ -2340,7 +2340,7 @@ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) { if (codepoint <= 0xFF) { return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false; } else { - return pm_unicode_codepoint_match(codepoint, UNICODE_ISUPPER_CODEPOINTS_LENGTH, unicode_isupper_codepoints) ? true : false; + return pm_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? 
true : false; } } diff --git a/prism/parser.h b/prism/parser.h index 01b047ccdf..92a8ce589d 100644 --- a/prism/parser.h +++ b/prism/parser.h @@ -12,9 +12,11 @@ #include <stdbool.h> -// This enum provides various bits that represent different kinds of states that -// the lexer can track. This is used to determine which kind of token to return -// based on the context of the parser. +/** + * This enum provides various bits that represent different kinds of states that + * the lexer can track. This is used to determine which kind of token to return + * based on the context of the parser. + */ typedef enum { PM_LEX_STATE_BIT_BEG, PM_LEX_STATE_BIT_END, @@ -31,8 +33,10 @@ typedef enum { PM_LEX_STATE_BIT_FITEM } pm_lex_state_bit_t; -// This enum combines the various bits from the above enum into individual -// values that represent the various states of the lexer. +/** + * This enum combines the various bits from the above enum into individual + * values that represent the various states of the lexer. + */ typedef enum { PM_LEX_STATE_NONE = 0, PM_LEX_STATE_BEG = (1 << PM_LEX_STATE_BIT_BEG), @@ -53,6 +57,9 @@ typedef enum { PM_LEX_STATE_END_ANY = PM_LEX_STATE_END | PM_LEX_STATE_ENDARG | PM_LEX_STATE_ENDFN } pm_lex_state_t; +/** + * The type of quote that a heredoc uses. + */ typedef enum { PM_HEREDOC_QUOTE_NONE, PM_HEREDOC_QUOTE_SINGLE = '\'', @@ -60,183 +67,287 @@ typedef enum { PM_HEREDOC_QUOTE_BACKTICK = '`', } pm_heredoc_quote_t; +/** + * The type of indentation that a heredoc uses. + */ typedef enum { PM_HEREDOC_INDENT_NONE, PM_HEREDOC_INDENT_DASH, PM_HEREDOC_INDENT_TILDE, } pm_heredoc_indent_t; -// When lexing Ruby source, the lexer has a small amount of state to tell which -// kind of token it is currently lexing. For example, when we find the start of -// a string, the first token that we return is a TOKEN_STRING_BEGIN token. After -// that the lexer is now in the PM_LEX_STRING mode, and will return tokens that -// are found as part of a string. 
+/** + * When lexing Ruby source, the lexer has a small amount of state to tell which + * kind of token it is currently lexing. For example, when we find the start of + * a string, the first token that we return is a TOKEN_STRING_BEGIN token. After + * that the lexer is now in the PM_LEX_STRING mode, and will return tokens that + * are found as part of a string. + */ typedef struct pm_lex_mode { enum { - // This state is used when any given token is being lexed. + /** This state is used when any given token is being lexed. */ PM_LEX_DEFAULT, - // This state is used when we're lexing as normal but inside an embedded - // expression of a string. + /** + * This state is used when we're lexing as normal but inside an embedded + * expression of a string. + */ PM_LEX_EMBEXPR, - // This state is used when we're lexing a variable that is embedded - // directly inside of a string with the # shorthand. + /** + * This state is used when we're lexing a variable that is embedded + * directly inside of a string with the # shorthand. + */ PM_LEX_EMBVAR, - // This state is used when you are inside the content of a heredoc. + /** This state is used when you are inside the content of a heredoc. */ PM_LEX_HEREDOC, - // This state is used when we are lexing a list of tokens, as in a %w - // word list literal or a %i symbol list literal. + /** + * This state is used when we are lexing a list of tokens, as in a %w + * word list literal or a %i symbol list literal. + */ PM_LEX_LIST, - // This state is used when a regular expression has been begun and we - // are looking for the terminator. + /** + * This state is used when a regular expression has been begun and we + * are looking for the terminator. + */ PM_LEX_REGEXP, - // This state is used when we are lexing a string or a string-like - // token, as in string content with either quote or an xstring. 
+ /** + * This state is used when we are lexing a string or a string-like + * token, as in string content with either quote or an xstring. + */ PM_LEX_STRING } mode; union { struct { - // This keeps track of the nesting level of the list. + /** This keeps track of the nesting level of the list. */ size_t nesting; - // Whether or not interpolation is allowed in this list. + /** Whether or not interpolation is allowed in this list. */ bool interpolation; - // When lexing a list, it takes into account balancing the - // terminator if the terminator is one of (), [], {}, or <>. + /** + * When lexing a list, it takes into account balancing the + * terminator if the terminator is one of (), [], {}, or <>. + */ uint8_t incrementor; - // This is the terminator of the list literal. + /** This is the terminator of the list literal. */ uint8_t terminator; - // This is the character set that should be used to delimit the - // tokens within the list. + /** + * This is the character set that should be used to delimit the + * tokens within the list. + */ uint8_t breakpoints[11]; } list; struct { - // This keeps track of the nesting level of the regular expression. + /** + * This keeps track of the nesting level of the regular expression. + */ size_t nesting; - // When lexing a regular expression, it takes into account balancing - // the terminator if the terminator is one of (), [], {}, or <>. + /** + * When lexing a regular expression, it takes into account balancing + * the terminator if the terminator is one of (), [], {}, or <>. + */ uint8_t incrementor; - // This is the terminator of the regular expression. + /** This is the terminator of the regular expression. */ uint8_t terminator; - // This is the character set that should be used to delimit the - // tokens within the regular expression. + /** + * This is the character set that should be used to delimit the + * tokens within the regular expression. 
+ */ uint8_t breakpoints[6]; } regexp; struct { - // This keeps track of the nesting level of the string. + /** This keeps track of the nesting level of the string. */ size_t nesting; - // Whether or not interpolation is allowed in this string. + /** Whether or not interpolation is allowed in this string. */ bool interpolation; - // Whether or not at the end of the string we should allow a :, - // which would indicate this was a dynamic symbol instead of a - // string. + /** + * Whether or not at the end of the string we should allow a :, + * which would indicate this was a dynamic symbol instead of a + * string. + */ bool label_allowed; - // When lexing a string, it takes into account balancing the - // terminator if the terminator is one of (), [], {}, or <>. + /** + * When lexing a string, it takes into account balancing the + * terminator if the terminator is one of (), [], {}, or <>. + */ uint8_t incrementor; - // This is the terminator of the string. It is typically either a - // single or double quote. + /** + * This is the terminator of the string. It is typically either a + * single or double quote. + */ uint8_t terminator; - // This is the character set that should be used to delimit the - // tokens within the string. + /** + * This is the character set that should be used to delimit the + * tokens within the string. + */ uint8_t breakpoints[6]; } string; struct { - // These pointers point to the beginning and end of the heredoc - // identifier. + /** A pointer to the start of the heredoc identifier. */ const uint8_t *ident_start; + + /** The length of the heredoc identifier. */ size_t ident_length; + /** The type of quote that the heredoc uses. */ pm_heredoc_quote_t quote; + + /** The type of indentation that the heredoc uses. */ pm_heredoc_indent_t indent; - // This is the pointer to the character where lexing should resume - // once the heredoc has been completely processed. 
+ /** + * This is the pointer to the character where lexing should resume + * once the heredoc has been completely processed. + */ const uint8_t *next_start; - // This is used to track the amount of common whitespace on each - // line so that we know how much to dedent each line in the case of - // a tilde heredoc. + /** + * This is used to track the amount of common whitespace on each + * line so that we know how much to dedent each line in the case of + * a tilde heredoc. + */ size_t common_whitespace; } heredoc; } as; - // The previous lex state so that it knows how to pop. + /** The previous lex state so that it knows how to pop. */ struct pm_lex_mode *prev; } pm_lex_mode_t; -// We pre-allocate a certain number of lex states in order to avoid having to -// call malloc too many times while parsing. You really shouldn't need more than -// this because you only really nest deeply when doing string interpolation. +/** + * We pre-allocate a certain number of lex states in order to avoid having to + * call malloc too many times while parsing. You really shouldn't need more than + * this because you only really nest deeply when doing string interpolation. + */ #define PM_LEX_STACK_SIZE 4 // A forward declaration since our error handler struct accepts a parser for // each of its function calls. typedef struct pm_parser pm_parser_t; -// While parsing, we keep track of a stack of contexts. This is helpful for -// error recovery so that we can pop back to a previous context when we hit a -// token that is understood by a parent context but not by the current context. +/** + * While parsing, we keep track of a stack of contexts. This is helpful for + * error recovery so that we can pop back to a previous context when we hit a + * token that is understood by a parent context but not by the current context. 
+ */ typedef enum { - PM_CONTEXT_BEGIN, // a begin statement - PM_CONTEXT_BLOCK_BRACES, // expressions in block arguments using braces - PM_CONTEXT_BLOCK_KEYWORDS, // expressions in block arguments using do..end - PM_CONTEXT_CASE_WHEN, // a case when statements - PM_CONTEXT_CASE_IN, // a case in statements - PM_CONTEXT_CLASS, // a class declaration - PM_CONTEXT_DEF, // a method definition - PM_CONTEXT_DEF_PARAMS, // a method definition's parameters - PM_CONTEXT_DEFAULT_PARAMS, // a method definition's default parameter - PM_CONTEXT_ELSE, // an else clause - PM_CONTEXT_ELSIF, // an elsif clause - PM_CONTEXT_EMBEXPR, // an interpolated expression - PM_CONTEXT_ENSURE, // an ensure statement - PM_CONTEXT_FOR, // a for loop - PM_CONTEXT_FOR_INDEX, // a for loop's index - PM_CONTEXT_IF, // an if statement - PM_CONTEXT_LAMBDA_BRACES, // a lambda expression with braces - PM_CONTEXT_LAMBDA_DO_END, // a lambda expression with do..end - PM_CONTEXT_MAIN, // the top level context - PM_CONTEXT_MODULE, // a module declaration - PM_CONTEXT_PARENS, // a parenthesized expression - PM_CONTEXT_POSTEXE, // an END block - PM_CONTEXT_PREDICATE, // a predicate inside an if/elsif/unless statement - PM_CONTEXT_PREEXE, // a BEGIN block - PM_CONTEXT_RESCUE_ELSE, // a rescue else statement - PM_CONTEXT_RESCUE, // a rescue statement - PM_CONTEXT_SCLASS, // a singleton class definition - PM_CONTEXT_UNLESS, // an unless statement - PM_CONTEXT_UNTIL, // an until statement - PM_CONTEXT_WHILE, // a while statement + /** a begin statement */ + PM_CONTEXT_BEGIN, + + /** expressions in block arguments using braces */ + PM_CONTEXT_BLOCK_BRACES, + + /** expressions in block arguments using do..end */ + PM_CONTEXT_BLOCK_KEYWORDS, + + /** a case when statements */ + PM_CONTEXT_CASE_WHEN, + + /** a case in statements */ + PM_CONTEXT_CASE_IN, + + /** a class declaration */ + PM_CONTEXT_CLASS, + + /** a method definition */ + PM_CONTEXT_DEF, + + /** a method definition's parameters */ + PM_CONTEXT_DEF_PARAMS, 
+ + /** a method definition's default parameter */ + PM_CONTEXT_DEFAULT_PARAMS, + + /** an else clause */ + PM_CONTEXT_ELSE, + + /** an elsif clause */ + PM_CONTEXT_ELSIF, + + /** an interpolated expression */ + PM_CONTEXT_EMBEXPR, + + /** an ensure statement */ + PM_CONTEXT_ENSURE, + + /** a for loop */ + PM_CONTEXT_FOR, + + /** a for loop's index */ + PM_CONTEXT_FOR_INDEX, + + /** an if statement */ + PM_CONTEXT_IF, + + /** a lambda expression with braces */ + PM_CONTEXT_LAMBDA_BRACES, + + /** a lambda expression with do..end */ + PM_CONTEXT_LAMBDA_DO_END, + + /** the top level context */ + PM_CONTEXT_MAIN, + + /** a module declaration */ + PM_CONTEXT_MODULE, + + /** a parenthesized expression */ + PM_CONTEXT_PARENS, + + /** an END block */ + PM_CONTEXT_POSTEXE, + + /** a predicate inside an if/elsif/unless statement */ + PM_CONTEXT_PREDICATE, + + /** a BEGIN block */ + PM_CONTEXT_PREEXE, + + /** a rescue else statement */ + PM_CONTEXT_RESCUE_ELSE, + + /** a rescue statement */ + PM_CONTEXT_RESCUE, + + /** a singleton class definition */ + PM_CONTEXT_SCLASS, + + /** an unless statement */ + PM_CONTEXT_UNLESS, + + /** an until statement */ + PM_CONTEXT_UNTIL, + + /** a while statement */ + PM_CONTEXT_WHILE, } pm_context_t; -// This is a node in a linked list of contexts. +/** This is a node in a linked list of contexts. */ typedef struct pm_context_node { pm_context_t context; struct pm_context_node *prev; } pm_context_node_t; -// This is the type of a comment that we've found while parsing. +/** This is the type of a comment that we've found while parsing. */ typedef enum { PM_COMMENT_INLINE, PM_COMMENT_EMBDOC, @@ -269,185 +380,270 @@ typedef struct { uint32_t value_length; } pm_magic_comment_t; -// When the encoding that is being used to parse the source is changed by prism, -// we provide the ability here to call out to a user-defined function. 
+/** + * When the encoding that is being used to parse the source is changed by prism, + * we provide the ability here to call out to a user-defined function. + */ typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser); -// When an encoding is encountered that isn't understood by prism, we provide -// the ability here to call out to a user-defined function to get an encoding -// struct. If the function returns something that isn't NULL, we set that to -// our encoding and use it to parse identifiers. +/** + * When an encoding is encountered that isn't understood by prism, we provide + * the ability here to call out to a user-defined function to get an encoding + * struct. If the function returns something that isn't NULL, we set that to + * our encoding and use it to parse identifiers. + */ typedef pm_encoding_t *(*pm_encoding_decode_callback_t)(pm_parser_t *parser, const uint8_t *name, size_t width); -// When you are lexing through a file, the lexer needs all of the information -// that the parser additionally provides (for example, the local table). So if -// you want to properly lex Ruby, you need to actually lex it in the context of -// the parser. In order to provide this functionality, we optionally allow a -// struct to be attached to the parser that calls back out to a user-provided -// callback when each token is lexed. +/** + * When you are lexing through a file, the lexer needs all of the information + * that the parser additionally provides (for example, the local table). So if + * you want to properly lex Ruby, you need to actually lex it in the context of + * the parser. In order to provide this functionality, we optionally allow a + * struct to be attached to the parser that calls back out to a user-provided + * callback when each token is lexed. + */ typedef struct { - // This opaque pointer is used to provide whatever information the user - // deemed necessary to the callback. 
In our case we use it to pass the array - // that the tokens get appended into. + /** + * This opaque pointer is used to provide whatever information the user + * deemed necessary to the callback. In our case we use it to pass the array + * that the tokens get appended into. + */ void *data; - // This is the callback that is called when a token is lexed. It is passed - // the opaque data pointer, the parser, and the token that was lexed. + /** + * This is the callback that is called when a token is lexed. It is passed + * the opaque data pointer, the parser, and the token that was lexed. + */ void (*callback)(void *data, pm_parser_t *parser, pm_token_t *token); } pm_lex_callback_t; -// This struct represents a node in a linked list of scopes. Some scopes can see -// into their parent scopes, while others cannot. +/** + * This struct represents a node in a linked list of scopes. Some scopes can see + * into their parent scopes, while others cannot. + */ typedef struct pm_scope { - // The IDs of the locals in the given scope. + /** The IDs of the locals in the given scope. */ pm_constant_id_list_t locals; - // A pointer to the previous scope in the linked list. + /** A pointer to the previous scope in the linked list. */ struct pm_scope *previous; - // A boolean indicating whether or not this scope can see into its parent. - // If closed is true, then the scope cannot see into its parent. + /** + * A boolean indicating whether or not this scope can see into its parent. + * If closed is true, then the scope cannot see into its parent. + */ bool closed; - // A boolean indicating whether or not this scope has explicit parameters. - // This is necessary to determine whether or not numbered parameters are - // allowed. + /** + * A boolean indicating whether or not this scope has explicit parameters. + * This is necessary to determine whether or not numbered parameters are + * allowed. 
+ */ bool explicit_params; - // A boolean indicating whether or not this scope has numbered parameters. - // This is necessary to determine if child blocks are allowed to use - // numbered parameters. + /** + * A boolean indicating whether or not this scope has numbered parameters. + * This is necessary to determine if child blocks are allowed to use + * numbered parameters. + */ bool numbered_params; - // A transparent scope is a scope that cannot have locals set on itself. - // When a local is set on this scope, it will instead be set on the parent - // scope's local table. + /** + * A transparent scope is a scope that cannot have locals set on itself. + * When a local is set on this scope, it will instead be set on the parent + * scope's local table. + */ bool transparent; } pm_scope_t; -// This struct represents the overall parser. It contains a reference to the -// source file, as well as pointers that indicate where in the source it's -// currently parsing. It also contains the most recent and current token that -// it's considering. +/** + * This struct represents the overall parser. It contains a reference to the + * source file, as well as pointers that indicate where in the source it's + * currently parsing. It also contains the most recent and current token that + * it's considering. + */ struct pm_parser { - pm_lex_state_t lex_state; // the current state of the lexer - int enclosure_nesting; // tracks the current nesting of (), [], and {} + /** The current state of the lexer. */ + pm_lex_state_t lex_state; - // Used to temporarily track the nesting of enclosures to determine if a { - // is the beginning of a lambda following the parameters of a lambda. + /** Tracks the current nesting of (), [], and {}. */ + int enclosure_nesting; + + /** + * Used to temporarily track the nesting of enclosures to determine if a { + * is the beginning of a lambda following the parameters of a lambda. 
+ */ int lambda_enclosure_nesting; - // Used to track the nesting of braces to ensure we get the correct value - // when we are interpolating blocks with braces. + /** + * Used to track the nesting of braces to ensure we get the correct value + * when we are interpolating blocks with braces. + */ int brace_nesting; - // the stack used to determine if a do keyword belongs to the predicate of a - // while, until, or for loop + /** + * The stack used to determine if a do keyword belongs to the predicate of a + * while, until, or for loop. + */ pm_state_stack_t do_loop_stack; - // the stack used to determine if a do keyword belongs to the beginning of a - // block + /** + * The stack used to determine if a do keyword belongs to the beginning of a + * block. + */ pm_state_stack_t accepts_block_stack; struct { - pm_lex_mode_t *current; // the current mode of the lexer - pm_lex_mode_t stack[PM_LEX_STACK_SIZE]; // the stack of lexer modes - size_t index; // the current index into the lexer mode stack + /** The current mode of the lexer. */ + pm_lex_mode_t *current; + + /** The stack of lexer modes. */ + pm_lex_mode_t stack[PM_LEX_STACK_SIZE]; + + /** The current index into the lexer mode stack. */ + size_t index; } lex_modes; - const uint8_t *start; // the pointer to the start of the source - const uint8_t *end; // the pointer to the end of the source - pm_token_t previous; // the previous token we were considering - pm_token_t current; // the current token we're considering + /** The pointer to the start of the source. */ + const uint8_t *start; + + /** The pointer to the end of the source. */ + const uint8_t *end; + + /** The previous token we were considering. */ + pm_token_t previous; + + /** The current token we're considering. */ + pm_token_t current; - // This is a special field set on the parser when we need the parser to jump - // to a specific location when lexing the next token, as opposed to just - // using the end of the previous token. Normally this is NULL. 
+ /** + * This is a special field set on the parser when we need the parser to jump + * to a specific location when lexing the next token, as opposed to just + * using the end of the previous token. Normally this is NULL. + */ const uint8_t *next_start; - // This field indicates the end of a heredoc whose identifier was found on - // the current line. If another heredoc is found on the same line, then this - // will be moved forward to the end of that heredoc. If no heredocs are - // found on a line then this is NULL. + /** + * This field indicates the end of a heredoc whose identifier was found on + * the current line. If another heredoc is found on the same line, then this + * will be moved forward to the end of that heredoc. If no heredocs are + * found on a line then this is NULL. + */ const uint8_t *heredoc_end; - pm_list_t comment_list; // the list of comments that have been found while parsing - pm_list_t magic_comment_list; // the list of magic comments that have been found while parsing. - pm_list_t warning_list; // the list of warnings that have been found while parsing - pm_list_t error_list; // the list of errors that have been found while parsing - pm_scope_t *current_scope; // the current local scope + /** The list of comments that have been found while parsing. */ + pm_list_t comment_list; + + /** The list of magic comments that have been found while parsing. */ + pm_list_t magic_comment_list; + + /** The list of warnings that have been found while parsing. */ + pm_list_t warning_list; + + /** The list of errors that have been found while parsing. */ + pm_list_t error_list; + + /** The current local scope. */ + pm_scope_t *current_scope; - pm_context_node_t *current_context; // the current parsing context + /** The current parsing context. */ + pm_context_node_t *current_context; - // The encoding functions for the current file is attached to the parser as - // it's parsing so that it can change with a magic comment. 
+ /** + * The encoding functions for the current file is attached to the parser as + * it's parsing so that it can change with a magic comment. + */ pm_encoding_t encoding; - // When the encoding that is being used to parse the source is changed by - // prism, we provide the ability here to call out to a user-defined - // function. + /** + * When the encoding that is being used to parse the source is changed by + * prism, we provide the ability here to call out to a user-defined + * function. + */ pm_encoding_changed_callback_t encoding_changed_callback; - // When an encoding is encountered that isn't understood by prism, we - // provide the ability here to call out to a user-defined function to get an - // encoding struct. If the function returns something that isn't NULL, we - // set that to our encoding and use it to parse identifiers. + /** + * When an encoding is encountered that isn't understood by prism, we + * provide the ability here to call out to a user-defined function to get an + * encoding struct. If the function returns something that isn't NULL, we + * set that to our encoding and use it to parse identifiers. + */ pm_encoding_decode_callback_t encoding_decode_callback; - // This pointer indicates where a comment must start if it is to be - // considered an encoding comment. + /** + * This pointer indicates where a comment must start if it is to be + * considered an encoding comment. + */ const uint8_t *encoding_comment_start; - // This is an optional callback that can be attached to the parser that will - // be called whenever a new token is lexed by the parser. + /** + * This is an optional callback that can be attached to the parser that will + * be called whenever a new token is lexed by the parser. + */ pm_lex_callback_t *lex_callback; - // This is the path of the file being parsed - // We use the filepath when constructing SourceFileNodes + /** + * This is the path of the file being parsed. 
We use the filepath when + * constructing SourceFileNodes. + */ pm_string_t filepath_string; - // This constant pool keeps all of the constants defined throughout the file - // so that we can reference them later. + /** + * This constant pool keeps all of the constants defined throughout the file + * so that we can reference them later. + */ pm_constant_pool_t constant_pool; - // This is the list of newline offsets in the source file. + /** This is the list of newline offsets in the source file. */ pm_newline_list_t newline_list; - // We want to add a flag to integer nodes that indicates their base. We only - // want to parse these once, but we don't have space on the token itself to - // communicate this information. So we store it here and pass it through - // when we find tokens that we need it for. + /** + * We want to add a flag to integer nodes that indicates their base. We only + * want to parse these once, but we don't have space on the token itself to + * communicate this information. So we store it here and pass it through + * when we find tokens that we need it for. + */ pm_node_flags_t integer_base; - // This string is used to pass information from the lexer to the parser. It - // is particularly necessary because of escape sequences. + /** + * This string is used to pass information from the lexer to the parser. It + * is particularly necessary because of escape sequences. + */ pm_string_t current_string; - // Whether or not we're at the beginning of a command + /** Whether or not we're at the beginning of a command. */ bool command_start; - // Whether or not we're currently recovering from a syntax error + /** Whether or not we're currently recovering from a syntax error. */ bool recovering; - // Whether or not the encoding has been changed by a magic comment. We use - // this to provide a fast path for the lexer instead of going through the - // function pointer. + /** + * Whether or not the encoding has been changed by a magic comment. 
We use + * this to provide a fast path for the lexer instead of going through the + * function pointer. + */ bool encoding_changed; - // This flag indicates that we are currently parsing a pattern matching - // expression and impacts that calculation of newlines. + /** + * This flag indicates that we are currently parsing a pattern matching + * expression and impacts that calculation of newlines. + */ bool pattern_matching_newlines; - // This flag indicates that we are currently parsing a keyword argument. + /** This flag indicates that we are currently parsing a keyword argument. */ bool in_keyword_arg; - // Whether or not the parser has seen a token that has semantic meaning - // (i.e., a token that is not a comment or whitespace). + /** + * Whether or not the parser has seen a token that has semantic meaning + * (i.e., a token that is not a comment or whitespace). + */ bool semantic_token_seen; - // Whether or not we have found a frozen_string_literal magic comment with - // a true value. + /** + * Whether or not we have found a frozen_string_literal magic comment with + * a true value. + */ bool frozen_string_literal; }; -#endif // PRISM_PARSER_H +#endif diff --git a/prism/prism.c b/prism/prism.c index 0b7494c5eb..05dad03a43 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -15599,29 +15599,31 @@ pm_metadata_read_u32(const char *ptr) { } } -// Process any additional metadata being passed into a call to the parser via -// the pm_parse_serialize function. Since the source of these calls will be from -// Ruby implementation internals we assume it is from a trusted source. -// -// Currently, this is only passing in variable scoping surrounding an eval, but -// eventually it will be extended to hold any additional metadata. This data -// is serialized to reduce the calling complexity for a foreign function call -// vs a foreign runtime making a bindable in-memory version of a C structure. 
-// -// metadata is assumed to be a valid pointer pointing to well-formed data. The -// format is described below: -// -// ```text -// [ -// filepath_size: uint32_t, -// filepath: char*, -// scopes_count: uint32_t, -// [ -// locals_count: uint32_t, -// [local_size: uint32_t, local: char*]* -// ]* -// ] -// ``` +/** + * Process any additional metadata being passed into a call to the parser via + * the pm_parse_serialize function. Since the source of these calls will be from + * Ruby implementation internals we assume it is from a trusted source. + * + * Currently, this is only passing in variable scoping surrounding an eval, but + * eventually it will be extended to hold any additional metadata. This data + * is serialized to reduce the calling complexity for a foreign function call + * vs a foreign runtime making a bindable in-memory version of a C structure. + * + * metadata is assumed to be a valid pointer pointing to well-formed data. The + * format is described below: + * + * ```text + * [ + * filepath_size: uint32_t, + * filepath: char*, + * scopes_count: uint32_t, + * [ + * locals_count: uint32_t, + * [local_size: uint32_t, local: char*]* + * ]* + * ] + * ``` + */ void pm_parser_metadata(pm_parser_t *parser, const char *metadata) { uint32_t filepath_size = pm_metadata_read_u32(metadata); diff --git a/prism/prism.h b/prism/prism.h index 46bfae0fe0..c68e9cbdf7 100644 --- a/prism/prism.h +++ b/prism/prism.h @@ -29,54 +29,156 @@ #include <strings.h> #endif -void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer); - -void pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer); - -void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer); - -void pm_parser_metadata(pm_parser_t *parser, const char *metadata); - -// The prism version and the serialization format. +/** + * The prism version and the serialization format. + * + * @returns The prism version as a constant string. 
+ */ PRISM_EXPORTED_FUNCTION const char * pm_version(void); -// Initialize a parser with the given start and end pointers. +/** + * Initialize a parser with the given start and end pointers. + * + * @param parser The parser to initialize. + * @param source The source to parse. + * @param size The size of the source. + * @param filepath The optional filepath to pass to the parser. + */ PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const char *filepath); -// Register a callback that will be called whenever prism changes the encoding it -// is using to parse based on the magic comment. +/** + * Register a callback that will be called whenever prism changes the encoding + * it is using to parse based on the magic comment. + * + * @param parser The parser to register the callback with. + * @param callback The callback to register. + */ PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback); -// Register a callback that will be called when prism encounters a magic comment -// with an encoding referenced that it doesn't understand. The callback should -// return NULL if it also doesn't understand the encoding or it should return a -// pointer to a pm_encoding_t struct that contains the functions necessary to -// parse identifiers. +/** + * Register a callback that will be called when prism encounters a magic comment + * with an encoding referenced that it doesn't understand. The callback should + * return NULL if it also doesn't understand the encoding or it should return a + * pointer to a pm_encoding_t struct that contains the functions necessary to + * parse identifiers. + * + * @param parser The parser to register the callback with. + * @param callback The callback to register. 
+ */ PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_decode_callback(pm_parser_t *parser, pm_encoding_decode_callback_t callback); -// Free any memory associated with the given parser. +/** + * Free any memory associated with the given parser. + * + * @param parser The parser to free. + */ PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser); -// Parse the Ruby source associated with the given parser and return the tree. +/** + * Parse the Ruby source associated with the given parser and return the tree. + * + * @param parser The parser to use. + * @return The AST representing the Ruby source. + */ PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser); -// Serialize the AST represented by the given node to the given buffer. +/** + * Serialize the given list of comments to the given buffer. + * + * @param parser The parser to serialize. + * @param list The list of comments to serialize. + * @param buffer The buffer to serialize to. + */ +void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer); + +/** + * Serialize the name of the encoding to the buffer. + * + * @param encoding The encoding to serialize. + * @param buffer The buffer to serialize to. + */ +void pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer); + +/** + * Serialize the encoding, metadata, nodes, and constant pool. + * + * @param parser The parser to serialize. + * @param node The node to serialize. + * @param buffer The buffer to serialize to. + */ +void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer); + +/** + * Serialize the AST represented by the given node to the given buffer. + * + * @param parser The parser to serialize. + * @param node The node to serialize. + * @param buffer The buffer to serialize to. 
+ */ PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer); -// Parse the given source to the AST and serialize the AST to the given buffer. +/** + * Process any additional metadata being passed into a call to the parser via + * the pm_parse_serialize function. Since the source of these calls will be from + * Ruby implementation internals we assume it is from a trusted source. + * + * Currently, this is only passing in variable scoping surrounding an eval, but + * eventually it will be extended to hold any additional metadata. This data + * is serialized to reduce the calling complexity for a foreign function call + * vs a foreign runtime making a bindable in-memory version of a C structure. + * + * @param parser The parser to process the metadata for. + * @param metadata The metadata to process. + */ +void pm_parser_metadata(pm_parser_t *parser, const char *metadata); + +/** + * Parse the given source to the AST and serialize the AST to the given buffer. + * + * @param source The source to parse. + * @param size The size of the source. + * @param buffer The buffer to serialize to. + * @param metadata The optional metadata to pass to the parser. + */ PRISM_EXPORTED_FUNCTION void pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata); -// Parse and serialize the comments in the given source to the given buffer. +/** + * Parse and serialize the comments in the given source to the given buffer. + * + * @param source The source to parse. + * @param size The size of the source. + * @param buffer The buffer to serialize to. + * @param metadata The optional metadata to pass to the parser. + */ PRISM_EXPORTED_FUNCTION void pm_parse_serialize_comments(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata); -// Lex the given source and serialize to the given buffer. +/** + * Lex the given source and serialize to the given buffer. 
+ * + * @param source The source to lex. + * @param size The size of the source. + * @param filepath The optional filepath to pass to the lexer. + * @param buffer The buffer to serialize to. + */ PRISM_EXPORTED_FUNCTION void pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer); -// Parse and serialize both the AST and the tokens represented by the given -// source to the given buffer. +/** + * Parse and serialize both the AST and the tokens represented by the given + * source to the given buffer. + * + * @param source The source to parse. + * @param size The size of the source. + * @param buffer The buffer to serialize to. + * @param metadata The optional metadata to pass to the parser. + */ PRISM_EXPORTED_FUNCTION void pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata); -// Returns a string representation of the given token type. +/** + * Returns a string representation of the given token type. + * + * @param token_type The token type to convert to a string. + * @return A string representation of the given token type. + */ PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type); #endif diff --git a/prism/templates/include/prism/ast.h.erb b/prism/templates/include/prism/ast.h.erb index 75b10c9807..48ad64d699 100644 --- a/prism/templates/include/prism/ast.h.erb +++ b/prism/templates/include/prism/ast.h.erb @@ -9,41 +9,70 @@ #include <stddef.h> #include <stdint.h> -// This enum represents every type of token in the Ruby source. +/** + * This enum represents every type of token in the Ruby source. + */ typedef enum pm_token_type { <%- tokens.each do |token| -%> <%= token.declaration %> <%- end -%> - PM_TOKEN_MAXIMUM, // the maximum token value + + /** The maximum token value. */ + PM_TOKEN_MAXIMUM, } pm_token_type_t; -// This struct represents a token in the Ruby source. We use it to track both -// type and location information. 
+/** + * This struct represents a token in the Ruby source. We use it to track both + * type and location information. + */ typedef struct { + /** The type of the token. */ pm_token_type_t type; + + /** A pointer to the start location of the token in the source. */ const uint8_t *start; + + /** A pointer to the end location of the token in the source. */ const uint8_t *end; } pm_token_t; -// This represents a range of bytes in the source string to which a node or -// token corresponds. +/** + * This represents a range of bytes in the source string to which a node or + * token corresponds. + */ typedef struct { + /** A pointer to the start location of the range in the source. */ const uint8_t *start; + + /** A pointer to the end location of the range in the source. */ const uint8_t *end; } pm_location_t; struct pm_node; +/** + * A list of nodes in the source, most often used for lists of children. + */ typedef struct pm_node_list { - struct pm_node **nodes; + /** The number of nodes in the list. */ size_t size; + + /** The capacity of the list that has been allocated. */ size_t capacity; + + /** The nodes in the list. */ + struct pm_node **nodes; } pm_node_list_t; +/** + * This enum represents every type of node in the Ruby syntax tree. + */ enum pm_node_type { <%- nodes.each_with_index do |node, index| -%> <%= node.type %> = <%= index + 1 %>, <%- end -%> + + /** A special kind of node used for compilation. */ PM_SCOPE_NODE }; @@ -66,15 +95,22 @@ static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS * embedded into every node type. */ typedef struct pm_node { - // This represents the type of the node. It somewhat maps to the nodes that - // existed in the original grammar and ripper, but it's not a 1:1 mapping. + /** + * This represents the type of the node. It somewhat maps to the nodes that + * existed in the original grammar and ripper, but it's not a 1:1 mapping. 
+ */ pm_node_type_t type; - // This represents any flags on the node + /** + * This represents any flags on the node. Some are common to all nodes, and + * some are specific to the type of node. + */ pm_node_flags_t flags; - // This is the location of the node in the source. It's a range of bytes - // containing a start and an end. + /** + * This is the location of the node in the source. It's a range of bytes + * containing a start and an end. + */ pm_location_t location; } pm_node_t; <%- nodes.each do |node| -%> @@ -124,4 +160,4 @@ typedef enum pm_<%= flag.human %> { #define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::SERIALIZE_ONLY_SEMANTICS_FIELDS %> -#endif // PRISM_AST_H +#endif diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb index d46284d3b2..2f75509492 100644 --- a/prism/templates/src/serialize.c.erb +++ b/prism/templates/src/serialize.c.erb @@ -47,7 +47,7 @@ pm_serialize_string(pm_parser_t *parser, pm_string_t *string, pm_buffer_t *buffe } } -void +static void pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node)); @@ -136,6 +136,9 @@ pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *bu pm_buffer_append_varint(buffer, pm_ptrdifft_to_u32(comment->end - comment->start)); } +/** + * Serialize the given list of comments to the given buffer. + */ void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) { pm_buffer_append_varint(buffer, pm_sizet_to_u32(pm_list_size(list))); @@ -189,6 +192,9 @@ pm_serialize_diagnostic_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t * } } +/** + * Serialize the name of the encoding to the buffer. 
+ */ void pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) { size_t encoding_length = strlen(encoding->name); @@ -197,6 +203,9 @@ pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) { } #line <%= __LINE__ + 1 %> "<%= File.basename(__FILE__) %>" +/** + * Serialize the encoding, metadata, nodes, and constant pool. + */ void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { pm_serialize_encoding(&parser->encoding, buffer); @@ -274,6 +283,9 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) { pm_buffer_append_varint(buffer, parser->lex_state); } +/** + * Lex the given source and serialize to the given buffer. + */ PRISM_EXPORTED_FUNCTION void pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_buffer_t *buffer) { pm_parser_t parser; @@ -300,8 +312,10 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu pm_parser_free(&parser); } -// Parse and serialize both the AST and the tokens represented by the given -// source to the given buffer. +/** + * Parse and serialize both the AST and the tokens represented by the given + * source to the given buffer. + */ PRISM_EXPORTED_FUNCTION void pm_parse_lex_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, const char *metadata) { pm_parser_t parser; diff --git a/prism/templates/src/token_type.c.erb b/prism/templates/src/token_type.c.erb index 98be081732..d3c1c3f1b8 100644 --- a/prism/templates/src/token_type.c.erb +++ b/prism/templates/src/token_type.c.erb @@ -2,7 +2,9 @@ #include "prism/ast.h" -// Returns a string representation of the given token type. +/** + * Returns a string representation of the given token type. + */ PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type) { |
