summaryrefslogtreecommitdiff
path: root/prism
diff options
context:
space:
mode:
authorKevin Newton <kddnewton@gmail.com>2023-10-31 13:26:31 -0400
committerKevin Newton <kddnewton@gmail.com>2023-11-01 13:10:29 -0400
commit690f3bbf5d67d8629d6e6d7305b79fcc5362b0c9 (patch)
tree5d01ab6281f21ccac723ac4f856c0b441a98d98a /prism
parente745af2f0e36f7ff3499019dfb038799a9d3d069 (diff)
[ruby/prism] Last remaining missing C comments
https://github.com/ruby/prism/commit/e327449db6
Diffstat (limited to 'prism')
-rw-r--r--prism/config.yml10
-rw-r--r--prism/defines.h39
-rw-r--r--prism/diagnostic.h14
-rw-r--r--prism/enc/pm_encoding.h21
-rw-r--r--prism/node.h5
-rw-r--r--prism/pack.h77
-rw-r--r--prism/parser.h32
-rw-r--r--prism/prettyprint.h5
-rw-r--r--prism/prism.c120
-rw-r--r--prism/prism.h108
-rw-r--r--prism/regexp.c12
-rw-r--r--prism/regexp.h5
-rw-r--r--prism/templates/include/prism/ast.h.erb41
-rw-r--r--prism/util/pm_buffer.h5
-rw-r--r--prism/util/pm_char.h5
-rw-r--r--prism/util/pm_constant_pool.h27
-rw-r--r--prism/util/pm_list.h5
-rw-r--r--prism/util/pm_memchr.h5
-rw-r--r--prism/util/pm_newline_list.h21
-rw-r--r--prism/util/pm_state_stack.h6
-rw-r--r--prism/util/pm_string.h5
-rw-r--r--prism/util/pm_string_list.h5
-rw-r--r--prism/util/pm_strncasecmp.h5
-rw-r--r--prism/util/pm_strpbrk.h5
-rw-r--r--prism/version.h10
25 files changed, 424 insertions, 169 deletions
diff --git a/prism/config.yml b/prism/config.yml
index 97e8804ad4..bd5afc7d4e 100644
--- a/prism/config.yml
+++ b/prism/config.yml
@@ -59,11 +59,11 @@ tokens:
- name: CONSTANT
comment: "a constant"
- name: DOT
- comment: "."
+ comment: "the . call operator"
- name: DOT_DOT
- comment: ".."
+ comment: "the .. range operator"
- name: DOT_DOT_DOT
- comment: "..."
+ comment: "the ... range operator or forwarding parameter"
- name: EMBDOC_BEGIN
comment: "=begin"
- name: EMBDOC_END
@@ -311,9 +311,9 @@ tokens:
- name: UCOLON_COLON
comment: "unary ::"
- name: UDOT_DOT
- comment: "unary .."
+ comment: "unary .. operator"
- name: UDOT_DOT_DOT
- comment: "unary ..."
+ comment: "unary ... operator"
- name: UMINUS
comment: "-@"
- name: UMINUS_NUM
diff --git a/prism/defines.h b/prism/defines.h
index 457a8502f8..b10f8fa3e0 100644
--- a/prism/defines.h
+++ b/prism/defines.h
@@ -1,8 +1,14 @@
+/**
+ * @file defines.h
+ *
+ * Macro definitions used throughout the prism library.
+ *
+ * This file should be included first by any *.h or *.c in prism for consistency
+ * and to ensure that the macros are defined before they are used.
+ */
#ifndef PRISM_DEFINES_H
#define PRISM_DEFINES_H
-// This file should be included first by any *.h or *.c in prism.
-
#include <ctype.h>
#include <stdarg.h>
#include <stddef.h>
@@ -10,7 +16,11 @@
#include <stdio.h>
#include <string.h>
-// PRISM_EXPORTED_FUNCTION
+/**
+ * By default, we compile with -fvisibility=hidden. When this is enabled, we
+ * need to mark certain functions as being publicly visible. This macro does
+ * that in a compiler-agnostic way.
+ */
#ifndef PRISM_EXPORTED_FUNCTION
# ifdef PRISM_EXPORT_SYMBOLS
# ifdef _WIN32
@@ -23,7 +33,12 @@
# endif
#endif
-// PRISM_ATTRIBUTE_FORMAT
+/**
+ * Certain compilers support specifying that a function accepts variadic
+ * parameters that look like printf format strings to provide a better developer
+ * experience when someone is using the function. This macro does that in a
+ * compiler-agnostic way.
+ */
#if defined(__GNUC__)
# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(printf, string_index, argument_index)))
#elif defined(__clang__)
@@ -32,19 +47,29 @@
# define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index)
#endif
-// PRISM_ATTRIBUTE_UNUSED
+/**
+ * GCC will warn if you specify a function or parameter that is unused at
+ * runtime. This macro allows you to mark a function or parameter as unused in a
+ * compiler-agnostic way.
+ */
#if defined(__GNUC__)
# define PRISM_ATTRIBUTE_UNUSED __attribute__((unused))
#else
# define PRISM_ATTRIBUTE_UNUSED
#endif
-// inline
+/**
+ * Old Visual Studio versions do not support the inline keyword, so we need to
+ * define it to be __inline.
+ */
#if defined(_MSC_VER) && !defined(inline)
# define inline __inline
#endif
-// Windows versions before 2015 use _snprintf
+/**
+ * Old Visual Studio versions before 2015 do not implement snprintf, but instead
+ * implement _snprintf. We standardize that here.
+ */
#if !defined(snprintf) && defined(_MSC_VER) && (_MSC_VER < 1900)
# define snprintf _snprintf
#endif
diff --git a/prism/diagnostic.h b/prism/diagnostic.h
index 7d78a16000..97bd83fdf7 100644
--- a/prism/diagnostic.h
+++ b/prism/diagnostic.h
@@ -1,3 +1,8 @@
+/**
+ * @file diagnostic.h
+ *
+ * A list of diagnostics generated during parsing.
+ */
#ifndef PRISM_DIAGNOSTIC_H
#define PRISM_DIAGNOSTIC_H
@@ -9,14 +14,21 @@
#include <assert.h>
/**
- * This struct represents a diagnostic found during parsing.
+ * This struct represents a diagnostic generated during parsing.
*
* @extends pm_list_node_t
*/
typedef struct {
+ /** The embedded base node. */
pm_list_node_t node;
+
+ /** A pointer to the start of the source that generated the diagnostic. */
const uint8_t *start;
+
+ /** A pointer to the end of the source that generated the diagnostic. */
const uint8_t *end;
+
+ /** The message associated with the diagnostic. */
const char *message;
} pm_diagnostic_t;
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h
index 28b9f02281..f8e554e617 100644
--- a/prism/enc/pm_encoding.h
+++ b/prism/enc/pm_encoding.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_encoding.h
+ *
+ * The encoding interface and implementations used by the parser.
+ */
#ifndef PRISM_ENCODING_H
#define PRISM_ENCODING_H
@@ -55,10 +60,22 @@ typedef struct {
bool multibyte;
} pm_encoding_t;
-// These bits define the location of each bit of metadata within the various
-// lookup tables that are used to determine the properties of a character.
+/**
+ * All of the lookup tables use the first bit of each embedded byte to indicate
+ * whether the codepoint is alphabetical.
+ */
#define PRISM_ENCODING_ALPHABETIC_BIT 1 << 0
+
+/**
+ * All of the lookup tables use the second bit of each embedded byte to indicate
+ * whether the codepoint is alphanumeric.
+ */
#define PRISM_ENCODING_ALPHANUMERIC_BIT 1 << 1
+
+/**
+ * All of the lookup tables use the third bit of each embedded byte to indicate
+ * whether the codepoint is uppercase.
+ */
#define PRISM_ENCODING_UPPERCASE_BIT 1 << 2
/**
diff --git a/prism/node.h b/prism/node.h
index 768ddec1b0..3e15d18552 100644
--- a/prism/node.h
+++ b/prism/node.h
@@ -1,3 +1,8 @@
+/**
+ * @file node.h
+ *
+ * Functions related to nodes in the AST.
+ */
#ifndef PRISM_NODE_H
#define PRISM_NODE_H
diff --git a/prism/pack.h b/prism/pack.h
index be52a7b4de..e494848389 100644
--- a/prism/pack.h
+++ b/prism/pack.h
@@ -1,3 +1,8 @@
+/**
+ * @file pack.h
+ *
+ * A pack template string parser.
+ */
#ifndef PRISM_PACK_H
#define PRISM_PACK_H
@@ -6,15 +11,18 @@
#include <stdint.h>
#include <stdlib.h>
+/** The version of the pack template language that we are parsing. */
typedef enum pm_pack_version {
PM_PACK_VERSION_3_2_0
} pm_pack_version;
+/** The type of pack template we are parsing. */
typedef enum pm_pack_variant {
PM_PACK_VARIANT_PACK,
PM_PACK_VARIANT_UNPACK
} pm_pack_variant;
+/** A directive within the pack template. */
typedef enum pm_pack_type {
PM_PACK_SPACE,
PM_PACK_COMMENT,
@@ -40,12 +48,14 @@ typedef enum pm_pack_type {
PM_PACK_END
} pm_pack_type;
+/** The signedness of a pack directive. */
typedef enum pm_pack_signed {
PM_PACK_UNSIGNED,
PM_PACK_SIGNED,
PM_PACK_SIGNED_NA
} pm_pack_signed;
+/** The endianness of a pack directive. */
typedef enum pm_pack_endian {
PM_PACK_AGNOSTIC_ENDIAN,
PM_PACK_LITTLE_ENDIAN, // aka 'VAX', or 'V'
@@ -54,6 +64,7 @@ typedef enum pm_pack_endian {
PM_PACK_ENDIAN_NA
} pm_pack_endian;
+/** The size of an integer pack directive. */
typedef enum pm_pack_size {
PM_PACK_SIZE_SHORT,
PM_PACK_SIZE_INT,
@@ -67,6 +78,7 @@ typedef enum pm_pack_size {
PM_PACK_SIZE_NA
} pm_pack_size;
+/** The type of length of a pack directive. */
typedef enum pm_pack_length_type {
PM_PACK_LENGTH_FIXED,
PM_PACK_LENGTH_MAX,
@@ -74,6 +86,7 @@ typedef enum pm_pack_length_type {
PM_PACK_LENGTH_NA
} pm_pack_length_type;
+/** The type of encoding for a pack template string. */
typedef enum pm_pack_encoding {
PM_PACK_ENCODING_START,
PM_PACK_ENCODING_ASCII_8BIT,
@@ -81,6 +94,7 @@ typedef enum pm_pack_encoding {
PM_PACK_ENCODING_UTF_8
} pm_pack_encoding;
+/** The result of parsing a pack template. */
typedef enum pm_pack_result {
PM_PACK_OK,
PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE,
@@ -90,39 +104,31 @@ typedef enum pm_pack_result {
PM_PACK_ERROR_DOUBLE_ENDIAN
} pm_pack_result;
-// Parse a single directive from a pack or unpack format string.
-//
-// Parameters:
-// - [in] pm_pack_version version the version of Ruby
-// - [in] pm_pack_variant variant pack or unpack
-// - [in out] const char **format the start of the next directive to parse
-// on calling, and advanced beyond the parsed directive on return, or as
-// much of it as was consumed until an error was encountered
-// - [in] const char *format_end the end of the format string
-// - [out] pm_pack_type *type the type of the directive
-// - [out] pm_pack_signed *signed_type
-// whether the value is signed
-// - [out] pm_pack_endian *endian the endianness of the value
-// - [out] pm_pack_size *size the size of the value
-// - [out] pm_pack_length_type *length_type
-// what kind of length is specified
-// - [out] size_t *length the length of the directive
-// - [in out] pm_pack_encoding *encoding
-// takes the current encoding of the string
-// which would result from parsing the whole format string, and returns a
-// possibly changed directive - the encoding should be
-// PM_PACK_ENCODING_START when pm_pack_parse is called for the first
-// directive in a format string
-//
-// Return:
-// - PM_PACK_OK on success
-// - PM_PACK_ERROR_* on error
-//
-// Notes:
-// Consult Ruby documentation for the meaning of directives.
+/**
+ * Parse a single directive from a pack or unpack format string.
+ *
+ * @param variant (in) pack or unpack
+ * @param format (in, out) the start of the next directive to parse on calling,
+ * and advanced beyond the parsed directive on return, or as much of it as
+ * was consumed until an error was encountered
+ * @param format_end (in) the end of the format string
+ * @param type (out) the type of the directive
+ * @param signed_type (out) whether the value is signed
+ * @param endian (out) the endianness of the value
+ * @param size (out) the size of the value
+ * @param length_type (out) what kind of length is specified
+ * @param length (out) the length of the directive
+ * @param encoding (in, out) takes the current encoding of the string which
+ * would result from parsing the whole format string, and returns a possibly
+ * changed directive - the encoding should be `PM_PACK_ENCODING_START` when
+ * pm_pack_parse is called for the first directive in a format string
+ *
+ * @return `PM_PACK_OK` on success or `PM_PACK_ERROR_*` on error
+ * @note Consult Ruby documentation for the meaning of directives.
+ */
PRISM_EXPORTED_FUNCTION pm_pack_result
pm_pack_parse(
- pm_pack_variant variant_arg,
+ pm_pack_variant variant,
const char **format,
const char *format_end,
pm_pack_type *type,
@@ -134,8 +140,13 @@ pm_pack_parse(
pm_pack_encoding *encoding
);
-// prism abstracts sizes away from the native system - this converts an abstract
-// size to a native size.
+/**
+ * Prism abstracts sizes away from the native system - this converts an abstract
+ * size to a native size.
+ *
+ * @param size The abstract size to convert.
+ * @return The native size.
+ */
PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size);
#endif
diff --git a/prism/parser.h b/prism/parser.h
index 92a8ce589d..f4d0153e17 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -1,3 +1,8 @@
+/**
+ * @file parser.h
+ *
+ * The parser used to parse Ruby source.
+ */
#ifndef PRISM_PARSER_H
#define PRISM_PARSER_H
@@ -84,6 +89,7 @@ typedef enum {
* are found as part of a string.
*/
typedef struct pm_lex_mode {
+ /** The type of this lex mode. */
enum {
/** This state is used when any given token is being lexed. */
PM_LEX_DEFAULT,
@@ -122,6 +128,7 @@ typedef struct pm_lex_mode {
PM_LEX_STRING
} mode;
+ /** The data associated with this type of lex mode. */
union {
struct {
/** This keeps track of the nesting level of the list. */
@@ -240,8 +247,9 @@ typedef struct pm_lex_mode {
*/
#define PM_LEX_STACK_SIZE 4
-// A forward declaration since our error handler struct accepts a parser for
-// each of its function calls.
+/**
+ * The parser used to parse Ruby source.
+ */
typedef struct pm_parser pm_parser_t;
/**
@@ -343,7 +351,10 @@ typedef enum {
/** This is a node in a linked list of contexts. */
typedef struct pm_context_node {
+ /** The context that this node represents. */
pm_context_t context;
+
+ /** A pointer to the previous context in the linked list. */
struct pm_context_node *prev;
} pm_context_node_t;
@@ -360,9 +371,16 @@ typedef enum {
* @extends pm_list_node_t
*/
typedef struct pm_comment {
+ /** The embedded base node. */
pm_list_node_t node;
+
+ /** A pointer to the start of the comment in the source. */
const uint8_t *start;
+
+ /** A pointer to the end of the comment in the source. */
const uint8_t *end;
+
+ /** The type of comment that we've found. */
pm_comment_type_t type;
} pm_comment_t;
@@ -373,10 +391,19 @@ typedef struct pm_comment {
* @extends pm_list_node_t
*/
typedef struct {
+ /** The embedded base node. */
pm_list_node_t node;
+
+ /** A pointer to the start of the key in the source. */
const uint8_t *key_start;
+
+ /** A pointer to the start of the value in the source. */
const uint8_t *value_start;
+
+ /** The length of the key in the source. */
uint32_t key_length;
+
+ /** The length of the value in the source. */
uint32_t value_length;
} pm_magic_comment_t;
@@ -493,6 +520,7 @@ struct pm_parser {
*/
pm_state_stack_t accepts_block_stack;
+ /** A stack of lex modes. */
struct {
/** The current mode of the lexer. */
pm_lex_mode_t *current;
diff --git a/prism/prettyprint.h b/prism/prettyprint.h
index 9ae2397e63..351b92df39 100644
--- a/prism/prettyprint.h
+++ b/prism/prettyprint.h
@@ -1,3 +1,8 @@
+/**
+ * @file prettyprint.h
+ *
+ * An AST node pretty-printer.
+ */
#ifndef PRISM_PRETTYPRINT_H
#define PRISM_PRETTYPRINT_H
diff --git a/prism/prism.c b/prism/prism.c
index 2b23cdb73c..8ac30c43d1 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -1,105 +1,6 @@
#include "prism.h"
/**
- * @mainpage
- *
- * Prism is a parser for the Ruby programming language. It is designed to be
- * portable, error tolerant, and maintainable. It is written in C99 and has no
- * dependencies. It is currently being integrated into
- * [CRuby](https://github.com/ruby/ruby),
- * [JRuby](https://github.com/jruby/jruby),
- * [TruffleRuby](https://github.com/oracle/truffleruby),
- * [Sorbet](https://github.com/sorbet/sorbet), and
- * [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
- *
- * @section getting-started Getting started
- *
- * If you're vendoring this project and compiling it statically then as long as
- * you have a C99 compiler you will be fine. If you're linking against it as
- * shared library, then you should compile with `-fvisibility=hidden` and
- * `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface
- * visible.
- *
- * @section parsing Parsing
- *
- * In order to parse Ruby code, the structures and functions that you're going
- * to want to use and be aware of are:
- *
- * * @ref pm_parser_t - the main parser structure
- * * @ref pm_parser_init - initialize a parser
- * * @ref pm_parse - parse and return the root node
- * * @ref pm_node_destroy - deallocate the root node returned by `pm_parse`
- * * @ref pm_parser_free - free the internal memory of the parser
- *
- * Putting all of this together would look something like:
- *
- * ```c
- * void parse(const uint8_t *source, size_t length) {
- * pm_parser_t parser;
- * pm_parser_init(&parser, source, length, NULL);
- *
- * pm_node_t *root = pm_parse(&parser);
- * printf("PARSED!\n");
- *
- * pm_node_destroy(root);
- * pm_parser_free(&parser);
- * }
- * ```
- *
- * All of the nodes "inherit" from `pm_node_t` by embedding those structures as
- * their first member. This means you can downcast and upcast any node in the
- * tree to a `pm_node_t`.
- *
- * @section serializing Serializing
- *
- * Prism provides the ability to serialize the AST and its related metadata into
- * a binary format. This format is designed to be portable to different
- * languages and runtimes so that you only need to make one FFI call in order to
- * parse Ruby code. The structures and functions that you're going to want to
- * use and be aware of are:
- *
- * * @ref pm_buffer_t - a small buffer object that will hold the serialized AST
- * * @ref pm_buffer_free - free the memory associated with the buffer
- * * @ref pm_serialize - serialize the AST into a buffer
- * * @ref pm_parse_serialize - parse and serialize the AST into a buffer
- *
- * Putting all of this together would look something like:
- *
- * ```c
- * void serialize(const uint8_t *source, size_t length) {
- * pm_buffer_t buffer = { 0 };
- *
- * pm_parse_serialize(source, length, &buffer, NULL);
- * printf("SERIALIZED!\n");
- *
- * pm_buffer_free(&buffer);
- * }
- * ```
- *
- * @section inspecting Inspecting
- *
- * Prism provides the ability to inspect the AST by pretty-printing nodes. You
- * can do this with the `pm_prettyprint` function, which you would use like:
- *
- * ```c
- * void prettyprint(const uint8_t *source, size_t length) {
- * pm_parser_t parser;
- * pm_parser_init(&parser, source, length, NULL);
- *
- * pm_node_t *root = pm_parse(&parser);
- * pm_buffer_t buffer = { 0 };
- *
- * pm_prettyprint(&buffer, &parser, root);
- * printf("*.s%\n", (int) buffer.length, buffer.value);
- *
- * pm_buffer_free(&buffer);
- * pm_node_destroy(root);
- * pm_parser_free(&parser);
- * }
- * ```
- */
-
-/**
* The prism version and the serialization format.
*/
const char *
@@ -764,9 +665,16 @@ not_provided(pm_parser_t *parser) {
* of the call node creation functions.
*/
typedef struct {
+ /** The optional location of the opening parenthesis or bracket. */
pm_location_t opening_loc;
+
+ /** The lazily-allocated optional arguments node. */
pm_arguments_node_t *arguments;
+
+ /** The optional location of the closing parenthesis or bracket. */
pm_location_t closing_loc;
+
+ /** The optional block attached to the call. */
pm_node_t *block;
} pm_arguments_t;
@@ -7668,7 +7576,16 @@ parser_flush_heredoc_end(pm_parser_t *parser) {
* automatically attach the string content to the node that it belongs to.
*/
typedef struct {
+ /**
+ * The buffer that we're using to keep track of the string content. It will
+ * only be initialized if we receive an escape sequence.
+ */
pm_buffer_t buffer;
+
+ /**
+ * The cursor into the source string that points to how far we have
+ * currently copied into the buffer.
+ */
const uint8_t *cursor;
} pm_token_buffer_t;
@@ -9835,8 +9752,13 @@ typedef enum {
* are combined in this way to make it easier to represent associativity.
*/
typedef struct {
+ /** The left binding power. */
pm_binding_power_t left;
+
+ /** The right binding power. */
pm_binding_power_t right;
+
+ /** Whether or not this token can be used as a binary operator. */
bool binary;
} pm_binding_powers_t;
diff --git a/prism/prism.h b/prism/prism.h
index c68e9cbdf7..4b8755a30d 100644
--- a/prism/prism.h
+++ b/prism/prism.h
@@ -1,3 +1,8 @@
+/**
+ * @file prism.h
+ *
+ * The main header file for the prism parser.
+ */
#ifndef PRISM_H
#define PRISM_H
@@ -75,10 +80,10 @@ PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_decode_callback(pm_pars
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
/**
- * Parse the Ruby source associated with the given parser and return the tree.
+ * Parse the source associated with the given parser and return the AST.
*
* @param parser The parser to use.
- * @return The AST representing the Ruby source.
+ * @return The AST representing the source.
*/
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
@@ -181,4 +186,103 @@ PRISM_EXPORTED_FUNCTION void pm_parse_lex_serialize(const uint8_t *source, size_
*/
PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type);
+/**
+ * @mainpage
+ *
+ * Prism is a parser for the Ruby programming language. It is designed to be
+ * portable, error tolerant, and maintainable. It is written in C99 and has no
+ * dependencies. It is currently being integrated into
+ * [CRuby](https://github.com/ruby/ruby),
+ * [JRuby](https://github.com/jruby/jruby),
+ * [TruffleRuby](https://github.com/oracle/truffleruby),
+ * [Sorbet](https://github.com/sorbet/sorbet), and
+ * [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
+ *
+ * @section getting-started Getting started
+ *
+ * If you're vendoring this project and compiling it statically then as long as
+ * you have a C99 compiler you will be fine. If you're linking against it as
+ * a shared library, then you should compile with `-fvisibility=hidden` and
+ * `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface
+ * visible.
+ *
+ * @section parsing Parsing
+ *
+ * In order to parse Ruby code, the structures and functions that you're going
+ * to want to use and be aware of are:
+ *
+ * * pm_parser_t - the main parser structure
+ * * pm_parser_init - initialize a parser
+ * * pm_parse - parse and return the root node
+ * * pm_node_destroy - deallocate the root node returned by `pm_parse`
+ * * pm_parser_free - free the internal memory of the parser
+ *
+ * Putting all of this together would look something like:
+ *
+ * ```c
+ * void parse(const uint8_t *source, size_t length) {
+ * pm_parser_t parser;
+ * pm_parser_init(&parser, source, length, NULL);
+ *
+ * pm_node_t *root = pm_parse(&parser);
+ * printf("PARSED!\n");
+ *
+ * pm_node_destroy(root);
+ * pm_parser_free(&parser);
+ * }
+ * ```
+ *
+ * All of the nodes "inherit" from `pm_node_t` by embedding those structures as
+ * their first member. This means you can downcast and upcast any node in the
+ * tree to a `pm_node_t`.
+ *
+ * @section serializing Serializing
+ *
+ * Prism provides the ability to serialize the AST and its related metadata into
+ * a binary format. This format is designed to be portable to different
+ * languages and runtimes so that you only need to make one FFI call in order to
+ * parse Ruby code. The structures and functions that you're going to want to
+ * use and be aware of are:
+ *
+ * * pm_buffer_t - a small buffer object that will hold the serialized AST
+ * * pm_buffer_free - free the memory associated with the buffer
+ * * pm_serialize - serialize the AST into a buffer
+ * * pm_parse_serialize - parse and serialize the AST into a buffer
+ *
+ * Putting all of this together would look something like:
+ *
+ * ```c
+ * void serialize(const uint8_t *source, size_t length) {
+ * pm_buffer_t buffer = { 0 };
+ *
+ * pm_parse_serialize(source, length, &buffer, NULL);
+ * printf("SERIALIZED!\n");
+ *
+ * pm_buffer_free(&buffer);
+ * }
+ * ```
+ *
+ * @section inspecting Inspecting
+ *
+ * Prism provides the ability to inspect the AST by pretty-printing nodes. You
+ * can do this with the `pm_prettyprint` function, which you would use like:
+ *
+ * ```c
+ * void prettyprint(const uint8_t *source, size_t length) {
+ * pm_parser_t parser;
+ * pm_parser_init(&parser, source, length, NULL);
+ *
+ * pm_node_t *root = pm_parse(&parser);
+ * pm_buffer_t buffer = { 0 };
+ *
+ * pm_prettyprint(&buffer, &parser, root);
+ * printf("%.*s\n", (int) buffer.length, buffer.value);
+ *
+ * pm_buffer_free(&buffer);
+ * pm_node_destroy(root);
+ * pm_parser_free(&parser);
+ * }
+ * ```
+ */
+
#endif
diff --git a/prism/regexp.c b/prism/regexp.c
index fa2ea5cd20..22833d177f 100644
--- a/prism/regexp.c
+++ b/prism/regexp.c
@@ -4,11 +4,22 @@
* This is the parser that is going to handle parsing regular expressions.
*/
typedef struct {
+ /** A pointer to the start of the source that we are parsing. */
const uint8_t *start;
+
+ /** A pointer to the current position in the source. */
const uint8_t *cursor;
+
+ /** A pointer to the end of the source that we are parsing. */
const uint8_t *end;
+
+ /** A list of named captures that we've found. */
pm_string_list_t *named_captures;
+
+ /** Whether the encoding has changed from the default. */
bool encoding_changed;
+
+ /** The encoding of the source. */
pm_encoding_t *encoding;
} pm_regexp_parser_t;
@@ -318,6 +329,7 @@ typedef enum {
* This is the set of options that are configurable on the regular expression.
*/
typedef struct {
+ /** The current state of each option. */
uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS];
} pm_regexp_options_t;
diff --git a/prism/regexp.h b/prism/regexp.h
index 9eae245d1e..09bdaca89a 100644
--- a/prism/regexp.h
+++ b/prism/regexp.h
@@ -1,3 +1,8 @@
+/**
+ * @file regexp.h
+ *
+ * A regular expression parser.
+ */
#ifndef PRISM_REGEXP_H
#define PRISM_REGEXP_H
diff --git a/prism/templates/include/prism/ast.h.erb b/prism/templates/include/prism/ast.h.erb
index fadf461c40..3f279d9779 100644
--- a/prism/templates/include/prism/ast.h.erb
+++ b/prism/templates/include/prism/ast.h.erb
@@ -1,3 +1,8 @@
+/**
+ * @file ast.h
+ *
+ * The abstract syntax tree.
+ */
#ifndef PRISM_AST_H
#define PRISM_AST_H
@@ -78,18 +83,37 @@ enum pm_node_type {
PM_SCOPE_NODE
};
+/**
+ * This is the type of node embedded in the node struct. We explicitly control
+ * the size of it here to avoid having the variable-width enum.
+ */
typedef uint16_t pm_node_type_t;
+
+/**
+ * These are the flags embedded in the node struct. We explicitly control the
+ * size of it here to avoid having the variable-width enum.
+ */
typedef uint16_t pm_node_flags_t;
-// We store the flags enum in every node in the tree. Some flags are common to
-// all nodes (the ones listed below). Others are specific to certain node types.
+/**
+ * We store the flags enum in every node in the tree. Some flags are common to
+ * all nodes (the ones listed below). Others are specific to certain node types.
+ */
#define PM_NODE_FLAG_BITS (sizeof(pm_node_flags_t) * 8)
+
static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = (1 << (PM_NODE_FLAG_BITS - 1));
static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = (1 << (PM_NODE_FLAG_BITS - 2));
static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS - 1)) | (1 << (PM_NODE_FLAG_BITS - 2));
-// For easy access, we define some macros to check node type
-#define PM_NODE_TYPE(node) ((enum pm_node_type)node->type)
+/**
+ * Cast the type to an enum to allow the compiler to provide exhaustiveness
+ * checking.
+ */
+#define PM_NODE_TYPE(node) ((enum pm_node_type) node->type)
+
+/**
+ * Return true if the type of the given node matches the given type.
+ */
#define PM_NODE_TYPE_P(node, type) (PM_NODE_TYPE(node) == (type))
/**
@@ -132,8 +156,11 @@ typedef struct pm_node {
* @extends pm_node_t
*/
typedef struct pm_<%= node.human %> {
+ /** The embedded base node. */
pm_node_t base;
<%- node.fields.grep_v(Prism::FlagsField).each do |field| -%>
+
+ /** <%= node.name %>#<%= field.name %> */
<%= case field
when Prism::NodeField, Prism::OptionalNodeField then "struct #{field.c_type} *#{field.name}"
when Prism::NodeListField then "struct pm_node_list #{field.name}"
@@ -162,6 +189,12 @@ typedef enum pm_<%= flag.human %> {
} pm_<%= flag.human %>_t;
<%- end -%>
+/**
+ * When we're serializing to Java, we want to skip serializing the location
+ * fields as they won't be used by JRuby or TruffleRuby. This boolean allows us
+ * to specify that through the environment. It will never be true except in
+ * those build systems.
+ */
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::SERIALIZE_ONLY_SEMANTICS_FIELDS %>
#endif
diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h
index a2bccb3d00..3c3a6fb688 100644
--- a/prism/util/pm_buffer.h
+++ b/prism/util/pm_buffer.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_buffer.h
+ *
+ * A wrapper around a contiguous block of allocated memory.
+ */
#ifndef PRISM_BUFFER_H
#define PRISM_BUFFER_H
diff --git a/prism/util/pm_char.h b/prism/util/pm_char.h
index 2bdc67de4a..32f698a42b 100644
--- a/prism/util/pm_char.h
+++ b/prism/util/pm_char.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_char.h
+ *
+ * Functions for working with characters and strings.
+ */
#ifndef PRISM_CHAR_H
#define PRISM_CHAR_H
diff --git a/prism/util/pm_constant_pool.h b/prism/util/pm_constant_pool.h
index 19c3b619e1..238d0c4cae 100644
--- a/prism/util/pm_constant_pool.h
+++ b/prism/util/pm_constant_pool.h
@@ -1,8 +1,12 @@
-// The constant pool is a data structure that stores a set of strings. Each
-// string is assigned a unique id, which can be used to compare strings for
-// equality. This comparison ends up being much faster than strcmp, since it
-// only requires a single integer comparison.
-
+/**
+ * @file pm_constant_pool.h
+ *
+ * A data structure that stores a set of strings.
+ *
+ * Each string is assigned a unique id, which can be used to compare strings for
+ * equality. This comparison ends up being much faster than strcmp, since it
+ * only requires a single integer comparison.
+ */
#ifndef PRISM_CONSTANT_POOL_H
#define PRISM_CONSTANT_POOL_H
@@ -14,12 +18,23 @@
#include <stdlib.h>
#include <string.h>
+/**
+ * A constant id is a unique identifier for a constant in the constant pool.
+ */
typedef uint32_t pm_constant_id_t;
+/**
+ * A list of constant IDs. Usually used to represent a set of locals.
+ */
typedef struct {
- pm_constant_id_t *ids;
+ /** The number of constant ids in the list. */
size_t size;
+
+ /** The number of constant ids that have been allocated in the list. */
size_t capacity;
+
+ /** The constant ids in the list. */
+ pm_constant_id_t *ids;
} pm_constant_id_list_t;
/**
diff --git a/prism/util/pm_list.h b/prism/util/pm_list.h
index 53a5b9c3a1..b05ed0290a 100644
--- a/prism/util/pm_list.h
+++ b/prism/util/pm_list.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_list.h
+ *
+ * An abstract linked list.
+ */
#ifndef PRISM_LIST_H
#define PRISM_LIST_H
diff --git a/prism/util/pm_memchr.h b/prism/util/pm_memchr.h
index 6b817a5521..1eae6ab1ba 100644
--- a/prism/util/pm_memchr.h
+++ b/prism/util/pm_memchr.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_memchr.h
+ *
+ * A custom memchr implementation.
+ */
#ifndef PRISM_MEMCHR_H
#define PRISM_MEMCHR_H
diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h
index 603a84c38c..a31051f4e0 100644
--- a/prism/util/pm_newline_list.h
+++ b/prism/util/pm_newline_list.h
@@ -1,11 +1,16 @@
-// When compiling the syntax tree, it's necessary to know the line and column
-// of many nodes. This is necessary to support things like error messages,
-// tracepoints, etc.
-//
-// It's possible that we could store the start line, start column, end line, and
-// end column on every node in addition to the offsets that we already store,
-// but that would be quite a lot of memory overhead.
-
+/**
+ * @file pm_newline_list.h
+ *
+ * A list of byte offsets of newlines in a string.
+ *
+ * When compiling the syntax tree, it's necessary to know the line and column
+ * of many nodes. This is necessary to support things like error messages,
+ * tracepoints, etc.
+ *
+ * It's possible that we could store the start line, start column, end line, and
+ * end column on every node in addition to the offsets that we already store,
+ * but that would be quite a lot of memory overhead.
+ */
#ifndef PRISM_NEWLINE_LIST_H
#define PRISM_NEWLINE_LIST_H
diff --git a/prism/util/pm_state_stack.h b/prism/util/pm_state_stack.h
index 7268a3fd63..1ce57a2209 100644
--- a/prism/util/pm_state_stack.h
+++ b/prism/util/pm_state_stack.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_state_stack.h
+ *
+ * A stack of boolean values.
+ */
#ifndef PRISM_STATE_STACK_H
#define PRISM_STATE_STACK_H
@@ -30,6 +35,7 @@ void pm_state_stack_pop(pm_state_stack_t *stack);
* Returns the value at the top of the stack.
*
* @param stack The stack to get the value from.
+ * @return The value at the top of the stack.
*/
bool pm_state_stack_p(pm_state_stack_t *stack);
diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h
index b0b7c6bf2d..ddb153784f 100644
--- a/prism/util/pm_string.h
+++ b/prism/util/pm_string.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_string.h
+ *
+ * A generic string type that can have various ownership semantics.
+ */
#ifndef PRISM_STRING_H
#define PRISM_STRING_H
diff --git a/prism/util/pm_string_list.h b/prism/util/pm_string_list.h
index 1f460e5dc9..0d406cc5d8 100644
--- a/prism/util/pm_string_list.h
+++ b/prism/util/pm_string_list.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_string_list.h
+ *
+ * A list of strings.
+ */
#ifndef PRISM_STRING_LIST_H
#define PRISM_STRING_LIST_H
diff --git a/prism/util/pm_strncasecmp.h b/prism/util/pm_strncasecmp.h
index 6cf7aa8023..c381ea38f4 100644
--- a/prism/util/pm_strncasecmp.h
+++ b/prism/util/pm_strncasecmp.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_strncasecmp.h
+ *
+ * A custom strncasecmp implementation.
+ */
#ifndef PRISM_STRNCASECMP_H
#define PRISM_STRNCASECMP_H
diff --git a/prism/util/pm_strpbrk.h b/prism/util/pm_strpbrk.h
index b589004abf..61a443e51a 100644
--- a/prism/util/pm_strpbrk.h
+++ b/prism/util/pm_strpbrk.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_strpbrk.h
+ *
+ * A custom strpbrk implementation.
+ */
#ifndef PRISM_STRPBRK_H
#define PRISM_STRPBRK_H
diff --git a/prism/version.h b/prism/version.h
index 25ee409c74..2e5e84cdf1 100644
--- a/prism/version.h
+++ b/prism/version.h
@@ -1,4 +1,12 @@
/**
+ * @file version.h
+ *
+ * The version of the Prism library.
+ */
+#ifndef PRISM_VERSION_H
+#define PRISM_VERSION_H
+
+/**
* The major version of the Prism library as an int.
*/
#define PRISM_VERSION_MAJOR 0
@@ -17,3 +25,5 @@
* The version of the Prism library as a constant string.
*/
#define PRISM_VERSION "0.16.0"
+
+#endif