[ruby/prism] Last remaining missing C comments

https://github.com/ruby/prism/commit/e327449db6
author: Kevin Newton <kddnewton@gmail.com> 2023-10-31 13:26:31 -0400
committer: Kevin Newton <kddnewton@gmail.com> 2023-11-01 13:10:29 -0400
commit: 690f3bbf5d67d8629d6e6d7305b79fcc5362b0c9 (patch)
tree: 5d01ab6281f21ccac723ac4f856c0b441a98d98a /prism
parent: e745af2f0e36f7ff3499019dfb038799a9d3d069 (diff)
25 files changed, 424 insertions, 169 deletions
diff --git a/prism/config.yml b/prism/config.yml
index 97e8804ad4..bd5afc7d4e 100644
--- a/prism/config.yml
+++ b/prism/config.yml
@@ -59,11 +59,11 @@ tokens:
   - name: CONSTANT
     comment: "a constant"
   - name: DOT
-    comment: "."
+    comment: "the . call operator"
   - name: DOT_DOT
-    comment: ".."
+    comment: "the .. range operator"
   - name: DOT_DOT_DOT
-    comment: "..."
+    comment: "the ... range operator or forwarding parameter"
   - name: EMBDOC_BEGIN
     comment: "=begin"
   - name: EMBDOC_END
@@ -311,9 +311,9 @@ tokens:
   - name: UCOLON_COLON
     comment: "unary ::"
   - name: UDOT_DOT
-    comment: "unary .."
+    comment: "unary .. operator"
   - name: UDOT_DOT_DOT
-    comment: "unary ..."
+    comment: "unary ... operator"
   - name: UMINUS
     comment: "-@"
   - name: UMINUS_NUM
diff --git a/prism/defines.h b/prism/defines.h
index 457a8502f8..b10f8fa3e0 100644
--- a/prism/defines.h
+++ b/prism/defines.h
@@ -1,8 +1,14 @@
+/**
+ * @file defines.h
+ *
+ * Macro definitions used throughout the prism library.
+ *
+ * This file should be included first by any *.h or *.c in prism for consistency
+ * and to ensure that the macros are defined before they are used.
+ */
 #ifndef PRISM_DEFINES_H
 #define PRISM_DEFINES_H
 
-// This file should be included first by any *.h or *.c in prism.
-
 #include <ctype.h>
 #include <stdarg.h>
 #include <stddef.h>
@@ -10,7 +16,11 @@
 #include <stdio.h>
 #include <string.h>
 
-// PRISM_EXPORTED_FUNCTION
+/** 
+ * By default, we compile with -fvisibility=hidden. When this is enabled, we
+ * need to mark certain functions as being publicly visible. This macro does
+ * that in a compiler-agnostic way.
+ */
 #ifndef PRISM_EXPORTED_FUNCTION
 #   ifdef PRISM_EXPORT_SYMBOLS
 #       ifdef _WIN32
@@ -23,7 +33,12 @@
 #   endif
 #endif
 
-// PRISM_ATTRIBUTE_FORMAT
+/**
+ * Certain compilers support specifying that a function accepts variadic
+ * parameters that look like printf format strings to provide a better developer
+ * experience when someone is using the function. This macro does that in a
+ * compiler-agnostic way.
+ */
 #if defined(__GNUC__)
 #   define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(printf, string_index, argument_index)))
 #elif defined(__clang__)
@@ -32,19 +47,29 @@
 #   define PRISM_ATTRIBUTE_FORMAT(string_index, argument_index)
 #endif
 
-// PRISM_ATTRIBUTE_UNUSED
+/**
+ * GCC will warn if you specify a function or parameter that is unused at
+ * runtime. This macro allows you to mark a function or parameter as unused in a
+ * compiler-agnostic way.
+ */
 #if defined(__GNUC__)
 #   define PRISM_ATTRIBUTE_UNUSED __attribute__((unused))
 #else
 #   define PRISM_ATTRIBUTE_UNUSED
 #endif
 
-// inline
+/**
+ * Old Visual Studio versions do not support the inline keyword, so we need to
+ * define it to be __inline.
+ */
 #if defined(_MSC_VER) && !defined(inline)
 #   define inline __inline
 #endif
 
-// Windows versions before 2015 use _snprintf
+/**
+ * Visual Studio versions before 2015 do not implement snprintf, but instead
+ * implement _snprintf. We standardize on snprintf here.
+ */
 #if !defined(snprintf) && defined(_MSC_VER) && (_MSC_VER < 1900)
 #   define snprintf _snprintf
 #endif
diff --git a/prism/diagnostic.h b/prism/diagnostic.h
index 7d78a16000..97bd83fdf7 100644
--- a/prism/diagnostic.h
+++ b/prism/diagnostic.h
@@ -1,3 +1,8 @@
+/**
+ * @file diagnostic.h
+ *
+ * A list of diagnostics generated during parsing.
+ */
 #ifndef PRISM_DIAGNOSTIC_H
 #define PRISM_DIAGNOSTIC_H
 
@@ -9,14 +14,21 @@
 #include <assert.h>
 
 /**
- * This struct represents a diagnostic found during parsing.
+ * This struct represents a diagnostic generated during parsing.
  *
  * @extends pm_list_node_t
  */
 typedef struct {
+    /** The embedded base node. */
     pm_list_node_t node;
+
+    /** A pointer to the start of the source that generated the diagnostic. */
     const uint8_t *start;
+
+    /** A pointer to the end of the source that generated the diagnostic. */
     const uint8_t *end;
+
+    /** The message associated with the diagnostic. */
     const char *message;
 } pm_diagnostic_t;
 
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h
index 28b9f02281..f8e554e617 100644
--- a/prism/enc/pm_encoding.h
+++ b/prism/enc/pm_encoding.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_encoding.h
+ *
+ * The encoding interface and implementations used by the parser.
+ */
 #ifndef PRISM_ENCODING_H
 #define PRISM_ENCODING_H
 
@@ -55,10 +60,22 @@ typedef struct {
     bool multibyte;
 } pm_encoding_t;
 
-// These bits define the location of each bit of metadata within the various
-// lookup tables that are used to determine the properties of a character.
+/**
+ * All of the lookup tables use the first bit of each embedded byte to indicate
+ * whether the codepoint is alphabetical.
+ */
 #define PRISM_ENCODING_ALPHABETIC_BIT 1 << 0
+
+/**
+ * All of the lookup tables use the second bit of each embedded byte to indicate
+ * whether the codepoint is alphanumeric.
+ */
 #define PRISM_ENCODING_ALPHANUMERIC_BIT 1 << 1
+
+/**
+ * All of the lookup tables use the third bit of each embedded byte to indicate
+ * whether the codepoint is uppercase.
+ */
 #define PRISM_ENCODING_UPPERCASE_BIT 1 << 2
 
 /**
diff --git a/prism/node.h b/prism/node.h
index 768ddec1b0..3e15d18552 100644
--- a/prism/node.h
+++ b/prism/node.h
@@ -1,3 +1,8 @@
+/**
+ * @file node.h
+ *
+ * Functions related to nodes in the AST.
+ */
 #ifndef PRISM_NODE_H
 #define PRISM_NODE_H
 
diff --git a/prism/pack.h b/prism/pack.h
index be52a7b4de..e494848389 100644
--- a/prism/pack.h
+++ b/prism/pack.h
@@ -1,3 +1,8 @@
+/**
+ * @file pack.h
+ *
+ * A pack template string parser.
+ */
 #ifndef PRISM_PACK_H
 #define PRISM_PACK_H
 
@@ -6,15 +11,18 @@
 #include <stdint.h>
 #include <stdlib.h>
 
+/** The version of the pack template language that we are parsing. */
 typedef enum pm_pack_version {
     PM_PACK_VERSION_3_2_0
 } pm_pack_version;
 
+/** The type of pack template we are parsing. */
 typedef enum pm_pack_variant {
     PM_PACK_VARIANT_PACK,
     PM_PACK_VARIANT_UNPACK
 } pm_pack_variant;
 
+/** A directive within the pack template. */
 typedef enum pm_pack_type {
     PM_PACK_SPACE,
     PM_PACK_COMMENT,
@@ -40,12 +48,14 @@ typedef enum pm_pack_type {
     PM_PACK_END
 } pm_pack_type;
 
+/** The signedness of a pack directive. */
 typedef enum pm_pack_signed {
     PM_PACK_UNSIGNED,
     PM_PACK_SIGNED,
     PM_PACK_SIGNED_NA
 } pm_pack_signed;
 
+/** The endianness of a pack directive. */
 typedef enum pm_pack_endian {
     PM_PACK_AGNOSTIC_ENDIAN,
     PM_PACK_LITTLE_ENDIAN,      // aka 'VAX', or 'V'
@@ -54,6 +64,7 @@ typedef enum pm_pack_endian {
     PM_PACK_ENDIAN_NA
 } pm_pack_endian;
 
+/** The size of an integer pack directive. */
 typedef enum pm_pack_size {
     PM_PACK_SIZE_SHORT,
     PM_PACK_SIZE_INT,
@@ -67,6 +78,7 @@ typedef enum pm_pack_size {
     PM_PACK_SIZE_NA
 } pm_pack_size;
 
+/** The type of length of a pack directive. */
 typedef enum pm_pack_length_type {
     PM_PACK_LENGTH_FIXED,
     PM_PACK_LENGTH_MAX,
@@ -74,6 +86,7 @@ typedef enum pm_pack_length_type {
     PM_PACK_LENGTH_NA
 } pm_pack_length_type;
 
+/** The type of encoding for a pack template string. */
 typedef enum pm_pack_encoding {
     PM_PACK_ENCODING_START,
     PM_PACK_ENCODING_ASCII_8BIT,
@@ -81,6 +94,7 @@ typedef enum pm_pack_encoding {
     PM_PACK_ENCODING_UTF_8
 } pm_pack_encoding;
 
+/** The result of parsing a pack template. */
 typedef enum pm_pack_result {
     PM_PACK_OK,
     PM_PACK_ERROR_UNSUPPORTED_DIRECTIVE,
@@ -90,39 +104,31 @@ typedef enum pm_pack_result {
     PM_PACK_ERROR_DOUBLE_ENDIAN
 } pm_pack_result;
 
-// Parse a single directive from a pack or unpack format string.
-//
-// Parameters:
-//  - [in] pm_pack_version version    the version of Ruby
-//  - [in] pm_pack_variant variant    pack or unpack
-//  - [in out] const char **format    the start of the next directive to parse
-//      on calling, and advanced beyond the parsed directive on return, or as
-//      much of it as was consumed until an error was encountered
-//  - [in] const char *format_end     the end of the format string
-//  - [out] pm_pack_type *type        the type of the directive
-//  - [out] pm_pack_signed *signed_type
-//                                    whether the value is signed
-//  - [out] pm_pack_endian *endian    the endianness of the value
-//  - [out] pm_pack_size *size        the size of the value
-//  - [out] pm_pack_length_type *length_type
-//                                    what kind of length is specified
-//  - [out] size_t *length            the length of the directive
-//  - [in out] pm_pack_encoding *encoding
-//                                    takes the current encoding of the string
-//      which would result from parsing the whole format string, and returns a
-//      possibly changed directive - the encoding should be
-//      PM_PACK_ENCODING_START when pm_pack_parse is called for the first
-//      directive in a format string
-//
-// Return:
-//  - PM_PACK_OK on success
-//  - PM_PACK_ERROR_* on error
-//
-// Notes:
-//   Consult Ruby documentation for the meaning of directives.
+/**
+ * Parse a single directive from a pack or unpack format string.
+ *
+ * @param variant (in) pack or unpack
+ * @param format (in, out) the start of the next directive to parse on calling,
+ *     and advanced beyond the parsed directive on return, or as much of it as
+ *     was consumed until an error was encountered
+ * @param format_end (in) the end of the format string
+ * @param type (out) the type of the directive
+ * @param signed_type (out) whether the value is signed
+ * @param endian (out) the endianness of the value
+ * @param size (out) the size of the value
+ * @param length_type (out) what kind of length is specified
+ * @param length (out) the length of the directive
+ * @param encoding (in, out) takes the current encoding of the string which
+ *     would result from parsing the whole format string, and returns a possibly
+ *     changed directive - the encoding should be `PM_PACK_ENCODING_START` when
+ *     pm_pack_parse is called for the first directive in a format string
+ *
+ * @return `PM_PACK_OK` on success or `PM_PACK_ERROR_*` on error
+ * @note Consult Ruby documentation for the meaning of directives.
+ */
 PRISM_EXPORTED_FUNCTION pm_pack_result
 pm_pack_parse(
-    pm_pack_variant variant_arg,
+    pm_pack_variant variant,
     const char **format,
     const char *format_end,
     pm_pack_type *type,
@@ -134,8 +140,13 @@ pm_pack_parse(
     pm_pack_encoding *encoding
 );
 
-// prism abstracts sizes away from the native system - this converts an abstract
-// size to a native size.
+/**
+ * Prism abstracts sizes away from the native system - this converts an abstract
+ * size to a native size.
+ *
+ * @param size The abstract size to convert.
+ * @return The native size.
+ */
 PRISM_EXPORTED_FUNCTION size_t pm_size_to_native(pm_pack_size size);
 
 #endif
diff --git a/prism/parser.h b/prism/parser.h
index 92a8ce589d..f4d0153e17 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -1,3 +1,8 @@
+/**
+ * @file parser.h
+ *
+ * The parser used to parse Ruby source.
+ */
 #ifndef PRISM_PARSER_H
 #define PRISM_PARSER_H
 
@@ -84,6 +89,7 @@ typedef enum {
  * are found as part of a string.
  */
 typedef struct pm_lex_mode {
+    /** The type of this lex mode. */
     enum {
         /** This state is used when any given token is being lexed. */
         PM_LEX_DEFAULT,
@@ -122,6 +128,7 @@ typedef struct pm_lex_mode {
         PM_LEX_STRING
     } mode;
 
+    /** The data associated with this type of lex mode. */
     union {
         struct {
             /** This keeps track of the nesting level of the list. */
@@ -240,8 +247,9 @@ typedef struct pm_lex_mode {
  */
 #define PM_LEX_STACK_SIZE 4
 
-// A forward declaration since our error handler struct accepts a parser for
-// each of its function calls.
+/**
+ * The parser used to parse Ruby source.
+ */
 typedef struct pm_parser pm_parser_t;
 
 /**
@@ -343,7 +351,10 @@ typedef enum {
 
 /** This is a node in a linked list of contexts. */
 typedef struct pm_context_node {
+    /** The context that this node represents. */
     pm_context_t context;
+
+    /** A pointer to the previous context in the linked list. */
     struct pm_context_node *prev;
 } pm_context_node_t;
 
@@ -360,9 +371,16 @@ typedef enum {
  * @extends pm_list_node_t
  */
 typedef struct pm_comment {
+    /** The embedded base node. */
     pm_list_node_t node;
+
+    /** A pointer to the start of the comment in the source. */
     const uint8_t *start;
+
+    /** A pointer to the end of the comment in the source. */
     const uint8_t *end;
+
+    /** The type of comment that we've found. */
     pm_comment_type_t type;
 } pm_comment_t;
 
@@ -373,10 +391,19 @@ typedef struct pm_comment {
  * @extends pm_list_node_t
  */
 typedef struct {
+    /** The embedded base node. */
     pm_list_node_t node;
+
+    /** A pointer to the start of the key in the source. */
     const uint8_t *key_start;
+
+    /** A pointer to the start of the value in the source. */
     const uint8_t *value_start;
+
+    /** The length of the key in the source. */
     uint32_t key_length;
+
+    /** The length of the value in the source. */
     uint32_t value_length;
 } pm_magic_comment_t;
 
@@ -493,6 +520,7 @@ struct pm_parser {
      */
     pm_state_stack_t accepts_block_stack;
 
+    /** A stack of lex modes. */
     struct {
         /** The current mode of the lexer. */
         pm_lex_mode_t *current;
diff --git a/prism/prettyprint.h b/prism/prettyprint.h
index 9ae2397e63..351b92df39 100644
--- a/prism/prettyprint.h
+++ b/prism/prettyprint.h
@@ -1,3 +1,8 @@
+/**
+ * @file prettyprint.h
+ *
+ * An AST node pretty-printer.
+ */
 #ifndef PRISM_PRETTYPRINT_H
 #define PRISM_PRETTYPRINT_H
 
diff --git a/prism/prism.c b/prism/prism.c
index 2b23cdb73c..8ac30c43d1 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -1,105 +1,6 @@
 #include "prism.h"
 
 /**
- * @mainpage
- *
- * Prism is a parser for the Ruby programming language. It is designed to be
- * portable, error tolerant, and maintainable. It is written in C99 and has no
- * dependencies. It is currently being integrated into
- * [CRuby](https://github.com/ruby/ruby),
- * [JRuby](https://github.com/jruby/jruby),
- * [TruffleRuby](https://github.com/oracle/truffleruby),
- * [Sorbet](https://github.com/sorbet/sorbet), and
- * [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
- *
- * @section getting-started Getting started
- *
- * If you're vendoring this project and compiling it statically then as long as
- * you have a C99 compiler you will be fine. If you're linking against it as
- * shared library, then you should compile with `-fvisibility=hidden` and
- * `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface
- * visible.
- *
- * @section parsing Parsing
- * 
- * In order to parse Ruby code, the structures and functions that you're going
- * to want to use and be aware of are:
- *
- * * @ref pm_parser_t - the main parser structure
- * * @ref pm_parser_init - initialize a parser
- * * @ref pm_parse - parse and return the root node
- * * @ref pm_node_destroy - deallocate the root node returned by `pm_parse`
- * * @ref pm_parser_free - free the internal memory of the parser
- *
- * Putting all of this together would look something like:
- *
- * ```c
- * void parse(const uint8_t *source, size_t length) {
- *     pm_parser_t parser;
- *     pm_parser_init(&parser, source, length, NULL);
- *
- *     pm_node_t *root = pm_parse(&parser);
- *     printf("PARSED!\n");
- *
- *     pm_node_destroy(root);
- *     pm_parser_free(&parser);
- * }
- * ```
- *
- * All of the nodes "inherit" from `pm_node_t` by embedding those structures as
- * their first member. This means you can downcast and upcast any node in the
- * tree to a `pm_node_t`.
- *
- * @section serializing Serializing
- *
- * Prism provides the ability to serialize the AST and its related metadata into
- * a binary format. This format is designed to be portable to different
- * languages and runtimes so that you only need to make one FFI call in order to
- * parse Ruby code. The structures and functions that you're going to want to
- * use and be aware of are:
- *
- * * @ref pm_buffer_t - a small buffer object that will hold the serialized AST
- * * @ref pm_buffer_free - free the memory associated with the buffer
- * * @ref pm_serialize - serialize the AST into a buffer
- * * @ref pm_parse_serialize - parse and serialize the AST into a buffer
- *
- * Putting all of this together would look something like:
- *
- * ```c
- * void serialize(const uint8_t *source, size_t length) {
- *     pm_buffer_t buffer = { 0 };
- *
- *     pm_parse_serialize(source, length, &buffer, NULL);
- *     printf("SERIALIZED!\n");
- *
- *     pm_buffer_free(&buffer);
- * }
- * ```
- *
- * @section inspecting Inspecting
- *
- * Prism provides the ability to inspect the AST by pretty-printing nodes. You
- * can do this with the `pm_prettyprint` function, which you would use like:
- *
- * ```c
- * void prettyprint(const uint8_t *source, size_t length) {
- *     pm_parser_t parser;
- *     pm_parser_init(&parser, source, length, NULL);
- *
- *     pm_node_t *root = pm_parse(&parser);
- *     pm_buffer_t buffer = { 0 };
- *
- *     pm_prettyprint(&buffer, &parser, root);
- *     printf("*.s%\n", (int) buffer.length, buffer.value);
- *
- *     pm_buffer_free(&buffer);
- *     pm_node_destroy(root);
- *     pm_parser_free(&parser);
- * }
- * ```
- */
-
-/**
  * The prism version and the serialization format.
  */
 const char *
@@ -764,9 +665,16 @@ not_provided(pm_parser_t *parser) {
  * of the call node creation functions.
  */
 typedef struct {
+    /** The optional location of the opening parenthesis or bracket. */
     pm_location_t opening_loc;
+
+    /** The lazily-allocated optional arguments node. */
     pm_arguments_node_t *arguments;
+
+    /** The optional location of the closing parenthesis or bracket. */
     pm_location_t closing_loc;
+
+    /** The optional block attached to the call. */
     pm_node_t *block;
 } pm_arguments_t;
 
@@ -7668,7 +7576,16 @@ parser_flush_heredoc_end(pm_parser_t *parser) {
  * automatically attach the string content to the node that it belongs to.
  */
 typedef struct {
+    /**
+     * The buffer that we're using to keep track of the string content. It will
+     * only be initialized if we receive an escape sequence.
+     */
     pm_buffer_t buffer;
+
+    /**
+     * The cursor into the source string that points to how far we have
+     * currently copied into the buffer.
+     */
     const uint8_t *cursor;
 } pm_token_buffer_t;
 
@@ -9835,8 +9752,13 @@ typedef enum {
  * are combined in this way to make it easier to represent associativity.
  */
 typedef struct {
+    /** The left binding power. */
     pm_binding_power_t left;
+
+    /** The right binding power. */
     pm_binding_power_t right;
+
+    /** Whether or not this token can be used as a binary operator. */
     bool binary;
 } pm_binding_powers_t;
 
diff --git a/prism/prism.h b/prism/prism.h
index c68e9cbdf7..4b8755a30d 100644
--- a/prism/prism.h
+++ b/prism/prism.h
@@ -1,3 +1,8 @@
+/**
+ * @file prism.h
+ *
+ * The main header file for the prism parser.
+ */
 #ifndef PRISM_H
 #define PRISM_H
 
@@ -75,10 +80,10 @@ PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_decode_callback(pm_pars
 PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);
 
 /**
- * Parse the Ruby source associated with the given parser and return the tree.
+ * Initiate parsing with the given parser.
  *
  * @param parser The parser to use.
- * @return The AST representing the Ruby source.
+ * @return The AST representing the source.
  */
 PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);
 
@@ -181,4 +186,103 @@ PRISM_EXPORTED_FUNCTION void pm_parse_lex_serialize(const uint8_t *source, size_
  */
 PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type);
 
+/**
+ * @mainpage
+ *
+ * Prism is a parser for the Ruby programming language. It is designed to be
+ * portable, error tolerant, and maintainable. It is written in C99 and has no
+ * dependencies. It is currently being integrated into
+ * [CRuby](https://github.com/ruby/ruby),
+ * [JRuby](https://github.com/jruby/jruby),
+ * [TruffleRuby](https://github.com/oracle/truffleruby),
+ * [Sorbet](https://github.com/sorbet/sorbet), and
+ * [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
+ *
+ * @section getting-started Getting started
+ *
+ * If you're vendoring this project and compiling it statically then as long as
+ * you have a C99 compiler you will be fine. If you're linking against it as a
+ * shared library, then you should compile with `-fvisibility=hidden` and
+ * `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface
+ * visible.
+ *
+ * @section parsing Parsing
+ * 
+ * In order to parse Ruby code, the structures and functions that you're going
+ * to want to use and be aware of are:
+ *
+ * * pm_parser_t - the main parser structure
+ * * pm_parser_init - initialize a parser
+ * * pm_parse - parse and return the root node
+ * * pm_node_destroy - deallocate the root node returned by `pm_parse`
+ * * pm_parser_free - free the internal memory of the parser
+ *
+ * Putting all of this together would look something like:
+ *
+ * ```c
+ * void parse(const uint8_t *source, size_t length) {
+ *     pm_parser_t parser;
+ *     pm_parser_init(&parser, source, length, NULL);
+ *
+ *     pm_node_t *root = pm_parse(&parser);
+ *     printf("PARSED!\n");
+ *
+ *     pm_node_destroy(root);
+ *     pm_parser_free(&parser);
+ * }
+ * ```
+ *
+ * All of the nodes "inherit" from `pm_node_t` by embedding those structures as
+ * their first member. This means you can downcast and upcast any node in the
+ * tree to a `pm_node_t`.
+ *
+ * @section serializing Serializing
+ *
+ * Prism provides the ability to serialize the AST and its related metadata into
+ * a binary format. This format is designed to be portable to different
+ * languages and runtimes so that you only need to make one FFI call in order to
+ * parse Ruby code. The structures and functions that you're going to want to
+ * use and be aware of are:
+ *
+ * * pm_buffer_t - a small buffer object that will hold the serialized AST
+ * * pm_buffer_free - free the memory associated with the buffer
+ * * pm_serialize - serialize the AST into a buffer
+ * * pm_parse_serialize - parse and serialize the AST into a buffer
+ *
+ * Putting all of this together would look something like:
+ *
+ * ```c
+ * void serialize(const uint8_t *source, size_t length) {
+ *     pm_buffer_t buffer = { 0 };
+ *
+ *     pm_parse_serialize(source, length, &buffer, NULL);
+ *     printf("SERIALIZED!\n");
+ *
+ *     pm_buffer_free(&buffer);
+ * }
+ * ```
+ *
+ * @section inspecting Inspecting
+ *
+ * Prism provides the ability to inspect the AST by pretty-printing nodes. You
+ * can do this with the `pm_prettyprint` function, which you would use like:
+ *
+ * ```c
+ * void prettyprint(const uint8_t *source, size_t length) {
+ *     pm_parser_t parser;
+ *     pm_parser_init(&parser, source, length, NULL);
+ *
+ *     pm_node_t *root = pm_parse(&parser);
+ *     pm_buffer_t buffer = { 0 };
+ *
+ *     pm_prettyprint(&buffer, &parser, root);
+ *     printf("%.*s\n", (int) buffer.length, buffer.value);
+ *
+ *     pm_buffer_free(&buffer);
+ *     pm_node_destroy(root);
+ *     pm_parser_free(&parser);
+ * }
+ * ```
+ */
+
 #endif
diff --git a/prism/regexp.c b/prism/regexp.c
index fa2ea5cd20..22833d177f 100644
--- a/prism/regexp.c
+++ b/prism/regexp.c
@@ -4,11 +4,22 @@
  * This is the parser that is going to handle parsing regular expressions.
  */
 typedef struct {
+    /** A pointer to the start of the source that we are parsing. */
     const uint8_t *start;
+
+    /** A pointer to the current position in the source. */
     const uint8_t *cursor;
+
+    /** A pointer to the end of the source that we are parsing. */
     const uint8_t *end;
+
+    /** A list of named captures that we've found. */
     pm_string_list_t *named_captures;
+
+    /** Whether the encoding has changed from the default. */
     bool encoding_changed;
+
+    /** The encoding of the source. */
     pm_encoding_t *encoding;
 } pm_regexp_parser_t;
 
@@ -318,6 +329,7 @@ typedef enum {
  * This is the set of options that are configurable on the regular expression.
  */
 typedef struct {
+    /** The current state of each option. */
     uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS];
 } pm_regexp_options_t;
 
diff --git a/prism/regexp.h b/prism/regexp.h
index 9eae245d1e..09bdaca89a 100644
--- a/prism/regexp.h
+++ b/prism/regexp.h
@@ -1,3 +1,8 @@
+/**
+ * @file regexp.h
+ *
+ * A regular expression parser.
+ */
 #ifndef PRISM_REGEXP_H
 #define PRISM_REGEXP_H
 
diff --git a/prism/templates/include/prism/ast.h.erb b/prism/templates/include/prism/ast.h.erb
index fadf461c40..3f279d9779 100644
--- a/prism/templates/include/prism/ast.h.erb
+++ b/prism/templates/include/prism/ast.h.erb
@@ -1,3 +1,8 @@
+/**
+ * @file ast.h
+ *
+ * The abstract syntax tree.
+ */
 #ifndef PRISM_AST_H
 #define PRISM_AST_H
 
@@ -78,18 +83,37 @@ enum pm_node_type {
     PM_SCOPE_NODE
 };
 
+/**
+ * This is the type of node embedded in the node struct. We explicitly control
+ * the size of it here to avoid having the variable-width enum.
+ */
 typedef uint16_t pm_node_type_t;
+
+/**
+ * These are the flags embedded in the node struct. We explicitly control the
+ * size of it here to avoid having the variable-width enum.
+ */
 typedef uint16_t pm_node_flags_t;
 
-// We store the flags enum in every node in the tree. Some flags are common to
-// all nodes (the ones listed below). Others are specific to certain node types.
+/**
+ * We store the flags enum in every node in the tree. Some flags are common to
+ * all nodes (the ones listed below). Others are specific to certain node types.
+ */
 #define PM_NODE_FLAG_BITS (sizeof(pm_node_flags_t) * 8)
+
 static const pm_node_flags_t PM_NODE_FLAG_NEWLINE = (1 << (PM_NODE_FLAG_BITS - 1));
 static const pm_node_flags_t PM_NODE_FLAG_STATIC_LITERAL = (1 << (PM_NODE_FLAG_BITS - 2));
 static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS - 1)) | (1 << (PM_NODE_FLAG_BITS - 2));
 
-// For easy access, we define some macros to check node type
-#define PM_NODE_TYPE(node) ((enum pm_node_type)node->type)
+/**
+ * Cast the type to an enum to allow the compiler to provide exhaustiveness
+ * checking.
+ */
+#define PM_NODE_TYPE(node) ((enum pm_node_type) node->type)
+
+/**
+ * Return true if the type of the given node matches the given type.
+ */
 #define PM_NODE_TYPE_P(node, type) (PM_NODE_TYPE(node) == (type))
 
 /**
@@ -132,8 +156,11 @@ typedef struct pm_node {
  * @extends pm_node_t
  */
 typedef struct pm_<%= node.human %> {
+    /** The embedded base node. */
     pm_node_t base;
 <%- node.fields.grep_v(Prism::FlagsField).each do |field| -%>
+
+    /** <%= node.name %>#<%= field.name %> */
     <%= case field
     when Prism::NodeField, Prism::OptionalNodeField then "struct #{field.c_type} *#{field.name}"
     when Prism::NodeListField then "struct pm_node_list #{field.name}"
@@ -162,6 +189,12 @@ typedef enum pm_<%= flag.human %> {
 } pm_<%= flag.human %>_t;
 <%- end -%>
 
+/**
+ * When we're serializing to Java, we want to skip serializing the location
+ * fields as they won't be used by JRuby or TruffleRuby. This boolean allows us
+ * to specify that through the environment. It will never be true except for in
+ * those build systems.
+ */
 #define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::SERIALIZE_ONLY_SEMANTICS_FIELDS %>
 
 #endif
diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h
index a2bccb3d00..3c3a6fb688 100644
--- a/prism/util/pm_buffer.h
+++ b/prism/util/pm_buffer.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_buffer.h
+ *
+ * A wrapper around a contiguous block of allocated memory.
+ */
 #ifndef PRISM_BUFFER_H
 #define PRISM_BUFFER_H
 
diff --git a/prism/util/pm_char.h b/prism/util/pm_char.h
index 2bdc67de4a..32f698a42b 100644
--- a/prism/util/pm_char.h
+++ b/prism/util/pm_char.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_char.h
+ *
+ * Functions for working with characters and strings.
+ */
 #ifndef PRISM_CHAR_H
 #define PRISM_CHAR_H
 
diff --git a/prism/util/pm_constant_pool.h b/prism/util/pm_constant_pool.h
index 19c3b619e1..238d0c4cae 100644
--- a/prism/util/pm_constant_pool.h
+++ b/prism/util/pm_constant_pool.h
@@ -1,8 +1,12 @@
-// The constant pool is a data structure that stores a set of strings. Each
-// string is assigned a unique id, which can be used to compare strings for
-// equality. This comparison ends up being much faster than strcmp, since it
-// only requires a single integer comparison.
-
+/**
+ * @file pm_constant_pool.h
+ *
+ * A data structure that stores a set of strings.
+ * 
+ * Each string is assigned a unique id, which can be used to compare strings for
+ * equality. This comparison ends up being much faster than strcmp, since it
+ * only requires a single integer comparison.
+ */
 #ifndef PRISM_CONSTANT_POOL_H
 #define PRISM_CONSTANT_POOL_H
 
@@ -14,12 +18,23 @@
 #include <stdlib.h>
 #include <string.h>
 
+/**
+ * A constant id is a unique identifier for a constant in the constant pool.
+ */
 typedef uint32_t pm_constant_id_t;
 
+/**
+ * A list of constant IDs. Usually used to represent a set of locals.
+ */
 typedef struct {
-    pm_constant_id_t *ids;
+    /** The number of constant ids in the list. */
     size_t size;
+
+    /** The number of constant ids that have been allocated in the list. */
     size_t capacity;
+
+    /** The constant ids in the list. */
+    pm_constant_id_t *ids;
 } pm_constant_id_list_t;
 
 /**
diff --git a/prism/util/pm_list.h b/prism/util/pm_list.h
index 53a5b9c3a1..b05ed0290a 100644
--- a/prism/util/pm_list.h
+++ b/prism/util/pm_list.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_list.h
+ *
+ * An abstract linked list.
+ */
 #ifndef PRISM_LIST_H
 #define PRISM_LIST_H
 
diff --git a/prism/util/pm_memchr.h b/prism/util/pm_memchr.h
index 6b817a5521..1eae6ab1ba 100644
--- a/prism/util/pm_memchr.h
+++ b/prism/util/pm_memchr.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_memchr.h
+ *
+ * A custom memchr implementation.
+ */
 #ifndef PRISM_MEMCHR_H
 #define PRISM_MEMCHR_H
 
diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h
index 603a84c38c..a31051f4e0 100644
--- a/prism/util/pm_newline_list.h
+++ b/prism/util/pm_newline_list.h
@@ -1,11 +1,16 @@
-// When compiling the syntax tree, it's necessary to know the line and column
-// of many nodes. This is necessary to support things like error messages,
-// tracepoints, etc.
-//
-// It's possible that we could store the start line, start column, end line, and
-// end column on every node in addition to the offsets that we already store,
-// but that would be quite a lot of memory overhead.
-
+/**
+ * @file pm_newline_list.h
+ *
+ * A list of byte offsets of newlines in a string.
+ *
+ * When compiling the syntax tree, it's necessary to know the line and column
+ * of many nodes. This is necessary to support things like error messages,
+ * tracepoints, etc.
+ *
+ * It's possible that we could store the start line, start column, end line, and
+ * end column on every node in addition to the offsets that we already store,
+ * but that would be quite a lot of memory overhead.
+ */
 #ifndef PRISM_NEWLINE_LIST_H
 #define PRISM_NEWLINE_LIST_H
 
diff --git a/prism/util/pm_state_stack.h b/prism/util/pm_state_stack.h
index 7268a3fd63..1ce57a2209 100644
--- a/prism/util/pm_state_stack.h
+++ b/prism/util/pm_state_stack.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_state_stack.h
+ *
+ * A stack of boolean values.
+ */
 #ifndef PRISM_STATE_STACK_H
 #define PRISM_STATE_STACK_H
 
@@ -30,6 +35,7 @@ void pm_state_stack_pop(pm_state_stack_t *stack);
  * Returns the value at the top of the stack.
  *
  * @param stack The stack to get the value from.
+ * @return The value at the top of the stack.
  */
 bool pm_state_stack_p(pm_state_stack_t *stack);
 
diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h
index b0b7c6bf2d..ddb153784f 100644
--- a/prism/util/pm_string.h
+++ b/prism/util/pm_string.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_string.h
+ *
+ * A generic string type that can have various ownership semantics.
+ */
 #ifndef PRISM_STRING_H
 #define PRISM_STRING_H
 
diff --git a/prism/util/pm_string_list.h b/prism/util/pm_string_list.h
index 1f460e5dc9..0d406cc5d8 100644
--- a/prism/util/pm_string_list.h
+++ b/prism/util/pm_string_list.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_string_list.h
+ *
+ * A list of strings.
+ */
 #ifndef PRISM_STRING_LIST_H
 #define PRISM_STRING_LIST_H
 
diff --git a/prism/util/pm_strncasecmp.h b/prism/util/pm_strncasecmp.h
index 6cf7aa8023..c381ea38f4 100644
--- a/prism/util/pm_strncasecmp.h
+++ b/prism/util/pm_strncasecmp.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_strncasecmp.h
+ *
+ * A custom strncasecmp implementation.
+ */
 #ifndef PRISM_STRNCASECMP_H
 #define PRISM_STRNCASECMP_H
 
diff --git a/prism/util/pm_strpbrk.h b/prism/util/pm_strpbrk.h
index b589004abf..61a443e51a 100644
--- a/prism/util/pm_strpbrk.h
+++ b/prism/util/pm_strpbrk.h
@@ -1,3 +1,8 @@
+/**
+ * @file pm_strpbrk.h
+ *
+ * A custom strpbrk implementation.
+ */
 #ifndef PRISM_STRPBRK_H
 #define PRISM_STRPBRK_H
 
diff --git a/prism/version.h b/prism/version.h
index 25ee409c74..2e5e84cdf1 100644
--- a/prism/version.h
+++ b/prism/version.h
@@ -1,4 +1,12 @@
 /**
+ * @file version.h
+ *
+ * The version of the Prism library.
+ */
+#ifndef PRISM_VERSION_H
+#define PRISM_VERSION_H
+
+/**
  * The major version of the Prism library as an int.
  */
 #define PRISM_VERSION_MAJOR 0
@@ -17,3 +25,5 @@
  * The version of the Prism library as a constant string.
  */
 #define PRISM_VERSION "0.16.0"
+
+#endif
author	Kevin Newton <kddnewton@gmail.com>	2023-10-31 13:26:31 -0400
committer	Kevin Newton <kddnewton@gmail.com>	2023-11-01 13:10:29 -0400
commit	690f3bbf5d67d8629d6e6d7305b79fcc5362b0c9 (patch)
tree	5d01ab6281f21ccac723ac4f856c0b441a98d98a /prism
parent	e745af2f0e36f7ff3499019dfb038799a9d3d069 (diff)