summaryrefslogtreecommitdiff
path: root/prism/prism.h
blob: 5eec5f49ec98b3f037f91faf4db2f2d8bffce264 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
/**
 * @file prism.h
 *
 * The main header file for the prism parser.
 */
#ifndef PRISM_H
#define PRISM_H

#include "prism/defines.h"
#include "prism/util/pm_buffer.h"
#include "prism/util/pm_char.h"
#include "prism/util/pm_memchr.h"
#include "prism/util/pm_strncasecmp.h"
#include "prism/util/pm_strpbrk.h"
#include "prism/ast.h"
#include "prism/diagnostic.h"
#include "prism/node.h"
#include "prism/options.h"
#include "prism/pack.h"
#include "prism/parser.h"
#include "prism/prettyprint.h"
#include "prism/regexp.h"
#include "prism/version.h"

#include <assert.h>
#include <errno.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifndef _WIN32
#include <strings.h>
#endif

/**
 * The prism version and the serialization format.
 *
 * @returns The prism version as a constant string.
 */
PRISM_EXPORTED_FUNCTION const char * pm_version(void);

/**
 * Initialize a parser with the given start and end pointers.
 *
 * @param parser The parser to initialize.
 * @param source The source to parse.
 * @param size The size of the source.
 * @param options The optional options to use when parsing.
 */
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options);

/**
 * Register a callback that will be called whenever prism changes the encoding
 * it is using to parse based on the magic comment.
 *
 * @param parser The parser to register the callback with.
 * @param callback The callback to register.
 */
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback);

/**
 * Register a callback that will be called when prism encounters a magic comment
 * with an encoding referenced that it doesn't understand. The callback should
 * return NULL if it also doesn't understand the encoding or it should return a
 * pointer to a pm_encoding_t struct that contains the functions necessary to
 * parse identifiers.
 *
 * @param parser The parser to register the callback with.
 * @param callback The callback to register.
 */
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_decode_callback(pm_parser_t *parser, pm_encoding_decode_callback_t callback);

/**
 * Free any memory associated with the given parser.
 *
 * @param parser The parser to free.
 */
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser);

/**
 * Initiate the parser with the given parser.
 *
 * @param parser The parser to use.
 * @return The AST representing the source.
 */
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser);

/**
 * Serialize the given list of comments to the given buffer.
 *
 * @param parser The parser to serialize.
 * @param list The list of comments to serialize.
 * @param buffer The buffer to serialize to.
 */
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer);

/**
 * Serialize the name of the encoding to the buffer.
 *
 * @param encoding The encoding to serialize.
 * @param buffer The buffer to serialize to.
 */
void pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer);

/**
 * Serialize the encoding, metadata, nodes, and constant pool.
 *
 * @param parser The parser to serialize.
 * @param node The node to serialize.
 * @param buffer The buffer to serialize to.
 */
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);

/**
 * Serialize the AST represented by the given node to the given buffer.
 *
 * @param parser The parser to serialize.
 * @param node The node to serialize.
 * @param buffer The buffer to serialize to.
 */
PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer);

/**
 * Parse the given source to the AST and dump the AST to the given buffer.
 *
 * @param buffer The buffer to serialize to.
 * @param source The source to parse.
 * @param size The size of the source.
 * @param data The optional data to pass to the parser.
 */
PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);

/**
 * Parse and serialize the comments in the given source to the given buffer.
 *
 * @param buffer The buffer to serialize to.
 * @param source The source to parse.
 * @param size The size of the source.
 * @param data The optional data to pass to the parser.
 */
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);

/**
 * Lex the given source and serialize to the given buffer.
 *
 * @param source The source to lex.
 * @param size The size of the source.
 * @param buffer The buffer to serialize to.
 * @param data The optional data to pass to the lexer.
 */
PRISM_EXPORTED_FUNCTION void pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);

/**
 * Parse and serialize both the AST and the tokens represented by the given
 * source to the given buffer.
 *
 * @param buffer The buffer to serialize to.
 * @param source The source to parse.
 * @param size The size of the source.
 * @param data The optional data to pass to the parser.
 */
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data);

/**
 * Returns a string representation of the given token type.
 *
 * @param token_type The token type to convert to a string.
 * @return A string representation of the given token type.
 */
PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type);

/**
 * @mainpage
 *
 * Prism is a parser for the Ruby programming language. It is designed to be
 * portable, error tolerant, and maintainable. It is written in C99 and has no
 * dependencies. It is currently being integrated into
 * [CRuby](https://github.com/ruby/ruby),
 * [JRuby](https://github.com/jruby/jruby),
 * [TruffleRuby](https://github.com/oracle/truffleruby),
 * [Sorbet](https://github.com/sorbet/sorbet), and
 * [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree).
 *
 * @section getting-started Getting started
 *
 * If you're vendoring this project and compiling it statically then as long as
 * you have a C99 compiler you will be fine. If you're linking against it as
 * shared library, then you should compile with `-fvisibility=hidden` and
 * `-DPRISM_EXPORT_SYMBOLS` to tell prism to make only its public interface
 * visible.
 *
 * @section parsing Parsing
 *
 * In order to parse Ruby code, the structures and functions that you're going
 * to want to use and be aware of are:
 *
 * * `pm_parser_t` - the main parser structure
 * * `pm_parser_init` - initialize a parser
 * * `pm_parse` - parse and return the root node
 * * `pm_node_destroy` - deallocate the root node returned by `pm_parse`
 * * `pm_parser_free` - free the internal memory of the parser
 *
 * Putting all of this together would look something like:
 *
 * ```c
 * void parse(const uint8_t *source, size_t length) {
 *     pm_parser_t parser;
 *     pm_parser_init(&parser, source, length, NULL);
 *
 *     pm_node_t *root = pm_parse(&parser);
 *     printf("PARSED!\n");
 *
 *     pm_node_destroy(root);
 *     pm_parser_free(&parser);
 * }
 * ```
 *
 * All of the nodes "inherit" from `pm_node_t` by embedding those structures as
 * their first member. This means you can downcast and upcast any node in the
 * tree to a `pm_node_t`.
 *
 * @section serializing Serializing
 *
 * Prism provides the ability to serialize the AST and its related metadata into
 * a binary format. This format is designed to be portable to different
 * languages and runtimes so that you only need to make one FFI call in order to
 * parse Ruby code. The structures and functions that you're going to want to
 * use and be aware of are:
 *
 * * `pm_buffer_t` - a small buffer object that will hold the serialized AST
 * * `pm_buffer_free` - free the memory associated with the buffer
 * * `pm_serialize` - serialize the AST into a buffer
 * * `pm_serialize_parse` - parse and serialize the AST into a buffer
 *
 * Putting all of this together would look something like:
 *
 * ```c
 * void serialize(const uint8_t *source, size_t length) {
 *     pm_buffer_t buffer = { 0 };
 *
 *     pm_serialize_parse(&buffer, source, length, NULL);
 *     printf("SERIALIZED!\n");
 *
 *     pm_buffer_free(&buffer);
 * }
 * ```
 *
 * @section inspecting Inspecting
 *
 * Prism provides the ability to inspect the AST by pretty-printing nodes. You
 * can do this with the `pm_prettyprint` function, which you would use like:
 *
 * ```c
 * void prettyprint(const uint8_t *source, size_t length) {
 *     pm_parser_t parser;
 *     pm_parser_init(&parser, source, length, NULL);
 *
 *     pm_node_t *root = pm_parse(&parser);
 *     pm_buffer_t buffer = { 0 };
 *
 *     pm_prettyprint(&buffer, &parser, root);
 *     printf("*.s%\n", (int) buffer.length, buffer.value);
 *
 *     pm_buffer_free(&buffer);
 *     pm_node_destroy(root);
 *     pm_parser_free(&parser);
 * }
 * ```
 */

#endif