#include "prism/extension.h" #ifdef _WIN32 #include #endif // NOTE: this file should contain only bindings. All non-trivial logic should be // in libprism so it can be shared its the various callers. VALUE rb_cPrism; VALUE rb_cPrismNode; VALUE rb_cPrismSource; VALUE rb_cPrismToken; VALUE rb_cPrismLocation; VALUE rb_cPrismComment; VALUE rb_cPrismInlineComment; VALUE rb_cPrismEmbDocComment; VALUE rb_cPrismMagicComment; VALUE rb_cPrismParseError; VALUE rb_cPrismParseWarning; VALUE rb_cPrismParseResult; VALUE rb_cPrismDebugEncoding; ID rb_option_id_filepath; ID rb_option_id_encoding; ID rb_option_id_line; ID rb_option_id_frozen_string_literal; ID rb_option_id_version; ID rb_option_id_scopes; ID rb_option_id_command_line; /******************************************************************************/ /* IO of Ruby code */ /******************************************************************************/ /** * Check if the given VALUE is a string. If it's nil, then return NULL. If it's * not a string, then raise a type error. Otherwise return the VALUE as a C * string. */ static const char * check_string(VALUE value) { // If the value is nil, then we don't need to do anything. if (NIL_P(value)) { return NULL; } // Check if the value is a string. If it's not, then raise a type error. if (!RB_TYPE_P(value, T_STRING)) { rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(value)); } // Otherwise, return the value as a C string. return RSTRING_PTR(value); } /** * Load the contents and size of the given string into the given pm_string_t. */ static void input_load_string(pm_string_t *input, VALUE string) { // Check if the string is a string. If it's not, then raise a type error. if (!RB_TYPE_P(string, T_STRING)) { rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string)); } pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string)); } /******************************************************************************/ /* Building C options from Ruby options */ /******************************************************************************/ /** * Build the scopes associated with the provided Ruby keyword value. */ static void build_options_scopes(pm_options_t *options, VALUE scopes) { // Check if the value is an array. If it's not, then raise a type error. if (!RB_TYPE_P(scopes, T_ARRAY)) { rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scopes)); } // Initialize the scopes array. size_t scopes_count = RARRAY_LEN(scopes); if (!pm_options_scopes_init(options, scopes_count)) { rb_raise(rb_eNoMemError, "failed to allocate memory"); } // Iterate over the scopes and add them to the options. for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) { VALUE scope = rb_ary_entry(scopes, scope_index); // Check that the scope is an array. If it's not, then raise a type // error. if (!RB_TYPE_P(scope, T_ARRAY)) { rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scope)); } // Initialize the scope array. size_t locals_count = RARRAY_LEN(scope); pm_options_scope_t *options_scope = &options->scopes[scope_index]; if (!pm_options_scope_init(options_scope, locals_count)) { rb_raise(rb_eNoMemError, "failed to allocate memory"); } // Iterate over the locals and add them to the scope. for (size_t local_index = 0; local_index < locals_count; local_index++) { VALUE local = rb_ary_entry(scope, local_index); // Check that the local is a symbol. If it's not, then raise a // type error. if (!RB_TYPE_P(local, T_SYMBOL)) { rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(local)); } // Add the local to the scope. pm_string_t *scope_local = &options_scope->locals[local_index]; const char *name = rb_id2name(SYM2ID(local)); pm_string_constant_init(scope_local, name, strlen(name)); } } } /** * An iterator function that is called for each key-value in the keywords hash. */ static int build_options_i(VALUE key, VALUE value, VALUE argument) { pm_options_t *options = (pm_options_t *) argument; ID key_id = SYM2ID(key); if (key_id == rb_option_id_filepath) { if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value)); } else if (key_id == rb_option_id_encoding) { if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value))); } else if (key_id == rb_option_id_line) { if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value)); } else if (key_id == rb_option_id_frozen_string_literal) { if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue); } else if (key_id == rb_option_id_version) { if (!NIL_P(value)) { const char *version = check_string(value); if (!pm_options_version_set(options, version, RSTRING_LEN(value))) { rb_raise(rb_eArgError, "invalid version: %" PRIsVALUE, value); } } } else if (key_id == rb_option_id_scopes) { if (!NIL_P(value)) build_options_scopes(options, value); } else if (key_id == rb_option_id_command_line) { if (!NIL_P(value)) { const char *string = check_string(value); uint8_t command_line = 0; for (size_t index = 0; index < strlen(string); index++) { switch (string[index]) { case 'a': command_line |= PM_OPTIONS_COMMAND_LINE_A; break; case 'e': command_line |= PM_OPTIONS_COMMAND_LINE_E; break; case 'l': command_line |= PM_OPTIONS_COMMAND_LINE_L; break; case 'n': command_line |= PM_OPTIONS_COMMAND_LINE_N; break; case 'p': command_line |= PM_OPTIONS_COMMAND_LINE_P; break; case 'x': command_line |= PM_OPTIONS_COMMAND_LINE_X; break; default: rb_raise(rb_eArgError, "invalid command line flag: '%c'", string[index]); break; } } pm_options_command_line_set(options, command_line); } } else { rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, key); } return ST_CONTINUE; } /** * We need a struct here to pass through rb_protect and it has to be a single * value. Because the sizeof(VALUE) == sizeof(void *), we're going to pass this * through as an opaque pointer and cast it on both sides. */ struct build_options_data { pm_options_t *options; VALUE keywords; }; /** * Build the set of options from the given keywords. Note that this can raise a * Ruby error if the options are not valid. */ static VALUE build_options(VALUE argument) { struct build_options_data *data = (struct build_options_data *) argument; rb_hash_foreach(data->keywords, build_options_i, (VALUE) data->options); return Qnil; } /** * Extract the options from the given keyword arguments. */ static void extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) { options->line = 1; // default if (!NIL_P(keywords)) { struct build_options_data data = { .options = options, .keywords = keywords }; struct build_options_data *argument = &data; int state = 0; rb_protect(build_options, (VALUE) argument, &state); if (state != 0) { pm_options_free(options); rb_jump_tag(state); } } if (!NIL_P(filepath)) { if (!RB_TYPE_P(filepath, T_STRING)) { pm_options_free(options); rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath)); } pm_options_filepath_set(options, RSTRING_PTR(filepath)); } } /** * Read options for methods that look like (source, **options). */ static void string_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) { VALUE string; VALUE keywords; rb_scan_args(argc, argv, "1:", &string, &keywords); extract_options(options, Qnil, keywords); input_load_string(input, string); } /** * Read options for methods that look like (filepath, **options). */ static void file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) { VALUE filepath; VALUE keywords; rb_scan_args(argc, argv, "1:", &filepath, &keywords); Check_Type(filepath, T_STRING); extract_options(options, filepath, keywords); const char * string_source = (const char *) pm_string_source(&options->filepath); if (!pm_string_mapped_init(input, string_source)) { pm_options_free(options); #ifdef _WIN32 int e = rb_w32_map_errno(GetLastError()); #else int e = errno; #endif rb_syserr_fail(e, string_source); } } /******************************************************************************/ /* Serializing the AST */ /******************************************************************************/ /** * Dump the AST corresponding to the given input to a string. */ static VALUE dump_input(pm_string_t *input, const pm_options_t *options) { pm_buffer_t buffer; if (!pm_buffer_init(&buffer)) { rb_raise(rb_eNoMemError, "failed to allocate memory"); } pm_parser_t parser; pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options); pm_node_t *node = pm_parse(&parser); pm_serialize(&parser, node, &buffer); VALUE result = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer)); pm_node_destroy(&parser, node); pm_buffer_free(&buffer); pm_parser_free(&parser); return result; } /** * call-seq: * Prism::dump(source, **options) -> String * * Dump the AST corresponding to the given string to a string. For supported * options, see Prism::parse. */ static VALUE dump(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; string_options(argc, argv, &input, &options); #ifdef PRISM_DEBUG_MODE_BUILD size_t length = pm_string_length(&input); char* dup = malloc(length); memcpy(dup, pm_string_source(&input), length); pm_string_constant_init(&input, dup, length); #endif VALUE value = dump_input(&input, &options); #ifdef PRISM_DEBUG_MODE_BUILD free(dup); #endif pm_string_free(&input); pm_options_free(&options); return value; } /** * call-seq: * Prism::dump_file(filepath, **options) -> String * * Dump the AST corresponding to the given file to a string. For supported * options, see Prism::parse. */ static VALUE dump_file(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; file_options(argc, argv, &input, &options); VALUE value = dump_input(&input, &options); pm_string_free(&input); pm_options_free(&options); return value; } /******************************************************************************/ /* Extracting values for the parse result */ /******************************************************************************/ /** * Extract the comments out of the parser into an array. */ static VALUE parser_comments(pm_parser_t *parser, VALUE source) { VALUE comments = rb_ary_new(); for (pm_comment_t *comment = (pm_comment_t *) parser->comment_list.head; comment != NULL; comment = (pm_comment_t *) comment->node.next) { VALUE location_argv[] = { source, LONG2FIX(comment->location.start - parser->start), LONG2FIX(comment->location.end - comment->location.start) }; VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment; VALUE comment_argv[] = { rb_class_new_instance(3, location_argv, rb_cPrismLocation) }; rb_ary_push(comments, rb_class_new_instance(1, comment_argv, type)); } return comments; } /** * Extract the magic comments out of the parser into an array. */ static VALUE parser_magic_comments(pm_parser_t *parser, VALUE source) { VALUE magic_comments = rb_ary_new(); for (pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) parser->magic_comment_list.head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) { VALUE key_loc_argv[] = { source, LONG2FIX(magic_comment->key_start - parser->start), LONG2FIX(magic_comment->key_length) }; VALUE value_loc_argv[] = { source, LONG2FIX(magic_comment->value_start - parser->start), LONG2FIX(magic_comment->value_length) }; VALUE magic_comment_argv[] = { rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation), rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation) }; rb_ary_push(magic_comments, rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment)); } return magic_comments; } /** * Extract out the data location from the parser into a Location instance if one * exists. */ static VALUE parser_data_loc(const pm_parser_t *parser, VALUE source) { if (parser->data_loc.end == NULL) { return Qnil; } else { VALUE argv[] = { source, LONG2FIX(parser->data_loc.start - parser->start), LONG2FIX(parser->data_loc.end - parser->data_loc.start) }; return rb_class_new_instance(3, argv, rb_cPrismLocation); } } /** * Extract the errors out of the parser into an array. */ static VALUE parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) { VALUE errors = rb_ary_new(); pm_diagnostic_t *error; for (error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) { VALUE location_argv[] = { source, LONG2FIX(error->location.start - parser->start), LONG2FIX(error->location.end - error->location.start) }; VALUE level = Qnil; switch (error->level) { case PM_ERROR_LEVEL_FATAL: level = ID2SYM(rb_intern("fatal")); break; case PM_ERROR_LEVEL_ARGUMENT: level = ID2SYM(rb_intern("argument")); break; default: rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, error->level); } VALUE error_argv[] = { rb_enc_str_new_cstr(error->message, encoding), rb_class_new_instance(3, location_argv, rb_cPrismLocation), level }; rb_ary_push(errors, rb_class_new_instance(3, error_argv, rb_cPrismParseError)); } return errors; } /** * Extract the warnings out of the parser into an array. */ static VALUE parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) { VALUE warnings = rb_ary_new(); pm_diagnostic_t *warning; for (warning = (pm_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (pm_diagnostic_t *) warning->node.next) { VALUE location_argv[] = { source, LONG2FIX(warning->location.start - parser->start), LONG2FIX(warning->location.end - warning->location.start) }; VALUE level = Qnil; switch (warning->level) { case PM_WARNING_LEVEL_DEFAULT: level = ID2SYM(rb_intern("default")); break; case PM_WARNING_LEVEL_VERBOSE: level = ID2SYM(rb_intern("verbose")); break; default: rb_raise(rb_eRuntimeError, "Unknown level: %" PRIu8, warning->level); } VALUE warning_argv[] = { rb_enc_str_new_cstr(warning->message, encoding), rb_class_new_instance(3, location_argv, rb_cPrismLocation), level }; rb_ary_push(warnings, rb_class_new_instance(3, warning_argv, rb_cPrismParseWarning)); } return warnings; } /******************************************************************************/ /* Lexing Ruby code */ /******************************************************************************/ /** * This struct gets stored in the parser and passed in to the lex callback any * time a new token is found. We use it to store the necessary information to * initialize a Token instance. */ typedef struct { VALUE source; VALUE tokens; rb_encoding *encoding; } parse_lex_data_t; /** * This is passed as a callback to the parser. It gets called every time a new * token is found. Once found, we initialize a new instance of Token and push it * onto the tokens array. */ static void parse_lex_token(void *data, pm_parser_t *parser, pm_token_t *token) { parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data; VALUE yields = rb_ary_new_capa(2); rb_ary_push(yields, pm_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source)); rb_ary_push(yields, INT2FIX(parser->lex_state)); rb_ary_push(parse_lex_data->tokens, yields); } /** * This is called whenever the encoding changes based on the magic comment at * the top of the file. We use it to update the encoding that we are using to * create tokens. */ static void parse_lex_encoding_changed_callback(pm_parser_t *parser) { parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data; parse_lex_data->encoding = rb_enc_find(parser->encoding->name); // Since the encoding changed, we need to go back and change the encoding of // the tokens that were already lexed. This is only going to end up being // one or two tokens, since the encoding can only change at the top of the // file. VALUE tokens = parse_lex_data->tokens; for (long index = 0; index < RARRAY_LEN(tokens); index++) { VALUE yields = rb_ary_entry(tokens, index); VALUE token = rb_ary_entry(yields, 0); VALUE value = rb_ivar_get(token, rb_intern("@value")); rb_enc_associate(value, parse_lex_data->encoding); ENC_CODERANGE_CLEAR(value); } } /** * Parse the given input and return a ParseResult containing just the tokens or * the nodes and tokens. */ static VALUE parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nodes) { pm_parser_t parser; pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options); pm_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback); VALUE source_string = rb_str_new((const char *) pm_string_source(input), pm_string_length(input)); VALUE offsets = rb_ary_new(); VALUE source_argv[] = { source_string, LONG2NUM(parser.start_line), offsets }; VALUE source = rb_class_new_instance(3, source_argv, rb_cPrismSource); parse_lex_data_t parse_lex_data = { .source = source, .tokens = rb_ary_new(), .encoding = rb_utf8_encoding() }; parse_lex_data_t *data = &parse_lex_data; pm_lex_callback_t lex_callback = (pm_lex_callback_t) { .data = (void *) data, .callback = parse_lex_token, }; parser.lex_callback = &lex_callback; pm_node_t *node = pm_parse(&parser); // Here we need to update the Source object to have the correct // encoding for the source string and the correct newline offsets. // We do it here because we've already created the Source object and given // it over to all of the tokens, and both of these are only set after pm_parse(). rb_encoding *encoding = rb_enc_find(parser.encoding->name); rb_enc_associate(source_string, encoding); for (size_t index = 0; index < parser.newline_list.size; index++) { rb_ary_push(offsets, ULONG2NUM(parser.newline_list.offsets[index])); } VALUE value; if (return_nodes) { value = rb_ary_new_capa(2); rb_ary_push(value, pm_ast_new(&parser, node, parse_lex_data.encoding, source)); rb_ary_push(value, parse_lex_data.tokens); } else { value = parse_lex_data.tokens; } VALUE result_argv[] = { value, parser_comments(&parser, source), parser_magic_comments(&parser, source), parser_data_loc(&parser, source), parser_errors(&parser, parse_lex_data.encoding, source), parser_warnings(&parser, parse_lex_data.encoding, source), source }; pm_node_destroy(&parser, node); pm_parser_free(&parser); return rb_class_new_instance(7, result_argv, rb_cPrismParseResult); } /** * call-seq: * Prism::lex(source, **options) -> Array * * Return an array of Token instances corresponding to the given string. For * supported options, see Prism::parse. */ static VALUE lex(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; string_options(argc, argv, &input, &options); VALUE result = parse_lex_input(&input, &options, false); pm_string_free(&input); pm_options_free(&options); return result; } /** * call-seq: * Prism::lex_file(filepath, **options) -> Array * * Return an array of Token instances corresponding to the given file. For * supported options, see Prism::parse. */ static VALUE lex_file(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; file_options(argc, argv, &input, &options); VALUE value = parse_lex_input(&input, &options, false); pm_string_free(&input); pm_options_free(&options); return value; } /******************************************************************************/ /* Parsing Ruby code */ /******************************************************************************/ /** * Parse the given input and return a ParseResult instance. */ static VALUE parse_input(pm_string_t *input, const pm_options_t *options) { pm_parser_t parser; pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options); pm_node_t *node = pm_parse(&parser); rb_encoding *encoding = rb_enc_find(parser.encoding->name); VALUE source = pm_source_new(&parser, encoding); VALUE result_argv[] = { pm_ast_new(&parser, node, encoding, source), parser_comments(&parser, source), parser_magic_comments(&parser, source), parser_data_loc(&parser, source), parser_errors(&parser, encoding, source), parser_warnings(&parser, encoding, source), source }; VALUE result = rb_class_new_instance(7, result_argv, rb_cPrismParseResult); pm_node_destroy(&parser, node); pm_parser_free(&parser); return result; } /** * call-seq: * Prism::parse(source, **options) -> ParseResult * * Parse the given string and return a ParseResult instance. The options that * are supported are: * * * `command_line` - either nil or a string of the various options that were * set on the command line. Valid values are combinations of "a", "l", * "n", "p", and "x". * * `encoding` - the encoding of the source being parsed. This should be an * encoding or nil. * * `filepath` - the filepath of the source being parsed. This should be a * string or nil. * * `frozen_string_literal` - whether or not the frozen string literal pragma * has been set. This should be a boolean or nil. * * `line` - the line number that the parse starts on. This should be an * integer or nil. Note that this is 1-indexed. * * `scopes` - the locals that are in scope surrounding the code that is being * parsed. This should be an array of arrays of symbols or nil. Scopes are * ordered from the outermost scope to the innermost one. * * `version` - the version of Ruby syntax that prism should used to parse Ruby * code. By default prism assumes you want to parse with the latest vesion * of Ruby syntax (which you can trigger with `nil` or `"latest"`). You * may also restrict the syntax to a specific version of Ruby. The * supported values are `"3.3.0"` and `"3.4.0"`. */ static VALUE parse(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; string_options(argc, argv, &input, &options); #ifdef PRISM_DEBUG_MODE_BUILD size_t length = pm_string_length(&input); char* dup = malloc(length); memcpy(dup, pm_string_source(&input), length); pm_string_constant_init(&input, dup, length); #endif VALUE value = parse_input(&input, &options); #ifdef PRISM_DEBUG_MODE_BUILD free(dup); #endif pm_string_free(&input); pm_options_free(&options); return value; } /** * call-seq: * Prism::parse_file(filepath, **options) -> ParseResult * * Parse the given file and return a ParseResult instance. For supported * options, see Prism::parse. */ static VALUE parse_file(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; file_options(argc, argv, &input, &options); VALUE value = parse_input(&input, &options); pm_string_free(&input); pm_options_free(&options); return value; } /** * Parse the given input and return an array of Comment objects. */ static VALUE parse_input_comments(pm_string_t *input, const pm_options_t *options) { pm_parser_t parser; pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options); pm_node_t *node = pm_parse(&parser); rb_encoding *encoding = rb_enc_find(parser.encoding->name); VALUE source = pm_source_new(&parser, encoding); VALUE comments = parser_comments(&parser, source); pm_node_destroy(&parser, node); pm_parser_free(&parser); return comments; } /** * call-seq: * Prism::parse_comments(source, **options) -> Array * * Parse the given string and return an array of Comment objects. For supported * options, see Prism::parse. */ static VALUE parse_comments(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; string_options(argc, argv, &input, &options); VALUE result = parse_input_comments(&input, &options); pm_string_free(&input); pm_options_free(&options); return result; } /** * call-seq: * Prism::parse_file_comments(filepath, **options) -> Array * * Parse the given file and return an array of Comment objects. For supported * options, see Prism::parse. */ static VALUE parse_file_comments(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; file_options(argc, argv, &input, &options); VALUE value = parse_input_comments(&input, &options); pm_string_free(&input); pm_options_free(&options); return value; } /** * call-seq: * Prism::parse_lex(source, **options) -> ParseResult * * Parse the given string and return a ParseResult instance that contains a * 2-element array, where the first element is the AST and the second element is * an array of Token instances. * * This API is only meant to be used in the case where you need both the AST and * the tokens. If you only need one or the other, use either Prism::parse or * Prism::lex. * * For supported options, see Prism::parse. */ static VALUE parse_lex(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; string_options(argc, argv, &input, &options); VALUE value = parse_lex_input(&input, &options, true); pm_string_free(&input); pm_options_free(&options); return value; } /** * call-seq: * Prism::parse_lex_file(filepath, **options) -> ParseResult * * Parse the given file and return a ParseResult instance that contains a * 2-element array, where the first element is the AST and the second element is * an array of Token instances. * * This API is only meant to be used in the case where you need both the AST and * the tokens. If you only need one or the other, use either Prism::parse_file * or Prism::lex_file. * * For supported options, see Prism::parse. */ static VALUE parse_lex_file(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; file_options(argc, argv, &input, &options); VALUE value = parse_lex_input(&input, &options, true); pm_string_free(&input); pm_options_free(&options); return value; } /** * Parse the given input and return true if it parses without errors. */ static VALUE parse_input_success_p(pm_string_t *input, const pm_options_t *options) { pm_parser_t parser; pm_parser_init(&parser, pm_string_source(input), pm_string_length(input), options); pm_node_t *node = pm_parse(&parser); pm_node_destroy(&parser, node); VALUE result = parser.error_list.size == 0 ? Qtrue : Qfalse; pm_parser_free(&parser); return result; } /** * call-seq: * Prism::parse_success?(source, **options) -> Array * * Parse the given string and return true if it parses without errors. For * supported options, see Prism::parse. */ static VALUE parse_success_p(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; string_options(argc, argv, &input, &options); VALUE result = parse_input_success_p(&input, &options); pm_string_free(&input); pm_options_free(&options); return result; } /** * call-seq: * Prism::parse_file_success?(filepath, **options) -> Array * * Parse the given file and return true if it parses without errors. For * supported options, see Prism::parse. */ static VALUE parse_file_success_p(int argc, VALUE *argv, VALUE self) { pm_string_t input; pm_options_t options = { 0 }; file_options(argc, argv, &input, &options); VALUE result = parse_input_success_p(&input, &options); pm_string_free(&input); pm_options_free(&options); return result; } /******************************************************************************/ /* Utility functions exposed to make testing easier */ /******************************************************************************/ /** * call-seq: * Debug::named_captures(source) -> Array * * Returns an array of strings corresponding to the named capture groups in the * given source string. If prism was unable to parse the regular expression, * this function returns nil. */ static VALUE named_captures(VALUE self, VALUE source) { pm_string_list_t string_list = { 0 }; if (!pm_regexp_named_capture_group_names((const uint8_t *) RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, PM_ENCODING_UTF_8_ENTRY)) { pm_string_list_free(&string_list); return Qnil; } VALUE names = rb_ary_new(); for (size_t index = 0; index < string_list.length; index++) { const pm_string_t *string = &string_list.strings[index]; rb_ary_push(names, rb_str_new((const char *) pm_string_source(string), pm_string_length(string))); } pm_string_list_free(&string_list); return names; } /** * call-seq: * Debug::integer_parse(source) -> [Integer, String] * * Parses the given source string and returns the integer it represents, as well * as a decimal string representation. */ static VALUE integer_parse(VALUE self, VALUE source) { const uint8_t *start = (const uint8_t *) RSTRING_PTR(source); size_t length = RSTRING_LEN(source); pm_integer_t integer = { 0 }; pm_integer_parse(&integer, PM_INTEGER_BASE_UNKNOWN, start, start + length); VALUE number = UINT2NUM(integer.head.value); size_t shift = 0; for (pm_integer_word_t *node = integer.head.next; node != NULL; node = node->next) { VALUE receiver = rb_funcall(UINT2NUM(node->value), rb_intern("<<"), 1, ULONG2NUM(++shift * 32)); number = rb_funcall(receiver, rb_intern("|"), 1, number); } if (integer.negative) number = rb_funcall(number, rb_intern("-@"), 0); pm_buffer_t buffer = { 0 }; pm_integer_string(&buffer, &integer); VALUE string = rb_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer)); pm_buffer_free(&buffer); pm_integer_free(&integer); VALUE result = rb_ary_new_capa(2); rb_ary_push(result, number); rb_ary_push(result, string); return result; } /** * call-seq: * Debug::memsize(source) -> { length: xx, memsize: xx, node_count: xx } * * Return a hash of information about the given source string's memory usage. */ static VALUE memsize(VALUE self, VALUE string) { pm_parser_t parser; size_t length = RSTRING_LEN(string); pm_parser_init(&parser, (const uint8_t *) RSTRING_PTR(string), length, NULL); pm_node_t *node = pm_parse(&parser); pm_memsize_t memsize; pm_node_memsize(node, &memsize); pm_node_destroy(&parser, node); pm_parser_free(&parser); VALUE result = rb_hash_new(); rb_hash_aset(result, ID2SYM(rb_intern("length")), INT2FIX(length)); rb_hash_aset(result, ID2SYM(rb_intern("memsize")), INT2FIX(memsize.memsize)); rb_hash_aset(result, ID2SYM(rb_intern("node_count")), INT2FIX(memsize.node_count)); return result; } /** * call-seq: * Debug::profile_file(filepath) -> nil * * Parse the file, but do nothing with the result. This is used to profile the * parser for memory and speed. */ static VALUE profile_file(VALUE self, VALUE filepath) { pm_string_t input; const char *checked = check_string(filepath); Check_Type(filepath, T_STRING); if (!pm_string_mapped_init(&input, checked)) { #ifdef _WIN32 int e = rb_w32_map_errno(GetLastError()); #else int e = errno; #endif rb_syserr_fail(e, checked); } pm_options_t options = { 0 }; pm_options_filepath_set(&options, checked); pm_parser_t parser; pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), &options); pm_node_t *node = pm_parse(&parser); pm_node_destroy(&parser, node); pm_parser_free(&parser); pm_options_free(&options); pm_string_free(&input); return Qnil; } /** * call-seq: * Debug::inspect_node(source) -> inspected * * Inspect the AST that represents the given source using the prism pretty print * as opposed to the Ruby implementation. */ static VALUE inspect_node(VALUE self, VALUE source) { pm_string_t input; input_load_string(&input, source); pm_parser_t parser; pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL); pm_node_t *node = pm_parse(&parser); pm_buffer_t buffer = { 0 }; pm_prettyprint(&buffer, &parser, node); rb_encoding *encoding = rb_enc_find(parser.encoding->name); VALUE string = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding); pm_buffer_free(&buffer); pm_node_destroy(&parser, node); pm_parser_free(&parser); return string; } /** * call-seq: * Debug::format_errors(source, colorize) -> String * * Format the errors that are found when parsing the given source string. */ static VALUE format_errors(VALUE self, VALUE source, VALUE colorize) { pm_string_t input; input_load_string(&input, source); pm_parser_t parser; pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL); pm_node_t *node = pm_parse(&parser); pm_buffer_t buffer = { 0 }; pm_parser_errors_format(&parser, &buffer, RTEST(colorize)); rb_encoding *encoding = rb_enc_find(parser.encoding->name); VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding); pm_buffer_free(&buffer); pm_node_destroy(&parser, node); pm_parser_free(&parser); pm_string_free(&input); return result; } /** * call-seq: Debug::Encoding.all -> Array[Debug::Encoding] * * Return an array of all of the encodings that prism knows about. */ static VALUE encoding_all(VALUE self) { VALUE encodings = rb_ary_new(); for (size_t index = 0; index < PM_ENCODING_MAXIMUM; index++) { const pm_encoding_t *encoding = &pm_encodings[index]; VALUE encoding_argv[] = { rb_str_new_cstr(encoding->name), encoding->multibyte ? Qtrue : Qfalse }; rb_ary_push(encodings, rb_class_new_instance(2, encoding_argv, rb_cPrismDebugEncoding)); } return encodings; } static const pm_encoding_t * encoding_find(VALUE name) { const uint8_t *source = (const uint8_t *) RSTRING_PTR(name); size_t length = RSTRING_LEN(name); const pm_encoding_t *encoding = pm_encoding_find(source, source + length); if (encoding == NULL) { rb_raise(rb_eArgError, "Unknown encoding: %s", source); } return encoding; } /** * call-seq: Debug::Encoding.width(source) -> Integer * * Returns the width of the first character in the given string if it is valid * in the encoding. If it is not, this function returns 0. */ static VALUE encoding_char_width(VALUE self, VALUE name, VALUE value) { return ULONG2NUM(encoding_find(name)->char_width((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value))); } /** * call-seq: Debug::Encoding.alnum?(source) -> true | false * * Returns true if the first character in the given string is an alphanumeric * character in the encoding. */ static VALUE encoding_alnum_char(VALUE self, VALUE name, VALUE value) { return encoding_find(name)->alnum_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse; } /** * call-seq: Debug::Encoding.alpha?(source) -> true | false * * Returns true if the first character in the given string is an alphabetic * character in the encoding. */ static VALUE encoding_alpha_char(VALUE self, VALUE name, VALUE value) { return encoding_find(name)->alpha_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) > 0 ? Qtrue : Qfalse; } /** * call-seq: Debug::Encoding.upper?(source) -> true | false * * Returns true if the first character in the given string is an uppercase * character in the encoding. */ static VALUE encoding_isupper_char(VALUE self, VALUE name, VALUE value) { return encoding_find(name)->isupper_char((const uint8_t *) RSTRING_PTR(value), RSTRING_LEN(value)) ? Qtrue : Qfalse; } /******************************************************************************/ /* Initialization of the extension */ /******************************************************************************/ /** * The init function that Ruby calls when loading this extension. */ RUBY_FUNC_EXPORTED void Init_prism(void) { // Make sure that the prism library version matches the expected version. // Otherwise something was compiled incorrectly. if (strcmp(pm_version(), EXPECTED_PRISM_VERSION) != 0) { rb_raise( rb_eRuntimeError, "The prism library version (%s) does not match the expected version (%s)", pm_version(), EXPECTED_PRISM_VERSION ); } // Grab up references to all of the constants that we're going to need to // reference throughout this extension. rb_cPrism = rb_define_module("Prism"); rb_cPrismNode = rb_define_class_under(rb_cPrism, "Node", rb_cObject); rb_cPrismSource = rb_define_class_under(rb_cPrism, "Source", rb_cObject); rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject); rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject); rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject); rb_cPrismInlineComment = rb_define_class_under(rb_cPrism, "InlineComment", rb_cPrismComment); rb_cPrismEmbDocComment = rb_define_class_under(rb_cPrism, "EmbDocComment", rb_cPrismComment); rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject); rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject); rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject); rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject); // Intern all of the options that we support so that we don't have to do it // every time we parse. rb_option_id_filepath = rb_intern_const("filepath"); rb_option_id_encoding = rb_intern_const("encoding"); rb_option_id_line = rb_intern_const("line"); rb_option_id_frozen_string_literal = rb_intern_const("frozen_string_literal"); rb_option_id_version = rb_intern_const("version"); rb_option_id_scopes = rb_intern_const("scopes"); rb_option_id_command_line = rb_intern_const("command_line"); /** * The version of the prism library. */ rb_define_const(rb_cPrism, "VERSION", rb_str_new2(EXPECTED_PRISM_VERSION)); // First, the functions that have to do with lexing and parsing. rb_define_singleton_method(rb_cPrism, "dump", dump, -1); rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1); rb_define_singleton_method(rb_cPrism, "lex", lex, -1); rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1); rb_define_singleton_method(rb_cPrism, "parse", parse, -1); rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1); rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1); rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1); rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1); rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1); rb_define_singleton_method(rb_cPrism, "parse_success?", parse_success_p, -1); rb_define_singleton_method(rb_cPrism, "parse_file_success?", parse_file_success_p, -1); // Next, the functions that will be called by the parser to perform various // internal tasks. We expose these to make them easier to test. VALUE rb_cPrismDebug = rb_define_module_under(rb_cPrism, "Debug"); rb_define_singleton_method(rb_cPrismDebug, "named_captures", named_captures, 1); rb_define_singleton_method(rb_cPrismDebug, "integer_parse", integer_parse, 1); rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1); rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1); rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1); rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 2); // Next, define the functions that are exposed through the private // Debug::Encoding class. rb_cPrismDebugEncoding = rb_define_class_under(rb_cPrismDebug, "Encoding", rb_cObject); rb_define_singleton_method(rb_cPrismDebugEncoding, "all", encoding_all, 0); rb_define_singleton_method(rb_cPrismDebugEncoding, "_width", encoding_char_width, 2); rb_define_singleton_method(rb_cPrismDebugEncoding, "_alnum?", encoding_alnum_char, 2); rb_define_singleton_method(rb_cPrismDebugEncoding, "_alpha?", encoding_alpha_char, 2); rb_define_singleton_method(rb_cPrismDebugEncoding, "_upper?", encoding_isupper_char, 2); // Next, initialize the other APIs. Init_prism_api_node(); Init_prism_pack(); }