diff options
author | yui-knk <spiketeika@gmail.com> | 2023-05-28 20:00:20 +0900 |
---|---|---|
committer | Yuichiro Kaneko <spiketeika@gmail.com> | 2023-06-12 18:23:48 +0900 |
commit | b481b673d753339204290d7582dbb91a6e14447a (patch) | |
tree | efb6e1149f38562c047b42146307578b74d5349a /internal | |
parent | b308f7cfe4dd17ca34ac614ce09ea8bedbb134ea (diff) |
[Feature #19719] Universal Parser
Introduce Universal Parser mode for the parser.
This commit includes these changes:
* Introduce the `UNIVERSAL_PARSER` macro. All CRuby-related functions
are passed via `struct rb_parser_config_struct` when this macro is enabled.
* Add a CI task with 'cppflags=-DUNIVERSAL_PARSER' for Ubuntu.
Notes:
Merged: https://github.com/ruby/ruby/pull/7927
Diffstat (limited to 'internal')
-rw-r--r-- | internal/gc.h | 12 |
-rw-r--r-- | internal/parse.h | 123 |
-rw-r--r-- | internal/ruby_parser.h | 68 |
3 files changed, 193 insertions, 10 deletions
diff --git a/internal/gc.h b/internal/gc.h index d19b09f6fc..e345f20cb6 100644 --- a/internal/gc.h +++ b/internal/gc.h @@ -302,6 +302,12 @@ ruby_sized_xfree_inlined(void *ptr, size_t size) # define SIZED_REALLOC_N(x, y, z, w) REALLOC_N(x, y, z) +static inline void * +ruby_sized_realloc_n(void *ptr, size_t new_count, size_t element_size, size_t old_count) +{ + return ruby_xrealloc2(ptr, new_count, element_size); +} + #else static inline void * @@ -325,6 +331,12 @@ ruby_sized_xfree_inlined(void *ptr, size_t size) # define SIZED_REALLOC_N(v, T, m, n) \ ((v) = (T *)ruby_sized_xrealloc2((void *)(v), (m), sizeof(T), (n))) +static inline void * +ruby_sized_realloc_n(void *ptr, size_t new_count, size_t element_size, size_t old_count) +{ + return ruby_sized_xrealloc2(ptr, new_count, element_size, old_count); +} + #endif /* HAVE_MALLOC_USABLE_SIZE */ #define ruby_sized_xrealloc ruby_sized_xrealloc_inlined diff --git a/internal/parse.h b/internal/parse.h index f242c384ad..9e257cf39d 100644 --- a/internal/parse.h +++ b/internal/parse.h @@ -8,18 +8,121 @@ * file COPYING are met. Consult the file for details. * @brief Internal header for the parser. 
*/ -#include "ruby/ruby.h" /* for VALUE */ +#include <limits.h> +#include "rubyparser.h" +#include "internal/static_assert.h" + +#ifdef UNIVERSAL_PARSER +#define rb_encoding void +#endif + struct rb_iseq_struct; /* in vm_core.h */ +#define STRTERM_HEREDOC IMEMO_FL_USER0 + +/* structs for managing terminator of string literal and heredocment */ +typedef struct rb_strterm_literal_struct { + union { + VALUE dummy; + long nest; + } u0; + union { + VALUE dummy; + long func; /* STR_FUNC_* (e.g., STR_FUNC_ESCAPE and STR_FUNC_EXPAND) */ + } u1; + union { + VALUE dummy; + long paren; /* '(' of `%q(...)` */ + } u2; + union { + VALUE dummy; + long term; /* ')' of `%q(...)` */ + } u3; +} rb_strterm_literal_t; + +#define HERETERM_LENGTH_BITS ((SIZEOF_VALUE - 1) * CHAR_BIT - 1) + +typedef struct rb_strterm_heredoc_struct { + VALUE lastline; /* the string of line that contains `<<"END"` */ + long offset; /* the column of END in `<<"END"` */ + int sourceline; /* lineno of the line that contains `<<"END"` */ + unsigned length /* the length of END in `<<"END"` */ +#if HERETERM_LENGTH_BITS < SIZEOF_INT * CHAR_BIT + : HERETERM_LENGTH_BITS +# define HERETERM_LENGTH_MAX ((1U << HERETERM_LENGTH_BITS) - 1) +#else +# define HERETERM_LENGTH_MAX UINT_MAX +#endif + ; +#if HERETERM_LENGTH_BITS < SIZEOF_INT * CHAR_BIT + unsigned quote: 1; + unsigned func: 8; +#else + uint8_t quote; + uint8_t func; +#endif +} rb_strterm_heredoc_t; +STATIC_ASSERT(rb_strterm_heredoc_t, sizeof(rb_strterm_heredoc_t) <= 4 * SIZEOF_VALUE); + +typedef struct rb_strterm_struct { + VALUE flags; + union { + rb_strterm_literal_t literal; + rb_strterm_heredoc_t heredoc; + } u; +} rb_strterm_t; + /* parse.y */ -VALUE rb_parser_set_yydebug(VALUE, VALUE); -void *rb_parser_load_file(VALUE parser, VALUE name); -void rb_parser_keep_script_lines(VALUE vparser); -void rb_parser_error_tolerant(VALUE vparser); -void rb_parser_keep_tokens(VALUE vparser); - -RUBY_SYMBOL_EXPORT_BEGIN -VALUE rb_parser_set_context(VALUE, const struct 
rb_iseq_struct *, int); -RUBY_SYMBOL_EXPORT_END +void rb_ruby_parser_mark(void *ptr); +size_t rb_ruby_parser_memsize(const void *ptr); + +void rb_ruby_parser_set_options(rb_parser_t *p, int print, int loop, int chomp, int split); +rb_parser_t *rb_ruby_parser_set_context(rb_parser_t *p, const struct rb_iseq_struct *base, int main); +void rb_ruby_parser_keep_script_lines(rb_parser_t *p); +void rb_ruby_parser_error_tolerant(rb_parser_t *p); +rb_ast_t* rb_ruby_parser_compile_file_path(rb_parser_t *p, VALUE fname, VALUE file, int start); +void rb_ruby_parser_keep_tokens(rb_parser_t *p); +rb_ast_t* rb_ruby_parser_compile_generic(rb_parser_t *p, VALUE (*lex_gets)(VALUE, int), VALUE fname, VALUE input, int start); +rb_ast_t* rb_ruby_parser_compile_string_path(rb_parser_t *p, VALUE f, VALUE s, int line); +VALUE rb_ruby_parser_encoding(rb_parser_t *p); +int rb_ruby_parser_end_seen_p(rb_parser_t *p); +int rb_ruby_parser_set_yydebug(rb_parser_t *p, int flag); + +int rb_reg_named_capture_assign_iter_impl(struct parser_params *p, const char *s, long len, rb_encoding *enc, NODE **succ_block, const rb_code_location_t *loc); + +#ifdef RIPPER +void ripper_parser_mark(void *ptr); +void ripper_parser_free(void *ptr); +size_t ripper_parser_memsize(const void *ptr); +void ripper_error(struct parser_params *p); +VALUE ripper_value(struct parser_params *p); +int rb_ruby_parser_get_yydebug(rb_parser_t *p); +void rb_ruby_parser_set_value(rb_parser_t *p, VALUE value); +int rb_ruby_parser_error_p(rb_parser_t *p); +VALUE rb_ruby_parser_debug_output(rb_parser_t *p); +void rb_ruby_parser_set_debug_output(rb_parser_t *p, VALUE output); +VALUE rb_ruby_parser_parsing_thread(rb_parser_t *p); +void rb_ruby_parser_set_parsing_thread(rb_parser_t *p, VALUE parsing_thread); +void rb_ruby_parser_ripper_initialize(rb_parser_t *p, VALUE (*gets)(struct parser_params*,VALUE), VALUE input, VALUE sourcefile_string, const char *sourcefile, int sourceline); +VALUE rb_ruby_parser_result(rb_parser_t *p); 
+rb_encoding *rb_ruby_parser_enc(rb_parser_t *p); +VALUE rb_ruby_parser_ruby_sourcefile_string(rb_parser_t *p); +int rb_ruby_parser_ruby_sourceline(rb_parser_t *p); +int rb_ruby_parser_lex_state(rb_parser_t *p); +void rb_ruby_ripper_parse0(rb_parser_t *p); +int rb_ruby_ripper_dedent_string(rb_parser_t *p, VALUE string, int width); +VALUE rb_ruby_ripper_lex_get_str(rb_parser_t *p, VALUE s); +int rb_ruby_ripper_initialized_p(rb_parser_t *p); +void rb_ruby_ripper_parser_initialize(rb_parser_t *p); +long rb_ruby_ripper_column(rb_parser_t *p); +long rb_ruby_ripper_token_len(rb_parser_t *p); +VALUE rb_ruby_ripper_lex_lastline(rb_parser_t *p); +VALUE rb_ruby_ripper_lex_state_name(struct parser_params *p, int state); +struct parser_params *rb_ruby_ripper_parser_allocate(void); +#endif + +#ifdef UNIVERSAL_PARSER +#undef rb_encoding +#endif #endif /* INTERNAL_PARSE_H */ diff --git a/internal/ruby_parser.h b/internal/ruby_parser.h new file mode 100644 index 0000000000..97ac49dd3b --- /dev/null +++ b/internal/ruby_parser.h @@ -0,0 +1,68 @@ +#ifndef INTERNAL_RUBY_PARSE_H +#define INTERNAL_RUBY_PARSE_H + +#include "rubyparser.h" +#include "internal.h" +#include "vm.h" + +RUBY_SYMBOL_EXPORT_BEGIN +#ifdef UNIVERSAL_PARSER +void rb_parser_config_initialize(rb_parser_config_t *config); +#endif +VALUE rb_parser_set_context(VALUE, const struct rb_iseq_struct *, int); +RUBY_SYMBOL_EXPORT_END + +VALUE rb_parser_new(void); +VALUE rb_parser_end_seen_p(VALUE); +VALUE rb_parser_encoding(VALUE); +VALUE rb_parser_set_yydebug(VALUE, VALUE); +void rb_parser_set_options(VALUE, int, int, int, int); +void *rb_parser_load_file(VALUE parser, VALUE name); +void rb_parser_keep_script_lines(VALUE vparser); +void rb_parser_error_tolerant(VALUE vparser); +void rb_parser_keep_tokens(VALUE vparser); + +rb_ast_t *rb_parser_compile_string(VALUE, const char*, VALUE, int); +rb_ast_t *rb_parser_compile_string_path(VALUE vparser, VALUE fname, VALUE src, int line); +rb_ast_t *rb_parser_compile_file_path(VALUE 
vparser, VALUE fname, VALUE input, int line); +rb_ast_t *rb_parser_compile_generic(VALUE vparser, VALUE (*lex_gets)(VALUE, int), VALUE fname, VALUE input, int line); + +enum lex_state_bits { + EXPR_BEG_bit, /* ignore newline, +/- is a sign. */ + EXPR_END_bit, /* newline significant, +/- is an operator. */ + EXPR_ENDARG_bit, /* ditto, and unbound braces. */ + EXPR_ENDFN_bit, /* ditto, and unbound braces. */ + EXPR_ARG_bit, /* newline significant, +/- is an operator. */ + EXPR_CMDARG_bit, /* newline significant, +/- is an operator. */ + EXPR_MID_bit, /* newline significant, +/- is an operator. */ + EXPR_FNAME_bit, /* ignore newline, no reserved words. */ + EXPR_DOT_bit, /* right after `.', `&.' or `::', no reserved words. */ + EXPR_CLASS_bit, /* immediate after `class', no here document. */ + EXPR_LABEL_bit, /* flag bit, label is allowed. */ + EXPR_LABELED_bit, /* flag bit, just after a label. */ + EXPR_FITEM_bit, /* symbol literal as FNAME. */ + EXPR_MAX_STATE +}; +/* examine combinations */ +enum lex_state_e { +#define DEF_EXPR(n) EXPR_##n = (1 << EXPR_##n##_bit) + DEF_EXPR(BEG), + DEF_EXPR(END), + DEF_EXPR(ENDARG), + DEF_EXPR(ENDFN), + DEF_EXPR(ARG), + DEF_EXPR(CMDARG), + DEF_EXPR(MID), + DEF_EXPR(FNAME), + DEF_EXPR(DOT), + DEF_EXPR(CLASS), + DEF_EXPR(LABEL), + DEF_EXPR(LABELED), + DEF_EXPR(FITEM), + EXPR_VALUE = EXPR_BEG, + EXPR_BEG_ANY = (EXPR_BEG | EXPR_MID | EXPR_CLASS), + EXPR_ARG_ANY = (EXPR_ARG | EXPR_CMDARG), + EXPR_END_ANY = (EXPR_END | EXPR_ENDARG | EXPR_ENDFN), + EXPR_NONE = 0 +}; +#endif /* INTERNAL_RUBY_PARSE_H */ |