summaryrefslogtreecommitdiff
path: root/include/ruby/internal/core/rmatch.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/ruby/internal/core/rmatch.h')
-rw-r--r--include/ruby/internal/core/rmatch.h91
1 files changed, 81 insertions, 10 deletions
diff --git a/include/ruby/internal/core/rmatch.h b/include/ruby/internal/core/rmatch.h
index 03ab5e5d82..a528c2999e 100644
--- a/include/ruby/internal/core/rmatch.h
+++ b/include/ruby/internal/core/rmatch.h
@@ -17,18 +17,23 @@
* recursively included from extension libraries written in C++.
* Do not expect for instance `__VA_ARGS__` is always available.
* We assume C99 for ruby itself but we don't assume languages of
- * extension libraries. They could be written in C++98.
+ * extension libraries. They could be written in C++98.
* @brief Defines struct ::RMatch.
*/
#include "ruby/internal/attr/artificial.h"
#include "ruby/internal/attr/pure.h"
-#include "ruby/internal/attr/returns_nonnull.h"
#include "ruby/internal/cast.h"
#include "ruby/internal/core/rbasic.h"
#include "ruby/internal/value.h"
#include "ruby/internal/value_type.h"
#include "ruby/assert.h"
+/**
+ * Convenient casting macro.
+ *
+ * @param obj An object, which is in fact an ::RMatch.
+ * @return The passed object casted to ::RMatch.
+ */
#define RMATCH(obj) RBIMPL_CAST((struct RMatch *)(obj))
/** @cond INTERNAL_MACRO */
#define RMATCH_REGS RMATCH_REGS
@@ -37,37 +42,103 @@
struct re_patter_buffer; /* a.k.a. OnigRegexType, defined in onigmo.h */
struct re_registers; /* Also in onigmo.h */
-/* @shyouhei wonders: is anyone actively using this typedef ...? */
+/**
+ * @old{re_pattern_buffer}
+ *
+ * @internal
+ *
+ * @shyouhei wonders: is anyone actively using this typedef ...?
+ */
typedef struct re_pattern_buffer Regexp;
+/**
+ * Represents the region of a capture group. This is basically for caching
+ * purpose. re_registers have similar concepts (`beg` and `end`) but they are
+ * in `ptrdiff_t*`. In order for us to implement `MatchData#offset` that info
+ * has to be converted to offset integers. This is the struct to hold such
+ * things.
+ *
+ * @internal
+ *
+ * But why on earth it has to be visible from extension libraries?
+ */
struct rmatch_offset {
- long beg;
- long end;
+ long beg; /**< Beginning of a group. */
+ long end; /**< End of a group. */
};
-struct rmatch {
+/** Represents a match. */
+struct rb_matchext_struct {
+ /**
+ * "Registers" of a match. This is a quasi-opaque struct that holds
+ * execution result of a match. Roughly resembles `&~`.
+ */
struct re_registers regs;
+ /** Capture group offsets, in C array. */
struct rmatch_offset *char_offset;
+
+ /** Number of ::rmatch_offset that ::rmatch::char_offset holds. */
int char_offset_num_allocated;
};
+typedef struct rb_matchext_struct rb_matchext_t;
+
+/**
+ * Regular expression execution context. When a regular expression "matches"
+ * to a string, it generates capture groups etc. This struct holds that info.
+ * Visible from Ruby as an instance of `MatchData`.
+ *
+ * @note There is no way for extension libraries to manually generate this
+ * struct except by actually exercising the match operation of a regular
+ * expression.
+ */
struct RMatch {
+
+ /** Basic part, including flags and class. */
struct RBasic basic;
+
+ /**
+ * The target string that the match was made against.
+ */
VALUE str;
- struct rmatch *rmatch;
+
+ /**
+ * The expression of this match.
+ */
VALUE regexp; /* RRegexp */
};
+#define RMATCH_EXT(m) ((rb_matchext_t *)((char *)(m) + sizeof(struct RMatch)))
+
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
-RBIMPL_ATTR_RETURNS_NONNULL()
RBIMPL_ATTR_ARTIFICIAL()
+/**
+ * Queries the raw ::re_registers.
+ *
+ * @param[in] match A match object
+ * @pre `match` must be of ::RMatch.
+ * @return Its execution result.
+ * @note Good. So you are aware of the fact that it could return NULL.
+ * Yes. It actually does. This is a really bizarre thing. The
+ * situation is about `String#gsub` and its family. They take
+ * strings as arguments, like `"foo".sub("bar", "baz")`. On such
+ * situations, in order to optimise memory allocations, these
+ * methods do not involve regular expressions at all. They just
+ * sequentially scan the receiver. Okay. The story begins here.
+ * Even when they do not kick our regexp engine, there must be
+ * backref objects e.g. `$&`. But how? You know what? Ruby fakes
+ * them. It allocates an empty ::RMatch and behaves as if there
+ * were execution contexts. In reality there weren't. No
+ * ::re_registers are allocated then. There is no way for this
+ * function but to return NULL for those fake ::RMatch. This is
+ * the reason for the nullability of this function.
+ */
static inline struct re_registers *
RMATCH_REGS(VALUE match)
{
RBIMPL_ASSERT_TYPE(match, RUBY_T_MATCH);
- RBIMPL_ASSERT_OR_ASSUME(RMATCH(match)->rmatch != NULL);
- return &RMATCH(match)->rmatch->regs;
+ return &RMATCH_EXT(match)->regs;
}
#endif /* RBIMPL_RMATCH_H */