summaryrefslogtreecommitdiff
path: root/include/ruby/re.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/ruby/re.h')
-rw-r--r-- include/ruby/re.h 154
1 files changed, 146 insertions, 8 deletions
diff --git a/include/ruby/re.h b/include/ruby/re.h
index ec0f425db0..f86d6f26cf 100644
--- a/include/ruby/re.h
+++ b/include/ruby/re.h
@@ -11,23 +11,161 @@
* file COPYING are met. Consult the file for details.
*/
#include "ruby/internal/config.h"
-#include <sys/types.h>
+
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+
#include <stdio.h>
+#include "ruby/onigmo.h"
#include "ruby/regex.h"
#include "ruby/internal/core/rmatch.h"
#include "ruby/internal/dllexport.h"
+struct re_registers; /* Defined in onigmo.h */
+
RBIMPL_SYMBOL_EXPORT_BEGIN()
-VALUE rb_reg_regcomp(VALUE);
-long rb_reg_search(VALUE, VALUE, long, int);
-VALUE rb_reg_regsub(VALUE, VALUE, struct re_registers *, VALUE);
-long rb_reg_adjust_startpos(VALUE, VALUE, long, int);
-void rb_match_busy(VALUE);
-VALUE rb_reg_quote(VALUE);
+/**
+ * Creates a new instance of ::rb_cRegexp. It can be seen as a specialised
+ * version of rb_reg_new_str() where it does not take options.
+ *
+ * @param[in] str Source code in String.
+ * @return Allocated new instance of ::rb_cRegexp.
+ */
+VALUE rb_reg_regcomp(VALUE str);
+
+/**
+ * Runs the passed regular expression over the passed string. In addition to
+ * the pattern and the target, this function takes the position and direction
+ * of the search, which makes it possible to start searching from the middle
+ * of the string.
+ *
+ * @param[in] re Regular expression to execute.
+ * @param[in] str Target string to search.
+ * @param[in] pos Offset in `str` to start searching, in bytes.
+ * @param[in] dir `pos`' direction; 0 means left-to-right, 1 for
+ * the opposite.
+ * @exception rb_eArgError `re` is broken.
+ * @exception rb_eRegexpError `re` is malformed.
+ * @retval -1 Match failed.
+ * @retval otherwise Offset of first such byte where match happened.
+ * @post `Regexp.last_match` is updated.
+ * @post `$&`, `$~`, etc., are updated.
+ *
+ * @internal
+ *
+ * Distinction between raising ::rb_eArgError and ::rb_eRegexpError is not
+ * obvious, at least to @shyouhei.
+ */
+long rb_reg_search(VALUE re, VALUE str, long pos, int dir);
+
+/**
+ * Substitution. This is basically the implementation of `String#sub`. Also
+ * `String#gsub` repeatedly calls this function.
+ *
+ * @param[in] repl Replacement string, e.g. `"\\1\\2"`
+ * @param[in] src Source string, to be replaced.
+ * @param[in] regs Matched data generated by applying `rexp` to `src`.
+ * @param[in] rexp Regular expression.
+ * @return A substituted string.
+ *
+ * @internal
+ *
+ * This function does not check for encoding compatibility. `String#sub!`
+ * etc. employ their own checker.
+ *
+ * `regs` should have been `const struct re_registers *` because it is read
+ * only. Kept as-is for compatibility.
+ */
+VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp);
+
+/**
+ * Tell us if this is a wrong idea, but it seems this function has no usage at
+ * all. Just remains here for theoretical backwards compatibility.
+ *
+ * @param[in] re Regular expression to execute.
+ * @param[in] str Target string to search.
+ * @param[in] pos Offset in `str` to start searching, in bytes.
+ * @param[in] dir `pos`' direction; 0 means left-to-right, 1 for
+ * the opposite.
+ * @return Adjusted nearest offset to `pos` inside of `str` that is a
+ * character boundary.
+ *
+ */
+long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir);
+
+/**
+ * Escapes any characters that would have special meaning in a regular
+ * expression.
+ *
+ * @param[in] str Target string to escape.
+ * @return A copy of `str` whose contents are escaped.
+ */
+VALUE rb_reg_quote(VALUE str);
+
+/**
+ * Exercises various checks and preprocessing so that the given regular
+ * expression can be applied to the given string. The preprocessing here
+ * includes (but is not limited to), for instance, encoding conversion.
+ *
+ * @param[in] re Target regular expression.
+ * @param[in] str What `re` is about to run on.
+ * @exception rb_eArgError `re` does not fit for `str`.
+ * @exception rb_eEncCompatError `re` and `str` are incompatible.
+ * @exception rb_eRegexpError `re` is malformed.
+ * @return A preprocessed pattern buffer ready to be applied to `str`.
+ * @note The return value is managed by our GC. Don't free.
+ *
+ * @internal
+ *
+ * The return type, `regex_t *`, is defined in `<ruby/onigmo.h>`, _and_
+ * _conflicts_ with POSIX's `<regex.h>`. We can no longer save the situation
+ * at this point. Just don't mix the two.
+ */
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
-int rb_reg_region_copy(struct re_registers *, const struct re_registers *);
+
+/**
+ * Runs a regular expression match using function `match`. Performs preparation,
+ * error handling, and memory cleanup.
+ *
+ * @param[in] re Target regular expression.
+ * @param[in] str What `re` is about to run on.
+ * @param[in] match The function to run to match `str` against `re`.
+ * @param[in] args Pointer to arguments to pass into `match`.
+ * @param[out] regs Registers on a successful match.
+ * @exception rb_eArgError `re` does not fit for `str`.
+ * @exception rb_eEncCompatError `re` and `str` are incompatible.
+ * @exception rb_eRegexpError `re` is malformed.
+ * @return Match position on a successful match, `ONIG_MISMATCH` otherwise.
+ *
+ * @internal
+ *
+ * The type `regex_t *` is defined in `<ruby/onigmo.h>`, _and_
+ * _conflicts_ with POSIX's `<regex.h>`. We can no longer save the situation
+ * at this point. Just don't mix the two.
+ */
+OnigPosition rb_reg_onig_match(VALUE re, VALUE str,
+ OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
+ void *args, struct re_registers *regs);
+
+/**
+ * Duplicates a match data. This is roughly the same as `onig_region_copy()`,
+ * except it tries to GC when there is not enough memory.
+ *
+ * @param[out] dst Target registers to fill.
+ * @param[in] src Source registers to duplicate.
+ * @exception rb_eNoMemError Not enough memory.
+ * @retval 0 Successful
+ * @retval ONIGERR_MEMORY Not enough memory, even after GC (unlikely).
+ * @post `dst` has identical contents to `src`.
+ *
+ * @internal
+ *
+ * It seems this function is here for `ext/strscan` and nothing else.
+ */
+int rb_reg_region_copy(struct re_registers *dst, const struct re_registers *src);
RBIMPL_SYMBOL_EXPORT_END()