include/ruby/re.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172

#ifndef RUBY_RE_H                                    /*-*-C++-*-vi:se ft=cpp:*/
#define RUBY_RE_H 1
/**
 * @file
 * @author     $Author$
 * @date       Thu Sep 30 14:18:32 JST 1993
 * @copyright  Copyright (C) 1993-2007 Yukihiro Matsumoto
 * @copyright  This  file  is   a  part  of  the   programming  language  Ruby.
 *             Permission  is hereby  granted,  to  either redistribute  and/or
 *             modify this file, provided that  the conditions mentioned in the
 *             file COPYING are met.  Consult the file for details.
 */
#include "ruby/internal/config.h"

#ifdef HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif

#include <stdio.h>

#include "ruby/onigmo.h"
#include "ruby/regex.h"
#include "ruby/internal/core/rmatch.h"
#include "ruby/internal/dllexport.h"

struct re_registers;            /* Defined in onigmo.h */

RBIMPL_SYMBOL_EXPORT_BEGIN()

/**
 * Creates a  new instance of  ::rb_cRegexp.  It can  be seen as  a specialised
 * version of rb_reg_new_str() that does not take options.
 *
 * @param[in]  str  Source of the regular expression, as a String.
 * @return     A newly allocated instance of ::rb_cRegexp.
 */
VALUE rb_reg_regcomp(VALUE str);

/**
 * Runs the passed regular expression over the passed string.  The caller also
 * specifies the position and the direction of the search, which makes it
 * possible to start searching from the middle of the string.
 *
 * @param[in]  re               Regular expression to execute.
 * @param[in]  str              Target string to search.
 * @param[in]  pos              Offset in `str` to start searching, in bytes.
 * @param[in]  dir              Direction of the search from `pos`; 0 means
 *                              left-to-right, 1 for the opposite.
 * @exception  rb_eArgError     `re` is broken.
 * @exception  rb_eRegexpError  `re` is malformed.
 * @retval     -1               Match failed.
 * @retval     otherwise        Offset of the first byte where the match
 *                              happened.
 * @post       `Regexp.last_match` is updated.
 * @post       `$&`, `$~`, etc., are updated.
 *
 * @internal
 *
 * Distinction  between raising  ::rb_eArgError  and  ::rb_eRegexpError is  not
 * obvious, at least to @shyouhei.
 *
 * NOTE(review): an earlier wording of this comment contrasted this function
 * with rb_reg_search() itself; presumably a sibling that takes neither `pos`
 * nor `dir` was meant -- confirm which one before restoring the comparison.
 */
long rb_reg_search(VALUE re, VALUE str, long pos, int dir);

/**
 * Substitution.  This  is basically the implementation  of `String#sub`.
 * `String#gsub` also calls this function, repeatedly.
 *
 * @param[in]  repl  Replacement string, e.g. `"\\1\\2"`.
 * @param[in]  src   Source string, to be replaced.
 * @param[in]  regs  Match data generated by applying `rexp` to `src`.
 * @param[in]  rexp  Regular expression.
 * @return     A substituted string.
 *
 * @internal
 *
 * This  function does  not  check for  encoding compatibility.   `String#sub!`
 * etc. employ their own checker.
 *
 * `regs` should have been `const struct re_registers *` because it is
 * read-only.  Kept as-is for compatibility.
 */
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp);

/**
 * This function seems to have no usage at all (tell us if this is a wrong
 * idea).  It just remains here for theoretical backwards compatibility.
 *
 * @param[in]  re               Regular expression to execute.
 * @param[in]  str              Target string to search.
 * @param[in]  pos              Offset in `str` to start searching, in bytes.
 * @param[in]  dir              Direction of the search from `pos`; 0 means
 *                              left-to-right, 1 for the opposite.
 * @return     Adjusted offset inside of `str`, nearest to `pos`, that is a
 *             character boundary.
 */
long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir);

/**
 * Escapes  any  characters  that  would  have special  meaning  in  a  regular
 * expression.
 *
 * @param[in]  str  Target string to escape.
 * @return     A copy of `str` with its contents escaped.
 */
VALUE rb_reg_quote(VALUE str);

/**
 * Performs various checks and preprocessing so that the given regular
 * expression can be applied to the given string.  The preprocessing here
 * includes (but is not limited to), for instance, encoding conversion.
 *
 * @param[in]  re                  Target regular expression.
 * @param[in]  str                 What `re` is about to run on.
 * @exception  rb_eArgError        `re` does not fit for `str`.
 * @exception  rb_eEncCompatError  `re` and `str` are incompatible.
 * @exception  rb_eRegexpError     `re` is malformed.
 * @return     A preprocessed pattern buffer ready to be applied to `str`.
 * @note       The return value is managed by our GC.  Don't free it.
 *
 * @internal
 *
 * The  return  type,  `regex_t  *`, is  defined  in  `<ruby/onigmo.h>`,  _and_
 * _conflicts_ with POSIX's  `<regex.h>`.  We can no longer  save the situation
 * at this point.  Just don't mix the two.
 */
regex_t *rb_reg_prepare_re(VALUE re, VALUE str);

/**
 * Runs a regular expression match using the function `match`.  Performs
 * preparation, error handling, and memory cleanup.
 *
 * @param[in]  re                  Target regular expression.
 * @param[in]  str                 What `re` is about to run on.
 * @param[in]  match               The function to run to match `str` against `re`.
 * @param[in]  args                Pointer to arguments to pass into `match`.
 * @param[out] regs                Registers on a successful match.
 * @exception  rb_eArgError        `re` does not fit for `str`.
 * @exception  rb_eEncCompatError  `re` and `str` are incompatible.
 * @exception  rb_eRegexpError     `re` is malformed.
 * @return     Match position on a successful match, `ONIG_MISMATCH` otherwise.
 *
 * @internal
 *
 * The type `regex_t *` is defined in `<ruby/onigmo.h>`, _and_ _conflicts_
 * with POSIX's `<regex.h>`.  We can no longer save the situation at this
 * point.  Just don't mix the two.
 */
OnigPosition rb_reg_onig_match(VALUE re, VALUE str,
                               OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
                               void *args, struct re_registers *regs);

/**
 * Duplicates a match data.  This  is roughly the same as `onig_region_copy()`,
 * except that it tries to GC when there is not enough memory.
 *
 * @param[out]  dst             Target registers to fill.
 * @param[in]   src             Source registers to duplicate.
 * @exception   rb_eNoMemError  Not enough memory.
 * @retval      0               Successful.
 * @retval      ONIGERR_MEMORY  Not enough memory, even after GC (unlikely).
 * @post        `dst` has identical contents to `src`.
 *
 * @internal
 *
 * It seems this function is here for `ext/strscan` and nothing else.
 */
int rb_reg_region_copy(struct re_registers *dst, const struct re_registers *src);

RBIMPL_SYMBOL_EXPORT_END()

#endif /* RUBY_RE_H */