summaryrefslogtreecommitdiff
path: root/include/ruby/internal/core/rmatch.h
blob: 2d2fd897f5d22de2b73d948d8a426001712d728c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#ifndef RBIMPL_RMATCH_H                              /*-*-C++-*-vi:se ft=cpp:*/
#define RBIMPL_RMATCH_H
/**
 * @file
 * @author     Ruby developers <ruby-core@ruby-lang.org>
 * @copyright  This  file  is   a  part  of  the   programming  language  Ruby.
 *             Permission  is hereby  granted,  to  either redistribute  and/or
 *             modify this file, provided that  the conditions mentioned in the
 *             file COPYING are met.  Consult the file for details.
 * @warning    Symbols   prefixed  with   either  `RBIMPL`   or  `rbimpl`   are
 *             implementation details.   Don't take  them as canon.  They could
 *             rapidly appear then vanish.  The name (path) of this header file
 *             is also an  implementation detail.  Do not expect  it to persist
 *             at the place it is now.  Developers are free to move it anywhere
 *             anytime at will.
 * @note       To  ruby-core:  remember  that   this  header  can  be  possibly
 *             recursively included  from extension  libraries written  in C++.
 *             Do not  expect for  instance `__VA_ARGS__` is  always available.
 *             We assume C99  for ruby itself but we don't  assume languages of
 *             extension libraries.  They could be written in C++98.
 * @brief      Defines struct ::RMatch.
 */
#include "ruby/internal/attr/artificial.h"
#include "ruby/internal/attr/pure.h"
#include "ruby/internal/cast.h"
#include "ruby/internal/core/rbasic.h"
#include "ruby/internal/value.h"
#include "ruby/internal/value_type.h"
#include "ruby/assert.h"

/**
 * Convenient casting macro.  The expansion visible here is a plain C-style
 * pointer cast wrapped in RBIMPL_CAST(); nothing in this macro itself checks
 * that `obj` really is a match object, so that is the caller's duty.
 *
 * @param   obj  An object, which is in fact an ::RMatch.
 * @return  The passed object cast to a pointer to struct ::RMatch.
 */
#define RMATCH(obj) RBIMPL_CAST((struct RMatch *)(obj))
/** @cond INTERNAL_MACRO */
/* Self-referential definition: the macro expands to its own name, so uses of
 * `RMATCH_REGS` still resolve to the inline function of the same name defined
 * later in this header, while `#ifdef RMATCH_REGS` evaluates true.
 * Presumably kept for code that probes for the name with the preprocessor. */
#define RMATCH_REGS RMATCH_REGS
/** @endcond */

/* Forward declarations of regular expression engine types.  Their definitions
 * are not in this header.  The first tag must be spelled exactly like the one
 * used by the `Regexp` typedef, otherwise an unrelated tag gets declared. */
struct re_pattern_buffer; /* a.k.a. OnigRegexType, defined in onigmo.h */
struct re_registers;      /* Also in onigmo.h */

/**
 * Alias of `struct re_pattern_buffer`.
 *
 * @old{re_pattern_buffer}
 *
 * @internal
 *
 * @shyouhei wonders: is anyone actively using this typedef ...?
 */
typedef struct re_pattern_buffer Regexp;

/**
 * Represents the region of a capture group.  This exists basically for caching
 * purposes.  ::re_registers has similar concepts (`beg` and `end`), but there
 * they are stored as `ptrdiff_t*`.  In order for us to implement
 * `MatchData#offset`, that info has to be converted to offset integers.  This
 * is the struct that holds such converted values.
 *
 * @internal
 *
 * But why on earth does it have to be visible from extension libraries?
 */
struct rmatch_offset {
    long beg; /**< Beginning of a group. */
    long end; /**< End of a group. */
};

/**
 * Represents a match.  This is the payload that ::RMatch::rmatch points to.
 */
struct rmatch {
    /**
     * "Registers"  of a  match.   This  is a  quasi-opaque  struct that  holds
     * execution result of a match.  Roughly resembles `$~`.
     *
     * @internal
     *
     * The registers are embedded by value (not via a pointer), so the complete
     * definition of `struct re_registers` (the forward declaration in this
     * header says it lives in onigmo.h) has to be visible before this struct
     * is defined.
     */
    struct re_registers regs;

    /** Capture group offsets, as a C array of ::rmatch_offset. */
    struct rmatch_offset *char_offset;

    /** Number of ::rmatch_offset that ::rmatch::char_offset holds. */
    int char_offset_num_allocated;
};

/**
 * Regular expression execution context.  When a regular expression "matches"
 * against a string, it generates capture groups etc.  This struct holds that
 * info.  Visible from Ruby as an instance of `MatchData`.
 *
 * @note  There is  no way  for extension libraries  to manually  generate this
 *        struct except by actually exercising the match operation of a regular
 *        expression.
 */
struct RMatch {

    /** Basic part, including flags and class. */
    struct RBasic basic;

    /**
     * The target string that the match was made against.
     */
    VALUE str;

    /**
     * The result of this match.  Points to a separately stored ::rmatch, which
     * carries the registers and the cached capture group offsets.
     */
    struct rmatch *rmatch;

    /**
     * The expression of this match.
     */
    VALUE regexp;  /* RRegexp */
};

RBIMPL_ATTR_PURE_UNLESS_DEBUG()
RBIMPL_ATTR_ARTIFICIAL()
/**
 * Obtains the raw ::re_registers that belong to a match object.
 *
 * @param[in]  match  A match object.
 * @pre        `match` must be of ::RMatch.
 * @return     Address of the ::rmatch::regs member of the ::rmatch that the
 *             passed match object points to.
 * @note       Historically this function has been documented as nullable.
 *             The story is about `String#gsub` and its family.  When they
 *             take strings as arguments, like `"foo".sub("bar", "baz")`, they
 *             do not involve regular expressions at all in order to save
 *             memory allocations; they just scan the receiver sequentially.
 *             Backref objects such as `$&` must exist nevertheless, so Ruby
 *             fakes them by allocating an empty ::RMatch for which no
 *             ::re_registers were said to be allocated.
 * @note       NOTE(review): the body below asserts (or, in non-debug builds,
 *             assumes) that ::RMatch::rmatch is not NULL and then returns the
 *             address of a member embedded in it.  That does not agree with
 *             the nullability described above -- confirm which one reflects
 *             the current implementation before relying on a NULL check.
 */
static inline struct re_registers *
RMATCH_REGS(VALUE match)
{
    struct rmatch *result;

    RBIMPL_ASSERT_TYPE(match, RUBY_T_MATCH);

    /* Fetch the payload pointer once; it is both checked and dereferenced. */
    result = RMATCH(match)->rmatch;
    RBIMPL_ASSERT_OR_ASSUME(result != NULL);

    return &result->regs;
}

#endif /* RBIMPL_RMATCH_H */