/* Source: include/ruby/internal/encoding/coderange.h
 * (blob 7a81208c9e23f520fc18b329b88e143908080f3d) */
#ifndef RUBY_INTERNAL_ENCODING_CODERANGE_H           /*-*-C++-*-vi:se ft=cpp:*/
#define RUBY_INTERNAL_ENCODING_CODERANGE_H
/**
 * @file
 * @author     Ruby developers <ruby-core@ruby-lang.org>
 * @copyright  This  file  is   a  part  of  the   programming  language  Ruby.
 *             Permission  is hereby  granted,  to  either redistribute  and/or
 *             modify this file, provided that  the conditions mentioned in the
 *             file COPYING are met.  Consult the file for details.
 * @warning    Symbols   prefixed  with   either  `RBIMPL`   or  `rbimpl`   are
 *             implementation details.   Don't take  them as canon.  They could
 *             rapidly appear then vanish.  The name (path) of this header file
 *             is also an  implementation detail.  Do not expect  it to persist
 *             at the place it is now.  Developers are free to move it anywhere
 *             anytime at will.
 * @note       To  ruby-core:  remember  that   this  header  can  be  possibly
 *             recursively included  from extension  libraries written  in C++.
 *             Do not  expect for  instance `__VA_ARGS__` is  always available.
 *             We assume C99  for ruby itself but we don't  assume languages of
 *             extension libraries.  They could be written in C++98.
 * @brief      Routines for code ranges.
 */

#include "ruby/internal/attr/const.h"
#include "ruby/internal/attr/pure.h"
#include "ruby/internal/dllexport.h"
#include "ruby/internal/fl_type.h"
#include "ruby/internal/value.h"

RBIMPL_SYMBOL_EXPORT_BEGIN()

/**
 * Code range of an object; this is what rb_enc_str_coderange() returns.  The
 * non-zero values are built out of ::RUBY_FL_USER8 and ::RUBY_FL_USER9.
 */
enum ruby_coderange_type {

    /** Nothing is known about the object's code range (yet). */
    RUBY_ENC_CODERANGE_UNKNOWN  = 0,

    /** The object holds values 0 to 127 inclusive, and nothing else. */
    RUBY_ENC_CODERANGE_7BIT     = ((int)RUBY_FL_USER8),

    /** The object's encoding and contents are consistent with each other. */
    RUBY_ENC_CODERANGE_VALID    = ((int)RUBY_FL_USER9),

    /**
     * The object holds invalid/malformed/broken character(s).  Represented by
     * both of the bits above being set at once.
     */
    RUBY_ENC_CODERANGE_BROKEN   = (RUBY_ENC_CODERANGE_7BIT |
                                   RUBY_ENC_CODERANGE_VALID),

    /** Every bit that a code range can occupy. */
    RUBY_ENC_CODERANGE_MASK     = (RUBY_ENC_CODERANGE_BROKEN |
                                   RUBY_ENC_CODERANGE_VALID  |
                                   RUBY_ENC_CODERANGE_7BIT)
};

RBIMPL_ATTR_CONST()
/**
 * @private
 *
 * Backend of #RB_ENC_CODERANGE_CLEAN_P.  Not meant to be called directly.
 *
 * The result is the ::RUBY_ENC_CODERANGE_7BIT bit of `cr ^ (cr >> 1)`, so a
 * "true" answer is that bit's value rather than literally 1.
 *
 * NOTE(review): the trick presumably relies on ::RUBY_ENC_CODERANGE_VALID
 * being the bit right above ::RUBY_ENC_CODERANGE_7BIT, in which case the XOR
 * is non-zero at that position exactly when one (and only one) of the two bits
 * is set.  The flag layout lives outside of this header; confirm there.
 *
 * @param[in]  cr  An enum ::ruby_coderange_type.
 * @return     Non-zero if `cr` is clean, 0 otherwise.
 */
static inline int
rb_enc_coderange_clean_p(int cr)
{
    const int shifted = cr >> 1;
    const int differ  = cr ^ shifted;

    return differ & RUBY_ENC_CODERANGE_7BIT;
}

RBIMPL_ATTR_CONST()
/**
 * Tells whether a code range is "clean".  Here "clean" means that the code
 * range is known, and that it is valid.
 *
 * @param[in]  cr     An enum ::ruby_coderange_type.
 * @retval     true   `cr` is clean.
 * @retval     false  `cr` is not clean.
 */
static inline bool
RB_ENC_CODERANGE_CLEAN_P(enum ruby_coderange_type cr)
{
    /* The backend answers with "some non-zero int"; fold it into a boolean. */
    const int verdict = rb_enc_coderange_clean_p(cr);

    return verdict != 0;
}

RBIMPL_ATTR_PURE_UNLESS_DEBUG()
/**
 * Reads the (inline) code range of the passed object, by testing the object
 * against ::RUBY_ENC_CODERANGE_MASK.  The object must be capable of having
 * inline encoding.  Calling this needs deep understanding of bit level object
 * binary layout.
 *
 * @param[in]  obj  Target object.
 * @return     An enum ::ruby_coderange_type.
 */
static inline enum ruby_coderange_type
RB_ENC_CODERANGE(VALUE obj)
{
    const VALUE cr_bits = RB_FL_TEST_RAW(obj, RUBY_ENC_CODERANGE_MASK);

    return RBIMPL_CAST((enum ruby_coderange_type)cr_bits);
}

RBIMPL_ATTR_PURE_UNLESS_DEBUG()
/**
 * Tells whether the (inline) code range of the passed object is exactly
 * ::RUBY_ENC_CODERANGE_7BIT.  The object must be capable of having inline
 * encoding.  Calling this needs deep understanding of bit level object binary
 * layout.
 *
 * @param[in]  obj    Target object.
 * @retval     true   The code range is ::RUBY_ENC_CODERANGE_7BIT.
 * @retval     false  Anything else, including when the range is not known.
 */
static inline bool
RB_ENC_CODERANGE_ASCIIONLY(VALUE obj)
{
    const enum ruby_coderange_type cr = RB_ENC_CODERANGE(obj);

    return cr == RUBY_ENC_CODERANGE_7BIT;
}

/**
 * Destructively modifies the passed object so  that its (inline) code range is
 * the  passed one.   The object  must be  capable of  having inline  encoding.
 * Using this function needs deep understanding of bit level object binary
 * layout.
 *
 * Note that `cr` is handed  to RB_FL_SET_RAW() as-is; it is not  masked with
 * ::RUBY_ENC_CODERANGE_MASK here.
 *
 * @param[out]  obj  Target object.
 * @param[in]   cr   An enum ::ruby_coderange_type.
 * @post        `obj`'s code range is `cr`.
 */
static inline void
RB_ENC_CODERANGE_SET(VALUE obj, enum ruby_coderange_type cr)
{
    /* Order matters: drop the previous code range bits first, so that only the
     * bits of `cr` are left once the new value is raised. */
    RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK);
    RB_FL_SET_RAW(obj, cr);
}

/**
 * Destructively clears  the passed object's  (inline) code range.   The object
 * must be  capable of  having inline  encoding.  Using  this function needs
 * deep understanding of bit level object binary layout.
 *
 * @param[out]  obj  Target object.
 * @post        `obj`'s code range is ::RUBY_ENC_CODERANGE_UNKNOWN.
 */
static inline void
RB_ENC_CODERANGE_CLEAR(VALUE obj)
{
    /* ::RUBY_ENC_CODERANGE_UNKNOWN is 0, so unsetting every code range bit is
     * all it takes to get there. */
    RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK);
}

RBIMPL_ATTR_CONST()
/* assumed ASCII compatibility */
/**
 * "Mixes" two code ranges into one.  This is handy for instance when you
 * concatenate two strings into one.  Consider one of them is valid but the
 * other isn't.  The result cannot be valid then.  This function computes that
 * kind of mixture.
 *
 * The outcome depends on the order of the arguments:
 *
 *   - `a` is ::RUBY_ENC_CODERANGE_7BIT: the result is `b`, whatever it is.
 *   - `a` is ::RUBY_ENC_CODERANGE_VALID: the result is
 *     ::RUBY_ENC_CODERANGE_VALID when `b` is ::RUBY_ENC_CODERANGE_7BIT, and
 *     `b` itself otherwise.
 *   - `a` is anything else: the result is ::RUBY_ENC_CODERANGE_UNKNOWN,
 *     regardless of `b`.
 *
 * @param[in]  a  An enum ::ruby_coderange_type.
 * @param[in]  b  Another enum ::ruby_coderange_type.
 * @return     The `a` "and" `b`.
 */
static inline enum ruby_coderange_type
RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
{
    /* A 7bit left hand side adds no constraint of its own. */
    if (a == RUBY_ENC_CODERANGE_7BIT) {
        return b;
    }

    /* Neither 7bit nor valid: give up and let the range be recomputed. */
    if (a != RUBY_ENC_CODERANGE_VALID) {
        return RUBY_ENC_CODERANGE_UNKNOWN;
    }

    /* From here on `a` is valid; a 7bit `b` cannot degrade it. */
    return (b == RUBY_ENC_CODERANGE_7BIT) ? RUBY_ENC_CODERANGE_VALID : b;
}

/*
 * Old spellings without the RUBY_ / RB_ prefix.  Each of them just expands to
 * the prefixed name on its right; function-like ones forward their arguments
 * unchanged.
 *
 * NOTE(review): RB_ENCODING_CODERANGE_SET is not defined in this header.  It
 * has to be provided by some other header before ENCODING_CODERANGE_SET can
 * be used -- confirm where it comes from.
 */
#define ENC_CODERANGE_MASK                        RUBY_ENC_CODERANGE_MASK                      /**< @old{RUBY_ENC_CODERANGE_MASK} */
#define ENC_CODERANGE_UNKNOWN                     RUBY_ENC_CODERANGE_UNKNOWN                   /**< @old{RUBY_ENC_CODERANGE_UNKNOWN} */
#define ENC_CODERANGE_7BIT                        RUBY_ENC_CODERANGE_7BIT                      /**< @old{RUBY_ENC_CODERANGE_7BIT} */
#define ENC_CODERANGE_VALID                       RUBY_ENC_CODERANGE_VALID                     /**< @old{RUBY_ENC_CODERANGE_VALID} */
#define ENC_CODERANGE_BROKEN                      RUBY_ENC_CODERANGE_BROKEN                    /**< @old{RUBY_ENC_CODERANGE_BROKEN} */
#define ENC_CODERANGE_CLEAN_P(cr)                 RB_ENC_CODERANGE_CLEAN_P(cr)                 /**< @old{RB_ENC_CODERANGE_CLEAN_P} */
#define ENC_CODERANGE(obj)                        RB_ENC_CODERANGE(obj)                        /**< @old{RB_ENC_CODERANGE} */
#define ENC_CODERANGE_ASCIIONLY(obj)              RB_ENC_CODERANGE_ASCIIONLY(obj)              /**< @old{RB_ENC_CODERANGE_ASCIIONLY} */
#define ENC_CODERANGE_SET(obj,cr)                 RB_ENC_CODERANGE_SET(obj,cr)                 /**< @old{RB_ENC_CODERANGE_SET} */
#define ENC_CODERANGE_CLEAR(obj)                  RB_ENC_CODERANGE_CLEAR(obj)                  /**< @old{RB_ENC_CODERANGE_CLEAR} */
#define ENC_CODERANGE_AND(a, b)                   RB_ENC_CODERANGE_AND(a, b)                   /**< @old{RB_ENC_CODERANGE_AND} */
#define ENCODING_CODERANGE_SET(obj, encindex, cr) RB_ENCODING_CODERANGE_SET(obj, encindex, cr) /**< @old{RB_ENCODING_CODERANGE_SET} */

/** @cond INTERNAL_MACRO */
/* Every name below is defined as itself, so expanding it changes nothing and
 * the inline functions of the same names stay callable as usual.
 * NOTE(review): presumably these exist so that `#ifdef RB_ENC_CODERANGE` and
 * friends keep working for code that probes for them with the preprocessor --
 * confirm against the project's conventions. */
#define RB_ENC_CODERANGE           RB_ENC_CODERANGE
#define RB_ENC_CODERANGE_AND       RB_ENC_CODERANGE_AND
#define RB_ENC_CODERANGE_ASCIIONLY RB_ENC_CODERANGE_ASCIIONLY
#define RB_ENC_CODERANGE_CLEAN_P   RB_ENC_CODERANGE_CLEAN_P
#define RB_ENC_CODERANGE_CLEAR     RB_ENC_CODERANGE_CLEAR
#define RB_ENC_CODERANGE_SET       RB_ENC_CODERANGE_SET
/** @endcond */

RBIMPL_SYMBOL_EXPORT_END()

#endif /* RUBY_INTERNAL_ENCODING_CODERANGE_H */