summaryrefslogtreecommitdiff
path: root/prism/util/pm_char.c
blob: dce19abd1b102f41e9716c06409e25228e4afe52 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
#include "prism/util/pm_char.h"

/**
 * Bit flags stored in the entries of pm_byte_table. An entry may combine
 * several of them with bitwise OR.
 */

// Set for '\t', '\n', '\v', '\f', '\r', and ' '.
#define PRISM_CHAR_BIT_WHITESPACE (1 << 0)
// Set for the same bytes as PRISM_CHAR_BIT_WHITESPACE except '\n'.
#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
// Set for the ASCII letters 'A'-'Z' and 'a'-'z'.
#define PRISM_CHAR_BIT_REGEXP_OPTION (1 << 2)

/**
 * Bit flags stored in the entries of pm_number_table. Each base has two flags:
 * the *_DIGIT flag is set only for the digits of that base, while the *_NUMBER
 * flag is set for those same digits and additionally for '_'.
 */

// '0' and '1'.
#define PRISM_NUMBER_BIT_BINARY_DIGIT (1 << 0)
// '0', '1', and '_'.
#define PRISM_NUMBER_BIT_BINARY_NUMBER (1 << 1)
// '0'-'7'.
#define PRISM_NUMBER_BIT_OCTAL_DIGIT (1 << 2)
// '0'-'7' and '_'.
#define PRISM_NUMBER_BIT_OCTAL_NUMBER (1 << 3)
// '0'-'9'.
#define PRISM_NUMBER_BIT_DECIMAL_DIGIT (1 << 4)
// '0'-'9' and '_'.
#define PRISM_NUMBER_BIT_DECIMAL_NUMBER (1 << 5)
// '0'-'9', 'A'-'F', and 'a'-'f'.
#define PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
// '0'-'9', 'A'-'F', 'a'-'f', and '_'.
#define PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)

/**
 * Classification flags for every possible byte value, indexed by the byte
 * itself. Each entry is a bitwise OR of the character flags:
 *
 *     1 = whitespace, 2 = inline whitespace, 4 = regexp option
 *
 * Only the non-zero entries are spelled out. Each designator below starts a
 * run of consecutive entries, and every entry that is not listed is
 * implicitly zero.
 */
static const uint8_t pm_byte_table[256] = {
    // '\t', '\n', '\v', '\f', '\r': all of them are whitespace, and all of
    // them except '\n' are inline whitespace as well.
    [0x09] = 3, 1, 3, 3, 3,

    // ' '
    [0x20] = 3,

    // 'A' through 'Z'
    [0x41] = 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
             4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,

    // 'a' through 'z'
    [0x61] = 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
             4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
};

/**
 * Numeric classification flags for every possible byte value, indexed by the
 * byte itself. Each entry is a bitwise OR of the number flags, where every
 * base owns a "digit" bit and a "number" bit (the latter is also set for '_'):
 *
 *     0x01 binary digit         0x02 binary number
 *     0x04 octal digit          0x08 octal number
 *     0x10 decimal digit        0x20 decimal number
 *     0x40 hexadecimal digit    0x80 hexadecimal number
 *
 * Only the non-zero entries are spelled out. Each designator below starts a
 * run of consecutive entries, and every entry that is not listed is
 * implicitly zero.
 */
static const uint8_t pm_number_table[256] = {
    // '0' and '1' are valid in every base, '2' through '7' in every base but
    // binary, '8' and '9' only in decimal and hexadecimal.
    [0x30] = 0xff, 0xff,
             0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
             0xf0, 0xf0,

    // 'A' through 'F' are hexadecimal only.
    [0x41] = 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,

    // '_' carries the "number" bit of every base but none of the "digit" bits.
    [0x5f] = 0xaa,

    // 'a' through 'f' are hexadecimal only.
    [0x61] = 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
};

/**
 * Counts the bytes at the start of the string whose pm_byte_table entry has at
 * least one of the bits in kind set. Scanning stops at the first byte that
 * does not match, or once length bytes have been examined, whichever comes
 * first. A length that is zero or negative yields 0 without reading anything.
 */
static inline size_t
pm_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
    if (length <= 0) {
        return 0;
    }

    const size_t limit = (size_t) length;
    size_t count;

    for (count = 0; count < limit; count++) {
        if ((pm_byte_table[string[count]] & kind) == 0) {
            break;
        }
    }

    return count;
}

/**
 * Measures the run of whitespace bytes ('\t', '\n', '\v', '\f', '\r', ' ') at
 * the start of the string, looking at no more than length bytes.
 */
size_t
pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
    const uint8_t kind = PRISM_CHAR_BIT_WHITESPACE;
    return pm_strspn_char_kind(string, length, kind);
}

/**
 * Measures the run of whitespace bytes at the start of the string, looking at
 * no more than length bytes. The address of every '\n' that is part of the run
 * is handed to pm_newline_list_append together with the given newline list. A
 * length that is zero or negative yields 0 without reading anything.
 */
size_t
pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list) {
    if (length <= 0) {
        return 0;
    }

    const size_t limit = (size_t) length;
    size_t count;

    for (count = 0; count < limit; count++) {
        const uint8_t byte = string[count];

        // The first byte that is not whitespace ends the run.
        if ((pm_byte_table[byte] & PRISM_CHAR_BIT_WHITESPACE) == 0) {
            break;
        }

        if (byte == '\n') {
            pm_newline_list_append(newline_list, string + count);
        }
    }

    return count;
}

/**
 * Measures the run of inline whitespace bytes ('\t', '\v', '\f', '\r', ' ', but
 * not '\n') at the start of the string, looking at no more than length bytes.
 */
size_t
pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
    const uint8_t kind = PRISM_CHAR_BIT_INLINE_WHITESPACE;
    return pm_strspn_char_kind(string, length, kind);
}

/**
 * Measures the run of regexp option bytes (the ASCII letters 'A'-'Z' and
 * 'a'-'z') at the start of the string, looking at no more than length bytes.
 */
size_t
pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
    const uint8_t kind = PRISM_CHAR_BIT_REGEXP_OPTION;
    return pm_strspn_char_kind(string, length, kind);
}

/**
 * Reports whether the pm_byte_table entry for the given byte has at least one
 * of the bits in kind set.
 */
static inline bool
pm_char_is_char_kind(const uint8_t b, uint8_t kind) {
    const uint8_t flags = pm_byte_table[b];
    return (flags & kind) != 0;
}

/**
 * Reports whether the given byte is whitespace, i.e. one of '\t', '\n', '\v',
 * '\f', '\r', or ' '.
 */
bool
pm_char_is_whitespace(const uint8_t b) {
    const uint8_t kind = PRISM_CHAR_BIT_WHITESPACE;
    return pm_char_is_char_kind(b, kind);
}

/**
 * Reports whether the given byte is inline whitespace, i.e. one of '\t', '\v',
 * '\f', '\r', or ' '. A newline does not count.
 */
bool
pm_char_is_inline_whitespace(const uint8_t b) {
    const uint8_t kind = PRISM_CHAR_BIT_INLINE_WHITESPACE;
    return pm_char_is_char_kind(b, kind);
}

/**
 * Counts the bytes at the start of the string whose pm_number_table entry has
 * at least one of the bits in kind set. Scanning stops at the first byte that
 * does not match, or once length bytes have been examined, whichever comes
 * first. A length that is zero or negative yields 0 without reading anything.
 */
static inline size_t
pm_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
    if (length <= 0) {
        return 0;
    }

    const size_t limit = (size_t) length;
    size_t count;

    for (count = 0; count < limit; count++) {
        if ((pm_number_table[string[count]] & kind) == 0) {
            break;
        }
    }

    return count;
}

/**
 * Scan through the string and return the number of bytes at the start of the
 * string whose pm_number_table entry matches the given kind. Disallows
 * searching past the given maximum number of bytes; a length that is zero or
 * negative yields 0 without reading anything.
 *
 * Additionally, validate the underscores within the matched run. An underscore
 * is invalid if it directly follows another underscore or if it is the final
 * byte of the run. When at least one invalid underscore is found, the invalid
 * out parameter is pointed at the last of them. When none is found, the out
 * parameter is left untouched, so callers should initialize it beforehand.
 */
static inline size_t
pm_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
    if (length <= 0) return 0;

    size_t size = 0;
    size_t maximum = (size_t) length;

    // Tracks whether the previously matched byte was an underscore, so that a
    // second underscore in a row can be reported.
    bool underscore = false;
    while (size < maximum && (pm_number_table[string[size]] & kind)) {
        if (string[size] == '_') {
            if (underscore) *invalid = string + size;
            underscore = true;
        } else {
            underscore = false;
        }

        size++;
    }

    // A trailing underscore is invalid as well. The run may be empty if the
    // very first byte does not match, in which case there is no final byte to
    // inspect and string[size - 1] would read before the start of the buffer.
    if (size > 0 && string[size - 1] == '_') *invalid = string + size - 1;
    return size;
}

/**
 * Measures the run of binary digits ('0', '1') and underscores at the start of
 * the string, looking at no more than length bytes.
 *
 * An underscore that directly follows another underscore, or that ends the
 * run, is invalid. When any such underscore is found, the invalid out
 * parameter is pointed at the last of them; otherwise it is not written.
 */
size_t
pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
    const uint8_t kind = PRISM_NUMBER_BIT_BINARY_NUMBER;
    return pm_strspn_number_kind_underscores(string, length, invalid, kind);
}

/**
 * Measures the run of octal digits ('0'-'7') and underscores at the start of
 * the string, looking at no more than length bytes.
 *
 * An underscore that directly follows another underscore, or that ends the
 * run, is invalid. When any such underscore is found, the invalid out
 * parameter is pointed at the last of them; otherwise it is not written.
 */
size_t
pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
    const uint8_t kind = PRISM_NUMBER_BIT_OCTAL_NUMBER;
    return pm_strspn_number_kind_underscores(string, length, invalid, kind);
}

/**
 * Measures the run of decimal digits ('0'-'9') at the start of the string,
 * looking at no more than length bytes. Underscores are not part of the run.
 */
size_t
pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
    const uint8_t kind = PRISM_NUMBER_BIT_DECIMAL_DIGIT;
    return pm_strspn_number_kind(string, length, kind);
}

/**
 * Measures the run of decimal digits ('0'-'9') and underscores at the start of
 * the string, looking at no more than length bytes.
 *
 * An underscore that directly follows another underscore, or that ends the
 * run, is invalid. When any such underscore is found, the invalid out
 * parameter is pointed at the last of them; otherwise it is not written.
 */
size_t
pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
    const uint8_t kind = PRISM_NUMBER_BIT_DECIMAL_NUMBER;
    return pm_strspn_number_kind_underscores(string, length, invalid, kind);
}

/**
 * Measures the run of hexadecimal digits ('0'-'9', 'A'-'F', 'a'-'f') at the
 * start of the string, looking at no more than length bytes. Underscores are
 * not part of the run.
 */
size_t
pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
    const uint8_t kind = PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT;
    return pm_strspn_number_kind(string, length, kind);
}

/**
 * Measures the run of hexadecimal digits ('0'-'9', 'A'-'F', 'a'-'f') and
 * underscores at the start of the string, looking at no more than length
 * bytes.
 *
 * An underscore that directly follows another underscore, or that ends the
 * run, is invalid. When any such underscore is found, the invalid out
 * parameter is pointed at the last of them; otherwise it is not written.
 */
size_t
pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
    const uint8_t kind = PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER;
    return pm_strspn_number_kind_underscores(string, length, invalid, kind);
}

/**
 * Reports whether the pm_number_table entry for the given byte has at least
 * one of the bits in kind set.
 */
static inline bool
pm_char_is_number_kind(const uint8_t b, uint8_t kind) {
    const uint8_t flags = pm_number_table[b];
    return (flags & kind) != 0;
}

/**
 * Reports whether the given byte is a binary digit, i.e. '0' or '1'.
 */
bool
pm_char_is_binary_digit(const uint8_t b) {
    const uint8_t kind = PRISM_NUMBER_BIT_BINARY_DIGIT;
    return pm_char_is_number_kind(b, kind);
}

/**
 * Reports whether the given byte is an octal digit, i.e. '0' through '7'.
 */
bool
pm_char_is_octal_digit(const uint8_t b) {
    const uint8_t kind = PRISM_NUMBER_BIT_OCTAL_DIGIT;
    return pm_char_is_number_kind(b, kind);
}

/**
 * Reports whether the given byte is a decimal digit, i.e. '0' through '9'.
 */
bool
pm_char_is_decimal_digit(const uint8_t b) {
    const uint8_t kind = PRISM_NUMBER_BIT_DECIMAL_DIGIT;
    return pm_char_is_number_kind(b, kind);
}

/**
 * Reports whether the given byte is a hexadecimal digit, i.e. '0' through '9',
 * 'A' through 'F', or 'a' through 'f'.
 */
bool
pm_char_is_hexadecimal_digit(const uint8_t b) {
    const uint8_t kind = PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT;
    return pm_char_is_number_kind(b, kind);
}

// Undefine the pm_byte_table bit flags that were defined at the top of this
// file, now that nothing below needs them.
#undef PRISM_CHAR_BIT_WHITESPACE
#undef PRISM_CHAR_BIT_INLINE_WHITESPACE
#undef PRISM_CHAR_BIT_REGEXP_OPTION

// Likewise undefine the pm_number_table bit flags.
#undef PRISM_NUMBER_BIT_BINARY_DIGIT
#undef PRISM_NUMBER_BIT_BINARY_NUMBER
#undef PRISM_NUMBER_BIT_OCTAL_DIGIT
#undef PRISM_NUMBER_BIT_OCTAL_NUMBER
#undef PRISM_NUMBER_BIT_DECIMAL_DIGIT
#undef PRISM_NUMBER_BIT_DECIMAL_NUMBER
#undef PRISM_NUMBER_BIT_HEXADECIMAL_NUMBER
#undef PRISM_NUMBER_BIT_HEXADECIMAL_DIGIT