diff options
Diffstat (limited to 'prism/internal/char.h')
| -rw-r--r-- | prism/internal/char.h | 139 |
1 files changed, 139 insertions, 0 deletions
diff --git a/prism/internal/char.h b/prism/internal/char.h new file mode 100644 index 0000000000..9a58fba8c5 --- /dev/null +++ b/prism/internal/char.h @@ -0,0 +1,139 @@ +#ifndef PRISM_INTERNAL_CHAR_H +#define PRISM_INTERNAL_CHAR_H + +#include "prism/compiler/force_inline.h" + +#include "prism/arena.h" +#include "prism/line_offset_list.h" + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +/* Bit flag for whitespace characters in pm_byte_table. */ +#define PRISM_CHAR_BIT_WHITESPACE (1 << 0) + +/* Bit flag for inline whitespace characters in pm_byte_table. */ +#define PRISM_CHAR_BIT_INLINE_WHITESPACE (1 << 1) + +/* + * A lookup table for classifying bytes. Each entry is a bitfield of + * PRISM_CHAR_BIT_* flags. Defined in char.c. + */ +extern const uint8_t pm_byte_table[256]; + +/* Returns true if the given character is a whitespace character. */ +static PRISM_FORCE_INLINE bool +pm_char_is_whitespace(const uint8_t b) { + return (pm_byte_table[b] & PRISM_CHAR_BIT_WHITESPACE) != 0; +} + +/* Returns true if the given character is an inline whitespace character. */ +static PRISM_FORCE_INLINE bool +pm_char_is_inline_whitespace(const uint8_t b) { + return (pm_byte_table[b] & PRISM_CHAR_BIT_INLINE_WHITESPACE) != 0; +} + +/* + * Returns the number of characters at the start of the string that are inline + * whitespace (space/tab). Scans the byte table directly for use in hot paths. + */ +static PRISM_FORCE_INLINE size_t +pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) { + if (length <= 0) return 0; + size_t size = 0; + size_t maximum = (size_t) length; + while (size < maximum && (pm_byte_table[string[size]] & PRISM_CHAR_BIT_INLINE_WHITESPACE)) size++; + return size; +} + +/* + * Returns the number of characters at the start of the string that are + * whitespace. Disallows searching past the given maximum number of characters. + */ +size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length); + +/* + * Returns the number of characters at the start of the string that are + * whitespace while also tracking the location of each newline. Disallows + * searching past the given maximum number of characters. + */ +size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_arena_t *arena, pm_line_offset_list_t *line_offsets, uint32_t start_offset); + +/* + * Returns the number of characters at the start of the string that are decimal + * digits. Disallows searching past the given maximum number of characters. + */ +size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length); + +/* + * Returns the number of characters at the start of the string that are + * hexadecimal digits. Disallows searching past the given maximum number of + * characters. + */ +size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length); + +/* + * Returns the number of characters at the start of the string that are octal + * digits or underscores. Disallows searching past the given maximum number of + * characters. + * + * If multiple underscores are found in a row or if an underscore is + * found at the end of the number, then the invalid pointer is set to the index + * of the first invalid underscore. + */ +size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); + +/* + * Returns the number of characters at the start of the string that are decimal + * digits or underscores. Disallows searching past the given maximum number of + * characters. + * + * If multiple underscores are found in a row or if an underscore is + * found at the end of the number, then the invalid pointer is set to the index + * of the first invalid underscore. + */ +size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); + +/* + * Returns the number of characters at the start of the string that are + * hexadecimal digits or underscores. Disallows searching past the given maximum + * number of characters. + * + * If multiple underscores are found in a row or if an underscore is + * found at the end of the number, then the invalid pointer is set to the index + * of the first invalid underscore. + */ +size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); + +/* + * Returns the number of characters at the start of the string that are regexp + * options. Disallows searching past the given maximum number of characters. + */ +size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length); + +/* + * Returns the number of characters at the start of the string that are binary + * digits or underscores. Disallows searching past the given maximum number of + * characters. + * + * If multiple underscores are found in a row or if an underscore is + * found at the end of the number, then the invalid pointer is set to the index + * of the first invalid underscore. + */ +size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); + + +/* Returns true if the given character is a binary digit. */ +bool pm_char_is_binary_digit(const uint8_t b); + +/* Returns true if the given character is an octal digit. */ +bool pm_char_is_octal_digit(const uint8_t b); + +/* Returns true if the given character is a decimal digit. */ +bool pm_char_is_decimal_digit(const uint8_t b); + +/* Returns true if the given character is a hexadecimal digit. */ +bool pm_char_is_hexadecimal_digit(const uint8_t b); + +#endif |
