diff options
Diffstat (limited to 'prism/util/pm_strpbrk.h')
-rw-r--r-- | prism/util/pm_strpbrk.h | 46 |
1 files changed, 46 insertions, 0 deletions
diff --git a/prism/util/pm_strpbrk.h b/prism/util/pm_strpbrk.h new file mode 100644 index 0000000000..f387bd5782 --- /dev/null +++ b/prism/util/pm_strpbrk.h @@ -0,0 +1,46 @@ +/** + * @file pm_strpbrk.h + * + * A custom strpbrk implementation. + */ +#ifndef PRISM_STRPBRK_H +#define PRISM_STRPBRK_H + +#include "prism/defines.h" +#include "prism/diagnostic.h" +#include "prism/parser.h" + +#include <stddef.h> +#include <string.h> + +/** + * Here we have rolled our own version of strpbrk. The standard library strpbrk + * has undefined behavior when the source string is not null-terminated. We want + * to support strings that are not null-terminated because pm_parse does not + * have the contract that the string is null-terminated. (This is desirable + * because it means the extension can call pm_parse with the result of a call to + * mmap). + * + * The standard library strpbrk also does not support passing a maximum length + * to search. We want to support this for the reason mentioned above, but we + * also don't want it to stop on null bytes. Ruby actually allows null bytes + * within strings, comments, regular expressions, etc. So we need to be able to + * skip past them. + * + * Finally, we want to support encodings wherein the charset could contain + * characters that are trailing bytes of multi-byte characters. For example, in + * Shift-JIS, the backslash character can be a trailing byte. In that case we + * need to take a slower path and iterate one multi-byte character at a time. + * + * @param parser The parser. + * @param source The source to search. + * @param charset The charset to search for. + * @param length The maximum number of bytes to search. + * @param validate Whether to validate that the source string is valid in the + * current encoding of the parser. + * @return A pointer to the first character in the source string that is in the + * charset, or NULL if no such character exists. + */ +const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate); + +#endif |