summaryrefslogtreecommitdiff
path: root/include/ruby/internal/encoding/pathname.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/ruby/internal/encoding/pathname.h')
-rw-r--r--include/ruby/internal/encoding/pathname.h184
1 files changed, 184 insertions, 0 deletions
diff --git a/include/ruby/internal/encoding/pathname.h b/include/ruby/internal/encoding/pathname.h
new file mode 100644
index 0000000000..0b5e85a524
--- /dev/null
+++ b/include/ruby/internal/encoding/pathname.h
@@ -0,0 +1,184 @@
+#ifndef RUBY_INTERNAL_ENCODING_PATHNAME_H /*-*-C++-*-vi:se ft=cpp:*/
+#define RUBY_INTERNAL_ENCODING_PATHNAME_H
+/**
+ * @file
+ * @author Ruby developers <ruby-core@ruby-lang.org>
+ * @copyright This file is a part of the programming language Ruby.
+ * Permission is hereby granted, to either redistribute and/or
+ * modify this file, provided that the conditions mentioned in the
+ * file COPYING are met. Consult the file for details.
+ * @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
+ * implementation details. Don't take them as canon. They could
+ * rapidly appear then vanish. The name (path) of this header file
+ * is also an implementation detail. Do not expect it to persist
+ * at the place it is now. Developers are free to move it anywhere
+ * anytime at will.
+ * @note To ruby-core: remember that this header can be possibly
+ * recursively included from extension libraries written in C++.
+ * Do not expect for instance `__VA_ARGS__` is always available.
+ * We assume C99 for ruby itself but we don't assume languages of
+ * extension libraries. They could be written in C++98.
+ * @brief Routines to manipulate encodings of pathnames.
+ */
+
+#include "ruby/internal/attr/nonnull.h"
+#include "ruby/internal/dllexport.h"
+#include "ruby/internal/encoding/encoding.h"
+#include "ruby/internal/value.h"
+
+RBIMPL_SYMBOL_EXPORT_BEGIN()
+RBIMPL_ATTR_NONNULL(())
+/**
+ * Returns a path component directly adjacent to the passed pointer.
+ *
+ * ```
+ * "/multi/byte/encoded/pathname.txt"
+ * ^ ^ ^
+ * | | +--- end
+ * | +--- @return
+ * +--- path
+ * ```
+ *
+ * @param[in] path Where to start scanning.
+ * @param[in] end End of the path string.
+ * @param[in] enc Encoding of the string.
+ * @return A pointer in the passed string where the next path component
+ * resides, or `end` if there is no next path component.
+ * @note The return type is a plain `char *` even though `path` and
+ * `end` are `const` qualified. The returned pointer still
+ * points into the passed string; no new string is returned.
+ */
+char *rb_enc_path_next(const char *path, const char *end, rb_encoding *enc);
+
+RBIMPL_ATTR_NONNULL(())
+/**
+ * Seeks for the non-prefix part of a pathname. This can be a no-op when the
+ * OS has no such concept as a path prefix. But there are OSes where path
+ * prefixes do exist.
+ *
+ * ```
+ * "C:\multi\byte\encoded\pathname.txt"
+ * ^ ^ ^
+ * | | +--- end
+ * | +--- @return
+ * +--- path
+ * ```
+ *
+ * @param[in] path Where to start scanning.
+ * @param[in] end End of the path string.
+ * @param[in] enc Encoding of the string.
+ * @return A pointer in the passed string where the non-prefix part
+ * starts, or `path` if the OS does not have path prefixes.
+ * @note The return type is a plain `char *` even though `path` and
+ * `end` are `const` qualified. The returned pointer still
+ * points into the passed string.
+ */
+char *rb_enc_path_skip_prefix(const char *path, const char *end, rb_encoding *enc);
+
+RBIMPL_ATTR_NONNULL(())
+/**
+ * Returns the last path component.
+ *
+ * ```
+ * "/multi/byte/encoded/pathname.txt"
+ * ^ ^ ^
+ * | | +--- end
+ * | +--- @return
+ * +--- path
+ * ```
+ *
+ * @param[in] path Where to start scanning.
+ * @param[in] end End of the path string.
+ * @param[in] enc Encoding of the string.
+ * @return A pointer in the passed string where the last path component
+ * resides, or `end` if there are no more path components.
+ * @note NOTE(review): this function is named after the last path
+ * _separator_, while the text above speaks of the last path
+ * _component_. Confirm against the implementation which of the
+ * two the returned pointer designates, and what is returned
+ * when the path contains no separator at all.
+ */
+char *rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc);
+
+RBIMPL_ATTR_NONNULL(())
+/**
+ * This basically just returns the passed end. It makes a difference in case
+ * the passed string ends with tons of path separators like the following:
+ *
+ * ```
+ * "/path/that/ends/with/lots/of/slashes//////////////"
+ * ^ ^ ^
+ * | | +--- end
+ * | +--- @return
+ * +--- path
+ * ```
+ *
+ * @param[in] path Where to start scanning.
+ * @param[in] end End of the path string.
+ * @param[in] enc Encoding of the string.
+ * @return A pointer in the passed string where the trailing path
+ * separators start, or `end` if there are no trailing path
+ * separators.
+ *
+ * @internal
+ *
+ * It seems this function was introduced to mimic what POSIX says about
+ * `basename(3)`.
+ */
+char *rb_enc_path_end(const char *path, const char *end, rb_encoding *enc);
+
+RBIMPL_ATTR_NONNULL((1, 4))
+/**
+ * Our own encoding-aware version of `basename(3)`. Normally, this function
+ * returns the last path component of the given name. However in case the
+ * passed name ends with a path separator, it returns the name of the
+ * directory, not the last (empty) component. Also if the passed name is a
+ * root directory, it returns that root directory. Note however that Windows
+ * filesystems have drive letters, which this function does not return.
+ *
+ * @param[in] name Target path.
+ * @param[out] baselen Return buffer.
+ * @param[in,out] alllen Number of bytes of `name`.
+ * @param[in] enc Encoding of `name`.
+ * @return The rightmost component of `name`.
+ * @post `baselen`, if passed, is updated to be the number of bytes
+ * of the returned basename.
+ * @post `alllen`, if passed, is updated to be the number of bytes of
+ * strings not considered as the basename.
+ * @note Only arguments 1 and 4 (`name` and `enc`) are listed in the
+ * nonnull attribute above; `baselen` and `alllen` are the
+ * arguments described as "if passed".
+ */
+const char *ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc);
+
+RBIMPL_ATTR_NONNULL((1, 3))
+/**
+ * Our own encoding-aware version of `extname`. This function first applies
+ * rb_enc_path_last_separator() to the passed name and only concerns its return
+ * value (ignores any parent directories). This function returns complicated
+ * results:
+ *
+ * ```CXX
+ * auto path = "...";
+ * // `len` has to be a `long`, because its address is passed as `long *`.
+ * long len = strlen(path);
+ * auto ret = ruby_enc_find_extname(path, &len, rb_ascii8bit_encoding());
+ *
+ * switch(len) {
+ * case 0:
+ * if (ret == 0) {
+ * // `path` is a file without extensions.
+ * }
+ * else {
+ * // `path` is a dotfile.
+ * // `ret` is the file's name.
+ * }
+ * break;
+ *
+ * case 1:
+ * // `path` _ends_ with a dot.
+ * // `ret` is that dot.
+ * break;
+ *
+ * default:
+ * // `path` has an extension.
+ * // `ret` is that extension.
+ * }
+ * ```
+ *
+ * @param[in] name Target path.
+ * @param[in,out] len Number of bytes of `name`.
+ * @param[in] enc Encoding of `name`.
+ * @return See above.
+ * @post `len`, if passed, is updated (see above).
+ * @note Only arguments 1 and 3 (`name` and `enc`) are listed in the
+ * nonnull attribute above; `len` is the argument described as
+ * "if passed".
+ */
+const char *ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc);
+
+RBIMPL_SYMBOL_EXPORT_END()
+
+#endif /* RUBY_INTERNAL_ENCODING_PATHNAME_H */