summaryrefslogtreecommitdiff
path: root/template/unicode_properties.rdoc.tmpl
diff options
context:
space:
mode:
Diffstat (limited to 'template/unicode_properties.rdoc.tmpl')
-rwxr-xr-xtemplate/unicode_properties.rdoc.tmpl59
1 files changed, 59 insertions, 0 deletions
diff --git a/template/unicode_properties.rdoc.tmpl b/template/unicode_properties.rdoc.tmpl
new file mode 100755
index 0000000000..7bbedc780c
--- /dev/null
+++ b/template/unicode_properties.rdoc.tmpl
@@ -0,0 +1,59 @@
+== \Regexps Based on Unicode Properties
+
+The properties shown here are those currently supported in Ruby.
+Older versions may not support all of these.
+<%
+# Generate a documentation file for the unicode properties.
+#
+# Usage:
+#
+# Get PropertyAliases.txt, PropertyValueAliases.txt from unicode.org
+# (http://unicode.org/Public/UNIDATA/) and run
+# ```
+# ruby tool/generic_erb.rb template/unicode_properties.rdoc.tmpl data_dir name2ctype.h
+# ```
+
+# First argument: directory holding PropertyAliases.txt and PropertyValueAliases.txt.
+# Print the usage message and exit when it is absent or is not an existing directory.
+data_dir = ARGV.shift
+abort("Usage: #{$0} data_directory [name2ctype.h]") unless data_dir && Dir.exist?(data_dir)
+
+# Map group names, given as last argument to #make_const in enc-unicode.rb,
+# to sections in the doc. The order in this hash controls the order in the doc.
+#
+# Keys are tested against each group name with `===`, and the first entry that
+# matches is used: a String key must equal the group name exactly, while a
+# Regexp key only has to match somewhere in it.
+map = {
+ /\[\[:/ => 'POSIX brackets',
+ '-' => 'Special',
+ /.+ Category/ => 'Major and General Categories',
+ 'Binary Property' => 'Prop List',
+ /Derived Property/ => 'Derived Core Properties',
+ 'Script' => 'Scripts',
+ 'Block' => 'Blocks',
+ 'Emoji' => 'Emoji',
+ /Grapheme/ => 'Graphemes',
+ /Derived Age/ => 'Derived Ages',
+}
+
+# Short-to-long name lookup, e.g. { 'Hex' => 'Hex_Digit', 'L' => 'Letter' }.
+# Property aliases come from PropertyAliases.txt; value aliases are taken from
+# the `gc` and `sc` rows of PropertyValueAliases.txt. Both files are read from data_dir.
+property_alias_pairs = File.binread(File.join(data_dir, 'PropertyAliases.txt')).scan(/^(\w+)\s*; (\w+)/)
+value_alias_pairs = File.binread(File.join(data_dir, 'PropertyValueAliases.txt')).scan(/^(?:gc|sc)\s*; (\w+)\s*; (\w+)/)
+aliases = (property_alias_pairs + value_alias_pairs).to_h
+
+props_by_section = {}
+ARGF.each_line do |line|
+ next unless /'(?<prop>[^']+)': (?<name>.+) \*/ =~ line
+ next if prop == 'NEWLINE' # ignore custom internal prop
+
+ section = map.find { |k, v| k === name }&.last || warn("no doc section for #{name}")
+
+ # normalize prop names - the header file uses a mix of short and long names
+ long_prop_name = aliases[prop] || prop
+ (props_by_section[section] ||= []) << long_prop_name
+end
+
+map.each_value do |section| -%>
+
+=== <%=section%>
+
+% props_by_section[section].sort.each do |prop|
+- <%= [prop, aliases.key(prop)].compact.uniq.map { |v| "<tt>\\p{#{v}}</tt>" }.join(', ') %>
+% end
+% end