diff options
Diffstat (limited to 'template/unicode_properties.rdoc.tmpl')
-rwxr-xr-x | template/unicode_properties.rdoc.tmpl | 59 |
1 files changed, 59 insertions, 0 deletions
== \Regexps Based on Unicode Properties

The properties shown here are those currently supported in Ruby.
Older versions may not support all of these.
<%
# Generate a documentation file for the unicode properties.
#
# Usage:
#
#   Get PropertyAliases.txt, PropertyValueAliases.txt from unicode.org
#   (http://unicode.org/Public/UNIDATA/) and run
#   ```
#   ruby tool/generic_erb.rb template/unicode_properties.rdoc.tmpl data_dir name2ctype.h
#   ```
#
# The first argument is the directory holding the two alias files; every
# remaining argument (or stdin) is read through ARGF as the header to scan.

# Object#tap returns its receiver no matter what the block evaluates to, so
# the directory check has to be an explicit condition to have any effect.
data_dir = ARGV.shift
abort("Usage: #{$0} data_directory [name2ctype.h]") unless data_dir && Dir.exist?(data_dir)

# Map group names, given as last argument to #make_const in enc-unicode.rb,
# to sections in the doc. The order in this hash controls the order in the doc.
# Keys are compared with ===, so a String key must equal the group name exactly
# while a Regexp key only has to match somewhere in it.
map = {
  /\[\[:/            => 'POSIX brackets',
  '-'                => 'Special',
  /.+ Category/      => 'Major and General Categories',
  'Binary Property'  => 'Prop List',
  /Derived Property/ => 'Derived Core Properties',
  'Script'           => 'Scripts',
  'Block'            => 'Blocks',
  'Emoji'            => 'Emoji',
  /Grapheme/         => 'Graphemes',
  /Derived Age/      => 'Derived Ages',
}

# aliases in the form { short => long }, e.g. { 'Hex' => 'Hex_Digit', 'L' => 'Letter' }
aliases = (
  File.binread(File.join(data_dir, 'PropertyAliases.txt')).scan(/^(\w+)\s*; (\w+)/) +
  File.binread(File.join(data_dir, 'PropertyValueAliases.txt')).scan(/^(?:gc|sc)\s*; (\w+)\s*; (\w+)/)
).to_h

# { section title => [long property name, ...] }
props_by_section = {}
ARGF.each_line do |line|
  # Only lines of the form `'<prop>': <group name> *` are of interest; the
  # named captures become the locals `prop` and `name`.
  next unless /'(?<prop>[^']+)': (?<name>.+) \*/ =~ line
  next if prop == 'NEWLINE' # ignore custom internal prop

  section = map.find { |group, _| group === name }&.last
  unless section
    # Report groups that have no section and leave the property out of the doc.
    warn("no doc section for #{name}")
    next
  end

  # normalize prop names - the header file uses a mix of short and long names
  long_prop_name = aliases[prop] || prop
  (props_by_section[section] ||= []) << long_prop_name
end

map.each_value do |section|
  props = props_by_section[section]
  # A section that collected no properties would otherwise call #sort on nil.
  next unless props
-%>

=== <%=section%>

% props.sort.each do |prop|
- <%= [prop, aliases.key(prop)].compact.uniq.map { |v| "<tt>\\p{#{v}}</tt>" }.join(', ') %>
% end
% end