diff options
author | Janosch Müller <janosch84@gmail.com> | 2023-07-01 16:22:17 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-01 23:22:17 +0900 |
commit | 08b3fb11524e6cde453476f24ac80fd60457dfef (patch) | |
tree | 58390c5ff62bcd0116b6aa857c228818a5b465b4 /template | |
parent | 3fd1968d6f10579a36f77eb3838f6c62a3e9f205 (diff) |
[Bug #19728] Auto-generate unicode property docs
https://bugs.ruby-lang.org/issues/19728
Notes:
Merged: https://github.com/ruby/ruby/pull/7944
Merged-By: nobu <nobu@ruby-lang.org>
Diffstat (limited to 'template')
-rwxr-xr-x | template/unicode_properties.rdoc.tmpl | 59 |
1 files changed, 59 insertions, 0 deletions
== \Regexps Based on Unicode Properties

The properties shown here are those currently supported in Ruby.
Older versions may not support all of these.
<%
# Generate a documentation file for the unicode properties.
#
# Usage:
#
#   Get PropertyAliases.txt, PropertyValueAliases.txt from unicode.org
#   (http://unicode.org/Public/UNIDATA/) and run
#   ```
#   ruby tool/generic_erb.rb template/unicode_properties.rdoc.tmpl data_dir name2ctype.h
#   ```
#
# The first argument is the directory holding the two alias files.  The
# remaining input is read through ARGF and scanned for comments of the
# form `'Prop': Group Name *`.

# Object#tap always returns its receiver, whatever the block evaluates to,
# so it cannot be used to reject a missing directory; check explicitly.
data_dir = ARGV.shift
unless data_dir && Dir.exist?(data_dir)
  abort("Usage: #{$0} data_directory [name2ctype.h]")
end

# Map group names, given as last argument to #make_const in enc-unicode.rb,
# to sections in the doc. The order in this hash controls the order in the doc.
# Keys are compared with ===, so a String key must equal the group name
# while a Regexp key only has to match it.
map = {
  /\[\[:/ => 'POSIX brackets',
  '-' => 'Special',
  /.+ Category/ => 'Major and General Categories',
  'Binary Property' => 'Prop List',
  /Derived Property/ => 'Derived Core Properties',
  'Script' => 'Scripts',
  'Block' => 'Blocks',
  'Emoji' => 'Emoji',
  /Grapheme/ => 'Graphemes',
  /Derived Age/ => 'Derived Ages',
}

# aliases in the form { short => long }, e.g. { 'Hex' => 'Hex_Digit', 'L' => 'Letter' }
aliases = (
  File.binread(File.join(data_dir, 'PropertyAliases.txt')).scan(/^(\w+)\s*; (\w+)/) +
  File.binread(File.join(data_dir, 'PropertyValueAliases.txt')).scan(/^(?:gc|sc)\s*; (\w+)\s*; (\w+)/)
).to_h

# Section title => list of (long) property names.  The default block gives
# every section an empty list, so sections without any property are safe
# to look up when rendering.
props_by_section = Hash.new { |hash, key| hash[key] = [] }

ARGF.each_line do |line|
  # The named captures define the locals `prop` and `name`.
  next unless /'(?<prop>[^']+)': (?<name>.+) \*/ =~ line
  next if prop == 'NEWLINE' # ignore custom internal prop

  section = map.find { |pattern, _| pattern === name }&.last
  unless section
    # Report the unknown group and skip the property instead of filing it
    # under a nil section that is never rendered.
    warn("no doc section for #{name}")
    next
  end

  # normalize prop names - the header file uses a mix of short and long names
  long_prop_name = aliases[prop] || prop
  props_by_section[section] << long_prop_name
end

map.each_value do |section|
  props = props_by_section[section]
  next if props.empty? # no heading for a section with nothing to list
-%>

=== <%=section%>

% props.sort.each do |prop|
- <%= [prop, aliases.key(prop)].compact.uniq.map { |v| "<tt>\\p{#{v}}</tt>" }.join(', ') %>
% end
% end