From 84f0b051de55b80211eab0ad2438f500af45e4a5 Mon Sep 17 00:00:00 2001 From: dave Date: Thu, 18 Dec 2003 21:08:25 +0000 Subject: Annotate enum.c. Add pager support, and report on methods in included modules git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5214 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- bin/ri | 20 ++- enum.c | 334 ++++++++++++++++++++++++++++++++++++ lib/rdoc/README | 12 ++ lib/rdoc/generators/ri_generator.rb | 39 ++++- lib/rdoc/options.rb | 9 + lib/rdoc/parsers/parse_c.rb | 29 +++- lib/rdoc/ri/ri_cache.rb | 16 +- lib/rdoc/ri/ri_descriptions.rb | 8 + lib/rdoc/ri/ri_paths.rb | 12 +- lib/rdoc/ri/ri_reader.rb | 10 ++ lib/rdoc/ri/ri_writer.rb | 16 +- pack.c | 201 ++++++++++++++++++++++ 12 files changed, 692 insertions(+), 14 deletions(-) diff --git a/bin/ri b/bin/ri index 4103f3a7a8..d5277e1a7a 100755 --- a/bin/ri +++ b/bin/ri @@ -66,7 +66,7 @@ class RiDisplay STDOUT.reopen(@save_stdout) @save_stdout = nil paged = false - for pager in [ ENV['pager'], "less", "more <" ].compact.uniq + for pager in [ ENV['PAGER'], "less", "more <", 'pager' ].compact.uniq if system("#{pager} #{path}") paged = true break @@ -129,6 +129,24 @@ def display_class_info(class_entry) display_flow(klass.comment) @formatter.draw_line + unless klass.includes.empty? + @formatter.blankline + @formatter.wrap("Includes:", "") + incs = [] + klass.includes.each do |inc| + inc_desc = @ri_reader.find_class_by_name(inc.name) + if inc_desc + str = inc.name + "(" + str << inc_desc.instance_methods.map{|m| m.name}.join(", ") + str << ")" + incs << str + else + incs << inc.name + end + end + @formatter.wrap(incs.sort.join(', ')) + end + unless klass.constants.empty? 
@formatter.blankline @formatter.wrap("Constants:", "") diff --git a/enum.c b/enum.c index ee0b05838f..28f28cbd5f 100644 --- a/enum.c +++ b/enum.c @@ -44,6 +44,24 @@ grep_iter_i(i, arg) return Qnil; } +/* + * call-seq: + * enum.grep(pattern) => array + * enum.grep(pattern) {| obj | block } => array + * + * Returns an array of every element in enum for which + * Pattern === element. If the optional block is + * supplied, each matching element is passed to it, and the block's + * result is stored in the output array. + * + * (1..100).grep 38..44 #=> [38, 39, 40, 41, 42, 43, 44] + * c = IO.constants + * c.grep(/SEEK/) #=> ["SEEK_END", "SEEK_SET", "SEEK_CUR"] + * res = c.grep(/SEEK/) {|v| IO.const_get(v) } + * res #=> [2, 0, 1] + * + */ + static VALUE enum_grep(obj, pat) VALUE obj, pat; @@ -72,6 +90,20 @@ find_i(i, memo) return Qnil; } +/* + * call-seq: + * enum.detect {| obj | block } => obj or nil + * enum.find {| obj | block } => obj or nil + * + * Passes each entry in enum to block. Returns the + * first for which block is not false. Returns + * nil if no object matches. + * + * (1..10).detect {|i| i % 5 == 0 and i % 7 == 0 } #=> nil + * (1..100).detect {|i| i % 5 == 0 and i % 7 == 0 } #=> 35 + * + */ + static VALUE enum_find(argc, argv, obj) int argc; @@ -105,6 +137,19 @@ find_all_i(i, ary) return Qnil; } +/* + * call-seq: + * enum.find_all {| obj | block } => array + * enum.select {| obj | block } => array + * + * Returns an array containing all elements of enum for which + * block is not false (see also + * Enumerable#reject). + * + * (1..10).find_all {|i| i % 3 == 0 } #=> [3, 6, 9] + * + */ + static VALUE enum_find_all(obj) VALUE obj; @@ -126,6 +171,17 @@ reject_i(i, ary) return Qnil; } +/* + * call-seq: + * enum.reject {| obj | block } => array + * + * Returns an array for all elements of enum for which + * block is false (see also Enumerable#find_all). 
+ * + * (1..10).reject {|i| i % 3 == 0 } #=> [1, 2, 4, 5, 7, 8, 10] + * + */ + static VALUE enum_reject(obj) VALUE obj; @@ -155,6 +211,19 @@ collect_all(i, ary) return Qnil; } +/* + * call-seq: + * enum.collect {| obj | block } => array + * enum.map {| obj | block } => array + * + * Returns a new array with the results of running block once + * for every element in enum. + * + * (1..4).collect {|i| i*i } #=> [1, 4, 9, 16] + * (1..4).collect { "cat" } #=> ["cat", "cat", "cat", "cat"] + * + */ + static VALUE enum_collect(obj) VALUE obj; @@ -166,6 +235,16 @@ enum_collect(obj) return ary; } +/* + * call-seq: + * enum.to_a => array + * enum.entries => array + * + * Returns an array containing the items in enum. + * + * (1..7).to_a #=> [1, 2, 3, 4, 5, 6, 7] + * { 'a'=>1, 'b'=>2, 'c'=>3 }.to_a #=> [["a", 1], ["b", 2], ["c", 3]] + */ static VALUE enum_to_a(obj) VALUE obj; @@ -192,6 +271,37 @@ inject_i(i, memo) return Qnil; } +/* + * call-seq: + * enum.inject(initial) {| memo, obj | block } => obj + * enum.inject {| memo, obj | block } => obj + * + * Combines the elements of enum by applying the block to an + * accumulator value (memo) and each element in turn. At each + * step, memo is set to the value returned by the block. The + * first form lets you supply an initial value for memo. The + * second form uses the first element of the collection as the + * initial value (and skips that element while iterating). + * + * # Sum some numbers + * (5..10).inject {|sum, n| sum + n } #=> 45 + * # Multiply some numbers + * (5..10).inject(1) {|product, n| product * n } #=> 151200 + * + * # find the longest word + * longest = %w{ cat sheep bear }.inject do |memo,word| + * memo.length > word.length ? memo : word + * end + * longest #=> "sheep" + * + * # find the length of the longest word + * longest = %w{ cat sheep bear }.inject(0) do |memo,word| + * memo >= word.length ?
memo : word.length + * end + * longest #=> 5 + * + */ + static VALUE enum_inject(argc, argv, obj) int argc; @@ -225,6 +335,18 @@ partition_i(i, ary) return Qnil; } +/* + * call-seq: + * enum.partition {| obj | block } => [ true_array, false_array ] + * + * Returns two arrays, the first containing the elements of + * enum for which the block evaluates to true, the second + * containing the rest. + * + * (1..6).partition {|i| (i&1).zero?} #=> [[2, 4, 6], [1, 3, 5]] + * + */ + static VALUE enum_partition(obj) VALUE obj; @@ -238,6 +360,23 @@ enum_partition(obj) return rb_assoc_new(ary[0], ary[1]); } +/* + * call-seq: + * enum.sort => array + * enum.sort {| a, b | block } => array + * + * Returns an array containing the items in enum sorted, + * either according to their own <=> method, or by using + * the results of the supplied block. The block should return -1, 0, or + * +1 depending on the comparison between a and b. As of + * Ruby 1.8, the method Enumerable#sort_by implements a + * built-in Schwartzian Transform, useful when key computation or + * comparison is expensive. + * + * %w(rhea kea flea).sort #=> ["flea", "kea", "rhea"] + * (1..10).sort {|a,b| b <=> a} #=> [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] + */ + static VALUE enum_sort(obj) VALUE obj; @@ -267,6 +406,75 @@ sort_by_cmp(a, b) return rb_cmpint(retval, *a, *b); } +/* + * call-seq: + * enum.sort_by {| obj | block } => array + * + * Sorts enum using a set of keys generated by mapping the + * values in enum through the given block. + * + * %w{ apple pear fig }.sort_by {|word| word.length} + #=> ["fig", "pear", "apple"] + * + * The current implementation of sort_by generates an + * array of tuples containing the original collection element and the + * mapped value.
This makes sort_by fairly expensive when + * the keysets are simple + * + * require 'benchmark' + * include Benchmark + * + * a = (1..100000).map {rand(100000)} + * + * bm(10) do |b| + * b.report("Sort") { a.sort } + * b.report("Sort by") { a.sort_by {|a| a} } + * end + * + * produces: + * + * user system total real + * Sort 0.180000 0.000000 0.180000 ( 0.175469) + * Sort by 1.980000 0.040000 2.020000 ( 2.013586) + * + * However, consider the case where comparing the keys is a non-trivial + * operation. The following code sorts some files on modification time + * using the basic sort method. + * + * files = Dir["*"] + * sorted = files.sort {|a,b| File.new(a).mtime <=> File.new(b).mtime} + * sorted #=> ["mon", "tues", "wed", "thurs"] + * + * This sort is inefficient: it generates two new File + * objects during every comparison. A slightly better technique is to + * use the Kernel#test method to generate the modification + * times directly. + * + * files = Dir["*"] + * sorted = files.sort { |a,b| + * test(?M, a) <=> test(?M, b) + * } + * sorted #=> ["mon", "tues", "wed", "thurs"] + * + * This still generates many unnecessary Time objects. A + * more efficient technique is to cache the sort keys (modification + * times in this case) before the sort. Perl users often call this + * approach a Schwartzian Transform, after Randal Schwartz. We + * construct a temporary array, where each element is an array + * containing our sort key along with the filename. We sort this array, + * and then extract the filename from the result. + * + * sorted = Dir["*"].collect { |f| + * [test(?M, f), f] + * }.sort.collect { |f| f[1] } + * sorted #=> ["mon", "tues", "wed", "thurs"] + * + * This is exactly what sort_by does internally. + * + * sorted = Dir["*"].sort_by {|f| test(?M, f)} + * sorted #=> ["mon", "tues", "wed", "thurs"] + */ + static VALUE enum_sort_by(obj) VALUE obj; @@ -315,6 +523,23 @@ all_i(i, memo) return Qnil; } +/* + * call-seq: + * enum.all? 
[{|obj| block } ] => true or false + * + * Passes each element of the collection to the given block. The method + * returns true if the block never returns + * false or nil. If the block is not given, + * Ruby adds an implicit block of {|obj| obj} (that is, + * all? will return true only if none of the + * collection members are false or nil.) + * + * %w{ ant bear cat}.all? {|word| word.length >= 3} #=> true + * %w{ ant bear cat}.all? {|word| word.length >= 4} #=> false + * [ nil, true, 99 ].all? #=> false + * + */ + static VALUE enum_all(obj) VALUE obj; @@ -353,6 +578,24 @@ any_i(i, memo) return Qnil; } +/* + * call-seq: + * enum.any? [{|obj| block } ] => true or false + * + * Passes each element of the collection to the given block. The method + * returns true if the block ever returns a value other + * than false or nil. If the block is not + * given, Ruby adds an implicit block of {|obj| obj} (that + * is, any? will return true if at least one + * of the collection members is not false or + * nil). + * + * %w{ ant bear cat}.any? {|word| word.length >= 3} #=> true + * %w{ ant bear cat}.any? {|word| word.length >= 4} #=> true + * [ nil, true, 99 ].any? #=> true + * + */ + static VALUE enum_any(obj) VALUE obj; @@ -405,6 +648,21 @@ min_ii(i, memo) return Qnil; } + +/* + * call-seq: + * enum.min => obj + * enum.min {| a,b | block } => obj + * + * Returns the object in enum with the minimum value. The + * first form assumes all objects implement Comparable; + * the second uses the block to return a <=> b. + * + * a = %w(albatross dog horse) + * a.min #=> "albatross" + * a.min {|a,b| a.length <=> b.length } #=> "dog" + */ + static VALUE enum_min(obj) VALUE obj; @@ -418,6 +676,20 @@ enum_min(obj) return result; } +/* + * call-seq: + * enum.max => obj + * enum.max {| a,b | block } => obj + * + * Returns the object in enum with the maximum value. The + * first form assumes all objects implement Comparable; + * the second uses the block to return a <=> b.
+ * + * a = %w(albatross dog horse) + * a.max #=> "horse" + * a.max {|a,b| a.length <=> b.length } #=> "albatross" + */ + static VALUE max_i(i, memo) VALUE i; @@ -481,6 +753,19 @@ member_i(item, memo) return Qnil; } +/* + * call-seq: + * enum.include?(obj) => true or false + * enum.member?(obj) => true or false + * + * Returns true if any member of enum equals + * obj. Equality is tested using ==. + * + * IO.constants.include? "SEEK_SET" #=> true + * IO.constants.include? "SEEK_NO_FURTHER" #=> false + * + */ + static VALUE enum_member(obj, val) VALUE obj, val; @@ -504,6 +789,21 @@ each_with_index_i(val, memo) return Qnil; } +/* + * call-seq: + * enum.each_with_index {|obj, i| block } -> nil + * + * Calls block with two arguments, the item and its index, for + * each item in enum. + * + * hash = Hash.new + * %w(cat dog wombat).each_with_index {|item, index| + * hash[item] = index + * } + * hash #=> {"cat"=>0, "wombat"=>2, "dog"=>1} + * + */ + static VALUE enum_each_with_index(obj) VALUE obj; @@ -540,6 +840,29 @@ zip_i(val, memo) return Qnil; } +/* + * call-seq: + * enum.zip(arg, ...) => array + * enum.zip(arg, ...) {|arr| block } => nil + * + * Converts any arguments to arrays, then merges elements of + * enum with corresponding elements from each argument. This + * generates a sequence of enum#size n-element + * arrays, where n is one more than the count of arguments. If + * the size of any argument is less than enum#size, + * nil values are supplied. If a block is given, it is + * invoked for each output array, otherwise an array of arrays is + * returned.
+ * + * a = [ 4, 5, 6 ] + * b = [ 7, 8, 9 ] + * + * (1..3).zip(a, b) #=> [[1, 4, 7], [2, 5, 8], [3, 6, 9]] + * "cat\ndog".zip([1]) #=> [["cat\n", 1], ["dog", nil]] + * (1..3).zip #=> [[1], [2], [3]] + * + */ + static VALUE enum_zip(argc, argv, obj) int argc; @@ -560,6 +883,17 @@ enum_zip(argc, argv, obj) return result; } +/* + * The Enumerable mixin provides collection classes with + * several traversal and searching methods, and with the ability to + * sort. The class must provide a method each, which + * yields successive members of the collection. If + * Enumerable#max, #min, or + * #sort is used, the objects in the collection must also + * implement a meaningful <=> operator, as these methods + * rely on an ordering between members of the collection. + */ + void Init_Enumerable() { diff --git a/lib/rdoc/README b/lib/rdoc/README index 39ce8bb75b..1b8671182f 100644 --- a/lib/rdoc/README +++ b/lib/rdoc/README @@ -166,6 +166,11 @@ Options are: [--main _name_] set the class, module, or file to appear on the index page +[--merge] + when generating _ri_ output, if classes being processed already + exist in the destination directory, merge in the current details + rather than overwrite them. + [--one-file] place all the output into a single file @@ -191,6 +196,13 @@ Options are: [--quiet] do not display progress messages +[--ri _and_ --ri-site] + generate output that can be read by the _ri_ command-line tool. + By default --ri places its output in ~/.rdoc, and --ri-site in + $datadir/ri/<ver>/site. Both can be overridden with a subsequent + --op option. Both default directories are in ri's default search + path. + [--show-hash] A name of the form #name in a comment is a possible hyperlink to an instance method name.
When displayed, the '#' is removed unless diff --git a/lib/rdoc/generators/ri_generator.rb b/lib/rdoc/generators/ri_generator.rb index 375c534923..c2b5269c3c 100644 --- a/lib/rdoc/generators/ri_generator.rb +++ b/lib/rdoc/generators/ri_generator.rb @@ -42,6 +42,8 @@ require 'rdoc/markup/simple_markup' require 'rdoc/markup/simple_markup/to_flow' require 'cgi' +require 'rdoc/ri/ri_cache' +require 'rdoc/ri/ri_reader' require 'rdoc/ri/ri_writer' require 'rdoc/ri/ri_descriptions' @@ -121,8 +123,7 @@ module Generators RI::MethodSummary.new(m.name) end - @ri_writer.remove_class(cls_desc) - @ri_writer.add_class(cls_desc) + update_or_replace(cls_desc) class_methods.each do |m| generate_method_info(cls_desc, m) @@ -219,5 +220,39 @@ module Generators @markup.convert(content, @to_flow) end + + # By default we replace existing classes with the + # same name. If the --merge option was given, we instead + # merge this definition into an existing class. We add + # our methods, aliases, etc to that class, but do not + # change the class's description. + + def update_or_replace(cls_desc) + old_cls = nil + + if @options.merge + rdr = RI::RiReader.new(RI::RiCache.new(@options.op_dir)) + + namespace = rdr.top_level_namespace + namespace = rdr.lookup_namespace_in(cls_desc.name, namespace) + if namespace.empty? + raise RiError.new("Nothing known about #{cls_desc.name}") + else + old_cls = namespace[0] + end + end + + if old_cls.nil?
+ # no merge: simply overwrite + @ri_writer.remove_class(cls_desc) + @ri_writer.add_class(cls_desc) + else + # existing class: merge in + old_desc = rdr.get_class(old_cls) + + old_desc.merge_in(cls_desc) + @ri_writer.add_class(old_desc) + end + end end end diff --git a/lib/rdoc/options.rb b/lib/rdoc/options.rb index 36b82eab7d..ab99c18b86 100644 --- a/lib/rdoc/options.rb +++ b/lib/rdoc/options.rb @@ -28,6 +28,9 @@ class Options # the first file we encounter is used) attr_accessor :main_page + # merge into classes of the same name when generating ri + attr_reader :merge + # Don't display progress as we process the files attr_reader :quiet @@ -160,6 +163,10 @@ class Options [ "--main", "-m", "name", "'name' will be the initial page displayed" ], + [ "--merge", "-M", nil, + "when creating ri output, merge processed classes\n" + + "into previously documented classes of the same name"], + [ "--one-file", "-1", nil, "put all the output into a single file" ], @@ -332,6 +339,7 @@ class Options @op_name = nil @show_all = false @main_page = nil + @merge = false @exclude = nil @quiet = false @generator_name = 'html' @@ -376,6 +384,7 @@ class Options when "--inline-source" then @inline_source = true when "--line-numbers" then @include_line_numbers = true when "--main" then @main_page = arg + when "--merge" then @merge = true when "--one-file" then @all_one_file = true when "--op" then @op_dir = arg when "--opname" then @op_name = arg diff --git a/lib/rdoc/parsers/parse_c.rb b/lib/rdoc/parsers/parse_c.rb index 3e8b48da34..39d136089b 100644 --- a/lib/rdoc/parsers/parse_c.rb +++ b/lib/rdoc/parsers/parse_c.rb @@ -140,6 +140,7 @@ module RDoc remove_commented_out_lines do_classes do_methods + do_includes @top_level end @@ -237,7 +238,8 @@ module RDoc next if meth_name == "initialize_copy" class_name = @known_classes[var_name] || var_name - class_obj = @classes[var_name] + class_obj = find_class(var_name, class_name) + if class_obj if meth_name == "initialize" meth_name = "new" @@
-297,7 +299,18 @@ module RDoc end end - + + # Look for includes of the form + # rb_include_module(rb_cArray, rb_mEnumerable); + def do_includes + @body.scan(/rb_include_module\(\s*(\w+?),\s*(\w+?)\s*\)/) do |c,m| + if cls = @classes[c] + m = KNOWN_CLASSES[m] || m + cls.add_include(Include.new(m, "")) + end + end + end + # Remove the /*'s and leading asterisks from C comments def mangle_comment(comment) @@ -306,7 +319,17 @@ module RDoc comment.gsub!(/^[ \t]*\*/m) { " " * $&.length } comment end - + + def find_class(raw_name, name) + unless @classes[raw_name] + if raw_name =~ /^rb_m/ + @classes[raw_name] = @top_level.add_module(NormalModule, name) + else + @classes[raw_name] = @top_level.add_class(NormalClass, name, nil) + end + end + @classes[raw_name] + end end end diff --git a/lib/rdoc/ri/ri_cache.rb b/lib/rdoc/ri/ri_cache.rb index f2cdbf6f38..0a2fac184c 100644 --- a/lib/rdoc/ri/ri_cache.rb +++ b/lib/rdoc/ri/ri_cache.rb @@ -31,7 +31,7 @@ module RI if name =~ /^(.*?)-(c|i).yaml$/ external_name = $1 is_class_method = $2 == "c" - internal_name = external_name + internal_name = RiWriter.external_to_internal(external_name) list = is_class_method ? @class_methods : @instance_methods path = File.join(dir, name) list << MethodEntry.new(path, internal_name, is_class_method, self) @@ -53,6 +53,11 @@ module RI @inferior_classes.find_all {|c| c.name[name]} end + # Return an exact match to a particular name + def contained_class_named(name) + @inferior_classes.find {|c| c.name == name} + end + # return the list of local methods matching name # We're split into two because we need distinct behavior # when called from the toplevel @@ -72,7 +77,7 @@ module RI # Return our full name - def full_name + def full_name res = @in_class.full_name res << "::" unless res.empty?
res << @name @@ -93,7 +98,7 @@ module RI else fail "Unknown is_class_method" end - list.find_all {|m| m.name[name]} + list.find_all {|m| m.name[name]} end end @@ -108,6 +113,11 @@ module RI def full_name "" end + + def module_named(name) + + end + end class MethodEntry diff --git a/lib/rdoc/ri/ri_descriptions.rb b/lib/rdoc/ri/ri_descriptions.rb index f99905719b..47984cf41d 100644 --- a/lib/rdoc/ri/ri_descriptions.rb +++ b/lib/rdoc/ri/ri_descriptions.rb @@ -46,6 +46,14 @@ module RI attr_accessor :superclass attr_accessor :includes + # merge in another class description into this one + def merge_in(old) + @class_methods.concat(old.class_methods).sort! + @instance_methods.concat(old.instance_methods).sort! + @attributes.concat(old.attributes).sort! + @constants.concat(old.constants).sort! + @includes.concat(old.includes).sort! + end end class MethodDescription < Description diff --git a/lib/rdoc/ri/ri_paths.rb b/lib/rdoc/ri/ri_paths.rb index 14288d9a94..ff8257a546 100644 --- a/lib/rdoc/ri/ri_paths.rb +++ b/lib/rdoc/ri/ri_paths.rb @@ -15,8 +15,8 @@ module RI # # There's contention about all this, but for now: # - # system:: $prefix/lib/ruby//doc/rdoc - # site:: $prefix/lib/ruby/site_dir//doc/rdoc + # system:: $datadir/ri/<ver>/system/... + # site:: $datadir/ri/<ver>/site/...
# user:: ~/.rdoc module Paths @@ -26,8 +26,11 @@ module RI DOC_DIR = "doc/rdoc" - SYSDIR = File.join(Config::CONFIG['rubylibdir'], DOC_DIR) - SITEDIR = File.join(Config::CONFIG['sitelibdir'], DOC_DIR) + version = Config::CONFIG['ruby_version'] + + base = File.join(Config::CONFIG['datadir'], "ri", version) + SYSDIR = File.join(base, "system") + SITEDIR = File.join(base, "site") homedir = ENV['HOME'] || ENV['USERPROFILE'] || ENV['HOMEPATH'] if homedir @@ -36,6 +39,7 @@ module RI HOMEDIR = nil end + # This is the search path for 'ri' PATH = [ SYSDIR, SITEDIR, HOMEDIR ].find_all {|p| p && File.directory?(p)} end end diff --git a/lib/rdoc/ri/ri_reader.rb b/lib/rdoc/ri/ri_reader.rb index eb56d654fb..dd647b3f89 100644 --- a/lib/rdoc/ri/ri_reader.rb +++ b/lib/rdoc/ri/ri_reader.rb @@ -21,6 +21,16 @@ module RI result end + def find_class_by_name(full_name) + names = full_name.split(/::/) + ns = @cache.toplevel + for name in names + ns = ns.contained_class_named(name) + return nil if ns.nil? + end + get_class(ns) + end + def find_methods(name, is_class_method, namespaces) result = [] namespaces.each do |ns| diff --git a/lib/rdoc/ri/ri_writer.rb b/lib/rdoc/ri/ri_writer.rb index 70468cb1f5..78c68e8409 100644 --- a/lib/rdoc/ri/ri_writer.rb +++ b/lib/rdoc/ri/ri_writer.rb @@ -8,6 +8,19 @@ module RI end + # Convert a name from internal form (containing punctuation) + # to an external form (where punctuation is replaced + # by %xx) + + def RiWriter.internal_to_external(name) + name.gsub(/\W/) { sprintf("%%%02x", $&[0]) } + end + + # And the reverse operation + def RiWriter.external_to_internal(name) + name.gsub(/%([0-9a-f]{2,2})/) { $1.to_i(16).chr } + end + def initialize(base_dir) @base_dir = base_dir end @@ -27,7 +40,8 @@ module RI def add_method(class_desc, method_desc) dir = path_to_dir(class_desc.full_name) - meth_file_name = File.join(dir, method_desc.name) + file_name = RiWriter.internal_to_external(method_desc.name) + meth_file_name = File.join(dir, file_name) if 
method_desc.is_singleton meth_file_name += "-c.yaml" else diff --git a/pack.c b/pack.c index 848d9fdcae..ab9b4b65e9 100644 --- a/pack.c +++ b/pack.c @@ -408,6 +408,71 @@ static void qpencode _((VALUE,VALUE,long)); static int uv_to_utf8 _((char*,unsigned long)); static unsigned long utf8_to_uv _((char*,long*)); +/* + * call-seq: + * arr.pack ( aTemplateString ) -> aBinaryString + * + * Packs the contents of arr into a binary sequence according to + * the directives in aTemplateString (see the table below) + * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count, + * which gives the width of the resulting field. The remaining + * directives also may take a count, indicating the number of array + * elements to convert. If the count is an asterisk + * (``*''), all remaining array elements will be + * converted. Any of the directives ``sSiIlL'' may be + * followed by an underscore (``_'') to use the underlying + * platform's native size for the specified type; otherwise, they use a + * platform-independent size. Spaces are ignored in the template + * string. See also String#unpack. + * + * a = [ "a", "b", "c" ] + * n = [ 65, 66, 67 ] + * a.pack("A3A3A3") #=> "a b c " + * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000" + * n.pack("ccc") #=> "ABC" + * + * Directives for +pack+. 
+ * + * Directive Meaning + * --------------------------------------------------------------- + * @ | Moves to absolute position + * A | ASCII string (space padded, count is width) + * a | ASCII string (null padded, count is width) + * B | Bit string (descending bit order) + * b | Bit string (ascending bit order) + * C | Unsigned char + * c | Char + * D, d | Double-precision float, native format + * E | Double-precision float, little-endian byte order + * e | Single-precision float, little-endian byte order + * F, f | Single-precision float, native format + * G | Double-precision float, network (big-endian) byte order + * g | Single-precision float, network (big-endian) byte order + * H | Hex string (high nibble first) + * h | Hex string (low nibble first) + * I | Unsigned integer + * i | Integer + * L | Unsigned long + * l | Long + * M | Quoted printable, MIME encoding (see RFC2045) + * m | Base64 encoded string + * N | Long, network (big-endian) byte order + * n | Short, network (big-endian) byte order + * P | Pointer to a structure (fixed-length string) + * p | Pointer to a null-terminated string + * Q, q | 64-bit number + * S | Unsigned short + * s | Short + * U | UTF-8 + * u | UU-encoded string + * V | Long, little-endian byte order + * v | Short, little-endian byte order + * w | BER-compressed integer + * X | Back up a byte + * x | Null byte + * Z | Same as ``A'' + */ + static VALUE pack_pack(ary, fmt) VALUE ary, fmt; @@ -1153,6 +1218,142 @@ infected_str_new(ptr, len, str) return s; } +/* + * call-seq: + * str.unpack(format) => anArray + * + * Decodes str (which may contain binary data) according to the + * format string, returning an array of each value extracted. The + * format string consists of a sequence of single-character directives, + * summarized in the table at the end of this entry. + * Each directive may be followed + * by a number, indicating the number of times to repeat with this + * directive.
An asterisk (``*'') will use up all + * remaining elements. The directives sSiIlL may each be + * followed by an underscore (``_'') to use the underlying + * platform's native size for the specified type; otherwise, it uses a + * platform-independent consistent size. Spaces are ignored in the + * format string. See also Array#pack. + * + * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "] + * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"] + * "aa".unpack('b8B8') #=> ["10000110", "01100001"] + * "aaa".unpack('h2H2c') #=> ["16", "61", 97] + * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534] + * "now=20is".unpack('M*') #=> ["now is"] + * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"] + * + * This table summarizes the various formats and the Ruby classes + * returned by each. + * + * Format | Returns | Function + * -------+---------+----------------------------------------- + * A | String | with trailing nulls and spaces removed + * -------+---------+----------------------------------------- + * a | String | string + * -------+---------+----------------------------------------- + * B | String | extract bits from each character (msb first) + * -------+---------+----------------------------------------- + * b | String | extract bits from each character (lsb first) + * -------+---------+----------------------------------------- + * C | Fixnum | extract a character as an unsigned integer + * -------+---------+----------------------------------------- + * c | Fixnum | extract a character as an integer + * -------+---------+----------------------------------------- + * d,D | Float | treat sizeof(double) characters as + * | | a native double + * -------+---------+----------------------------------------- + * E | Float | treat sizeof(double) characters as + * | | a double in little-endian byte order + * -------+---------+----------------------------------------- + * e | Float | treat sizeof(float) characters as + * | | a float in little-endian byte order 
+ * -------+---------+----------------------------------------- + * f,F | Float | treat sizeof(float) characters as + * | | a native float + * -------+---------+----------------------------------------- + * G | Float | treat sizeof(double) characters as + * | | a double in network byte order + * -------+---------+----------------------------------------- + * g | Float | treat sizeof(float) characters as a + * | | float in network byte order + * -------+---------+----------------------------------------- + * H | String | extract hex nibbles from each character + * | | (most significant first) + * -------+---------+----------------------------------------- + * h | String | extract hex nibbles from each character + * | | (least significant first) + * -------+---------+----------------------------------------- + * I | Integer | treat sizeof(int) (modified by _) + * | | successive characters as an unsigned + * | | native integer + * -------+---------+----------------------------------------- + * i | Integer | treat sizeof(int) (modified by _) + * | | successive characters as a signed + * | | native integer + * -------+---------+----------------------------------------- + * L | Integer | treat four (modified by _) successive + * | | characters as an unsigned native + * | | long integer + * -------+---------+----------------------------------------- + * l | Integer | treat four (modified by _) successive + * | | characters as a signed native + * | | long integer + * -------+---------+----------------------------------------- + * M | String | quoted-printable + * -------+---------+----------------------------------------- + * m | String | base64-encoded + * -------+---------+----------------------------------------- + * N | Integer | treat four characters as an unsigned + * | | long in network byte order + * -------+---------+----------------------------------------- + * n | Fixnum | treat two characters as an unsigned + * | | short in network byte order + * 
-------+---------+----------------------------------------- + * P | String | treat sizeof(char *) characters as a + * | | pointer, and return len characters + * | | from the referenced location + * -------+---------+----------------------------------------- + * p | String | treat sizeof(char *) characters as a + * | | pointer to a null-terminated string + * -------+---------+----------------------------------------- + * Q | Integer | treat 8 characters as an unsigned + * | | quad word (64 bits) + * -------+---------+----------------------------------------- + * q | Integer | treat 8 characters as a signed + * | | quad word (64 bits) + * -------+---------+----------------------------------------- + * S | Fixnum | treat two (different if _ used) + * | | successive characters as an unsigned + * | | short in native byte order + * -------+---------+----------------------------------------- + * s | Fixnum | treat two (different if _ used) + * | | successive characters as a signed short + * | | in native byte order + * -------+---------+----------------------------------------- + * U | Integer | UTF-8 characters as unsigned integers + * -------+---------+----------------------------------------- + * u | String | UU-encoded + * -------+---------+----------------------------------------- + * V | Fixnum | treat four characters as an unsigned + * | | long in little-endian byte order + * -------+---------+----------------------------------------- + * v | Fixnum | treat two characters as an unsigned + * | | short in little-endian byte order + * -------+---------+----------------------------------------- + * w | Integer | BER-compressed integer (see Array#pack) + * -------+---------+----------------------------------------- + * X | --- | skip backward one character + * -------+---------+----------------------------------------- + * x | --- | skip forward one character + * -------+---------+----------------------------------------- + * Z | String | with trailing nulls
removed + * -------+---------+----------------------------------------- + * @ | --- | skip to the offset given by the + * | | length argument + * -------+---------+----------------------------------------- + */ + static VALUE pack_unpack(str, fmt) VALUE str, fmt; -- cgit v1.2.3