From 0dc342de848a642ecce8db697b8fecd83a63e117 Mon Sep 17 00:00:00 2001 From: yugui Date: Mon, 25 Aug 2008 15:02:05 +0000 Subject: added tag v1_9_0_4 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/tags/v1_9_0_4@18845 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- trunk/lib/pstore.rb | 543 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 543 insertions(+) create mode 100644 trunk/lib/pstore.rb (limited to 'trunk/lib/pstore.rb') diff --git a/trunk/lib/pstore.rb b/trunk/lib/pstore.rb new file mode 100644 index 0000000000..fdc518eaec --- /dev/null +++ b/trunk/lib/pstore.rb @@ -0,0 +1,543 @@ +# = PStore -- Transactional File Storage for Ruby Objects +# +# pstore.rb - +# originally by matz +# documentation by Kev Jackson and James Edward Gray II +# improved by Hongli Lai +# +# See PStore for documentation. + + +require "fileutils" +require "digest/md5" +require "thread" + +# +# PStore implements a file based persistence mechanism based on a Hash. User +# code can store hierarchies of Ruby objects (values) into the data store file +# by name (keys). An object hierarchy may be just a single object. User code +# may later read values back from the data store or even update data, as needed. +# +# The transactional behavior ensures that any changes succeed or fail together. +# This can be used to ensure that the data store is not left in a transitory +# state, where some values were updated but others were not. +# +# Behind the scenes, Ruby objects are stored to the data store file with +# Marshal. That carries the usual limitations. Proc objects cannot be +# marshalled, for example. +# +# == Usage example: +# +# require "pstore" +# +# # a mock wiki object... 
+# class WikiPage +# def initialize( page_name, author, contents ) +# @page_name = page_name +# @revisions = Array.new +# +# add_revision(author, contents) +# end +# +# attr_reader :page_name +# +# def add_revision( author, contents ) +# @revisions << { :created => Time.now, +# :author => author, +# :contents => contents } +# end +# +# def wiki_page_references +# [@page_name] + @revisions.last[:contents].scan(/\b(?:[A-Z]+[a-z]+){2,}/) +# end +# +# # ... +# end +# +# # create a new page... +# home_page = WikiPage.new( "HomePage", "James Edward Gray II", +# "A page about the JoysOfDocumentation..." ) +# +# # then we want to update page data and the index together, or not at all... +# wiki = PStore.new("wiki_pages.pstore") +# wiki.transaction do # begin transaction; do all of this or none of it +# # store page... +# wiki[home_page.page_name] = home_page +# # ensure that an index has been created... +# wiki[:wiki_index] ||= Array.new +# # update wiki index... +# wiki[:wiki_index].push(*home_page.wiki_page_references) +# end # commit changes to wiki data store file +# +# ### Some time later... ### +# +# # read wiki data... +# wiki.transaction(true) do # begin read-only transaction, no changes allowed +# wiki.roots.each do |data_root_name| +# p data_root_name +# p wiki[data_root_name] +# end +# end +# +# == Transaction modes +# +# By default, file integrity is only ensured as long as the operating system +# (and the underlying hardware) doesn't raise any unexpected I/O errors. If an +# I/O error occurs while PStore is writing to its file, then the file will +# become corrupted. +# +# You can prevent this by setting pstore.ultra_safe = true. +# However, this results in a minor performance loss, and only works on platforms +# that support atomic file renames. Please consult the documentation for +# +ultra_safe+ for details. +# +# Needless to say, if you're storing valuable data with PStore, then you should +# backup the PStore files from time to time. 
class PStore
  binmode = defined?(File::BINARY) ? File::BINARY : 0
  RDWR_ACCESS = File::RDWR | File::CREAT | binmode
  RD_ACCESS = File::RDONLY | binmode
  WR_ACCESS = File::WRONLY | File::CREAT | File::TRUNC | binmode

  # The error type thrown by all PStore methods.
  class Error < StandardError
  end

  # Whether PStore should do its best to prevent file corruptions, even when
  # under unlikely-to-occur error conditions such as out-of-space conditions
  # and other unusual OS filesystem errors. Setting this flag comes at the
  # price of a performance loss.
  #
  # This flag only has effect on platforms on which file renames are atomic
  # (e.g. all POSIX platforms: Linux, Mac OS X, FreeBSD, etc). The default
  # value is false.
  attr_accessor :ultra_safe

  #
  # To construct a PStore object, pass in the _file_ path where you would like
  # the data to be stored.
  #
  # If _thread_safe_ is true, transactions are serialized with a Mutex so the
  # store can be shared between threads, at the cost of a minor performance
  # hit. Otherwise a no-op lock is used.
  #
  # Raises PStore::Error if the directory of _file_ does not exist, or if
  # _file_ exists but is not readable.
  #
  def initialize(file, thread_safe = false)
    dir = File.dirname(file)
    unless File.directory?(dir)
      raise PStore::Error, format("directory %s does not exist", dir)
    end
    if File.exist?(file) && !File.readable?(file)
      raise PStore::Error, format("file %s not readable", file)
    end
    @transaction = false
    @transaction_owner = nil
    @filename = file
    @abort = false
    @ultra_safe = false
    # Remember the flag itself: the lock selection below and the nesting check
    # in #transaction both depend on it.
    @thread_safe = thread_safe ? true : false
    @lock = @thread_safe ? Mutex.new : DummyMutex.new
  end

  # Raises PStore::Error if the calling code is not in a PStore#transaction.
  def in_transaction
    raise PStore::Error, "not in transaction" unless @transaction
  end

  #
  # Raises PStore::Error if the calling code is not in a PStore#transaction or
  # if the code is in a read-only PStore#transaction.
  #
  def in_transaction_wr
    in_transaction
    raise PStore::Error, "in read-only transaction" if @rdonly
  end
  private :in_transaction, :in_transaction_wr

  #
  # Retrieves a value from the PStore file data, by _name_. The hierarchy of
  # Ruby objects stored under that root _name_ will be returned.
  #
  # *WARNING*: This method is only valid in a PStore#transaction. It will
  # raise PStore::Error if called at any other time.
  #
  def [](name)
    in_transaction
    @table[name]
  end

  #
  # This method is just like PStore#[], save that you may also provide a
  # _default_ value for the object. In the event the specified _name_ is not
  # found in the data store, your _default_ will be returned instead. If you do
  # not specify a default, PStore::Error will be raised if the object is not
  # found.
  #
  # *WARNING*: This method is only valid in a PStore#transaction. It will
  # raise PStore::Error if called at any other time.
  #
  def fetch(name, default=PStore::Error)
    in_transaction
    return @table[name] if @table.key?(name)

    # PStore::Error doubles as the "no default given" sentinel. Compare by
    # identity so a caller-supplied object with a custom #== cannot be
    # mistaken for it.
    if PStore::Error.equal?(default)
      raise PStore::Error, format("undefined root name `%s'", name)
    end
    default
  end

  #
  # Stores an individual Ruby object or a hierarchy of Ruby objects in the data
  # store file under the root _name_. Assigning to a _name_ already in the data
  # store clobbers the old data.
  #
  # == Example:
  #
  #  require "pstore"
  #
  #  store = PStore.new("data_file.pstore")
  #  store.transaction do  # begin transaction
  #    # load some data into the store...
  #    store[:single_object] = "My data..."
  #    store[:obj_hierarchy] = { "Kev Jackson" => ["rational.rb", "pstore.rb"],
  #                              "James Gray"  => ["erb.rb", "pstore.rb"] }
  #  end                   # commit changes to data store file
  #
  # *WARNING*: This method is only valid in a PStore#transaction and it cannot
  # be read-only. It will raise PStore::Error if called at any other time.
  #
  def []=(name, value)
    in_transaction_wr
    @table[name] = value
  end

  #
  # Removes an object hierarchy from the data store, by _name_.
  #
  # *WARNING*: This method is only valid in a PStore#transaction and it cannot
  # be read-only. It will raise PStore::Error if called at any other time.
  #
  def delete(name)
    in_transaction_wr
    @table.delete(name)
  end

  #
  # Returns the names of all object hierarchies currently in the store.
  #
  # *WARNING*: This method is only valid in a PStore#transaction. It will
  # raise PStore::Error if called at any other time.
  #
  def roots
    in_transaction
    @table.keys
  end

  #
  # Returns true if the supplied _name_ is currently in the data store.
  #
  # *WARNING*: This method is only valid in a PStore#transaction. It will
  # raise PStore::Error if called at any other time.
  #
  def root?(name)
    in_transaction
    @table.key?(name)
  end

  # Returns the path to the data store file.
  def path
    @filename
  end

  #
  # Ends the current PStore#transaction, committing any changes to the data
  # store immediately.
  #
  # == Example:
  #
  #  require "pstore"
  #
  #  store = PStore.new("data_file.pstore")
  #  store.transaction do  # begin transaction
  #    # load some data into the store...
  #    store[:one] = 1
  #    store[:two] = 2
  #
  #    store.commit        # end transaction here, committing changes
  #
  #    store[:three] = 3   # this change is never reached
  #  end
  #
  # *WARNING*: This method is only valid in a PStore#transaction. It will
  # raise PStore::Error if called at any other time.
  #
  def commit
    in_transaction
    @abort = false
    throw :pstore_abort_transaction
  end

  #
  # Ends the current PStore#transaction, discarding any changes to the data
  # store.
  #
  # == Example:
  #
  #  require "pstore"
  #
  #  store = PStore.new("data_file.pstore")
  #  store.transaction do  # begin transaction
  #    store[:one] = 1     # this change is not applied, see below...
  #    store[:two] = 2     # this change is not applied, see below...
  #
  #    store.abort         # end transaction here, discard all changes
  #
  #    store[:three] = 3   # this change is never reached
  #  end
  #
  # *WARNING*: This method is only valid in a PStore#transaction. It will
  # raise PStore::Error if called at any other time.
  #
  def abort
    in_transaction
    @abort = true
    throw :pstore_abort_transaction
  end

  #
  # Opens a new transaction for the data store. Code executed inside a block
  # passed to this method may read and write data to and from the data store
  # file. The value of the block is returned (+nil+ if the transaction was
  # ended early with PStore#commit or PStore#abort).
  #
  # At the end of the block, changes are committed to the data store
  # automatically. You may exit the transaction early with a call to either
  # PStore#commit or PStore#abort. See those methods for details about how
  # changes are handled. Raising an uncaught Exception in the block is
  # equivalent to calling PStore#abort.
  #
  # If _read_only_ is set to +true+, you will only be allowed to read from the
  # data store during the transaction and any attempts to change the data will
  # raise a PStore::Error.
  #
  # Note that PStore does not support nested transactions; attempting one
  # raises PStore::Error and leaves the enclosing transaction usable.
  #
  def transaction(read_only = false, &block) # :yields: pstore
    # Reject nesting *before* touching any state, so that a refused nested call
    # cannot reset the flags of the transaction that is already running.
    # In thread-safe mode only the owning thread counts as "nested"; any other
    # thread simply waits for the lock below.
    if @transaction && (!@thread_safe || @transaction_owner.equal?(Thread.current))
      raise PStore::Error, "nested transaction"
    end

    value = nil
    @lock.synchronize do
      begin
        @rdonly = read_only
        @abort = false
        @transaction_owner = Thread.current
        @transaction = true
        file = open_and_lock_file(@filename, read_only)
        if file
          begin
            # In read-only mode load_data returns just the Hash, so checksum
            # and original_data_size are nil (and unused).
            @table, checksum, original_data_size = load_data(file, read_only)

            catch(:pstore_abort_transaction) do
              value = yield(self)
            end

            if !@abort && !read_only
              save_data(checksum, original_data_size, file)
            end
          ensure
            # Closing the file also releases the flock taken on it.
            file.close unless file.closed?
          end
        else
          # This can only occur if read_only == true: the file does not exist
          # yet, so present an empty store.
          @table = {}
          catch(:pstore_abort_transaction) do
            value = yield(self)
          end
        end
      ensure
        @transaction = false
        @transaction_owner = nil
      end
    end
    value
  end

  private

  # Constant for relieving Ruby's garbage collector.
  EMPTY_STRING = ""
  EMPTY_MARSHAL_DATA = Marshal.dump({})
  EMPTY_MARSHAL_CHECKSUM = Digest::MD5.digest(EMPTY_MARSHAL_DATA)

  # RUBY_PLATFORM fragments that identify Windows builds.
  WINDOWS_PLATFORM_PATTERN = /mswin|mingw|bccwin|wince/

  # Whether Marshal.dump accepts the 'canonical' third argument. Probed once
  # at load time; the probe raises on interpreters without the option.
  CANONICAL_MARSHAL_DUMP =
    begin
      Marshal.dump(nil, -1, true)
      true
    rescue StandardError
      false
    end

  # Stand-in for Mutex used when thread safety was not requested:
  # #synchronize just runs the block.
  class DummyMutex
    def synchronize
      yield
    end
  end

  #
  # Open the specified filename (either in read-only mode or in
  # read-write mode) and lock it for reading or writing.
  #
  # The opened File object will be returned. If _read_only_ is true,
  # and the file does not exist, then nil will be returned.
  #
  # All other exceptions are propagated; the file is closed first if locking
  # fails, in either mode.
  #
  def open_and_lock_file(filename, read_only)
    begin
      file = File.new(filename, read_only ? RD_ACCESS : RDWR_ACCESS)
    rescue Errno::ENOENT
      raise unless read_only
      return nil
    end

    begin
      file.flock(read_only ? File::LOCK_SH : File::LOCK_EX)
    rescue
      file.close
      raise
    end
    file
  end

  # Load the given PStore file.
  # If +read_only+ is true, the unmarshalled Hash will be returned.
  # If +read_only+ is false, a 3-tuple will be returned: the unmarshalled
  # Hash, an MD5 checksum of the data, and the size of the data.
  #
  # Raises PStore::Error if the file content does not unmarshal to a Hash.
  def load_data(file, read_only)
    if read_only
      begin
        table = load(file)
        unless table.is_a?(Hash)
          raise Error, "PStore file seems to be corrupted."
        end
      rescue EOFError
        # This seems to be a newly-created file.
        table = {}
      end
      table
    else
      data = file.read
      if data.empty?
        # This seems to be a newly-created file. Report the checksum and size
        # of an empty Hash so that saving an unchanged empty table is a no-op.
        table = {}
        checksum = empty_marshal_checksum
        size = empty_marshal_data.size
      else
        table = load(data)
        checksum = Digest::MD5.digest(data)
        size = data.size
        unless table.is_a?(Hash)
          raise Error, "PStore file seems to be corrupted."
        end
      end
      # Drop the raw file content early to relieve the garbage collector.
      data.replace(EMPTY_STRING)
      [table, checksum, size]
    end
  end

  # Returns true when running on a Windows build of Ruby, where file renames
  # are not atomic.
  def on_windows?
    !!(RUBY_PLATFORM =~ WINDOWS_PLATFORM_PATTERN)
  end

  # Check whether Marshal.dump supports the 'canonical' option. This option
  # makes sure that Marshal.dump always dumps data structures in the same order.
  # This is important because otherwise, the checksums that we generate may differ.
  def marshal_dump_supports_canonical_option?
    CANONICAL_MARSHAL_DUMP
  end

  # Serialize @table and write it to the store, but only if it differs from
  # what was loaded (_original_checksum_ / _original_file_size_). _file_ is the
  # open, exclusively locked data file.
  def save_data(original_checksum, original_file_size, file)
    # We only want to save the new data if the size or checksum has changed.
    # This results in less filesystem calls, which is good for performance.
    if marshal_dump_supports_canonical_option?
      new_data = Marshal.dump(@table, -1, true)
    else
      new_data = dump(@table)
    end
    new_checksum = Digest::MD5.digest(new_data)

    if new_data.size != original_file_size || new_checksum != original_checksum
      if @ultra_safe && !on_windows?
        # Windows doesn't support atomic file renames.
        save_data_with_atomic_file_rename_strategy(new_data, file)
      else
        save_data_with_fast_strategy(new_data, file)
      end
    end

    new_data.replace(EMPTY_STRING)
  end

  # Write _data_ to a temporary sibling file and rename it over the store, so
  # the store file is replaced in one step. _file_ is unused here; it is
  # accepted to mirror save_data_with_fast_strategy.
  def save_data_with_atomic_file_rename_strategy(data, file)
    temp_filename = "#{@filename}.tmp.#{Process.pid}.#{rand 1000000}"
    temp_file = File.new(temp_filename, WR_ACCESS)
    begin
      temp_file.flock(File::LOCK_EX)
      temp_file.write(data)
      temp_file.flush
      File.rename(temp_filename, @filename)
    rescue
      # Best-effort cleanup of the temporary file; the original error is what
      # the caller needs to see, so a failing unlink is deliberately ignored.
      begin
        File.unlink(temp_filename)
      rescue StandardError
        nil
      end
      raise
    ensure
      temp_file.close
    end
  end

  # Overwrite the already-open store _file_ in place with _data_.
  def save_data_with_fast_strategy(data, file)
    file.rewind
    file.truncate(0)
    file.write(data)
  end

  # This method is just a wrapper around Marshal.dump
  # to allow subclass overriding used in YAML::Store.
  def dump(table)  # :nodoc:
    Marshal.dump(table)
  end

  # This method is just a wrapper around Marshal.load
  # to allow subclass overriding used in YAML::Store.
  def load(content)  # :nodoc:
    Marshal.load(content)
  end

  def empty_marshal_data
    EMPTY_MARSHAL_DATA
  end

  def empty_marshal_checksum
    EMPTY_MARSHAL_CHECKSUM
  end
end

# :enddoc:

if __FILE__ == $0
  db = PStore.new("/tmp/foo")
  db.transaction do
    p db.roots
    ary = db["root"] = [1,2,3,4]
    ary[1] = [1,1.5]
  end

  1000.times do
    db.transaction do
      db["root"][0] += 1
      p db["root"][0]
    end
  end

  db.transaction(true) do
    p db["root"]
  end
end