summaryrefslogtreecommitdiff
path: root/trunk/lib/pstore.rb
diff options
context:
space:
mode:
Diffstat (limited to 'trunk/lib/pstore.rb')
-rw-r--r--trunk/lib/pstore.rb543
1 files changed, 543 insertions, 0 deletions
diff --git a/trunk/lib/pstore.rb b/trunk/lib/pstore.rb
new file mode 100644
index 0000000000..fdc518eaec
--- /dev/null
+++ b/trunk/lib/pstore.rb
@@ -0,0 +1,543 @@
+# = PStore -- Transactional File Storage for Ruby Objects
+#
+# pstore.rb -
+# originally by matz
+# documentation by Kev Jackson and James Edward Gray II
+# improved by Hongli Lai
+#
+# See PStore for documentation.
+
+
+require "fileutils"
+require "digest/md5"
+require "thread"
+
+#
+# PStore implements a file based persistence mechanism based on a Hash. User
+# code can store hierarchies of Ruby objects (values) into the data store file
+# by name (keys). An object hierarchy may be just a single object. User code
+# may later read values back from the data store or even update data, as needed.
+#
+# The transactional behavior ensures that any changes succeed or fail together.
+# This can be used to ensure that the data store is not left in a transitory
+# state, where some values were updated but others were not.
+#
+# Behind the scenes, Ruby objects are stored to the data store file with
+# Marshal. That carries the usual limitations. Proc objects cannot be
+# marshalled, for example.
+#
+# == Usage example:
+#
+# require "pstore"
+#
+# # a mock wiki object...
+# class WikiPage
+# def initialize( page_name, author, contents )
+# @page_name = page_name
+# @revisions = Array.new
+#
+# add_revision(author, contents)
+# end
+#
+# attr_reader :page_name
+#
+# def add_revision( author, contents )
+# @revisions << { :created => Time.now,
+# :author => author,
+# :contents => contents }
+# end
+#
+# def wiki_page_references
+# [@page_name] + @revisions.last[:contents].scan(/\b(?:[A-Z]+[a-z]+){2,}/)
+# end
+#
+# # ...
+# end
+#
+# # create a new page...
+# home_page = WikiPage.new( "HomePage", "James Edward Gray II",
+# "A page about the JoysOfDocumentation..." )
+#
+# # then we want to update page data and the index together, or not at all...
+# wiki = PStore.new("wiki_pages.pstore")
+# wiki.transaction do # begin transaction; do all of this or none of it
+# # store page...
+# wiki[home_page.page_name] = home_page
+# # ensure that an index has been created...
+# wiki[:wiki_index] ||= Array.new
+# # update wiki index...
+# wiki[:wiki_index].push(*home_page.wiki_page_references)
+# end # commit changes to wiki data store file
+#
+# ### Some time later... ###
+#
+# # read wiki data...
+# wiki.transaction(true) do # begin read-only transaction, no changes allowed
+# wiki.roots.each do |data_root_name|
+# p data_root_name
+# p wiki[data_root_name]
+# end
+# end
+#
+# == Transaction modes
+#
+# By default, file integrity is only ensured as long as the operating system
+# (and the underlying hardware) doesn't raise any unexpected I/O errors. If an
+# I/O error occurs while PStore is writing to its file, then the file will
+# become corrupted.
+#
+# You can prevent this by setting <em>pstore.ultra_safe = true</em>.
+# However, this results in a minor performance loss, and only works on platforms
+# that support atomic file renames. Please consult the documentation for
+# +ultra_safe+ for details.
+#
+# Needless to say, if you're storing valuable data with PStore, then you should
+# backup the PStore files from time to time.
+class PStore
+ binmode = defined?(File::BINARY) ? File::BINARY : 0
+ RDWR_ACCESS = File::RDWR | File::CREAT | binmode
+ RD_ACCESS = File::RDONLY | binmode
+ WR_ACCESS = File::WRONLY | File::CREAT | File::TRUNC | binmode
+
+ # The error type thrown by all PStore methods.
+ class Error < StandardError
+ end
+
+ # Whether PStore should do its best to prevent file corruptions, even when under
+ # unlikely-to-occur error conditions such as out-of-space conditions and other
+ # unusual OS filesystem errors. Setting this flag comes at the price in the form
+ # of a performance loss.
+ #
+ # This flag only has effect on platforms on which file renames are atomic (e.g.
+ # all POSIX platforms: Linux, MacOS X, FreeBSD, etc). The default value is false.
+ attr_accessor :ultra_safe
+
+ #
+ # To construct a PStore object, pass in the _file_ path where you would like
+ # the data to be stored.
+ #
+ # PStore objects are always reentrant. But if _thread_safe_ is set to true,
+ # then it will become thread-safe at the cost of a minor performance hit.
+ #
+ def initialize(file, thread_safe = false)
+ dir = File::dirname(file)
+ unless File::directory? dir
+ raise PStore::Error, format("directory %s does not exist", dir)
+ end
+ if File::exist? file and not File::readable? file
+ raise PStore::Error, format("file %s not readable", file)
+ end
+ @transaction = false
+ @filename = file
+ @abort = false
+ @ultra_safe = false
+ if @thread_safe
+ @lock = Mutex.new
+ else
+ @lock = DummyMutex.new
+ end
+ end
+
+ # Raises PStore::Error if the calling code is not in a PStore#transaction.
+ def in_transaction
+ raise PStore::Error, "not in transaction" unless @transaction
+ end
+ #
+ # Raises PStore::Error if the calling code is not in a PStore#transaction or
+ # if the code is in a read-only PStore#transaction.
+ #
+ def in_transaction_wr()
+ in_transaction()
+ raise PStore::Error, "in read-only transaction" if @rdonly
+ end
+ private :in_transaction, :in_transaction_wr
+
+ #
+ # Retrieves a value from the PStore file data, by _name_. The hierarchy of
+ # Ruby objects stored under that root _name_ will be returned.
+ #
+ # *WARNING*: This method is only valid in a PStore#transaction. It will
+ # raise PStore::Error if called at any other time.
+ #
+ def [](name)
+ in_transaction
+ @table[name]
+ end
+ #
+ # This method is just like PStore#[], save that you may also provide a
+ # _default_ value for the object. In the event the specified _name_ is not
+ # found in the data store, your _default_ will be returned instead. If you do
+ # not specify a default, PStore::Error will be raised if the object is not
+ # found.
+ #
+ # *WARNING*: This method is only valid in a PStore#transaction. It will
+ # raise PStore::Error if called at any other time.
+ #
+ def fetch(name, default=PStore::Error)
+ in_transaction
+ unless @table.key? name
+ if default == PStore::Error
+ raise PStore::Error, format("undefined root name `%s'", name)
+ else
+ return default
+ end
+ end
+ @table[name]
+ end
+ #
+ # Stores an individual Ruby object or a hierarchy of Ruby objects in the data
+ # store file under the root _name_. Assigning to a _name_ already in the data
+ # store clobbers the old data.
+ #
+ # == Example:
+ #
+ # require "pstore"
+ #
+ # store = PStore.new("data_file.pstore")
+ # store.transaction do # begin transaction
+ # # load some data into the store...
+ # store[:single_object] = "My data..."
+ # store[:obj_heirarchy] = { "Kev Jackson" => ["rational.rb", "pstore.rb"],
+ # "James Gray" => ["erb.rb", "pstore.rb"] }
+ # end # commit changes to data store file
+ #
+ # *WARNING*: This method is only valid in a PStore#transaction and it cannot
+ # be read-only. It will raise PStore::Error if called at any other time.
+ #
+ def []=(name, value)
+ in_transaction_wr()
+ @table[name] = value
+ end
+ #
+ # Removes an object hierarchy from the data store, by _name_.
+ #
+ # *WARNING*: This method is only valid in a PStore#transaction and it cannot
+ # be read-only. It will raise PStore::Error if called at any other time.
+ #
+ def delete(name)
+ in_transaction_wr()
+ @table.delete name
+ end
+
+ #
+ # Returns the names of all object hierarchies currently in the store.
+ #
+ # *WARNING*: This method is only valid in a PStore#transaction. It will
+ # raise PStore::Error if called at any other time.
+ #
+ def roots
+ in_transaction
+ @table.keys
+ end
+ #
+ # Returns true if the supplied _name_ is currently in the data store.
+ #
+ # *WARNING*: This method is only valid in a PStore#transaction. It will
+ # raise PStore::Error if called at any other time.
+ #
+ def root?(name)
+ in_transaction
+ @table.key? name
+ end
+ # Returns the path to the data store file.
+ def path
+ @filename
+ end
+
+ #
+ # Ends the current PStore#transaction, committing any changes to the data
+ # store immediately.
+ #
+ # == Example:
+ #
+ # require "pstore"
+ #
+ # store = PStore.new("data_file.pstore")
+ # store.transaction do # begin transaction
+ # # load some data into the store...
+ # store[:one] = 1
+ # store[:two] = 2
+ #
+ # store.commit # end transaction here, committing changes
+ #
+ # store[:three] = 3 # this change is never reached
+ # end
+ #
+ # *WARNING*: This method is only valid in a PStore#transaction. It will
+ # raise PStore::Error if called at any other time.
+ #
+ def commit
+ in_transaction
+ @abort = false
+ throw :pstore_abort_transaction
+ end
+ #
+ # Ends the current PStore#transaction, discarding any changes to the data
+ # store.
+ #
+ # == Example:
+ #
+ # require "pstore"
+ #
+ # store = PStore.new("data_file.pstore")
+ # store.transaction do # begin transaction
+ # store[:one] = 1 # this change is not applied, see below...
+ # store[:two] = 2 # this change is not applied, see below...
+ #
+ # store.abort # end transaction here, discard all changes
+ #
+ # store[:three] = 3 # this change is never reached
+ # end
+ #
+ # *WARNING*: This method is only valid in a PStore#transaction. It will
+ # raise PStore::Error if called at any other time.
+ #
+ def abort
+ in_transaction
+ @abort = true
+ throw :pstore_abort_transaction
+ end
+
+ #
+ # Opens a new transaction for the data store. Code executed inside a block
+ # passed to this method may read and write data to and from the data store
+ # file.
+ #
+ # At the end of the block, changes are committed to the data store
+ # automatically. You may exit the transaction early with a call to either
+ # PStore#commit or PStore#abort. See those methods for details about how
+ # changes are handled. Raising an uncaught Exception in the block is
+ # equivalent to calling PStore#abort.
+ #
+ # If _read_only_ is set to +true+, you will only be allowed to read from the
+ # data store during the transaction and any attempts to change the data will
+ # raise a PStore::Error.
+ #
+ # Note that PStore does not support nested transactions.
+ #
+ def transaction(read_only = false, &block) # :yields: pstore
+ value = nil
+ raise PStore::Error, "nested transaction" if @transaction
+ @lock.synchronize do
+ @rdonly = read_only
+ @transaction = true
+ @abort = false
+ file = open_and_lock_file(@filename, read_only)
+ if file
+ begin
+ @table, checksum, original_data_size = load_data(file, read_only)
+
+ catch(:pstore_abort_transaction) do
+ value = yield(self)
+ end
+
+ if !@abort && !read_only
+ save_data(checksum, original_data_size, file)
+ end
+ ensure
+ file.close if !file.closed?
+ end
+ else
+ # This can only occur if read_only == true.
+ @table = {}
+ catch(:pstore_abort_transaction) do
+ value = yield(self)
+ end
+ end
+ end
+ value
+ ensure
+ @transaction = false
+ end
+
+ private
+ # Constant for relieving Ruby's garbage collector.
+ EMPTY_STRING = ""
+ EMPTY_MARSHAL_DATA = Marshal.dump({})
+ EMPTY_MARSHAL_CHECKSUM = Digest::MD5.digest(EMPTY_MARSHAL_DATA)
+
+ class DummyMutex
+ def synchronize
+ yield
+ end
+ end
+
+ #
+ # Open the specified filename (either in read-only mode or in
+ # read-write mode) and lock it for reading or writing.
+ #
+ # The opened File object will be returned. If _read_only_ is true,
+ # and the file does not exist, then nil will be returned.
+ #
+ # All exceptions are propagated.
+ #
+ def open_and_lock_file(filename, read_only)
+ if read_only
+ begin
+ file = File.new(filename, RD_ACCESS)
+ begin
+ file.flock(File::LOCK_SH)
+ return file
+ rescue
+ file.close
+ raise
+ end
+ rescue Errno::ENOENT
+ return nil
+ end
+ else
+ file = File.new(filename, RDWR_ACCESS)
+ file.flock(File::LOCK_EX)
+ return file
+ end
+ end
+
+ # Load the given PStore file.
+ # If +read_only+ is true, the unmarshalled Hash will be returned.
+ # If +read_only+ is false, a 3-tuple will be returned: the unmarshalled
+ # Hash, an MD5 checksum of the data, and the size of the data.
+ def load_data(file, read_only)
+ if read_only
+ begin
+ table = load(file)
+ if !table.is_a?(Hash)
+ raise Error, "PStore file seems to be corrupted."
+ end
+ rescue EOFError
+ # This seems to be a newly-created file.
+ table = {}
+ end
+ table
+ else
+ data = file.read
+ if data.empty?
+ # This seems to be a newly-created file.
+ table = {}
+ checksum = empty_marshal_checksum
+ size = empty_marshal_data.size
+ else
+ table = load(data)
+ checksum = Digest::MD5.digest(data)
+ size = data.size
+ if !table.is_a?(Hash)
+ raise Error, "PStore file seems to be corrupted."
+ end
+ end
+ data.replace(EMPTY_STRING)
+ [table, checksum, size]
+ end
+ end
+
+ def on_windows?
+ is_windows = RUBY_PLATFORM =~ /mswin/ ||
+ RUBY_PLATFORM =~ /mingw/ ||
+ RUBY_PLATFORM =~ /bbcwin/ ||
+ RUBY_PLATFORM =~ /wince/
+ self.class.__send__(:define_method, :on_windows?) do
+ is_windows
+ end
+ is_windows
+ end
+
+ # Check whether Marshal.dump supports the 'canonical' option. This option
+ # makes sure that Marshal.dump always dumps data structures in the same order.
+ # This is important because otherwise, the checksums that we generate may differ.
+ def marshal_dump_supports_canonical_option?
+ begin
+ Marshal.dump(nil, -1, true)
+ result = true
+ rescue
+ result = false
+ end
+ self.class.__send__(:define_method, :marshal_dump_supports_canonical_option?) do
+ result
+ end
+ result
+ end
+
+ def save_data(original_checksum, original_file_size, file)
+ # We only want to save the new data if the size or checksum has changed.
+ # This results in less filesystem calls, which is good for performance.
+ if marshal_dump_supports_canonical_option?
+ new_data = Marshal.dump(@table, -1, true)
+ else
+ new_data = dump(@table)
+ end
+ new_checksum = Digest::MD5.digest(new_data)
+
+ if new_data.size != original_file_size || new_checksum != original_checksum
+ if @ultra_safe && !on_windows?
+ # Windows doesn't support atomic file renames.
+ save_data_with_atomic_file_rename_strategy(new_data, file)
+ else
+ save_data_with_fast_strategy(new_data, file)
+ end
+ end
+
+ new_data.replace(EMPTY_STRING)
+ end
+
+ def save_data_with_atomic_file_rename_strategy(data, file)
+ temp_filename = "#{@filename}.tmp.#{Process.pid}.#{rand 1000000}"
+ temp_file = File.new(temp_filename, WR_ACCESS)
+ begin
+ temp_file.flock(File::LOCK_EX)
+ temp_file.write(data)
+ temp_file.flush
+ File.rename(temp_filename, @filename)
+ rescue
+ File.unlink(temp_file) rescue nil
+ raise
+ ensure
+ temp_file.close
+ end
+ end
+
+ def save_data_with_fast_strategy(data, file)
+ file.rewind
+ file.truncate(0)
+ file.write(data)
+ end
+
+
+ # This method is just a wrapped around Marshal.dump
+ # to allow subclass overriding used in YAML::Store.
+ def dump(table) # :nodoc:
+ Marshal::dump(table)
+ end
+
+ # This method is just a wrapped around Marshal.load.
+ # to allow subclass overriding used in YAML::Store.
+ def load(content) # :nodoc:
+ Marshal::load(content)
+ end
+
+ def empty_marshal_data
+ EMPTY_MARSHAL_DATA
+ end
+ def empty_marshal_checksum
+ EMPTY_MARSHAL_CHECKSUM
+ end
+end
+
+# :enddoc:
+
+if __FILE__ == $0
+ db = PStore.new("/tmp/foo")
+ db.transaction do
+ p db.roots
+ ary = db["root"] = [1,2,3,4]
+ ary[1] = [1,1.5]
+ end
+
+ 1000.times do
+ db.transaction do
+ db["root"][0] += 1
+ p db["root"][0]
+ end
+ end
+
+ db.transaction(true) do
+ p db["root"]
+ end
+end