diff --git a/.travis.yml b/.travis.yml index 531cb49..b9833e1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,13 @@ +sudo: false language: ruby rvm: - 1.9.3 - 2.0.0 - - 2.1.1 + - 2.1.10 + - 2.2.8 + - 2.3.4 + - 2.4.2 script: "bundle exec rake spec" + +before_install: + - gem install bundler diff --git a/Gemfile b/Gemfile index 3506a0e..375affa 100644 --- a/Gemfile +++ b/Gemfile @@ -2,5 +2,5 @@ source "http://rubygems.org" gemspec group :test do - gem 'rake' + gem 'rake', '< 11' end diff --git a/README.md b/README.md index 1b84b25..d80b62f 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,9 @@ HashDiff is a ruby library to compute the smallest difference between two hashes. -**Demo**: [HashDiff](http://hashdiff.herokuapp.com/) +It also supports comparing two arrays. + +HashDiff does not monkey-patch any existing class. All features are contained inside the `HashDiff` module. **Docs**: [Documentation](http://rubydoc.info/gems/hashdiff) @@ -70,8 +72,8 @@ diff.should == [['-', 'a[0].x', 2], ['-', 'a[0].z', 4], ['-', 'a[1].y', 22], ['- patch example: ```ruby -a = {a: 3} -b = {a: {a1: 1, a2: 2}} +a = {'a' => 3} +b = {'a' => {'a1' => 1, 'a2' => 2}} diff = HashDiff.diff(a, b) HashDiff.patch!(a, diff).should == b @@ -80,8 +82,8 @@ HashDiff.patch!(a, diff).should == b unpatch example: ```ruby -a = [{a: 1, b: 2, c: 3, d: 4, e: 5}, {x: 5, y: 6, z: 3}, 1] -b = [1, {a: 1, b: 2, c: 3, e: 5}] +a = [{'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4, 'e' => 5}, {'x' => 5, 'y' => 6, 'z' => 3}, 1] +b = [1, {'a' => 1, 'b' => 2, 'c' => 3, 'e' => 5}] diff = HashDiff.diff(a, b) # diff two array is OK HashDiff.unpatch!(b, diff).should == a @@ -89,7 +91,9 @@ HashDiff.unpatch!(b, diff).should == a ### Options -There are five options available: `:delimiter`, `:similarity`, `:strict`, `:numeric_tolerance` and `:strip`. 
+There are eight options available: `:delimiter`, `:similarity`, +`:strict`, `:numeric_tolerance`, `:strip`, `:case_insensitive`, `:array_path` +and `:use_lcs`. #### `:delimiter` @@ -135,6 +139,73 @@ diff = HashDiff.diff(a, b, :comparison => { :numeric_tolerance => 0.1, :strip => diff.should == [["~", "x", 5, 6]] ``` +#### `:case_insensitive` + +The :case_insensitive option makes string comparisons ignore case. + +```ruby +a = {x:5, s:'FooBar'} +b = {x:6, s:'foobar'} + +diff = HashDiff.diff(a, b, :comparison => { :numeric_tolerance => 0.1, :case_insensitive => true }) +diff.should == [["~", "x", 5, 6]] +``` + +#### `:array_path` + +The :array_path option represents the path of the diff in an array rather than +a string. This can be used to show differences between hash key types and +is useful for `patch!` when used on hashes without string keys. + +```ruby +a = {x:5} +b = {'x'=>6} + +diff = HashDiff.diff(a, b, :array_path => true) +diff.should == [['-', [:x], 5], ['+', ['x'], 6]] +``` + +For cases where there are arrays in paths, their index will be added to the path. +```ruby +a = {x:[0,1]} +b = {x:[0,2]} + +diff = HashDiff.diff(a, b, :array_path => true) +diff.should == [["-", [:x, 1], 1], ["+", [:x, 1], 2]] +``` + +This shouldn't cause problems if you are comparing an array with a hash: + +```ruby +a = {x:{0=>1}} +b = {x:[1]} + +diff = HashDiff.diff(a, b, :array_path => true) +diff.should == [["~", [:x], {0=>1}, [1]]] +``` + +#### `:use_lcs` + +The :use_lcs option is used to specify whether a +[Longest common subsequence](https://en.wikipedia.org/wiki/Longest_common_subsequence_problem) +(LCS) algorithm is used to determine differences in arrays. This defaults to +`true` but can be changed to `false` for significantly faster array comparisons +(O(n) complexity rather than O(n^2) for LCS). + +When :use_lcs is false, array comparisons tend to show changes at +indexes rather than the additions and subtractions reported when :use_lcs is +true. 
+ +Note, currently the :similarity option has no effect when :use_lcs is false. + +```ruby +a = {x: [0, 1, 2]} +b = {x: [0, 2, 2, 3]} + +diff = HashDiff.diff(a, b, :use_lcs => false) +diff.should == [["~", "x[1]", 1, 2], ["+", "x[3]", 3]] +``` + #### Specifying a custom comparison method It's possible to specify how the values of a key should be compared. @@ -171,6 +242,8 @@ diff.should == [["~", "a", "car", "bus"], ["~", "b[1]", "plane", " plan"], ["-", When a comparison block is given, it'll be given priority over other specified options. If the block returns value other than `true` or `false`, then the two values will be compared with other specified options. +When used in conjunction with the `array_path` option, the path passed in as an argument will be an array. When determining the ordering of an array a key of `"*"` will be used in place of the `key[*]` field. It is possible, if you have hashes with integer or `"*"` keys, to have problems distinguishing between arrays and hashes - although this shouldn't be an issue unless your data is very difficult to predict and/or your custom rules are very specific. + #### Sorting arrays before comparison An order difference alone between two arrays can create too many diffs to be useful. Consider sorting them prior to diffing. @@ -186,23 +259,6 @@ b[:b].sort! HashDiff.diff(a, b) => [] ``` -### Special use cases - -#### Using HashDiff on JSON API results - -```ruby -require 'uri' -require 'net/http' -require 'json' - -uri = URI('http://time.jsontest.com/') -json_resp = ->(uri) { JSON.parse(Net::HTTP.get_response(uri).body) } -a = json_resp.call(uri) -b = json_resp.call(uri) - -HashDiff.diff(a,b) => [["~", "milliseconds_since_epoch", 1410542545874, 1410542545985]] -``` - ## License HashDiff is distributed under the MIT-LICENSE. 
diff --git a/changelog.md b/changelog.md index a48905c..27879b7 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,41 @@ # Change Log +## v0.3.7 2017-10-08 + +* remove 1.8.7 support from gemspec #39 + +## v0.3.6 2017-08-22 + +* add option `use_lcs` #35 + +## v0.3.5 2017-08-06 + +* add option `array_path` #34 + +## v0.3.4 2017-05-01 + +* performance improvement of HashDiff#similar? #31 + +## v0.3.2 2016-12-27 + +* replace `Fixnum` by `Integer` #28 + +## v0.3.1 2016-11-24 + +* fix an error when a hash has mixed types #26 + +## v0.3.0 2016-2-11 + +* support `:case_insensitive` option + +## v0.2.3 2015-11-5 + +* improve performance of LCS algorithm #12 + +## v0.2.2 2014-10-6 + +* make library 1.8.7 compatible + ## v0.2.1 2014-7-13 * yield added/deleted keys for custom comparison diff --git a/hashdiff.gemspec b/hashdiff.gemspec index 251b504..c50bd0d 100644 --- a/hashdiff.gemspec +++ b/hashdiff.gemspec @@ -12,7 +12,7 @@ Gem::Specification.new do |s| s.test_files = `git ls-files -- Appraisals {spec}/*`.split("\n") s.require_paths = ['lib'] - s.required_ruby_version = Gem::Requirement.new(">= 1.8.7") + s.required_ruby_version = Gem::Requirement.new(">= 1.9.3") s.authors = ["Liu Fengyun"] s.email = ["liufengyunchina@gmail.com"] diff --git a/lib/hashdiff.rb b/lib/hashdiff.rb index 08e4936..1052d0d 100644 --- a/lib/hashdiff.rb +++ b/lib/hashdiff.rb @@ -1,5 +1,6 @@ require 'hashdiff/util' require 'hashdiff/lcs' +require 'hashdiff/linear_compare_array' require 'hashdiff/diff' require 'hashdiff/patch' require 'hashdiff/version' diff --git a/lib/hashdiff/diff.rb b/lib/hashdiff/diff.rb index 65ecdf1..c7cd482 100644 --- a/lib/hashdiff/diff.rb +++ b/lib/hashdiff/diff.rb @@ -7,10 +7,12 @@ module HashDiff # @param [Array, Hash] obj1 # @param [Array, Hash] obj2 # @param [Hash] options the options to use when comparing - # * :strict (Boolean) [true] whether numeric values will be compared on type as well as value. 
Set to false to allow comparing Fixnum, Float, BigDecimal to each other + # * :strict (Boolean) [true] whether numeric values will be compared on type as well as value. Set to false to allow comparing Integer, Float, BigDecimal to each other # * :delimiter (String) ['.'] the delimiter used when returning nested key references # * :numeric_tolerance (Numeric) [0] should be a positive numeric value. Value by which numeric differences must be greater than. By default, numeric values are compared exactly; with the :tolerance option, the difference between numeric values must be greater than the given value. # * :strip (Boolean) [false] whether or not to call #strip on strings before comparing + # * :array_path (Boolean) [false] whether to return the path references for nested values in an array, can be used for patch compatibility with non string keys. + # * :use_lcs (Boolean) [true] whether or not to use an implementation of the Longest common subsequence algorithm for comparing arrays, produces better diffs but is slower. # # @yield [path, value1, value2] Optional block is used to compare each value, instead of default #==. If the block returns value other than true of false, then other specified comparison options will be used to do the comparison. # @@ -27,15 +29,15 @@ module HashDiff def self.best_diff(obj1, obj2, options = {}, &block) options[:comparison] = block if block_given? 
- opts = {similarity: 0.3}.merge!(options) + opts = { :similarity => 0.3 }.merge!(options) diffs_1 = diff(obj1, obj2, opts) count_1 = count_diff diffs_1 - opts = {similarity: 0.5}.merge!(options) + opts = { :similarity => 0.5 }.merge!(options) diffs_2 = diff(obj1, obj2, opts) count_2 = count_diff diffs_2 - opts = {similarity: 0.8}.merge!(options) + opts = { :similarity => 0.8 }.merge!(options) diffs_3 = diff(obj1, obj2, opts) count_3 = count_diff diffs_3 @@ -48,11 +50,14 @@ def self.best_diff(obj1, obj2, options = {}, &block) # @param [Array, Hash] obj1 # @param [Array, Hash] obj2 # @param [Hash] options the options to use when comparing - # * :strict (Boolean) [true] whether numeric values will be compared on type as well as value. Set to false to allow comparing Fixnum, Float, BigDecimal to each other + # * :strict (Boolean) [true] whether numeric values will be compared on type as well as value. Set to false to allow comparing Integer, Float, BigDecimal to each other # * :similarity (Numeric) [0.8] should be between (0, 1]. Meaningful if there are similar hashes in arrays. See {best_diff}. # * :delimiter (String) ['.'] the delimiter used when returning nested key references # * :numeric_tolerance (Numeric) [0] should be a positive numeric value. Value by which numeric differences must be greater than. By default, numeric values are compared exactly; with the :tolerance option, the difference between numeric values must be greater than the given value. # * :strip (Boolean) [false] whether or not to call #strip on strings before comparing + # * :array_path (Boolean) [false] whether to return the path references for nested values in an array, can be used for patch compatibility with non string keys. + # * :use_lcs (Boolean) [true] whether or not to use an implementation of the Longest common subsequence algorithm for comparing arrays, produces better diffs but is slower. 
+ # # # @yield [path, value1, value2] Optional block is used to compare each value, instead of default #==. If the block returns value other than true of false, then other specified comparison options will be used to do the comparison. # @@ -74,9 +79,13 @@ def self.diff(obj1, obj2, options = {}, &block) :delimiter => '.', :strict => true, :strip => false, - :numeric_tolerance => 0 + :numeric_tolerance => 0, + :array_path => false, + :use_lcs => true }.merge!(options) + opts[:prefix] = [] if opts[:array_path] && opts[:prefix] == '' + opts[:comparison] = block if block_given? # prefer to compare with provided block @@ -103,62 +112,59 @@ def self.diff(obj1, obj2, options = {}, &block) end result = [] - if obj1.is_a?(Array) - changeset = diff_array(obj1, obj2, opts) do |lcs| + if obj1.is_a?(Array) && opts[:use_lcs] + changeset = diff_array_lcs(obj1, obj2, opts) do |lcs| # use a's index for similarity lcs.each do |pair| - result.concat(diff(obj1[pair[0]], obj2[pair[1]], opts.merge(prefix: "#{opts[:prefix]}[#{pair[0]}]"))) + prefix = prefix_append_array_index(opts[:prefix], pair[0], opts) + result.concat(diff(obj1[pair[0]], obj2[pair[1]], opts.merge(:prefix => prefix))) end end changeset.each do |change| + change_key = prefix_append_array_index(opts[:prefix], change[1], opts) if change[0] == '-' - result << ['-', "#{opts[:prefix]}[#{change[1]}]", change[2]] + result << ['-', change_key, change[2]] elsif change[0] == '+' - result << ['+', "#{opts[:prefix]}[#{change[1]}]", change[2]] + result << ['+', change_key, change[2]] end end + elsif obj1.is_a?(Array) && !opts[:use_lcs] + result.concat(LinearCompareArray.call(obj1, obj2, opts)) elsif obj1.is_a?(Hash) - if opts[:prefix].empty? 
- prefix = "" - else - prefix = "#{opts[:prefix]}#{opts[:delimiter]}" - end - deleted_keys = [] - common_keys = [] - - obj1.each do |k, v| - if obj2.key?(k) - common_keys << k - else - deleted_keys << k - end - end + deleted_keys = obj1.keys - obj2.keys + common_keys = obj1.keys & obj2.keys + added_keys = obj2.keys - obj1.keys # add deleted properties - deleted_keys.each do |k| - custom_result = custom_compare(opts[:comparison], "#{prefix}#{k}", obj1[k], nil) + deleted_keys.sort_by{|k,v| k.to_s }.each do |k| + change_key = prefix_append_key(opts[:prefix], k, opts) + custom_result = custom_compare(opts[:comparison], change_key, obj1[k], nil) if custom_result result.concat(custom_result) else - result << ['-', "#{prefix}#{k}", obj1[k]] + result << ['-', change_key, obj1[k]] end end # recursive comparison for common keys - common_keys.each {|k| result.concat(diff(obj1[k], obj2[k], opts.merge(prefix: "#{prefix}#{k}"))) } + common_keys.sort_by{|k,v| k.to_s }.each do |k| + prefix = prefix_append_key(opts[:prefix], k, opts) + result.concat(diff(obj1[k], obj2[k], opts.merge(:prefix => prefix))) + end # added properties - obj2.each do |k, v| + added_keys.sort_by{|k,v| k.to_s }.each do |k| + change_key = prefix_append_key(opts[:prefix], k, opts) unless obj1.key?(k) - custom_result = custom_compare(opts[:comparison], "#{prefix}#{k}", nil, v) + custom_result = custom_compare(opts[:comparison], change_key, nil, obj2[k]) if custom_result result.concat(custom_result) else - result << ['+', "#{prefix}#{k}", obj2[k]] + result << ['+', change_key, obj2[k]] end end end @@ -173,7 +179,7 @@ def self.diff(obj1, obj2, options = {}, &block) # @private # # diff array using LCS algorithm - def self.diff_array(a, b, options = {}) + def self.diff_array_lcs(a, b, options = {}) opts = { :prefix => '', :similarity => 0.8, @@ -226,5 +232,4 @@ def self.diff_array(a, b, options = {}) change_set end - end diff --git a/lib/hashdiff/lcs.rb b/lib/hashdiff/lcs.rb index 88c31c9..6da8e42 100644 --- 
a/lib/hashdiff/lcs.rb +++ b/lib/hashdiff/lcs.rb @@ -6,7 +6,7 @@ module HashDiff def self.lcs(a, b, options = {}) opts = { :similarity => 0.8 }.merge!(options) - opts[:prefix] = "#{opts[:prefix]}[*]" + opts[:prefix] = prefix_append_array_index(opts[:prefix], '*', opts) return [] if a.size == 0 or b.size == 0 @@ -16,9 +16,9 @@ def self.lcs(a, b, options = {}) vector = [] lcs = [] - (0..b_finish).each do |bi| + (b_start..b_finish).each do |bi| lcs[bi] = [] - (0..a_finish).each do |ai| + (a_start..a_finish).each do |ai| if similar?(a[ai], b[bi], opts) topleft = (ai > 0 and bi > 0)? lcs[bi-1][ai-1][1] : 0 lcs[bi][ai] = [:topleft, topleft + 1] diff --git a/lib/hashdiff/linear_compare_array.rb b/lib/hashdiff/linear_compare_array.rb new file mode 100644 index 0000000..c2933b6 --- /dev/null +++ b/lib/hashdiff/linear_compare_array.rb @@ -0,0 +1,155 @@ +module HashDiff + # @private + # + # Used to compare arrays in a linear complexity, which produces longer diffs + # than using the lcs algorithm but is considerably faster + class LinearCompareArray + def self.call(old_array, new_array, options = {}) + instance = self.new(old_array, new_array, options) + instance.call + end + + def call + return [] if old_array.empty? && new_array.empty? + + self.old_index = 0 + self.new_index = 0 + # by comparing the array lengths we can expect that a number of items + # are either added or removed + self.expected_additions = new_array.length - old_array.length + + loop do + if extra_items_in_old_array? + append_deletion(old_array[old_index], old_index) + elsif extra_items_in_new_array? + append_addition(new_array[new_index], new_index) + else + compare_at_index + end + + self.old_index = old_index + 1 + self.new_index = new_index + 1 + break if iterated_through_both_arrays? 
+ end + + changes + end + + private + + attr_reader :old_array, :new_array, :options, :additions, :deletions, :differences + attr_accessor :old_index, :new_index, :expected_additions + + def initialize(old_array, new_array, options) + @old_array = old_array + @new_array = new_array + @options = { prefix: '' }.merge!(options) + + @additions = [] + @deletions = [] + @differences = [] + end + + def extra_items_in_old_array? + old_index < old_array.length && new_index >= new_array.length + end + + def extra_items_in_new_array? + new_index < new_array.length && old_index >= old_array.length + end + + def iterated_through_both_arrays? + old_index >= old_array.length && new_index >= new_array.length + end + + def compare_at_index + difference = item_difference(old_array[old_index], new_array[new_index], old_index) + return if difference.empty? + + index_after_additions = index_of_match_after_additions + append_addititions_before_match(index_after_additions) + + index_after_deletions = index_of_match_after_deletions + append_deletions_before_match(index_after_deletions) + + match = index_after_additions || index_after_deletions + + append_differences(difference) unless match + end + + def item_difference(old_item, new_item, item_index) + prefix = HashDiff.prefix_append_array_index(options[:prefix], item_index, options) + HashDiff.diff(old_item, new_item, options.merge(:prefix => prefix)) + end + + # look ahead in the new array to see if the current item appears later + # thereby having new items added + def index_of_match_after_additions + return unless expected_additions > 0 + + (1..expected_additions).each do |i| + next_difference = item_difference( + old_array[old_index], + new_array[new_index + i], + old_index + ) + + return new_index + i if next_difference.empty? 
+ end + + nil + end + + # look ahead in the old array to see if the current item appears later + # thereby having items removed + def index_of_match_after_deletions + return unless expected_additions < 0 + + (1..(expected_additions.abs)).each do |i| + next_difference = item_difference( + old_array[old_index + i], + new_array[new_index], + old_index + ) + + return old_index + i if next_difference.empty? + end + + nil + end + + def append_addititions_before_match(match_index) + return unless match_index + (new_index...match_index).each { |i| append_addition(new_array[i], i) } + self.expected_additions = expected_additions - (match_index - new_index) + self.new_index = match_index + end + + def append_deletions_before_match(match_index) + return unless match_index + (old_index...match_index).each { |i| append_deletion(old_array[i], i) } + self.expected_additions = expected_additions + (match_index - new_index) + self.old_index = match_index + end + + def append_addition(item, index) + key = HashDiff.prefix_append_array_index(options[:prefix], index, options) + additions << ['+', key, item] + end + + def append_deletion(item, index) + key = HashDiff.prefix_append_array_index(options[:prefix], index, options) + deletions << ['-', key, item] + end + + def append_differences(difference) + differences.concat(difference) + end + + def changes + # this algorithm only allows there to be additions or deletions + # deletions are reverse so they don't change the index of earlier items + differences + additions + deletions.reverse + end + end +end diff --git a/lib/hashdiff/patch.rb b/lib/hashdiff/patch.rb index 3c184c4..bb9a036 100644 --- a/lib/hashdiff/patch.rb +++ b/lib/hashdiff/patch.rb @@ -1,4 +1,4 @@ -# +# # This module provides methods to diff two hash, patch and unpatch hash # module HashDiff @@ -17,19 +17,21 @@ def self.patch!(obj, changes, options = {}) delimiter = options[:delimiter] || '.' 
changes.each do |change| - parts = decode_property_path(change[1], delimiter) + parts = change[1] + parts = decode_property_path(parts, delimiter) unless parts.is_a?(Array) + last_part = parts.last parent_node = node(obj, parts[0, parts.size-1]) if change[0] == '+' - if last_part.is_a?(Fixnum) + if parent_node.is_a?(Array) parent_node.insert(last_part, change[2]) else parent_node[last_part] = change[2] end elsif change[0] == '-' - if last_part.is_a?(Fixnum) + if parent_node.is_a?(Array) parent_node.delete_at(last_part) else parent_node.delete(last_part) @@ -56,19 +58,21 @@ def self.unpatch!(obj, changes, options = {}) delimiter = options[:delimiter] || '.' changes.reverse_each do |change| - parts = decode_property_path(change[1], delimiter) + parts = change[1] + parts = decode_property_path(parts, delimiter) unless parts.is_a?(Array) + last_part = parts.last parent_node = node(obj, parts[0, parts.size-1]) if change[0] == '+' - if last_part.is_a?(Fixnum) + if parent_node.is_a?(Array) parent_node.delete_at(last_part) else parent_node.delete(last_part) end elsif change[0] == '-' - if last_part.is_a?(Fixnum) + if parent_node.is_a?(Array) parent_node.insert(last_part, change[2]) else parent_node[last_part] = change[2] diff --git a/lib/hashdiff/util.rb b/lib/hashdiff/util.rb index 6d5aeac..ace3b7d 100644 --- a/lib/hashdiff/util.rb +++ b/lib/hashdiff/util.rb @@ -7,6 +7,7 @@ def self.similar?(a, b, options = {}) if(a.is_a?(Hash) && b.is_a?(Hash) && a["id"].present? && b["id"].present?) return a["id"] == b["id"] end + return compare_values(a, b, options) unless a.is_a?(Array) || a.is_a?(Hash) || b.is_a?(Array) || b.is_a?(Hash) opts = { :similarity => 0.8 }.merge(options) count_a = count_nodes(a) @@ -57,19 +58,17 @@ def self.count_nodes(obj) # # e.g. 
"a.b[3].c" => ['a', 'b', 3, 'c'] def self.decode_property_path(path, delimiter='.') - parts = path.split(delimiter).collect do |part| - if part =~ /^(\w*)\[(\d+)\]$/ + path.split(delimiter).inject([]) do |memo, part| + if part =~ /^(.*)\[(\d+)\]$/ if $1.size > 0 - [$1, $2.to_i] + memo + [$1, $2.to_i] else - $2.to_i + memo + [$2.to_i] end else - part + memo + [part] end end - - parts.flatten end # @private @@ -93,9 +92,13 @@ def self.compare_values(obj1, obj2, options = {}) end if options[:strip] == true - first = obj1.strip if obj1.respond_to?(:strip) - second = obj2.strip if obj2.respond_to?(:strip) - return first == second + obj1 = obj1.strip if obj1.respond_to?(:strip) + obj2 = obj2.strip if obj2.respond_to?(:strip) + end + + if options[:case_insensitive] == true + obj1 = obj1.downcase if obj1.respond_to?(:downcase) + obj2 = obj2.downcase if obj2.respond_to?(:downcase) end obj1 == obj2 @@ -127,4 +130,20 @@ def self.custom_compare(method, key, obj1, obj2) end end end + + def self.prefix_append_key(prefix, key, opts) + if opts[:array_path] + prefix + [key] + else + prefix.empty? ? 
"#{key}" : "#{prefix}#{opts[:delimiter]}#{key}" + end + end + + def self.prefix_append_array_index(prefix, array_index, opts) + if opts[:array_path] + prefix + [array_index] + else + "#{prefix}[#{array_index}]" + end + end end diff --git a/lib/hashdiff/version.rb b/lib/hashdiff/version.rb index 01b8ec0..2dd32ef 100644 --- a/lib/hashdiff/version.rb +++ b/lib/hashdiff/version.rb @@ -1,3 +1,3 @@ module HashDiff - VERSION = '0.2.1' + VERSION = '0.3.7' end diff --git a/spec/hashdiff/best_diff_spec.rb b/spec/hashdiff/best_diff_spec.rb index c444e88..9d9eddf 100644 --- a/spec/hashdiff/best_diff_spec.rb +++ b/spec/hashdiff/best_diff_spec.rb @@ -62,4 +62,13 @@ ['+', 'menu.popup.menuitem[1]', {"value" => "Open", "onclick" => "OpenDoc()"}] ] end + + it "should be able to have an array_path specified" do + a = {'x' => [{'a' => 1, 'c' => 3, 'e' => 5}, {'y' => 3}]} + b = {'x' => [{'a' => 1, 'b' => 2, 'e' => 5}] } + + diff = HashDiff.best_diff(a, b, :array_path => true) + diff.should == [["-", ["x", 0, "c"], 3], ["+", ["x", 0, "b"], 2], ["-", ["x", 1], {"y"=>3}]] + end + end diff --git a/spec/hashdiff/diff_array_spec.rb b/spec/hashdiff/diff_array_spec.rb index c22af75..827226f 100644 --- a/spec/hashdiff/diff_array_spec.rb +++ b/spec/hashdiff/diff_array_spec.rb @@ -5,7 +5,7 @@ a = [1, 2, 3] b = [1, 2, 3] - diff = HashDiff.diff_array(a, b) + diff = HashDiff.diff_array_lcs(a, b) diff.should == [] end @@ -13,7 +13,7 @@ a = [1, 2, 3] b = [1, 8, 7] - diff = HashDiff.diff_array(a, b) + diff = HashDiff.diff_array_lcs(a, b) diff.should == [['-', 2, 3], ['-', 1, 2], ['+', 1, 8], ['+', 2, 7]] end @@ -21,7 +21,7 @@ a = [1, 2] b = [] - diff = HashDiff.diff_array(a, b) + diff = HashDiff.diff_array_lcs(a, b) diff.should == [['-', 1, 2], ['-', 0, 1]] end @@ -29,7 +29,7 @@ a = [] b = [1, 2] - diff = HashDiff.diff_array(a, b) + diff = HashDiff.diff_array_lcs(a, b) diff.should == [['+', 0, 1], ['+', 1, 2]] end @@ -37,7 +37,7 @@ a = [1, 3, 5, 7] b = [2, 3, 7, 5] - diff = HashDiff.diff_array(a, b) + 
diff = HashDiff.diff_array_lcs(a, b) diff.should == [['-', 0, 1], ['+', 0, 2], ['+', 2, 7], ['-', 4, 7]] end @@ -45,14 +45,14 @@ a = [1, 3, 4, 7] b = [2, 3, 7, 5] - diff = HashDiff.diff_array(a, b) + diff = HashDiff.diff_array_lcs(a, b) diff.should == [['-', 0, 1], ['+', 0, 2], ['-', 2, 4], ['+', 3, 5]] end it "should be able to diff two arrays with similar elements" do a = [{'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4, 'e' => 5}, 3] b = [1, {'a' => 1, 'b' => 2, 'c' => 3, 'e' => 5}] - diff = HashDiff.diff_array(a, b) + diff = HashDiff.diff_array_lcs(a, b) diff.should == [['+', 0, 1], ['-', 2, 3]] end diff --git a/spec/hashdiff/diff_spec.rb b/spec/hashdiff/diff_spec.rb index 315d485..62caf3c 100644 --- a/spec/hashdiff/diff_spec.rb +++ b/spec/hashdiff/diff_spec.rb @@ -7,7 +7,7 @@ end it "should be able to diff an hash with an empty hash" do - a = {a:3, b:2} + a = { 'a' => 3, 'b' => 2 } b = {} diff = HashDiff.diff(a, b) @@ -18,32 +18,54 @@ end it "should be able to diff two equal hashes" do - diff = HashDiff.diff({a:2, b:2}, {a:2, b:2}) + diff = HashDiff.diff({ 'a' => 2, 'b' => 2}, { 'a' => 2, 'b' => 2 }) diff.should == [] end + it "should be able to diff two equal hashes with mixed key types" do + a = { 'a' => 1, :b => 1 } + diff = HashDiff.diff(a, a) + diff.should == [] + end + + it "should be able to diff if mixed key types are removed" do + a = { 'a' => 1, :b => 1 } + b = {} + diff = HashDiff.diff(a, b) + diff.should == [["-", "a", 1], ["-", "b", 1]] + end + + it "should be able to diff if mixed key types are added" do + a = { 'a' => 1, :b => 1 } + b = {} + diff = HashDiff.diff(b, a) + diff.should == [["+", "a", 1], ["+", "b", 1]] + end + it "should be able to diff two hashes with equivalent numerics, when strict is false" do - diff = HashDiff.diff({a:2.0, b:2}, {a:2, b:2.0}, :strict => false) + diff = HashDiff.diff({ 'a' => 2.0, 'b' => 2 }, { 'a' => 2, 'b' => 2.0 }, :strict => false) diff.should == [] end it "should be able to diff changes in hash value" do - diff = 
HashDiff.diff({a:2, b:3, c:" hello"}, {a:2, b:4, c:"hello"}) + diff = HashDiff.diff({ 'a' => 2, 'b' => 3, 'c' => " hello" }, { 'a' => 2, 'b' => 4, 'c' => "hello" }) diff.should == [['~', 'b', 3, 4], ['~', 'c', " hello", "hello"]] end it "should be able to diff changes in hash value which is array" do - diff = HashDiff.diff({a:2, b:[1, 2, 3]}, {a:2, b:[1, 3, 4]}) + diff = HashDiff.diff({ 'a' => 2, 'b' => [1, 2, 3] }, { 'a' => 2, 'b' => [1, 3, 4]}) diff.should == [['-', 'b[1]', 2], ['+', 'b[2]', 4]] end it "should be able to diff changes in hash value which is hash" do - diff = HashDiff.diff({a:{x:2, y:3, z:4}, b:{x:3, z:45}}, {a:{y:3}, b:{y:3, z:30}}) + diff = HashDiff.diff({ 'a' => { 'x' => 2, 'y' => 3, 'z' => 4 }, 'b' => { 'x' => 3, 'z' => 45 } }, + { 'a' => { 'y' => 3 }, 'b' => { 'y' => 3, 'z' => 30 } }) diff.should == [['-', 'a.x', 2], ['-', 'a.z', 4], ['-', 'b.x', 3], ['~', 'b.z', 45, 30], ['+', 'b.y', 3]] end it "should be able to diff similar objects in array" do - diff = HashDiff.best_diff({a:[{x:2, y:3, z:4}, {x:11, y:22, z:33}], b:{x:3, z:45}}, {a:[{y:3}, {x:11, z:33}], b:{y:22}}) + diff = HashDiff.best_diff({ 'a' => [{ 'x' => 2, 'y' => 3, 'z' => 4 }, { 'x' => 11, 'y' => 22, 'z' => 33 }], 'b' => { 'x' => 3, 'z' => 45 } }, + { 'a' => [{ 'y' => 3 }, { 'x' => 11, 'z' => 33 }], 'b' => { 'y' => 22 } }) diff.should == [['-', 'a[0].x', 2], ['-', 'a[0].z', 4], ['-', 'a[1].y', 22], ['-', 'b.x', 3], ['-', 'b.z', 45], ['+', 'b.y', 22]] end @@ -137,7 +159,7 @@ a = [{'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4, 'e' => 5}, {'x' => 5, 'y' => 6, 'z' => 3}, 3] b = [{'a' => 1, 'b' => 2, 'c' => 3, 'e' => 5}, 3] - diff = HashDiff.diff(a, b, similarity: 0.8, delimiter: "\t") + diff = HashDiff.diff(a, b, :similarity => 0.8, :delimiter => "\t") diff.should == [["-", "[0]\td", 4], ["-", "[1]", {"x"=>5, "y"=>6, "z"=>3}]] end @@ -167,32 +189,57 @@ context 'when :strip requested' do it "should strip strings before comparing" do - a = {a:" foo", b:"fizz buzz"} - b = {a:"foo", 
b:"fizzbuzz"} + a = { 'a' => " foo", 'b' => "fizz buzz"} + b = { 'a' => "foo", 'b' => "fizzbuzz"} diff = HashDiff.diff(a, b, :strip => true) diff.should == [['~', 'b', "fizz buzz", "fizzbuzz"]] end it "should strip nested strings before comparing" do - a = {a:{x:" foo"}, b:["fizz buzz", "nerf"]} - b = {a:{x:"foo"}, b:["fizzbuzz", "nerf"]} + a = { 'a' => { 'x' => " foo" }, 'b' => ["fizz buzz", "nerf"] } + b = { 'a' => { 'x' => "foo" }, 'b' => ["fizzbuzz", "nerf"] } diff = HashDiff.diff(a, b, :strip => true) diff.should == [['-', 'b[0]', "fizz buzz"], ['+', 'b[0]', "fizzbuzz"]] end end + context 'when :case_insensitive requested' do + it "should strip strings before comparing" do + a = { 'a' => "Foo", 'b' => "fizz buzz"} + b = { 'a' => "foo", 'b' => "fizzBuzz"} + diff = HashDiff.diff(a, b, :case_insensitive => true) + diff.should == [['~', 'b', "fizz buzz", "fizzBuzz"]] + end + + it "should ignore case on nested strings before comparing" do + a = { 'a' => { 'x' => "Foo" }, 'b' => ["fizz buzz", "nerf"] } + b = { 'a' => { 'x' => "foo" }, 'b' => ["fizzbuzz", "nerf"] } + diff = HashDiff.diff(a, b, :case_insensitive => true) + diff.should == [['-', 'b[0]', "fizz buzz"], ['+', 'b[0]', "fizzbuzz"]] + end + end + context 'when both :strip and :numeric_tolerance requested' do it 'should apply filters to proper object types' do - a = {a:" foo", b:35, c:'bar', d:'baz'} - b = {a:"foo", b:35.005, c:'bar', d:18.5} + a = { 'a' => " foo", 'b' => 35, 'c' => 'bar', 'd' => 'baz' } + b = { 'a' => "foo", 'b' => 35.005, 'c' => 'bar', 'd' => 18.5} diff = HashDiff.diff(a, b, :strict => false, :numeric_tolerance => 0.01, :strip => true) diff.should == [['~', 'd', "baz", 18.5]] end end + context "when both :strip and :case_insensitive requested" do + it "should apply both filters to strings" do + a = { 'a' => " Foo", 'b' => "fizz buzz"} + b = { 'a' => "foo", 'b' => "fizzBuzz"} + diff = HashDiff.diff(a, b, :case_insensitive => true, :strip => true) + diff.should == [['~', 'b', "fizz buzz", 
"fizzBuzz"]] + end + end + context 'with custom comparison' do - let(:a) { {a:'car', b:'boat', c:'plane'} } - let(:b) { {a:'bus', b:'truck', c:' plan'} } + let(:a) { { 'a' => 'car', 'b' => 'boat', 'c' => 'plane'} } + let(:b) { { 'a' => 'bus', 'b' => 'truck', 'c' => ' plan'} } it 'should compare using proc specified in block' do diff = HashDiff.diff(a, b) do |prefix, obj1, obj2| @@ -205,8 +252,8 @@ end it 'should yield added keys' do - x = {a:'car', b:'boat'} - y = {a:'car'} + x = { 'a' => 'car', 'b' => 'boat'} + y = { 'a' => 'car' } diff = HashDiff.diff(x, y) do |prefix, obj1, obj2| case prefix @@ -227,4 +274,66 @@ diff.should == [['~', 'b', 'boat', 'truck'], ['~', 'c', 'plane', ' plan']] end end + + context 'when :array_path is true' do + it 'should return the diff path in an array rather than a string' do + x = { 'a' => 'foo' } + y = { 'a' => 'bar' } + diff = HashDiff.diff(x, y, :array_path => true) + + diff.should == [['~', ['a'], 'foo', 'bar']] + end + + it 'should show array indexes in paths' do + x = { 'a' => [0, 1, 2] } + y = { 'a' => [0, 1, 2, 3] } + + diff = HashDiff.diff(x, y, :array_path => true) + + diff.should == [['+', ['a', 3], 3]] + end + + it 'should show differences with string and symbol keys' do + x = { 'a' => 'foo' } + y = { :a => 'bar' } + + diff = HashDiff.diff(x, y, :array_path => true) + diff.should == [['-', ['a'], 'foo'], ['+', [:a], 'bar']] + end + + it 'should support other key types' do + time = Time.now + x = { time => 'foo' } + y = { 0 => 'bar' } + + diff = HashDiff.diff(x, y, :array_path => true) + diff.should == [['-', [time], 'foo'], ['+', [0], 'bar']] + end + end + + context 'when :use_lcs is false' do + it 'should show items in an array as changed' do + x = [:a, :b] + y = [:c, :d] + diff = HashDiff.diff(x, y, :use_lcs => false) + + diff.should == [['~', '[0]', :a, :c], ['~', '[1]', :b, :d]] + end + + it 'should show additions to arrays' do + x = { :a => [0] } + y = { :a => [0, 1] } + diff = HashDiff.diff(x, y, :use_lcs => false) 
+ + diff.should == [['+', 'a[1]', 1]] + end + + it 'shows changes to nested arrays' do + x = { :a => [[0, 1]] } + y = { :a => [[1, 2]] } + diff = HashDiff.diff(x, y, :use_lcs => false) + + diff.should == [['~', 'a[0][0]', 0, 1], ['~', 'a[0][1]', 1, 2]] + end + end end diff --git a/spec/hashdiff/linear_compare_array_spec.rb b/spec/hashdiff/linear_compare_array_spec.rb new file mode 100644 index 0000000..0b2140c --- /dev/null +++ b/spec/hashdiff/linear_compare_array_spec.rb @@ -0,0 +1,48 @@ +require 'spec_helper' + +describe HashDiff::LinearCompareArray do + it "should find no differences between two empty arrays" do + difference = described_class.call([], []) + difference.should == [] + end + + it "should find added items when the old array is empty" do + difference = described_class.call([], [:a, :b]) + difference.should == [['+', '[0]', :a], ['+', '[1]', :b]] + end + + it "should find removed items when the new array is empty" do + difference = described_class.call([:a, :b], []) + difference.should == [['-', '[1]', :b], ['-', '[0]', :a]] + end + + it "should find no differences between identical arrays" do + difference = described_class.call([:a, :b], [:a, :b]) + difference.should == [] + end + + it "should find added items in an array" do + difference = described_class.call([:a, :d], [:a, :b, :c, :d]) + difference.should == [['+', '[1]', :b], ['+', '[2]', :c]] + end + + it "should find removed items in an array" do + difference = described_class.call([:a, :b, :c, :d, :e, :f], [:a, :d, :f]) + difference.should == [['-', '[4]', :e], ['-', '[2]', :c], ['-', '[1]', :b]] + end + + it "should show additions and deletions as changed items" do + difference = described_class.call([:a, :b, :c], [:c, :b, :a]) + difference.should == [['~', '[0]', :a, :c], ['~', '[2]', :c, :a]] + end + + it "should show changed items in a hash" do + difference = described_class.call([{ :a => :b }], [{ :a => :c }]) + difference.should == [['~', '[0].a', :b, :c]] + end + + it "should show 
changed items and added items" do + difference = described_class.call([{ :a => 1, :b => 2 }], [{ :a => 2, :b => 2 }, :item]) + difference.should == [['~', '[0].a', 1, 2], ['+', '[1]', :item]] + end +end diff --git a/spec/hashdiff/patch_spec.rb b/spec/hashdiff/patch_spec.rb index 30d5a2e..8f4df82 100644 --- a/spec/hashdiff/patch_spec.rb +++ b/spec/hashdiff/patch_spec.rb @@ -61,6 +61,18 @@ HashDiff.unpatch!(b, diff).should == a end + it "should be able to patch array under hash key with non-word characters" do + a = {"a" => 1, "b-b" => [1, 2]} + b = {"a" => 1, "b-b" => [2, 1]} + diff = HashDiff.diff(a, b) + + HashDiff.patch!(a, diff).should == b + + a = {"a" => 1, "b-b" => [1, 2]} + b = {"a" => 1, "b-b" => [2, 1]} + HashDiff.unpatch!(b, diff).should == a + end + it "should be able to patch hash value removal" do a = {"a" => 1, "b" => {"b1" => 1, "b2" =>2}} b = {"a" => 1} @@ -133,4 +145,39 @@ HashDiff.unpatch!(b, diff).should == a end + it "should be able to patch hash value removal with custom delimiter" do + a = {"a" => 1, "b" => {"b1" => 1, "b2" =>2}} + b = {"a" => 1, "b" => {"b1" => 3} } + diff = HashDiff.diff(a, b, :delimiter => "\n") + + HashDiff.patch!(a, diff, :delimiter => "\n").should == b + + a = {"a" => 1, "b" => {"b1" => 1, "b2" =>2}} + b = {"a" => 1, "b" => {"b1" => 3} } + HashDiff.unpatch!(b, diff, :delimiter => "\n").should == a + end + + it "should be able to patch when the diff is generated with an array_path" do + a = {"a" => 1, "b" => 1} + b = {"a" => 1, "b" => 2} + diff = HashDiff.diff(a, b, :array_path => true) + + HashDiff.patch!(a, diff).should == b + + a = {"a" => 1, "b" => 1} + b = {"a" => 1, "b" => 2} + HashDiff.unpatch!(b, diff).should == a + end + + it "should be able to use non string keys when diff is generated with an array_path" do + a = {"a" => 1, :a => 2, 0 => 3} + b = {"a" => 5, :a => 6, 0 => 7} + diff = HashDiff.diff(a, b, :array_path => true) + + HashDiff.patch!(a, diff).should == b + + a = {"a" => 1, :a => 2, 0 => 3} + b = {"a" 
=> 5, :a => 6, 0 => 7} + HashDiff.unpatch!(b, diff).should == a + end end diff --git a/spec/hashdiff/util_spec.rb b/spec/hashdiff/util_spec.rb index bce861c..18e745a 100644 --- a/spec/hashdiff/util_spec.rb +++ b/spec/hashdiff/util_spec.rb @@ -14,60 +14,65 @@ it "should be able to tell similiar hash" do a = {'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4, 'e' => 5} b = {'a' => 1, 'b' => 2, 'c' => 3, 'e' => 5} - HashDiff.similar?(a, b).should be_true - HashDiff.similar?(a, b, :similarity => 1).should be_false + HashDiff.similar?(a, b).should be true + HashDiff.similar?(a, b, :similarity => 1).should be false end it "should be able to tell similiar hash with values within tolerance" do a = {'a' => 1.5, 'b' => 2.25, 'c' => 3, 'd' => 4, 'e' => 5} b = {'a' => 1.503, 'b' => 2.22, 'c' => 3, 'e' => 5} - HashDiff.similar?(a, b, :numeric_tolerance => 0.05).should be_true - HashDiff.similar?(a, b).should be_false + HashDiff.similar?(a, b, :numeric_tolerance => 0.05).should be true + HashDiff.similar?(a, b).should be false end it "should be able to tell numbers and strings" do - HashDiff.similar?(1, 2).should be_false - HashDiff.similar?("a", "b").should be_false - HashDiff.similar?("a", [1, 2, 3]).should be_false - HashDiff.similar?(1, {'a' => 1, 'b' => 2, 'c' => 3, 'e' => 5}).should be_false + HashDiff.similar?(1, 2).should be false + HashDiff.similar?("a", "b").should be false + HashDiff.similar?("a", [1, 2, 3]).should be false + HashDiff.similar?(1, {'a' => 1, 'b' => 2, 'c' => 3, 'e' => 5}).should be false end it "should be able to tell true when similarity == 0.5" do a = {"value" => "New1", "onclick" => "CreateNewDoc()"} b = {"value" => "New", "onclick" => "CreateNewDoc()"} - HashDiff.similar?(a, b, :similarity => 0.5).should be_true + HashDiff.similar?(a, b, :similarity => 0.5).should be true end it "should be able to tell false when similarity == 0.5" do a = {"value" => "New1", "onclick" => "open()"} b = {"value" => "New", "onclick" => "CreateNewDoc()"} - HashDiff.similar?(a, b, 
:similarity => 0.5).should be_false + HashDiff.similar?(a, b, :similarity => 0.5).should be false end describe '.compare_values' do it "should compare numeric values exactly when no tolerance" do - expect(HashDiff.compare_values(10.004, 10.003)).to be_false + expect(HashDiff.compare_values(10.004, 10.003)).to be false end it "should allow tolerance with numeric values" do - expect(HashDiff.compare_values(10.004, 10.003, :numeric_tolerance => 0.01)).to be_true + expect(HashDiff.compare_values(10.004, 10.003, :numeric_tolerance => 0.01)).to be true end it "should compare other objects with or without tolerance" do - expect(HashDiff.compare_values('hats', 'ninjas')).to be_false - expect(HashDiff.compare_values('hats', 'ninjas', :numeric_tolerance => 0.01)).to be_false - expect(HashDiff.compare_values('horse', 'horse')).to be_true + expect(HashDiff.compare_values('hats', 'ninjas')).to be false + expect(HashDiff.compare_values('hats', 'ninjas', :numeric_tolerance => 0.01)).to be false + expect(HashDiff.compare_values('horse', 'horse')).to be true end it 'should compare strings exactly by default' do - expect(HashDiff.compare_values(' horse', 'horse')).to be_false + expect(HashDiff.compare_values(' horse', 'horse')).to be false + expect(HashDiff.compare_values('horse', 'Horse')).to be false end it 'should strip strings before comparing when requested' do - expect(HashDiff.compare_values(' horse', 'horse', :strip => true)).to be_true + expect(HashDiff.compare_values(' horse', 'horse', :strip => true)).to be true end + + it "should ignore string case when requested" do + expect(HashDiff.compare_values('horse', 'Horse', :case_insensitive => true)).to be true + end + end end -