From 548df9479d3fb86be13b0bf5e9f888d4ad2a37e2 Mon Sep 17 00:00:00 2001 From: liufengyun Date: Mon, 6 Oct 2014 20:59:50 +0200 Subject: [PATCH 01/33] make library 1.8.7 compatible --- .travis.yml | 1 + lib/hashdiff/diff.rb | 29 +++++++++++------------------ spec/hashdiff/diff_spec.rb | 38 ++++++++++++++++++++------------------ 3 files changed, 32 insertions(+), 36 deletions(-) diff --git a/.travis.yml b/.travis.yml index 531cb49..c931dfd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,6 @@ language: ruby rvm: + - 1.8.7 - 1.9.3 - 2.0.0 - 2.1.1 diff --git a/lib/hashdiff/diff.rb b/lib/hashdiff/diff.rb index 7c1735f..9f2a0c9 100644 --- a/lib/hashdiff/diff.rb +++ b/lib/hashdiff/diff.rb @@ -27,15 +27,15 @@ module HashDiff def self.best_diff(obj1, obj2, options = {}, &block) options[:comparison] = block if block_given? - opts = {similarity: 0.3}.merge!(options) + opts = { :similarity => 0.3 }.merge!(options) diffs_1 = diff(obj1, obj2, opts) count_1 = count_diff diffs_1 - opts = {similarity: 0.5}.merge!(options) + opts = { :similarity => 0.5 }.merge!(options) diffs_2 = diff(obj1, obj2, opts) count_2 = count_diff diffs_2 - opts = {similarity: 0.8}.merge!(options) + opts = { :similarity => 0.8 }.merge!(options) diffs_3 = diff(obj1, obj2, opts) count_3 = count_diff diffs_3 @@ -104,7 +104,7 @@ def self.diff(obj1, obj2, options = {}, &block) changeset = diff_array(obj1, obj2, opts) do |lcs| # use a's index for similarity lcs.each do |pair| - result.concat(diff(obj1[pair[0]], obj2[pair[1]], opts.merge(prefix: "#{opts[:prefix]}[#{pair[0]}]"))) + result.concat(diff(obj1[pair[0]], obj2[pair[1]], opts.merge(:prefix => "#{opts[:prefix]}[#{pair[0]}]"))) end end @@ -122,19 +122,12 @@ def self.diff(obj1, obj2, options = {}, &block) prefix = "#{opts[:prefix]}#{opts[:delimiter]}" end - deleted_keys = [] - common_keys = [] - - obj1.each do |k, v| - if obj2.key?(k) - common_keys << k - else - deleted_keys << k - end - end + deleted_keys = obj1.keys - obj2.keys + common_keys = obj1.keys 
& obj2.keys + added_keys = obj2.keys - obj1.keys # add deleted properties - deleted_keys.each do |k| + deleted_keys.sort.each do |k| custom_result = custom_compare(opts[:comparison], "#{prefix}#{k}", obj1[k], nil) if custom_result @@ -145,12 +138,12 @@ def self.diff(obj1, obj2, options = {}, &block) end # recursive comparison for common keys - common_keys.each {|k| result.concat(diff(obj1[k], obj2[k], opts.merge(prefix: "#{prefix}#{k}"))) } + common_keys.sort.each {|k| result.concat(diff(obj1[k], obj2[k], opts.merge(:prefix => "#{prefix}#{k}"))) } # added properties - obj2.each do |k, v| + added_keys.sort.each do |k| unless obj1.key?(k) - custom_result = custom_compare(opts[:comparison], "#{prefix}#{k}", nil, v) + custom_result = custom_compare(opts[:comparison], "#{prefix}#{k}", nil, obj2[k]) if custom_result result.concat(custom_result) diff --git a/spec/hashdiff/diff_spec.rb b/spec/hashdiff/diff_spec.rb index 315d485..903e35f 100644 --- a/spec/hashdiff/diff_spec.rb +++ b/spec/hashdiff/diff_spec.rb @@ -7,7 +7,7 @@ end it "should be able to diff an hash with an empty hash" do - a = {a:3, b:2} + a = { 'a' => 3, 'b' => 2 } b = {} diff = HashDiff.diff(a, b) @@ -18,32 +18,34 @@ end it "should be able to diff two equal hashes" do - diff = HashDiff.diff({a:2, b:2}, {a:2, b:2}) + diff = HashDiff.diff({ 'a' => 2, 'b' => 2}, { 'a' => 2, 'b' => 2 }) diff.should == [] end it "should be able to diff two hashes with equivalent numerics, when strict is false" do - diff = HashDiff.diff({a:2.0, b:2}, {a:2, b:2.0}, :strict => false) + diff = HashDiff.diff({ 'a' => 2.0, 'b' => 2 }, { 'a' => 2, 'b' => 2.0 }, :strict => false) diff.should == [] end it "should be able to diff changes in hash value" do - diff = HashDiff.diff({a:2, b:3, c:" hello"}, {a:2, b:4, c:"hello"}) + diff = HashDiff.diff({ 'a' => 2, 'b' => 3, 'c' => " hello" }, { 'a' => 2, 'b' => 4, 'c' => "hello" }) diff.should == [['~', 'b', 3, 4], ['~', 'c', " hello", "hello"]] end it "should be able to diff changes in hash 
value which is array" do - diff = HashDiff.diff({a:2, b:[1, 2, 3]}, {a:2, b:[1, 3, 4]}) + diff = HashDiff.diff({ 'a' => 2, 'b' => [1, 2, 3] }, { 'a' => 2, 'b' => [1, 3, 4]}) diff.should == [['-', 'b[1]', 2], ['+', 'b[2]', 4]] end it "should be able to diff changes in hash value which is hash" do - diff = HashDiff.diff({a:{x:2, y:3, z:4}, b:{x:3, z:45}}, {a:{y:3}, b:{y:3, z:30}}) + diff = HashDiff.diff({ 'a' => { 'x' => 2, 'y' => 3, 'z' => 4 }, 'b' => { 'x' => 3, 'z' => 45 } }, + { 'a' => { 'y' => 3 }, 'b' => { 'y' => 3, 'z' => 30 } }) diff.should == [['-', 'a.x', 2], ['-', 'a.z', 4], ['-', 'b.x', 3], ['~', 'b.z', 45, 30], ['+', 'b.y', 3]] end it "should be able to diff similar objects in array" do - diff = HashDiff.best_diff({a:[{x:2, y:3, z:4}, {x:11, y:22, z:33}], b:{x:3, z:45}}, {a:[{y:3}, {x:11, z:33}], b:{y:22}}) + diff = HashDiff.best_diff({ 'a' => [{ 'x' => 2, 'y' => 3, 'z' => 4 }, { 'x' => 11, 'y' => 22, 'z' => 33 }], 'b' => { 'x' => 3, 'z' => 45 } }, + { 'a' => [{ 'y' => 3 }, { 'x' => 11, 'z' => 33 }], 'b' => { 'y' => 22 } }) diff.should == [['-', 'a[0].x', 2], ['-', 'a[0].z', 4], ['-', 'a[1].y', 22], ['-', 'b.x', 3], ['-', 'b.z', 45], ['+', 'b.y', 22]] end @@ -137,7 +139,7 @@ a = [{'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4, 'e' => 5}, {'x' => 5, 'y' => 6, 'z' => 3}, 3] b = [{'a' => 1, 'b' => 2, 'c' => 3, 'e' => 5}, 3] - diff = HashDiff.diff(a, b, similarity: 0.8, delimiter: "\t") + diff = HashDiff.diff(a, b, :similarity => 0.8, :delimiter => "\t") diff.should == [["-", "[0]\td", 4], ["-", "[1]", {"x"=>5, "y"=>6, "z"=>3}]] end @@ -167,15 +169,15 @@ context 'when :strip requested' do it "should strip strings before comparing" do - a = {a:" foo", b:"fizz buzz"} - b = {a:"foo", b:"fizzbuzz"} + a = { 'a' => " foo", 'b' => "fizz buzz"} + b = { 'a' => "foo", 'b' => "fizzbuzz"} diff = HashDiff.diff(a, b, :strip => true) diff.should == [['~', 'b', "fizz buzz", "fizzbuzz"]] end it "should strip nested strings before comparing" do - a = {a:{x:" foo"}, b:["fizz buzz", 
"nerf"]} - b = {a:{x:"foo"}, b:["fizzbuzz", "nerf"]} + a = { 'a' => { 'x' => " foo" }, 'b' => ["fizz buzz", "nerf"] } + b = { 'a' => { 'x' => "foo" }, 'b' => ["fizzbuzz", "nerf"] } diff = HashDiff.diff(a, b, :strip => true) diff.should == [['-', 'b[0]', "fizz buzz"], ['+', 'b[0]', "fizzbuzz"]] end @@ -183,16 +185,16 @@ context 'when both :strip and :numeric_tolerance requested' do it 'should apply filters to proper object types' do - a = {a:" foo", b:35, c:'bar', d:'baz'} - b = {a:"foo", b:35.005, c:'bar', d:18.5} + a = { 'a' => " foo", 'b' => 35, 'c' => 'bar', 'd' => 'baz' } + b = { 'a' => "foo", 'b' => 35.005, 'c' => 'bar', 'd' => 18.5} diff = HashDiff.diff(a, b, :strict => false, :numeric_tolerance => 0.01, :strip => true) diff.should == [['~', 'd', "baz", 18.5]] end end context 'with custom comparison' do - let(:a) { {a:'car', b:'boat', c:'plane'} } - let(:b) { {a:'bus', b:'truck', c:' plan'} } + let(:a) { { 'a' => 'car', 'b' => 'boat', 'c' => 'plane'} } + let(:b) { { 'a' => 'bus', 'b' => 'truck', 'c' => ' plan'} } it 'should compare using proc specified in block' do diff = HashDiff.diff(a, b) do |prefix, obj1, obj2| @@ -205,8 +207,8 @@ end it 'should yield added keys' do - x = {a:'car', b:'boat'} - y = {a:'car'} + x = { 'a' => 'car', 'b' => 'boat'} + y = { 'a' => 'car' } diff = HashDiff.diff(x, y) do |prefix, obj1, obj2| case prefix From f9cbbeb79f5d7323151177718e88d91c9d284094 Mon Sep 17 00:00:00 2001 From: liufengyun Date: Mon, 6 Oct 2014 21:03:32 +0200 Subject: [PATCH 02/33] update version to 0.2.2 [ci skip] --- changelog.md | 4 ++++ lib/hashdiff/version.rb | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index a48905c..a9a775f 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,9 @@ # Change Log +## v0.2.2 2014-10-6 + +* make library 1.8.7 compatible + ## v0.2.1 2014-7-13 * yield added/deleted keys for custom comparison diff --git a/lib/hashdiff/version.rb b/lib/hashdiff/version.rb index 01b8ec0..6b8efc9 
100644 --- a/lib/hashdiff/version.rb +++ b/lib/hashdiff/version.rb @@ -1,3 +1,3 @@ module HashDiff - VERSION = '0.2.1' + VERSION = '0.2.2' end From 687de5d02c49d0de76da9474909c4d2a11b8d32c Mon Sep 17 00:00:00 2001 From: liu fengyun Date: Wed, 26 Aug 2015 22:08:20 +0200 Subject: [PATCH 03/33] remove demo link --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 1b84b25..9b94662 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,6 @@ HashDiff is a ruby library to compute the smallest difference between two hashes. -**Demo**: [HashDiff](http://hashdiff.herokuapp.com/) - **Docs**: [Documentation](http://rubydoc.info/gems/hashdiff) ## Why HashDiff? From 75ed5b51c5a29cb55ddab0056bfa4814c6b641c3 Mon Sep 17 00:00:00 2001 From: Marek Date: Wed, 4 Nov 2015 15:25:19 +0100 Subject: [PATCH 04/33] Use a_start and b_start variables in HashDiff.lcs These vars were already created at line 13 but they weren't used before. --- lib/hashdiff/lcs.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/hashdiff/lcs.rb b/lib/hashdiff/lcs.rb index 88c31c9..1cc27d5 100644 --- a/lib/hashdiff/lcs.rb +++ b/lib/hashdiff/lcs.rb @@ -16,9 +16,9 @@ def self.lcs(a, b, options = {}) vector = [] lcs = [] - (0..b_finish).each do |bi| + (b_start..b_finish).each do |bi| lcs[bi] = [] - (0..a_finish).each do |ai| + (a_start..a_finish).each do |ai| if similar?(a[ai], b[bi], opts) topleft = (ai > 0 and bi > 0)? 
lcs[bi-1][ai-1][1] : 0 lcs[bi][ai] = [:topleft, topleft + 1] From bfa0320b25c3a8711eb2dbbd9cd9fb3ac58fbe76 Mon Sep 17 00:00:00 2001 From: liu fengyun Date: Thu, 5 Nov 2015 21:14:52 +0100 Subject: [PATCH 05/33] bumps version to 0.2.3 --- changelog.md | 4 ++++ lib/hashdiff/version.rb | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index a9a775f..a370e35 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,9 @@ # Change Log +## v0.2.3 2015-11-5 + +* improve performance of LCS algorithm #12 + ## v0.2.2 2014-10-6 * make library 1.8.7 compatible diff --git a/lib/hashdiff/version.rb b/lib/hashdiff/version.rb index 6b8efc9..452c3f3 100644 --- a/lib/hashdiff/version.rb +++ b/lib/hashdiff/version.rb @@ -1,3 +1,3 @@ module HashDiff - VERSION = '0.2.2' + VERSION = '0.2.3' end From 45c572d72ba94e66568441bd8a39fc01212b267c Mon Sep 17 00:00:00 2001 From: ronco Date: Wed, 10 Feb 2016 21:41:18 -0700 Subject: [PATCH 06/33] Add case insensitive option --- lib/hashdiff/util.rb | 10 +++++++--- spec/hashdiff/diff_spec.rb | 25 +++++++++++++++++++++++++ spec/hashdiff/util_spec.rb | 7 ++++++- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/lib/hashdiff/util.rb b/lib/hashdiff/util.rb index 862d39a..0af9004 100644 --- a/lib/hashdiff/util.rb +++ b/lib/hashdiff/util.rb @@ -90,9 +90,13 @@ def self.compare_values(obj1, obj2, options = {}) end if options[:strip] == true - first = obj1.strip if obj1.respond_to?(:strip) - second = obj2.strip if obj2.respond_to?(:strip) - return first == second + obj1 = obj1.strip if obj1.respond_to?(:strip) + obj2 = obj2.strip if obj2.respond_to?(:strip) + end + + if options[:case_insensitive] == true + obj1 = obj1.downcase if obj1.respond_to?(:downcase) + obj2 = obj2.downcase if obj2.respond_to?(:downcase) end obj1 == obj2 diff --git a/spec/hashdiff/diff_spec.rb b/spec/hashdiff/diff_spec.rb index 903e35f..55296d2 100644 --- a/spec/hashdiff/diff_spec.rb +++ b/spec/hashdiff/diff_spec.rb @@ -183,6 
+183,22 @@ end end + context 'when :case_insensitive requested' do + it "should strip strings before comparing" do + a = { 'a' => "Foo", 'b' => "fizz buzz"} + b = { 'a' => "foo", 'b' => "fizzBuzz"} + diff = HashDiff.diff(a, b, :case_insensitive => true) + diff.should == [['~', 'b', "fizz buzz", "fizzBuzz"]] + end + + it "should ignore case on nested strings before comparing" do + a = { 'a' => { 'x' => "Foo" }, 'b' => ["fizz buzz", "nerf"] } + b = { 'a' => { 'x' => "foo" }, 'b' => ["fizzbuzz", "nerf"] } + diff = HashDiff.diff(a, b, :case_insensitive => true) + diff.should == [['-', 'b[0]', "fizz buzz"], ['+', 'b[0]', "fizzbuzz"]] + end + end + context 'when both :strip and :numeric_tolerance requested' do it 'should apply filters to proper object types' do a = { 'a' => " foo", 'b' => 35, 'c' => 'bar', 'd' => 'baz' } @@ -192,6 +208,15 @@ end end + context "when both :strip and :case_insensitive requested" do + it "should apply both filters to strings" do + a = { 'a' => " Foo", 'b' => "fizz buzz"} + b = { 'a' => "foo", 'b' => "fizzBuzz"} + diff = HashDiff.diff(a, b, :case_insensitive => true, :strip => true) + diff.should == [['~', 'b', "fizz buzz", "fizzBuzz"]] + end + end + context 'with custom comparison' do let(:a) { { 'a' => 'car', 'b' => 'boat', 'c' => 'plane'} } let(:b) { { 'a' => 'bus', 'b' => 'truck', 'c' => ' plan'} } diff --git a/spec/hashdiff/util_spec.rb b/spec/hashdiff/util_spec.rb index bce861c..e340cc0 100644 --- a/spec/hashdiff/util_spec.rb +++ b/spec/hashdiff/util_spec.rb @@ -63,11 +63,16 @@ it 'should compare strings exactly by default' do expect(HashDiff.compare_values(' horse', 'horse')).to be_false + expect(HashDiff.compare_values('horse', 'Horse')).to be_false end it 'should strip strings before comparing when requested' do expect(HashDiff.compare_values(' horse', 'horse', :strip => true)).to be_true end + + it "should ignore string case when requested" do + expect(HashDiff.compare_values('horse', 'Horse', :case_insensitive => true)).to be_true 
+ end + end end - From 366d83bb49801c284b06cbbc0286b863cdab72b9 Mon Sep 17 00:00:00 2001 From: ronco Date: Wed, 10 Feb 2016 21:45:10 -0700 Subject: [PATCH 07/33] add :case_insensitive option to README --- README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9b94662..c104457 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,8 @@ HashDiff.unpatch!(b, diff).should == a ### Options -There are five options available: `:delimiter`, `:similarity`, `:strict`, `:numeric_tolerance` and `:strip`. +There are six options available: `:delimiter`, `:similarity`, +`:strict`, `:numeric_tolerance`, `:strip` and `:case_insensitive`. #### `:delimiter` @@ -133,6 +134,18 @@ diff = HashDiff.diff(a, b, :comparison => { :numeric_tolerance => 0.1, :strip => diff.should == [["~", "x", 5, 6]] ``` +#### `:case_insensitive` + +The :case_insensitive option makes string comparisions ignore case. + +```ruby +a = {x:5, s:'FooBar'} +b = {x:6, s:'foobar'} + +diff = HashDiff.diff(a, b, :comparison => { :numeric_tolerance => 0.1, :case_insensitive => true }) +diff.should == [["~", "x", 5, 6]] +``` + #### Specifying a custom comparison method It's possible to specify how the values of a key should be compared. From 26f6a7150cde6672593f32e7b6f6c78034d188cd Mon Sep 17 00:00:00 2001 From: liu fengyun Date: Thu, 11 Feb 2016 15:22:33 +0100 Subject: [PATCH 08/33] bumps version to 0.3.0 --- README.md | 17 ----------------- changelog.md | 4 ++++ lib/hashdiff/version.rb | 2 +- 3 files changed, 5 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index c104457..5f12da1 100644 --- a/README.md +++ b/README.md @@ -197,23 +197,6 @@ b[:b].sort! 
HashDiff.diff(a, b) => [] ``` -### Special use cases - -#### Using HashDiff on JSON API results - -```ruby -require 'uri' -require 'net/http' -require 'json' - -uri = URI('http://time.jsontest.com/') -json_resp = ->(uri) { JSON.parse(Net::HTTP.get_response(uri).body) } -a = json_resp.call(uri) -b = json_resp.call(uri) - -HashDiff.diff(a,b) => [["~", "milliseconds_since_epoch", 1410542545874, 1410542545985]] -``` - ## License HashDiff is distributed under the MIT-LICENSE. diff --git a/changelog.md b/changelog.md index a370e35..eb4a613 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,9 @@ # Change Log +## v0.3.0 2016-2-11 + +* support `:case_insensitive` option + ## v0.2.3 2015-11-5 * improve performance of LCS algorithm #12 diff --git a/lib/hashdiff/version.rb b/lib/hashdiff/version.rb index 452c3f3..f7c074e 100644 --- a/lib/hashdiff/version.rb +++ b/lib/hashdiff/version.rb @@ -1,3 +1,3 @@ module HashDiff - VERSION = '0.2.3' + VERSION = '0.3.0' end From fcb2b30b1d65e6d552fbfc1a20a2c333d7a7a2c9 Mon Sep 17 00:00:00 2001 From: liu fengyun Date: Thu, 11 Feb 2016 15:37:45 +0100 Subject: [PATCH 09/33] try fix travis test --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index c931dfd..92afbbd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,4 @@ +sudo: false language: ruby rvm: - 1.8.7 From d07ae0a43f24b6260c6a0b24888157889b2f8bc7 Mon Sep 17 00:00:00 2001 From: Eric Cohen Date: Sat, 28 May 2016 21:12:12 +0300 Subject: [PATCH 10/33] Fix bug with array under hash key with non-word characters. A hash key with anything not matching \w (word characters) like -|, etc would not match the regex detecting arrays in the patch path and would not patch correctly. 
--- lib/hashdiff/util.rb | 2 +- spec/hashdiff/patch_spec.rb | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/hashdiff/util.rb b/lib/hashdiff/util.rb index 0af9004..12cf534 100644 --- a/lib/hashdiff/util.rb +++ b/lib/hashdiff/util.rb @@ -55,7 +55,7 @@ def self.count_nodes(obj) # e.g. "a.b[3].c" => ['a', 'b', 3, 'c'] def self.decode_property_path(path, delimiter='.') parts = path.split(delimiter).collect do |part| - if part =~ /^(\w*)\[(\d+)\]$/ + if part =~ /^(.*)\[(\d+)\]$/ if $1.size > 0 [$1, $2.to_i] else diff --git a/spec/hashdiff/patch_spec.rb b/spec/hashdiff/patch_spec.rb index 30d5a2e..6c0f5b0 100644 --- a/spec/hashdiff/patch_spec.rb +++ b/spec/hashdiff/patch_spec.rb @@ -61,6 +61,18 @@ HashDiff.unpatch!(b, diff).should == a end + it "should be able to patch array under hash key with non-word characters" do + a = {"a" => 1, "b-b" => [1, 2]} + b = {"a" => 1, "b-b" => [2, 1]} + diff = HashDiff.diff(a, b) + + HashDiff.patch!(a, diff).should == b + + a = {"a" => 1, "b-b" => [1, 2]} + b = {"a" => 1, "b-b" => [2, 1]} + HashDiff.unpatch!(b, diff).should == a + end + it "should be able to patch hash value removal" do a = {"a" => 1, "b" => {"b1" => 1, "b2" =>2}} b = {"a" => 1} From 7935759814bb2dc689683e6a660d66e334db7b0a Mon Sep 17 00:00:00 2001 From: liu fengyun Date: Sun, 29 May 2016 13:59:30 +0200 Subject: [PATCH 11/33] don't test 1.8.7 --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 92afbbd..cab9990 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ sudo: false language: ruby rvm: - - 1.8.7 - 1.9.3 - 2.0.0 - 2.1.1 From af067d654e19eacb9a20d1d34e0889c0cd098173 Mon Sep 17 00:00:00 2001 From: liu fengyun Date: Thu, 1 Sep 2016 22:17:02 +0200 Subject: [PATCH 12/33] add test to :delimiter in patch/unpatch --- spec/hashdiff/patch_spec.rb | 13 +++++++++++++ spec/hashdiff/util_spec.rb | 38 ++++++++++++++++++------------------- 2 files changed, 32 insertions(+), 19 deletions(-) diff 
--git a/spec/hashdiff/patch_spec.rb b/spec/hashdiff/patch_spec.rb index 6c0f5b0..9d67bf2 100644 --- a/spec/hashdiff/patch_spec.rb +++ b/spec/hashdiff/patch_spec.rb @@ -145,4 +145,17 @@ HashDiff.unpatch!(b, diff).should == a end + it "should be able to patch hash value removal with custom delimiter" do + a = {"a" => 1, "b" => {"b1" => 1, "b2" =>2}} + b = {"a" => 1, "b" => {"b1" => 3} } + diff = HashDiff.diff(a, b, :delimiter => "\n") + + HashDiff.patch!(a, diff, :delimiter => "\n").should == b + + a = {"a" => 1, "b" => {"b1" => 1, "b2" =>2}} + b = {"a" => 1, "b" => {"b1" => 3} } + HashDiff.unpatch!(b, diff, :delimiter => "\n").should == a + end + + end diff --git a/spec/hashdiff/util_spec.rb b/spec/hashdiff/util_spec.rb index e340cc0..18e745a 100644 --- a/spec/hashdiff/util_spec.rb +++ b/spec/hashdiff/util_spec.rb @@ -14,64 +14,64 @@ it "should be able to tell similiar hash" do a = {'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4, 'e' => 5} b = {'a' => 1, 'b' => 2, 'c' => 3, 'e' => 5} - HashDiff.similar?(a, b).should be_true - HashDiff.similar?(a, b, :similarity => 1).should be_false + HashDiff.similar?(a, b).should be true + HashDiff.similar?(a, b, :similarity => 1).should be false end it "should be able to tell similiar hash with values within tolerance" do a = {'a' => 1.5, 'b' => 2.25, 'c' => 3, 'd' => 4, 'e' => 5} b = {'a' => 1.503, 'b' => 2.22, 'c' => 3, 'e' => 5} - HashDiff.similar?(a, b, :numeric_tolerance => 0.05).should be_true - HashDiff.similar?(a, b).should be_false + HashDiff.similar?(a, b, :numeric_tolerance => 0.05).should be true + HashDiff.similar?(a, b).should be false end it "should be able to tell numbers and strings" do - HashDiff.similar?(1, 2).should be_false - HashDiff.similar?("a", "b").should be_false - HashDiff.similar?("a", [1, 2, 3]).should be_false - HashDiff.similar?(1, {'a' => 1, 'b' => 2, 'c' => 3, 'e' => 5}).should be_false + HashDiff.similar?(1, 2).should be false + HashDiff.similar?("a", "b").should be false + HashDiff.similar?("a", [1, 2, 
3]).should be false + HashDiff.similar?(1, {'a' => 1, 'b' => 2, 'c' => 3, 'e' => 5}).should be false end it "should be able to tell true when similarity == 0.5" do a = {"value" => "New1", "onclick" => "CreateNewDoc()"} b = {"value" => "New", "onclick" => "CreateNewDoc()"} - HashDiff.similar?(a, b, :similarity => 0.5).should be_true + HashDiff.similar?(a, b, :similarity => 0.5).should be true end it "should be able to tell false when similarity == 0.5" do a = {"value" => "New1", "onclick" => "open()"} b = {"value" => "New", "onclick" => "CreateNewDoc()"} - HashDiff.similar?(a, b, :similarity => 0.5).should be_false + HashDiff.similar?(a, b, :similarity => 0.5).should be false end describe '.compare_values' do it "should compare numeric values exactly when no tolerance" do - expect(HashDiff.compare_values(10.004, 10.003)).to be_false + expect(HashDiff.compare_values(10.004, 10.003)).to be false end it "should allow tolerance with numeric values" do - expect(HashDiff.compare_values(10.004, 10.003, :numeric_tolerance => 0.01)).to be_true + expect(HashDiff.compare_values(10.004, 10.003, :numeric_tolerance => 0.01)).to be true end it "should compare other objects with or without tolerance" do - expect(HashDiff.compare_values('hats', 'ninjas')).to be_false - expect(HashDiff.compare_values('hats', 'ninjas', :numeric_tolerance => 0.01)).to be_false - expect(HashDiff.compare_values('horse', 'horse')).to be_true + expect(HashDiff.compare_values('hats', 'ninjas')).to be false + expect(HashDiff.compare_values('hats', 'ninjas', :numeric_tolerance => 0.01)).to be false + expect(HashDiff.compare_values('horse', 'horse')).to be true end it 'should compare strings exactly by default' do - expect(HashDiff.compare_values(' horse', 'horse')).to be_false - expect(HashDiff.compare_values('horse', 'Horse')).to be_false + expect(HashDiff.compare_values(' horse', 'horse')).to be false + expect(HashDiff.compare_values('horse', 'Horse')).to be false end it 'should strip strings before 
comparing when requested' do - expect(HashDiff.compare_values(' horse', 'horse', :strip => true)).to be_true + expect(HashDiff.compare_values(' horse', 'horse', :strip => true)).to be true end it "should ignore string case when requested" do - expect(HashDiff.compare_values('horse', 'Horse', :case_insensitive => true)).to be_true + expect(HashDiff.compare_values('horse', 'Horse', :case_insensitive => true)).to be true end end From 68425c12450d0d7c8a8278cf62667bec608d001a Mon Sep 17 00:00:00 2001 From: Andreas Zuber Date: Wed, 23 Nov 2016 17:22:02 +0100 Subject: [PATCH 13/33] fix an error when a hash has mixed types --- lib/hashdiff/diff.rb | 6 +++--- spec/hashdiff/diff_spec.rb | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/lib/hashdiff/diff.rb b/lib/hashdiff/diff.rb index 9f2a0c9..9bf571c 100644 --- a/lib/hashdiff/diff.rb +++ b/lib/hashdiff/diff.rb @@ -127,7 +127,7 @@ def self.diff(obj1, obj2, options = {}, &block) added_keys = obj2.keys - obj1.keys # add deleted properties - deleted_keys.sort.each do |k| + deleted_keys.sort_by{|k,v| k.to_s }.each do |k| custom_result = custom_compare(opts[:comparison], "#{prefix}#{k}", obj1[k], nil) if custom_result @@ -138,10 +138,10 @@ def self.diff(obj1, obj2, options = {}, &block) end # recursive comparison for common keys - common_keys.sort.each {|k| result.concat(diff(obj1[k], obj2[k], opts.merge(:prefix => "#{prefix}#{k}"))) } + common_keys.sort_by{|k,v| k.to_s }.each {|k| result.concat(diff(obj1[k], obj2[k], opts.merge(:prefix => "#{prefix}#{k}"))) } # added properties - added_keys.sort.each do |k| + added_keys.sort_by{|k,v| k.to_s }.each do |k| unless obj1.key?(k) custom_result = custom_compare(opts[:comparison], "#{prefix}#{k}", nil, obj2[k]) diff --git a/spec/hashdiff/diff_spec.rb b/spec/hashdiff/diff_spec.rb index 55296d2..77ee0f1 100644 --- a/spec/hashdiff/diff_spec.rb +++ b/spec/hashdiff/diff_spec.rb @@ -22,6 +22,26 @@ diff.should == [] end + it "should be able to diff two 
equal hashes with mixed key types" do + a = { 'a' => 1, :b => 1 } + diff = HashDiff.diff(a, a) + diff.should == [] + end + + it "should be able to diff if mixed key types are removed" do + a = { 'a' => 1, :b => 1 } + b = {} + diff = HashDiff.diff(a, b) + diff.should == [["-", "a", 1], ["-", "b", 1]] + end + + it "should be able to diff if mixed key types are added" do + a = { 'a' => 1, :b => 1 } + b = {} + diff = HashDiff.diff(b, a) + diff.should == [["+", "a", 1], ["+", "b", 1]] + end + it "should be able to diff two hashes with equivalent numerics, when strict is false" do diff = HashDiff.diff({ 'a' => 2.0, 'b' => 2 }, { 'a' => 2, 'b' => 2.0 }, :strict => false) diff.should == [] From a8f5873425dfce05f9d46b9f0ca00b46111bfe88 Mon Sep 17 00:00:00 2001 From: liu fengyun Date: Thu, 24 Nov 2016 13:11:16 +0100 Subject: [PATCH 14/33] bumps to 0.3.1 --- changelog.md | 4 ++++ lib/hashdiff/version.rb | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index eb4a613..e6b8770 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,9 @@ # Change Log +## v0.3.1 2016-11-24 + +* fix an error when a hash has mixed types #26 + ## v0.3.0 2016-2-11 * support `:case_insensitive` option diff --git a/lib/hashdiff/version.rb b/lib/hashdiff/version.rb index f7c074e..04a74ff 100644 --- a/lib/hashdiff/version.rb +++ b/lib/hashdiff/version.rb @@ -1,3 +1,3 @@ module HashDiff - VERSION = '0.3.0' + VERSION = '0.3.1' end From 938f7476781b2a497f88b706bfd53e5fcb8864e1 Mon Sep 17 00:00:00 2001 From: Vladimir Kochnev Date: Tue, 27 Dec 2016 07:57:33 +0300 Subject: [PATCH 15/33] New rubies support. - Add 2.2, 2.3 and 2.4 to Travis CI. - Modern version of `rake` fails with old RSpec so its version should be restricted. - Fixnum is deprecated in Ruby 2.4.0 so replace it with Integer. 
--- .travis.yml | 5 ++++- Gemfile | 2 +- lib/hashdiff/diff.rb | 4 ++-- lib/hashdiff/patch.rb | 8 ++++---- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index cab9990..3dd8454 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,5 +3,8 @@ language: ruby rvm: - 1.9.3 - 2.0.0 - - 2.1.1 + - 2.1.10 + - 2.2.6 + - 2.3.3 + - 2.4.0 script: "bundle exec rake spec" diff --git a/Gemfile b/Gemfile index 3506a0e..375affa 100644 --- a/Gemfile +++ b/Gemfile @@ -2,5 +2,5 @@ source "http://rubygems.org" gemspec group :test do - gem 'rake' + gem 'rake', '< 11' end diff --git a/lib/hashdiff/diff.rb b/lib/hashdiff/diff.rb index 9bf571c..780e0fc 100644 --- a/lib/hashdiff/diff.rb +++ b/lib/hashdiff/diff.rb @@ -7,7 +7,7 @@ module HashDiff # @param [Array, Hash] obj1 # @param [Array, Hash] obj2 # @param [Hash] options the options to use when comparing - # * :strict (Boolean) [true] whether numeric values will be compared on type as well as value. Set to false to allow comparing Fixnum, Float, BigDecimal to each other + # * :strict (Boolean) [true] whether numeric values will be compared on type as well as value. Set to false to allow comparing Integer, Float, BigDecimal to each other # * :delimiter (String) ['.'] the delimiter used when returning nested key references # * :numeric_tolerance (Numeric) [0] should be a positive numeric value. Value by which numeric differences must be greater than. By default, numeric values are compared exactly; with the :tolerance option, the difference between numeric values must be greater than the given value. # * :strip (Boolean) [false] whether or not to call #strip on strings before comparing @@ -48,7 +48,7 @@ def self.best_diff(obj1, obj2, options = {}, &block) # @param [Array, Hash] obj1 # @param [Array, Hash] obj2 # @param [Hash] options the options to use when comparing - # * :strict (Boolean) [true] whether numeric values will be compared on type as well as value. 
Set to false to allow comparing Fixnum, Float, BigDecimal to each other + # * :strict (Boolean) [true] whether numeric values will be compared on type as well as value. Set to false to allow comparing Integer, Float, BigDecimal to each other # * :similarity (Numeric) [0.8] should be between (0, 1]. Meaningful if there are similar hashes in arrays. See {best_diff}. # * :delimiter (String) ['.'] the delimiter used when returning nested key references # * :numeric_tolerance (Numeric) [0] should be a positive numeric value. Value by which numeric differences must be greater than. By default, numeric values are compared exactly; with the :tolerance option, the difference between numeric values must be greater than the given value. diff --git a/lib/hashdiff/patch.rb b/lib/hashdiff/patch.rb index 3c184c4..553a199 100644 --- a/lib/hashdiff/patch.rb +++ b/lib/hashdiff/patch.rb @@ -23,13 +23,13 @@ def self.patch!(obj, changes, options = {}) parent_node = node(obj, parts[0, parts.size-1]) if change[0] == '+' - if last_part.is_a?(Fixnum) + if last_part.is_a?(Integer) parent_node.insert(last_part, change[2]) else parent_node[last_part] = change[2] end elsif change[0] == '-' - if last_part.is_a?(Fixnum) + if last_part.is_a?(Integer) parent_node.delete_at(last_part) else parent_node.delete(last_part) @@ -62,13 +62,13 @@ def self.unpatch!(obj, changes, options = {}) parent_node = node(obj, parts[0, parts.size-1]) if change[0] == '+' - if last_part.is_a?(Fixnum) + if last_part.is_a?(Integer) parent_node.delete_at(last_part) else parent_node.delete(last_part) end elsif change[0] == '-' - if last_part.is_a?(Fixnum) + if last_part.is_a?(Integer) parent_node.insert(last_part, change[2]) else parent_node[last_part] = change[2] From 9cacb45110aefebc83f6814360331e8573cce2fa Mon Sep 17 00:00:00 2001 From: liu fengyun Date: Tue, 27 Dec 2016 09:49:58 +0100 Subject: [PATCH 16/33] bumps to 0.3.2 --- changelog.md | 4 ++++ lib/hashdiff/version.rb | 2 +- 2 files changed, 5 insertions(+), 1 
deletion(-) diff --git a/changelog.md b/changelog.md index e6b8770..3873d17 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,9 @@ # Change Log +## v0.3.2 2016-12-27 + +* replace `Fixnum` by `Integer` #28 + ## v0.3.1 2016-11-24 * fix an error when a hash has mixed types #26 diff --git a/lib/hashdiff/version.rb b/lib/hashdiff/version.rb index 04a74ff..f17dac7 100644 --- a/lib/hashdiff/version.rb +++ b/lib/hashdiff/version.rb @@ -1,3 +1,3 @@ module HashDiff - VERSION = '0.3.1' + VERSION = '0.3.2' end From 1a4bf751237b32202f6e03af5e633c3f84ad3cdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thibaut=20Barr=C3=A8re?= Date: Wed, 8 Feb 2017 11:08:59 +0100 Subject: [PATCH 17/33] Mention 2 compelling reasons to start using HashDiff --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 5f12da1..79c86cf 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,10 @@ HashDiff is a ruby library to compute the smallest difference between two hashes. +It also supports comparing two arrays. + +HashDiff does not monkey-patch any existing class. All features are contained inside the `HashDiff` module. + **Docs**: [Documentation](http://rubydoc.info/gems/hashdiff) ## Why HashDiff? From 1d1960a06cc73b3aab2eac6a0a8ee247e0a672b0 Mon Sep 17 00:00:00 2001 From: cloakedcode Date: Sun, 30 Apr 2017 20:06:30 -0700 Subject: [PATCH 18/33] Greatly improve performance of HashDiff#similar? HashDiff#similar? compares values based on "node count" using HashDiff#count_nodes and HashDiff#diff (if the counts don't cancel out). If `a` and `b` are arrays/hashes `count_nodes` recursively counts the elements, otherwise returns 1. If `a` and `b` are not arrays/hashes, HashDiff#similar? needlessly counts/diffs values that will ultimately end up passing through HashDiff#compare_values. A considerable performance improvement can be had by circumventing the needless recursion and call HashDiff#compare_values if `a` and `b` are not arrays/hashes. 
This has been benchmarked with this snippet: ```ruby $LOAD_PATH << File.join(File.dirname(__FILE__), 'lib') require 'hashdiff' require 'benchmark' seq1 = %w(a b c e h j l m n p) seq2 = %w(a b c d e f j k l m r s t) n = 10000 Benchmark.bm do |x| x.report('lcs') { n.times do ; HashDiff.lcs(seq1, seq2); end } end ``` Before: ```sh $ ruby benchmark.rb user system total real lcs 6.640000 0.010000 6.650000 ( 6.649822) ``` After: ```sh $ ruby benchmark.rb user system total real lcs 1.030000 0.010000 1.040000 ( 1.037542) ``` --- lib/hashdiff/util.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/hashdiff/util.rb b/lib/hashdiff/util.rb index 12cf534..6268e7d 100644 --- a/lib/hashdiff/util.rb +++ b/lib/hashdiff/util.rb @@ -4,6 +4,7 @@ module HashDiff # # judge whether two objects are similar def self.similar?(a, b, options = {}) + return compare_values(a, b, options) unless a.is_a?(Array) || a.is_a?(Hash) || b.is_a?(Array) || b.is_a?(Hash) opts = { :similarity => 0.8 }.merge(options) count_a = count_nodes(a) From 7588d7d87659064ef0815f03a3929c33b1d4b64d Mon Sep 17 00:00:00 2001 From: liu fengyun Date: Mon, 1 May 2017 06:02:56 +0200 Subject: [PATCH 19/33] bumps to 0.3.4 --- changelog.md | 4 ++++ lib/hashdiff/version.rb | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index 3873d17..a76b324 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,9 @@ # Change Log +## v0.3.4 2017-05-01 + +* performance improvement of HashDiff#similar? 
#31 + ## v0.3.2 2016-12-27 * replace `Fixnum` by `Integer` #28 diff --git a/lib/hashdiff/version.rb b/lib/hashdiff/version.rb index f17dac7..64a29ea 100644 --- a/lib/hashdiff/version.rb +++ b/lib/hashdiff/version.rb @@ -1,3 +1,3 @@ module HashDiff - VERSION = '0.3.2' + VERSION = '0.3.4' end From 7ada0b75bc1121f0a006ad66b97c44fb45038f12 Mon Sep 17 00:00:00 2001 From: Stephen Date: Wed, 5 Jul 2017 18:57:46 -0700 Subject: [PATCH 20/33] add codecov gem --- Gemfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Gemfile b/Gemfile index 375affa..4f18655 100644 --- a/Gemfile +++ b/Gemfile @@ -3,4 +3,5 @@ gemspec group :test do gem 'rake', '< 11' + gem 'codecov' end From c553aa3cbe7fc16e210de94867aa8b962c02ae76 Mon Sep 17 00:00:00 2001 From: Stephen Date: Wed, 5 Jul 2017 18:58:42 -0700 Subject: [PATCH 21/33] add codecov --- spec/spec_helper.rb | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index c52a6fc..1b12d53 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,3 +1,10 @@ +require 'simplecov' +SimpleCov.start +if ENV['CI'] == 'true' + require 'codecov' + SimpleCov.formatter = SimpleCov::Formatter::Codecov +end + $LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib') require 'rubygems' From acb2d7e0ea29f96d0e53ee30d5ea63fd8e43686c Mon Sep 17 00:00:00 2001 From: Kevin Dew Date: Sat, 29 Jul 2017 13:49:25 +0100 Subject: [PATCH 22/33] Update patch documentation on README The patches suggested in the README only work when you have string keys on the hash. 
--- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 79c86cf..3644400 100644 --- a/README.md +++ b/README.md @@ -72,8 +72,8 @@ diff.should == [['-', 'a[0].x', 2], ['-', 'a[0].z', 4], ['-', 'a[1].y', 22], ['- patch example: ```ruby -a = {a: 3} -b = {a: {a1: 1, a2: 2}} +a = {'a' => 3} +b = {'a' => {'a1' => 1, 'a2' => 2}} diff = HashDiff.diff(a, b) HashDiff.patch!(a, diff).should == b @@ -82,8 +82,8 @@ HashDiff.patch!(a, diff).should == b unpatch example: ```ruby -a = [{a: 1, b: 2, c: 3, d: 4, e: 5}, {x: 5, y: 6, z: 3}, 1] -b = [1, {a: 1, b: 2, c: 3, e: 5}] +a = [{'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4, 'e' => 5}, {'x' => 5, 'y' => 6, 'z' => 3}, 1] +b = [1, {'a' => 1, 'b' => 2, 'c' => 3, 'e' => 5}] diff = HashDiff.diff(a, b) # diff two array is OK HashDiff.unpatch!(b, diff).should == a From 9f44b1cb83081a698d11bf6aab7b51b6cc492c2c Mon Sep 17 00:00:00 2001 From: Kevin Dew Date: Sat, 29 Jul 2017 13:51:33 +0100 Subject: [PATCH 23/33] Introduce an array_path option This introduces an array_path option that can be used when generating a diff. This represents the path to aspects of the diff as an array rather than a string. eg. ``` x = {'a' => 1} y = {'a' => 2} HashDiff.diff(x, y) => [["~", "a", 1, 2]] HashDiff.diff(x, y, :array_path => true) => [["~", ["a"], 1, 2]] ``` This allows there to be more flexibility with the types used as keys in a hash.
Allowing workarounds for issues such as: https://github.com/liufengyun/hashdiff/issues/25 eg ``` x = {'a'=>1} y = {:a=>1} HashDiff.diff(x, y) => [["-", "a", 1], ["+", "a", 1]] HashDiff.diff(x, y, :array_path => true) => [["-", ["a"], 1], ["+", [:a], 1]] ``` And improved ability to patch hashes with keys: eg ``` x = {a: {b: :c}} y = {a: {b: :d}} diff = HashDiff.diff(x, y) => [["~", "a.b", :c, :d]] HashDiff.patch!(x, diff) NoMethodError: undefined method `[]=' for nil:NilClass diff = HashDiff.diff(x, y, array_path: true) => [["~", [:a, :b], :c, :d]] HashDiff.patch!(x, diff) => {:a=>{:b=>:d}} ``` This updates the `patch!` and `unpatch!` methods to accept diffs with either paths as strings or as arrays. --- README.md | 40 +++++++++++++++++++++++++++++++-- lib/hashdiff/diff.rb | 35 +++++++++++++++++------------ lib/hashdiff/lcs.rb | 2 +- lib/hashdiff/patch.rb | 18 +++++++++------ lib/hashdiff/util.rb | 26 ++++++++++++++++----- spec/hashdiff/best_diff_spec.rb | 9 ++++++++ spec/hashdiff/diff_spec.rb | 36 +++++++++++++++++++++++++++++ spec/hashdiff/patch_spec.rb | 22 ++++++++++++++++++ 8 files changed, 158 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 3644400..691a41f 100644 --- a/README.md +++ b/README.md @@ -91,8 +91,9 @@ HashDiff.unpatch!(b, diff).should == a ### Options -There are six options available: `:delimiter`, `:similarity`, -`:strict`, `:numeric_tolerance`, `:strip` and `:case_insensitive`. +There are seven options available: `:delimiter`, `:similarity`, +`:strict`, `:numeric_tolerance`, `:strip`, `:case_insensitive` +and `:array_path`. #### `:delimiter` @@ -150,6 +151,39 @@ diff = HashDiff.diff(a, b, :comparison => { :numeric_tolerance => 0.1, :case_ins diff.should == [["~", "x", 5, 6]] ``` +#### `:array_path` + +The :array_path option represents the path of the diff in an array rather than +a string. This can be used to show differences in between hash key types and +is useful for `patch!` when used on hashes without string keys. 
+ +```ruby +a = {x:5} +b = {'x'=>6} + +diff = HashDiff.diff(a, b, :array_path => true) +diff.should == [['-', [:x], 5], ['+', ['x'], 6]] +``` + +For cases where there are arrays in paths their index will be added to the path. +```ruby +a = {x:[0,1]} +b = {x:[0,2]} + +diff = HashDiff.diff(a, b, :array_path => true) +diff.should == [["-", [:x, 1], 1], ["+", [:x, 1], 2]] +``` + +This shouldn't cause problems if you are comparing an array with a hash: + +```ruby +a = {x:{0=>1}} +b = {x:[1]} + +diff = HashDiff.diff(a, b, :array_path => true) +diff.should == [["~", [:a], [1], {0=>1}]] +``` + #### Specifying a custom comparison method It's possible to specify how the values of a key should be compared. @@ -186,6 +220,8 @@ diff.should == [["~", "a", "car", "bus"], ["~", "b[1]", "plane", " plan"], ["-", When a comparison block is given, it'll be given priority over other specified options. If the block returns value other than `true` or `false`, then the two values will be compared with other specified options. +When used in conjunction with the `array_path` option, the path passed in as an argument will be an array. When determining the ordering of an array a key of `"*"` will be used in place of the `key[*]` field. It is possible, if you have hashes with integer or `"*"` keys, to have problems distinguishing between arrays and hashes - although this shouldn't be an issue unless your data is very difficult to predict and/or your custom rules are very specific. + #### Sorting arrays before comparison An order difference alone between two arrays can create too many diffs to be useful. Consider sorting them prior to diffing. diff --git a/lib/hashdiff/diff.rb b/lib/hashdiff/diff.rb index 780e0fc..60f1610 100644 --- a/lib/hashdiff/diff.rb +++ b/lib/hashdiff/diff.rb @@ -11,6 +11,7 @@ module HashDiff # * :delimiter (String) ['.'] the delimiter used when returning nested key references # * :numeric_tolerance (Numeric) [0] should be a positive numeric value. 
Value by which numeric differences must be greater than. By default, numeric values are compared exactly; with the :tolerance option, the difference between numeric values must be greater than the given value. # * :strip (Boolean) [false] whether or not to call #strip on strings before comparing + # * :array_path (Boolean) [false] whether to return the path references for nested values in an array, can be used for patch compatibility with non string keys. # # @yield [path, value1, value2] Optional block is used to compare each value, instead of default #==. If the block returns value other than true of false, then other specified comparison options will be used to do the comparison. # @@ -53,6 +54,7 @@ def self.best_diff(obj1, obj2, options = {}, &block) # * :delimiter (String) ['.'] the delimiter used when returning nested key references # * :numeric_tolerance (Numeric) [0] should be a positive numeric value. Value by which numeric differences must be greater than. By default, numeric values are compared exactly; with the :tolerance option, the difference between numeric values must be greater than the given value. # * :strip (Boolean) [false] whether or not to call #strip on strings before comparing + # * :array_path (Boolean) [false] whether to return the path references for nested values in an array, can be used for patch compatibility with non string keys. # # @yield [path, value1, value2] Optional block is used to compare each value, instead of default #==. If the block returns value other than true of false, then other specified comparison options will be used to do the comparison. # @@ -74,9 +76,12 @@ def self.diff(obj1, obj2, options = {}, &block) :delimiter => '.', :strict => true, :strip => false, - :numeric_tolerance => 0 + :numeric_tolerance => 0, + :array_path => false }.merge!(options) + opts[:prefix] = [] if opts[:array_path] && opts[:prefix] == '' + opts[:comparison] = block if block_given? 
# prefer to compare with provided block @@ -104,23 +109,20 @@ def self.diff(obj1, obj2, options = {}, &block) changeset = diff_array(obj1, obj2, opts) do |lcs| # use a's index for similarity lcs.each do |pair| - result.concat(diff(obj1[pair[0]], obj2[pair[1]], opts.merge(:prefix => "#{opts[:prefix]}[#{pair[0]}]"))) + prefix = prefix_append_array_index(opts[:prefix], pair[0], opts) + result.concat(diff(obj1[pair[0]], obj2[pair[1]], opts.merge(:prefix => prefix))) end end changeset.each do |change| + change_key = prefix_append_array_index(opts[:prefix], change[1], opts) if change[0] == '-' - result << ['-', "#{opts[:prefix]}[#{change[1]}]", change[2]] + result << ['-', change_key, change[2]] elsif change[0] == '+' - result << ['+', "#{opts[:prefix]}[#{change[1]}]", change[2]] + result << ['+', change_key, change[2]] end end elsif obj1.is_a?(Hash) - if opts[:prefix].empty? - prefix = "" - else - prefix = "#{opts[:prefix]}#{opts[:delimiter]}" - end deleted_keys = obj1.keys - obj2.keys common_keys = obj1.keys & obj2.keys @@ -128,27 +130,32 @@ def self.diff(obj1, obj2, options = {}, &block) # add deleted properties deleted_keys.sort_by{|k,v| k.to_s }.each do |k| - custom_result = custom_compare(opts[:comparison], "#{prefix}#{k}", obj1[k], nil) + change_key = prefix_append_key(opts[:prefix], k, opts) + custom_result = custom_compare(opts[:comparison], change_key, obj1[k], nil) if custom_result result.concat(custom_result) else - result << ['-', "#{prefix}#{k}", obj1[k]] + result << ['-', change_key, obj1[k]] end end # recursive comparison for common keys - common_keys.sort_by{|k,v| k.to_s }.each {|k| result.concat(diff(obj1[k], obj2[k], opts.merge(:prefix => "#{prefix}#{k}"))) } + common_keys.sort_by{|k,v| k.to_s }.each do |k| + prefix = prefix_append_key(opts[:prefix], k, opts) + result.concat(diff(obj1[k], obj2[k], opts.merge(:prefix => prefix))) + end # added properties added_keys.sort_by{|k,v| k.to_s }.each do |k| + change_key = prefix_append_key(opts[:prefix], k, 
opts) unless obj1.key?(k) - custom_result = custom_compare(opts[:comparison], "#{prefix}#{k}", nil, obj2[k]) + custom_result = custom_compare(opts[:comparison], change_key, nil, obj2[k]) if custom_result result.concat(custom_result) else - result << ['+', "#{prefix}#{k}", obj2[k]] + result << ['+', change_key, obj2[k]] end end end diff --git a/lib/hashdiff/lcs.rb b/lib/hashdiff/lcs.rb index 1cc27d5..6da8e42 100644 --- a/lib/hashdiff/lcs.rb +++ b/lib/hashdiff/lcs.rb @@ -6,7 +6,7 @@ module HashDiff def self.lcs(a, b, options = {}) opts = { :similarity => 0.8 }.merge!(options) - opts[:prefix] = "#{opts[:prefix]}[*]" + opts[:prefix] = prefix_append_array_index(opts[:prefix], '*', opts) return [] if a.size == 0 or b.size == 0 diff --git a/lib/hashdiff/patch.rb b/lib/hashdiff/patch.rb index 553a199..bb9a036 100644 --- a/lib/hashdiff/patch.rb +++ b/lib/hashdiff/patch.rb @@ -1,4 +1,4 @@ -# +# # This module provides methods to diff two hash, patch and unpatch hash # module HashDiff @@ -17,19 +17,21 @@ def self.patch!(obj, changes, options = {}) delimiter = options[:delimiter] || '.' changes.each do |change| - parts = decode_property_path(change[1], delimiter) + parts = change[1] + parts = decode_property_path(parts, delimiter) unless parts.is_a?(Array) + last_part = parts.last parent_node = node(obj, parts[0, parts.size-1]) if change[0] == '+' - if last_part.is_a?(Integer) + if parent_node.is_a?(Array) parent_node.insert(last_part, change[2]) else parent_node[last_part] = change[2] end elsif change[0] == '-' - if last_part.is_a?(Integer) + if parent_node.is_a?(Array) parent_node.delete_at(last_part) else parent_node.delete(last_part) @@ -56,19 +58,21 @@ def self.unpatch!(obj, changes, options = {}) delimiter = options[:delimiter] || '.' 
changes.reverse_each do |change| - parts = decode_property_path(change[1], delimiter) + parts = change[1] + parts = decode_property_path(parts, delimiter) unless parts.is_a?(Array) + last_part = parts.last parent_node = node(obj, parts[0, parts.size-1]) if change[0] == '+' - if last_part.is_a?(Integer) + if parent_node.is_a?(Array) parent_node.delete_at(last_part) else parent_node.delete(last_part) end elsif change[0] == '-' - if last_part.is_a?(Integer) + if parent_node.is_a?(Array) parent_node.insert(last_part, change[2]) else parent_node[last_part] = change[2] diff --git a/lib/hashdiff/util.rb b/lib/hashdiff/util.rb index 6268e7d..bbb23ce 100644 --- a/lib/hashdiff/util.rb +++ b/lib/hashdiff/util.rb @@ -55,19 +55,17 @@ def self.count_nodes(obj) # # e.g. "a.b[3].c" => ['a', 'b', 3, 'c'] def self.decode_property_path(path, delimiter='.') - parts = path.split(delimiter).collect do |part| + path.split(delimiter).inject([]) do |memo, part| if part =~ /^(.*)\[(\d+)\]$/ if $1.size > 0 - [$1, $2.to_i] + memo + [$1, $2.to_i] else - $2.to_i + memo + [$2.to_i] end else - part + memo + [part] end end - - parts.flatten end # @private @@ -129,4 +127,20 @@ def self.custom_compare(method, key, obj1, obj2) end end end + + def self.prefix_append_key(prefix, key, opts) + if opts[:array_path] + prefix + [key] + else + prefix.empty? ? 
"#{key}" : "#{prefix}#{opts[:delimiter]}#{key}" + end + end + + def self.prefix_append_array_index(prefix, array_index, opts) + if opts[:array_path] + prefix + [array_index] + else + "#{prefix}[#{array_index}]" + end + end end diff --git a/spec/hashdiff/best_diff_spec.rb b/spec/hashdiff/best_diff_spec.rb index c444e88..9d9eddf 100644 --- a/spec/hashdiff/best_diff_spec.rb +++ b/spec/hashdiff/best_diff_spec.rb @@ -62,4 +62,13 @@ ['+', 'menu.popup.menuitem[1]', {"value" => "Open", "onclick" => "OpenDoc()"}] ] end + + it "should be able to have an array_path specified" do + a = {'x' => [{'a' => 1, 'c' => 3, 'e' => 5}, {'y' => 3}]} + b = {'x' => [{'a' => 1, 'b' => 2, 'e' => 5}] } + + diff = HashDiff.best_diff(a, b, :array_path => true) + diff.should == [["-", ["x", 0, "c"], 3], ["+", ["x", 0, "b"], 2], ["-", ["x", 1], {"y"=>3}]] + end + end diff --git a/spec/hashdiff/diff_spec.rb b/spec/hashdiff/diff_spec.rb index 77ee0f1..0c10ae9 100644 --- a/spec/hashdiff/diff_spec.rb +++ b/spec/hashdiff/diff_spec.rb @@ -274,4 +274,40 @@ diff.should == [['~', 'b', 'boat', 'truck'], ['~', 'c', 'plane', ' plan']] end end + + context 'when :array_path is true' do + it 'should return the diff path in an array rather than a string' do + x = { 'a' => 'foo' } + y = { 'a' => 'bar' } + diff = HashDiff.diff(x, y, :array_path => true) + + diff.should == [['~', ['a'], 'foo', 'bar']] + end + + it 'should show array indexes in paths' do + x = { 'a' => [0, 1, 2] } + y = { 'a' => [0, 1, 2, 3] } + + diff = HashDiff.diff(x, y, :array_path => true) + + diff.should == [['+', ['a', 3], 3]] + end + + it 'should show differences with string and symbol keys' do + x = { 'a' => 'foo' } + y = { :a => 'bar' } + + diff = HashDiff.diff(x, y, :array_path => true) + diff.should == [['-', ['a'], 'foo'], ['+', [:a], 'bar']] + end + + it 'should support other key types' do + time = Time.now + x = { time => 'foo' } + y = { 0 => 'bar' } + + diff = HashDiff.diff(x, y, :array_path => true) + diff.should == [['-', [time], 
'foo'], ['+', [0], 'bar']] + end + end end diff --git a/spec/hashdiff/patch_spec.rb b/spec/hashdiff/patch_spec.rb index 9d67bf2..8f4df82 100644 --- a/spec/hashdiff/patch_spec.rb +++ b/spec/hashdiff/patch_spec.rb @@ -157,5 +157,27 @@ HashDiff.unpatch!(b, diff, :delimiter => "\n").should == a end + it "should be able to patch when the diff is generated with an array_path" do + a = {"a" => 1, "b" => 1} + b = {"a" => 1, "b" => 2} + diff = HashDiff.diff(a, b, :array_path => true) + HashDiff.patch!(a, diff).should == b + + a = {"a" => 1, "b" => 1} + b = {"a" => 1, "b" => 2} + HashDiff.unpatch!(b, diff).should == a + end + + it "should be able to use non string keys when diff is generated with an array_path" do + a = {"a" => 1, :a => 2, 0 => 3} + b = {"a" => 5, :a => 6, 0 => 7} + diff = HashDiff.diff(a, b, :array_path => true) + + HashDiff.patch!(a, diff).should == b + + a = {"a" => 1, :a => 2, 0 => 3} + b = {"a" => 5, :a => 6, 0 => 7} + HashDiff.unpatch!(b, diff).should == a + end end From 04e4e8b48997a2b6a97f1bc292c982b8ed8091bc Mon Sep 17 00:00:00 2001 From: Kevin Dew Date: Thu, 3 Aug 2017 22:42:19 +0100 Subject: [PATCH 24/33] Fix typo s/comparisions/comparisons --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 691a41f..1548865 100644 --- a/README.md +++ b/README.md @@ -141,7 +141,7 @@ diff.should == [["~", "x", 5, 6]] #### `:case_insensitive` -The :case_insensitive option makes string comparisions ignore case. +The :case_insensitive option makes string comparisons ignore case. 
```ruby a = {x:5, s:'FooBar'} From 83c6f4b50c2ca02c937767ee7503210e4b9312a6 Mon Sep 17 00:00:00 2001 From: liu fengyun Date: Sun, 6 Aug 2017 21:19:22 +0200 Subject: [PATCH 25/33] bumps to 0.3.5 --- changelog.md | 4 ++++ lib/hashdiff/version.rb | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index a76b324..3f5201e 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,9 @@ # Change Log +## v0.3.5 2017-08-06 + +* add option `array_path` #34 + ## v0.3.4 2017-05-01 * performance improvement of HashDiff#similar? #31 diff --git a/lib/hashdiff/version.rb b/lib/hashdiff/version.rb index 64a29ea..b48eb34 100644 --- a/lib/hashdiff/version.rb +++ b/lib/hashdiff/version.rb @@ -1,3 +1,3 @@ module HashDiff - VERSION = '0.3.4' + VERSION = '0.3.5' end From a885a778b1afb9694f15939763cb1fd3d461ce2d Mon Sep 17 00:00:00 2001 From: Kevin Dew Date: Tue, 22 Aug 2017 14:37:19 +0100 Subject: [PATCH 26/33] Option to allow array comparisons in linear complexity The LCS algorithm produces excellent diffs. Unfortunately it has quadratic complexity, running in O(n^2) time for the number of items in an array - which can lead to extremely slow computations. ``` > require 'hashdiff';require 'benchmark' > x = (1..100).map { |i| { key: i, foo: :bar } } > puts Benchmark.measure { HashDiff.diff(x, x) } 0.420000 0.000000 0.420000 ( 0.430212) ``` If the size of the array is 1000 then we see it get really painful ``` > x = (1..1000).map { |i| { key: i, foo: :bar } } > puts Benchmark.measure { HashDiff.diff(x, x) } 42.680000 0.590000 43.270000 ( 43.530287) ``` This commit introduces an option to sacrifice the quality of the diff for a faster computational result with the `use_lcs` option, which can be set to false to disable use of the LCS algorithm. With `use_lcs` as false the array comparison is much simpler with a complexity of at worst 2n for an array.
``` > x = (1..100).map { |i| { key: i, foo: :bar } } > puts Benchmark.measure { HashDiff.diff(x, x, use_lcs: false) } 0.010000 0.000000 0.010000 ( 0.004894) > x = (1..1000).map { |i| { key: i, foo: :bar } } > puts Benchmark.measure { HashDiff.diff(x, x, use_lcs: false) } 0.040000 0.000000 0.040000 ( 0.042547) ``` The linear approach to comparing the array works on the basis that if arrays are the same length it treats the array as having no additions or deletions, only changes. ``` > HashDiff.diff([0,1,2], [3,4,5], use_lcs: false) => [["~", "[0]", 0, 3], ["~", "[1]", 1, 4], ["~", "[2]", 2, 5]] ``` compared to: ``` > HashDiff.diff([0,1,2], [3,4,5]) => [["-", "[2]", 2], ["-", "[1]", 1], ["-", "[0]", 0], ["+", "[0]", 3], ["+", "[1]", 4], ["+", "[2]", 5]] ``` Whereas if there are more items in one array than the other it checks the items surrounding the index for a match to calculate additions and removals. ``` > HashDiff.diff([0, 3, 5], [0, 1, 2, 3, 4, 5], use_lcs: false) => [["+", "[1]", 1], ["+", "[2]", 2], ["+", "[4]", 4]] > HashDiff.diff([0, 3, 5], [0, 1, 2, 3, 4, 5], use_lcs: false) == HashDiff.diff([0, 3, 5], [0, 1, 2, 3, 4, 5]) => true ``` For a combination of added and changed items the diff will appear different to the lcs approach: ``` > HashDiff.diff([0, 1, 2], [0, 2, 2, 3], use_lcs: false) => [["~", "[1]", 1, 2], ["+", "[3]", 3]] > HashDiff.diff([0, 1, 2], [0, 2, 2, 3]) => [["-", "[1]", 1], ["+", "[1]", 2], ["+", "[3]", 3]] ``` However all diffs produce same results through `patch!` and `unpatch!` methods. 
--- lib/hashdiff.rb | 1 + lib/hashdiff/diff.rb | 12 +- lib/hashdiff/linear_compare_array.rb | 155 +++++++++++++++++++++ spec/hashdiff/diff_array_spec.rb | 14 +- spec/hashdiff/diff_spec.rb | 26 ++++ spec/hashdiff/linear_compare_array_spec.rb | 48 +++++++ 6 files changed, 244 insertions(+), 12 deletions(-) create mode 100644 lib/hashdiff/linear_compare_array.rb create mode 100644 spec/hashdiff/linear_compare_array_spec.rb diff --git a/lib/hashdiff.rb b/lib/hashdiff.rb index 08e4936..1052d0d 100644 --- a/lib/hashdiff.rb +++ b/lib/hashdiff.rb @@ -1,5 +1,6 @@ require 'hashdiff/util' require 'hashdiff/lcs' +require 'hashdiff/linear_compare_array' require 'hashdiff/diff' require 'hashdiff/patch' require 'hashdiff/version' diff --git a/lib/hashdiff/diff.rb b/lib/hashdiff/diff.rb index 60f1610..d003e08 100644 --- a/lib/hashdiff/diff.rb +++ b/lib/hashdiff/diff.rb @@ -77,7 +77,8 @@ def self.diff(obj1, obj2, options = {}, &block) :strict => true, :strip => false, :numeric_tolerance => 0, - :array_path => false + :array_path => false, + :use_lcs => true }.merge!(options) opts[:prefix] = [] if opts[:array_path] && opts[:prefix] == '' @@ -105,8 +106,8 @@ def self.diff(obj1, obj2, options = {}, &block) end result = [] - if obj1.is_a?(Array) - changeset = diff_array(obj1, obj2, opts) do |lcs| + if obj1.is_a?(Array) && opts[:use_lcs] + changeset = diff_array_lcs(obj1, obj2, opts) do |lcs| # use a's index for similarity lcs.each do |pair| prefix = prefix_append_array_index(opts[:prefix], pair[0], opts) @@ -122,6 +123,8 @@ def self.diff(obj1, obj2, options = {}, &block) result << ['+', change_key, change[2]] end end + elsif obj1.is_a?(Array) && !opts[:use_lcs] + result.concat(LinearCompareArray.call(obj1, obj2, opts)) elsif obj1.is_a?(Hash) deleted_keys = obj1.keys - obj2.keys @@ -170,7 +173,7 @@ def self.diff(obj1, obj2, options = {}, &block) # @private # # diff array using LCS algorithm - def self.diff_array(a, b, options = {}) + def self.diff_array_lcs(a, b, options = {}) opts = { 
:prefix => '', :similarity => 0.8, @@ -223,5 +226,4 @@ def self.diff_array(a, b, options = {}) change_set end - end diff --git a/lib/hashdiff/linear_compare_array.rb b/lib/hashdiff/linear_compare_array.rb new file mode 100644 index 0000000..c2933b6 --- /dev/null +++ b/lib/hashdiff/linear_compare_array.rb @@ -0,0 +1,155 @@ +module HashDiff + # @private + # + # Used to compare arrays in a linear complexity, which produces longer diffs + # than using the lcs algorithm but is considerably faster + class LinearCompareArray + def self.call(old_array, new_array, options = {}) + instance = self.new(old_array, new_array, options) + instance.call + end + + def call + return [] if old_array.empty? && new_array.empty? + + self.old_index = 0 + self.new_index = 0 + # by comparing the array lengths we can expect that a number of items + # are either added or removed + self.expected_additions = new_array.length - old_array.length + + loop do + if extra_items_in_old_array? + append_deletion(old_array[old_index], old_index) + elsif extra_items_in_new_array? + append_addition(new_array[new_index], new_index) + else + compare_at_index + end + + self.old_index = old_index + 1 + self.new_index = new_index + 1 + break if iterated_through_both_arrays? + end + + changes + end + + private + + attr_reader :old_array, :new_array, :options, :additions, :deletions, :differences + attr_accessor :old_index, :new_index, :expected_additions + + def initialize(old_array, new_array, options) + @old_array = old_array + @new_array = new_array + @options = { prefix: '' }.merge!(options) + + @additions = [] + @deletions = [] + @differences = [] + end + + def extra_items_in_old_array? + old_index < old_array.length && new_index >= new_array.length + end + + def extra_items_in_new_array? + new_index < new_array.length && old_index >= old_array.length + end + + def iterated_through_both_arrays? 
+ old_index >= old_array.length && new_index >= new_array.length + end + + def compare_at_index + difference = item_difference(old_array[old_index], new_array[new_index], old_index) + return if difference.empty? + + index_after_additions = index_of_match_after_additions + append_addititions_before_match(index_after_additions) + + index_after_deletions = index_of_match_after_deletions + append_deletions_before_match(index_after_deletions) + + match = index_after_additions || index_after_deletions + + append_differences(difference) unless match + end + + def item_difference(old_item, new_item, item_index) + prefix = HashDiff.prefix_append_array_index(options[:prefix], item_index, options) + HashDiff.diff(old_item, new_item, options.merge(:prefix => prefix)) + end + + # look ahead in the new array to see if the current item appears later + # thereby having new items added + def index_of_match_after_additions + return unless expected_additions > 0 + + (1..expected_additions).each do |i| + next_difference = item_difference( + old_array[old_index], + new_array[new_index + i], + old_index + ) + + return new_index + i if next_difference.empty? + end + + nil + end + + # look ahead in the old array to see if the current item appears later + # thereby having items removed + def index_of_match_after_deletions + return unless expected_additions < 0 + + (1..(expected_additions.abs)).each do |i| + next_difference = item_difference( + old_array[old_index + i], + new_array[new_index], + old_index + ) + + return old_index + i if next_difference.empty? 
+ end + + nil + end + + def append_addititions_before_match(match_index) + return unless match_index + (new_index...match_index).each { |i| append_addition(new_array[i], i) } + self.expected_additions = expected_additions - (match_index - new_index) + self.new_index = match_index + end + + def append_deletions_before_match(match_index) + return unless match_index + (old_index...match_index).each { |i| append_deletion(old_array[i], i) } + self.expected_additions = expected_additions + (match_index - new_index) + self.old_index = match_index + end + + def append_addition(item, index) + key = HashDiff.prefix_append_array_index(options[:prefix], index, options) + additions << ['+', key, item] + end + + def append_deletion(item, index) + key = HashDiff.prefix_append_array_index(options[:prefix], index, options) + deletions << ['-', key, item] + end + + def append_differences(difference) + differences.concat(difference) + end + + def changes + # this algorithm only allows there to be additions or deletions + # deletions are reverse so they don't change the index of earlier items + differences + additions + deletions.reverse + end + end +end diff --git a/spec/hashdiff/diff_array_spec.rb b/spec/hashdiff/diff_array_spec.rb index c22af75..827226f 100644 --- a/spec/hashdiff/diff_array_spec.rb +++ b/spec/hashdiff/diff_array_spec.rb @@ -5,7 +5,7 @@ a = [1, 2, 3] b = [1, 2, 3] - diff = HashDiff.diff_array(a, b) + diff = HashDiff.diff_array_lcs(a, b) diff.should == [] end @@ -13,7 +13,7 @@ a = [1, 2, 3] b = [1, 8, 7] - diff = HashDiff.diff_array(a, b) + diff = HashDiff.diff_array_lcs(a, b) diff.should == [['-', 2, 3], ['-', 1, 2], ['+', 1, 8], ['+', 2, 7]] end @@ -21,7 +21,7 @@ a = [1, 2] b = [] - diff = HashDiff.diff_array(a, b) + diff = HashDiff.diff_array_lcs(a, b) diff.should == [['-', 1, 2], ['-', 0, 1]] end @@ -29,7 +29,7 @@ a = [] b = [1, 2] - diff = HashDiff.diff_array(a, b) + diff = HashDiff.diff_array_lcs(a, b) diff.should == [['+', 0, 1], ['+', 1, 2]] end @@ -37,7 
+37,7 @@ a = [1, 3, 5, 7] b = [2, 3, 7, 5] - diff = HashDiff.diff_array(a, b) + diff = HashDiff.diff_array_lcs(a, b) diff.should == [['-', 0, 1], ['+', 0, 2], ['+', 2, 7], ['-', 4, 7]] end @@ -45,14 +45,14 @@ a = [1, 3, 4, 7] b = [2, 3, 7, 5] - diff = HashDiff.diff_array(a, b) + diff = HashDiff.diff_array_lcs(a, b) diff.should == [['-', 0, 1], ['+', 0, 2], ['-', 2, 4], ['+', 3, 5]] end it "should be able to diff two arrays with similar elements" do a = [{'a' => 1, 'b' => 2, 'c' => 3, 'd' => 4, 'e' => 5}, 3] b = [1, {'a' => 1, 'b' => 2, 'c' => 3, 'e' => 5}] - diff = HashDiff.diff_array(a, b) + diff = HashDiff.diff_array_lcs(a, b) diff.should == [['+', 0, 1], ['-', 2, 3]] end diff --git a/spec/hashdiff/diff_spec.rb b/spec/hashdiff/diff_spec.rb index 0c10ae9..62caf3c 100644 --- a/spec/hashdiff/diff_spec.rb +++ b/spec/hashdiff/diff_spec.rb @@ -310,4 +310,30 @@ diff.should == [['-', [time], 'foo'], ['+', [0], 'bar']] end end + + context 'when :use_lcs is false' do + it 'should show items in an array as changed' do + x = [:a, :b] + y = [:c, :d] + diff = HashDiff.diff(x, y, :use_lcs => false) + + diff.should == [['~', '[0]', :a, :c], ['~', '[1]', :b, :d]] + end + + it 'should show additions to arrays' do + x = { :a => [0] } + y = { :a => [0, 1] } + diff = HashDiff.diff(x, y, :use_lcs => false) + + diff.should == [['+', 'a[1]', 1]] + end + + it 'shows changes to nested arrays' do + x = { :a => [[0, 1]] } + y = { :a => [[1, 2]] } + diff = HashDiff.diff(x, y, :use_lcs => false) + + diff.should == [['~', 'a[0][0]', 0, 1], ['~', 'a[0][1]', 1, 2]] + end + end end diff --git a/spec/hashdiff/linear_compare_array_spec.rb b/spec/hashdiff/linear_compare_array_spec.rb new file mode 100644 index 0000000..0b2140c --- /dev/null +++ b/spec/hashdiff/linear_compare_array_spec.rb @@ -0,0 +1,48 @@ +require 'spec_helper' + +describe HashDiff::LinearCompareArray do + it "should find no differences between two empty arrays" do + difference = described_class.call([], []) + difference.should == 
[] + end + + it "should find added items when the old array is empty" do + difference = described_class.call([], [:a, :b]) + difference.should == [['+', '[0]', :a], ['+', '[1]', :b]] + end + + it "should find removed items when the new array is empty" do + difference = described_class.call([:a, :b], []) + difference.should == [['-', '[1]', :b], ['-', '[0]', :a]] + end + + it "should find no differences between identical arrays" do + difference = described_class.call([:a, :b], [:a, :b]) + difference.should == [] + end + + it "should find added items in an array" do + difference = described_class.call([:a, :d], [:a, :b, :c, :d]) + difference.should == [['+', '[1]', :b], ['+', '[2]', :c]] + end + + it "should find removed items in an array" do + difference = described_class.call([:a, :b, :c, :d, :e, :f], [:a, :d, :f]) + difference.should == [['-', '[4]', :e], ['-', '[2]', :c], ['-', '[1]', :b]] + end + + it "should show additions and deletions as changed items" do + difference = described_class.call([:a, :b, :c], [:c, :b, :a]) + difference.should == [['~', '[0]', :a, :c], ['~', '[2]', :c, :a]] + end + + it "should show changed items in a hash" do + difference = described_class.call([{ :a => :b }], [{ :a => :c }]) + difference.should == [['~', '[0].a', :b, :c]] + end + + it "should show changed items and added items" do + difference = described_class.call([{ :a => 1, :b => 2 }], [{ :a => 2, :b => 2 }, :item]) + difference.should == [['~', '[0].a', 1, 2], ['+', '[1]', :item]] + end +end From 30a59a8f8db0daaef8d6a47cfd9c4419ca8275d8 Mon Sep 17 00:00:00 2001 From: liu fengyun Date: Tue, 22 Aug 2017 20:48:35 +0200 Subject: [PATCH 27/33] remove code coverage --- Gemfile | 1 - spec/spec_helper.rb | 7 ------- 2 files changed, 8 deletions(-) diff --git a/Gemfile b/Gemfile index 4f18655..375affa 100644 --- a/Gemfile +++ b/Gemfile @@ -3,5 +3,4 @@ gemspec group :test do gem 'rake', '< 11' - gem 'codecov' end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 
1b12d53..c52a6fc 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,10 +1,3 @@ -require 'simplecov' -SimpleCov.start -if ENV['CI'] == 'true' - require 'codecov' - SimpleCov.formatter = SimpleCov::Formatter::Codecov -end - $LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib') require 'rubygems' From 8c40f161d84f3586764c1fe288161c3957b7edbd Mon Sep 17 00:00:00 2001 From: liu fengyun Date: Tue, 22 Aug 2017 20:50:15 +0200 Subject: [PATCH 28/33] bumps to 0.3.6 --- changelog.md | 4 ++++ lib/hashdiff/version.rb | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index 3f5201e..a5615fb 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,9 @@ # Change Log +## v0.3.6 2017-08-22 + +* add option `use_lcs` #35 + ## v0.3.5 2017-08-06 * add option `array_path` #34 diff --git a/lib/hashdiff/version.rb b/lib/hashdiff/version.rb index b48eb34..6e7b1cf 100644 --- a/lib/hashdiff/version.rb +++ b/lib/hashdiff/version.rb @@ -1,3 +1,3 @@ module HashDiff - VERSION = '0.3.5' + VERSION = '0.3.6' end From 59a92b3da4bb1366e8477622b8a8868d690d7793 Mon Sep 17 00:00:00 2001 From: Kevin Dew Date: Tue, 22 Aug 2017 22:12:32 +0100 Subject: [PATCH 29/33] Documentation for the :use_lcs option --- README.md | 28 +++++++++++++++++++++++++--- lib/hashdiff/diff.rb | 3 +++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 1548865..d80b62f 100644 --- a/README.md +++ b/README.md @@ -91,9 +91,9 @@ HashDiff.unpatch!(b, diff).should == a ### Options -There are seven options available: `:delimiter`, `:similarity`, -`:strict`, `:numeric_tolerance`, `:strip`, `:case_insensitive` -and `:array_path`. 
+There are eight options available: `:delimiter`, `:similarity`, +`:strict`, `:numeric_tolerance`, `:strip`, `:case_insensitive`, `:array_path` +and `:use_lcs`. #### `:delimiter` @@ -184,6 +184,28 @@ diff = HashDiff.diff(a, b, :array_path => true) diff.should == [["~", [:a], [1], {0=>1}]] ``` +#### `:use_lcs` + +The :use_lcs option is used to specify whether a +[Longest common subsequence](https://en.wikipedia.org/wiki/Longest_common_subsequence_problem) +(LCS) algorithm is used to determine differences in arrays. This defaults to +`true` but can be changed to `false` for significantly faster array comparisons +(O(n) complexity rather than O(n^2) for LCS). + +When :use_lcs is false the results of array comparisons have a tendency to +show changes at indexes rather than the additions and subtractions reported +when :use_lcs is true. + +Note, currently the :similarity option has no effect when :use_lcs is false. + +```ruby +a = {x: [0, 1, 2]} +b = {x: [0, 2, 2, 3]} + +diff = HashDiff.diff(a, b, :use_lcs => false) +diff.should == [["~", "x[1]", 1, 2], ["+", "x[3]", 3]] +``` + #### Specifying a custom comparison method It's possible to specify how the values of a key should be compared. diff --git a/lib/hashdiff/diff.rb b/lib/hashdiff/diff.rb index d003e08..06cf5b4 100644 --- a/lib/hashdiff/diff.rb +++ b/lib/hashdiff/diff.rb @@ -12,6 +12,7 @@ module HashDiff # * :numeric_tolerance (Numeric) [0] should be a positive numeric value. Value by which numeric differences must be greater than. By default, numeric values are compared exactly; with the :tolerance option, the difference between numeric values must be greater than the given value. # * :strip (Boolean) [false] whether or not to call #strip on strings before comparing # * :array_path (Boolean) [false] whether to return the path references for nested values in an array, can be used for patch compatibility with non string keys. 
+ # * :use_lcs (Boolean) [true] whether or not to use an implementation of the Longest common subsequence algorithm for comparing arrays, produces better diffs but is slower. # # @yield [path, value1, value2] Optional block is used to compare each value, instead of default #==. If the block returns value other than true of false, then other specified comparison options will be used to do the comparison. # @@ -55,6 +56,8 @@ def self.best_diff(obj1, obj2, options = {}, &block) # * :numeric_tolerance (Numeric) [0] should be a positive numeric value. Value by which numeric differences must be greater than. By default, numeric values are compared exactly; with the :tolerance option, the difference between numeric values must be greater than the given value. # * :strip (Boolean) [false] whether or not to call #strip on strings before comparing # * :array_path (Boolean) [false] whether to return the path references for nested values in an array, can be used for patch compatibility with non string keys. + # * :use_lcs (Boolean) [true] whether or not to use an implementation of the Longest common subsequence algorithm for comparing arrays, produces better diffs but is slower. + # # # @yield [path, value1, value2] Optional block is used to compare each value, instead of default #==. If the block returns value other than true of false, then other specified comparison options will be used to do the comparison. 
# From 34681b2ab8a3ffb071151913ee31813b630dde39 Mon Sep 17 00:00:00 2001 From: Joe Francis Date: Sat, 7 Oct 2017 12:16:58 -0500 Subject: [PATCH 30/33] update minimum ruby to reflect actual support --- hashdiff.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hashdiff.gemspec b/hashdiff.gemspec index 251b504..c50bd0d 100644 --- a/hashdiff.gemspec +++ b/hashdiff.gemspec @@ -12,7 +12,7 @@ Gem::Specification.new do |s| s.test_files = `git ls-files -- Appraisals {spec}/*`.split("\n") s.require_paths = ['lib'] - s.required_ruby_version = Gem::Requirement.new(">= 1.8.7") + s.required_ruby_version = Gem::Requirement.new(">= 1.9.3") s.authors = ["Liu Fengyun"] s.email = ["liufengyunchina@gmail.com"] From f153d241a1579f40ea5601f94df47ade4812aefc Mon Sep 17 00:00:00 2001 From: Joe Francis Date: Sat, 7 Oct 2017 12:42:30 -0500 Subject: [PATCH 31/33] set higher retry, bump ruby versions --- .travis.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3dd8454..fd5bb50 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,8 @@ rvm: - 1.9.3 - 2.0.0 - 2.1.10 - - 2.2.6 - - 2.3.3 - - 2.4.0 + - 2.2.8 + - 2.3.4 + - 2.4.2 script: "bundle exec rake spec" +bundler_args: --retry 5 From 70fc43e73a3a95b358154e768c67b1f01889e7b0 Mon Sep 17 00:00:00 2001 From: Joe Francis Date: Sat, 7 Oct 2017 13:09:18 -0500 Subject: [PATCH 32/33] attempting to work around apparently bundler problem with travis and ruby-1.9.3 --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index fd5bb50..b9833e1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,4 +8,6 @@ rvm: - 2.3.4 - 2.4.2 script: "bundle exec rake spec" -bundler_args: --retry 5 + +before_install: + - gem install bundler From 0946ded222b24ff35448205074fa2f20b87623b1 Mon Sep 17 00:00:00 2001 From: liu fengyun Date: Sun, 8 Oct 2017 13:06:49 +0200 Subject: [PATCH 33/33] bumps to 0.3.7 --- changelog.md | 4 ++++ 
lib/hashdiff/version.rb | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index a5615fb..27879b7 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,9 @@ # Change Log +## v0.3.7 2017-10-08 + +* remove 1.8.7 support from gemspec #39 + ## v0.3.6 2017-08-22 * add option `use_lcs` #35 diff --git a/lib/hashdiff/version.rb b/lib/hashdiff/version.rb index 6e7b1cf..2dd32ef 100644 --- a/lib/hashdiff/version.rb +++ b/lib/hashdiff/version.rb @@ -1,3 +1,3 @@ module HashDiff - VERSION = '0.3.6' + VERSION = '0.3.7' end