From 4e380807746ec9c15ab55b9660b34d294e354be7 Mon Sep 17 00:00:00 2001
From: Marcel Otto
Date: Tue, 19 Nov 2019 22:49:00 +0100
Subject: [PATCH] Add RDF.Diff

---
 CHANGELOG.md            |   1 +
 lib/rdf/diff.ex         | 164 +++++++++++++++++++++++++++++++++
 test/unit/diff_test.exs | 199 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 364 insertions(+)
 create mode 100644 lib/rdf/diff.ex
 create mode 100644 test/unit/diff_test.exs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 794b6d0..d71ccd5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ This project adheres to [Semantic Versioning](http://semver.org/) and
 
 ### Added
 
+- `RDF.Diff` data structure for diffs between RDF graphs and descriptions
 - `RDF.Description.update/4` updates the objects of a predicate in a description
   with a custom update function
 - `RDF.Graph.update/4` updates the descriptions of a subject in a graph
diff --git a/lib/rdf/diff.ex b/lib/rdf/diff.ex
new file mode 100644
index 0000000..d8224ef
--- /dev/null
+++ b/lib/rdf/diff.ex
@@ -0,0 +1,164 @@
+defmodule RDF.Diff do
+  @moduledoc """
+  A data structure for diffs between `RDF.Graph`s and `RDF.Description`s.
+
+  An `RDF.Diff` is a struct consisting of the two fields `additions` and
+  `deletions`, each holding a `RDF.Graph` with the added or deleted statements.
+  """
+
+  defstruct [:additions, :deletions]
+
+  alias RDF.{Description, Graph}
+
+  @doc """
+  Creates a `RDF.Diff` struct.
+
+  Initial additions and deletions can optionally be provided with the
+  `additions` and `deletions` keywords. The statements for the additions and
+  deletions can be given in any form supported by the `RDF.Graph.new/1`
+  function. A keyword which is not given results in an empty graph.
+  """
+  def new(diff \\ []) do
+    %__MODULE__{
+      additions: diff |> Keyword.get(:additions) |> coerce_graph(),
+      deletions: diff |> Keyword.get(:deletions) |> coerce_graph()
+    }
+  end
+
+  # Normalizes the values accepted by `new/1` to a `RDF.Graph`.
+  defp coerce_graph(nil), do: Graph.new()
+
+  defp coerce_graph(%Description{} = description) do
+    # An empty description is turned into an empty graph directly instead of
+    # being passed to `Graph.new/1`.
+    if Enum.empty?(description) do
+      Graph.new()
+    else
+      Graph.new(description)
+    end
+  end
+
+  defp coerce_graph(data), do: Graph.new(data)
+
+  @doc """
+  Computes the diff between two `RDF.Graph`s or `RDF.Description`s.
+
+  The first argument represents the original and the second argument the new version
+  of the RDF data to be compared. Any combination of `RDF.Graph`s or
+  `RDF.Description`s can be passed as first and second argument.
+
+  The result is a `RDF.Diff` struct: its `additions` contain the statements
+  present only in the new version and its `deletions` the statements present
+  only in the original.
+
+  ## Examples
+
+      iex> RDF.Diff.diff(
+      ...>   RDF.description(EX.S1, EX.p1, [EX.O1, EX.O2]),
+      ...>   RDF.graph([
+      ...>     {EX.S1, EX.p1, [EX.O2, EX.O3]},
+      ...>     {EX.S2, EX.p2, EX.O4}
+      ...>   ]))
+      %RDF.Diff{
+        additions: RDF.graph([
+          {EX.S1, EX.p1, EX.O3},
+          {EX.S2, EX.p2, EX.O4}
+        ]),
+        deletions: RDF.graph({EX.S1, EX.p1, EX.O1})
+      }
+  """
+  def diff(original_rdf_data, new_rdf_data)
+
+  # Identical descriptions: nothing was added or deleted.
+  def diff(%Description{} = description, description), do: new()
+
+  # Descriptions of the same subject are compared property by property.
+  def diff(%Description{subject: subject} = original_description,
+           %Description{subject: subject} = new_description) do
+    # The additions start out as the complete new description, from which the
+    # statements already present in the original are removed. The deletions
+    # collect the original statements missing in the new description.
+    {additions, deletions} =
+      original_description
+      |> Description.predicates()
+      |> Enum.reduce({new_description, Description.new(subject)},
+           fn property, {additions, deletions} ->
+             original_objects = Description.get(original_description, property)
+
+             case Description.get(new_description, property) do
+               nil ->
+                 # The new description has no statements with this property.
+                 {additions, Description.add(deletions, property, original_objects)}
+
+               new_objects ->
+                 {unchanged_objects, deleted_objects} =
+                   Enum.split_with(original_objects, &(&1 in new_objects))
+
+                 {
+                   Description.delete(additions, property, unchanged_objects),
+                   Description.add(deletions, property, deleted_objects)
+                 }
+             end
+           end)
+
+    new(additions: additions, deletions: deletions)
+  end
+
+  # Descriptions of different subjects have no statements in common.
+  def diff(%Description{} = original_description, %Description{} = new_description),
+    do: new(additions: new_description, deletions: original_description)
+
+  def diff(%Graph{} = graph1, %Graph{} = graph2) do
+    graph1_subjects = graph1 |> Graph.subjects() |> MapSet.new()
+    graph2_subjects = graph2 |> Graph.subjects() |> MapSet.new()
+    deleted_subjects = MapSet.difference(graph1_subjects, graph2_subjects)
+    added_subjects = MapSet.difference(graph2_subjects, graph1_subjects)
+
+    # Subjects occurring in only one graph are added or deleted as a whole;
+    # the descriptions of the shared subjects are diffed individually and
+    # merged into the result.
+    graph1_subjects
+    |> MapSet.intersection(graph2_subjects)
+    |> Enum.reduce(
+         new(
+           additions: Graph.take(graph2, added_subjects),
+           deletions: Graph.take(graph1, deleted_subjects)
+         ),
+         fn subject, acc ->
+           merge(acc, diff(
+             Graph.description(graph1, subject),
+             Graph.description(graph2, subject)
+           ))
+         end)
+  end
+
+  def diff(%Description{} = description, %Graph{} = graph) do
+    case Graph.pop(graph, description.subject) do
+      # The graph has no description of the subject.
+      {nil, remaining_graph} ->
+        new(additions: remaining_graph, deletions: description)
+
+      {new_description, remaining_graph} ->
+        merge(new(additions: remaining_graph), diff(description, new_description))
+    end
+  end
+
+  def diff(%Graph{} = graph, %Description{} = description) do
+    # The diff in the opposite direction with additions and deletions swapped.
+    diff = diff(description, graph)
+    %__MODULE__{diff | additions: diff.deletions, deletions: diff.additions}
+  end
+
+  @doc """
+  Merges two diffs.
+
+  The diffs are merged by adding up the `additions` and `deletions` of both
+  diffs respectively.
+  """
+  def merge(%__MODULE__{} = diff1, %__MODULE__{} = diff2) do
+    new(
+      additions: Graph.add(diff1.additions, diff2.additions),
+      deletions: Graph.add(diff1.deletions, diff2.deletions)
+    )
+  end
+end
diff --git a/test/unit/diff_test.exs b/test/unit/diff_test.exs
new file mode 100644
index 0000000..3e9364e
--- /dev/null
+++ b/test/unit/diff_test.exs
@@ -0,0 +1,199 @@
+defmodule RDF.DiffTest do
+  use RDF.Test.Case
+
+  doctest RDF.Diff
+
+  alias RDF.Diff
+
+  test "new" do
+    assert Diff.new() ==
+             %Diff{additions: Graph.new(), deletions: Graph.new()}
+    assert Diff.new(additions: [], deletions: []) ==
+             %Diff{additions: Graph.new(), deletions: Graph.new()}
+    assert Diff.new(additions: Graph.new(), deletions: Graph.new()) ==
+             %Diff{additions: Graph.new(), deletions: Graph.new()}
+    description = Description.new({EX.S, EX.p, EX.O1})
+    graph = Graph.new({EX.S, EX.p, EX.O2})
+    assert Diff.new(additions: description, deletions: graph) ==
+             %Diff{additions: Graph.new(description), deletions: graph}
+  end
+
+  describe "diff/2" do
+    test "with two descriptions that are equal it returns an empty diff" do
+      assert Diff.diff(description(), description()) == Diff.new()
+      description = description({EX.foo(), EX.Bar})
+      assert Diff.diff(description, description) == Diff.new()
+    end
+
+    test "with two descriptions with different subjects" do
+      description1 = Description.new({EX.S1, EX.p, EX.O})
+      description2 = Description.new({EX.S2, EX.p, EX.O})
+      assert Diff.diff(description1, description2) ==
+               Diff.new(additions: Graph.new(description2),
+                        deletions: Graph.new(description1))
+    end
+
+    test "with two descriptions when the second description has additional statements" do
+      description1 = Description.new({EX.S, EX.p, EX.O})
+      description2 =
+        description1
+        |> EX.p(EX.O2)
+        |> EX.p2(EX.O)
+
+      assert Diff.diff(description1, description2) ==
+               Diff.new(additions: Graph.new(
+                          EX.S
+                          |> EX.p(EX.O2)
+                          |> EX.p2(EX.O)
+                        ),
+                        deletions: Graph.new())
+    end
+
+    test "with two descriptions when the first description has additional statements" do
+      description1 = Description.new({EX.S, EX.p, EX.O})
+      description2 =
+        description1
+        |> EX.p(EX.O2)
+        |> EX.p2(EX.O)
+
+      assert Diff.diff(description2, description1) ==
+               Diff.new(additions: Graph.new(),
+                        deletions: Graph.new(
+                          EX.S
+                          |> EX.p(EX.O2)
+                          |> EX.p2(EX.O)
+                        ))
+    end
+
+    test "with two descriptions with additions and deletions" do
+      description1 =
+        EX.S
+        |> EX.p(EX.O1, EX.O2)
+        |> EX.p2(EX.O)
+      description2 =
+        EX.S
+        |> EX.p(EX.O1, EX.O3)
+        |> EX.p3(EX.O)
+
+      assert Diff.diff(description1, description2) ==
+               Diff.new(
+                 additions: Graph.new(
+                   EX.S
+                   |> EX.p(EX.O3)
+                   |> EX.p3(EX.O)
+                 ),
+                 deletions: Graph.new(
+                   EX.S
+                   |> EX.p(EX.O2)
+                   |> EX.p2(EX.O)
+                 ))
+    end
+
+    test "with one description and a graph" do
+      description =
+        EX.S1
+        |> EX.p(EX.O1, EX.O2)
+        |> EX.p2(EX.O)
+      graph = Graph.new([
+        EX.S1
+        |> EX.p(EX.O2, EX.O3)
+        |> EX.p3(EX.O),
+        EX.S3
+        |> EX.p(EX.O)
+      ])
+      assert Diff.diff(description, graph) ==
+               Diff.new(
+                 additions: Graph.new([
+                   EX.S1
+                   |> EX.p(EX.O3)
+                   |> EX.p3(EX.O),
+                   EX.S3
+                   |> EX.p(EX.O)
+                 ]),
+                 deletions: Graph.new([
+                   EX.S1
+                   |> EX.p(EX.O1)
+                   |> EX.p2(EX.O)
+                 ]))
+
+      assert Diff.diff(graph, description) ==
+               Diff.new(
+                 additions: Graph.new([
+                   EX.S1
+                   |> EX.p(EX.O1)
+                   |> EX.p2(EX.O)
+                 ]),
+                 deletions: Graph.new([
+                   EX.S1
+                   |> EX.p(EX.O3)
+                   |> EX.p3(EX.O),
+                   EX.S3
+                   |> EX.p(EX.O)
+                 ])
+               )
+
+      disjoint_description =
+        EX.S
+        |> EX.p(EX.O1, EX.O2)
+        |> EX.p2(EX.O)
+      assert Diff.diff(disjoint_description, graph) ==
+               Diff.new(
+                 additions: graph,
+                 deletions: Graph.new(disjoint_description))
+      assert Diff.diff(graph, disjoint_description) ==
+               Diff.new(
+                 additions: Graph.new(disjoint_description),
+                 deletions: graph)
+    end
+
+    test "with two graphs with additions and deletions" do
+      graph1 = Graph.new([
+        EX.S1
+        |> EX.p(EX.O1, EX.O2)
+        |> EX.p2(EX.O),
+        EX.S2
+        |> EX.p(EX.O)
+      ])
+      graph2 = Graph.new([
+        EX.S1
+        |> EX.p(EX.O2, EX.O3)
+        |> EX.p3(EX.O),
+        EX.S3
+        |> EX.p(EX.O)
+      ])
+
+      assert Diff.diff(graph1, graph2) ==
+               Diff.new(
+                 additions: Graph.new([
+                   EX.S1
+                   |> EX.p(EX.O3)
+                   |> EX.p3(EX.O),
+                   EX.S3
+                   |> EX.p(EX.O)
+                 ]),
+                 deletions: Graph.new([
+                   EX.S1
+                   |> EX.p(EX.O1)
+                   |> EX.p2(EX.O),
+                   EX.S2
+                   |> EX.p(EX.O)
+                 ]))
+    end
+  end
+
+  test "merge/2" do
+    assert Diff.merge(
+             Diff.new(additions: Graph.new({EX.S, EX.p, EX.O1}),
+                      deletions: Graph.new({EX.S1, EX.p, EX.O})),
+             Diff.new(additions: Graph.new({EX.S, EX.p, EX.O2}),
+                      deletions: Graph.new({EX.S2, EX.p, EX.O}))
+           ) ==
+             Diff.new(
+               additions: Graph.new({EX.S, EX.p, [EX.O1, EX.O2]}),
+               deletions: Graph.new([
+                 {EX.S1, EX.p, EX.O},
+                 {EX.S2, EX.p, EX.O}
+               ])
+             )
+  end
+end