diff --git a/lib/rdf/serialization/encoder.ex b/lib/rdf/serialization/encoder.ex index 985f180..85e39f8 100644 --- a/lib/rdf/serialization/encoder.ex +++ b/lib/rdf/serialization/encoder.ex @@ -24,9 +24,10 @@ defmodule RDF.Serialization.Encoder do @doc """ Serializes a RDF data structure into a stream. - It should return a stream emitting either strings or iodata of the + It should return a stream emitting either strings or iodata of the serialized RDF data structure. If both forms are supported the form - should be configurable via the `:mode` option. + should be configurable via the `:mode` option and its values `:string` + or `:iodata`, respectively. """ @callback stream(RDF.Data.t(), keyword) :: Enumerable.t() diff --git a/lib/rdf/serialization/format.ex b/lib/rdf/serialization/format.ex index 175c413..1d69b1a 100644 --- a/lib/rdf/serialization/format.ex +++ b/lib/rdf/serialization/format.ex @@ -121,6 +121,8 @@ defmodule RDF.Serialization.Format do General serialization-independent options: + - `:stream`: Enables reading the data from a file directly via a + stream (default: `false` on this function, `true` on the bang version) - `:gzip`: Allows to read directly from a gzipped file (default: `false`) - `:file_mode`: A list with the Elixir `File.open` modes to be used for reading (default: `[:read, :utf8]`) @@ -133,7 +135,8 @@ defmodule RDF.Serialization.Format do @doc """ Deserializes a graph or dataset from a file. - As opposed to `read_file/2`, it raises an exception if an error occurs. + As opposed to `read_file/2`, it raises an exception if an error occurs and + defaults to `stream: true`. See `read_file/3` for the available format-independent options. @@ -188,6 +191,11 @@ defmodule RDF.Serialization.Format do General serialization-independent options: + - `:stream`: Enables writing the serialized data to the file directly + via a stream.
Possible values: `:string` or `:iodata` for writing to the file + with a stream of strings or IO lists, respectively; `true` if you want to use streams + but don't care about the exact method; or `false` for not writing with + a stream (default: `false` on this function, `:iodata` on the bang version) - `:gzip`: Allows to write directly to a gzipped file (default: `false`) - `:force`: If not set to `true`, an error is raised when the given file already exists (default: `false`) diff --git a/lib/rdf/serialization/reader.ex b/lib/rdf/serialization/reader.ex index 3edfbec..4d2e680 100644 --- a/lib/rdf/serialization/reader.ex +++ b/lib/rdf/serialization/reader.ex @@ -74,7 +74,7 @@ defmodule RDF.Serialization.Reader do end end - defp do_read_file!(true, decoder, file, opts) do + defp do_read_file!(_stream_mode, decoder, file, opts) do file |> File.stream!(file_mode(decoder, opts)) |> decoder.decode_from_stream(opts) diff --git a/lib/rdf/serialization/serialization.ex b/lib/rdf/serialization/serialization.ex index 984f2ea..1b05d48 100644 --- a/lib/rdf/serialization/serialization.ex +++ b/lib/rdf/serialization/serialization.ex @@ -20,7 +20,8 @@ defmodule RDF.Serialization do RDF.Turtle, JSON.LD, RDF.NTriples, - RDF.NQuads + RDF.NQuads, + RDF.XML ] @doc """ @@ -195,6 +196,8 @@ defmodule RDF.Serialization do Other available serialization-independent options: + - `:stream`: Enables reading the data from a file directly via a + stream (default: `false` on this function, `true` on the bang version) - `:gzip`: Allows to read directly from a gzipped file (default: `false`) - `:file_mode`: A list with the Elixir `File.open` modes to be used for reading (default: `[:read, :utf8]`) @@ -212,12 +215,15 @@ defmodule RDF.Serialization do @doc """ Deserializes a graph or dataset from a file. - As opposed to `read_file/2`, it raises an exception if an error occurs. + As opposed to `read_file/2`, it raises an exception if an error occurs and + defaults to `stream: true`.
The format can be specified with the `format` option and a format name or the `media_type` option and the media type of the format. If none of these are given, the format gets inferred from the extension of the given file name. + See `read_file/3` for the available format-independent options. + Please refer to the documentation of the decoder of a RDF serialization format for format-specific options. """ @@ -300,6 +306,11 @@ defmodule RDF.Serialization do Other available serialization-independent options: + - `:stream`: Enables writing the serialized data to the file directly + via a stream. Possible values: `:string` or `:iodata` for writing to the file + with a stream of strings or IO lists, respectively; `true` if you want to use streams + but don't care about the exact method; or `false` for not writing with + a stream (default: `false` on this function, `:iodata` on the bang version) - `:gzip`: Allows to write directly to a gzipped file (default: `false`) - `:force`: If not set to `true`, an error is raised when the given file already exists (default: `false`) @@ -363,15 +374,18 @@ defmodule RDF.Serialization do @doc false def use_file_streaming(mod, opts) do case Keyword.get(opts, :stream) do - true -> + nil -> + false + + false -> + false + + stream_mode -> if mod.stream_support?() do - true + stream_mode else raise "#{inspect(mod)} does not support streams" end - - _ -> - false end end @@ -381,15 +395,15 @@ defmodule RDF.Serialization do nil -> mod.stream_support?() - true -> + false -> + false + + stream_mode -> if mod.stream_support?() do - true + stream_mode else raise "#{inspect(mod)} does not support streams" end - - false -> - false end end end diff --git a/lib/rdf/serialization/writer.ex b/lib/rdf/serialization/writer.ex index f78495c..0ff76dc 100644 --- a/lib/rdf/serialization/writer.ex +++ b/lib/rdf/serialization/writer.ex @@ -10,6 +10,7 @@ defmodule RDF.Serialization.Writer do alias RDF.Serialization @default_file_mode ~w[write exclusive]a +
@default_stream_mode :iodata @spec write_string(module, RDF.Data.t(), keyword) :: {:ok, String.t()} | {:error, any} def write_string(encoder, data, opts \\ []) do @@ -48,9 +49,9 @@ defmodule RDF.Serialization.Writer do end end - defp do_write_file(true, encoder, data, path, opts) do + defp do_write_file(stream_mode, encoder, data, path, opts) do data - |> encoder.stream(opts) + |> encoder.stream(set_stream_mode(opts, stream_mode)) |> Enum.into(File.stream!(path, file_mode(encoder, opts))) end @@ -66,14 +67,17 @@ defmodule RDF.Serialization.Writer do File.write!(path, encoded_string, file_mode(encoder, opts)) end - defp do_write_file!(true, encoder, data, path, opts) do + defp do_write_file!(stream_mode, encoder, data, path, opts) do data - |> encoder.stream(opts) + |> encoder.stream(set_stream_mode(opts, stream_mode)) |> Enum.into(File.stream!(path, file_mode(encoder, opts))) :ok end + defp set_stream_mode(opts, true), do: Keyword.put(opts, :mode, @default_stream_mode) + defp set_stream_mode(opts, stream_mode), do: Keyword.put(opts, :mode, stream_mode) + @doc false def file_mode(_encoder, opts) do opts diff --git a/lib/rdf/serializations/nquads_encoder.ex b/lib/rdf/serializations/nquads_encoder.ex index 09dbf7c..434523b 100644 --- a/lib/rdf/serializations/nquads_encoder.ex +++ b/lib/rdf/serializations/nquads_encoder.ex @@ -21,6 +21,7 @@ defmodule RDF.NQuads.Encoder do case Keyword.get(opts, :mode, :string) do :string -> Stream.map(data, &statement(&1)) :iodata -> Stream.map(data, &iolist_statement(&1)) + invalid -> raise "Invalid stream mode: #{inspect(invalid)}" end end diff --git a/lib/rdf/serializations/ntriples_encoder.ex b/lib/rdf/serializations/ntriples_encoder.ex index 5fff37c..5dc3997 100644 --- a/lib/rdf/serializations/ntriples_encoder.ex +++ b/lib/rdf/serializations/ntriples_encoder.ex @@ -21,6 +21,7 @@ defmodule RDF.NTriples.Encoder do case Keyword.get(opts, :mode, :string) do :string -> Stream.map(data, &statement(&1)) :iodata -> Stream.map(data,
&iolist_statement(&1)) + invalid -> raise "Invalid stream mode: #{inspect(invalid)}" end end diff --git a/test/unit/serialization/serialization_test.exs b/test/unit/serialization/serialization_test.exs index aad265b..ce59947 100644 --- a/test/unit/serialization/serialization_test.exs +++ b/test/unit/serialization/serialization_test.exs @@ -342,8 +342,8 @@ defmodule RDF.SerializationTest do end test "when stream: true and format does support streams" do - assert Serialization.use_file_streaming(NTriples.Decoder, stream: true) - assert Serialization.use_file_streaming(NTriples.Encoder, stream: true) + assert Serialization.use_file_streaming(NTriples.Decoder, stream: :iodata) + assert Serialization.use_file_streaming(NTriples.Encoder, stream: :string) end test "when stream: true and format does not support streams" do @@ -366,8 +366,10 @@ defmodule RDF.SerializationTest do end test "when stream: true and format does support streams" do - assert Serialization.use_file_streaming!(NTriples.Decoder, stream: true) - assert Serialization.use_file_streaming!(NTriples.Encoder, stream: true) + assert Serialization.use_file_streaming!(NTriples.Decoder, stream: true) == true + assert Serialization.use_file_streaming!(NTriples.Encoder, stream: true) == true + assert Serialization.use_file_streaming!(NTriples.Encoder, stream: :iodata) == :iodata + assert Serialization.use_file_streaming!(NTriples.Encoder, stream: :string) == :string end test "when stream: true and format does not support streams" do