Allow setting the stream mode via the new :stream opt on file writers

This commit is contained in:
Marcel Otto 2020-11-05 22:06:10 +01:00
parent 41a299e122
commit 8b8c3feef7
8 changed files with 55 additions and 24 deletions

View file

@ -24,9 +24,10 @@ defmodule RDF.Serialization.Encoder do
@doc """ @doc """
Serializes a RDF data structure into a stream. Serializes a RDF data structure into a stream.
It should return a stream emitting either strings or iodata of the It should return a stream emitting either strings or iodata of the
serialized RDF data structure. If both forms are supported the form serialized RDF data structure. If both forms are supported the form
should be configurable via the `:mode` option. should be configurable via the `:mode` option and its values `:string`
and `:iodata`, respectively.
""" """
@callback stream(RDF.Data.t(), keyword) :: Enumerable.t() @callback stream(RDF.Data.t(), keyword) :: Enumerable.t()

View file

@ -121,6 +121,8 @@ defmodule RDF.Serialization.Format do
General serialization-independent options: General serialization-independent options:
- `:stream`: Enables reading the data from a file directly via a
stream (default: `false` on this function, `true` on the bang version)
- `:gzip`: Allows to read directly from a gzipped file (default: `false`) - `:gzip`: Allows to read directly from a gzipped file (default: `false`)
- `:file_mode`: A list with the Elixir `File.open` modes to be used for reading - `:file_mode`: A list with the Elixir `File.open` modes to be used for reading
(default: `[:read, :utf8]`) (default: `[:read, :utf8]`)
@ -133,7 +135,8 @@ defmodule RDF.Serialization.Format do
@doc """ @doc """
Deserializes a graph or dataset from a file. Deserializes a graph or dataset from a file.
As opposed to `read_file/2`, it raises an exception if an error occurs. As opposed to `read_file/2`, it raises an exception if an error occurs and
defaults to `stream: true`.
See `read_file/3` for the available format-independent options. See `read_file/3` for the available format-independent options.
@ -188,6 +191,11 @@ defmodule RDF.Serialization.Format do
General serialization-independent options: General serialization-independent options:
- `:stream`: Enables writing the serialized data to the file directly
via a stream. Possible values: `:string` or `:iodata` for writing to the file
with a stream of strings or IO lists, respectively; `true` if you want to use
streams but don't care about the exact method; or `false` for not writing with
a stream (default: `false` on this function, `:iodata` on the bang version)
- `:gzip`: Allows to write directly to a gzipped file (default: `false`) - `:gzip`: Allows to write directly to a gzipped file (default: `false`)
- `:force`: If not set to `true`, an error is raised when the given file - `:force`: If not set to `true`, an error is raised when the given file
already exists (default: `false`) already exists (default: `false`)

View file

@ -74,7 +74,7 @@ defmodule RDF.Serialization.Reader do
end end
end end
defp do_read_file!(true, decoder, file, opts) do defp do_read_file!(_stream_mode, decoder, file, opts) do
file file
|> File.stream!(file_mode(decoder, opts)) |> File.stream!(file_mode(decoder, opts))
|> decoder.decode_from_stream(opts) |> decoder.decode_from_stream(opts)

View file

@ -20,7 +20,8 @@ defmodule RDF.Serialization do
RDF.Turtle, RDF.Turtle,
JSON.LD, JSON.LD,
RDF.NTriples, RDF.NTriples,
RDF.NQuads RDF.NQuads,
RDF.XML
] ]
@doc """ @doc """
@ -195,6 +196,8 @@ defmodule RDF.Serialization do
Other available serialization-independent options: Other available serialization-independent options:
- `:stream`: Enables reading the data from a file directly via a
stream (default: `false` on this function, `true` on the bang version)
- `:gzip`: Allows to read directly from a gzipped file (default: `false`) - `:gzip`: Allows to read directly from a gzipped file (default: `false`)
- `:file_mode`: A list with the Elixir `File.open` modes to be used for reading - `:file_mode`: A list with the Elixir `File.open` modes to be used for reading
(default: `[:read, :utf8]`) (default: `[:read, :utf8]`)
@ -212,12 +215,15 @@ defmodule RDF.Serialization do
@doc """ @doc """
Deserializes a graph or dataset from a file. Deserializes a graph or dataset from a file.
As opposed to `read_file/2`, it raises an exception if an error occurs. As opposed to `read_file/2`, it raises an exception if an error occurs and
defaults to `stream: true`.
The format can be specified with the `format` option and a format name or the The format can be specified with the `format` option and a format name or the
`media_type` option and the media type of the format. If none of these are `media_type` option and the media type of the format. If none of these are
given, the format gets inferred from the extension of the given file name. given, the format gets inferred from the extension of the given file name.
See `read_file/3` for the available format-independent options.
Please refer to the documentation of the decoder of a RDF serialization format Please refer to the documentation of the decoder of a RDF serialization format
for format-specific options. for format-specific options.
""" """
@ -300,6 +306,11 @@ defmodule RDF.Serialization do
Other available serialization-independent options: Other available serialization-independent options:
- `:stream`: Enables writing the serialized data to the file directly
via a stream. Possible values: `:string` or `:iodata` for writing to the file
with a stream of strings or IO lists, respectively; `true` if you want to use
streams but don't care about the exact method; or `false` for not writing with
a stream (default: `false` on this function, `:iodata` on the bang version)
- `:gzip`: Allows to write directly to a gzipped file (default: `false`) - `:gzip`: Allows to write directly to a gzipped file (default: `false`)
- `:force`: If not set to `true`, an error is raised when the given file - `:force`: If not set to `true`, an error is raised when the given file
already exists (default: `false`) already exists (default: `false`)
@ -363,15 +374,18 @@ defmodule RDF.Serialization do
@doc false @doc false
def use_file_streaming(mod, opts) do def use_file_streaming(mod, opts) do
case Keyword.get(opts, :stream) do case Keyword.get(opts, :stream) do
true -> nil ->
false
false ->
false
stream_mode ->
if mod.stream_support?() do if mod.stream_support?() do
true stream_mode
else else
raise "#{inspect(mod)} does not support streams" raise "#{inspect(mod)} does not support streams"
end end
_ ->
false
end end
end end
@ -381,15 +395,15 @@ defmodule RDF.Serialization do
nil -> nil ->
mod.stream_support?() mod.stream_support?()
true -> false ->
false
stream_mode ->
if mod.stream_support?() do if mod.stream_support?() do
true stream_mode
else else
raise "#{inspect(mod)} does not support streams" raise "#{inspect(mod)} does not support streams"
end end
false ->
false
end end
end end
end end

View file

@ -10,6 +10,7 @@ defmodule RDF.Serialization.Writer do
alias RDF.Serialization alias RDF.Serialization
@default_file_mode ~w[write exclusive]a @default_file_mode ~w[write exclusive]a
@default_stream_mode :iodata
@spec write_string(module, RDF.Data.t(), keyword) :: {:ok, String.t()} | {:error, any} @spec write_string(module, RDF.Data.t(), keyword) :: {:ok, String.t()} | {:error, any}
def write_string(encoder, data, opts \\ []) do def write_string(encoder, data, opts \\ []) do
@ -48,9 +49,9 @@ defmodule RDF.Serialization.Writer do
end end
end end
defp do_write_file(true, encoder, data, path, opts) do defp do_write_file(stream_mode, encoder, data, path, opts) do
data data
|> encoder.stream(opts) |> encoder.stream(set_stream_mode(opts, stream_mode))
|> Enum.into(File.stream!(path, file_mode(encoder, opts))) |> Enum.into(File.stream!(path, file_mode(encoder, opts)))
end end
@ -66,14 +67,17 @@ defmodule RDF.Serialization.Writer do
File.write!(path, encoded_string, file_mode(encoder, opts)) File.write!(path, encoded_string, file_mode(encoder, opts))
end end
defp do_write_file!(true, encoder, data, path, opts) do defp do_write_file!(stream_mode, encoder, data, path, opts) do
data data
|> encoder.stream(opts) |> encoder.stream(set_stream_mode(opts, stream_mode))
|> Enum.into(File.stream!(path, file_mode(encoder, opts))) |> Enum.into(File.stream!(path, file_mode(encoder, opts)))
:ok :ok
end end
defp set_stream_mode(opts, true), do: Keyword.put(opts, :mode, @default_stream_mode)
defp set_stream_mode(opts, stream_mode), do: Keyword.put(opts, :mode, stream_mode)
@doc false @doc false
def file_mode(_encoder, opts) do def file_mode(_encoder, opts) do
opts opts

View file

@ -21,6 +21,7 @@ defmodule RDF.NQuads.Encoder do
case Keyword.get(opts, :mode, :string) do case Keyword.get(opts, :mode, :string) do
:string -> Stream.map(data, &statement(&1)) :string -> Stream.map(data, &statement(&1))
:iodata -> Stream.map(data, &iolist_statement(&1)) :iodata -> Stream.map(data, &iolist_statement(&1))
invalid -> raise "Invalid stream mode: #{invalid}"
end end
end end

View file

@ -21,6 +21,7 @@ defmodule RDF.NTriples.Encoder do
case Keyword.get(opts, :mode, :string) do case Keyword.get(opts, :mode, :string) do
:string -> Stream.map(data, &statement(&1)) :string -> Stream.map(data, &statement(&1))
:iodata -> Stream.map(data, &iolist_statement(&1)) :iodata -> Stream.map(data, &iolist_statement(&1))
invalid -> raise "Invalid stream mode: #{invalid}"
end end
end end

View file

@ -342,8 +342,8 @@ defmodule RDF.SerializationTest do
end end
test "when stream: true and format does support streams" do test "when stream: true and format does support streams" do
assert Serialization.use_file_streaming(NTriples.Decoder, stream: true) assert Serialization.use_file_streaming(NTriples.Decoder, stream: :iolist)
assert Serialization.use_file_streaming(NTriples.Encoder, stream: true) assert Serialization.use_file_streaming(NTriples.Encoder, stream: :string)
end end
test "when stream: true and format does not support streams" do test "when stream: true and format does not support streams" do
@ -366,8 +366,10 @@ defmodule RDF.SerializationTest do
end end
test "when stream: true and format does support streams" do test "when stream: true and format does support streams" do
assert Serialization.use_file_streaming!(NTriples.Decoder, stream: true) assert Serialization.use_file_streaming!(NTriples.Decoder, stream: true) == true
assert Serialization.use_file_streaming!(NTriples.Encoder, stream: true) assert Serialization.use_file_streaming!(NTriples.Encoder, stream: true) == true
assert Serialization.use_file_streaming!(NTriples.Encoder, stream: :iodata) == :iodata
assert Serialization.use_file_streaming!(NTriples.Encoder, stream: :string) == :string
end end
test "when stream: true and format does not support streams" do test "when stream: true and format does not support streams" do