Allow setting the stream mode via the new :stream opt on file writers

This commit is contained in:
Marcel Otto 2020-11-05 22:06:10 +01:00
parent 41a299e122
commit 8b8c3feef7
8 changed files with 55 additions and 24 deletions

View file

@ -26,7 +26,8 @@ defmodule RDF.Serialization.Encoder do
It should return a stream emitting either strings or iodata of the
serialized RDF data structure. If both forms are supported the form
should be configurable via the `:mode` option.
should be configurable via the `:mode` option with the values `:string`
or `:iodata`, respectively.
"""
@callback stream(RDF.Data.t(), keyword) :: Enumerable.t()

View file

@ -121,6 +121,8 @@ defmodule RDF.Serialization.Format do
General serialization-independent options:
- `:stream`: Enables reading the data from a file directly via a
stream (default: `false` on this function, `true` on the bang version)
- `:gzip`: Allows to read directly from a gzipped file (default: `false`)
- `:file_mode`: A list with the Elixir `File.open` modes to be used for reading
(default: `[:read, :utf8]`)
@ -133,7 +135,8 @@ defmodule RDF.Serialization.Format do
@doc """
Deserializes a graph or dataset from a file.
As opposed to `read_file/2`, it raises an exception if an error occurs.
As opposed to `read_file/2`, it raises an exception if an error occurs and
defaults to `stream: true`.
See `read_file/3` for the available format-independent options.
@ -188,6 +191,11 @@ defmodule RDF.Serialization.Format do
General serialization-independent options:
- `:stream`: Enables writing the serialized data to the file directly
via a stream. Possible values: `:string` or `:iodata` for writing to the file
with a stream of strings or IO lists, respectively; `true` if you want to use
streams but don't care about the exact method; or `false` for not writing with
a stream (default: `false` on this function, `:iodata` on the bang version)
- `:gzip`: Allows to write directly to a gzipped file (default: `false`)
- `:force`: If not set to `true`, an error is raised when the given file
already exists (default: `false`)

View file

@ -74,7 +74,7 @@ defmodule RDF.Serialization.Reader do
end
end
defp do_read_file!(true, decoder, file, opts) do
defp do_read_file!(_stream_mode, decoder, file, opts) do
file
|> File.stream!(file_mode(decoder, opts))
|> decoder.decode_from_stream(opts)

View file

@ -20,7 +20,8 @@ defmodule RDF.Serialization do
RDF.Turtle,
JSON.LD,
RDF.NTriples,
RDF.NQuads
RDF.NQuads,
RDF.XML
]
@doc """
@ -195,6 +196,8 @@ defmodule RDF.Serialization do
Other available serialization-independent options:
- `:stream`: Enables reading the data from a file directly via a
stream (default: `false` on this function, `true` on the bang version)
- `:gzip`: Allows to read directly from a gzipped file (default: `false`)
- `:file_mode`: A list with the Elixir `File.open` modes to be used for reading
(default: `[:read, :utf8]`)
@ -212,12 +215,15 @@ defmodule RDF.Serialization do
@doc """
Deserializes a graph or dataset from a file.
As opposed to `read_file/2`, it raises an exception if an error occurs.
As opposed to `read_file/2`, it raises an exception if an error occurs and
defaults to `stream: true`.
The format can be specified with the `format` option and a format name or the
`media_type` option and the media type of the format. If none of these are
given, the format gets inferred from the extension of the given file name.
See `read_file/3` for the available format-independent options.
Please refer to the documentation of the decoder of a RDF serialization format
for format-specific options.
"""
@ -300,6 +306,11 @@ defmodule RDF.Serialization do
Other available serialization-independent options:
- `:stream`: Enables writing the serialized data to the file directly
via a stream. Possible values: `:string` or `:iodata` for writing to the file
with a stream of strings or IO lists, respectively; `true` if you want to use
streams but don't care about the exact method; or `false` for not writing with
a stream (default: `false` on this function, `:iodata` on the bang version)
- `:gzip`: Allows to write directly to a gzipped file (default: `false`)
- `:force`: If not set to `true`, an error is raised when the given file
already exists (default: `false`)
@ -363,15 +374,18 @@ defmodule RDF.Serialization do
@doc false
def use_file_streaming(mod, opts) do
case Keyword.get(opts, :stream) do
true ->
nil ->
false
false ->
false
stream_mode ->
if mod.stream_support?() do
true
stream_mode
else
raise "#{inspect(mod)} does not support streams"
end
_ ->
false
end
end
@ -381,15 +395,15 @@ defmodule RDF.Serialization do
nil ->
mod.stream_support?()
true ->
false ->
false
stream_mode ->
if mod.stream_support?() do
true
stream_mode
else
raise "#{inspect(mod)} does not support streams"
end
false ->
false
end
end
end

View file

@ -10,6 +10,7 @@ defmodule RDF.Serialization.Writer do
alias RDF.Serialization
@default_file_mode ~w[write exclusive]a
@default_stream_mode :iodata
@spec write_string(module, RDF.Data.t(), keyword) :: {:ok, String.t()} | {:error, any}
def write_string(encoder, data, opts \\ []) do
@ -48,9 +49,9 @@ defmodule RDF.Serialization.Writer do
end
end
defp do_write_file(true, encoder, data, path, opts) do
defp do_write_file(stream_mode, encoder, data, path, opts) do
data
|> encoder.stream(opts)
|> encoder.stream(set_stream_mode(opts, stream_mode))
|> Enum.into(File.stream!(path, file_mode(encoder, opts)))
end
@ -66,14 +67,17 @@ defmodule RDF.Serialization.Writer do
File.write!(path, encoded_string, file_mode(encoder, opts))
end
defp do_write_file!(true, encoder, data, path, opts) do
defp do_write_file!(stream_mode, encoder, data, path, opts) do
data
|> encoder.stream(opts)
|> encoder.stream(set_stream_mode(opts, stream_mode))
|> Enum.into(File.stream!(path, file_mode(encoder, opts)))
:ok
end
# Sets the encoder's `:mode` option from the given stream mode: `true` selects
# the default stream mode, any other value is used as the mode unchanged.
defp set_stream_mode(opts, stream_mode) do
  mode =
    case stream_mode do
      true -> @default_stream_mode
      other -> other
    end

  Keyword.put(opts, :mode, mode)
end
@doc false
def file_mode(_encoder, opts) do
opts

View file

@ -21,6 +21,7 @@ defmodule RDF.NQuads.Encoder do
case Keyword.get(opts, :mode, :string) do
:string -> Stream.map(data, &statement(&1))
:iodata -> Stream.map(data, &iolist_statement(&1))
invalid -> raise "Invalid stream mode: #{invalid}"
end
end

View file

@ -21,6 +21,7 @@ defmodule RDF.NTriples.Encoder do
case Keyword.get(opts, :mode, :string) do
:string -> Stream.map(data, &statement(&1))
:iodata -> Stream.map(data, &iolist_statement(&1))
invalid -> raise "Invalid stream mode: #{invalid}"
end
end

View file

@ -342,8 +342,8 @@ defmodule RDF.SerializationTest do
end
test "when stream: true and format does support streams" do
assert Serialization.use_file_streaming(NTriples.Decoder, stream: true)
assert Serialization.use_file_streaming(NTriples.Encoder, stream: true)
assert Serialization.use_file_streaming(NTriples.Decoder, stream: :iolist)
assert Serialization.use_file_streaming(NTriples.Encoder, stream: :string)
end
test "when stream: true and format does not support streams" do
@ -366,8 +366,10 @@ defmodule RDF.SerializationTest do
end
test "when stream: true and format does support streams" do
assert Serialization.use_file_streaming!(NTriples.Decoder, stream: true)
assert Serialization.use_file_streaming!(NTriples.Encoder, stream: true)
assert Serialization.use_file_streaming!(NTriples.Decoder, stream: true) == true
assert Serialization.use_file_streaming!(NTriples.Encoder, stream: true) == true
assert Serialization.use_file_streaming!(NTriples.Encoder, stream: :iodata) == :iodata
assert Serialization.use_file_streaming!(NTriples.Encoder, stream: :string) == :string
end
test "when stream: true and format does not support streams" do