Add :gzip opt on all read and write file serialization functions
This commit is contained in:
parent
d3f66bd5d9
commit
41a299e122
8 changed files with 180 additions and 18 deletions
|
@ -11,6 +11,9 @@ This project adheres to [Semantic Versioning](http://semver.org/) and
|
|||
|
||||
- general serialization functions for reading from and writing to streams
|
||||
and implementations for N-Triples and N-Quads (Turtle still to come)
|
||||
- a `:gzip` option flag on all `read_file/3` and `write_file/3` functions
|
||||
allows reading and writing all supported serialization formats from and to
|
||||
gzipped files (works also with the new possibility to read and write files via streams)
|
||||
- `RDF.Dataset.prefixes/1` for getting an aggregated `RDF.PrefixMap` over all graphs
|
||||
- `RDF.PrefixMap.put/3` for adding a prefix mapping, overwriting an existing one
|
||||
- `RDF.BlankNode.value/1` for getting the internal string representation of a blank node
|
||||
|
|
|
@ -117,6 +117,14 @@ defmodule RDF.Serialization.Format do
|
|||
It returns an `{:ok, data}` tuple, with `data` being the deserialized graph or
|
||||
dataset, or `{:error, reason}` if an error occurs.
|
||||
|
||||
## Options
|
||||
|
||||
General serialization-independent options:
|
||||
|
||||
- `:gzip`: Allows reading directly from a gzipped file (default: `false`)
|
||||
- `:file_mode`: A list with the Elixir `File.open` modes to be used for reading
|
||||
(default: `[:read, :utf8]`)
|
||||
|
||||
#{@decoder_doc_ref}
|
||||
"""
|
||||
@spec read_file(Path.t(), keyword) :: {:ok, Graph.t() | Dataset.t()} | {:error, any}
|
||||
|
@ -127,6 +135,8 @@ defmodule RDF.Serialization.Format do
|
|||
|
||||
As opposed to `read_file/2`, it raises an exception if an error occurs.
|
||||
|
||||
See `read_file/2` for the available format-independent options.
|
||||
|
||||
#{@decoder_doc_ref}
|
||||
"""
|
||||
@spec read_file!(Path.t(), keyword) :: Graph.t() | Dataset.t()
|
||||
|
@ -178,9 +188,10 @@ defmodule RDF.Serialization.Format do
|
|||
|
||||
General serialization-independent options:
|
||||
|
||||
- `:force` - If not set to `true`, an error is raised when the given file
|
||||
- `:gzip`: Allows writing directly to a gzipped file (default: `false`)
|
||||
- `:force`: If not set to `true`, an error is raised when the given file
|
||||
already exists (default: `false`)
|
||||
- `:file_mode` - A list with the Elixir `File.open` modes to be used for writing
|
||||
- `:file_mode`: A list with the Elixir `File.open` modes to be used for writing
|
||||
(default: `[:write, :exclusive]`)
|
||||
|
||||
#{@encoder_doc_ref}
|
||||
|
|
|
@ -9,6 +9,8 @@ defmodule RDF.Serialization.Reader do
|
|||
|
||||
alias RDF.{Serialization, Dataset, Graph}
|
||||
|
||||
@default_file_mode ~w[read utf8]a
|
||||
|
||||
@spec read_string(module, String.t(), keyword) :: {:ok, Graph.t() | Dataset.t()} | {:error, any}
|
||||
def read_string(decoder, content, opts \\ []) do
|
||||
decoder.decode(content, opts)
|
||||
|
@ -36,16 +38,19 @@ defmodule RDF.Serialization.Reader do
|
|||
end
|
||||
|
||||
defp do_read_file(false, decoder, file, opts) do
|
||||
case File.read(file) do
|
||||
file
|
||||
|> File.open(file_mode(decoder, opts), &IO.read(&1, :all))
|
||||
|> case do
|
||||
{:ok, {:error, error}} -> {:error, error}
|
||||
{:ok, content} -> decoder.decode(content, opts)
|
||||
{:error, reason} -> {:error, reason}
|
||||
{:error, error} -> {:error, error}
|
||||
end
|
||||
end
|
||||
|
||||
defp do_read_file(true, decoder, file, opts) do
|
||||
{:ok,
|
||||
file
|
||||
|> File.stream!()
|
||||
|> File.stream!(file_mode(decoder, opts))
|
||||
|> decoder.decode_from_stream(opts)}
|
||||
rescue
|
||||
error in RuntimeError -> {:error, error.message}
|
||||
|
@ -61,13 +66,28 @@ defmodule RDF.Serialization.Reader do
|
|||
|
||||
defp do_read_file!(false, decoder, file, opts) do
|
||||
file
|
||||
|> File.read!()
|
||||
|> decoder.decode!(opts)
|
||||
|> File.open!(file_mode(decoder, opts), &IO.read(&1, :all))
|
||||
|> case do
|
||||
{:error, error} when is_tuple(error) -> error |> inspect() |> raise()
|
||||
{:error, error} -> raise(error)
|
||||
content -> decoder.decode!(content, opts)
|
||||
end
|
||||
end
|
||||
|
||||
defp do_read_file!(true, decoder, file, opts) do
|
||||
file
|
||||
|> File.stream!()
|
||||
|> File.stream!(file_mode(decoder, opts))
|
||||
|> decoder.decode_from_stream(opts)
|
||||
end
|
||||
|
||||
@doc false
|
||||
def file_mode(_decoder, opts) do
|
||||
opts
|
||||
|> Keyword.get(:file_mode, @default_file_mode)
|
||||
|> List.wrap()
|
||||
|> set_gzip(Keyword.get(opts, :gzip))
|
||||
end
|
||||
|
||||
defp set_gzip(file_mode, true), do: [:compressed | file_mode]
|
||||
defp set_gzip(file_mode, _), do: file_mode
|
||||
end
|
||||
|
|
|
@ -187,10 +187,18 @@ defmodule RDF.Serialization do
|
|||
It returns an `{:ok, data}` tuple, with `data` being the deserialized graph or
|
||||
dataset, or `{:error, reason}` if an error occurs.
|
||||
|
||||
## Options
|
||||
|
||||
The format can be specified with the `format` option and a format name or the
|
||||
`media_type` option and the media type of the format. If none of these are
|
||||
given, the format gets inferred from the extension of the given file name.
|
||||
|
||||
Other available serialization-independent options:
|
||||
|
||||
- `:gzip`: Allows reading directly from a gzipped file (default: `false`)
|
||||
- `:file_mode`: A list with the Elixir `File.open` modes to be used for reading
|
||||
(default: `[:read, :utf8]`)
|
||||
|
||||
Please refer to the documentation of the decoder of a RDF serialization format
|
||||
for format-specific options.
|
||||
"""
|
||||
|
@ -292,9 +300,10 @@ defmodule RDF.Serialization do
|
|||
|
||||
Other available serialization-independent options:
|
||||
|
||||
- `:force` - If not set to `true`, an error is raised when the given file
|
||||
- `:gzip`: Allows writing directly to a gzipped file (default: `false`)
|
||||
- `:force`: If not set to `true`, an error is raised when the given file
|
||||
already exists (default: `false`)
|
||||
- `:file_mode` - A list with the Elixir `File.open` modes to be used for writing
|
||||
- `:file_mode`: A list with the Elixir `File.open` modes to be used for writing
|
||||
(default: `[:write, :exclusive]`)
|
||||
|
||||
Please refer to the documentation of the encoder of a RDF serialization format
|
||||
|
|
|
@ -65,6 +65,7 @@ defmodule RDF.Serialization.Writer do
|
|||
encoded_string = encoder.encode!(data, opts)
|
||||
File.write!(path, encoded_string, file_mode(encoder, opts))
|
||||
end
|
||||
|
||||
defp do_write_file!(true, encoder, data, path, opts) do
|
||||
data
|
||||
|> encoder.stream(opts)
|
||||
|
@ -73,15 +74,18 @@ defmodule RDF.Serialization.Writer do
|
|||
:ok
|
||||
end
|
||||
|
||||
@doc false
|
||||
def file_mode(_encoder, opts) do
|
||||
opts
|
||||
|> Keyword.get(:file_mode, @default_file_mode)
|
||||
|> List.wrap()
|
||||
|> set_force(Keyword.get(opts, :force))
|
||||
|> set_gzip(Keyword.get(opts, :gzip))
|
||||
end
|
||||
|
||||
defp file_mode(_encoder, opts) do
|
||||
file_mode = Keyword.get(opts, :file_mode, @default_file_mode)
|
||||
defp set_force(file_mode, true), do: List.delete(file_mode, :exclusive)
|
||||
defp set_force(file_mode, _), do: file_mode
|
||||
|
||||
if Keyword.get(opts, :force) do
|
||||
List.delete(file_mode, :exclusive)
|
||||
else
|
||||
file_mode
|
||||
end
|
||||
end
|
||||
defp set_gzip(file_mode, true), do: [:compressed | file_mode]
|
||||
defp set_gzip(file_mode, _), do: file_mode
|
||||
end
|
||||
|
|
19
test/unit/serialization/reader_test.exs
Normal file
19
test/unit/serialization/reader_test.exs
Normal file
|
@ -0,0 +1,19 @@
|
|||
defmodule RDF.Serialization.ReaderTest do
|
||||
use RDF.Test.Case
|
||||
|
||||
doctest RDF.Serialization.Reader
|
||||
|
||||
alias RDF.Serialization.Reader
|
||||
alias RDF.Turtle
|
||||
|
||||
describe "file_mode/2" do
|
||||
test ":gzip without other :file_mode opts" do
|
||||
assert Reader.file_mode(Turtle.Decoder, gzip: true) == ~w[compressed read utf8]a
|
||||
end
|
||||
|
||||
test ":gzip with other :file_mode opts" do
|
||||
assert Reader.file_mode(Turtle.Decoder, gzip: true, file_mode: [:charlist]) ==
|
||||
~w[compressed charlist]a
|
||||
end
|
||||
end
|
||||
end
|
|
@ -260,6 +260,79 @@ defmodule RDF.SerializationTest do
|
|||
end
|
||||
end
|
||||
|
||||
test ":gzip opt" do
|
||||
# first ensure that :gzip is not ignored on both read and write, which would lead to a false positive
|
||||
file = file("gzip_test.gz")
|
||||
Serialization.write_file!(@example_graph, file, format: :turtle, gzip: true, force: true)
|
||||
assert_raise RuntimeError, fn -> Serialization.read_file!(file, format: :turtle) end
|
||||
|
||||
Serialization.write_file!(@example_graph, file,
|
||||
format: :ntriples,
|
||||
gzip: true,
|
||||
stream: true,
|
||||
force: true
|
||||
)
|
||||
|
||||
# Why do we get an UndefinedFunctionError (function :unicode.format_error/1 is undefined or private) here?
|
||||
assert_raise UndefinedFunctionError, fn ->
|
||||
Serialization.read_file!(file, format: :ntriples, stream: true)
|
||||
end
|
||||
|
||||
:ok = Serialization.write_file(@example_graph, file, format: :turtle, gzip: true, force: true)
|
||||
assert {:error, _} = Serialization.read_file(file, format: :turtle)
|
||||
|
||||
:ok =
|
||||
Serialization.write_file(@example_graph, file,
|
||||
format: :ntriples,
|
||||
gzip: true,
|
||||
stream: true,
|
||||
force: true
|
||||
)
|
||||
|
||||
assert {:error, _} = Serialization.read_file(file, format: :ntriples, stream: true)
|
||||
|
||||
# start of the actual tests
|
||||
assert :ok =
|
||||
Serialization.write_file(@example_graph, file,
|
||||
format: :turtle,
|
||||
gzip: true,
|
||||
force: true
|
||||
)
|
||||
|
||||
assert Serialization.read_file(file, format: :turtle, gzip: true) == {:ok, @example_graph}
|
||||
|
||||
assert :ok =
|
||||
Serialization.write_file(@example_graph, file,
|
||||
format: :ntriples,
|
||||
gzip: true,
|
||||
stream: true,
|
||||
force: true
|
||||
)
|
||||
|
||||
assert Serialization.read_file(file, format: :ntriples, stream: true, gzip: true) ==
|
||||
{:ok, Graph.clear_metadata(@example_graph)}
|
||||
|
||||
assert :ok =
|
||||
Serialization.write_file!(@example_graph, file,
|
||||
format: :turtle,
|
||||
gzip: true,
|
||||
force: true
|
||||
)
|
||||
|
||||
assert Serialization.read_file!(file, format: :turtle, gzip: true) == @example_graph
|
||||
|
||||
assert :ok =
|
||||
Serialization.write_file!(@example_graph, file,
|
||||
format: :ntriples,
|
||||
gzip: true,
|
||||
stream: true,
|
||||
force: true
|
||||
)
|
||||
|
||||
assert Serialization.read_file!(file, format: :ntriples, stream: true, gzip: true) ==
|
||||
Graph.clear_metadata(@example_graph)
|
||||
end
|
||||
|
||||
describe "use_file_streaming/2" do
|
||||
test "without opts" do
|
||||
refute Serialization.use_file_streaming(NTriples.Decoder, [])
|
||||
|
|
23
test/unit/serialization/writer_test.exs
Normal file
23
test/unit/serialization/writer_test.exs
Normal file
|
@ -0,0 +1,23 @@
|
|||
defmodule RDF.Serialization.WriterTest do
|
||||
use RDF.Test.Case
|
||||
|
||||
doctest RDF.Serialization.Writer
|
||||
|
||||
alias RDF.Serialization.Writer
|
||||
alias RDF.Turtle
|
||||
|
||||
describe "file_mode/2" do
|
||||
test ":force" do
|
||||
assert Writer.file_mode(Turtle.Encoder, force: true) == ~w[write]a
|
||||
end
|
||||
|
||||
test ":gzip without other :file_mode opts" do
|
||||
assert Writer.file_mode(Turtle.Encoder, gzip: true) == ~w[compressed write exclusive]a
|
||||
end
|
||||
|
||||
test ":gzip with other :file_mode opts" do
|
||||
assert Writer.file_mode(Turtle.Encoder, gzip: true, file_mode: [:append]) ==
|
||||
~w[compressed append]a
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Reference in a new issue