Add :gzip opt on all read and write file serialization functions

This commit is contained in:
Marcel Otto 2020-11-05 17:23:59 +01:00
parent d3f66bd5d9
commit 41a299e122
8 changed files with 180 additions and 18 deletions

View file

@ -11,6 +11,9 @@ This project adheres to [Semantic Versioning](http://semver.org/) and
- general serialization functions for reading from and writing to streams
and implementations for N-Triples and N-Quads (Turtle still to come)
- a `:gzip` option flag on all `read_file/3` and `write_file/3` functions
allows reading and writing all supported serialization formats from and to
gzipped files (this also works with the new ability to read and write files via streams)
- `RDF.Dataset.prefixes/1` for getting an aggregated `RDF.PrefixMap` over all graphs
- `RDF.PrefixMap.put/3` for adding a prefix mapping and overwriting an existing one
- `RDF.BlankNode.value/1` for getting the internal string representation of a blank node

View file

@ -117,6 +117,14 @@ defmodule RDF.Serialization.Format do
It returns an `{:ok, data}` tuple, with `data` being the deserialized graph or
dataset, or `{:error, reason}` if an error occurs.
## Options
General serialization-independent options:
- `:gzip`: Allows reading directly from a gzipped file (default: `false`)
- `:file_mode`: A list with the Elixir `File.open` modes to be used for reading
(default: `[:read, :utf8]`)
#{@decoder_doc_ref}
"""
@spec read_file(Path.t(), keyword) :: {:ok, Graph.t() | Dataset.t()} | {:error, any}
@ -127,6 +135,8 @@ defmodule RDF.Serialization.Format do
As opposed to `read_file/2`, it raises an exception if an error occurs.
See `read_file/2` for the available format-independent options.
#{@decoder_doc_ref}
"""
@spec read_file!(Path.t(), keyword) :: Graph.t() | Dataset.t()
@ -178,9 +188,10 @@ defmodule RDF.Serialization.Format do
General serialization-independent options:
- `:force` - If not set to `true`, an error is raised when the given file
- `:gzip`: Allows writing directly to a gzipped file (default: `false`)
- `:force`: If not set to `true`, an error is raised when the given file
already exists (default: `false`)
- `:file_mode` - A list with the Elixir `File.open` modes to be used for writing
- `:file_mode`: A list with the Elixir `File.open` modes to be used for writing
(default: `[:write, :exclusive]`)
#{@encoder_doc_ref}

View file

@ -9,6 +9,8 @@ defmodule RDF.Serialization.Reader do
alias RDF.{Serialization, Dataset, Graph}
@default_file_mode ~w[read utf8]a
@spec read_string(module, String.t(), keyword) :: {:ok, Graph.t() | Dataset.t()} | {:error, any}
def read_string(decoder, content, opts \\ []) do
decoder.decode(content, opts)
@ -36,16 +38,19 @@ defmodule RDF.Serialization.Reader do
end
# Non-streaming read: loads the whole file and hands its content to the decoder.
#
# The file is opened with the modes computed by `file_mode/2`, so the `:gzip`
# and `:file_mode` options are honoured. `File.open/3` wraps the result of the
# given function in `{:ok, _}`, which is why a failed `IO.read/2` arrives as
# `{:ok, {:error, reason}}` and is unwrapped separately from a failure to open
# the file at all.
#
# Returns whatever `decoder.decode/2` returns on success, `{:error, reason}`
# otherwise.
defp do_read_file(false, decoder, file, opts) do
  file
  |> File.open(file_mode(decoder, opts), &IO.read(&1, :all))
  |> case do
    # the file could be opened, but reading from it failed
    {:ok, {:error, error}} -> {:error, error}
    {:ok, content} -> decoder.decode(content, opts)
    # the file could not be opened
    {:error, error} -> {:error, error}
  end
end
defp do_read_file(true, decoder, file, opts) do
{:ok,
file
|> File.stream!()
|> File.stream!(file_mode(decoder, opts))
|> decoder.decode_from_stream(opts)}
rescue
error in RuntimeError -> {:error, error.message}
@ -61,13 +66,28 @@ defmodule RDF.Serialization.Reader do
# Non-streaming, raising read: loads the whole file and hands its content to
# `decoder.decode!/2`.
#
# `File.open!/3` raises on its own when the file cannot be opened and otherwise
# returns the result of the given function, i.e. of `IO.read/2`. A read error
# is turned into a `RuntimeError`. Only binaries are passed to `raise/1` as-is;
# any other reason (tuple or atom) is inspected first, because `raise/1` would
# treat a bare atom as an exception module and fail with an
# `UndefinedFunctionError` instead of reporting the actual reason.
defp do_read_file!(false, decoder, file, opts) do
  file
  |> File.open!(file_mode(decoder, opts), &IO.read(&1, :all))
  |> case do
    {:error, error} when is_binary(error) -> raise(error)
    {:error, error} -> error |> inspect() |> raise()
    content -> decoder.decode!(content, opts)
  end
end
# Streaming, raising read: opens the file as a stream with the modes computed
# by `file_mode/2` (so `:gzip` and `:file_mode` are honoured) and passes the
# stream to `decoder.decode_from_stream/2`. Errors are not caught here.
defp do_read_file!(true, decoder, file, opts) do
  file
  |> File.stream!(file_mode(decoder, opts))
  |> decoder.decode_from_stream(opts)
end
# Computes the list of file modes used for opening a file for reading.
#
# Starts from the `:file_mode` option (falling back to `@default_file_mode`),
# normalizes it to a list and lets `set_gzip/2` adjust it according to the
# `:gzip` option. The decoder argument is not used.
@doc false
def file_mode(_decoder, opts) do
  gzip = Keyword.get(opts, :gzip)
  modes = List.wrap(Keyword.get(opts, :file_mode, @default_file_mode))

  set_gzip(modes, gzip)
end
# Prepends `:compressed` to the given modes when the flag is exactly `true`;
# any other flag value leaves the modes untouched.
defp set_gzip(file_mode, gzip) do
  if gzip == true, do: [:compressed | file_mode], else: file_mode
end
end

View file

@ -187,10 +187,18 @@ defmodule RDF.Serialization do
It returns an `{:ok, data}` tuple, with `data` being the deserialized graph or
dataset, or `{:error, reason}` if an error occurs.
## Options
The format can be specified with the `format` option and a format name or the
`media_type` option and the media type of the format. If none of these are
given, the format gets inferred from the extension of the given file name.
Other available serialization-independent options:
- `:gzip`: Allows reading directly from a gzipped file (default: `false`)
- `:file_mode`: A list with the Elixir `File.open` modes to be used for reading
(default: `[:read, :utf8]`)
Please refer to the documentation of the decoder of a RDF serialization format
for format-specific options.
"""
@ -292,9 +300,10 @@ defmodule RDF.Serialization do
Other available serialization-independent options:
- `:force` - If not set to `true`, an error is raised when the given file
- `:gzip`: Allows writing directly to a gzipped file (default: `false`)
- `:force`: If not set to `true`, an error is raised when the given file
already exists (default: `false`)
- `:file_mode` - A list with the Elixir `File.open` modes to be used for writing
- `:file_mode`: A list with the Elixir `File.open` modes to be used for writing
(default: `[:write, :exclusive]`)
Please refer to the documentation of the encoder of a RDF serialization format

View file

@ -65,6 +65,7 @@ defmodule RDF.Serialization.Writer do
encoded_string = encoder.encode!(data, opts)
File.write!(path, encoded_string, file_mode(encoder, opts))
end
defp do_write_file!(true, encoder, data, path, opts) do
data
|> encoder.stream(opts)
@ -73,15 +74,18 @@ defmodule RDF.Serialization.Writer do
:ok
end
# Computes the list of file modes used for opening a file for writing.
#
# Starts from the `:file_mode` option (falling back to `@default_file_mode`),
# normalizes it to a list, then lets `set_force/2` and `set_gzip/2` adjust it
# according to the `:force` and `:gzip` options, in that order. The encoder
# argument is not used.
@doc false
def file_mode(_encoder, opts) do
  base_modes = List.wrap(Keyword.get(opts, :file_mode, @default_file_mode))
  forced_modes = set_force(base_modes, Keyword.get(opts, :force))

  set_gzip(forced_modes, Keyword.get(opts, :gzip))
end
# Removes `:exclusive` from the given modes when the flag is exactly `true`,
# so that an already existing file may be overwritten; any other flag value
# leaves the modes untouched.
defp set_force(file_mode, true), do: List.delete(file_mode, :exclusive)
defp set_force(file_mode, _), do: file_mode
# Prepends `:compressed` to the given modes when the flag is exactly `true`;
# any other flag value leaves the modes untouched.
defp set_gzip(file_mode, gzip) do
  if gzip == true, do: [:compressed | file_mode], else: file_mode
end
end

View file

@ -0,0 +1,19 @@
defmodule RDF.Serialization.ReaderTest do
  use RDF.Test.Case

  doctest RDF.Serialization.Reader

  alias RDF.Serialization.Reader
  alias RDF.Turtle

  describe "file_mode/2" do
    # Without an explicit :file_mode, :compressed is put in front of the defaults.
    test ":gzip without other :file_mode opts" do
      modes = Reader.file_mode(Turtle.Decoder, gzip: true)

      assert modes == [:compressed, :read, :utf8]
    end

    # An explicit :file_mode replaces the defaults; :compressed is still prepended.
    test ":gzip with other :file_mode opts" do
      modes = Reader.file_mode(Turtle.Decoder, gzip: true, file_mode: [:charlist])

      assert modes == [:compressed, :charlist]
    end
  end
end

View file

@ -260,6 +260,79 @@ defmodule RDF.SerializationTest do
end
end
test ":gzip opt" do
# Round-trips @example_graph through a gzipped file with every combination of
# raising/non-raising and streamed/non-streamed read and write functions.
#
# First ensure that :gzip is not ignored on both read and write, which would lead to a
# false positive: a file written with `gzip: true` must NOT be readable without `gzip: true`.
file = file("gzip_test.gz")
Serialization.write_file!(@example_graph, file, format: :turtle, gzip: true, force: true)
assert_raise RuntimeError, fn -> Serialization.read_file!(file, format: :turtle) end
Serialization.write_file!(@example_graph, file,
format: :ntriples,
gzip: true,
stream: true,
force: true
)
# NOTE(review): it is unclear why an UndefinedFunctionError (function :unicode.format_error/1
# is undefined or private) is raised here instead of a decoding error. Presumably the read
# error reason of the stream (`{:no_translation, :unicode, :latin1}`) is handed to
# `:file.format_error/1` while the exception message is built, which interprets the tuple
# as `{line, module, term}` and calls `module.format_error/1` — TODO confirm.
assert_raise UndefinedFunctionError, fn ->
Serialization.read_file!(file, format: :ntriples, stream: true)
end
# Same negative checks for the non-raising variants.
:ok = Serialization.write_file(@example_graph, file, format: :turtle, gzip: true, force: true)
assert {:error, _} = Serialization.read_file(file, format: :turtle)
:ok =
Serialization.write_file(@example_graph, file,
format: :ntriples,
gzip: true,
stream: true,
force: true
)
assert {:error, _} = Serialization.read_file(file, format: :ntriples, stream: true)
# Start of the actual tests: what was written with `gzip: true` is read back with `gzip: true`.
# The streamed N-Triples results are compared against the graph with its metadata cleared.
assert :ok =
Serialization.write_file(@example_graph, file,
format: :turtle,
gzip: true,
force: true
)
assert Serialization.read_file(file, format: :turtle, gzip: true) == {:ok, @example_graph}
assert :ok =
Serialization.write_file(@example_graph, file,
format: :ntriples,
gzip: true,
stream: true,
force: true
)
assert Serialization.read_file(file, format: :ntriples, stream: true, gzip: true) ==
{:ok, Graph.clear_metadata(@example_graph)}
assert :ok =
Serialization.write_file!(@example_graph, file,
format: :turtle,
gzip: true,
force: true
)
assert Serialization.read_file!(file, format: :turtle, gzip: true) == @example_graph
assert :ok =
Serialization.write_file!(@example_graph, file,
format: :ntriples,
gzip: true,
stream: true,
force: true
)
assert Serialization.read_file!(file, format: :ntriples, stream: true, gzip: true) ==
Graph.clear_metadata(@example_graph)
end
describe "use_file_streaming/2" do
test "without opts" do
refute Serialization.use_file_streaming(NTriples.Decoder, [])

View file

@ -0,0 +1,23 @@
defmodule RDF.Serialization.WriterTest do
  use RDF.Test.Case

  doctest RDF.Serialization.Writer

  alias RDF.Serialization.Writer
  alias RDF.Turtle

  describe "file_mode/2" do
    # :force removes :exclusive from the default modes.
    test ":force" do
      modes = Writer.file_mode(Turtle.Encoder, force: true)

      assert modes == [:write]
    end

    # Without an explicit :file_mode, :compressed is put in front of the defaults.
    test ":gzip without other :file_mode opts" do
      modes = Writer.file_mode(Turtle.Encoder, gzip: true)

      assert modes == [:compressed, :write, :exclusive]
    end

    # An explicit :file_mode replaces the defaults; :compressed is still prepended.
    test ":gzip with other :file_mode opts" do
      modes = Writer.file_mode(Turtle.Encoder, gzip: true, file_mode: [:append])

      assert modes == [:compressed, :append]
    end
  end
end