add the rich media ttl based on image exp time

This commit is contained in:
Sachin Joshi 2019-07-16 22:37:36 +05:45
parent d3b9222761
commit 18234cc44e
7 changed files with 172 additions and 1 deletions

View file

@ -45,6 +45,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Configuration: Filter.AnonymizeFilename added ability to retain file extension with custom text
- Admin API: changed json structure for saving config settings.
- RichMedia: parsers and their order are configured in `rich_media` config.
- RichMedia: add the rich media ttl based on image expiration time.
## [1.0.1] - 2019-07-14
### Security

View file

@ -344,7 +344,8 @@
Pleroma.Web.RichMedia.Parsers.TwitterCard,
Pleroma.Web.RichMedia.Parsers.OGP,
Pleroma.Web.RichMedia.Parsers.OEmbed
]
],
ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]
config :pleroma, :media_proxy,
enabled: false,

View file

@ -0,0 +1,32 @@
# How to set rich media cache ttl based on image ttl
## Explanation
Richmedia are cached without the ttl but the rich media may have image which can expire, like aws signed url.
In such cases the old image url (expired) is returned from the media cache.
So to avoid such situation we can define a moddule that will set ttl based no image.
The module must have a `run` function and it should be registered in the config.
### Example
```exs
defmodule MyModule do
def run(data, url) do
image_url = Map.get(data, :image)
# do some parsing in the url and get the ttl of the image
# ttl is unix time
ttl = parse_ttl_from_url(image_url)
Cachex.expire_at(:rich_media_cache, url, ttl * 1000)
end
end
```
And update the config
```exs
config :pleroma, :rich_media,
ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl, MyModule]
```
> For reference there is a parser for AWS signed URL `Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl`, it's enabled by default.

View file

@ -24,6 +24,7 @@ def parse(url) do
Cachex.fetch!(:rich_media_cache, url, fn _ ->
{:commit, parse_url(url)}
end)
|> set_ttl_based_on_image(url)
rescue
e ->
{:error, "Cachex error: #{inspect(e)}"}
@ -31,6 +32,46 @@ def parse(url) do
end
end
@doc """
Set the rich media cache based on the expiration time of image.
Define a module that has `run` function
## Example
defmodule MyModule do
def run(data, url) do
image_url = Map.get(data, :image)
# do some parsing in the url and get the ttl of the image
# ttl is unix time
ttl = parse_ttl_from_url(image_url)
Cachex.expire_at(:rich_media_cache, url, ttl * 1000)
end
end
Define the module in the config
config :pleroma, :rich_media,
ttl_setters: [MyModule]
"""
def set_ttl_based_on_image({:ok, data}, url) do
case Cachex.ttl(:rich_media_cache, url) do
{:ok, nil} ->
modules = Pleroma.Config.get([:rich_media, :ttl_setters])
if Enum.count(modules) > 0 do
Enum.each(modules, & &1.run(data, url))
end
{:ok, data}
_ ->
{:ok, data}
end
end
def set_ttl_based_on_image(data, _url), do: data
defp parse_url(url) do
try do
{:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [], adapter: @hackney_options)

View file

@ -0,0 +1,54 @@
defmodule Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl do
def run(data, url) do
image = Map.get(data, :image)
if is_aws_signed_url(image) do
image
|> parse_query_params()
|> format_query_params()
|> get_expiration_timestamp()
|> set_ttl(url)
end
end
defp is_aws_signed_url(""), do: nil
defp is_aws_signed_url(nil), do: nil
defp is_aws_signed_url(image) when is_binary(image) do
%URI{host: host, query: query} = URI.parse(image)
if String.contains?(host, "amazonaws.com") and
String.contains?(query, "X-Amz-Expires") do
image
else
nil
end
end
defp is_aws_signed_url(_), do: nil
defp parse_query_params(image) do
%URI{query: query} = URI.parse(image)
query
end
defp format_query_params(query) do
query
|> String.split(~r/&|=/)
|> Enum.chunk_every(2)
|> Map.new(fn [k, v] -> {k, v} end)
end
defp get_expiration_timestamp(params) when is_map(params) do
{:ok, date} =
params
|> Map.get("X-Amz-Date")
|> Timex.parse("{ISO:Basic:Z}")
Timex.to_unix(date) + String.to_integer(Map.get(params, "X-Amz-Expires"))
end
defp set_ttl(ttl, url) do
Cachex.expire_at(:rich_media_cache, url, ttl * 1000)
end
end

5
test/fixtures/rich_media/amz.html vendored Normal file
View file

@ -0,0 +1,5 @@
<meta name="twitter:card" content="summary" />
<meta name="twitter:site" content="@flickr" />
<meta name="twitter:title" content="Small Island Developing States Photo Submission" />
<meta name="twitter:description" content="View the album on Flickr." />
<meta name="twitter:image" content="https://pleroma.s3.ap-southeast-1.amazonaws.com/sachin%20%281%29%20_a%20-%25%2Aasdasd%20BNN%20bnnn%20.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIBLWWK6RGDQXDLJQ%2F20190716%2Fap-southeast-1%2Fs3%2Faws4_request&X-Amz-Date=20190716T175105Z&X-Amz-Expires=300000&X-Amz-Signature=04ffd6b98634f4b1bbabc62e0fac4879093cd54a6eed24fe8eb38e8369526bbf&X-Amz-SignedHeaders=host" />

View file

@ -0,0 +1,37 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2019 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.TTL.AwsSignedUrlTest do
use ExUnit.Case, async: true
test "amazon signed url is parsed and correct ttl is set for rich media" do
url = "https://pleroma.social/amz"
{:ok, timestamp} =
Timex.now()
|> DateTime.truncate(:second)
|> Timex.format("{ISO:Basic:Z}")
# in seconds
valid_till = 30
data = %{
image:
"https://pleroma.s3.ap-southeast-1.amazonaws.com/sachin%20%281%29%20_a%20-%25%2Aasdasd%20BNN%20bnnn%20.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIBLWWK6RGDQXDLJQ%2F20190716%2Fap-southeast-1%2Fs3%2Faws4_request&X-Amz-Date=#{
timestamp
}&X-Amz-Expires=#{valid_till}&X-Amz-Signature=04ffd6b98634f4b1bbabc62e0fac4879093cd54a6eed24fe8eb38e8369526bbf&X-Amz-SignedHeaders=host",
locale: "en_US",
site_name: "Pleroma",
title: "PLeroma",
url: url
}
Cachex.put(:rich_media_cache, url, data)
assert {:ok, _} = Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl.run(data, url)
{:ok, cache_ttl} = Cachex.ttl(:rich_media_cache, url)
# as there is delay in setting and pulling the data from cache we ignore 1 second
assert_in_delta(valid_till * 1000, cache_ttl, 1000)
end
end