Merge branch 'bugfix/rich-media-non-unicode' into 'develop'

rich media non-unicode bugfix

See merge request pleroma/pleroma!749
This commit is contained in:
lambda 2019-01-31 16:54:48 +00:00
commit 44913c1019
4 changed files with 4948 additions and 13 deletions

View file

@ -30,7 +30,7 @@ defp parse_url(url) do
try do try do
{:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [], adapter: [pool: :media]) {:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [], adapter: [pool: :media])
html |> maybe_parse() |> get_parsed_data() html |> maybe_parse() |> clean_parsed_data() |> check_parsed_data()
rescue rescue
e -> e ->
{:error, "Parsing error: #{inspect(e)}"} {:error, "Parsing error: #{inspect(e)}"}
@ -46,11 +46,33 @@ defp maybe_parse(html) do
end) end)
end end
defp get_parsed_data(%{title: title} = data) when is_binary(title) and byte_size(title) > 0 do defp check_parsed_data(%{title: title} = data) when is_binary(title) and byte_size(title) > 0 do
{:ok, data} {:ok, data}
end end
defp get_parsed_data(data) do defp check_parsed_data(data) do
{:error, "Found metadata was invalid or incomplete: #{inspect(data)}"} {:error, "Found metadata was invalid or incomplete: #{inspect(data)}"}
end end
# Checks whether a parsed metadata value is usable as Unicode text.
#
# Binary values are run through `:unicode.characters_to_binary/1` and the
# conversion result is handed to `clean_string/1`, which decides the verdict.
defp string_is_valid_unicode(data) when is_binary(data) do
  converted = :unicode.characters_to_binary(data)
  clean_string(converted)
end

# Anything that is not a binary is passed through untouched as valid.
defp string_is_valid_unicode(data), do: {:ok, data}
# Turns the result of `:unicode.characters_to_binary/1` into a tagged tuple.
#
# That conversion returns a plain binary on success, `{:error, converted, rest}`
# when it hits an invalid sequence, and `{:incomplete, converted, rest}` when the
# input ends in the middle of a multi-byte sequence. Both failure shapes mean the
# input is not well-formed Unicode, so both are reported as an error; without the
# `:incomplete` clause a truncated string would be wrapped in `{:ok, _}`.
defp clean_string({:error, _, _}), do: {:error, "Invalid data"}
defp clean_string({:incomplete, _, _}), do: {:error, "Invalid data"}
defp clean_string(data), do: {:ok, data}
# Removes every entry whose value fails `string_is_valid_unicode/1` and returns
# the surviving key/value pairs as a map.
defp clean_parsed_data(data) do
  valid_entries =
    Enum.filter(data, fn {_key, value} ->
      match?({:ok, _}, string_is_valid_unicode(value))
    end)

  Map.new(valid_entries)
end
end end

File diff suppressed because one or more lines are too long

View file

@ -143,7 +143,10 @@ def get(
}} }}
end end
def get("https://squeet.me/xrd/?uri=lain@squeet.me", _, _, def get(
"https://squeet.me/xrd/?uri=lain@squeet.me",
_,
_,
Accept: "application/xrd+xml,application/jrd+json" Accept: "application/xrd+xml,application/jrd+json"
) do ) do
{:ok, {:ok,
@ -153,7 +156,10 @@ def get("https://squeet.me/xrd/?uri=lain@squeet.me", _, _,
}} }}
end end
def get("https://mst3k.interlinked.me/users/luciferMysticus", _, _, def get(
"https://mst3k.interlinked.me/users/luciferMysticus",
_,
_,
Accept: "application/activity+json" Accept: "application/activity+json"
) do ) do
{:ok, {:ok,
@ -171,7 +177,10 @@ def get("https://prismo.news/@mxb", _, _, _) do
}} }}
end end
def get("https://hubzilla.example.org/channel/kaniini", _, _, def get(
"https://hubzilla.example.org/channel/kaniini",
_,
_,
Accept: "application/activity+json" Accept: "application/activity+json"
) do ) do
{:ok, {:ok,
@ -248,7 +257,10 @@ def get("http://mastodon.example.org/users/admin", _, _, Accept: "application/ac
}} }}
end end
def get("http://mastodon.example.org/@admin/99541947525187367", _, _, def get(
"http://mastodon.example.org/@admin/99541947525187367",
_,
_,
Accept: "application/activity+json" Accept: "application/activity+json"
) do ) do
{:ok, {:ok,
@ -274,7 +286,10 @@ def get("https://mstdn.io/users/mayuutann", _, _, Accept: "application/activity+
}} }}
end end
def get("https://mstdn.io/users/mayuutann/statuses/99568293732299394", _, _, def get(
"https://mstdn.io/users/mayuutann/statuses/99568293732299394",
_,
_,
Accept: "application/activity+json" Accept: "application/activity+json"
) do ) do
{:ok, {:ok,
@ -429,7 +444,10 @@ def get(
}} }}
end end
def get("https://social.sakamoto.gq/objects/0ccc1a2c-66b0-4305-b23a-7f7f2b040056", _, _, def get(
"https://social.sakamoto.gq/objects/0ccc1a2c-66b0-4305-b23a-7f7f2b040056",
_,
_,
Accept: "application/atom+xml" Accept: "application/atom+xml"
) do ) do
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/httpoison_mock/sakamoto.atom")}} {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/httpoison_mock/sakamoto.atom")}}
@ -510,7 +528,10 @@ def get("http://squeet.me/.well-known/host-meta", _, _, _) do
%Tesla.Env{status: 200, body: File.read!("test/fixtures/httpoison_mock/squeet.me_host_meta")}} %Tesla.Env{status: 200, body: File.read!("test/fixtures/httpoison_mock/squeet.me_host_meta")}}
end end
def get("https://squeet.me/xrd?uri=lain@squeet.me", _, _, def get(
"https://squeet.me/xrd?uri=lain@squeet.me",
_,
_,
Accept: "application/xrd+xml,application/jrd+json" Accept: "application/xrd+xml,application/jrd+json"
) do ) do
{:ok, {:ok,
@ -541,7 +562,10 @@ def get("http://framatube.org/.well-known/host-meta", _, _, _) do
}} }}
end end
def get("http://framatube.org/main/xrd?uri=framasoft@framatube.org", _, _, def get(
"http://framatube.org/main/xrd?uri=framasoft@framatube.org",
_,
_,
Accept: "application/xrd+xml,application/jrd+json" Accept: "application/xrd+xml,application/jrd+json"
) do ) do
{:ok, {:ok,
@ -560,7 +584,10 @@ def get("http://gnusocial.de/.well-known/host-meta", _, _, _) do
}} }}
end end
def get("http://gnusocial.de/main/xrd?uri=winterdienst@gnusocial.de", _, _, def get(
"http://gnusocial.de/main/xrd?uri=winterdienst@gnusocial.de",
_,
_,
Accept: "application/xrd+xml,application/jrd+json" Accept: "application/xrd+xml,application/jrd+json"
) do ) do
{:ok, {:ok,
@ -594,7 +621,10 @@ def get("http://gerzilla.de/.well-known/host-meta", _, _, _) do
}} }}
end end
def get("https://gerzilla.de/xrd/?uri=kaniini@gerzilla.de", _, _, def get(
"https://gerzilla.de/xrd/?uri=kaniini@gerzilla.de",
_,
_,
Accept: "application/xrd+xml,application/jrd+json" Accept: "application/xrd+xml,application/jrd+json"
) do ) do
{:ok, {:ok,
@ -657,6 +687,11 @@ def get("http://example.com/ogp", _, _, _) do
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}} {:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}}
end end
# Mock response for the "malformed" rich media URL: answers with status 200 and
# the contents of the malformed-data HTML fixture as the body.
def get("http://example.com/malformed", _, _, _) do
  body = File.read!("test/fixtures/rich_media/malformed-data.html")
  {:ok, %Tesla.Env{status: 200, body: body}}
end
def get("http://example.com/empty", _, _, _) do def get("http://example.com/empty", _, _, _) do
{:ok, %Tesla.Env{status: 200, body: "hello"}} {:ok, %Tesla.Env{status: 200, body: "hello"}}
end end

View file

@ -88,4 +88,8 @@ test "parses OEmbed" do
width: "1024" width: "1024"
}} }}
end end
# The mocked "malformed" URL must make the parser return an error tuple.
test "rejects invalid OGP data" do
  result = Pleroma.Web.RichMedia.Parser.parse("http://example.com/malformed")
  assert {:error, _} = result
end
end end