forked from AkkomaGang/akkoma
Merge branch 'bugfix/rich-media-non-unicode' into 'develop'
Rich media non-unicode bugfix. See merge request pleroma/pleroma!749.
This commit is contained in:
commit
44913c1019
4 changed files with 4948 additions and 13 deletions
|
@ -30,7 +30,7 @@ defp parse_url(url) do
|
|||
try do
|
||||
{:ok, %Tesla.Env{body: html}} = Pleroma.HTTP.get(url, [], adapter: [pool: :media])
|
||||
|
||||
html |> maybe_parse() |> get_parsed_data()
|
||||
html |> maybe_parse() |> clean_parsed_data() |> check_parsed_data()
|
||||
rescue
|
||||
e ->
|
||||
{:error, "Parsing error: #{inspect(e)}"}
|
||||
|
@ -46,11 +46,33 @@ defp maybe_parse(html) do
|
|||
end)
|
||||
end
|
||||
|
||||
defp get_parsed_data(%{title: title} = data) when is_binary(title) and byte_size(title) > 0 do
|
||||
# Accepts the parsed metadata only when it carries a non-empty binary title.
defp check_parsed_data(%{title: title} = metadata)
     when is_binary(title) and byte_size(title) > 0,
     do: {:ok, metadata}
|
||||
|
||||
defp get_parsed_data(data) do
|
||||
# Fallback clause: whatever reaches this point is reported as an error, with
# the inspected data embedded in the message.
defp check_parsed_data(data),
  do: {:error, "Found metadata was invalid or incomplete: #{inspect(data)}"}
|
||||
|
||||
# Checks that a metadata value is well-formed unicode text.
#
# Binaries are run through `:unicode.characters_to_binary/1`, which returns the
# converted binary on success, or an `{:error, _, _}` / `{:incomplete, _, _}`
# tuple when the input is malformed or ends in the middle of a character.
#
# Returns `{:ok, binary}` for valid text and `{:error, "Invalid data"}` otherwise.
defp string_is_valid_unicode(data) when is_binary(data) do
  data
  |> :unicode.characters_to_binary()
  |> clean_string()
end

# Non-binary values are not checked and are passed through as valid.
defp string_is_valid_unicode(data), do: {:ok, data}

# Maps the result of `:unicode.characters_to_binary/1` onto a tagged tuple.
defp clean_string({:error, _, _}), do: {:error, "Invalid data"}

# A truncated multi-byte sequence is reported as `:incomplete` rather than
# `:error`; it is just as unusable, so it must not fall through to the
# catch-all clause below and be wrapped in `{:ok, _}`.
defp clean_string({:incomplete, _, _}), do: {:error, "Invalid data"}

defp clean_string(data), do: {:ok, data}
|
||||
|
||||
# Keeps only the key/value pairs whose value passes the unicode validity check
# and rebuilds a map from the surviving pairs.
defp clean_parsed_data(data) do
  data
  |> Enum.filter(fn {_key, value} ->
    match?({:ok, _}, string_is_valid_unicode(value))
  end)
  |> Map.new()
end
|
||||
end
|
||||
|
|
4874
test/fixtures/rich_media/malformed-data.html
vendored
Normal file
4874
test/fixtures/rich_media/malformed-data.html
vendored
Normal file
File diff suppressed because one or more lines are too long
|
@ -143,7 +143,10 @@ def get(
|
|||
}}
|
||||
end
|
||||
|
||||
def get("https://squeet.me/xrd/?uri=lain@squeet.me", _, _,
|
||||
def get(
|
||||
"https://squeet.me/xrd/?uri=lain@squeet.me",
|
||||
_,
|
||||
_,
|
||||
Accept: "application/xrd+xml,application/jrd+json"
|
||||
) do
|
||||
{:ok,
|
||||
|
@ -153,7 +156,10 @@ def get("https://squeet.me/xrd/?uri=lain@squeet.me", _, _,
|
|||
}}
|
||||
end
|
||||
|
||||
def get("https://mst3k.interlinked.me/users/luciferMysticus", _, _,
|
||||
def get(
|
||||
"https://mst3k.interlinked.me/users/luciferMysticus",
|
||||
_,
|
||||
_,
|
||||
Accept: "application/activity+json"
|
||||
) do
|
||||
{:ok,
|
||||
|
@ -171,7 +177,10 @@ def get("https://prismo.news/@mxb", _, _, _) do
|
|||
}}
|
||||
end
|
||||
|
||||
def get("https://hubzilla.example.org/channel/kaniini", _, _,
|
||||
def get(
|
||||
"https://hubzilla.example.org/channel/kaniini",
|
||||
_,
|
||||
_,
|
||||
Accept: "application/activity+json"
|
||||
) do
|
||||
{:ok,
|
||||
|
@ -248,7 +257,10 @@ def get("http://mastodon.example.org/users/admin", _, _, Accept: "application/ac
|
|||
}}
|
||||
end
|
||||
|
||||
def get("http://mastodon.example.org/@admin/99541947525187367", _, _,
|
||||
def get(
|
||||
"http://mastodon.example.org/@admin/99541947525187367",
|
||||
_,
|
||||
_,
|
||||
Accept: "application/activity+json"
|
||||
) do
|
||||
{:ok,
|
||||
|
@ -274,7 +286,10 @@ def get("https://mstdn.io/users/mayuutann", _, _, Accept: "application/activity+
|
|||
}}
|
||||
end
|
||||
|
||||
def get("https://mstdn.io/users/mayuutann/statuses/99568293732299394", _, _,
|
||||
def get(
|
||||
"https://mstdn.io/users/mayuutann/statuses/99568293732299394",
|
||||
_,
|
||||
_,
|
||||
Accept: "application/activity+json"
|
||||
) do
|
||||
{:ok,
|
||||
|
@ -429,7 +444,10 @@ def get(
|
|||
}}
|
||||
end
|
||||
|
||||
def get("https://social.sakamoto.gq/objects/0ccc1a2c-66b0-4305-b23a-7f7f2b040056", _, _,
|
||||
def get(
|
||||
"https://social.sakamoto.gq/objects/0ccc1a2c-66b0-4305-b23a-7f7f2b040056",
|
||||
_,
|
||||
_,
|
||||
Accept: "application/atom+xml"
|
||||
) do
|
||||
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/httpoison_mock/sakamoto.atom")}}
|
||||
|
@ -510,7 +528,10 @@ def get("http://squeet.me/.well-known/host-meta", _, _, _) do
|
|||
%Tesla.Env{status: 200, body: File.read!("test/fixtures/httpoison_mock/squeet.me_host_meta")}}
|
||||
end
|
||||
|
||||
def get("https://squeet.me/xrd?uri=lain@squeet.me", _, _,
|
||||
def get(
|
||||
"https://squeet.me/xrd?uri=lain@squeet.me",
|
||||
_,
|
||||
_,
|
||||
Accept: "application/xrd+xml,application/jrd+json"
|
||||
) do
|
||||
{:ok,
|
||||
|
@ -541,7 +562,10 @@ def get("http://framatube.org/.well-known/host-meta", _, _, _) do
|
|||
}}
|
||||
end
|
||||
|
||||
def get("http://framatube.org/main/xrd?uri=framasoft@framatube.org", _, _,
|
||||
def get(
|
||||
"http://framatube.org/main/xrd?uri=framasoft@framatube.org",
|
||||
_,
|
||||
_,
|
||||
Accept: "application/xrd+xml,application/jrd+json"
|
||||
) do
|
||||
{:ok,
|
||||
|
@ -560,7 +584,10 @@ def get("http://gnusocial.de/.well-known/host-meta", _, _, _) do
|
|||
}}
|
||||
end
|
||||
|
||||
def get("http://gnusocial.de/main/xrd?uri=winterdienst@gnusocial.de", _, _,
|
||||
def get(
|
||||
"http://gnusocial.de/main/xrd?uri=winterdienst@gnusocial.de",
|
||||
_,
|
||||
_,
|
||||
Accept: "application/xrd+xml,application/jrd+json"
|
||||
) do
|
||||
{:ok,
|
||||
|
@ -594,7 +621,10 @@ def get("http://gerzilla.de/.well-known/host-meta", _, _, _) do
|
|||
}}
|
||||
end
|
||||
|
||||
def get("https://gerzilla.de/xrd/?uri=kaniini@gerzilla.de", _, _,
|
||||
def get(
|
||||
"https://gerzilla.de/xrd/?uri=kaniini@gerzilla.de",
|
||||
_,
|
||||
_,
|
||||
Accept: "application/xrd+xml,application/jrd+json"
|
||||
) do
|
||||
{:ok,
|
||||
|
@ -657,6 +687,11 @@ def get("http://example.com/ogp", _, _, _) do
|
|||
{:ok, %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")}}
|
||||
end
|
||||
|
||||
# Mock response for the malformed rich-media page: serves the fixture file
# contents with a 200 status.
def get("http://example.com/malformed", _, _, _) do
  body = File.read!("test/fixtures/rich_media/malformed-data.html")
  {:ok, %Tesla.Env{status: 200, body: body}}
end
|
||||
|
||||
# Mock response for a page with no metadata: a 200 whose body is plain text.
def get("http://example.com/empty", _, _, _),
  do: {:ok, %Tesla.Env{status: 200, body: "hello"}}
|
||||
|
|
|
@ -88,4 +88,8 @@ test "parses OEmbed" do
|
|||
width: "1024"
|
||||
}}
|
||||
end
|
||||
|
||||
# The malformed fixture must make the parser return an error tuple.
test "rejects invalid OGP data" do
  result = Pleroma.Web.RichMedia.Parser.parse("http://example.com/malformed")
  assert match?({:error, _}, result)
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue