fetcher: return final URL after redirects from get_object

Since we reject cross-domain redirects, this doesn’t yet
make a difference, but it’s requried for stricter checking
subsequent commits will introduce.

To make sure (and in case we ever decide to reallow
cross-domain redirects) also use the final location
for containment and reachability checks.
This commit is contained in:
Oneric 2024-03-15 18:57:09 -01:00
parent f07eb4cb55
commit 3e134b07fa
2 changed files with 43 additions and 11 deletions

View file

@ -259,12 +259,12 @@ def fetch_and_contain_remote_object_from_id(id) when is_binary(id) do
with {:scheme, true} <- {:scheme, String.starts_with?(id, "http")}, with {:scheme, true} <- {:scheme, String.starts_with?(id, "http")},
{_, :ok} <- {:local_fetch, Containment.contain_local_fetch(id)}, {_, :ok} <- {:local_fetch, Containment.contain_local_fetch(id)},
{:ok, body} <- get_object(id), {:ok, final_id, body} <- get_object(id),
{:ok, data} <- safe_json_decode(body), {:ok, data} <- safe_json_decode(body),
{_, :ok} <- {:containment, Containment.contain_origin_from_id(id, data)}, {_, :ok} <- {:containment, Containment.contain_origin_from_id(final_id, data)},
{_, :ok} <- {:containment, Containment.contain_origin(id, data)} do {_, :ok} <- {:containment, Containment.contain_origin(final_id, data)} do
unless Instances.reachable?(id) do unless Instances.reachable?(final_id) do
Instances.set_reachable(id) Instances.set_reachable(final_id)
end end
{:ok, data} {:ok, data}
@ -305,6 +305,15 @@ defp check_crossdomain_redirect(final_host, original_url) do
{:cross_domain_redirect, final_host != URI.parse(original_url).host} {:cross_domain_redirect, final_host != URI.parse(original_url).host}
end end
if @mix_env == :test do
defp get_final_id(nil, initial_url), do: initial_url
defp get_final_id("", initial_url), do: initial_url
end
defp get_final_id(final_url, _intial_url) do
final_url
end
@doc "Do NOT use; only public for use in tests" @doc "Do NOT use; only public for use in tests"
def get_object(id) do def get_object(id) do
date = Pleroma.Signature.signed_date() date = Pleroma.Signature.signed_date()
@ -325,16 +334,18 @@ def get_object(id) do
{:has_content_type, List.keyfind(headers, "content-type", 0)}, {:has_content_type, List.keyfind(headers, "content-type", 0)},
{:parse_content_type, {:ok, "application", subtype, type_params}} <- {:parse_content_type, {:ok, "application", subtype, type_params}} <-
{:parse_content_type, Plug.Conn.Utils.media_type(content_type)} do {:parse_content_type, Plug.Conn.Utils.media_type(content_type)} do
final_id = get_final_id(final_url, id)
case {subtype, type_params} do case {subtype, type_params} do
{"activity+json", _} -> {"activity+json", _} ->
{:ok, body} {:ok, final_id, body}
{"ld+json", %{"profile" => "https://www.w3.org/ns/activitystreams"}} -> {"ld+json", %{"profile" => "https://www.w3.org/ns/activitystreams"}} ->
{:ok, body} {:ok, final_id, body}
# pixelfed sometimes (and only sometimes) responds with http instead of https # pixelfed sometimes (and only sometimes) responds with http instead of https
{"ld+json", %{"profile" => "http://www.w3.org/ns/activitystreams"}} -> {"ld+json", %{"profile" => "http://www.w3.org/ns/activitystreams"}} ->
{:ok, body} {:ok, final_id, body}
_ -> _ ->
{:error, {:content_type, content_type}} {:error, {:content_type, content_type}}

View file

@ -693,12 +693,13 @@ test "should return ok if the content type is application/activity+json" do
} -> } ->
%Tesla.Env{ %Tesla.Env{
status: 200, status: 200,
url: "https://mastodon.social/2",
headers: [{"content-type", "application/activity+json"}], headers: [{"content-type", "application/activity+json"}],
body: "{}" body: "{}"
} }
end) end)
assert {:ok, "{}"} = Fetcher.get_object("https://mastodon.social/2") assert {:ok, _, "{}"} = Fetcher.get_object("https://mastodon.social/2")
end end
test "should return ok if the content type is application/ld+json with a profile" do test "should return ok if the content type is application/ld+json with a profile" do
@ -709,6 +710,7 @@ test "should return ok if the content type is application/ld+json with a profile
} -> } ->
%Tesla.Env{ %Tesla.Env{
status: 200, status: 200,
url: "https://mastodon.social/2",
headers: [ headers: [
{"content-type", {"content-type",
"application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\""} "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\""}
@ -717,7 +719,7 @@ test "should return ok if the content type is application/ld+json with a profile
} }
end) end)
assert {:ok, "{}"} = Fetcher.get_object("https://mastodon.social/2") assert {:ok, _, "{}"} = Fetcher.get_object("https://mastodon.social/2")
Tesla.Mock.mock(fn Tesla.Mock.mock(fn
%{ %{
@ -734,7 +736,7 @@ test "should return ok if the content type is application/ld+json with a profile
} }
end) end)
assert {:ok, "{}"} = Fetcher.get_object("https://mastodon.social/2") assert {:ok, _, "{}"} = Fetcher.get_object("https://mastodon.social/2")
end end
test "should not return ok with other content types" do test "should not return ok with other content types" do
@ -745,6 +747,7 @@ test "should not return ok with other content types" do
} -> } ->
%Tesla.Env{ %Tesla.Env{
status: 200, status: 200,
url: "https://mastodon.social/2",
headers: [{"content-type", "application/json"}], headers: [{"content-type", "application/json"}],
body: "{}" body: "{}"
} }
@ -753,5 +756,23 @@ test "should not return ok with other content types" do
assert {:error, {:content_type, "application/json"}} = assert {:error, {:content_type, "application/json"}} =
Fetcher.get_object("https://mastodon.social/2") Fetcher.get_object("https://mastodon.social/2")
end end
test "returns the url after redirects" do
Tesla.Mock.mock(fn
%{
method: :get,
url: "https://mastodon.social/5"
} ->
%Tesla.Env{
status: 200,
url: "https://mastodon.social/7",
headers: [{"content-type", "application/activity+json"}],
body: "{}"
}
end)
assert {:ok, "https://mastodon.social/7", "{}"} =
Fetcher.get_object("https://mastodon.social/5")
end
end end
end end