Merge pull request 'Tweak fetch security checks' (#819) from Oneric/akkoma:id-refetch into develop
Some checks are pending
ci/woodpecker/push/build-amd64 Pipeline is pending
ci/woodpecker/push/build-arm64 Pipeline is pending
ci/woodpecker/push/docs Pipeline is pending
ci/woodpecker/push/lint Pipeline is pending
ci/woodpecker/push/test Pipeline is pending

Reviewed-on: #819
This commit is contained in:
floatingghost 2024-10-16 14:16:14 +00:00
commit 09fa7227f6
5 changed files with 116 additions and 77 deletions

View file

@ -12,8 +12,6 @@ defmodule Pleroma.Object.Containment do
spoofing, therefore removal of object containment functions is NOT recommended. spoofing, therefore removal of object containment functions is NOT recommended.
""" """
alias Pleroma.Web.ActivityPub.Transmogrifier
def get_actor(%{"actor" => actor}) when is_binary(actor) do def get_actor(%{"actor" => actor}) when is_binary(actor) do
actor actor
end end
@ -50,16 +48,39 @@ def get_object(_) do
defp compare_uris(%URI{host: host} = _id_uri, %URI{host: host} = _other_uri), do: :ok defp compare_uris(%URI{host: host} = _id_uri, %URI{host: host} = _other_uri), do: :ok
defp compare_uris(_id_uri, _other_uri), do: :error defp compare_uris(_id_uri, _other_uri), do: :error
defp compare_uris_exact(uri, uri), do: :ok defp uri_strip_slash(%URI{path: path} = uri) when is_binary(path),
do: %{uri | path: String.replace_suffix(path, "/", "")}
defp compare_uris_exact(%URI{} = id, %URI{} = other), defp uri_strip_slash(uri), do: uri
do: compare_uris_exact(URI.to_string(id), URI.to_string(other))
defp compare_uris_exact(id_uri, other_uri) # domain names are case-insensitive per spec (other parts of URIs arent necessarily)
when is_binary(id_uri) and is_binary(other_uri) do defp uri_normalise_host(%URI{host: host} = uri) when is_binary(host),
norm_id = String.replace_suffix(id_uri, "/", "") do: %{uri | host: String.downcase(host, :ascii)}
norm_other = String.replace_suffix(other_uri, "/", "")
if norm_id == norm_other, do: :ok, else: :error defp uri_normalise_host(uri), do: uri
defp compare_uri_identities(uri, uri), do: :ok
defp compare_uri_identities(id_uri, other_uri) when is_binary(id_uri) and is_binary(other_uri),
do: compare_uri_identities(URI.parse(id_uri), URI.parse(other_uri))
defp compare_uri_identities(%URI{} = id, %URI{} = other) do
normid =
%{id | fragment: nil}
|> uri_strip_slash()
|> uri_normalise_host()
normother =
%{other | fragment: nil}
|> uri_strip_slash()
|> uri_normalise_host()
# Conversion back to binary avoids issues from non-normalised deprecated authority field
if URI.to_string(normid) == URI.to_string(normother) do
:ok
else
:error
end
end end
@doc """ @doc """
@ -93,21 +114,13 @@ def contain_origin(id, %{"attributedTo" => actor} = params),
def contain_origin(_id, _data), do: :ok def contain_origin(_id, _data), do: :ok
@doc """ @doc """
Check whether the fetch URL (after redirects) exactly (sans tralining slash) matches either Check whether the fetch URL (after redirects) is the
the canonical ActivityPub id or the objects url field (for display URLs from *key and Mastodon) same location the canonical ActivityPub id points to.
Since this is meant to be used for fetches, anonymous or transient objects are not accepted here. Since this is meant to be used for fetches, anonymous or transient objects are not accepted here.
""" """
def contain_id_to_fetch(url, %{"id" => id} = data) when is_binary(id) do def contain_id_to_fetch(url, %{"id" => id}) when is_binary(id) do
with {:id, :error} <- {:id, compare_uris_exact(id, url)}, compare_uri_identities(url, id)
# "url" can be a "Link" object and this is checked before full normalisation
display_url <- Transmogrifier.fix_url(data)["url"],
true <- display_url != nil do
compare_uris_exact(display_url, url)
else
{:id, :ok} -> :ok
_ -> :error
end
end end
def contain_id_to_fetch(_url, _data), do: :error def contain_id_to_fetch(_url, _data), do: :error

View file

@ -116,7 +116,7 @@ defp reinject_object(%Object{} = object, new_data) do
@doc "Assumes object already is in our database and refetches from remote to update (e.g. for polls)" @doc "Assumes object already is in our database and refetches from remote to update (e.g. for polls)"
def refetch_object(%Object{data: %{"id" => id}} = object) do def refetch_object(%Object{data: %{"id" => id}} = object) do
with {:local, false} <- {:local, Object.local?(object)}, with {:local, false} <- {:local, Object.local?(object)},
{:ok, new_data} <- fetch_and_contain_remote_object_from_id(id), {:ok, new_data} <- fetch_and_contain_remote_object_from_id(id, true),
{:id, true} <- {:id, new_data["id"] == id}, {:id, true} <- {:id, new_data["id"] == id},
{:ok, object} <- reinject_object(object, new_data) do {:ok, object} <- reinject_object(object, new_data) do
{:ok, object} {:ok, object}
@ -253,14 +253,17 @@ defp maybe_date_fetch(headers, date) do
end end
end end
@doc "Fetches arbitrary remote object and performs basic safety and authenticity checks" @doc """
def fetch_and_contain_remote_object_from_id(id) Fetches arbitrary remote object and performs basic safety and authenticity checks.
When the fetch URL is known to already be a canonical AP id, checks are stricter.
"""
def fetch_and_contain_remote_object_from_id(id, is_ap_id \\ false)
def fetch_and_contain_remote_object_from_id(%{"id" => id}), def fetch_and_contain_remote_object_from_id(%{"id" => id}, is_ap_id),
do: fetch_and_contain_remote_object_from_id(id) do: fetch_and_contain_remote_object_from_id(id, is_ap_id)
def fetch_and_contain_remote_object_from_id(id) when is_binary(id) do def fetch_and_contain_remote_object_from_id(id, is_ap_id) when is_binary(id) do
Logger.debug("Fetching object #{id} via AP") Logger.debug("Fetching object #{id} via AP [ap_id=#{is_ap_id}]")
with {:valid_uri_scheme, true} <- {:valid_uri_scheme, String.starts_with?(id, "http")}, with {:valid_uri_scheme, true} <- {:valid_uri_scheme, String.starts_with?(id, "http")},
%URI{} = uri <- URI.parse(id), %URI{} = uri <- URI.parse(id),
@ -270,18 +273,31 @@ def fetch_and_contain_remote_object_from_id(id) when is_binary(id) do
{:mrf_accept_check, Pleroma.Web.ActivityPub.MRF.SimplePolicy.check_accept(uri)}, {:mrf_accept_check, Pleroma.Web.ActivityPub.MRF.SimplePolicy.check_accept(uri)},
{:local_fetch, :ok} <- {:local_fetch, Containment.contain_local_fetch(id)}, {:local_fetch, :ok} <- {:local_fetch, Containment.contain_local_fetch(id)},
{:ok, final_id, body} <- get_object(id), {:ok, final_id, body} <- get_object(id),
# a canonical ID shouldn't be a redirect
true <- !is_ap_id || final_id == id,
{:ok, data} <- safe_json_decode(body), {:ok, data} <- safe_json_decode(body),
{_, :ok} <- {:strict_id, Containment.contain_id_to_fetch(final_id, data)}, {_, :ok} <- {:containment, Containment.contain_origin(final_id, data)},
{_, :ok} <- {:containment, Containment.contain_origin(final_id, data)} do {_, _, :ok} <- {:strict_id, data["id"], Containment.contain_id_to_fetch(final_id, data)} do
unless Instances.reachable?(final_id) do unless Instances.reachable?(final_id) do
Instances.set_reachable(final_id) Instances.set_reachable(final_id)
end end
{:ok, data} {:ok, data}
else else
{:strict_id, _} = e -> # E.g. Mastodon and *key serve the AP object directly under their display URLs without
# redirecting to their canonical location first, thus ids will expectedly differ.
# Similarly keys, either use a fragment ID and are a subobjects or a distinct ID
# but for compatibility are still a subobject presenting their owning actors ID at the toplevel.
# Refetching _once_ from the listed id, should yield a strict match afterwards.
{:strict_id, ap_id, _} = e ->
case is_ap_id do
false ->
fetch_and_contain_remote_object_from_id(ap_id, true)
true ->
log_fetch_error(id, e) log_fetch_error(id, e)
{:error, :id_mismatch} {:error, :id_mismatch}
end
{:mrf_reject_check, _} = e -> {:mrf_reject_check, _} = e ->
log_fetch_error(id, e) log_fetch_error(id, e)
@ -301,7 +317,7 @@ def fetch_and_contain_remote_object_from_id(id) when is_binary(id) do
{:containment, reason} -> {:containment, reason} ->
log_fetch_error(id, reason) log_fetch_error(id, reason)
{:error, reason} {:error, {:containment, reason}}
{:error, e} -> {:error, e} ->
{:error, e} {:error, e}
@ -311,25 +327,13 @@ def fetch_and_contain_remote_object_from_id(id) when is_binary(id) do
end end
end end
def fetch_and_contain_remote_object_from_id(_id), def fetch_and_contain_remote_object_from_id(_id, _is_ap_id),
do: {:error, :invalid_id} do: {:error, :invalid_id}
defp check_crossdomain_redirect(final_host, original_url)
# HOPEFULLY TEMPORARY # HOPEFULLY TEMPORARY
# Basically none of our Tesla mocks in tests set the (supposed to # Basically none of our Tesla mocks in tests set the (supposed to
# exist for Tesla proper) url parameter for their responses # exist for Tesla proper) url parameter for their responses
# causing almost every fetch in test to fail otherwise # causing almost every fetch in test to fail otherwise
if @mix_env == :test do
defp check_crossdomain_redirect(nil, _) do
{:cross_domain_redirect, false}
end
end
defp check_crossdomain_redirect(final_host, original_url) do
{:cross_domain_redirect, final_host != URI.parse(original_url).host}
end
if @mix_env == :test do if @mix_env == :test do
defp get_final_id(nil, initial_url), do: initial_url defp get_final_id(nil, initial_url), do: initial_url
defp get_final_id("", initial_url), do: initial_url defp get_final_id("", initial_url), do: initial_url
@ -355,10 +359,6 @@ def get_object(id) do
with {:ok, %{body: body, status: code, headers: headers, url: final_url}} with {:ok, %{body: body, status: code, headers: headers, url: final_url}}
when code in 200..299 <- when code in 200..299 <-
HTTP.Backoff.get(id, headers), HTTP.Backoff.get(id, headers),
remote_host <-
URI.parse(final_url).host,
{:cross_domain_redirect, false} <-
check_crossdomain_redirect(remote_host, id),
{:has_content_type, {_, content_type}} <- {:has_content_type, {_, content_type}} <-
{:has_content_type, List.keyfind(headers, "content-type", 0)}, {:has_content_type, List.keyfind(headers, "content-type", 0)},
{:parse_content_type, {:ok, "application", subtype, type_params}} <- {:parse_content_type, {:ok, "application", subtype, type_params}} <-

View file

@ -9,7 +9,6 @@ defmodule Pleroma.Object.ContainmentTest do
alias Pleroma.User alias Pleroma.User
import Pleroma.Factory import Pleroma.Factory
import ExUnit.CaptureLog
setup_all do setup_all do
Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
@ -136,21 +135,15 @@ test "contain_id_to_fetch() allows matching IDs" do
) )
end end
test "contain_id_to_fetch() allows display URLs" do test "contain_id_to_fetch() allows fragments and normalises domain casing" do
data = %{ data = %{
"id" => "http://example.com/~alyssa/activities/1234.json", "id" => "http://example.com/users/capybara",
"url" => "http://example.com/@alyssa/status/1234" "url" => "http://example.com/@capybara"
} }
:ok = assert :ok ==
Containment.contain_id_to_fetch( Containment.contain_id_to_fetch(
"http://example.com/@alyssa/status/1234", "http://EXAMPLE.com/users/capybara#key",
data
)
:ok =
Containment.contain_id_to_fetch(
"http://example.com/@alyssa/status/1234/",
data data
) )
end end
@ -164,10 +157,14 @@ test "users cannot be collided through fake direction spoofing attempts" do
follower_address: User.ap_followers(%User{nickname: "rye@niu.moe"}) follower_address: User.ap_followers(%User{nickname: "rye@niu.moe"})
}) })
assert capture_log(fn -> # Fetch from an attempted spoof id will suceed, but automatically retrieve
{:error, _} = User.get_or_fetch_by_ap_id("https://n1u.moe/users/rye") # the real data from the homeserver instead of naïvely using the spoof
end) =~ {:ok, fetched_user} = User.get_or_fetch_by_ap_id("https://n1u.moe/users/rye")
"[error] Could not decode user at fetch https://n1u.moe/users/rye"
refute fetched_user.name == "evil rye"
refute fetched_user.raw_bio == "boooo!"
assert fetched_user.name == "♡ rye ♡"
assert fetched_user.nickname == "rye@niu.moe"
end end
test "contain_origin_from_id() gracefully handles cases where no ID is present" do test "contain_origin_from_id() gracefully handles cases where no ID is present" do

View file

@ -22,6 +22,7 @@ defp spoofed_object_with_ids(
|> Jason.decode!() |> Jason.decode!()
|> Map.put("id", id) |> Map.put("id", id)
|> Map.put("actor", actor_id) |> Map.put("actor", actor_id)
|> Map.put("attributedTo", actor_id)
|> Jason.encode!() |> Jason.encode!()
end end
@ -109,7 +110,7 @@ defp spoofed_object_with_ids(
body: spoofed_object_with_ids("https://patch.cx/objects/spoof_media_redirect1") body: spoofed_object_with_ids("https://patch.cx/objects/spoof_media_redirect1")
} }
# Spoof: cross-domain redirect with final domain id # Spoof: cross-domain redirect with final domain id, but original id actor
%{method: :get, url: "https://patch.cx/objects/spoof_media_redirect2"} -> %{method: :get, url: "https://patch.cx/objects/spoof_media_redirect2"} ->
%Tesla.Env{ %Tesla.Env{
status: 200, status: 200,
@ -118,6 +119,19 @@ defp spoofed_object_with_ids(
body: spoofed_object_with_ids("https://media.patch.cx/objects/spoof_media_redirect2") body: spoofed_object_with_ids("https://media.patch.cx/objects/spoof_media_redirect2")
} }
# No-Spoof: cross-domain redirect with id and actor from final domain
%{method: :get, url: "https://patch.cx/objects/spoof_media_redirect3"} ->
%Tesla.Env{
status: 200,
url: "https://media.patch.cx/objects/spoof_media_redirect3",
headers: [{"content-type", "application/activity+json"}],
body:
spoofed_object_with_ids(
"https://media.patch.cx/objects/spoof_media_redirect3",
"https://media.patch.cx/users/rin"
)
}
# No-Spoof: same domain redirect # No-Spoof: same domain redirect
%{method: :get, url: "https://patch.cx/objects/spoof_redirect"} -> %{method: :get, url: "https://patch.cx/objects/spoof_redirect"} ->
%Tesla.Env{ %Tesla.Env{
@ -252,7 +266,7 @@ test "it does not fetch a spoofed object with wrong content type" do
end end
test "it does not fetch a spoofed object with id different from URL" do test "it does not fetch a spoofed object with id different from URL" do
assert {:error, :id_mismatch} = assert {:error, :not_found} =
Fetcher.fetch_and_contain_remote_object_from_id( Fetcher.fetch_and_contain_remote_object_from_id(
"https://patch.cx/media/03ca3c8b4ac3ddd08bf0f84be7885f2f88de0f709112131a22d83650819e36c2.json" "https://patch.cx/media/03ca3c8b4ac3ddd08bf0f84be7885f2f88de0f709112131a22d83650819e36c2.json"
) )
@ -264,19 +278,29 @@ test "it does not fetch a spoofed object with id different from URL" do
end end
test "it does not fetch an object via cross-domain redirects (initial id)" do test "it does not fetch an object via cross-domain redirects (initial id)" do
assert {:error, {:cross_domain_redirect, true}} = assert {:error, {:containment, _}} =
Fetcher.fetch_and_contain_remote_object_from_id( Fetcher.fetch_and_contain_remote_object_from_id(
"https://patch.cx/objects/spoof_media_redirect1" "https://patch.cx/objects/spoof_media_redirect1"
) )
end end
test "it does not fetch an object via cross-domain redirects (final id)" do test "it does not fetch an object via cross-domain redirect if the actor is from the original domain" do
assert {:error, {:cross_domain_redirect, true}} = assert {:error, {:containment, :error}} =
Fetcher.fetch_and_contain_remote_object_from_id( Fetcher.fetch_and_contain_remote_object_from_id(
"https://patch.cx/objects/spoof_media_redirect2" "https://patch.cx/objects/spoof_media_redirect2"
) )
end end
test "it allows cross-domain redirects when id and author are from final domain" do
assert {:ok, %{"id" => id, "attributedTo" => author}} =
Fetcher.fetch_and_contain_remote_object_from_id(
"https://patch.cx/objects/spoof_media_redirect3"
)
assert URI.parse(id).host == "media.patch.cx"
assert URI.parse(author).host == "media.patch.cx"
end
test "it accepts same-domain redirects" do test "it accepts same-domain redirects" do
assert {:ok, %{"id" => id} = _object} = assert {:ok, %{"id" => id} = _object} =
Fetcher.fetch_and_contain_remote_object_from_id( Fetcher.fetch_and_contain_remote_object_from_id(

View file

@ -263,7 +263,12 @@ def get("https://n1u.moe/users/rye", _, _, @activitypub_accept_headers) do
{:ok, {:ok,
%Tesla.Env{ %Tesla.Env{
status: 200, status: 200,
body: File.read!("test/fixtures/tesla_mock/rye.json"), body:
File.read!("test/fixtures/tesla_mock/rye.json")
|> Jason.decode!()
|> Map.put("name", "evil rye")
|> Map.put("bio", "boooo!")
|> Jason.encode!(),
headers: activitypub_object_headers() headers: activitypub_object_headers()
}} }}
end end