Refetch on AP ID mismatch
As hinted at in the commit message when strict checking
was added in 8684964c5d
,
refetching is more robust than display URL comparison
but in exchange is harder to implement correctly.
A similar refetch approach is also employed by
e.g. Mastodon, IceShrimp and FireFish.
To make sure no checks can be bypassed by forcing
a refetch, id checking is placed at the very end.
This will fix:
- Peertube display URL arrays our transmogrifier fails to normalise
- non-canonical display URLs from alternative frontends
(theoretical; we didnt’t get any actual reports about this)
It will also be helpful in the planned key handling overhaul.
The modified user collision test was introduced in
https://git.pleroma.social/pleroma/pleroma/-/merge_requests/461
and unfortunately the issues this fixes aren’t public.
Afaict it was just meant to guard against someone serving
faked data belonging to an unrelated domain. Since we now
refetch and the id actually is mocked, lookup now succeeds
but will use the real data from the authorative server
making it unproblematic. Instead modify the fake data further
and make sure we don’t end up using the spoofed version.
This commit is contained in:
parent
3c72b48a05
commit
940792f8ba
5 changed files with 87 additions and 56 deletions
|
@ -12,8 +12,6 @@ defmodule Pleroma.Object.Containment do
|
|||
spoofing, therefore removal of object containment functions is NOT recommended.
|
||||
"""
|
||||
|
||||
alias Pleroma.Web.ActivityPub.Transmogrifier
|
||||
|
||||
def get_actor(%{"actor" => actor}) when is_binary(actor) do
|
||||
actor
|
||||
end
|
||||
|
@ -50,16 +48,39 @@ def get_object(_) do
|
|||
defp compare_uris(%URI{host: host} = _id_uri, %URI{host: host} = _other_uri), do: :ok
|
||||
defp compare_uris(_id_uri, _other_uri), do: :error
|
||||
|
||||
defp compare_uris_exact(uri, uri), do: :ok
|
||||
defp uri_strip_slash(%URI{path: path} = uri) when is_binary(path),
|
||||
do: %{uri | path: String.replace_suffix(path, "/", "")}
|
||||
|
||||
defp compare_uris_exact(%URI{} = id, %URI{} = other),
|
||||
do: compare_uris_exact(URI.to_string(id), URI.to_string(other))
|
||||
defp uri_strip_slash(uri), do: uri
|
||||
|
||||
defp compare_uris_exact(id_uri, other_uri)
|
||||
when is_binary(id_uri) and is_binary(other_uri) do
|
||||
norm_id = String.replace_suffix(id_uri, "/", "")
|
||||
norm_other = String.replace_suffix(other_uri, "/", "")
|
||||
if norm_id == norm_other, do: :ok, else: :error
|
||||
# domain names are case-insensitive per spec (other parts of URIs aren’t necessarily)
|
||||
defp uri_normalise_host(%URI{host: host} = uri) when is_binary(host),
|
||||
do: %{uri | host: String.downcase(host, :ascii)}
|
||||
|
||||
defp uri_normalise_host(uri), do: uri
|
||||
|
||||
defp compare_uri_identities(uri, uri), do: :ok
|
||||
|
||||
defp compare_uri_identities(id_uri, other_uri) when is_binary(id_uri) and is_binary(other_uri),
|
||||
do: compare_uri_identities(URI.parse(id_uri), URI.parse(other_uri))
|
||||
|
||||
defp compare_uri_identities(%URI{} = id, %URI{} = other) do
|
||||
normid =
|
||||
%{id | fragment: nil}
|
||||
|> uri_strip_slash()
|
||||
|> uri_normalise_host()
|
||||
|
||||
normother =
|
||||
%{other | fragment: nil}
|
||||
|> uri_strip_slash()
|
||||
|> uri_normalise_host()
|
||||
|
||||
# Conversion back to binary avoids issues from non-normalised deprecated authority field
|
||||
if URI.to_string(normid) == URI.to_string(normother) do
|
||||
:ok
|
||||
else
|
||||
:error
|
||||
end
|
||||
end
|
||||
|
||||
@doc """
|
||||
|
@ -93,21 +114,13 @@ def contain_origin(id, %{"attributedTo" => actor} = params),
|
|||
def contain_origin(_id, _data), do: :ok
|
||||
|
||||
@doc """
|
||||
Check whether the fetch URL (after redirects) exactly (sans tralining slash) matches either
|
||||
the canonical ActivityPub id or the objects url field (for display URLs from *key and Mastodon)
|
||||
Check whether the fetch URL (after redirects) is the
|
||||
same location the canonical ActivityPub id points to.
|
||||
|
||||
Since this is meant to be used for fetches, anonymous or transient objects are not accepted here.
|
||||
"""
|
||||
def contain_id_to_fetch(url, %{"id" => id} = data) when is_binary(id) do
|
||||
with {:id, :error} <- {:id, compare_uris_exact(id, url)},
|
||||
# "url" can be a "Link" object and this is checked before full normalisation
|
||||
display_url <- Transmogrifier.fix_url(data)["url"],
|
||||
true <- display_url != nil do
|
||||
compare_uris_exact(display_url, url)
|
||||
else
|
||||
{:id, :ok} -> :ok
|
||||
_ -> :error
|
||||
end
|
||||
def contain_id_to_fetch(url, %{"id" => id}) when is_binary(id) do
|
||||
compare_uri_identities(url, id)
|
||||
end
|
||||
|
||||
def contain_id_to_fetch(_url, _data), do: :error
|
||||
|
|
|
@ -116,7 +116,7 @@ defp reinject_object(%Object{} = object, new_data) do
|
|||
@doc "Assumes object already is in our database and refetches from remote to update (e.g. for polls)"
|
||||
def refetch_object(%Object{data: %{"id" => id}} = object) do
|
||||
with {:local, false} <- {:local, Object.local?(object)},
|
||||
{:ok, new_data} <- fetch_and_contain_remote_object_from_id(id),
|
||||
{:ok, new_data} <- fetch_and_contain_remote_object_from_id(id, true),
|
||||
{:id, true} <- {:id, new_data["id"] == id},
|
||||
{:ok, object} <- reinject_object(object, new_data) do
|
||||
{:ok, object}
|
||||
|
@ -253,14 +253,17 @@ defp maybe_date_fetch(headers, date) do
|
|||
end
|
||||
end
|
||||
|
||||
@doc "Fetches arbitrary remote object and performs basic safety and authenticity checks"
|
||||
def fetch_and_contain_remote_object_from_id(id)
|
||||
@doc """
|
||||
Fetches arbitrary remote object and performs basic safety and authenticity checks.
|
||||
When the fetch URL is known to already be a canonical AP id, checks are stricter.
|
||||
"""
|
||||
def fetch_and_contain_remote_object_from_id(id, is_ap_id \\ false)
|
||||
|
||||
def fetch_and_contain_remote_object_from_id(%{"id" => id}),
|
||||
do: fetch_and_contain_remote_object_from_id(id)
|
||||
def fetch_and_contain_remote_object_from_id(%{"id" => id}, is_ap_id),
|
||||
do: fetch_and_contain_remote_object_from_id(id, is_ap_id)
|
||||
|
||||
def fetch_and_contain_remote_object_from_id(id) when is_binary(id) do
|
||||
Logger.debug("Fetching object #{id} via AP")
|
||||
def fetch_and_contain_remote_object_from_id(id, is_ap_id) when is_binary(id) do
|
||||
Logger.debug("Fetching object #{id} via AP [ap_id=#{is_ap_id}]")
|
||||
|
||||
with {:valid_uri_scheme, true} <- {:valid_uri_scheme, String.starts_with?(id, "http")},
|
||||
%URI{} = uri <- URI.parse(id),
|
||||
|
@ -270,18 +273,31 @@ def fetch_and_contain_remote_object_from_id(id) when is_binary(id) do
|
|||
{:mrf_accept_check, Pleroma.Web.ActivityPub.MRF.SimplePolicy.check_accept(uri)},
|
||||
{:local_fetch, :ok} <- {:local_fetch, Containment.contain_local_fetch(id)},
|
||||
{:ok, final_id, body} <- get_object(id),
|
||||
# a canonical ID shouldn't be a redirect
|
||||
true <- !is_ap_id || final_id == id,
|
||||
{:ok, data} <- safe_json_decode(body),
|
||||
{_, :ok} <- {:strict_id, Containment.contain_id_to_fetch(final_id, data)},
|
||||
{_, :ok} <- {:containment, Containment.contain_origin(final_id, data)} do
|
||||
{_, :ok} <- {:containment, Containment.contain_origin(final_id, data)},
|
||||
{_, _, :ok} <- {:strict_id, data["id"], Containment.contain_id_to_fetch(final_id, data)} do
|
||||
unless Instances.reachable?(final_id) do
|
||||
Instances.set_reachable(final_id)
|
||||
end
|
||||
|
||||
{:ok, data}
|
||||
else
|
||||
{:strict_id, _} = e ->
|
||||
# E.g. Mastodon and *key serve the AP object directly under their display URLs without
|
||||
# redirecting to their canonical location first, thus ids will expectedly differ.
|
||||
# Similarly keys, either use a fragment ID and are a subobjects or a distinct ID
|
||||
# but for compatibility are still a subobject presenting their owning actors ID at the toplevel.
|
||||
# Refetching _once_ from the listed id, should yield a strict match afterwards.
|
||||
{:strict_id, ap_id, _} = e ->
|
||||
case is_ap_id do
|
||||
false ->
|
||||
fetch_and_contain_remote_object_from_id(ap_id, true)
|
||||
|
||||
true ->
|
||||
log_fetch_error(id, e)
|
||||
{:error, :id_mismatch}
|
||||
end
|
||||
|
||||
{:mrf_reject_check, _} = e ->
|
||||
log_fetch_error(id, e)
|
||||
|
@ -311,7 +327,7 @@ def fetch_and_contain_remote_object_from_id(id) when is_binary(id) do
|
|||
end
|
||||
end
|
||||
|
||||
def fetch_and_contain_remote_object_from_id(_id),
|
||||
def fetch_and_contain_remote_object_from_id(_id, _is_ap_id),
|
||||
do: {:error, :invalid_id}
|
||||
|
||||
defp check_crossdomain_redirect(final_host, original_url)
|
||||
|
|
|
@ -9,7 +9,6 @@ defmodule Pleroma.Object.ContainmentTest do
|
|||
alias Pleroma.User
|
||||
|
||||
import Pleroma.Factory
|
||||
import ExUnit.CaptureLog
|
||||
|
||||
setup_all do
|
||||
Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
|
||||
|
@ -136,21 +135,15 @@ test "contain_id_to_fetch() allows matching IDs" do
|
|||
)
|
||||
end
|
||||
|
||||
test "contain_id_to_fetch() allows display URLs" do
|
||||
test "contain_id_to_fetch() allows fragments and normalises domain casing" do
|
||||
data = %{
|
||||
"id" => "http://example.com/~alyssa/activities/1234.json",
|
||||
"url" => "http://example.com/@alyssa/status/1234"
|
||||
"id" => "http://example.com/users/capybara",
|
||||
"url" => "http://example.com/@capybara"
|
||||
}
|
||||
|
||||
:ok =
|
||||
assert :ok ==
|
||||
Containment.contain_id_to_fetch(
|
||||
"http://example.com/@alyssa/status/1234",
|
||||
data
|
||||
)
|
||||
|
||||
:ok =
|
||||
Containment.contain_id_to_fetch(
|
||||
"http://example.com/@alyssa/status/1234/",
|
||||
"http://EXAMPLE.com/users/capybara#key",
|
||||
data
|
||||
)
|
||||
end
|
||||
|
@ -164,10 +157,14 @@ test "users cannot be collided through fake direction spoofing attempts" do
|
|||
follower_address: User.ap_followers(%User{nickname: "rye@niu.moe"})
|
||||
})
|
||||
|
||||
assert capture_log(fn ->
|
||||
{:error, _} = User.get_or_fetch_by_ap_id("https://n1u.moe/users/rye")
|
||||
end) =~
|
||||
"[error] Could not decode user at fetch https://n1u.moe/users/rye"
|
||||
# Fetch from an attempted spoof id will suceed, but automatically retrieve
|
||||
# the real data from the homeserver instead of naïvely using the spoof
|
||||
{:ok, fetched_user} = User.get_or_fetch_by_ap_id("https://n1u.moe/users/rye")
|
||||
|
||||
refute fetched_user.name == "evil rye"
|
||||
refute fetched_user.raw_bio == "boooo!"
|
||||
assert fetched_user.name == "♡ rye ♡"
|
||||
assert fetched_user.nickname == "rye@niu.moe"
|
||||
end
|
||||
|
||||
test "contain_origin_from_id() gracefully handles cases where no ID is present" do
|
||||
|
|
|
@ -252,7 +252,7 @@ test "it does not fetch a spoofed object with wrong content type" do
|
|||
end
|
||||
|
||||
test "it does not fetch a spoofed object with id different from URL" do
|
||||
assert {:error, :id_mismatch} =
|
||||
assert {:error, :not_found} =
|
||||
Fetcher.fetch_and_contain_remote_object_from_id(
|
||||
"https://patch.cx/media/03ca3c8b4ac3ddd08bf0f84be7885f2f88de0f709112131a22d83650819e36c2.json"
|
||||
)
|
||||
|
|
|
@ -263,7 +263,12 @@ def get("https://n1u.moe/users/rye", _, _, @activitypub_accept_headers) do
|
|||
{:ok,
|
||||
%Tesla.Env{
|
||||
status: 200,
|
||||
body: File.read!("test/fixtures/tesla_mock/rye.json"),
|
||||
body:
|
||||
File.read!("test/fixtures/tesla_mock/rye.json")
|
||||
|> Jason.decode!()
|
||||
|> Map.put("name", "evil rye")
|
||||
|> Map.put("bio", "boooo!")
|
||||
|> Jason.encode!(),
|
||||
headers: activitypub_object_headers()
|
||||
}}
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue