akkoma/test/pleroma/object/containment_test.exs

215 lines
6.6 KiB
Elixir
Raw Normal View History

# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Object.ContainmentTest do
use Pleroma.DataCase
alias Pleroma.Object.Containment
2019-04-17 11:52:01 +00:00
alias Pleroma.User
import Pleroma.Factory
2019-06-04 05:46:19 +00:00
setup_all do
Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
:ok
end
describe "general origin containment" do
test "handles completly actorless objects gracefully" do
assert :ok ==
Containment.contain_origin("https://glaceon.social/statuses/123", %{
"deleted" => "2019-10-30T05:48:50.249606Z",
"formerType" => "Note",
"id" => "https://glaceon.social/statuses/123",
"type" => "Tombstone"
})
end
test "errors for spoofed actors" do
assert :error ==
Containment.contain_origin("https://glaceon.social/statuses/123", %{
"actor" => "https://otp.akkoma.dev/users/you",
"id" => "https://glaceon.social/statuses/123",
"type" => "Note"
})
end
test "errors for spoofed attributedTo" do
assert :error ==
Containment.contain_origin("https://glaceon.social/statuses/123", %{
"attributedTo" => "https://otp.akkoma.dev/users/you",
"id" => "https://glaceon.social/statuses/123",
"type" => "Note"
})
end
test "accepts valid actors" do
assert :ok ==
Containment.contain_origin("https://glaceon.social/statuses/123", %{
"actor" => "https://glaceon.social/users/monorail",
"attributedTo" => "https://glaceon.social/users/monorail",
"id" => "https://glaceon.social/statuses/123",
"type" => "Note"
})
assert :ok ==
Containment.contain_origin("https://glaceon.social/statuses/123", %{
"actor" => "https://glaceon.social/users/monorail",
"id" => "https://glaceon.social/statuses/123",
"type" => "Note"
})
assert :ok ==
Containment.contain_origin("https://glaceon.social/statuses/123", %{
"attributedTo" => "https://glaceon.social/users/monorail",
"id" => "https://glaceon.social/statuses/123",
"type" => "Note"
})
end
test "contain_origin_from_id() catches obvious spoofing attempts" do
data = %{
"id" => "http://example.com/~alyssa/activities/1234.json"
}
:error =
Containment.contain_origin_from_id(
"http://example.org/~alyssa/activities/1234.json",
data
)
end
test "contain_origin_from_id() allows alternate IDs within the same origin domain" do
data = %{
"id" => "http://example.com/~alyssa/activities/1234.json"
}
:ok =
Containment.contain_origin_from_id(
"http://example.com/~alyssa/activities/1234",
data
)
end
test "contain_origin_from_id() allows matching IDs" do
data = %{
"id" => "http://example.com/~alyssa/activities/1234.json"
}
:ok =
Containment.contain_origin_from_id(
"http://example.com/~alyssa/activities/1234.json",
data
)
end
Only allow exact id matches This protects us from falling for obvious spoofs as from the current upload exploit (unfortunately we can’t reasonably do anything about spoofs with exact matches as was possible via emoji and proxy). Such objects being invalid is supported by the spec, sepcifically sections 3.1 and 3.2: https://www.w3.org/TR/activitypub/#obj-id Anonymous objects are not relevant here (they can only exists within parent objects iiuc) and neither is client-to-server or transient objects (as those cannot be fetched in the first place). This leaves us with the requirement for `id` to (a) exist and (b) be a publicly dereferencable URI from the originating server. This alone does not yet demand strict equivalence, but the spec then further explains objects ought to be fetchable _via their ID_. Meaning an object not retrievable via its ID, is invalid. This reading is supported by the fact, e.g. GoToSocial (recently) and Mastodon (for 6+ years) do already implement such strict ID checks, additionally proving this doesn’t cause federation issues in practice. However, apart from canonical IDs there can also be additional display URLs. *omas first redirect those to their canonical location, but *keys and Mastodon directly serve the AP representation without redirects. Mastodon and GTS deal with this in two different ways, but both constitute an effective countermeasure: - Mastodon: Unless it already is a known AP id, two fetches occur. The first fetch just reads the `id` property and then refetches from the id. The last fetch requires the returned id to exactly match the URL the content was fetched from. (This can be optimised by skipping the second fetch if it already matches) https://github.com/mastodon/mastodon/blob/05eda8d19330a9c27c0cf07de19a87edff269057/app/helpers/jsonld_helper.rb#L168 https://github.com/mastodon/mastodon/commit/63f097979990bf5ba9db848b8a253056bad781af - GTS: Only does a single fetch and then checks if _either_ the id _or_ url property (which can be an object) match the original fetch URL. This relies on implementations always including their display URL as "url" if differing from the id. For actors this is true for all investigated implementations, for posts only Mastodon includes an "url", but it is also the only one with a differing display URL. https://github.com/superseriousbusiness/gotosocial/commit/2bafd7daf542d985ee76d9079a30a602cb7be827#diff-943bbb02c8ac74ac5dc5d20807e561dcdfaebdc3b62b10730f643a20ac23c24fR222 Albeit Mastodon’s refetch offers higher compatibility with theoretical implmentations using either multiple different display URL or not denoting any of them as "url" at all, for now we chose to adopt a GTS-like refetch-free approach to avoid additional implementation concerns wrt to whether redirects should be allowed when fetching a canonical AP id and potential for accidentally loosening some checks (e.g. cross-domain refetches) for one of the fetches. This may be reconsidered in the future.
2024-03-16 00:00:19 +00:00
test "contain_id_to_fetch() refuses alternate IDs within the same origin domain" do
data = %{
"id" => "http://example.com/~alyssa/activities/1234.json",
"url" => "http://example.com/@alyssa/status/1234"
}
:error =
Containment.contain_id_to_fetch(
"http://example.com/~alyssa/activities/1234",
data
)
end
test "contain_id_to_fetch() allows matching IDs" do
data = %{
"id" => "http://example.com/~alyssa/activities/1234.json/"
}
:ok =
Containment.contain_id_to_fetch(
"http://example.com/~alyssa/activities/1234.json/",
data
)
:ok =
Containment.contain_id_to_fetch(
"http://example.com/~alyssa/activities/1234.json",
data
)
end
test "contain_id_to_fetch() allows fragments and normalises domain casing" do
Only allow exact id matches This protects us from falling for obvious spoofs as from the current upload exploit (unfortunately we can’t reasonably do anything about spoofs with exact matches as was possible via emoji and proxy). Such objects being invalid is supported by the spec, sepcifically sections 3.1 and 3.2: https://www.w3.org/TR/activitypub/#obj-id Anonymous objects are not relevant here (they can only exists within parent objects iiuc) and neither is client-to-server or transient objects (as those cannot be fetched in the first place). This leaves us with the requirement for `id` to (a) exist and (b) be a publicly dereferencable URI from the originating server. This alone does not yet demand strict equivalence, but the spec then further explains objects ought to be fetchable _via their ID_. Meaning an object not retrievable via its ID, is invalid. This reading is supported by the fact, e.g. GoToSocial (recently) and Mastodon (for 6+ years) do already implement such strict ID checks, additionally proving this doesn’t cause federation issues in practice. However, apart from canonical IDs there can also be additional display URLs. *omas first redirect those to their canonical location, but *keys and Mastodon directly serve the AP representation without redirects. Mastodon and GTS deal with this in two different ways, but both constitute an effective countermeasure: - Mastodon: Unless it already is a known AP id, two fetches occur. The first fetch just reads the `id` property and then refetches from the id. The last fetch requires the returned id to exactly match the URL the content was fetched from. (This can be optimised by skipping the second fetch if it already matches) https://github.com/mastodon/mastodon/blob/05eda8d19330a9c27c0cf07de19a87edff269057/app/helpers/jsonld_helper.rb#L168 https://github.com/mastodon/mastodon/commit/63f097979990bf5ba9db848b8a253056bad781af - GTS: Only does a single fetch and then checks if _either_ the id _or_ url property (which can be an object) match the original fetch URL. This relies on implementations always including their display URL as "url" if differing from the id. For actors this is true for all investigated implementations, for posts only Mastodon includes an "url", but it is also the only one with a differing display URL. https://github.com/superseriousbusiness/gotosocial/commit/2bafd7daf542d985ee76d9079a30a602cb7be827#diff-943bbb02c8ac74ac5dc5d20807e561dcdfaebdc3b62b10730f643a20ac23c24fR222 Albeit Mastodon’s refetch offers higher compatibility with theoretical implmentations using either multiple different display URL or not denoting any of them as "url" at all, for now we chose to adopt a GTS-like refetch-free approach to avoid additional implementation concerns wrt to whether redirects should be allowed when fetching a canonical AP id and potential for accidentally loosening some checks (e.g. cross-domain refetches) for one of the fetches. This may be reconsidered in the future.
2024-03-16 00:00:19 +00:00
data = %{
"id" => "http://example.com/users/capybara",
"url" => "http://example.com/@capybara"
Only allow exact id matches This protects us from falling for obvious spoofs as from the current upload exploit (unfortunately we can’t reasonably do anything about spoofs with exact matches as was possible via emoji and proxy). Such objects being invalid is supported by the spec, sepcifically sections 3.1 and 3.2: https://www.w3.org/TR/activitypub/#obj-id Anonymous objects are not relevant here (they can only exists within parent objects iiuc) and neither is client-to-server or transient objects (as those cannot be fetched in the first place). This leaves us with the requirement for `id` to (a) exist and (b) be a publicly dereferencable URI from the originating server. This alone does not yet demand strict equivalence, but the spec then further explains objects ought to be fetchable _via their ID_. Meaning an object not retrievable via its ID, is invalid. This reading is supported by the fact, e.g. GoToSocial (recently) and Mastodon (for 6+ years) do already implement such strict ID checks, additionally proving this doesn’t cause federation issues in practice. However, apart from canonical IDs there can also be additional display URLs. *omas first redirect those to their canonical location, but *keys and Mastodon directly serve the AP representation without redirects. Mastodon and GTS deal with this in two different ways, but both constitute an effective countermeasure: - Mastodon: Unless it already is a known AP id, two fetches occur. The first fetch just reads the `id` property and then refetches from the id. The last fetch requires the returned id to exactly match the URL the content was fetched from. (This can be optimised by skipping the second fetch if it already matches) https://github.com/mastodon/mastodon/blob/05eda8d19330a9c27c0cf07de19a87edff269057/app/helpers/jsonld_helper.rb#L168 https://github.com/mastodon/mastodon/commit/63f097979990bf5ba9db848b8a253056bad781af - GTS: Only does a single fetch and then checks if _either_ the id _or_ url property (which can be an object) match the original fetch URL. This relies on implementations always including their display URL as "url" if differing from the id. For actors this is true for all investigated implementations, for posts only Mastodon includes an "url", but it is also the only one with a differing display URL. https://github.com/superseriousbusiness/gotosocial/commit/2bafd7daf542d985ee76d9079a30a602cb7be827#diff-943bbb02c8ac74ac5dc5d20807e561dcdfaebdc3b62b10730f643a20ac23c24fR222 Albeit Mastodon’s refetch offers higher compatibility with theoretical implmentations using either multiple different display URL or not denoting any of them as "url" at all, for now we chose to adopt a GTS-like refetch-free approach to avoid additional implementation concerns wrt to whether redirects should be allowed when fetching a canonical AP id and potential for accidentally loosening some checks (e.g. cross-domain refetches) for one of the fetches. This may be reconsidered in the future.
2024-03-16 00:00:19 +00:00
}
assert :ok ==
Containment.contain_id_to_fetch(
"http://EXAMPLE.com/users/capybara#key",
data
)
Only allow exact id matches This protects us from falling for obvious spoofs as from the current upload exploit (unfortunately we can’t reasonably do anything about spoofs with exact matches as was possible via emoji and proxy). Such objects being invalid is supported by the spec, sepcifically sections 3.1 and 3.2: https://www.w3.org/TR/activitypub/#obj-id Anonymous objects are not relevant here (they can only exists within parent objects iiuc) and neither is client-to-server or transient objects (as those cannot be fetched in the first place). This leaves us with the requirement for `id` to (a) exist and (b) be a publicly dereferencable URI from the originating server. This alone does not yet demand strict equivalence, but the spec then further explains objects ought to be fetchable _via their ID_. Meaning an object not retrievable via its ID, is invalid. This reading is supported by the fact, e.g. GoToSocial (recently) and Mastodon (for 6+ years) do already implement such strict ID checks, additionally proving this doesn’t cause federation issues in practice. However, apart from canonical IDs there can also be additional display URLs. *omas first redirect those to their canonical location, but *keys and Mastodon directly serve the AP representation without redirects. Mastodon and GTS deal with this in two different ways, but both constitute an effective countermeasure: - Mastodon: Unless it already is a known AP id, two fetches occur. The first fetch just reads the `id` property and then refetches from the id. The last fetch requires the returned id to exactly match the URL the content was fetched from. (This can be optimised by skipping the second fetch if it already matches) https://github.com/mastodon/mastodon/blob/05eda8d19330a9c27c0cf07de19a87edff269057/app/helpers/jsonld_helper.rb#L168 https://github.com/mastodon/mastodon/commit/63f097979990bf5ba9db848b8a253056bad781af - GTS: Only does a single fetch and then checks if _either_ the id _or_ url property (which can be an object) match the original fetch URL. This relies on implementations always including their display URL as "url" if differing from the id. For actors this is true for all investigated implementations, for posts only Mastodon includes an "url", but it is also the only one with a differing display URL. https://github.com/superseriousbusiness/gotosocial/commit/2bafd7daf542d985ee76d9079a30a602cb7be827#diff-943bbb02c8ac74ac5dc5d20807e561dcdfaebdc3b62b10730f643a20ac23c24fR222 Albeit Mastodon’s refetch offers higher compatibility with theoretical implmentations using either multiple different display URL or not denoting any of them as "url" at all, for now we chose to adopt a GTS-like refetch-free approach to avoid additional implementation concerns wrt to whether redirects should be allowed when fetching a canonical AP id and potential for accidentally loosening some checks (e.g. cross-domain refetches) for one of the fetches. This may be reconsidered in the future.
2024-03-16 00:00:19 +00:00
end
test "users cannot be collided through fake direction spoofing attempts" do
2019-04-17 09:27:29 +00:00
_user =
insert(:user, %{
nickname: "rye@niu.moe",
local: false,
ap_id: "https://niu.moe/users/rye",
follower_address: User.ap_followers(%User{nickname: "rye@niu.moe"})
})
# Fetch from an attempted spoof id will suceed, but automatically retrieve
# the real data from the homeserver instead of naïvely using the spoof
{:ok, fetched_user} = User.get_or_fetch_by_ap_id("https://n1u.moe/users/rye")
refute fetched_user.name == "evil rye"
refute fetched_user.raw_bio == "boooo!"
assert fetched_user.name == "♡ rye ♡"
assert fetched_user.nickname == "rye@niu.moe"
end
test "contain_origin_from_id() gracefully handles cases where no ID is present" do
data = %{
"type" => "Create",
"object" => %{
"id" => "http://example.net/~alyssa/activities/1234",
"attributedTo" => "http://example.org/~alyssa"
},
"actor" => "http://example.com/~bob"
}
:error =
Containment.contain_origin_from_id("http://example.net/~alyssa/activities/1234", data)
end
end
describe "containment of children" do
test "contain_child() catches spoofing attempts" do
data = %{
"id" => "http://example.com/whatever",
"type" => "Create",
"object" => %{
"id" => "http://example.net/~alyssa/activities/1234",
"attributedTo" => "http://example.org/~alyssa"
},
"actor" => "http://example.com/~bob"
}
:error = Containment.contain_child(data)
end
test "contain_child() allows correct origins" do
data = %{
"id" => "http://example.org/~alyssa/activities/5678",
"type" => "Create",
"object" => %{
"id" => "http://example.org/~alyssa/activities/1234",
"attributedTo" => "http://example.org/~alyssa"
},
"actor" => "http://example.org/~alyssa"
}
:ok = Containment.contain_child(data)
end
end
end