diff --git a/config/config.exs b/config/config.exs index edeed7da2..644155aeb 100644 --- a/config/config.exs +++ b/config/config.exs @@ -807,7 +807,8 @@ config :web_push_encryption, http_client: Pleroma.HTTP.WebPush -config :pleroma, :instances_favicons, enabled: false +config :pleroma, :instances_favicons, enabled: true +config :pleroma, :instances_nodeinfo, enabled: true config :floki, :html_parser, Floki.HTMLParser.FastHtml diff --git a/config/description.exs b/config/description.exs index 1ff0a582b..1e81c3905 100644 --- a/config/description.exs +++ b/config/description.exs @@ -3047,6 +3047,19 @@ } ] }, + %{ + group: :pleroma, + key: :instances_nodeinfo, + type: :group, + description: "Control favicons for instances", + children: [ + %{ + key: :enabled, + type: :boolean, + description: "Allow/disallow getting instance nodeinfo" + } + ] + }, %{ group: :ex_aws, key: :s3, diff --git a/config/test.exs b/config/test.exs index a5edb1149..3056dbd03 100644 --- a/config/test.exs +++ b/config/test.exs @@ -139,6 +139,8 @@ # Reduce recompilation time # https://dashbit.co/blog/speeding-up-re-compilation-of-elixir-projects config :phoenix, :plug_init_mode, :runtime +config :pleroma, :instances_favicons, enabled: false +config :pleroma, :instances_nodeinfo, enabled: false if File.exists?("./config/test.secret.exs") do import_config "test.secret.exs" diff --git a/lib/pleroma/instances/instance.ex b/lib/pleroma/instances/instance.ex index 0627aec64..65062135f 100644 --- a/lib/pleroma/instances/instance.ex +++ b/lib/pleroma/instances/instance.ex @@ -154,6 +154,14 @@ def update_metadata(%URI{host: host} = uri) do Logger.info("Checking metadata for #{host}") existing_record = Repo.get_by(Instance, %{host: host}) + if reachable?(host) do + do_update_metadata(uri, existing_record) + else + {:discard, :unreachable} + end + end + + defp do_update_metadata(%URI{host: host} = uri, existing_record) do if existing_record do if needs_update(existing_record) do Logger.info("Updating metadata for #{host}") @@ -205,7 +213,8 @@ def get_favicon(%URI{host: host}) do end defp scrape_nodeinfo(%URI{} = instance_uri) do - with {_, true} <- {:reachable, reachable?(instance_uri.host)}, + with true <- Pleroma.Config.get([:instances_nodeinfo, :enabled]), + {_, true} <- {:reachable, reachable?(instance_uri.host)}, {:ok, %Tesla.Env{status: 200, body: body}} <- Tesla.get( "https://#{instance_uri.host}/.well-known/nodeinfo", @@ -218,12 +227,20 @@ defp scrape_nodeinfo(%URI{} = instance_uri) do Enum.find(links, &(&1["rel"] == "http://nodeinfo.diaspora.software/ns/schema/2.0"))}, {:ok, %Tesla.Env{body: data}} <- Pleroma.HTTP.get(href, [{"accept", "application/json"}], []), + {:length, true} <- {:length, String.length(data) < 50_000}, {:ok, nodeinfo} <- Jason.decode(data) do nodeinfo else {:reachable, false} -> Logger.debug( - "Instance.scrape_favicon(\"#{to_string(instance_uri)}\") ignored unreachable host" + "Instance.scrape_nodeinfo(\"#{to_string(instance_uri)}\") ignored unreachable host" + ) + + nil + + {:length, false} -> + Logger.debug( + "Instance.scrape_nodeinfo(\"#{to_string(instance_uri)}\") ignored too long body" ) nil @@ -234,13 +251,15 @@ defp scrape_nodeinfo(%URI{} = instance_uri) do end defp scrape_favicon(%URI{} = instance_uri) do - with {_, true} <- {:reachable, reachable?(instance_uri.host)}, + with true <- Pleroma.Config.get([:instances_favicons, :enabled]), + {_, true} <- {:reachable, reachable?(instance_uri.host)}, {:ok, %Tesla.Env{body: html}} <- Pleroma.HTTP.get(to_string(instance_uri), [{"accept", "text/html"}], []), {_, [favicon_rel | _]} when is_binary(favicon_rel) <- {:parse, html |> Floki.parse_document!() |> Floki.attribute("link[rel=icon]", "href")}, {_, favicon} when is_binary(favicon) <- - {:merge, URI.merge(instance_uri, favicon_rel) |> to_string()} do + {:merge, URI.merge(instance_uri, favicon_rel) |> to_string()}, + {:length, true} <- {:length, String.length(favicon) < 255} do favicon else {:reachable, false} -> @@ -282,10 +301,9 @@ def get_by_url(url_or_host) do def get_cached_by_url(url_or_host) do url = host(url_or_host) - @cachex.fetch!(:instances_cache, "instances:#{url}", fn _ -> with %Instance{} = instance <- get_by_url(url) do - {:ok, instance} + {:commit, {:ok, instance}} else _ -> {:ignore, nil} end diff --git a/test/pleroma/instances/instance_test.exs b/test/pleroma/instances/instance_test.exs index e49922724..a2ca9ca00 100644 --- a/test/pleroma/instances/instance_test.exs +++ b/test/pleroma/instances/instance_test.exs @@ -9,12 +9,16 @@ defmodule Pleroma.Instances.InstanceTest do alias Pleroma.Tests.ObanHelpers alias Pleroma.Web.CommonAPI - use Pleroma.DataCase + use Pleroma.DataCase, async: true import ExUnit.CaptureLog import Pleroma.Factory - setup_all do: clear_config([:instance, :federation_reachability_timeout_days], 1) + setup_all do + clear_config([:instance, :federation_reachability_timeout_days], 1) + clear_config([:instances_nodeinfo, :enabled], true) + clear_config([:instances_favicons, :enabled], true) + end describe "set_reachable/1" do test "clears `unreachable_since` of existing matching Instance record having non-nil `unreachable_since`" do @@ -102,62 +106,217 @@ test "does NOT modify `unreachable_since` value of existing record in case it's end end - describe "get_or_update_favicon/1" do - test "Scrapes favicon URLs" do - Tesla.Mock.mock(fn %{url: "https://favicon.example.org/"} -> - %Tesla.Env{ - status: 200, - body: ~s[
] - } + describe "update_metadata/1" do + test "Scrapes favicon URLs and nodeinfo" do + Tesla.Mock.mock(fn + %{url: "https://favicon.example.org/"} -> + %Tesla.Env{ + status: 200, + body: ~s[] + } + + %{url: "https://favicon.example.org/.well-known/nodeinfo"} -> + %Tesla.Env{ + status: 200, + body: + Jason.encode!(%{ + links: [ + %{ + rel: "http://nodeinfo.diaspora.software/ns/schema/2.0", + href: "https://favicon.example.org/nodeinfo/2.0" + } + ] + }) + } + + %{url: "https://favicon.example.org/nodeinfo/2.0"} -> + %Tesla.Env{ + status: 200, + body: Jason.encode!(%{version: "2.0", software: %{name: "Akkoma"}}) + } end) - assert "https://favicon.example.org/favicon.png" == - Instance.get_or_update_favicon(URI.parse("https://favicon.example.org/")) + assert {:ok, true} == + Instance.update_metadata(URI.parse("https://favicon.example.org/")) + + {:ok, instance} = Instance.get_cached_by_url("https://favicon.example.org/") + assert instance.favicon == "https://favicon.example.org/favicon.png" + assert instance.nodeinfo == %{"version" => "2.0", "software" => %{"name" => "Akkoma"}} end - test "Returns nil on too long favicon URLs" do + test "Does not retain favicons that are too long" do long_favicon_url = "https://Lorem.ipsum.dolor.sit.amet/consecteturadipiscingelit/Praesentpharetrapurusutaliquamtempus/Mauriseulaoreetarcu/atfacilisisorci/Nullamporttitor/nequesedfeugiatmollis/dolormagnaefficiturlorem/nonpretiumsapienorcieurisus/Nullamveleratsem/Maecenassedaccumsanexnam/favicon.png" - Tesla.Mock.mock(fn %{url: "https://long-favicon.example.org/"} -> - %Tesla.Env{ - status: 200, - body: - ~s[] - } + Tesla.Mock.mock(fn + %{url: "https://long-favicon.example.org/"} -> + %Tesla.Env{ + status: 200, + body: + ~s[] + } + + %{url: "https://long-favicon.example.org/.well-known/nodeinfo"} -> + %Tesla.Env{ + status: 200, + body: + Jason.encode!(%{ + links: [ + %{ + rel: "http://nodeinfo.diaspora.software/ns/schema/2.0", + href: "https://long-favicon.example.org/nodeinfo/2.0" + } + ] + }) + } + + %{url: "https://long-favicon.example.org/nodeinfo/2.0"} -> + %Tesla.Env{ + status: 200, + body: Jason.encode!(%{version: "2.0", software: %{name: "Akkoma"}}) + } end) - assert capture_log(fn -> - assert nil == - Instance.get_or_update_favicon( - URI.parse("https://long-favicon.example.org/") - ) - end) =~ - "Instance.get_or_update_favicon(\"long-favicon.example.org\") error: %Postgrex.Error{" + assert {:ok, true} == + Instance.update_metadata(URI.parse("https://long-favicon.example.org/")) + + {:ok, instance} = Instance.get_cached_by_url("https://long-favicon.example.org/") + assert instance.favicon == nil end test "Handles not getting a favicon URL properly" do - Tesla.Mock.mock(fn %{url: "https://no-favicon.example.org/"} -> - %Tesla.Env{ - status: 200, - body: ~s[