diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index a0473676b..169394af9 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -5,18 +5,8 @@ defmodule Pleroma.HTML do alias HtmlSanitizeEx.Scrubber - defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber] - defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers - defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default] - - def get_scrubbers() do - Pleroma.Config.get([:markup, :scrub_policy]) - |> get_scrubbers - end - - def filter_tags(html, nil) do - get_scrubbers() - |> Enum.reduce(html, fn scrubber, html -> + def filter_tags(html, scrubbers) when is_list(scrubbers) do + Enum.reduce(scrubbers, html, fn scrubber, html -> filter_tags(html, scrubber) end) end @@ -39,6 +29,10 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta + def version do + 0 + end + Meta.remove_cdata_sections_before_scrub() Meta.strip_comments() @@ -77,6 +71,10 @@ defmodule Pleroma.HTML.Scrubber.Default do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta + def version do + 0 + end + @markup Application.get_env(:pleroma, :markup) @uri_schemes Application.get_env(:pleroma, :uri_schemes, []) @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, []) @@ -152,6 +150,10 @@ end defmodule Pleroma.HTML.Transform.MediaProxy do @moduledoc "Transforms inline image URIs to use MediaProxy." + def version do + 0 + end + alias Pleroma.Web.MediaProxy def before_scrub(html), do: html diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 1f930479d..b0b65cbe2 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -733,7 +733,9 @@ defmodule Pleroma.User do Pleroma.HTML.Scrubber.TwitterText end - def html_filter_policy(_), do: nil + @default_scrubbers Pleroma.Config.get([:markup, :scrub_policy]) + + def html_filter_policy(_), do: @default_scrubbers def get_or_fetch_by_ap_id(ap_id) do user = get_by_ap_id(ap_id) diff --git a/lib/pleroma/web/common_api/common_api.ex b/lib/pleroma/web/common_api/common_api.ex index 5e5821561..06d44451e 100644 --- a/lib/pleroma/web/common_api/common_api.ex +++ b/lib/pleroma/web/common_api/common_api.ex @@ -128,7 +128,8 @@ defmodule Pleroma.Web.CommonAPI do |> Enum.reduce(%{}, fn {name, file}, acc -> Map.put(acc, name, "#{Pleroma.Web.Endpoint.static_url()}#{file}") end) - ) do + ), + object <- Map.put(object, "scrubber_cache", %{}) do res = ActivityPub.create(%{ to: to, diff --git a/lib/pleroma/web/common_api/utils.ex b/lib/pleroma/web/common_api/utils.ex index b91cfc4bb..5c37fd671 100644 --- a/lib/pleroma/web/common_api/utils.ex +++ b/lib/pleroma/web/common_api/utils.ex @@ -5,7 +5,7 @@ defmodule Pleroma.Web.CommonAPI.Utils do alias Calendar.Strftime alias Comeonin.Pbkdf2 - alias Pleroma.{Activity, Formatter, Object, Repo} + alias Pleroma.{Activity, Formatter, Object, Repo, HTML} alias Pleroma.User alias Pleroma.Web alias Pleroma.Web.ActivityPub.Utils @@ -261,4 +261,63 @@ defmodule Pleroma.Web.CommonAPI.Utils do } end) end + + @doc """ + Get sanitized HTML from cache, or scrub it and save to cache. + """ + def get_scrubbed_html( + content, + scrubbers, + %{data: %{"object" => object}} = activity + ) do + scrubber_cache = + if object["scrubber_cache"] != nil and is_list(object["scrubber_cache"]) do + object["scrubber_cache"] + else + [] + end + + key = generate_scrubber_key(scrubbers) + + {new_scrubber_cache, scrubbed_html} = + Enum.map_reduce(scrubber_cache, nil, fn %{ + :scrubbers => current_key, + :content => current_content + }, + _ -> + if Map.keys(current_key) == Map.keys(key) do + if scrubbers == key do + {current_key, current_content} + else + # Remove the entry if scrubber version is outdated + {nil, nil} + end + end + end) + + new_scrubber_cache = Enum.reject(new_scrubber_cache, &is_nil/1) + + if !(new_scrubber_cache == scrubber_cache) or scrubbed_html == nil do + scrubbed_html = HTML.filter_tags(content, scrubbers) + new_scrubber_cache = [%{:scrubbers => key, :content => scrubbed_html} | new_scrubber_cache] + + activity = + Map.merge(activity, %{ + data: %{"object" => %{"scrubber_cache" => new_scrubber_cache}} + }) + + cng = Ecto.Changeset.change(activity) + Repo.update(cng) + scrubbed_html + else + IO.puts("got the post from cache") + scrubbed_html + end + end + + defp generate_scrubber_key(scrubbers) do + Enum.reduce(scrubbers, %{}, fn scrubber, acc -> + Map.put(acc, to_string(scrubber), scrubber.version) + end) + end end diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex index 4d4681da8..8fa3798a6 100644 --- a/lib/pleroma/web/mastodon_api/views/status_view.ex +++ b/lib/pleroma/web/mastodon_api/views/status_view.ex @@ -120,7 +120,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do content = object |> render_content() - |> HTML.filter_tags(User.html_filter_policy(opts[:for])) + |> Utils.get_scrubbed_html(User.html_filter_policy(opts[:for]), activity) %{ id: to_string(activity.id), diff --git a/lib/pleroma/web/twitter_api/views/activity_view.ex b/lib/pleroma/web/twitter_api/views/activity_view.ex index 592cf622f..adac1dfe9 100644 --- a/lib/pleroma/web/twitter_api/views/activity_view.ex +++ b/lib/pleroma/web/twitter_api/views/activity_view.ex @@ -245,7 +245,7 @@ defmodule Pleroma.Web.TwitterAPI.ActivityView do html = content - |> HTML.filter_tags(User.html_filter_policy(opts[:for])) + |> Utils.get_scrubbed_html(User.html_filter_policy(opts[:for]), activity) |> Formatter.emojify(object["emoji"]) text =