From 19f9889fbe9d120acfaed2a5aedb4032d56eb217 Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Sat, 29 Dec 2018 17:45:50 +0100 Subject: [PATCH 01/17] I am not sure what's going on anymore so I'll just commit and reset all the other files to HEAD --- lib/pleroma/html.ex | 26 ++++---- lib/pleroma/user.ex | 4 +- lib/pleroma/web/common_api/common_api.ex | 3 +- lib/pleroma/web/common_api/utils.ex | 61 ++++++++++++++++++- .../web/mastodon_api/views/status_view.ex | 2 +- .../web/twitter_api/views/activity_view.ex | 2 +- 6 files changed, 81 insertions(+), 17 deletions(-) diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index a0473676b..169394af9 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -5,18 +5,8 @@ defmodule Pleroma.HTML do alias HtmlSanitizeEx.Scrubber - defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber] - defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers - defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default] - - def get_scrubbers() do - Pleroma.Config.get([:markup, :scrub_policy]) - |> get_scrubbers - end - - def filter_tags(html, nil) do - get_scrubbers() - |> Enum.reduce(html, fn scrubber, html -> + def filter_tags(html, scrubbers) when is_list(scrubbers) do + Enum.reduce(scrubbers, html, fn scrubber, html -> filter_tags(html, scrubber) end) end @@ -39,6 +29,10 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta + def version do + 0 + end + Meta.remove_cdata_sections_before_scrub() Meta.strip_comments() @@ -77,6 +71,10 @@ defmodule Pleroma.HTML.Scrubber.Default do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta + def version do + 0 + end + @markup Application.get_env(:pleroma, :markup) @uri_schemes Application.get_env(:pleroma, :uri_schemes, []) @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, []) @@ -152,6 +150,10 @@ defmodule Pleroma.HTML.Scrubber.Default do defmodule Pleroma.HTML.Transform.MediaProxy do @moduledoc "Transforms inline image URIs to use MediaProxy." + def version do + 0 + end + alias Pleroma.Web.MediaProxy def before_scrub(html), do: html diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 1f930479d..b0b65cbe2 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -733,7 +733,9 @@ def html_filter_policy(%User{info: %{no_rich_text: true}}) do Pleroma.HTML.Scrubber.TwitterText end - def html_filter_policy(_), do: nil + @default_scrubbers Pleroma.Config.get([:markup, :scrub_policy]) + + def html_filter_policy(_), do: @default_scrubbers def get_or_fetch_by_ap_id(ap_id) do user = get_by_ap_id(ap_id) diff --git a/lib/pleroma/web/common_api/common_api.ex b/lib/pleroma/web/common_api/common_api.ex index 5e5821561..06d44451e 100644 --- a/lib/pleroma/web/common_api/common_api.ex +++ b/lib/pleroma/web/common_api/common_api.ex @@ -128,7 +128,8 @@ def post(user, %{"status" => status} = data) do |> Enum.reduce(%{}, fn {name, file}, acc -> Map.put(acc, name, "#{Pleroma.Web.Endpoint.static_url()}#{file}") end) - ) do + ), + object <- Map.put(object, "scrubber_cache", %{}) do res = ActivityPub.create(%{ to: to, diff --git a/lib/pleroma/web/common_api/utils.ex b/lib/pleroma/web/common_api/utils.ex index b91cfc4bb..5c37fd671 100644 --- a/lib/pleroma/web/common_api/utils.ex +++ b/lib/pleroma/web/common_api/utils.ex @@ -5,7 +5,7 @@ defmodule Pleroma.Web.CommonAPI.Utils do alias Calendar.Strftime alias Comeonin.Pbkdf2 - alias Pleroma.{Activity, Formatter, Object, Repo} + alias Pleroma.{Activity, Formatter, Object, Repo, HTML} alias Pleroma.User alias Pleroma.Web alias Pleroma.Web.ActivityPub.Utils @@ -261,4 +261,63 @@ def emoji_from_profile(%{info: _info} = user) do } end) end + + @doc """ + Get sanitized HTML from cache, or scrub it and save to cache. + """ + def get_scrubbed_html( + content, + scrubbers, + %{data: %{"object" => object}} = activity + ) do + scrubber_cache = + if object["scrubber_cache"] != nil and is_list(object["scrubber_cache"]) do + object["scrubber_cache"] + else + [] + end + + key = generate_scrubber_key(scrubbers) + + {new_scrubber_cache, scrubbed_html} = + Enum.map_reduce(scrubber_cache, nil, fn %{ + :scrubbers => current_key, + :content => current_content + }, + _ -> + if Map.keys(current_key) == Map.keys(key) do + if scrubbers == key do + {current_key, current_content} + else + # Remove the entry if scrubber version is outdated + {nil, nil} + end + end + end) + + new_scrubber_cache = Enum.reject(new_scrubber_cache, &is_nil/1) + + if !(new_scrubber_cache == scrubber_cache) or scrubbed_html == nil do + scrubbed_html = HTML.filter_tags(content, scrubbers) + new_scrubber_cache = [%{:scrubbers => key, :content => scrubbed_html} | new_scrubber_cache] + + activity = + Map.merge(activity, %{ + data: %{"object" => %{"scrubber_cache" => new_scrubber_cache}} + }) + + cng = Ecto.Changeset.change(activity) + Repo.update(cng) + scrubbed_html + else + IO.puts("got the post from cache") + scrubbed_html + end + end + + defp generate_scrubber_key(scrubbers) do + Enum.reduce(scrubbers, %{}, fn scrubber, acc -> + Map.put(acc, to_string(scrubber), scrubber.version) + end) + end end diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex index 4d4681da8..8fa3798a6 100644 --- a/lib/pleroma/web/mastodon_api/views/status_view.ex +++ b/lib/pleroma/web/mastodon_api/views/status_view.ex @@ -120,7 +120,7 @@ def render("status.json", %{activity: %{data: %{"object" => object}} = activity} content = object |> render_content() - |> HTML.filter_tags(User.html_filter_policy(opts[:for])) + |> Utils.get_scrubbed_html(User.html_filter_policy(opts[:for]), activity) %{ id: to_string(activity.id), diff --git a/lib/pleroma/web/twitter_api/views/activity_view.ex b/lib/pleroma/web/twitter_api/views/activity_view.ex index 592cf622f..adac1dfe9 100644 --- a/lib/pleroma/web/twitter_api/views/activity_view.ex +++ b/lib/pleroma/web/twitter_api/views/activity_view.ex @@ -245,7 +245,7 @@ def render( html = content - |> HTML.filter_tags(User.html_filter_policy(opts[:for])) + |> Utils.get_scrubbed_html(User.html_filter_policy(opts[:for]), activity) |> Formatter.emojify(object["emoji"]) text = From 9a0163db53580182599a9358bd0197ee0c61779d Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Sat, 29 Dec 2018 21:50:34 +0100 Subject: [PATCH 02/17] use Kernel.put_in instead of Map.merge --- lib/pleroma/web/common_api/utils.ex | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/pleroma/web/common_api/utils.ex b/lib/pleroma/web/common_api/utils.ex index 5c37fd671..7b11bc3ed 100644 --- a/lib/pleroma/web/common_api/utils.ex +++ b/lib/pleroma/web/common_api/utils.ex @@ -300,17 +300,19 @@ def get_scrubbed_html( if !(new_scrubber_cache == scrubber_cache) or scrubbed_html == nil do scrubbed_html = HTML.filter_tags(content, scrubbers) new_scrubber_cache = [%{:scrubbers => key, :content => scrubbed_html} | new_scrubber_cache] + IO.puts(activity) activity = - Map.merge(activity, %{ - data: %{"object" => %{"scrubber_cache" => new_scrubber_cache}} - }) + Map.put( + activity, + :data, + Kernel.put_in(activity.data, ["object", "scrubber_cache"], new_scrubber_cache) + ) cng = Ecto.Changeset.change(activity) Repo.update(cng) scrubbed_html else - IO.puts("got the post from cache") scrubbed_html end end From a32e23905aa24335215f04fd56e33b663af54321 Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Sun, 30 Dec 2018 11:08:19 +0100 Subject: [PATCH 03/17] Use Object.change instead of Ecto.Changeset.change --- lib/pleroma/web/common_api/utils.ex | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/pleroma/web/common_api/utils.ex b/lib/pleroma/web/common_api/utils.ex index 7b11bc3ed..593404e4d 100644 --- a/lib/pleroma/web/common_api/utils.ex +++ b/lib/pleroma/web/common_api/utils.ex @@ -300,7 +300,6 @@ def get_scrubbed_html( if !(new_scrubber_cache == scrubber_cache) or scrubbed_html == nil do scrubbed_html = HTML.filter_tags(content, scrubbers) new_scrubber_cache = [%{:scrubbers => key, :content => scrubbed_html} | new_scrubber_cache] - IO.puts(activity) activity = Map.put( @@ -309,7 +308,7 @@ def get_scrubbed_html( Kernel.put_in(activity.data, ["object", "scrubber_cache"], new_scrubber_cache) ) - cng = Ecto.Changeset.change(activity) + cng = Object.change(activity) Repo.update(cng) scrubbed_html else From 66d1c31461826b34d5c907dc3a91e86cce808c3e Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Sun, 30 Dec 2018 13:51:01 +0100 Subject: [PATCH 04/17] Fix some stupid typos --- lib/pleroma/web/common_api/utils.ex | 33 ++++++++++++----------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/lib/pleroma/web/common_api/utils.ex b/lib/pleroma/web/common_api/utils.ex index 593404e4d..1aedbf962 100644 --- a/lib/pleroma/web/common_api/utils.ex +++ b/lib/pleroma/web/common_api/utils.ex @@ -281,35 +281,25 @@ def get_scrubbed_html( {new_scrubber_cache, scrubbed_html} = Enum.map_reduce(scrubber_cache, nil, fn %{ - :scrubbers => current_key, - :content => current_content - }, - _ -> + "scrubbers" => current_key, + "content" => current_content + } = current_element, + _content -> if Map.keys(current_key) == Map.keys(key) do - if scrubbers == key do - {current_key, current_content} + if current_key == key do + {current_element, current_content} else # Remove the entry if scrubber version is outdated {nil, nil} end end end) - + new_scrubber_cache = Enum.reject(new_scrubber_cache, &is_nil/1) - - if !(new_scrubber_cache == scrubber_cache) or scrubbed_html == nil do + if scrubbed_html == nil or new_scrubber_cache != scrubber_cache do scrubbed_html = HTML.filter_tags(content, scrubbers) new_scrubber_cache = [%{:scrubbers => key, :content => scrubbed_html} | new_scrubber_cache] - - activity = - Map.put( - activity, - :data, - Kernel.put_in(activity.data, ["object", "scrubber_cache"], new_scrubber_cache) - ) - - cng = Object.change(activity) - Repo.update(cng) + update_scrubber_cache(activity, new_scrubber_cache) scrubbed_html else scrubbed_html @@ -321,4 +311,9 @@ defp generate_scrubber_key(scrubbers) do Map.put(acc, to_string(scrubber), scrubber.version) end) end + + defp update_scrubber_cache(activity, scrubber_cache) do + cng = Object.change(activity, %{data: Kernel.put_in(activity.data, ["object", "scrubber_cache"], scrubber_cache)}) + {:ok, _struct} = Repo.update(cng) + end end From bce152aba000e9b59562bf95f3a6df8540686317 Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Sun, 30 Dec 2018 15:58:19 +0100 Subject: [PATCH 05/17] Tidy up the code. Rename key to signature --- lib/pleroma/web/common_api/utils.ex | 53 +++++++++++++++-------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/lib/pleroma/web/common_api/utils.ex b/lib/pleroma/web/common_api/utils.ex index 1aedbf962..d4c169ad9 100644 --- a/lib/pleroma/web/common_api/utils.ex +++ b/lib/pleroma/web/common_api/utils.ex @@ -271,49 +271,50 @@ def get_scrubbed_html( %{data: %{"object" => object}} = activity ) do scrubber_cache = - if object["scrubber_cache"] != nil and is_list(object["scrubber_cache"]) do + if is_list(object["scrubber_cache"]) do object["scrubber_cache"] else [] end - key = generate_scrubber_key(scrubbers) + signature = generate_scrubber_signature(scrubbers) {new_scrubber_cache, scrubbed_html} = - Enum.map_reduce(scrubber_cache, nil, fn %{ - "scrubbers" => current_key, - "content" => current_content - } = current_element, - _content -> - if Map.keys(current_key) == Map.keys(key) do - if current_key == key do - {current_element, current_content} - else - # Remove the entry if scrubber version is outdated - {nil, nil} + Enum.map_reduce(scrubber_cache, nil, fn + entry, _content -> + if Map.keys(entry["scrubbers"]) == Map.keys(signature) do + if entry["scrubbers"] == signature do + {entry, entry["content"]} + else + # Remove the entry if scrubber version is outdated + {nil, nil} + end end - end end) - + + # Remove nil objects new_scrubber_cache = Enum.reject(new_scrubber_cache, &is_nil/1) + if scrubbed_html == nil or new_scrubber_cache != scrubber_cache do scrubbed_html = HTML.filter_tags(content, scrubbers) - new_scrubber_cache = [%{:scrubbers => key, :content => scrubbed_html} | new_scrubber_cache] + new_scrubber_cache = [%{:scrubbers => signature, :content => scrubbed_html} | new_scrubber_cache] update_scrubber_cache(activity, new_scrubber_cache) - scrubbed_html - else - scrubbed_html end + scrubbed_html end - defp generate_scrubber_key(scrubbers) do - Enum.reduce(scrubbers, %{}, fn scrubber, acc -> - Map.put(acc, to_string(scrubber), scrubber.version) + defp generate_scrubber_signature(scrubbers) do + Enum.reduce(scrubbers, %{}, fn scrubber, signature -> + Map.put(signature, to_string(scrubber), scrubber.version) end) end - defp update_scrubber_cache(activity, scrubber_cache) do - cng = Object.change(activity, %{data: Kernel.put_in(activity.data, ["object", "scrubber_cache"], scrubber_cache)}) - {:ok, _struct} = Repo.update(cng) - end + defp update_scrubber_cache(activity, scrubber_cache) do + cng = + Object.change(activity, %{ + data: Kernel.put_in(activity.data, ["object", "scrubber_cache"], scrubber_cache) + }) + + {:ok, _struct} = Repo.update(cng) + end end From d9f40b05b30dd735d0dc87f8268db842bf8ad1f0 Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Sun, 30 Dec 2018 16:51:16 +0100 Subject: [PATCH 06/17] Added get_stripped_html_for_object. Renamed a few things --- lib/pleroma/web/common_api/utils.ex | 34 ++++++++++++++++--- .../web/mastodon_api/views/status_view.ex | 2 +- .../web/twitter_api/views/activity_view.ex | 5 ++- 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/lib/pleroma/web/common_api/utils.ex b/lib/pleroma/web/common_api/utils.ex index d4c169ad9..759bd62af 100644 --- a/lib/pleroma/web/common_api/utils.ex +++ b/lib/pleroma/web/common_api/utils.ex @@ -262,10 +262,13 @@ def emoji_from_profile(%{info: _info} = user) do end) end + def get_scrubbed_html_for_object(content, scrubber, activity) when is_atom(scrubber) do + get_scrubbed_html_for_object(content, [scrubber], activity) + end @doc """ Get sanitized HTML from cache, or scrub it and save to cache. """ - def get_scrubbed_html( + def get_scrubbed_html_for_object( content, scrubbers, %{data: %{"object" => object}} = activity @@ -281,7 +284,7 @@ def get_scrubbed_html( {new_scrubber_cache, scrubbed_html} = Enum.map_reduce(scrubber_cache, nil, fn - entry, _content -> + entry, content -> if Map.keys(entry["scrubbers"]) == Map.keys(signature) do if entry["scrubbers"] == signature do {entry, entry["content"]} @@ -289,6 +292,8 @@ def get_scrubbed_html( # Remove the entry if scrubber version is outdated {nil, nil} end + else + {entry, content} end end) @@ -297,15 +302,30 @@ def get_scrubbed_html( if scrubbed_html == nil or new_scrubber_cache != scrubber_cache do scrubbed_html = HTML.filter_tags(content, scrubbers) - new_scrubber_cache = [%{:scrubbers => signature, :content => scrubbed_html} | new_scrubber_cache] + + new_scrubber_cache = [ + %{:scrubbers => signature, :content => scrubbed_html} | new_scrubber_cache + ] + update_scrubber_cache(activity, new_scrubber_cache) + scrubbed_html + else + scrubbed_html end - scrubbed_html end defp generate_scrubber_signature(scrubbers) do Enum.reduce(scrubbers, %{}, fn scrubber, signature -> - Map.put(signature, to_string(scrubber), scrubber.version) + Map.put( + signature, + to_string(scrubber), + # If a scrubber does not have a version(e.g HtmlSanitizeEx.Scrubber) it is assumed it is always 0) + if Kernel.function_exported?(scrubber, :version, 0) do + scrubber.version + else + 0 + end + ) end) end @@ -317,4 +337,8 @@ defp update_scrubber_cache(activity, scrubber_cache) do {:ok, _struct} = Repo.update(cng) end + + def get_stripped_html_for_object(content, activity) do + get_scrubbed_html_for_object(content, [HtmlSanitizeEx.Scrubber.StripTags], activity) + end end diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex index 8fa3798a6..05ed602d5 100644 --- a/lib/pleroma/web/mastodon_api/views/status_view.ex +++ b/lib/pleroma/web/mastodon_api/views/status_view.ex @@ -120,7 +120,7 @@ def render("status.json", %{activity: %{data: %{"object" => object}} = activity} content = object |> render_content() - |> Utils.get_scrubbed_html(User.html_filter_policy(opts[:for]), activity) + |> Utils.get_scrubbed_html_for_object(User.html_filter_policy(opts[:for]), activity) %{ id: to_string(activity.id), diff --git a/lib/pleroma/web/twitter_api/views/activity_view.ex b/lib/pleroma/web/twitter_api/views/activity_view.ex index adac1dfe9..7d0dea8c2 100644 --- a/lib/pleroma/web/twitter_api/views/activity_view.ex +++ b/lib/pleroma/web/twitter_api/views/activity_view.ex @@ -15,7 +15,6 @@ defmodule Pleroma.Web.TwitterAPI.ActivityView do alias Pleroma.User alias Pleroma.Repo alias Pleroma.Formatter - alias Pleroma.HTML import Ecto.Query require Logger @@ -245,14 +244,14 @@ def render( html = content - |> Utils.get_scrubbed_html(User.html_filter_policy(opts[:for]), activity) + |> Utils.get_scrubbed_html_for_object(User.html_filter_policy(opts[:for]), activity) |> Formatter.emojify(object["emoji"]) text = if content do content |> String.replace(~r//, "\n") - |> HTML.strip_tags() + |> Utils.get_stripped_html_for_object(activity) end reply_parent = Activity.get_in_reply_to_activity(activity) From 535fddd2864f3ed8eebc24cbaf0e5b04ec6f4dbe Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Sun, 30 Dec 2018 19:33:36 +0100 Subject: [PATCH 07/17] Friendship ended with Postgresql now Cachex is my best friend --- lib/pleroma/application.ex | 10 +++ lib/pleroma/object.ex | 34 +++++++- lib/pleroma/web/common_api/common_api.ex | 3 +- lib/pleroma/web/common_api/utils.ex | 81 +------------------ .../web/mastodon_api/views/status_view.ex | 3 +- .../web/twitter_api/views/activity_view.ex | 4 +- 6 files changed, 49 insertions(+), 86 deletions(-) diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index 36a3694f2..4b997c048 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -53,6 +53,16 @@ def start(_type, _args) do ], id: :cachex_object ), + worker( + Cachex, + [ + :scrubber_cache, + [ + limit: 2500 + ] + ], + id: :cachex_scrubber + ), worker( Cachex, [ diff --git a/lib/pleroma/object.ex b/lib/pleroma/object.ex index cc4a2181a..e148c1d75 100644 --- a/lib/pleroma/object.ex +++ b/lib/pleroma/object.ex @@ -4,7 +4,7 @@ defmodule Pleroma.Object do use Ecto.Schema - alias Pleroma.{Repo, Object, User, Activity} + alias Pleroma.{Repo, Object, User, Activity, HTML} import Ecto.{Query, Changeset} schema "objects" do @@ -73,4 +73,36 @@ def delete(%Object{data: %{"id" => id}} = object) do {:ok, object} end end + + def get_cached_scrubbed_html(content, scrubbers, object) do + key = "#{generate_scrubber_signature(scrubbers)}|#{object.id}" + Cachex.fetch!(:scrubber_cache, key, fn(_key) -> ensure_scrubbed_html(content, scrubbers) end ) + end + + def get_cached_stripped_html(content, object) do + get_cached_scrubbed_html(content, HtmlSanitizeEx.Scrubber.StripTags, object) + end + + def ensure_scrubbed_html( + content, + scrubbers + ) do + {:commit, HTML.filter_tags(content, scrubbers)} + end + + defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do + generate_scrubber_signature([scrubber]) + end + + defp generate_scrubber_signature(scrubbers) do + Enum.reduce(scrubbers, "", fn scrubber, signature -> + # If a scrubber does not have a version(e.g HtmlSanitizeEx.Scrubber) it is assumed it is always 0) + version = if Kernel.function_exported?(scrubber, :version, 0) do + scrubber.version + else + 0 + end + "#{signature}#{to_string(scrubber)}#{version}" + end) + end end diff --git a/lib/pleroma/web/common_api/common_api.ex b/lib/pleroma/web/common_api/common_api.ex index 06d44451e..5e5821561 100644 --- a/lib/pleroma/web/common_api/common_api.ex +++ b/lib/pleroma/web/common_api/common_api.ex @@ -128,8 +128,7 @@ def post(user, %{"status" => status} = data) do |> Enum.reduce(%{}, fn {name, file}, acc -> Map.put(acc, name, "#{Pleroma.Web.Endpoint.static_url()}#{file}") end) - ), - object <- Map.put(object, "scrubber_cache", %{}) do + ) do res = ActivityPub.create(%{ to: to, diff --git a/lib/pleroma/web/common_api/utils.ex b/lib/pleroma/web/common_api/utils.ex index 759bd62af..813eb4093 100644 --- a/lib/pleroma/web/common_api/utils.ex +++ b/lib/pleroma/web/common_api/utils.ex @@ -5,7 +5,7 @@ defmodule Pleroma.Web.CommonAPI.Utils do alias Calendar.Strftime alias Comeonin.Pbkdf2 - alias Pleroma.{Activity, Formatter, Object, Repo, HTML} + alias Pleroma.{Activity, Formatter, Object, Repo} alias Pleroma.User alias Pleroma.Web alias Pleroma.Web.ActivityPub.Utils @@ -262,83 +262,4 @@ def emoji_from_profile(%{info: _info} = user) do end) end - def get_scrubbed_html_for_object(content, scrubber, activity) when is_atom(scrubber) do - get_scrubbed_html_for_object(content, [scrubber], activity) - end - @doc """ - Get sanitized HTML from cache, or scrub it and save to cache. - """ - def get_scrubbed_html_for_object( - content, - scrubbers, - %{data: %{"object" => object}} = activity - ) do - scrubber_cache = - if is_list(object["scrubber_cache"]) do - object["scrubber_cache"] - else - [] - end - - signature = generate_scrubber_signature(scrubbers) - - {new_scrubber_cache, scrubbed_html} = - Enum.map_reduce(scrubber_cache, nil, fn - entry, content -> - if Map.keys(entry["scrubbers"]) == Map.keys(signature) do - if entry["scrubbers"] == signature do - {entry, entry["content"]} - else - # Remove the entry if scrubber version is outdated - {nil, nil} - end - else - {entry, content} - end - end) - - # Remove nil objects - new_scrubber_cache = Enum.reject(new_scrubber_cache, &is_nil/1) - - if scrubbed_html == nil or new_scrubber_cache != scrubber_cache do - scrubbed_html = HTML.filter_tags(content, scrubbers) - - new_scrubber_cache = [ - %{:scrubbers => signature, :content => scrubbed_html} | new_scrubber_cache - ] - - update_scrubber_cache(activity, new_scrubber_cache) - scrubbed_html - else - scrubbed_html - end - end - - defp generate_scrubber_signature(scrubbers) do - Enum.reduce(scrubbers, %{}, fn scrubber, signature -> - Map.put( - signature, - to_string(scrubber), - # If a scrubber does not have a version(e.g HtmlSanitizeEx.Scrubber) it is assumed it is always 0) - if Kernel.function_exported?(scrubber, :version, 0) do - scrubber.version - else - 0 - end - ) - end) - end - - defp update_scrubber_cache(activity, scrubber_cache) do - cng = - Object.change(activity, %{ - data: Kernel.put_in(activity.data, ["object", "scrubber_cache"], scrubber_cache) - }) - - {:ok, _struct} = Repo.update(cng) - end - - def get_stripped_html_for_object(content, activity) do - get_scrubbed_html_for_object(content, [HtmlSanitizeEx.Scrubber.StripTags], activity) - end end diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex index 05ed602d5..8a57a233a 100644 --- a/lib/pleroma/web/mastodon_api/views/status_view.ex +++ b/lib/pleroma/web/mastodon_api/views/status_view.ex @@ -9,6 +9,7 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do alias Pleroma.HTML alias Pleroma.Repo alias Pleroma.User + alias Pleroma.Object alias Pleroma.Web.CommonAPI.Utils alias Pleroma.Web.MediaProxy alias Pleroma.Web.MastodonAPI.AccountView @@ -120,7 +121,7 @@ def render("status.json", %{activity: %{data: %{"object" => object}} = activity} content = object |> render_content() - |> Utils.get_scrubbed_html_for_object(User.html_filter_policy(opts[:for]), activity) + |> Object.get_cached_scrubbed_html(User.html_filter_policy(opts[:for]), activity) %{ id: to_string(activity.id), diff --git a/lib/pleroma/web/twitter_api/views/activity_view.ex b/lib/pleroma/web/twitter_api/views/activity_view.ex index 7d0dea8c2..4c29e03ce 100644 --- a/lib/pleroma/web/twitter_api/views/activity_view.ex +++ b/lib/pleroma/web/twitter_api/views/activity_view.ex @@ -244,14 +244,14 @@ def render( html = content - |> Utils.get_scrubbed_html_for_object(User.html_filter_policy(opts[:for]), activity) + |> Object.get_cached_scrubbed_html(User.html_filter_policy(opts[:for]), activity) |> Formatter.emojify(object["emoji"]) text = if content do content |> String.replace(~r//, "\n") - |> Utils.get_stripped_html_for_object(activity) + |> Object.get_cached_stripped_html(activity) end reply_parent = Activity.get_in_reply_to_activity(activity) From c119ea3a5727d521c36d60eca22fa3f20d507b63 Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Sun, 30 Dec 2018 20:00:01 +0100 Subject: [PATCH 08/17] Friendship ended with Postgresql now Cachex is my best friend --- lib/pleroma/object.ex | 16 +++++++++------- lib/pleroma/web/common_api/utils.ex | 1 - 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/pleroma/object.ex b/lib/pleroma/object.ex index e148c1d75..d9f0e91b0 100644 --- a/lib/pleroma/object.ex +++ b/lib/pleroma/object.ex @@ -73,10 +73,10 @@ def delete(%Object{data: %{"id" => id}} = object) do {:ok, object} end end - + def get_cached_scrubbed_html(content, scrubbers, object) do key = "#{generate_scrubber_signature(scrubbers)}|#{object.id}" - Cachex.fetch!(:scrubber_cache, key, fn(_key) -> ensure_scrubbed_html(content, scrubbers) end ) + Cachex.fetch!(:scrubber_cache, key, fn _key -> ensure_scrubbed_html(content, scrubbers) end) end def get_cached_stripped_html(content, object) do @@ -87,22 +87,24 @@ def ensure_scrubbed_html( content, scrubbers ) do - {:commit, HTML.filter_tags(content, scrubbers)} + {:commit, HTML.filter_tags(content, scrubbers)} end - + defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do generate_scrubber_signature([scrubber]) end defp generate_scrubber_signature(scrubbers) do Enum.reduce(scrubbers, "", fn scrubber, signature -> - # If a scrubber does not have a version(e.g HtmlSanitizeEx.Scrubber) it is assumed it is always 0) - version = if Kernel.function_exported?(scrubber, :version, 0) do + # If a scrubber does not have a version(e.g HtmlSanitizeEx.Scrubber) it is assumed it is always 0) + version = + if Kernel.function_exported?(scrubber, :version, 0) do scrubber.version else 0 end - "#{signature}#{to_string(scrubber)}#{version}" + + "#{signature}#{to_string(scrubber)}#{version}" end) end end diff --git a/lib/pleroma/web/common_api/utils.ex b/lib/pleroma/web/common_api/utils.ex index 813eb4093..b91cfc4bb 100644 --- a/lib/pleroma/web/common_api/utils.ex +++ b/lib/pleroma/web/common_api/utils.ex @@ -261,5 +261,4 @@ def emoji_from_profile(%{info: _info} = user) do } end) end - end From 62af23bd26d370ecc38159a8a3803562514596f4 Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Sun, 30 Dec 2018 20:12:12 +0100 Subject: [PATCH 09/17] Revert some changes in html.ex --- lib/pleroma/html.ex | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index 169394af9..f363ed85c 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -5,8 +5,18 @@ defmodule Pleroma.HTML do alias HtmlSanitizeEx.Scrubber - def filter_tags(html, scrubbers) when is_list(scrubbers) do - Enum.reduce(scrubbers, html, fn scrubber, html -> + defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber] + defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers + defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default] + + def get_scrubbers() do + Pleroma.Config.get([:markup, :scrub_policy]) + |> get_scrubbers + end + + def filter_tags(html, nil) do + get_scrubbers() + |> Enum.reduce(html, fn scrubber, html -> filter_tags(html, scrubber) end) end @@ -28,11 +38,8 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta - - def version do - 0 - end - + + def version do 0 end Meta.remove_cdata_sections_before_scrub() Meta.strip_comments() @@ -70,11 +77,7 @@ defmodule Pleroma.HTML.Scrubber.Default do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta - - def version do - 0 - end - + def version do 0 end @markup Application.get_env(:pleroma, :markup) @uri_schemes Application.get_env(:pleroma, :uri_schemes, []) @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, []) @@ -150,12 +153,9 @@ def version do defmodule Pleroma.HTML.Transform.MediaProxy do @moduledoc "Transforms inline image URIs to use MediaProxy." - def version do - 0 - end - alias Pleroma.Web.MediaProxy - + + def version do 0 end def before_scrub(html), do: html def scrub_attribute("img", {"src", "http" <> target}) do From 3f9da55adc9798bd66749dcdbd02fded8494fda3 Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Sun, 30 Dec 2018 20:16:42 +0100 Subject: [PATCH 10/17] Fix formating. Aparently my pre-commit hook broke. --- lib/pleroma/html.ex | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index f363ed85c..44b6776f9 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -38,8 +38,11 @@ defmodule Pleroma.HTML.Scrubber.TwitterText do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta - - def version do 0 end + + def version do + 0 + end + Meta.remove_cdata_sections_before_scrub() Meta.strip_comments() @@ -77,7 +80,11 @@ defmodule Pleroma.HTML.Scrubber.Default do require HtmlSanitizeEx.Scrubber.Meta alias HtmlSanitizeEx.Scrubber.Meta - def version do 0 end + + def version do + 0 + end + @markup Application.get_env(:pleroma, :markup) @uri_schemes Application.get_env(:pleroma, :uri_schemes, []) @valid_schemes Keyword.get(@uri_schemes, :valid_schemes, []) @@ -154,8 +161,11 @@ defmodule Pleroma.HTML.Transform.MediaProxy do @moduledoc "Transforms inline image URIs to use MediaProxy." alias Pleroma.Web.MediaProxy - - def version do 0 end + + def version do + 0 + end + def before_scrub(html), do: html def scrub_attribute("img", {"src", "http" <> target}) do From c50353e6aef5ec482a427298fb20b1b75c208bca Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Sun, 30 Dec 2018 20:44:17 +0100 Subject: [PATCH 11/17] shame on me for not testing after revert --- lib/pleroma/html.ex | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index 44b6776f9..5e1f5bf96 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -15,8 +15,11 @@ def get_scrubbers() do end def filter_tags(html, nil) do - get_scrubbers() - |> Enum.reduce(html, fn scrubber, html -> + filter_tags(html, get_scrubbers()) + end + + def filter_tags(html, scrubbers) when is_list(scrubbers) do + Enum.reduce(scrubbers, html, fn scrubber, html -> filter_tags(html, scrubber) end) end From ab3089d6a718d4a70b0d702307d41e64e17bc505 Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Sun, 30 Dec 2018 20:51:31 +0100 Subject: [PATCH 12/17] Fix comment in object.ex --- lib/pleroma/object.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/object.ex b/lib/pleroma/object.ex index d9f0e91b0..99c836309 100644 --- a/lib/pleroma/object.ex +++ b/lib/pleroma/object.ex @@ -96,7 +96,7 @@ defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do defp generate_scrubber_signature(scrubbers) do Enum.reduce(scrubbers, "", fn scrubber, signature -> - # If a scrubber does not have a version(e.g HtmlSanitizeEx.Scrubber) it is assumed it is always 0) + # If a scrubber does not have a version(e.g HtmlSanitizeEx.Scrubber.StripTags) it is assumed it is always 0) version = if Kernel.function_exported?(scrubber, :version, 0) do scrubber.version From 91d5a7e81c3161f80bb70cd9e1e7a7c8bc70c0b1 Mon Sep 17 00:00:00 2001 From: rinpatch Date: Mon, 31 Dec 2018 00:03:03 +0300 Subject: [PATCH 13/17] Fix test failure --- test/user_test.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/user_test.exs b/test/user_test.exs index 4680850ea..ef652daf7 100644 --- a/test/user_test.exs +++ b/test/user_test.exs @@ -709,7 +709,7 @@ test "insert or update a user from given data" do test "html_filter_policy returns nil when rich-text is enabled" do user = insert(:user) - assert nil == User.html_filter_policy(user) + assert [Pleroma.HTML.Transform.MediaProxy, Pleroma.HTML.Scrubber.Default] == User.html_filter_policy(user) end test "html_filter_policy returns TwitterText scrubber when rich-text is disabled" do From 05743e2000a5837365ab2393732b9a9468d738d7 Mon Sep 17 00:00:00 2001 From: rinpatch Date: Mon, 31 Dec 2018 00:12:14 +0300 Subject: [PATCH 14/17] Get default scrubbers from config instead of hardcoded --- test/user_test.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/user_test.exs b/test/user_test.exs index ef652daf7..bd40d7f85 100644 --- a/test/user_test.exs +++ b/test/user_test.exs @@ -709,7 +709,7 @@ test "insert or update a user from given data" do test "html_filter_policy returns nil when rich-text is enabled" do user = insert(:user) - assert [Pleroma.HTML.Transform.MediaProxy, Pleroma.HTML.Scrubber.Default] == User.html_filter_policy(user) + assert Pleroma.Config.get([:markup, :scrub_policy]) == User.html_filter_policy(user) end test "html_filter_policy returns TwitterText scrubber when rich-text is disabled" do From 7e09c2bd7d4e22eff75037d8ac1252347a404aea Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Mon, 31 Dec 2018 08:19:48 +0100 Subject: [PATCH 15/17] Move scrubber cache-related functions to Pleroma.HTML --- lib/pleroma/html.ex | 34 +++++++++++++++++++ lib/pleroma/object.ex | 34 ------------------- .../web/mastodon_api/views/status_view.ex | 3 +- .../web/twitter_api/views/activity_view.ex | 5 +-- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex index 5e1f5bf96..eb31f131e 100644 --- a/lib/pleroma/html.ex +++ b/lib/pleroma/html.ex @@ -27,6 +27,40 @@ def filter_tags(html, scrubbers) when is_list(scrubbers) do def filter_tags(html, scrubber), do: Scrubber.scrub(html, scrubber) def filter_tags(html), do: filter_tags(html, nil) def strip_tags(html), do: Scrubber.scrub(html, Scrubber.StripTags) + + def get_cached_scrubbed_html_for_object(content, scrubbers, object) do + key = "#{generate_scrubber_signature(scrubbers)}|#{object.id}" + Cachex.fetch!(:scrubber_cache, key, fn _key -> ensure_scrubbed_html(content, scrubbers) end) + end + + def get_cached_stripped_html_for_object(content, object) do + get_cached_scrubbed_html_for_object(content, HtmlSanitizeEx.Scrubber.StripTags, object) + end + + def ensure_scrubbed_html( + content, + scrubbers + ) do + {:commit, filter_tags(content, scrubbers)} + end + + defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do + generate_scrubber_signature([scrubber]) + end + + defp generate_scrubber_signature(scrubbers) do + Enum.reduce(scrubbers, "", fn scrubber, signature -> + # If a scrubber does not have a version(e.g HtmlSanitizeEx.Scrubber.StripTags) it is assumed it is always 0) + version = + if Kernel.function_exported?(scrubber, :version, 0) do + scrubber.version + else + 0 + end + + "#{signature}#{to_string(scrubber)}#{version}" + end) + end end defmodule Pleroma.HTML.Scrubber.TwitterText do diff --git a/lib/pleroma/object.ex b/lib/pleroma/object.ex index 9a6c256df..5241f00ae 100644 --- a/lib/pleroma/object.ex +++ b/lib/pleroma/object.ex @@ -4,7 +4,6 @@ defmodule Pleroma.Object do use Ecto.Schema - alias Pleroma.{Repo, Object, User, Activity, HTML, ObjectTombstone} alias Pleroma.{Repo, Object, User, Activity, ObjectTombstone} import Ecto.{Query, Changeset} @@ -92,37 +91,4 @@ def delete(%Object{data: %{"id" => id}} = object) do end end - def get_cached_scrubbed_html(content, scrubbers, object) do - key = "#{generate_scrubber_signature(scrubbers)}|#{object.id}" - Cachex.fetch!(:scrubber_cache, key, fn _key -> ensure_scrubbed_html(content, scrubbers) end) - end - - def get_cached_stripped_html(content, object) do - get_cached_scrubbed_html(content, HtmlSanitizeEx.Scrubber.StripTags, object) - end - - def ensure_scrubbed_html( - content, - scrubbers - ) do - {:commit, HTML.filter_tags(content, scrubbers)} - end - - defp generate_scrubber_signature(scrubber) when is_atom(scrubber) do - generate_scrubber_signature([scrubber]) - end - - defp generate_scrubber_signature(scrubbers) do - Enum.reduce(scrubbers, "", fn scrubber, signature -> - # If a scrubber does not have a version(e.g HtmlSanitizeEx.Scrubber.StripTags) it is assumed it is always 0) - version = - if Kernel.function_exported?(scrubber, :version, 0) do - scrubber.version - else - 0 - end - - "#{signature}#{to_string(scrubber)}#{version}" - end) - end end diff --git a/lib/pleroma/web/mastodon_api/views/status_view.ex b/lib/pleroma/web/mastodon_api/views/status_view.ex index 8a57a233a..da61bbd86 100644 --- a/lib/pleroma/web/mastodon_api/views/status_view.ex +++ b/lib/pleroma/web/mastodon_api/views/status_view.ex @@ -9,7 +9,6 @@ defmodule Pleroma.Web.MastodonAPI.StatusView do alias Pleroma.HTML alias Pleroma.Repo alias Pleroma.User - alias Pleroma.Object alias Pleroma.Web.CommonAPI.Utils alias Pleroma.Web.MediaProxy alias Pleroma.Web.MastodonAPI.AccountView @@ -121,7 +120,7 @@ def render("status.json", %{activity: %{data: %{"object" => object}} = activity} content = object |> render_content() - |> Object.get_cached_scrubbed_html(User.html_filter_policy(opts[:for]), activity) + |> HTML.get_cached_scrubbed_html_for_object(User.html_filter_policy(opts[:for]), activity) %{ id: to_string(activity.id), diff --git a/lib/pleroma/web/twitter_api/views/activity_view.ex b/lib/pleroma/web/twitter_api/views/activity_view.ex index 4c29e03ce..469f780c7 100644 --- a/lib/pleroma/web/twitter_api/views/activity_view.ex +++ b/lib/pleroma/web/twitter_api/views/activity_view.ex @@ -11,6 +11,7 @@ defmodule Pleroma.Web.TwitterAPI.ActivityView do alias Pleroma.Web.TwitterAPI.TwitterAPI alias Pleroma.Web.TwitterAPI.Representers.ObjectRepresenter alias Pleroma.Activity + alias Pleroma.HTML alias Pleroma.Object alias Pleroma.User alias Pleroma.Repo @@ -244,14 +245,14 @@ def render( html = content - |> Object.get_cached_scrubbed_html(User.html_filter_policy(opts[:for]), activity) + |> HTML.get_cached_scrubbed_html_for_object(User.html_filter_policy(opts[:for]), activity) |> Formatter.emojify(object["emoji"]) text = if content do content |> String.replace(~r//, "\n") - |> Object.get_cached_stripped_html(activity) + |> HTML.get_cached_stripped_html_for_object(activity) end reply_parent = Activity.get_in_reply_to_activity(activity) From 6ed9b31a5fea055aad7d390d50ead4cdfd6b7378 Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Mon, 31 Dec 2018 08:25:48 +0100 Subject: [PATCH 16/17] Eh? --- lib/pleroma/object.ex | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/pleroma/object.ex b/lib/pleroma/object.ex index 5241f00ae..e2b648727 100644 --- a/lib/pleroma/object.ex +++ b/lib/pleroma/object.ex @@ -90,5 +90,4 @@ def delete(%Object{data: %{"id" => id}} = object) do {:ok, object} end end - end From 9f5881cbb1957a286d9b191e3d5be7f06b5a2941 Mon Sep 17 00:00:00 2001 From: Rin Toshaka Date: Mon, 31 Dec 2018 08:34:14 +0100 Subject: [PATCH 17/17] Fix a typo in user_test.ex --- test/user_test.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/user_test.exs b/test/user_test.exs index bd40d7f85..869e9196d 100644 --- a/test/user_test.exs +++ b/test/user_test.exs @@ -706,7 +706,7 @@ test "insert or update a user from given data" do end describe "per-user rich-text filtering" do - test "html_filter_policy returns nil when rich-text is enabled" do + test "html_filter_policy returns default policies, when rich-text is enabled" do user = insert(:user) assert Pleroma.Config.get([:markup, :scrub_policy]) == User.html_filter_policy(user)