From 429407ba0a2bb2d083d340347faa43767ed8c589 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Mon, 6 Dec 2021 11:44:17 +0000 Subject: [PATCH 01/16] Make deactivated user check into a subquery Fixes #2792 --- .../web/activity_pub/activity_pub_test.exs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/pleroma/web/activity_pub/activity_pub_test.exs b/test/pleroma/web/activity_pub/activity_pub_test.exs index a61244c76..b57e87247 100644 --- a/test/pleroma/web/activity_pub/activity_pub_test.exs +++ b/test/pleroma/web/activity_pub/activity_pub_test.exs @@ -776,6 +776,21 @@ test "doesn't return blocked activities" do assert Enum.member?(activities, activity_one) end + test "doesn't return activities from deactivated users" do + _user = insert(:user) + deactivated = insert(:user) + active = insert(:user) + {:ok, activity_one} = CommonAPI.post(deactivated, %{status: "hey!"}) + {:ok, activity_two} = CommonAPI.post(active, %{status: "yay!"}) + {:ok, _updated_user} = User.set_activation(deactivated, false) + + activities = ActivityPub.fetch_activities([], %{}) + + refute Enum.member?(activities, activity_one) + assert Enum.member?(activities, activity_two) + end + + test "always see your own posts even when they address people you block" do user = insert(:user) blockee = insert(:user) From 0fab887eee79b7cf21d1db9eee324f7f000b6bd4 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Mon, 6 Dec 2021 11:50:51 +0000 Subject: [PATCH 02/16] make linter happy --- lib/pleroma/activity.ex | 3 +-- test/pleroma/web/activity_pub/activity_pub_test.exs | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex index c84e96aa2..4106feef6 100644 --- a/lib/pleroma/activity.ex +++ b/lib/pleroma/activity.ex @@ -362,8 +362,7 @@ def following_requests_for_actor(%User{ap_id: ap_id}) do end def restrict_deactivated_users(query) do - deactivated_users_query = - from(u in User.Query.build(%{deactivated: true}), select: u.ap_id) + deactivated_users_query = from(u in User.Query.build(%{deactivated: true}), select: u.ap_id) from(activity in query, where: activity.actor not in subquery(deactivated_users_query)) end diff --git a/test/pleroma/web/activity_pub/activity_pub_test.exs b/test/pleroma/web/activity_pub/activity_pub_test.exs index b57e87247..574ef0d71 100644 --- a/test/pleroma/web/activity_pub/activity_pub_test.exs +++ b/test/pleroma/web/activity_pub/activity_pub_test.exs @@ -783,14 +783,13 @@ test "doesn't return activities from deactivated users" do {:ok, activity_one} = CommonAPI.post(deactivated, %{status: "hey!"}) {:ok, activity_two} = CommonAPI.post(active, %{status: "yay!"}) {:ok, _updated_user} = User.set_activation(deactivated, false) - + activities = ActivityPub.fetch_activities([], %{}) - + refute Enum.member?(activities, activity_one) assert Enum.member?(activities, activity_two) end - test "always see your own posts even when they address people you block" do user = insert(:user) blockee = insert(:user) From 2ac3d341b04f489d690f4bad1dab2773f0f02288 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Fri, 3 Dec 2021 16:20:54 -0500 Subject: [PATCH 03/16] Fix benchmarks --- benchmarks/load_testing/activities.ex | 2 +- config/benchmark.exs | 5 ++--- mix.exs | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/benchmarks/load_testing/activities.ex b/benchmarks/load_testing/activities.ex index b9f6b24da..7f262d228 100644 --- a/benchmarks/load_testing/activities.ex +++ b/benchmarks/load_testing/activities.ex @@ -394,7 +394,7 @@ defp get_actor(group, users), do: Enum.random(users[group]) defp other_data(actor, content) do %{host: host} = URI.parse(actor.ap_id) - datetime = DateTime.utc_now() + datetime = DateTime.utc_now() |> to_string() context_id = "https://#{host}/contexts/#{UUID.generate()}" activity_id = "https://#{host}/activities/#{UUID.generate()}" object_id = "https://#{host}/objects/#{UUID.generate()}" diff --git a/config/benchmark.exs b/config/benchmark.exs index a4d048f1b..9a7ea5669 100644 --- a/config/benchmark.exs +++ b/config/benchmark.exs @@ -4,8 +4,7 @@ # you can enable the server option below. config :pleroma, Pleroma.Web.Endpoint, http: [port: 4001], - url: [port: 4001], - server: true + url: [port: 4001] # Disable captha for tests config :pleroma, Pleroma.Captcha, @@ -44,7 +43,7 @@ pool_size: 10 # Reduce hash rounds for testing -config :pbkdf2_elixir, rounds: 1 +config :pleroma, :password, iterations: 1 config :tesla, adapter: Tesla.Mock diff --git a/mix.exs b/mix.exs index 9385f7cf4..e69c737dd 100644 --- a/mix.exs +++ b/mix.exs @@ -86,7 +86,7 @@ def application do end # Specifies which paths to compile per environment. - defp elixirc_paths(:benchmark), do: ["lib", "benchmarks"] + defp elixirc_paths(:benchmark), do: ["lib", "benchmarks", "priv/scrubbers"] defp elixirc_paths(:test), do: ["lib", "test/support"] defp elixirc_paths(_), do: ["lib"] From 66b60f1ee261317998257c9b8e250e7749e427ef Mon Sep 17 00:00:00 2001 From: Finn Behrens Date: Tue, 7 Dec 2021 09:18:53 +0100 Subject: [PATCH 04/16] move result into with guard --- lib/pleroma/web/activity_pub/publisher.ex | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/pleroma/web/activity_pub/publisher.ex b/lib/pleroma/web/activity_pub/publisher.ex index 4f29a4411..849b359d0 100644 --- a/lib/pleroma/web/activity_pub/publisher.ex +++ b/lib/pleroma/web/activity_pub/publisher.ex @@ -63,8 +63,7 @@ def publish_one(%{inbox: inbox, json: json, actor: %User{} = actor, id: id} = pa date: date }) - with {:ok, %{status: code}} when code in 200..299 <- - result = + with {:ok, %{status: code}} = result when code in 200..299 <- HTTP.post( inbox, json, From eb7bbe3fc89170693684b33e14d4a4efebdb35bd Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Tue, 7 Dec 2021 12:12:23 -0500 Subject: [PATCH 05/16] Linting. --- lib/pleroma/web/activity_pub/publisher.ex | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/pleroma/web/activity_pub/publisher.ex b/lib/pleroma/web/activity_pub/publisher.ex index 849b359d0..ed99079e2 100644 --- a/lib/pleroma/web/activity_pub/publisher.ex +++ b/lib/pleroma/web/activity_pub/publisher.ex @@ -64,16 +64,16 @@ def publish_one(%{inbox: inbox, json: json, actor: %User{} = actor, id: id} = pa }) with {:ok, %{status: code}} = result when code in 200..299 <- - HTTP.post( - inbox, - json, - [ - {"Content-Type", "application/activity+json"}, - {"Date", date}, - {"signature", signature}, - {"digest", digest} - ] - ) do + HTTP.post( + inbox, + json, + [ + {"Content-Type", "application/activity+json"}, + {"Date", date}, + {"signature", signature}, + {"digest", digest} + ] + ) do if not Map.has_key?(params, :unreachable_since) || params[:unreachable_since] do Instances.set_reachable(inbox) end From 185d70ed3dfaa59a1fe1a4bcc7ffe9eb2fa96b2a Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Wed, 8 Dec 2021 11:54:41 -0600 Subject: [PATCH 06/16] Benchmarks: fix user timeline and tags benchmarks --- .../mix/tasks/pleroma/benchmarks/tags.ex | 19 ++++++++++--------- .../mix/tasks/pleroma/benchmarks/timelines.ex | 10 +++++----- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/benchmarks/mix/tasks/pleroma/benchmarks/tags.ex b/benchmarks/mix/tasks/pleroma/benchmarks/tags.ex index c051335a5..a32de2db4 100644 --- a/benchmarks/mix/tasks/pleroma/benchmarks/tags.ex +++ b/benchmarks/mix/tasks/pleroma/benchmarks/tags.ex @@ -99,15 +99,16 @@ defp hashtag_fetching(params, user, local_only) do |> Enum.map(&String.downcase(&1)) _activities = - params - |> Map.put(:type, "Create") - |> Map.put(:local_only, local_only) - |> Map.put(:blocking_user, user) - |> Map.put(:muting_user, user) - |> Map.put(:user, user) - |> Map.put(:tag, tags) - |> Map.put(:tag_all, tag_all) - |> Map.put(:tag_reject, tag_reject) + %{ + type: "Create", + local_only: local_only, + blocking_user: user, + muting_user: user, + user: user, + tag: tags, + tag_all: tag_all, + tag_reject: tag_reject, + } |> Pleroma.Web.ActivityPub.ActivityPub.fetch_public_activities() end end diff --git a/benchmarks/mix/tasks/pleroma/benchmarks/timelines.ex b/benchmarks/mix/tasks/pleroma/benchmarks/timelines.ex index aed32f194..3770ca163 100644 --- a/benchmarks/mix/tasks/pleroma/benchmarks/timelines.ex +++ b/benchmarks/mix/tasks/pleroma/benchmarks/timelines.ex @@ -17,14 +17,14 @@ def run(_args) do # Let the user make 100 posts 1..100 - |> Enum.each(fn i -> CommonAPI.post(user, %{"status" => to_string(i)}) end) + |> Enum.each(fn i -> CommonAPI.post(user, %{status: to_string(i)}) end) # Let 10 random users post posts = users |> Enum.take_random(10) |> Enum.map(fn {:ok, random_user} -> - {:ok, activity} = CommonAPI.post(random_user, %{"status" => "."}) + {:ok, activity} = CommonAPI.post(random_user, %{status: "."}) activity end) @@ -42,7 +42,7 @@ def run(_args) do |> Conn.assign(:user, reading_user) |> Conn.assign(:skip_link_headers, true) - Pleroma.Web.MastodonAPI.AccountController.statuses(conn, %{"id" => user.id}) + Pleroma.Web.MastodonAPI.AccountController.statuses(conn, %{id: user.id}) end }, inputs: %{"user" => user, "no user" => nil}, @@ -50,7 +50,7 @@ def run(_args) do ) users - |> Enum.each(fn {:ok, follower, user} -> Pleroma.User.follow(follower, user) end) + |> Enum.each(fn {:ok, follower} -> Pleroma.User.follow(follower, user) end) Benchee.run( %{ @@ -60,7 +60,7 @@ def run(_args) do |> Conn.assign(:user, reading_user) |> Conn.assign(:skip_link_headers, true) - Pleroma.Web.MastodonAPI.AccountController.statuses(conn, %{"id" => user.id}) + Pleroma.Web.MastodonAPI.AccountController.statuses(conn, %{id: user.id}) end }, inputs: %{"user" => user, "no user" => nil}, From 494149f17368921059d87706c8d59c43bd96c834 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Tue, 7 Dec 2021 01:10:47 -0500 Subject: [PATCH 07/16] Transmogrifier: test fix_attachments/1 --- .../web/activity_pub/transmogrifier_test.exs | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/test/pleroma/web/activity_pub/transmogrifier_test.exs b/test/pleroma/web/activity_pub/transmogrifier_test.exs index 5a3b57acb..4616f8090 100644 --- a/test/pleroma/web/activity_pub/transmogrifier_test.exs +++ b/test/pleroma/web/activity_pub/transmogrifier_test.exs @@ -524,4 +524,44 @@ test "returns {:ok, %Object{}} for success case" do ) end end + + describe "fix_attachments/1" do + test "transforms dimensions into a url" do + object = %{ + "attachment" => [ + %{ + "type" => "Document", + "name" => "Hello world", + "url" => "https://media.example.tld/1.jpg", + "width" => 880, + "height" => 960, + "mediaType" => "image/jpeg", + "blurhash" => "eTKL26+HDjcEIBVl;ds+K6t301W.t7nit7y1E,R:v}ai4nXSt7V@of" + } + ] + } + + expected = %{ + "attachment" => [ + %{ + "type" => "Document", + "name" => "Hello world", + "url" => [ + %{ + "type" => "Link", + "mediaType" => "image/jpeg", + "href" => "https://media.example.tld/1.jpg", + "width" => 880, + "height" => 960 + } + ], + "mediaType" => "image/jpeg", + "blurhash" => "eTKL26+HDjcEIBVl;ds+K6t301W.t7nit7y1E,R:v}ai4nXSt7V@of" + } + ] + } + + assert Transmogrifier.fix_attachments(object) == expected + end + end end From da83839dc1e3b79a90a3b7d8dc078a4c7b3690eb Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Tue, 7 Dec 2021 12:59:03 -0500 Subject: [PATCH 08/16] AttachmentValidator: ingest width and height --- .../object_validators/attachment_validator.ex | 10 +++--- .../attachment_validator_test.exs | 32 +++++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/lib/pleroma/web/activity_pub/object_validators/attachment_validator.ex b/lib/pleroma/web/activity_pub/object_validators/attachment_validator.ex index 837787b9f..59fef42d6 100644 --- a/lib/pleroma/web/activity_pub/object_validators/attachment_validator.ex +++ b/lib/pleroma/web/activity_pub/object_validators/attachment_validator.ex @@ -68,12 +68,14 @@ def fix_media_type(data) do end end - defp handle_href(href, mediaType) do + defp handle_href(href, mediaType, data) do [ %{ "href" => href, "type" => "Link", - "mediaType" => mediaType + "mediaType" => mediaType, + "width" => data["width"], + "height" => data["height"] } ] end @@ -81,10 +83,10 @@ defp handle_href(href, mediaType) do defp fix_url(data) do cond do is_binary(data["url"]) -> - Map.put(data, "url", handle_href(data["url"], data["mediaType"])) + Map.put(data, "url", handle_href(data["url"], data["mediaType"], data)) is_binary(data["href"]) and data["url"] == nil -> - Map.put(data, "url", handle_href(data["href"], data["mediaType"])) + Map.put(data, "url", handle_href(data["href"], data["mediaType"], data)) true -> data diff --git a/test/pleroma/web/activity_pub/object_validators/attachment_validator_test.exs b/test/pleroma/web/activity_pub/object_validators/attachment_validator_test.exs index 0e49fda99..9150b8d41 100644 --- a/test/pleroma/web/activity_pub/object_validators/attachment_validator_test.exs +++ b/test/pleroma/web/activity_pub/object_validators/attachment_validator_test.exs @@ -105,5 +105,37 @@ test "it handles image dimensions" do assert attachment.mediaType == "image/jpeg" end + + test "it transforms image dimentions to our internal format" do + attachment = %{ + "type" => "Document", + "name" => "Hello world", + "url" => "https://media.example.tld/1.jpg", + "width" => 880, + "height" => 960, + "mediaType" => "image/jpeg", + "blurhash" => "eTKL26+HDjcEIBVl;ds+K6t301W.t7nit7y1E,R:v}ai4nXSt7V@of" + } + + expected = %AttachmentValidator{ + type: "Document", + name: "Hello world", + mediaType: "image/jpeg", + blurhash: "eTKL26+HDjcEIBVl;ds+K6t301W.t7nit7y1E,R:v}ai4nXSt7V@of", + url: [ + %AttachmentValidator.UrlObjectValidator{ + type: "Link", + mediaType: "image/jpeg", + href: "https://media.example.tld/1.jpg", + width: 880, + height: 960 + } + ] + } + + {:ok, ^expected} = + AttachmentValidator.cast_and_validate(attachment) + |> Ecto.Changeset.apply_action(:insert) + end end end From 2936adbbce19b21d20ed55192f6cccf1aa6ce154 Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Tue, 7 Dec 2021 13:35:34 -0500 Subject: [PATCH 09/16] Fix VideoHandlingTest --- .../transmogrifier/video_handling_test.exs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/test/pleroma/web/activity_pub/transmogrifier/video_handling_test.exs b/test/pleroma/web/activity_pub/transmogrifier/video_handling_test.exs index fc3ec7450..29a75701b 100644 --- a/test/pleroma/web/activity_pub/transmogrifier/video_handling_test.exs +++ b/test/pleroma/web/activity_pub/transmogrifier/video_handling_test.exs @@ -58,7 +58,9 @@ test "it remaps video URLs as attachments if necessary" do "href" => "https://peertube.moe/static/webseed/df5f464b-be8d-46fb-ad81-2d4c2d1630e3-480.mp4", "mediaType" => "video/mp4", - "type" => "Link" + "type" => "Link", + "width" => 480, + "height" => nil } ] } @@ -79,7 +81,9 @@ test "it remaps video URLs as attachments if necessary" do "href" => "https://framatube.org/static/webseed/6050732a-8a7a-43d4-a6cd-809525a1d206-1080.mp4", "mediaType" => "video/mp4", - "type" => "Link" + "type" => "Link", + "width" => nil, + "height" => 1080 } ] } @@ -107,7 +111,9 @@ test "it works for peertube videos with only their mpegURL map" do "href" => "https://peertube.stream/static/streaming-playlists/hls/abece3c3-b9c6-47f4-8040-f3eed8c602e6/abece3c3-b9c6-47f4-8040-f3eed8c602e6-1080-fragmented.mp4", "mediaType" => "video/mp4", - "type" => "Link" + "type" => "Link", + "width" => nil, + "height" => 1080 } ] } From ecab82dea77c5ab74b4cef887ca3c8a8dc484cf7 Mon Sep 17 00:00:00 2001 From: Haelwenn Date: Tue, 7 Dec 2021 22:53:36 +0000 Subject: [PATCH 10/16] Apply alexgleason's suggestion(s) to 1 file(s) --- test/pleroma/web/activity_pub/transmogrifier_test.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/pleroma/web/activity_pub/transmogrifier_test.exs b/test/pleroma/web/activity_pub/transmogrifier_test.exs index 4616f8090..06daf6a9f 100644 --- a/test/pleroma/web/activity_pub/transmogrifier_test.exs +++ b/test/pleroma/web/activity_pub/transmogrifier_test.exs @@ -526,7 +526,7 @@ test "returns {:ok, %Object{}} for success case" do end describe "fix_attachments/1" do - test "transforms dimensions into a url" do + test "puts dimensions into attachment url field" do object = %{ "attachment" => [ %{ From 938c95d75d1fafea0a338e1c1e75263ec276a50a Mon Sep 17 00:00:00 2001 From: Alex Gleason Date: Tue, 7 Dec 2021 21:55:54 -0500 Subject: [PATCH 11/16] VideoHandlingTest: remove nil values --- .../web/activity_pub/transmogrifier/video_handling_test.exs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test/pleroma/web/activity_pub/transmogrifier/video_handling_test.exs b/test/pleroma/web/activity_pub/transmogrifier/video_handling_test.exs index 29a75701b..87c53ebf4 100644 --- a/test/pleroma/web/activity_pub/transmogrifier/video_handling_test.exs +++ b/test/pleroma/web/activity_pub/transmogrifier/video_handling_test.exs @@ -59,8 +59,7 @@ test "it remaps video URLs as attachments if necessary" do "https://peertube.moe/static/webseed/df5f464b-be8d-46fb-ad81-2d4c2d1630e3-480.mp4", "mediaType" => "video/mp4", "type" => "Link", - "width" => 480, - "height" => nil + "width" => 480 } ] } @@ -82,7 +81,6 @@ test "it remaps video URLs as attachments if necessary" do "https://framatube.org/static/webseed/6050732a-8a7a-43d4-a6cd-809525a1d206-1080.mp4", "mediaType" => "video/mp4", "type" => "Link", - "width" => nil, "height" => 1080 } ] @@ -112,7 +110,6 @@ test "it works for peertube videos with only their mpegURL map" do "https://peertube.stream/static/streaming-playlists/hls/abece3c3-b9c6-47f4-8040-f3eed8c602e6/abece3c3-b9c6-47f4-8040-f3eed8c602e6-1080-fragmented.mp4", "mediaType" => "video/mp4", "type" => "Link", - "width" => nil, "height" => 1080 } ] From 9002e5155bfe0b7bedfac321f1fb07ddd77acee5 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Sat, 11 Dec 2021 17:36:49 +0000 Subject: [PATCH 12/16] Add import functionality --- lib/mix/tasks/pleroma/search.ex | 46 +++++++++++++++++++ .../elasticsearch/document_mappings/note.ex | 13 ++++++ lib/pleroma/elasticsearch/store.ex | 36 +++++++++++++++ mix.exs | 3 +- mix.lock | 4 ++ 5 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 lib/mix/tasks/pleroma/search.ex create mode 100644 lib/pleroma/elasticsearch/document_mappings/note.ex create mode 100644 lib/pleroma/elasticsearch/store.ex diff --git a/lib/mix/tasks/pleroma/search.ex b/lib/mix/tasks/pleroma/search.ex new file mode 100644 index 000000000..4acd0e34a --- /dev/null +++ b/lib/mix/tasks/pleroma/search.ex @@ -0,0 +1,46 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Mix.Tasks.Pleroma.Search do + use Mix.Task + import Mix.Pleroma + import Ecto.Query + alias Pleroma.Elasticsearch + alias Pleroma.Activity + alias Pleroma.Repo + alias Pleroma.Pagination + + @shortdoc "Manages elasticsearch" + + def run(["import" | rest]) do + start_pleroma() + + query = from(a in Activity, where: not ilike(a.actor, "%/relay")) + |> Activity.with_preloaded_object + |> Activity.with_preloaded_user_actor + |> get_all + end + + defp get_all(query, max_id \\ nil) do + params = %{limit: 20} + params = if max_id == nil do + params + else + Map.put(params, :max_id, max_id) + end + + res = query + |> Pagination.fetch_paginated(params) + + if res == [] do + :ok + else + res + |> Pleroma.Elasticsearch.bulk_post(:activities) + + get_all(query, List.last(res).id) + end + end + +end diff --git a/lib/pleroma/elasticsearch/document_mappings/note.ex b/lib/pleroma/elasticsearch/document_mappings/note.ex new file mode 100644 index 000000000..f4e3307fe --- /dev/null +++ b/lib/pleroma/elasticsearch/document_mappings/note.ex @@ -0,0 +1,13 @@ +defmodule Pleroma.Elasticsearch.DocumentMappings.Activity do + alias Pleroma.Object + + def id(obj), do: obj.id + def encode(%{object: %{data: %{ "type" => "Note" }}} = activity) do + %{ + user: activity.user_actor.nickname, + content: activity.object.data["content"], + instance: URI.parse(activity.user_actor.ap_id).host, + hashtags: Object.hashtags(activity.object) + } + end +end diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex new file mode 100644 index 000000000..2ff4bf889 --- /dev/null +++ b/lib/pleroma/elasticsearch/store.ex @@ -0,0 +1,36 @@ +defmodule Pleroma.Elasticsearch do + alias Pleroma.Activity + alias Pleroma.Elasticsearch.DocumentMappings + + defp url do + Pleroma.Config.get([:elasticsearch, :url]) + end + + def put(%Activity{} = activity) do + Elastix.Document.index( + url(), + "activities", + "activity", + DocumentMappings.Activity.id(activity), + DocumentMappings.Activity.encode(activity) + ) + end + + def bulk_post(data, :activities) do + d = data + |> Enum.map(fn d -> + [ + %{index: %{_id: DocumentMappings.Activity.id(d)}}, + DocumentMappings.Activity.encode(d) + ] + end) + |> List.flatten() + + IO.inspect Elastix.Bulk.post( + url(), + d, + index: "activities", + type: "activity" + ) + end +end diff --git a/mix.exs b/mix.exs index e69c737dd..f49353f7f 100644 --- a/mix.exs +++ b/mix.exs @@ -91,7 +91,7 @@ defp elixirc_paths(:test), do: ["lib", "test/support"] defp elixirc_paths(_), do: ["lib"] defp warnings_as_errors(:prod), do: false - defp warnings_as_errors(_), do: true + defp warnings_as_errors(_), do: false # Specifies OAuth dependencies. defp oauth_deps do @@ -197,6 +197,7 @@ defp deps do ref: "289cda1b6d0d70ccb2ba508a2b0bd24638db2880"}, {:eblurhash, "~> 1.1.0"}, {:open_api_spex, "~> 3.10"}, + {:elastix, ">= 0.0.0"}, # indirect dependency version override {:plug, "~> 1.10.4", override: true}, diff --git a/mix.lock b/mix.lock index 18d5e3bea..bec9d025e 100644 --- a/mix.lock +++ b/mix.lock @@ -34,6 +34,8 @@ "ecto_enum": {:hex, :ecto_enum, "1.4.0", "d14b00e04b974afc69c251632d1e49594d899067ee2b376277efd8233027aec8", [:mix], [{:ecto, ">= 3.0.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:ecto_sql, "> 3.0.0", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:mariaex, ">= 0.0.0", [hex: :mariaex, repo: "hexpm", optional: true]}, {:postgrex, ">= 0.0.0", [hex: :postgrex, repo: "hexpm", optional: true]}], "hexpm", "8fb55c087181c2b15eee406519dc22578fa60dd82c088be376d0010172764ee4"}, "ecto_sql": {:hex, :ecto_sql, "3.6.2", "9526b5f691701a5181427634c30655ac33d11e17e4069eff3ae1176c764e0ba3", [:mix], [{:db_connection, "~> 2.2", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.6.2", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.4.0 or ~> 0.5.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.15.0 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "5ec9d7e6f742ea39b63aceaea9ac1d1773d574ea40df5a53ef8afbd9242fdb6b"}, "eimp": {:hex, :eimp, "1.0.14", "fc297f0c7e2700457a95a60c7010a5f1dcb768a083b6d53f49cd94ab95a28f22", [:rebar3], [{:p1_utils, "1.0.18", [hex: :p1_utils, repo: "hexpm", optional: false]}], "hexpm", "501133f3112079b92d9e22da8b88bf4f0e13d4d67ae9c15c42c30bd25ceb83b6"}, + "elasticsearch": {:hex, :elasticsearch, "1.0.1", "8339538d90af6b280f10ecd02b1eae372f09373e629b336a13461babf7366495", [:mix], [{:httpoison, ">= 0.0.0", [hex: :httpoison, repo: "hexpm", optional: false]}, {:poison, ">= 0.0.0", [hex: :poison, repo: "hexpm", optional: true]}, {:sigaws, "~> 0.7", [hex: :sigaws, repo: "hexpm", optional: true]}, {:vex, "~> 0.6", [hex: :vex, repo: "hexpm", optional: false]}], "hexpm", "83e7d8b8bee3e7e19a06ab4d357d24845ac1da894e79678227fd52c0b7f71867"}, + "elastix": {:hex, :elastix, "0.10.0", "7567da885677ba9deffc20063db5f3ca8cd10f23cff1ab3ed9c52b7063b7e340", [:mix], [{:httpoison, "~> 1.4", [hex: :httpoison, repo: "hexpm", optional: false]}, {:poison, "~> 3.0 or ~> 4.0", [hex: :poison, repo: "hexpm", optional: true]}, {:retry, "~> 0.8", [hex: :retry, repo: "hexpm", optional: false]}], "hexpm", "5fb342ce068b20f7845f5dd198c2dc80d967deafaa940a6e51b846db82696d1d"}, "elixir_make": {:hex, :elixir_make, "0.6.2", "7dffacd77dec4c37b39af867cedaabb0b59f6a871f89722c25b28fcd4bd70530", [:mix], [], "hexpm", "03e49eadda22526a7e5279d53321d1cced6552f344ba4e03e619063de75348d9"}, "esshd": {:hex, :esshd, "0.1.1", "d4dd4c46698093a40a56afecce8a46e246eb35463c457c246dacba2e056f31b5", [:mix], [], "hexpm", "d73e341e3009d390aa36387dc8862860bf9f874c94d9fd92ade2926376f49981"}, "eternal": {:hex, :eternal, "1.2.2", "d1641c86368de99375b98d183042dd6c2b234262b8d08dfd72b9eeaafc2a1abd", [:mix], [], "hexpm", "2c9fe32b9c3726703ba5e1d43a1d255a4f3f2d8f8f9bc19f094c7cb1a7a9e782"}, @@ -112,6 +114,7 @@ "ranch": {:hex, :ranch, "1.8.0", "8c7a100a139fd57f17327b6413e4167ac559fbc04ca7448e9be9057311597a1d", [:make, :rebar3], [], "hexpm", "49fbcfd3682fab1f5d109351b61257676da1a2fdbe295904176d5e521a2ddfe5"}, "recon": {:hex, :recon, "2.5.1", "430ffa60685ac1efdfb1fe4c97b8767c92d0d92e6e7c3e8621559ba77598678a", [:mix, :rebar3], [], "hexpm", "5721c6b6d50122d8f68cccac712caa1231f97894bab779eff5ff0f886cb44648"}, "remote_ip": {:git, "https://git.pleroma.social/pleroma/remote_ip.git", "b647d0deecaa3acb140854fe4bda5b7e1dc6d1c8", [ref: "b647d0deecaa3acb140854fe4bda5b7e1dc6d1c8"]}, + "retry": {:hex, :retry, "0.15.0", "ba6aaeba92905a396c18c299a07e638947b2ba781e914f803202bc1b9ae867c3", [:mix], [], "hexpm", "93d3310bce78c0a30cc94610684340a14adfc9136856a3f662e4d9ce6013c784"}, "sleeplocks": {:hex, :sleeplocks, "1.1.1", "3d462a0639a6ef36cc75d6038b7393ae537ab394641beb59830a1b8271faeed3", [:rebar3], [], "hexpm", "84ee37aeff4d0d92b290fff986d6a95ac5eedf9b383fadfd1d88e9b84a1c02e1"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.6", "cf344f5692c82d2cd7554f5ec8fd961548d4fd09e7d22f5b62482e5aeaebd4b0", [:make, :mix, :rebar3], [], "hexpm", "bdb0d2471f453c88ff3908e7686f86f9be327d065cc1ec16fa4540197ea04680"}, "sweet_xml": {:hex, :sweet_xml, "0.6.6", "fc3e91ec5dd7c787b6195757fbcf0abc670cee1e4172687b45183032221b66b8", [:mix], [], "hexpm", "2e1ec458f892ffa81f9f8386e3f35a1af6db7a7a37748a64478f13163a1f3573"}, @@ -125,6 +128,7 @@ "ueberauth": {:hex, :ueberauth, "0.6.3", "d42ace28b870e8072cf30e32e385579c57b9cc96ec74fa1f30f30da9c14f3cc0", [:mix], [{:plug, "~> 1.5", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "afc293d8a1140d6591b53e3eaf415ca92842cb1d32fad3c450c6f045f7f91b60"}, "unicode_util_compat": {:hex, :unicode_util_compat, "0.7.0", "bc84380c9ab48177092f43ac89e4dfa2c6d62b40b8bd132b1059ecc7232f9a78", [:rebar3], [], "hexpm", "25eee6d67df61960cf6a794239566599b09e17e668d3700247bc498638152521"}, "unsafe": {:hex, :unsafe, "1.0.1", "a27e1874f72ee49312e0a9ec2e0b27924214a05e3ddac90e91727bc76f8613d8", [:mix], [], "hexpm", "6c7729a2d214806450d29766abc2afaa7a2cbecf415be64f36a6691afebb50e5"}, + "vex": {:hex, :vex, "0.9.0", "613ea5eb3055662e7178b83e25b2df0975f68c3d8bb67c1645f0573e1a78d606", [:mix], [], "hexpm", "c69fff44d5c8aa3f1faee71bba1dcab05dd36364c5a629df8bb11751240c857f"}, "web_push_encryption": {:git, "https://github.com/lanodan/elixir-web-push-encryption.git", "026a043037a89db4da8f07560bc8f9c68bcf0cc0", [branch: "bugfix/otp-24"]}, "websocket_client": {:git, "https://github.com/jeremyong/websocket_client.git", "9a6f65d05ebf2725d62fb19262b21f1805a59fbf", []}, } From 449d8ff16572cb520b209476fea2a6dcb9312ac5 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Sat, 11 Dec 2021 18:48:46 +0000 Subject: [PATCH 13/16] integrate search endpoint with ES --- config/config.exs | 3 ++ .../elasticsearch/document_mappings/note.ex | 1 + lib/pleroma/elasticsearch/store.ex | 11 +++++- lib/pleroma/web/common_api.ex | 19 +++++++++- .../controllers/search_controller.ex | 37 +++++++++++++++++++ 5 files changed, 69 insertions(+), 2 deletions(-) diff --git a/config/config.exs b/config/config.exs index e7d1014bb..58718bf9d 100644 --- a/config/config.exs +++ b/config/config.exs @@ -852,6 +852,9 @@ {Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]} ] +config :pleroma, :search, + provider: :builtin + # Import environment specific config. This must remain at the bottom # of this file so it overrides the configuration defined above. import_config "#{Mix.env()}.exs" diff --git a/lib/pleroma/elasticsearch/document_mappings/note.ex b/lib/pleroma/elasticsearch/document_mappings/note.ex index f4e3307fe..60efde599 100644 --- a/lib/pleroma/elasticsearch/document_mappings/note.ex +++ b/lib/pleroma/elasticsearch/document_mappings/note.ex @@ -4,6 +4,7 @@ defmodule Pleroma.Elasticsearch.DocumentMappings.Activity do def id(obj), do: obj.id def encode(%{object: %{data: %{ "type" => "Note" }}} = activity) do %{ + _timestamp: activity.inserted_at, user: activity.user_actor.nickname, content: activity.object.data["content"], instance: URI.parse(activity.user_actor.ap_id).host, diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index 2ff4bf889..d9e9ed1a7 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -26,11 +26,20 @@ def bulk_post(data, :activities) do end) |> List.flatten() - IO.inspect Elastix.Bulk.post( + Elastix.Bulk.post( url(), d, index: "activities", type: "activity" ) end + + def search(query) do + Elastix.Search.search( + url(), + "activities", + ["activity"], + %{query: %{term: %{content: query}}} + ) + end end diff --git a/lib/pleroma/web/common_api.ex b/lib/pleroma/web/common_api.ex index 6f685cb7b..95ac7b71a 100644 --- a/lib/pleroma/web/common_api.ex +++ b/lib/pleroma/web/common_api.ex @@ -16,6 +16,8 @@ defmodule Pleroma.Web.CommonAPI do alias Pleroma.Web.ActivityPub.Utils alias Pleroma.Web.ActivityPub.Visibility alias Pleroma.Web.CommonAPI.ActivityDraft + alias Pleroma.Elasticsearch + alias Pleroma.Config import Pleroma.Web.Gettext import Pleroma.Web.CommonAPI.Utils @@ -395,9 +397,24 @@ def listen(user, data) do end end + def maybe_put_into_elasticsearch({:ok, activity}) do + if Config.get([:search, :provider]) == :elasticsearch do + actor = Pleroma.Activity.user_actor(activity) + activity + |> Map.put(:user_actor, actor) + |> Elasticsearch.put() + end + end + + def maybe_put_into_elasticsearch(_) do + {:ok, :skipped} + end + def post(user, %{status: _} = data) do with {:ok, draft} <- ActivityDraft.create(user, data) do - ActivityPub.create(draft.changes, draft.preview?) + activity = ActivityPub.create(draft.changes, draft.preview?) + maybe_put_into_elasticsearch(activity) + activity end end diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 64b177eb3..484a959af 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -45,6 +45,43 @@ def search(conn, params), do: do_search(:v1, conn, params) defp do_search(version, %{assigns: %{user: user}} = conn, %{q: query} = params) do query = String.trim(query) + options = search_options(params, user) + if Pleroma.Config.get([:search, :provider]) == :elasticsearch do + elasticsearch_search(conn, query, options) + else + builtin_search(version, conn, params) + end + end + + defp elasticsearch_search(%{assigns: %{user: user}} = conn, query, options) do + with {:ok, raw_results} <- Pleroma.Elasticsearch.search(query) do + results = raw_results + |> Map.get(:body) + |> Map.get("hits") + |> Map.get("hits") + |> Enum.map(fn result -> result["_id"] end) + |> Pleroma.Activity.all_by_ids_with_object() + + json( + conn, + %{ + accounts: [], + hashtags: [], + statuses: StatusView.render("index.json", + activities: results, + for: user, + as: :activity + )} + ) + else + {:error, _} -> + conn + |> put_status(:internal_server_error) + |> json(%{error: "Search failed"}) + end + end + + defp builtin_search(version, %{assigns: %{user: user}} = conn, %{q: query} = params) do options = search_options(params, user) timeout = Keyword.get(Repo.config(), :timeout, 15_000) default_values = %{"statuses" => [], "accounts" => [], "hashtags" => []} From fce13cce46862a73aabfcb026e0fa55bfc8fe811 Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Sat, 11 Dec 2021 19:56:15 +0000 Subject: [PATCH 14/16] add extra filters --- lib/pleroma/elasticsearch/store.ex | 76 ++++++++++++++++++- .../controllers/search_controller.ex | 6 +- 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index d9e9ed1a7..55c459801 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -2,6 +2,10 @@ defmodule Pleroma.Elasticsearch do alias Pleroma.Activity alias Pleroma.Elasticsearch.DocumentMappings + @searchable [ + "hashtag", "instance", "user" + ] + defp url do Pleroma.Config.get([:elasticsearch, :url]) end @@ -34,12 +38,82 @@ def bulk_post(data, :activities) do ) end + defp parse_term(t) do + if String.contains?(t, ":") and !String.starts_with?(t, "\"") do + [field, query] = String.split(t, ":") + if Enum.member?(@searchable, field) do + {field, query} + else + {"content", query} + end + else + {"content", t} + end + end + + defp search_user(params, q) do + if q["user"] != nil do + params ++ [%{match: %{user: %{ + query: Enum.join(q["user"], " "), + operator: "OR" + }}}] + else + params + end + end + + defp search_instance(params, q) do + if q["instance"] != nil do + params ++ [%{match: %{instance: %{ + query: Enum.join(q["instance"], " "), + operator: "OR" + }}}] + else + params + end + end + + defp search_content(params, q) do + if q["content"] != nil do + params ++ [%{match: %{content: %{ + query: Enum.join(q["content"], " "), + operator: "AND" + }}}] + else + params + end + end + + defp to_es(q) do + [] + |> search_content(q) + |> search_instance(q) + |> search_user(q) + end + + defp parse(query) do + String.split(query, " ") + |> Enum.map(&parse_term/1) + |> Enum.reduce(%{}, fn {field, query}, acc -> + Map.put(acc, field, + Map.get(acc, field, []) ++ [query] + ) + end) + |> to_es() + end + def search(query) do + q = %{query: %{ + bool: %{ + must: parse(query) + } + }} + IO.inspect(q) Elastix.Search.search( url(), "activities", ["activity"], - %{query: %{term: %{content: query}}} + q ) end end diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 484a959af..920ff5980 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -56,9 +56,9 @@ defp do_search(version, %{assigns: %{user: user}} = conn, %{q: query} = params) defp elasticsearch_search(%{assigns: %{user: user}} = conn, query, options) do with {:ok, raw_results} <- Pleroma.Elasticsearch.search(query) do results = raw_results - |> Map.get(:body) - |> Map.get("hits") - |> Map.get("hits") + |> Map.get(:body, %{}) + |> Map.get("hits", %{}) + |> Map.get("hits", []) |> Enum.map(fn result -> result["_id"] end) |> Pleroma.Activity.all_by_ids_with_object() From de60b1f3c1f1b49da757f21260a648cab0709c3f Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Sun, 12 Dec 2021 17:23:44 +0000 Subject: [PATCH 15/16] make search provider configurable --- config/config.exs | 3 +- lib/mix/tasks/pleroma/search.ex | 28 +-- .../elasticsearch/document_mappings/note.ex | 13 +- lib/pleroma/elasticsearch/store.ex | 115 +++--------- lib/pleroma/search.ex | 12 ++ lib/pleroma/search/builtin.ex | 137 ++++++++++++++ lib/pleroma/search/elasticsearch.ex | 80 ++++++++ lib/pleroma/web/common_api.ex | 3 +- .../controllers/search_controller.ex | 172 +----------------- mix.exs | 6 +- mix.lock | 3 +- test/pleroma/web/rich_media/parser_test.exs | 4 +- 12 files changed, 289 insertions(+), 287 deletions(-) create mode 100644 lib/pleroma/search.ex create mode 100644 lib/pleroma/search/builtin.ex create mode 100644 lib/pleroma/search/elasticsearch.ex diff --git a/config/config.exs b/config/config.exs index 58718bf9d..581f3831a 100644 --- a/config/config.exs +++ b/config/config.exs @@ -852,8 +852,7 @@ {Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]} ] -config :pleroma, :search, - provider: :builtin +config :pleroma, :search, provider: Pleroma.Search.Builtin # Import environment specific config. This must remain at the bottom # of this file so it overrides the configuration defined above. diff --git a/lib/mix/tasks/pleroma/search.ex b/lib/mix/tasks/pleroma/search.ex index 4acd0e34a..9e0f376c3 100644 --- a/lib/mix/tasks/pleroma/search.ex +++ b/lib/mix/tasks/pleroma/search.ex @@ -6,32 +6,33 @@ defmodule Mix.Tasks.Pleroma.Search do use Mix.Task import Mix.Pleroma import Ecto.Query - alias Pleroma.Elasticsearch alias Pleroma.Activity - alias Pleroma.Repo alias Pleroma.Pagination @shortdoc "Manages elasticsearch" - def run(["import" | rest]) do + def run(["import" | _rest]) do start_pleroma() - query = from(a in Activity, where: not ilike(a.actor, "%/relay")) - |> Activity.with_preloaded_object - |> Activity.with_preloaded_user_actor + from(a in Activity, where: not ilike(a.actor, "%/relay")) + |> Activity.with_preloaded_object() + |> Activity.with_preloaded_user_actor() |> get_all end defp get_all(query, max_id \\ nil) do params = %{limit: 20} - params = if max_id == nil do - params - else - Map.put(params, :max_id, max_id) - end - res = query - |> Pagination.fetch_paginated(params) + params = + if max_id == nil do + params + else + Map.put(params, :max_id, max_id) + end + + res = + query + |> Pagination.fetch_paginated(params) if res == [] do :ok @@ -42,5 +43,4 @@ defp get_all(query, max_id \\ nil) do get_all(query, List.last(res).id) end end - end diff --git a/lib/pleroma/elasticsearch/document_mappings/note.ex b/lib/pleroma/elasticsearch/document_mappings/note.ex index 60efde599..35b70dd4a 100644 --- a/lib/pleroma/elasticsearch/document_mappings/note.ex +++ b/lib/pleroma/elasticsearch/document_mappings/note.ex @@ -2,13 +2,14 @@ defmodule Pleroma.Elasticsearch.DocumentMappings.Activity do alias Pleroma.Object def id(obj), do: obj.id - def encode(%{object: %{data: %{ "type" => "Note" }}} = activity) do + + def encode(%{object: %{data: %{"type" => "Note"}}} = activity) do %{ - _timestamp: activity.inserted_at, - user: activity.user_actor.nickname, - content: activity.object.data["content"], - instance: URI.parse(activity.user_actor.ap_id).host, - hashtags: Object.hashtags(activity.object) + _timestamp: activity.inserted_at, + user: activity.user_actor.nickname, + content: activity.object.data["content"], + instance: URI.parse(activity.user_actor.ap_id).host, + hashtags: Object.hashtags(activity.object) } end end diff --git a/lib/pleroma/elasticsearch/store.ex b/lib/pleroma/elasticsearch/store.ex index 55c459801..31f77fadf 100644 --- a/lib/pleroma/elasticsearch/store.ex +++ b/lib/pleroma/elasticsearch/store.ex @@ -2,118 +2,45 @@ defmodule Pleroma.Elasticsearch do alias Pleroma.Activity alias Pleroma.Elasticsearch.DocumentMappings - @searchable [ - "hashtag", "instance", "user" - ] - defp url do Pleroma.Config.get([:elasticsearch, :url]) end def put(%Activity{} = activity) do Elastix.Document.index( - url(), - "activities", - "activity", - DocumentMappings.Activity.id(activity), - DocumentMappings.Activity.encode(activity) + url(), + "activities", + "activity", + DocumentMappings.Activity.id(activity), + DocumentMappings.Activity.encode(activity) ) end def bulk_post(data, :activities) do - d = data - |> Enum.map(fn d -> + d = + data + |> Enum.map(fn d -> [ - %{index: %{_id: DocumentMappings.Activity.id(d)}}, - DocumentMappings.Activity.encode(d) + %{index: %{_id: DocumentMappings.Activity.id(d)}}, + DocumentMappings.Activity.encode(d) ] - end) - |> List.flatten() + end) + |> List.flatten() Elastix.Bulk.post( - url(), - d, - index: "activities", - type: "activity" + url(), + d, + index: "activities", + type: "activity" ) end - defp parse_term(t) do - if String.contains?(t, ":") and !String.starts_with?(t, "\"") do - [field, query] = String.split(t, ":") - if Enum.member?(@searchable, field) do - {field, query} - else - {"content", query} - end - else - {"content", t} - end - end - - defp search_user(params, q) do - if q["user"] != nil do - params ++ [%{match: %{user: %{ - query: Enum.join(q["user"], " "), - operator: "OR" - }}}] - else - params - end - end - - defp search_instance(params, q) do - if q["instance"] != nil do - params ++ [%{match: %{instance: %{ - query: Enum.join(q["instance"], " "), - operator: "OR" - }}}] - else - params - end - end - - defp search_content(params, q) do - if q["content"] != nil do - params ++ [%{match: %{content: %{ - query: Enum.join(q["content"], " "), - operator: "AND" - }}}] - else - params - end - end - - defp to_es(q) do - [] - |> search_content(q) - |> search_instance(q) - |> search_user(q) - end - - defp parse(query) do - String.split(query, " ") - |> Enum.map(&parse_term/1) - |> Enum.reduce(%{}, fn {field, query}, acc -> - Map.put(acc, field, - Map.get(acc, field, []) ++ [query] - ) - end) - |> to_es() - end - - def search(query) do - q = %{query: %{ - bool: %{ - must: parse(query) - } - }} - IO.inspect(q) + def search_activities(q) do Elastix.Search.search( - url(), - "activities", - ["activity"], - q + url(), + "activities", + ["activity"], + q ) end end diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex new file mode 100644 index 000000000..99bce632c --- /dev/null +++ b/lib/pleroma/search.ex @@ -0,0 +1,12 @@ +defmodule Pleroma.Search do + @type search_map :: %{ + statuses: [map], + accounts: [map], + hashtags: [map] + } + + @doc """ + Searches for stuff + """ + @callback search(map, map, keyword) :: search_map +end diff --git a/lib/pleroma/search/builtin.ex b/lib/pleroma/search/builtin.ex new file mode 100644 index 000000000..019713f52 --- /dev/null +++ b/lib/pleroma/search/builtin.ex @@ -0,0 +1,137 @@ +defmodule Pleroma.Search.Builtin do + @behaviour Pleroma.Search + + alias Pleroma.Repo + alias Pleroma.User + alias Pleroma.Activity + alias Pleroma.Web.MastodonAPI.AccountView + alias Pleroma.Web.MastodonAPI.StatusView + alias Pleroma.Web.Endpoint + + require Logger + + @impl Pleroma.Search + def search(_conn, %{q: query} = params, options) do + version = Keyword.get(options, :version) + timeout = Keyword.get(Repo.config(), :timeout, 15_000) + default_values = %{"statuses" => [], "accounts" => [], "hashtags" => []} + + default_values + |> Enum.map(fn {resource, default_value} -> + if params[:type] in [nil, resource] do + {resource, fn -> resource_search(version, resource, query, options) end} + else + {resource, fn -> default_value end} + end + end) + |> Task.async_stream(fn {resource, f} -> {resource, with_fallback(f)} end, + timeout: timeout, + on_timeout: :kill_task + ) + |> Enum.reduce(default_values, fn + {:ok, {resource, result}}, acc -> + Map.put(acc, resource, result) + + _error, acc -> + acc + end) + end + + defp resource_search(_, "accounts", query, options) do + accounts = with_fallback(fn -> User.search(query, options) end) + + AccountView.render("index.json", + users: accounts, + for: options[:for_user], + embed_relationships: options[:embed_relationships] + ) + end + + defp resource_search(_, "statuses", query, options) do + statuses = with_fallback(fn -> Activity.search(options[:for_user], query, options) end) + + StatusView.render("index.json", + activities: statuses, + for: options[:for_user], + as: :activity + ) + end + + defp resource_search(:v2, "hashtags", query, options) do + tags_path = Endpoint.url() <> "/tag/" + + query + |> prepare_tags(options) + |> Enum.map(fn tag -> + %{name: tag, url: tags_path <> tag} + end) + end + + defp resource_search(:v1, "hashtags", query, options) do + prepare_tags(query, options) + end + + defp prepare_tags(query, options) do + tags = + query + |> preprocess_uri_query() + |> String.split(~r/[^#\w]+/u, trim: true) + |> Enum.uniq_by(&String.downcase/1) + + explicit_tags = Enum.filter(tags, fn tag -> String.starts_with?(tag, "#") end) + + tags = + if Enum.any?(explicit_tags) do + explicit_tags + else + tags + end + + tags = Enum.map(tags, fn tag -> String.trim_leading(tag, "#") end) + + tags = + if Enum.empty?(explicit_tags) && !options[:skip_joined_tag] do + add_joined_tag(tags) + else + tags + end + + Pleroma.Pagination.paginate(tags, options) + end + + # If `query` is a URI, returns last component of its path, otherwise returns `query` + defp preprocess_uri_query(query) do + if query =~ ~r/https?:\/\// do + query + |> String.trim_trailing("/") + |> URI.parse() + |> Map.get(:path) + |> String.split("/") + |> Enum.at(-1) + else + query + end + end + + defp add_joined_tag(tags) do + tags + |> Kernel.++([joined_tag(tags)]) + |> Enum.uniq_by(&String.downcase/1) + end + + defp joined_tag(tags) do + tags + |> Enum.map(fn tag -> String.capitalize(tag) end) + |> Enum.join() + end + + defp with_fallback(f, fallback \\ []) do + try do + f.() + rescue + error -> + Logger.error("#{__MODULE__} search error: #{inspect(error)}") + fallback + end + end +end diff --git a/lib/pleroma/search/elasticsearch.ex b/lib/pleroma/search/elasticsearch.ex new file mode 100644 index 000000000..f16ae58ce --- /dev/null +++ b/lib/pleroma/search/elasticsearch.ex @@ -0,0 +1,80 @@ +defmodule Pleroma.Search.Elasticsearch do + @behaviour Pleroma.Search + + alias Pleroma.Web.MastodonAPI.StatusView + + defp to_es(term) when is_binary(term) do + %{ + match: %{ + content: %{ + query: term, + operator: "AND" + } + } + } + end + + defp to_es({:quoted, term}), do: to_es(term) + + defp to_es({:filter, ["hashtag", query]}) do + %{ + term: %{ + hashtags: %{ + value: query + } + } + } + end + + defp to_es({:filter, [field, query]}) do + %{ + term: %{ + field => %{ + value: query + } + } + } + end + + defp parse(query) do + query + |> SearchParser.parse!() + |> Enum.map(&to_es/1) + end + + @impl Pleroma.Search + def search(%{assigns: %{user: user}} = _conn, %{q: query} = _params, _options) do + q = %{ + query: %{ + bool: %{ + must: parse(query) + } + } + } + + IO.inspect(q) + + out = Pleroma.Elasticsearch.search_activities(q) + + with {:ok, raw_results} <- out do + results = + raw_results + |> Map.get(:body, %{}) + |> Map.get("hits", %{}) + |> Map.get("hits", []) + |> Enum.map(fn result -> result["_id"] end) + |> Pleroma.Activity.all_by_ids_with_object() + + %{ + "accounts" => [], + "hashtags" => [], + "statuses" => + StatusView.render("index.json", + activities: results, + for: user, + as: :activity + ) + } + end + end +end diff --git a/lib/pleroma/web/common_api.ex b/lib/pleroma/web/common_api.ex index 95ac7b71a..0c93b1976 100644 --- a/lib/pleroma/web/common_api.ex +++ b/lib/pleroma/web/common_api.ex @@ -398,8 +398,9 @@ def listen(user, data) do end def maybe_put_into_elasticsearch({:ok, activity}) do - if Config.get([:search, :provider]) == :elasticsearch do + if Config.get([:search, :provider]) == Pleroma.Search.Elasticsearch do actor = Pleroma.Activity.user_actor(activity) + activity |> Map.put(:user_actor, actor) |> Elasticsearch.put() diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 920ff5980..c8f820f00 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -5,13 +5,9 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do use Pleroma.Web, :controller - alias Pleroma.Activity - alias Pleroma.Repo alias Pleroma.User alias Pleroma.Web.ControllerHelper - alias Pleroma.Web.Endpoint alias Pleroma.Web.MastodonAPI.AccountView - alias Pleroma.Web.MastodonAPI.StatusView alias Pleroma.Web.Plugs.OAuthScopesPlug alias Pleroma.Web.Plugs.RateLimiter @@ -43,71 +39,13 @@ def account_search(%{assigns: %{user: user}} = conn, %{q: query} = params) do def search2(conn, params), do: do_search(:v2, conn, params) def search(conn, params), do: do_search(:v1, conn, params) - defp do_search(version, %{assigns: %{user: user}} = conn, %{q: query} = params) do - query = String.trim(query) - options = search_options(params, user) - if Pleroma.Config.get([:search, :provider]) == :elasticsearch do - elasticsearch_search(conn, query, options) - else - builtin_search(version, conn, params) - end - end + defp do_search(version, %{assigns: %{user: user}} = conn, params) do + options = + search_options(params, user) + |> Keyword.put(:version, version) - defp elasticsearch_search(%{assigns: %{user: user}} = conn, query, options) do - with {:ok, raw_results} <- Pleroma.Elasticsearch.search(query) do - results = raw_results - |> Map.get(:body, %{}) - |> Map.get("hits", %{}) - |> Map.get("hits", []) - |> Enum.map(fn result -> result["_id"] end) - |> Pleroma.Activity.all_by_ids_with_object() - - json( - conn, - %{ - accounts: [], - hashtags: [], - statuses: StatusView.render("index.json", - activities: results, - for: user, - as: :activity - )} - ) - else - {:error, _} -> - conn - |> put_status(:internal_server_error) - |> json(%{error: "Search failed"}) - end - end - - defp builtin_search(version, %{assigns: %{user: user}} = conn, %{q: query} = params) do - options = search_options(params, user) - timeout = Keyword.get(Repo.config(), :timeout, 15_000) - default_values = %{"statuses" => [], "accounts" => [], "hashtags" => []} - - result = - default_values - |> Enum.map(fn {resource, default_value} -> - if params[:type] in [nil, resource] do - {resource, fn -> resource_search(version, resource, query, options) end} - else - {resource, fn -> default_value end} - end - end) - |> Task.async_stream(fn {resource, f} -> {resource, with_fallback(f)} end, - timeout: timeout, - on_timeout: :kill_task - ) - |> Enum.reduce(default_values, fn - {:ok, {resource, result}}, acc -> - Map.put(acc, resource, result) - - _error, acc -> - acc - end) - - json(conn, result) + search_provider = Pleroma.Config.get([:search, :provider]) + json(conn, search_provider.search(conn, params, options)) end defp search_options(params, user) do @@ -124,104 +62,6 @@ defp search_options(params, user) do |> Enum.filter(&elem(&1, 1)) end - defp resource_search(_, "accounts", query, options) do - accounts = with_fallback(fn -> User.search(query, options) end) - - AccountView.render("index.json", - users: accounts, - for: options[:for_user], - embed_relationships: options[:embed_relationships] - ) - end - - defp resource_search(_, "statuses", query, options) do - statuses = with_fallback(fn -> Activity.search(options[:for_user], query, options) end) - - StatusView.render("index.json", - activities: statuses, - for: options[:for_user], - as: :activity - ) - end - - defp resource_search(:v2, "hashtags", query, options) do - tags_path = Endpoint.url() <> "/tag/" - - query - |> prepare_tags(options) - |> Enum.map(fn tag -> - %{name: tag, url: tags_path <> tag} - end) - end - - defp resource_search(:v1, "hashtags", query, options) do - prepare_tags(query, options) - end - - defp prepare_tags(query, options) do - tags = - query - |> preprocess_uri_query() - |> String.split(~r/[^#\w]+/u, trim: true) - |> Enum.uniq_by(&String.downcase/1) - - explicit_tags = Enum.filter(tags, fn tag -> String.starts_with?(tag, "#") end) - - tags = - if Enum.any?(explicit_tags) do - explicit_tags - else - tags - end - - tags = Enum.map(tags, fn tag -> String.trim_leading(tag, "#") end) - - tags = - if Enum.empty?(explicit_tags) && !options[:skip_joined_tag] do - add_joined_tag(tags) - else - tags - end - - Pleroma.Pagination.paginate(tags, options) - end - - defp add_joined_tag(tags) do - tags - |> Kernel.++([joined_tag(tags)]) - |> Enum.uniq_by(&String.downcase/1) - end - - # If `query` is a URI, returns last component of its path, otherwise returns `query` - defp preprocess_uri_query(query) do - if query =~ ~r/https?:\/\// do - query - |> String.trim_trailing("/") - |> URI.parse() - |> Map.get(:path) - |> String.split("/") - |> Enum.at(-1) - else - query - end - end - - defp joined_tag(tags) do - tags - |> Enum.map(fn tag -> String.capitalize(tag) end) - |> Enum.join() - end - - defp with_fallback(f, fallback \\ []) do - try do - f.() - rescue - error -> - Logger.error("#{__MODULE__} search error: #{inspect(error)}") - fallback - end - end - defp get_author(%{account_id: account_id}) when is_binary(account_id), do: User.get_cached_by_id(account_id) diff --git a/mix.exs b/mix.exs index f49353f7f..195fd3a9d 100644 --- a/mix.exs +++ b/mix.exs @@ -91,7 +91,7 @@ defp elixirc_paths(:test), do: ["lib", "test/support"] defp elixirc_paths(_), do: ["lib"] defp warnings_as_errors(:prod), do: false - defp warnings_as_errors(_), do: false + defp warnings_as_errors(_), do: true # Specifies OAuth dependencies. defp oauth_deps do @@ -198,6 +198,10 @@ defp deps do {:eblurhash, "~> 1.1.0"}, {:open_api_spex, "~> 3.10"}, {:elastix, ">= 0.0.0"}, + {:search_parser, + git: "https://github.com/FloatingGhost/pleroma-contrib-search-parser.git", + ref: "08971a81e68686f9ac465cfb6661d51c5e4e1e7f"}, + {:nimble_parsec, "~> 1.0", override: true}, # indirect dependency version override {:plug, "~> 1.10.4", override: true}, diff --git a/mix.lock b/mix.lock index bec9d025e..20e95c19f 100644 --- a/mix.lock +++ b/mix.lock @@ -83,7 +83,7 @@ "mogrify": {:hex, :mogrify, "0.9.1", "a26f107c4987477769f272bd0f7e3ac4b7b75b11ba597fd001b877beffa9c068", [:mix], [], "hexpm", "134edf189337d2125c0948bf0c228fdeef975c594317452d536224069a5b7f05"}, "mox": {:hex, :mox, "1.0.0", "4b3c7005173f47ff30641ba044eb0fe67287743eec9bd9545e37f3002b0a9f8b", [:mix], [], "hexpm", "201b0a20b7abdaaab083e9cf97884950f8a30a1350a1da403b3145e213c6f4df"}, "myhtmlex": {:git, "https://git.pleroma.social/pleroma/myhtmlex.git", "ad0097e2f61d4953bfef20fb6abddf23b87111e6", [ref: "ad0097e2f61d4953bfef20fb6abddf23b87111e6", submodules: true]}, - "nimble_parsec": {:hex, :nimble_parsec, "0.5.0", "90e2eca3d0266e5c53f8fbe0079694740b9c91b6747f2b7e3c5d21966bba8300", [:mix], [], "hexpm", "5c040b8469c1ff1b10093d3186e2e10dbe483cd73d79ec017993fb3985b8a9b3"}, + "nimble_parsec": {:hex, :nimble_parsec, "1.2.0", "b44d75e2a6542dcb6acf5d71c32c74ca88960421b6874777f79153bbbbd7dccc", [:mix], [], "hexpm", "52b2871a7515a5ac49b00f214e4165a40724cf99798d8e4a65e4fd64ebd002c1"}, "nimble_pool": {:hex, :nimble_pool, "0.1.0", "ffa9d5be27eee2b00b0c634eb649aa27f97b39186fec3c493716c2a33e784ec6", [:mix], [], "hexpm", "343a1eaa620ddcf3430a83f39f2af499fe2370390d4f785cd475b4df5acaf3f9"}, "nodex": {:git, "https://git.pleroma.social/pleroma/nodex", "cb6730f943cfc6aad674c92161be23a8411f15d1", [ref: "cb6730f943cfc6aad674c92161be23a8411f15d1"]}, "oban": {:hex, :oban, "2.3.4", "ec7509b9af2524d55f529cb7aee93d36131ae0bf0f37706f65d2fe707f4d9fd8", [:mix], [{:ecto_sql, ">= 3.4.3", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:postgrex, "~> 0.14", [hex: :postgrex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "c70ca0434758fd1805422ea4446af5e910ddc697c0c861549c8f0eb0cfbd2fdf"}, @@ -115,6 +115,7 @@ "recon": {:hex, :recon, "2.5.1", "430ffa60685ac1efdfb1fe4c97b8767c92d0d92e6e7c3e8621559ba77598678a", [:mix, :rebar3], [], "hexpm", "5721c6b6d50122d8f68cccac712caa1231f97894bab779eff5ff0f886cb44648"}, "remote_ip": {:git, "https://git.pleroma.social/pleroma/remote_ip.git", "b647d0deecaa3acb140854fe4bda5b7e1dc6d1c8", [ref: "b647d0deecaa3acb140854fe4bda5b7e1dc6d1c8"]}, "retry": {:hex, :retry, "0.15.0", "ba6aaeba92905a396c18c299a07e638947b2ba781e914f803202bc1b9ae867c3", [:mix], [], "hexpm", "93d3310bce78c0a30cc94610684340a14adfc9136856a3f662e4d9ce6013c784"}, + "search_parser": {:git, "https://github.com/FloatingGhost/pleroma-contrib-search-parser.git", "08971a81e68686f9ac465cfb6661d51c5e4e1e7f", [ref: "08971a81e68686f9ac465cfb6661d51c5e4e1e7f"]}, "sleeplocks": {:hex, :sleeplocks, "1.1.1", "3d462a0639a6ef36cc75d6038b7393ae537ab394641beb59830a1b8271faeed3", [:rebar3], [], "hexpm", "84ee37aeff4d0d92b290fff986d6a95ac5eedf9b383fadfd1d88e9b84a1c02e1"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.6", "cf344f5692c82d2cd7554f5ec8fd961548d4fd09e7d22f5b62482e5aeaebd4b0", [:make, :mix, :rebar3], [], "hexpm", "bdb0d2471f453c88ff3908e7686f86f9be327d065cc1ec16fa4540197ea04680"}, "sweet_xml": {:hex, :sweet_xml, "0.6.6", "fc3e91ec5dd7c787b6195757fbcf0abc670cee1e4172687b45183032221b66b8", [:mix], [], "hexpm", "2e1ec458f892ffa81f9f8386e3f35a1af6db7a7a37748a64478f13163a1f3573"}, diff --git a/test/pleroma/web/rich_media/parser_test.exs b/test/pleroma/web/rich_media/parser_test.exs index 2f363b012..2fe7f1b0b 100644 --- a/test/pleroma/web/rich_media/parser_test.exs +++ b/test/pleroma/web/rich_media/parser_test.exs @@ -133,13 +133,13 @@ test "parses OEmbed" do assert Parser.parse("http://example.com/oembed") == {:ok, %{ - "author_name" => "‮‭‬bees‬", + "author_name" => "\u202E\u202D\u202Cbees\u202C", "author_url" => "https://www.flickr.com/photos/bees/", "cache_age" => 3600, "flickr_type" => "photo", "height" => "768", "html" => - "\"Bacon", + "\"Bacon", "license" => "All Rights Reserved", "license_id" => 0, "provider_name" => "Flickr", From 0fbf7faf85f792f3f22d1b4c185576cd4edccada Mon Sep 17 00:00:00 2001 From: FloatingGhost Date: Sun, 12 Dec 2021 17:26:06 +0000 Subject: [PATCH 16/16] Remove IO inspect --- lib/pleroma/search/elasticsearch.ex | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/pleroma/search/elasticsearch.ex b/lib/pleroma/search/elasticsearch.ex index f16ae58ce..af2e13e48 100644 --- a/lib/pleroma/search/elasticsearch.ex +++ b/lib/pleroma/search/elasticsearch.ex @@ -52,8 +52,6 @@ def search(%{assigns: %{user: user}} = _conn, %{q: query} = _params, _options) d } } - IO.inspect(q) - out = Pleroma.Elasticsearch.search_activities(q) with {:ok, raw_results} <- out do