From 0cf365490781fc7d77b43e69bb7de90bb3d1c044 Mon Sep 17 00:00:00 2001 From: Ekaterina Vaartis Date: Sun, 22 Aug 2021 22:53:18 +0300 Subject: [PATCH] Rework task indexing to share code with the main module The code in the main module now scrubs new posts too --- lib/mix/tasks/pleroma/search/meilisearch.ex | 35 +--------------- lib/pleroma/search/meilisearch.ex | 46 ++++++++++++++------- 2 files changed, 34 insertions(+), 47 deletions(-) diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex index 3704e0bdc..b5a394e34 100644 --- a/lib/mix/tasks/pleroma/search/meilisearch.ex +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -51,40 +51,9 @@ def run(["index"]) do ), timeout: :infinity ) + |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1) + |> Stream.filter(fn o -> not is_nil(o) end) |> Stream.chunk_every(chunk_size) - |> Stream.map(fn objects -> - Enum.map(objects, fn object -> - data = object.data - - content_str = - case data["content"] do - [nil | rest] -> to_string(rest) - str -> str - end - - {:ok, published, _} = DateTime.from_iso8601(data["published"]) - - content = - with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str), - trimmed <- String.trim(scrubbed) do - trimmed - end - - # Only index if there is anything in the string. If there is a single symbol, - # it's probably a dot from mastodon posts with just the picture - if String.length(content) > 1 do - %{ - id: object.id, - content: content, - ap: data["id"], - published: published |> DateTime.to_unix() - } - else - nil - end - end) - |> Enum.filter(fn o -> not is_nil(o) end) - end) |> Stream.transform(0, fn objects, acc -> new_acc = acc + Enum.count(objects) diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 87fdeaf5e..10468e36c 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -39,28 +39,46 @@ def search(user, query, options \\ []) do end end - def add_to_index(activity) do - object = activity.object - - if activity.data["type"] == "Create" and not is_nil(object) and object.data["type"] == "Note" and + def object_to_search_data(object) do + if not is_nil(object) and object.data["type"] == "Note" and Pleroma.Constants.as_public() in object.data["to"] do data = object.data - endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + content_str = + case data["content"] do + [nil | rest] -> to_string(rest) + str -> str + end - {:ok, published, _} = DateTime.from_iso8601(data["published"]) + content = + with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str), + trimmed <- String.trim(scrubbed) do + trimmed + end + + if String.length(content) > 1 do + {:ok, published, _} = DateTime.from_iso8601(data["published"]) + + %{ + id: object.id, + content: content, + ap: data["id"], + published: published |> DateTime.to_unix() + } + end + end + end + + def add_to_index(activity) do + maybe_search_data = object_to_search_data(activity) + + if activity.data["type"] == "Create" and maybe_search_data do + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) {:ok, result} = Pleroma.HTTP.post( "#{endpoint}/indexes/objects/documents", - Jason.encode!([ - %{ - id: object.id, - content: data["content"] |> Pleroma.HTML.filter_tags(), - ap: data["id"], - published: published |> DateTime.to_unix() - } - ]) + Jason.encode!([maybe_search_data]) ) if not Map.has_key?(Jason.decode!(result.body), "updateId") do