Rework task indexing to share code with the main module

The code in the main module now scrubs new posts too
This commit is contained in:
Ekaterina Vaartis 2021-08-22 22:53:18 +03:00 committed by FloatingGhost
parent 117f525fd6
commit 0cf3654907
2 changed files with 34 additions and 47 deletions

View file

@ -51,40 +51,9 @@ def run(["index"]) do
), ),
timeout: :infinity timeout: :infinity
) )
|> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
|> Stream.filter(fn o -> not is_nil(o) end)
|> Stream.chunk_every(chunk_size) |> Stream.chunk_every(chunk_size)
|> Stream.map(fn objects ->
Enum.map(objects, fn object ->
data = object.data
content_str =
case data["content"] do
[nil | rest] -> to_string(rest)
str -> str
end
{:ok, published, _} = DateTime.from_iso8601(data["published"])
content =
with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str),
trimmed <- String.trim(scrubbed) do
trimmed
end
# Only index if there is anything in the string. If there is a single symbol,
# it's probably a dot from mastodon posts with just the picture
if String.length(content) > 1 do
%{
id: object.id,
content: content,
ap: data["id"],
published: published |> DateTime.to_unix()
}
else
nil
end
end)
|> Enum.filter(fn o -> not is_nil(o) end)
end)
|> Stream.transform(0, fn objects, acc -> |> Stream.transform(0, fn objects, acc ->
new_acc = acc + Enum.count(objects) new_acc = acc + Enum.count(objects)

View file

@ -39,28 +39,46 @@ def search(user, query, options \\ []) do
end end
end end
def add_to_index(activity) do def object_to_search_data(object) do
object = activity.object if not is_nil(object) and object.data["type"] == "Note" and
if activity.data["type"] == "Create" and not is_nil(object) and object.data["type"] == "Note" and
Pleroma.Constants.as_public() in object.data["to"] do Pleroma.Constants.as_public() in object.data["to"] do
data = object.data data = object.data
endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) content_str =
case data["content"] do
[nil | rest] -> to_string(rest)
str -> str
end
content =
with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str),
trimmed <- String.trim(scrubbed) do
trimmed
end
if String.length(content) > 1 do
{:ok, published, _} = DateTime.from_iso8601(data["published"]) {:ok, published, _} = DateTime.from_iso8601(data["published"])
%{
id: object.id,
content: content,
ap: data["id"],
published: published |> DateTime.to_unix()
}
end
end
end
def add_to_index(activity) do
maybe_search_data = object_to_search_data(activity)
if activity.data["type"] == "Create" and maybe_search_data do
endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
{:ok, result} = {:ok, result} =
Pleroma.HTTP.post( Pleroma.HTTP.post(
"#{endpoint}/indexes/objects/documents", "#{endpoint}/indexes/objects/documents",
Jason.encode!([ Jason.encode!([maybe_search_data])
%{
id: object.id,
content: data["content"] |> Pleroma.HTML.filter_tags(),
ap: data["id"],
published: published |> DateTime.to_unix()
}
])
) )
if not Map.has_key?(Jason.decode!(result.body), "updateId") do if not Map.has_key?(Jason.decode!(result.body), "updateId") do