forked from AkkomaGang/akkoma
Rework task indexing to share code with the main module
The code in the main module now scrubs new posts too
This commit is contained in:
parent
117f525fd6
commit
0cf3654907
2 changed files with 34 additions and 47 deletions
|
@ -51,40 +51,9 @@ def run(["index"]) do
|
||||||
),
|
),
|
||||||
timeout: :infinity
|
timeout: :infinity
|
||||||
)
|
)
|
||||||
|
|> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
|
||||||
|
|> Stream.filter(fn o -> not is_nil(o) end)
|
||||||
|> Stream.chunk_every(chunk_size)
|
|> Stream.chunk_every(chunk_size)
|
||||||
|> Stream.map(fn objects ->
|
|
||||||
Enum.map(objects, fn object ->
|
|
||||||
data = object.data
|
|
||||||
|
|
||||||
content_str =
|
|
||||||
case data["content"] do
|
|
||||||
[nil | rest] -> to_string(rest)
|
|
||||||
str -> str
|
|
||||||
end
|
|
||||||
|
|
||||||
{:ok, published, _} = DateTime.from_iso8601(data["published"])
|
|
||||||
|
|
||||||
content =
|
|
||||||
with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str),
|
|
||||||
trimmed <- String.trim(scrubbed) do
|
|
||||||
trimmed
|
|
||||||
end
|
|
||||||
|
|
||||||
# Only index if there is anything in the string. If there is a single symbol,
|
|
||||||
# it's probably a dot from mastodon posts with just the picture
|
|
||||||
if String.length(content) > 1 do
|
|
||||||
%{
|
|
||||||
id: object.id,
|
|
||||||
content: content,
|
|
||||||
ap: data["id"],
|
|
||||||
published: published |> DateTime.to_unix()
|
|
||||||
}
|
|
||||||
else
|
|
||||||
nil
|
|
||||||
end
|
|
||||||
end)
|
|
||||||
|> Enum.filter(fn o -> not is_nil(o) end)
|
|
||||||
end)
|
|
||||||
|> Stream.transform(0, fn objects, acc ->
|
|> Stream.transform(0, fn objects, acc ->
|
||||||
new_acc = acc + Enum.count(objects)
|
new_acc = acc + Enum.count(objects)
|
||||||
|
|
||||||
|
|
|
@ -39,28 +39,46 @@ def search(user, query, options \\ []) do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def add_to_index(activity) do
|
def object_to_search_data(object) do
|
||||||
object = activity.object
|
if not is_nil(object) and object.data["type"] == "Note" and
|
||||||
|
|
||||||
if activity.data["type"] == "Create" and not is_nil(object) and object.data["type"] == "Note" and
|
|
||||||
Pleroma.Constants.as_public() in object.data["to"] do
|
Pleroma.Constants.as_public() in object.data["to"] do
|
||||||
data = object.data
|
data = object.data
|
||||||
|
|
||||||
endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
|
content_str =
|
||||||
|
case data["content"] do
|
||||||
|
[nil | rest] -> to_string(rest)
|
||||||
|
str -> str
|
||||||
|
end
|
||||||
|
|
||||||
{:ok, published, _} = DateTime.from_iso8601(data["published"])
|
content =
|
||||||
|
with {:ok, scrubbed} <- FastSanitize.strip_tags(content_str),
|
||||||
|
trimmed <- String.trim(scrubbed) do
|
||||||
|
trimmed
|
||||||
|
end
|
||||||
|
|
||||||
|
if String.length(content) > 1 do
|
||||||
|
{:ok, published, _} = DateTime.from_iso8601(data["published"])
|
||||||
|
|
||||||
|
%{
|
||||||
|
id: object.id,
|
||||||
|
content: content,
|
||||||
|
ap: data["id"],
|
||||||
|
published: published |> DateTime.to_unix()
|
||||||
|
}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def add_to_index(activity) do
|
||||||
|
maybe_search_data = object_to_search_data(activity)
|
||||||
|
|
||||||
|
if activity.data["type"] == "Create" and maybe_search_data do
|
||||||
|
endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
|
||||||
|
|
||||||
{:ok, result} =
|
{:ok, result} =
|
||||||
Pleroma.HTTP.post(
|
Pleroma.HTTP.post(
|
||||||
"#{endpoint}/indexes/objects/documents",
|
"#{endpoint}/indexes/objects/documents",
|
||||||
Jason.encode!([
|
Jason.encode!([maybe_search_data])
|
||||||
%{
|
|
||||||
id: object.id,
|
|
||||||
content: data["content"] |> Pleroma.HTML.filter_tags(),
|
|
||||||
ap: data["id"],
|
|
||||||
published: published |> DateTime.to_unix()
|
|
||||||
}
|
|
||||||
])
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if not Map.has_key?(Jason.decode!(result.body), "updateId") do
|
if not Map.has_key?(Jason.decode!(result.body), "updateId") do
|
||||||
|
|
Loading…
Reference in a new issue