Merge branch 'features/hashtag-column' into 'develop'

Insert text representation of hashtags into object["hashtags"]

See merge request pleroma/pleroma!2824
This commit is contained in:
Haelwenn 2020-12-28 10:14:58 +00:00
commit b122b6ffa3
24 changed files with 163 additions and 70 deletions

View file

@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
### Changed
- **Breaking:** Changed storage of hashtags in plain-text to `object->hashtags`, run [`pleroma.database fill_old_hashtags` mix task](docs/administration/CLI_tasks/database.md) for old objects (works while pleroma is running).
- Polls now always return a `voters_count`, even if they are single-choice.
- Admin Emails: The ap id is used as the user link in emails now.
- Improved registration workflow for email confirmation and account approval modes.
@ -443,7 +444,6 @@ switched to a new configuration mechanism, however it was not officially removed
- Static-FE: Fix remote posts not being sanitized
### Fixed
=======
- Rate limiter crashes when there is no explicitly specified ip in the config
- 500 errors when no `Accept` header is present if Static-FE is enabled
- Instance panel not being updated immediately due to wrong `Cache-Control` headers

View file

@ -91,6 +91,18 @@ Can be safely re-run
mix pleroma.database fix_likes_collections
```
## Fill hashtags for old objects
Migrate the hashtags field for old objects, from now back to `months_limit` months.
```sh tab="OTP"
./bin/pleroma_ctl database fill_old_hashtags <months_limit>
```
```sh tab="From Source"
mix pleroma.database fill_old_hashtags <months_limit>
```
## Vacuum the database
### Analyze

View file

@ -128,6 +128,57 @@ def run(["fix_likes_collections"]) do
|> Stream.run()
end
# Backfills the plain-text `hashtags` field on objects that only carry `tag`.
#
# `month_limit` is the raw CLI argument (a string). It must be a whole number
# of months >= 1; any other value is reported through `shell_error/1`.
#
# Matching objects are streamed in batches of 200 and each batch is updated
# inside its own transaction.
def run(["fill_old_hashtags", month_limit]) do
  import Ecto.Query

  start_pleroma()

  # `Integer.parse/1` rather than `String.to_integer/1`: a non-numeric argument
  # yields a readable error instead of an ArgumentError.
  case Integer.parse(month_limit) do
    {months, ""} when months >= 1 ->
      time_limit = DateTime.utc_now() |> Timex.shift(months: -months)

      from(
        o in Object,
        where: fragment("(?)->>'hashtags' is null", o.data),
        where: fragment("(?)->>'tag' != '[]'", o.data),
        # NOTE(review): this keeps objects inserted *before* the cutoff, i.e.
        # older than `month_limit` months. The CLI docs describe the range as
        # "from now to months_limit months" -- confirm which side is intended.
        where: o.inserted_at < ^time_limit,
        # `inserted_at` has to be selected because the progress log reads it.
        select: %{
          id: o.id,
          inserted_at: o.inserted_at,
          tag: fragment("(?)->>'tag'", o.data)
        }
      )
      |> Pleroma.Repo.chunk_stream(200, :batches)
      |> Stream.each(fn objects ->
        Repo.transaction(fn ->
          first = List.first(objects)
          last = List.last(objects)

          Logger.info(
            "fill_old_hashtags: #{first.id} (#{first.inserted_at}) -- " <>
              "#{last.id} (#{last.inserted_at})"
          )

          Enum.each(objects, fn object ->
            # `tag` comes back as JSON text. Only a JSON array can hold
            # plain-text hashtags; any other shape yields an empty list so one
            # odd object does not abort the whole run.
            tags =
              case Jason.decode(object.tag) do
                {:ok, decoded} when is_list(decoded) ->
                  Enum.filter(decoded, &is_bitstring/1)

                _ ->
                  []
              end

            Object
            |> where([o], o.id == ^object.id)
            |> update([o],
              set: [data: fragment("safe_jsonb_set(?, '{hashtags}', ?, true)", o.data, ^tags)]
            )
            |> Repo.update_all([], timeout: :infinity)
          end)
        end)
      end)
      |> Stream.run()

    _ ->
      shell_error("Invalid `month_limit` argument, needs to be an integer greater than 0")
  end
end
def run(["vacuum", args]) do
start_pleroma()

View file

@ -48,14 +48,12 @@ defp item_creation_tags(tags, _, _) do
tags
end
defp hashtags_to_topics(%{data: %{"tag" => tags}}) do
tags
|> Enum.filter(&is_bitstring(&1))
defp hashtags_to_topics(object) do
object
|> Object.hashtags()
|> Enum.map(fn tag -> "hashtag:" <> tag end)
end
defp hashtags_to_topics(_), do: []
defp remote_topics(%{local: true}), do: []
defp remote_topics(%{actor: actor}) when is_binary(actor),

View file

@ -18,7 +18,8 @@ defmodule Pleroma.Constants do
"emoji",
"context_id",
"deleted_activity_id",
"pleroma_internal"
"pleroma_internal",
"hashtags"
]
)

View file

@ -346,4 +346,8 @@ def replies(object, opts \\ []) do
def self_replies(object, opts \\ []),
do: replies(object, Keyword.put(opts, :self_only, true))
@doc """
Returns the plain-text hashtags of an object as a list.

The dedicated `"hashtags"` field is used when the key is present (a `nil`
value yields `[]`). Otherwise the string entries of a list-valued `"tag"`
field are returned. Any other input yields `[]`.
"""
def hashtags(%Object{data: %{"hashtags" => hashtags}}), do: hashtags || []

# The guard keeps non-list `"tag"` values (e.g. `nil` or a single string) out of
# `Enum.filter/2`, which would raise on them; they fall through to `[]` instead.
def hashtags(%Object{data: %{"tag" => tags}}) when is_list(tags),
  do: Enum.filter(tags, &is_bitstring/1)

def hashtags(_), do: []
end

View file

@ -669,7 +669,7 @@ defp restrict_tag_reject(_query, %{tag_reject: _tag_reject, skip_preload: true})
defp restrict_tag_reject(query, %{tag_reject: [_ | _] = tag_reject}) do
from(
[_activity, object] in query,
where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject)
where: fragment("not (?)->'hashtags' \\?| (?)", object.data, ^tag_reject)
)
end
@ -682,7 +682,7 @@ defp restrict_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) do
defp restrict_tag_all(query, %{tag_all: [_ | _] = tag_all}) do
from(
[_activity, object] in query,
where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all)
where: fragment("(?)->'hashtags' \\?& (?)", object.data, ^tag_all)
)
end
@ -695,14 +695,14 @@ defp restrict_tag(_query, %{tag: _tag, skip_preload: true}) do
defp restrict_tag(query, %{tag: tag}) when is_list(tag) do
from(
[_activity, object] in query,
where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag)
where: fragment("(?)->'hashtags' \\?| (?)", object.data, ^tag)
)
end
defp restrict_tag(query, %{tag: tag}) when is_binary(tag) do
from(
[_activity, object] in query,
where: fragment("(?)->'tag' \\? (?)", object.data, ^tag)
where: fragment("(?)->'hashtags' \\? (?)", object.data, ^tag)
)
end

View file

@ -8,6 +8,7 @@ defmodule Pleroma.Web.ActivityPub.MRF.SimplePolicy do
alias Pleroma.Config
alias Pleroma.FollowingRelationship
alias Pleroma.Object
alias Pleroma.User
alias Pleroma.Web.ActivityPub.MRF
@ -74,9 +75,11 @@ defp check_media_nsfw(
object =
if MRF.subdomain_match?(media_nsfw, actor_host) do
tags = (child_object["tag"] || []) ++ ["nsfw"]
child_object = Map.put(child_object, "tag", tags)
child_object = Map.put(child_object, "sensitive", true)
child_object =
child_object
|> Map.put("hashtags", Object.hashtags(%Object{data: child_object}) ++ ["nsfw"])
|> Map.put("sensitive", true)
Map.put(object, "object", child_object)
else
object

View file

@ -312,16 +312,15 @@ def fix_emoji(%{"tag" => %{"type" => "Emoji"} = tag} = object) do
def fix_emoji(object), do: object
def fix_tag(%{"tag" => tag} = object) when is_list(tag) do
tags =
hashtags =
tag
|> Enum.filter(fn data -> data["type"] == "Hashtag" and data["name"] end)
|> Enum.map(fn %{"name" => name} ->
name
|> String.slice(1..-1)
|> String.downcase()
|> Enum.map(fn
%{"name" => "#" <> hashtag} -> String.downcase(hashtag)
%{"name" => hashtag} -> String.downcase(hashtag)
end)
Map.put(object, "tag", tag ++ tags)
Map.put(object, "hashtags", hashtags)
end
def fix_tag(%{"tag" => %{} = tag} = object) do
@ -864,23 +863,18 @@ def maybe_fix_object_url(%{"object" => object} = data) when is_binary(object) do
def maybe_fix_object_url(data), do: data
def add_hashtags(object) do
tags =
(object["tag"] || [])
|> Enum.map(fn
# Expand internal representation tags into AS2 tags.
tag when is_binary(tag) ->
hashtags =
%Object{data: object}
|> Object.hashtags()
|> Enum.map(fn tag ->
%{
"href" => Pleroma.Web.Endpoint.url() <> "/tags/#{tag}",
"name" => "##{tag}",
"type" => "Hashtag"
}
# Do not process tags which are already AS2 tag objects.
tag when is_map(tag) ->
tag
end)
Map.put(object, "tag", tags)
Map.put(object, "tag", hashtags ++ (object["tag"] || []))
end
# TODO These should be added on our side on insertion, it doesn't make much
@ -936,7 +930,7 @@ def set_sensitive(%{"sensitive" => _} = object) do
end
def set_sensitive(object) do
tags = object["tag"] || []
tags = object["hashtags"] || object["tag"] || []
Map.put(object, "sensitive", "nsfw" in tags)
end

View file

@ -310,7 +310,16 @@ def make_note_data(%ActivityDraft{} = draft) do
"context" => draft.context,
"attachment" => draft.attachments,
"actor" => draft.user.ap_id,
"tag" => Keyword.values(draft.tags) |> Enum.uniq()
"tag" => Enum.filter(draft.tags, &is_map(&1)) |> Enum.uniq(),
"hashtags" =>
draft.tags
|> Enum.reduce([], fn
# Why so many formats
{:name, x}, acc -> if is_bitstring(x), do: [x | acc], else: acc
{"#" <> _, x}, acc -> if is_bitstring(x), do: [x | acc], else: acc
x, acc -> if is_bitstring(x), do: [x | acc], else: acc
end)
|> Enum.uniq()
}
|> add_in_reply_to(draft.in_reply_to)
|> Map.merge(draft.extra)

View file

@ -32,6 +32,7 @@ def prepare_activity(activity, opts \\ []) do
%{
activity: activity,
object: object,
data: Map.get(object, :data),
actor: actor
}

View file

@ -347,7 +347,7 @@ def render("show.json", %{activity: %{data: %{"object" => _object}} = activity}
media_attachments: attachments,
poll: render(PollView, "show.json", object: object, for: opts[:for]),
mentions: mentions,
tags: build_tags(tags),
tags: build_tags(Object.hashtags(object)),
application: %{
name: "Web",
website: nil

View file

@ -22,8 +22,8 @@
<link type="text/html" href='<%= @data["external_url"] %>' rel="alternate"/>
<% end %>
<%= for tag <- @data["tag"] || [] do %>
<category term="<%= tag %>"></category>
<%= for hashtag <- Object.hashtags(@object) do %>
<category term="<%= hashtag %>"></category>
<% end %>
<%= for attachment <- @data["attachment"] || [] do %>

View file

@ -21,8 +21,8 @@
<link><%= @data["external_url"] %></link>
<% end %>
<%= for tag <- @data["tag"] || [] do %>
<category term="<%= tag %>"></category>
<%= for hashtag <- Object.hashtags(@object) do %>
<category term="<%= hashtag %>"></category>
<% end %>
<%= for attachment <- @data["attachment"] || [] do %>

View file

@ -41,8 +41,8 @@
<% end %>
<% end %>
<%= for tag <- @data["tag"] || [] do %>
<category term="<%= tag %>"></category>
<%= for hashtag <- Object.hashtags(@object) do %>
<category term="<%= hashtag %>"></category>
<% end %>
<%= for {emoji, file} <- @data["emoji"] || %{} do %>

View file

@ -0,0 +1,11 @@
defmodule Pleroma.Repo.Migrations.AddHashtagsIndexToObjects do
  use Ecto.Migration

  # Replaces the GIN index on `data->'tag'` with one on `data->'hashtags'`.
  def change do
    tag_index = index(:objects, ["(data->'tag')"], using: :gin, name: :objects_tags)

    hashtags_index =
      index(:objects, ["(data->'hashtags')"], using: :gin, name: :objects_hashtags)

    # Both steps are conditional: the old index is dropped only if it exists,
    # the new one created only if it does not.
    drop_if_exists(tag_index)
    create_if_not_exists(hashtags_index)
  end
end

View file

@ -78,7 +78,7 @@ test "with no attachments doesn't produce public:media topics", %{activity: acti
end
test "converts tags to hash tags", %{activity: %{object: %{data: data} = object} = activity} do
tagged_data = Map.put(data, "tag", ["foo", "bar"])
tagged_data = Map.put(data, "hashtags", ["foo", "bar"])
activity = %{activity | object: %{object | data: tagged_data}}
topics = Topics.get_activity_topics(activity)

View file

@ -78,7 +78,7 @@ test "has a matching host" do
assert SimplePolicy.filter(media_message) ==
{:ok,
media_message
|> put_in(["object", "tag"], ["foo", "nsfw"])
|> put_in(["object", "hashtags"], ["foo", "nsfw"])
|> put_in(["object", "sensitive"], true)}
assert SimplePolicy.filter(local_message) == {:ok, local_message}
@ -92,7 +92,7 @@ test "match with wildcard domain" do
assert SimplePolicy.filter(media_message) ==
{:ok,
media_message
|> put_in(["object", "tag"], ["foo", "nsfw"])
|> put_in(["object", "hashtags"], ["foo", "nsfw"])
|> put_in(["object", "sensitive"], true)}
assert SimplePolicy.filter(local_message) == {:ok, local_message}
@ -105,7 +105,7 @@ defp build_media_message do
"type" => "Create",
"object" => %{
"attachment" => [%{}],
"tag" => ["foo"],
"hashtags" => ["foo"],
"sensitive" => false
}
}

View file

@ -39,7 +39,7 @@ test "it works for incoming notices with tag not being an array (kroeg)" do
{:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data)
object = Object.normalize(data["object"])
assert "test" in object.data["tag"]
assert ["test"] == object.data["hashtags"]
end
test "it cleans up incoming notices which are not really DMs" do
@ -220,7 +220,7 @@ test "it works for incoming notices with hashtags" do
{:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data)
object = Object.normalize(data["object"])
assert Enum.at(object.data["tag"], 2) == "moo"
assert object.data["hashtags"] == ["moo"]
end
test "it works for incoming notices with contentMap" do

View file

@ -204,30 +204,37 @@ test "it strips internal fields" do
{:ok, activity} = CommonAPI.post(user, %{status: "#2hu :firefox:"})
{:ok, modified} = Transmogrifier.prepare_outgoing(activity.data)
{:ok, %{"object" => modified_object}} = Transmogrifier.prepare_outgoing(activity.data)
assert length(modified["object"]["tag"]) == 2
assert [
%{"name" => "#2hu", "type" => "Hashtag"},
%{"name" => ":firefox:", "type" => "Emoji"}
] = modified_object["tag"]
assert is_nil(modified["object"]["emoji"])
assert is_nil(modified["object"]["like_count"])
assert is_nil(modified["object"]["announcements"])
assert is_nil(modified["object"]["announcement_count"])
assert is_nil(modified["object"]["context_id"])
refute Map.has_key?(modified_object, "hashtags")
refute Map.has_key?(modified_object, "emoji")
refute Map.has_key?(modified_object, "like_count")
refute Map.has_key?(modified_object, "announcements")
refute Map.has_key?(modified_object, "announcement_count")
refute Map.has_key?(modified_object, "context_id")
end
test "it strips internal fields of article" do
activity = insert(:article_activity)
{:ok, modified} = Transmogrifier.prepare_outgoing(activity.data)
{:ok, %{"object" => modified_object}} = Transmogrifier.prepare_outgoing(activity.data)
assert length(modified["object"]["tag"]) == 2
assert [
%{"name" => "#2hu", "type" => "Hashtag"},
%{"name" => ":2hu:", "type" => "Emoji"}
] = modified_object["tag"]
assert is_nil(modified["object"]["emoji"])
assert is_nil(modified["object"]["like_count"])
assert is_nil(modified["object"]["announcements"])
assert is_nil(modified["object"]["announcement_count"])
assert is_nil(modified["object"]["context_id"])
assert is_nil(modified["object"]["likes"])
refute Map.has_key?(modified_object, "hashtags")
refute Map.has_key?(modified_object, "emoji")
refute Map.has_key?(modified_object, "like_count")
refute Map.has_key?(modified_object, "announcements")
refute Map.has_key?(modified_object, "announcement_count")
refute Map.has_key?(modified_object, "context_id")
end
test "the directMessage flag is present" do

View file

@ -591,7 +591,8 @@ test "returns note data" do
"context" => "2hu",
"sensitive" => false,
"summary" => "test summary",
"tag" => ["jimm"],
"hashtags" => ["jimm"],
"tag" => [],
"to" => [user2.ap_id],
"type" => "Note",
"custom_tag" => "test"

View file

@ -493,7 +493,8 @@ test "it de-duplicates tags" do
object = Object.normalize(activity)
assert object.data["tag"] == ["2hu"]
assert object.data["tag"] == []
assert object.data["hashtags"] == ["2hu"]
end
test "it adds emoji in the object" do

View file

@ -262,8 +262,8 @@ test "a note activity" do
mentions: [],
tags: [
%{
name: "#{object_data["tag"]}",
url: "/tag/#{object_data["tag"]}"
name: "2hu",
url: "/tag/2hu"
}
],
application: %{

View file

@ -93,7 +93,7 @@ def note_factory(attrs \\ %{}) do
"like_count" => 0,
"context" => "2hu",
"summary" => "2hu",
"tag" => ["2hu"],
"hashtags" => ["2hu"],
"emoji" => %{
"2hu" => "corndog.png"
}