Merge branch 'feature/object-hashtags-rework' into 'develop'

Hashtags extraction from objects. Background migration infrastructure.

Closes #1840 and #2455

See merge request pleroma/pleroma!3213
This commit is contained in:
rinpatch 2021-03-15 09:35:46 +00:00
commit 8194622a72
46 changed files with 1359 additions and 175 deletions

View file

@ -50,6 +50,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Pleroma API: Reroute `/api/pleroma/*` to `/api/v1/pleroma/*` - Pleroma API: Reroute `/api/pleroma/*` to `/api/v1/pleroma/*`
</details> </details>
- Improved hashtag timeline performance (requires a background migration).
### Added ### Added

View file

@ -654,6 +654,10 @@
config :pleroma, :database, rum_enabled: false config :pleroma, :database, rum_enabled: false
config :pleroma, :features, improved_hashtag_timeline: :auto
config :pleroma, :populate_hashtags_table, fault_rate_allowance: 0.01
config :pleroma, :env, Mix.env() config :pleroma, :env, Mix.env()
config :http_signatures, config :http_signatures,

View file

@ -459,6 +459,42 @@
} }
] ]
}, },
%{
group: :pleroma,
key: :features,
type: :group,
description: "Customizable features",
children: [
%{
key: :improved_hashtag_timeline,
type: {:dropdown, :atom},
description:
"Setting to force toggle / force disable improved hashtags timeline. `:enabled` forces hashtags to be fetched from `hashtags` table for hashtags timeline. `:disabled` forces object-embedded hashtags to be used (slower). Keep it `:auto` for automatic behaviour (it is auto-set to `:enabled` [unless overridden] when HashtagsTableMigrator completes).",
suggestions: [:auto, :enabled, :disabled]
}
]
},
%{
group: :pleroma,
key: :populate_hashtags_table,
type: :group,
description: "`populate_hashtags_table` background migration settings",
children: [
%{
key: :fault_rate_allowance,
type: :float,
description:
"Max accepted rate of objects that failed in the migration. Any value from 0.0 which tolerates no errors to 1.0 which will enable the feature even if hashtags transfer failed for all records.",
suggestions: [0.01]
},
%{
key: :sleep_interval_ms,
type: :integer,
description:
"Sleep interval between each chunk of processed records in order to decrease the load on the system (defaults to 0 and should be keep default on most instances)."
}
]
},
%{ %{
group: :pleroma, group: :pleroma,
key: :instance, key: :instance,

View file

@ -65,6 +65,13 @@ To add configuration to your config file, you can copy it from the base config.
* `show_reactions`: Let favourites and emoji reactions be viewed through the API (default: `true`). * `show_reactions`: Let favourites and emoji reactions be viewed through the API (default: `true`).
* `password_reset_token_validity`: The time after which reset tokens aren't accepted anymore, in seconds (default: one day). * `password_reset_token_validity`: The time after which reset tokens aren't accepted anymore, in seconds (default: one day).
## :database
* `improved_hashtag_timeline`: Setting to force toggle / force disable improved hashtags timeline. `:enabled` forces hashtags to be fetched from `hashtags` table for hashtags timeline. `:disabled` forces object-embedded hashtags to be used (slower). Keep it `:auto` for automatic behaviour (it is auto-set to `:enabled` [unless overridden] when HashtagsTableMigrator completes).
## Background migrations
* `populate_hashtags_table/sleep_interval_ms`: Sleep interval between each chunk of processed records in order to decrease the load on the system (defaults to 0 and should be keep default on most instances).
* `populate_hashtags_table/fault_rate_allowance`: Max rate of failed objects to actually processed objects in order to enable the feature (any value from 0.0 which tolerates no errors to 1.0 which will enable the feature even if hashtags transfer failed for all records).
## Welcome ## Welcome
* `direct_message`: - welcome message sent as a direct message. * `direct_message`: - welcome message sent as a direct message.
* `enabled`: Enables the send a direct message to a newly registered user. Defaults to `false`. * `enabled`: Enables the send a direct message to a newly registered user. Defaults to `false`.

View file

@ -8,10 +8,13 @@ defmodule Mix.Tasks.Pleroma.Database do
alias Pleroma.Object alias Pleroma.Object
alias Pleroma.Repo alias Pleroma.Repo
alias Pleroma.User alias Pleroma.User
require Logger require Logger
require Pleroma.Constants require Pleroma.Constants
import Ecto.Query import Ecto.Query
import Mix.Pleroma import Mix.Pleroma
use Mix.Task use Mix.Task
@shortdoc "A collection of database related tasks" @shortdoc "A collection of database related tasks"
@ -214,4 +217,32 @@ def run(["set_text_search_config", tsconfig]) do
shell_info('Done.') shell_info('Done.')
end end
end end
# Rolls back a specific migration (leaving subsequent migrations applied).
# WARNING: imposes a risk of unrecoverable data loss — proceed at your own responsibility.
# Based on https://stackoverflow.com/a/53825840
def run(["rollback", version]) do
prompt = "SEVERE WARNING: this operation may result in unrecoverable data loss. Continue?"
if shell_prompt(prompt, "n") in ~w(Yn Y y) do
{_, result, _} =
Ecto.Migrator.with_repo(Pleroma.Repo, fn repo ->
version = String.to_integer(version)
re = ~r/^#{version}_.*\.exs/
path = Ecto.Migrator.migrations_path(repo)
with {_, "" <> file} <- {:find, Enum.find(File.ls!(path), &String.match?(&1, re))},
{_, [{mod, _} | _]} <- {:compile, Code.compile_file(Path.join(path, file))},
{_, :ok} <- {:rollback, Ecto.Migrator.down(repo, version, mod)} do
{:ok, "Reversed migration: #{file}"}
else
{:find, _} -> {:error, "No migration found with version prefix: #{version}"}
{:compile, e} -> {:error, "Problem compiling migration module: #{inspect(e)}"}
{:rollback, e} -> {:error, "Problem reversing migration: #{inspect(e)}"}
end
end)
shell_info(inspect(result))
end
end
end end

View file

@ -113,6 +113,7 @@ def with_preloaded_bookmark(query, %User{} = user) do
from([a] in query, from([a] in query,
left_join: b in Bookmark, left_join: b in Bookmark,
on: b.user_id == ^user.id and b.activity_id == a.id, on: b.user_id == ^user.id and b.activity_id == a.id,
as: :bookmark,
preload: [bookmark: b] preload: [bookmark: b]
) )
end end
@ -123,6 +124,7 @@ def with_preloaded_report_notes(query) do
from([a] in query, from([a] in query,
left_join: r in ReportNote, left_join: r in ReportNote,
on: a.id == r.activity_id, on: a.id == r.activity_id,
as: :report_note,
preload: [report_notes: r] preload: [report_notes: r]
) )
end end

View file

@ -48,14 +48,12 @@ defp item_creation_tags(tags, _, _) do
tags tags
end end
defp hashtags_to_topics(%{data: %{"tag" => tags}}) do defp hashtags_to_topics(object) do
tags object
|> Enum.filter(&is_bitstring(&1)) |> Object.hashtags()
|> Enum.map(fn tag -> "hashtag:" <> tag end) |> Enum.map(fn hashtag -> "hashtag:" <> hashtag end)
end end
defp hashtags_to_topics(_), do: []
defp remote_topics(%{local: true}), do: [] defp remote_topics(%{local: true}), do: []
defp remote_topics(%{actor: actor}) when is_binary(actor), defp remote_topics(%{actor: actor}) when is_binary(actor),

View file

@ -103,9 +103,7 @@ def start(_type, _args) do
task_children(@mix_env) ++ task_children(@mix_env) ++
dont_run_in_test(@mix_env) ++ dont_run_in_test(@mix_env) ++
chat_child(chat_enabled?()) ++ chat_child(chat_enabled?()) ++
[ [Pleroma.Gopher.Server]
Pleroma.Gopher.Server
]
# See http://elixir-lang.org/docs/stable/elixir/Supervisor.html # See http://elixir-lang.org/docs/stable/elixir/Supervisor.html
# for other strategies and supported options # for other strategies and supported options
@ -230,6 +228,12 @@ defp dont_run_in_test(_) do
keys: :duplicate, keys: :duplicate,
partitions: System.schedulers_online() partitions: System.schedulers_online()
]} ]}
] ++ background_migrators()
end
defp background_migrators do
[
Pleroma.Migrators.HashtagsTableMigrator
] ]
end end

View file

@ -99,4 +99,8 @@ def restrict_unauthenticated_access?(resource, kind) do
def oauth_consumer_strategies, do: get([:auth, :oauth_consumer_strategies], []) def oauth_consumer_strategies, do: get([:auth, :oauth_consumer_strategies], [])
def oauth_consumer_enabled?, do: oauth_consumer_strategies() != [] def oauth_consumer_enabled?, do: oauth_consumer_strategies() != []
def feature_enabled?(feature_name) do
get([:features, feature_name]) not in [nil, false, :disabled, :auto]
end
end end

View file

@ -0,0 +1,45 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.DataMigration do
use Ecto.Schema
alias Pleroma.DataMigration
alias Pleroma.DataMigration.State
alias Pleroma.Repo
import Ecto.Changeset
import Ecto.Query
schema "data_migrations" do
field(:name, :string)
field(:state, State, default: :pending)
field(:feature_lock, :boolean, default: false)
field(:params, :map, default: %{})
field(:data, :map, default: %{})
timestamps()
end
def changeset(data_migration, params \\ %{}) do
data_migration
|> cast(params, [:name, :state, :feature_lock, :params, :data])
|> validate_required([:name])
|> unique_constraint(:name)
end
def update_one_by_id(id, params \\ %{}) do
with {1, _} <-
from(dm in DataMigration, where: dm.id == ^id)
|> Repo.update_all(set: params) do
:ok
end
end
def get_by_name(name) do
Repo.get_by(DataMigration, name: name)
end
def populate_hashtags_table, do: get_by_name("populate_hashtags_table")
end

View file

@ -9,7 +9,6 @@ defmodule Pleroma.Delivery do
alias Pleroma.Object alias Pleroma.Object
alias Pleroma.Repo alias Pleroma.Repo
alias Pleroma.User alias Pleroma.User
alias Pleroma.User
import Ecto.Changeset import Ecto.Changeset
import Ecto.Query import Ecto.Query

View file

@ -17,3 +17,11 @@
follow_accept: 2, follow_accept: 2,
follow_reject: 3 follow_reject: 3
) )
defenum(Pleroma.DataMigration.State,
pending: 1,
running: 2,
complete: 3,
failed: 4,
manual: 5
)

106
lib/pleroma/hashtag.ex Normal file
View file

@ -0,0 +1,106 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2020 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Hashtag do
use Ecto.Schema
import Ecto.Changeset
import Ecto.Query
alias Ecto.Multi
alias Pleroma.Hashtag
alias Pleroma.Object
alias Pleroma.Repo
schema "hashtags" do
field(:name, :string)
many_to_many(:objects, Object, join_through: "hashtags_objects", on_replace: :delete)
timestamps()
end
def normalize_name(name) do
name
|> String.downcase()
|> String.trim()
end
def get_or_create_by_name(name) do
changeset = changeset(%Hashtag{}, %{name: name})
Repo.insert(
changeset,
on_conflict: [set: [name: get_field(changeset, :name)]],
conflict_target: :name,
returning: true
)
end
def get_or_create_by_names(names) when is_list(names) do
names = Enum.map(names, &normalize_name/1)
timestamp = NaiveDateTime.truncate(NaiveDateTime.utc_now(), :second)
structs =
Enum.map(names, fn name ->
%Hashtag{}
|> changeset(%{name: name})
|> Map.get(:changes)
|> Map.merge(%{inserted_at: timestamp, updated_at: timestamp})
end)
try do
with {:ok, %{query_op: hashtags}} <-
Multi.new()
|> Multi.insert_all(:insert_all_op, Hashtag, structs,
on_conflict: :nothing,
conflict_target: :name
)
|> Multi.run(:query_op, fn _repo, _changes ->
{:ok, Repo.all(from(ht in Hashtag, where: ht.name in ^names))}
end)
|> Repo.transaction() do
{:ok, hashtags}
else
{:error, _name, value, _changes_so_far} -> {:error, value}
end
rescue
e -> {:error, e}
end
end
def changeset(%Hashtag{} = struct, params) do
struct
|> cast(params, [:name])
|> update_change(:name, &normalize_name/1)
|> validate_required([:name])
|> unique_constraint(:name)
end
def unlink(%Object{id: object_id}) do
with {_, hashtag_ids} <-
from(hto in "hashtags_objects",
where: hto.object_id == ^object_id,
select: hto.hashtag_id
)
|> Repo.delete_all(),
{:ok, unreferenced_count} <- delete_unreferenced(hashtag_ids) do
{:ok, length(hashtag_ids), unreferenced_count}
end
end
@delete_unreferenced_query """
DELETE FROM hashtags WHERE id IN
(SELECT hashtags.id FROM hashtags
LEFT OUTER JOIN hashtags_objects
ON hashtags_objects.hashtag_id = hashtags.id
WHERE hashtags_objects.hashtag_id IS NULL AND hashtags.id = ANY($1));
"""
def delete_unreferenced(ids) do
with {:ok, %{num_rows: deleted_count}} <- Repo.query(@delete_unreferenced_query, [ids]) do
{:ok, deleted_count}
end
end
end

View file

@ -0,0 +1,208 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Migrators.HashtagsTableMigrator do
defmodule State do
use Pleroma.Migrators.Support.BaseMigratorState
@impl Pleroma.Migrators.Support.BaseMigratorState
defdelegate data_migration(), to: Pleroma.DataMigration, as: :populate_hashtags_table
end
use Pleroma.Migrators.Support.BaseMigrator
alias Pleroma.Hashtag
alias Pleroma.Migrators.Support.BaseMigrator
alias Pleroma.Object
@impl BaseMigrator
def feature_config_path, do: [:features, :improved_hashtag_timeline]
@impl BaseMigrator
def fault_rate_allowance, do: Config.get([:populate_hashtags_table, :fault_rate_allowance], 0)
@impl BaseMigrator
def perform do
data_migration_id = data_migration_id()
max_processed_id = get_stat(:max_processed_id, 0)
Logger.info("Transferring embedded hashtags to `hashtags` (from oid: #{max_processed_id})...")
query()
|> where([object], object.id > ^max_processed_id)
|> Repo.chunk_stream(100, :batches, timeout: :infinity)
|> Stream.each(fn objects ->
object_ids = Enum.map(objects, & &1.id)
results = Enum.map(objects, &transfer_object_hashtags(&1))
failed_ids =
results
|> Enum.filter(&(elem(&1, 0) == :error))
|> Enum.map(&elem(&1, 1))
# Count of objects with hashtags: `{:noop, id}` is returned for objects having other AS2 tags
chunk_affected_count =
results
|> Enum.filter(&(elem(&1, 0) == :ok))
|> length()
for failed_id <- failed_ids do
_ =
Repo.query(
"INSERT INTO data_migration_failed_ids(data_migration_id, record_id) " <>
"VALUES ($1, $2) ON CONFLICT DO NOTHING;",
[data_migration_id, failed_id]
)
end
_ =
Repo.query(
"DELETE FROM data_migration_failed_ids " <>
"WHERE data_migration_id = $1 AND record_id = ANY($2)",
[data_migration_id, object_ids -- failed_ids]
)
max_object_id = Enum.at(object_ids, -1)
put_stat(:max_processed_id, max_object_id)
increment_stat(:iteration_processed_count, length(object_ids))
increment_stat(:processed_count, length(object_ids))
increment_stat(:failed_count, length(failed_ids))
increment_stat(:affected_count, chunk_affected_count)
put_stat(:records_per_second, records_per_second())
persist_state()
# A quick and dirty approach to controlling the load this background migration imposes
sleep_interval = Config.get([:populate_hashtags_table, :sleep_interval_ms], 0)
Process.sleep(sleep_interval)
end)
|> Stream.run()
end
@impl BaseMigrator
def query do
# Note: most objects have Mention-type AS2 tags and no hashtags (but we can't filter them out)
# Note: not checking activity type, expecting remove_non_create_objects_hashtags/_ to clean up
from(
object in Object,
where:
fragment("(?)->'tag' IS NOT NULL AND (?)->'tag' != '[]'::jsonb", object.data, object.data),
select: %{
id: object.id,
tag: fragment("(?)->'tag'", object.data)
}
)
|> join(:left, [o], hashtags_objects in fragment("SELECT object_id FROM hashtags_objects"),
on: hashtags_objects.object_id == o.id
)
|> where([_o, hashtags_objects], is_nil(hashtags_objects.object_id))
end
@spec transfer_object_hashtags(Map.t()) :: {:noop | :ok | :error, integer()}
defp transfer_object_hashtags(object) do
embedded_tags = if Map.has_key?(object, :tag), do: object.tag, else: object.data["tag"]
hashtags = Object.object_data_hashtags(%{"tag" => embedded_tags})
if Enum.any?(hashtags) do
transfer_object_hashtags(object, hashtags)
else
{:noop, object.id}
end
end
defp transfer_object_hashtags(object, hashtags) do
Repo.transaction(fn ->
with {:ok, hashtag_records} <- Hashtag.get_or_create_by_names(hashtags) do
maps = Enum.map(hashtag_records, &%{hashtag_id: &1.id, object_id: object.id})
base_error = "ERROR when inserting hashtags_objects for object with id #{object.id}"
try do
with {rows_count, _} when is_integer(rows_count) <-
Repo.insert_all("hashtags_objects", maps, on_conflict: :nothing) do
object.id
else
e ->
Logger.error("#{base_error}: #{inspect(e)}")
Repo.rollback(object.id)
end
rescue
e ->
Logger.error("#{base_error}: #{inspect(e)}")
Repo.rollback(object.id)
end
else
e ->
error = "ERROR: could not create hashtags for object #{object.id}: #{inspect(e)}"
Logger.error(error)
Repo.rollback(object.id)
end
end)
end
@impl BaseMigrator
def retry_failed do
data_migration_id = data_migration_id()
failed_objects_query()
|> Repo.chunk_stream(100, :one)
|> Stream.each(fn object ->
with {res, _} when res != :error <- transfer_object_hashtags(object) do
_ =
Repo.query(
"DELETE FROM data_migration_failed_ids " <>
"WHERE data_migration_id = $1 AND record_id = $2",
[data_migration_id, object.id]
)
end
end)
|> Stream.run()
put_stat(:failed_count, failures_count())
persist_state()
force_continue()
end
defp failed_objects_query do
from(o in Object)
|> join(:inner, [o], dmf in fragment("SELECT * FROM data_migration_failed_ids"),
on: dmf.record_id == o.id
)
|> where([_o, dmf], dmf.data_migration_id == ^data_migration_id())
|> order_by([o], asc: o.id)
end
@doc """
Service func to delete `hashtags_objects` for legacy objects not associated with Create activity.
Also deletes unreferenced `hashtags` records (might occur after deletion of `hashtags_objects`).
"""
def delete_non_create_activities_hashtags do
hashtags_objects_cleanup_query = """
DELETE FROM hashtags_objects WHERE object_id IN
(SELECT DISTINCT objects.id FROM objects
JOIN hashtags_objects ON hashtags_objects.object_id = objects.id LEFT JOIN activities
ON COALESCE(activities.data->'object'->>'id', activities.data->>'object') =
(objects.data->>'id')
AND activities.data->>'type' = 'Create'
WHERE activities.id IS NULL);
"""
hashtags_cleanup_query = """
DELETE FROM hashtags WHERE id IN
(SELECT hashtags.id FROM hashtags
LEFT OUTER JOIN hashtags_objects
ON hashtags_objects.hashtag_id = hashtags.id
WHERE hashtags_objects.hashtag_id IS NULL);
"""
{:ok, %{num_rows: hashtags_objects_count}} =
Repo.query(hashtags_objects_cleanup_query, [], timeout: :infinity)
{:ok, %{num_rows: hashtags_count}} =
Repo.query(hashtags_cleanup_query, [], timeout: :infinity)
{:ok, hashtags_objects_count, hashtags_count}
end
end

View file

@ -0,0 +1,210 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Migrators.Support.BaseMigrator do
@moduledoc """
Base background migrator functionality.
"""
@callback perform() :: any()
@callback retry_failed() :: any()
@callback feature_config_path() :: list(atom())
@callback query() :: Ecto.Query.t()
@callback fault_rate_allowance() :: integer() | float()
defmacro __using__(_opts) do
quote do
use GenServer
require Logger
import Ecto.Query
alias __MODULE__.State
alias Pleroma.Config
alias Pleroma.Repo
@behaviour Pleroma.Migrators.Support.BaseMigrator
defdelegate data_migration(), to: State
defdelegate data_migration_id(), to: State
defdelegate state(), to: State
defdelegate persist_state(), to: State, as: :persist_to_db
defdelegate get_stat(key, value \\ nil), to: State, as: :get_data_key
defdelegate put_stat(key, value), to: State, as: :put_data_key
defdelegate increment_stat(key, increment), to: State, as: :increment_data_key
@reg_name {:global, __MODULE__}
def whereis, do: GenServer.whereis(@reg_name)
def start_link(_) do
case whereis() do
nil ->
GenServer.start_link(__MODULE__, nil, name: @reg_name)
pid ->
{:ok, pid}
end
end
@impl true
def init(_) do
{:ok, nil, {:continue, :init_state}}
end
@impl true
def handle_continue(:init_state, _state) do
{:ok, _} = State.start_link(nil)
data_migration = data_migration()
manual_migrations = Config.get([:instance, :manual_data_migrations], [])
cond do
Config.get(:env) == :test ->
update_status(:noop)
is_nil(data_migration) ->
message = "Data migration does not exist."
update_status(:failed, message)
Logger.error("#{__MODULE__}: #{message}")
data_migration.state == :manual or data_migration.name in manual_migrations ->
message = "Data migration is in manual execution or manual fix mode."
update_status(:manual, message)
Logger.warn("#{__MODULE__}: #{message}")
data_migration.state == :complete ->
on_complete(data_migration)
true ->
send(self(), :perform)
end
{:noreply, nil}
end
@impl true
def handle_info(:perform, state) do
State.reinit()
update_status(:running)
put_stat(:iteration_processed_count, 0)
put_stat(:started_at, NaiveDateTime.utc_now())
perform()
fault_rate = fault_rate()
put_stat(:fault_rate, fault_rate)
fault_rate_allowance = fault_rate_allowance()
cond do
fault_rate == 0 ->
set_complete()
is_float(fault_rate) and fault_rate <= fault_rate_allowance ->
message = """
Done with fault rate of #{fault_rate} which doesn't exceed #{fault_rate_allowance}.
Putting data migration to manual fix mode. Try running `#{__MODULE__}.retry_failed/0`.
"""
Logger.warn("#{__MODULE__}: #{message}")
update_status(:manual, message)
on_complete(data_migration())
true ->
message = "Too many failures. Try running `#{__MODULE__}.retry_failed/0`."
Logger.error("#{__MODULE__}: #{message}")
update_status(:failed, message)
end
persist_state()
{:noreply, state}
end
defp on_complete(data_migration) do
if data_migration.feature_lock || feature_state() == :disabled do
Logger.warn(
"#{__MODULE__}: migration complete but feature is locked; consider enabling."
)
:noop
else
Config.put(feature_config_path(), :enabled)
:ok
end
end
@doc "Approximate count for current iteration (including processed records count)"
def count(force \\ false, timeout \\ :infinity) do
stored_count = get_stat(:count)
if stored_count && !force do
stored_count
else
processed_count = get_stat(:processed_count, 0)
max_processed_id = get_stat(:max_processed_id, 0)
query = where(query(), [entity], entity.id > ^max_processed_id)
count = Repo.aggregate(query, :count, :id, timeout: timeout) + processed_count
put_stat(:count, count)
persist_state()
count
end
end
def failures_count do
with {:ok, %{rows: [[count]]}} <-
Repo.query(
"SELECT COUNT(record_id) FROM data_migration_failed_ids WHERE data_migration_id = $1;",
[data_migration_id()]
) do
count
end
end
def feature_state, do: Config.get(feature_config_path())
def force_continue do
send(whereis(), :perform)
end
def force_restart do
:ok = State.reset()
force_continue()
end
def set_complete do
update_status(:complete)
persist_state()
on_complete(data_migration())
end
defp update_status(status, message \\ nil) do
put_stat(:state, status)
put_stat(:message, message)
end
defp fault_rate do
with failures_count when is_integer(failures_count) <- failures_count() do
failures_count / Enum.max([get_stat(:affected_count, 0), 1])
else
_ -> :error
end
end
defp records_per_second do
get_stat(:iteration_processed_count, 0) / Enum.max([running_time(), 1])
end
defp running_time do
NaiveDateTime.diff(
NaiveDateTime.utc_now(),
get_stat(:started_at, NaiveDateTime.utc_now())
)
end
end
end
end

View file

@ -0,0 +1,117 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Migrators.Support.BaseMigratorState do
@moduledoc """
Base background migrator state functionality.
"""
@callback data_migration() :: Pleroma.DataMigration.t()
defmacro __using__(_opts) do
quote do
use Agent
alias Pleroma.DataMigration
@behaviour Pleroma.Migrators.Support.BaseMigratorState
@reg_name {:global, __MODULE__}
def start_link(_) do
Agent.start_link(fn -> load_state_from_db() end, name: @reg_name)
end
def data_migration, do: raise("data_migration/0 is not implemented")
defoverridable data_migration: 0
defp load_state_from_db do
data_migration = data_migration()
data =
if data_migration do
Map.new(data_migration.data, fn {k, v} -> {String.to_atom(k), v} end)
else
%{}
end
%{
data_migration_id: data_migration && data_migration.id,
data: data
}
end
def persist_to_db do
%{data_migration_id: data_migration_id, data: data} = state()
if data_migration_id do
DataMigration.update_one_by_id(data_migration_id, data: data)
else
{:error, :nil_data_migration_id}
end
end
def reset do
%{data_migration_id: data_migration_id} = state()
with false <- is_nil(data_migration_id),
:ok <-
DataMigration.update_one_by_id(data_migration_id,
state: :pending,
data: %{}
) do
reinit()
else
true -> {:error, :nil_data_migration_id}
e -> e
end
end
def reinit do
Agent.update(@reg_name, fn _state -> load_state_from_db() end)
end
def state do
Agent.get(@reg_name, & &1)
end
def get_data_key(key, default \\ nil) do
get_in(state(), [:data, key]) || default
end
def put_data_key(key, value) do
_ = persist_non_data_change(key, value)
Agent.update(@reg_name, fn state ->
put_in(state, [:data, key], value)
end)
end
def increment_data_key(key, increment \\ 1) do
Agent.update(@reg_name, fn state ->
initial_value = get_in(state, [:data, key]) || 0
updated_value = initial_value + increment
put_in(state, [:data, key], updated_value)
end)
end
defp persist_non_data_change(:state, value) do
with true <- get_data_key(:state) != value,
true <- value in Pleroma.DataMigration.State.__valid_values__(),
%{data_migration_id: data_migration_id} when not is_nil(data_migration_id) <-
state() do
DataMigration.update_one_by_id(data_migration_id, state: value)
else
false -> :ok
_ -> {:error, :nil_data_migration_id}
end
end
defp persist_non_data_change(_, _) do
nil
end
def data_migration_id, do: Map.get(state(), :data_migration_id)
end
end
end

View file

@ -10,6 +10,7 @@ defmodule Pleroma.Object do
alias Pleroma.Activity alias Pleroma.Activity
alias Pleroma.Config alias Pleroma.Config
alias Pleroma.Hashtag
alias Pleroma.Object alias Pleroma.Object
alias Pleroma.Object.Fetcher alias Pleroma.Object.Fetcher
alias Pleroma.ObjectTombstone alias Pleroma.ObjectTombstone
@ -28,6 +29,8 @@ defmodule Pleroma.Object do
schema "objects" do schema "objects" do
field(:data, :map) field(:data, :map)
many_to_many(:hashtags, Hashtag, join_through: "hashtags_objects", on_replace: :delete)
timestamps() timestamps()
end end
@ -49,7 +52,8 @@ def with_joined_activity(query, activity_type \\ "Create", join_type \\ :inner)
end end
def create(data) do def create(data) do
Object.change(%Object{}, %{data: data}) %Object{}
|> Object.change(%{data: data})
|> Repo.insert() |> Repo.insert()
end end
@ -58,8 +62,41 @@ def change(struct, params \\ %{}) do
|> cast(params, [:data]) |> cast(params, [:data])
|> validate_required([:data]) |> validate_required([:data])
|> unique_constraint(:ap_id, name: :objects_unique_apid_index) |> unique_constraint(:ap_id, name: :objects_unique_apid_index)
# Expecting `maybe_handle_hashtags_change/1` to run last:
|> maybe_handle_hashtags_change(struct)
end end
# Note: not checking activity type (assuming non-legacy objects are associated with Create act.)
defp maybe_handle_hashtags_change(changeset, struct) do
with %Ecto.Changeset{valid?: true} <- changeset,
data_hashtags_change = get_change(changeset, :data),
{_, true} <- {:changed, hashtags_changed?(struct, data_hashtags_change)},
{:ok, hashtag_records} <-
data_hashtags_change
|> object_data_hashtags()
|> Hashtag.get_or_create_by_names() do
put_assoc(changeset, :hashtags, hashtag_records)
else
%{valid?: false} ->
changeset
{:changed, false} ->
changeset
{:error, _} ->
validate_change(changeset, :data, fn _, _ ->
[data: "error referencing hashtags"]
end)
end
end
defp hashtags_changed?(%Object{} = struct, %{"tag" => _} = data) do
Enum.sort(embedded_hashtags(struct)) !=
Enum.sort(object_data_hashtags(data))
end
defp hashtags_changed?(_, _), do: false
def get_by_id(nil), do: nil def get_by_id(nil), do: nil
def get_by_id(id), do: Repo.get(Object, id) def get_by_id(id), do: Repo.get(Object, id)
@ -187,9 +224,13 @@ def make_tombstone(%Object{data: %{"id" => id, "type" => type}}, deleted \\ Date
def swap_object_with_tombstone(object) do def swap_object_with_tombstone(object) do
tombstone = make_tombstone(object) tombstone = make_tombstone(object)
with {:ok, object} <-
object object
|> Object.change(%{data: tombstone}) |> Object.change(%{data: tombstone})
|> Repo.update() |> Repo.update() do
Hashtag.unlink(object)
{:ok, object}
end
end end
def delete(%Object{data: %{"id" => id}} = object) do def delete(%Object{data: %{"id" => id}} = object) do
@ -349,4 +390,39 @@ def replies(object, opts \\ []) do
def self_replies(object, opts \\ []), def self_replies(object, opts \\ []),
do: replies(object, Keyword.put(opts, :self_only, true)) do: replies(object, Keyword.put(opts, :self_only, true))
def tags(%Object{data: %{"tag" => tags}}) when is_list(tags), do: tags
def tags(_), do: []
def hashtags(%Object{} = object) do
# Note: always using embedded hashtags regardless whether they are migrated to hashtags table
# (embedded hashtags stay in sync anyways, and we avoid extra joins and preload hassle)
embedded_hashtags(object)
end
def embedded_hashtags(%Object{data: data}) do
object_data_hashtags(data)
end
def embedded_hashtags(_), do: []
def object_data_hashtags(%{"tag" => tags}) when is_list(tags) do
tags
|> Enum.filter(fn
%{"type" => "Hashtag"} = data -> Map.has_key?(data, "name")
plain_text when is_bitstring(plain_text) -> true
_ -> false
end)
|> Enum.map(fn
%{"name" => "#" <> hashtag} -> String.downcase(hashtag)
%{"name" => hashtag} -> String.downcase(hashtag)
hashtag when is_bitstring(hashtag) -> String.downcase(hashtag)
end)
|> Enum.uniq()
# Note: "" elements (plain text) might occur in `data.tag` for incoming objects
|> Enum.filter(&(&1 not in [nil, ""]))
end
def object_data_hashtags(_), do: []
end end

View file

@ -93,6 +93,7 @@ defp cast_params(params) do
max_id: :string, max_id: :string,
offset: :integer, offset: :integer,
limit: :integer, limit: :integer,
skip_extra_order: :boolean,
skip_order: :boolean skip_order: :boolean
} }
@ -114,6 +115,8 @@ defp restrict(query, :max_id, %{max_id: max_id}, table_binding) do
defp restrict(query, :order, %{skip_order: true}, _), do: query defp restrict(query, :order, %{skip_order: true}, _), do: query
defp restrict(%{order_bys: [_ | _]} = query, :order, %{skip_extra_order: true}, _), do: query
defp restrict(query, :order, %{min_id: _}, table_binding) do defp restrict(query, :order, %{min_id: _}, table_binding) do
order_by( order_by(
query, query,

View file

@ -8,6 +8,8 @@ defmodule Pleroma.Repo do
adapter: Ecto.Adapters.Postgres, adapter: Ecto.Adapters.Postgres,
migration_timestamps: [type: :naive_datetime_usec] migration_timestamps: [type: :naive_datetime_usec]
use Ecto.Explain
import Ecto.Query import Ecto.Query
require Logger require Logger
@ -63,8 +65,8 @@ def get_assoc(resource, association) do
iex> Pleroma.Repo.chunk_stream(Pleroma.Activity.Queries.by_actor(ap_id), 500, :batches) iex> Pleroma.Repo.chunk_stream(Pleroma.Activity.Queries.by_actor(ap_id), 500, :batches)
""" """
@spec chunk_stream(Ecto.Query.t(), integer(), atom()) :: Enumerable.t() @spec chunk_stream(Ecto.Query.t(), integer(), atom()) :: Enumerable.t()
def chunk_stream(query, chunk_size, returns_as \\ :one) do def chunk_stream(query, chunk_size, returns_as \\ :one, query_options \\ []) do
# We don't actually need start and end funcitons of resource streaming, # We don't actually need start and end functions of resource streaming,
# but it seems to be the only way to not fetch records one-by-one and # but it seems to be the only way to not fetch records one-by-one and
# have individual records be the elements of the stream, instead of # have individual records be the elements of the stream, instead of
# lists of records # lists of records
@ -76,7 +78,7 @@ def chunk_stream(query, chunk_size, returns_as \\ :one) do
|> order_by(asc: :id) |> order_by(asc: :id)
|> where([r], r.id > ^last_id) |> where([r], r.id > ^last_id)
|> limit(^chunk_size) |> limit(^chunk_size)
|> all() |> all(query_options)
|> case do |> case do
[] -> [] ->
{:halt, last_id} {:halt, last_id}

View file

@ -10,6 +10,7 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
alias Pleroma.Conversation alias Pleroma.Conversation
alias Pleroma.Conversation.Participation alias Pleroma.Conversation.Participation
alias Pleroma.Filter alias Pleroma.Filter
alias Pleroma.Hashtag
alias Pleroma.Maps alias Pleroma.Maps
alias Pleroma.Notification alias Pleroma.Notification
alias Pleroma.Object alias Pleroma.Object
@ -465,6 +466,23 @@ def fetch_latest_direct_activity_id_for_context(context, opts \\ %{}) do
|> Repo.one() |> Repo.one()
end end
defp fetch_paginated_optimized(query, opts, pagination) do
# Note: tag-filtering funcs may apply "ORDER BY objects.id DESC",
# and extra sorting on "activities.id DESC NULLS LAST" would worse the query plan
opts = Map.put(opts, :skip_extra_order, true)
Pagination.fetch_paginated(query, opts, pagination)
end
def fetch_activities(recipients, opts \\ %{}, pagination \\ :keyset) do
list_memberships = Pleroma.List.memberships(opts[:user])
fetch_activities_query(recipients ++ list_memberships, opts)
|> fetch_paginated_optimized(opts, pagination)
|> Enum.reverse()
|> maybe_update_cc(list_memberships, opts[:user])
end
@spec fetch_public_or_unlisted_activities(map(), Pagination.type()) :: [Activity.t()] @spec fetch_public_or_unlisted_activities(map(), Pagination.type()) :: [Activity.t()]
def fetch_public_or_unlisted_activities(opts \\ %{}, pagination \\ :keyset) do def fetch_public_or_unlisted_activities(opts \\ %{}, pagination \\ :keyset) do
opts = Map.delete(opts, :user) opts = Map.delete(opts, :user)
@ -472,7 +490,7 @@ def fetch_public_or_unlisted_activities(opts \\ %{}, pagination \\ :keyset) do
[Constants.as_public()] [Constants.as_public()]
|> fetch_activities_query(opts) |> fetch_activities_query(opts)
|> restrict_unlisted(opts) |> restrict_unlisted(opts)
|> Pagination.fetch_paginated(opts, pagination) |> fetch_paginated_optimized(opts, pagination)
end end
@spec fetch_public_activities(map(), Pagination.type()) :: [Activity.t()] @spec fetch_public_activities(map(), Pagination.type()) :: [Activity.t()]
@ -693,52 +711,144 @@ defp restrict_since(query, %{since_id: since_id}) do
defp restrict_since(query, _), do: query defp restrict_since(query, _), do: query
defp restrict_tag_reject(_query, %{tag_reject: _tag_reject, skip_preload: true}) do defp restrict_embedded_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) do
raise "Can't use the child object without preloading!" raise_on_missing_preload()
end end
defp restrict_tag_reject(query, %{tag_reject: [_ | _] = tag_reject}) do defp restrict_embedded_tag_all(query, %{tag_all: [_ | _] = tag_all}) do
from(
[_activity, object] in query,
where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject)
)
end
defp restrict_tag_reject(query, _), do: query
defp restrict_tag_all(_query, %{tag_all: _tag_all, skip_preload: true}) do
raise "Can't use the child object without preloading!"
end
defp restrict_tag_all(query, %{tag_all: [_ | _] = tag_all}) do
from( from(
[_activity, object] in query, [_activity, object] in query,
where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all) where: fragment("(?)->'tag' \\?& (?)", object.data, ^tag_all)
) )
end end
defp restrict_tag_all(query, _), do: query defp restrict_embedded_tag_all(query, %{tag_all: tag}) when is_binary(tag) do
restrict_embedded_tag_any(query, %{tag: tag})
end
defp restrict_tag(_query, %{tag: _tag, skip_preload: true}) do defp restrict_embedded_tag_all(query, _), do: query
defp restrict_embedded_tag_any(_query, %{tag: _tag, skip_preload: true}) do
raise_on_missing_preload()
end
defp restrict_embedded_tag_any(query, %{tag: [_ | _] = tag_any}) do
from(
[_activity, object] in query,
where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag_any)
)
end
defp restrict_embedded_tag_any(query, %{tag: tag}) when is_binary(tag) do
restrict_embedded_tag_any(query, %{tag: [tag]})
end
defp restrict_embedded_tag_any(query, _), do: query
defp restrict_embedded_tag_reject_any(_query, %{tag_reject: _tag_reject, skip_preload: true}) do
raise_on_missing_preload()
end
defp restrict_embedded_tag_reject_any(query, %{tag_reject: [_ | _] = tag_reject}) do
from(
[_activity, object] in query,
where: fragment("not (?)->'tag' \\?| (?)", object.data, ^tag_reject)
)
end
defp restrict_embedded_tag_reject_any(query, %{tag_reject: tag_reject})
when is_binary(tag_reject) do
restrict_embedded_tag_reject_any(query, %{tag_reject: [tag_reject]})
end
defp restrict_embedded_tag_reject_any(query, _), do: query
defp object_ids_query_for_tags(tags) do
from(hto in "hashtags_objects")
|> join(:inner, [hto], ht in Pleroma.Hashtag, on: hto.hashtag_id == ht.id)
|> where([hto, ht], ht.name in ^tags)
|> select([hto], hto.object_id)
|> distinct([hto], true)
end
defp restrict_hashtag_all(_query, %{tag_all: _tag, skip_preload: true}) do
raise_on_missing_preload()
end
defp restrict_hashtag_all(query, %{tag_all: [single_tag]}) do
restrict_hashtag_any(query, %{tag: single_tag})
end
defp restrict_hashtag_all(query, %{tag_all: [_ | _] = tags}) do
from(
[_activity, object] in query,
where:
fragment(
"""
(SELECT array_agg(hashtags.name) FROM hashtags JOIN hashtags_objects
ON hashtags_objects.hashtag_id = hashtags.id WHERE hashtags.name = ANY(?)
AND hashtags_objects.object_id = ?) @> ?
""",
^tags,
object.id,
^tags
)
)
end
defp restrict_hashtag_all(query, %{tag_all: tag}) when is_binary(tag) do
restrict_hashtag_all(query, %{tag_all: [tag]})
end
defp restrict_hashtag_all(query, _), do: query
defp restrict_hashtag_any(_query, %{tag: _tag, skip_preload: true}) do
raise_on_missing_preload()
end
defp restrict_hashtag_any(query, %{tag: [_ | _] = tags}) do
hashtag_ids =
from(ht in Hashtag, where: ht.name in ^tags, select: ht.id)
|> Repo.all()
# Note: NO extra ordering should be done on "activities.id desc nulls last" for optimal plan
from(
[_activity, object] in query,
join: hto in "hashtags_objects",
on: hto.object_id == object.id,
where: hto.hashtag_id in ^hashtag_ids,
distinct: [desc: object.id],
order_by: [desc: object.id]
)
end
defp restrict_hashtag_any(query, %{tag: tag}) when is_binary(tag) do
restrict_hashtag_any(query, %{tag: [tag]})
end
defp restrict_hashtag_any(query, _), do: query
defp restrict_hashtag_reject_any(_query, %{tag_reject: _tag_reject, skip_preload: true}) do
raise_on_missing_preload()
end
defp restrict_hashtag_reject_any(query, %{tag_reject: [_ | _] = tags_reject}) do
from(
[_activity, object] in query,
where: object.id not in subquery(object_ids_query_for_tags(tags_reject))
)
end
defp restrict_hashtag_reject_any(query, %{tag_reject: tag_reject}) when is_binary(tag_reject) do
restrict_hashtag_reject_any(query, %{tag_reject: [tag_reject]})
end
defp restrict_hashtag_reject_any(query, _), do: query
defp raise_on_missing_preload do
raise "Can't use the child object without preloading!" raise "Can't use the child object without preloading!"
end end
defp restrict_tag(query, %{tag: tag}) when is_list(tag) do
from(
[_activity, object] in query,
where: fragment("(?)->'tag' \\?| (?)", object.data, ^tag)
)
end
defp restrict_tag(query, %{tag: tag}) when is_binary(tag) do
from(
[_activity, object] in query,
where: fragment("(?)->'tag' \\? (?)", object.data, ^tag)
)
end
defp restrict_tag(query, _), do: query
defp restrict_recipients(query, [], _user), do: query defp restrict_recipients(query, [], _user), do: query
defp restrict_recipients(query, recipients, nil) do defp restrict_recipients(query, recipients, nil) do
@ -1098,6 +1208,26 @@ defp maybe_order(query, %{order: :asc}) do
defp maybe_order(query, _), do: query defp maybe_order(query, _), do: query
defp normalize_fetch_activities_query_opts(opts) do
Enum.reduce([:tag, :tag_all, :tag_reject], opts, fn key, opts ->
case opts[key] do
value when is_bitstring(value) ->
Map.put(opts, key, Hashtag.normalize_name(value))
value when is_list(value) ->
normalized_value =
value
|> Enum.map(&Hashtag.normalize_name/1)
|> Enum.uniq()
Map.put(opts, key, normalized_value)
_ ->
opts
end
end)
end
defp fetch_activities_query_ap_ids_ops(opts) do defp fetch_activities_query_ap_ids_ops(opts) do
source_user = opts[:muting_user] source_user = opts[:muting_user]
ap_id_relationships = if source_user, do: [:mute, :reblog_mute], else: [] ap_id_relationships = if source_user, do: [:mute, :reblog_mute], else: []
@ -1121,6 +1251,8 @@ defp fetch_activities_query_ap_ids_ops(opts) do
end end
def fetch_activities_query(recipients, opts \\ %{}) do def fetch_activities_query(recipients, opts \\ %{}) do
opts = normalize_fetch_activities_query_opts(opts)
{restrict_blocked_opts, restrict_muted_opts, restrict_muted_reblogs_opts} = {restrict_blocked_opts, restrict_muted_opts, restrict_muted_reblogs_opts} =
fetch_activities_query_ap_ids_ops(opts) fetch_activities_query_ap_ids_ops(opts)
@ -1128,6 +1260,7 @@ def fetch_activities_query(recipients, opts \\ %{}) do
skip_thread_containment: Config.get([:instance, :skip_thread_containment]) skip_thread_containment: Config.get([:instance, :skip_thread_containment])
} }
query =
Activity Activity
|> maybe_preload_objects(opts) |> maybe_preload_objects(opts)
|> maybe_preload_bookmarks(opts) |> maybe_preload_bookmarks(opts)
@ -1136,9 +1269,6 @@ def fetch_activities_query(recipients, opts \\ %{}) do
|> maybe_order(opts) |> maybe_order(opts)
|> restrict_recipients(recipients, opts[:user]) |> restrict_recipients(recipients, opts[:user])
|> restrict_replies(opts) |> restrict_replies(opts)
|> restrict_tag(opts)
|> restrict_tag_reject(opts)
|> restrict_tag_all(opts)
|> restrict_since(opts) |> restrict_since(opts)
|> restrict_local(opts) |> restrict_local(opts)
|> restrict_remote(opts) |> restrict_remote(opts)
@ -1163,15 +1293,18 @@ def fetch_activities_query(recipients, opts \\ %{}) do
|> exclude_chat_messages(opts) |> exclude_chat_messages(opts)
|> exclude_invisible_actors(opts) |> exclude_invisible_actors(opts)
|> exclude_visibility(opts) |> exclude_visibility(opts)
if Config.feature_enabled?(:improved_hashtag_timeline) do
query
|> restrict_hashtag_any(opts)
|> restrict_hashtag_all(opts)
|> restrict_hashtag_reject_any(opts)
else
query
|> restrict_embedded_tag_any(opts)
|> restrict_embedded_tag_all(opts)
|> restrict_embedded_tag_reject_any(opts)
end end
def fetch_activities(recipients, opts \\ %{}, pagination \\ :keyset) do
list_memberships = Pleroma.List.memberships(opts[:user])
fetch_activities_query(recipients ++ list_memberships, opts)
|> Pagination.fetch_paginated(opts, pagination)
|> Enum.reverse()
|> maybe_update_cc(list_memberships, opts[:user])
end end
@doc """ @doc """

View file

@ -74,9 +74,11 @@ defp check_media_nsfw(
object = object =
if MRF.subdomain_match?(media_nsfw, actor_host) do if MRF.subdomain_match?(media_nsfw, actor_host) do
tags = (child_object["tag"] || []) ++ ["nsfw"] child_object =
child_object = Map.put(child_object, "tag", tags) child_object
child_object = Map.put(child_object, "sensitive", true) |> Map.put("tag", (child_object["tag"] || []) ++ ["nsfw"])
|> Map.put("sensitive", true)
Map.put(object, "object", child_object) Map.put(object, "object", child_object)
else else
object object

View file

@ -32,18 +32,18 @@ defmodule Pleroma.Web.ActivityPub.Transmogrifier do
""" """
def fix_object(object, options \\ []) do def fix_object(object, options \\ []) do
object object
|> strip_internal_fields |> strip_internal_fields()
|> fix_actor |> fix_actor()
|> fix_url |> fix_url()
|> fix_attachments |> fix_attachments()
|> fix_context |> fix_context()
|> fix_in_reply_to(options) |> fix_in_reply_to(options)
|> fix_emoji |> fix_emoji()
|> fix_tag |> fix_tag()
|> set_sensitive |> set_sensitive()
|> fix_content_map |> fix_content_map()
|> fix_addressing |> fix_addressing()
|> fix_summary |> fix_summary()
|> fix_type(options) |> fix_type(options)
end end
@ -315,10 +315,9 @@ def fix_tag(%{"tag" => tag} = object) when is_list(tag) do
tags = tags =
tag tag
|> Enum.filter(fn data -> data["type"] == "Hashtag" and data["name"] end) |> Enum.filter(fn data -> data["type"] == "Hashtag" and data["name"] end)
|> Enum.map(fn %{"name" => name} -> |> Enum.map(fn
name %{"name" => "#" <> hashtag} -> String.downcase(hashtag)
|> String.slice(1..-1) %{"name" => hashtag} -> String.downcase(hashtag)
|> String.downcase()
end) end)
Map.put(object, "tag", tag ++ tags) Map.put(object, "tag", tag ++ tags)

View file

@ -32,6 +32,7 @@ def prepare_activity(activity, opts \\ []) do
%{ %{
activity: activity, activity: activity,
object: object,
data: Map.get(object, :data), data: Map.get(object, :data),
actor: actor actor: actor
} }

View file

@ -133,31 +133,22 @@ defp fail_on_bad_auth(conn) do
end end
defp hashtag_fetching(params, user, local_only) do defp hashtag_fetching(params, user, local_only) do
tags = # Note: not sanitizing tag options at this stage (may be mix-cased, have duplicates etc.)
tags_any =
[params[:tag], params[:any]] [params[:tag], params[:any]]
|> List.flatten() |> List.flatten()
|> Enum.uniq() |> Enum.filter(& &1)
|> Enum.reject(&is_nil/1)
|> Enum.map(&String.downcase/1)
tag_all = tag_all = Map.get(params, :all, [])
params tag_reject = Map.get(params, :none, [])
|> Map.get(:all, [])
|> Enum.map(&String.downcase/1)
tag_reject =
params
|> Map.get(:none, [])
|> Enum.map(&String.downcase/1)
_activities =
params params
|> Map.put(:type, "Create") |> Map.put(:type, "Create")
|> Map.put(:local_only, local_only) |> Map.put(:local_only, local_only)
|> Map.put(:blocking_user, user) |> Map.put(:blocking_user, user)
|> Map.put(:muting_user, user) |> Map.put(:muting_user, user)
|> Map.put(:user, user) |> Map.put(:user, user)
|> Map.put(:tag, tags) |> Map.put(:tag, tags_any)
|> Map.put(:tag_all, tag_all) |> Map.put(:tag_all, tag_all)
|> Map.put(:tag_reject, tag_reject) |> Map.put(:tag_reject, tag_reject)
|> ActivityPub.fetch_public_activities() |> ActivityPub.fetch_public_activities()

View file

@ -198,8 +198,10 @@ def render("show.json", %{activity: %{data: %{"object" => _object}} = activity}
like_count = object.data["like_count"] || 0 like_count = object.data["like_count"] || 0
announcement_count = object.data["announcement_count"] || 0 announcement_count = object.data["announcement_count"] || 0
tags = object.data["tag"] || [] hashtags = Object.hashtags(object)
sensitive = object.data["sensitive"] || Enum.member?(tags, "nsfw") sensitive = object.data["sensitive"] || Enum.member?(hashtags, "nsfw")
tags = Object.tags(object)
tag_mentions = tag_mentions =
tags tags

View file

@ -22,7 +22,7 @@
<link type="text/html" href='<%= @data["external_url"] %>' rel="alternate"/> <link type="text/html" href='<%= @data["external_url"] %>' rel="alternate"/>
<% end %> <% end %>
<%= for tag <- @data["tag"] || [] do %> <%= for tag <- Pleroma.Object.hashtags(@object) do %>
<category term="<%= tag %>"></category> <category term="<%= tag %>"></category>
<% end %> <% end %>

View file

@ -22,7 +22,7 @@
<link rel="ostatus:conversation"><%= activity_context(@activity) %></link> <link rel="ostatus:conversation"><%= activity_context(@activity) %></link>
<%= for tag <- @data["tag"] || [] do %> <%= for tag <- Pleroma.Object.hashtags(@object) do %>
<category term="<%= tag %>"></category> <category term="<%= tag %>"></category>
<% end %> <% end %>

View file

@ -41,7 +41,7 @@
<% end %> <% end %>
<% end %> <% end %>
<%= for tag <- @data["tag"] || [] do %> <%= for tag <- Pleroma.Object.hashtags(@object) do %>
<category term="<%= tag %>"></category> <category term="<%= tag %>"></category>
<% end %> <% end %>

View file

@ -121,6 +121,7 @@ defp deps do
{:phoenix_pubsub, "~> 2.0"}, {:phoenix_pubsub, "~> 2.0"},
{:phoenix_ecto, "~> 4.0"}, {:phoenix_ecto, "~> 4.0"},
{:ecto_enum, "~> 1.4"}, {:ecto_enum, "~> 1.4"},
{:ecto_explain, "~> 0.1.2"},
{:ecto_sql, "~> 3.4.4"}, {:ecto_sql, "~> 3.4.4"},
{:postgrex, ">= 0.15.5"}, {:postgrex, ">= 0.15.5"},
{:oban, "~> 2.3.4"}, {:oban, "~> 2.3.4"},

View file

@ -31,6 +31,7 @@
"earmark_parser": {:hex, :earmark_parser, "1.4.10", "6603d7a603b9c18d3d20db69921527f82ef09990885ed7525003c7fe7dc86c56", [:mix], [], "hexpm", "8e2d5370b732385db2c9b22215c3f59c84ac7dda7ed7e544d7c459496ae519c0"}, "earmark_parser": {:hex, :earmark_parser, "1.4.10", "6603d7a603b9c18d3d20db69921527f82ef09990885ed7525003c7fe7dc86c56", [:mix], [], "hexpm", "8e2d5370b732385db2c9b22215c3f59c84ac7dda7ed7e544d7c459496ae519c0"},
"ecto": {:hex, :ecto, "3.4.6", "08f7afad3257d6eb8613309af31037e16c36808dfda5a3cd0cb4e9738db030e4", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "6f13a9e2a62e75c2dcfc7207bfc65645ab387af8360db4c89fee8b5a4bf3f70b"}, "ecto": {:hex, :ecto, "3.4.6", "08f7afad3257d6eb8613309af31037e16c36808dfda5a3cd0cb4e9738db030e4", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "6f13a9e2a62e75c2dcfc7207bfc65645ab387af8360db4c89fee8b5a4bf3f70b"},
"ecto_enum": {:hex, :ecto_enum, "1.4.0", "d14b00e04b974afc69c251632d1e49594d899067ee2b376277efd8233027aec8", [:mix], [{:ecto, ">= 3.0.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:ecto_sql, "> 3.0.0", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:mariaex, ">= 0.0.0", [hex: :mariaex, repo: "hexpm", optional: true]}, {:postgrex, ">= 0.0.0", [hex: :postgrex, repo: "hexpm", optional: true]}], "hexpm", "8fb55c087181c2b15eee406519dc22578fa60dd82c088be376d0010172764ee4"}, "ecto_enum": {:hex, :ecto_enum, "1.4.0", "d14b00e04b974afc69c251632d1e49594d899067ee2b376277efd8233027aec8", [:mix], [{:ecto, ">= 3.0.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:ecto_sql, "> 3.0.0", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:mariaex, ">= 0.0.0", [hex: :mariaex, repo: "hexpm", optional: true]}, {:postgrex, ">= 0.0.0", [hex: :postgrex, repo: "hexpm", optional: true]}], "hexpm", "8fb55c087181c2b15eee406519dc22578fa60dd82c088be376d0010172764ee4"},
"ecto_explain": {:hex, :ecto_explain, "0.1.2", "a9d504cbd4adc809911f796d5ef7ebb17a576a6d32286c3d464c015bd39d5541", [:mix], [], "hexpm", "1d0e7798ae30ecf4ce34e912e5354a0c1c832b7ebceba39298270b9a9f316330"},
"ecto_sql": {:hex, :ecto_sql, "3.4.5", "30161f81b167d561a9a2df4329c10ae05ff36eca7ccc84628f2c8b9fa1e43323", [:mix], [{:db_connection, "~> 2.2", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.4.3", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.3.0 or ~> 0.4.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.15.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.0", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "31990c6a3579b36a3c0841d34a94c275e727de8b84f58509da5f1b2032c98ac2"}, "ecto_sql": {:hex, :ecto_sql, "3.4.5", "30161f81b167d561a9a2df4329c10ae05ff36eca7ccc84628f2c8b9fa1e43323", [:mix], [{:db_connection, "~> 2.2", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.4.3", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.3.0 or ~> 0.4.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.15.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.0", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "31990c6a3579b36a3c0841d34a94c275e727de8b84f58509da5f1b2032c98ac2"},
"eimp": {:hex, :eimp, "1.0.14", "fc297f0c7e2700457a95a60c7010a5f1dcb768a083b6d53f49cd94ab95a28f22", [:rebar3], [{:p1_utils, "1.0.18", [hex: :p1_utils, repo: "hexpm", optional: false]}], "hexpm", "501133f3112079b92d9e22da8b88bf4f0e13d4d67ae9c15c42c30bd25ceb83b6"}, "eimp": {:hex, :eimp, "1.0.14", "fc297f0c7e2700457a95a60c7010a5f1dcb768a083b6d53f49cd94ab95a28f22", [:rebar3], [{:p1_utils, "1.0.18", [hex: :p1_utils, repo: "hexpm", optional: false]}], "hexpm", "501133f3112079b92d9e22da8b88bf4f0e13d4d67ae9c15c42c30bd25ceb83b6"},
"elixir_make": {:hex, :elixir_make, "0.6.2", "7dffacd77dec4c37b39af867cedaabb0b59f6a871f89722c25b28fcd4bd70530", [:mix], [], "hexpm", "03e49eadda22526a7e5279d53321d1cced6552f344ba4e03e619063de75348d9"}, "elixir_make": {:hex, :elixir_make, "0.6.2", "7dffacd77dec4c37b39af867cedaabb0b59f6a871f89722c25b28fcd4bd70530", [:mix], [], "hexpm", "03e49eadda22526a7e5279d53321d1cced6552f344ba4e03e619063de75348d9"},

View file

@ -9,7 +9,7 @@ def change do
begin begin
result := jsonb_set(target, path, coalesce(new_value, 'null'::jsonb), create_missing); result := jsonb_set(target, path, coalesce(new_value, 'null'::jsonb), create_missing);
if result is NULL then if result is NULL then
raise 'jsonb_set tried to wipe the object, please report this incindent to Pleroma bug tracker. https://git.pleroma.social/pleroma/pleroma/issues/new'; raise 'jsonb_set tried to wipe the object, please report this incident to Pleroma bug tracker. https://git.pleroma.social/pleroma/pleroma/issues/new';
return target; return target;
else else
return result; return result;

View file

@ -0,0 +1,13 @@
defmodule Pleroma.Repo.Migrations.CreateHashtags do
use Ecto.Migration
def change do
create_if_not_exists table(:hashtags) do
add(:name, :citext, null: false)
timestamps()
end
create_if_not_exists(unique_index(:hashtags, [:name]))
end
end

View file

@ -0,0 +1,15 @@
defmodule Pleroma.Repo.Migrations.RemoveDataFromHashtags do
use Ecto.Migration
def up do
alter table(:hashtags) do
remove_if_exists(:data, :map)
end
end
def down do
alter table(:hashtags) do
add_if_not_exists(:data, :map, default: %{})
end
end
end

View file

@ -0,0 +1,13 @@
defmodule Pleroma.Repo.Migrations.CreateHashtagsObjects do
use Ecto.Migration
def change do
create_if_not_exists table(:hashtags_objects, primary_key: false) do
add(:hashtag_id, references(:hashtags), null: false, primary_key: true)
add(:object_id, references(:objects), null: false, primary_key: true)
end
# Note: PK index: "hashtags_objects_pkey" PRIMARY KEY, btree (hashtag_id, object_id)
create_if_not_exists(index(:hashtags_objects, [:object_id]))
end
end

View file

@ -0,0 +1,17 @@
defmodule Pleroma.Repo.Migrations.CreateDataMigrations do
use Ecto.Migration
def change do
create_if_not_exists table(:data_migrations) do
add(:name, :string, null: false)
add(:state, :integer, default: 1)
add(:feature_lock, :boolean, default: false)
add(:params, :map, default: %{})
add(:data, :map, default: %{})
timestamps()
end
create_if_not_exists(unique_index(:data_migrations, [:name]))
end
end

View file

@ -0,0 +1,16 @@
defmodule Pleroma.Repo.Migrations.DataMigrationCreatePopulateHashtagsTable do
use Ecto.Migration
def up do
dt = NaiveDateTime.utc_now()
execute(
"INSERT INTO data_migrations(name, inserted_at, updated_at) " <>
"VALUES ('populate_hashtags_table', '#{dt}', '#{dt}') ON CONFLICT DO NOTHING;"
)
end
def down do
execute("DELETE FROM data_migrations WHERE name = 'populate_hashtags_table';")
end
end

View file

@ -0,0 +1,14 @@
defmodule Pleroma.Repo.Migrations.CreateDataMigrationFailedIds do
use Ecto.Migration
def change do
create_if_not_exists table(:data_migration_failed_ids, primary_key: false) do
add(:data_migration_id, references(:data_migrations), null: false, primary_key: true)
add(:record_id, :bigint, null: false, primary_key: true)
end
create_if_not_exists(
unique_index(:data_migration_failed_ids, [:data_migration_id, :record_id])
)
end
end

View file

@ -0,0 +1,11 @@
defmodule Pleroma.Repo.Migrations.RemoveHashtagsObjectsDuplicateIndex do
use Ecto.Migration
@moduledoc "Removes `hashtags_objects_hashtag_id_object_id_index` index (duplicate of PK index)."
def up do
drop_if_exists(unique_index(:hashtags_objects, [:hashtag_id, :object_id]))
end
def down, do: nil
end

View file

@ -0,0 +1,15 @@
defmodule Pleroma.Repo.Migrations.ChangeHashtagsNameToText do
use Ecto.Migration
def up do
alter table(:hashtags) do
modify(:name, :text)
end
end
def down do
alter table(:hashtags) do
modify(:name, :citext)
end
end
end

View file

@ -11,6 +11,8 @@ defmodule Pleroma.Activity.Ir.TopicsTest do
require Pleroma.Constants require Pleroma.Constants
import Mock
describe "poll answer" do describe "poll answer" do
test "produce no topics" do test "produce no topics" do
activity = %Activity{object: %Object{data: %{"type" => "Answer"}}} activity = %Activity{object: %Object{data: %{"type" => "Answer"}}}
@ -77,15 +79,14 @@ test "with no attachments doesn't produce public:media topics", %{activity: acti
refute Enum.member?(topics, "public:local:media") refute Enum.member?(topics, "public:local:media")
end end
test "converts tags to hash tags", %{activity: %{object: %{data: data} = object} = activity} do test "converts tags to hash tags", %{activity: activity} do
tagged_data = Map.put(data, "tag", ["foo", "bar"]) with_mock(Object, [:passthrough], hashtags: fn _ -> ["foo", "bar"] end) do
activity = %{activity | object: %{object | data: tagged_data}}
topics = Topics.get_activity_topics(activity) topics = Topics.get_activity_topics(activity)
assert Enum.member?(topics, "hashtag:foo") assert Enum.member?(topics, "hashtag:foo")
assert Enum.member?(topics, "hashtag:bar") assert Enum.member?(topics, "hashtag:bar")
end end
end
test "only converts strings to hash tags", %{ test "only converts strings to hash tags", %{
activity: %{object: %{data: data} = object} = activity activity: %{object: %{data: data} = object} = activity

View file

@ -0,0 +1,17 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.HashtagTest do
use Pleroma.DataCase
alias Pleroma.Hashtag
describe "changeset validations" do
test "ensure non-blank :name" do
changeset = Hashtag.changeset(%Hashtag{}, %{name: ""})
assert {:name, {"can't be blank", [validation: :required]}} in changeset.errors
end
end
end

View file

@ -5,10 +5,13 @@
defmodule Pleroma.ObjectTest do defmodule Pleroma.ObjectTest do
use Pleroma.DataCase use Pleroma.DataCase
use Oban.Testing, repo: Pleroma.Repo use Oban.Testing, repo: Pleroma.Repo
import ExUnit.CaptureLog import ExUnit.CaptureLog
import Pleroma.Factory import Pleroma.Factory
import Tesla.Mock import Tesla.Mock
alias Pleroma.Activity alias Pleroma.Activity
alias Pleroma.Hashtag
alias Pleroma.Object alias Pleroma.Object
alias Pleroma.Repo alias Pleroma.Repo
alias Pleroma.Tests.ObanHelpers alias Pleroma.Tests.ObanHelpers
@ -417,4 +420,28 @@ test "preserves internal fields on refetch", %{mock_modified: mock_modified} do
assert updated_object.data["like_count"] == 1 assert updated_object.data["like_count"] == 1
end end
end end
describe ":hashtags association" do
test "Hashtag records are created with Object record and updated on its change" do
user = insert(:user)
{:ok, %{object: object}} =
CommonAPI.post(user, %{status: "some text #hashtag1 #hashtag2 ..."})
assert [%Hashtag{name: "hashtag1"}, %Hashtag{name: "hashtag2"}] =
Enum.sort_by(object.hashtags, & &1.name)
{:ok, object} = Object.update_data(object, %{"tag" => []})
assert [] = object.hashtags
object = Object.get_by_id(object.id) |> Repo.preload(:hashtags)
assert [] = object.hashtags
{:ok, object} = Object.update_data(object, %{"tag" => ["abc", "def"]})
assert [%Hashtag{name: "abc"}, %Hashtag{name: "def"}] =
Enum.sort_by(object.hashtags, & &1.name)
end
end
end end

View file

@ -213,18 +213,24 @@ test "works for guppe actors" do
test "it fetches the appropriate tag-restricted posts" do test "it fetches the appropriate tag-restricted posts" do
user = insert(:user) user = insert(:user)
{:ok, status_one} = CommonAPI.post(user, %{status: ". #test"}) {:ok, status_one} = CommonAPI.post(user, %{status: ". #TEST"})
{:ok, status_two} = CommonAPI.post(user, %{status: ". #essais"}) {:ok, status_two} = CommonAPI.post(user, %{status: ". #essais"})
{:ok, status_three} = CommonAPI.post(user, %{status: ". #test #reject"}) {:ok, status_three} = CommonAPI.post(user, %{status: ". #test #Reject"})
{:ok, status_four} = CommonAPI.post(user, %{status: ". #Any1 #any2"})
{:ok, status_five} = CommonAPI.post(user, %{status: ". #Any2 #any1"})
for hashtag_timeline_strategy <- [:enabled, :disabled] do
clear_config([:features, :improved_hashtag_timeline], hashtag_timeline_strategy)
fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"}) fetch_one = ActivityPub.fetch_activities([], %{type: "Create", tag: "test"})
fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["test", "essais"]}) fetch_two = ActivityPub.fetch_activities([], %{type: "Create", tag: ["TEST", "essais"]})
fetch_three = fetch_three =
ActivityPub.fetch_activities([], %{ ActivityPub.fetch_activities([], %{
type: "Create", type: "Create",
tag: ["test", "essais"], tag: ["test", "Essais"],
tag_reject: ["reject"] tag_reject: ["reject"]
}) })
@ -232,13 +238,39 @@ test "it fetches the appropriate tag-restricted posts" do
ActivityPub.fetch_activities([], %{ ActivityPub.fetch_activities([], %{
type: "Create", type: "Create",
tag: ["test"], tag: ["test"],
tag_all: ["test", "reject"] tag_all: ["test", "REJECT"]
}) })
# Testing that deduplication (if needed) is done on DB (not Ecto) level; :limit is important
fetch_five =
ActivityPub.fetch_activities([], %{
type: "Create",
tag: ["ANY1", "any2"],
limit: 2
})
fetch_six =
ActivityPub.fetch_activities([], %{
type: "Create",
tag: ["any1", "Any2"],
tag_all: [],
tag_reject: []
})
# Regression test: passing empty lists as filter options shouldn't affect the results
assert fetch_five == fetch_six
[fetch_one, fetch_two, fetch_three, fetch_four, fetch_five] =
Enum.map([fetch_one, fetch_two, fetch_three, fetch_four, fetch_five], fn statuses ->
Enum.map(statuses, fn s -> Repo.preload(s, object: :hashtags) end)
end)
assert fetch_one == [status_one, status_three] assert fetch_one == [status_one, status_three]
assert fetch_two == [status_one, status_two, status_three] assert fetch_two == [status_one, status_two, status_three]
assert fetch_three == [status_one, status_two] assert fetch_three == [status_one, status_two]
assert fetch_four == [status_three] assert fetch_four == [status_three]
assert fetch_five == [status_four, status_five]
end
end end
describe "insertion" do describe "insertion" do

View file

@ -39,7 +39,8 @@ test "it works for incoming notices with tag not being an array (kroeg)" do
{:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data) {:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data)
object = Object.normalize(data["object"], fetch: false) object = Object.normalize(data["object"], fetch: false)
assert "test" in object.data["tag"] assert "test" in Object.tags(object)
assert Object.hashtags(object) == ["test"]
end end
test "it cleans up incoming notices which are not really DMs" do test "it cleans up incoming notices which are not really DMs" do
@ -220,7 +221,8 @@ test "it works for incoming notices with hashtags" do
{:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data) {:ok, %Activity{data: data, local: false}} = Transmogrifier.handle_incoming(data)
object = Object.normalize(data["object"], fetch: false) object = Object.normalize(data["object"], fetch: false)
assert Enum.at(object.data["tag"], 2) == "moo" assert Enum.at(Object.tags(object), 2) == "moo"
assert Object.hashtags(object) == ["moo"]
end end
test "it works for incoming notices with contentMap" do test "it works for incoming notices with contentMap" do

View file

@ -493,7 +493,7 @@ test "it de-duplicates tags" do
object = Object.normalize(activity, fetch: false) object = Object.normalize(activity, fetch: false)
assert object.data["tag"] == ["2hu"] assert Object.tags(object) == ["2hu"]
end end
test "it adds emoji in the object" do test "it adds emoji in the object" do

View file

@ -262,8 +262,8 @@ test "a note activity" do
mentions: [], mentions: [],
tags: [ tags: [
%{ %{
name: "#{object_data["tag"]}", name: "#{hd(object_data["tag"])}",
url: "http://localhost:4001/tag/#{object_data["tag"]}" url: "http://localhost:4001/tag/#{hd(object_data["tag"])}"
} }
], ],
application: nil, application: nil,