ensure utf-8 nicknames on nickname GETs and user validator #1057

Merged
floatingghost merged 4 commits from user-utf8 into develop 2026-02-07 19:41:27 +00:00
3 changed files with 34 additions and 36 deletions
Showing only changes of commit 8da6785c46 - Show all commits

mix format
All checks were successful
ci/woodpecker/pr/test/2 Pipeline was successful
ci/woodpecker/pr/test/1 Pipeline was successful

FloatingGhost 2026-01-25 01:31:26 +00:00

View file

@ -1211,53 +1211,52 @@ defmodule Pleroma.User do
end
def get_cached_by_nickname(nickname) do
if String.valid?(nickname) do
key = "nickname:#{nickname}"
if String.valid?(nickname) do
key = "nickname:#{nickname}"
@cachex.fetch!(:user_cache, key, fn _ ->
case get_or_fetch_by_nickname(nickname) do
{:ok, user} -> {:commit, user}
{:error, _error} -> {:ignore, nil}
end
end)
@cachex.fetch!(:user_cache, key, fn _ ->
case get_or_fetch_by_nickname(nickname) do
{:ok, user} -> {:commit, user}
{:error, _error} -> {:ignore, nil}
end
end)
else
:error
end
:error
end
end
def get_cached_by_nickname_or_id(nickname_or_id, opts \\ []) do
if String.valid?(nickname_or_id) do
floatingghost marked this conversation as resolved

Since FlakeId can handle this and id-type arguments are presumably much more common it might be good to optimise this by only testing String.valid if there was no id match. E.g. something like:

with idlike <- is_integer(nickname_or_id) or FlakeId.flake_id?(nickname_or_id),
     nil <- if idlike, do: get_cached_by_id(nickname_or_id), else: nil,
     true <- String.valid?(nickname_or_id) do
  cond do
        restrict_to_local == false or not String.contains?(nickname_or_id, "@") ->
          get_cached_by_nickname(nickname_or_id)

        restrict_to_local == :unauthenticated and match?(%User{}, opts[:for]) ->
          get_cached_by_nickname(nickname_or_id)

        true ->
          nil
  end
else
  %User{} = user -> user
  _ -> nil
end

Though it might be nicer to split the "allowed to lookup" checks handled in the cond into a private function returning a boolean and then also handle this as just another with clause:

with idlike <- is_integer(nickname_or_id) or FlakeId.flake_id?(nickname_or_id),
     nil <- if idlike, do: get_cached_by_id(nickname_or_id), else: nil,
     true <- String.valid?(nickname_or_id),
     true <- allowed_to_lookup_nick(restrict_to_local, nickname_or_id, opts) do
  get_cached_by_nickname(nickname_or_id)
else
  %User{} = user -> user
  _ -> nil
end
Since `FlakeId` can handle this and id-type arguments are presumably much more common it might be good to optimise this by only testing `String.valid` if there was no id match. E.g. something like: ```elixir with idlike <- is_integer(nickname_or_id) or FlakeId.flake_id?(nickname_or_id), nil <- if idlike, do: get_cached_by_id(nickname_or_id), else: nil, true <- String.valid?(nickname_or_id) do cond do restrict_to_local == false or not String.contains?(nickname_or_id, "@") -> get_cached_by_nickname(nickname_or_id) restrict_to_local == :unauthenticated and match?(%User{}, opts[:for]) -> get_cached_by_nickname(nickname_or_id) true -> nil end else %User{} = user -> user _ -> nil end ``` Though it might be nicer to split the "allowed to lookup" checks handled in the `cond` into a private function returning a boolean and then also handle this as just another `with` clause: ```elixir with idlike <- is_integer(nickname_or_id) or FlakeId.flake_id?(nickname_or_id), nil <- if idlike, do: get_cached_by_id(nickname_or_id), else: nil, true <- String.valid?(nickname_or_id), true <- allowed_to_lookup_nick(restrict_to_local, nickname_or_id, opts) do get_cached_by_nickname(nickname_or_id) else %User{} = user -> user _ -> nil end ```

i just removed the check in nickname_by_id and let it live in the get_cached_by_nickname which should be suitable

i just removed the check in nickname_by_id and let it live in the get_cached_by_nickname which should be suitable
restrict_to_local = Config.get([:instance, :limit_to_local_content])
restrict_to_local = Config.get([:instance, :limit_to_local_content])
cond do
is_integer(nickname_or_id) or FlakeId.flake_id?(nickname_or_id) ->
get_cached_by_id(nickname_or_id) || get_cached_by_nickname(nickname_or_id)
cond do
is_integer(nickname_or_id) or FlakeId.flake_id?(nickname_or_id) ->
get_cached_by_id(nickname_or_id) || get_cached_by_nickname(nickname_or_id)
restrict_to_local == false or not String.contains?(nickname_or_id, "@") ->
get_cached_by_nickname(nickname_or_id)
restrict_to_local == false or not String.contains?(nickname_or_id, "@") ->
get_cached_by_nickname(nickname_or_id)
restrict_to_local == :unauthenticated and match?(%User{}, opts[:for]) ->
get_cached_by_nickname(nickname_or_id)
restrict_to_local == :unauthenticated and match?(%User{}, opts[:for]) ->
get_cached_by_nickname(nickname_or_id)
true ->
nil
end
true ->
nil
end
else
:error
:error
end
end
@spec get_by_nickname(String.t()) :: User.t() | nil
def get_by_nickname(nickname) do
if String.valid?(nickname) do
Repo.get_by(User, nickname: nickname) ||
if Regex.match?(~r(@#{Pleroma.Web.Endpoint.host()})i, nickname) do
Repo.get_by(User, nickname: local_nickname(nickname))
end
else
Repo.get_by(User, nickname: nickname) ||
if Regex.match?(~r(@#{Pleroma.Web.Endpoint.host()})i, nickname) do
Repo.get_by(User, nickname: local_nickname(nickname))
end
else
:error
end
end
floatingghost marked this conversation as resolved

Returning :error here breaks the documented type spec. It apparently still works fine anyway for /api/v1/accounts/:nick, but might still crash for other users of this function. I don’t think it’s necessary to distinguish this from a normal "not found" response though, after all no such user exists in the db and so just returning nil seems fine.

The same applies to the other two modified functions

Returning `:error` here breaks the documented type spec. It apparently still works fine anyway for `/api/v1/accounts/:nick`, but might still crash for other users of this function. I don’t think it’s necessary to distinguish this from a normal "not found" response though, after all no such user exists in the db and so just returning `nil` seems fine. The same applies to the other two modified functions
end
def get_by_email(email), do: Repo.get_by(User, email: email)

View file

@ -85,13 +85,13 @@ defmodule Pleroma.Web.ActivityPub.ObjectValidators.UserValidator do
end)
end
defp validate_nickname(%("preferredUsername" => nick)) when is_binary(nick) do
defp validate_nickname(%{"preferredUsername" => nick}) when is_binary(nick) do
Oneric marked this conversation as resolved

what will happen if "preferredusername" exists but isn’t a binary?

what will happen if `"preferredusername"` exists but isn’t a binary?

updated to error if we have the preferredUsername key but its type is silly

updated to error if we have the preferredUsername key but its type is silly
if String.valid?(nick) do
:ok
else
{:error, "Nickname is not valid UTF-8"}
end
end
{:error, "Nickname is not valid UTF-8"}
end
end
defp validate_nickname(_), do: :ok
defp validate_nickname(_), do: :ok
end

View file

@ -124,7 +124,6 @@ defmodule Pleroma.Web.MastodonAPI.AccountControllerTest do
assert %{"id" => id} = json_response_and_validate_schema(conn, 200)
assert id == user.id
end
test "accounts fetches correct account for nicknames beginning with numbers", %{conn: conn} do
# Need to set an old-style integer ID to reproduce the problem
@ -180,7 +179,7 @@ defmodule Pleroma.Web.MastodonAPI.AccountControllerTest do
end
test "returns 404 when the username is not valid utf8" do
nick = <<0xc0>>
nick = <<0xC0>>
refute String.valid?(nick)
assert %{"error" => "Can't find user"} =