Add translation module for Argos Translate (#351)
ci/woodpecker/push/woodpecker Pipeline is pending Details

Argos Translate is a Python module for translation and can be used as a command line tool.

This is also the engine for LibreTranslate, for which we already have a module.
Here we can use the engine directly from our server without doing requests to a third party or having to install our own LibreTranslate webservice (obviously you do have to install Argos Translate).

One thing that's currently still missing from Argos Translate is auto-detection of languages (see <https://github.com/argosopentech/argos-translate/issues/9>). For now, when no source language is provided, we just return the text unchanged and report the target language as the detected source language. That way you get a near-immediate response in pleroma-fe when clicking Translate, after which you can select the actual source language from a dropdown.

Argos Translate also doesn't seem to handle HTML very well. Therefore we give admins the option to strip the HTML before translating. I made this an option because I'm unsure if/how this will change in the future.

Co-authored-by: ilja <git@ilja.space>
Reviewed-on: #351
Co-authored-by: ilja <akkoma.dev@ilja.space>
Co-committed-by: ilja <akkoma.dev@ilja.space>
This commit is contained in:
ilja 2022-12-19 13:06:39 +00:00 committed by floatingghost
parent 233c4bb3ba
commit c092fc9fd6
6 changed files with 241 additions and 2 deletions

View File

@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
### Added ### Added
- Prometheus metrics exporting from `/api/v1/akkoma/metrics` - Prometheus metrics exporting from `/api/v1/akkoma/metrics`
- Ability to alter http pool size - Ability to alter http pool size
- Translation of statuses via ArgosTranslate
### Removed ### Removed
- Non-finch HTTP adapters - Non-finch HTTP adapters

View File

@ -882,6 +882,11 @@ config :pleroma, :libre_translate,
url: "http://127.0.0.1:5000", url: "http://127.0.0.1:5000",
api_key: nil api_key: nil
# Defaults for the Argos Translate translator module.
# The two `command_*` values are the executables to run: either a command name
# found in PATH or the full path to the file. `strip_html` controls whether the
# HTML of a post is stripped before it is handed to the translator.
config :pleroma, :argos_translate,
command_argos_translate: "argos-translate",
command_argospm: "argospm",
strip_html: true
# Import environment specific config. This must remain at the bottom # Import environment specific config. This must remain at the bottom
# of this file so it overrides the configuration defined above. # of this file so it overrides the configuration defined above.
import_config "#{Mix.env()}.exs" import_config "#{Mix.env()}.exs"

View File

@ -3442,5 +3442,30 @@ config :pleroma, :config_description, [
suggestion: [nil] suggestion: [nil]
} }
] ]
},
%{
group: :pleroma,
key: :argos_translate,
type: :group,
description: "ArgosTranslate Settings.",
children: [
%{
key: :command_argos_translate,
type: :string,
description: "command for `argos-translate`. Can be the command if it's in your PATH, or the full path to the file.",
suggestion: ["argos-translate"]
},
%{
key: :command_argospm,
type: :string,
description: "command for `argospm`. Can be the command if it's in your PATH, or the full path to the file.",
suggestion: ["argospm"]
},
%{
key: :strip_html,
type: :boolean,
description: "Strip html from the post before translating it."
}
]
} }
] ]

View File

@ -1119,7 +1119,7 @@ Each job has these settings:
### Translation Settings ### Translation Settings
Settings to automatically translate statuses for end users. Currently supported Settings to automatically translate statuses for end users. Currently supported
translation services are DeepL and LibreTranslate. translation services are DeepL and LibreTranslate. The supported command line tool is [Argos Translate](https://github.com/argosopentech/argos-translate).
Translations are available at `/api/v1/statuses/:id/translations/:language`, where Translations are available at `/api/v1/statuses/:id/translations/:language`, where
`language` is the target language code (e.g `en`) `language` is the target language code (e.g `en`)
@ -1128,7 +1128,7 @@ Translations are available at `/api/v1/statuses/:id/translations/:language`, whe
- `:enabled` - enables translation - `:enabled` - enables translation
- `:module` - Sets module to be used - `:module` - Sets module to be used
- Either `Pleroma.Akkoma.Translators.DeepL` or `Pleroma.Akkoma.Translators.LibreTranslate` - Either `Pleroma.Akkoma.Translators.DeepL`, `Pleroma.Akkoma.Translators.LibreTranslate`, or `Pleroma.Akkoma.Translators.ArgosTranslate`
### `:deepl` ### `:deepl`
@ -1140,3 +1140,9 @@ Translations are available at `/api/v1/statuses/:id/translations/:language`, whe
- `:url` - URL of LibreTranslate instance - `:url` - URL of LibreTranslate instance
- `:api_key` - API key for LibreTranslate - `:api_key` - API key for LibreTranslate
### `:argos_translate`
- `:command_argos_translate` - command for `argos-translate`. Can be the command if it's in your PATH, or the full path to the file (default: `argos-translate`).
- `:command_argospm` - command for `argospm`. Can be the command if it's in your PATH, or the full path to the file (default: `argospm`).
- `:strip_html` - Strip html from the post before translating it (default: `true`).

View File

@ -0,0 +1,109 @@
defmodule Pleroma.Akkoma.Translators.ArgosTranslate do
  @moduledoc """
  Translator that shells out to the Argos Translate command line tools.

  Two executables are used, both configurable under `:argos_translate`:

    * `:command_argospm` - run as `argospm list` to discover the installed
      language pairs.
    * `:command_argos_translate` - run to translate a text.

  When `:strip_html` is `true`, the HTML of the text is reduced to plain text
  before translating and the translated text is re-encoded afterwards, with
  newlines turned into `<br/>`.
  """

  @behaviour Pleroma.Akkoma.Translator

  alias Pleroma.Config

  defp argos_translate do
    Config.get([:argos_translate, :command_argos_translate])
  end

  defp argospm do
    Config.get([:argos_translate, :command_argospm])
  end

  defp strip_html? do
    Config.get([:argos_translate, :strip_html])
  end

  # Runs `command` with `args` and always returns `{output, exit_status}`.
  # `System.cmd/3` raises instead of returning when the executable cannot be
  # started, so any exception is converted into a non-zero status with a message
  # describing what actually went wrong.
  defp run_command(command, args) do
    System.cmd(command, args, stderr_to_stdout: true, parallelism: true)
  rescue
    error -> {command_failure(command, error), 1}
  end

  # Builds the error text for a command that could not be run.
  # `:enoent` is what `System.cmd/3` raises for a missing executable.
  defp command_failure(command, %ErlangError{original: :enoent}) do
    "Command #{command} not found"
  end

  defp command_failure(command, error) do
    "Command #{inspect(command)} could not be run (#{Exception.message(error)})"
  end

  defp safe_languages do
    run_command(argospm(), ["list"])
  end

  # Extracts `{source, target}` language codes from the `argospm list` output.
  # Lines are expected to look like `translate-nl_en`. Anything else is skipped
  # rather than crashing, because stderr is merged into the output and may
  # contain unrelated lines.
  defp parse_language_pairs(output) do
    output
    |> String.split("\n", trim: true)
    |> Enum.flat_map(fn line ->
      with "translate-" <> pair <- String.trim(line),
           [from, to] when from != "" and to != "" <- String.split(pair, "_") do
        [{from, to}]
      else
        _ -> []
      end
    end)
  end

  defp language(code), do: %{code: code, name: code}

  @doc """
  Lists the languages of the installed Argos Translate packages.

  Returns `{:ok, source_languages, destination_languages}`, where every language
  is a map `%{code: code, name: code}`, or `{:error, message}` when `argospm`
  could not be run or exited with a non-zero status.
  """
  @spec languages() :: {:ok, [map()], [map()]} | {:error, String.t()}
  @impl Pleroma.Akkoma.Translator
  def languages do
    with {response, 0} <- safe_languages() do
      pairs = parse_language_pairs(response)

      source_langs =
        pairs
        |> Enum.map(fn {from, _to} -> language(from) end)
        |> Enum.uniq()

      dest_langs =
        pairs
        |> Enum.map(fn {_from, to} -> language(to) end)
        |> Enum.uniq()

      {:ok, source_langs, dest_langs}
    else
      {response, _} -> {:error, "ArgosTranslate failed to fetch languages (#{response})"}
    end
  end

  # NOTE(review): the text is passed as a plain positional argument; a text that
  # starts with `-` may be interpreted as an option by the CLI - confirm against
  # the argos-translate argument parser before relying on it for such input.
  defp safe_translate(string, from_language, to_language) do
    run_command(
      argos_translate(),
      ["--from-lang", from_language, "--to-lang", to_language, string]
    )
  end

  # Turns HTML into plain text, keeping paragraph, line break and list item
  # boundaries as newlines. The line break pattern also covers `<br />` and
  # upper-case variants, which would otherwise be stripped without a newline.
  defp clean_string(string, true) do
    string
    |> String.replace(~r{<br\s*/?>}i, "\n")
    |> String.replace(["<p>", "</p>", "<li>"], "\n")
    |> Pleroma.HTML.strip_tags()
    |> HtmlEntities.decode()
  end

  defp clean_string(string, _), do: string

  # Re-encodes plain text as HTML; the inverse of the newline handling above.
  defp htmlify_response(string, true) do
    string
    |> HtmlEntities.encode()
    |> String.replace("\n", "<br/>")
  end

  defp htmlify_response(string, _), do: string

  @doc """
  Translates `string` from `from_language` to `to_language`.

  Returns `{:ok, source_language, translated_text}` or `{:error, message}` when
  the command could not be run or exited with a non-zero status.

  When `from_language` is `nil` the text is returned unchanged with
  `to_language` reported as the source language, see the comment in that clause.
  """
  @spec translate(String.t(), String.t() | nil, String.t()) ::
          {:ok, String.t(), String.t()} | {:error, String.t()}
  @impl Pleroma.Akkoma.Translator
  def translate(string, nil, to_language) do
    # Akkoma's Pleroma-fe expects us to detect the source language automatically.
    # Argos-translate doesn't have that option (yet?)
    # see <https://github.com/argosopentech/argos-translate/issues/9>
    # For now we return the text unchanged, supposedly translated from the target language.
    # Afterwards people get the option to overwrite the source language from a dropdown.
    {:ok, to_language, string}
  end

  def translate(string, from_language, to_language) do
    # Argos Translate doesn't properly translate HTML (yet?)
    # For now we give admins the option to strip the html before translating
    # Note that we have to add some html back to the response afterwards
    # The setting is read once so that cleaning and re-encoding always agree.
    strip_html = strip_html?()
    cleaned = clean_string(string, strip_html)

    with {translated, 0} <- safe_translate(cleaned, from_language, to_language) do
      {:ok, from_language, htmlify_response(translated, strip_html)}
    else
      {response, _} -> {:error, "ArgosTranslate failed to translate (#{response})"}
    end
  end
end

View File

@ -0,0 +1,93 @@
defmodule Pleroma.Akkoma.Translators.ArgosTranslateTest do
  # These tests must not run concurrently with other test modules: `with_mock`
  # replaces the `System` module globally for the duration of the block, and
  # `clear_config` changes application-wide configuration.
  use Pleroma.DataCase, async: false

  import Mock

  alias Pleroma.Akkoma.Translators.ArgosTranslate

  setup do
    # Point both commands at names that only the mocks below recognise.
    clear_config([:argos_translate, :command_argos_translate], "argos-translate_test")
    clear_config([:argos_translate, :command_argospm], "argospm_test")
  end

  test "it lists available languages" do
    languages =
      with_mock System, [:passthrough],
        cmd: fn "argospm_test", ["list"], _ ->
          {"translate-nl_en\ntranslate-en_nl\ntranslate-ja_en\n", 0}
        end do
        ArgosTranslate.languages()
      end

    assert {:ok, source_langs, dest_langs} = languages

    assert [%{code: "en", name: "en"}, %{code: "ja", name: "ja"}, %{code: "nl", name: "nl"}] =
             source_langs |> Enum.sort()

    assert [%{code: "en", name: "en"}, %{code: "nl", name: "nl"}] = dest_langs |> Enum.sort()
  end

  test "it translates from the to language when no language is set and returns the text unchanged" do
    assert {:ok, "nl", "blabla"} = ArgosTranslate.translate("blabla", nil, "nl")
  end

  test "it translates from the provided language if provided" do
    translation_response =
      with_mock System, [:passthrough],
        cmd: fn "argos-translate_test", ["--from-lang", "nl", "--to-lang", "en", "blabla"], _ ->
          {"yadayada", 0}
        end do
        ArgosTranslate.translate("blabla", "nl", "en")
      end

    assert {:ok, "nl", "yadayada"} = translation_response
  end

  test "it returns a proper error when the executable can't be found" do
    non_existing_command = "sfqsfgqsefd"
    clear_config([:argos_translate, :command_argos_translate], non_existing_command)
    clear_config([:argos_translate, :command_argospm], non_existing_command)

    # Guard against the unlikely case that the name exists on the test machine.
    assert nil == System.find_executable(non_existing_command)

    assert {:error, "ArgosTranslate failed to fetch languages" <> _} = ArgosTranslate.languages()

    assert {:error, "ArgosTranslate failed to translate" <> _} =
             ArgosTranslate.translate("blabla", "nl", "en")
  end

  test "it can strip html" do
    content =
      ~s[<p>What&#39;s up my fellow fedizens?</p><p>So anyway</p><ul><li><a class="hashtag" data-tag="cofe" href="https://suya.space/tag/cofe">#cofe</a></li><li><a class="hashtag" data-tag="suya" href="https://cofe.space/tag/suya">#Suya</a></li></ul><p>ammiright!<br/>:ablobfoxhyper:</p>]

    stripped_content =
      "\nWhat's up my fellow fedizens?\n\nSo anyway\n\n#cofe\n#Suya\nammiright!\n:ablobfoxhyper:\n"

    expected_response_strip_html =
      "<br/>What&#39;s up my fellow fedizens?<br/><br/>So anyway<br/><br/>#cofe<br/>#Suya<br/>ammiright!<br/>:ablobfoxhyper:<br/>"

    # Set the option explicitly so the first half of the test does not depend
    # on whatever the ambient configuration happens to be.
    clear_config([:argos_translate, :strip_html], true)

    response_strip_html =
      with_mock System, [:passthrough],
        cmd: fn "argos-translate_test",
                ["--from-lang", _, "--to-lang", _, ^stripped_content],
                _ ->
          {stripped_content, 0}
        end do
        ArgosTranslate.translate(content, "nl", "en")
      end

    clear_config([:argos_translate, :strip_html], false)

    response_no_strip_html =
      with_mock System, [:passthrough],
        cmd: fn "argos-translate_test", ["--from-lang", _, "--to-lang", _, string], _ ->
          {string, 0}
        end do
        ArgosTranslate.translate(content, "nl", "en")
      end

    assert {:ok, "nl", content} == response_no_strip_html
    assert {:ok, "nl", expected_response_strip_html} == response_strip_html
  end
end