From f027db9654f90df92361ccd1215161862c9ff006 Mon Sep 17 00:00:00 2001 From: ilja Date: Thu, 8 Dec 2022 12:09:06 +0100 Subject: [PATCH 1/4] Add translation module for Argos Translate Argos Translate is a Python module for translation and can be used as a command line tool. This is also the engine for LibreTranslate, for which we already have a module. Here we can use the engine directly from our server without doing requests to a third party or having to install our own LibreTranslate webservice. One thing that's currently still missing from ArgosTranslate is auto-detection of languages. --- config/config.exs | 5 ++ docs/docs/configuration/cheatsheet.md | 6 ++ .../akkoma/translators/argos_translate.ex | 83 +++++++++++++++++++ .../translators/argos_translate_test.exs | 66 +++++++++++++++ 4 files changed, 160 insertions(+) create mode 100644 lib/pleroma/akkoma/translators/argos_translate.ex create mode 100644 test/pleroma/akkoma/translators/argos_translate_test.exs diff --git a/config/config.exs b/config/config.exs index 48290fb05..6112cee09 100644 --- a/config/config.exs +++ b/config/config.exs @@ -882,6 +882,11 @@ config :pleroma, :libre_translate, url: "http://127.0.0.1:5000", api_key: nil +config :pleroma, :argos_translate, + command_argos_translate: "argos-translate", + command_argospm: "argospm", + default_language: "en" + # Import environment specific config. This must remain at the bottom # of this file so it overrides the configuration defined above. 
import_config "#{Mix.env()}.exs" diff --git a/docs/docs/configuration/cheatsheet.md b/docs/docs/configuration/cheatsheet.md index 22fc4ecbe..e369545ff 100644 --- a/docs/docs/configuration/cheatsheet.md +++ b/docs/docs/configuration/cheatsheet.md @@ -1140,3 +1140,9 @@ Translations are available at `/api/v1/statuses/:id/translations/:language`, whe - `:url` - URL of LibreTranslate instance - `:api_key` - API key for LibreTranslate + +### `:argos_translate` + +- `:command_argos_translate` - command for `argos-translate`. Can be the command if it's in your PATH, or the full path to the file (default: `argos-translate`). +- `:command_argospm` - command for `argospm`. Can be the command if it's in your PATH, or the full path to the file (default: `argospm`). +- `:default_language` - When no language is provided to translate from, this language will be used. Must be a two letter langage code from a language you have installed (default: `en`). diff --git a/lib/pleroma/akkoma/translators/argos_translate.ex b/lib/pleroma/akkoma/translators/argos_translate.ex new file mode 100644 index 000000000..0e64c4743 --- /dev/null +++ b/lib/pleroma/akkoma/translators/argos_translate.ex @@ -0,0 +1,83 @@ +defmodule Pleroma.Akkoma.Translators.ArgosTranslate do + @behaviour Pleroma.Akkoma.Translator + + alias Pleroma.Config + + defp argos_translate do + Config.get([:argos_translate, :command_argos_translate]) + end + + defp argospm do + Config.get([:argos_translate, :command_argospm]) + end + + defp default_language do + Config.get([:argos_translate, :default_language]) + end + + defp safe_languages() do + try do + System.cmd(argospm(), ["list"], stderr_to_stdout: true, parallelism: true) + rescue + _ -> {"Command #{argospm()} not found", 1} + end + end + + @impl Pleroma.Akkoma.Translator + def languages do + with {response, 0} <- safe_languages() do + langs = + response + |> String.split("\n", trim: true) + |> Enum.map(fn + "translate-" <> l -> String.split(l, "_") + _ -> "" + end) + + 
source_langs = + langs + |> Enum.map(fn [l, _] -> %{code: l, name: l} end) + |> Enum.uniq() + + dest_langs = + langs + |> Enum.map(fn [_, l] -> %{code: l, name: l} end) + |> Enum.uniq() + + {:ok, source_langs, dest_langs} + else + {response, _} -> {:error, "ArgosTranslate failed to fetch languages (#{response})"} + end + end + + defp safe_translate(string, from_language, to_language) do + try do + System.cmd( + argos_translate(), + ["--from-lang", from_language, "--to-lang", to_language, string], + stderr_to_stdout: true, + parallelism: true + ) + rescue + _ -> {"Command #{argos_translate()} not found", 1} + end + end + + @impl Pleroma.Akkoma.Translator + def translate(string, from_language, to_language) do + # Akkoma's Pleroma-fe expects us to detect the source language automatically. + # Argos-translate doesn't have that option (yet?) + # see + # For now we choose a default source language from settings. + # Afterwards people get the option to overwrite the source language from a dropdown. 
+ from_language = from_language || default_language() + to_language = to_language || default_language() + + with {translated, 0} <- + safe_translate(string, from_language, to_language) do + {:ok, from_language, translated} + else + {response, _} -> {:error, "ArgosTranslate failed to translate (#{response})"} + end + end +end diff --git a/test/pleroma/akkoma/translators/argos_translate_test.exs b/test/pleroma/akkoma/translators/argos_translate_test.exs new file mode 100644 index 000000000..4c7c67d5a --- /dev/null +++ b/test/pleroma/akkoma/translators/argos_translate_test.exs @@ -0,0 +1,66 @@ +defmodule Pleroma.Akkoma.Translators.ArgosTranslateTest do + alias Pleroma.Akkoma.Translators.ArgosTranslate + + import Mock + + use Pleroma.DataCase, async: true + + setup do + clear_config([:argos_translate, :command_argos_translate], "argos-translate_test") + clear_config([:argos_translate, :command_argospm], "argospm_test") + end + + test "it lists available languages" do + languages = + with_mock System, [:passthrough], + cmd: fn "argospm_test", ["list"], _ -> + {"translate-nl_en\ntranslate-en_nl\ntranslate-ja_en\n", 0} + end do + ArgosTranslate.languages() + end + + assert {:ok, source_langs, dest_langs} = languages + + assert [%{code: "en", name: "en"}, %{code: "ja", name: "ja"}, %{code: "nl", name: "nl"}] = + source_langs |> Enum.sort() + + assert [%{code: "en", name: "en"}, %{code: "nl", name: "nl"}] = dest_langs |> Enum.sort() + end + + test "it translates from default language when no language is set" do + translation_response = + with_mock System, [:passthrough], + cmd: fn "argos-translate_test", ["--from-lang", "en", "--to-lang", "fr", "blabla"], _ -> + {"yadayada", 0} + end do + ArgosTranslate.translate("blabla", nil, "fr") + end + + assert {:ok, "en", "yadayada"} = translation_response + end + + test "it translates from the provided language" do + translation_response = + with_mock System, [:passthrough], + cmd: fn "argos-translate_test", ["--from-lang", "nl", 
"--to-lang", "en", "blabla"], _ -> + {"yadayada", 0} + end do + ArgosTranslate.translate("blabla", "nl", "en") + end + + assert {:ok, "nl", "yadayada"} = translation_response + end + + test "it returns a proper error when the executable can't be found" do + non_existing_command = "sfqsfgqsefd" + clear_config([:argos_translate, :command_argos_translate], non_existing_command) + clear_config([:argos_translate, :command_argospm], non_existing_command) + + assert nil == System.find_executable(non_existing_command) + + assert {:error, "ArgosTranslate failed to fetch languages" <> _} = ArgosTranslate.languages() + + assert {:error, "ArgosTranslate failed to translate" <> _} = + ArgosTranslate.translate("blabla", "nl", "en") + end +end -- 2.34.1 From f78bada08d320da5474e3793067d2dd2774e3c66 Mon Sep 17 00:00:00 2001 From: ilja Date: Fri, 9 Dec 2022 13:10:22 +0100 Subject: [PATCH 2/4] Allow stripping of html for Argos Translate module Argos Translate doesn't properly translate HTML. Libre Translate uses an extra module for that. Here we strip the HTML by default before translating and then add rudimentary HTML back so it displays properly. Stripping HTML is a default-on option, just in case argos adds support for html in the language models later on. That way admins will be able to overwrite the setting and not strip any more. --- config/config.exs | 3 +- docs/docs/configuration/cheatsheet.md | 5 ++- .../akkoma/translators/argos_translate.ex | 38 ++++++++++++++++--- .../translators/argos_translate_test.exs | 35 +++++++++++++++++ 4 files changed, 73 insertions(+), 8 deletions(-) diff --git a/config/config.exs b/config/config.exs index 6112cee09..1507c9360 100644 --- a/config/config.exs +++ b/config/config.exs @@ -885,7 +885,8 @@ config :pleroma, :libre_translate, config :pleroma, :argos_translate, command_argos_translate: "argos-translate", command_argospm: "argospm", - default_language: "en" + fallback_language: "en", + strip_html: true # Import environment specific config. 
This must remain at the bottom # of this file so it overrides the configuration defined above. diff --git a/docs/docs/configuration/cheatsheet.md b/docs/docs/configuration/cheatsheet.md index e369545ff..d0b03e40d 100644 --- a/docs/docs/configuration/cheatsheet.md +++ b/docs/docs/configuration/cheatsheet.md @@ -1143,6 +1143,9 @@ Translations are available at `/api/v1/statuses/:id/translations/:language`, whe ### `:argos_translate` +[Argos Translate](https://github.com/argosopentech/argos-translate) is the library used by Libre Translate and can run as a command line tool. It's more basic than Libre Translate as it doesn't provide a way to properly handle html or language detection. + - `:command_argos_translate` - command for `argos-translate`. Can be the command if it's in your PATH, or the full path to the file (default: `argos-translate`). - `:command_argospm` - command for `argospm`. Can be the command if it's in your PATH, or the full path to the file (default: `argospm`). -- `:default_language` - When no language is provided to translate from, this language will be used. Must be a two letter langage code from a language you have installed (default: `en`). +- `:fallback_language` - When no language is provided to translate from, this language will be used. Must be a two letter langage code from a language you have installed (default: `en`). +- `:strip_html` - Strip html from the post before translating the text (default: `true`). 
diff --git a/lib/pleroma/akkoma/translators/argos_translate.ex b/lib/pleroma/akkoma/translators/argos_translate.ex index 0e64c4743..375a93b41 100644 --- a/lib/pleroma/akkoma/translators/argos_translate.ex +++ b/lib/pleroma/akkoma/translators/argos_translate.ex @@ -11,8 +11,8 @@ defmodule Pleroma.Akkoma.Translators.ArgosTranslate do Config.get([:argos_translate, :command_argospm]) end - defp default_language do - Config.get([:argos_translate, :default_language]) + defp fallback_language do + Config.get([:argos_translate, :fallback_language]) end defp safe_languages() do @@ -63,19 +63,45 @@ defmodule Pleroma.Akkoma.Translators.ArgosTranslate do end end + defp clean_string(string, true) do + string + |> String.replace("

", "\n") + |> String.replace("

", "\n") + |> String.replace("
", "\n") + |> String.replace("
", "\n") + |> String.replace("
  • ", "\n") + |> Pleroma.HTML.strip_tags() + |> HtmlEntities.decode() + end + + defp clean_string(string, _), do: string + + defp htmlify_response(string, true) do + string + |> HtmlEntities.encode() + |> String.replace("\n", "
    ") + end + + defp htmlify_response(string, _), do: string + @impl Pleroma.Akkoma.Translator def translate(string, from_language, to_language) do + strip_html = Config.get([:argos_translate, :strip_html]) # Akkoma's Pleroma-fe expects us to detect the source language automatically. # Argos-translate doesn't have that option (yet?) # see - # For now we choose a default source language from settings. + # For now we choose a fallback source language from settings. # Afterwards people get the option to overwrite the source language from a dropdown. - from_language = from_language || default_language() - to_language = to_language || default_language() + from_language = from_language || fallback_language() + to_language = to_language || fallback_language() + # Argos Translate doesn't properly translate HTML (yet?) + # For now we give admins the option to strip the html before translating + # Note that we have to add some html back to the response afterwards + string = clean_string(string, strip_html) with {translated, 0} <- safe_translate(string, from_language, to_language) do - {:ok, from_language, translated} + {:ok, from_language, translated |> htmlify_response(strip_html)} else {response, _} -> {:error, "ArgosTranslate failed to translate (#{response})"} end diff --git a/test/pleroma/akkoma/translators/argos_translate_test.exs b/test/pleroma/akkoma/translators/argos_translate_test.exs index 4c7c67d5a..57368055a 100644 --- a/test/pleroma/akkoma/translators/argos_translate_test.exs +++ b/test/pleroma/akkoma/translators/argos_translate_test.exs @@ -63,4 +63,39 @@ defmodule Pleroma.Akkoma.Translators.ArgosTranslateTest do assert {:error, "ArgosTranslate failed to translate" <> _} = ArgosTranslate.translate("blabla", "nl", "en") end + + test "it can strip html" do + content = + ~s[

    What's up my fellow fedizens?

    So anyway

    ammiright!
    :ablobfoxhyper:

    ] + + stripped_content = + "\nWhat's up my fellow fedizens?\n\nSo anyway\n\n#cofe\n#Suya\nammiright!\n:ablobfoxhyper:\n" + + expected_response_strip_html = + "
    What's up my fellow fedizens?

    So anyway

    #cofe
    #Suya
    ammiright!
    :ablobfoxhyper:
    " + + response_strip_html = + with_mock System, [:passthrough], + cmd: fn "argos-translate_test", + ["--from-lang", _, "--to-lang", _, ^stripped_content], + _ -> + {stripped_content, 0} + end do + ArgosTranslate.translate(content, "nl", "en") + end + + clear_config([:argos_translate, :strip_html], false) + + response_no_strip_html = + with_mock System, [:passthrough], + cmd: fn "argos-translate_test", ["--from-lang", _, "--to-lang", _, string], _ -> + {string, 0} + end do + ArgosTranslate.translate(content, "nl", "en") + end + + assert {:ok, "nl", content} == response_no_strip_html + + assert {:ok, "nl", expected_response_strip_html} == response_strip_html + end end -- 2.34.1 From d1bb48014652ea42cc3205ef11d396bc0f0c6419 Mon Sep 17 00:00:00 2001 From: ilja Date: Sun, 11 Dec 2022 00:53:45 +0100 Subject: [PATCH 3/4] Change ArgosTranslate strategy for dealing with nil from-language Instead of choosing a fallback language, we just return the input, supposedly translated from the target language. This gives us a much faster response, so you can immediately choose the correct from language yourself in pleroma-fe. Some other cleanup and improvements to the docs are also done. --- config/config.exs | 1 - docs/docs/configuration/cheatsheet.md | 9 +++------ .../akkoma/translators/argos_translate.ex | 19 ++++++++++--------- .../translators/argos_translate_test.exs | 14 +++----------- 4 files changed, 16 insertions(+), 27 deletions(-) diff --git a/config/config.exs b/config/config.exs index 1507c9360..0611f7c26 100644 --- a/config/config.exs +++ b/config/config.exs @@ -885,7 +885,6 @@ config :pleroma, :libre_translate, config :pleroma, :argos_translate, command_argos_translate: "argos-translate", command_argospm: "argospm", - fallback_language: "en", strip_html: true # Import environment specific config. 
This must remain at the bottom diff --git a/docs/docs/configuration/cheatsheet.md b/docs/docs/configuration/cheatsheet.md index d0b03e40d..4e84b9a44 100644 --- a/docs/docs/configuration/cheatsheet.md +++ b/docs/docs/configuration/cheatsheet.md @@ -1119,7 +1119,7 @@ Each job has these settings: ### Translation Settings Settings to automatically translate statuses for end users. Currently supported -translation services are DeepL and LibreTranslate. +translation services are DeepL and LibreTranslate. The supported command line tool is [Argos Translate](https://github.com/argosopentech/argos-translate). Translations are available at `/api/v1/statuses/:id/translations/:language`, where `language` is the target language code (e.g `en`) @@ -1128,7 +1128,7 @@ Translations are available at `/api/v1/statuses/:id/translations/:language`, whe - `:enabled` - enables translation - `:module` - Sets module to be used - - Either `Pleroma.Akkoma.Translators.DeepL` or `Pleroma.Akkoma.Translators.LibreTranslate` + - Either `Pleroma.Akkoma.Translators.DeepL`, `Pleroma.Akkoma.Translators.LibreTranslate`, or `Pleroma.Akkoma.Translators.ArgosTranslate` ### `:deepl` @@ -1143,9 +1143,6 @@ Translations are available at `/api/v1/statuses/:id/translations/:language`, whe ### `:argos_translate` -[Argos Translate](https://github.com/argosopentech/argos-translate) is the library used by Libre Translate and can run as a command line tool. It's more basic than Libre Translate as it doesn't provide a way to properly handle html or language detection. - - `:command_argos_translate` - command for `argos-translate`. Can be the command if it's in your PATH, or the full path to the file (default: `argos-translate`). - `:command_argospm` - command for `argospm`. Can be the command if it's in your PATH, or the full path to the file (default: `argospm`). -- `:fallback_language` - When no language is provided to translate from, this language will be used. 
Must be a two letter langage code from a language you have installed (default: `en`). -- `:strip_html` - Strip html from the post before translating the text (default: `true`). +- `:strip_html` - Strip html from the post before translating it (default: `true`). diff --git a/lib/pleroma/akkoma/translators/argos_translate.ex b/lib/pleroma/akkoma/translators/argos_translate.ex index 375a93b41..9e19b14c7 100644 --- a/lib/pleroma/akkoma/translators/argos_translate.ex +++ b/lib/pleroma/akkoma/translators/argos_translate.ex @@ -11,8 +11,8 @@ defmodule Pleroma.Akkoma.Translators.ArgosTranslate do Config.get([:argos_translate, :command_argospm]) end - defp fallback_language do - Config.get([:argos_translate, :fallback_language]) + defp strip_html? do + Config.get([:argos_translate, :strip_html]) end defp safe_languages() do @@ -85,23 +85,24 @@ defmodule Pleroma.Akkoma.Translators.ArgosTranslate do defp htmlify_response(string, _), do: string @impl Pleroma.Akkoma.Translator - def translate(string, from_language, to_language) do - strip_html = Config.get([:argos_translate, :strip_html]) + def translate(string, nil, to_language) do # Akkoma's Pleroma-fe expects us to detect the source language automatically. # Argos-translate doesn't have that option (yet?) # see - # For now we choose a fallback source language from settings. + # For now we return the text unchanged, supposedly translated from the target language. # Afterwards people get the option to overwrite the source language from a dropdown. - from_language = from_language || fallback_language() - to_language = to_language || fallback_language() + {:ok, to_language, string} + end + + def translate(string, from_language, to_language) do # Argos Translate doesn't properly translate HTML (yet?) 
# For now we give admins the option to strip the html before translating # Note that we have to add some html back to the response afterwards - string = clean_string(string, strip_html) + string = clean_string(string, strip_html?()) with {translated, 0} <- safe_translate(string, from_language, to_language) do - {:ok, from_language, translated |> htmlify_response(strip_html)} + {:ok, from_language, translated |> htmlify_response(strip_html?())} else {response, _} -> {:error, "ArgosTranslate failed to translate (#{response})"} end diff --git a/test/pleroma/akkoma/translators/argos_translate_test.exs b/test/pleroma/akkoma/translators/argos_translate_test.exs index 57368055a..977df1693 100644 --- a/test/pleroma/akkoma/translators/argos_translate_test.exs +++ b/test/pleroma/akkoma/translators/argos_translate_test.exs @@ -27,19 +27,11 @@ defmodule Pleroma.Akkoma.Translators.ArgosTranslateTest do assert [%{code: "en", name: "en"}, %{code: "nl", name: "nl"}] = dest_langs |> Enum.sort() end - test "it translates from default language when no language is set" do - translation_response = - with_mock System, [:passthrough], - cmd: fn "argos-translate_test", ["--from-lang", "en", "--to-lang", "fr", "blabla"], _ -> - {"yadayada", 0} - end do - ArgosTranslate.translate("blabla", nil, "fr") - end - - assert {:ok, "en", "yadayada"} = translation_response + test "it translates from the to language when no language is set and returns the text unchanged" do + assert {:ok, "nl", "blabla"} = ArgosTranslate.translate("blabla", nil, "nl") end - test "it translates from the provided language" do + test "it translates from the provided language if provided" do translation_response = with_mock System, [:passthrough], cmd: fn "argos-translate_test", ["--from-lang", "nl", "--to-lang", "en", "blabla"], _ -> -- 2.34.1 From d5b3d9104db7a6a805b1b31dfbd1b15bb4ad161c Mon Sep 17 00:00:00 2001 From: ilja Date: Sat, 17 Dec 2022 13:22:23 +0100 Subject: [PATCH 4/4] Code review * Add Changelog and 
description.exs * Also cleaned up some code that was left behind when still poking at things a bit --- CHANGELOG.md | 1 + config/description.exs | 25 +++++++++++++++++++ .../akkoma/translators/argos_translate.ex | 1 - 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6dcbc4b14..efcfc6727 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Added - Prometheus metrics exporting from `/api/v1/akkoma/metrics` - Ability to alter http pool size +- Translation of statuses via ArgosTranslate ### Removed - Non-finch HTTP adapters diff --git a/config/description.exs b/config/description.exs index eb61c7218..d7cce0ca1 100644 --- a/config/description.exs +++ b/config/description.exs @@ -3443,5 +3443,30 @@ config :pleroma, :config_description, [ suggestion: [nil] } ] + }, + %{ + group: :pleroma, + key: :argos_translate, + type: :group, + description: "ArgosTranslate Settings.", + children: [ + %{ + key: :command_argos_translate, + type: :string, + description: "command for `argos-translate`. Can be the command if it's in your PATH, or the full path to the file.", + suggestion: ["argos-translate"] + }, + %{ + key: :command_argospm, + type: :string, + description: "command for `argospm`. Can be the command if it's in your PATH, or the full path to the file.", + suggestion: ["argospm"] + }, + %{ + key: :strip_html, + type: :boolean, + description: "Strip html from the post before translating it." 
+ } + ] } ] diff --git a/lib/pleroma/akkoma/translators/argos_translate.ex b/lib/pleroma/akkoma/translators/argos_translate.ex index 9e19b14c7..dfec81d0a 100644 --- a/lib/pleroma/akkoma/translators/argos_translate.ex +++ b/lib/pleroma/akkoma/translators/argos_translate.ex @@ -31,7 +31,6 @@ defmodule Pleroma.Akkoma.Translators.ArgosTranslate do |> String.split("\n", trim: true) |> Enum.map(fn "translate-" <> l -> String.split(l, "_") - _ -> "" end) source_langs = -- 2.34.1