parse_source option to extract title

This commit is contained in:
Peter Zingg 2022-12-20 09:01:05 -08:00
parent c483c7dac4
commit 8b488899aa
7 changed files with 58 additions and 19 deletions

View file

@ -283,9 +283,10 @@
config :pleroma, :feed, config :pleroma, :feed,
post_title: %{ post_title: %{
# Set max_length to 0 to suppress titles (Dave Winer suggestion) # Set max_length to 0 to suppress titles (Dave Winer suggestion)
# Previously 100 max_length: 100,
max_length: 0, omission: "...",
omission: "..." # New method to extract title
parse_source: true
} }
config :pleroma, :markup, config :pleroma, :markup,

View file

@ -2938,7 +2938,13 @@
type: :string, type: :string,
description: "Replacement which will be used after truncating string", description: "Replacement which will be used after truncating string",
suggestions: ["..."] suggestions: ["..."]
} },
%{
key: :parse_source,
type: :boolean,
description: "Use content type-specific parsers to extract title (ignores max_length)",
suggestions: [true]
},
] ]
} }
] ]

View file

@ -70,14 +70,23 @@ def logo(user) do
def last_activity(activities), do: List.last(activities) def last_activity(activities), do: List.last(activities)
def maybe_activity_title(activity_data, opts \\ %{}) do def maybe_activity_title(activity, opts \\ %{}) do
case activity_title(activity_data, opts) do case activity_title(activity, opts) do
"" -> "" "" -> ""
title -> "<title>#{title}</title>" title -> "<title>#{title}</title>"
end end
end end
def activity_title(%{"content" => content}, opts \\ %{}) do def activity_title(activity, opts \\ %{})
# Title for feeds configured with `parse_source: true`: takes the first
# element of the tuple returned by `split_content/3` for the activity's
# source content and media type.
# NOTE(review): unlike the "content" clause, the result is not piped through
# `escape/1` here — confirm `split_content/3` returns XML-safe text.
def activity_title(
      %{"source" => %{"mediaType" => media_type, "content" => raw}},
      %{parse_source: true} = opts
    ) do
  raw
  |> split_content(media_type, opts)
  |> elem(0)
end
def activity_title(%{"content" => content}, opts) do
content content
|> Pleroma.Web.Metadata.Utils.scrub_html() |> Pleroma.Web.Metadata.Utils.scrub_html()
|> Pleroma.Emoji.Formatter.demojify() |> Pleroma.Emoji.Formatter.demojify()
@ -85,13 +94,36 @@ def activity_title(%{"content" => content}, opts \\ %{}) do
|> escape() |> escape()
end end
def activity_content(%{"content" => content}) do def activity_title(_, _), do: ""
content
|> String.replace(~r/[\n\r]/, "") def activity_content(activity, opts \\ %{})
def activity_content(
%{"source" => %{"mediaType" => content_type, "content" => content}},
%{parse_source: true} = opts
) do
start = split_content(content, content_type, opts) |> elem(1)
length = String.length(content)
{text, _mentions, _tags} =
String.slice(content, start, length)
|> Pleroma.Web.CommonAPI.Utils.format_input(content_type)
text
|> String.replace(~r/(\r?\n)+/, " ")
|> String.trim()
|> escape() |> escape()
end end
def activity_content(_), do: "" def activity_content(%{"content" => content}, _opts) do
# Replace 1 or more newlines with 1 space
content
|> String.replace(~r/(\r?\n)+/, " ")
|> String.trim()
|> escape()
end
def activity_content(_, _), do: ""
def activity_context(activity), do: escape(activity.data["context"]) def activity_context(activity), do: escape(activity.data["context"])
@ -111,15 +143,15 @@ def source_content(%{"source" => %{"mediaType" => _, "content" => content}}) do
xml_escape(content) xml_escape(content)
end end
def parse_title(activity_data, opts \\ %{}) def parse_title(activity, opts \\ %{})
def parse_title(%{"source" => %{"mediaType" => content_type, "content" => content}}, opts) do def parse_title(%{"source" => %{"mediaType" => content_type, "content" => content}}, opts) do
split_content(content, content_type, opts) |> elem(0) split_content(content, content_type, opts) |> elem(0)
end end
def parse_title(_activity_data, _opts), do: 0 def parse_title(_activity, _opts), do: 0
def parse_description_offset(activity_data, opts \\ %{}) def parse_description_offset(activity, opts \\ %{})
def parse_description_offset( def parse_description_offset(
%{"source" => %{"mediaType" => content_type, "content" => content}}, %{"source" => %{"mediaType" => content_type, "content" => content}},
@ -128,7 +160,7 @@ def parse_description_offset(
split_content(content, content_type, opts) |> elem(1) split_content(content, content_type, opts) |> elem(1)
end end
def parse_description_offset(_activity_data, _opts), do: 0 def parse_description_offset(_activity, _opts), do: 0
@spec split_content(binary(), binary(), any()) :: {binary(), non_neg_integer()} @spec split_content(binary(), binary(), any()) :: {binary(), non_neg_integer()}
def split_content(content, "text/html", _opts) do def split_content(content, "text/html", _opts) do

View file

@ -5,7 +5,7 @@
<%= maybe_activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %> <%= maybe_activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %>
<content type="html"><%= activity_content(@data) %></content> <content type="html"><%= activity_content(@data, Keyword.get(@feed_config, :post_title, %{})) %></content>
<published><%= @activity.data["published"] %></published> <published><%= @activity.data["published"] %></published>
<updated><%= @activity.data["published"] %></updated> <updated><%= @activity.data["published"] %></updated>
<ostatus:conversation ref="<%= activity_context(@activity) %>"> <ostatus:conversation ref="<%= activity_context(@activity) %>">

View file

@ -5,7 +5,7 @@
<%= maybe_activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %> <%= maybe_activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %>
<description><%= activity_content(@data) %></description> <description><%= activity_content(@data, Keyword.get(@feed_config, :post_title, %{})) %></description>
<pubDate><%= @activity.data["published"] %></pubDate> <pubDate><%= @activity.data["published"] %></pubDate>
<updated><%= @activity.data["published"] %></updated> <updated><%= @activity.data["published"] %></updated>
<ostatus:conversation ref="<%= activity_context(@activity) %>"> <ostatus:conversation ref="<%= activity_context(@activity) %>">

View file

@ -8,7 +8,7 @@
<%= maybe_activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %> <%= maybe_activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %>
<content type="html"><%= activity_content(@data) %></content> <content type="html"><%= activity_content(@data, Keyword.get(@feed_config, :post_title, %{})) %></content>
<%= if @activity.local do %> <%= if @activity.local do %>
<link type="application/atom+xml" href='<%= @data["id"] %>' rel="self"/> <link type="application/atom+xml" href='<%= @data["id"] %>' rel="self"/>

View file

@ -5,7 +5,7 @@
<%= maybe_activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %> <%= maybe_activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %>
<description><%= activity_content(@data) %></description> <description><%= activity_content(@data, Keyword.get(@feed_config, :post_title, %{})) %></description>
<%= for attachment <- @data["attachment"] || [] do %> <%= for attachment <- @data["attachment"] || [] do %>
<enclosure url="<%= attachment_href(attachment) %>" type="<%= attachment_type(attachment) %>"/> <enclosure url="<%= attachment_href(attachment) %>" type="<%= attachment_type(attachment) %>"/>
<% end %> <% end %>