Compare commits

...

4 commits

11 changed files with 251 additions and 27 deletions

View file

@ -282,8 +282,11 @@ config :pleroma, :welcome,
config :pleroma, :feed,
post_title: %{
# Set max_length to 0 to suppress titles (Dave Winer suggestion)
max_length: 100,
omission: "..."
omission: "...",
# New method to extract title
parse_source: false
}
config :pleroma, :markup,

View file

@ -2919,6 +2919,7 @@ config :pleroma, :config_description, [
%{
group: :pleroma,
key: :feed,
label: "RSS Feeds",
type: :group,
description: "Configure feed rendering",
children: [
@ -2938,7 +2939,13 @@ config :pleroma, :config_description, [
type: :string,
description: "Replacement which will be used after truncating string",
suggestions: ["..."]
}
},
%{
key: :parse_source,
type: :boolean,
description: "Use content type-specific parsers to extract title (ignores max_length)",
suggestions: [true]
},
]
}
]

View file

@ -150,15 +150,19 @@ defmodule Pleroma.Formatter do
|> Enum.join("")
end
def truncate(text, max_length \\ 200, omission \\ "...") do
def truncate(text, max_length \\ 200, omission \\ "...") when max_length >= 0 do
# Remove trailing whitespace
text = Regex.replace(~r/([^ \t\r\n])([ \t]+$)/u, text, "\\g{1}")
if String.length(text) < max_length do
text
else
length_with_omission = max_length - String.length(omission)
String.slice(text, 0, length_with_omission) <> omission
length_with_omission = max_length - String.length(omission)
cond do
String.length(text) <= max_length ->
text
length_with_omission > 0 ->
String.slice(text, 0, length_with_omission) <> omission
true ->
String.slice(text, 0, max_length)
end
end

View file

@ -12,6 +12,7 @@ defmodule Pleroma.Web.Feed.FeedView do
alias Pleroma.Web.Gettext
alias Pleroma.Web.MediaProxy
require Logger
require Pleroma.Constants
@spec pub_date(String.t() | DateTime.t()) :: String.t()
@ -69,7 +70,24 @@ defmodule Pleroma.Web.Feed.FeedView do
def last_activity(activities), do: List.last(activities)
def activity_title(%{"content" => content}, opts \\ %{}) do
# Renders the complete `<title>` element for `activity`, or an empty string
# when `activity_title/2` produces no title, so that templates can leave the
# element out instead of emitting an empty one.
def maybe_activity_title(activity, opts \\ %{}) do
  title = activity_title(activity, opts)

  if title == "" do
    ""
  else
    "<title>#{title}</title>"
  end
end
def activity_title(activity, opts \\ %{})
# Title for activities that carry a typed "source" when `parse_source` is on:
# the title is extracted from the raw source text by `split_content/3`.
#
# The extracted text is user-controlled and ends up inside feed XML (it is
# interpolated into `<title>…</title>` by `maybe_activity_title/2`), so it is
# XML-escaped here. When `split_content/3` yields anything other than a binary
# title (e.g. for an unsupported media type) the result is "" so that no
# bogus title is rendered.
def activity_title(
      %{"source" => %{"mediaType" => content_type, "content" => content}},
      %{parse_source: true} = opts
    ) do
  case split_content(content, content_type, opts) do
    {title, _description_start} when is_binary(title) -> xml_escape(title)
    _ -> ""
  end
end
# TODO: scrub_html should replace <p> with " "
def activity_title(%{"content" => content}, opts) do
content
|> Pleroma.Web.Metadata.Utils.scrub_html()
|> Pleroma.Emoji.Formatter.demojify()
@ -77,13 +95,36 @@ defmodule Pleroma.Web.Feed.FeedView do
|> escape()
end
def activity_content(%{"content" => content}) do
content
|> String.replace(~r/[\n\r]/, "")
# Fallback clause: anything not matched by the clauses above has no title.
def activity_title(_, _), do: ""
def activity_content(activity, opts \\ %{})

# Feed body for activities with a typed "source" when `parse_source` is on.
# The part of the raw source that follows the title (as located by
# `split_content/3`) is formatted for its media type, flattened onto a single
# line, trimmed and escaped.
def activity_content(
      %{"source" => %{"mediaType" => content_type, "content" => content}},
      %{parse_source: true} = opts
    ) do
  {_title, description_start} = split_content(content, content_type, opts)
  remainder = String.slice(content, description_start, String.length(content))

  {formatted, _mentions, _tags} =
    Pleroma.Web.CommonAPI.Utils.format_input(remainder, content_type)

  # Each run of line breaks becomes one space.
  single_line = String.replace(formatted, ~r/(\r?\n)+/, " ")
  escape(String.trim(single_line))
end
def activity_content(_), do: ""
# Feed body taken from the already-rendered "content" field: every run of
# line breaks collapses to a single space, then the text is trimmed and escaped.
def activity_content(%{"content" => content}, _opts) do
  single_line = String.replace(content, ~r/(\r?\n)+/, " ")
  escape(String.trim(single_line))
end
# Fallback clause: anything not matched by the clauses above has no body.
def activity_content(_, _), do: ""
# Returns the escaped "context" entry of the activity's data map.
def activity_context(activity), do: escape(activity.data["context"])
@ -99,6 +140,117 @@ defmodule Pleroma.Web.Feed.FeedView do
|> Map.get("mediaType")
end
# Returns the raw source text of an activity, XML-escaped for embedding in
# a `<source:content>` element.
def source_content(%{"source" => %{"mediaType" => _, "content" => content}}) do
  xml_escape(content)
end

# The feed templates only check that "mediaType" is present before calling
# this function, so a source map without "content" must not raise a
# FunctionClauseError while rendering; it renders as empty text instead.
def source_content(_), do: ""
def parse_title(activity, opts \\ %{})

# Title extracted from the activity's raw source by `split_content/3`.
# The value is rendered verbatim inside `<source:title>`, and it comes from
# user-supplied text, so it is XML-escaped. A non-binary title (returned by
# `split_content/3` for unsupported media types) is treated as "no title".
def parse_title(%{"source" => %{"mediaType" => content_type, "content" => content}}, opts) do
  case split_content(content, content_type, opts) do
    {title, _description_start} when is_binary(title) -> xml_escape(title)
    _ -> ""
  end
end

# Activities without a typed source have no title. This returns an empty
# string (not the integer 0) so the result is always text.
def parse_title(_activity, _opts), do: ""
def parse_description_offset(activity, opts \\ %{})

# Offset into the raw source at which the description starts, i.e. the second
# element of what `split_content/3` returns for this media type.
def parse_description_offset(
      %{"source" => %{"mediaType" => content_type, "content" => content}},
      opts
    ) do
  {_title, description_start} = split_content(content, content_type, opts)
  description_start
end

# Without a typed source the description starts at the very beginning.
def parse_description_offset(_activity, _opts), do: 0
# Splits raw source text into `{title, description_start}`.
#
# HTML: the title is the text of a leading <h1>/<h2> element; the description
# starts right after its closing tag. Returns `{"", 0}` when there is no such
# heading.
#
# `return: :index` reports BYTE offsets, while `description_start` is consumed
# with `String.slice/3` (grapheme offsets) by `activity_content/2`. The title
# is therefore cut with `binary_part/3`, and the end offset is converted to a
# grapheme count so that multi-byte characters do not shift the split point.
@spec split_content(binary(), binary(), any()) :: {binary(), non_neg_integer()}
def split_content(content, "text/html", _opts) do
  case Regex.named_captures(
         ~r/^[ \t]*<(?<tag>h[12])([ \t][^>]+)?>(?<title_inner>[^<]+)<\/h[12](?<title_end>[ \t]*>)/im,
         content,
         return: :index
       ) do
    %{
      "title_inner" => {title_start, title_length},
      "title_end" => {title_end_start, title_end_length}
    } ->
      title =
        content
        |> binary_part(title_start, title_length)
        |> String.trim()

      description_start =
        content
        |> binary_part(0, title_end_start + title_end_length)
        |> String.length()

      {title, description_start}

    _ ->
      Logger.error("No H1/H2 match")
      {"", 0}
  end
end
# Plain text: the title is the (trimmed) text up to the title-end position
# reported by `split_text_lines/2`; `{"", 0}` when no title was found.
def split_content(content, "text/plain", opts) do
  case split_text_lines(content, opts) do
    {_text, 0, 0} ->
      {"", 0}

    {text, title_end, description_start} ->
      {head, _tail} = String.split_at(text, title_end)
      {String.trim(head), description_start}
  end
end
# BBCode: the title is the text of a [b]…[/b] span on the first line.
def split_content(content, "text/bbcode", opts) do
  lines = split_text_lines(content, opts)
  match_title(lines, ~r/\[b\](?<title>[^\[]+)\[\/b\]/)
end

# Markdown: the title is a first line starting with "# " or "## ".
def split_content(content, "text/markdown", opts) do
  lines = split_text_lines(content, opts)
  match_title(lines, ~r/^[#]{1,2}[ \t](?<title>.+)/)
end

# Misskey-flavoured markdown: the title is a first line starting with **…**.
def split_content(content, "text/x.misskeymarkdown", opts) do
  lines = split_text_lines(content, opts)
  match_title(lines, ~r/^\*\*(?<title>.+)\*\*/)
end
# Unsupported media types have no title. The first element must be a binary
# (see the @spec: `{binary(), non_neg_integer()}`); returning the integer 0
# here would be rendered as the literal title "0" by callers.
def split_content(_, _, _), do: {"", 0}
# Locates a candidate title line in plain-text-like content.
#
# Returns `{str, title_end, description_start}`:
#   * `title_end`         - length of the first line without trailing whitespace
#   * `description_start` - summed length of the first and second line
# `{str, 0, 0}` means "no title"; it is returned whenever the content has
# fewer than three lines. This includes single-line input, which previously
# matched no clause and raised a CaseClauseError (`String.split/2` never
# returns an empty list).
#
# NOTE(review): the newline separators removed by the split are not counted
# in `description_start`; confirm that consumers expect this.
def split_text_lines(str, _opts) do
  case String.split(str, ~r/\n/) do
    [first, second, _third | _rest] ->
      title_end = first |> String.trim_trailing() |> String.length()
      description_start = String.length(first) + String.length(second)
      {str, title_end, description_start}

    _fewer_than_three_lines ->
      {str, 0, 0}
  end
end
# Extracts a title from the result of `split_text_lines/2` using `regex`,
# which must define a named capture `title`.
# Returns `{title, description_start}` on a match and `{"", 0}` otherwise.
def match_title({_str, 0, 0}, _regex), do: {"", 0}

def match_title({str, te, ds}, regex) do
  {head, _tail} = String.split_at(str, te)

  with %{"title" => title} <- Regex.named_captures(regex, String.trim(head)) do
    {String.trim(title), ds}
  else
    _ -> {"", 0}
  end
end
def get_href(id) do
with %Object{data: %{"external_url" => external_url}} <- Object.get_cached_by_ap_id(id) do
external_url
@ -112,4 +264,37 @@ defmodule Pleroma.Web.Feed.FeedView do
|> html_escape()
|> safe_to_string()
end
# Wraps `str` in an XML CDATA section.
# A literal "]]>" inside `str` would terminate the section early, so each
# occurrence is split across two adjacent CDATA sections; a parser
# concatenates them back into the original text.
def cdata(str) do
  "<![CDATA[" <> String.replace(str, "]]>", "]]]]><![CDATA[>") <> "]]>"
end
# Escapes text for embedding in XML.
#
#   * `nil` becomes "".
#   * `<`, `>`, tab, LF, CR, `"` and `'` become character/entity references.
#   * `&` becomes `&amp;`, except when it already starts one of the five
#     predefined XML entities (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`),
#     which are passed through unchanged so they are not escaped twice.
#
# The helpers build a list of binaries and integer code points; `to_string/1`
# turns that list back into a UTF-8 binary.
def xml_escape(nil), do: ""

def xml_escape(str) when is_binary(str) do
  str
  |> xml_escape_string()
  |> to_string()
end

defp xml_escape_string(""), do: ""
defp xml_escape_string(<<"&"::utf8, rest::binary>>), do: xml_escape_entity(rest)
defp xml_escape_string(<<"<"::utf8, rest::binary>>), do: ["&lt;" | xml_escape_string(rest)]
defp xml_escape_string(<<">"::utf8, rest::binary>>), do: ["&gt;" | xml_escape_string(rest)]
defp xml_escape_string(<<"\t"::utf8, rest::binary>>), do: ["&#9;" | xml_escape_string(rest)]
defp xml_escape_string(<<"\n"::utf8, rest::binary>>), do: ["&#10;" | xml_escape_string(rest)]
defp xml_escape_string(<<"\r"::utf8, rest::binary>>), do: ["&#13;" | xml_escape_string(rest)]
# The double quote is code point 34 (0x22). A decimal reference must use 34;
# "&#22;" would denote a control character that is not allowed in XML 1.0.
defp xml_escape_string(<<"\""::utf8, rest::binary>>), do: ["&#34;" | xml_escape_string(rest)]
defp xml_escape_string(<<"\'"::utf8, rest::binary>>), do: ["&#39;" | xml_escape_string(rest)]
defp xml_escape_string(<<c::utf8, rest::binary>>), do: [c | xml_escape_string(rest)]

# Called with the text that follows an "&": keeps predefined entities intact
# and escapes any other ampersand.
defp xml_escape_entity(<<"amp;"::utf8, rest::binary>>), do: ["&amp;" | xml_escape_string(rest)]
defp xml_escape_entity(<<"lt;"::utf8, rest::binary>>), do: ["&lt;" | xml_escape_string(rest)]
defp xml_escape_entity(<<"gt;"::utf8, rest::binary>>), do: ["&gt;" | xml_escape_string(rest)]

defp xml_escape_entity(<<"quot;"::utf8, rest::binary>>),
  do: ["&quot;" | xml_escape_string(rest)]

defp xml_escape_entity(<<"apos;"::utf8, rest::binary>>),
  do: ["&apos;" | xml_escape_string(rest)]

defp xml_escape_entity(rest), do: ["&amp;" | xml_escape_string(rest)]
end

View file

@ -42,8 +42,12 @@ defmodule Pleroma.Web.Metadata.Utils do
content
# html content comes from DB already encoded, decode first and scrub after
|> HtmlEntities.decode()
|> String.replace(~r/<br\s?\/?>/, " ")
|> String.replace(~r/<(br|p)[^>]*>/, "\\0&nbsp;")
|> String.replace(~r/<\/p\s*\/?>/, "&nbsp;\\0")
|> HTML.strip_tags()
# strip_tags will convert &nbsp; to U+00A0, adding /u will match these to " "
|> String.replace(~r/\s+/u, " ")
|> String.trim()
end
def scrub_html(content), do: content

View file

@ -2,8 +2,10 @@
<activity:object-type>http://activitystrea.ms/schema/1.0/note</activity:object-type>
<activity:verb>http://activitystrea.ms/schema/1.0/post</activity:verb>
<id><%= @data["id"] %></id>
<title><%= activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %></title>
<content type="html"><%= activity_content(@data) %></content>
<%= maybe_activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %>
<content type="html"><%= activity_content(@data, Keyword.get(@feed_config, :post_title, %{})) %></content>
<published><%= @activity.data["published"] %></published>
<updated><%= @activity.data["published"] %></updated>
<ostatus:conversation ref="<%= activity_context(@activity) %>">
@ -15,6 +17,13 @@
<summary><%= escape(@data["summary"]) %></summary>
<% end %>
<%= if !is_nil(get_in(@data, ["source", "mediaType"])) do %>
<source:contentType><%= get_in(@data, ["source", "mediaType"]) %></source:contentType>
<source:content><%= source_content(@data) %></source:content>
<source:title><%= parse_title(@data, Keyword.get(@feed_config, :post_title, %{})) %></source:title>
<source:descriptionOffset><%= parse_description_offset(@data, Keyword.get(@feed_config, :post_title, %{})) %></source:descriptionOffset>
<% end %>
<%= if @activity.local do %>
<link type="application/atom+xml" href='<%= @data["id"] %>' rel="self"/>
<link type="text/html" href='<%= @data["id"] %>' rel="alternate"/>

View file

@ -2,8 +2,10 @@
<activity:object-type>http://activitystrea.ms/schema/1.0/note</activity:object-type>
<activity:verb>http://activitystrea.ms/schema/1.0/post</activity:verb>
<guid><%= @data["id"] %></guid>
<title><%= activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %></title>
<description><%= activity_content(@data) %></description>
<%= maybe_activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %>
<description><%= activity_content(@data, Keyword.get(@feed_config, :post_title, %{})) %></description>
<pubDate><%= @activity.data["published"] %></pubDate>
<updated><%= @activity.data["published"] %></updated>
<ostatus:conversation ref="<%= activity_context(@activity) %>">
@ -11,7 +13,14 @@
</ostatus:conversation>
<%= if @data["summary"] do %>
<description><%= escape(@data["summary"]) %></description>
<masto:summary><%= escape(@data["summary"]) %></masto:summary>
<% end %>
<%= if !is_nil(get_in(@data, ["source", "mediaType"])) do %>
<source:contentType><%= get_in(@data, ["source", "mediaType"]) %></source:contentType>
<source:content><%= source_content(@data) %></source:content>
<source:title><%= parse_title(@data, Keyword.get(@feed_config, :post_title, %{})) %></source:title>
<source:descriptionOffset><%= parse_description_offset(@data, Keyword.get(@feed_config, :post_title, %{})) %></source:descriptionOffset>
<% end %>
<%= if @activity.local do %>

View file

@ -5,8 +5,10 @@
<%= render @view_module, "_tag_author.atom", assigns %>
<id><%= @data["id"] %></id>
<title><%= activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %></title>
<content type="html"><%= activity_content(@data) %></content>
<%= maybe_activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %>
<content type="html"><%= activity_content(@data, Keyword.get(@feed_config, :post_title, %{})) %></content>
<%= if @activity.local do %>
<link type="application/atom+xml" href='<%= @data["id"] %>' rel="self"/>

View file

@ -1,12 +1,11 @@
<item>
<title><%= activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %></title>
<guid isPermaLink="true"><%= activity_context(@activity) %></guid>
<link><%= activity_context(@activity) %></link>
<pubDate><%= pub_date(@activity.data["published"]) %></pubDate>
<description><%= activity_content(@data) %></description>
<%= maybe_activity_title(@data, Keyword.get(@feed_config, :post_title, %{})) %>
<description><%= activity_content(@data, Keyword.get(@feed_config, :post_title, %{})) %></description>
<%= for attachment <- @data["attachment"] || [] do %>
<enclosure url="<%= attachment_href(attachment) %>" type="<%= attachment_type(attachment) %>"/>
<% end %>

View file

@ -7,7 +7,8 @@
xmlns:media="http://purl.org/syndication/atommedia"
xmlns:poco="http://portablecontacts.net/spec/1.0"
xmlns:ostatus="http://ostatus.org/schema/1.0"
xmlns:statusnet="http://status.net/schema/api/1/">
xmlns:statusnet="http://status.net/schema/api/1/"
xmlns:source="http://source.scripting.com/">
<id><%= '#{Routes.tag_feed_url(@conn, :feed, @tag)}.rss' %></id>
<title>#<%= @tag %></title>

View file

@ -4,7 +4,8 @@
xmlns:thr="http://purl.org/syndication/thread/1.0"
xmlns:activity="http://activitystrea.ms/spec/1.0/"
xmlns:poco="http://portablecontacts.net/spec/1.0"
xmlns:ostatus="http://ostatus.org/schema/1.0">
xmlns:ostatus="http://ostatus.org/schema/1.0"
xmlns:source="http://source.scripting.com/">
<id><%= Routes.user_feed_url(@conn, :feed, @user.nickname) <> ".atom" %></id>
<title><%= @user.nickname <> "'s timeline" %></title>