Merge OGP parser with TwitterCard

This commit is contained in:
Egor Kislitsyn 2020-06-11 17:57:31 +04:00
parent 7aa6c82937
commit 1f35acce54
No known key found for this signature in database
GPG Key ID: 1B49CB15B71E7805
9 changed files with 90 additions and 93 deletions

View File

@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [unreleased]
### Changed
- OGP rich media parser merged with TwitterCard
<details>
<summary>API Changes</summary>
- **Breaking:** Emoji API: changed methods and renamed routes.

View File

@ -385,7 +385,6 @@ config :pleroma, :rich_media,
ignore_tld: ["local", "localdomain", "lan"],
parsers: [
Pleroma.Web.RichMedia.Parsers.TwitterCard,
Pleroma.Web.RichMedia.Parsers.OGP,
Pleroma.Web.RichMedia.Parsers.OEmbed
],
ttl_setters: [Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl]

View File

@ -2091,9 +2091,7 @@ config :pleroma, :config_description, [
description:
"List of Rich Media parsers. Module names are shortened (removed leading `Pleroma.Web.RichMedia.Parsers.` part), but on adding custom module you need to use full name.",
suggestions: [
Pleroma.Web.RichMedia.Parsers.MetaTagsParser,
Pleroma.Web.RichMedia.Parsers.OEmbed,
Pleroma.Web.RichMedia.Parsers.OGP,
Pleroma.Web.RichMedia.Parsers.TwitterCard
]
},

View File

@ -105,8 +105,8 @@ defmodule Pleroma.Web.RichMedia.Parser do
defp maybe_parse(html) do
Enum.reduce_while(parsers(), %{}, fn parser, acc ->
case parser.parse(html, acc) do
{:ok, data} -> {:halt, data}
{:error, _msg} -> {:cont, acc}
data when data != %{} -> {:halt, data}
_ -> {:cont, acc}
end
end)
end

View File

@ -3,8 +3,7 @@
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do
meta_data =
def parse(data, html, prefix, key_name, value_name \\ "content") do
html
|> get_elements(key_name, prefix)
|> Enum.reduce(data, fn el, acc ->
@ -13,12 +12,6 @@ defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
Map.merge(acc, attributes)
end)
|> maybe_put_title(html)
if Enum.empty?(meta_data) do
{:error, error_message}
else
{:ok, meta_data}
end
end
defp get_elements(html, key_name, prefix) do

View File

@ -7,9 +7,9 @@ defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
with elements = [_ | _] <- get_discovery_data(html),
{:ok, oembed_url} <- get_oembed_url(elements),
{:ok, oembed_data} <- get_oembed_data(oembed_url) do
{:ok, oembed_data}
oembed_data
else
_e -> {:error, "No OEmbed data found"}
_e -> %{}
end
end

View File

@ -5,10 +5,9 @@
defmodule Pleroma.Web.RichMedia.Parsers.OGP do
def parse(html, data) do
Pleroma.Web.RichMedia.Parsers.MetaTagsParser.parse(
html,
data,
html,
"og",
"No OGP metadata found",
"property"
)
end

View File

@ -5,18 +5,11 @@
defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do
alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser
@spec parse(String.t(), map()) :: {:ok, map()} | {:error, String.t()}
@spec parse(list(), map()) :: map()
def parse(html, data) do
data
|> parse_name_attrs(html)
|> parse_property_attrs(html)
end
defp parse_name_attrs(data, html) do
MetaTagsParser.parse(html, data, "twitter", %{}, "name")
end
defp parse_property_attrs({_, data}, html) do
MetaTagsParser.parse(html, data, "twitter", "No twitter card metadata found", "property")
|> MetaTagsParser.parse(html, "og", "property")
|> MetaTagsParser.parse(html, "twitter", "name")
|> MetaTagsParser.parse(html, "twitter", "property")
end
end

View File

@ -7,8 +7,7 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
alias Pleroma.Web.RichMedia.Parsers.TwitterCard
test "returns error when html not contains twitter card" do
assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) ==
{:error, "No twitter card metadata found"}
assert TwitterCard.parse([{"html", [], [{"head", [], []}, {"body", [], []}]}], %{}) == %{}
end
test "parses twitter card with only name attributes" do
@ -17,15 +16,21 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
|> Floki.parse_document!()
assert TwitterCard.parse(html, %{}) ==
{:ok,
%{
"app:id:googleplay": "com.nytimes.android",
"app:name:googleplay": "NYTimes",
"app:url:googleplay": "nytimes://reader/id/100000006583622",
site: nil,
description:
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
image:
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
type: "article",
url:
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
title:
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times"
}}
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database."
}
end
test "parses twitter card with only property attributes" do
@ -34,7 +39,6 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
|> Floki.parse_document!()
assert TwitterCard.parse(html, %{}) ==
{:ok,
%{
card: "summary_large_image",
description:
@ -45,8 +49,9 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
title:
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
url:
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
}}
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
type: "article"
}
end
test "parses twitter card with name & property attributes" do
@ -55,7 +60,6 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
|> Floki.parse_document!()
assert TwitterCard.parse(html, %{}) ==
{:ok,
%{
"app:id:googleplay": "com.nytimes.android",
"app:name:googleplay": "NYTimes",
@ -70,8 +74,9 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
title:
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
url:
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
}}
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html",
type: "article"
}
end
test "respect only first title tag on the page" do
@ -84,14 +89,17 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
File.read!("test/fixtures/margaret-corbin-grave-west-point.html") |> Floki.parse_document!()
assert TwitterCard.parse(html, %{}) ==
{:ok,
%{
site: "@atlasobscura",
title:
"The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
title: "The Missing Grave of Margaret Corbin, Revolutionary War Veteran",
card: "summary_large_image",
image: image_path
}}
image: image_path,
description:
"She's the only woman veteran honored with a monument at West Point. But where was she buried?",
site_name: "Atlas Obscura",
type: "article",
url: "http://www.atlasobscura.com/articles/margaret-corbin-grave-west-point"
}
end
test "takes first founded title in html head if there is html markup error" do
@ -100,14 +108,20 @@ defmodule Pleroma.Web.RichMedia.Parsers.TwitterCardTest do
|> Floki.parse_document!()
assert TwitterCard.parse(html, %{}) ==
{:ok,
%{
site: nil,
title:
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database. - The New York Times",
"She Was Arrested at 14. Then Her Photo Went to a Facial Recognition Database.",
"app:id:googleplay": "com.nytimes.android",
"app:name:googleplay": "NYTimes",
"app:url:googleplay": "nytimes://reader/id/100000006583622"
}}
"app:url:googleplay": "nytimes://reader/id/100000006583622",
description:
"With little oversight, the N.Y.P.D. has been using powerful surveillance technology on photos of children and teenagers.",
image:
"https://static01.nyt.com/images/2019/08/01/nyregion/01nypd-juveniles-promo/01nypd-juveniles-promo-facebookJumbo.jpg",
type: "article",
url:
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
}
end
end