From 1f851a07232e510e82e7b13400dfcb31cca555bb Mon Sep 17 00:00:00 2001 From: Maxim Filippov Date: Thu, 10 Jan 2019 18:09:56 +0000 Subject: [PATCH] Add Twitter Card parser --- lib/pleroma/web/rich_media/parser.ex | 2 +- .../rich_media/parsers/meta_tags_parser.ex | 30 +++++++++++++++++ lib/pleroma/web/rich_media/parsers/ogp.ex | 33 ++++--------------- .../web/rich_media/parsers/twitter_card.ex | 11 +++++++ test/fixtures/rich_media/twitter_card.html | 5 +++ test/web/rich_media/parser_test.exs | 18 ++++++++++ 6 files changed, 72 insertions(+), 27 deletions(-) create mode 100644 lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex create mode 100644 lib/pleroma/web/rich_media/parsers/twitter_card.ex create mode 100644 test/fixtures/rich_media/twitter_card.html diff --git a/lib/pleroma/web/rich_media/parser.ex b/lib/pleroma/web/rich_media/parser.ex index 18d9e2df5..fe092bf19 100644 --- a/lib/pleroma/web/rich_media/parser.ex +++ b/lib/pleroma/web/rich_media/parser.ex @@ -1,5 +1,5 @@ defmodule Pleroma.Web.RichMedia.Parser do - @parsers [Pleroma.Web.RichMedia.Parsers.OGP] + @parsers [Pleroma.Web.RichMedia.Parsers.OGP, Pleroma.Web.RichMedia.Parsers.TwitterCard] if Mix.env() == :test do def parse(url), do: parse_url(url) diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex new file mode 100644 index 000000000..4a7c5eae0 --- /dev/null +++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex @@ -0,0 +1,30 @@ +defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do + def parse(html, data, prefix, error_message, key_name, value_name \\ "content") do + with elements = [_ | _] <- get_elements(html, key_name, prefix), + meta_data = + Enum.reduce(elements, data, fn el, acc -> + attributes = normalize_attributes(el, prefix, key_name, value_name) + + Map.merge(acc, attributes) + end) do + {:ok, meta_data} + else + _e -> {:error, error_message} + end + end + + defp get_elements(html, key_name, prefix) do + html |> Floki.find("meta[#{key_name}^='#{prefix}:']") + end + + defp normalize_attributes(html_node, prefix, key_name, value_name) do + {_tag, attributes, _children} = html_node + + data = + Enum.into(attributes, %{}, fn {name, value} -> + {name, String.trim_leading(value, "#{prefix}:")} + end) + + %{String.to_atom(data[key_name]) => data[value_name]} + end +end diff --git a/lib/pleroma/web/rich_media/parsers/ogp.ex b/lib/pleroma/web/rich_media/parsers/ogp.ex index 5773a5263..0e1a0e719 100644 --- a/lib/pleroma/web/rich_media/parsers/ogp.ex +++ b/lib/pleroma/web/rich_media/parsers/ogp.ex @@ -1,30 +1,11 @@ defmodule Pleroma.Web.RichMedia.Parsers.OGP do def parse(html, data) do - with elements = [_ | _] <- get_elements(html), - ogp_data = - Enum.reduce(elements, data, fn el, acc -> - attributes = normalize_attributes(el) - - Map.merge(acc, attributes) - end) do - {:ok, ogp_data} - else - _e -> {:error, "No OGP metadata found"} - end - end - - defp get_elements(html) do - html |> Floki.find("meta[property^='og:']") - end - - defp normalize_attributes(html_node) do - {_tag, attributes, _children} = html_node - - data = - Enum.into(attributes, %{}, fn {name, value} -> - {name, String.trim_leading(value, "og:")} - end) - - %{String.to_atom(data["property"]) => data["content"]} + Pleroma.Web.RichMedia.Parsers.MetaTagsParser.parse( + html, + data, + "og", + "No OGP metadata found", + "property" + ) end end diff --git a/lib/pleroma/web/rich_media/parsers/twitter_card.ex b/lib/pleroma/web/rich_media/parsers/twitter_card.ex new file mode 100644 index 000000000..a317c3e78 --- /dev/null +++ b/lib/pleroma/web/rich_media/parsers/twitter_card.ex @@ -0,0 +1,11 @@ +defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do + def parse(html, data) do + Pleroma.Web.RichMedia.Parsers.MetaTagsParser.parse( + html, + data, + "twitter", + "No twitter card metadata found", + "name" + ) + end +end diff --git a/test/fixtures/rich_media/twitter_card.html b/test/fixtures/rich_media/twitter_card.html new file mode 100644 index 000000000..34c7c6ccd --- /dev/null +++ b/test/fixtures/rich_media/twitter_card.html @@ -0,0 +1,5 @@ + + + + + diff --git a/test/web/rich_media/parser_test.exs b/test/web/rich_media/parser_test.exs index caf81e9fa..ff3486a6d 100644 --- a/test/web/rich_media/parser_test.exs +++ b/test/web/rich_media/parser_test.exs @@ -9,6 +9,12 @@ defmodule Pleroma.Web.RichMedia.ParserTest do } -> %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/ogp.html")} + %{ + method: :get, + url: "http://example.com/twitter-card" + } -> + %Tesla.Env{status: 200, body: File.read!("test/fixtures/rich_media/twitter_card.html")} + %{method: :get, url: "http://example.com/empty"} -> %Tesla.Env{status: 200, body: "hello"} end) @@ -30,4 +36,16 @@ test "parses ogp" do url: "http://www.imdb.com/title/tt0117500/" }} end + + test "parses twitter card" do + assert Pleroma.Web.RichMedia.Parser.parse("http://example.com/twitter-card") == + {:ok, + %{ + card: "summary", + site: "@flickr", + image: "https://farm6.staticflickr.com/5510/14338202952_93595258ff_z.jpg", + title: "Small Island Developing States Photo Submission", + description: "View the album on Flickr." + }} + end end