meta tag parser respect first title header

This commit is contained in:
Alexander Strizhakov 2020-01-28 19:29:27 +03:00
parent d9cb8acd3e
commit 7bd4c14581
No known key found for this signature in database
GPG key ID: 022896A53AEF1381
3 changed files with 2915 additions and 1 deletions

View file

@ -48,6 +48,6 @@ defp maybe_put_title(meta, html) when meta != %{} do
defp maybe_put_title(meta, _), do: meta defp maybe_put_title(meta, _), do: meta
defp get_page_title(html) do defp get_page_title(html) do
Floki.find(html, "title") |> Floki.text() Floki.find(html, "title") |> List.first() |> Floki.text()
end end
end end

File diff suppressed because it is too large Load diff

View file

@ -66,4 +66,23 @@ test "parses twitter card with name & property attributes" do
"https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html" "https://www.nytimes.com/2019/08/01/nyregion/nypd-facial-recognition-children-teenagers.html"
}} }}
end end
test "respect only first title tag on the page" do
image_path =
"https://assets.atlasobscura.com/media/W1siZiIsInVwbG9hZHMvYXNzZXRzLzkwYzgyMzI4LThlMDUtNGRiNS05MDg3LTUzMGUxZTM5N2RmMmVkOTM5ZDM4MGM4OTIx" <>
"YTQ5MF9EQVIgZXhodW1hdGlvbiBvZiBNYXJnYXJldCBDb3JiaW4gZ3JhdmUgMTkyNi5qcGciXSxbInAiLCJjb252ZXJ0IiwiIl0sWyJwIiwiY29udmVydCIsIi1xdWFsaXR5IDgxIC1hdXRvLW9" <>
"yaWVudCJdLFsicCIsInRodW1iIiwiNjAweD4iXV0/DAR%20exhumation%20of%20Margaret%20Corbin%20grave%201926.jpg"
html = File.read!("test/fixtures/margaret-corbin-grave-west-point.html")
assert TwitterCard.parse(html, %{}) ==
{:ok,
%{
site: "@atlasobscura",
title:
"The Missing Grave of Margaret Corbin, Revolutionary War Veteran - Atlas Obscura",
card: "summary_large_image",
image: image_path
}}
end
end end