From be9abb2cc5fc219ca49ac6b32afed3eac323bf7a Mon Sep 17 00:00:00 2001
From: William Pitcock <nenolod@dereferenced.org>
Date: Sat, 26 Jan 2019 14:55:12 +0000
Subject: [PATCH] html: add utility function to extract first URL from an
 object and cache the result

---
 lib/pleroma/html.ex | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex
index f5c6e5033..fb602d6b6 100644
--- a/lib/pleroma/html.ex
+++ b/lib/pleroma/html.ex
@@ -58,6 +58,20 @@ defp generate_scrubber_signature(scrubbers) do
       "#{signature}#{to_string(scrubber)}"
     end)
   end
+
+  # First href among non-mention links; cached by object id, not by content.
+  def extract_first_external_url(object, content) do
+    cache_key = "URL|#{object.id}"
+
+    compute_first_href = fn _cache_key ->
+      without_mentions = Floki.filter_out(content, "a.mention")
+      hrefs = Floki.attribute(without_mentions, "a", "href")
+
+      {:commit, Enum.at(hrefs, 0)}
+    end
+
+    Cachex.fetch!(:scrubber_cache, cache_key, compute_first_href)
+  end
 end
 
 defmodule Pleroma.HTML.Scrubber.TwitterText do