From 2755b2deb6dea4da01cd1030babdb174676a7dc2 Mon Sep 17 00:00:00 2001
From: Johann150 <johann.galle@protonmail.com>
Date: Mon, 1 May 2023 13:09:27 +0200
Subject: [PATCH] add script

---
 README.md          | 21 ++++++++++++++++++++-
 foundkey-survey.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 foundkey-survey.py

diff --git a/README.md b/README.md
index f1861b3..26dd158 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,22 @@
 # foundkey-survey
 
-small tool to "survey" foundkey instances' nodeinfo or similar
\ No newline at end of file
+Small tool to "survey" foundkey instances' nodeinfo or similar.
+This script has been used to collect usage information about enabled features in the past.
+It is currently programmed to collect information about integrations from the nodeinfo endpoint.
+
+The script will read a list of instances to check from the file `instances.csv`, and will treat the first column as the instance's hostname.
+Other columns are ignored.
+Duplicates are only fetched the first time they are encountered.
+
+The script will output to a file called `survey.csv` (append only).
+The first column of the output will be the hostname again, and the other columns will be as configured.
+
+The script will not fetch twice even if it crashes in the middle:
+If you start it and a file called `survey.csv` already exists, the hostnames that are already in that file (in the first column) will not be fetched again.
+This is handy when you did not write your collection code carefully enough, which leads to it failing under some conditions.
+
+## CSV format
+
+The CSV files do not have headers.
+Fields are separated with commas.
+Fields are quoted with `"` (U+0022).
diff --git a/foundkey-survey.py b/foundkey-survey.py
new file mode 100644
index 0000000..5417a32
--- /dev/null
+++ b/foundkey-survey.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+"""Survey nodeinfo of the hosts listed in instances.csv, appending to survey.csv."""
+
+import csv
+import requests
+
+# check which hostnames were fetched already, so a restart resumes where it stopped
+known_instances = set()
+try:
+    with open('survey.csv', newline='') as f:
+        # blank rows carry no hostname and are skipped
+        known_instances.update(row[0] for row in csv.reader(f, delimiter=',', quotechar='"') if row)
+except FileNotFoundError:
+    pass  # first run: nothing has been surveyed yet
+
+# append results to survey.csv while reading the instances to fetch
+with open('survey.csv', 'a', newline='') as out, open('instances.csv', newline='') as src:
+    writer = csv.writer(out, delimiter=',', quotechar='"')
+    for row in csv.reader(src, delimiter=',', quotechar='"'):
+        if not row or row[0] in known_instances:
+            continue
+        instance = row[0]
+        # record the host before fetching so duplicates are skipped even if it fails
+        known_instances.add(instance)
+        print("checking", instance)
+        # fetch nodeinfo data
+        try:
+            r = requests.get(
+                'https://{}/nodeinfo/2.1'.format(instance),
+                headers={'user-agent': 'foundkey instance survey'},
+                timeout=30
+            )
+        except Exception:
+            # not a bare except, so Ctrl-C can still stop the script
+            print("dead")
+            continue
+        if r.status_code != 200:
+            print("skipping, status {}".format(r.status_code))
+            continue
+        body = r.json()['metadata']
+        # select some data from nodeinfo and write it to the resulting CSV file
+        writer.writerow([instance, body['enableDiscordIntegration'], body['enableGithubIntegration'], body['enableTwitterIntegration']])
+        out.flush()  # keep finished rows on disk even if the process is killed
+        print("done")