add script

2023-05-01 13:09:27 +02:00 · 2023-05-01 13:09:27 +02:00 · 2755b2deb6
commit 2755b2deb6
parent 1de39a148a
2 changed files with 64 additions and 1 deletions
--- a/README.md
+++ b/README.md
@ -1,3 +1,22 @@
 # foundkey-survey

-small tool to "survey" foundkey instances' nodeinfo or similar
+Small tool to "survey" foundkey instances' nodeinfo or similar.
+This script has been used to collect usage information about enabled features in the past.
+It is currently programmed to collect information about integrations from the nodeinfo endpoint.
+
+The script will read a list of instances to check from the file `instances.csv`, and will treat the first column as the instance's hostname.
+Other columns are ignored.
+Duplicates are only fetched the first time they are encountered.
+
+The script will output to a file called `survey.csv` (append only).
+The first column of the output will be the hostname again, and the other columns will be as configured.
+
+The script will not fetch an instance twice, even if it crashes in the middle:
+If you start it and a file called `survey.csv` already exists, the hostnames that are already in that file (in the first column) will not be fetched again.
+This is handy when you did not write your collection code carefully enough, which leads to it failing under some conditions.
+
+## CSV format
+
+The CSV files do not have headers.
+Fields are separated with commas.
+Fields are quoted with `"` (U+0022).
--- a/foundkey-survey.py
+++ b/foundkey-survey.py
@ -0,0 +1,44 @@
+#!/bin/env python3
+"""Survey foundkey instances' nodeinfo and append the results to survey.csv."""
+
+import csv
+import requests
+
+# check which hostnames were fetched already; no survey.csv yet means a first run
+known_instances = set()
+try:
+    with open('survey.csv', newline='') as f:
+        # empty rows have no hostname column, skip them instead of crashing
+        known_instances.update(row[0] for row in csv.reader(f, delimiter=',', quotechar='"') if row)
+except FileNotFoundError:
+    pass
+
+# open survey.csv for appending now, and instances.csv (read-only) for the hosts to fetch
+with open('survey.csv', 'a', newline='') as out, open('instances.csv', newline='') as f:
+    writer = csv.writer(out, delimiter=',', quotechar='"')
+    for row in csv.reader(f, delimiter=',', quotechar='"'):
+        if not row or row[0] in known_instances:
+            continue
+        instance = row[0]
+        # make sure duplicates are not fetched, even if this attempt fails below
+        known_instances.add(instance)
+        print("checking", instance)
+        # fetch nodeinfo data
+        try:
+            r = requests.get(
+                'https://{}/nodeinfo/2.1'.format(instance),
+                headers={'user-agent': 'foundkey instance survey'},
+                timeout=30
+            )
+        except Exception:
+            # not a bare except, so that Ctrl-C still stops the script
+            print("dead")
+            continue
+        if r.status_code != 200:
+            print("skipping, status {}".format(r.status_code))
+            continue
+        body = r.json()['metadata']
+        # select some data from nodeinfo and write it to resulting CSV file
+        writer.writerow([instance, body['enableDiscordIntegration'], body['enableGithubIntegration'], body['enableTwitterIntegration']])
+        out.flush()  # keep fetched rows on disk even if the script dies later
+        print("done")