From 2755b2deb6dea4da01cd1030babdb174676a7dc2 Mon Sep 17 00:00:00 2001 From: Johann150 Date: Mon, 1 May 2023 13:09:27 +0200 Subject: [PATCH] add script --- README.md | 21 ++++++++++++++++++++- foundkey-survey.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 foundkey-survey.py diff --git a/README.md b/README.md index f1861b3..26dd158 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,22 @@ # foundkey-survey -small tool to "survey" foundkey instances' nodeinfo or similar \ No newline at end of file +Small tool to "survey" foundkey instances' nodeinfo or similar. +This script has been used to collect usage information about enabled features in the past. +It is currently programmed to collect information about integrations from the nodeinfo endpoint. + +The script will read a list of instances to check from the file `instances.csv`, and will consider the 1st column as the instance's hostname. +Other columns are ignored. +Duplicates are only fetched the first time they are encountered. + +The script will output to a file called `survey.csv` (append only). +The first column of the output will be the hostname again, and the other columns will be as configured. + +The script will not fetch an instance twice even if it crashes in the middle: +If you start it and a file called `survey.csv` already exists, the hostnames that are already in that file (in the first column) will not be fetched again. +This is handy when you did not write your collection code carefully enough, which leads to it failing under some conditions. + +## CSV format + +The CSV files do not have headers. +Fields are separated with commas. +Fields are quoted with `"` (U+0022). 
#!/usr/bin/env python3
# foundkey-survey.py
"""Survey the nodeinfo endpoint of a list of instances.

Hostnames are read from the first column of ``instances.csv``; other columns
are ignored. For every hostname that has not been handled yet,
``https://<host>/nodeinfo/2.1`` is fetched and one row (the hostname followed
by the selected metadata values) is appended to ``survey.csv``.

Hostnames already present in the first column of ``survey.csv`` are not
fetched again, so an interrupted run can simply be restarted.
"""

import csv

SURVEY_FILE = 'survey.csv'
INSTANCES_FILE = 'instances.csv'
USER_AGENT = 'foundkey instance survey'
REQUEST_TIMEOUT = 30

# keys of the nodeinfo "metadata" object that end up as output columns,
# in column order (after the hostname)
METADATA_KEYS = (
    'enableDiscordIntegration',
    'enableGithubIntegration',
    'enableTwitterIntegration',
)


def load_known_instances(path=SURVEY_FILE):
    """Return the set of hostnames found in the first column of *path*.

    A missing file is not an error (it just means nothing was surveyed yet),
    and blank rows are ignored.
    """
    known = set()
    try:
        with open(path, newline='') as f:
            for row in csv.reader(f, delimiter=',', quotechar='"'):
                if row:
                    known.add(row[0])
    except FileNotFoundError:
        # first run: there is no previous output to resume from
        pass
    return known


def extract_row(instance, nodeinfo):
    """Build the output row for *instance* from its parsed nodeinfo.

    Returns ``[instance, <value of each METADATA_KEYS entry>...]``, or
    ``None`` when *nodeinfo* does not have the expected structure (no
    ``metadata`` object or one of the selected keys is missing).
    """
    try:
        metadata = nodeinfo['metadata']
        # select some data from nodeinfo for the resulting CSV file
        return [instance] + [metadata[key] for key in METADATA_KEYS]
    except (KeyError, TypeError):
        return None


def fetch_nodeinfo(instance):
    """Fetch and parse the nodeinfo 2.1 document of *instance*.

    Returns the decoded JSON object, or ``None`` (after printing the reason)
    when the host is unreachable, answers with a status other than 200, or
    does not return a JSON object.
    """
    # Imported here so that the pure helpers of this script can be used
    # without the third-party dependency being installed.
    import requests

    try:
        r = requests.get(
            'https://{}/nodeinfo/2.1'.format(instance),
            headers={'user-agent': USER_AGENT},
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException:
        # only network/HTTP level failures; Ctrl-C still aborts the run
        print("dead")
        return None
    if r.status_code != 200:
        print("skipping, status {}".format(r.status_code))
        return None
    try:
        body = r.json()
    except ValueError:
        print("skipping, response is not JSON")
        return None
    if not isinstance(body, dict):
        print("skipping, unexpected nodeinfo format")
        return None
    return body


def survey(instances_path=INSTANCES_FILE, survey_path=SURVEY_FILE, fetch=None):
    """Survey every not-yet-known instance and append the results.

    instances_path -- CSV file whose first column lists the hostnames
    survey_path    -- CSV file results are appended to; also used to find
                      out which hostnames were already surveyed
    fetch          -- callable taking a hostname and returning the parsed
                      nodeinfo or ``None``; defaults to ``fetch_nodeinfo``
    """
    if fetch is None:
        fetch = fetch_nodeinfo

    # check which hostnames were fetched already
    known_instances = load_known_instances(survey_path)

    with open(instances_path, newline='') as src, \
            open(survey_path, 'a', newline='') as out:
        reader = csv.reader(src, delimiter=',', quotechar='"')
        writer = csv.writer(out, delimiter=',', quotechar='"')

        for row in reader:
            if not row:
                continue
            instance = row[0]
            if instance in known_instances:
                continue
            # Remember the host before fetching so that duplicates are
            # attempted only once per run, even when the fetch fails.
            known_instances.add(instance)

            print("checking", instance)
            nodeinfo = fetch(instance)
            if nodeinfo is None:
                continue
            result = extract_row(instance, nodeinfo)
            if result is None:
                print("skipping, unexpected nodeinfo format")
                continue
            writer.writerow(result)
            # make the row durable right away so a later crash loses nothing
            out.flush()
            print("done")


if __name__ == '__main__':
    survey()