"""Report signature files that share the same ``link:`` value.

Every regular file in ``_data/signed`` is scanned for its first line starting
with ``link:``.  Files are grouped by that link, and each link used by more
than one file is printed to stdout together with the names of those files.
Files whose link is the placeholder ``/#`` are never reported.
"""

# Based on find-duplicates.py as added by commit
# 8eada425be9a5f35c5546ee2a14cc606d33aed6f ("Add a script to find duplicates").

import os
import sys
from collections import defaultdict
from typing import Dict, List, Optional

SIGNED_DIR = "_data/signed"
LINK_PREFIX = "link:"
# Link value that is excluded from the duplicate check.
PLACEHOLDER_LINK = "/#"


def _read_link(path: str) -> Optional[str]:
    """Return the value of the first ``link:`` line in *path*, or ``None``.

    The value is everything after the ``link:`` prefix with surrounding
    whitespace removed.  ``None`` means the file has no such line.
    """
    # Explicit UTF-8 keeps decoding independent of the platform locale.
    # Text mode already normalises "\r\n" / "\r" line endings to "\n".
    with open(path, encoding="utf-8") as f:
        for line in f:
            if line.startswith(LINK_PREFIX):
                return line[len(LINK_PREFIX):].strip()
    return None


def find_duplicates(directory: str = SIGNED_DIR) -> Dict[str, List[str]]:
    """Map each link used by several files in *directory* to those files.

    Args:
        directory: Directory whose regular files are scanned.

    Returns:
        A dict ``{link: [file_name, ...]}`` containing only links that occur
        in two or more files.  File names are listed in sorted order; links
        appear in the order they are first encountered.

    Raises:
        OSError: If *directory* cannot be listed or a file cannot be read.
    """
    file_names_by_link = defaultdict(list)

    for file_name in sorted(os.listdir(directory)):
        path = os.path.join(directory, file_name)
        if not os.path.isfile(path):
            # Subdirectories and other non-regular entries hold no link.
            continue

        link = _read_link(path)
        if link is None:
            # One malformed file should not abort the whole scan.
            print(f"warning: no 'link:' line in {path}, skipping", file=sys.stderr)
            continue
        if link == PLACEHOLDER_LINK:
            continue
        file_names_by_link[link].append(file_name)

    return {
        link: file_names
        for link, file_names in file_names_by_link.items()
        if len(file_names) > 1
    }


def main() -> None:
    """Print every duplicated link and the files that use it."""
    for link, file_names in find_duplicates().items():
        print(link, "duplicates:", file_names)


if __name__ == "__main__":
    main()