From 5d121f66af7f67fd1261165ee574a52e539fc4d6 Mon Sep 17 00:00:00 2001
From: cynic
Date: Thu, 14 Jul 2022 05:57:25 -0400
Subject: [PATCH] mean heuristic test

---
Note: the hunks below were edited by hand after export, so the post-image
blob hashes on the `index` lines for main.py and test.py are stale.

 .gitignore |  1 +
 main.py    | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 test.py    | 14 ++++++++++++++
 3 files changed, 63 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 test.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..16f2dc5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.csv
\ No newline at end of file
diff --git a/main.py b/main.py
index 7d07b1f..de575ff 100644
--- a/main.py
+++ b/main.py
@@ -6,3 +6,51 @@ except FileNotFoundError:
     print("downloading dataset...")
     txt = get("https://the.silly.computer/creditcard.csv").text
     open("creditcard.csv", "w").write(txt)
+
+data = pandas.read_csv("creditcard.csv")
+# Row-wise mean of the feature columns only; including "Class" would leak the
+# label into a column that is later scored as a predictor of that label.
+data["mean"] = data.drop(columns="Class").mean(axis=1)
+
+is_fraud = data["Class"] == 1
+is_legit = data["Class"] == 0
+fraud_set = data.loc[is_fraud]
+legit_set = data.loc[is_legit]
+
+good_heuristics = []
+
+# Score every column as a nearest-class-mean classifier: a row is guessed to be
+# fraud when its value is strictly closer to the fraud mean than the legit mean.
+for col_name in fraud_set.columns:
+    fm = fraud_set[col_name].mean()
+    lm = legit_set[col_name].mean()
+    dist_fraud = (data[col_name] - fm).abs()
+    dist_legit = (data[col_name] - lm).abs()
+    guess_fraud = dist_fraud < dist_legit
+    guess_legit = dist_fraud > dist_legit
+    # Rows equidistant from both means (or NaN) are not counted either way.
+    decided = int((guess_fraud | guess_legit).sum())
+    corr = int(((guess_fraud & is_fraud) | (guess_legit & is_legit)).sum())
+
+    print(col_name)
+    print(fm)
+    print(lm)
+    # Guard the division: no row is decided when the two means coincide or one
+    # class is empty (its mean is NaN).
+    accuracy = corr / decided if decided else 0.0
+    print(accuracy)
+    if (accuracy > .98) and col_name != "Class":
+        print("good heuristic!")
+        good_heuristics.append({"name": col_name, "fraud_mean": fm, "legit_mean": lm})
+    print("")
+
+print(good_heuristics)
+
+# Guess fraud (1) when any good heuristic puts the row closer to its fraud mean.
+flagged = pandas.Series(False, index=data.index)
+for h in good_heuristics:
+    col = data[h["name"]]
+    flagged |= (col - h["fraud_mean"]).abs() < (col - h["legit_mean"]).abs()
+data["guess"] = flagged.astype(int)
+print(data.head(10))
+data.to_csv("woo.csv")
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..2b375ce
--- /dev/null
+++ b/test.py
@@ -0,0 +1,14 @@
+import pandas
+
+# V17 class means; presumably copied from main.py's printed output -- confirm
+# against the dataset before relying on them.
+FRAUD_MEAN = -6.665836399449663
+LEGIT_MEAN = 0.01153506325212925
+
+data = pandas.read_csv("creditcard.csv")
+v17 = data["V17"]
+# Guess fraud (1) when V17 is strictly closer to the fraud mean.
+closer_to_fraud = (v17 - FRAUD_MEAN).abs() < (v17 - LEGIT_MEAN).abs()
+data["guess"] = closer_to_fraud.astype(int)
+print(data.head(10))
+data.to_csv("woo2.csv")