# K-Means Step 1 - simple cluster assigning.
# Recovered from the added lines of a patch against src/algorithms/kmeansMkI.py.
# NOTE(review): the same patch also touches startup() and testgenerator(), but
# only fragments of those functions are visible there, so they are not
# reproduced here.

import random


# CODE
# Main function of the algorithm
def kmeansmk1(data, clusters=2):
    """Assign every data point to the nearest of ``clusters`` random points.

    Cluster points are drawn at random from ``data`` itself (independent
    draws, so the same point may be picked more than once). Each item is then
    assigned to the cluster point with the smallest absolute difference; on a
    tie the cluster point that was drawn first wins.

    Args:
        data: Sequence of values convertible with ``int()``.
        clusters: Number of cluster points to draw. Defaults to 2.

    Returns:
        A list with one entry per item in ``data``: the index (into ``data``)
        of the cluster point the item was assigned to. Empty for empty input.

    Raises:
        ValueError: If ``clusters`` is smaller than 1.
    """
    if clusters < 1:
        raise ValueError("clusters must be at least 1")

    data_size = len(data)
    if data_size == 0:
        return []

    # Defining cluster points
    cluster_points = []
    for i in range(clusters):
        # randrange() excludes the upper bound, so the result is always a
        # valid index into data.
        point = random.randrange(data_size)
        cluster_points.append(point)
        print("Cluster " + str(i) + ": " + str(data[point]))

    data_assigned = []
    for item in range(data_size):
        # min() returns the first minimal element, so every item gets a
        # cluster even when its distance equals the largest value in data.
        nearest = min(
            cluster_points,
            key=lambda point, item=item: calcdiff(item, point, data),
        )
        data_assigned.append(nearest)

    return data_assigned


def findHighest(data):
    """Return the largest value in ``data`` after ``int()`` conversion.

    Returns 0 for empty input.
    """
    return max((int(value) for value in data), default=0)


def calcdiff(point1, point2, data):
    """Return the absolute difference between two entries of ``data``.

    ``point1`` and ``point2`` are indices into ``data``; both entries are
    converted with ``int()`` before subtracting.
    """
    return betrag(int(data[point1]) - int(data[point2]))


def betrag(zahl):
    """Return the absolute value of ``zahl``."""
    # abs() stays exact for arbitrarily large integers; a detour through
    # float division would not.
    return abs(zahl)