From faa113f05eeb86deb3c6469017ef8fea2e47800d Mon Sep 17 00:00:00 2001 From: tchemn Date: Thu, 31 May 2018 18:24:53 +0200 Subject: [PATCH] kmeans Update 1.1 The algorithm now automatically ends when the center of the found clusters doesn't move anymore. Other changes: - repurposed the runs variable to be a counter instead of a user-given value - the results are now displayed through the kmeansmk1 function instead of the startup function - updated version number to 1.1 --- src/algorithms/kmeansMkI.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/algorithms/kmeansMkI.py b/src/algorithms/kmeansMkI.py index a7acacf..2334a72 100644 --- a/src/algorithms/kmeansMkI.py +++ b/src/algorithms/kmeansMkI.py @@ -4,7 +4,7 @@ #author: Tillmann Brendel, Conrad Großer #license: Pending #date: 26.05.2018 -#version: 1.0 +#version: 1.1 #usage: python pyscript.py #notes: #known_issues: @@ -29,7 +29,7 @@ import dmtest # CODE # Main function of the algorithm -def kmeansmk1(data, clusters, runs): +def kmeansmk1(data, clusters): # Defining cluster points for i in range(0, clusters): globals()["cpoint_" + str(i)] = data[randint(0, len(data))] @@ -37,16 +37,28 @@ def kmeansmk1(data, clusters, runs): # Get max value in the data array highPoint = dmlib.findHighest(data) - - for run in range(0, runs): + done = 0 + runs = 0 + while done == 0: + runs = runs + 1 new_data = assignCluster(data, highPoint, clusters) calcClusters(new_data, clusters) + for cluster in range(0, clusters): + #keeps the algorith going until the central clusterpoint doesnt change anymore + if globals()["cpointchanged_" + str(cluster)] == 1: + done = 1 + + # Printing final clusters + for i in range(0, clusters): + print("Endcluster " + str(i + 1) + " is calculated to be at " + str(globals()["cpoint_" + str(i)]) + " after " + str(runs) + " runs") return 0 # Calculates middle values for each cluster, takes 2D array (item, assigned_cluster) def calcClusters(data, clusters): 
for cluster in range(0, clusters): + globals()["cpointchanged_" + str(cluster)] = 0 + globals()["oldcpoint_" + str(cluster)] = globals()["cpoint_" + str(cluster)] clustersum = 0 count = 0 for item in range(0, len(data[0])): @@ -54,6 +66,10 @@ def calcClusters(data, clusters): clustersum = clustersum + int(data[0][item]) count = count + 1 globals()["cpoint_" + str(cluster)] = round(clustersum / count) + + #checking if old clusterpoint is equal to the one just calculated + if globals()["oldcpoint_" + str(cluster)] == globals()["cpoint_" + str(cluster)]: + globals()["cpointchanged_" + str(cluster)] = 1 return 0 def assignCluster(data, highPoint, clusters): @@ -87,25 +103,17 @@ def startup(data): clusters = int(input("How many clusters are known? ")) # cores = input("How many cores should be used? ") # path = input("Where is the data? ") or in this case data - - # runs = int(input("How many runs are sufficient? ")) - runs = 500 # For benchmarking starting the timer now start_time = time.time() # Firing up the engines! - kmeansmk1(data, clusters, runs) + kmeansmk1(data, clusters) # Stopping benchmark seconds = time.time() - start_time print(str(seconds) + " seconds for execution") - # Printing final clusters - for i in range(0, clusters): - print("Cluster " + str(i + 1) + " found at " + str(globals()["cpoint_" + str(i)])) - - # Start the algorithm and generate test data data = dmtest.plzGen(1000)