kmeans Update 1.1

The algorithm now automatically ends when the centers of the found clusters no longer move.

Other changes:
- repurposed the runs variable to be a counter instead of a user-given value
- the results are now displayed through the kmeansmk1 function instead of the startup function
- updated version number to 1.1
This commit is contained in:
tchemn
2018-05-31 18:24:53 +02:00
parent 70cd6c5939
commit faa113f05e

View File

@@ -4,7 +4,7 @@
#author: Tillmann Brendel, Conrad Großer
#license: Pending
#date: 26.05.2018
#version: 1.0
#version: 1.1
#usage: python pyscript.py
#notes:
#known_issues:
@@ -29,7 +29,7 @@ import dmtest
# CODE
# Main function of the algorithm
def kmeansmk1(data, clusters, runs):
def kmeansmk1(data, clusters):
# Defining cluster points
for i in range(0, clusters):
globals()["cpoint_" + str(i)] = data[randint(0, len(data))]
@@ -37,16 +37,28 @@ def kmeansmk1(data, clusters, runs):
# Get max value in the data array
highPoint = dmlib.findHighest(data)
for run in range(0, runs):
done = 0
runs = 0
while done == 0:
runs = runs + 1
new_data = assignCluster(data, highPoint, clusters)
calcClusters(new_data, clusters)
for cluster in range(0, clusters):
#keeps the algorithm going until the central cluster point doesn't change anymore
if globals()["cpointchanged_" + str(cluster)] == 1:
done = 1
# Printing final clusters
for i in range(0, clusters):
print("Endcluster " + str(i + 1) + " is calculated to be at " + str(globals()["cpoint_" + str(i)]) + " after " + str(runs) + " runs")
return 0
# Calculates middle values for each cluster, takes 2D array (item, assigned_cluster)
def calcClusters(data, clusters):
for cluster in range(0, clusters):
globals()["cpointchanged_" + str(cluster)] = 0
globals()["oldcpoint_" + str(cluster)] = globals()["cpoint_" + str(cluster)]
clustersum = 0
count = 0
for item in range(0, len(data[0])):
@@ -54,6 +66,10 @@ def calcClusters(data, clusters):
clustersum = clustersum + int(data[0][item])
count = count + 1
globals()["cpoint_" + str(cluster)] = round(clustersum / count)
#checking if old clusterpoint is equal to the one just calculated
if globals()["oldcpoint_" + str(cluster)] == globals()["cpoint_" + str(cluster)]:
globals()["cpointchanged_" + str(cluster)] = 1
return 0
def assignCluster(data, highPoint, clusters):
@@ -87,25 +103,17 @@ def startup(data):
clusters = int(input("How many clusters are known? "))
# cores = input("How many cores should be used? ")
# path = input("Where is the data? ") or in this case data
# runs = int(input("How many runs are sufficient? "))
runs = 500
# For benchmarking starting the timer now
start_time = time.time()
# Firing up the engines!
kmeansmk1(data, clusters, runs)
kmeansmk1(data, clusters)
# Stopping benchmark
seconds = time.time() - start_time
print(str(seconds) + " seconds for execution")
# Printing final clusters
for i in range(0, clusters):
print("Cluster " + str(i + 1) + " found at " + str(globals()["cpoint_" + str(i)]))
# Start the algorithm and generate test data
data = dmtest.plzGen(1000)