kmeans Update 1.1
The algorithm now automatically ends when the centers of the found clusters no longer move. Other changes: - repurposed the runs variable to be a counter instead of a user-given value - the results are now displayed by the kmeansmk1 function instead of the startup function - updated the version number to 1.1
This commit is contained in:
@@ -4,7 +4,7 @@
|
|||||||
#author: Tillmann Brendel, Conrad Großer
|
#author: Tillmann Brendel, Conrad Großer
|
||||||
#license: Pending
|
#license: Pending
|
||||||
#date: 26.05.2018
|
#date: 26.05.2018
|
||||||
#version: 1.0
|
#version: 1.1
|
||||||
#usage: python pyscript.py
|
#usage: python pyscript.py
|
||||||
#notes:
|
#notes:
|
||||||
#known_issues:
|
#known_issues:
|
||||||
@@ -29,7 +29,7 @@ import dmtest
|
|||||||
|
|
||||||
# CODE
|
# CODE
|
||||||
# Main function of the algorithm
|
# Main function of the algorithm
|
||||||
def kmeansmk1(data, clusters, runs):
|
def kmeansmk1(data, clusters):
|
||||||
# Defining cluster points
|
# Defining cluster points
|
||||||
for i in range(0, clusters):
|
for i in range(0, clusters):
|
||||||
globals()["cpoint_" + str(i)] = data[randint(0, len(data))]
|
globals()["cpoint_" + str(i)] = data[randint(0, len(data))]
|
||||||
@@ -37,16 +37,28 @@ def kmeansmk1(data, clusters, runs):
|
|||||||
|
|
||||||
# Get max value in the data array
|
# Get max value in the data array
|
||||||
highPoint = dmlib.findHighest(data)
|
highPoint = dmlib.findHighest(data)
|
||||||
|
done = 0
|
||||||
for run in range(0, runs):
|
runs = 0
|
||||||
|
while done == 0:
|
||||||
|
runs = runs + 1
|
||||||
new_data = assignCluster(data, highPoint, clusters)
|
new_data = assignCluster(data, highPoint, clusters)
|
||||||
calcClusters(new_data, clusters)
|
calcClusters(new_data, clusters)
|
||||||
|
for cluster in range(0, clusters):
|
||||||
|
|
||||||
|
#keeps the algorithm going until the central cluster point doesn't change anymore
|
||||||
|
if globals()["cpointchanged_" + str(cluster)] == 1:
|
||||||
|
done = 1
|
||||||
|
|
||||||
|
# Printing final clusters
|
||||||
|
for i in range(0, clusters):
|
||||||
|
print("Endcluster " + str(i + 1) + " is calculated to be at " + str(globals()["cpoint_" + str(i)]) + " after " + str(runs) + " runs")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
# Calculates middle values for each cluster, takes 2D array (item, assigned_cluster)
|
# Calculates middle values for each cluster, takes 2D array (item, assigned_cluster)
|
||||||
def calcClusters(data, clusters):
|
def calcClusters(data, clusters):
|
||||||
for cluster in range(0, clusters):
|
for cluster in range(0, clusters):
|
||||||
|
globals()["cpointchanged_" + str(cluster)] = 0
|
||||||
|
globals()["oldcpoint_" + str(cluster)] = globals()["cpoint_" + str(cluster)]
|
||||||
clustersum = 0
|
clustersum = 0
|
||||||
count = 0
|
count = 0
|
||||||
for item in range(0, len(data[0])):
|
for item in range(0, len(data[0])):
|
||||||
@@ -54,6 +66,10 @@ def calcClusters(data, clusters):
|
|||||||
clustersum = clustersum + int(data[0][item])
|
clustersum = clustersum + int(data[0][item])
|
||||||
count = count + 1
|
count = count + 1
|
||||||
globals()["cpoint_" + str(cluster)] = round(clustersum / count)
|
globals()["cpoint_" + str(cluster)] = round(clustersum / count)
|
||||||
|
|
||||||
|
#checking if old clusterpoint is equal to the one just calculated
|
||||||
|
if globals()["oldcpoint_" + str(cluster)] == globals()["cpoint_" + str(cluster)]:
|
||||||
|
globals()["cpointchanged_" + str(cluster)] = 1
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
def assignCluster(data, highPoint, clusters):
|
def assignCluster(data, highPoint, clusters):
|
||||||
@@ -88,24 +104,16 @@ def startup(data):
|
|||||||
# cores = input("How many cores should be used? ")
|
# cores = input("How many cores should be used? ")
|
||||||
# path = input("Where is the data? ") or in this case data
|
# path = input("Where is the data? ") or in this case data
|
||||||
|
|
||||||
# runs = int(input("How many runs are sufficient? "))
|
|
||||||
runs = 500
|
|
||||||
|
|
||||||
# For benchmarking starting the timer now
|
# For benchmarking starting the timer now
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
# Firing up the engines!
|
# Firing up the engines!
|
||||||
kmeansmk1(data, clusters, runs)
|
kmeansmk1(data, clusters)
|
||||||
|
|
||||||
# Stopping benchmark
|
# Stopping benchmark
|
||||||
seconds = time.time() - start_time
|
seconds = time.time() - start_time
|
||||||
print(str(seconds) + " seconds for execution")
|
print(str(seconds) + " seconds for execution")
|
||||||
|
|
||||||
# Printing final clusters
|
|
||||||
for i in range(0, clusters):
|
|
||||||
print("Cluster " + str(i + 1) + " found at " + str(globals()["cpoint_" + str(i)]))
|
|
||||||
|
|
||||||
|
|
||||||
# Start the algorithm and generate test data
|
# Start the algorithm and generate test data
|
||||||
data = dmtest.plzGen(1000)
|
data = dmtest.plzGen(1000)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user