kmeans Update 1.2

- Added dependencies to info (adjusted template too) - Removed unnecessary global variables - Added comments - Saved a few variables - Removed unnecessary libraries 'numpy' and 'multiprocessing'
2018-06-01 03:15:33 +02:00
parent e1d794c006
commit 7ea392c302
2 changed files with 29 additions and 26 deletions
--- a/src/algorithms/kmeansMkI.py
+++ b/src/algorithms/kmeansMkI.py
@@ -4,9 +4,10 @@
 #author:			Tillmann Brendel, Conrad Großer
 #license:			Pending
 #date:				26.05.2018
-#version:			1.1
+#version:			1.2
 #usage:				python pyscript.py
 #notes:
 #dependencies:		mathplotlib
 #known_issues:
 #python_version:	3.x
 #==============================================================================
@@ -20,11 +21,7 @@ from datetime import date
 # For random generation of numbers import randint
 from random import randint
 # Importing libary for multi core processing
 import multiprocessing
 # Importing libaries for easy plotting
 import numpy as np
 import matplotlib.pyplot as plt
 # Importing own libaries Datamining Libary and Datamining Test
@@ -41,54 +38,59 @@ def kmeansmk1(data, clusters):
 	# Get max value in the data array
 	highPoint = dmlib.findHighest(data)
 	# Define variables for running the algorithm (runs is just for benchmarking!)
 	done = 0
 	runs = 0
 	# As long as calcClusters returns done it will rearange the clusters and assign the data to the clusters
 	while done == 0:
 		runs = runs + 1
 		new_data = assignCluster(data, highPoint, clusters)
-		calcClusters(new_data, clusters)
+		done = calcClusters(new_data, clusters)
 		for cluster in range(0, clusters):
 			#keeps the algorithm going until the central clusterpoint doesnt change anymore
 			if globals()["cpointchanged_" + str(cluster)] == 1:
 				done = 1
 	# Printing final clusters
 	for i in range(0, clusters):
 		print("Endcluster " + str(i + 1) + " is calculated to be at  " + str(globals()["cpoint_" + str(i)]) + " after " + str(runs) + " runs")
-
+	# Getting artificial array for visualizing 1D data in an 2D graphic of the size of the original data
 	# plotting the random data and the found clusters
 	anew = []
 	inew = 0
-	while inew < 1000:
+	while inew < len(data):
 		anew.append(inew)
 		inew = inew + 1
-	floatdata = [int(x) for x in data]
+
 	# Drawing found clusters as lines
 	for i in range(0, clusters):
 		plt.axvline(x=int(globals()["cpoint_" + str(i)]), color='r')
-	plt.scatter(floatdata, anew, marker='x', s=7, color='k')
+
 	# Showing graph
 	plt.scatter([int(x) for x in data], anew, marker='x', s=7, color='k')
 	plt.show()
 	return 0
 # Calculates middle values for each cluster, takes 2D array (item, assigned_cluster)
 def calcClusters(data, clusters):
 	changed = 0
 	for cluster in range(0, clusters):
-		globals()["cpointchanged_" + str(cluster)] = 0
+		# Getting current cluster and saving it in temporary variable
-		globals()["oldcpoint_" + str(cluster)] = globals()["cpoint_" + str(cluster)]
+		prev_cluster = globals()["cpoint_" + str(cluster)]
 		# Sum of the cluster to calculate average difference between cluster center and data points 
 		clustersum = 0
-		count = 0
+		item_count = 0
 		for item in range(0, len(data[0])):
 			if data[1][item] == globals()["cpoint_" + str(cluster)]:
 				clustersum = clustersum + int(data[0][item])
-				count = count + 1
+				item_count = item_count + 1
-		globals()["cpoint_" + str(cluster)] = round(clustersum / count)
+		globals()["cpoint_" + str(cluster)] = round(clustersum / item_count)
-		#checking if old clusterpoint is equal to the one just calculated
+		# Checking if previous clusterpoint is equal to the one just calculated
-		if globals()["oldcpoint_" + str(cluster)] == globals()["cpoint_" + str(cluster)]:
+		if prev_cluster == globals()["cpoint_" + str(cluster)]:
-			globals()["cpointchanged_" + str(cluster)] = 1
+			changed = 1
-	return 0
+
 	return changed
 def assignCluster(data, highPoint, clusters):
 	# Create a new data array for working
--- a/src/template.py
+++ b/src/template.py
@@ -7,6 +7,7 @@
 #version:			Versionnumber
 #usage:				Description of how to use the programm quickly
 #notes:				Notes for parameters, thanks (...)
 #dependencies:		Preinstalled packages
 #known_issues:		Known issues in this version
 #python_version:	Compatible (tested) python version
 #==============================================================================