kmeans Minor Bugfixes

- Added cluster to test generator - Added sample data to main
2018-06-02 23:30:07 +02:00
parent 7ea392c302
commit 4e3ceac4a9
4 changed files with 62 additions and 6 deletions
--- a/src/algorithms/dmlib.py
+++ b/src/algorithms/dmlib.py
@@ -1,5 +1,5 @@
 # Calculate the difference between two points giving the indexes of these data entries
-def calcdiff(point1, point2, data):
+def calcdiff(point1, point2):
 	if int(point2) > int(point1):
 		difference = int(point2) - int(point1)
 	else:
@@ -20,3 +20,21 @@ def findHighest(data):
 		if int(data[i]) > maximum:
 			maximum = int(data[i])
 	return maximum
+
+def pp_calcdiff(data, clusterpoint):
+	# Return the entry of data with the largest calcdiff() to clusterpoint (0 if no entry has a positive difference)
+	best_diff, best_entry = 0, 0
+	for entry in data:
+		diff = calcdiff(entry, clusterpoint)
+		if diff > best_diff:
+			best_diff, best_entry = diff, entry
+	return best_entry
+
+def pp_calcdiff_2(data, clusterpoint, clusterpoint_2):
+	# Return the entry of data whose summed calcdiff() to both cluster points is largest (0 if no sum is positive)
+	max_diff, new_cluster = 0, 0
+	for entry in data:
+		diff = calcdiff(entry, clusterpoint) + calcdiff(entry, clusterpoint_2)
+		if diff > max_diff:
+			# Keep the same summed value that was compared, so the running maximum stays consistent
+			max_diff, new_cluster = diff, entry
+	return new_cluster
--- a/src/algorithms/dmtest.py
+++ b/src/algorithms/dmtest.py
@@ -8,8 +8,10 @@ def plzGen(entries):
 	for i in range(0, int(entries)):
 		if i < round(entries * 0.4):
 			plz = generateNumber(plz_lenght, 2)
-		elif i >= round(entries * 0.4) and i < round(entries * 0.8):
+		elif i >= round(entries * 0.4) and i < round(entries * 0.6):
 			plz = generateNumber(plz_lenght, 9)
+		elif i >= round(entries * 0.6) and i < round(entries * 0.9):
+			plz = generateNumber(plz_lenght, 4)
 		else:
 			plz = generateNumber(plz_lenght, randint(0,9))
 		dataArray.append(plz)
--- a/src/algorithms/kmeansMkI.py
+++ b/src/algorithms/kmeansMkI.py
@@ -107,8 +107,8 @@ def assignCluster(data, highPoint, clusters):

 		# Check the difference between the point (item) and each cluster and set min_cluster to the smallest difference 
 		for cluster in range(0, clusters):
-			if min_cluster > dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)], new_data[0]):
-				min_cluster = dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)], new_data[0])
+			if min_cluster > dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)]):
+				min_cluster = dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)])
 				assinged_cluster = globals()["cpoint_" + str(cluster)]
 		# Assign the minimal difference cluster to the data
 		data_assigned.append(assinged_cluster)
@@ -135,6 +135,6 @@ def startup(data):
 	print(str(seconds) + " seconds for execution")

 # Start the algorithm and generate test data
-data = dmtest.plzGen(1000)
+data = dmtest.plzGen(10000)

 startup(data)
--- a/src/main.py
+++ b/src/main.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+#title:				main.py
+#description:		
+#author:			Conrad Großer
+#license:			https://github.com/tchemn/miner/blob/master/LICENSE
+#date:				02.06.2018
+#version:			0.1
+#usage:				PENDING
+#notes:				
+#dependencies:		
+#known_issues:		
+#python_version:	3.x
+#==============================================================================
+
+# IMPORTS
+
+# Importing the time for benchmarking purposes
+import time
+from datetime import date
+
+# CODE (FUNCTIONS)
+
+
+# EXECUTION
+if __name__ == '__main__':
+	# Print welcoming message
+	print("Hello world")
+
+	# For benchmarking: start the timer now
+	start_time = time.time()
+
+	# Get parameters, call functions, execute program (...)
+
+	# BENCHMARKING [END] - kept inside the guard because start_time only exists when run as a script
+	sec = time.time() - start_time
+	print("The program took " + str(sec) + " seconds (" + str(sec/60) + " minutes) for execution.")