General Bugfixes

- Rewrote certain numGen functions
- Removed the ++ functions (pp_calcdiff, pp_calcdiff_2)
2018-06-05 16:58:05 +02:00
parent 667e7881cc
commit 54f81b2291
4 changed files with 26 additions and 80 deletions
--- a/src/algorithms/dmlib.py
+++ b/src/algorithms/dmlib.py
@@ -1,27 +1,26 @@
-# Calculate the difference between two points giving the indexes of these xdata entries
 import math
-def calcdiff(point1, point2, data):
+
+# Calculate the absolute difference between two points (both values are converted to int)
+def calcdiff(point1, point2):
    if int(point2) > int(point1):
        difference = int(point2) - int(point1)
    else:
        difference = int(point1) - int(point2)
-    # print("Datapoint: " + str(xdata[point1]) + " | Cluster: " + str(xdata[point2]) + " | Difference: " + str(difference))
-    return betrag(difference)
+    return difference

+# Calculate the Euclidean distance between two points in 2D space
 def calcdiff2d(point1, point2):
    point1 = [int(i) for i in point1]
    point2 = [int(i) for i in point2]
-    difference = math.sqrt(((point2[0])-(point1[0]))**2+((point2[0])-(point1[0]))**2)
+    difference = math.sqrt(((point2[0]) - (point1[0])) ** 2 + ((point2[1]) - (point1[1])) ** 2)
    return betrag(difference)

-
 # Get the absolute value of a number and returns it as int
 def betrag(number):
    if number < 0:
        number = int((-2 * number) / 2)
    return number

-
 # Determine the highest int value in an array and returns is as an int
 def findHighest(data):
    maximum = 0
@@ -29,21 +28,3 @@ def findHighest(data):
        if int(data[i]) > maximum:
            maximum = int(data[i])
    return maximum
-
-def pp_calcdiff(data, clusterpoint):
-	max_diff = 0
-	new_cluster = 0
-	for item in range(0,len(data)):
-		if calcdiff(data[item], clusterpoint) > max_diff:
-			max_diff = calcdiff(data[item], clusterpoint)
-			new_cluster = data[item]
-	return new_cluster
-
-def pp_calcdiff_2(data, clusterpoint, clusterpoint_2):
-	max_diff = 0
-	new_cluster = 0
-	for item in range(0,len(data)):
-		if calcdiff(data[item], clusterpoint) + calcdiff(data[item], clusterpoint_2) > max_diff:
-			max_diff = calcdiff(data[item], clusterpoint)
-			new_cluster = data[item]
-	return new_cluster
--- a/src/algorithms/dmtest.py
+++ b/src/algorithms/dmtest.py
@@ -1,21 +1,21 @@
 # For random generation of numbers import randint
 from random import randint, shuffle

-# Simple generator for test plzs (40-40-20 biased), returns 1D array of plzs
-def plzGen(entries):
+# Simple generator for test nums (40-20-30-10 biased), returns 1D array of nums. NOTE(review): the `shuffle` parameter shadows random.shuffle, so passing True raises TypeError
+def numGenLight(entries, shuffle, num_lenght):
 	dataArray = []
-	plz_lenght = 5
 	for i in range(0, int(entries)):
 		if i < round(entries * 0.4):
-			plz = generateNumber(plz_lenght, 2)
+			num = generateNumber(num_lenght, 2)
 		elif i >= round(entries * 0.4) and i < round(entries * 0.6):
-			plz = generateNumber(plz_lenght, 9)
+			num = generateNumber(num_lenght, 9)
 		elif i >= round(entries * 0.6) and i < round(entries * 0.9):
-			plz = generateNumber(plz_lenght, 4)
+			num = generateNumber(num_lenght, 4)
 		else:
-			plz = generateNumber(plz_lenght, randint(0,9))
-		dataArray.append(plz)
-	shuffle(dataArray)
+			num = generateNumber(num_lenght, randint(0,9))
+		dataArray.append(num)
+	if shuffle:
+		shuffle(dataArray)
 	return dataArray

 # Function for generating the content of one single row randomly
@@ -34,7 +34,7 @@ def writeFile(content, nameChunkStart, namePartStart):
 		file.write(content[w] + "\n")

 # Function for generating 'entries'x int_lenght'-long numbers in 'clusters' clusters
-def numGen(entries, cluster, int_lenght):
+def numGen(entries, cluster, int_lenght, suffle_value):
 	dataArray = []
 	clusterArray = []

@@ -48,35 +48,8 @@ def numGen(entries, cluster, int_lenght):
 		else:
 			cluster_decider = randint(0, cluster - 1)
 			dataArray.append(generateNumber(int_lenght - 1, clusterArray[cluster_decider]))
-	shuffle(dataArray)
+
+	if suffle_value:
+		shuffle(dataArray)
+
 	return dataArray
-# Simple generator for test plzs (40-40-20 biased), returns 1D array of plzs
-def plzGenNS(entries):
-    dataArray = []
-    plz_lenght = 5
-    for i in range(0, int(entries)):
-        if i < round(entries * 0.4):
-            plz = generateNumber(plz_lenght, 2)
-        elif i >= round(entries * 0.4) and i < round(entries * 0.8):
-            plz = generateNumber(plz_lenght, 6)
-        else:
-            plz = generateNumber(plz_lenght, randint(0, 9))
-        dataArray.append(plz)
-    #i had to remove shuffle for the connectrion (age ==> plz) to work, else we would have 4 clusters
-    # shuffle(dataArray)
-    return dataArray  #
-
-
-def ageGenNS(entries):
-    dataArray = []
-    age_lenght = 2
-    for i in range(0, int(entries)):
-        if i < round(entries * 0.4):
-            age = generateNumber(age_lenght, 2)
-        elif i >= round(entries * 0.4) and i < round(entries * 0.8):
-            age = generateNumber(age_lenght, 5)
-        else:
-            age = generateNumber(age_lenght, randint(0, 9))
-        dataArray.append(age)
-    # shuffle(dataArray)
-    return dataArray
--- a/src/algorithms/kmeansMkI.py
+++ b/src/algorithms/kmeansMkI.py
@@ -135,8 +135,5 @@ def startup(data):
 	print(str(seconds) + " seconds for execution")

 # Start the algorithm and generate test data
-# data = dmtest.plzGen(10000)
-# data = dmtest.numGen(10000, 3, 5)
-
-data = dmtest.numGen(10000, 8, 7)
+data = dmtest.numGen(10000, 2, 5, True)
 startup(data)
--- a/src/algorithms/kmeansMkI_2d.py
+++ b/src/algorithms/kmeansMkI_2d.py
@@ -31,8 +31,6 @@ import matplotlib.pyplot as plt
 import dmlib
 import dmtest

-
-
 # CODE
 # Main function of the algorithm
 def kmeansmk1(xdata, ydata, clusters):
@@ -63,6 +61,7 @@ def kmeansmk1(xdata, ydata, clusters):
        plt.plot(globals()["cpoint_" + str(i)][0], globals()["cpoint_" + str(i)][1], 'ro')
    plt.scatter([int(x) for x in xdata], [int(y) for y in ydata], marker='x', s=7, color='k')
    plt.show()
+
 # Calculates middle values for each cluster, takes 2D array (item, assigned_cluster)
 def calcClusters(xdata, ydata, assigned_points, clusters):
    for cluster in range(0, clusters):
@@ -86,7 +85,6 @@ def calcClusters(xdata, ydata, assigned_points, clusters):

    return cpointunchanged

-
 def assignCluster(xdata, ydata, clusters, highpointx, highpointy):
    data_assigned = []
    assigned_cluster = 0
@@ -103,15 +101,13 @@ def assignCluster(xdata, ydata, clusters, highpointx, highpointy):
       # print('cluster number ' + str(cluster) + ' assigned')
        data_assigned.append(assigned_cluster)
    # Add the assigned values list to the new_data array
-    #new_data.append(data_assigned)
-
+    # new_data.append(data_assigned)
    return data_assigned

-
 # Startup function for collecting necesarry xdata
 def startup(xdata, ydata):
    # Using two clusters for testing
-    clusters = int(input("How many clusters are known? (hint: 2) "))
+    clusters = int(input("How many clusters are known? "))
    # cores = input("How many cores should be used? ")
    # path = input("Where is the xdata? ") or in this case xdata

@@ -125,9 +121,8 @@ def startup(xdata, ydata):
    seconds = time.time() - start_time
    print(str(seconds) + " seconds for execution")

-
 # Start the algorithm and generate test xdata
-xdata = dmtest.plzGenNS(1000)
-ydata = dmtest.ageGenNS(1000)
+xdata = dmtest.numGenLight(10000, False, 5)
+ydata = dmtest.numGenLight(10000, False, 2)

 startup(xdata, ydata)