From 54f81b229159e84ad87f7acbe5b2110af2efed4b Mon Sep 17 00:00:00 2001
From: creyd
Date: Tue, 5 Jun 2018 16:58:05 +0200
Subject: [PATCH] General Bugfixes

- Rewrote certain numGen functions
- Removed ++ functions
---
Review notes (text between the "---" line and the first "diff --git" line is
not part of the change):
- The line structure of this patch was restored. The restored hunks add up
  to the original hunk headers and to the original diffstat totals.
- NOTE(review): the indentation of context and removed lines was inferred
  from Python syntax - confirm against the target files before applying.
- numGenLight: the boolean `shuffle` parameter hid random.shuffle inside the
  function, so calling it with shuffle=True raised
  "TypeError: 'bool' object is not callable". The function is now imported
  under an alias inside the function; the signature is unchanged. This adds
  two lines, so the dmtest.py hunk headers and the diffstat were adjusted.
  The post-image blob id on the dmtest.py "index" line predates this change.

 src/algorithms/dmlib.py        | 31 ++++---------------
 src/algorithms/dmtest.py       | 57 ++++++++++------------------------
 src/algorithms/kmeansMkI.py    |  5 +---
 src/algorithms/kmeansMkI_2d.py | 15 ++++------
 4 files changed, 28 insertions(+), 80 deletions(-)

diff --git a/src/algorithms/dmlib.py b/src/algorithms/dmlib.py
index c498b02..25d9453 100644
--- a/src/algorithms/dmlib.py
+++ b/src/algorithms/dmlib.py
@@ -1,27 +1,26 @@
-# Calculate the difference between two points giving the indexes of these xdata entries
 import math
-def calcdiff(point1, point2, data):
+
+# Calculate the difference between two points giving the indexes of these xdata entries
+def calcdiff(point1, point2):
     if int(point2) > int(point1):
         difference = int(point2) - int(point1)
     else:
         difference = int(point1) - int(point2)
-    # print("Datapoint: " + str(xdata[point1]) + " | Cluster: " + str(xdata[point2]) + " | Difference: " + str(difference))
-    return betrag(difference)
+    return difference
 
+# Calculate the difference between two points in 2D space
 def calcdiff2d(point1, point2):
     point1 = [int(i) for i in point1]
     point2 = [int(i) for i in point2]
-    difference = math.sqrt(((point2[0])-(point1[0]))**2+((point2[0])-(point1[0]))**2)
+    difference = math.sqrt(((point2[0]) - (point1[0])) ** 2 + ((point2[1]) - (point1[1])) ** 2)
     return betrag(difference)
 
-
 # Get the absolute value of a number and returns it as int
 def betrag(number):
     if number < 0:
         number = int((-2 * number) / 2)
     return number
 
-
 # Determine the highest int value in an array and returns is as an int
 def findHighest(data):
     maximum = 0
@@ -29,21 +28,3 @@ def findHighest(data):
         if int(data[i]) > maximum:
             maximum = int(data[i])
     return maximum
-
-def pp_calcdiff(data, clusterpoint):
-    max_diff = 0
-    new_cluster = 0
-    for item in range(0,len(data)):
-        if calcdiff(data[item], clusterpoint) > max_diff:
-            max_diff = calcdiff(data[item], clusterpoint)
-            new_cluster = data[item]
-    return new_cluster
-
-def pp_calcdiff_2(data, clusterpoint, clusterpoint_2):
-    max_diff = 0
-    new_cluster = 0
-    for item in range(0,len(data)):
-        if calcdiff(data[item], clusterpoint) + calcdiff(data[item], clusterpoint_2) > max_diff:
-            max_diff = calcdiff(data[item], clusterpoint)
-            new_cluster = data[item]
-    return new_cluster
diff --git a/src/algorithms/dmtest.py b/src/algorithms/dmtest.py
index 37c7dde..8cff71d 100644
--- a/src/algorithms/dmtest.py
+++ b/src/algorithms/dmtest.py
@@ -1,21 +1,23 @@
 # For random generation of numbers import randint
 from random import randint, shuffle
 
-# Simple generator for test plzs (40-40-20 biased), returns 1D array of plzs
-def plzGen(entries):
+# Simple generator for test nums (40-40-20 biased), returns 1D array of nums
+def numGenLight(entries, shuffle, num_lenght):
     dataArray = []
-    plz_lenght = 5
     for i in range(0, int(entries)):
         if i < round(entries * 0.4):
-            plz = generateNumber(plz_lenght, 2)
+            num = generateNumber(num_lenght, 2)
         elif i >= round(entries * 0.4) and i < round(entries * 0.6):
-            plz = generateNumber(plz_lenght, 9)
+            num = generateNumber(num_lenght, 9)
         elif i >= round(entries * 0.6) and i < round(entries * 0.9):
-            plz = generateNumber(plz_lenght, 4)
+            num = generateNumber(num_lenght, 4)
         else:
-            plz = generateNumber(plz_lenght, randint(0,9))
-        dataArray.append(plz)
-    shuffle(dataArray)
+            num = generateNumber(num_lenght, randint(0,9))
+        dataArray.append(num)
+    if shuffle:
+        # The boolean flag hides random.shuffle in this scope, so import it under an alias
+        from random import shuffle as shuffle_in_place
+        shuffle_in_place(dataArray)
     return dataArray
 
 # Function for generating the content of one single row randomly
@@ -34,7 +36,7 @@ def writeFile(content, nameChunkStart, namePartStart):
         file.write(content[w] + "\n")
 
 # Function for generating 'entries'x int_lenght'-long numbers in 'clusters' clusters
-def numGen(entries, cluster, int_lenght):
+def numGen(entries, cluster, int_lenght, suffle_value):
     dataArray = []
     clusterArray = []
 
@@ -48,35 +50,8 @@ def numGen(entries, cluster, int_lenght):
         else:
             cluster_decider = randint(0, cluster - 1)
         dataArray.append(generateNumber(int_lenght - 1, clusterArray[cluster_decider]))
-    shuffle(dataArray)
+
+    if suffle_value:
+        shuffle(dataArray)
+
     return dataArray
-# Simple generator for test plzs (40-40-20 biased), returns 1D array of plzs
-def plzGenNS(entries):
-    dataArray = []
-    plz_lenght = 5
-    for i in range(0, int(entries)):
-        if i < round(entries * 0.4):
-            plz = generateNumber(plz_lenght, 2)
-        elif i >= round(entries * 0.4) and i < round(entries * 0.8):
-            plz = generateNumber(plz_lenght, 6)
-        else:
-            plz = generateNumber(plz_lenght, randint(0, 9))
-        dataArray.append(plz)
-    #i had to remove shuffle for the connectrion (age ==> plz) to work, else we would have 4 clusters
-    # shuffle(dataArray)
-    return dataArray #
-
-
-def ageGenNS(entries):
-    dataArray = []
-    age_lenght = 2
-    for i in range(0, int(entries)):
-        if i < round(entries * 0.4):
-            age = generateNumber(age_lenght, 2)
-        elif i >= round(entries * 0.4) and i < round(entries * 0.8):
-            age = generateNumber(age_lenght, 5)
-        else:
-            age = generateNumber(age_lenght, randint(0, 9))
-        dataArray.append(age)
-    # shuffle(dataArray)
-    return dataArray
diff --git a/src/algorithms/kmeansMkI.py b/src/algorithms/kmeansMkI.py
index 89a42bc..b6928a9 100644
--- a/src/algorithms/kmeansMkI.py
+++ b/src/algorithms/kmeansMkI.py
@@ -135,8 +135,5 @@ def startup(data):
     print(str(seconds) + " seconds for execution")
 
 # Start the algorithm and generate test data
-# data = dmtest.plzGen(10000)
-# data = dmtest.numGen(10000, 3, 5)
-
-data = dmtest.numGen(10000, 8, 7)
+data = dmtest.numGen(10000, 2, 5, True)
 startup(data)
diff --git a/src/algorithms/kmeansMkI_2d.py b/src/algorithms/kmeansMkI_2d.py
index 764e784..83de3ee 100644
--- a/src/algorithms/kmeansMkI_2d.py
+++ b/src/algorithms/kmeansMkI_2d.py
@@ -31,8 +31,6 @@ import matplotlib.pyplot as plt
 import dmlib
 import dmtest
 
-
-
 # CODE
 # Main function of the algorithm
 def kmeansmk1(xdata, ydata, clusters):
@@ -63,6 +61,7 @@ def kmeansmk1(xdata, ydata, clusters):
         plt.plot(globals()["cpoint_" + str(i)][0], globals()["cpoint_" + str(i)][1], 'ro')
     plt.scatter([int(x) for x in xdata], [int(y) for y in ydata], marker='x', s=7, color='k')
     plt.show()
+
 # Calculates middle values for each cluster, takes 2D array (item, assigned_cluster)
 def calcClusters(xdata, ydata, assigned_points, clusters):
     for cluster in range(0, clusters):
@@ -86,7 +85,6 @@ def calcClusters(xdata, ydata, assigned_points, clusters):
 
     return cpointunchanged
 
-
 def assignCluster(xdata, ydata, clusters, highpointx, highpointy):
     data_assigned = []
     assigned_cluster = 0
@@ -103,15 +101,13 @@ def assignCluster(xdata, ydata, clusters, highpointx, highpointy):
                 # print('cluster number ' + str(cluster) + ' assigned')
         data_assigned.append(assigned_cluster)
     # Add the assigned values list to the new_data array
-#new_data.append(data_assigned)
-
+    # new_data.append(data_assigned)
     return data_assigned
 
-
 # Startup function for collecting necesarry xdata
 def startup(xdata, ydata):
     # Using two clusters for testing
-    clusters = int(input("How many clusters are known? (hint: 2) "))
+    clusters = int(input("How many clusters are known? "))
     # cores = input("How many cores should be used? ")
     # path = input("Where is the xdata? ") or in this case xdata
 
@@ -125,9 +121,8 @@ def startup(xdata, ydata):
     seconds = time.time() - start_time
     print(str(seconds) + " seconds for execution")
 
-
 # Start the algorithm and generate test xdata
-xdata = dmtest.plzGenNS(1000)
-ydata = dmtest.ageGenNS(1000)
+xdata = dmtest.numGenLight(10000, False, 5)
+ydata = dmtest.numGenLight(10000, False, 2)
 startup(xdata, ydata)
 