General Bugfixes
- Rewrote certain numGen functions - Removed ++ functions
This commit is contained in:
@@ -1,27 +1,26 @@
|
|||||||
# Calculate the difference between two points giving the indexes of these xdata entries
|
|
||||||
import math
|
import math
|
||||||
def calcdiff(point1, point2, data):
|
|
||||||
|
# Calculate the difference between two points giving the indexes of these xdata entries
|
||||||
|
def calcdiff(point1, point2):
|
||||||
if int(point2) > int(point1):
|
if int(point2) > int(point1):
|
||||||
difference = int(point2) - int(point1)
|
difference = int(point2) - int(point1)
|
||||||
else:
|
else:
|
||||||
difference = int(point1) - int(point2)
|
difference = int(point1) - int(point2)
|
||||||
# print("Datapoint: " + str(xdata[point1]) + " | Cluster: " + str(xdata[point2]) + " | Difference: " + str(difference))
|
return difference
|
||||||
return betrag(difference)
|
|
||||||
|
|
||||||
|
# Calculate the difference between two points in 2D space
|
||||||
def calcdiff2d(point1, point2):
|
def calcdiff2d(point1, point2):
|
||||||
point1 = [int(i) for i in point1]
|
point1 = [int(i) for i in point1]
|
||||||
point2 = [int(i) for i in point2]
|
point2 = [int(i) for i in point2]
|
||||||
difference = math.sqrt(((point2[0])-(point1[0]))**2+((point2[0])-(point1[0]))**2)
|
difference = math.sqrt(((point2[0]) - (point1[0])) ** 2 + ((point2[1]) - (point1[1])) ** 2)
|
||||||
return betrag(difference)
|
return betrag(difference)
|
||||||
|
|
||||||
|
|
||||||
# Get the absolute value of a number and return it as an int
|
# Get the absolute value of a number and return it as an int
|
||||||
def betrag(number):
|
def betrag(number):
|
||||||
if number < 0:
|
if number < 0:
|
||||||
number = int((-2 * number) / 2)
|
number = int((-2 * number) / 2)
|
||||||
return number
|
return number
|
||||||
|
|
||||||
|
|
||||||
# Determine the highest int value in an array and return it as an int
|
# Determine the highest int value in an array and return it as an int
|
||||||
def findHighest(data):
|
def findHighest(data):
|
||||||
maximum = 0
|
maximum = 0
|
||||||
@@ -29,21 +28,3 @@ def findHighest(data):
|
|||||||
if int(data[i]) > maximum:
|
if int(data[i]) > maximum:
|
||||||
maximum = int(data[i])
|
maximum = int(data[i])
|
||||||
return maximum
|
return maximum
|
||||||
|
|
||||||
def pp_calcdiff(data, clusterpoint):
|
|
||||||
max_diff = 0
|
|
||||||
new_cluster = 0
|
|
||||||
for item in range(0,len(data)):
|
|
||||||
if calcdiff(data[item], clusterpoint) > max_diff:
|
|
||||||
max_diff = calcdiff(data[item], clusterpoint)
|
|
||||||
new_cluster = data[item]
|
|
||||||
return new_cluster
|
|
||||||
|
|
||||||
def pp_calcdiff_2(data, clusterpoint, clusterpoint_2):
|
|
||||||
max_diff = 0
|
|
||||||
new_cluster = 0
|
|
||||||
for item in range(0,len(data)):
|
|
||||||
if calcdiff(data[item], clusterpoint) + calcdiff(data[item], clusterpoint_2) > max_diff:
|
|
||||||
max_diff = calcdiff(data[item], clusterpoint)
|
|
||||||
new_cluster = data[item]
|
|
||||||
return new_cluster
|
|
||||||
|
|||||||
@@ -1,21 +1,21 @@
|
|||||||
# For random generation of numbers import randint
|
# For random generation of numbers import randint
|
||||||
from random import randint, shuffle
|
from random import randint, shuffle
|
||||||
|
|
||||||
# Simple generator for test plzs (40-40-20 biased), returns 1D array of plzs
|
# Simple generator for test nums (40-40-20 biased), returns 1D array of nums
|
||||||
def plzGen(entries):
|
def numGenLight(entries, shuffle, num_lenght):
|
||||||
dataArray = []
|
dataArray = []
|
||||||
plz_lenght = 5
|
|
||||||
for i in range(0, int(entries)):
|
for i in range(0, int(entries)):
|
||||||
if i < round(entries * 0.4):
|
if i < round(entries * 0.4):
|
||||||
plz = generateNumber(plz_lenght, 2)
|
num = generateNumber(num_lenght, 2)
|
||||||
elif i >= round(entries * 0.4) and i < round(entries * 0.6):
|
elif i >= round(entries * 0.4) and i < round(entries * 0.6):
|
||||||
plz = generateNumber(plz_lenght, 9)
|
num = generateNumber(num_lenght, 9)
|
||||||
elif i >= round(entries * 0.6) and i < round(entries * 0.9):
|
elif i >= round(entries * 0.6) and i < round(entries * 0.9):
|
||||||
plz = generateNumber(plz_lenght, 4)
|
num = generateNumber(num_lenght, 4)
|
||||||
else:
|
else:
|
||||||
plz = generateNumber(plz_lenght, randint(0,9))
|
num = generateNumber(num_lenght, randint(0,9))
|
||||||
dataArray.append(plz)
|
dataArray.append(num)
|
||||||
shuffle(dataArray)
|
if shuffle:
|
||||||
|
shuffle(dataArray)
|
||||||
return dataArray
|
return dataArray
|
||||||
|
|
||||||
# Function for generating the content of one single row randomly
|
# Function for generating the content of one single row randomly
|
||||||
@@ -34,7 +34,7 @@ def writeFile(content, nameChunkStart, namePartStart):
|
|||||||
file.write(content[w] + "\n")
|
file.write(content[w] + "\n")
|
||||||
|
|
||||||
# Function for generating 'entries' x 'int_lenght'-long numbers in 'clusters' clusters
|
# Function for generating 'entries' x 'int_lenght'-long numbers in 'clusters' clusters
|
||||||
def numGen(entries, cluster, int_lenght):
|
def numGen(entries, cluster, int_lenght, suffle_value):
|
||||||
dataArray = []
|
dataArray = []
|
||||||
clusterArray = []
|
clusterArray = []
|
||||||
|
|
||||||
@@ -48,35 +48,8 @@ def numGen(entries, cluster, int_lenght):
|
|||||||
else:
|
else:
|
||||||
cluster_decider = randint(0, cluster - 1)
|
cluster_decider = randint(0, cluster - 1)
|
||||||
dataArray.append(generateNumber(int_lenght - 1, clusterArray[cluster_decider]))
|
dataArray.append(generateNumber(int_lenght - 1, clusterArray[cluster_decider]))
|
||||||
shuffle(dataArray)
|
|
||||||
|
if suffle_value:
|
||||||
|
shuffle(dataArray)
|
||||||
|
|
||||||
return dataArray
|
return dataArray
|
||||||
# Simple generator for test plzs (40-40-20 biased), returns 1D array of plzs
|
|
||||||
def plzGenNS(entries):
|
|
||||||
dataArray = []
|
|
||||||
plz_lenght = 5
|
|
||||||
for i in range(0, int(entries)):
|
|
||||||
if i < round(entries * 0.4):
|
|
||||||
plz = generateNumber(plz_lenght, 2)
|
|
||||||
elif i >= round(entries * 0.4) and i < round(entries * 0.8):
|
|
||||||
plz = generateNumber(plz_lenght, 6)
|
|
||||||
else:
|
|
||||||
plz = generateNumber(plz_lenght, randint(0, 9))
|
|
||||||
dataArray.append(plz)
|
|
||||||
#i had to remove shuffle for the connectrion (age ==> plz) to work, else we would have 4 clusters
|
|
||||||
# shuffle(dataArray)
|
|
||||||
return dataArray #
|
|
||||||
|
|
||||||
|
|
||||||
def ageGenNS(entries):
|
|
||||||
dataArray = []
|
|
||||||
age_lenght = 2
|
|
||||||
for i in range(0, int(entries)):
|
|
||||||
if i < round(entries * 0.4):
|
|
||||||
age = generateNumber(age_lenght, 2)
|
|
||||||
elif i >= round(entries * 0.4) and i < round(entries * 0.8):
|
|
||||||
age = generateNumber(age_lenght, 5)
|
|
||||||
else:
|
|
||||||
age = generateNumber(age_lenght, randint(0, 9))
|
|
||||||
dataArray.append(age)
|
|
||||||
# shuffle(dataArray)
|
|
||||||
return dataArray
|
|
||||||
|
|||||||
@@ -135,8 +135,5 @@ def startup(data):
|
|||||||
print(str(seconds) + " seconds for execution")
|
print(str(seconds) + " seconds for execution")
|
||||||
|
|
||||||
# Start the algorithm and generate test data
|
# Start the algorithm and generate test data
|
||||||
# data = dmtest.plzGen(10000)
|
data = dmtest.numGen(10000, 2, 5, True)
|
||||||
# data = dmtest.numGen(10000, 3, 5)
|
|
||||||
|
|
||||||
data = dmtest.numGen(10000, 8, 7)
|
|
||||||
startup(data)
|
startup(data)
|
||||||
|
|||||||
@@ -31,8 +31,6 @@ import matplotlib.pyplot as plt
|
|||||||
import dmlib
|
import dmlib
|
||||||
import dmtest
|
import dmtest
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# CODE
|
# CODE
|
||||||
# Main function of the algorithm
|
# Main function of the algorithm
|
||||||
def kmeansmk1(xdata, ydata, clusters):
|
def kmeansmk1(xdata, ydata, clusters):
|
||||||
@@ -63,6 +61,7 @@ def kmeansmk1(xdata, ydata, clusters):
|
|||||||
plt.plot(globals()["cpoint_" + str(i)][0], globals()["cpoint_" + str(i)][1], 'ro')
|
plt.plot(globals()["cpoint_" + str(i)][0], globals()["cpoint_" + str(i)][1], 'ro')
|
||||||
plt.scatter([int(x) for x in xdata], [int(y) for y in ydata], marker='x', s=7, color='k')
|
plt.scatter([int(x) for x in xdata], [int(y) for y in ydata], marker='x', s=7, color='k')
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
# Calculates middle values for each cluster, takes 2D array (item, assigned_cluster)
|
# Calculates middle values for each cluster, takes 2D array (item, assigned_cluster)
|
||||||
def calcClusters(xdata, ydata, assigned_points, clusters):
|
def calcClusters(xdata, ydata, assigned_points, clusters):
|
||||||
for cluster in range(0, clusters):
|
for cluster in range(0, clusters):
|
||||||
@@ -86,7 +85,6 @@ def calcClusters(xdata, ydata, assigned_points, clusters):
|
|||||||
|
|
||||||
return cpointunchanged
|
return cpointunchanged
|
||||||
|
|
||||||
|
|
||||||
def assignCluster(xdata, ydata, clusters, highpointx, highpointy):
|
def assignCluster(xdata, ydata, clusters, highpointx, highpointy):
|
||||||
data_assigned = []
|
data_assigned = []
|
||||||
assigned_cluster = 0
|
assigned_cluster = 0
|
||||||
@@ -103,15 +101,13 @@ def assignCluster(xdata, ydata, clusters, highpointx, highpointy):
|
|||||||
# print('cluster number ' + str(cluster) + ' assigned')
|
# print('cluster number ' + str(cluster) + ' assigned')
|
||||||
data_assigned.append(assigned_cluster)
|
data_assigned.append(assigned_cluster)
|
||||||
# Add the assigned values list to the new_data array
|
# Add the assigned values list to the new_data array
|
||||||
#new_data.append(data_assigned)
|
# new_data.append(data_assigned)
|
||||||
|
|
||||||
return data_assigned
|
return data_assigned
|
||||||
|
|
||||||
|
|
||||||
# Startup function for collecting necessary xdata
|
# Startup function for collecting necessary xdata
|
||||||
def startup(xdata, ydata):
|
def startup(xdata, ydata):
|
||||||
# Using two clusters for testing
|
# Using two clusters for testing
|
||||||
clusters = int(input("How many clusters are known? (hint: 2) "))
|
clusters = int(input("How many clusters are known? "))
|
||||||
# cores = input("How many cores should be used? ")
|
# cores = input("How many cores should be used? ")
|
||||||
# path = input("Where is the xdata? ") or in this case xdata
|
# path = input("Where is the xdata? ") or in this case xdata
|
||||||
|
|
||||||
@@ -125,9 +121,8 @@ def startup(xdata, ydata):
|
|||||||
seconds = time.time() - start_time
|
seconds = time.time() - start_time
|
||||||
print(str(seconds) + " seconds for execution")
|
print(str(seconds) + " seconds for execution")
|
||||||
|
|
||||||
|
|
||||||
# Start the algorithm and generate test xdata
|
# Start the algorithm and generate test xdata
|
||||||
xdata = dmtest.plzGenNS(1000)
|
xdata = dmtest.numGenLight(10000, False, 5)
|
||||||
ydata = dmtest.ageGenNS(1000)
|
ydata = dmtest.numGenLight(10000, False, 2)
|
||||||
|
|
||||||
startup(xdata, ydata)
|
startup(xdata, ydata)
|
||||||
|
|||||||
Reference in New Issue
Block a user