K-Means Step 1
- Added simple cluster assigning Co-Authored-By: tchemn <tchemn@users.noreply.github.com>
This commit is contained in:
@@ -24,12 +24,53 @@ import multiprocessing
|
|||||||
|
|
||||||
# CODE
|
# CODE
|
||||||
# Main function of the algorithm
|
# Main function of the algorithm
|
||||||
def kmeansmk1(clusters, data):
|
def kmeansmk1(data):
    """Run the first k-means step: pick random centers and assign each point.

    Two entries of ``data`` are chosen at random as cluster centers (the same
    entry may be chosen twice). Every entry is then assigned to the center
    whose integer value is closest to its own; on a tie the center that was
    drawn first wins.

    Args:
        data: Indexable collection whose entries are convertible with ``int``.

    Returns:
        A list with one element per entry of ``data``: the index (into
        ``data``) of the center that entry was assigned to. An empty list is
        returned for empty input.
    """
    clusters = 2
    data_size = len(data)
    if data_size == 0:
        # Nothing to cluster; there is no valid index to draw a center from.
        return []

    # Defining cluster points
    centers = []
    for i in range(clusters):
        # randint includes both bounds, so the last valid index is data_size - 1.
        center = randint(0, data_size - 1)
        centers.append(center)
        print("Cluster " + str(i) + ": " + str(data[center]))

    # Parse every entry once instead of on every distance computation.
    values = [int(entry) for entry in data]

    data_assigned = []
    for value in values:
        # min() keeps the first center on ties and always yields a center,
        # so no sentinel "largest possible distance" is needed.
        nearest = min(centers, key=lambda c: abs(value - values[c]))
        data_assigned.append(nearest)

    return data_assigned
|
||||||
|
|
||||||
|
def findHighest(data):
    """Return the largest integer value found in ``data``, but never below 0.

    Each entry is converted with ``int`` before comparison. Empty input, or
    input whose values are all negative, yields 0.
    """
    highest = max((int(entry) for entry in data), default=0)
    # The running maximum starts at 0, so negative results are clamped.
    return highest if highest > 0 else 0
|
||||||
|
|
||||||
|
def calcdiff(point1, point2, data):
    """Return the absolute difference between two entries of ``data``.

    Args:
        point1: Index into ``data`` of the first entry.
        point2: Index into ``data`` of the second entry.
        data: Indexable collection whose entries are convertible with ``int``.

    Returns:
        The non-negative integer distance between the two entries' values.
    """
    # Parse each entry once; abs() replaces the manual larger-minus-smaller
    # branch and the extra absolute-value pass over an already positive number.
    first = int(data[point1])
    second = int(data[point2])
    return abs(first - second)
|
||||||
|
|
||||||
|
def betrag(zahl):
    """Return the absolute value of ``zahl``.

    Uses the built-in ``abs`` so that large integers keep full precision and
    negative floats are handled the same way as positive ones (no truncation
    to ``int``).

    Args:
        zahl: The number whose magnitude is wanted.

    Returns:
        ``zahl`` if it is non-negative, otherwise ``-zahl``.
    """
    return abs(zahl)
|
||||||
|
|
||||||
# Startup function for collecting necessary data
|
# Startup function for collecting necessary data
|
||||||
def startup(data):
|
def startup(data):
|
||||||
clusters = int(input("How many clusters are known? "))
|
# clusters = int(input("How many clusters are known? "))
|
||||||
# cores = input("How many cores should be used? ")
|
# cores = input("How many cores should be used? ")
|
||||||
# path = input("Where is the data? ") or in this case data
|
# path = input("Where is the data? ") or in this case data
|
||||||
|
|
||||||
@@ -37,17 +78,17 @@ def startup(data):
|
|||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
# Firing up the engines!
|
# Firing up the engines!
|
||||||
kmeansmk1(clusters, data)
|
kmeansmk1(data)
|
||||||
# kmeansmk1(clusters, cores, path)
|
# kmeansmk1(clusters, cores, path)
|
||||||
|
|
||||||
# Stopping benchmark
|
# Stopping benchmark
|
||||||
seconds = time.time() - start_time
|
seconds = time.time() - start_time
|
||||||
print(str(seconds) + " seconds for execution")
|
# print(str(seconds) + " seconds for execution")
|
||||||
|
|
||||||
# Simple generator for test data
|
# Simple generator for test data
|
||||||
def testgenerator():
|
def testgenerator():
|
||||||
dataArray = []
|
dataArray = []
|
||||||
for i in range(1,100):
|
for i in range(0,100):
|
||||||
if i <= 20:
|
if i <= 20:
|
||||||
plz = generatePLZ("09")
|
plz = generatePLZ("09")
|
||||||
elif i > 20 and i < 50:
|
elif i > 20 and i < 50:
|
||||||
|
|||||||
Reference in New Issue
Block a user