K-Means Step 1
- Added simple cluster assigning Co-Authored-By: tchemn <tchemn@users.noreply.github.com>
This commit is contained in:
@@ -24,12 +24,53 @@ import multiprocessing
|
||||
|
||||
# CODE
|
||||
# Main function of the algorithm
|
||||
def kmeansmk1(clusters, data):
|
||||
print("Sorting data into " + str(clusters) + " clusters.")
|
||||
def kmeansmk1(data):
|
||||
clusters = 2
|
||||
data_size = len(data)
|
||||
|
||||
# Defining cluster points
|
||||
for i in range(0, clusters):
|
||||
globals()["cpoint_" + str(i)] = randint(0, data_size)
|
||||
print("Cluster " + str(i) + ": " + str(data[globals()["cpoint_" + str(i)]]))
|
||||
|
||||
data_assigned = []
|
||||
highPoint = findHighest(data)
|
||||
|
||||
for item in range(0, data_size):
|
||||
min_cluster = highPoint
|
||||
for cluster in range(0, clusters):
|
||||
clusternumber = globals()["cpoint_" + str(cluster)]
|
||||
if min_cluster > calcdiff(item, clusternumber, data):
|
||||
min_cluster = calcdiff(item, clusternumber, data)
|
||||
assinged_cluster = clusternumber
|
||||
data_assigned.append(assinged_cluster)
|
||||
|
||||
# for item in range(0, data_size):
|
||||
# print("Datapoint: " + str(data[item]) + " | Assigned cluster: " + str(data[data_assigned[item]]))
|
||||
|
||||
def findHighest(data):
|
||||
maximum = 0
|
||||
for i in range(0, len(data)):
|
||||
if int(data[i]) > maximum:
|
||||
maximum = int(data[i])
|
||||
return maximum
|
||||
|
||||
def calcdiff(point1, point2, data):
|
||||
if int(data[point2]) > int(data[point1]):
|
||||
difference = int(data[point2]) - int(data[point1])
|
||||
else:
|
||||
difference = int(data[point1]) - int(data[point2])
|
||||
# print("Datapoint: " + str(data[point1]) + " | Cluster: " + str(data[point2]) + " | Difference: " + str(difference))
|
||||
return betrag(difference)
|
||||
|
||||
def betrag(zahl):
|
||||
if zahl < 0:
|
||||
zahl = int((-2 * zahl) / 2)
|
||||
return zahl
|
||||
|
||||
# Startup function for collecting necesarry data
|
||||
def startup(data):
|
||||
clusters = int(input("How many clusters are known? "))
|
||||
# clusters = int(input("How many clusters are known? "))
|
||||
# cores = input("How many cores should be used? ")
|
||||
# path = input("Where is the data? ") or in this case data
|
||||
|
||||
@@ -37,17 +78,17 @@ def startup(data):
|
||||
start_time = time.time()
|
||||
|
||||
# Firing up the engines!
|
||||
kmeansmk1(clusters, data)
|
||||
kmeansmk1(data)
|
||||
# kmeansmk1(clusters, cores, path)
|
||||
|
||||
# Stopping benchmark
|
||||
seconds = time.time() - start_time
|
||||
print(str(seconds) + " seconds for execution")
|
||||
# print(str(seconds) + " seconds for execution")
|
||||
|
||||
# Simple generator for test data
|
||||
def testgenerator():
|
||||
dataArray = []
|
||||
for i in range(1,100):
|
||||
for i in range(0,100):
|
||||
if i <= 20:
|
||||
plz = generatePLZ("09")
|
||||
elif i > 20 and i < 50:
|
||||
|
||||
Reference in New Issue
Block a user