# Post-change contents of src/algorithms/kmeansMkI.py as far as the diff hunks
# show them. Lines of the file that lie outside the hunks (before line 16 and
# between the two hunks) are not visible in the diff and are not reproduced.

import time
from datetime import date

# For random generation of numbers import randint, and shuffle to shuffle an array
from random import randint, shuffle

# Importing library for multi core processing
import multiprocessing


# NOTE(review): only the `def` line (as hunk context) and the print statement
# of this function are visible in the diff; confirm nothing sits between them
# in the real file.
def kmeansmk1(clusters, data=None):
    """Announce the clustering run.

    Args:
        clusters: Number of clusters; only used in the printed message.
        data: Optional data set. Currently unused; accepted so that the
            two-argument call made by ``startup`` does not raise ``TypeError``
            while one-argument callers keep working.
    """
    print(f"Sorting data into {clusters} clusters.")


# Startup function for collecting necessary data
def startup(data):
    """Ask for the cluster count, run ``kmeansmk1`` and print the elapsed time.

    Args:
        data: The data set handed through to ``kmeansmk1``.

    Raises:
        ValueError: If the value typed at the prompt is not an integer.
    """
    clusters = int(input("How many clusters are known? "))
    # TODO: also ask for the number of cores and the data path, then call
    # kmeansmk1(clusters, cores, path); for now the data is passed in directly.

    # For benchmarking starting the timer now. perf_counter is monotonic, so
    # the measured interval cannot be skewed by system clock adjustments.
    start_time = time.perf_counter()

    # Firing up the engines!
    kmeansmk1(clusters, data)

    # Stopping benchmark
    seconds = time.perf_counter() - start_time
    print(f"{seconds} seconds for execution")


# Simple generator for test data
def testgenerator():
    """Return a shuffled list of 99 random five-digit code strings.

    The first 20 entries are generated with the prefix "09", the next 29 with
    the prefix "08" and the remaining 50 without a prefix; the list is
    shuffled before it is returned.
    """
    dataArray = []
    for i in range(1, 100):
        if i <= 20:
            prefix = "09"
        elif i < 50:
            prefix = "08"
        else:
            prefix = ""
        dataArray.append(generatePLZ(prefix))
    shuffle(dataArray)
    return dataArray


# Generates a PLZ from a certain start point
def generatePLZ(start, length=5):
    """Return ``start`` padded with random digits up to ``length`` characters.

    Args:
        start: Leading characters of the code; may be empty.
        length: Total length of the result (default 5). A ``start`` that is
            already at least this long is returned unchanged.

    Returns:
        The generated code as a string.
    """
    missing = max(0, length - len(start))
    return start + "".join(str(randint(0, 9)) for _ in range(missing))


# Only run the interactive benchmark when executed as a script, so importing
# this module does not block on input().
if __name__ == "__main__":
    data = testgenerator()
    startup(data)