Added testdata generator

This commit is contained in:
2018-05-26 19:15:08 +02:00
parent 9761370e94
commit e15c5da69a

View File

@@ -16,6 +16,9 @@
import time
from datetime import date
# For random number generation import randint, and shuffle to shuffle a list
from random import randint, shuffle
# Importing library for multi-core processing
import multiprocessing
@@ -25,20 +28,47 @@ def kmeansmk1(clusters):
print("Sorting data into " + str(clusters) + " clusters.")
# Startup function for collecting necessary data
def startup(data):
    """Ask for the cluster count, run kmeansmk1 on *data* and print the runtime.

    Args:
        data: Data set that is handed through unchanged to kmeansmk1.

    Raises:
        ValueError: If the text entered at the prompt is not an integer.
    """
    clusters = int(input("How many clusters are known? "))
    # cores = input("How many cores should be used? ")
    # path = input("Where is the data? ") or in this case data

    # For benchmarking starting the timer now; it starts after the prompt, so
    # the time spent waiting for user input is not part of the measurement.
    start_time = time.time()
    # Firing up the engines!
    # NOTE(review): the only kmeansmk1 signature visible here takes just
    # `clusters`; confirm it accepts `data` as a second argument.
    kmeansmk1(clusters, data)
    # kmeansmk1(clusters, cores, path)
    # Stopping benchmark
    seconds = time.time() - start_time
    print(str(seconds) + " seconds for execution")
# Simple generator for test data
def testgenerator():
    """Build a shuffled list of 99 generated PLZ strings for testing.

    Positions 1-20 are generated with the prefix "09", positions 21-49 with
    the prefix "08", and positions 50-99 without any prefix. The list is
    shuffled in place before it is returned.

    Returns:
        list: The generated PLZ strings in random order.
    """
    def prefix_for(position):
        # Map the 1-based position to the prefix its PLZ has to start with.
        if position <= 20:
            return "09"
        if position < 50:
            return "08"
        return ""

    samples = [generatePLZ(prefix_for(position)) for position in range(1, 100)]
    shuffle(samples)
    return samples
# Generates a PLZ from a certain start point
def generatePLZ(start, length=5):
    """Return a random PLZ string that begins with *start*.

    PLZ is presumably a German postal code (Postleitzahl) - confirm.

    Args:
        start: Leading characters of the code; "" gives a fully random code.
        length: Total number of characters of the result (default 5).

    Returns:
        str: *start* followed by random decimal digits so that the result is
        *length* characters long. A *start* that already has *length* or more
        characters is returned unchanged.
    """
    # Derive the number of random digits from the prefix length instead of
    # assuming a two-character prefix, so every prefix gives a full code.
    missing = max(0, length - len(start))
    return start + "".join(str(randint(0, 9)) for _ in range(missing))
# Script entry: build the shuffled sample PLZ list once and hand it to
# startup(), which prompts for the cluster count and runs the clustering.
# NOTE(review): no `if __name__ == "__main__"` guard is visible around these
# statements, so they presumably also run when the module is imported.
data = testgenerator()
startup(data)