Code Cleanup II

- Removed test functions from randomi
- Enabled timer at kmeans
- Adjusted functions of the dmlib library
This commit is contained in:
2018-05-31 00:10:02 +02:00
parent d6b4152bde
commit a115b16378
3 changed files with 33 additions and 37 deletions

View File

@@ -1,28 +1,32 @@
# For random generation of numbers import randint
from random import randint, shuffle
# Simple generator for test data (100 plzs, 20-30-50 biased), returns 1D array of plzs
def testgenerator():
# Simple generator for test plzs (40-40-20 biased), returns 1D array of plzs
def plzGen(entries):
dataArray = []
for i in range(0,100):
if i <= 40:
plz = generatePLZ("05")
elif i > 40 and i < 80:
plz = generatePLZ("50")
plz_lenght = 5
for i in range(0, int(entries)):
if i < round(entries * 0.4):
plz = generateNumber(plz_lenght, 2)
elif i >= round(entries * 0.4) and i < round(entries * 0.8):
plz = generateNumber(plz_lenght, 9)
else:
plz = generatePLZ("")
plz = generateNumber(plz_lenght, randint(0,9))
dataArray.append(plz)
shuffle(dataArray)
return dataArray
# Generates a PLZ from a certain start point
def generatePLZ(start):
    """Return a 5-character PLZ string that begins with ``start``.

    ``start`` is a (possibly empty) string prefix. Random decimal digits
    are appended until the result is 5 characters long, so an empty prefix
    yields five random digits and a two-character prefix yields three.
    A prefix that is already 5 characters or longer is returned unchanged.
    """
    plz_length = 5
    # Never request a negative number of digits for over-long prefixes.
    missing = max(0, plz_length - len(start))
    return start + "".join(str(randint(0, 9)) for _ in range(missing))
# Function for generating a random digit string with a fixed leading number
def generateNumber(numberLenght, startingNumber):
    """Return a digit string that starts with ``startingNumber``.

    ``numberLenght`` is the desired total length of the result and
    ``startingNumber`` is converted with ``str()`` and used as the prefix.
    The remainder is filled with random digits (0-9). If the prefix is
    already as long as, or longer than, ``numberLenght`` it is returned
    without any extra digits.
    """
    prefix = str(startingNumber)
    # Count the prefix's own digits so the total length stays numberLenght.
    missing = max(0, numberLenght - len(prefix))
    return prefix + "".join(str(randint(0, 9)) for _ in range(missing))
# Function for writing data into a file (nameChunkStart and namePartStart are for better naming)
# /testdata/ folder has to be created at this point
def writeFile(content, nameChunkStart, namePartStart):
    """Write every element of ``content`` on its own line of a text file.

    The target is ``testdata/file<N>.txt`` where ``N`` is
    ``int(nameChunkStart) + int(namePartStart)``. Each element of
    ``content`` must be a string (if ``content`` itself is a string, each
    character ends up on its own line). An existing file is overwritten.
    """
    filenumber = int(nameChunkStart) + int(namePartStart)
    # The context manager guarantees the handle is flushed and closed,
    # even if one of the writes raises.
    with open("testdata/file" + str(filenumber) + ".txt", "w") as handle:
        handle.writelines(entry + "\n" for entry in content)

View File

@@ -84,8 +84,7 @@ def assignCluster(data, highPoint, clusters):
# Startup function for collecting necessary data
def startup(data):
# Using two clusters for testing
# clusters = int(input("How many clusters are known? "))
clusters = 2
clusters = int(input("How many clusters are known? "))
# cores = input("How many cores should be used? ")
# path = input("Where is the data? ") or in this case data
@@ -100,9 +99,14 @@ def startup(data):
# Stopping benchmark
seconds = time.time() - start_time
# print(str(seconds) + " seconds for execution")
print(str(seconds) + " seconds for execution")
# Printing final clusters
for i in range(0, clusters):
print("Cluster " + str(i + 1) + " found at " + str(globals()["cpoint_" + str(i)]))
# Start the algorithm and generate test data
data = dmtest.testgenerator()
data = dmtest.plzGen(1000)
startup(data)
startup(data)

View File

@@ -21,18 +21,6 @@ from datetime import date
# Importing for multi core processing
import multiprocessing
def generate09():
    """Return a 5-character PLZ string: "09" followed by three random digits."""
    return "09" + "".join(str(randint(0, 9)) for _ in range(3))
def generatePLZ():
    """Return a PLZ string made of five random decimal digits."""
    return "".join(str(randint(0, 9)) for _ in range(5))
# randomI function which creates each file
def randomI(units, rows, rowLength, partstart, cluster):
for setcounter in range(0, units):
@@ -58,7 +46,7 @@ def generateRow(rowLength):
row = row + str(randint(0, 9))
return row
# Function for writing data into a file
# Function for writing data into a file (content = string, setcount and partstart are for better naming)
def writeFile(content, setcounter, partstart):
filenumber = int(setcounter) + int(partstart)
file = open("testdata/file" + str(filenumber) + ".txt", "w")