From a115b16378b7a922b0db7ce0172ad22132dc2ce5 Mon Sep 17 00:00:00 2001 From: Conrad Date: Thu, 31 May 2018 00:10:02 +0200 Subject: [PATCH] Code Cleanup II - Removed test functions from randomi - Enabled timer at kmeans - Adjusted functions of the dmlib libary --- src/algorithms/dmtest.py | 42 +++++++++++++++++-------------- src/algorithms/kmeansMkI.py | 14 +++++++---- src/data_generators/randomi2.1.py | 14 +---------- 3 files changed, 33 insertions(+), 37 deletions(-) diff --git a/src/algorithms/dmtest.py b/src/algorithms/dmtest.py index 686d126..7cc52be 100644 --- a/src/algorithms/dmtest.py +++ b/src/algorithms/dmtest.py @@ -1,28 +1,32 @@ # For random generation of numbers import randint from random import randint, shuffle -# Simple generator for test data (100 plzs, 20-30-50 biased), returns 1D array of plzs -def testgenerator(): +# Simple generator for test plzs (40-40-20 biased), returns 1D array of plzs +def plzGen(entries): dataArray = [] - for i in range(0,100): - if i <= 40: - plz = generatePLZ("05") - elif i > 40 and i < 80: - plz = generatePLZ("50") + plz_lenght = 5 + for i in range(0, int(entries)): + if i < round(entries * 0.4): + plz = generateNumber(plz_lenght, 2) + elif i >= round(entries * 0.4) and i < round(entries * 0.8): + plz = generateNumber(plz_lenght, 9) else: - plz = generatePLZ("") + plz = generateNumber(plz_lenght, randint(0,9)) dataArray.append(plz) shuffle(dataArray) return dataArray -# Generates a PLZ from a certain start point -def generatePLZ(start): - if len(start) == 0: - plz = "" - for j in range(1,6): - plz = plz + str(randint(0,9)) - else: - plz = start - for j in range(1,4): - plz = plz + str(randint(0,9)) - return plz +# Function for generating the content of one single row randomly +def generateNumber(numberLenght, startingNumber): + number = str(startingNumber) + for length in range(0, numberLenght - 1): + number = number + str(randint(0,9)) + return number + +# Function for writing data into a file (content = string, nameChunkStart and namePartStart are for better naming) +# /testdata/ folder has to be created at this point +def writeFile(content, nameChunkStart, namePartStart): + filenumber = int(nameChunkStart) + int(namePartStart) + file = open("testdata/file" + str(filenumber) + ".txt", "w") + for w in range(0, len(content)): + file.write(content[w] + "\n") \ No newline at end of file diff --git a/src/algorithms/kmeansMkI.py b/src/algorithms/kmeansMkI.py index 6da43ee..a7acacf 100644 --- a/src/algorithms/kmeansMkI.py +++ b/src/algorithms/kmeansMkI.py @@ -84,8 +84,7 @@ def assignCluster(data, highPoint, clusters): # Startup function for collecting necesarry data def startup(data): # Using two clusters for testing - # clusters = int(input("How many clusters are known? ")) - clusters = 2 + clusters = int(input("How many clusters are known? ")) # cores = input("How many cores should be used? ") # path = input("Where is the data? ") or in this case data @@ -100,9 +99,14 @@ def startup(data): # Stopping benchmark seconds = time.time() - start_time - # print(str(seconds) + " seconds for execution") + print(str(seconds) + " seconds for execution") + + # Printing final clusters + for i in range(0, clusters): + print("Cluster " + str(i + 1) + " found at " + str(globals()["cpoint_" + str(i)])) + # Start the algorithm and generate test data -data = dmtest.testgenerator() +data = dmtest.plzGen(1000) -startup(data) \ No newline at end of file +startup(data) diff --git a/src/data_generators/randomi2.1.py b/src/data_generators/randomi2.1.py index 89893a3..b035f58 100644 --- a/src/data_generators/randomi2.1.py +++ b/src/data_generators/randomi2.1.py @@ -21,18 +21,6 @@ from datetime import date # Importing for multi core processing import multiprocessing -def generate09(): - plz = "09" - for i in range(0,3): - plz = plz + str(randint(0,9)) - return plz - -def generatePLZ(): - plz = "" - for i in range(0,5): - plz = plz + str(randint(0,9)) - return plz - # randomI function which creates each file def randomI(units, rows, rowLength, partstart, cluster): for setcounter in range(0, units): @@ -58,7 +46,7 @@ def generateRow(rowLength): row = row + str(randint(0, 9)) return row -# Function for writing data into a file +# Function for writing data into a file (content = string, setcount and partstart are for better naming) def writeFile(content, setcounter, partstart): filenumber = int(setcounter) + int(partstart) file = open("testdata/file" + str(filenumber) + ".txt", "w")