From aa43c93ae5ed6da69deef177a44cc47f30524878 Mon Sep 17 00:00:00 2001 From: Conrad Date: Sun, 3 Jun 2018 00:41:18 +0200 Subject: [PATCH] Added numGen generator - Removed unstable randomi version - Added numGen generator - Detected bug --- src/algorithms/dmtest.py | 20 ++++++- src/algorithms/kmeansMkI.py | 6 +- src/data_generators/randomi2.1.py | 96 ------------------------------- 3 files changed, 23 insertions(+), 99 deletions(-) delete mode 100644 src/data_generators/randomi2.1.py diff --git a/src/algorithms/dmtest.py b/src/algorithms/dmtest.py index d13dc48..900b69a 100644 --- a/src/algorithms/dmtest.py +++ b/src/algorithms/dmtest.py @@ -31,4 +31,22 @@ def writeFile(content, nameChunkStart, namePartStart): filenumber = int(nameChunkStart) + int(namePartStart) file = open("testdata/file" + str(filenumber) + ".txt", "w") for w in range(0, len(content)): - file.write(content[w] + "\n") \ No newline at end of file + file.write(content[w] + "\n") + +# Function for generating 'entries' x 'int_lenght'-long numbers in 'cluster' clusters +def numGen(entries, cluster, int_lenght): + dataArray = [] + clusterArray = [] + + for cluster_num in range(0, cluster): + clusterArray.append(randint(10,99)) + + for item in range(0, entries): + decider = randint(0, 2) + if decider == 2: + dataArray.append(generateNumber(int_lenght, randint(1,9))) + else: + cluster_decider = randint(0, cluster - 1) + dataArray.append(generateNumber(int_lenght - 1, clusterArray[cluster_decider])) + shuffle(dataArray) + return dataArray diff --git a/src/algorithms/kmeansMkI.py b/src/algorithms/kmeansMkI.py index f15b222..89a42bc 100644 --- a/src/algorithms/kmeansMkI.py +++ b/src/algorithms/kmeansMkI.py @@ -8,7 +8,7 @@ #usage: python pyscript.py #notes: #dependencies: mathplotlib -#known_issues: +#known_issues: When clusters are 'thin' or noise is too strong --> inaccurate #python_version: 3.x #============================================================================== @@ -135,6 +135,8 @@ def 
startup(data): print(str(seconds) + " seconds for execution") # Start the algorithm and generate test data -data = dmtest.plzGen(10000) +# data = dmtest.plzGen(10000) +# data = dmtest.numGen(10000, 3, 5) +data = dmtest.numGen(10000, 8, 7) startup(data) diff --git a/src/data_generators/randomi2.1.py b/src/data_generators/randomi2.1.py deleted file mode 100644 index b035f58..0000000 --- a/src/data_generators/randomi2.1.py +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env python -#title: randomI2.1.py -#description: Personal -#author: Tillmann Brendel, Conrad Großer -#license: Pending -#date: 26.05.2018 -#version: 1.0 -#usage: python pyscript.py -#notes: -#known_issues: -#python_version: 3.x -#============================================================================== - -# For random generation of numbers import randint -from random import randint - -# Importing the time for benchmarking purposes -import time -from datetime import date - -# Importing for multi core processing -import multiprocessing - -# randomI function which creates each file -def randomI(units, rows, rowLength, partstart, cluster): - for setcounter in range(0, units): - writeFile(generateFile(rows, rowLength, cluster), setcounter, partstart) - -# Function for generating the content of one single file -def generateFile(rows, rowLength, cluster): - content = [] - for y in range(0, rows): - if y == 0: - if 1 == randint(1, cluster): - content.append(generate09()) - else: - content.append(generatePLZ()) - else: - content.append(generateRow(rowLength)) - return content - -# Function for generating the content of one single row randomly -def generateRow(rowLength): - row = "" - for z in range(0, rowLength): - row = row + str(randint(0, 9)) - return row - -# Function for writing data into a file (content = string, setcount and partstart are for better naming) -def writeFile(content, setcounter, partstart): - filenumber = int(setcounter) + int(partstart) - file = open("testdata/file" + str(filenumber) + 
".txt", "w") - for w in range(0, len(content)): - file.write(content[w] + "\n") - -if __name__ == '__main__': - # Getting the user input - print("Hello World") - units = int(input("How many units would you like to generate? ")) - rows = int(input("How many rows should each unit have? ")) - rowLength = int(input("How long should each row be? ")) - cores = int(input("How many cores do you want to use? ")) - cluster = int(input("What fraction of postal codes should be in the 09xxx cluster? 1/")) - - # Splitting up the units - count = int(0) - partsize = units / cores - - # For benchmarking starting the timer now - start_time = time.time() - - # Initialize and prepare cores for process - while count < cores: - partstart = partsize * count - globals()["p" + str(count)] = multiprocessing.Process(target=randomI, args=(int(partsize), rows, rowLength, partstart, cluster)) - count = count + 1 - - # Starting each core - count = int(0) - while count < cores: - globals()["p" + str(count)].start() - print("Core " + str(count) + " started.") - count = count + 1 - - print("Working...") - - # Joining each core for the process - count = int(0) - while count < cores: - globals()["p" + str(count)].join() - count = count + 1 - - # Finishing up the process - sec = time.time() - start_time - print("Data is generated. Have fun!") - print("randomI took " + str(sec) + " seconds for execution.")