Code Cleanup II
- Removed test functions from randomi - Enabled timer at kmeans - Adjusted functions of the dmlib library
This commit is contained in:
@@ -1,28 +1,32 @@
|
|||||||
# For random generation of numbers import randint
|
# For random generation of numbers import randint
|
||||||
from random import randint, shuffle
|
from random import randint, shuffle
|
||||||
|
|
||||||
# Simple generator for test data (100 plzs, 20-30-50 biased), returns 1D array of plzs
|
# Simple generator for test plzs (40-40-20 biased), returns 1D array of plzs
|
||||||
def testgenerator():
|
def plzGen(entries):
|
||||||
dataArray = []
|
dataArray = []
|
||||||
for i in range(0,100):
|
plz_lenght = 5
|
||||||
if i <= 40:
|
for i in range(0, int(entries)):
|
||||||
plz = generatePLZ("05")
|
if i < round(entries * 0.4):
|
||||||
elif i > 40 and i < 80:
|
plz = generateNumber(plz_lenght, 2)
|
||||||
plz = generatePLZ("50")
|
elif i >= round(entries * 0.4) and i < round(entries * 0.8):
|
||||||
|
plz = generateNumber(plz_lenght, 9)
|
||||||
else:
|
else:
|
||||||
plz = generatePLZ("")
|
plz = generateNumber(plz_lenght, randint(0,9))
|
||||||
dataArray.append(plz)
|
dataArray.append(plz)
|
||||||
shuffle(dataArray)
|
shuffle(dataArray)
|
||||||
return dataArray
|
return dataArray
|
||||||
|
|
||||||
# Generates a PLZ from a certain start point
|
# Function for generating the content of one single row randomly
|
||||||
def generatePLZ(start):
|
def generateNumber(numberLenght, startingNumber):
|
||||||
if len(start) == 0:
|
number = str(startingNumber)
|
||||||
plz = ""
|
for length in range(0, numberLenght - 1):
|
||||||
for j in range(1,6):
|
number = number + str(randint(0,9))
|
||||||
plz = plz + str(randint(0,9))
|
return number
|
||||||
else:
|
|
||||||
plz = start
|
# Function for writing data into a file (content = string, nameChunkStart and namePartStart are for better naming)
|
||||||
for j in range(1,4):
|
# /testdata/ folder has to be created at this point
|
||||||
plz = plz + str(randint(0,9))
|
def writeFile(content, nameChunkStart, namePartStart):
|
||||||
return plz
|
filenumber = int(nameChunkStart) + int(namePartStart)
|
||||||
|
file = open("testdata/file" + str(filenumber) + ".txt", "w")
|
||||||
|
for w in range(0, len(content)):
|
||||||
|
file.write(content[w] + "\n")
|
||||||
@@ -84,8 +84,7 @@ def assignCluster(data, highPoint, clusters):
|
|||||||
# Startup function for collecting necessary data
|
# Startup function for collecting necessary data
|
||||||
def startup(data):
|
def startup(data):
|
||||||
# Using two clusters for testing
|
# Using two clusters for testing
|
||||||
# clusters = int(input("How many clusters are known? "))
|
clusters = int(input("How many clusters are known? "))
|
||||||
clusters = 2
|
|
||||||
# cores = input("How many cores should be used? ")
|
# cores = input("How many cores should be used? ")
|
||||||
# path = input("Where is the data? ") or in this case data
|
# path = input("Where is the data? ") or in this case data
|
||||||
|
|
||||||
@@ -100,9 +99,14 @@ def startup(data):
|
|||||||
|
|
||||||
# Stopping benchmark
|
# Stopping benchmark
|
||||||
seconds = time.time() - start_time
|
seconds = time.time() - start_time
|
||||||
# print(str(seconds) + " seconds for execution")
|
print(str(seconds) + " seconds for execution")
|
||||||
|
|
||||||
|
# Printing final clusters
|
||||||
|
for i in range(0, clusters):
|
||||||
|
print("Cluster " + str(i + 1) + " found at " + str(globals()["cpoint_" + str(i)]))
|
||||||
|
|
||||||
|
|
||||||
# Start the algorithm and generate test data
|
# Start the algorithm and generate test data
|
||||||
data = dmtest.testgenerator()
|
data = dmtest.plzGen(1000)
|
||||||
|
|
||||||
startup(data)
|
startup(data)
|
||||||
@@ -21,18 +21,6 @@ from datetime import date
|
|||||||
# Importing for multi core processing
|
# Importing for multi core processing
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
|
|
||||||
def generate09():
|
|
||||||
plz = "09"
|
|
||||||
for i in range(0,3):
|
|
||||||
plz = plz + str(randint(0,9))
|
|
||||||
return plz
|
|
||||||
|
|
||||||
def generatePLZ():
|
|
||||||
plz = ""
|
|
||||||
for i in range(0,5):
|
|
||||||
plz = plz + str(randint(0,9))
|
|
||||||
return plz
|
|
||||||
|
|
||||||
# randomI function which creates each file
|
# randomI function which creates each file
|
||||||
def randomI(units, rows, rowLength, partstart, cluster):
|
def randomI(units, rows, rowLength, partstart, cluster):
|
||||||
for setcounter in range(0, units):
|
for setcounter in range(0, units):
|
||||||
@@ -58,7 +46,7 @@ def generateRow(rowLength):
|
|||||||
row = row + str(randint(0, 9))
|
row = row + str(randint(0, 9))
|
||||||
return row
|
return row
|
||||||
|
|
||||||
# Function for writing data into a file
|
# Function for writing data into a file (content = string, setcounter and partstart are for better naming)
|
||||||
def writeFile(content, setcounter, partstart):
|
def writeFile(content, setcounter, partstart):
|
||||||
filenumber = int(setcounter) + int(partstart)
|
filenumber = int(setcounter) + int(partstart)
|
||||||
file = open("testdata/file" + str(filenumber) + ".txt", "w")
|
file = open("testdata/file" + str(filenumber) + ".txt", "w")
|
||||||
|
|||||||
Reference in New Issue
Block a user