Files
data_mining_algorithms/src/algorithms/dmtest.py
2019-07-04 18:39:01 +02:00

65 lines
2.1 KiB
Python

# For random generation of numbers import randint
from random import randint, shuffle
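# Simple generator for biased test numbers: the first 40% start with 2, the
# next 20% with 9, the next 30% with 4 and the last 10% with a random digit;
# returns a flat (1D) list of digit strings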
def numGenLight(entries, shuffle, num_lenght):
    """Generate a biased list of test numbers as digit strings.

    The leading digit of each number depends on its position in the
    generated sequence:

    * first 40% of the entries start with ``2``
    * next 20% start with ``9``
    * next 30% start with ``4``
    * remaining 10% start with a random digit ``0``-``9``

    Args:
        entries: Number of values to generate (converted with ``int``).
        shuffle: If truthy, the result is shuffled in place before it is
            returned; otherwise the values stay grouped by leading digit.
        num_lenght: Length argument forwarded to ``generateNumber``.

    Returns:
        A list with ``int(entries)`` strings produced by ``generateNumber``.
    """
    # The ``shuffle`` parameter shadows the module-level ``random.shuffle``
    # import, so calling ``shuffle(...)`` here would call the flag itself.
    # Import the function under a different local name instead.
    from random import shuffle as shuffle_in_place

    total = int(entries)
    # Bucket boundaries are loop-invariant, so compute them once.
    end_of_twos = round(total * 0.4)
    end_of_nines = round(total * 0.6)
    end_of_fours = round(total * 0.9)

    dataArray = []
    for i in range(total):
        if i < end_of_twos:
            leading_digit = 2
        elif i < end_of_nines:
            leading_digit = 9
        elif i < end_of_fours:
            leading_digit = 4
        else:
            leading_digit = randint(0, 9)
        dataArray.append(generateNumber(num_lenght, leading_digit))

    if shuffle:
        shuffle_in_place(dataArray)
    return dataArray
# Function for generating the content of one single row randomly
def generateNumber(numberLenght, startingNumber):
    """Build one random number as a string with a fixed prefix.

    Args:
        numberLenght: ``numberLenght - 1`` random digits are appended to the
            prefix. For a single-digit prefix the result is therefore
            ``numberLenght`` characters long; a longer prefix makes the
            result correspondingly longer.
        startingNumber: Value whose ``str()`` form becomes the prefix.

    Returns:
        ``str(startingNumber)`` followed by ``numberLenght - 1`` random
        decimal digits (none if ``numberLenght`` is 1 or less).
    """
    random_digits = "".join(
        str(randint(0, 9)) for _ in range(numberLenght - 1)
    )
    return str(startingNumber) + random_digits
# Function for writing data into a file
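# content = sequence of strings (one is written per line); nameChunkStart and
# namePartStart are added together to form the number in the file name
# The testdata/ folder (relative to the working directory) has to exist at this point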
def writeFile(content, nameChunkStart, namePartStart):
    """Write each item of ``content`` on its own line to a numbered file.

    The target is ``testdata/file<N>.txt`` relative to the current working
    directory, where ``N = int(nameChunkStart) + int(namePartStart)``. The
    ``testdata`` directory is not created here and must already exist.

    Args:
        content: Iterable of strings; each one is written followed by a
            newline.
        nameChunkStart: First summand of the file number (``int``-convertible).
        namePartStart: Second summand of the file number (``int``-convertible).
    """
    filenumber = int(nameChunkStart) + int(namePartStart)
    # The context manager guarantees the handle is flushed and closed, even
    # if writing fails part-way through.
    with open("testdata/file" + str(filenumber) + ".txt", "w") as out:
        out.writelines(line + "\n" for line in content)
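# Function for generating 'entries' numbers, each 'int_lenght' digits long
# (for int_lenght >= 2); about two thirds of them start with one of 'cluster'
# randomly chosen two-digit prefixes, the rest are fully random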
def numGen(entries, cluster, int_lenght, suffle_value):
    """Generate random digit strings, most of them grouped by shared prefixes.

    ``cluster`` random two-digit prefixes (10-99) are drawn first. For every
    entry a three-way draw decides its kind: one outcome yields a fully
    random number with a leading digit 1-9, the other two yield a number
    that starts with one of the cluster prefixes.

    Args:
        entries: Number of values to generate.
        cluster: Number of cluster prefixes to draw. If it is zero or
            negative there is nothing to cluster on, so every entry is
            generated as a fully random number.
        int_lenght: Target length of each number. Clustered numbers are
            requested with ``int_lenght - 1`` because their prefix already
            has two digits.
        suffle_value: If truthy, the result is shuffled in place before it
            is returned.

    Returns:
        A list of ``entries`` strings produced by ``generateNumber``.
    """
    clusterArray = [randint(10, 99) for _ in range(cluster)]

    dataArray = []
    for _ in range(entries):
        decider = randint(0, 2)
        # Without any prefixes, ``randint(0, cluster - 1)`` would raise on an
        # empty range, so fall back to the un-clustered branch.
        if decider == 2 or not clusterArray:
            dataArray.append(generateNumber(int_lenght, randint(1, 9)))
        else:
            prefix = clusterArray[randint(0, len(clusterArray) - 1)]
            dataArray.append(generateNumber(int_lenght - 1, prefix))

    if suffle_value:
        shuffle(dataArray)
    return dataArray