Added two-dimensional data analysis support

-added kmeansMkI_2d
-added calcdiff2d to dmlib
-added plzGenNS and ageGenNS to dmtest to generate unshuffled test data for 2D k-means
This commit is contained in:
tchemn
2018-06-04 20:42:55 +02:00
parent aa43c93ae5
commit 667e7881cc
4 changed files with 195 additions and 17 deletions

View File

@@ -50,3 +50,33 @@ def numGen(entries, cluster, int_lenght):
dataArray.append(generateNumber(int_lenght - 1, clusterArray[cluster_decider]))
shuffle(dataArray)
return dataArray
# Simple generator for test PLZs (40-40-20 biased); returns a 1D list of PLZs.
# Entries are produced in order and NOT shuffled: the first 40% are generated
# with 2, the next 40% with 6, and the remaining 20% with a random value 0-9
# as the second argument to generateNumber.
def plzGenNS(entries):
    count = int(entries)
    if count <= 0:
        # Nothing to generate; the band limits are never needed in this case.
        return []

    width = 5
    first_limit = round(entries * 0.4)
    second_limit = round(entries * 0.8)

    plzs = []
    for idx in range(count):
        if idx < first_limit:
            second_arg = 2
        elif idx < second_limit:
            second_arg = 6
        else:
            second_arg = randint(0, 9)
        plzs.append(generateNumber(width, second_arg))

    # Deliberately not shuffled: shuffling breaks the age ==> plz connection
    # and would yield 4 clusters instead.
    return plzs
# Simple generator for test ages, using the same 40-40-20 split by position:
# the first 40% are generated with 2, the next 40% with 5, and the remaining
# 20% with a random value 0-9 as the second argument to generateNumber.
# Returns a 1D list in generation order (not shuffled) -- presumably so the
# entries stay index-aligned with other unshuffled test data; verify at caller.
def ageGenNS(entries):
    count = int(entries)
    if count <= 0:
        # Nothing to generate; the band limits are never needed in this case.
        return []

    width = 2
    first_limit = round(entries * 0.4)
    second_limit = round(entries * 0.8)

    ages = []
    for idx in range(count):
        if idx < first_limit:
            second_arg = 2
        elif idx < second_limit:
            second_arg = 5
        else:
            second_arg = randint(0, 9)
        ages.append(generateNumber(width, second_arg))
    return ages