kmeans Minor Bugfixes

- Added cluster to test generator
- Added sample data to main
This commit is contained in:
2018-06-02 23:30:07 +02:00
parent 7ea392c302
commit 4e3ceac4a9
4 changed files with 62 additions and 6 deletions

View File

@@ -1,5 +1,5 @@
# Calculate the difference between two points, given directly as values that can be converted to int
def calcdiff(point1, point2, data):
def calcdiff(point1, point2):
if int(point2) > int(point1):
difference = int(point2) - int(point1)
else:
@@ -20,3 +20,21 @@ def findHighest(data):
if int(data[i]) > maximum:
maximum = int(data[i])
return maximum
def pp_calcdiff(data, clusterpoint):
    """Return the entry of *data* with the largest difference to *clusterpoint*.

    The difference is measured with ``calcdiff``. When several entries share
    the largest difference, the one that appears first in *data* is returned.

    NOTE(review): the ``pp_`` prefix presumably refers to k-means++ style
    seeding of an additional cluster point -- confirm against the caller.

    Args:
        data: Iterable of data points in a form accepted by ``calcdiff``.
        clusterpoint: Reference point the differences are measured against.

    Returns:
        The entry with the largest difference, or ``0`` when *data* is empty
        or no entry has a difference greater than zero.
    """
    max_diff = 0
    new_cluster = 0
    for point in data:
        # Evaluate the difference once per point instead of twice.
        diff = calcdiff(point, clusterpoint)
        # Strict comparison keeps the earliest entry on ties.
        if diff > max_diff:
            max_diff = diff
            new_cluster = point
    return new_cluster
def pp_calcdiff_2(data, clusterpoint, clusterpoint_2):
    """Return the entry of *data* with the largest combined difference.

    The combined difference of an entry is the sum of its ``calcdiff`` value
    to *clusterpoint* and its ``calcdiff`` value to *clusterpoint_2*. When
    several entries share the largest combined difference, the one that
    appears first in *data* is returned.

    Args:
        data: Iterable of data points in a form accepted by ``calcdiff``.
        clusterpoint: First reference point.
        clusterpoint_2: Second reference point.

    Returns:
        The entry with the largest combined difference, or ``0`` when *data*
        is empty or no entry has a combined difference greater than zero.
    """
    max_diff = 0
    new_cluster = 0
    for point in data:
        combined = calcdiff(point, clusterpoint) + calcdiff(point, clusterpoint_2)
        if combined > max_diff:
            # Remember the same quantity that is compared above. Storing only
            # the difference to the first cluster point would understate the
            # running maximum and let a worse candidate replace a better one.
            max_diff = combined
            new_cluster = point
    return new_cluster

View File

@@ -8,8 +8,10 @@ def plzGen(entries):
for i in range(0, int(entries)):
if i < round(entries * 0.4):
plz = generateNumber(plz_lenght, 2)
elif i >= round(entries * 0.4) and i < round(entries * 0.8):
elif i >= round(entries * 0.4) and i < round(entries * 0.6):
plz = generateNumber(plz_lenght, 9)
elif i >= round(entries * 0.6) and i < round(entries * 0.9):
plz = generateNumber(plz_lenght, 4)
else:
plz = generateNumber(plz_lenght, randint(0,9))
dataArray.append(plz)

View File

@@ -107,8 +107,8 @@ def assignCluster(data, highPoint, clusters):
# Check the difference between the point (item) and each cluster and set min_cluster to the smallest difference
for cluster in range(0, clusters):
if min_cluster > dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)], new_data[0]):
min_cluster = dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)], new_data[0])
if min_cluster > dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)]):
min_cluster = dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)])
assinged_cluster = globals()["cpoint_" + str(cluster)]
# Assign the minimal difference cluster to the data
data_assigned.append(assinged_cluster)
@@ -135,6 +135,6 @@ def startup(data):
print(str(seconds) + " seconds for execution")
# Start the algorithm and generate test data
data = dmtest.plzGen(1000)
data = dmtest.plzGen(10000)
startup(data)

View File

@@ -0,0 +1,36 @@
#!/usr/bin/env python
#title: main.py
#description:
#author: Conrad Großer
#license: https://github.com/tchemn/miner/blob/master/LICENSE
#date: 02.06.2018
#version: 0.1
#usage: PENDING
#notes:
#dependencies:
#known_issues:
#python_version: 3.x
#==============================================================================
# IMPORTS
# Importing the time for benchmarking purposes
import time
from datetime import date
# CODE (FUNCTIONS)
# EXECUTION
if __name__ == '__main__':
    # Print welcoming message
    print("Hello world")
    # BENCHMARKING [START]
    # perf_counter() is the clock intended for measuring elapsed intervals;
    # unlike time.time() it is not affected by system clock adjustments.
    start_time = time.perf_counter()
    # Get parameters, call functions, execute program (...)
    # BENCHMARKING [END]
    sec = time.perf_counter() - start_time
    print("The program took " + str(sec) + " seconds (" + str(sec/60) + " minutes) for execution.")