kmeans Minor Bugfixes
- Added cluster to test generator - Added sample data to main
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
# Calculate the difference between two points giving the indexes of these data entries
|
# Calculate the difference between two points giving the indexes of these data entries
|
||||||
def calcdiff(point1, point2, data):
|
def calcdiff(point1, point2):
|
||||||
if int(point2) > int(point1):
|
if int(point2) > int(point1):
|
||||||
difference = int(point2) - int(point1)
|
difference = int(point2) - int(point1)
|
||||||
else:
|
else:
|
||||||
@@ -19,4 +19,22 @@ def findHighest(data):
|
|||||||
for i in range(0, len(data)):
|
for i in range(0, len(data)):
|
||||||
if int(data[i]) > maximum:
|
if int(data[i]) > maximum:
|
||||||
maximum = int(data[i])
|
maximum = int(data[i])
|
||||||
return maximum
|
return maximum
|
||||||
|
|
||||||
|
def pp_calcdiff(data, clusterpoint):
    """Return the entry of ``data`` that lies farthest from ``clusterpoint``.

    The distance of every entry to ``clusterpoint`` is measured with
    ``calcdiff``. Only a distance strictly greater than 0 can be selected,
    so ``0`` is returned when ``data`` is empty or no entry yields a
    positive distance. On ties the earliest entry is kept.

    Args:
        data: Iterable of data points accepted by ``calcdiff``.
        clusterpoint: The existing cluster point to measure against.

    Returns:
        The entry with the largest distance, or ``0`` if none qualifies.
    """
    max_diff = 0
    new_cluster = 0
    for point in data:
        # Measure once per entry instead of once to compare and again to store.
        diff = calcdiff(point, clusterpoint)
        if diff > max_diff:
            max_diff = diff
            new_cluster = point
    return new_cluster
|
||||||
|
|
||||||
|
def pp_calcdiff_2(data, clusterpoint, clusterpoint_2):
    """Return the entry of ``data`` farthest from two cluster points combined.

    For every entry the distances to ``clusterpoint`` and ``clusterpoint_2``
    (both measured with ``calcdiff``) are added together, and the entry with
    the largest sum is selected. Only a sum strictly greater than 0 can be
    selected, so ``0`` is returned when ``data`` is empty or no entry yields
    a positive sum. On ties the earliest entry is kept.

    Args:
        data: Iterable of data points accepted by ``calcdiff``.
        clusterpoint: First existing cluster point.
        clusterpoint_2: Second existing cluster point.

    Returns:
        The entry with the largest combined distance, or ``0`` if none
        qualifies.
    """
    max_diff = 0
    new_cluster = 0
    for point in data:
        combined = calcdiff(point, clusterpoint) + calcdiff(point, clusterpoint_2)
        if combined > max_diff:
            # Store the same combined value that was compared. Keeping only
            # the distance to the first cluster point made the running
            # maximum too small, so later entries with a smaller combined
            # distance could replace a better candidate.
            max_diff = combined
            new_cluster = point
    return new_cluster
|
||||||
@@ -8,8 +8,10 @@ def plzGen(entries):
|
|||||||
for i in range(0, int(entries)):
|
for i in range(0, int(entries)):
|
||||||
if i < round(entries * 0.4):
|
if i < round(entries * 0.4):
|
||||||
plz = generateNumber(plz_lenght, 2)
|
plz = generateNumber(plz_lenght, 2)
|
||||||
elif i >= round(entries * 0.4) and i < round(entries * 0.8):
|
elif i >= round(entries * 0.4) and i < round(entries * 0.6):
|
||||||
plz = generateNumber(plz_lenght, 9)
|
plz = generateNumber(plz_lenght, 9)
|
||||||
|
elif i >= round(entries * 0.6) and i < round(entries * 0.9):
|
||||||
|
plz = generateNumber(plz_lenght, 4)
|
||||||
else:
|
else:
|
||||||
plz = generateNumber(plz_lenght, randint(0,9))
|
plz = generateNumber(plz_lenght, randint(0,9))
|
||||||
dataArray.append(plz)
|
dataArray.append(plz)
|
||||||
|
|||||||
@@ -107,8 +107,8 @@ def assignCluster(data, highPoint, clusters):
|
|||||||
|
|
||||||
# Check the difference between the point (item) and each cluster and set min_cluster to the smallest difference
|
# Check the difference between the point (item) and each cluster and set min_cluster to the smallest difference
|
||||||
for cluster in range(0, clusters):
|
for cluster in range(0, clusters):
|
||||||
if min_cluster > dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)], new_data[0]):
|
if min_cluster > dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)]):
|
||||||
min_cluster = dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)], new_data[0])
|
min_cluster = dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)])
|
||||||
assinged_cluster = globals()["cpoint_" + str(cluster)]
|
assinged_cluster = globals()["cpoint_" + str(cluster)]
|
||||||
# Assign the minimal difference cluster to the data
|
# Assign the minimal difference cluster to the data
|
||||||
data_assigned.append(assinged_cluster)
|
data_assigned.append(assinged_cluster)
|
||||||
@@ -135,6 +135,6 @@ def startup(data):
|
|||||||
print(str(seconds) + " seconds for execution")
|
print(str(seconds) + " seconds for execution")
|
||||||
|
|
||||||
# Start the algorithm and generate test data
|
# Start the algorithm and generate test data
|
||||||
data = dmtest.plzGen(1000)
|
data = dmtest.plzGen(10000)
|
||||||
|
|
||||||
startup(data)
|
startup(data)
|
||||||
|
|||||||
36
src/main.py
36
src/main.py
@@ -0,0 +1,36 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#title: main.py
|
||||||
|
#description:
|
||||||
|
#author: Conrad Großer
|
||||||
|
#license: https://github.com/tchemn/miner/blob/master/LICENSE
|
||||||
|
#date: 02.06.2018
|
||||||
|
#version: 0.1
|
||||||
|
#usage: PENDING
|
||||||
|
#notes:
|
||||||
|
#dependencies:
|
||||||
|
#known_issues:
|
||||||
|
#python_version: 3.x
|
||||||
|
#==============================================================================
|
||||||
|
|
||||||
|
# IMPORTS
|
||||||
|
|
||||||
|
# Importing the time for benchmarking purposes
|
||||||
|
import time
|
||||||
|
from datetime import date
|
||||||
|
|
||||||
|
# CODE (FUNCTIONS)
|
||||||
|
|
||||||
|
|
||||||
|
# EXECUTION
|
||||||
|
if __name__ == '__main__':
    # Print welcoming message
    print("Hello world")

    # For benchmarking starting the timer now. perf_counter() is used
    # instead of time() because it is monotonic: a system clock adjustment
    # during the run cannot distort (or make negative) the measured duration.
    start_time = time.perf_counter()

    # Get parameters, call functions, execute program (...)

    # BENCHMARKING [END]
    sec = time.perf_counter() - start_time
    print("The program took " + str(sec) + " seconds (" + str(sec/60) + " minutes) for execution.")
|
||||||
Reference in New Issue
Block a user