diff --git a/src/algorithms/dmlib.py b/src/algorithms/dmlib.py
index 0d0fc71..8d7203e 100644
--- a/src/algorithms/dmlib.py
+++ b/src/algorithms/dmlib.py
@@ -1,5 +1,5 @@
 # Calculate the difference between two points giving the indexes of these data entries
-def calcdiff(point1, point2, data):
+def calcdiff(point1, point2):
     if int(point2) > int(point1):
         difference = int(point2) - int(point1)
     else:
@@ -19,4 +19,30 @@ def findHighest(data):
     for i in range(0, len(data)):
         if int(data[i]) > maximum:
             maximum = int(data[i])
-    return maximum
\ No newline at end of file
+    return maximum
+
+# Return the entry of data whose calcdiff() to clusterpoint is largest.
+# Returns 0 when data is empty or no entry has a difference above 0.
+def pp_calcdiff(data, clusterpoint):
+    max_diff = 0
+    new_cluster = 0
+    for item in data:
+        diff = calcdiff(item, clusterpoint)
+        if diff > max_diff:
+            max_diff = diff
+            new_cluster = item
+    return new_cluster
+
+# Return the entry of data whose summed calcdiff() to both cluster points is largest.
+# Returns 0 when data is empty or no entry has a summed difference above 0.
+def pp_calcdiff_2(data, clusterpoint, clusterpoint_2):
+    max_diff = 0
+    new_cluster = 0
+    for item in data:
+        diff = calcdiff(item, clusterpoint) + calcdiff(item, clusterpoint_2)
+        if diff > max_diff:
+            # Remember the summed difference that was just compared; keeping only
+            # the first term would let entries with a smaller sum replace the winner
+            max_diff = diff
+            new_cluster = item
+    return new_cluster
\ No newline at end of file
diff --git a/src/algorithms/dmtest.py b/src/algorithms/dmtest.py
index 7cc52be..d13dc48 100644
--- a/src/algorithms/dmtest.py
+++ b/src/algorithms/dmtest.py
@@ -8,8 +8,10 @@ def plzGen(entries):
     for i in range(0, int(entries)):
         if i < round(entries * 0.4):
             plz = generateNumber(plz_lenght, 2)
-        elif i >= round(entries * 0.4) and i < round(entries * 0.8):
+        elif i >= round(entries * 0.4) and i < round(entries * 0.6):
             plz = generateNumber(plz_lenght, 9)
+        elif i >= round(entries * 0.6) and i < round(entries * 0.9):
+            plz = generateNumber(plz_lenght, 4)
         else:
             plz = generateNumber(plz_lenght, randint(0,9))
         dataArray.append(plz)
diff --git a/src/algorithms/kmeansMkI.py b/src/algorithms/kmeansMkI.py
index ca94e6c..f15b222 100644
--- a/src/algorithms/kmeansMkI.py
+++ b/src/algorithms/kmeansMkI.py
@@ -107,8 +107,8 @@
def assignCluster(data, highPoint, clusters): # Check the difference between the point (item) and each cluster and set min_cluster to the smallest difference for cluster in range(0, clusters): - if min_cluster > dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)], new_data[0]): - min_cluster = dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)], new_data[0]) + if min_cluster > dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)]): + min_cluster = dmlib.calcdiff(data[item], globals()["cpoint_" + str(cluster)]) assinged_cluster = globals()["cpoint_" + str(cluster)] # Assign the minimal difference cluster to the data data_assigned.append(assinged_cluster) @@ -135,6 +135,6 @@ def startup(data): print(str(seconds) + " seconds for execution") # Start the algorithm and generate test data -data = dmtest.plzGen(1000) +data = dmtest.plzGen(10000) startup(data)
diff --git a/src/main.py b/src/main.py
index e69de29..9a5ddac 100644
--- a/src/main.py
+++ b/src/main.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+#title: main.py
+#description:
+#author: Conrad Großer
+#license: https://github.com/tchemn/miner/blob/master/LICENSE
+#date: 02.06.2018
+#version: 0.1
+#usage: PENDING
+#notes:
+#dependencies:
+#known_issues:
+#python_version: 3.x
+#==============================================================================
+
+# IMPORTS
+
+# Importing the time for benchmarking purposes
+import time
+from datetime import date
+
+# CODE (FUNCTIONS)
+
+
+# EXECUTION
+if __name__ == '__main__':
+    # Print welcoming message
+    print("Hello world")
+
+    # For benchmarking starting the timer now
+    start_time = time.time()
+
+    # Get parameters, call functions, execute program (...)
+
+    # BENCHMARKING [END]
+    # Kept inside the guard because start_time only exists when run as a script
+    sec = time.time() - start_time
+    print("The program took " + str(sec) + " seconds (" + str(sec/60) + " minutes) for execution.")
\ No newline at end of file