kmeans Update 1.2
- Added dependencies to info (adjusted template too) - Removed unnecessary global variables - Added commentary - Saved a few variables - Removed unnecessary libraries 'numpy' and 'multiprocessing'
This commit is contained in:
@@ -4,9 +4,10 @@
|
|||||||
#author: Tillmann Brendel, Conrad Großer
|
#author: Tillmann Brendel, Conrad Großer
|
||||||
#license: Pending
|
#license: Pending
|
||||||
#date: 26.05.2018
|
#date: 26.05.2018
|
||||||
#version: 1.1
|
#version: 1.2
|
||||||
#usage: python pyscript.py
|
#usage: python pyscript.py
|
||||||
#notes:
|
#notes:
|
||||||
|
#dependencies: matplotlib
|
||||||
#known_issues:
|
#known_issues:
|
||||||
#python_version: 3.x
|
#python_version: 3.x
|
||||||
#==============================================================================
|
#==============================================================================
|
||||||
@@ -20,11 +21,7 @@ from datetime import date
|
|||||||
# For random generation of numbers import randint
|
# For random generation of numbers import randint
|
||||||
from random import randint
|
from random import randint
|
||||||
|
|
||||||
# Importing libary for multi core processing
|
|
||||||
import multiprocessing
|
|
||||||
|
|
||||||
# Importing libraries for easy plotting
|
# Importing libraries for easy plotting
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
# Importing own libraries Datamining Library and Datamining Test
|
# Importing own libraries Datamining Library and Datamining Test
|
||||||
@@ -41,54 +38,59 @@ def kmeansmk1(data, clusters):
|
|||||||
|
|
||||||
# Get max value in the data array
|
# Get max value in the data array
|
||||||
highPoint = dmlib.findHighest(data)
|
highPoint = dmlib.findHighest(data)
|
||||||
|
|
||||||
|
# Define variables for running the algorithm (runs is just for benchmarking!)
|
||||||
done = 0
|
done = 0
|
||||||
runs = 0
|
runs = 0
|
||||||
|
|
||||||
|
# Until calcClusters reports done (returns non-zero), keep assigning the data to the clusters and recalculating the cluster centers
|
||||||
while done == 0:
|
while done == 0:
|
||||||
runs = runs + 1
|
runs = runs + 1
|
||||||
new_data = assignCluster(data, highPoint, clusters)
|
new_data = assignCluster(data, highPoint, clusters)
|
||||||
calcClusters(new_data, clusters)
|
done = calcClusters(new_data, clusters)
|
||||||
for cluster in range(0, clusters):
|
|
||||||
|
|
||||||
#keeps the algorithm going until the central clusterpoint doesnt change anymore
|
|
||||||
if globals()["cpointchanged_" + str(cluster)] == 1:
|
|
||||||
done = 1
|
|
||||||
|
|
||||||
# Printing final clusters
|
# Printing final clusters
|
||||||
|
|
||||||
for i in range(0, clusters):
|
for i in range(0, clusters):
|
||||||
print("Endcluster " + str(i + 1) + " is calculated to be at " + str(globals()["cpoint_" + str(i)]) + " after " + str(runs) + " runs")
|
print("Endcluster " + str(i + 1) + " is calculated to be at " + str(globals()["cpoint_" + str(i)]) + " after " + str(runs) + " runs")
|
||||||
|
|
||||||
|
# Building an artificial index array, the same length as the original data, for visualizing the 1D data in a 2D graphic
|
||||||
# plotting the random data and the found clusters
|
|
||||||
anew = []
|
anew = []
|
||||||
inew = 0
|
inew = 0
|
||||||
while inew < 1000:
|
while inew < len(data):
|
||||||
anew.append(inew)
|
anew.append(inew)
|
||||||
inew = inew + 1
|
inew = inew + 1
|
||||||
floatdata = [int(x) for x in data]
|
|
||||||
|
# Drawing found clusters as lines
|
||||||
for i in range(0, clusters):
|
for i in range(0, clusters):
|
||||||
plt.axvline(x=int(globals()["cpoint_" + str(i)]), color='r')
|
plt.axvline(x=int(globals()["cpoint_" + str(i)]), color='r')
|
||||||
plt.scatter(floatdata, anew, marker='x', s=7, color='k')
|
|
||||||
|
# Showing graph
|
||||||
|
plt.scatter([int(x) for x in data], anew, marker='x', s=7, color='k')
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
# Calculates middle values for each cluster, takes 2D array (item, assigned_cluster)
|
# Calculates middle values for each cluster, takes 2D array (item, assigned_cluster)
|
||||||
def calcClusters(data, clusters):
|
def calcClusters(data, clusters):
|
||||||
|
changed = 0
|
||||||
for cluster in range(0, clusters):
|
for cluster in range(0, clusters):
|
||||||
globals()["cpointchanged_" + str(cluster)] = 0
|
# Getting current cluster and saving it in temporary variable
|
||||||
globals()["oldcpoint_" + str(cluster)] = globals()["cpoint_" + str(cluster)]
|
prev_cluster = globals()["cpoint_" + str(cluster)]
|
||||||
|
# Sum and count of the items assigned to this cluster, used to calculate their average as the new cluster center
|
||||||
clustersum = 0
|
clustersum = 0
|
||||||
count = 0
|
item_count = 0
|
||||||
|
|
||||||
for item in range(0, len(data[0])):
|
for item in range(0, len(data[0])):
|
||||||
if data[1][item] == globals()["cpoint_" + str(cluster)]:
|
if data[1][item] == globals()["cpoint_" + str(cluster)]:
|
||||||
clustersum = clustersum + int(data[0][item])
|
clustersum = clustersum + int(data[0][item])
|
||||||
count = count + 1
|
item_count = item_count + 1
|
||||||
globals()["cpoint_" + str(cluster)] = round(clustersum / count)
|
globals()["cpoint_" + str(cluster)] = round(clustersum / item_count)
|
||||||
|
|
||||||
#checking if old clusterpoint is equal to the one just calculated
|
# Checking if previous clusterpoint is equal to the one just calculated
|
||||||
if globals()["oldcpoint_" + str(cluster)] == globals()["cpoint_" + str(cluster)]:
|
if prev_cluster == globals()["cpoint_" + str(cluster)]:
|
||||||
globals()["cpointchanged_" + str(cluster)] = 1
|
changed = 1
|
||||||
return 0
|
|
||||||
|
return changed
|
||||||
|
|
||||||
def assignCluster(data, highPoint, clusters):
|
def assignCluster(data, highPoint, clusters):
|
||||||
# Create a new data array for working
|
# Create a new data array for working
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
#version: Versionnumber
|
#version: Versionnumber
|
||||||
#usage: Description of how to use the program quickly
|
#usage: Description of how to use the program quickly
|
||||||
#notes: Notes for parameters, thanks (...)
|
#notes: Notes for parameters, thanks (...)
|
||||||
|
#dependencies: Preinstalled packages
|
||||||
#known_issues: Known issues in this version
|
#known_issues: Known issues in this version
|
||||||
#python_version: Compatible (tested) python version
|
#python_version: Compatible (tested) python version
|
||||||
#==============================================================================
|
#==============================================================================
|
||||||
|
|||||||
Reference in New Issue
Block a user