Compare commits
3 Commits
master
...
dev_kmeans
| Author | SHA1 | Date | |
|---|---|---|---|
| 94ed193954 | |||
| e2ad63f90f | |||
| 25fa068df9 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,2 +1,3 @@
|
||||
testdata/
|
||||
__pycache__/
|
||||
.DS_Store
|
||||
|
||||
@@ -1,40 +1,27 @@
|
||||
# Calculate the absolute difference between two data points (each value is converted with int())
|
||||
def calcdiff(point1, point2):
    """Return the absolute distance between two data points.

    Both arguments may be ints or numeric strings; each one is converted
    with ``int()`` before subtracting, so the result is always a
    non-negative int.

    Raises:
        ValueError: if an argument cannot be converted by ``int()``.
    """
    return abs(int(point1) - int(point2))
|
||||
|
||||
# Get the absolute value of a number and returns it as int
|
||||
def betrag(number):
    """Return the absolute value of *number* ("Betrag" is German for magnitude).

    Negative inputs are negated and truncated to int; non-negative inputs
    are returned unchanged. The negation is done directly instead of via
    ``(-2 * number) / 2`` because the true division detours through float
    and silently loses precision for large integers.
    """
    if number < 0:
        return int(-number)
    return number
|
||||
|
||||
# Determine the highest int value in an array and return it as an int
|
||||
def findHighest(data):
    """Return the largest entry of *data*, converted to int.

    Entries may be ints or numeric strings. An empty *data* yields 0.
    The running maximum is no longer seeded with 0, so data consisting
    only of negative values returns its real maximum.
    """
    return max((int(value) for value in data), default=0)
|
||||
|
||||
def pp_calcdiff(data, clusterpoint):
    """Return the entry of *data* that lies farthest from *clusterpoint*.

    Distances are measured with ``calcdiff``. On ties the first entry with
    the greatest distance wins. Returns 0 when *data* is empty or no entry
    differs from *clusterpoint*.
    """
    max_diff = 0
    new_cluster = 0
    for value in data:
        # Compute the distance once per entry instead of twice.
        diff = calcdiff(value, clusterpoint)
        if diff > max_diff:
            max_diff = diff
            new_cluster = value
    return new_cluster
|
||||
|
||||
|
||||
def pp_calcdiff_2(data, clusterpoint, clusterpoint_2):
    """Return the entry of *data* with the largest combined distance.

    The combined distance of an entry is
    ``calcdiff(entry, clusterpoint) + calcdiff(entry, clusterpoint_2)``.
    On ties the first such entry wins. Returns 0 when *data* is empty or
    every combined distance is 0.
    """
    max_diff = 0
    new_cluster = 0
    for value in data:
        combined = calcdiff(value, clusterpoint) + calcdiff(value, clusterpoint_2)
        if combined > max_diff:
            # Remember the same quantity that was compared; storing only the
            # first distance let later, weaker candidates replace the best one.
            max_diff = combined
            new_cluster = value
    return new_cluster
|
||||
|
||||
@@ -1,34 +1,38 @@
|
||||
# For random generation of numbers import randint
|
||||
from random import randint, shuffle
|
||||
|
||||
|
||||
# Simple generator for test plzs (biased by leading digit: 40% "2", 20% "9", 30% "4", 10% random), returns 1D array of plzs
|
||||
def plzGen(entries):
    """Generate a shuffled list of random five-character digit strings.

    The leading digit is biased by position before shuffling:
    the first 40% start with 2, the next 20% with 9, the next 30% with 4,
    and the remaining 10% with a random digit.

    Args:
        entries: number of strings to generate; anything ``int()`` accepts.

    Returns:
        list of str, in random order.
    """
    # Convert once so a numeric string works for the thresholds as well.
    total = int(entries)
    plz_length = 5

    # The thresholds do not change inside the loop, so compute them up front.
    first_cut = round(total * 0.4)
    second_cut = round(total * 0.6)
    third_cut = round(total * 0.9)

    dataArray = []
    for i in range(total):
        if i < first_cut:
            leading = 2
        elif i < second_cut:
            leading = 9
        elif i < third_cut:
            leading = 4
        else:
            leading = randint(0, 9)
        dataArray.append(generateNumber(plz_length, leading))

    shuffle(dataArray)
    return dataArray
|
||||
|
||||
|
||||
# Function for generating the content of one single row randomly
|
||||
def generateNumber(numberLenght, startingNumber):
    """Return a digit string that starts with *startingNumber*.

    ``numberLenght - 1`` random digits (0-9) are appended to
    ``str(startingNumber)``; for a single-digit *startingNumber* the result
    is *numberLenght* characters long. A length of 1 or less returns just
    ``str(startingNumber)``.
    """
    digits = [str(startingNumber)]
    digits.extend(str(randint(0, 9)) for _ in range(numberLenght - 1))
    return "".join(digits)
|
||||
|
||||
|
||||
# Function for writing data into a file (content = string, nameChunkStart and namePartStart are for better naming)
|
||||
# /testdata/ folder has to be created at this point
|
||||
def writeFile(content, nameChunkStart, namePartStart):
    """Write every entry of *content* on its own line to a numbered file.

    The target is ``testdata/file<N>.txt`` with
    ``N = int(nameChunkStart) + int(namePartStart)``. The ``testdata``
    directory must already exist. An existing file is overwritten.

    Args:
        content: iterable of str, one entry per output line.
        nameChunkStart: first part of the file number.
        namePartStart: second part of the file number.
    """
    filenumber = int(nameChunkStart) + int(namePartStart)
    # The context manager closes (and flushes) the handle even when a write fails.
    with open("testdata/file" + str(filenumber) + ".txt", "w") as file:
        file.writelines(line + "\n" for line in content)
|
||||
|
||||
|
||||
@@ -1,19 +1,18 @@
|
||||
#!/usr/bin/env python
|
||||
#title: kmeansMkI.py
|
||||
#description: Our personal Python K-Means++ implementation
|
||||
#author: Tillmann Brendel, Conrad Großer
|
||||
#license: Pending
|
||||
#date: 26.05.2018
|
||||
#version: 1.2
|
||||
#usage: python pyscript.py
|
||||
#notes:
|
||||
#dependencies: matplotlib
|
||||
#known_issues:
|
||||
#python_version: 3.x
|
||||
#==============================================================================
|
||||
# title: kmeansMkI.py
|
||||
# description: Our personal Python K-Means++ implementation
|
||||
# author: Tillmann Brendel, Conrad Großer
|
||||
# license: Pending
|
||||
# date: 26.05.2018
|
||||
# version: 1.2
|
||||
# usage: python pyscript.py
|
||||
# notes:
|
||||
# dependencies: matplotlib
|
||||
# known_issues:
|
||||
# python_version: 3.x
|
||||
# ==============================================================================
|
||||
|
||||
# IMPORTS
|
||||
|
||||
# Importing the time for benchmarking purposes
|
||||
import time
|
||||
from datetime import date
|
||||
@@ -28,111 +27,120 @@ import matplotlib.pyplot as plt
|
||||
import dmlib
|
||||
import dmtest
|
||||
|
||||
# CODE
|
||||
|
||||
# Main function of the algorithm
|
||||
def kmeansmk1(data, clusters):
    """Cluster one-dimensional *data* into *clusters* groups and plot the result.

    Cluster centres are kept in module globals named ``cpoint_<i>`` because
    ``assignCluster`` and ``calcClusters`` read and update them there.

    Steps:
        1. Pick a random data entry as the first centre, the entry farthest
           from it as the second, and each further centre via
           ``dmlib.pp_calcdiff_2`` against the two previous centres.
        2. Alternate ``assignCluster`` / ``calcClusters`` until
           ``calcClusters`` reports that it is done.
        3. Print the final centres and show a scatter plot with one vertical
           line per centre.

    Args:
        data: sequence of ints or numeric strings.
        clusters: number of clusters to search for.

    Returns:
        Always 0.
    """
    centres = globals()

    # randint includes both end points, so the upper bound has to be the last
    # valid index; len(data) itself would occasionally raise IndexError.
    centres["cpoint_0"] = data[randint(0, len(data) - 1)]
    centres["cpoint_1"] = dmlib.pp_calcdiff(data, centres["cpoint_0"])

    print("Initial cluster 1: " + str(centres["cpoint_0"]))
    print("Initial cluster 2: " + str(centres["cpoint_1"]))

    # Defining the remaining cluster points
    for i in range(2, clusters):
        centres["cpoint_" + str(i)] = dmlib.pp_calcdiff_2(
            data,
            centres["cpoint_" + str(i - 1)],
            centres["cpoint_" + str(i - 2)],
        )
        print("Initial cluster " + str(i + 1) + ": " + str(centres["cpoint_" + str(i)]))

    # Largest entry by numeric value; a plain max() on strings would compare
    # them lexicographically.
    highPoint = max(data, key=int)

    # runs is only used for benchmarking output
    done, runs = False, 0

    # Reassign the data and recompute the centres until calcClusters is done
    while not done:
        runs += 1
        new_data = assignCluster(data, highPoint, clusters)
        done = calcClusters(new_data, clusters)

    # Printing final clusters
    for i in range(clusters):
        print("Endcluster " + str(i + 1) + " is calculated to be at " + str(centres["cpoint_" + str(i)]) + " after " + str(runs) + " runs")

    # Artificial y-axis (the entry index) so the 1D data can be drawn in 2D
    anew = list(range(len(data)))

    # Drawing found clusters as lines
    for i in range(clusters):
        plt.axvline(x=int(centres["cpoint_" + str(i)]), color='r')

    # Showing graph
    plt.scatter([int(x) for x in data], anew, marker='x', s=7, color='k')
    plt.show()

    return 0
|
||||
|
||||
# Calculates middle values for each cluster, takes 2D array (item, assigned_cluster)
|
||||
def calcClusters(data, clusters):
    """Move every cluster centre to the rounded mean of its assigned entries.

    Centres live in module globals named ``cpoint_<i>`` and are updated in
    place.

    Args:
        data: two parallel sequences ``[values, assigned_centres]`` as
            produced by ``assignCluster``.
        clusters: number of clusters.

    Returns:
        True when no centre moved during this pass (the caller stops
        iterating), otherwise False. Previously a single stable centre was
        enough to report True, which ended the search too early.
    """
    values, assigned = data[0], data[1]
    state = globals()
    converged = True

    for cluster in range(clusters):
        key = "cpoint_" + str(cluster)
        previous = state[key]

        # Collect the entries that were assigned to this centre
        members = [int(value) for value, owner in zip(values, assigned) if owner == previous]

        # A centre without entries keeps its position; dividing by the empty
        # count would raise ZeroDivisionError.
        if not members:
            continue

        centre = round(sum(members) / len(members))
        state[key] = centre

        # Initial centres may still be numeric strings, so compare as ints
        if int(previous) != centre:
            converged = False

    return converged
|
||||
|
||||
def assignCluster(data, highPoint, clusters):
    """Assign every entry of *data* to its nearest cluster centre.

    Centres are read from the module globals ``cpoint_<i>``; distances are
    measured with ``dmlib.calcdiff``. On ties the centre with the lowest
    index wins.

    Args:
        data: sequence of ints or numeric strings.
        highPoint: kept for interface compatibility and no longer used. It
            used to seed the minimum search, which left the assignment
            unbound or stale whenever no distance was strictly below it.
        clusters: number of cluster centres to consider.

    Returns:
        ``[data, assigned]`` where ``assigned[i]`` is the centre value chosen
        for ``data[i]`` (None when *clusters* is 0).
    """
    # The centres do not change during assignment, so read them once
    centres = [globals()["cpoint_" + str(cluster)] for cluster in range(clusters)]

    data_assigned = []
    for item in data:
        nearest = None
        nearest_diff = None
        for centre in centres:
            diff = dmlib.calcdiff(item, centre)
            if nearest_diff is None or diff < nearest_diff:
                nearest_diff = diff
                nearest = centre
        data_assigned.append(nearest)

    return [data, data_assigned]
|
||||
|
||||
# Startup function for collecting necessary data
|
||||
def startup(data):
    """Ask for the cluster count, run ``kmeansmk1`` on *data* and print the runtime.

    The timer starts after the prompt, so the time spent waiting for user
    input is not counted. Prompts for a core count and a data path exist
    only as commented-out ideas and are not implemented.
    """
    clusters = int(input("How many clusters are known? "))

    # For benchmarking starting the timer now
    start_time = time.time()

    # Firing up the engines!
    kmeansmk1(data, clusters)

    # Stopping benchmark
    seconds = time.time() - start_time
    print(str(seconds) + " seconds for execution")
|
||||
|
||||
# Start the algorithm and generate test data
|
||||
# Runs at import time: asks dmtest.plzGen for 10000 generated test entries.
data = dmtest.plzGen(10000)
|
||||
|
||||
@@ -10,69 +10,79 @@ from datetime import date
|
||||
# Importing for multi core processing
|
||||
import multiprocessing
|
||||
|
||||
|
||||
# randomI function which creates each file
|
||||
def randomI(units, rows, rowLength, partstart):
    """Generate and write *units* files; this is the target of each worker process.

    File ``i`` (0-based) is written via ``writeFile`` with the name parts
    ``i`` and *partstart*, so each worker can cover its own number range.

    Args:
        units: number of files this worker creates.
        rows: number of rows per file.
        rowLength: number of digits per row.
        partstart: offset added to the file number.

    Returns:
        Always True.
    """
    for setcounter in range(units):
        writeFile(generateFile(rows, rowLength), setcounter, partstart)
    return True
|
||||
|
||||
|
||||
# Function for generating the content of one single file
|
||||
def generateFile(rows, rowLength):
    """Return the content of one file as a list of *rows* random rows.

    Each row comes from ``generateRow(rowLength)``. *rows* is a count, so it
    is expanded with ``range``; iterating over the int directly raises
    TypeError.
    """
    return [generateRow(rowLength) for _ in range(rows)]
|
||||
|
||||
|
||||
# Function for generating the content of one single row randomly
|
||||
def generateRow(rowLength):
    """Return a string of *rowLength* random decimal digits.

    A length of 0 (or less) yields the empty string.
    """
    return "".join(str(randint(0, 9)) for _ in range(rowLength))
|
||||
|
||||
|
||||
# Function for writing data into a file
|
||||
def writeFile(content, setcounter, partstart):
    """Write every entry of *content* on its own line to a numbered file.

    The target is ``testdata/file<N>.txt`` with
    ``N = int(setcounter) + int(partstart)``. The ``testdata`` directory
    must already exist. An existing file is overwritten.

    Returns:
        Always True.
    """
    filenumber = int(setcounter) + int(partstart)
    # The context manager closes (and flushes) the handle even when a write fails.
    with open("testdata/file" + str(filenumber) + ".txt", "w") as file:
        file.writelines(line + "\n" for line in content)
    return True
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: ask for the job size, split the units across worker
    # processes, run them, and report the elapsed time.

    # Getting the user input
    print("Hello World")
    units = int(input("How many units would you like to generate? "))
    rows = int(input("How many rows should each unit have? "))
    rowLength = int(input("How long should each row be? "))
    cores = int(input("How many cores do you want to use? "))

    # Splitting up the units. Integer division keeps the file offsets whole,
    # and the remainder is handed out one unit at a time so that no unit is
    # dropped when units is not a multiple of cores.
    partsize, leftover = divmod(units, cores)

    # For benchmarking starting the timer now
    start_time = time.time()

    # Initialize and prepare one process per core
    processes = []
    partstart = 0
    for count in range(cores):
        share = partsize + (1 if count < leftover else 0)
        processes.append(multiprocessing.Process(
            target=randomI,
            args=(share, rows, rowLength, partstart)
        ))
        partstart += share

    # Starting each core
    for count, process in enumerate(processes):
        process.start()
        print("Core " + str(count) + " started.")

    print("Working...")

    # Joining each core for the process
    for process in processes:
        process.join()

    # Finishing up the process
    sec = time.time() - start_time
    print("Data is generated. Have fun!")
    print("randomI took " + str(sec) + " seconds for execution.")
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
#!/usr/bin/env python
|
||||
#title: randomI2.1.py
|
||||
#description: Personal
|
||||
#author: Tillmann Brendel, Conrad Großer
|
||||
#license: Pending
|
||||
#date: 26.05.2018
|
||||
#version: 1.0
|
||||
#usage: python pyscript.py
|
||||
#notes:
|
||||
#known_issues:
|
||||
#python_version: 3.x
|
||||
#==============================================================================
|
||||
# title: randomI2.1.py
|
||||
# description: Personal
|
||||
# author: Tillmann Brendel, Conrad Großer
|
||||
# license: Pending
|
||||
# date: 26.05.2018
|
||||
# version: 1.0
|
||||
# usage: python pyscript.py
|
||||
# notes:
|
||||
# known_issues:
|
||||
# python_version: 3.x
|
||||
# ==============================================================================
|
||||
|
||||
# For random generation of numbers import randint
|
||||
from random import randint
|
||||
@@ -21,76 +21,87 @@ from datetime import date
|
||||
# Importing for multi core processing
|
||||
import multiprocessing
|
||||
|
||||
|
||||
# randomI function which creates each file
|
||||
def randomI(units, rows, rowLength, partstart, cluster):
    """Generate and write *units* files; this is the target of each worker process.

    File ``i`` (0-based) is written via ``writeFile`` with the name parts
    ``i`` and *partstart*, so each worker can cover its own number range.

    Args:
        units: number of files this worker creates.
        rows: number of rows per file.
        rowLength: number of digits per ordinary row.
        partstart: offset added to the file number.
        cluster: passed through to ``generateFile``.

    Returns:
        Always True.
    """
    for setcounter in range(units):
        writeFile(generateFile(rows, rowLength, cluster), setcounter, partstart)
    return True
|
||||
|
||||
|
||||
# Function for generating the content of one single file
|
||||
def generateFile(rows, rowLength, cluster):
    """Return the content of one file as a list of *rows* entries.

    The first entry comes from ``generate09()`` with probability
    ``1 / cluster`` and from ``generatePLZ()`` otherwise; every further entry
    is ``generateRow(rowLength)``. Both generators are defined outside this
    excerpt; judging by the input prompt they presumably yield postal codes
    (to be confirmed).

    *rows* is a count, so it is expanded with ``range``; iterating over the
    int directly raises TypeError.
    """
    content = []
    for entry in range(rows):
        if entry != 0:
            content.append(generateRow(rowLength))
        elif randint(1, cluster) == 1:
            content.append(generate09())
        else:
            content.append(generatePLZ())
    return content
|
||||
|
||||
|
||||
# Function for generating the content of one single row randomly
|
||||
def generateRow(rowLength):
    """Return a string of *rowLength* random decimal digits.

    A length of 0 (or less) yields the empty string.
    """
    return ''.join(str(randint(0, 9)) for _ in range(rowLength))
|
||||
|
||||
|
||||
# Function for writing data into a file (content = string, setcount and partstart are for better naming)
|
||||
def writeFile(content, setcounter, partstart):
    """Write every entry of *content* on its own line to a numbered file.

    The target is ``testdata/file<N>.txt`` with
    ``N = int(setcounter) + int(partstart)``. The ``testdata`` directory
    must already exist. An existing file is overwritten.

    Returns:
        Always True.
    """
    filenumber = int(setcounter) + int(partstart)
    # The context manager closes (and flushes) the handle even when a write fails.
    with open('testdata/file' + str(filenumber) + '.txt', 'w') as file:
        file.writelines(line + '\n' for line in content)
    return True
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: ask for the job size, split the units across worker
    # processes, run them, and report the elapsed time.

    # Getting the user input
    print('Hello World')
    units = int(input('How many units would you like to generate? '))
    rows = int(input('How many rows should each unit have? '))
    rowLength = int(input('How long should each row be? '))
    cores = int(input('How many cores do you want to use? '))
    cluster = int(input('What fraction of postal codes should be in the 09xxx cluster? 1/'))

    # Splitting up the units. Integer division keeps the file offsets whole,
    # and the remainder is handed out one unit at a time so that no unit is
    # dropped when units is not a multiple of cores.
    partsize, leftover = divmod(units, cores)

    # For benchmarking starting the timer now
    start_time = time.time()

    # Initialize and prepare one process per core
    processes = []
    partstart = 0
    for count in range(cores):
        share = partsize + (1 if count < leftover else 0)
        processes.append(multiprocessing.Process(
            target=randomI,
            args=(share, rows, rowLength, partstart, cluster)
        ))
        partstart += share

    # Starting each core
    for count, process in enumerate(processes):
        process.start()
        print('Core ' + str(count) + ' started.')

    print('Working...')

    # Joining each core for the process
    for process in processes:
        process.join()

    # Finishing up the process
    sec = time.time() - start_time
    print('Data is generated. Have fun!')
    print('randomI took ' + str(sec) + ' seconds for execution.')
|
||||
|
||||
Reference in New Issue
Block a user