diff --git a/docs/Benchmarking.md b/docs/Benchmarking.md new file mode 100644 index 0000000..86710d2 --- /dev/null +++ b/docs/Benchmarking.md @@ -0,0 +1,48 @@ +# 10000/50/25 Benchmark + +## One Core Utilisation + +randomi.py + +### Conrads Setup (HDD) + 53.5 sec + +### Conrads Setup (m.2 SSD) + 63.29 sec + +### Tillmanns Setup (HDD) + 82.29 sec + +## 4 Core Utilisation + +randomi4c.py + +### Conrads Setup (HDD) + 17.71 sec + +### Conrads Setup (m.2 SSD) + 16.33 sec + +## 8 Core Utilisation + +randomiUnLi.py + +### Conrads Setup (HDD) + 12.58 sec + +### Conrads Setup (m.2 SSD) + 12.32 sec + +## One Core Utilisation (Mk II) + +randomi.py (Mk II) + +### Conrads Setup (HDD) + 54.45 sec + +### 8 Core Utilisation (Mk II) + +randomi.py (Mk II) + +### Conrads Setup (HDD) + 11.71 sec \ No newline at end of file diff --git a/src/data_generators/randomi.py b/src/data_generators/randomi.py index 4aa71ab..8bc38df 100644 --- a/src/data_generators/randomi.py +++ b/src/data_generators/randomi.py @@ -1,40 +1,79 @@ -# Meet randomI, a random data generator for test data generation +# Meet randomI Mk II, a random data generator for test data generation + +# For random generation of numbers import randint from random import randint +# Importing the time for benchmarking purposes import time from datetime import date -def main(i, a, b): - for x in range(0, i): - writeFile(generateFile(a, b), x) +# Importing for multi core processing +import multiprocessing +from typing import Any, Union -def generateFile(rows, lenghtOfRows): +# randomI function which creates each file +def randomI(units, rows, rowLength, partstart): + for setcounter in range(0, units): + writeFile(generateFile(rows, rowLength), setcounter, partstart) + +# Function for generating the content of one single file +def generateFile(rows, rowLength): content = [] for y in range(0, rows): - content.append(generateRow(lenghtOfRows)) + content.append(generateRow(rowLength)) return content -def generateRow(lenghtOfRows): +# Function for generating the content of one single row randomly +def generateRow(rowLength): row = "" - for z in range(0, lenghtOfRows): + for z in range(0, rowLength): row = row + str(randint(0, 9)) return row -def writeFile(content, x): - file = open("testdata/file" + str(x) + ".txt", "w") +# Function for writing data into a file +def writeFile(content, setcounter, partstart): + filenumber = int(setcounter) + int(partstart) + file = open("testdata/file" + str(filenumber) + ".txt", "w") for w in range(0, len(content)): file.write(content[w] + "\n") -print("Hello World") -i = int(input("How many datapices would you like to generate? ")) -a = int(input("How many rows should each datapiece have? ")) -b = int(input("How long should each row be? ")) +if __name__ == '__main__': + # Getting the user input + print("Hello World") + units = int(input("How many units would you like to generate? ")) + rows = int(input("How many rows should each unit have? ")) + rowLength = int(input("How long should each row be? ")) + cores = int(input("How many cores do you want to use? ")) -start_time = time.time() + # Splitting up the units + count = int(0) + partsize = units / cores -main(i, a, b) + # For benchmarking starting the timer now + start_time = time.time() -print("Data is generated. Have fun!") -sec = time.time() - start_time -minutes = sec / 60 -print("randomI took " + str(sec) + " seconds (" + str(minutes) + " minutes) for execution.") \ No newline at end of file + # Initialize and prepare cores for process + while count < cores: + partstart = partsize * count + globals()["p" + str(count)] = multiprocessing.Process(target=randomI, args=(int(partsize), rows, rowLength, partstart)) + count = count + 1 + + # Starting each core + count = int(0) + while count < cores: + globals()["p" + str(count)].start() + print("Core " + str(count) + " started.") + count = count + 1 + + print("Working...") + + # Joining each core for the process + count = int(0) + while count < cores: + globals()["p" + str(count)].join() + count = count + 1 + + # Finishing up the process + sec = time.time() - start_time + print("Data is generated. Have fun!") + print("randomI took " + str(sec) + " seconds for execution.") diff --git a/src/data_generators/randomi4c.py b/src/data_generators/randomi4c.py deleted file mode 100644 index 73e5ee0..0000000 --- a/src/data_generators/randomi4c.py +++ /dev/null @@ -1,64 +0,0 @@ -# Meet randomI, a random data generator for test data generation -from random import randint -import multiprocessing -import time -from datetime import date - - -def main(i, a, b, o): - print('go') - for x in range(0, int(i)): - content = generateFile(a, b) - writeFile(content, x, o) - - -def generateFile(rows, lenghtOfRows): - content = [] - for y in range(0, int(rows)): - content.append(generateRow(int(lenghtOfRows))) - return content - - -def generateRow(lenghtOfRows): - row = "" - for z in range(0, lenghtOfRows): - row = row + str(randint(0, 9)) - return row - - -def writeFile(content, x, o): - file = open("testdata/file" + str(x+o) + ".txt", "w") - for w in range(0, len(content)): - file.write(content[w] + "\n") - - -if __name__ == '__main__': - print("Hello World") - q = input("How many datapieces would you like to generate? ") - a = input("How many rows should each datapiece have? ") - b = input("How long should each row be? ") - start_time = time.time() - q = int(q) - i = q / 4 - o = 0 - print('preparing') - p1 = multiprocessing.Process(target=main, args=(i, a, b, o)) - o = q / 4 - p2 = multiprocessing.Process(target=main, args=(i, a, b, o)) - o = q / 2 - p3 = multiprocessing.Process(target=main, args=(i, a, b, o)) - o = q / 2 + q / 4 - p4 = multiprocessing.Process(target=main, args=(i, a, b, o)) - print('starting') - p1.start() - p2.start() - p3.start() - p4.start() - p1.join() - p2.join() - p3.join() - p4.join() - print("Data is generated. Have fun!") - sec = time.time() - start_time - minutes = sec / 60 - print("randomI took " + str(sec) + " seconds (" + str(minutes) + " minutes) for execution.") diff --git a/src/data_generators/randomiUnLi.py b/src/data_generators/randomiUnLi.py deleted file mode 100644 index 72d6f36..0000000 --- a/src/data_generators/randomiUnLi.py +++ /dev/null @@ -1,71 +0,0 @@ -# Meet randomI, a random data generator for test data generation -from random import randint -import multiprocessing -import time -from datetime import date -from typing import Any, Union - - -def main(div, a, b, o): - for x in range(0, int(div)): - writeFile(generateFile(a, b), x, o) - - -def generateFile(rows, lenghtOfRows): - content = [] - for y in range(0, rows): - content.append(generateRow(lenghtOfRows)) - return content - - -def generateRow(lenghtOfRows): - row = "" - for z in range(0, lenghtOfRows): - row = row + str(randint(0, 9)) - return row - - -def writeFile(content, x, o): - file = open("testdata/file" + str(x + o) + ".txt", "w") - for w in range(0, len(content)): - file.write(content[w] + "\n") - - -if __name__ == '__main__': - print("Hello World") - i = int(input("How many units would you like to generate? ")) - a = int(input("How many rows should each unit have? ")) - b = int(input("How long should each row be? ")) - c = int(input("How many threads do you want to use?")) - print('preparing') - counter = int(0) - div = i / c - - while counter < c: - o = div * counter - print('thread ' + str(counter) + ' set to start at ' + str(o)) - globals()["p" + str(counter)] = multiprocessing.Process(target=main, args=(div, a, b, o)) - counter = counter + 1 - counter = int(0) - start_time = time.time() - print('starting') - - while counter < c: - globals()["p" + str(counter)].start() - print('thread number ' + str(counter) + ' just started') - counter = counter + 1 - if counter == c: print(' ') - if counter == c: print('working, this might take a while') - - counter = int(0) - - while counter < c: - globals()["p" + str(counter)].join() - print('thread number ' + str(counter) + ' just finished') - counter = counter + 1 - - print(" ") - print("Data is generated. Have fun!") - sec = time.time() - start_time - minutes = sec / 60 - print("randomI took " + str(sec) + " seconds (" + str(minutes) + " minutes) for execution.")