Revision randomi Mk II
- Fixed file naming bug
- Improved performance
- Merged all variants of randomi into randomi.py
- Fully commented the code
- Added benchmarks

Co-Authored-By: tchemn <tchemn@users.noreply.github.com>
This commit is contained in:
48
docs/Benchmarking.md
Normal file
48
docs/Benchmarking.md
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# 10000/50/25 Benchmark
|
||||||
|
|
||||||
|
## One Core Utilisation
|
||||||
|
|
||||||
|
randomi.py
|
||||||
|
|
||||||
|
### Conrad's Setup (HDD)
|
||||||
|
53.5 sec
|
||||||
|
|
||||||
|
### Conrad's Setup (m.2 SSD)
|
||||||
|
63.29 sec
|
||||||
|
|
||||||
|
### Tillmann's Setup (HDD)
|
||||||
|
82.29 sec
|
||||||
|
|
||||||
|
## 4 Core Utilisation
|
||||||
|
|
||||||
|
randomi4c.py
|
||||||
|
|
||||||
|
### Conrad's Setup (HDD)
|
||||||
|
17.71 sec
|
||||||
|
|
||||||
|
### Conrad's Setup (m.2 SSD)
|
||||||
|
16.33 sec
|
||||||
|
|
||||||
|
## 8 Core Utilisation
|
||||||
|
|
||||||
|
randomiUnLi.py
|
||||||
|
|
||||||
|
### Conrad's Setup (HDD)
|
||||||
|
12.58 sec
|
||||||
|
|
||||||
|
### Conrad's Setup (m.2 SSD)
|
||||||
|
12.32 sec
|
||||||
|
|
||||||
|
## One Core Utilisation (Mk II)
|
||||||
|
|
||||||
|
randomi.py (Mk II)
|
||||||
|
|
||||||
|
### Conrad's Setup (HDD)
|
||||||
|
54.45 sec
|
||||||
|
|
||||||
|
## 8 Core Utilisation (Mk II)
|
||||||
|
|
||||||
|
randomi.py (Mk II)
|
||||||
|
|
||||||
|
### Conrad's Setup (HDD)
|
||||||
|
11.71 sec
|
||||||
@@ -1,40 +1,79 @@
|
|||||||
# Meet randomI, a random data generator for test data generation
|
# Meet randomI Mk II, a random data generator for test data generation
|
||||||
|
|
||||||
|
# For random generation of numbers import randint
|
||||||
from random import randint
|
from random import randint
|
||||||
|
|
||||||
|
# Importing the time for benchmarking purposes
|
||||||
import time
|
import time
|
||||||
from datetime import date
|
from datetime import date
|
||||||
|
|
||||||
def main(i, a, b):
|
# Importing for multi core processing
|
||||||
for x in range(0, i):
|
import multiprocessing
|
||||||
writeFile(generateFile(a, b), x)
|
from typing import Any, Union
|
||||||
|
|
||||||
def generateFile(rows, lenghtOfRows):
|
# randomI function which creates each file
|
||||||
|
def randomI(units, rows, rowLength, partstart):
    """Generate ``units`` files and write each one to disk.

    Args:
        units: Number of files this call produces.
        rows: Number of rows per file, forwarded to ``generateFile``.
        rowLength: Number of digits per row, forwarded to ``generateFile``.
        partstart: Offset forwarded to ``writeFile``, which adds it to the
            per-call counter to form the file number.
    """
    for unit_index in range(units):
        unit_content = generateFile(rows, rowLength)
        writeFile(unit_content, unit_index, partstart)
|
||||||
|
|
||||||
|
# Function for generating the content of one single file
|
||||||
|
def generateFile(rows, rowLength):
    """Build the content of one file as a list of random digit rows.

    Args:
        rows: Number of rows to generate.
        rowLength: Number of digits in each row.

    Returns:
        A list with ``rows`` entries, each the result of
        ``generateRow(rowLength)``.
    """
    return [generateRow(rowLength) for _ in range(rows)]
|
||||||
|
|
||||||
def generateRow(lenghtOfRows):
|
# Function for generating the content of one single row randomly
|
||||||
|
def generateRow(rowLength):
    """Return one row made of ``rowLength`` random decimal digits.

    Args:
        rowLength: Number of digits to generate; ``0`` yields an empty string.

    Returns:
        A string of ``rowLength`` characters, each drawn with
        ``randint(0, 9)``.
    """
    # Join once instead of growing the string one character per iteration.
    return "".join(str(randint(0, 9)) for _ in range(rowLength))
|
||||||
|
|
||||||
def writeFile(content, x):
|
# Function for writing data into a file
|
||||||
file = open("testdata/file" + str(x) + ".txt", "w")
|
def writeFile(content, setcounter, partstart):
    """Write the rows in ``content`` to ``testdata/file<N>.txt``.

    The file number ``N`` is ``int(setcounter) + int(partstart)``.

    Args:
        content: Sequence of row strings; each is written followed by a
            newline.
        setcounter: Index of the file within the current batch.
        partstart: Offset of the current batch.

    Raises:
        OSError: If the target file cannot be opened for writing (for
            example when the ``testdata`` directory does not exist).
    """
    filenumber = int(setcounter) + int(partstart)
    # The context manager closes (and flushes) the handle even if a write fails.
    with open("testdata/file" + str(filenumber) + ".txt", "w") as file:
        file.writelines(line + "\n" for line in content)
|
||||||
|
|
||||||
print("Hello World")
|
if __name__ == '__main__':
    # Getting the user input
    print("Hello World")
    units = int(input("How many units would you like to generate? "))
    rows = int(input("How many rows should each unit have? "))
    rowLength = int(input("How long should each row be? "))
    cores = int(input("How many cores do you want to use? "))
    if cores < 1:
        # Zero or negative cores would otherwise fail in the division below.
        raise SystemExit("At least one core is required.")

    # Splitting up the units with integer arithmetic: every process gets
    # `partsize` units and the first `remainder` processes get one more, so
    # no unit is dropped and the file numbers stay contiguous.
    partsize, remainder = divmod(units, cores)

    # For benchmarking starting the timer now
    start_time = time.time()

    # Initialize and prepare cores for process
    processes = []
    partstart = 0
    for count in range(cores):
        share = partsize + (1 if count < remainder else 0)
        processes.append(multiprocessing.Process(
            target=randomI, args=(share, rows, rowLength, partstart)))
        partstart += share

    # Starting each core
    for count, process in enumerate(processes):
        process.start()
        print("Core " + str(count) + " started.")

    print("Working...")

    # Joining each core for the process
    for process in processes:
        process.join()

    # Finishing up the process
    sec = time.time() - start_time
    print("Data is generated. Have fun!")
    print("randomI took " + str(sec) + " seconds for execution.")
|
||||||
|
|||||||
@@ -1,64 +0,0 @@
|
|||||||
# Meet randomI, a random data generator for test data generation
|
|
||||||
from random import randint
|
|
||||||
import multiprocessing
|
|
||||||
import time
|
|
||||||
from datetime import date
|
|
||||||
|
|
||||||
|
|
||||||
def main(i, a, b, o):
    """Generate ``int(i)`` files and write each one to disk.

    Args:
        i: Number of files to produce; converted with ``int``.
        a: Row count forwarded to ``generateFile``.
        b: Row length forwarded to ``generateFile``.
        o: Offset forwarded to ``writeFile`` for the file number.
    """
    print('go')
    for file_index in range(int(i)):
        writeFile(generateFile(a, b), file_index, o)
|
|
||||||
|
|
||||||
|
|
||||||
def generateFile(rows, lenghtOfRows):
    """Build the content of one file as a list of random digit rows.

    Both arguments are converted with ``int`` before use.

    Args:
        rows: Number of rows to generate.
        lenghtOfRows: Number of digits in each row.

    Returns:
        A list with ``int(rows)`` entries, each produced by ``generateRow``.
    """
    return [generateRow(int(lenghtOfRows)) for _ in range(int(rows))]
|
|
||||||
|
|
||||||
|
|
||||||
def generateRow(lenghtOfRows):
    """Return one row made of ``lenghtOfRows`` random decimal digits.

    Args:
        lenghtOfRows: Number of digits to generate; ``0`` yields an empty
            string.

    Returns:
        A string of ``lenghtOfRows`` characters, each drawn with
        ``randint(0, 9)``.
    """
    # Join once instead of growing the string one character per iteration.
    return "".join(str(randint(0, 9)) for _ in range(lenghtOfRows))
|
|
||||||
|
|
||||||
|
|
||||||
def writeFile(content, x, o):
    """Write the rows in ``content`` to ``testdata/file<N>.txt``.

    The file number ``N`` is ``int(x) + int(o)``. Converting to ``int`` keeps
    a float offset (for example one produced by ``q / 4``) from leaking into
    the file name as ``file3.0.txt``.

    Args:
        content: Sequence of row strings; each is written followed by a
            newline.
        x: Index of the file within the current batch.
        o: Offset of the current batch.

    Raises:
        OSError: If the target file cannot be opened for writing (for
            example when the ``testdata`` directory does not exist).
    """
    filenumber = int(x) + int(o)
    # The context manager closes (and flushes) the handle even if a write fails.
    with open("testdata/file" + str(filenumber) + ".txt", "w") as file:
        file.writelines(line + "\n" for line in content)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # This variant always splits the work over four processes.
    WORKERS = 4

    print("Hello World")
    q = int(input("How many datapieces would you like to generate? "))
    a = int(input("How many rows should each datapiece have? "))
    b = int(input("How long should each row be? "))
    start_time = time.time()

    print('preparing')
    # Integer split: every process gets `base` files and the first `extra`
    # processes get one more, so all `q` files are produced and the offsets
    # stay whole numbers.
    base, extra = divmod(q, WORKERS)
    processes = []
    o = 0
    for index in range(WORKERS):
        i = base + (1 if index < extra else 0)
        processes.append(multiprocessing.Process(target=main, args=(i, a, b, o)))
        o += i

    print('starting')
    for process in processes:
        process.start()
    for process in processes:
        process.join()

    print("Data is generated. Have fun!")
    sec = time.time() - start_time
    minutes = sec / 60
    print("randomI took " + str(sec) + " seconds (" + str(minutes) + " minutes) for execution.")
|
|
||||||
@@ -1,71 +0,0 @@
|
|||||||
# Meet randomI, a random data generator for test data generation
|
|
||||||
from random import randint
|
|
||||||
import multiprocessing
|
|
||||||
import time
|
|
||||||
from datetime import date
|
|
||||||
from typing import Any, Union
|
|
||||||
|
|
||||||
|
|
||||||
def main(div, a, b, o):
    """Generate ``int(div)`` files and write each one to disk.

    Args:
        div: Number of files to produce; converted with ``int``.
        a: Row count forwarded to ``generateFile``.
        b: Row length forwarded to ``generateFile``.
        o: Offset forwarded to ``writeFile`` for the file number.
    """
    for file_index in range(int(div)):
        file_content = generateFile(a, b)
        writeFile(file_content, file_index, o)
|
|
||||||
|
|
||||||
|
|
||||||
def generateFile(rows, lenghtOfRows):
    """Build the content of one file as a list of random digit rows.

    Args:
        rows: Number of rows to generate.
        lenghtOfRows: Number of digits in each row.

    Returns:
        A list with ``rows`` entries, each the result of
        ``generateRow(lenghtOfRows)``.
    """
    return [generateRow(lenghtOfRows) for _ in range(rows)]
|
|
||||||
|
|
||||||
|
|
||||||
def generateRow(lenghtOfRows):
    """Return one row made of ``lenghtOfRows`` random decimal digits.

    Args:
        lenghtOfRows: Number of digits to generate; ``0`` yields an empty
            string.

    Returns:
        A string of ``lenghtOfRows`` characters, each drawn with
        ``randint(0, 9)``.
    """
    # Join once instead of growing the string one character per iteration.
    return "".join(str(randint(0, 9)) for _ in range(lenghtOfRows))
|
|
||||||
|
|
||||||
|
|
||||||
def writeFile(content, x, o):
    """Write the rows in ``content`` to ``testdata/file<N>.txt``.

    The file number ``N`` is ``int(x) + int(o)``. Converting to ``int`` keeps
    a float offset (for example one produced by ``i / c``) from leaking into
    the file name as ``file3.0.txt``.

    Args:
        content: Sequence of row strings; each is written followed by a
            newline.
        x: Index of the file within the current batch.
        o: Offset of the current batch.

    Raises:
        OSError: If the target file cannot be opened for writing (for
            example when the ``testdata`` directory does not exist).
    """
    filenumber = int(x) + int(o)
    # The context manager closes (and flushes) the handle even if a write fails.
    with open("testdata/file" + str(filenumber) + ".txt", "w") as file:
        file.writelines(line + "\n" for line in content)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    print("Hello World")
    i = int(input("How many units would you like to generate? "))
    a = int(input("How many rows should each unit have? "))
    b = int(input("How long should each row be? "))
    c = int(input("How many threads do you want to use?"))
    if c < 1:
        # Zero or negative workers would otherwise fail in the division below.
        raise SystemExit("At least one thread is required.")

    print('preparing')
    # Integer split: every process gets `div` units and the first `leftover`
    # processes get one more, so no unit is dropped and the start offsets stay
    # whole numbers.
    div, leftover = divmod(i, c)
    processes = []
    o = 0
    for counter in range(c):
        share = div + (1 if counter < leftover else 0)
        print('thread ' + str(counter) + ' set to start at ' + str(o))
        processes.append(multiprocessing.Process(target=main, args=(share, a, b, o)))
        o += share

    start_time = time.time()
    print('starting')
    for counter, process in enumerate(processes):
        process.start()
        print('thread number ' + str(counter) + ' just started')
    print(' ')
    print('working, this might take a while')

    for counter, process in enumerate(processes):
        process.join()
        print('thread number ' + str(counter) + ' just finished')

    print(" ")
    print("Data is generated. Have fun!")
    sec = time.time() - start_time
    minutes = sec / 60
    print("randomI took " + str(sec) + " seconds (" + str(minutes) + " minutes) for execution.")
|
|
||||||
Reference in New Issue
Block a user