Revision randomi Mk II

- Fixed file naming bug
- Improved performance
- Merged all variants of randomi into randomi.py
- Fully commented the code
- Added benchmarks

Co-Authored-By: tchemn <tchemn@users.noreply.github.com>
This commit is contained in:
2018-05-18 10:58:17 +02:00
parent f19bfb4710
commit b9295dd703
4 changed files with 107 additions and 155 deletions

48
docs/Benchmarking.md Normal file
View File

@@ -0,0 +1,48 @@
# 10000/50/25 Benchmark
## One Core Utilisation
randomi.py
### Conrads Setup (HDD)
53.5 sec
### Conrads Setup (m.2 SSD)
63.29 sec
### Tillmanns Setup (HDD)
82.29 sec
## 4 Core Utilisation
randomi4c.py
### Conrads Setup (HDD)
17.71 sec
### Conrads Setup (m.2 SSD)
16.33 sec
## 8 Core Utilisation
randomiUnLi.py
### Conrads Setup (HDD)
12.58 sec
### Conrads Setup (m.2 SSD)
12.32 sec
## One Core Utilisation (Mk II)
randomi.py (Mk II)
### Conrads Setup (HDD)
54.45 sec
## 8 Core Utilisation (Mk II)
randomi.py (Mk II)
### Conrads Setup (HDD)
11.71 sec

View File

@@ -1,40 +1,79 @@
# Meet randomI, a random data generator for test data generation # Meet randomI Mk II, a random data generator for test data generation
# For random generation of numbers import randint
from random import randint from random import randint
# Importing the time for benchmarking purposes
import time import time
from datetime import date from datetime import date
def main(i, a, b): # Importing for multi core processing
for x in range(0, i): import multiprocessing
writeFile(generateFile(a, b), x) from typing import Any, Union
def generateFile(rows, lenghtOfRows): # randomI function which creates each file
def randomI(units, rows, rowLength, partstart):
for setcounter in range(0, units):
writeFile(generateFile(rows, rowLength), setcounter, partstart)
# Function for generating the content of one single file
def generateFile(rows, rowLength):
content = [] content = []
for y in range(0, rows): for y in range(0, rows):
content.append(generateRow(lenghtOfRows)) content.append(generateRow(rowLength))
return content return content
def generateRow(lenghtOfRows): # Function for generating the content of one single row randomly
def generateRow(rowLength):
row = "" row = ""
for z in range(0, lenghtOfRows): for z in range(0, rowLength):
row = row + str(randint(0, 9)) row = row + str(randint(0, 9))
return row return row
def writeFile(content, x): # Function for writing data into a file
file = open("testdata/file" + str(x) + ".txt", "w") def writeFile(content, setcounter, partstart):
filenumber = int(setcounter) + int(partstart)
file = open("testdata/file" + str(filenumber) + ".txt", "w")
for w in range(0, len(content)): for w in range(0, len(content)):
file.write(content[w] + "\n") file.write(content[w] + "\n")
print("Hello World") if __name__ == '__main__':
i = int(input("How many datapices would you like to generate? ")) # Getting the user input
a = int(input("How many rows should each datapiece have? ")) print("Hello World")
b = int(input("How long should each row be? ")) units = int(input("How many units would you like to generate? "))
rows = int(input("How many rows should each unit have? "))
rowLength = int(input("How long should each row be? "))
cores = int(input("How many cores do you want to use? "))
start_time = time.time() # Splitting up the units
count = int(0)
partsize = units / cores
main(i, a, b) # For benchmarking starting the timer now
start_time = time.time()
print("Data is generated. Have fun!") # Initialize and prepare cores for process
sec = time.time() - start_time while count < cores:
minutes = sec / 60 partstart = partsize * count
print("randomI took " + str(sec) + " seconds (" + str(minutes) + " minutes) for execution.") globals()["p" + str(count)] = multiprocessing.Process(target=randomI, args=(int(partsize), rows, rowLength, partstart))
count = count + 1
# Starting each core
count = int(0)
while count < cores:
globals()["p" + str(count)].start()
print("Core " + str(count) + " started.")
count = count + 1
print("Working...")
# Joining each core for the process
count = int(0)
while count < cores:
globals()["p" + str(count)].join()
count = count + 1
# Finishing up the process
sec = time.time() - start_time
print("Data is generated. Have fun!")
print("randomI took " + str(sec) + " seconds for execution.")

View File

@@ -1,64 +0,0 @@
# Meet randomI, a random data generator for test data generation
from random import randint
import multiprocessing
import time
from datetime import date
def main(i, a, b, o):
    """Generate and write ``int(i)`` files, numbered from offset ``o``.

    For every index below ``int(i)`` the content is built by
    generateFile(a, b) and handed to writeFile together with the index
    and the offset.
    """
    print('go')
    total = int(i)
    index = 0
    while index < total:
        # Build the content first, then persist it under index + offset.
        writeFile(generateFile(a, b), index, o)
        index += 1
def generateFile(rows, lenghtOfRows):
    """Return a list with ``int(rows)`` rows produced by generateRow.

    Both arguments are passed through int(); the row length is converted
    once per generated row.
    """
    return [generateRow(int(lenghtOfRows)) for _ in range(int(rows))]
def generateRow(lenghtOfRows):
    """Return a string of ``lenghtOfRows`` random decimal digits.

    Each digit is drawn with randint(0, 9), one call per character, in
    left-to-right order.
    """
    return "".join(str(randint(0, 9)) for _ in range(lenghtOfRows))
def writeFile(content, x, o):
    """Write ``content`` line by line to ``testdata/file<N>.txt``.

    ``content`` is a sequence of strings; each one is written followed by
    a newline. ``N`` is ``int(x) + int(o)``: the offset ``o`` arrives as a
    float when the caller derives it with ``/``, and formatting the raw sum
    would produce names such as ``file26.0.txt``.

    The ``testdata`` directory must already exist; open() raises
    FileNotFoundError otherwise.
    """
    filenumber = int(x) + int(o)
    # The context manager closes the handle even if a write fails.
    with open("testdata/file" + str(filenumber) + ".txt", "w") as file:
        file.writelines(line + "\n" for line in content)
if __name__ == '__main__':
    # Script entry point: ask for the job size, split it over four worker
    # processes and report the wall-clock time.
    print("Hello World")
    q = input("How many datapieces would you like to generate? ")
    a = input("How many rows should each datapiece have? ")
    b = input("How long should each row be? ")

    start_time = time.time()
    q = int(q)

    # Split with integer arithmetic. True division gave float counts and
    # offsets, which produced file names like "file25.0.txt" and silently
    # dropped datapieces whenever q was not divisible by 4. The first
    # `extra` workers take one additional datapiece so exactly q are made.
    workers = 4
    share, extra = divmod(q, workers)

    print('preparing')
    processes = []
    offset = 0
    for index in range(workers):
        amount = share + (1 if index < extra else 0)
        processes.append(
            multiprocessing.Process(target=main, args=(amount, a, b, offset))
        )
        # The next worker starts numbering where this one stops.
        offset += amount

    print('starting')
    for process in processes:
        process.start()
    for process in processes:
        process.join()

    print("Data is generated. Have fun!")
    sec = time.time() - start_time
    minutes = sec / 60
    print("randomI took " + str(sec) + " seconds (" + str(minutes) + " minutes) for execution.")

View File

@@ -1,71 +0,0 @@
# Meet randomI, a random data generator for test data generation
from random import randint
import multiprocessing
import time
from datetime import date
from typing import Any, Union
def main(div, a, b, o):
    """Produce ``int(div)`` files whose numbering starts at offset ``o``.

    Each file gets its content from generateFile(a, b) and is stored by
    writeFile under the loop index combined with ``o``.
    """
    amount = int(div)
    for index in range(amount):
        content = generateFile(a, b)
        writeFile(content, index, o)
def generateFile(rows, lenghtOfRows):
    """Return a list of ``rows`` random rows, each made by generateRow(lenghtOfRows)."""
    return [generateRow(lenghtOfRows) for _ in range(rows)]
def generateRow(lenghtOfRows):
    """Build one row: a string of ``lenghtOfRows`` random digits (0-9).

    One randint(0, 9) call is made per character, in order.
    """
    digits = [str(randint(0, 9)) for _ in range(lenghtOfRows)]
    return "".join(digits)
def writeFile(content, x, o):
    """Store ``content`` in ``testdata/file<N>.txt``, one entry per line.

    ``N`` is ``int(x) + int(o)``. The caller computes the offset ``o`` with
    true division, so it is a float; converting before formatting keeps the
    name free of a fractional part (``file12.txt``, not ``file12.0.txt``).

    The ``testdata`` directory must already exist; open() raises
    FileNotFoundError otherwise.
    """
    target = "testdata/file" + str(int(x) + int(o)) + ".txt"
    # Use a context manager so the handle is flushed and closed on every path.
    with open(target, "w") as file:
        for line in content:
            file.write(line + "\n")
if __name__ == '__main__':
    # Script entry point: read the job parameters, spread the units over
    # `c` worker processes and report the wall-clock time.
    print("Hello World")
    i = int(input("How many units would you like to generate? "))
    a = int(input("How many rows should each unit have? "))
    b = int(input("How long should each row be? "))
    c = int(input("How many threads do you want to use?"))

    print('preparing')
    # Integer split: with true division the per-worker count and the start
    # offsets were floats, which produced names like "file12.5.txt" and lost
    # units whenever i was not divisible by c. The first `extra` workers get
    # one additional unit so that exactly i files are written.
    share, extra = divmod(i, c)
    # Keep the workers in a list instead of injecting p0, p1, ... into globals().
    processes = []
    o = 0
    for counter in range(c):
        print('thread ' + str(counter) + ' set to start at ' + str(o))
        amount = share + (1 if counter < extra else 0)
        processes.append(
            multiprocessing.Process(target=main, args=(amount, a, b, o))
        )
        o += amount

    # Timing starts only after the workers are prepared.
    start_time = time.time()
    print('starting')
    for counter, process in enumerate(processes):
        process.start()
        print('thread number ' + str(counter) + ' just started')
    if processes:
        print(' ')
        print('working, this might take a while')

    for counter, process in enumerate(processes):
        process.join()
        print('thread number ' + str(counter) + ' just finished')

    print(" ")
    print("Data is generated. Have fun!")
    sec = time.time() - start_time
    minutes = sec / 60
    print("randomI took " + str(sec) + " seconds (" + str(minutes) + " minutes) for execution.")