From 710e0b20695089d9006bf8d91dedb666195031b6 Mon Sep 17 00:00:00 2001 From: Conrad Date: Mon, 15 Jul 2019 23:14:42 +0200 Subject: [PATCH 1/7] Prepared for packaging --- entro.py | 87 -------------------------------------------- entro.py/__init__.py | 1 + entro.py/entro.py | 38 +++++++++++++++++++ setup.py | 21 +++++++++++ 4 files changed, 60 insertions(+), 87 deletions(-) delete mode 100644 entro.py create mode 100644 entro.py/__init__.py create mode 100644 entro.py/entro.py create mode 100644 setup.py diff --git a/entro.py b/entro.py deleted file mode 100644 index 4d70839..0000000 --- a/entro.py +++ /dev/null @@ -1,87 +0,0 @@ -''' - calc_entro.py calculates the entropy of a given string or file - - This uses the negative sum of the log (to the base of 2) of the probability - times the probability of a char to occur in a certain string as the entropy. -''' - -import math -import argparse - - -# Calculates the entropy of a given string (as described in the docstring) -def calculateEntropy(input_string): - alphabet, alphabet_size, entropy = {}, 0, 0 - - for char in input_string: - if char in alphabet: - alphabet[char] += 1 - else: - alphabet[char] = 1 - alphabet_size += 1 - - for char in alphabet: - alphabet[char] = alphabet[char] / alphabet_size - entropy -= alphabet[char] * math.log(alphabet[char], 2) - - return entropy, alphabet - - -# Outputs a given entropy including the original text and the alphabet with probabilities -def printEntropy(original_string, entropy_value, alphabet_dict, simple_bool): - print('---') - if simple_bool == False: - print('Content: ' + original_string) - print('Probabilities: ' + str(alphabet_dict)) - print('Entropy: ' + str(entropy_value) + ' bits') - print('---') - - -# Reads a file by a given path -def readEntropyFile(path_string): - f = open(path_string, 'r') - content = f.read().replace('\n', ' ') - f.close() - return content.strip() - - -# List of the arguments one can use to influence the behavior of the program -parser = argparse.ArgumentParser(description='Calculate the information entropy of some strings.') - -# INPUT ARGUMENTS -parser.add_argument('strings', nargs='*', default='', type=str, help='Strings to calculate the entropy of.') -parser.add_argument('--files', nargs='*', type=str, default='', help='Provide file path(s) to calculate the entropy of.') - -# OUTPUT OPTIONS -parser.add_argument('--simple', nargs='?', type=bool, default=False, help='Determines the explicitness of the output. (True = only entropy shown)') - -# CONVERT OPTIONS -parser.add_argument('--lower', nargs='?', type=bool, default=False, help='Converts given strings or textfiles to lowercase before calculating.') -parser.add_argument('--upper', nargs='?', type=bool, default=False, help='Converts given strings or textfiles to uppercase before calculating.') -parser.add_argument('--squash', nargs='?', type=bool, default=False, help='Removes all whitespaces before calculating.') -args = parser.parse_args() - -# Prepares the queue of different strings -queue = [] - -# Add all the provided strings to the list -for string in args.strings: - queue.append(string) - -# Add all the provided files to the list -for file in args.files: - string = readEntropyFile(file) - queue.append(string) - -# Interates over the collected strings and prints the entropies -for string in queue: - if args.lower != False: - string = string.lower() - elif args.upper != False: - string = string.upper() - - if args.squash != False: - string = string.replace(" ", "") - - a, b = calculateEntropy(string) - printEntropy(string, a, b, args.simple) diff --git a/entro.py/__init__.py b/entro.py/__init__.py new file mode 100644 index 0000000..ce472e3 --- /dev/null +++ b/entro.py/__init__.py @@ -0,0 +1 @@ +name = "entro.py" \ No newline at end of file diff --git a/entro.py/entro.py b/entro.py/entro.py new file mode 100644 index 0000000..3ac0589 --- /dev/null +++ b/entro.py/entro.py @@ -0,0 +1,38 @@ +import math + +# Calculates the entropy of a given string +# Returns the entropy and an alphabet with the calculated probabilities +def calculateEntropy(input_string): + alphabet, alphabet_size, entropy = {}, 0, 0 + + for char in input_string: + if char in alphabet: + alphabet[char] += 1 + else: + alphabet[char] = 1 + alphabet_size += 1 + + for char in alphabet: + alphabet[char] = alphabet[char] / alphabet_size + entropy -= alphabet[char] * math.log(alphabet[char], 2) + + return entropy, alphabet + + +# Calculates the entropy of a given string +# Returns only the entropy in bits as this is the minimal function +def calculateEntropyMin(input_string): + alphabet, alphabet_size, entropy = {}, 0, 0 + + for char in input_string: + if char in alphabet: + alphabet[char] += 1 + else: + alphabet[char] = 1 + alphabet_size += 1 + + for char in alphabet: + i = alphabet[char] / alphabet_size + entropy -= i * math.log(i, 2) + + return entropy diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..637fb3d --- /dev/null +++ b/setup.py @@ -0,0 +1,21 @@ +import setuptools + +with open("README.md", "r") as fh: + long_description = fh.read() + +setuptools.setup( + name="entro.py-creyD", + version="1.0", + author="Conrad Großer", + author_email="grosserconrad@gmail.com", + description="Small Information Entropy Calculator", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/creyD/entro.py", + packages=setuptools.find_packages(), + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], +) From aa1f3da92f7b6df908b598c29f6f857bd1da87db Mon Sep 17 00:00:00 2001 From: Conrad Date: Mon, 15 Jul 2019 23:48:44 +0200 Subject: [PATCH 2/7] Bugfixes + Renamed to entro_py --- .gitignore | 1 + entro.py/__init__.py | 1 - entro_py/__init__.py | 3 +++ entro.py/entro.py => entro_py/entro_py.py | 0 setup.py | 4 ++-- 5 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 .gitignore delete mode 100644 entro.py/__init__.py create mode 100644 entro_py/__init__.py rename entro.py/entro.py => entro_py/entro_py.py (100%) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a2a8dea --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.env/ diff --git a/entro.py/__init__.py b/entro.py/__init__.py deleted file mode 100644 index ce472e3..0000000 --- a/entro.py/__init__.py +++ /dev/null @@ -1 +0,0 @@ -name = "entro.py" \ No newline at end of file diff --git a/entro_py/__init__.py b/entro_py/__init__.py new file mode 100644 index 0000000..6f296bd --- /dev/null +++ b/entro_py/__init__.py @@ -0,0 +1,3 @@ +name = "entro_py" + +__all__ = ["calculateEntropy", "calculateEntropyMin"] \ No newline at end of file diff --git a/entro.py/entro.py b/entro_py/entro_py.py similarity index 100% rename from entro.py/entro.py rename to entro_py/entro_py.py diff --git a/setup.py b/setup.py index 637fb3d..cb1f7ac 100644 --- a/setup.py +++ b/setup.py @@ -4,8 +4,8 @@ with open("README.md", "r") as fh: long_description = fh.read() setuptools.setup( - name="entro.py-creyD", - version="1.0", + name="entro_py-creyD", + version="1.0.1", author="Conrad Großer", author_email="grosserconrad@gmail.com", description="Small Information Entropy Calculator", From d0ba734016393ccb98a68119413c570171e64410 Mon Sep 17 00:00:00 2001 From: Conrad Date: Tue, 16 Jul 2019 00:04:56 +0200 Subject: [PATCH 3/7] More Prep Module Prep --- .gitignore | 1 + entro_py/__init__.py | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index a2a8dea..896cdee 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ .env/ +__pycache__/ diff --git a/entro_py/__init__.py b/entro_py/__init__.py index 6f296bd..1f1d599 100644 --- a/entro_py/__init__.py +++ b/entro_py/__init__.py @@ -1,3 +1 @@ name = "entro_py" - -__all__ = ["calculateEntropy", "calculateEntropyMin"] \ No newline at end of file From 1b6d45b14580946021ee6dc77e001c6c6586fae9 Mon Sep 17 00:00:00 2001 From: Conrad Date: Tue, 16 Jul 2019 00:16:16 +0200 Subject: [PATCH 4/7] Updated Version Number - This is still a beta release (doesn't work) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cb1f7ac..fbaf225 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ with open("README.md", "r") as fh: setuptools.setup( name="entro_py-creyD", - version="1.0.1", + version="1.0.3", author="Conrad Großer", author_email="grosserconrad@gmail.com", description="Small Information Entropy Calculator", From ece4b7f9dd2cafcd17e34dc7a165659fe1046af4 Mon Sep 17 00:00:00 2001 From: Conrad Date: Wed, 2 Sep 2020 09:12:41 +0200 Subject: [PATCH 5/7] Tried to package the repo under new name and version --- .gitignore | 2 ++ entro_py/entro_py.py => entro-py-min/entro-py-min.py | 0 entro_py/__init__.py | 1 - setup.py | 4 ++-- 4 files changed, 4 insertions(+), 3 deletions(-) rename entro_py/entro_py.py => entro-py-min/entro-py-min.py (100%) delete mode 100644 entro_py/__init__.py diff --git a/.gitignore b/.gitignore index 896cdee..ebb9095 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .env/ __pycache__/ +dist/ +*.egg-info/ diff --git a/entro_py/entro_py.py b/entro-py-min/entro-py-min.py similarity index 100% rename from entro_py/entro_py.py rename to entro-py-min/entro-py-min.py diff --git a/entro_py/__init__.py b/entro_py/__init__.py deleted file mode 100644 index 1f1d599..0000000 --- a/entro_py/__init__.py +++ /dev/null @@ -1 +0,0 @@ -name = "entro_py" diff --git a/setup.py b/setup.py index fbaf225..38c5cc1 100644 --- a/setup.py +++ b/setup.py @@ -4,8 +4,8 @@ with open("README.md", "r") as fh: long_description = fh.read() setuptools.setup( - name="entro_py-creyD", - version="1.0.3", + name="entro_py_min", + version="0.0.1", author="Conrad Großer", author_email="grosserconrad@gmail.com", description="Small Information Entropy Calculator", From 6fbfea9c9a3e82d50f3c9aa8b1e298fe6c407555 Mon Sep 17 00:00:00 2001 From: ptrstr Date: Sat, 3 Oct 2020 10:49:26 -0400 Subject: [PATCH 6/7] Added installation for pip --- README.md | 6 +++ entro_py_min/__main__.py | 45 +++++++++++++++++++ .../entro_py_min.py | 21 +++++++++ 3 files changed, 72 insertions(+) create mode 100644 entro_py_min/__main__.py rename entro-py-min/entro-py-min.py => entro_py_min/entro_py_min.py (61%) diff --git a/README.md b/README.md index 2eb7bd4..cce5d10 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,12 @@ Well that is basically up to you. Entropy functions are used in Computer Science *Warning:* This can only be used for calculating the entropy of strings (by alphabet). There are however other types like coin tosses of fair or unfair coins (...), but you're gonna have to write calculators for this on your own - for now. +## Installing +You can install this package easily with `pip`: +``` +$ pip install git+https://github.com/creyD/entro.py@dev_package +``` + ## Usage You can run as much calculations as you want in one run of the script. For example use it like this with a simple string: ``` diff --git a/entro_py_min/__main__.py b/entro_py_min/__main__.py new file mode 100644 index 0000000..e1fdc02 --- /dev/null +++ b/entro_py_min/__main__.py @@ -0,0 +1,45 @@ +from . import entro_py_min +import argparse + + +# List of the arguments one can use to influence the behavior of the program +parser = argparse.ArgumentParser('entro_py_min', description='Calculate the information entropy of alphabets.') + +# INPUT ARGUMENTS +parser.add_argument('strings', nargs='*', default='', type=str, help='Strings to calculate the entropy of.') +parser.add_argument('--files', nargs='*', type=str, default='', help='Provide file path(s) to calculate the entropy of.') + +# OUTPUT OPTIONS +parser.add_argument('--simple', nargs='?', type=bool, default=False, help='Determines the explicitness of the output. (True = only entropy shown)') +parser.add_argument('--max', nargs='?', type=bool, default=False, help='Includes the maximum entropy.') + +# CONVERT OPTIONS +parser.add_argument('--lower', nargs='?', type=bool, default=False, help='Converts given strings or textfiles to lowercase before calculating.') +parser.add_argument('--upper', nargs='?', type=bool, default=False, help='Converts given strings or textfiles to uppercase before calculating.') +parser.add_argument('--squash', nargs='?', type=bool, default=False, help='Removes all whitespaces before calculating.') +args = parser.parse_args() + +# Prepares the queue of different strings +queue = [] + +# Add all the provided strings to the list +for string in args.strings: + queue.append(string) + +# Add all the provided files to the list +for file in args.files: + string = entro_py_min.readEntropyFile(file) + queue.append(string) + +# Interates over the collected strings and prints the entropies +for string in queue: + if args.lower: + string = string.lower() + elif args.upper: + string = string.upper() + + if args.squash: + string = string.replace(" ", "") + + a, b, c = entro_py_min.calculateEntropy(string) + entro_py_min.printEntropy(string, a, b, args.simple, (False if not args.max else c)) diff --git a/entro-py-min/entro-py-min.py b/entro_py_min/entro_py_min.py similarity index 61% rename from entro-py-min/entro-py-min.py rename to entro_py_min/entro_py_min.py index 3ac0589..d03cf53 100644 --- a/entro-py-min/entro-py-min.py +++ b/entro_py_min/entro_py_min.py @@ -1,5 +1,6 @@ import math + # Calculates the entropy of a given string # Returns the entropy and an alphabet with the calculated probabilities def calculateEntropy(input_string): @@ -36,3 +37,23 @@ def calculateEntropyMin(input_string): entropy -= i * math.log(i, 2) return entropy + + +# Outputs a given entropy including the original text and the alphabet with probabilities +def printEntropy(original_string, entropy_value, alphabet_dict, simple_bool, max_value): + print('---') + if not simple_bool: + print('Content: ' + original_string) + print('Probabilities: ' + str(alphabet_dict)) + print('Entropy: ' + str(entropy_value) + ' bits') + if max_value: + print('Maximum Entropy: ' + str(max_value) + ' bits') + print('---') + + +# Reads a file by a given path +def readEntropyFile(path_string): + f = open(path_string, 'r') + content = f.read().replace('\n', ' ') + f.close() + return content.strip() From d3c98442a970a572dbc8d90279018396f0873df3 Mon Sep 17 00:00:00 2001 From: Conrad Date: Mon, 5 Oct 2020 09:11:13 +0200 Subject: [PATCH 7/7] Added max entropy calculator --- entro_py_min/entro_py_min.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/entro_py_min/entro_py_min.py b/entro_py_min/entro_py_min.py index d03cf53..cb42844 100644 --- a/entro_py_min/entro_py_min.py +++ b/entro_py_min/entro_py_min.py @@ -4,38 +4,36 @@ import math # Calculates the entropy of a given string # Returns the entropy and an alphabet with the calculated probabilities def calculateEntropy(input_string): - alphabet, alphabet_size, entropy = {}, 0, 0 + alphabet, alphabet_size, entropy = {}, len(input_string), 0 for char in input_string: if char in alphabet: alphabet[char] += 1 else: alphabet[char] = 1 - alphabet_size += 1 for char in alphabet: alphabet[char] = alphabet[char] / alphabet_size entropy -= alphabet[char] * math.log(alphabet[char], 2) - return entropy, alphabet + max_entropy = - len(alphabet) * (1 / len(alphabet) * math.log(1 / len(alphabet), 2)) + return entropy, alphabet, max_entropy # Calculates the entropy of a given string # Returns only the entropy in bits as this is the minimal function def calculateEntropyMin(input_string): - alphabet, alphabet_size, entropy = {}, 0, 0 + alphabet, alphabet_size, entropy = {}, len(input_string), 0 for char in input_string: if char in alphabet: alphabet[char] += 1 else: alphabet[char] = 1 - alphabet_size += 1 for char in alphabet: i = alphabet[char] / alphabet_size entropy -= i * math.log(i, 2) - return entropy