mirror of
https://github.com/creyD/entro.py.git
synced 2026-04-12 12:00:28 +02:00
Merge branch 'dev_package'
This commit is contained in:
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
.env/
|
||||
__pycache__/
|
||||
dist/
|
||||
*.egg-info/
|
||||
@@ -16,6 +16,12 @@ Well that is basically up to you. Entropy functions are used in Computer Science
|
||||
|
||||
*Update:* This script can now calculate the maximum entropy too. This is pretty useful for pre-compression analyses. The maximum entropy is calculated by giving every symbol of the alphabet the same probability and calculating the entropy of that distribution: `-1 * SIZE_OF_ALPHABET * (DISTINCT_PROBABILITY * log(DISTINCT_PROBABILITY, 2))`.
|
||||
|
||||
## Installing
|
||||
You can install this package easily with `pip`:
|
||||
```
|
||||
$ pip install git+https://github.com/creyD/entro.py@dev_package
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
You can run as many calculations as you want in one run of the script. For example, use it like this with a simple string (you can skip the quotation marks if your string contains no spaces):
|
||||
|
||||
90
entro.py
90
entro.py
@@ -1,90 +0,0 @@
|
||||
'''
|
||||
calc_entro.py calculates the entropy of a given string or file
|
||||
|
||||
This uses the negative sum of the log (to the base of 2) of the probability
|
||||
times the probability of a char to occur in a certain string as the entropy.
|
||||
'''
|
||||
|
||||
import math
|
||||
import argparse
|
||||
|
||||
|
||||
# Calculates the entropy of a given string (as described in the docstring)
|
||||
def calculateEntropy(input_string):
    '''
    Calculates the Shannon entropy of input_string in bits.

    Returns a tuple (entropy, alphabet, max_entropy):
      entropy      negative sum of p * log2(p) over all distinct symbols
      alphabet     dict mapping each distinct symbol to its relative frequency
      max_entropy  entropy of a uniform distribution over the same alphabet

    An empty input yields (0.0, {}, 0.0).
    '''
    # Without symbols there is nothing to divide by; return early instead of
    # raising ZeroDivisionError further down.
    if not input_string:
        return 0.0, {}, 0.0

    total = len(input_string)
    counts = {}
    for char in input_string:
        counts[char] = counts.get(char, 0) + 1

    alphabet = {char: count / total for char, count in counts.items()}

    entropy = 0.0
    for probability in alphabet.values():
        entropy -= probability * math.log2(probability)

    # -n * (1/n * log2(1/n)) simplifies to log2(n). The simplified form also
    # avoids a negative zero when the alphabet has a single symbol.
    max_entropy = math.log2(len(alphabet))
    return entropy, alphabet, max_entropy
|
||||
|
||||
|
||||
# Outputs a given entropy including the original text and the alphabet with probabilities
|
||||
def printEntropy(original_string, entropy_value, alphabet_dict, simple_bool, max_value):
    '''
    Prints an entropy report framed by '---' lines.

    original_string  the analysed text (printed unless simple_bool is truthy)
    entropy_value    the entropy to report, in bits
    alphabet_dict    the symbol probabilities (printed unless simple_bool is truthy)
    simple_bool      if truthy, only the entropy line(s) are printed
    max_value        the maximum entropy to report, or False/None to omit it
    '''
    print('---')
    if not simple_bool:
        print('Content: ' + original_string)
        print('Probabilities: ' + str(alphabet_dict))
    print('Entropy: ' + str(entropy_value) + ' bits')
    # Check for the "omit" markers explicitly: a plain truthiness test would
    # also hide a legitimate maximum entropy of 0 (single-symbol alphabet).
    if max_value is not None and max_value is not False:
        print('Maximum Entropy: ' + str(max_value) + ' bits')
    print('---')
|
||||
|
||||
|
||||
# Reads a file by a given path
|
||||
def readEntropyFile(path_string):
    '''
    Reads the text file at path_string and returns its content as one line.

    Newlines are replaced by spaces and leading/trailing whitespace is
    stripped. Errors from open() or read() propagate to the caller.
    '''
    # The with-block closes the file even when reading raises.
    with open(path_string, 'r') as handle:
        content = handle.read()
    return content.replace('\n', ' ').strip()
|
||||
|
||||
|
||||
# Converts the optional value of a boolean switch into a real boolean.
# argparse's type=bool treats every non-empty string as True, so
# "--simple False" would otherwise switch the option on.
def _str_to_bool(value):
    return value.strip().lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')


# List of the arguments one can use to influence the behavior of the program
parser = argparse.ArgumentParser(description='Calculate the information entropy of alphabets.')

# INPUT ARGUMENTS
parser.add_argument('strings', nargs='*', default='', type=str, help='Strings to calculate the entropy of.')
parser.add_argument('--files', nargs='*', type=str, default='', help='Provide file path(s) to calculate the entropy of.')

# OUTPUT OPTIONS
# const=True makes a bare switch (e.g. "--simple") enable the option; without
# it argparse stores None for a switch that is given without a value.
parser.add_argument('--simple', nargs='?', type=_str_to_bool, const=True, default=False, help='Determines the explicitness of the output. (True = only entropy shown)')
parser.add_argument('--max', nargs='?', type=_str_to_bool, const=True, default=False, help='Includes the maximum entropy.')

# CONVERT OPTIONS
parser.add_argument('--lower', nargs='?', type=_str_to_bool, const=True, default=False, help='Converts given strings or textfiles to lowercase before calculating.')
parser.add_argument('--upper', nargs='?', type=_str_to_bool, const=True, default=False, help='Converts given strings or textfiles to uppercase before calculating.')
parser.add_argument('--squash', nargs='?', type=_str_to_bool, const=True, default=False, help='Removes all whitespaces before calculating.')
args = parser.parse_args()

# Prepares the queue of different strings, starting with the provided strings
queue = list(args.strings)

# Add the content of all the provided files to the list
for file in args.files:
    queue.append(readEntropyFile(file))

# Iterates over the collected strings and prints the entropies
for string in queue:
    # --lower takes precedence when both case options are given
    if args.lower:
        string = string.lower()
    elif args.upper:
        string = string.upper()

    if args.squash:
        # split() without arguments splits on every kind of whitespace, so
        # tabs and line breaks are removed as well as plain spaces.
        string = ''.join(string.split())

    entropy, alphabet, max_entropy = calculateEntropy(string)
    printEntropy(string, entropy, alphabet, args.simple, (max_entropy if args.max else False))
|
||||
45
entro_py_min/__main__.py
Normal file
45
entro_py_min/__main__.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from . import entro_py_min
|
||||
import argparse
|
||||
|
||||
|
||||
# Converts the optional value of a boolean switch into a real boolean.
# argparse's type=bool treats every non-empty string as True, so
# "--simple False" would otherwise switch the option on.
def _str_to_bool(value):
    return value.strip().lower() not in ('', '0', 'false', 'f', 'no', 'n', 'off')


# List of the arguments one can use to influence the behavior of the program
parser = argparse.ArgumentParser('entro_py_min', description='Calculate the information entropy of alphabets.')

# INPUT ARGUMENTS
parser.add_argument('strings', nargs='*', default='', type=str, help='Strings to calculate the entropy of.')
parser.add_argument('--files', nargs='*', type=str, default='', help='Provide file path(s) to calculate the entropy of.')

# OUTPUT OPTIONS
# const=True makes a bare switch (e.g. "--simple") enable the option; without
# it argparse stores None for a switch that is given without a value.
parser.add_argument('--simple', nargs='?', type=_str_to_bool, const=True, default=False, help='Determines the explicitness of the output. (True = only entropy shown)')
parser.add_argument('--max', nargs='?', type=_str_to_bool, const=True, default=False, help='Includes the maximum entropy.')

# CONVERT OPTIONS
parser.add_argument('--lower', nargs='?', type=_str_to_bool, const=True, default=False, help='Converts given strings or textfiles to lowercase before calculating.')
parser.add_argument('--upper', nargs='?', type=_str_to_bool, const=True, default=False, help='Converts given strings or textfiles to uppercase before calculating.')
parser.add_argument('--squash', nargs='?', type=_str_to_bool, const=True, default=False, help='Removes all whitespaces before calculating.')
args = parser.parse_args()

# Prepares the queue of different strings, starting with the provided strings
queue = list(args.strings)

# Add the content of all the provided files to the list
for file in args.files:
    queue.append(entro_py_min.readEntropyFile(file))

# Iterates over the collected strings and prints the entropies
for string in queue:
    # --lower takes precedence when both case options are given
    if args.lower:
        string = string.lower()
    elif args.upper:
        string = string.upper()

    if args.squash:
        # split() without arguments splits on every kind of whitespace, so
        # tabs and line breaks are removed as well as plain spaces.
        string = ''.join(string.split())

    entropy, alphabet, max_entropy = entro_py_min.calculateEntropy(string)
    entro_py_min.printEntropy(string, entropy, alphabet, args.simple, (max_entropy if args.max else False))
|
||||
57
entro_py_min/entro_py_min.py
Normal file
57
entro_py_min/entro_py_min.py
Normal file
@@ -0,0 +1,57 @@
|
||||
import math
|
||||
|
||||
|
||||
# Calculates the entropy of a given string
|
||||
# Returns the entropy and an alphabet with the calculated probabilities
|
||||
def calculateEntropy(input_string):
    '''
    Calculates the Shannon entropy of input_string in bits.

    Returns a tuple (entropy, alphabet, max_entropy):
      entropy      negative sum of p * log2(p) over all distinct symbols
      alphabet     dict mapping each distinct symbol to its relative frequency
      max_entropy  entropy of a uniform distribution over the same alphabet

    An empty input yields (0.0, {}, 0.0).
    '''
    # Without symbols there is nothing to divide by; return early instead of
    # raising ZeroDivisionError further down.
    if not input_string:
        return 0.0, {}, 0.0

    total = len(input_string)
    counts = {}
    for char in input_string:
        counts[char] = counts.get(char, 0) + 1

    alphabet = {char: count / total for char, count in counts.items()}

    entropy = 0.0
    for probability in alphabet.values():
        entropy -= probability * math.log2(probability)

    # -n * (1/n * log2(1/n)) simplifies to log2(n). The simplified form also
    # avoids a negative zero when the alphabet has a single symbol.
    max_entropy = math.log2(len(alphabet))
    return entropy, alphabet, max_entropy
|
||||
|
||||
|
||||
# Calculates the entropy of a given string
|
||||
# Returns only the entropy in bits as this is the minimal function
|
||||
def calculateEntropyMin(input_string):
    '''
    Calculates the Shannon entropy of input_string and returns only the
    entropy in bits (a float; 0.0 for an empty input).
    '''
    total = len(input_string)
    counts = {}
    for char in input_string:
        counts[char] = counts.get(char, 0) + 1

    # Starting from a float keeps the return type the same for every input.
    entropy = 0.0
    for count in counts.values():
        probability = count / total
        # log2 is usually more accurate than log(x, 2), which divides two
        # rounded natural logarithms.
        entropy -= probability * math.log2(probability)
    return entropy
|
||||
|
||||
|
||||
# Outputs a given entropy including the original text and the alphabet with probabilities
|
||||
def printEntropy(original_string, entropy_value, alphabet_dict, simple_bool, max_value):
    '''
    Prints an entropy report framed by '---' lines.

    original_string  the analysed text (printed unless simple_bool is truthy)
    entropy_value    the entropy to report, in bits
    alphabet_dict    the symbol probabilities (printed unless simple_bool is truthy)
    simple_bool      if truthy, only the entropy line(s) are printed
    max_value        the maximum entropy to report, or False/None to omit it
    '''
    print('---')
    if not simple_bool:
        print('Content: ' + original_string)
        print('Probabilities: ' + str(alphabet_dict))
    print('Entropy: ' + str(entropy_value) + ' bits')
    # Check for the "omit" markers explicitly: a plain truthiness test would
    # also hide a legitimate maximum entropy of 0 (single-symbol alphabet).
    if max_value is not None and max_value is not False:
        print('Maximum Entropy: ' + str(max_value) + ' bits')
    print('---')
|
||||
|
||||
|
||||
# Reads a file by a given path
|
||||
def readEntropyFile(path_string):
    '''
    Reads the text file at path_string and returns its content as one line.

    Newlines are replaced by spaces and leading/trailing whitespace is
    stripped. Errors from open() or read() propagate to the caller.
    '''
    # The with-block closes the file even when reading raises.
    with open(path_string, 'r') as handle:
        content = handle.read()
    return content.replace('\n', ' ').strip()
|
||||
21
setup.py
Normal file
21
setup.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import setuptools
|
||||
|
||||
# The README doubles as the long description of the package. The encoding is
# given explicitly so that building does not depend on the platform's default
# text encoding.
# NOTE(review): assumes README.md is stored as UTF-8 - confirm.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Package metadata; find_packages() collects the packages to install.
setuptools.setup(
    name="entro_py_min",
    version="0.0.1",
    author="Conrad Großer",
    author_email="grosserconrad@gmail.com",
    description="Small Information Entropy Calculator",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/creyD/entro.py",
    packages=setuptools.find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)
|
||||
Reference in New Issue
Block a user