Skip to content
Snippets Groups Projects
Unverified Commit 90419019 authored by STEVAN Antoine's avatar STEVAN Antoine :crab:
Browse files

add source files

parent edefeee9
Branches
No related tags found
No related merge requests found
import random
def build_huffman_tree(letter_count):
    """Build the binary Huffman encoding tree for a table of symbol weights.

    Receives a dictionary with ``char: count`` entries and returns a nested
    LIST structure: a leaf is the symbol itself, an internal node is the
    two-element list ``[first_extracted, second_extracted]``.  When the
    table holds a single entry, the tree is just that symbol.

    The two lightest nodes are merged repeatedly until one node is left.
    Among nodes of equal weight, the one that entered the queue first is
    extracted first.

    Raises:
        ValueError: if ``letter_count`` is empty.
    """
    import heapq  # local import: keeps the block usable on its own

    if not letter_count:
        raise ValueError("cannot build a Huffman tree from an empty table")

    # Heap entries are (weight, insertion order, node).  The insertion order
    # settles ties, so two nodes (a str and a list, say) are never compared.
    heap = [(weight, order, symbol)
            for order, (symbol, weight) in enumerate(letter_count.items())]
    heapq.heapify(heap)
    next_order = len(heap)

    while len(heap) > 1:
        # combine the two smallest elements
        pa, _, a = heapq.heappop(heap)  # smallest in queue
        pb, _, b = heapq.heappop(heap)  # next smallest
        heapq.heappush(heap, (pa + pb, next_order, [a, b]))  # new node
        next_order += 1

    return heap[0][2]  # only the root node is left
def extract_min(queue):
    """Remove and return the ``(item, weight)`` pair with the lowest weight.

    ``queue`` is a list of two-element pairs and is modified in place.
    When several pairs share the lowest weight, the one nearest the front
    of the list is taken.
    """
    position, _ = min(enumerate(queue), key=lambda entry: entry[1][1])
    return queue.pop(position)
def generate_code(huff_tree, prefix=""):
    """Return the codebook described by a Huffman tree.

    ``huff_tree`` is either a leaf (a ``str`` symbol) or a node whose
    items 0 and 1 are the left and right subtrees.  ``prefix`` is the bit
    string accumulated on the way down; going left appends ``'0'`` and
    going right appends ``'1'``.

    Returns a dictionary where the symbols are keys and the associated
    binary strings are values.  A tree consisting of one leaf only yields
    the one-bit code ``'0'`` for that symbol.
    """
    if isinstance(huff_tree, str):  # a leaf
        # A lone root leaf arrives here with an empty prefix; an empty
        # codeword could never be decoded, so give it a single bit.
        return {huff_tree: prefix or "0"}
    lchild, rchild = huff_tree[0], huff_tree[1]
    codebook = generate_code(lchild, prefix + "0")
    codebook.update(generate_code(rchild, prefix + "1"))
    return codebook
def compress(text, encoding_dict):
    """Encode ``text`` as the concatenation of its characters' codewords.

    Characters whose code point is 128 or above are skipped; every other
    character is looked up in ``encoding_dict``.
    """
    assert isinstance(text, str)
    codewords = []
    for symbol in text:
        if ord(symbol) >= 128:
            continue  # non-ASCII characters are left out of the output
        codewords.append(encoding_dict[symbol])
    return "".join(codewords)
def build_decoding_dict(encoding_dict):
    """Return the inverse mapping of ``encoding_dict`` (codeword -> symbol)."""
    decoding_dict = {}
    for symbol, codeword in encoding_dict.items():
        decoding_dict[codeword] = symbol
    return decoding_dict
def decompress(bits, decoding_dict):
    """Decode a sequence of bits back into text.

    Bits are accumulated one at a time; as soon as the accumulated string
    is a key of ``decoding_dict`` the matching symbol is emitted and the
    accumulator is cleared.  The input must end exactly on a codeword.
    """
    symbols = []
    pending = ""
    for bit in bits:
        pending = pending + bit
        if pending not in decoding_dict:
            continue  # codeword not complete yet
        symbols.append(decoding_dict[pending])
        pending = ""
    assert pending == ""  # must finish last codeword
    return "".join(symbols)  # list of symbols -> one string
# -*- coding: utf-8 -*-
##########
# original version downloaded from
# https://gist.github.com/BertrandBordage/
# in December 2019
##########
from math import floor, ceil
from typing import AnyStr # use a particular typing
# Lookup tables between every single-byte value and its integer code (0-255).
INT_TO_ASCII: dict = {code: bytes([code]) for code in range(256)}
ASCII_TO_INT: dict = {byte: code for code, byte in INT_TO_ASCII.items()}
def compressLZ(data: AnyStr) -> bytes:
    """Compress ``data`` with a dictionary coder that emits 9-bit codes.

    A ``str`` argument is first encoded with ``str.encode()``.  The table
    starts with the 256 single-byte strings; each time the longest known
    string is emitted, that string extended by the next input byte is
    added to the table.  Once the table reaches 512 entries it is reset
    to the initial 256, so every code fits in 9 bits.

    The codes are concatenated as 9-bit binary numbers and packed
    big-endian into the smallest whole number of bytes (the padding zeros
    end up at the front).  Empty input gives ``b''``.
    """
    if isinstance(data, str):
        data = data.encode()
    if not data:
        # With nothing to read, the search loop below would fall straight
        # into its ``else`` clause with ``w`` never assigned.
        return b''
    keys: dict = ASCII_TO_INT.copy()
    n_keys: int = 256
    compressed: list = []
    start: int = 0
    n_data: int = len(data) + 1
    while True:
        if n_keys >= 512:
            # Table full: start over so that codes stay below 512.
            keys = ASCII_TO_INT.copy()
            n_keys = 256
        for i in range(1, n_data - start):
            w: bytes = data[start:start + i]
            if w not in keys:
                # w[:-1] is the longest known string; emit it and learn w.
                compressed.append(keys[w[:-1]])
                keys[w] = n_keys
                start += i - 1
                n_keys += 1
                break
        else:
            # The rest of the input is a known string: emit it and stop.
            compressed.append(keys[w])
            break
    bits: str = ''.join(format(code, '09b') for code in compressed)
    return int(bits, 2).to_bytes(ceil(len(bits) / 8), 'big')
def decompressLZ(data: AnyStr) -> bytes:
    """Rebuild the byte string from a big-endian stream of 9-bit codes.

    A ``str`` argument is first encoded with ``str.encode()``.  The input
    is read as one big-endian integer, the leading padding bits are
    dropped, and the remainder is cut into 9-bit codes.  The table starts
    with the 256 single-byte strings, grows by one entry per code read
    after the first, and is reset once it reaches 512 entries.

    Empty input gives ``b''``.
    """
    if isinstance(data, str):
        data = data.encode()
    if not data:
        # No codes at all: avoid indexing into an empty code list below.
        return b''
    keys: dict = INT_TO_ASCII.copy()
    bits: str = bin(int.from_bytes(data, 'big'))[2:].zfill(len(data) * 8)
    n_extended_bytes: int = floor(len(bits) / 9)
    # Keep only the trailing whole 9-bit groups; the rest is padding.
    bits = bits[-n_extended_bytes * 9:]
    data_list: list = [int(bits[i * 9:(i + 1) * 9], 2)
                       for i in range(n_extended_bytes)]
    previous: bytes = keys[data_list[0]]
    uncompressed: list = [previous]
    n_keys: int = 256
    for code in data_list[1:]:
        if n_keys >= 512:
            keys = INT_TO_ASCII.copy()
            n_keys = 256
        try:
            current: bytes = keys[code]
        except KeyError:
            # Code not in the table yet: it is taken to be the entry about
            # to be created, i.e. the previous string plus its first byte.
            current = previous + previous[:1]
        uncompressed.append(current)
        keys[n_keys] = previous + current[:1]
        previous = current
        n_keys += 1
    return b''.join(uncompressed)
# -*- coding: utf-8 -*-
"""
Created on "write the date here, please"
@author: "write your name here please"
"""
# Sample text used as input for the letter-frequency exercise described below.
text = "I WENT AND CALLED, BUT GOT NO ANSWER. ON RETURNING, I WHISPERED TO CATHERINE THAT HE HAD HEARD A GOOD PART OF WHAT SHE SAID, I WAS SURE; ANDTOLD HOW I SAW HIM QUIT THE KITCHEN JUST AS SHE COMPLAINED OF HERBROTHER'S CONDUCT REGARDING HIM. SHE JUMPED UP IN A FINE FRIGHT, FLUNG HARETON ON TO THE SETTLE, AND RAN TO SEEK FOR HER FRIEND HERSELF; NOT TAKING LEISURE TO CONSIDER WHY SHE WAS SO FLURRIED, OR HOW HER TALK WOULD HAVE AFFECTED HIM. SHE WAS ABSENT SUCH A WHILE THAT JOSEPH PROPOSED WE SHOULD WAIT NO LONGER. HE CUNNINGLY CONJECTURED THEY WERE STAYING AWAY IN ORDER TO AVOID HEARING HIS PROTRACTED BLESSING."
print(text)
# The upper-case letters of the alphabet
letters ="ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# PROBLEM: find the occurrence frequencies of the letters of the alphabet in the text above.
# We can decompose the problem as follows:
# 1- create a dictionary containing the letters, each with an occurrence count equal to 0
# 2- for each letter in the text, increment the corresponding entry of the dictionary
# 3- normalize the values of the dictionary in order to have frequencies (the sum is equal to 1)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment