remove Lempel-Ziv and "arithmetic coding"

they do not work

remove Lempel-Ziv and "arithmetic coding"
c3c9a564 · STEVAN Antoine · d5a48dae · c3c9a564 · d5a48dae · d5a48dae
Unverified Commit c3c9a564 authored Jun 13, 2024 by STEVAN Antoine
--- a/README.md
+++ b/README.md
@@ -156,7 +156,6 @@ Basically speaking, the maximum level of compression that can be obtained with t
    1. Evaluate the compression ratio by considering that each character of the text files is encoded on 7 bits. Thus the compression ratio is given by:
    $$\frac{7\times \textrm{number of characters in the initial text file}}{\textrm{number of characters in the compressed file}}$$
 4. How can the level of compression be improved? The occurrence frequencies of each character only capture the redundancy of individual characters, not the redundancy between consecutive characters... So, you can repeat the same exercise by analyzing the occurrence frequencies of all 2-letter (or longer) words. This should allow you to capture more of the text's redundancy and thus improve the compression ratio.
-5. If you have finished, compare the performance of Huffman coding with that of other compression algorithms: [_Arithmetic Coding_](src/arithmetic_coding.py) and [_Lempel-Ziv_](src/lempel_ziv.py).

 ## Compress the files et [upload them to the LMS](https://lms.isae.fr/mod/assign/view.php?id=107859) [[toc](#table-of-content)]
 - [ ] `src/occurrence_frequencies.py`

--- a/src/arithmetic_coding.py
+++ b/src/arithmetic_coding.py
-##########
-# original version downloaded from 
-# https://rosettacode.org/wiki/Arithmetic_coding/As_a_generalized_change_of_radix
-# in december 2019
-##########
-
-
-from collections import Counter
-
-def cumulative_freq(freq):
-    cf = {}
-    total = 0
-    for b in range(256):
-        if b in freq:
-            cf[b] = total
-            total += freq[b]
-    return cf
- 
-def arithmethic_coding(toBeEncoded, textRef, radix):
-#  toBeEncoded : the string to encode
-#  textRef : the string which is used to build the frequency distribution
-#  radix : the radix for the encoding     
-
-
-    # The frequency characters
-    freq = Counter(textRef)
- 
-    print(freq)
-    
-    # The cumulative frequency table
-    cf = cumulative_freq(freq)
-    print('cf=',cf)
- 
-    # Base
-    base = len(toBeEncoded)
- 
-    # Lower bound
-    lower = 0
- 
-    # Product of all frequencies
-    pf = 1
- 
-    # Each term is multiplied by the product of the
-    # frequencies of all previously occurring symbols
-    for b in toBeEncoded:
-        lower = lower*base + cf[b]*pf
-        pf *= freq[b]
- 
-    # Upper bound
-    upper = lower+pf
- 
-    pow = 0
-    while True:
-        pf //= radix
-        if pf==0: break
-        pow += 1
- 
-    enc = (upper-1) // radix**pow
-    return enc, pow, freq
- 
-def arithmethic_decoding(enc, radix, pow, freq):
- 
-    # Multiply enc by radix^pow
-    enc *= radix**pow;
- 
-    # Base
-    base = sum(freq.values())
- 
-    # Create the cumulative frequency table
-    cf = cumulative_freq(freq)
- 
-    # Create the dictionary
-    dict = {}
-    for k,v in cf.items():
-        dict[v] = k
- 
-    # Fill the gaps in the dictionary
-    lchar = None
-    for i in range(base):
-        if i in dict:
-            lchar = dict[i]
-        elif lchar is not None:
-            dict[i] = lchar
- 
-    # Decode the input number
-    decoded = bytearray()
-    for i in range(base-1, -1, -1):
-        pow = base**i
-        div = enc//pow
- 
-        c  = dict[div]
-        fv = freq[c]
-        cv = cf[c]
- 
-        rem = (enc - pow*cv) // fv
- 
-        enc = rem
-        decoded.append(c)
- 
-    # Return the decoded output
-    return bytes(decoded)
- 
-radix = 2      # can be any integer greater or equal with 2
- 
-# exemple 
-
-str='DABDDB DABDDBBDDBA ABRACADABRA TOBEORNOTTOBEORTOBEORNOT'
-str = str.encode() # used to handle the string by bytes
-enc, pow, freq = arithmethic_coding(str, str, radix)
-dec = arithmethic_decoding(enc, radix, pow, freq)
-
- 
-print("%-25s=> %19s * %d^%s" % (str, enc, radix, pow))
-    
-if str != dec:
-    raise Exception("\tHowever that is incorrect!")
-else:
-    print("yes, Bro.")
-
--- a/src/lempel_ziv.py
+++ b/src/lempel_ziv.py
-# -*- coding: utf-8 -*-
-
-##########
-# original version downloaded from
-# https://gist.github.com/BertrandBordage/  
-# in december 2019
-##########
-
-
-from math import floor, ceil
-from typing import AnyStr # use a particular typing 
-
-
-ASCII_TO_INT: dict = {i.to_bytes(1, 'big'): i for i in range(256)}
-INT_TO_ASCII: dict = {i: b for b, i in ASCII_TO_INT.items()}
-
-
-def compressLZ(data: AnyStr) -> bytes:
-    if isinstance(data, str):
-        data = data.encode()
-    keys: dict = ASCII_TO_INT.copy()
-    n_keys: int = 256
-    compressed: list = []
-    start: int = 0
-    n_data: int = len(data)+1
-    while True:
-        if n_keys >= 512:
-            keys = ASCII_TO_INT.copy()
-            n_keys = 256
-        for i in range(1, n_data-start):
-            w: bytes = data[start:start+i]
-            if w not in keys:
-                compressed.append(keys[w[:-1]])
-                keys[w] = n_keys
-                start += i-1
-                n_keys += 1
-                break
-        else:
-            compressed.append(keys[w])
-            break
-    bits: str = ''.join([bin(i)[2:].zfill(9) for i in compressed])
-    return int(bits, 2).to_bytes(ceil(len(bits) / 8), 'big')
-
-
-def decompressLZ(data: AnyStr) -> bytes:
-    if isinstance(data, str):
-        data = data.encode()
-    keys: dict = INT_TO_ASCII.copy()
-    bits: str = bin(int.from_bytes(data, 'big'))[2:].zfill(len(data) * 8)
-    n_extended_bytes: int = floor(len(bits) / 9)
-    bits: str = bits[-n_extended_bytes * 9:]
-    data_list: list = [int(bits[i*9:(i+1)*9], 2)
-                       for i in range(n_extended_bytes)]
-    previous: bytes = keys[data_list[0]]
-    uncompressed: list = [previous]
-    n_keys: int = 256
-    for i in data_list[1:]:
-        if n_keys >= 512:
-            keys = INT_TO_ASCII.copy()
-            n_keys = 256
-        try:
-            current: bytes = keys[i]
-        except KeyError:
-            current = previous + previous[:1]
-        uncompressed.append(current)
-        keys[n_keys] = previous + current[:1]
-        previous = current
-        n_keys += 1
-    return b''.join(uncompressed)
-
-
-