Verified commit fa74c582 authored by STEVAN Antoine

fix typo

parent 83320d87
@@ -97,7 +97,7 @@ Now we will use the [_Wuthering Heights_ by Emily Bronte](data/english_wuthering
 > - return: `str`
 2. Apply this function to the file [`data/english_wuthering_heights.txt`](data/english_wuthering_heights.txt) and store its content in the variable `my_text`.
-3. Write a function `compute_occurence_frequencies` to compute the occurrence frequencies of the letters of a string `alphabet` in the string `text`. This function returns the corresponding dictionary.
+3. Write a function `compute_occurrence_frequencies` to compute the occurrence frequencies of the letters of a string `alphabet` in the string `text`. This function returns the corresponding dictionary.
 > - arguments:
 > 1. text: `str`
 > 2. alphabet: `str`
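For reference, the `compute_occurrence_frequencies` function whose name this commit fixes could be sketched as below. This is a hypothetical implementation of the exercise statement, not the repository's actual solution; in particular, the case-folding via `upper()` is an assumption.

```python
def compute_occurrence_frequencies(text, alphabet):
    """Return a dict mapping each letter of `alphabet` to its relative
    frequency among the alphabet letters found in `text`."""
    counts = {letter: 0 for letter in alphabet}
    total = 0
    for char in text.upper():  # assumption: case-insensitive counting
        if char in counts:
            counts[char] += 1
            total += 1
    # texts containing no alphabet letter yield all-zero frequencies
    if total == 0:
        return counts
    return {letter: count / total for letter, count in counts.items()}
```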
@@ -164,7 +164,7 @@ Basically speaking, the maximum level of compression that can be obtained with t
 If you want to learn more on the entropy concept, which has strong applications in compression, error correcting codes and cryptography, have a look on [this video](https://www.khanacademy.org/computing/computer-science/informationtheory/moderninfotheory/v/information-entropy) :)
 By assuming that the occurrence frequencies obtained of englishOF are representative of the occurrence probabilities of the characters in the english language, compute the entropy of the english language. For that :
-1. add the line `from enhanced_occurrence_frequencies import read_text, compute_occurence_frequencies, detect_language` that allows you to use the functions defined in the previous file.
+1. add the line `from enhanced_occurrence_frequencies import read_text, compute_occurrence_frequencies, detect_language` that allows you to use the functions defined in the previous file.
 2. In order to be fair in the evaluation of the compression, you need to integrate all the possible characters, instead of just the letters. So, to generate the string characters which contains the considered characters, copy-paste the code :
 ```python
 characters = [chr(i) for i in range(128)]
@@ -186,7 +186,7 @@ Basically speaking, the maximum level of compression that can be obtained with t
 Observe the "beauty of the math" by verifying the accuracy of the entropy bound !!
 1. Evaluate the compression ratio by considering that each character of the text files is encoded on 7 bits. Thus the compression rate is given by :
 $$\frac{7\times \textrm{number of characters in the initial text file}}{\textrm{number of characters in the compressed file}}$$
-4. How to improve the level of compression ? The occurrence frequencies of each character only capture the redundancy of the characters, but not the one between consecutive characters... So, you can do the same exercice by analyzing the occurence frequencies of all 2-letter (or more) words. This should allow you to capture more redundancy of the text and then to improve the compression ratio.
+4. How to improve the level of compression ? The occurrence frequencies of each character only capture the redundancy of the characters, but not the one between consecutive characters... So, you can do the same exercice by analyzing the occurrence frequencies of all 2-letter (or more) words. This should allow you to capture more redundancy of the text and then to improve the compression ratio.
 ## Upload your work to the LMS [[toc](#table-of-content)]
 - open a terminal
......
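The entropy and compression-bound steps discussed in the hunks above can be sketched as follows. `entropy` implements the standard Shannon formula H = -Σ p·log2(p); `best_compression_ratio` is a hypothetical helper illustrating the 7-bits-per-character bound, not the repository's `src.data_compression` code. The same machinery extends to the bigram idea in step 4 by feeding it frequencies of 2-letter sequences instead of single characters.

```python
import math

def entropy(frequencies):
    """Shannon entropy in bits per symbol: H = -sum(p * log2(p)),
    skipping zero-probability symbols."""
    return -sum(p * math.log2(p) for p in frequencies.values() if p > 0)

def best_compression_ratio(frequencies, bits_per_char=7):
    """A text whose characters are stored on `bits_per_char` bits cannot be
    compressed below about H bits per character, so the best achievable
    ratio is roughly bits_per_char / H."""
    return bits_per_char / entropy(frequencies)
```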
@@ -21,7 +21,7 @@ if __name__ == "__main__":
     alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    # PROBLEM : find the occurence frequencies of the letters of the alphabet
+    # PROBLEM : find the occurrence frequencies of the letters of the alphabet
     # in the text
     # We can decompose the problem as follows :
......
@@ -2,20 +2,20 @@ from tests.constants import ALPHABET
 def test_type():
-    from src.enhanced_occurrence_frequencies import compute_occurence_frequencies
+    from src.enhanced_occurrence_frequencies import compute_occurrence_frequencies
-    actual = compute_occurence_frequencies("a", ALPHABET)
+    actual = compute_occurrence_frequencies("a", ALPHABET)
     assert isinstance(actual, dict), (
-        "result of `compute_occurence_frequencies` should be a dictionary, "
+        "result of `compute_occurrence_frequencies` should be a dictionary, "
         f"found {type(actual).__name__}"
     )
     assert sorted(actual.keys()) == sorted(list(ALPHABET))
 def test_output():
-    from src.enhanced_occurrence_frequencies import compute_occurence_frequencies
+    from src.enhanced_occurrence_frequencies import compute_occurrence_frequencies
-    actual = compute_occurence_frequencies(
+    actual = compute_occurrence_frequencies(
         "this is some random text", ALPHABET
     )
     assert actual == {
......
@@ -3,11 +3,11 @@ from tests.constants import ALPHABET, LANGUAGE_FILES
 def test_detect_language():
     from src.enhanced_occurrence_frequencies import (
-        compute_occurence_frequencies, read_text, detect_language
+        compute_occurrence_frequencies, read_text, detect_language
     )
     ofs = {
-        lang: compute_occurence_frequencies(read_text(file), ALPHABET)
+        lang: compute_occurrence_frequencies(read_text(file), ALPHABET)
         for lang, file in LANGUAGE_FILES.items()
     }
......
@@ -11,12 +11,12 @@ ENTROPIES = {
 def test():
     from src.data_compression import entropy
     from src.enhanced_occurrence_frequencies import (
-        read_text, compute_occurence_frequencies
+        read_text, compute_occurrence_frequencies
     )
     for lang, file in LANGUAGE_FILES.items():
         e = entropy(
-            compute_occurence_frequencies(read_text(file), ASCII_ALPHABET)
+            compute_occurrence_frequencies(read_text(file), ASCII_ALPHABET)
         )
         expected = ENTROPIES[lang]
         assert abs(e - expected) <= PRECISION, (
......
@@ -3,7 +3,7 @@ from tests.constants import ALPHABET, LANGUAGE_FILES, PRECISION
 def test_kl_div():
     from src.enhanced_occurrence_frequencies import (
-        read_text, compute_occurence_frequencies, kl_divergence
+        read_text, compute_occurrence_frequencies, kl_divergence
    )
     try:
@@ -13,7 +13,7 @@ def test_kl_div():
         pass
     ofs = {
-        lang: compute_occurence_frequencies(read_text(file), ALPHABET)
+        lang: compute_occurrence_frequencies(read_text(file), ALPHABET)
         for lang, file in LANGUAGE_FILES.items()
     }
......
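The `kl_divergence` and `detect_language` functions renamed alongside in this diff could look like the following sketch. Both bodies are assumptions based on the usual definitions (Kullback-Leibler divergence, nearest-reference-distribution classification), not the repository's code.

```python
import math

def kl_divergence(p, q):
    """Kullback-Leibler divergence D(p || q) in bits, summed over the keys
    of p. Assumes q is non-zero wherever p is non-zero."""
    return sum(p[k] * math.log2(p[k] / q[k]) for k in p if p[k] > 0)

def detect_language(text_freqs, reference_freqs_by_lang):
    """Return the language whose reference frequency distribution is
    closest (lowest KL divergence) to the text's distribution."""
    return min(
        reference_freqs_by_lang,
        key=lambda lang: kl_divergence(text_freqs, reference_freqs_by_lang[lang]),
    )
```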