Spaces:

aletrn
/

ai-pronunciation-trainer

Running

App Files Files Community

alessandro trinca tornidor committed on Nov 15, 2024

Commit

025384a

1 Parent(s): 43a7191

test: add e2e test TestGetAccuracyFromRecordedAudio

Browse files

Files changed (4) hide show

tests/events/GetAccuracyFromRecordedAudio.json +0 -0
tests/test_GetAccuracyFromRecordedAudio.py +45 -0
tests/test_dataset.py +37 -0
tests/{unitTests.py → test_phonem_converter_score.py} +2 -40

tests/events/GetAccuracyFromRecordedAudio.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tests/test_GetAccuracyFromRecordedAudio.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import json
+import unittest
+from aip_trainer.lambdas import lambdaSpeechToScore
+from tests import EVENTS_FOLDER
+def check_output_by_field(output, key, match, expected_output):
+    """Validate the space-separated tokens of ``output[key]``, then normalise the field.
+
+    The stripped value of ``output[key]`` must be non-empty, and every token
+    obtained by splitting it on single spaces must match ``match`` in full.
+    On success ``output[key]`` is overwritten with ``expected_output[key]``,
+    so a later whole-dict comparison is not affected by this field's content.
+
+    Args:
+        output: dict under test; mutated in place.
+        key: name of the field to validate and overwrite.
+        match: regular expression (string or compiled) each token must match.
+        expected_output: dict providing the replacement value for ``key``.
+
+    Returns:
+        The same ``output`` dict, with ``output[key]`` replaced.
+
+    Raises:
+        AssertionError: if the field is blank or a token does not match.
+    """
+    import re
+
+    # fullmatch (rather than findall + length check) also works for patterns
+    # that contain capture groups or that are able to match the empty string.
+    pattern = re.compile(match)
+    value = output[key].strip()
+    if not value:
+        raise AssertionError(f"field {key!r} is empty")
+    # Splitting on a single space is deliberate: doubled spaces yield empty
+    # tokens, which are then validated against the pattern like any other.
+    for word in value.split(" "):
+        token = word.strip()
+        if pattern.fullmatch(token) is None:
+            raise AssertionError(
+                f"field {key!r}: token {token!r} does not match {pattern.pattern!r}"
+            )
+    output[key] = expected_output[key]
+    return output
+class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
+    """End-to-end test of ``lambdaSpeechToScore.lambda_handler`` on stored events."""
+
+    def test_GetAccuracyFromRecordedAudio(self):
+        """Run every stored input event and compare the result with its expected output.
+
+        The per-word fields and the transcripts are only checked for their
+        format (or for being non-blank) and are then replaced by the expected
+        values, so the final equality check covers the remaining fields.
+        """
+        self.maxDiff = None
+        # Explicit UTF-8 so the fixture is decoded identically on every platform.
+        with open(EVENTS_FOLDER / "GetAccuracyFromRecordedAudio.json", "r", encoding="utf-8") as src:
+            inputs_outputs = json.load(src)
+        inputs = inputs_outputs["inputs"]
+        outputs = inputs_outputs["outputs"]
+        # Field name -> pattern each space-separated token must match.
+        # Raw strings: '\d' in a plain literal is an invalid escape sequence.
+        token_patterns = (
+            ("is_letter_correct_all_words", r'[01]+'),
+            ("end_time", r'\d+\.\d+'),
+            ("start_time", r'\d+\.\d+'),
+            ("pronunciation_accuracy", r'\d+'),
+        )
+        for event_name, event_content in inputs.items():
+            # subTest reports each failing event by name instead of stopping at the first.
+            with self.subTest(event=event_name):
+                expected_output = outputs[event_name]
+                output = json.loads(lambdaSpeechToScore.lambda_handler(event_content, []))
+                self.assertTrue(output["matched_transcripts"].strip())
+                self.assertTrue(output["matched_transcripts_ipa"].strip())
+                for key, pattern in token_patterns:
+                    # Validates the format, then overwrites the field with the expected value.
+                    output = check_output_by_field(output, key, pattern, expected_output)
+                output["matched_transcripts"] = expected_output["matched_transcripts"]
+                output["matched_transcripts_ipa"] = expected_output["matched_transcripts_ipa"]
+                self.assertEqual(expected_output, output)
+# Run the test cases of this module when the file is executed as a script.
+if __name__ == '__main__':
+    unittest.main()

tests/test_dataset.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import json
+import unittest
+from aip_trainer.lambdas import lambdaGetSample
+from tests import test_logger
+def helper_category(category: int, threshold_min: int, threshold_max: int, n: int = 1000):
+    """Request ``n`` samples for ``category`` and check each sentence's word count.
+
+    Each call to ``lambdaGetSample.lambda_handler`` uses language ``'de'``.
+    The number of whitespace-separated words in the first element of
+    ``real_transcript`` must satisfy
+    ``threshold_min < number_of_words <= threshold_max``.
+
+    Args:
+        category: category value sent in the request body.
+        threshold_min: exclusive lower bound on the word count.
+        threshold_max: inclusive upper bound on the word count.
+        n: number of samples to request.
+
+    Raises:
+        AssertionError: on the first sample whose word count is out of range.
+    """
+    for _ in range(n):
+        event = {'body': json.dumps({'category': category, 'language': 'de'})}
+        response = lambdaGetSample.lambda_handler(event, [])
+        response_dict = json.loads(response)
+        number_of_words = len(response_dict['real_transcript'][0].split())
+        # Explicit check instead of `assert`, so it still runs under `python -O`.
+        if not threshold_min < number_of_words <= threshold_max:
+            message = f"Category: {category} had a sentence with length {number_of_words}."
+            test_logger.error(message)
+            # Carry the message in the exception so the test report explains the failure.
+            raise AssertionError(message)
+class TestDataset(unittest.TestCase):
+    """Word-count checks on sampled sentences, one test per category."""
+
+    def test_random_sentences(self):
+        """Category 0: more than 0 and at most 40 words."""
+        helper_category(category=0, threshold_min=0, threshold_max=40)
+
+    def test_easy_sentences(self):
+        """Category 1: more than 0 and at most 8 words."""
+        helper_category(category=1, threshold_min=0, threshold_max=8)
+
+    def test_normal_sentences(self):
+        """Category 2: more than 8 and at most 20 words."""
+        helper_category(category=2, threshold_min=8, threshold_max=20)
+
+    def test_hard_sentences(self):
+        """Category 3: more than 20 and at most 10000 words."""
+        helper_category(category=3, threshold_min=20, threshold_max=10000)
+# Run the test cases of this module when the file is executed as a script.
+if __name__ == '__main__':
+    unittest.main()

tests/{unitTests.py → test_phonem_converter_score.py} RENAMED Viewed

@@ -1,52 +1,14 @@
-import json
-import os
 import unittest
 import epitran
-import structlog
 from aip_trainer.models import RuleBasedModels
-from aip_trainer import pronunciationTrainer, LOG_JSON_FORMAT
-from aip_trainer.lambdas import lambdaGetSample
-from aip_trainer.utils import session_logger
-log_level = os.getenv("LOG_LEVEL", "INFO")
-session_logger.setup_logging(json_logs=LOG_JSON_FORMAT, log_level=log_level)
-test_logger = structlog.stdlib.get_logger(__name__)
-def test_category(category: int, threshold_min: int, threshold_max: int, n: int = 1000):
-    for _ in range(n):
-        event = {'body': json.dumps({'category': category, 'language': 'de'})}
-        response = lambdaGetSample.lambda_handler(event, [])
-        response_dict = json.loads(response)
-        number_of_words = len(response_dict['real_transcript'][0].split())
-        try:
-            assert threshold_min < number_of_words <= threshold_max
-        except AssertionError:
-            test_logger.error(
-                f"Category: {category} had a sentence with length {number_of_words}.")
-            raise AssertionError
-class TestDataset(unittest.TestCase):
-    def test_random_sentences(self):
-        test_category(0, 0, 40)
-    def test_easy_sentences(self):
-        test_category(1, 0, 8)
-    def test_normal_sentences(self):
-        test_category(2, 8, 20)
-    def test_hard_sentences(self):
-        test_category(3, 20, 10000)
 class TestPhonemConverter(unittest.TestCase):
-    def test_english(self):
         phonem_converter = RuleBasedModels.EngPhonemConverter()
         output = phonem_converter.convertToPhonem('Hello, this is a test')
         self.assertEqual(output, 'hɛˈloʊ, ðɪs ɪz ə tɛst')

 import unittest
 import epitran
+from aip_trainer import pronunciationTrainer
 from aip_trainer.models import RuleBasedModels
 class TestPhonemConverter(unittest.TestCase):
+    def test_english_ok(self):
         phonem_converter = RuleBasedModels.EngPhonemConverter()
         output = phonem_converter.convertToPhonem('Hello, this is a test')
         self.assertEqual(output, 'hɛˈloʊ, ðɪs ɪz ə tɛst')