Spaces:
Running
Running
alessandro trinca tornidor
committed on
Commit
·
025384a
1
Parent(s):
43a7191
test: add e2e test TestGetAccuracyFromRecordedAudio
Browse files
tests/events/GetAccuracyFromRecordedAudio.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tests/test_GetAccuracyFromRecordedAudio.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import unittest
|
| 3 |
+
|
| 4 |
+
from aip_trainer.lambdas import lambdaSpeechToScore
|
| 5 |
+
from tests import EVENTS_FOLDER
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def check_output_by_field(output, key, match, expected_output):
    """Validate the format of ``output[key]``, then replace it with the expected value.

    ``output[key]`` must be a non-blank string. After stripping, it is split on
    single spaces and every resulting token must match ``match`` in its
    entirety. When the check passes, ``output[key]`` is overwritten with
    ``expected_output[key]`` so that a later whole-dict equality comparison
    ignores the actual content of this field.

    Args:
        output: dict holding the value to validate; mutated in place.
        key: name of the field to validate and overwrite.
        match: regular expression every space-separated token must fully match.
        expected_output: dict providing the replacement value for ``key``.

    Returns:
        The same ``output`` dict, with ``output[key]`` replaced.

    Raises:
        AssertionError: if the field is blank or any token does not match.
    """
    import re

    value = output[key].strip()
    assert len(value) > 0, f"field {key!r} is empty"

    pattern = re.compile(match)
    # Split on a single space on purpose: consecutive spaces produce an empty
    # token, which fails the format check below (same strictness as before).
    for word in value.split(" "):
        token = word.strip()
        # fullmatch states "the whole token conforms to the pattern" directly;
        # unlike a findall-based comparison it is not confused by capture
        # groups or by patterns that can also match the empty string.
        assert pattern.fullmatch(token) is not None, (
            f"field {key!r}: token {token!r} does not match pattern {match!r}"
        )

    output[key] = expected_output[key]
    return output
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
    """End-to-end test of ``lambdaSpeechToScore.lambda_handler`` on recorded events."""

    def test_GetAccuracyFromRecordedAudio(self):
        """Run the handler on every recorded input and compare with the recorded output.

        The events file holds an ``"inputs"`` and an ``"outputs"`` mapping keyed
        by event name. Some fields of the handler response are only checked for
        being non-blank or for their format, and are then replaced by the
        recorded values before the final equality comparison (presumably
        because their exact content is not stable between runs - confirm).
        """
        self.maxDiff = None

        # Explicit encoding: the expected output includes IPA transcripts, which
        # presumably contain non-ASCII characters (verify against the fixture).
        with open(EVENTS_FOLDER / "GetAccuracyFromRecordedAudio.json", "r", encoding="utf-8") as src:
            inputs_outputs = json.load(src)
        inputs = inputs_outputs["inputs"]
        outputs = inputs_outputs["outputs"]

        # Raw strings: "\d" and "\." are invalid escape sequences in a normal
        # string literal and trigger a warning on current Python versions.
        format_checks = (
            ("is_letter_correct_all_words", r"[01]+"),
            ("end_time", r"\d+\.\d+"),
            ("start_time", r"\d+\.\d+"),
            ("pronunciation_accuracy", r"\d+"),
        )

        for event_name, event_content in inputs.items():
            # One sub-test per event so a failure names the event and the
            # remaining events are still exercised.
            with self.subTest(event=event_name):
                expected_output = outputs[event_name]
                output = json.loads(lambdaSpeechToScore.lambda_handler(event_content, []))

                self.assertGreater(len(output["matched_transcripts"].strip()), 0)
                self.assertGreater(len(output["matched_transcripts_ipa"].strip()), 0)

                # Each call validates the field format and then overwrites the
                # field with the recorded value.
                for field, pattern in format_checks:
                    output = check_output_by_field(output, field, pattern, expected_output)

                output["matched_transcripts"] = expected_output["matched_transcripts"]
                output["matched_transcripts_ipa"] = expected_output["matched_transcripts_ipa"]
                self.assertEqual(expected_output, output)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
|
tests/test_dataset.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import unittest
|
| 3 |
+
|
| 4 |
+
from aip_trainer.lambdas import lambdaGetSample
|
| 5 |
+
from tests import test_logger
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def helper_category(category: int, threshold_min: int, threshold_max: int, n: int = 1000) -> None:
    """Request ``n`` German samples of one category and check their word count.

    Each iteration calls ``lambdaGetSample.lambda_handler`` with a JSON body
    holding ``category`` and language ``'de'``, decodes the JSON response and
    counts the whitespace-separated words of the first ``real_transcript``
    entry.

    Args:
        category: category value sent to the handler.
        threshold_min: exclusive lower bound on the number of words.
        threshold_max: inclusive upper bound on the number of words.
        n: number of samples to request.

    Raises:
        AssertionError: if a sample's word count is outside
            ``(threshold_min, threshold_max]``; the offending category and
            length are logged and carried in the exception message.
    """
    # The request body does not change between iterations; serialise it once.
    body = json.dumps({'category': category, 'language': 'de'})
    for _ in range(n):
        # A fresh event dict per call, as before, in case the handler mutates it.
        event = {'body': body}
        response = lambdaGetSample.lambda_handler(event, [])
        response_dict = json.loads(response)
        number_of_words = len(response_dict['real_transcript'][0].split())
        # Explicit condition instead of ``assert`` so the check still runs
        # when Python is started with -O.
        if not threshold_min < number_of_words <= threshold_max:
            message = f"Category: {category} had a sentence with length {number_of_words}."
            test_logger.error(message)
            raise AssertionError(message)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class TestDataset(unittest.TestCase):
    """Word-count bounds of the samples returned for each sentence category."""

    def test_random_sentences(self):
        """Category 0: more than 0 and at most 40 words."""
        helper_category(category=0, threshold_min=0, threshold_max=40)

    def test_easy_sentences(self):
        """Category 1: more than 0 and at most 8 words."""
        helper_category(category=1, threshold_min=0, threshold_max=8)

    def test_normal_sentences(self):
        """Category 2: more than 8 and at most 20 words."""
        helper_category(category=2, threshold_min=8, threshold_max=20)

    def test_hard_sentences(self):
        """Category 3: more than 20 and at most 10000 words."""
        helper_category(category=3, threshold_min=20, threshold_max=10000)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
|
tests/{unitTests.py → test_phonem_converter_score.py}
RENAMED
|
@@ -1,52 +1,14 @@
|
|
| 1 |
-
import json
|
| 2 |
-
import os
|
| 3 |
import unittest
|
| 4 |
|
| 5 |
import epitran
|
| 6 |
-
import structlog
|
| 7 |
|
|
|
|
| 8 |
from aip_trainer.models import RuleBasedModels
|
| 9 |
-
from aip_trainer import pronunciationTrainer, LOG_JSON_FORMAT
|
| 10 |
-
from aip_trainer.lambdas import lambdaGetSample
|
| 11 |
-
from aip_trainer.utils import session_logger
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
log_level = os.getenv("LOG_LEVEL", "INFO")
|
| 15 |
-
session_logger.setup_logging(json_logs=LOG_JSON_FORMAT, log_level=log_level)
|
| 16 |
-
test_logger = structlog.stdlib.get_logger(__name__)
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
def test_category(category: int, threshold_min: int, threshold_max: int, n: int = 1000):
|
| 20 |
-
for _ in range(n):
|
| 21 |
-
event = {'body': json.dumps({'category': category, 'language': 'de'})}
|
| 22 |
-
response = lambdaGetSample.lambda_handler(event, [])
|
| 23 |
-
response_dict = json.loads(response)
|
| 24 |
-
number_of_words = len(response_dict['real_transcript'][0].split())
|
| 25 |
-
try:
|
| 26 |
-
assert threshold_min < number_of_words <= threshold_max
|
| 27 |
-
except AssertionError:
|
| 28 |
-
test_logger.error(
|
| 29 |
-
f"Category: {category} had a sentence with length {number_of_words}.")
|
| 30 |
-
raise AssertionError
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
class TestDataset(unittest.TestCase):
|
| 34 |
-
def test_random_sentences(self):
|
| 35 |
-
test_category(0, 0, 40)
|
| 36 |
-
|
| 37 |
-
def test_easy_sentences(self):
|
| 38 |
-
test_category(1, 0, 8)
|
| 39 |
-
|
| 40 |
-
def test_normal_sentences(self):
|
| 41 |
-
test_category(2, 8, 20)
|
| 42 |
-
|
| 43 |
-
def test_hard_sentences(self):
|
| 44 |
-
test_category(3, 20, 10000)
|
| 45 |
|
| 46 |
|
| 47 |
class TestPhonemConverter(unittest.TestCase):
|
| 48 |
|
| 49 |
-
def
|
| 50 |
phonem_converter = RuleBasedModels.EngPhonemConverter()
|
| 51 |
output = phonem_converter.convertToPhonem('Hello, this is a test')
|
| 52 |
self.assertEqual(output, 'hɛˈloʊ, ðɪs ɪz ə tɛst')
|
|
|
|
|
|
|
|
|
|
| 1 |
import unittest
|
| 2 |
|
| 3 |
import epitran
|
|
|
|
| 4 |
|
| 5 |
+
from aip_trainer import pronunciationTrainer
|
| 6 |
from aip_trainer.models import RuleBasedModels
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
class TestPhonemConverter(unittest.TestCase):
|
| 10 |
|
| 11 |
+
def test_english_ok(self):
|
| 12 |
phonem_converter = RuleBasedModels.EngPhonemConverter()
|
| 13 |
output = phonem_converter.convertToPhonem('Hello, this is a test')
|
| 14 |
self.assertEqual(output, 'hɛˈloʊ, ðɪs ɪz ə tɛst')
|