Spaces:
Running
Running
alessandro trinca tornidor
committed on
Commit
·
023235e
1
Parent(s):
3fdcb38
feat: substitute sample table selection with direct input, update TestDataset tests
Browse files
- aip_trainer/lambdas/lambdaGetSample.py +7 -9
- static/css/{style-new.css → style.css} +23 -23
- static/javascript/callbacks.js +25 -69
- static/main.html +1 -2
- tests/test_dataset.py +1 -1
aip_trainer/lambdas/lambdaGetSample.py
CHANGED
|
@@ -60,20 +60,18 @@ def lambda_handler(event, context):
|
|
| 60 |
category = int(body['category'])
|
| 61 |
except KeyError:
|
| 62 |
category = 0
|
| 63 |
-
|
| 64 |
language = body['language']
|
| 65 |
try:
|
| 66 |
-
|
| 67 |
except KeyError:
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
app_logger.info(f"category={category}, language={language},
|
| 71 |
-
lambda_df_lang = lambda_database[language]
|
| 72 |
-
current_transcript = lambda_df_lang[sample_idx] if sample_idx is not None else lambda_df_lang.get_random_sample_from_df(language, category)
|
| 73 |
# sentence_category = getSentenceCategory(current_transcript[0])
|
| 74 |
-
|
|
|
|
| 75 |
|
| 76 |
-
app_logger.info(f"real_transcript={current_transcript}, ipa_transcript={current_ipa}.")
|
| 77 |
result = {
|
| 78 |
'real_transcript': current_transcript,
|
| 79 |
'ipa_transcript': current_ipa,
|
|
|
|
| 60 |
category = int(body['category'])
|
| 61 |
except KeyError:
|
| 62 |
category = 0
|
|
|
|
| 63 |
language = body['language']
|
| 64 |
try:
|
| 65 |
+
current_transcript = str(body["transcript"])
|
| 66 |
except KeyError:
|
| 67 |
+
lambda_df_lang = lambda_database[language]
|
| 68 |
+
current_transcript = lambda_df_lang.get_random_sample_from_df(language, category)
|
| 69 |
+
app_logger.info(f"category={category}, language={language}, current_transcript={current_transcript}.")
|
|
|
|
|
|
|
| 70 |
# sentence_category = getSentenceCategory(current_transcript[0])
|
| 71 |
+
current_transcript = current_transcript if isinstance(current_transcript, str) else current_transcript[0]
|
| 72 |
+
current_ipa = lambda_ipa_converter[language].convertToPhonem(current_transcript)
|
| 73 |
|
| 74 |
+
app_logger.info(f"real_transcript='{current_transcript}', ipa_transcript='{current_ipa}'.")
|
| 75 |
result = {
|
| 76 |
'real_transcript': current_transcript,
|
| 77 |
'ipa_transcript': current_ipa,
|
static/css/{style-new.css → style.css}
RENAMED
|
@@ -144,7 +144,7 @@ a.disabled {
|
|
| 144 |
left: 2%;
|
| 145 |
top: 63%;
|
| 146 |
transform: translate(-0%, -0%);
|
| 147 |
-
height:
|
| 148 |
width: 96%;
|
| 149 |
max-width: 96%;
|
| 150 |
background: #ffff;
|
|
@@ -152,7 +152,7 @@ a.disabled {
|
|
| 152 |
border-radius: 20px;
|
| 153 |
box-shadow: 0 0 20px 8px #d0d0d0;
|
| 154 |
overflow: scroll;
|
| 155 |
-
max-height:
|
| 156 |
}
|
| 157 |
|
| 158 |
.container-small {
|
|
@@ -240,8 +240,8 @@ a.disabled {
|
|
| 240 |
|
| 241 |
.mic-button-div {
|
| 242 |
position: fixed;
|
| 243 |
-
left:
|
| 244 |
-
top:
|
| 245 |
}
|
| 246 |
|
| 247 |
/*############### Drop-down ############# */
|
|
@@ -412,22 +412,22 @@ a.disabled {
|
|
| 412 |
box-shadow: 0 0 20px 8px #d0d0d0;
|
| 413 |
}
|
| 414 |
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
|
| 432 |
.icon-text {
|
| 433 |
font-size: 0.8em !important;
|
|
@@ -445,7 +445,7 @@ a.disabled {
|
|
| 445 |
/* 80px */
|
| 446 |
height: 3.5em;
|
| 447 |
padding-top: 0.4em;
|
| 448 |
-
left:
|
| 449 |
line-height: 0px;
|
| 450 |
border: 6px solid #fff;
|
| 451 |
border-radius: 50%;
|
|
@@ -460,8 +460,8 @@ a.disabled {
|
|
| 460 |
|
| 461 |
.mic-button-div {
|
| 462 |
position: fixed;
|
| 463 |
-
left:
|
| 464 |
-
top:
|
| 465 |
}
|
| 466 |
|
| 467 |
.link-icon-div {
|
|
|
|
| 144 |
left: 2%;
|
| 145 |
top: 63%;
|
| 146 |
transform: translate(-0%, -0%);
|
| 147 |
+
height: 10%;
|
| 148 |
width: 96%;
|
| 149 |
max-width: 96%;
|
| 150 |
background: #ffff;
|
|
|
|
| 152 |
border-radius: 20px;
|
| 153 |
box-shadow: 0 0 20px 8px #d0d0d0;
|
| 154 |
overflow: scroll;
|
| 155 |
+
max-height: 15%;
|
| 156 |
}
|
| 157 |
|
| 158 |
.container-small {
|
|
|
|
| 240 |
|
| 241 |
.mic-button-div {
|
| 242 |
position: fixed;
|
| 243 |
+
left: 50%;
|
| 244 |
+
top: 80%
|
| 245 |
}
|
| 246 |
|
| 247 |
/*############### Drop-down ############# */
|
|
|
|
| 412 |
box-shadow: 0 0 20px 8px #d0d0d0;
|
| 413 |
}
|
| 414 |
|
| 415 |
+
.container2 {
|
| 416 |
+
display: block;
|
| 417 |
+
position: absolute;
|
| 418 |
+
left: 2%;
|
| 419 |
+
top: 63%;
|
| 420 |
+
transform: translate(-0%, -0%);
|
| 421 |
+
height: 10%;
|
| 422 |
+
width: 96%;
|
| 423 |
+
max-width: 96%;
|
| 424 |
+
background: #ffff;
|
| 425 |
+
overflow: hidden;
|
| 426 |
+
border-radius: 20px;
|
| 427 |
+
box-shadow: 0 0 20px 8px #d0d0d0;
|
| 428 |
+
overflow: scroll;
|
| 429 |
+
max-height: 15%;
|
| 430 |
+
}
|
| 431 |
|
| 432 |
.icon-text {
|
| 433 |
font-size: 0.8em !important;
|
|
|
|
| 445 |
/* 80px */
|
| 446 |
height: 3.5em;
|
| 447 |
padding-top: 0.4em;
|
| 448 |
+
left: 50%;
|
| 449 |
line-height: 0px;
|
| 450 |
border: 6px solid #fff;
|
| 451 |
border-radius: 50%;
|
|
|
|
| 460 |
|
| 461 |
.mic-button-div {
|
| 462 |
position: fixed;
|
| 463 |
+
left: 50%;
|
| 464 |
+
top: 80%
|
| 465 |
}
|
| 466 |
|
| 467 |
.link-icon-div {
|
static/javascript/callbacks.js
CHANGED
|
@@ -23,9 +23,6 @@ let currentSoundRecorded = false;
|
|
| 23 |
let currentText, currentIpa, real_transcripts_ipa, matched_transcripts_ipa;
|
| 24 |
let wordCategories;
|
| 25 |
let startTime, endTime;
|
| 26 |
-
let allSamples = {};
|
| 27 |
-
let currentSamplesObj = {};
|
| 28 |
-
var timeout = null
|
| 29 |
|
| 30 |
// API related variables
|
| 31 |
let AILanguage = "de"; // Standard is German
|
|
@@ -180,7 +177,7 @@ const prepareUiForNextSample = async () => {
|
|
| 180 |
if (soundFileBad == null)
|
| 181 |
cacheSoundFiles();
|
| 182 |
|
| 183 |
-
updateScore(parseFloat(document.getElementById("pronunciation_accuracy").
|
| 184 |
|
| 185 |
document.getElementById("main_title").innerText = "Processing new sample...";
|
| 186 |
}
|
|
@@ -198,7 +195,6 @@ const populateSampleById = (dataById) => {
|
|
| 198 |
document.getElementById("recorded_ipa_script").innerText = ""
|
| 199 |
document.getElementById("pronunciation_accuracy").innerText = "";
|
| 200 |
document.getElementById("single_word_ipa_pair").innerText = "Reference | Spoken"
|
| 201 |
-
// document.getElementById("section_accuracy").innerText = "| Score: " + currentScore.toString() + " - (" + currentSample.toString() + ")";
|
| 202 |
document.getElementById("section_accuracy").innerText = `| Score: ${currentScore.toString()} - sample n: ${currentSample.toString()}`;
|
| 203 |
currentSample += 1;
|
| 204 |
|
|
@@ -277,7 +273,6 @@ const changeLanguage = (language, generateNewSample = false) => {
|
|
| 277 |
}
|
| 278 |
}
|
| 279 |
}
|
| 280 |
-
getTableFromSamples(allSamples, `${AILanguage}_sentence`);
|
| 281 |
if (generateNewSample)
|
| 282 |
getNextSample();
|
| 283 |
}
|
|
@@ -326,7 +321,7 @@ const startMediaDevice = () => {
|
|
| 326 |
try {
|
| 327 |
await fetch(apiMainPathSTS + '/GetAccuracyFromRecordedAudio', {
|
| 328 |
method: "post",
|
| 329 |
-
body: JSON.stringify({ "title": currentText
|
| 330 |
|
| 331 |
}).then(res => res.json()).
|
| 332 |
then(mediaData => {
|
|
@@ -349,7 +344,7 @@ const startMediaDevice = () => {
|
|
| 349 |
real_transcripts_ipa = mediaData.real_transcripts_ipa.split(" ")
|
| 350 |
matched_transcripts_ipa = mediaData.matched_transcripts_ipa.split(" ")
|
| 351 |
wordCategories = mediaData.pair_accuracy_category.split(" ")
|
| 352 |
-
let currentTextWords = currentText
|
| 353 |
|
| 354 |
coloredWords = "";
|
| 355 |
for (let word_idx = 0; word_idx < currentTextWords.length; word_idx++) {
|
|
@@ -406,7 +401,8 @@ const playSoundForAnswerAccuracy = async (accuracy) => {
|
|
| 406 |
const playAudio = async () => {
|
| 407 |
|
| 408 |
document.getElementById("main_title").innerText = "Generating sound...";
|
| 409 |
-
|
|
|
|
| 410 |
document.getElementById("main_title").innerText = "Current Sound was played";
|
| 411 |
|
| 412 |
};
|
|
@@ -472,7 +468,8 @@ const stopRecording = () => {
|
|
| 472 |
const playCurrentWord = async (word_idx) => {
|
| 473 |
|
| 474 |
document.getElementById("main_title").innerText = "Generating word...";
|
| 475 |
-
|
|
|
|
| 476 |
document.getElementById("main_title").innerText = "Word was played";
|
| 477 |
}
|
| 478 |
|
|
@@ -532,10 +529,6 @@ const wrapWordForIndividualPlayback = (word, word_idx) => {
|
|
| 532 |
// ########## Function to initialize server ###############
|
| 533 |
// This is to try to avoid aws lambda cold start
|
| 534 |
try {
|
| 535 |
-
fetch(apiMainPathSTS + '/getAllSamples').then(res => res.json()).then(dataAllSamples => {
|
| 536 |
-
populateAllSamples(dataAllSamples);
|
| 537 |
-
getTableFromSamples(dataAllSamples, `${AILanguage}_sentence`);
|
| 538 |
-
});
|
| 539 |
fetch(apiMainPathSTS + '/GetAccuracyFromRecordedAudio', {
|
| 540 |
method: "post",
|
| 541 |
body: JSON.stringify({ "title": '', "base64Audio": '', "language": AILanguage }),
|
|
@@ -571,67 +564,30 @@ const initializeServer = async () => {
|
|
| 571 |
}
|
| 572 |
}
|
| 573 |
|
| 574 |
-
const
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
}
|
| 588 |
-
table.appendChild(tr);
|
| 589 |
-
}
|
| 590 |
-
|
| 591 |
-
const createTableRow = (contentRow, sampleIdx, isFiltered = false) => {
|
| 592 |
-
var tr = document.createElement('tr');
|
| 593 |
-
tr.append(`${contentRow}`);
|
| 594 |
-
tr.onclick = async function () {
|
| 595 |
-
await prepareUiForNextSample()
|
| 596 |
-
// console.debug(`createTableRow:: ${isFiltered}, sampleIdx: `, sampleIdx);
|
| 597 |
-
await fetch(apiMainPathSample + '/getSample', {
|
| 598 |
-
method: "post",
|
| 599 |
-
body: JSON.stringify({
|
| 600 |
-
"language": AILanguage, "idx": sampleIdx
|
| 601 |
-
}),
|
| 602 |
-
}).then(res => {
|
| 603 |
-
let res2json = res.json()
|
| 604 |
-
// console.debug(`createTableRow:: ${isFiltered}, res2json: `, typeof res2json, "=>", res2json, "#");
|
| 605 |
-
return res2json
|
| 606 |
-
}).then(dataOnRowCreation => {
|
| 607 |
-
// console.debug(`createTableRow:: ${isFiltered}, dataOnRowCreation: `, typeof dataOnRowCreation, "=>", dataOnRowCreation, "#");
|
| 608 |
-
populateSampleById(dataOnRowCreation)
|
| 609 |
-
tr.style["background-color"] = "#f0f0f0";
|
| 610 |
-
})
|
| 611 |
-
};
|
| 612 |
-
return tr;
|
| 613 |
-
}
|
| 614 |
-
|
| 615 |
-
const filterAllSamples = async (obj, filter, lang) => {
|
| 616 |
-
if (filter == "") {
|
| 617 |
-
currentSamplesObj = {...obj}
|
| 618 |
-
};
|
| 619 |
-
objByLAng = obj[lang];
|
| 620 |
-
const filtered = Object.entries(objByLAng).filter(([key, value]) => value.toLowerCase().includes(filter));
|
| 621 |
-
currentSamplesObj = {
|
| 622 |
-
[lang]: Object.entries(filtered).map(([key, value]) => value[1])
|
| 623 |
-
};
|
| 624 |
}
|
| 625 |
|
| 626 |
-
// todo: fix the request from the rows filtered not working
|
| 627 |
$(document).ready(function(){
|
| 628 |
-
$("#field-filter-samples").on("keyup", function(e) {
|
| 629 |
e.preventDefault();
|
| 630 |
var keycode = (e.keyCode ? e.keyCode : e.which);
|
| 631 |
if (keycode === 13 || e.key === 'Enter') {
|
| 632 |
-
var valueFilter = $(this).val()
|
| 633 |
-
|
| 634 |
-
|
| 635 |
}
|
| 636 |
});
|
| 637 |
});
|
|
|
|
| 23 |
let currentText, currentIpa, real_transcripts_ipa, matched_transcripts_ipa;
|
| 24 |
let wordCategories;
|
| 25 |
let startTime, endTime;
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
// API related variables
|
| 28 |
let AILanguage = "de"; // Standard is German
|
|
|
|
| 177 |
if (soundFileBad == null)
|
| 178 |
cacheSoundFiles();
|
| 179 |
|
| 180 |
+
updateScore(parseFloat(document.getElementById("pronunciation_accuracy").innerText));
|
| 181 |
|
| 182 |
document.getElementById("main_title").innerText = "Processing new sample...";
|
| 183 |
}
|
|
|
|
| 195 |
document.getElementById("recorded_ipa_script").innerText = ""
|
| 196 |
document.getElementById("pronunciation_accuracy").innerText = "";
|
| 197 |
document.getElementById("single_word_ipa_pair").innerText = "Reference | Spoken"
|
|
|
|
| 198 |
document.getElementById("section_accuracy").innerText = `| Score: ${currentScore.toString()} - sample n: ${currentSample.toString()}`;
|
| 199 |
currentSample += 1;
|
| 200 |
|
|
|
|
| 273 |
}
|
| 274 |
}
|
| 275 |
}
|
|
|
|
| 276 |
if (generateNewSample)
|
| 277 |
getNextSample();
|
| 278 |
}
|
|
|
|
| 321 |
try {
|
| 322 |
await fetch(apiMainPathSTS + '/GetAccuracyFromRecordedAudio', {
|
| 323 |
method: "post",
|
| 324 |
+
body: JSON.stringify({ "title": currentText, "base64Audio": audioBase64, "language": AILanguage }),
|
| 325 |
|
| 326 |
}).then(res => res.json()).
|
| 327 |
then(mediaData => {
|
|
|
|
| 344 |
real_transcripts_ipa = mediaData.real_transcripts_ipa.split(" ")
|
| 345 |
matched_transcripts_ipa = mediaData.matched_transcripts_ipa.split(" ")
|
| 346 |
wordCategories = mediaData.pair_accuracy_category.split(" ")
|
| 347 |
+
let currentTextWords = currentText.split(" ")
|
| 348 |
|
| 349 |
coloredWords = "";
|
| 350 |
for (let word_idx = 0; word_idx < currentTextWords.length; word_idx++) {
|
|
|
|
| 401 |
const playAudio = async () => {
|
| 402 |
|
| 403 |
document.getElementById("main_title").innerText = "Generating sound...";
|
| 404 |
+
// console.debug(`playAudio:: currentText: `, typeof currentText, "=>", currentText, "#");
|
| 405 |
+
playWithMozillaApi(currentText);
|
| 406 |
document.getElementById("main_title").innerText = "Current Sound was played";
|
| 407 |
|
| 408 |
};
|
|
|
|
| 468 |
const playCurrentWord = async (word_idx) => {
|
| 469 |
|
| 470 |
document.getElementById("main_title").innerText = "Generating word...";
|
| 471 |
+
// console.debug(`playCurrentWord:: currentText: `, typeof currentText, "=>", currentText, "#");
|
| 472 |
+
playWithMozillaApi(currentText.split(' ')[word_idx]);
|
| 473 |
document.getElementById("main_title").innerText = "Word was played";
|
| 474 |
}
|
| 475 |
|
|
|
|
| 529 |
// ########## Function to initialize server ###############
|
| 530 |
// This is to try to avoid aws lambda cold start
|
| 531 |
try {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 532 |
fetch(apiMainPathSTS + '/GetAccuracyFromRecordedAudio', {
|
| 533 |
method: "post",
|
| 534 |
body: JSON.stringify({ "title": '', "base64Audio": '', "language": AILanguage }),
|
|
|
|
| 564 |
}
|
| 565 |
}
|
| 566 |
|
| 567 |
+
const getSampleFromTextInput = async (AILanguage, textInput) => {
|
| 568 |
+
await fetch(apiMainPathSample + '/getSample', {
|
| 569 |
+
method: "post",
|
| 570 |
+
body: JSON.stringify({
|
| 571 |
+
"language": AILanguage, "transcript": textInput
|
| 572 |
+
}),
|
| 573 |
+
}).then(res => {
|
| 574 |
+
let res2json = res.json()
|
| 575 |
+
// console.debug(`getSampleFromTextInput:: res2json: `, typeof res2json, "=>", res2json, "#");
|
| 576 |
+
return res2json
|
| 577 |
+
}).then(dataOnInput => {
|
| 578 |
+
console.log(`getSampleFromTextInput:: dataOnInput: `, typeof dataOnInput, "=>", dataOnInput, "#");
|
| 579 |
+
populateSampleById(dataOnInput)
|
| 580 |
+
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 581 |
}
|
| 582 |
|
|
|
|
| 583 |
$(document).ready(function(){
|
| 584 |
+
$("#field-filter-samples").on("keyup", async function(e) {
|
| 585 |
e.preventDefault();
|
| 586 |
var keycode = (e.keyCode ? e.keyCode : e.which);
|
| 587 |
if (keycode === 13 || e.key === 'Enter') {
|
| 588 |
+
var valueFilter = $(this).val()
|
| 589 |
+
// console.debug(`input:: valueFilter: `, typeof valueFilter, "=>", valueFilter, ", AILanguage: ", AILanguage, "#");
|
| 590 |
+
await getSampleFromTextInput(AILanguage, valueFilter);
|
| 591 |
}
|
| 592 |
});
|
| 593 |
});
|
static/main.html
CHANGED
|
@@ -18,7 +18,7 @@
|
|
| 18 |
></script>
|
| 19 |
|
| 20 |
<script src="static/javascript/callbacks.js"></script>
|
| 21 |
-
<link rel="stylesheet" href="static/css/style-new.css" />
|
| 22 |
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet" />
|
| 23 |
</head>
|
| 24 |
|
|
@@ -127,7 +127,6 @@
|
|
| 127 |
<div class="container2">
|
| 128 |
<div id="div-field-filter-samples" style="position: absolute; width: 97%; margin: 1em;">
|
| 129 |
<input id="field-filter-samples" type="search" class="form-control" placeholder="Write and press enter to filter">
|
| 130 |
-
<div id="field-samples">{}</div>
|
| 131 |
</div>
|
| 132 |
</div>
|
| 133 |
|
|
|
|
| 18 |
></script>
|
| 19 |
|
| 20 |
<script src="static/javascript/callbacks.js"></script>
|
| 21 |
+
<link rel="stylesheet" href="static/css/style.css" />
|
| 22 |
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet" />
|
| 23 |
</head>
|
| 24 |
|
|
|
|
| 127 |
<div class="container2">
|
| 128 |
<div id="div-field-filter-samples" style="position: absolute; width: 97%; margin: 1em;">
|
| 129 |
<input id="field-filter-samples" type="search" class="form-control" placeholder="Write and press enter to filter">
|
|
|
|
| 130 |
</div>
|
| 131 |
</div>
|
| 132 |
|
tests/test_dataset.py
CHANGED
|
@@ -10,7 +10,7 @@ def helper_category(category: int, threshold_min: int, threshold_max: int, n: in
|
|
| 10 |
event = {'body': json.dumps({'category': category, 'language': 'de'})}
|
| 11 |
response = lambdaGetSample.lambda_handler(event, [])
|
| 12 |
response_dict = json.loads(response)
|
| 13 |
-
number_of_words = len(response_dict['real_transcript']
|
| 14 |
try:
|
| 15 |
assert threshold_min < number_of_words <= threshold_max
|
| 16 |
except AssertionError:
|
|
|
|
| 10 |
event = {'body': json.dumps({'category': category, 'language': 'de'})}
|
| 11 |
response = lambdaGetSample.lambda_handler(event, [])
|
| 12 |
response_dict = json.loads(response)
|
| 13 |
+
number_of_words = len(response_dict['real_transcript'].split())
|
| 14 |
try:
|
| 15 |
assert threshold_min < number_of_words <= threshold_max
|
| 16 |
except AssertionError:
|