Upload folder using huggingface_hub
Browse files- app/content.py +32 -2
- app/pages.py +0 -5
app/content.py
CHANGED
|
@@ -69,6 +69,13 @@ displayname2datasetname = {
|
|
| 69 |
'YouTube ASR: Chinese with English Prompt': 'ytb_asr_batch3_chinese',
|
| 70 |
'YouTube ASR: Chinese with Chinese Prompt': 'ytb_asr_batch3_zh_zh_prompt',
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
'SEAME-Dev-Mandarin' : 'seame_dev_man',
|
| 74 |
'SEAME-Dev-Singlish' : 'seame_dev_sge',
|
|
@@ -158,21 +165,44 @@ dataset_diaplay_information = {
|
|
| 158 |
|
| 159 |
'YouTube ASR: Chinese with Chinese Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset contains Chinese and some Chinese-English codeswitch audio clips, featuring with Chinese prompts. <br> It includes approximately 3.32 hours of audio, with individual clips ranging from 17 seconds to 1966 seconds in length.',
|
| 160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
'SEAME-Dev-Mandarin' : 'Under Development',
|
| 162 |
'SEAME-Dev-Singlish' : 'Under Development',
|
| 163 |
|
| 164 |
'YouTube SQA: English with Singapore Content': 'YouTube Evaluation Dataset for Speech-QA Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 7.6 hours of audio, with individual clips ranging from 8 seconds to 32 seconds in length.',
|
| 165 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
'YouTube SDS: English with Singapore Content': 'YouTube Evaluation Dataset for Summary Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 5.4 hours of audio, with individual clips ranging from 8 seconds to 32 seconds in length.',
|
| 167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
'YouTube PQA: English with Singapore Content': 'YouTube Evaluation Dataset for Paralinguistics QA Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 41.4 hours of audio, with individual clips ranging from 41 seconds to 83 seconds in length.',
|
| 169 |
|
| 170 |
|
| 171 |
}
|
| 172 |
|
| 173 |
|
| 174 |
-
|
| 175 |
-
|
| 176 |
metrics_info = {
|
| 177 |
'wer' : 'Word Error Rate (WER) - The Lower, the better.',
|
| 178 |
'llama3_70b_judge_binary': 'Model-as-a-Judge Performance. Using LLAMA-3-70B. Scale from 0-100. The higher, the better.',
|
|
|
|
| 69 |
'YouTube ASR: Chinese with English Prompt': 'ytb_asr_batch3_chinese',
|
| 70 |
'YouTube ASR: Chinese with Chinese Prompt': 'ytb_asr_batch3_zh_zh_prompt',
|
| 71 |
|
| 72 |
+
'YouTube SQA: Malay': 'ytb_sqa_batch3_malay',
|
| 73 |
+
'YouTube SQA: Chinese': 'ytb_sqa_batch3_chinese',
|
| 74 |
+
'YouTube SQA: Tamil': 'ytb_sqa_batch3_tamil',
|
| 75 |
+
|
| 76 |
+
'YouTube SDS: Malay': 'ytb_sds_batch3_malay',
|
| 77 |
+
'YouTube SDS: Chinese': 'ytb_sds_batch3_chinese',
|
| 78 |
+
'YouTube SDS: Tamil': 'ytb_sds_batch3_tamil',
|
| 79 |
|
| 80 |
'SEAME-Dev-Mandarin' : 'seame_dev_man',
|
| 81 |
'SEAME-Dev-Singlish' : 'seame_dev_sge',
|
|
|
|
| 165 |
|
| 166 |
'YouTube ASR: Chinese with Chinese Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset contains Chinese and some Chinese-English codeswitch audio clips, featuring with Chinese prompts. <br> It includes approximately 3.32 hours of audio, with individual clips ranging from 17 seconds to 1966 seconds in length.',
|
| 167 |
|
| 168 |
+
'YouTube ASR: Tamil with Tamil Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset contains Tamil and some Tamil-English codeswitch audio clips, featuring with Tamil prompts. <br> It includes approximately 2.44 hours of audio, with individual clips ranging from 30 seconds to 324 seconds in length.',
|
| 169 |
+
|
| 170 |
+
'YouTube ASR: Tamil with English Prompt': 'YouTube Evaluation Dataset for ASR Task: <br> This dataset contains Tamil and some Tamil-English codeswitch audio clips, featuring with English prompts. <br> It includes approximately 2.44 hours of audio, with individual clips ranging from 30 seconds to 324 seconds in length.',
|
| 171 |
+
|
| 172 |
+
'YouTube ASR Translation: Malay2English': 'YouTube Evaluation Dataset for ASR Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Malay</i>',
|
| 173 |
+
|
| 174 |
+
# 'YouTube ASR Translation: Chinese2English': 'YouTube Evaluation Dataset for ASR Task: <br> The audio of dataset is same as <i>YouTube ASR: Chinese<i>',
|
| 175 |
+
|
| 176 |
+
# 'YouTube ASR Translation: Tamil2English': 'YouTube Evaluation Dataset for ASR Task: <br> The audio of dataset is same as <i>YouTube ASR: Tamil<i>',
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
|
| 180 |
'SEAME-Dev-Mandarin' : 'Under Development',
|
| 181 |
'SEAME-Dev-Singlish' : 'Under Development',
|
| 182 |
|
| 183 |
'YouTube SQA: English with Singapore Content': 'YouTube Evaluation Dataset for Speech-QA Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 7.6 hours of audio, with individual clips ranging from 8 seconds to 32 seconds in length.',
|
| 184 |
|
| 185 |
+
'YouTube SQA: Malay': 'YouTube Evaluation Dataset for Speech-QA Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Malay</i>, it contains Malay and some Malay-English codeswitch audio clips, featuring English prompts. <br> It includes approximately 2.55 hours of audio, with individual clips ranging from 30 seconds to 95 seconds in length.',
|
| 186 |
+
|
| 187 |
+
'YouTube SQA: Chinese': 'YouTube Evaluation Dataset for Speech-QA Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Chinese</i>',
|
| 188 |
+
|
| 189 |
+
'YouTube SQA: Tamil': 'YouTube Evaluation Dataset for Speech-QA Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Tamil</i>',
|
| 190 |
+
|
| 191 |
'YouTube SDS: English with Singapore Content': 'YouTube Evaluation Dataset for Summary Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 5.4 hours of audio, with individual clips ranging from 8 seconds to 32 seconds in length.',
|
| 192 |
|
| 193 |
+
'YouTube SDS: Malay': 'YouTube Evaluation Dataset for Summary Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Malay</i>, it contains Malay and some Malay-English codeswitch audio clips, featuring English prompts. <br> It includes approximately 2.55 hours of audio, with individual clips ranging from 30 seconds to 95 seconds in length.',
|
| 194 |
+
|
| 195 |
+
'YouTube SDS: Chinese': 'YouTube Evaluation Dataset for Summary Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Chinese</i>',
|
| 196 |
+
|
| 197 |
+
'YouTube SDS: Tamil': 'YouTube Evaluation Dataset for Summary Task: <br> The audio of this dataset is the same as <i>YouTube ASR: Tamil</i>',
|
| 198 |
+
|
| 199 |
+
|
| 200 |
'YouTube PQA: English with Singapore Content': 'YouTube Evaluation Dataset for Paralinguistics QA Task: <br> This dataset contains English and Singlish audio clips, featuring Singapore-related content. <br> It includes approximately 41.4 hours of audio, with individual clips ranging from 41 seconds to 83 seconds in length.',
|
| 201 |
|
| 202 |
|
| 203 |
}
|
| 204 |
|
| 205 |
|
|
|
|
|
|
|
| 206 |
metrics_info = {
|
| 207 |
'wer' : 'Word Error Rate (WER) - The Lower, the better.',
|
| 208 |
'llama3_70b_judge_binary': 'Model-as-a-Judge Performance. Using LLAMA-3-70B. Scale from 0-100. The higher, the better.',
|
app/pages.py
CHANGED
|
@@ -522,11 +522,6 @@ def music_understanding():
|
|
| 522 |
|
| 523 |
|
| 524 |
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
def under_development():
|
| 531 |
st.title("Task: Under Development")
|
| 532 |
|
|
|
|
| 522 |
|
| 523 |
|
| 524 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 525 |
def under_development():
|
| 526 |
st.title("Task: Under Development")
|
| 527 |
|