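# SocialEar demo app (Gradio): audio classification, realtime audio classification,
# speech recognition, and a help chatbot, with output in English (en-us) or
# Brazilian Portuguese (pt-br). All model and processing logic is imported from the
# local helpers module.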
import gradio as gr
from transformers import pipeline
from helpers import load_model_file, load_wav_16k_mono_librosa, initialize_text_to_speech_model, load_label_mapping, predict_yamnet, classify, classify_realtime
from helpers import interface, interface_realtime, updateHistory, clearHistory, clear, format_dictionary, format_json
from helpers import generate_audio, TTS, TTS_ASR, TTS_chatbot, transcribe_speech, transcribe_speech_realtime, transcribe_realtime, translate_enpt
from helpers import chatbot_response, add_text
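# Module-level state: a rolling history string for realtime classification and the
# chatbot's last answer (presumably read/updated from within the helpers module).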
history = ""
last_answer = ""
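# Example audio clips and questions bundled with the Space, one list per demo.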
examples_audio_classification = [
    "content/talking-people.mp3",
    "content/miaow_16k.wav",
    "content/birds-in-forest-loop.wav",
    "content/drumming-jungle-music.wav",
    "content/driving-in-the-rain.wav",
    "content/city-alert-siren.wav",
    "content/small-group-applause.wav",
    "content/angry-male-crowd-ambience.wav",
    "content/slow-typing-on-a-keyboard.wav",
    "content/emergency-car-arrival.wav"
]

examples_speech_recognition_en = [
    "content/speech1-en.wav",
    "content/speech2-en.wav",
    "content/speech1-ptbr.wav",
    "content/speech2-ptbr.wav",
    "content/speech3-ptbr.wav"
]

examples_speech_recognition_ptbr = [
    "content/speech1-ptbr.wav",
    "content/speech2-ptbr.wav",
    "content/speech3-ptbr.wav",
]

examples_chatbot_en = [
    ['How does SocialEar assist people with hearing disabilities?'],
    ['Give me suggestions on how to use SocialEar'],
    ['How does SocialEar work?'],
    ['Are SocialEar results accurate?'],
    ['What accessibility features does SocialEar offer?'],
    ['Does SocialEar collect personal data?'],
    ['Can I use SocialEar to identify songs and artists from recorded audio?'],
]

examples_chatbot_ptbr = [
    ['Como o SocialEar auxilia pessoas com deficiência auditiva?'],
    ['Dê-me sugestões sobre como usar o SocialEar'],
    ['Como funciona o SocialEar?'],
    ['Os resultados do SocialEar são precisos?'],
    ['Quais recursos de acessibilidade o SocialEar oferece?'],
    ['O SocialEar coleta dados pessoais?'],
    ['Posso usar o SocialEar para identificar músicas e artistas de áudio gravado?'],
]
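# Navigation callbacks: each one shows the selected tool's Row and hides the others
# by returning gr.Row(visible=...) updates keyed by the Row components defined below.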
def to_audioClassification():
    return {
        audio_classification: gr.Row(visible=True),
        realtime_classification: gr.Row(visible=False),
        speech_recognition: gr.Row(visible=False),
        chatbot_qa: gr.Row(visible=False),
    }

def to_realtimeAudioClassification():
    return {
        audio_classification: gr.Row(visible=False),
        realtime_classification: gr.Row(visible=True),
        speech_recognition: gr.Row(visible=False),
        chatbot_qa: gr.Row(visible=False),
    }

def to_speechRecognition():
    return {
        audio_classification: gr.Row(visible=False),
        realtime_classification: gr.Row(visible=False),
        speech_recognition: gr.Row(visible=True),
        chatbot_qa: gr.Row(visible=False),
    }

def to_chatbot():
    return {
        audio_classification: gr.Row(visible=False),
        realtime_classification: gr.Row(visible=False),
        speech_recognition: gr.Row(visible=False),
        chatbot_qa: gr.Row(visible=True),
    }
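# Main UI: a language selector, four navigation buttons, and one initially hidden Row per tool.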
with gr.Blocks() as demo:
    with gr.Accordion("Language Output", open=False):
        language = gr.Radio(["en-us", "pt-br"], label="Language", info="Choose the language to display the classification result and audio", value='en-us', interactive=True)
    with gr.Row():
        btn0 = gr.Button("Audio Classification", scale=1, icon='content/Audio Classification.png', size='lg')
        btn1 = gr.Button("Realtime Audio Classification", scale=1, icon='content/Realtime Audio Classification.png', size='lg')
        btn2 = gr.Button("Speech Recognition", scale=1, icon='content/Speech Recognition.png', size='lg')
        btn3 = gr.Button("Help", scale=1, icon='content/Chatbot.png', size='lg')
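    # Audio Classification tool: record or upload a clip, classify it (the classifier
    # appears to be YAMNet-based, given the predict_yamnet helper), and optionally
    # synthesize the result as speech in the selected language.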
    with gr.Row(visible=False) as audio_classification:
        with gr.Column(min_width=700):
            with gr.Accordion("Record an Audio", open=True):
                inputRecord = gr.Audio(label="Audio Input", source="microphone", type="filepath")
            with gr.Accordion("Upload a file", open=False):
                inputUpload = gr.Audio(label="Audio Input", source="upload", type="filepath")
            clearBtn = gr.ClearButton([inputRecord, inputUpload])
        with gr.Column(min_width=700):
            output = gr.Label(label="Audio Classification")
            btn = gr.Button(value="Generate Audio")
            audioOutput = gr.Audio(label="Audio Output", interactive=False)
        inputRecord.stop_recording(interface, [inputRecord, language], [output])
        inputUpload.upload(interface, [inputUpload, language], [output])
        btn.click(fn=TTS, inputs=[output, language], outputs=audioOutput)
        examples = gr.Examples(fn=interface, examples=examples_audio_classification, inputs=[inputRecord], outputs=[output], run_on_click=True)
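    # Realtime Audio Classification tool: stream microphone audio, classify the incoming
    # stream roughly every 10 seconds (every=10), and keep a running history of predictions.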
    with gr.Row(visible=False) as realtime_classification:
        with gr.Column(min_width=700):
            input = gr.Audio(label="Audio Input", source="microphone", type="filepath", streaming=True, every=10)
            historyOutput = gr.Textbox(label="History", interactive=False)
            # historyOutput = gr.Label(label="History")
        with gr.Column(min_width=700):
            output = gr.Label(label="Audio Classification")
        input.change(interface_realtime, [input, language], output)
        input.change(updateHistory, None, historyOutput)
        input.start_recording(clearHistory, None, historyOutput)
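    # Speech Recognition tool: transcribe recorded or uploaded speech in the selected
    # language; the example list mixes English and Brazilian Portuguese clips.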
    with gr.Row(visible=False) as speech_recognition:
        with gr.Column(min_width=700):
            with gr.Accordion("Record an Audio", open=True):
                inputRecord = gr.Audio(label="Audio Input", source="microphone", type="filepath")
            with gr.Accordion("Upload a file", open=False):
                inputUpload = gr.Audio(label="Audio Input", source="upload", type="filepath")
            clearBtn = gr.ClearButton([inputRecord])
        with gr.Column(min_width=700):
            output = gr.Label(label="Transcription")
        inputRecord.stop_recording(transcribe_speech, [inputRecord, language], [output])
        inputUpload.upload(transcribe_speech, [inputUpload, language], [output])
        examplesSpeechEn = gr.Examples(fn=transcribe_speech, examples=examples_speech_recognition_en, inputs=[inputRecord], outputs=[output], run_on_click=True, label="Examples")
        # examplesSpeechPtbr = gr.Examples(fn=transcribe_speech, examples=examples_speech_recognition_ptbr, inputs=[inputRecord], outputs=[output], run_on_click=True, label="Portuguese Examples")
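    # Help chatbot: answer questions about SocialEar typed or spoken by the user, and
    # read the latest answer aloud via text-to-speech.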
    with gr.Row(visible=False) as chatbot_qa:
        chatbot = gr.Chatbot(
            [],
            elem_id="chatbot",
            bubble_full_width=False,
            avatar_images=(None, "content/avatar-socialear.png"),
            min_width=2000
        )
        with gr.Row(min_width=2000):
            txt = gr.Textbox(
                scale=4,
                show_label=False,
                placeholder="Enter text and press enter",
                container=False,
                min_width=1000
            )
            submit = gr.Button(value="", size='sm', scale=1, icon='content/send-icon.png')
            inputRecord = gr.Audio(label="Record a question", source="microphone", type="filepath", min_width=600)
            btn = gr.Button(value="Listen to the answer")
            audioOutput = gr.Audio(interactive=False, min_width=600)
        txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
            chatbot_response, [chatbot, language], chatbot)
        txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
        submit.click(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
            chatbot_response, [chatbot, language], chatbot).then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
        inputRecord.stop_recording(transcribe_speech, [inputRecord, language], [txt])
        btn.click(fn=TTS_chatbot, inputs=[language], outputs=audioOutput)
        with gr.Row(min_width=2000):
            examplesChatbotEn = gr.Examples(examples=examples_chatbot_en, inputs=[txt], label="English Examples")
            examplesChatbotPtbr = gr.Examples(examples=examples_chatbot_ptbr, inputs=[txt], label="Portuguese Examples")
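    # Navigation: each button reveals its tool's Row and hides the others via the
    # to_* callbacks defined above.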
    btn0.click(fn=to_audioClassification, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa])
    btn1.click(fn=to_realtimeAudioClassification, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa])
    btn2.click(fn=to_speechRecognition, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa])
    btn3.click(fn=to_chatbot, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa])
if __name__ == "__main__":
    demo.queue()
    demo.launch()